## Core places processing
This notebook should read in the coreplaces files from the raw data folder and:
    1. Associate a date to each file
    2. Filter down each file to Philadelphia places
    3. Concatenate the files together
    4. Write out the result to a file in the proessed data folder

In [1]:
import pandas as pd
from safegraph_py_functions import safegraph_py_functions as sgpy
import os
from dotenv import load_dotenv, find_dotenv

In [2]:
# find .env automagically by walking up directories until it's found
dotenv_path = find_dotenv()

# load up the entries as environment variables
load_dotenv(dotenv_path)
root_dir = os.environ.get("ROOT_DIR")
raw_data_dir = os.path.join(root_dir,'data/raw')

In [3]:
import zipfile

In [5]:
# local directory where we want to put all the data
core_path = os.path.join(raw_data_dir,'core')
# print(local)
archives = []
# r=root, d=directories, f = files
for r, d, f in os.walk(core_path):
    for file in f:
        if file.endswith('.zip'):
            archives.append(os.path.join(core_path, r, file))
archives

[]

In [6]:
for archive in archives:
    with zipfile.ZipFile(archive,"r") as zip_ref:
        zip_ref.extractall(archive[:-4])
    os.unlink(archive)

In [7]:
files = []
# r=root, d=directories, f = files
for r, d, f in os.walk(core_path):
    for file in f:
        if file.endswith('.csv.gz') and 'core_poi-part' in file:
            files.append(os.path.join(core_path, r, file))

In [8]:
files

['/Users/hannahkronenberg/SafegraphCOVIDPhilly/data/raw/core/2020/03/CoreRecords-CORE_POI-2019_03-2020-03-25/core_poi-part5.csv.gz',
 '/Users/hannahkronenberg/SafegraphCOVIDPhilly/data/raw/core/2020/03/CoreRecords-CORE_POI-2019_03-2020-03-25/core_poi-part3.csv.gz',
 '/Users/hannahkronenberg/SafegraphCOVIDPhilly/data/raw/core/2020/03/CoreRecords-CORE_POI-2019_03-2020-03-25/core_poi-part1.csv.gz',
 '/Users/hannahkronenberg/SafegraphCOVIDPhilly/data/raw/core/2020/03/CoreRecords-CORE_POI-2019_03-2020-03-25/core_poi-part4.csv.gz',
 '/Users/hannahkronenberg/SafegraphCOVIDPhilly/data/raw/core/2020/03/CoreRecords-CORE_POI-2019_03-2020-03-25/core_poi-part2.csv.gz',
 '/Users/hannahkronenberg/SafegraphCOVIDPhilly/data/raw/core/2020/04/CoreApr2020Release-CORE_POI-2020_03-2020-04-07/core_poi-part5.csv.gz',
 '/Users/hannahkronenberg/SafegraphCOVIDPhilly/data/raw/core/2020/04/CoreApr2020Release-CORE_POI-2020_03-2020-04-07/core_poi-part3.csv.gz',
 '/Users/hannahkronenberg/SafegraphCOVIDPhilly/data/raw

In [11]:
file = files[0]
core_poi_df = pd.read_csv(file)

In [13]:
core_poi_df.head()

Unnamed: 0,safegraph_place_id,parent_safegraph_place_id,location_name,safegraph_brand_ids,brands,top_category,sub_category,naics_code,latitude,longitude,street_address,city,region,postal_code,iso_country_code,phone_number,open_hours,category_tags
0,sg:000a5de0eefc4789abf9d086689ebdb8,sg:ade7403da00e423a8721abad99704e34,Jackson Hewitt Tax Service,SG_BRAND_83bae7a11a4b9f1fb651baa1b26a2ec4,Jackson Hewitt Tax Service,"Accounting, Tax Preparation, Bookkeeping, and ...",Tax Preparation Services,541213.0,34.146075,-77.89712,5135 Carolina Beach Rd,Wilmington,NC,28412,US,,"{ ""Mon"": [[""10:00"", ""18:00""]], ""Tue"": [[""10:00...",
1,sg:001a8475c9b449a8814e7ce1eb8eb2aa,,Christ Team Super Thrift,,,Used Merchandise Stores,Used Merchandise Stores,453310.0,29.246192,-81.065776,1861 N Nova Rd,Holly Hill,FL,32117,US,13862550000.0,"{ ""Mon"": [[""10:00"", ""18:00""]], ""Tue"": [[""10:00...",
2,sg:001b580473bd459b91e16e1197b3f9c4,,The UPS Store,SG_BRAND_d100d4d8528c773aac249aa76b9eed07,The UPS Store,Couriers and Express Delivery Services,Couriers and Express Delivery Services,492110.0,33.872319,-84.455373,4355 Cobb Pkwy SE Ste J,Atlanta,GA,30339,US,17709530000.0,,
3,sg:0020aff773964d0aba38d42744fa91ce,,Taste Budz,,,Restaurants and Other Eating Places,Full-Service Restaurants,722511.0,28.903064,-82.377246,2780 N Florida Ave Ste 14,Hernando,FL,34442,US,13527260000.0,"{ ""Mon"": [[""7:00"", ""20:00""]], ""Tue"": [[""7:00"",...","BBQ and Southern Food,Brunch"
4,sg:00485528339f4255be17e27076ce62a0,,Citrus Park Bikes,,,"Sporting Goods, Hobby, and Musical Instrument ...",Sporting Goods Stores,451110.0,28.068126,-82.56972,7424 Edgemere Rd,Tampa,FL,33625,US,18137500000.0,"{ ""Mon"": [[""9:30"", ""18:00""]], ""Tue"": [[""9:30"",...",
