# Collect POIs

#### Primary Author
Chris Carey

#### Description:
This notebook collects the unique points of interest (POIs) found in the weekly POI visitation data and exports them with only the necessary columns, for smaller storage and faster processing.

#### Inputs:
```
data/weekly_and_core_with_area.csv
```
 
#### Outputs:
```
exports/poi.csv
```

In [1]:
import pandas as pd

import warnings
# Silence every Python warning for the rest of the session. No category or
# module filter is given, so this also hides pandas deprecation notices.
warnings.filterwarnings('ignore')

In [2]:
def peek(df, n=3):
    """Display the first ``n`` rows of ``df`` and print its total row count.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to preview.
    n : int, default 3
        Number of leading rows to display. The default keeps the original
        three-row preview.

    Returns
    -------
    None
        The preview is rendered and the row count is printed as side effects.
    """
    # `display` is not imported here: it is the rich-output helper that the
    # IPython/Jupyter kernel injects into the notebook namespace.
    display(df.head(n))
    print(len(df))

In [3]:
# Read the weekly visitation CSV into memory, then preview it.
weekly_csv_path = './data/weekly_and_core_with_area.csv'
poi_weekly_df = pd.read_csv(weekly_csv_path)
peek(poi_weekly_df)

Unnamed: 0,placekey,parent_placekey,safegraph_brand_ids,date_range_start,date_range_end,raw_visit_counts,raw_visitor_counts,visits_by_day,visits_by_each_hour,poi_cbg,...,open_hours,category_tags,opened_on,closed_on,tracking_opened_since,tracking_closed_since,category,date,safegraph_place_id,area_square_feet
0,222-222@627-s94-nwk,,,2020-12-21 05:00:00+00:00,2020-12-28T00:00:00-05:00,39,24,"[7,9,6,5,3,5,4]","[0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,4,0,0,0,0,0,0,0...",360470395002,...,"{ ""Mon"": [[""8:00"", ""19:00""]], ""Tue"": [[""8:00"",...",,,,,2019-07,Supermarkets and Specialty Food Stores,2020-12-21,sg:bbe025bf97774f46b165507367517013,3177.0
1,222-222@627-s94-nwk,,,2021-01-11 05:00:00+00:00,2021-01-18T00:00:00-05:00,41,27,"[3,5,7,4,6,13,3]","[0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0...",360470395002,...,"{ ""Mon"": [[""8:00"", ""19:00""]], ""Tue"": [[""8:00"",...",,,,,2019-07,Supermarkets and Specialty Food Stores,2021-01-11,sg:bbe025bf97774f46b165507367517013,3177.0
2,222-222@627-s94-nwk,,,2021-01-18 05:00:00+00:00,2021-01-25T00:00:00-05:00,39,21,"[3,6,7,6,7,8,2]","[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1...",360470395002,...,"{ ""Mon"": [[""8:00"", ""19:00""]], ""Tue"": [[""8:00"",...",,,,,2019-07,Supermarkets and Specialty Food Stores,2021-01-18,sg:bbe025bf97774f46b165507367517013,3177.0


3693204


In [4]:
# List every column in the weekly frame; the next cell keeps only a subset.
poi_weekly_df.columns

Index(['placekey', 'parent_placekey', 'safegraph_brand_ids',
       'date_range_start', 'date_range_end', 'raw_visit_counts',
       'raw_visitor_counts', 'visits_by_day', 'visits_by_each_hour', 'poi_cbg',
       'visitor_home_cbgs', 'visitor_daytime_cbgs',
       'visitor_country_of_origin', 'distance_from_home', 'median_dwell',
       'bucketed_dwell_times', 'related_same_day_brand',
       'related_same_week_brand', 'location_name', 'brands', 'top_category',
       'sub_category', 'naics_code', 'latitude', 'longitude', 'street_address',
       'city', 'region', 'postal_code', 'iso_country_code', 'open_hours',
       'category_tags', 'opened_on', 'closed_on', 'tracking_opened_since',
       'tracking_closed_since', 'category', 'date', 'safegraph_place_id',
       'area_square_feet'],
      dtype='object')

In [5]:
# Keep only the attributes that describe a place itself; the weekly visit
# columns (date ranges, visit counts, dwell times, ...) are left behind.
poi_columns = [
    'placekey',
    'poi_cbg',
    'naics_code',
    'category',
    'sub_category',
    'location_name',
    'area_square_feet',
    'latitude',
    'longitude',
]

# NOTE(review): duplicates are dropped across *all* selected columns, so a
# placekey whose attributes differ between weeks would remain on more than one
# row. Confirm placekey uniqueness if downstream code relies on it.
poi_df = (
    poi_weekly_df.loc[:, poi_columns]
    .drop_duplicates()
    .reset_index(drop=True)
    .rename(columns={'poi_cbg': 'cbg'})
)
peek(poi_df)

Unnamed: 0,placekey,cbg,naics_code,category,sub_category,location_name,area_square_feet,latitude,longitude
0,222-222@627-s94-nwk,360470395002,445210,Supermarkets and Specialty Food Stores,Meat Markets,Broadway Meats,3177.0,40.691436,-73.924891
1,223-222@627-rw6-zfz,360050386008,445110,Supermarkets and Specialty Food Stores,Supermarkets and Other Grocery (except Conveni...,Foodtown,3401.0,40.87689,-73.847776
2,223-222@627-rwq-vcq,360050117001,445110,Supermarkets and Specialty Food Stores,Supermarkets and Other Grocery (except Conveni...,Kirsch Mushroom Company,10079.0,40.816779,-73.883401


36475


In [8]:
# Write the de-duplicated POI table to disk. `index=False` is the documented
# way to omit the row index; the previous `index=None` only worked because
# None is falsy.
poi_df.to_csv('./exports/poi.csv', index=False)