In [76]:
import geopandas as gpd
import math
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

In [77]:
def peek(df):
    display(df.iloc[0:3, :])
    print(len(df))

In [78]:
poi_df = pd.read_csv('./exports/poi.csv')
peek(poi_df)

Unnamed: 0,placekey,cbg,naics_code,category,sub_category,location_name,area_square_feet,latitude,longitude
0,226-222@627-s4n-pqf,360610158022,311811,Supermarkets and Specialty Food Stores,Retail Bakeries,Unna Bakery,,40.787027,-73.950435
1,225-225@627-s99-9xq,360810849002,311811,Supermarkets and Specialty Food Stores,Retail Bakeries,Fay Da Bakery,24759.0,40.756244,-73.829009
2,225-225@627-vsw-7nq,360850146071,311811,Supermarkets and Specialty Food Stores,Retail Bakeries,Amici Bakery,16166.0,40.559894,-74.166016


36467


## Pull out Fast-Food Restaurants (FFRs)

In [79]:
FAST_FOOD_NAMES = set([
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4783380/
    "McDonald's",
    "Burger King",
    "Starbucks",
    "Dunkin'",
    "Pizza Hut",
    "Subway",
    "Taco Bell",
    "KFC",
    "Chick-fil-A",
    "Wendy's",
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2446463/
    "Au Bon Pain",
    "Papa John's",
    "Popeyes Louisiana Kitchen",
# https://dol.ny.gov/system/files/documents/2021/07/p716.pdf
    "Ben & Jerry's",
    "Chipotle Mexican Grill",
    "Golden Krust Caribbean Bakery and Grill",
    "Jamba",
    "Nathan's Famous",
    "Shake Shack",
    "Tim Hortons",
    "Uno Chicago Grill",
    "White Castle",
# https://s27147.pcdn.co/wp-content/uploads/NELP-Fact-Sheet-Fast-Food-Employment-New-York.pdf
    "Baskin Robbins",
    "Carvel",
    "Little Caesers Pizza",
    "Domino's Pizza",
    "Panera Bread",
    "Auntie Anne's",
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4967005/
    "Arby's",
])

In [80]:
condition_ffr = poi_df['location_name'].isin(FAST_FOOD_NAMES)
poi_df.loc[condition_ffr, 'category'] = 'Fast-Food Restaurants'
poi_df.loc[condition_ffr, 'sub_category'] = 'Fast-Food Restaurants'
peek(poi_df.loc[condition_ffr, :])

Unnamed: 0,placekey,cbg,naics_code,category,sub_category,location_name,area_square_feet,latitude,longitude
14743,232-222@627-wg6-vs5,360470968001,722511,Fast-Food Restaurants,Fast-Food Restaurants,Golden Krust Caribbean Bakery and Grill,605.0,40.645151,-73.902657
14790,234-223@627-s6k-6rk,360050151003,722511,Fast-Food Restaurants,Fast-Food Restaurants,Golden Krust Caribbean Bakery and Grill,10769.0,40.831446,-73.900305
15467,234-222@627-s26-zfz,360810632002,722511,Fast-Food Restaurants,Fast-Food Restaurants,Golden Krust Caribbean Bakery and Grill,2747.0,40.675867,-73.740355


2362


## Pull out Delis and combine with Convenience Stores

In [81]:
# Consider anything with "Deli" or "Delicatessen" in the name as a deli.
condition_dcs = (
    (poi_df['location_name'].str.match(r'.*(\bDeli\b)|Delicatessen.*')) |
    (poi_df['sub_category'] == 'Convenience Stores')
)
poi_df.loc[condition_dcs, 'category'] = 'Delis & Convenience Stores'
poi_df.loc[condition_dcs, 'sub_category'] = 'Delis & Convenience Stores'
peek(poi_df.loc[condition_dcs, :])

Unnamed: 0,placekey,cbg,naics_code,category,sub_category,location_name,area_square_feet,latitude,longitude
8,22b-222@627-s7n-73q,360810707002,311811,Delis & Convenience Stores,Delis & Convenience Stores,Gourmet Bakery & Deli,2829.0,40.711436,-73.855736
136,22d-222@627-wdm-jqf,360470258003,311811,Delis & Convenience Stores,Delis & Convenience Stores,Morelos Bakery & Deli,1991.0,40.614867,-73.987992
545,zzw-223@627-s8g-d35,360470571002,311811,Delis & Convenience Stores,Delis & Convenience Stores,Star Deli & Bakery,2544.0,40.725134,-73.946337


1885


In [82]:
poi_df.to_csv('./exports/poi_health_recategorized.csv', index=None)