The purposes of this notebook are as follows:
1. Identify fast food restaurants in each Brooklyn and Manhattan NTA
2. Perform clustering 

In [96]:
# Setting up modules
import geopandas as gpd
from geopandas import GeoDataFrame
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import re

In [97]:
# Locations of the input/output data, all rooted at the project data directory
data_path = "./data"
path_census_tracts = data_path + "/ACS/CensusTracts"
# Per-borough Yelp folders; both keep a trailing "/" so file names can be appended directly
Yelp_MN_path = "{}/Yelp/MN/".format(data_path)
Yelp_BK_path = "{}/Yelp/BK/".format(data_path)

In [98]:
# Load the per-borough Yelp shapefiles (same file naming scheme for both boroughs)
yelp_shapefile = "{}_Yelp_CensusTract_NTA.shp"
BK_Yelp = gpd.read_file(Yelp_BK_path + yelp_shapefile.format("BK"))
MN_Yelp = gpd.read_file(Yelp_MN_path + yelp_shapefile.format("MN"))

In [99]:
# Preview the first rows of the Manhattan Yelp layer to sanity-check the load
MN_Yelp.head()

Unnamed: 0,id,alias,name,is_closed,review_cou,rating,price,categories,latitude,longitude,...,NTAName,0-25k,25-50k,50-75k,75-100k,100-125k,125-150k,> 150k,MeanLifeEx,geometry
0,ksksxd8J2SIs97ccPFV75A,brown-sugar-restaurant-new-york,Brown Sugar Restaurant,0,204,3.5,$$,"cuban,seafood,steak",40.869926,-73.915466,...,Marble Hill-Inwood,0.32,0.21,0.15,0.14,0.07,0.04,0.07,84.028571,POINT (-73.91546600000001 40.869926)
1,IHGm6huN_Z48KdorBjztSQ,guacamole-taqueria-new-york,Guacamole Taqueria,0,122,3.5,$$,mexican,40.86952,-73.91674,...,Marble Hill-Inwood,0.32,0.21,0.15,0.14,0.07,0.04,0.07,84.028571,POINT (-73.91674 40.86952)
2,rUBFgZU3QTk7IOgpccE8Og,indian-road-cafe-new-york,Indian Road Cafe,0,580,4.0,$$,"coffee,newamerican,bars",40.872915,-73.918441,...,Marble Hill-Inwood,0.32,0.21,0.15,0.14,0.07,0.04,0.07,84.028571,POINT (-73.91844082 40.87291516)
3,AflnoQBr01QmQIq5hVH-iA,la-essencia-restaurant-new-york,La Essencia Restaurant,0,13,3.5,$$,"mexican,dominican",40.87087,-73.91505,...,Marble Hill-Inwood,0.32,0.21,0.15,0.14,0.07,0.04,0.07,84.028571,POINT (-73.91504999999999 40.87087)
4,8o-B_1q4XB48CmdaXdm2KQ,raices-new-york,Raices,0,127,3.5,$$,"bars,latin",40.86633,-73.92016,...,Marble Hill-Inwood,0.32,0.21,0.15,0.14,0.07,0.04,0.07,84.028571,POINT (-73.92016 40.86633)


In [100]:
# Select fast food restaurants.
# A business is kept when
#   1. its "categories" string contains "hotdog" (substring match, so e.g.
#      "hotdogs" matches too), and
#   2. its "price" is exactly "$", "$$" or the "MISSING" placeholder.
# Prices are compared exactly rather than with str.contains, because a
# substring/regex search for "$" also matches "$$$" and "$$$$".
FAST_FOOD_CATEGORY = "hotdog"
FAST_FOOD_PRICES = ["$", "$$", "MISSING"]


def _select_fast_food(yelp):
    """Return the rows of `yelp` that are fast food places in an allowed price tier.

    Parameters
    ----------
    yelp : DataFrame or GeoDataFrame with "categories" and "price" columns.

    Returns
    -------
    The subset of `yelp` (same type, original index) passing both filters.
    Rows whose category or price is missing (NaN) are excluded.
    """
    in_category = yelp["categories"].str.contains(FAST_FOOD_CATEGORY, na=False)
    in_price_tier = yelp["price"].isin(FAST_FOOD_PRICES)
    return yelp[in_category & in_price_tier]


BK_fast_food = _select_fast_food(BK_Yelp)
MN_fast_food = _select_fast_food(MN_Yelp)

In [101]:
# # Check and some random stats
# print(MN_fast_food.shape)
# print(BK_fast_food.shape)
# print(MN_fast_food.name.nunique())
# print(BK_fast_food.name.nunique())

In [102]:
# Coerce the (Geo)DataFrames into plain pandas DataFrames for grouping
MN_fast_food_df = pd.DataFrame(MN_fast_food)
BK_fast_food_df = pd.DataFrame(BK_fast_food)


def _count_restaurants_by_nta(fast_food_df):
    """Build a wide NTA x restaurant-name table of location counts.

    Parameters
    ----------
    fast_food_df : DataFrame with "NTAName" and "name" columns, one row per
        restaurant location.

    Returns
    -------
    DataFrame with one row per NTA: an "NTAName" column, one integer column
    per restaurant name (0 where that name has no location in the NTA), and
    a "Total" column with the number of locations in the NTA.
    """
    # Count locations per (NTA, restaurant name) and pivot names into columns,
    # which makes chains visible as counts > 1.
    counts = (
        fast_food_df
        .groupby(["NTAName", "name"])
        .size()
        .unstack(level="name", fill_value=0)
    )
    # Row-sum while NTAName is still the index, so only the numeric count
    # columns are added up. Summing after reset_index() would include the
    # string NTAName column, which raises a TypeError on pandas >= 2.0.
    counts["Total"] = counts.sum(axis=1)
    return counts.reset_index()


# One wide table per borough
MN_fast_food_NTA_df = _count_restaurants_by_nta(MN_fast_food_df)
BK_fast_food_NTA_df = _count_restaurants_by_nta(BK_fast_food_df)

# Check
MN_fast_food_NTA_df.head()

name,NTAName,375 Fries,5 Boroughs Pizza,Amazon Go,American Trash,Amsterdam Gourmet,Arby's,Armonie,At the Wallace,Auntie Anne's Pretzels,...,Village Fried Chicken,Wah Fung No 1 Fast Food,Wendy's,Westville - Wall St,White Castle,Wok To Walk,Wonder Fried Chicken & Pizza,Yaso Noodle Bar,Yia Yia's- Homemade Greek Food,Total
0,Battery Park City-Lower Manhattan,0,0,0,0,0,0,0,0,0,...,0,0,2,1,0,0,0,0,0,31
1,Central Harlem North-Polo Grounds,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,11
2,Central Harlem South,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,1,0,0,17
3,Chinatown,1,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,9
4,Clinton,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,20


In [103]:
# Export the per-NTA count tables to CSV, next to each borough's Yelp data.
# The Yelp_*_path variables already end with "/", so the file name is appended
# directly (a leading "/" here would produce ".../BK//BK_fast_food_NTA_df.csv").
# The default row index is still written as the first, unnamed CSV column.
BK_fast_food_NTA_df.to_csv(Yelp_BK_path + "BK_fast_food_NTA_df.csv")
MN_fast_food_NTA_df.to_csv(Yelp_MN_path + "MN_fast_food_NTA_df.csv")
