## Foursquare Data Collection
### Imports

In [463]:
import os
import re

import folium
import geopandas as gp
import numpy as np
import pandas as pd
import requests
from geopy import distance

### Cleaned Data

In [318]:
# Load the cleaned neighborhood shapefile; the preview shows one row per
# neighborhood with demographic columns and a polygon geometry.
df=gp.read_file("Data/Cleaned/data.shp")
# Tag the geometries as EPSG:4326 (lon/lat).
# NOTE(review): the {'init': ...} dict form is reported as deprecated by newer
# pyproj/geopandas releases — confirm before upgrading the environment.
df.crs={'init': 'epsg:4326'}
df.head()

Unnamed: 0,code,name,lat,long,area,borough,population,medAge,popDensity,dayPop,medValue,medRent,medIncome,meanIncome,veryLow,low,middle,high,veryHigh,geometry
0,BK09,Brooklyn Heights-Cobble Hill,40.695469,-73.994871,1.615327,Brooklyn,24212,37.1,14988,21307.0,856535.0,2278.0,125817.0,205275.0,1279,1201,2008,3355,3272,"POLYGON ((-73.99236367043254 40.689690123777, ..."
1,BK17,Sheepshead Bay-Gerritsen Beach-Manhattan Beach,40.5883,-73.941511,10.214922,Brooklyn,67681,43.9,6625,59559.0,476965.0,1180.0,57150.0,79613.0,6637,5298,6797,5785,1633,POLYGON ((-73.91809256480843 40.58657033500475...
2,BK19,Brighton Beach,40.580922,-73.961217,2.770061,Brooklyn,35811,44.3,12927,31514.0,561046.0,1194.0,36802.0,63703.0,5762,2697,3301,2006,791,POLYGON ((-73.96034953585246 40.58730628557137...
3,BK21,Seagate-Coney Island,40.57648,-73.991231,6.242851,Brooklyn,31132,39.0,4986,27396.0,457834.0,676.0,27345.0,49358.0,5381,2285,2158,1115,297,POLYGON ((-73.97459000582634 40.58313882075885...
4,BK23,West Brighton,40.579088,-73.973391,1.409979,Brooklyn,16436,58.0,11656,14464.0,311186.0,905.0,40316.0,58752.0,3169,1790,2212,955,275,"POLYGON ((-73.9688899587795 40.57526123899416,..."


### Foursquare Options

In [51]:
# Foursquare API credentials are read from the environment so that secrets are
# never stored in (or committed with) the notebook. Export
# FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the kernel.
# Any key pair that was previously hard-coded here should be considered leaked
# and be revoked.
CLIENT_ID=os.environ.get("FOURSQUARE_CLIENT_ID","")
CLIENT_SECRET=os.environ.get("FOURSQUARE_CLIENT_SECRET","")
# API version date (YYYYMMDD) sent as the `v` parameter of every request.
VERSION="20190726"
if not (CLIENT_ID and CLIENT_SECRET):
    # Warn early instead of failing later inside an API loop.
    print("WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; Foursquare API calls will fail.")

### Foursquare Categories

In [143]:
# Download the Foursquare venue-category tree; the decoded JSON is kept in
# `catJson` for the category-flattening cells below.
catUrl=(
    "https://api.foursquare.com/v2/venues/categories"
    "?&client_id={}&client_secret={}&v={}"
).format(CLIENT_ID,CLIENT_SECRET,VERSION)
catJson=requests.get(catUrl).json()

In [665]:
import collections
def getSubCategories(cats,keywords,depth,parent=""):
    """Collect a category node and its descendants as a nested list.

    Parameters
    ----------
    cats : dict
        Category node with the keys 'id', 'name' and 'categories'
        (a list of child nodes of the same shape).
    keywords : str
        A single word that must appear among the whitespace-separated words
        of the node name; the empty string matches every node.
    depth : int
        Remaining levels to descend; children are visited only while this
        is greater than zero.
    parent : str
        Name of the parent node, stored in the encoded record.

    Returns
    -------
    list or None
        A list whose first entry (when the node itself matches) is the string
        "id;name;depth;parent", followed by one entry per child holding that
        child's own result (itself a nested list, or None when the child's
        subtree produced nothing). None is returned when the list is empty.
    """
    found=[]
    nameWords=cats['name'].split()
    if keywords in nameWords or keywords=="":
        found.append(";".join([cats['id'],cats['name'],str(depth),parent]))
    if depth>0 and cats['categories']!=[]:
        # One entry per child: nesting is preserved, not flattened.
        found.extend(
            getSubCategories(child,keywords,depth-1,cats['name'])
            for child in cats['categories']
        )
    return found if found!=[] else None
    
# Walk every top-level category (empty keyword = match all, up to 5 levels)
# and wrap each resulting nested list in its own DataFrame.
categoryList=[getSubCategories(i,'',5) for i in catJson['response']['categories']]
categoriesD=[pd.DataFrame(i) for i in categoryList]
# Stack the per-category frames in one pass. DataFrame.append (used here
# previously, once per frame) was removed in pandas 2.0 and re-copied the
# accumulated frame on every iteration. pd.concat rejects an empty list, so
# fall back to an empty frame in that case.
categories=pd.concat(categoriesD).reset_index(drop=True) if categoriesD else pd.DataFrame()

In [667]:
def fun(x):
    """Split an encoded category record into its ';'-separated fields.

    `x` is either the encoded string itself or a list whose first element is
    the encoded string (any further list elements are ignored).
    """
    record=x[0] if isinstance(x, list) else x
    return record.split(';')
# Decode every record of column 0 into its four fields.
Categories=pd.DataFrame(list(categories[0].apply(fun)),columns=['id','category','depth','parent'])
# Top-level categories carry an empty parent string; mark it as missing.
# Assign the result back instead of a chained `inplace=True` replace (which
# may act on a temporary copy), and use `np.nan` since the `np.NaN` alias was
# removed in NumPy 2.0.
Categories['parent']=Categories['parent'].replace("",np.nan)
# The stored value is the remaining depth counted down from 5; convert it to
# the level below the root (0 = top-level category).
Categories['depth']=5-(Categories['depth'].astype(int))
Categories.head()

Unnamed: 0,id,category,depth,parent
0,4d4b7104d754a06370d81259,Arts & Entertainment,0,
1,56aa371be4b08b9a8d5734db,Amphitheater,1,Arts & Entertainment
2,4fceea171983d5d06c3e9823,Aquarium,1,Arts & Entertainment
3,4bf58dd8d48988d1e1931735,Arcade,1,Arts & Entertainment
4,4bf58dd8d48988d1e2931735,Art Gallery,1,Arts & Entertainment


In [698]:
def getCategory(string):
    """Return the ids of all categories whose name matches any given word.

    `string` is a comma-separated list of words. Each word is inserted
    unescaped into the regular expression '.*word.*', which is matched
    against the 'category' column of the global `Categories` frame. Ids are
    returned in row order; a row contributes its id once per matching word,
    so duplicates are possible.
    """
    return [
        categoryRow['id']
        for _,categoryRow in Categories.iterrows()
        for word in string.split(',')
        if re.match('.*'+word+'.*',categoryRow['category']) is not None
    ]

### Radius of Neighborhoods in API calls

In [462]:
# Bounding box of every neighborhood polygon: minx/maxx are longitudes and
# miny/maxy are latitudes (the geometries are lon/lat coordinates).
bbs=df.geometry.bounds
# Search radius in metres = one third of the geodesic length of the bounding
# box diagonal. geopy expects points as (latitude, longitude), so the corners
# must be passed as (y, x); passing (x, y) measures the diagonal at a
# completely different location on the globe and yields wrong radii.
radii=[
    distance.geodesic((pt['miny'],pt['minx']),(pt['maxy'],pt['maxx']),ellipsoid='WGS-84').m/3
    for _,pt in bbs.iterrows()
]

### API Call Function

In [741]:
def makeAPICall(queryType,place,radius,cat,section=""):
    """Call a Foursquare v2 `venues/<queryType>` endpoint around one place.

    Parameters
    ----------
    queryType : str
        Endpoint name appended to `.../v2/venues/` (the notebook uses
        'search' and 'explore').
    place : mapping
        Must provide 'lat' and 'long'; they form the `ll` parameter.
    radius : number
        Value of the `radius` parameter.
    cat : str
        Value sent as the `query` parameter.
    section : str
        Value sent as the `section` parameter (empty by default).

    Returns
    -------
    The decoded JSON body of the response.

    Values are interpolated into the URL as-is (no URL encoding); credentials
    and version come from the module-level CLIENT_ID, CLIENT_SECRET, VERSION.
    """
    template=(
        "https://api.foursquare.com/v2/venues/{}"
        "?&client_id={}&client_secret={}&v={}"
        "&ll={},{}&radius={}&limit=200"
        "&query={}&section={}"
    )
    url=template.format(
        queryType,
        CLIENT_ID,CLIENT_SECRET,VERSION,
        place['lat'],place['long'],
        radius,cat,section,
    )
    return requests.get(url).json()

### Get Nearby Cultural Zones & Tourism Landmarks Data

In [601]:
from IPython.display import clear_output
# Category ids whose names match any of the landmark-related words.
siteCategories=getCategory("Site,Beach,Bridge,Museum,Art")
# Keep matches 1..8 of that list and add one extra id by hand.
siteCategories=siteCategories[1:9]+["4bf58dd8d48988d12d941735"]#Monuments added
siteCategories=','.join(siteCategories)
venues=[]
missedRows=[]
for i,row in df.iterrows():
    landJson=makeAPICall('search',row,radii[i],siteCategories)
    # Same failure handling as the food/shop loops: a non-200 reply has no
    # usable 'venues' payload, so record the neighborhood and move on instead
    # of aborting the whole collection run with a KeyError.
    if(landJson['meta']['code']!=200):
        missedRows.append(row['code'])
        print('Failed Read:',landJson['meta']['code'],row['name'])
        continue
    clear_output()
    print("Number of Neighborhood Data Obtained:",i+1)
    items=landJson['response']['venues']
    for item in items:
        # A venue without any category cannot be classified (and indexing [0]
        # on its empty list would raise IndexError), so skip it.
        if(not item['categories']):
            continue
        venues.append([row['code'],
                       item['name'],
                       item['categories'][0]['name'],
                       row['name'],
                       item['location']["lat"],
                       item['location']["lng"],
                        ])

Number of Neighborhood Data Obtained: 188


In [760]:
# Categories that are not landmarks and are dropped from the result.
remove=['Dance Studio','School','High School','Building','Tattoo Parlor']
# One row per collected venue; keep only categories that occur at least 10
# times, then drop the unwanted categories and renumber the rows.
landmarks=pd.DataFrame(data=venues,columns=["code","venue","category","name",'vLat','vLong'])
landmarks=(
    landmarks
    .groupby('category')
    .filter(lambda grp: len(grp) >= 10)
    .loc[lambda frame: ~frame['category'].isin(remove)]
    .reset_index(drop=True)
)

In [761]:
# Persist the filtered landmark venues (the row index is written as the first
# CSV column) and preview the first rows.
landmarks.to_csv("Data/Cleaned/LandmarkData.csv")
landmarks.head()

Unnamed: 0,code,venue,category,name,vLat,vLong
0,BK09,Pier 4 Beach,Beach,Brooklyn Heights-Cobble Hill,40.696595,-73.999084
1,BK09,Brooklyn War Memorial,Monument / Landmark,Brooklyn Heights-Cobble Hill,40.698657,-73.990776
2,BK09,Brooklyn Historical Society,History Museum,Brooklyn Heights-Cobble Hill,40.694942,-73.992333
3,BK09,The Heights Players Theatre,Indie Theater,Brooklyn Heights-Cobble Hill,40.693005,-73.998293
4,BK09,Truman Capote House,Historic Site,Brooklyn Heights-Cobble Hill,40.698906,-73.99495


### Get Food Outlet Data from Foursquare

In [657]:
# First category id whose name matches "Food"; sent as the query value.
foodCategories=getCategory("Food")[0]
foodVenues=[]
missedRows=[]
for i,row in df.iterrows():
    foodJson=makeAPICall('explore',row,radii[i],foodCategories,'food')
    if(foodJson['meta']['code']!=200):
        # Remember neighborhoods whose request failed and carry on.
        missedRows.append(row['code'])
        print('Failed Read',row['name'])
        continue
    clear_output()
    print("Number of Neighborhood Data Obtained:",i+1)
    items=foodJson['response']['groups'][0]['items']
    for item in items:
        # One record per venue: neighborhood code, venue name, first venue
        # category, neighborhood name and venue coordinates.
        foodVenues.append([
            row['code'],
            item['venue']['name'],
            item['venue']['categories'][0]['name'],
            row['name'],
            item['venue']['location']["lat"],
            item['venue']['location']["lng"],
        ])

Number of Neighborhood Data Obtained: 188


In [762]:
# One row per collected food venue; keep only categories that occur at least
# 15 times and renumber the rows.
foods=(
    pd.DataFrame(data=foodVenues,columns=["code","venue","category","name",'vLat','vLong'])
    .groupby('category')
    .filter(lambda grp: len(grp) >= 15)
    .reset_index(drop=True)
)

In [763]:
# Persist the filtered food venues (the row index is written as the first CSV
# column) and preview the first rows.
foods.to_csv("Data/Cleaned/foodData.csv")
foods.head()

Unnamed: 0,code,venue,category,name,vLat,vLong
0,BK09,Dellarocco's,Pizza Place,Brooklyn Heights-Cobble Hill,40.694992,-73.995924
1,BK09,Lassen & Hennigs,Deli / Bodega,Brooklyn Heights-Cobble Hill,40.69497,-73.994857
2,BK09,Sushi Gallery,Sushi Restaurant,Brooklyn Heights-Cobble Hill,40.697595,-73.993236
3,BK09,Saketumi Asian Bistro,Asian Restaurant,Brooklyn Heights-Cobble Hill,40.69491,-73.994578
4,BK09,Iron Chef House,Japanese Restaurant,Brooklyn Heights-Cobble Hill,40.697406,-73.99256


### Get Shops Data from Foursquare

In [823]:
# The joined category ids are immediately superseded by the plain 'gym' query
# string below, so this run collects gym venues into `gymVenues`;
# `shopVenues` is initialised but not filled by this cell.
shopCategories=','.join(["4d4b7105d754a06378d81259","4bf58dd8d48988d175941735"])
shopCategories='gym'
shopVenues=[]
gymVenues=[]
missedRows=[]
for i,row in df.iterrows():
    shopJson=makeAPICall('explore',row,radii[i],shopCategories,'')
    if(shopJson['meta']['code']!=200):
        # Remember neighborhoods whose request failed and carry on.
        missedRows.append(row['code'])
        print('Failed Read:',shopJson['meta']['code'],row['name'])
        continue
    clear_output()
    print("Number of Neighborhood Data Obtained:",i+1)
    items=shopJson['response']['groups'][0]['items']
    for item in items:
        # One record per venue: neighborhood code, venue name, first venue
        # category, neighborhood name and venue coordinates.
        gymVenues.append([
            row['code'],
            item['venue']['name'],
            item['venue']['categories'][0]['name'],
            row['name'],
            item['venue']['location']["lat"],
            item['venue']['location']["lng"],
        ])

Number of Neighborhood Data Obtained: 188


In [820]:
# Previously saved shop venues.
shops=pd.read_csv("Data/Cleaned/shopData.csv",index_col=0)
# Build the gym frame from the venues collected above. This definition was
# commented out, so on a fresh kernel the merge below failed with a NameError.
gyms=pd.DataFrame(data=gymVenues,columns=["code","venue","category","name",'vLat','vLong'])
# Merge gyms into the shop data (pd.concat replaces DataFrame.append, which
# was removed in pandas 2.0), ordered by neighborhood code.
# NOTE(review): the result is written back to the same CSV in the next cell,
# so re-running this cell adds the gym rows again — confirm this is intended.
shops=pd.concat([shops,gyms]).sort_values('code').reset_index(drop=True)

In [821]:
# Overwrite the shop data file with the merged frame (index written as the
# first CSV column, matching the index_col=0 used when it is read back).
shops.to_csv("Data/Cleaned/shopData.csv")

In [365]:
# Base map at (40.7128, -74.0060) with an overview zoom level.
ny=folium.Map((40.7128,-74.0060),zoom_start=10)
# Shade every neighborhood polygon by its population density; polygons are
# joined to the data rows through the 'name' property.
choro=folium.Choropleth(
    df,
    data=df,
    columns=['name','popDensity'],
    key_on='feature.properties.name',
    fill_color='YlOrRd',
    highlight=True,
)
choro.add_to(ny)
# Overlay the API search circle of every neighborhood, labelled with its name.
for i,row in df.iterrows():
    searchArea=folium.Circle((row['lat'],row['long']),radii[i],tooltip=str(row['name']))
    searchArea.add_to(ny)
ny