In [12]:
#!conda install -c conda-forge geopy --yes
import requests
import pandas as pd
import numpy as np
import json
from bs4 import BeautifulSoup as bs
from geopy.geocoders import Nominatim

## We try to get Chinatown location first

In [13]:
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

In [14]:
neighborhoods_data = newyork_data['features']
#take a look at structure in feature
newyork_data['features'][1]

{'type': 'Feature',
 'id': 'nyu_2451_34572.2',
 'geometry': {'type': 'Point',
  'coordinates': [-73.82993910812398, 40.87429419303012]},
 'geometry_name': 'geom',
 'properties': {'name': 'Co-op City',
  'stacked': 2,
  'annoline1': 'Co-op',
  'annoline2': 'City',
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.82993910812398,
   40.87429419303012,
   -73.82993910812398,
   40.87429419303012]}}

In [15]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

In [16]:
#get all info into dataframe
for x in neighborhoods_data:
    borough = x['properties']['borough'] 
    neighborhood_name = x['properties']['name']
        
    neighborhood_latlon = x['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [17]:
neighborhoods.head(20)

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585
5,Bronx,Kingsbridge,40.881687,-73.902818
6,Manhattan,Marble Hill,40.876551,-73.91066
7,Bronx,Woodlawn,40.898273,-73.867315
8,Bronx,Norwood,40.877224,-73.879391
9,Bronx,Williamsbridge,40.881039,-73.857446


In [18]:
neighborhoods.loc[neighborhoods['Neighborhood']=='Chinatown']

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
100,Manhattan,Chinatown,40.715618,-73.994279


In [19]:
#get chinatown latitude and longtitude
latitude=neighborhoods.iloc[100,-2]
longitude=neighborhoods.iloc[100,-1]
print('Chinatown in New York is at ({},{})'.format(latitude,longitude))

Chinatown in New York is at (40.71561842231432,-73.99427936255978)


In [20]:
#next, use explore function in Foursquare API to find 100 Chinese Restaurant

In [49]:
# we can use parameter 'categoryId' to filter all Chinese Restaurant
CLIENT_ID='0BPBI1NPMF11CVC2IP2AOOGYLSCFZIZCHTN4UKHQJCCFVHFN'
CLIENT_SECRET='5S1BMTKQ50OSBHWTO1EAHRBTOYBXPGZLIR1G1SZUFIBU103F'
VERSION='20200701'
radius=2000
LIMIT=300
categoryId='4bf58dd8d48988d145941735'
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}&categoryId={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT,categoryId)
url

'https://api.foursquare.com/v2/venues/explore?client_id=0BPBI1NPMF11CVC2IP2AOOGYLSCFZIZCHTN4UKHQJCCFVHFN&client_secret=5S1BMTKQ50OSBHWTO1EAHRBTOYBXPGZLIR1G1SZUFIBU103F&ll=40.71561842231432,-73.99427936255978&v=20200701&radius=2000&limit=300&categoryId=4bf58dd8d48988d145941735'

In [51]:
results = requests.get(url).json()

In [52]:
df_init=[]

In [53]:
columnname=['NAME','ID','ADDRESS']

In [54]:
df_init=pd.DataFrame(columns=columnname)
df_init

Unnamed: 0,NAME,ID,ADDRESS


In [55]:
#use for loop to add all results into dataframe, note that due to limit by API, we can only retrive 100 spots at one time
for item in results['response']['groups'][0]['items']:
    NAME=item['venue']['name']
    ID=item['venue']['id']
    try:
        ADDRESS=item['venue']['location']['address']
    except:
        ADDRESS='N/A'
    df_init=df_init.append({'NAME':NAME,'ID':ID,'ADDRESS':ADDRESS},ignore_index=True)
df_init

Unnamed: 0,NAME,ID,ADDRESS
0,Big Wong King 大旺,3fd66200f964a520ede41ee3,67 Mott St
1,Great N.Y. Noodletown,3fd66200f964a520b1ea1ee3,28 Bowery
2,Mission Chinese Food,4fa89bb2e4b0bad89524b84a,171 E Broadway
3,Nom Wah Tea Parlor,49f220a3f964a520ee691fe3,13 Doyers St
4,Wah Fung Number 1 Fast Food 華豐快飯店,4a96bf8ff964a520ce2620e3,79 Chrystie St
...,...,...,...
95,Ho Foods,5a06749bd3cce816fb29ce65,110 E 7th St
96,99 Favor Taste,5974056d4b78c57f67d093b4,37 Saint Marks Pl
97,Golden Fung Wong Bakery,49fd0549f964a520ff6e1fe3,41 Mott St
98,New Kim Tuong 金祥飯店,4c50ba549d642d7fecd95ddc,83 Chrystie St


#now we use id in each row and Premium endpoint of Foursquare API ' https://api.foursquare.com/v2/venues/VENUE_ID' to search for categories,price tier,likes count,rating,
#ratingSignals,likes count,popular timeframe,latitude,longitude
## 1. we use a function to find out Chinese Restaurant and Non-Chinese Restaurant 
## 2. we use first for loop to filter the most liked tip
## 3. we use second for loop to combine all popular timeframe into one,cos timeframes can be more than one

In [71]:
# we design a function to find out Chinese Restaurant and Non-Chinese Restaurant,cos one venue can have more than one categories, so we use for loop to filter all categories
def if_cnr(venue_input):
    
        list1=[]
        try:
            for item in venue_input['response']['groups'][0]['items']:
                x=('4bf58dd8d48988d145941735' in item['venue']['categories'][0]['id'])
                list1.append(x)
            if True in list1:
                return 'Chinese Restaurant'
            else:
                return 'Non-Chinese Restaurant'
        except:
            return 'N/A'

In [72]:
# results['response']['groups'][0]['items'][0]['venue']['categories'][0]['id']

'4bf58dd8d48988d145941735'

In [73]:
# testing tips API
url_tips='https://api.foursquare.com/v2/venues/{}/tips?&sort=popular&client_id={}&client_secret={}&v={}'.format('5a06749bd3cce816fb29ce65',CLIENT_ID, CLIENT_SECRET,VERSION)
print(url_tips)
tips_detail=requests.get(url_tips).json()
tips_detail['response']['tips']['items'][0]['agreeCount']

https://api.foursquare.com/v2/venues/5a06749bd3cce816fb29ce65/tips?&sort=popular&client_id=0BPBI1NPMF11CVC2IP2AOOGYLSCFZIZCHTN4UKHQJCCFVHFN&client_secret=5S1BMTKQ50OSBHWTO1EAHRBTOYBXPGZLIR1G1SZUFIBU103F&v=20200701


5

In [76]:
#1. we use two premium API-- venue details and venue tips
#2. we use first for loop to filter the most liked tip
#3. we use second for loop to combine all popular timeframe into one,cos timeframes can be more than one
#4. Due to venues details are not the same, for example, some have tips some don't, so we use try/except to catach some details in case errors occur

df=df_init
df['Price_Tier']=''
df['Likes_Count']=''
df['Rating']=''
df['Rating_Signals']=''
df['Tips']=''
df['Agree_Count']=''
df['Polular_Timeframe_Today']=''
df['Latitude']=''
df['Longitude']=''

for index,row in df_init.iterrows():
    id_venue=row['ID']
    url_venue='https://api.foursquare.com/v2/venues/{}?&client_id={}&client_secret={}&v={}'.format(id_venue,CLIENT_ID, CLIENT_SECRET,VERSION)
    url_tips='https://api.foursquare.com/v2/venues/{}/tips?&sort=popular&client_id={}&client_secret={}&v={}'.format(id_venue,CLIENT_ID, CLIENT_SECRET,VERSION)
    venue_detail=requests.get(url_venue).json()
    tips_detail=requests.get(url_tips).json()

    try:
        Price_Tier=venue_detail['response']['venue']['price']['tier']
    except:
        Price_Tier='N/A'
    try:
        Likes_Count=venue_detail['response']['venue']['likes']['count']
    except:
        Likes_Count='N/A'
    try:
        Rating=venue_detail['response']['venue']['rating']
    except:
        Rating='N/A'
    try:
        Rating_Signals=venue_detail['response']['venue']['ratingSignals']
    except:
        Rating_Signals='N/A'
    #use API parameter 'sort=popular' to retrive most liked tips
    Tips=''
    Agree_Count=0
    try:
        Tips=tips_detail['response']['tips']['items'][0]['text']
        Agree_Count=tips_detail['response']['tips']['items'][0]['agreeCount']
    except:
        Agree_Count='N/A'
        Tips=='N/A'

    #deal with today's popular timeframes,combine all timeframe into one
    Polular_Timeframe_Today=''
    try:
        for y in venue_detail['response']['venue']['popular']['timeframes'][0]['open']:
            Polular_Timeframe_Today+=y['renderedTime']
    except:
        Polular_Timeframe_Today='N/A'
    
    try:
        Latitude=venue_detail['response']['venue']['location']['lat']
        Longitude=venue_detail['response']['venue']['location']['lng']
    except:
        Latitude='N/A'
        Longitude='N/A'
    #all retrived data into columns
    
    row['Price_Tier']=Price_Tier
    row['Likes_Count']=Likes_Count
    row['Rating']=Rating
    row['Rating_Signals']=Rating_Signals
    row['Tips']=Tips
    row['Agree_Count']=Agree_Count
    row['Polular_Timeframe_Today']=Polular_Timeframe_Today
    row['Latitude']=Latitude
    row['Longitude']=Longitude


In [80]:
# show dataframe
df

Unnamed: 0,NAME,ID,ADDRESS,Category,Price_Tier,Likes_Count,Rating,Rating_Signals,Tips,Agree_Count,Polular_Timeframe_Today,Latitude,Longitude
0,Big Wong King 大旺,3fd66200f964a520ede41ee3,67 Mott St,,1,422,8.4,646,Take rice box 2 go - BEST BBQ Roast Pork (beat...,6,10:00 AM–9:00 PM,40.7162,-73.9983
1,Great N.Y. Noodletown,3fd66200f964a520b1ea1ee3,28 Bowery,,1,547,8,910,"If it’s soft-shell crab season, you know what ...",9,Noon–Midnight,40.715,-73.9969
2,Mission Chinese Food,4fa89bb2e4b0bad89524b84a,171 E Broadway,,3,945,8.1,1374,NYers keep turning out in droves at this nonde...,21,Noon–3:00 PM5:00 PM–10:00 PM,40.7141,-73.9898
3,Nom Wah Tea Parlor,49f220a3f964a520ee691fe3,13 Doyers St,,1,1258,8.4,1790,This 92-year-old Chinatown restaurant serves t...,28,10:00 AM–9:00 PM,40.7145,-73.9982
4,Wah Fung Number 1 Fast Food 華豐快飯店,4a96bf8ff964a520ce2620e3,79 Chrystie St,,1,192,8.5,281,Go big or go home. $3 for more-than-you-can-ea...,13,11:00 AM–7:00 PM,40.7173,-73.9942
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Ho Foods,5a06749bd3cce816fb29ce65,110 E 7th St,,,,,,,,,,
96,99 Favor Taste,5974056d4b78c57f67d093b4,37 Saint Marks Pl,,,,,,,,,,
97,Golden Fung Wong Bakery,49fd0549f964a520ff6e1fe3,41 Mott St,,,,,,,,,,
98,New Kim Tuong 金祥飯店,4c50ba549d642d7fecd95ddc,83 Chrystie St,,,,,,,,,,


In [106]:
# take out rating=N/A items
df1=df[df['Rating']!='N/A']
df1

Unnamed: 0,NAME,ID,ADDRESS,Category,Price_Tier,Likes_Count,Rating,Rating_Signals,Tips,Agree_Count,Polular_Timeframe_Today,Latitude,Longitude
0,Big Wong King 大旺,3fd66200f964a520ede41ee3,67 Mott St,,1.0,422,8.4,646,Take rice box 2 go - BEST BBQ Roast Pork (beat...,6.0,10:00 AM–9:00 PM,40.7162,-73.9983
1,Great N.Y. Noodletown,3fd66200f964a520b1ea1ee3,28 Bowery,,1.0,547,8.0,910,"If it’s soft-shell crab season, you know what ...",9.0,Noon–Midnight,40.715,-73.9969
2,Mission Chinese Food,4fa89bb2e4b0bad89524b84a,171 E Broadway,,3.0,945,8.1,1374,NYers keep turning out in droves at this nonde...,21.0,Noon–3:00 PM5:00 PM–10:00 PM,40.7141,-73.9898
3,Nom Wah Tea Parlor,49f220a3f964a520ee691fe3,13 Doyers St,,1.0,1258,8.4,1790,This 92-year-old Chinatown restaurant serves t...,28.0,10:00 AM–9:00 PM,40.7145,-73.9982
4,Wah Fung Number 1 Fast Food 華豐快飯店,4a96bf8ff964a520ce2620e3,79 Chrystie St,,1.0,192,8.5,281,Go big or go home. $3 for more-than-you-can-ea...,13.0,11:00 AM–7:00 PM,40.7173,-73.9942
5,House Special 甘來飯店,59ad625a2619ee5cdded881f,171 Hester St,,,22,8.2,36,Famous for our House Special Chicken - crispy ...,8.0,,40.7178,-73.9968
6,Deluxe Green Bo Restaurant,3fd66200f964a520ceea1ee3,66 Bayard St,,1.0,185,8.1,321,The Shanghainese food here is great. Order the...,5.0,11:00 AM–10:00 PM,40.7155,-73.9981
7,Hop Kee,3fd66200f964a5206be61ee3,21 Mott St,,2.0,203,7.9,353,Rarely do you return to a childhood spot and n...,3.0,Noon–10:00 PM,40.7144,-73.9989
8,Congee Village 粥之家,425f0400f964a52011211fe3,100 Allen St,,2.0,551,8.0,851,Congee village saved my relationship,20.0,Noon–10:00 PM,40.7187,-73.9905
9,Shanghai Café Deluxe,46936ee3f964a520d0481fe3,100 Mott St,,1.0,895,7.9,1337,Soup dumplings are both fascinating and delici...,15.0,11:00 AM–9:00 PM,40.7173,-73.9975


In [81]:
#sort by rating for next analysis
df2=df1.sort_values(by='Rating', ascending=False)
df2

Unnamed: 0,NAME,ID,ADDRESS,Category,Price_Tier,Likes_Count,Rating,Rating_Signals,Tips,Agree_Count,Polular_Timeframe_Today,Latitude,Longitude
23,Kings County Imperial,5b380f649deb7d00399fdf9d,168 1/2 Delancey St,,,67,9.0,91,Delicious and great for a group dinner. Soup d...,3.0,Noon–1:00 PM6:00 PM–10:00 PM,40.7178,-73.9856
34,Málà Project,5647ee82498e8bfc0ddef53d,122 1st Ave,,2.0,436,8.9,550,I could come here just for the Mala peanuts (a...,10.0,Noon–3:00 PM5:00 PM–10:00 PM,40.7271,-73.9855
10,Shanghai 21,4bcf9774a8b3a5939497625f,21 Mott St,,2.0,741,8.8,1028,This place is my new go-to dimsum spot. For a ...,9.0,11:00 AM–9:00 PM,40.7144,-73.9989
35,Han Dynasty,52169fba11d21db81bdab2a0,90 3rd Ave,,2.0,1153,8.8,1552,"The menu includes things like Dan Dan noodles,...",16.0,Noon–10:00 PM,40.7321,-73.9881
12,Xi'an Famous Foods,5894c9a15e56b417cf79e553,45 Bayard St,,,104,8.8,138,staff is cute. food is very delicious. had the...,3.0,11:00 AM–9:00 PM,40.7152,-73.9973
11,Spicy Village,4db3374590a0843f295fb69b,68 Forsyth St Frnt B,,1.0,500,8.7,690,"Having studied abroad in Henan province, this ...",11.0,,40.717,-73.9935
4,Wah Fung Number 1 Fast Food 華豐快飯店,4a96bf8ff964a520ce2620e3,79 Chrystie St,,1.0,192,8.5,281,Go big or go home. $3 for more-than-you-can-ea...,13.0,11:00 AM–7:00 PM,40.7173,-73.9942
17,Yi Ji Shi Mo Noodle Corp,5c965dad5455b2002c058659,88 Elizabeth St,,,19,8.5,24,"Super thin and light, just crazy packed. Shrim...",2.0,8:00 AM–4:00 PM,40.7183,-73.9959
0,Big Wong King 大旺,3fd66200f964a520ede41ee3,67 Mott St,,1.0,422,8.4,646,Take rice box 2 go - BEST BBQ Roast Pork (beat...,6.0,10:00 AM–9:00 PM,40.7162,-73.9983
24,North Dumpling,4aca9125f964a52060c220e3,27 Essex St,,1.0,137,8.4,211,Get a #1 and a number #7 (beef noodle soup). H...,3.0,Noon–10:00 PM,40.7158,-73.9896


In [82]:
#we only trace top 20 rating places
df3=df2[0:21]
df3

Unnamed: 0,NAME,ID,ADDRESS,Category,Price_Tier,Likes_Count,Rating,Rating_Signals,Tips,Agree_Count,Polular_Timeframe_Today,Latitude,Longitude
23,Kings County Imperial,5b380f649deb7d00399fdf9d,168 1/2 Delancey St,,,67,9.0,91,Delicious and great for a group dinner. Soup d...,3,Noon–1:00 PM6:00 PM–10:00 PM,40.7178,-73.9856
34,Málà Project,5647ee82498e8bfc0ddef53d,122 1st Ave,,2.0,436,8.9,550,I could come here just for the Mala peanuts (a...,10,Noon–3:00 PM5:00 PM–10:00 PM,40.7271,-73.9855
10,Shanghai 21,4bcf9774a8b3a5939497625f,21 Mott St,,2.0,741,8.8,1028,This place is my new go-to dimsum spot. For a ...,9,11:00 AM–9:00 PM,40.7144,-73.9989
35,Han Dynasty,52169fba11d21db81bdab2a0,90 3rd Ave,,2.0,1153,8.8,1552,"The menu includes things like Dan Dan noodles,...",16,Noon–10:00 PM,40.7321,-73.9881
12,Xi'an Famous Foods,5894c9a15e56b417cf79e553,45 Bayard St,,,104,8.8,138,staff is cute. food is very delicious. had the...,3,11:00 AM–9:00 PM,40.7152,-73.9973
11,Spicy Village,4db3374590a0843f295fb69b,68 Forsyth St Frnt B,,1.0,500,8.7,690,"Having studied abroad in Henan province, this ...",11,,40.717,-73.9935
4,Wah Fung Number 1 Fast Food 華豐快飯店,4a96bf8ff964a520ce2620e3,79 Chrystie St,,1.0,192,8.5,281,Go big or go home. $3 for more-than-you-can-ea...,13,11:00 AM–7:00 PM,40.7173,-73.9942
17,Yi Ji Shi Mo Noodle Corp,5c965dad5455b2002c058659,88 Elizabeth St,,,19,8.5,24,"Super thin and light, just crazy packed. Shrim...",2,8:00 AM–4:00 PM,40.7183,-73.9959
0,Big Wong King 大旺,3fd66200f964a520ede41ee3,67 Mott St,,1.0,422,8.4,646,Take rice box 2 go - BEST BBQ Roast Pork (beat...,6,10:00 AM–9:00 PM,40.7162,-73.9983
24,North Dumpling,4aca9125f964a52060c220e3,27 Essex St,,1.0,137,8.4,211,Get a #1 and a number #7 (beef noodle soup). H...,3,Noon–10:00 PM,40.7158,-73.9896


In [83]:
#install and import folium function
#! conda install -c conda-forge folium
import folium 
import webbrowser
from folium.plugins import MarkerCluster

In [84]:
# create map of New York using latitude and longitude values
# https://python-visualization.github.io/folium/quickstart.html
map_chinatown = folium.Map(location=[latitude, longitude], zoom_start=14)
map_chinatown

In [85]:
# add markers to map
for name, lat, lng, rating, address,price_tier, Polular_Timeframe_Today in zip(df3['NAME'],df3['Latitude'], df3['Longitude'], df3['Rating'], df3['ADDRESS'],df3['Price_Tier'],df3['Polular_Timeframe_Today']):
    label = 'Rating:{}, Address:{}, Price_Tier:{},\n Avoid Time:{}'.format(rating,address,price_tier,Polular_Timeframe_Today )
    tooltip= '{}'.format(name)
    if price_tier==1 or price_tier==2:
        folium.Marker([lat, lng],popup=label,icon=folium.Icon(color='blue',icon='info-sign'),tooltip=tooltip).add_to(map_chinatown)
    elif price_tier==3 or price_tier==4:
        folium.Marker([lat, lng],popup=label,icon=folium.Icon(color='red',icon='info-sign'),tooltip=tooltip).add_to(map_chinatown)
    else:
        folium.Marker([lat, lng],popup=label,icon=folium.Icon(color='black',icon='info-sign'),tooltip=tooltip).add_to(map_chinatown)
        
map_chinatown


In [88]:
# add clusters into map
map_chinatown = folium.Map(location=[latitude, longitude], zoom_start=14)

In [89]:
marker_cluster = folium.plugins.MarkerCluster().add_to(map_chinatown) 
for index, row in df3.iterrows():
    locationlist = row[['Latitude','Longitude']].values.tolist()
    tooltip= '{}'.format(row['NAME'])
    if row['Price_Tier']==1 or row['Price_Tier']==2:
        folium.Marker(location=locationlist, popup='Address:{}, Rating:{}, Price Tier:{},/n Avoid Time:{}'.format(row['ADDRESS'],row['Rating'],row['Price_Tier'],row['Polular_Timeframe_Today']), icon=folium.Icon(color='green', icon='ok-sign'),tooltip=tooltip).add_to(marker_cluster)
    elif row['Price_Tier']==3 or row['Price_Tier']==4:
        folium.Marker(location=locationlist, popup='Address:{}, Rating:{}, Price Tier:{},/n Avoid Time:{}'.format(row['ADDRESS'],row['Rating'],row['Price_Tier'],row['Polular_Timeframe_Today']), icon=folium.Icon(color='red', icon='ok-sign'),tooltip=tooltip).add_to(marker_cluster)
    else:
        folium.Marker(location=locationlist, popup='Address:{}, Rating:{}, Price Tier:{},/n Avoid Time:{}'.format(row['ADDRESS'],row['Rating'],row['Price_Tier'],row['Polular_Timeframe_Today']), icon=folium.Icon(color='black', icon='ok-sign'),tooltip=tooltip).add_to(marker_cluster)
map_chinatown
    

## Now we have:
## 1. A intuitive map with top 20 rating chinese restaurants around Chinatown in NY
## 2. If there are more than two restaurants nearby one location, there will be clucters button to zoom in
## 3. We can mouseover to those locations to see their names
## 4. We can choose different price tiers by looking at their colors, green means cheap, red means a bit expensive, black means we dont have price info of these locations
## 5. By clicking those locations we can see details of address,rating,price tier and popular time frame to avoid queuing

## Next we can use initial 100 data and KNN clustering means for further analysis

In [90]:
# let's take a look at the dataframe with 100 rows first --- df1
df1.head(20)

Unnamed: 0,NAME,ID,ADDRESS,Category,Price_Tier,Likes_Count,Rating,Rating_Signals,Tips,Agree_Count,Polular_Timeframe_Today,Latitude,Longitude
0,Big Wong King 大旺,3fd66200f964a520ede41ee3,67 Mott St,,1.0,422,8.4,646,Take rice box 2 go - BEST BBQ Roast Pork (beat...,6,10:00 AM–9:00 PM,40.7162,-73.9983
1,Great N.Y. Noodletown,3fd66200f964a520b1ea1ee3,28 Bowery,,1.0,547,8.0,910,"If it’s soft-shell crab season, you know what ...",9,Noon–Midnight,40.715,-73.9969
2,Mission Chinese Food,4fa89bb2e4b0bad89524b84a,171 E Broadway,,3.0,945,8.1,1374,NYers keep turning out in droves at this nonde...,21,Noon–3:00 PM5:00 PM–10:00 PM,40.7141,-73.9898
3,Nom Wah Tea Parlor,49f220a3f964a520ee691fe3,13 Doyers St,,1.0,1258,8.4,1790,This 92-year-old Chinatown restaurant serves t...,28,10:00 AM–9:00 PM,40.7145,-73.9982
4,Wah Fung Number 1 Fast Food 華豐快飯店,4a96bf8ff964a520ce2620e3,79 Chrystie St,,1.0,192,8.5,281,Go big or go home. $3 for more-than-you-can-ea...,13,11:00 AM–7:00 PM,40.7173,-73.9942
5,House Special 甘來飯店,59ad625a2619ee5cdded881f,171 Hester St,,,22,8.2,36,Famous for our House Special Chicken - crispy ...,8,,40.7178,-73.9968
6,Deluxe Green Bo Restaurant,3fd66200f964a520ceea1ee3,66 Bayard St,,1.0,185,8.1,321,The Shanghainese food here is great. Order the...,5,11:00 AM–10:00 PM,40.7155,-73.9981
7,Hop Kee,3fd66200f964a5206be61ee3,21 Mott St,,2.0,203,7.9,353,Rarely do you return to a childhood spot and n...,3,Noon–10:00 PM,40.7144,-73.9989
8,Congee Village 粥之家,425f0400f964a52011211fe3,100 Allen St,,2.0,551,8.0,851,Congee village saved my relationship,20,Noon–10:00 PM,40.7187,-73.9905
9,Shanghai Café Deluxe,46936ee3f964a520d0481fe3,100 Mott St,,1.0,895,7.9,1337,Soup dumplings are both fascinating and delici...,15,11:00 AM–9:00 PM,40.7173,-73.9975


## Pre-processing

In [91]:
#take out N/A items and columns ID/ADDRESS/TIPS/AGREE COUNT/POPULAR TIMEFRAME/LATTITUDE/LONGITUDE
df4=df1[df1['Price_Tier']!='N/A']
df4=df4.iloc[:,[0,4,5,6,7]]
df4

Unnamed: 0,NAME,Price_Tier,Likes_Count,Rating,Rating_Signals
0,Big Wong King 大旺,1,422,8.4,646
1,Great N.Y. Noodletown,1,547,8.0,910
2,Mission Chinese Food,3,945,8.1,1374
3,Nom Wah Tea Parlor,1,1258,8.4,1790
4,Wah Fung Number 1 Fast Food 華豐快飯店,1,192,8.5,281
6,Deluxe Green Bo Restaurant,1,185,8.1,321
7,Hop Kee,2,203,7.9,353
8,Congee Village 粥之家,2,551,8.0,851
9,Shanghai Café Deluxe,1,895,7.9,1337
10,Shanghai 21,2,741,8.8,1028


In [92]:
df4.shape

(35, 5)

In [97]:
from sklearn.preprocessing import StandardScaler

X = df4.values[:,1:]
X = np.nan_to_num(X)
cluster_dataset = StandardScaler().fit_transform(X)
cluster_dataset

array([[-0.8273403 , -0.05733765,  0.88008132, -0.00942921],
       [-0.8273403 ,  0.30102265, -0.0325956 ,  0.52838487],
       [ 2.79227353,  1.44204184,  0.19557363,  1.47363386],
       [-0.8273403 ,  2.33937603,  0.88008132,  2.32109846],
       [-0.8273403 , -0.7167206 ,  1.10825056, -0.75299791],
       [-0.8273403 , -0.73678878,  0.19557363, -0.67151093],
       [ 0.98246661, -0.68518489, -0.26076484, -0.60632135],
       [ 0.98246661,  0.31249018, -0.0325956 ,  0.40819157],
       [-0.8273403 ,  1.29869772, -0.26076484,  1.3982584 ],
       [ 0.98246661,  0.85719784,  1.79275825,  0.76877147],
       [-0.8273403 ,  0.16627918,  1.56458902,  0.08020647],
       [ 0.98246661,  0.07167206, -0.94527253,  0.23910609],
       [-0.8273403 , -0.24941877, -0.0325956 , -0.29259647],
       [-0.8273403 , -0.56764271, -0.26076484, -0.51261132],
       [ 0.98246661,  1.05501272, -0.7171033 ,  1.09471939],
       [ 0.98246661,  1.59685349, -1.17344177,  1.68753718],
       [-0.8273403 ,  2.

In [99]:
from sklearn.cluster import KMeans

num_clusters = 3

k_means = KMeans(init="k-means++", n_clusters=num_clusters, n_init=12)
k_means.fit(cluster_dataset)
labels = k_means.labels_

print(labels)

[0 0 1 1 0 0 2 2 1 1 0 2 0 0 1 1 1 0 0 0 0 2 2 0 0 2 1 0 2 2 0 0 1 2 0]


In [107]:
# add clustering labels
df_final=df1[df1['Price_Tier']!='N/A']
df_final["Labels"] = labels
df_final

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,NAME,ID,ADDRESS,Category,Price_Tier,Likes_Count,Rating,Rating_Signals,Tips,Agree_Count,Polular_Timeframe_Today,Latitude,Longitude,Labels
0,Big Wong King 大旺,3fd66200f964a520ede41ee3,67 Mott St,,1,422,8.4,646,Take rice box 2 go - BEST BBQ Roast Pork (beat...,6,10:00 AM–9:00 PM,40.7162,-73.9983,0
1,Great N.Y. Noodletown,3fd66200f964a520b1ea1ee3,28 Bowery,,1,547,8.0,910,"If it’s soft-shell crab season, you know what ...",9,Noon–Midnight,40.715,-73.9969,0
2,Mission Chinese Food,4fa89bb2e4b0bad89524b84a,171 E Broadway,,3,945,8.1,1374,NYers keep turning out in droves at this nonde...,21,Noon–3:00 PM5:00 PM–10:00 PM,40.7141,-73.9898,1
3,Nom Wah Tea Parlor,49f220a3f964a520ee691fe3,13 Doyers St,,1,1258,8.4,1790,This 92-year-old Chinatown restaurant serves t...,28,10:00 AM–9:00 PM,40.7145,-73.9982,1
4,Wah Fung Number 1 Fast Food 華豐快飯店,4a96bf8ff964a520ce2620e3,79 Chrystie St,,1,192,8.5,281,Go big or go home. $3 for more-than-you-can-ea...,13,11:00 AM–7:00 PM,40.7173,-73.9942,0
6,Deluxe Green Bo Restaurant,3fd66200f964a520ceea1ee3,66 Bayard St,,1,185,8.1,321,The Shanghainese food here is great. Order the...,5,11:00 AM–10:00 PM,40.7155,-73.9981,0
7,Hop Kee,3fd66200f964a5206be61ee3,21 Mott St,,2,203,7.9,353,Rarely do you return to a childhood spot and n...,3,Noon–10:00 PM,40.7144,-73.9989,2
8,Congee Village 粥之家,425f0400f964a52011211fe3,100 Allen St,,2,551,8.0,851,Congee village saved my relationship,20,Noon–10:00 PM,40.7187,-73.9905,2
9,Shanghai Café Deluxe,46936ee3f964a520d0481fe3,100 Mott St,,1,895,7.9,1337,Soup dumplings are both fascinating and delici...,15,11:00 AM–9:00 PM,40.7173,-73.9975,1
10,Shanghai 21,4bcf9774a8b3a5939497625f,21 Mott St,,2,741,8.8,1028,This place is my new go-to dimsum spot. For a ...,9,11:00 AM–9:00 PM,40.7144,-73.9989,1


In [111]:
# add clusters into map
map_chinatown = folium.Map(location=[latitude, longitude], zoom_start=14)

In [112]:
marker_cluster = folium.plugins.MarkerCluster().add_to(map_chinatown) 
for index, row in df_final.iterrows():
    locationlist = row[['Latitude','Longitude']].values.tolist()
    tooltip= '{}'.format(row['NAME'])
    if row['Labels']==0:
        folium.Marker(location=locationlist, popup='Rating:{}, Price Tier:{}'.format(row['Rating'],row['Price_Tier']), icon=folium.Icon(color='green', icon='ok-sign'),tooltip=tooltip).add_to(marker_cluster)
    elif row['Price_Tier']==1:
        folium.Marker(location=locationlist, popup='Rating:{}, Price Tier:{}'.format(row['Rating'],row['Price_Tier']), icon=folium.Icon(color='red', icon='ok-sign'),tooltip=tooltip).add_to(marker_cluster)
    else:
        folium.Marker(location=locationlist, popup='Rating:{}, Price Tier:{}'.format(row['Rating'],row['Price_Tier']), icon=folium.Icon(color='black', icon='ok-sign'),tooltip=tooltip).add_to(marker_cluster)
map_chinatown
    

## Now we can see that green means cluster 0, red means cluster 1, black means cluster 2