# Compare cities by Restaurants

### cluster cities around the world based on dining venue types 

#### import data

In [None]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
#from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import math

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

#### Define Foursquare Credentials

In [15]:
import os  # imported here so this cell stays self-contained when run on its own

# Foursquare API configuration.
# The credentials are read from the environment so they never have to be typed
# into (or saved with) the notebook. The empty-string fallback keeps the former
# placeholder behaviour when the variables are not set.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180604' # value sent as the `v` query parameter of every request
LIMIT = 30

# Only report whether each credential is present: printing the raw values would
# store them in the notebook's saved output.
print('Your credentails:')
print('CLIENT_ID: ' + ('<set>' if CLIENT_ID else '<missing>'))
print('CLIENT_SECRET:' + ('<set>' if CLIENT_SECRET else '<missing>'))

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


#### make city dataframe and map

In [10]:
# Cities to compare, with their coordinates given as decimal-degree strings.
# The three lists are positionally aligned: entry i of each list describes the same city.
data = {
    'city': [
        'New York City', 'Chicago', 'DC', 'LA', 'London', 'Bejing', 'Dubai', 'Sydney',
        'Cairo', 'Hong Kong', 'Madrid', 'Moscow', 'Paris', 'Venice', 'Mexico City',
        'Berlin', 'Liverpool', 'Johannesburg', 'Hamburg', 'Dublin', 'Miami', 'Portland',
        'Dallas',
    ],
    'latitude': [
        '40.7128', '41.8781', '38.9072', '34.0522', '51.5074', '39.9042', '25.2048', '-33.8688',
        '30.0444', '22.3193', '40.4168', '55.7558', '48.8566', '45.4408', '19.4326',
        '52.5200', '53.4084', '-26.2041', '53.5511', '53.3498', '25.7617', '45.5051',
        '32.7767',
    ],
    'longitude': [
        '-74.0060', '-87.6298', '-77.0369', '-118.2437', '-0.1278', '116.4074', '55.2708', '151.2093',
        '31.2357', '114.1694', '-3.7038', '37.6173', '2.3522', '12.3155', '-99.1332',
        '13.4050', '-2.9916', '28.0473', '9.9937', '-6.2603', '-80.1918', '-122.6750',
        '-96.7970',
    ],
}

# Build the frame and convert both coordinate columns from text to floats in one step.
df_coord = pd.DataFrame(data, columns=['city', 'latitude', 'longitude']).astype(
    {'latitude': float, 'longitude': float}
)
df_coord.head()

Unnamed: 0,city,latitude,longitude
0,New York City,40.7128,-74.006
1,Chicago,41.8781,-87.6298
2,DC,38.9072,-77.0369
3,LA,34.0522,-118.2437
4,London,51.5074,-0.1278


In [11]:
# World map centred on (0, 0), zoomed out far enough to show every city
map_world = folium.Map(location=[0, 0], zoom_start=2)

# One blue circle marker per city; clicking a marker pops up the city name
for row in df_coord.itertuples(index=False):
    city_popup = folium.Popup(row.city, parse_html=True)
    marker = folium.CircleMarker(
        [row.latitude, row.longitude],
        radius=5,
        popup=city_popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_world)

map_world

#### create and call a function to get the top restaurants from every city

In [177]:
def getRestaurantTypes(city, latitudes, longitudes, radius=10000, limit=100, search_query='restaurants'):
    """Query the Foursquare venue search once per city and collect venue categories.

    Parameters
    ----------
    city, latitudes, longitudes : iterables of equal length
        City label and search-centre coordinates; element i of each describes
        the same city.
    radius : int, optional
        Value sent as the `radius` query parameter (default 10000).
    limit : int, optional
        Value sent as the `limit` query parameter (default 100). This is
        independent of the notebook-level LIMIT constant.
    search_query : str, optional
        Value sent as the `query` query parameter (default 'restaurants').

    Returns
    -------
    pandas.DataFrame
        Columns 'city', 'name', 'category', one row per venue that has at
        least one category. Every cell holds a ONE-ELEMENT LIST (e.g.
        ['Chicago']); the cleaning cell that follows the call unwraps them.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with an error status.

    Notes
    -----
    Reads CLIENT_ID, CLIENT_SECRET and VERSION from the notebook namespace.
    For backward compatibility the result is also published as the
    module-level `df_cat`, because later cells read that name rather than
    the return value. The frame is built from scratch on every call, so the
    function no longer requires `df_cat` to exist (or be empty) beforehand.
    """
    global df_cat  # later cells use `df_cat` directly instead of the return value

    search_url = 'https://api.foursquare.com/v2/venues/search'

    city_list = []
    names_list = []
    category_list = []

    for name, lat, lng in zip(city, latitudes, longitudes):
        # let requests build and URL-encode the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'query': search_query,
            'radius': radius,
            'limit': limit,
        }

        # a timeout prevents one stalled request from hanging the whole notebook
        response = requests.get(search_url, params=params, timeout=30)
        response.raise_for_status()
        venues = response.json()['response']['venues']

        for venue in venues:
            # Read both fields BEFORE appending anything, so the three lists
            # can never get out of step with each other.
            try:
                category = venue['categories'][0]['name']
                venue_name = venue['name']
            except (KeyError, IndexError, TypeError):
                # venue without a usable category or name: skip it
                continue

            category_list.append([category])
            names_list.append([venue_name])
            city_list.append([name])

    df_cat = pd.DataFrame(
        {'city': city_list, 'name': names_list, 'category': category_list},
        columns=['city', 'name', 'category'],
    )
    return df_cat
    


In [204]:
# Reset df_cat to an empty frame (getRestaurantTypes fills the module-level df_cat),
# then fetch the venues for every city listed in df_coord
df_cat = pd.DataFrame()
city_restaurants = getRestaurantTypes(
    city=df_coord['city'],
    latitudes=df_coord['latitude'],
    longitudes=df_coord['longitude'],
)

In [205]:
# clean data
# getRestaurantTypes stores every value wrapped in a one-element list; unwrap it.
# Only list values are unwrapped, so re-running this cell is a no-op
# (applying `.str[0]` a second time would cut the clean strings down to their first character).
for column in ['category', 'name', 'city']:
    df_cat[column] = df_cat[column].map(
        lambda value: value[0] if isinstance(value, list) else value
    )
df_cat.head()

Unnamed: 0,city,name,category
0,New York City,The Shops & Restaurants at Hudson Yards,Shopping Mall
1,New York City,WFC Shops & Restaurants,Food Court
2,New York City,Restaurants Open 24,Falafel Restaurant
3,New York City,Zhou Restaurants,Food
4,New York City,Barilla Restaurants,Italian Restaurant


#### analyze and transform restaurant data

In [206]:
# number of venues returned for each city (non-null count per remaining column)
venues_by_city = df_cat.groupby('city')
venues_by_city.count()

Unnamed: 0_level_0,name,category
city,Unnamed: 1_level_1,Unnamed: 2_level_1
Bejing,5,5
Berlin,35,35
Cairo,8,8
Chicago,38,38
DC,25,25
Dallas,34,34
Dubai,17,17
Dublin,17,17
Hamburg,17,17
Hong Kong,17,17


In [207]:
# how many distinct venue categories appear across all cities
n_categories = len(df_cat['category'].unique())
print('There are {} uniques categories of restaurants.'.format(n_categories))

There are 93 uniques categories of restaurants.


In [208]:

# One-hot encode the venue category: one indicator column per category name
category_dummies = pd.get_dummies(df_cat[['category']], prefix="", prefix_sep="")

# Attach each venue's city; the new column is appended at the end ...
category_dummies['city'] = df_cat['city']

# ... so rotate that last column to the front to make 'city' the leading column
column_order = list(category_dummies.columns)
fixed_columns = column_order[-1:] + column_order[:-1]
city_onehot = category_dummies[fixed_columns]

city_onehot.head()

Unnamed: 0,city,Advertising Agency,African Restaurant,American Restaurant,Arepa Restaurant,Asian Restaurant,BBQ Joint,Bank,Bar,Bistro,Boarding House,Brazilian Restaurant,Breakfast Spot,Brewery,Buffet,Building,Burger Joint,Business Center,Business Service,Cafeteria,Café,Cajun / Creole Restaurant,Cantonese Restaurant,Chinese Restaurant,Coffee Shop,College Cafeteria,Comfort Food Restaurant,Corporate Cafeteria,Coworking Space,Cruise,Cupcake Shop,Department Store,Dim Sum Restaurant,Diner,Distribution Center,Eastern European Restaurant,Event Space,Falafel Restaurant,Fast Food Restaurant,Fish Market,Food,Food Court,French Restaurant,Furniture / Home Store,Gastropub,German Restaurant,Greek Restaurant,Grocery Store,Hardware Store,Hotel,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Kebab Restaurant,Korean Restaurant,Latin American Restaurant,Market,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Moroccan Restaurant,Motel,Neighborhood,New American Restaurant,Office,Peruvian Restaurant,Pizza Place,Portuguese Restaurant,Print Shop,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Shanghai Restaurant,Shop & Service,Shopping Mall,Snack Place,South American Restaurant,Spanish Restaurant,Sri Lankan Restaurant,Steakhouse,Sushi Restaurant,Syrian Restaurant,Szechuan Restaurant,Taco Place,Tapas Restaurant,Thai Restaurant,Theme Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yemeni Restaurant
0,New York City,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,New York City,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,New York City,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,New York City,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,New York City,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [210]:
# Average the one-hot indicators per city: each value is the fraction of that
# city's venues that fall into the category
city_grouped = (
    city_onehot
    .groupby('city')
    .mean()
    .reset_index()
)
city_grouped

Unnamed: 0,city,Advertising Agency,African Restaurant,American Restaurant,Arepa Restaurant,Asian Restaurant,BBQ Joint,Bank,Bar,Bistro,Boarding House,Brazilian Restaurant,Breakfast Spot,Brewery,Buffet,Building,Burger Joint,Business Center,Business Service,Cafeteria,Café,Cajun / Creole Restaurant,Cantonese Restaurant,Chinese Restaurant,Coffee Shop,College Cafeteria,Comfort Food Restaurant,Corporate Cafeteria,Coworking Space,Cruise,Cupcake Shop,Department Store,Dim Sum Restaurant,Diner,Distribution Center,Eastern European Restaurant,Event Space,Falafel Restaurant,Fast Food Restaurant,Fish Market,Food,Food Court,French Restaurant,Furniture / Home Store,Gastropub,German Restaurant,Greek Restaurant,Grocery Store,Hardware Store,Hotel,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Kebab Restaurant,Korean Restaurant,Latin American Restaurant,Market,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Moroccan Restaurant,Motel,Neighborhood,New American Restaurant,Office,Peruvian Restaurant,Pizza Place,Portuguese Restaurant,Print Shop,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Shanghai Restaurant,Shop & Service,Shopping Mall,Snack Place,South American Restaurant,Spanish Restaurant,Sri Lankan Restaurant,Steakhouse,Sushi Restaurant,Syrian Restaurant,Szechuan Restaurant,Taco Place,Tapas Restaurant,Thai Restaurant,Theme Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yemeni Restaurant
0,Bejing,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Berlin,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.2,0.028571,0.0,0.0,0.0,0.0,0.085714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.085714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0
2,Cairo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125
3,Chicago,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.026316,0.0,0.0,0.026316,0.210526,0.026316,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.026316,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.131579,0.0,0.052632,0.0,0.0,0.0,0.0,0.263158,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0
4,DC,0.0,0.0,0.04,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.56,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Dallas,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.029412,0.088235,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.088235,0.0,0.088235,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.117647,0.029412,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.029412,0.029412,0.205882,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Dubai,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.235294,0.058824,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.235294,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Dublin,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.705882,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Hamburg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.235294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.117647,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Hong Kong,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.470588,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.058824,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0


In [211]:
# report the top 5 restaurant types from each city
num_top_venues = 5

for city_name in city_grouped['city']:
    print("----"+city_name+'----')

    # transpose the city's single row into a two-column (venue, freq) table
    city_freqs = city_grouped[city_grouped['city'] == city_name].T.reset_index()
    city_freqs.columns = ['venue','freq']

    top_venues = (
        city_freqs.iloc[1:]  # the first row holds the city name, not a frequency
        .assign(freq=lambda table: table['freq'].astype(float))
        .round({'freq': 2})
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
        .head(num_top_venues)
    )
    print(top_venues)
    print('\n')

----Bejing----
                             venue  freq
0               Chinese Restaurant   0.4
1                Indian Restaurant   0.2
2                French Restaurant   0.2
3          New American Restaurant   0.2
4  Molecular Gastronomy Restaurant   0.0


----Berlin----
                         venue  freq
0                   Restaurant  0.20
1            German Restaurant  0.20
2  Eastern European Restaurant  0.14
3               Sandwich Place  0.09
4           Italian Restaurant  0.09


----Cairo----
               venue  freq
0         Restaurant  0.38
1              Hotel  0.25
2         Food Court  0.12
3             Cruise  0.12
4  Yemeni Restaurant  0.12


----Chicago----
                venue  freq
0      Sandwich Place  0.26
1                Food  0.21
2              Office  0.13
3  Mexican Restaurant  0.05
4         Pizza Place  0.05


----DC----
                       venue  freq
0             Sandwich Place  0.56
1                       Food  0.12
2                 

In [212]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first element of ``row`` is skipped (the caller passes rows whose
    first field is the city name). The remaining entries are sorted in
    descending order and the index labels of the first ``num_top_venues``
    of them are returned as a NumPy array.
    """
    frequencies = row.iloc[1:]
    ranked_labels = frequencies.sort_values(ascending=False).index
    return ranked_labels.values[:num_top_venues]

In [213]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Column headers: 'city' followed by one ranked label per top venue
# ('1st Most Common Restaurant', '2nd ...', '3rd ...', '4th ...', ...).
columns = ['city']
for ind in range(num_top_venues):
    rank = ind + 1
    # English ordinals: 1/2/3 take st/nd/rd except for 11, 12 and 13; everything
    # else takes 'th'. An explicit test replaces the former bare `except:`.
    if rank % 10 in (1, 2, 3) and rank % 100 not in (11, 12, 13):
        suffix = indicators[rank % 10 - 1]
    else:
        suffix = 'th'
    columns.append('{}{} Most Common Restaurant'.format(rank, suffix))

# create a new dataframe with one row per city, in the same order as city_grouped
city_venues_sorted = pd.DataFrame(columns=columns)
city_venues_sorted['city'] = city_grouped['city']

# fill the ranked columns of each row with that city's most frequent categories
for ind in range(city_grouped.shape[0]):
    city_venues_sorted.iloc[ind, 1:] = return_most_common_venues(city_grouped.iloc[ind, :], num_top_venues)

city_venues_sorted.head()

Unnamed: 0,city,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bejing,Chinese Restaurant,French Restaurant,Indian Restaurant,New American Restaurant,Diner,Comfort Food Restaurant,Corporate Cafeteria,Coworking Space,Cruise,Cupcake Shop
1,Berlin,German Restaurant,Restaurant,Eastern European Restaurant,Sandwich Place,Italian Restaurant,Thai Restaurant,Chinese Restaurant,Sri Lankan Restaurant,Boarding House,French Restaurant
2,Cairo,Restaurant,Hotel,Yemeni Restaurant,Food Court,Cruise,Diner,College Cafeteria,Comfort Food Restaurant,Corporate Cafeteria,Coworking Space
3,Chicago,Sandwich Place,Food,Office,Pizza Place,Mexican Restaurant,Fish Market,Event Space,Indian Restaurant,Eastern European Restaurant,Middle Eastern Restaurant
4,DC,Sandwich Place,Food,Hotel,Bank,Diner,Office,Middle Eastern Restaurant,Mexican Restaurant,BBQ Joint,American Restaurant


#### Cluster Cities by restaurant type

In [215]:
# set number of clusters
clusters = 5

# Feature matrix for clustering: every column except the city label.
# `axis` is passed by keyword because recent pandas releases no longer accept
# it as a positional argument.
city_grouped_clustering = city_grouped.drop('city', axis=1)

# Run k-means clustering. n_init is pinned to 10 (the long-standing default) so
# the result does not change with the scikit-learn version; random_state keeps
# the run reproducible.
kmeans = KMeans(n_clusters=clusters, n_init=10, random_state=0).fit(city_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 1, 1, 0, 4, 0, 1, 4, 0, 2], dtype=int32)

In [218]:
 # add clustering labels
# Attach the k-means label of each city as a leading 'Cluster' column.
# This works on a copy, and any 'Cluster' column left over from an earlier run
# is dropped first, so the cell can be re-run safely and city_venues_sorted
# itself is never modified. The rows of city_venues_sorted follow the same
# order as city_grouped, which is what kmeans was fitted on, so the labels
# line up positionally.
venues_with_cluster = city_venues_sorted.drop('Cluster', axis=1, errors='ignore')
venues_with_cluster.insert(0, 'Cluster', kmeans.labels_)

# Add the cluster label and ranked venue columns to each city's coordinates.
# Cities missing from city_venues_sorted end up with NaN in those columns.
city_merged = df_coord.join(venues_with_cluster.set_index('city'), on='city')

city_merged.head()

Unnamed: 0,city,latitude,longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,New York City,40.7128,-74.006,0.0,Food,Office,Sandwich Place,Miscellaneous Shop,Furniture / Home Store,Italian Restaurant,Food Court,American Restaurant,Falafel Restaurant,Coworking Space
1,Chicago,41.8781,-87.6298,0.0,Sandwich Place,Food,Office,Pizza Place,Mexican Restaurant,Fish Market,Event Space,Indian Restaurant,Eastern European Restaurant,Middle Eastern Restaurant
2,DC,38.9072,-77.0369,4.0,Sandwich Place,Food,Hotel,Bank,Diner,Office,Middle Eastern Restaurant,Mexican Restaurant,BBQ Joint,American Restaurant
3,LA,34.0522,-118.2437,0.0,Food,Sandwich Place,Mexican Restaurant,Japanese Restaurant,Restaurant,Spanish Restaurant,American Restaurant,Seafood Restaurant,Department Store,Peruvian Restaurant
4,London,51.5074,-0.1278,0.0,Sandwich Place,Restaurant,Office,African Restaurant,Turkish Restaurant,Italian Restaurant,Miscellaneous Shop,Pizza Place,Seafood Restaurant,Food Court


In [222]:
# discard every row that still has a missing value (e.g. cities that found no match in the join)
city_merged = city_merged.dropna(axis=0, how='any')

In [224]:
# store the cluster labels as integers so they can be used as list indices
city_merged = city_merged.assign(Cluster=city_merged['Cluster'].astype(int))

In [225]:
# create map
map_clusters = folium.Map(location=[0, 0], zoom_start=1)

# Colour scheme: one evenly spaced colour of the rainbow colormap per cluster
# label, converted to hex strings for folium.
colors_array = cm.rainbow(np.linspace(0, 1, clusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add one marker per city, coloured by its cluster.
for lat, lon, poi, cluster in zip(city_merged['latitude'], city_merged['longitude'], city_merged['city'], city_merged['Cluster']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels run from 0 to clusters-1, so they index the palette directly
    # (the earlier `cluster-1` relied on -1 wrapping around to the last colour).
    cluster_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters