# Preprare Data

## Steps for preparing Toronto neighborhood data
1. Import Library.
2. Crawl Toronto neighborhood from wikipedia data and push to pandas dataframe.
3. Cleaning Data: change cloumns name, drop not assign rows, regex job.
4. Get Location data of Toronto, push to dataframe and clean data.
5. Merge Neighborhood data and location data


In [21]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

from bs4 import BeautifulSoup
from urllib.request import urlopen
import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes
import folium

print('Libraries imported.')

Libraries imported.


In [22]:
req = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")

soup = BeautifulSoup(req.content,'lxml')

table = soup.find_all('table')[0]

df = pd.read_html(str(table))

neighborhood=pd.DataFrame(df[0])
neighborhood.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [23]:
column_names = ['PostalCode', 'Borough', 'Neighborhood']
neighborhood = neighborhood.rename(columns={neighborhood.columns[0]: column_names[0], neighborhood.columns[1]: column_names[1],\
                                          neighborhood.columns[2]: column_names[2]  })
neighborhood = neighborhood[neighborhood['Borough']!='Not assigned']
neighborhood = neighborhood.sort_values(by=['PostalCode', 'Borough'])
neighborhood['Neighborhood'] = neighborhood['Neighborhood'].str.replace(' /', ',', regex=True)
neighborhood.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
9,M1B,Scarborough,"Malvern, Rouge"
18,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
27,M1E,Scarborough,"Guildwood, Morningside, West Hill"
36,M1G,Scarborough,Woburn
45,M1H,Scarborough,Cedarbrae


In [24]:
url = 'https://cocl.us/Geospatial_data' # I need to download file directly because geocoder is unreliable and I get stuck in infinity while loop!
r = requests.get(url, allow_redirects=True)
open('Geospatial_Coordinates.csv', 'wb').write(r.content)
coordinates_df = pd.read_csv('Geospatial_Coordinates.csv')
column_names = ['PostalCode', 'Latitude', 'Longitude']
coordinates_df = coordinates_df.rename(columns={coordinates_df.columns[0]: column_names[0], coordinates_df.columns[1]: column_names[1],\
                                          coordinates_df.columns[2]: column_names[2]  }) 
coordinates_df.head()


Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [25]:
neighborhood_coordinates_df = pd.merge(neighborhood, coordinates_df, on="PostalCode")
neighborhood_coordinates_df.head()
# neighborhood_coordinates_df.shape

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## Explore Toronto Neighborhood using FourSquare  

1. Get location of Toronto 
2. Visualize Toronto neighborhood
3. Define Foursquare Credentials and Version
4. Get Venue Information of Toronto from FourQuare
5. Get category information of neighborhood from FourSquare
6. Convert venue categories to one-hot
7. Get information about top 10 categories for each neighborhood

In [26]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="tr_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, Ontario are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto, Ontario are 43.6534817, -79.3839347.


In [27]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhood_coordinates_df['Latitude'], neighborhood_coordinates_df['Longitude'], neighborhood_coordinates_df['Borough'],\
                                           neighborhood_coordinates_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [28]:
CLIENT_ID = '3MSYVKOATZT5LXLXACLRNCB5DIUFISU0MTIS3ARW022CXLED' # your Foursquare ID
CLIENT_SECRET = 'SLSVZXCKX4HYUGJU2BWVBEZGGWQVC1YH5GARWGR3QTZR1O3N' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 3MSYVKOATZT5LXLXACLRNCB5DIUFISU0MTIS3ARW022CXLED
CLIENT_SECRET:SLSVZXCKX4HYUGJU2BWVBEZGGWQVC1YH5GARWGR3QTZR1O3N


In [0]:
# neighborhood_latitude = toronto_data.loc[10, 'Latitude'] # neighborhood latitude value
# neighborhood_longitude = toronto_data.loc[10, 'Longitude'] # neighborhood longitude value
# neighborhood_name = toronto_data.loc[10, 'Neighborhood'] # neighborhood name
# LIMIT = 100
# radius = 500
# # CLIENT_ID = '3MSYVKOATZT5LXLXACLRNCB5DIUFISU0MTIS3ARW022CXLEDf'
# # CLIENT_SECRET = 'SLSVZXCKX4HYUGJU2BWVBEZGGWQVC1YH5GARWGR3QTZR1O3N'
# url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
#     CLIENT_ID, 
#     CLIENT_SECRET, 
#     VERSION, 
#     neighborhood_latitude, 
#     neighborhood_longitude, 
#     radius, 
#     LIMIT)
# url # display URL
# results = requests.get(url).json()
# results

In [30]:
LIMIT = 105
radius = 500
def getNearbyVenues(names, latitudes, longitudes, radius=500, category_id = ''):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
        if (category_id != ''):
            url = url + '&categoryId={}'
            url = url.format(category_id)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)
# electric_store_id = '4bf58dd8d48988d122951735'
chinese_restaurant_id = '4bf58dd8d48988d145941735'
toronto_data = neighborhood_coordinates_df
toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude'],
                                   category_id=chinese_restaurant_id
                                  )
print(toronto_venues.shape)
toronto_venues.head()

Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
York Mills, Silver Hills
Willowdale, Newtonbrook
Willowdale
York Mills West
Willowdale
Parkwoods
Don Mills
Don Mills
Bathurst Manor, Wilson Heights, Downsview North
Northwood Park, York University
Downsview
Downsview
Downsview
Downsview
Victoria Village
Parkview Hill, Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West, Riverdale
India Bazaar, The Beaches West
Studio District
Lawrence P

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Cedarbrae,43.773136,-79.239476,Federick Restaurant,43.774697,-79.241142,Hakka Restaurant
1,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029,Hakka No.1,43.727688,-79.266057,Chinese Restaurant
2,"Dorset Park, Wexford Heights, Scarborough Town...",43.75741,-79.273304,Kim Kim restaurant,43.753833,-79.276611,Chinese Restaurant
3,"Clarks Corners, Tam O'Shanter, Sullivan",43.781638,-79.304302,The Royal Chinese Restaurant 避風塘小炒,43.780505,-79.298844,Chinese Restaurant
4,"Steeles West, L'Amoreaux West",43.799525,-79.318389,Mr Congee Chinese Cuisine 龍粥記,43.798879,-79.318335,Chinese Restaurant


In [31]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Cedarbrae,43.773136,-79.239476,Federick Restaurant,43.774697,-79.241142,Hakka Restaurant
1,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029,Hakka No.1,43.727688,-79.266057,Chinese Restaurant
2,"Dorset Park, Wexford Heights, Scarborough Town...",43.75741,-79.273304,Kim Kim restaurant,43.753833,-79.276611,Chinese Restaurant
3,"Clarks Corners, Tam O'Shanter, Sullivan",43.781638,-79.304302,The Royal Chinese Restaurant 避風塘小炒,43.780505,-79.298844,Chinese Restaurant
4,"Steeles West, L'Amoreaux West",43.799525,-79.318389,Mr Congee Chinese Cuisine 龍粥記,43.798879,-79.318335,Chinese Restaurant


In [32]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 18 uniques categories.


In [33]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
# add neighborhood column back to dataframe
toronto_onehot['NEIGHBORHOOD'] = toronto_venues['Neighborhood']
print(toronto_onehot.columns)

# move neighborhood column to the first column
fixed_columns = list(toronto_onehot.columns[-1:]) + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Index(['Asian Restaurant', 'Bar', 'Bubble Tea Shop', 'Cantonese Restaurant',
       'Chinese Restaurant', 'Comfort Food Restaurant', 'Dim Sum Restaurant',
       'Dumpling Restaurant', 'Fried Chicken Joint', 'Hakka Restaurant',
       'Hong Kong Restaurant', 'Hotpot Restaurant', 'Noodle House',
       'Peking Duck Restaurant', 'Sushi Restaurant', 'Szechuan Restaurant',
       'Taiwanese Restaurant', 'Thai Restaurant', 'NEIGHBORHOOD'],
      dtype='object')


Unnamed: 0,NEIGHBORHOOD,Asian Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Chinese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint,Hakka Restaurant,Hong Kong Restaurant,Hotpot Restaurant,Noodle House,Peking Duck Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Thai Restaurant
0,Cedarbrae,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,"Kennedy Park, Ionview, East Birchmount Park",0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Dorset Park, Wexford Heights, Scarborough Town...",0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Clarks Corners, Tam O'Shanter, Sullivan",0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Steeles West, L'Amoreaux West",0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [34]:
toronto_grouped = toronto_onehot.groupby('NEIGHBORHOOD').mean().reset_index()
toronto_grouped 

Unnamed: 0,NEIGHBORHOOD,Asian Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Chinese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint,Hakka Restaurant,Hong Kong Restaurant,Hotpot Restaurant,Noodle House,Peking Duck Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Thai Restaurant
0,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bayview Village,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Berczy Park,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Cedarbrae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Central Bay Street,0.0,0.0,0.052632,0.0,0.789474,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.052632
6,Church and Wellesley,0.125,0.0,0.125,0.0,0.625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0
7,"Clarks Corners, Tam O'Shanter, Sullivan",0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
num_top_venues = 5

for hood in toronto_grouped['NEIGHBORHOOD']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['NEIGHBORHOOD'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Alderwood, Long Branch----
                  venue  freq
0    Chinese Restaurant   1.0
1      Asian Restaurant   0.0
2  Hong Kong Restaurant   0.0
3  Taiwanese Restaurant   0.0
4   Szechuan Restaurant   0.0


----Bathurst Manor, Wilson Heights, Downsview North----
                  venue  freq
0    Chinese Restaurant   1.0
1      Asian Restaurant   0.0
2  Hong Kong Restaurant   0.0
3  Taiwanese Restaurant   0.0
4   Szechuan Restaurant   0.0


----Bayview Village----
                  venue  freq
0    Chinese Restaurant   1.0
1      Asian Restaurant   0.0
2  Hong Kong Restaurant   0.0
3  Taiwanese Restaurant   0.0
4   Szechuan Restaurant   0.0


----Berczy Park----
                  venue  freq
0    Chinese Restaurant   1.0
1      Asian Restaurant   0.0
2  Hong Kong Restaurant   0.0
3  Taiwanese Restaurant   0.0
4   Szechuan Restaurant   0.0


----Cedarbrae----
                  venue  freq
0      Hakka Restaurant   1.0
1                   Bar   0.0
2  Taiwanese Restaurant   0.0
3  

In [36]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['NEIGHBORHOOD']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['NEIGHBORHOOD'] = toronto_grouped['NEIGHBORHOOD']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted['Neighborhood Latitude'] = toronto_venues['Neighborhood Latitude']
neighborhoods_venues_sorted['Neighborhood Longitude'] = toronto_venues['Neighborhood Longitude']

neighborhoods_venues_sorted.head()

Unnamed: 0,NEIGHBORHOOD,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Neighborhood Latitude,Neighborhood Longitude
0,"Alderwood, Long Branch",Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint,43.773136,-79.239476
1,"Bathurst Manor, Wilson Heights, Downsview North",Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint,43.727929,-79.262029
2,Bayview Village,Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint,43.75741,-79.273304
3,Berczy Park,Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint,43.781638,-79.304302
4,Cedarbrae,Hakka Restaurant,Thai Restaurant,Dumpling Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Chinese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Fried Chicken Joint,43.799525,-79.318389


# Clustering 

In [37]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop(['NEIGHBORHOOD'], 1)
# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]
# toronto_grouped_clustering.head()


array([1, 1, 1, 1, 2, 0, 0, 1, 1, 1], dtype=int32)

In [38]:
neighborhoods_venues_sorted.head()

Unnamed: 0,NEIGHBORHOOD,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Neighborhood Latitude,Neighborhood Longitude
0,"Alderwood, Long Branch",Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint,43.773136,-79.239476
1,"Bathurst Manor, Wilson Heights, Downsview North",Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint,43.727929,-79.262029
2,Bayview Village,Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint,43.75741,-79.273304
3,Berczy Park,Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint,43.781638,-79.304302
4,Cedarbrae,Hakka Restaurant,Thai Restaurant,Dumpling Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Chinese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Fried Chicken Joint,43.799525,-79.318389


In [39]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
toronto_merged = neighborhoods_venues_sorted.copy()

# toronto_merged = toronto_data.copy()
# toronto_merged = toronto_merged.rename(columns={'Neighborhood': 'NEIGHBORHOOD'})
# # merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
# toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('NEIGHBORHOOD'), on='NEIGHBORHOOD')

toronto_merged.head() # check the last columns!

Unnamed: 0,Cluster Labels,NEIGHBORHOOD,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Neighborhood Latitude,Neighborhood Longitude
0,1,"Alderwood, Long Branch",Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint,43.773136,-79.239476
1,1,"Bathurst Manor, Wilson Heights, Downsview North",Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint,43.727929,-79.262029
2,1,Bayview Village,Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint,43.75741,-79.273304
3,1,Berczy Park,Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint,43.781638,-79.304302
4,2,Cedarbrae,Hakka Restaurant,Thai Restaurant,Dumpling Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Chinese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Fried Chicken Joint,43.799525,-79.318389


In [40]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
print(rainbow)
# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Neighborhood Latitude'], toronto_merged['Neighborhood Longitude'], toronto_merged['NEIGHBORHOOD'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

['#8000ff', '#00b5eb', '#80ffb4', '#ffb360', '#ff0000']


# Check each cluster

In [41]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(2, toronto_merged.shape[1]-2))]]

Unnamed: 0,NEIGHBORHOOD,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Central Bay Street,Chinese Restaurant,Thai Restaurant,Bubble Tea Shop,Fried Chicken Joint,Sushi Restaurant,Peking Duck Restaurant,Noodle House,Hotpot Restaurant,Hong Kong Restaurant,Hakka Restaurant
6,Church and Wellesley,Chinese Restaurant,Asian Restaurant,Taiwanese Restaurant,Bubble Tea Shop,Bar,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Thai Restaurant
16,"Garden District, Ryerson",Chinese Restaurant,Thai Restaurant,Fried Chicken Joint,Sushi Restaurant,Peking Duck Restaurant,Noodle House,Hotpot Restaurant,Hong Kong Restaurant,Hakka Restaurant,Taiwanese Restaurant
19,"Kensington Market, Chinatown, Grange Park",Chinese Restaurant,Dumpling Restaurant,Fried Chicken Joint,Bar,Bubble Tea Shop,Cantonese Restaurant,Dim Sum Restaurant,Asian Restaurant,Hong Kong Restaurant,Hotpot Restaurant
24,"Queen's Park, Ontario Provincial Government",Chinese Restaurant,Taiwanese Restaurant,Thai Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint
29,"St. James Town, Cabbagetown",Chinese Restaurant,Taiwanese Restaurant,Thai Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint
35,"University of Toronto, Harbord",Chinese Restaurant,Comfort Food Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint


In [46]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(2, toronto_merged.shape[1]-2))]]

Unnamed: 0,NEIGHBORHOOD,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Alderwood, Long Branch",Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint
1,"Bathurst Manor, Wilson Heights, Downsview North",Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint
2,Bayview Village,Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint
3,Berczy Park,Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint
7,"Clarks Corners, Tam O'Shanter, Sullivan",Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint
8,"Commerce Court, Victoria Hotel",Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint
9,Davisville,Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint
10,"Del Ray, Mount Dennis, Keelsdale and Silverthorn",Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint
12,"Dorset Park, Wexford Heights, Scarborough Town...",Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint
13,Downsview,Chinese Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fried Chicken Joint


In [43]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(2, toronto_merged.shape[1]-2))]]

Unnamed: 0,NEIGHBORHOOD,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Cedarbrae,Hakka Restaurant,Thai Restaurant,Dumpling Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Chinese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Fried Chicken Joint


In [44]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(2, toronto_merged.shape[1]-2))]]

Unnamed: 0,NEIGHBORHOOD,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,Leaside,Peking Duck Restaurant,Thai Restaurant,Dumpling Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Chinese Restaurant,Comfort Food Restaurant,Dim Sum Restaurant,Fried Chicken Joint


In [45]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(2, toronto_merged.shape[1]-2))]]

Unnamed: 0,NEIGHBORHOOD,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Don Mills,Asian Restaurant,Chinese Restaurant,Dim Sum Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dumpling Restaurant,Thai Restaurant
20,Lawrence Park,Dim Sum Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Chinese Restaurant,Comfort Food Restaurant,Dumpling Restaurant,Fried Chicken Joint
27,"Runnymede, Swansea",Chinese Restaurant,Dim Sum Restaurant,Thai Restaurant,Taiwanese Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Comfort Food Restaurant,Dumpling Restaurant,Fried Chicken Joint
