In [163]:
import os

import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

In [164]:
#!conda install -c conda-forge folium=0.5.0 --yes

### Scrape Canada Postal Codes from Wiki into a DF

In [165]:
# The oldid query parameter selects one specific revision of the page.
# (The original value ended in a stray '.', which is not part of the revision id.)
url = 'https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=890001695'
# read_html returns a list of tables; the first one is used here.
df = pd.read_html(url, header=0)[0].rename(columns={'Postcode': 'PostalCode'})
print(df.shape)
df.head()

(288, 3)


Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Delete rows with Borough='Not assigned'

In [166]:
# Keep every row whose Borough is something other than the 'Not assigned' placeholder.
df = df[df['Borough'] != 'Not assigned']
print(df.shape)
df.head()

(211, 3)


Unnamed: 0,PostalCode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


### Combine rows that share a postal code (neighbourhoods joined by commas)

In [167]:
# One row per postal code: keep the first Borough seen and concatenate
# all Neighbourhood names of the group into a single comma-separated string.
aggregation_functions = {'Borough': 'first', 'Neighbourhood': ','.join}
df = df.groupby('PostalCode').agg(aggregation_functions)
print(df.shape)
df.head()

(103, 2)


Unnamed: 0_level_0,Borough,Neighbourhood
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Rouge,Malvern"
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
M1E,Scarborough,"Guildwood,Morningside,West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


### If Neighbourhood == 'Not assigned', replace with value from Borough

In [168]:
# List the postal codes whose Neighbourhood is still the 'Not assigned' placeholder.
df[df['Neighbourhood'] == 'Not assigned']

Unnamed: 0_level_0,Borough,Neighbourhood
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1
M7A,Queen's Park,Not assigned


In [169]:
# Overwrite only the Neighbourhood cell of the affected rows with that row's Borough.
# Selecting the column explicitly matters: without it the assignment targets
# every column of the matched rows rather than just Neighbourhood.
unassigned = df['Neighbourhood'] == 'Not assigned'
df.loc[unassigned, 'Neighbourhood'] = df.loc[unassigned, 'Borough']
# Should now be empty.
df.loc[df['Neighbourhood'] == 'Not assigned']

Unnamed: 0_level_0,Borough,Neighbourhood
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1


In [170]:
# Print the frame's dimensions and preview its first rows.
print(df.shape)
df.head()

(103, 2)


Unnamed: 0_level_0,Borough,Neighbourhood
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Rouge,Malvern"
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
M1E,Scarborough,"Guildwood,Morningside,West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


### Load latitude and longitude of each postal code
(The geocoder API could not be reached, so the coordinates are loaded from the provided CSV file instead.)

In [171]:
# The code was removed by Watson Studio for sharing.

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [172]:
# Align the key column's name with the postal-code frame so the two can be merged on it.
df_geoco = df_geoco.rename(columns={'Postal Code': 'PostalCode'})
print(df_geoco.shape)
df_geoco.head()

(103, 3)


Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merge coordinates with postal codes DF

In [173]:
# Left join: every postal code from df is kept and gains its Latitude/Longitude.
df_merged = df.merge(df_geoco, on='PostalCode', how='left')
print(df_merged.shape)
df_merged.head()

(103, 5)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Filter DF by boroughs containing word 'Toronto'

In [174]:
# Boolean mask of the rows whose Borough name contains 'Toronto'.
in_toronto = df_merged['Borough'].str.contains('Toronto')
df_merged = df_merged[in_toronto]
print(df_merged.shape)
df_merged.head()

(38, 5)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West,India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [175]:
# Count the distinct boroughs and the rows (one per postal code) left after filtering.
borough_count = len(df_merged['Borough'].unique())
row_count = df_merged.shape[0]
print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, row_count))

The dataframe has 4 boroughs and 38 neighborhoods.


#### Get coordinates of Toronto

In [176]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import folium
import requests
from pandas.io.json import json_normalize

address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() yields None when nothing matches; fail with a clear message
# instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [177]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per row, with a "<neighbourhood>, <borough>" popup.
marker_fields = df_merged[['Latitude', 'Longitude', 'Borough', 'Neighbourhood']]
for lat, lng, borough, neighborhood in marker_fields.itertuples(index=False, name=None):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='orange',
        fill=True,
        fill_color='yellow',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

##### Define Foursquare credentials

In [178]:
def _require_env(name):
    """Return the value of environment variable `name`; raise RuntimeError if it is unset or empty."""
    value = os.environ.get(name)
    if not value:
        raise RuntimeError('Set the {} environment variable before running this notebook.'.format(name))
    return value


# Credentials are read from the environment so they never end up in the saved notebook.
# NOTE(review): keys that were previously committed in this cell should be rotated.
CLIENT_ID = _require_env('FOURSQUARE_CLIENT_ID')  # your Foursquare ID
CLIENT_SECRET = _require_env('FOURSQUARE_CLIENT_SECRET')  # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Deliberately do not echo the credential values into the cell output.
print('Foursquare credentials loaded from the environment.')

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


#### Explore first borough - East Toronto

In [179]:
# Rows of the East Toronto borough, re-indexed from 0 so positional labels can be used below.
is_east_toronto = df_merged['Borough'] == 'East Toronto'
df_eastToronto = df_merged.loc[is_east_toronto].reset_index(drop=True)
df_eastToronto.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558


In [180]:
address = 'East Toronto, Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() yields None when nothing matches; fail with a clear message
# instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of East Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of East Toronto are 43.653963, -79.387207.


In [181]:
# Base map centred on the coordinates geocoded for East Toronto.
map_eastToronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per row, with a "<neighbourhood>, <borough>" popup.
marker_fields = df_eastToronto[['Latitude', 'Longitude', 'Borough', 'Neighbourhood']]
for lat, lng, borough, neighborhood in marker_fields.itertuples(index=False, name=None):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='orange',
        fill=True,
        fill_color='yellow',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_eastToronto)

map_eastToronto
 

#### Explore first neighbourhood in East Toronto

In [182]:
# Name stored in the first row (label 0) of the East Toronto frame.
df_eastToronto.at[0, 'Neighbourhood']

'The Beaches'

In [183]:
# Pull the coordinates and name of the first East Toronto row (label 0) in one pass.
neigh_latitude, neigh_longitude, neigh_name = (
    df_eastToronto.at[0, column] for column in ('Latitude', 'Longitude', 'Neighbourhood')
)

print('Latitude and Longitude of {} are {}, {}'.format(neigh_name, neigh_latitude, neigh_longitude))

Latitude and Longitude of The Beaches are 43.67635739999999, -79.2930312


#### Get venues around the above neighbourhood

In [184]:
LIMIT = 100
radius = 500
explore_url_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
# Real request URL (contains the credentials, so it is not displayed).
url = explore_url_template.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    neigh_latitude,
    neigh_longitude,
    radius,
    LIMIT)
# Display the same URL with the credentials masked so the secret does not
# get stored in the notebook's output.
explore_url_template.format(
    '<CLIENT_ID>',
    '<CLIENT_SECRET>',
    VERSION,
    neigh_latitude,
    neigh_longitude,
    radius,
    LIMIT)

'https://api.foursquare.com/v2/venues/explore?&client_id=<redacted>&client_secret=<redacted>&v=20180605&ll=43.67635739999999,-79.2930312&radius=500&limit=100'

In [185]:
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here rather than as a KeyError later.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

In [186]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue row, or None if it has none.

    Parameters
    ----------
    row : mapping-like (dict or pandas Series)
        Must provide the category list under the key 'categories' or, failing
        that, 'venue.categories'. Each category is expected to be a mapping
        with a 'name' entry.

    Returns
    -------
    The 'name' of the first category, or None when the category list is
    empty or is not a list/tuple (e.g. a missing value).

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; other errors propagate.
        categories_list = row['venue.categories']

    # Treat missing values (None/NaN) the same as an empty category list.
    if not isinstance(categories_list, (list, tuple)) or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [187]:
venues = results['response']['groups'][0]['items']

# Flatten the nested JSON and keep only the name, categories and coordinates columns.
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = json_normalize(venues).loc[:, filtered_columns]

# Reduce each row's category list to a single category name.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Keep only the last dotted component of every column name (e.g. 'venue.location.lat' -> 'lat').
nearby_venues = nearby_venues.rename(columns=lambda col: col.split(".")[-1])
nearby_venues.head()


Unnamed: 0,name,categories,lat,lng
0,Glen Manor Ravine,Trail,43.676821,-79.293942
1,The Big Carrot Natural Food Market,Health Food Store,43.678879,-79.297734
2,Grover Pub and Grub,Pub,43.679181,-79.297215
3,Upper Beaches,Neighborhood,43.680563,-79.292869


### Let's repeat the process for all neighbourhoods in East Toronto

In [188]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare explore endpoint around each location and tabulate the venues.

    Relies on the notebook-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences; one request is made per (name, lat, lng) triple.
    radius : int, default 500
        Value sent as the request's ``radius`` parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        is empty (but has these columns) when no venue is returned at all.

    Raises
    ------
    requests.HTTPError
        If a request comes back with an error status.
    """
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; time out instead of hanging and fail loudly on HTTP errors
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue['categories']
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue may carry no category at all; record None rather than crash
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works when venue_rows is empty.
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)

    return(nearby_venues)

In [189]:
# Collect the venues around every East Toronto row (default search radius).
eastToronto_venues = getNearbyVenues(
    df_eastToronto['Neighbourhood'],
    df_eastToronto['Latitude'],
    df_eastToronto['Longitude'],
)

The Beaches
The Danforth West,Riverdale
The Beaches West,India Bazaar
Studio District
Business Reply Mail Processing Centre 969 Eastern


In [190]:
# Print the venue table's dimensions and preview its first rows.
print(eastToronto_venues.shape)
eastToronto_venues.head()

(124, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West,Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


In [191]:
# Number of non-null values in each column, per neighbourhood (i.e. venues returned for each).
eastToronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Business Reply Mail Processing Centre 969 Eastern,18,18,18,18,18,18
Studio District,41,41,41,41,41,41
The Beaches,4,4,4,4,4,4
"The Beaches West,India Bazaar",20,20,20,20,20,20
"The Danforth West,Riverdale",41,41,41,41,41,41


#### Analyze each neighborhood

In [192]:
# one hot encoding
eastToronto_onehot = pd.get_dummies(eastToronto_venues[['Venue Category']], prefix="", prefix_sep="")

eastToronto_onehot.drop(labels=['Neighborhood'], axis=1,inplace = True)

eastToronto_onehot['Neighborhood'] = eastToronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [eastToronto_onehot.columns[-1]] + list(eastToronto_onehot.columns[:-1])
eastToronto_onehot = eastToronto_onehot[fixed_columns]

eastToronto_onehot.head()


Unnamed: 0,Neighborhood,American Restaurant,Auto Workshop,Bakery,Bank,Bar,Board Shop,Bookstore,Brewery,Bubble Tea Shop,...,Smoke Shop,Spa,Stationery Store,Steakhouse,Sushi Restaurant,Thai Restaurant,Thrift / Vintage Store,Trail,Wine Bar,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"The Danforth West,Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [193]:
# Mean of each one-hot column per neighbourhood = relative frequency of that venue category.
eastToronto_grouped = eastToronto_onehot.groupby('Neighborhood', as_index=False).mean()
eastToronto_grouped

Unnamed: 0,Neighborhood,American Restaurant,Auto Workshop,Bakery,Bank,Bar,Board Shop,Bookstore,Brewery,Bubble Tea Shop,...,Smoke Shop,Spa,Stationery Store,Steakhouse,Sushi Restaurant,Thai Restaurant,Thrift / Vintage Store,Trail,Wine Bar,Yoga Studio
0,Business Reply Mail Processing Centre 969 Eastern,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,...,0.055556,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Studio District,0.04878,0.0,0.04878,0.02439,0.02439,0.0,0.02439,0.04878,0.0,...,0.0,0.0,0.02439,0.0,0.0,0.02439,0.02439,0.0,0.02439,0.02439
2,The Beaches,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0
3,"The Beaches West,India Bazaar",0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.05,0.0,...,0.0,0.0,0.0,0.05,0.05,0.0,0.0,0.0,0.0,0.0
4,"The Danforth West,Riverdale",0.02439,0.0,0.02439,0.0,0.0,0.0,0.04878,0.02439,0.02439,...,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.02439


In [194]:
num_top_venues = 5

# For every neighbourhood, print its most frequent venue categories.
for hood in eastToronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    hood_row = eastToronto_grouped.loc[eastToronto_grouped['Neighborhood'] == hood]
    # Transpose the single row into (venue, freq) pairs; the first pair is the
    # Neighborhood label itself, so it is skipped.
    temp = hood_row.T.reset_index().iloc[1:]
    temp.columns = ['venue', 'freq']
    temp = temp.astype({'freq': float}).round({'freq': 2})
    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Business Reply Mail Processing Centre 969 Eastern----
                venue  freq
0  Light Rail Station  0.11
1             Butcher  0.06
2         Pizza Place  0.06
3                Park  0.06
4    Recording Studio  0.06


----Studio District----
                 venue  freq
0                 Café  0.10
1          Coffee Shop  0.07
2  American Restaurant  0.05
3               Bakery  0.05
4   Italian Restaurant  0.05


----The Beaches----
                       venue  freq
0                        Pub  0.25
1                      Trail  0.25
2          Health Food Store  0.25
3  Latin American Restaurant  0.00
4                       Park  0.00


----The Beaches West,India Bazaar----
                  venue  freq
0                  Park  0.10
1  Fast Food Restaurant  0.05
2             Pet Store  0.05
3           Pizza Place  0.05
4                   Pub  0.05


----The Danforth West,Riverdale----
                    venue  freq
0        Greek Restaurant  0.20
1             Coffee

In [195]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest entries of `row`.

    The first element of `row` is skipped (it holds the neighbourhood name);
    the rest are sorted in descending order and the leading labels are
    returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [196]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in range(num_top_venues):
    # ranks 1-3 use 'st'/'nd'/'rd'; every later rank uses 'th'
    # (explicit bounds check instead of a bare except around the lookup)
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = eastToronto_grouped['Neighborhood']

# fill each row with that neighbourhood's top venue categories, in rank order
for ind in range(eastToronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(eastToronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Comic Shop,Brewery,Park,Butcher,Pizza Place,Burrito Place,Recording Studio,Restaurant,Skate Park
1,Studio District,Café,Coffee Shop,Brewery,American Restaurant,Italian Restaurant,Bakery,Coworking Space,Ice Cream Shop,Gym / Fitness Center,Gay Bar
2,The Beaches,Trail,Pub,Health Food Store,Yoga Studio,Diner,Comic Shop,Convenience Store,Cosmetics Shop,Coworking Space,Dessert Shop
3,"The Beaches West,India Bazaar",Park,Movie Theater,Restaurant,Gym,Coffee Shop,Ice Cream Shop,Italian Restaurant,Liquor Store,Fish & Chips Shop,Pet Store
4,"The Danforth West,Riverdale",Greek Restaurant,Coffee Shop,Italian Restaurant,Bookstore,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Diner,Grocery Store,Fruit & Vegetable Store


## Cluster neighborhoods

In [197]:
# set number of clusters
kclusters = 5

eastToronto_grouped_clustering = eastToronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(eastToronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 4, 1, 2, 0], dtype=int32)

In [203]:
# Make the cell safe to re-run: DataFrame.insert raises if the column already
# exists, so any earlier 'Cluster Labels' column is removed first.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop('Cluster Labels', axis=1)
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [204]:
# Attach the cluster label and ranked venue categories to each East Toronto row,
# matching its 'Neighbourhood' against the 'Neighborhood' key of the venues table.
venues_by_neighbourhood = neighborhoods_venues_sorted.set_index('Neighborhood')
eastToronto_merged = df_eastToronto.join(venues_by_neighbourhood, on='Neighbourhood')

eastToronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,1,Trail,Pub,Health Food Store,Yoga Studio,Diner,Comic Shop,Convenience Store,Cosmetics Shop,Coworking Space,Dessert Shop
1,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Bookstore,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Diner,Grocery Store,Fruit & Vegetable Store
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.668999,-79.315572,2,Park,Movie Theater,Restaurant,Gym,Coffee Shop,Ice Cream Shop,Italian Restaurant,Liquor Store,Fish & Chips Shop,Pet Store
3,M4M,East Toronto,Studio District,43.659526,-79.340923,4,Café,Coffee Shop,Brewery,American Restaurant,Italian Restaurant,Bakery,Coworking Space,Ice Cream Shop,Gym / Fitness Center,Gay Bar
4,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,3,Light Rail Station,Comic Shop,Brewery,Park,Butcher,Pizza Place,Burrito Place,Recording Studio,Restaurant,Skate Park


In [205]:
# Cluster label assigned to each East Toronto row.
eastToronto_merged['Cluster Labels']

0    1
1    0
2    2
3    4
4    3
Name: Cluster Labels, dtype: int32

In [206]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(eastToronto_merged['Latitude'], eastToronto_merged['Longitude'], eastToronto_merged['Neighbourhood'], eastToronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine Clusters

In [207]:
# Rows of cluster 0, showing the column at position 1 plus everything from position 5 onwards.
in_cluster = eastToronto_merged['Cluster Labels'] == 0
shown_columns = eastToronto_merged.columns[[1] + list(range(5, eastToronto_merged.shape[1]))]
eastToronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,East Toronto,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Bookstore,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Diner,Grocery Store,Fruit & Vegetable Store
