In [27]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

In [28]:
df = pd.read_html('https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=942655599')[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [29]:
df = df.loc[df['Borough'] != "Not assigned"]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [30]:
df = df.groupby(['Postcode', 'Borough'], sort=False)['Neighbourhood'].apply(', '.join).reset_index()
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park


In [31]:
df.shape

(103, 3)

In [32]:
postal_code = pd.read_csv('https://cocl.us/Geospatial_data')
postal_code.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [33]:
df = df.rename(columns = {'Postcode': 'Postal Code'})
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park


In [34]:
df1 = pd.merge(df, postal_code, on = 'Postal Code')
df1.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494


In [35]:
print('The dataframe has {} boroughs and {} neighbourhoods.'.format(len(df1['Borough'].unique()), df1['Neighbourhood'].shape[0]))

The dataframe has 10 boroughs and 103 neighbourhoods.


In [36]:
address = 'Toronto, Canada'
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent = "Toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinate of Toronto are {}, {}'.format(latitude, longitude))

The geographical coordinate of Toronto are 43.6534817, -79.3839347


In [37]:
map_toronto = folium.Map(location = [latitude, longitude], zoom_start = 10)
for lat, long, borough, neighbourhood in zip(df1['Latitude'], df1['Longitude'], df1['Borough'], df1['Neighbourhood']):
    label = '{},{}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker([lat, long], radius=5, popup = label, color = 'blue', fill = True, fill_color= '#3186cc', fill_opacity=0.7, parse_html=False).add_to(map_toronto)
    
map_toronto

In [38]:
toronto_data = df1[df1['Borough'].str.contains("Toronto")].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
1,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
2,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [39]:
toronto_data.shape

(39, 5)

In [40]:
client_id = 'KSB2DKVHTZSWEA2UI5D4NHG3RP1CCWR3VKVGOJMTTM202DG2'
client_secret = 'O22Y134QZXTDIYLA1E10YUB0I1D0MHOI1X3GRBW2HMJIKLR0'
version = '20190425'

In [41]:
toronto_data.loc[0,'Neighbourhood']

'Harbourfront'

In [42]:
nbh_lat = toronto_data.loc[0,'Latitude']
nbh_lng = toronto_data.loc[0,'Longitude']
nbh_name = toronto_data.loc[0,'Neighbourhood']
print('Latitude and Longitude values of {} are {},{}'.format(nbh_name, nbh_lat, nbh_lng))

Latitude and Longitude values of Harbourfront are 43.6542599,-79.3606359


In [43]:
limit = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(client_id, client_secret, version, nbh_lat, nbh_lng, radius, limit)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=KSB2DKVHTZSWEA2UI5D4NHG3RP1CCWR3VKVGOJMTTM202DG2&client_secret=O22Y134QZXTDIYLA1E10YUB0I1D0MHOI1X3GRBW2HMJIKLR0&v=20190425&ll=43.6542599,-79.3606359&radius=500&limit=100'

In [44]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e8402c4882fc7001bf0e570'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 46,
  'suggestedBounds': {'ne': {'lat': 43.6587599045, 'lng': -79.3544279001486},
   'sw': {'lat': 43.6497598955, 'lng': -79.36684389985142}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label': 'display',
 

In [45]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [46]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues)


filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]


nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)


nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
3,Body Blitz Spa East,Spa,43.654735,-79.359874
4,Morning Glory Cafe,Breakfast Spot,43.653947,-79.361149


In [47]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

46 venues were returned by Foursquare.


In [48]:
nearby_venues.shape

(46, 4)

In [49]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            client_id, 
            client_secret, 
            version, 
            nbh_lat, 
            nbh_lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [50]:
toronto_venues = getNearbyVenues(names = toronto_data['Neighbourhood'], latitudes = toronto_data['Latitude'], longitudes = toronto_data['Longitude'])

Harbourfront
Queen's Park
Ryerson, Garden District
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Adelaide, King, Richmond
Dovercourt Village, Dufferin
Harbourfront East, Toronto Islands, Union Station
Little Portugal, Trinity
The Danforth West, Riverdale
Design Exchange, Toronto Dominion Centre
Brockton, Exhibition Place, Parkdale Village
The Beaches West, India Bazaar
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North, Forest Hill West
High Park, The Junction South
North Toronto West
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
Harbord, University of Toronto
Runnymede, Swansea
Moore Park, Summerhill East
Chinatown, Grange Park, Kensington Market
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown, St. James Town
Fir

In [51]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Harbourfront,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Harbourfront,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Harbourfront,43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,Harbourfront,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,Harbourfront,43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


In [52]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",46,46,46,46,46,46
Berczy Park,46,46,46,46,46,46
"Brockton, Exhibition Place, Parkdale Village",46,46,46,46,46,46
Business Reply Mail Processing Centre 969 Eastern,46,46,46,46,46,46
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",46,46,46,46,46,46
"Cabbagetown, St. James Town",46,46,46,46,46,46
Central Bay Street,46,46,46,46,46,46
"Chinatown, Grange Park, Kensington Market",46,46,46,46,46,46
Christie,46,46,46,46,46,46
Church and Wellesley,46,46,46,46,46,46


# Analyse Each Neighborhood

In [53]:
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix = "",prefix_sep = "")
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood']
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]
toronto_onehot.head()

Unnamed: 0,Neighborhood,Antique Shop,Asian Restaurant,Bakery,Bank,Beer Store,Breakfast Spot,Café,Chocolate Shop,Coffee Shop,Cosmetics Shop,Dessert Shop,Distribution Center,Electronics Store,Event Space,Farmers Market,French Restaurant,Health Food Store,Historic Site,Hotel,Ice Cream Shop,Mexican Restaurant,Park,Performing Arts Venue,Pub,Restaurant,Shoe Store,Spa,Theater,Yoga Studio
0,Harbourfront,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Harbourfront,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Harbourfront,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Harbourfront,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
4,Harbourfront,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [54]:
toronto_onehot.shape

(1794, 30)

In [55]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Antique Shop,Asian Restaurant,Bakery,Bank,Beer Store,Breakfast Spot,Café,Chocolate Shop,Coffee Shop,Cosmetics Shop,Dessert Shop,Distribution Center,Electronics Store,Event Space,Farmers Market,French Restaurant,Health Food Store,Historic Site,Hotel,Ice Cream Shop,Mexican Restaurant,Park,Performing Arts Venue,Pub,Restaurant,Shoe Store,Spa,Theater,Yoga Studio
0,"Adelaide, King, Richmond",0.021739,0.021739,0.043478,0.021739,0.021739,0.043478,0.043478,0.021739,0.173913,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.043478,0.065217,0.021739,0.065217,0.043478,0.021739,0.021739,0.043478,0.021739
1,Berczy Park,0.021739,0.021739,0.043478,0.021739,0.021739,0.043478,0.043478,0.021739,0.173913,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.043478,0.065217,0.021739,0.065217,0.043478,0.021739,0.021739,0.043478,0.021739
2,"Brockton, Exhibition Place, Parkdale Village",0.021739,0.021739,0.043478,0.021739,0.021739,0.043478,0.043478,0.021739,0.173913,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.043478,0.065217,0.021739,0.065217,0.043478,0.021739,0.021739,0.043478,0.021739
3,Business Reply Mail Processing Centre 969 Eastern,0.021739,0.021739,0.043478,0.021739,0.021739,0.043478,0.043478,0.021739,0.173913,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.043478,0.065217,0.021739,0.065217,0.043478,0.021739,0.021739,0.043478,0.021739
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.021739,0.021739,0.043478,0.021739,0.021739,0.043478,0.043478,0.021739,0.173913,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.043478,0.065217,0.021739,0.065217,0.043478,0.021739,0.021739,0.043478,0.021739
5,"Cabbagetown, St. James Town",0.021739,0.021739,0.043478,0.021739,0.021739,0.043478,0.043478,0.021739,0.173913,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.043478,0.065217,0.021739,0.065217,0.043478,0.021739,0.021739,0.043478,0.021739
6,Central Bay Street,0.021739,0.021739,0.043478,0.021739,0.021739,0.043478,0.043478,0.021739,0.173913,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.043478,0.065217,0.021739,0.065217,0.043478,0.021739,0.021739,0.043478,0.021739
7,"Chinatown, Grange Park, Kensington Market",0.021739,0.021739,0.043478,0.021739,0.021739,0.043478,0.043478,0.021739,0.173913,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.043478,0.065217,0.021739,0.065217,0.043478,0.021739,0.021739,0.043478,0.021739
8,Christie,0.021739,0.021739,0.043478,0.021739,0.021739,0.043478,0.043478,0.021739,0.173913,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.043478,0.065217,0.021739,0.065217,0.043478,0.021739,0.021739,0.043478,0.021739
9,Church and Wellesley,0.021739,0.021739,0.043478,0.021739,0.021739,0.043478,0.043478,0.021739,0.173913,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.021739,0.043478,0.065217,0.021739,0.065217,0.043478,0.021739,0.021739,0.043478,0.021739


In [56]:
toronto_grouped.shape

(39, 30)

In [57]:
num_top_venues = 5
for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
         venue  freq
0  Coffee Shop  0.17
1          Pub  0.07
2         Park  0.07
3       Bakery  0.04
4      Theater  0.04


----Berczy Park----
         venue  freq
0  Coffee Shop  0.17
1          Pub  0.07
2         Park  0.07
3       Bakery  0.04
4      Theater  0.04


----Brockton, Exhibition Place, Parkdale Village----
         venue  freq
0  Coffee Shop  0.17
1          Pub  0.07
2         Park  0.07
3       Bakery  0.04
4      Theater  0.04


----Business Reply Mail Processing Centre 969 Eastern----
         venue  freq
0  Coffee Shop  0.17
1          Pub  0.07
2         Park  0.07
3       Bakery  0.04
4      Theater  0.04


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
         venue  freq
0  Coffee Shop  0.17
1          Pub  0.07
2         Park  0.07
3       Bakery  0.04
4      Theater  0.04


----Cabbagetown, St. James Town----
         venue  freq
0  Coffee Shop  0.17
1  

In [58]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [59]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
1,Berczy Park,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
2,"Brockton, Exhibition Place, Parkdale Village",Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
3,Business Reply Mail Processing Centre 969 Eastern,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop


# Cluster Neighborhoods

In [60]:
# set number of clusters
kclusters = 3

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', axis = 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

  import sys


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [61]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

toronto_merged.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,0,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
1,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494,0,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
2,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,0,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop


In [62]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine Clusters

In [63]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,0,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
1,Downtown Toronto,0,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
2,Downtown Toronto,0,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
3,Downtown Toronto,0,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
4,East Toronto,0,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
5,Downtown Toronto,0,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
6,Downtown Toronto,0,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
7,Downtown Toronto,0,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
8,Downtown Toronto,0,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
9,West Toronto,0,Coffee Shop,Pub,Park,Mexican Restaurant,Café,Bakery,Restaurant,Theater,Breakfast Spot,Cosmetics Shop
