# Assignment: Segmenting and Clustering Neighborhoods in Toronto

## <a name="dataframe"></a>1. Create Dataframe

### 1.1. Retrieve data

In [1]:
import pandas as pd

# Data source
urlWiki = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Read in data
torontoTable = pd.read_html(urlWiki)[0]

# The dataframe will consist of three columns: PostalCode, Borough, and Neighborhood
colNames = list(['PostalCode', 'Borough', 'Neighborhood'])
torontoTable.columns = colNames


### 1.2. Remove unassigned Boroughs

In [2]:
# Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
#
# Count entries without assigned borough
cntNotAss = torontoTable[torontoTable['Borough']=='Not assigned']['Borough'].count()
# New dataframe: reset index to make it start at zero
torontoTableClean = torontoTable[torontoTable['Borough']!='Not assigned'].reset_index(drop=True)
# Check results
print('Entries without assigned borough:  {}'.format(cntNotAss))
print('Data size of original table:      {} lines'.format(torontoTable.shape[0]))
print('Data size of new table:           {} lines'.format(torontoTableClean.shape[0]))

Entries without assigned borough:  77
Data size of original table:      288 lines
Data size of new table:           211 lines


### 1.3. Name unassigned Neighbourhoods

In [3]:
# Find entries with unassigned Neighbourhoods
tmpIdx = torontoTableClean['Neighborhood']=='Not assigned'

# Before
torontoTableClean[tmpIdx]

Unnamed: 0,PostalCode,Borough,Neighborhood
6,M7A,Queen's Park,Not assigned


In [4]:
# Rename: it is only one entry. We could do it manually but this will work just fine.
torontoTableClean.loc[tmpIdx, 'Neighborhood'] = torontoTableClean.loc[tmpIdx, 'Borough']

In [5]:
# After
torontoTableClean[tmpIdx]

Unnamed: 0,PostalCode,Borough,Neighborhood
6,M7A,Queen's Park,Queen's Park


### 1.4. Combine Neighborhoods

#### 1.4.1 Without groupby()

In [6]:
# More than one neighborhood can exist in one postal code area.
# These rows will be combined into one row with the neighborhoods separated with a comma

# Get set of unique PostalCode entries
uniquePostalCodes = torontoTableClean['PostalCode'].unique()
print('Number of unique postal codes: {}'.format(len(uniquePostalCodes)))

# New dataframe with combined neighbourhoods
torontoTableComb1 = pd.DataFrame(columns = colNames)

# Loop over unique postal codes
for postalCode in uniquePostalCodes:
    # Find all entries matching the current postal code
    tmpDf = torontoTableClean[torontoTableClean['PostalCode']==postalCode].reset_index(drop=True)
    # Loop over temporary table
    tmpStr=[]
    for idx, row in tmpDf.iterrows():
        # Combine neighbourhoods
        tmpStr.append(row.Neighborhood)
    # Create comma-separated string
    strNeighb = ', '.join(tmpStr)
    # Append new entry
    torontoTableComb1 = torontoTableComb1.append({'PostalCode': postalCode, 'Borough': tmpDf.loc[0, 'Borough'], 'Neighborhood': strNeighb}, ignore_index=True)


Number of unique postal codes: 103


#### 1.4.2 Using groupby()

In [7]:
# More than one neighborhood can exist in one postal code area.
# These rows will be combined into one row with the neighborhoods separated with a comma

# Get set of unique PostalCode entries
uniquePostalCodes = torontoTableClean['PostalCode'].unique()
print('Number of unique postal codes: {}'.format(len(uniquePostalCodes)))

torontoTableComb2 = torontoTableClean.groupby(['PostalCode', 'Borough'])['Neighborhood'].apply(list)
torontoTableComb2 = torontoTableComb2.sample(frac=1).reset_index()
torontoTableComb2['Neighborhood'] = torontoTableComb2['Neighborhood'].str.join(', ')

Number of unique postal codes: 103


### 1.5. Dataframe size

In [8]:
print('Number of rows in combined dataframe: {}'.format(torontoTableComb2.shape[0]))

Number of rows in combined dataframe: 103


## <a name="latlong"></a>2. Get latitude and longitude

### 2.1 Using geopy

In [9]:
# ArcGIS seems to provide results for all postal codes

import numpy as np

# Use Nomimatim instead of geocoder
#!conda install -c conda-forge geopy --yes
#from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
from geopy.geocoders import ArcGIS
geolocator = ArcGIS(user_agent="toronto_explorer")

# Arrays for latitude and longitude
arrLat = np.zeros(torontoTableComb2.shape[0])
arrLon = np.zeros(torontoTableComb2.shape[0])

# Loop over all rows and retrieve latitude and longitude for each postal code
for idx, row in torontoTableComb2.iterrows():
    print(idx, end=" ")
    address = '{}, Toronto, Ontario, CA'.format(row.PostalCode)
    print(address, end= "")
    
    location = geolocator.geocode(address)
    
    if (location != None):
        arrLat[idx] = location.latitude
        arrLon[idx] = location.longitude
        print(', lat: {}, long: {}'.format(location.latitude, location.longitude))
    else:
        print(', None')


0 M3N, Toronto, Ontario, CA, lat: 43.75537065200007, long: -79.51958999999994
1 M5J, Toronto, Ontario, CA, lat: 43.630210000000034, long: -79.36243320899996
2 M9P, Toronto, Ontario, CA, lat: 43.69650500000006, long: -79.53025233799997
3 M9R, Toronto, Ontario, CA, lat: 43.68681000000004, long: -79.55728354099995
4 M2R, Toronto, Ontario, CA, lat: 43.77769500000005, long: -79.44579657299994
5 M4V, Toronto, Ontario, CA, lat: 43.68607377100005, long: -79.40226499999994
6 M5R, Toronto, Ontario, CA, lat: 43.674840000000074, long: -79.40376823999998
7 M5N, Toronto, Ontario, CA, lat: 43.711941154000044, long: -79.41911999999996
8 M1E, Toronto, Ontario, CA, lat: 43.76581500000003, long: -79.17519294699997
9 M9N, Toronto, Ontario, CA, lat: 43.704845000000034, long: -79.51754601599998
10 M4B, Toronto, Ontario, CA, lat: 43.707535000000064, long: -79.31177329699995
11 M2K, Toronto, Ontario, CA, lat: 43.781015000000025, long: -79.38052867199997
12 M1G, Toronto, Ontario, CA, lat: 43.768369121000035, l

In [10]:
torontoTableComplete1 = torontoTableComb1
torontoTableComplete1['Latitude'] = arrLat
torontoTableComplete1['Longitude'] = arrLon

# Compare to sample of entries shown in assignment
torontoTableComplete1[torontoTableComplete1.isin({'PostalCode': ['M5G', 'M2H', 'M4B', 'M1J', 'M4G']})['PostalCode']]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.765815,-79.175193
23,M4G,East York,Leaside,43.70124,-79.349825
24,M5G,Downtown Toronto,Central Bay Street,43.662299,-79.528195
27,M2H,North York,Hillcrest Village,43.656091,-79.38493
32,M1J,Scarborough,Scarborough Village,43.747895,-79.399919


### 2.2 Using CSV file

In [11]:
# Read in CSV
dfLatLong = pd.read_csv('https://cocl.us/Geospatial_data')

In [12]:
# Rename column for join
dfLatLong.rename(columns={'Postal Code': 'PostalCode'}, inplace=True)

In [13]:
# Merge dataframes
torontoTableComplete2 = torontoTableComb2.merge(dfLatLong, on='PostalCode')

In [14]:
# Compare to sample of entries shown in assignment
torontoTableComplete2[torontoTableComplete2.isin({'PostalCode': ['M5G', 'M2H', 'M4B', 'M1J', 'M4G']})['PostalCode']]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
10,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
27,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
30,M2H,North York,Hillcrest Village,43.803762,-79.363452
45,M4G,East York,Leaside,43.70906,-79.363452
48,M1J,Scarborough,Scarborough Village,43.744734,-79.239476


### 2.3 Show dataframe

In [15]:
# The results from ArcGIS differ from the values in the CSV file.
# I will only show the values from the CSV

# Show entire dataframe
pd.set_option('display.max_colwidth', -1, 'display.max_rows', 150)
# Show
torontoTableComplete2

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3N,North York,Downsview Northwest,43.761631,-79.520999
1,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752
2,M9P,Etobicoke,Westmount,43.696319,-79.532242
3,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richview Gardens, St. Phillips",43.688905,-79.554724
4,M2R,North York,Willowdale West,43.782736,-79.442259
5,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West",43.686412,-79.400049
6,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678
7,M5N,Central Toronto,Roselawn,43.711695,-79.416936
8,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
9,M9N,York,Weston,43.706876,-79.518188


## <a name="clustering"></a> 3. Explore and Cluster the Neighborhoods

### 3.1 Reduce Data Set

In [16]:
# You can decide to work with only boroughs that contain the word Toronto and then replicate 
# the same analysis we did to the New York City data. It is up to you.

torontoTableReduced = torontoTableComplete2[torontoTableComplete2['Borough'].str.contains('Toronto')].reset_index(drop=True)
torontoTableComplete2.head()

# Just make sure:
#  to add enough Markdown cells to explain what you decided to do and to report any observations you make.
#  to generate maps to visualize your neighborhoods and how they cluster together. 

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3N,North York,Downsview Northwest,43.761631,-79.520999
1,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752
2,M9P,Etobicoke,Westmount,43.696319,-79.532242
3,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richview Gardens, St. Phillips",43.688905,-79.554724
4,M2R,North York,Willowdale West,43.782736,-79.442259


In [17]:
# Check our data
print('The dataframe has {} boroughs, {} neighborhoods and {} postal codes.'.format(
        len(torontoTableReduced['Borough'].unique()),
        len(torontoTableReduced['Neighborhood'].unique()),
        torontoTableReduced.shape[0]
    )
)

The dataframe has 4 boroughs, 38 neighborhoods and 38 postal codes.


### 3.2 Show a Map of Toronto with Neighborhoods from the new Data Set

In [18]:
import folium

# Location of Toronto
locToronto = geolocator.geocode('Toronto, Ontario, CA')


In [19]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[locToronto.latitude, locToronto.longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood, postcode in zip(torontoTableReduced['Latitude'], torontoTableReduced['Longitude'], torontoTableReduced['Borough'], torontoTableReduced['Neighborhood'], torontoTableReduced['PostalCode']):
    #label = '{}, {}'.format(borough, postcode)
    label = neighborhood
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### 3.3 Explore the Neighbourhood

In [20]:
import json # library to handle JSON files
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe


CLIENT_ID = '' # removed for upload
CLIENT_SECRET = '' # removed for upload
VERSION = '20180605' # Foursquare API version

In [21]:
radius = 500
LIMIT = 100

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [22]:
toronto_venues = getNearbyVenues(names=torontoTableReduced['Neighborhood'],
                                   latitudes=torontoTableReduced['Latitude'],
                                   longitudes=torontoTableReduced['Longitude']
                                  )

Harbourfront East, Toronto Islands, Union Station
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
The Annex, North Midtown, Yorkville
Roselawn
Stn A PO Boxes 25 The Esplanade
Chinatown, Grange Park, Kensington Market
Christie
Brockton, Exhibition Place, Parkdale Village
Central Bay Street
Rosedale
North Toronto West
Harbord, University of Toronto
High Park, The Junction South
Church and Wellesley
The Beaches
Parkdale, Roncesvalles
Business Reply Mail Processing Centre 969 Eastern
Adelaide, King, Richmond
St. James Town
Commerce Court, Victoria Hotel
The Beaches West, India Bazaar
Runnymede, Swansea
Studio District
First Canadian Place, Underground city
Forest Hill North, Forest Hill West
Dovercourt Village, Dufferin
Berczy Park
Harbourfront, Regent Park
Little Portugal, Trinity
Design Exchange, Toronto Dominion Centre
Lawrence Park
Davisville North
Davisville
Ryerson, Garden District
The Danforth West, Riverdale
CN Tower, Bathurst Quay, Island airport, Harbourfront We

### 3.4 Show Foursquare Results

In [23]:
print('Data set size: {}'.format(toronto_venues.shape))
toronto_venues.head()

Data set size: (1713, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752,Harbourfront,43.639526,-79.380688,Neighborhood
1,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752,Roundhouse Park,43.641745,-79.384279,Park
2,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752,iQ Food Co,43.642851,-79.382081,Salad Place
3,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752,BeaverTails,43.639736,-79.380068,Dessert Shop
4,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752,Aroma Espresso Bar,43.642321,-79.383749,Café


In [24]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,57,57,57,57,57,57
"Brockton, Exhibition Place, Parkdale Village",21,21,21,21,21,21
Business Reply Mail Processing Centre 969 Eastern,18,18,18,18,18,18
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",17,17,17,17,17,17
"Cabbagetown, St. James Town",43,43,43,43,43,43
Central Bay Street,85,85,85,85,85,85
"Chinatown, Grange Park, Kensington Market",100,100,100,100,100,100
Christie,16,16,16,16,16,16
Church and Wellesley,85,85,85,85,85,85


### 3.5 Analyse Each Neighborhood

In [25]:
# One hot encoding
torontoOneHot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# Add neighbourhoods
torontoOneHot['Neighborhoods'] = toronto_venues['Neighborhood'] 

# Move neighbourhoods to first column
newColOrder = [torontoOneHot.columns[-1]] + list(torontoOneHot.columns[:-1])
torontoOneHot = torontoOneHot[newColOrder]

torontoOneHot.head()

Unnamed: 0,Neighborhoods,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Transportation Service,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,"Harbourfront East, Toronto Islands, Union Station",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Harbourfront East, Toronto Islands, Union Station",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Harbourfront East, Toronto Islands, Union Station",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Harbourfront East, Toronto Islands, Union Station",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Harbourfront East, Toronto Islands, Union Station",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### 3.6 Group Rows by Neighbourhoods

In [26]:
torontoGrouped = torontoOneHot.groupby('Neighborhoods').mean().reset_index()
torontoGrouped.head()

Unnamed: 0,Neighborhoods,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Transportation Service,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556
4,"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 3.7  Most Common Venues

In [27]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [28]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhoods']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhoods'] = torontoGrouped['Neighborhoods']

for ind in np.arange(torontoGrouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(torontoGrouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Thai Restaurant,American Restaurant,Bar,Steakhouse,Hotel,Restaurant,Burger Joint,Asian Restaurant
1,Berczy Park,Coffee Shop,Cocktail Bar,Seafood Restaurant,Steakhouse,Bakery,Café,Beer Bar,Cheese Shop,Farmers Market,Park
2,"Brockton, Exhibition Place, Parkdale Village",Café,Breakfast Spot,Coffee Shop,Bakery,Climbing Gym,Bar,Furniture / Home Store,Stadium,Italian Restaurant,Caribbean Restaurant
3,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Yoga Studio,Spa,Garden Center,Garden,Fast Food Restaurant,Farmers Market,Park,Comic Shop,Recording Studio
4,"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",Airport Service,Airport Lounge,Airport Terminal,Plane,Harbor / Marina,Coffee Shop,Sculpture Garden,Boat or Ferry,Bar,Boutique


### 3.8 Cluster Neighbourhoods

In [29]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import numpy as np

# set number of clusters
# I chose a high number to see an effect (otherwise most are in the same cluster)
kclusters = 10

torontoGroupedClustering = torontoGrouped.drop('Neighborhoods', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(torontoGroupedClustering)

# check cluster labels generated for each row in the dataframe
plt.hist(kmeans.labels_, bins=np.arange(0,kclusters+1))
plt.xlabel('Cluster Label')
plt.ylabel('Frequency')
plt.title('Distribution of Clusters')
plt.xticks(np.arange(0,kclusters) + 0.5, list(np.arange(0,kclusters)))
plt.show()

<Figure size 640x480 with 1 Axes>

In [30]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [31]:
toronto_merged = torontoTableReduced
toronto_merged = toronto_merged.rename(columns={'Neighborhood': 'Neighborhoods'})

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhoods'), on='Neighborhoods')

toronto_merged.head(10) # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhoods,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752,6,Coffee Shop,Aquarium,Hotel,Café,Fried Chicken Joint,Scenic Lookout,Brewery,Baseball Stadium,Pizza Place,Italian Restaurant
1,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West",43.686412,-79.400049,6,Pub,Coffee Shop,Bagel Shop,American Restaurant,Supermarket,Restaurant,Sports Bar,Sushi Restaurant,Fried Chicken Joint,Pizza Place
2,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678,1,Coffee Shop,Café,Sandwich Place,Pizza Place,Pharmacy,BBQ Joint,Pub,Donut Shop,Martial Arts Dojo,Burger Joint
3,M5N,Central Toronto,Roselawn,43.711695,-79.416936,2,Home Service,Garden,Yoga Studio,Doner Restaurant,Fish & Chips Shop,Filipino Restaurant,Festival,Fast Food Restaurant,Farmers Market,Falafel Restaurant
4,M5W,Downtown Toronto,Stn A PO Boxes 25 The Esplanade,43.646435,-79.374846,6,Coffee Shop,Restaurant,Café,Cocktail Bar,Hotel,Beer Bar,Fast Food Restaurant,Italian Restaurant,Seafood Restaurant,Gym
5,M5T,Downtown Toronto,"Chinatown, Grange Park, Kensington Market",43.653206,-79.400049,6,Café,Vegetarian / Vegan Restaurant,Chinese Restaurant,Bar,Vietnamese Restaurant,Bakery,Mexican Restaurant,Coffee Shop,Dumpling Restaurant,Arts & Crafts Store
6,M6G,Downtown Toronto,Christie,43.669542,-79.422564,9,Café,Grocery Store,Park,Convenience Store,Restaurant,Italian Restaurant,Baby Store,Diner,Athletics & Sports,Nightclub
7,M6K,West Toronto,"Brockton, Exhibition Place, Parkdale Village",43.636847,-79.428191,6,Café,Breakfast Spot,Coffee Shop,Bakery,Climbing Gym,Bar,Furniture / Home Store,Stadium,Italian Restaurant,Caribbean Restaurant
8,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,6,Coffee Shop,Café,Italian Restaurant,Sandwich Place,Burger Joint,Ice Cream Shop,Japanese Restaurant,Salad Place,Bubble Tea Shop,Spa
9,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,5,Park,Trail,Playground,Building,Event Space,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant


### 3.9 Show clustered Neighbourhoods on the Map

In [32]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map of New York using latitude and longitude values
map_toronto_clust = folium.Map(location=[locToronto.latitude, locToronto.longitude], zoom_start=11)

# color map
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to map
for lat, lng, neighborhood, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhoods'], toronto_merged['Cluster Labels']):
    #label = '{}, {}'.format(borough, postcode)
    label = neighborhood
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color = rainbow[cluster-1],
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto