### Download all dependencies

In [1]:
import pandas as pd # primary data structure library
import numpy as np  # useful for many scientific computing in Pyth
wiki = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
Data_table = wiki[0]
Data_table.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


### Remove cells with a borough that is not assigned

In [2]:

Data_table_temp=Data_table.drop(Data_table[Data_table["Borough"] == "Not assigned"].index)
Data_table_temp.head()

Unnamed: 0,Postal code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


### Aggreagte rows with the same postal code

In [3]:

Data_table_temp1 = Data_table_temp.groupby(["Postal code", "Borough"], as_index=False).agg(lambda x: ", ".join(x))
Data_table_temp1.head()


Unnamed: 0,Postal code,Borough,Neighborhood
0,M1B,Scarborough,Malvern / Rouge
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek
2,M1E,Scarborough,Guildwood / Morningside / West Hill
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Check If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.

In [4]:
Data_table_temp1.loc[Data_table_temp1['Neighborhood'] == 'Not assigned', 'Neighborhood'] = Data_table_temp1.loc[Data_table_temp1['Neighborhood'] == 'Not assigned', 'Borough']

print(Data_table_temp1.loc[Data_table_temp1['Neighborhood'] == 'Not assigned'])

Empty DataFrame
Columns: [Postal code, Borough, Neighborhood]
Index: []


In [5]:

Data_table_temp1.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1B,Scarborough,Malvern / Rouge
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek
2,M1E,Scarborough,Guildwood / Morningside / West Hill
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [6]:

df = Data_table_temp1
print("Your table has {} rows.".format(df.shape[0]))

Your table has 103 rows.


In [7]:
# install required package
!pip install geocoder 



In [8]:
import geocoder # import geocoder

In [9]:
coordinates = pd.read_csv('https://cocl.us/Geospatial_data')
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [10]:

coordinates.rename(columns={"Postal Code": "Postal code"}, inplace=True)
coordinates.head()

Unnamed: 0,Postal code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
Data_table_temp1_new = Data_table_temp1.merge(coordinates, on="Postal code", how="left")
Data_table_temp1_new.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.784535,-79.160497
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [12]:
column_names = ["Postal code", "Borough", "Neighborhood", "Latitude", "Longitude"]
test_df = pd.DataFrame(columns=column_names)

test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]

for postcode in test_list:
    test_df = test_df.append(Data_table_temp1_new[Data_table_temp1_new["Postal code"]==postcode], ignore_index=True)
    
test_df

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
1,M2H,North York,Hillcrest Village,43.803762,-79.363452
2,M4B,East York,Parkview Hill / Woodbine Gardens,43.706397,-79.309937
3,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
4,M4G,East York,Leaside,43.70906,-79.363452
5,M4M,East Toronto,Studio District,43.659526,-79.340923
6,M1R,Scarborough,Wexford / Maryvale,43.750072,-79.295849
7,M9V,Etobicoke,South Steeles / Silverstone / Humbergate / Jam...,43.739416,-79.588437
8,M9L,North York,Humber Summit,43.756303,-79.565963
9,M5V,Downtown Toronto,CN Tower / King and Spadina / Railway Lands / ...,43.628947,-79.39442


### Export the df_with_cord as a csv file for using in next assignment.

In [13]:
test_df.to_csv(r'Canada_postcode_geospatial_agg.csv', index=None, header=True)

In [14]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

Solving environment: done

# All requested packages already installed.



In [15]:
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

Solving environment: done

# All requested packages already installed.



### Lat and Lon

In [18]:
address = 'Toronto'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Create a map and markers

In [20]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(Data_table_temp1_new['Latitude'], Data_table_temp1_new['Longitude'], Data_table_temp1_new['Borough'], Data_table_temp1_new['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    
map_toronto

### Toronto Neaighbourhoods 

In [21]:
borough_names = list(Data_table_temp1_new.Borough.unique())

borough_with_toronto = []

for x in borough_names:
    if "toronto" in x.lower():
        borough_with_toronto.append(x)
        
borough_with_toronto

['East Toronto', 'Central Toronto', 'Downtown Toronto', 'West Toronto']

In [23]:

# create a new DataFrame with only boroughs that contain the word Toronto
Data_table_temp1_new = Data_table_temp1_new[Data_table_temp1_new['Borough'].isin(borough_with_toronto)].reset_index(drop=True)
print(Data_table_temp1_new.shape)
Data_table_temp1_new.head()

(39, 5)


Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,The Danforth West / Riverdale,43.679557,-79.352188
2,M4L,East Toronto,India Bazaar / The Beaches West,43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [24]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(Data_table_temp1_new['Latitude'], Data_table_temp1_new['Longitude'], Data_table_temp1_new['Borough'], Data_table_temp1_new['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    
map_toronto

### Foursquare Credentials and Version

In [25]:
CLIENT_ID = 'ZN0KGBMSE3FT02FXQY4ZPQIJAX5F0DS03A3HBEL2THBSCJ2K' # your Foursquare ID
CLIENT_SECRET = 'XRNPINCGNOA2FUACSUDR4BJCRJTEDZJM1QPUTFUCIQ2DAUJY'  # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: ZN0KGBMSE3FT02FXQY4ZPQIJAX5F0DS03A3HBEL2THBSCJ2K
CLIENT_SECRET:XRNPINCGNOA2FUACSUDR4BJCRJTEDZJM1QPUTFUCIQ2DAUJY


In [29]:
import requests

In [34]:
radius = 500
LIMIT = 100

venues = []

for lat, long, post, borough, neighborhood in zip(Data_table_temp1_new['Latitude'], Data_table_temp1_new['Longitude'], Data_table_temp1_new['Postal code'], Data_table_temp1_new['Borough'], 
                                                  Data_table_temp1_new['Neighborhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id=ZN0KGBMSE3FT02FXQY4ZPQIJAX5F0DS03A3HBEL2THBSCJ2K&client_secret=XRNPINCGNOA2FUACSUDR4BJCRJTEDZJM1QPUTFUCIQ2DAUJY&v=20180605 \
     &ll=43.653963,-79.387207&radius=500&limit=100".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        venues.append((
            post, 
            borough,
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [37]:
#Convert venues list to new dataframe
venues_df = pd.DataFrame(venues)


venues_df.columns = ['PostalCode', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(1716, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,Downtown Toronto,43.653232,-79.385296,Neighborhood
1,M4E,East Toronto,The Beaches,43.676357,-79.293031,Textile Museum of Canada,43.654396,-79.3865,Art Museum
2,M4E,East Toronto,The Beaches,43.676357,-79.293031,Cafe Plenty,43.654571,-79.38945,Café
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,Sansotei Ramen 三草亭,43.655157,-79.386501,Ramen Restaurant
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,Japango,43.655268,-79.385165,Sushi Restaurant


In [36]:
#  many venues returned
venues_df.groupby(["PostalCode", "Borough", "Neighborhood"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
PostalCode,Borough,Neighborhood,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
M4E,East Toronto,The Beaches,44,44,44,44,44,44
M4K,East Toronto,The Danforth West / Riverdale,44,44,44,44,44,44
M4L,East Toronto,India Bazaar / The Beaches West,44,44,44,44,44,44
M4M,East Toronto,Studio District,44,44,44,44,44,44
M4N,Central Toronto,Lawrence Park,44,44,44,44,44,44
M4P,Central Toronto,Davisville North,44,44,44,44,44,44
M4R,Central Toronto,North Toronto West,44,44,44,44,44,44
M4S,Central Toronto,Davisville,44,44,44,44,44,44
M4T,Central Toronto,Moore Park / Summerhill East,44,44,44,44,44,44
M4V,Central Toronto,Summerhill West / Rathnelly / South Hill / Forest Hill SE / Deer Park,44,44,44,44,44,44


In [38]:
# Anslysing Data
# one hot encoding
toronto_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add postal, borough and neighborhood column back to dataframe
toronto_onehot['PostalCode'] = venues_df['PostalCode'] 
toronto_onehot['Borough'] = venues_df['Borough'] 
toronto_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move postal, borough and neighborhood column to the first column
fixed_columns = list(toronto_onehot.columns[-3:]) + list(toronto_onehot.columns[:-3])
toronto_onehot = toronto_onehot[fixed_columns]

print(toronto_onehot.shape)
toronto_onehot.head()

(1716, 37)


Unnamed: 0,PostalCode,Borough,Neighborhoods,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Bar,Breakfast Spot,Bubble Tea Shop,...,Plaza,Pub,Ramen Restaurant,Restaurant,Salon / Barbershop,Smoke Shop,Sushi Restaurant,Tapas Restaurant,University,Vegetarian / Vegan Restaurant
0,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M4E,East Toronto,The Beaches,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [39]:
#Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

toronto_grouped = toronto_onehot.groupby(["PostalCode", "Borough", "Neighborhoods"]).mean().reset_index()

print(toronto_grouped.shape)
toronto_grouped

(39, 37)


Unnamed: 0,PostalCode,Borough,Neighborhoods,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Bar,Breakfast Spot,Bubble Tea Shop,...,Plaza,Pub,Ramen Restaurant,Restaurant,Salon / Barbershop,Smoke Shop,Sushi Restaurant,Tapas Restaurant,University,Vegetarian / Vegan Restaurant
0,M4E,East Toronto,The Beaches,0.022727,0.068182,0.022727,0.022727,0.022727,0.045455,0.022727,...,0.022727,0.022727,0.022727,0.022727,0.022727,0.022727,0.045455,0.022727,0.022727,0.022727
1,M4K,East Toronto,The Danforth West / Riverdale,0.022727,0.068182,0.022727,0.022727,0.022727,0.045455,0.022727,...,0.022727,0.022727,0.022727,0.022727,0.022727,0.022727,0.045455,0.022727,0.022727,0.022727
2,M4L,East Toronto,India Bazaar / The Beaches West,0.022727,0.068182,0.022727,0.022727,0.022727,0.045455,0.022727,...,0.022727,0.022727,0.022727,0.022727,0.022727,0.022727,0.045455,0.022727,0.022727,0.022727
3,M4M,East Toronto,Studio District,0.022727,0.068182,0.022727,0.022727,0.022727,0.045455,0.022727,...,0.022727,0.022727,0.022727,0.022727,0.022727,0.022727,0.045455,0.022727,0.022727,0.022727
4,M4N,Central Toronto,Lawrence Park,0.022727,0.068182,0.022727,0.022727,0.022727,0.045455,0.022727,...,0.022727,0.022727,0.022727,0.022727,0.022727,0.022727,0.045455,0.022727,0.022727,0.022727
5,M4P,Central Toronto,Davisville North,0.022727,0.068182,0.022727,0.022727,0.022727,0.045455,0.022727,...,0.022727,0.022727,0.022727,0.022727,0.022727,0.022727,0.045455,0.022727,0.022727,0.022727
6,M4R,Central Toronto,North Toronto West,0.022727,0.068182,0.022727,0.022727,0.022727,0.045455,0.022727,...,0.022727,0.022727,0.022727,0.022727,0.022727,0.022727,0.045455,0.022727,0.022727,0.022727
7,M4S,Central Toronto,Davisville,0.022727,0.068182,0.022727,0.022727,0.022727,0.045455,0.022727,...,0.022727,0.022727,0.022727,0.022727,0.022727,0.022727,0.045455,0.022727,0.022727,0.022727
8,M4T,Central Toronto,Moore Park / Summerhill East,0.022727,0.068182,0.022727,0.022727,0.022727,0.045455,0.022727,...,0.022727,0.022727,0.022727,0.022727,0.022727,0.022727,0.045455,0.022727,0.022727,0.022727
9,M4V,Central Toronto,Summerhill West / Rathnelly / South Hill / For...,0.022727,0.068182,0.022727,0.022727,0.022727,0.045455,0.022727,...,0.022727,0.022727,0.022727,0.022727,0.022727,0.022727,0.045455,0.022727,0.022727,0.022727


In [40]:
#New dataframe and display the top 10 venues for each PostalCode
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
areaColumns = ['PostalCode', 'Borough', 'Neighborhoods']
freqColumns = []
for ind in np.arange(num_top_venues):
    try:
        freqColumns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        freqColumns.append('{}th Most Common Venue'.format(ind+1))
columns = areaColumns+freqColumns

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = toronto_grouped['PostalCode']
neighborhoods_venues_sorted['Borough'] = toronto_grouped['Borough']
neighborhoods_venues_sorted['Neighborhoods'] = toronto_grouped['Neighborhoods']

for ind in np.arange(toronto_grouped.shape[0]):
    row_categories = toronto_grouped.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighborhoods_venues_sorted.iloc[ind, 3:] = row_categories_sorted.index.values[0:num_top_venues]

# neighborhoods_venues_sorted.sort_values(freqColumns, inplace=True)
print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted


(39, 13)


Unnamed: 0,PostalCode,Borough,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
1,M4K,East Toronto,The Danforth West / Riverdale,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
2,M4L,East Toronto,India Bazaar / The Beaches West,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
3,M4M,East Toronto,Studio District,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
4,M4N,Central Toronto,Lawrence Park,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
5,M4P,Central Toronto,Davisville North,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
6,M4R,Central Toronto,North Toronto West,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
7,M4S,Central Toronto,Davisville,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
8,M4T,Central Toronto,Moore Park / Summerhill East,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
9,M4V,Central Toronto,Summerhill West / Rathnelly / South Hill / For...,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub


### Clustering

In [42]:
from sklearn.cluster import KMeans

In [43]:
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop(["PostalCode", "Borough", "Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

  return_n_iter=True)


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [47]:
#create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
toronto_merged = Data_table_temp1_new.copy()

# add clustering labels
toronto_merged["Cluster Labels"] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.drop(["Borough", "Neighborhoods"], 1).set_index("PostalCode"), on="Postal code")

print(toronto_merged.shape)
toronto_merged.head() # check the last columns!

(39, 16)


Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
1,M4K,East Toronto,The Danforth West / Riverdale,43.679557,-79.352188,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
2,M4L,East Toronto,India Bazaar / The Beaches West,43.668999,-79.315572,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
3,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub


In [48]:

# sort the results by Cluster Labels
print(toronto_merged.shape)
toronto_merged.sort_values(["Cluster Labels"], inplace=True)
toronto_merged

(39, 16)


Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
21,M5L,Downtown Toronto,Commerce Court / Victoria Hotel,43.648198,-79.379817,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
22,M5N,Central Toronto,Roselawn,43.711695,-79.416936,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
23,M5P,Central Toronto,Forest Hill North & West,43.696948,-79.411307,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
24,M5R,Central Toronto,The Annex / North Midtown / Yorkville,43.67271,-79.405678,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
25,M5S,Downtown Toronto,University of Toronto / Harbord,43.662696,-79.400049,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
26,M5T,Downtown Toronto,Kensington Market / Chinatown / Grange Park,43.653206,-79.400049,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
27,M5V,Downtown Toronto,CN Tower / King and Spadina / Railway Lands / ...,43.628947,-79.39442,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
20,M5K,Downtown Toronto,Toronto Dominion Centre / Design Exchange,43.647177,-79.381576,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
28,M5W,Downtown Toronto,Stn A PO Boxes,43.646435,-79.374846,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub


### Ckuster Visualisation

In [56]:
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

In [59]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, post, bor, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Postal code'], toronto_merged['Borough'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup('{} ({}): {} - Cluster {}'.format(bor, post, poi, cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
    map_clusters
       

### Cluster 1

In [60]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + \
                                                                                 list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
21,Downtown Toronto,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
22,Central Toronto,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
23,Central Toronto,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
24,Central Toronto,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
25,Downtown Toronto,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
26,Downtown Toronto,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
27,Downtown Toronto,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
20,Downtown Toronto,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub
28,Downtown Toronto,0,Coffee Shop,Art Gallery,Café,Sushi Restaurant,Breakfast Spot,Japanese Restaurant,Vegetarian / Vegan Restaurant,Donut Shop,Gift Shop,Gastropub


### Cluster 2

In [61]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + \
                                                                                 list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


### Cluster 3

In [62]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + \
                                                                                 list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


### Cluster 4

In [63]:
    toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + \
                                                                                 list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


### Cluster 5

In [64]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + \
                                                                                 list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


### Conclusion