# IBM Coursera Week 3




## Part 1: Loading Toronto's data on boroughs and neighborhoods

### 1.1. Transform data from wiki into a dataframe

In [240]:
import csv # for saving table as a csv file
import os # for reading API credentials from the environment

import folium # map rendering library
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests # to request website
# import k-means from clustering stage
from sklearn.cluster import KMeans


# Parse every HTML table found on the Wikipedia page; the postal-code table is the first one.
WIKI_POSTAL_CODES_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
tables = pd.read_html(WIKI_POSTAL_CODES_URL)
CanadaDS = tables[0]

In [241]:
CanadaDS.shape

(289, 3)

In [242]:
CanadaDS.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### 1.2. Replace "Not assigned" with NaN and drop rows that have no Borough

In [243]:
# Treat the literal "Not assigned" as missing data, drop the postal codes that
# have no borough, and renumber the remaining rows from 0.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0; the chain
# rebinds CanadaDS instead of mutating the scraped table in place.
CanadaDS = (
    CanadaDS
    .replace(to_replace='Not assigned', value=np.nan)
    .dropna(axis=0, subset=['Borough'])
    .reset_index(drop=True)
)
CanadaDS.head(7)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
5,M6A,North York,Lawrence Manor
6,M7A,Queen's Park,


In [244]:
CanadaDS.shape

(212, 3)

### 1.3. Replace NaN values in Neighbourhood column with Borough values

In [245]:
# Rows whose neighbourhood is missing fall back to the borough name.
# A single .loc[rows, column] assignment is used instead of chained indexing
# (df[col].loc[rows] = ...), which may write to a temporary copy and is a
# no-op under pandas copy-on-write.
Index = CanadaDS['Neighbourhood'].isna()
CanadaDS.loc[Index, 'Neighbourhood'] = CanadaDS.loc[Index, 'Borough']
CanadaDS.shape

(212, 3)

In [246]:
CanadaDS.head(7)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
5,M6A,North York,Lawrence Manor
6,M7A,Queen's Park,Queen's Park


### 1.4. Combine the neighbourhoods that share the same 'Postcode' into a single comma-separated row

In [247]:

# Collapse to one row per (Postcode, Borough), listing its neighbourhoods
# comma-separated. unique() removes duplicates while keeping first-seen order;
# the previous set() produced an order that could change from run to run.
# reset_index() already returns a DataFrame, so no re-wrapping is needed.
CanadaDS = (CanadaDS.groupby(['Postcode','Borough'])['Neighbourhood']
       .apply(lambda x: ','.join(x.dropna().unique()))
       .reset_index())

CanadaDS.head(7)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern,Rouge"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Morningside,Guildwood,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Ionview,Kennedy Park,East Birchmount Park"


## 1.5. Dataframe Shape

In [248]:
CanadaDS.shape

(103, 3)

# Part 2: Getting the latitudes and longitudes 

### Google Maps Geocoding API can be unreliable so we will use the csv that Coursera provided ("geospatial coordinates" csv file)


## 2.1. Get coordinates from csv file

In [249]:
# Download the coordinates file to 'Geospatial_Coordinates.csv' (-q: quiet, -O: output file name).
!wget -q -O 'Geospatial_Coordinates.csv' https://cocl.us/Geospatial_data

## 2.2. Concatenate coordinates with dataset

In [250]:
# Load the downloaded coordinates file (comma is read_csv's default separator).
Geospatial_Coordinates = pd.read_csv('Geospatial_Coordinates.csv')
Geospatial_Coordinates.shape


(103, 3)

In [251]:
Geospatial_Coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [252]:
# Attach coordinates by matching postal codes rather than by row position, so a
# differently ordered or incomplete CSV cannot pair a postcode with the wrong
# coordinates. how='left' keeps every CanadaDS row; validate= raises if a postal
# code appears more than once on either side.
Geo = pd.DataFrame(Geospatial_Coordinates)
CanadaDS = (
    CanadaDS
    .merge(Geo, how='left', left_on='Postcode', right_on='Postal Code', validate='one_to_one')
    .drop(columns=['Postal Code'])
)
CanadaDS.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern,Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Morningside,Guildwood,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [253]:
CanadaDS.shape

(103, 5)

# Part 3 - Mapping

### Picking Boroughs that contain the string 'Toronto'

#### Removed all other Boroughs

In [254]:
# Keep only the boroughs whose name contains "Toronto", then renumber the rows.
is_toronto_borough = CanadaDS['Borough'].str.contains("Toronto")
CanadaDS = CanadaDS[is_toronto_borough].reset_index(drop=True)

In [255]:
CanadaDS.shape

(38, 5)

In [256]:
CanadaDS.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"Riverdale,The Danforth West",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


## 3.1. Find the Geolocation of Toronto to plot in Folium

In [257]:
# library to handle JSON files 
import json
# convert an address into latitude and longitude values
from geopy.geocoders import Nominatim
# transform JSON file into a pandas dataframe
try:
    # top-level location, available since pandas 1.0; the old
    # pandas.io.json path is deprecated and fails to import on recent pandas
    from pandas import json_normalize
except ImportError:
    # fallback for pandas < 1.0
    from pandas.io.json import json_normalize

# import k-means from clustering stage
from sklearn.cluster import KMeans
import folium # map rendering library
print('Libraries imported!')

Libraries imported!


In [258]:
# Geocode the city name to get a centre point for the maps below.
address = 'Toronto'
geolocator = Nominatim(user_agent="tr_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


## 3.2. Map of Toronto with the neighborhoods

In [259]:
# create map of Toronto, centred slightly north of the geocoded point
map_toronto = folium.Map(location=[latitude+0.03, longitude], zoom_start=12)

# one small blue circle per row of CanadaDS, with a "<neighbourhoods>, <borough>" popup
marker_style = dict(radius=3, color='blue', fill=True, fill_color='blue', fill_opacity=0.6)
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighbourhood']
for lat, lng, borough, neighborhood in zip(*(CanadaDS[col] for col in marker_columns)):
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker([lat, lng], popup=popup, **marker_style).add_to(map_toronto)
map_toronto

## 3.3. Using Foursquare API to explore the neighborhoods

### 3.3.1. Foursquare Credentials

In [260]:
# define Foursquare Credentials and Version
# The credentials are read from the environment so that no secret is stored in,
# or printed by, the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hard-coded here should be treated
# as compromised and rotated.
try:
    CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] # your Foursquare ID
    CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] # your Foursquare Secret
except KeyError as missing:
    raise RuntimeError('Missing environment variable {}'.format(missing)) from None
VERSION = '20190219' # Foursquare API version

print('Foursquare credentials loaded from the environment.')

Your credentails:
CLIENT_ID: ZEQE0*******************************************
CLIENT_SECRET:LOIDI*******************************************


### 3.3.2. Getting the top 100 venues that are within a radius of 500 meters.

In [261]:
# getting the top 100 venues that are within a radius of 500 meters.

radius = 500
LIMIT = 100
REQUEST_TIMEOUT = 30 # seconds; without a timeout a stalled request would hang the cell
EXPLORE_URL = "https://api.foursquare.com/v2/venues/explore"

venues = []

for lat, long, post, borough, neighborhood in zip(CanadaDS['Latitude'], CanadaDS['Longitude'], CanadaDS['Postcode'], CanadaDS['Borough'], CanadaDS['Neighbourhood']):
    # let requests build and URL-encode the query string
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat, long),
        'radius': radius,
        'limit': LIMIT,
    }
    response = requests.get(EXPLORE_URL, params=params, timeout=REQUEST_TIMEOUT)
    # surface HTTP errors here rather than as a KeyError while unpacking the payload
    response.raise_for_status()
    results = response.json()["response"]['groups'][0]['items']

    for venue in results:
        details = venue['venue']
        categories = details['categories']
        if not categories:
            # categories[0] would raise IndexError; a venue without a category
            # adds nothing to the per-category analysis below, so skip it
            continue
        venues.append((
            post,
            borough,
            neighborhood,
            lat,
            long,
            details['name'],
            details['location']['lat'],
            details['location']['lng'],
            categories[0]['name']))

### 3.3.3. Converting the Venues into a new DataFrame

In [262]:
# converting the venues list into a new DataFrame
# The column names are passed to the constructor so that an empty `venues` list
# yields an empty, correctly labelled frame instead of a length-mismatch error
# when the names are assigned afterwards.
venue_columns = ['Postcode', 'Borough', 'Neighbourhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
venues_df = pd.DataFrame(venues, columns=venue_columns)

print(venues_df.shape)
venues_df.head()

(1697, 9)


Unnamed: 0,Postcode,Borough,Neighbourhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
1,M4E,East Toronto,The Beaches,43.676357,-79.293031,Starbucks,43.678798,-79.298045,Coffee Shop
2,M4E,East Toronto,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,Crazy Sexy Cool,43.680391,-79.290656,Boutique
4,M4K,East Toronto,"Riverdale,The Danforth West",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


### 3.3.4. Venue categories

In [263]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 234 uniques categories.


In [264]:
venues_unique_DF = venues_df['VenueCategory'].unique()
venues_unique_DF

array(['Pub', 'Coffee Shop', 'Neighborhood', 'Boutique',
       'Greek Restaurant', 'Ice Cream Shop', 'Cosmetics Shop',
       'Italian Restaurant', 'Yoga Studio', 'Health Food Store',
       'Brewery', 'Fruit & Vegetable Store', 'Pizza Place', 'Restaurant',
       'Juice Bar', 'Trail', 'Bookstore', 'Diner', 'Dessert Shop',
       'Bubble Tea Shop', 'Indian Restaurant', 'Spa', 'Grocery Store',
       'Bakery', 'Caribbean Restaurant', 'American Restaurant',
       'Liquor Store', 'Furniture / Home Store', 'Burger Joint', 'Gym',
       'Fish & Chips Shop', 'Park', 'Sushi Restaurant', 'Steakhouse',
       'Burrito Place', 'Pet Store', 'Fast Food Restaurant',
       'Movie Theater', 'Sandwich Place', 'Light Rail Station',
       'Food & Drink Shop', 'Fish Market', 'Cheese Shop', 'Café',
       'Comfort Food Restaurant', 'Chinese Restaurant',
       'Middle Eastern Restaurant', 'Stationery Store',
       'New American Restaurant', 'Seafood Restaurant', 'Coworking Space',
       'Music Store

### 3.3.4. Number/type of Venues for each PostalCode

In [265]:
venues_df.groupby(["Postcode", "Borough", "Neighbourhood"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Postcode,Borough,Neighbourhood,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
M4E,East Toronto,The Beaches,4,4,4,4,4,4
M4K,East Toronto,"Riverdale,The Danforth West",42,42,42,42,42,42
M4L,East Toronto,"The Beaches West,India Bazaar",20,20,20,20,20,20
M4M,East Toronto,Studio District,41,41,41,41,41,41
M4N,Central Toronto,Lawrence Park,5,5,5,5,5,5
M4P,Central Toronto,Davisville North,8,8,8,8,8,8
M4R,Central Toronto,North Toronto West,22,22,22,22,22,22
M4S,Central Toronto,Davisville,32,32,32,32,32,32
M4T,Central Toronto,"Summerhill East,Moore Park",3,3,3,3,3,3
M4V,Central Toronto,"Summerhill West,Forest Hill SE,Deer Park,South Hill,Rathnelly",15,15,15,15,15,15


In [266]:

# one hot encoding: one indicator column per venue category
category_dummies = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# put the postal, borough and neighborhood columns in front of the indicator columns
area_columns = venues_df[['Postcode', 'Borough', 'Neighbourhood']]
toronto_onehot = pd.concat([area_columns, category_dummies], axis=1)

print(toronto_onehot.shape)
toronto_onehot.head(2)

(1697, 237)


Unnamed: 0,Postcode,Borough,Neighbourhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### 3.3.5. Group rows by neighbourhood and take the mean of the frequency of occurrence of each category

In [267]:
# average the indicator columns within each (Postcode, Borough, Neighbourhood) group;
# as_index=False keeps the three keys as ordinary columns
area_keys = ["Postcode", "Borough", "Neighbourhood"]
toronto_grouped = toronto_onehot.groupby(area_keys, as_index=False).mean()

print(toronto_grouped.shape)
toronto_grouped.head(5)

(38, 237)


Unnamed: 0,Postcode,Borough,Neighbourhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,M4E,East Toronto,The Beaches,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M4K,East Toronto,"Riverdale,The Danforth West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381
2,M4L,East Toronto,"The Beaches West,India Bazaar",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M4M,East Toronto,Studio District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439
4,M4N,Central Toronto,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 3.3.6. Creating the new dataframe and display the top 10 venues for each Post Code.

In [268]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
areaColumns = ['Postcode', 'Borough', 'Neighbourhood']
freqColumns = []
for ind in np.arange(num_top_venues):
    rank = ind + 1
    # An ordinal ends in st/nd/rd only when its last digit is 1-3 and its last
    # two digits are not 11-13; everything else takes 'th'. This replaces a bare
    # `except:` used as control flow, which also produced "21th", "22th", ...
    if rank % 10 in (1, 2, 3) and rank % 100 not in (11, 12, 13):
        suffix = indicators[rank % 10 - 1]
    else:
        suffix = 'th'
    freqColumns.append('{}{} Most Common Venue'.format(rank, suffix))
columns = areaColumns+freqColumns

# start from an empty frame with the target columns, then copy the three area
# columns over from toronto_grouped
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
for area_col in ['Postcode', 'Borough', 'Neighbourhood']:
    neighborhoods_venues_sorted[area_col] = toronto_grouped[area_col]

# for each row, rank the category columns by value (highest first) and keep
# the names of the first num_top_venues categories
for row_pos in range(len(toronto_grouped)):
    category_freqs = toronto_grouped.iloc[row_pos, :].iloc[3:]
    ranked_categories = category_freqs.sort_values(ascending=False).index.values
    neighborhoods_venues_sorted.iloc[row_pos, 3:] = ranked_categories[0:num_top_venues]

print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted.head()

(38, 13)


Unnamed: 0,Postcode,Borough,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,Neighborhood,Boutique,Coffee Shop,Pub,Dog Run,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space
1,M4K,East Toronto,"Riverdale,The Danforth West",Greek Restaurant,Coffee Shop,Ice Cream Shop,Bookstore,Italian Restaurant,Cosmetics Shop,Brewery,Bubble Tea Shop,Restaurant,Caribbean Restaurant
2,M4L,East Toronto,"The Beaches West,India Bazaar",Sandwich Place,Liquor Store,Light Rail Station,Sushi Restaurant,Italian Restaurant,Brewery,Pub,Ice Cream Shop,Movie Theater,Steakhouse
3,M4M,East Toronto,Studio District,Café,Coffee Shop,Italian Restaurant,Bakery,American Restaurant,Cheese Shop,Juice Bar,Bookstore,Fish Market,Skate Park
4,M4N,Central Toronto,Lawrence Park,Park,Lake,Dim Sum Restaurant,Swim School,Bus Line,Yoga Studio,Donut Shop,Filipino Restaurant,Fast Food Restaurant,Farmers Market


### 3.3.7. Apply k-means to cluster the Toronto areas into 5 clusters.

In [269]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop(["Postcode", "Borough", "Neighbourhood"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([4, 0, 2, 0, 2, 2, 0, 0, 3, 4], dtype=int32)

### 3.3.8. 10 venues for each Neighbourhood

In [270]:
# creating a new dataframe that includes the cluster as well as the top 10 venues for each neighbourhood.

# kmeans.labels_ is ordered like the rows of toronto_grouped, and
# neighborhoods_venues_sorted was filled row by row from toronto_grouped, so the
# labels are attached there and then matched to CanadaDS by Postcode. Assigning
# the labels to CanadaDS by position would fail, or mislabel rows, whenever a
# postcode returned no venues.
# (columns= keyword: the positional axis argument of drop() was removed in pandas 2.0)
venues_with_cluster = neighborhoods_venues_sorted.drop(columns=["Borough", "Neighbourhood"])
venues_with_cluster.insert(1, "Cluster Labels", kmeans.labels_)

# inner merge: adds latitude/longitude for each clustered postcode and leaves
# out postcodes that have no cluster label
toronto_merged = CanadaDS.merge(venues_with_cluster, on="Postcode", how="inner")

print(toronto_merged.shape)
toronto_merged.head() # check the last columns!

(38, 16)


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,4,Neighborhood,Boutique,Coffee Shop,Pub,Dog Run,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space
1,M4K,East Toronto,"Riverdale,The Danforth West",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Ice Cream Shop,Bookstore,Italian Restaurant,Cosmetics Shop,Brewery,Bubble Tea Shop,Restaurant,Caribbean Restaurant
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.668999,-79.315572,2,Sandwich Place,Liquor Store,Light Rail Station,Sushi Restaurant,Italian Restaurant,Brewery,Pub,Ice Cream Shop,Movie Theater,Steakhouse
3,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Café,Coffee Shop,Italian Restaurant,Bakery,American Restaurant,Cheese Shop,Juice Bar,Bookstore,Fish Market,Skate Park
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,2,Park,Lake,Dim Sum Restaurant,Swim School,Bus Line,Yoga Studio,Donut Shop,Filipino Restaurant,Fast Food Restaurant,Farmers Market


In [271]:
# neighborhoods_venues_sorted was already built from toronto_grouped in section
# 3.3.6 and has not been modified since, so it is only displayed again here
# instead of being recomputed by a copy of the same code.
print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted.head()

(38, 13)


Unnamed: 0,Postcode,Borough,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,Neighborhood,Boutique,Coffee Shop,Pub,Dog Run,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space
1,M4K,East Toronto,"Riverdale,The Danforth West",Greek Restaurant,Coffee Shop,Ice Cream Shop,Bookstore,Italian Restaurant,Cosmetics Shop,Brewery,Bubble Tea Shop,Restaurant,Caribbean Restaurant
2,M4L,East Toronto,"The Beaches West,India Bazaar",Sandwich Place,Liquor Store,Light Rail Station,Sushi Restaurant,Italian Restaurant,Brewery,Pub,Ice Cream Shop,Movie Theater,Steakhouse
3,M4M,East Toronto,Studio District,Café,Coffee Shop,Italian Restaurant,Bakery,American Restaurant,Cheese Shop,Juice Bar,Bookstore,Fish Market,Skate Park
4,M4N,Central Toronto,Lawrence Park,Park,Lake,Dim Sum Restaurant,Swim School,Bus Line,Yoga Studio,Donut Shop,Filipino Restaurant,Fast Food Restaurant,Farmers Market


In [272]:
# sorting the results by Cluster Labels
print(toronto_merged.shape)
toronto_merged.sort_values(["Cluster Labels"], inplace=True)
toronto_merged.head(5)

(38, 16)


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,M5H,Downtown Toronto,"Adelaide,Richmond,King",43.650571,-79.384568,0,Coffee Shop,Café,American Restaurant,Steakhouse,Thai Restaurant,Clothing Store,Gym,Restaurant,Hotel,Bar
25,M5S,Downtown Toronto,"University of Toronto,Harbord",43.662696,-79.400049,0,Café,Bar,Japanese Restaurant,Bookstore,Restaurant,Bakery,Coffee Shop,Chinese Restaurant,Beer Bar,Beer Store
24,M5R,Central Toronto,"Yorkville,The Annex,North Midtown",43.67271,-79.405678,0,Coffee Shop,Café,Sandwich Place,Pizza Place,Pharmacy,Indian Restaurant,Cosmetics Shop,Pub,Burger Joint,Jewish Restaurant
29,M5X,Downtown Toronto,"Underground city,First Canadian Place",43.648429,-79.38228,0,Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Gastropub,Seafood Restaurant,Bar,Bakery,Steakhouse
21,M5L,Downtown Toronto,"Commerce Court,Victoria Hotel",43.648198,-79.379817,0,Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Gastropub,Deli / Bodega,Seafood Restaurant,Bakery,Steakhouse


### 3.3.9. Visualizing the resulting clusters


In [273]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: kclusters evenly spaced colours from the
# rainbow colormap, as hex strings
rainbow = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, kclusters))]

# add markers to the map
for lat, lon, post, bor, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Postcode'], toronto_merged['Borough'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup('{} ({}): {} - Cluster {}'.format(bor, post, poi, cluster), parse_html=True)
    # labels run from 0 to kclusters-1, so they index the palette directly
    # (cluster-1 only worked for label 0 through negative-index wraparound)
    cluster_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### 3.3.10. Examine Clusters

In [274]:
#Cluster 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Downtown Toronto,0,Coffee Shop,Café,American Restaurant,Steakhouse,Thai Restaurant,Clothing Store,Gym,Restaurant,Hotel,Bar
25,Downtown Toronto,0,Café,Bar,Japanese Restaurant,Bookstore,Restaurant,Bakery,Coffee Shop,Chinese Restaurant,Beer Bar,Beer Store
24,Central Toronto,0,Coffee Shop,Café,Sandwich Place,Pizza Place,Pharmacy,Indian Restaurant,Cosmetics Shop,Pub,Burger Joint,Jewish Restaurant
29,Downtown Toronto,0,Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Gastropub,Seafood Restaurant,Bar,Bakery,Steakhouse
21,Downtown Toronto,0,Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Gastropub,Deli / Bodega,Seafood Restaurant,Bakery,Steakhouse
20,Downtown Toronto,0,Coffee Shop,Café,Hotel,American Restaurant,Restaurant,Deli / Bodega,Gastropub,Italian Restaurant,Bar,Burger Joint
19,Downtown Toronto,0,Coffee Shop,Hotel,Aquarium,Café,Pizza Place,Brewery,Restaurant,Scenic Lookout,Italian Restaurant,Bakery
36,West Toronto,0,Coffee Shop,Sushi Restaurant,Café,Pizza Place,Italian Restaurant,Diner,Smoothie Shop,Bookstore,Sandwich Place,Burrito Place
17,Downtown Toronto,0,Coffee Shop,Café,Italian Restaurant,Burger Joint,Bar,Middle Eastern Restaurant,Thai Restaurant,Salad Place,Bubble Tea Shop,Spa
16,Downtown Toronto,0,Coffee Shop,Restaurant,Cocktail Bar,Bakery,Farmers Market,Cheese Shop,Seafood Restaurant,Italian Restaurant,Beer Bar,Café


In [275]:
#Cluster 2
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Central Toronto,1,Home Service,Garden,Yoga Studio,Doner Restaurant,Fish & Chips Shop,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space


In [276]:
#cluster 3
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,East Toronto,2,Light Rail Station,Pizza Place,Garden,Comic Shop,Recording Studio,Restaurant,Butcher,Burrito Place,Brewery,Skate Park
23,Central Toronto,2,Mexican Restaurant,Trail,Sushi Restaurant,Jewelry Store,Yoga Studio,Doner Restaurant,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant
5,Central Toronto,2,Restaurant,Breakfast Spot,Sandwich Place,Burger Joint,Hotel,Food & Drink Shop,Gym,Park,Falafel Restaurant,Event Space
4,Central Toronto,2,Park,Lake,Dim Sum Restaurant,Swim School,Bus Line,Yoga Studio,Donut Shop,Filipino Restaurant,Fast Food Restaurant,Farmers Market
2,East Toronto,2,Sandwich Place,Liquor Store,Light Rail Station,Sushi Restaurant,Italian Restaurant,Brewery,Pub,Ice Cream Shop,Movie Theater,Steakhouse
27,Downtown Toronto,2,Airport Lounge,Airport Terminal,Airport Service,Plane,Sculpture Garden,Boutique,Boat or Ferry,Airport Gate,Harbor / Marina,Airport


In [277]:
#cluster 4
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Downtown Toronto,3,Park,Playground,Trail,Yoga Studio,Discount Store,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant
8,Central Toronto,3,Trail,Playground,Gym,Ethiopian Restaurant,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Yoga Studio


In [278]:
#cluster 5
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Central Toronto,4,Convenience Store,Coffee Shop,Pub,Pizza Place,American Restaurant,Sports Bar,Bagel Shop,Supermarket,Sushi Restaurant,Fried Chicken Joint
0,East Toronto,4,Neighborhood,Boutique,Coffee Shop,Pub,Dog Run,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space


In [279]:
toronto_merged[['Neighbourhood','Cluster Labels']].groupby('Cluster Labels').count()

Unnamed: 0_level_0,Neighbourhood
Cluster Labels,Unnamed: 1_level_1
0,27
1,1
2,6
3,2
4,2
