### 1. Importing all required libraries

In [1]:
import numpy as np 

import pandas as pd 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import random
from IPython.display import Image 
from IPython.core.display import HTML 

import json 

from geopy.geocoders import Nominatim 

import requests 
from pandas.io.json import json_normalize 

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

!pip install folium==0.5.0
import folium 

print('Libraries imported.')

Libraries imported.


### 2. Importing all Toronto neighbourhood data and merging longitude and latitude

In [2]:
import requests 
from io import StringIO

from bs4 import BeautifulSoup 

# Download the Wikipedia page listing the postal codes that start with "M".
# A timeout keeps the cell from hanging forever, and raise_for_status() makes
# an HTTP error (e.g. 403/404) fail here instead of surfacing later as a
# confusing parsing error.
req = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", timeout=30) 
req.raise_for_status()

# Parse the first <table> of the page into a DataFrame.
soup = BeautifulSoup(req.content,'lxml') 
table = soup.find_all('table')[0]  
# read_html is given a file-like object: passing literal HTML as a plain
# string is deprecated in recent pandas releases.
df = pd.read_html(StringIO(str(table))) 

# read_html returns a list of DataFrames; the table we asked for is the first.
toronto=pd.DataFrame(df[0]) 

# Keep only the postal codes whose borough is not 'Not assigned'.
toronto_data = toronto[~toronto['Borough'].isin(['Not assigned'])] 
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [3]:
!wget -O Geospatial_Coordinates.csv https://cocl.us/Geospatial_data

coord = pd.read_csv("Geospatial_Coordinates.csv")
toronto_coord = pd.merge(left=toronto_data, right=coord, how='left', left_on='Postal Code', right_on='Postal Code')

toronto_coord.head()

--2021-01-07 09:53:38--  https://cocl.us/Geospatial_data
Resolving cocl.us (cocl.us)... 169.63.96.194, 169.63.96.176
Connecting to cocl.us (cocl.us)|169.63.96.194|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2021-01-07 09:53:39--  https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Resolving ibm.box.com (ibm.box.com)... 107.152.29.197
Connecting to ibm.box.com (ibm.box.com)|107.152.29.197|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2021-01-07 09:53:40--  https://ibm.box.com/public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Reusing existing connection to ibm.box.com:443.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.ent.box.com/public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]


Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### 3. Visualizing Toronto's neighbourhoods on a map

In [4]:
address = 'Toronto'

# Look up the coordinates of the city; they are used to centre every map below.
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when nothing is found; fail with a clear message
# instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Nominatim returned no result for address {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [5]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Draw one blue circle per row of toronto_coord, with a
# "<neighbourhood>, <borough>" popup attached to it.
marker_fields = toronto_coord[['Latitude', 'Longitude', 'Borough', 'Neighbourhood']]
for lat, lng, borough, neighbourhood in marker_fields.itertuples(index=False, name=None):
    popup = folium.Popup('{}, {}'.format(neighbourhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

map_toronto

### 4. Here we import the top 100 venues in Toronto, convert them into a dataframe and clean the postal codes

In [6]:
import os
import warnings

# Foursquare credentials are read from the environment so that no secret is
# stored in (or leaked through) the notebook. Export FOURSQUARE_CLIENT_ID,
# FOURSQUARE_CLIENT_SECRET and, if needed, FOURSQUARE_ACCESS_TOKEN before
# starting the kernel.
# NOTE(review): the keys previously committed in this cell should be treated
# as compromised and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
ACCESS_TOKEN = os.environ.get('FOURSQUARE_ACCESS_TOKEN', '')
VERSION = '20200101'
LIMIT = 30

# Warn right here so a missing credential is not first noticed as a failed
# API request in a later cell.
if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        'Foursquare credentials are not set: define FOURSQUARE_CLIENT_ID and '
        'FOURSQUARE_CLIENT_SECRET in the environment before calling the API.'
    )

In [7]:
# Number of venues requested from the explore endpoint (sent as `limit`).
LIMIT = 100 

# Search radius around the city centre (sent as `radius`).
radius = 100000 

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT)

# A timeout keeps the cell from hanging indefinitely, and raise_for_status()
# turns an HTTP error (bad credentials, quota exceeded, ...) into an immediate
# exception instead of a KeyError when the JSON is unpacked in a later cell.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json() 

In [8]:
def get_category_type(row):
    """Return the name of the first category attached to a venue row.

    The category list is read from ``row['categories']``; when that key is
    missing, ``row['venue.categories']`` is used instead.

    Parameters
    ----------
    row : mapping-like (e.g. dict or pandas Series)
        Must provide one of the two keys above, holding a list of dicts that
        each have a ``'name'`` entry.

    Returns
    -------
    The ``'name'`` of the first category, or ``None`` when the list is empty.

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and must not be silently swallowed.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [9]:
venues = results['response']['groups'][0]['items']

filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng', 'venue.location.postalCode']

# Flatten the nested JSON and keep only the columns of interest.
# pd.json_normalize replaces the deprecated pandas.io.json.json_normalize, and
# .copy() makes the column assignments below operate on an independent frame
# (no SettingWithCopyWarning).
toronto_venues = pd.json_normalize(venues).loc[:, filtered_columns].copy()

# Replace the list of category dicts by the name of the first category.
toronto_venues['venue.categories'] = toronto_venues.apply(get_category_type, axis=1)

# Keep only the last part of each dotted column name (e.g. 'venue.location.lat' -> 'lat').
toronto_venues.columns = [col.split(".")[-1] for col in toronto_venues.columns]

# Venues without a postal code cannot be matched to a neighbourhood later on.
toronto_venues = toronto_venues.dropna(subset=['postalCode'])

# Keep the first three characters, the part matched against 'Postal Code' in the merge below.
toronto_venues['postalCode'] = toronto_venues['postalCode'].str[:3]

toronto_venues = toronto_venues.rename(columns={'name': 'venue'})

toronto_venues.head()

  from ipykernel import kernelapp as app


Unnamed: 0,venue,categories,lat,lng,postalCode
1,SOMA chocolatemaker,Dessert Shop,43.645328,-79.395714,M5V
2,St. Lawrence Market (South Building),Farmers Market,43.648743,-79.371597,M5E
3,Alo,French Restaurant,43.648574,-79.396243,M5V
4,Yeti Nails & Spa,Cosmetics Shop,43.647938,-79.39633,M5V
5,Hailed Coffee,Coffee Shop,43.658833,-79.383684,M5G


### 5. Here we visualize the venues on a map

In [10]:
# Generate a map centred on Toronto again.
toronto_venues_map = folium.Map(location=[latitude, longitude], zoom_start=11) 

# Add the neighbourhoods as blue circle markers.
for lat, lng, borough, neighbourhood in zip(toronto_coord['Latitude'], toronto_coord['Longitude'], toronto_coord['Borough'], toronto_coord['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.7).add_to(toronto_venues_map)  

# Add the venues as green circle markers.
# The keyword was previously misspelled ('poup'), so no popup was attached to
# the venue markers. The name is wrapped in a Popup with parse_html=True, as
# for the neighbourhood markers, so quotes in venue names are handled safely.
for lat, lng, venue_name in zip(toronto_venues['lat'], toronto_venues['lng'], toronto_venues['venue']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(str(venue_name), parse_html=True),
        fill=True,
        color='green',
        fill_color='green',
        fill_opacity=0.6
    ).add_to(toronto_venues_map)
    
toronto_venues_map

### 6. Here we merge the venues dataframe with the neighbourhood dataframe based on postal codes

In [11]:
# Attach a neighbourhood to every venue through its postal code, discard the
# venues whose postal code matched no borough, and keep only the columns that
# the clustering steps need.
toronto_final = (
    toronto_data
    .merge(toronto_venues, how='right', left_on='Postal Code', right_on='postalCode')
    .dropna(subset=['Borough'])
    .loc[:, ['Neighbourhood', 'categories', 'lat', 'lng']]
)

toronto_final

Unnamed: 0,Neighbourhood,categories,lat,lng
0,"Regent Park, Harbourfront",Historic Site,43.650244,-79.359323
1,"Regent Park, Harbourfront",Coffee Shop,43.649963,-79.361442
2,"Regent Park, Harbourfront",Bakery,43.653447,-79.362017
3,"Regent Park, Harbourfront",Park,43.666048,-79.360941
4,"Regent Park, Harbourfront",Coffee Shop,43.650702,-79.369259
5,"Regent Park, Harbourfront",Athletics & Sports,43.647088,-79.351306
6,"Regent Park, Harbourfront",Chocolate Shop,43.650622,-79.358127
7,"Garden District, Ryerson",Theater,43.653394,-79.378507
8,"Garden District, Ryerson",Café,43.657772,-79.376073
9,St. James Town,Coffee Shop,43.652384,-79.372719


### 7. Here we convert the dataframe into numeric (one-hot) values for K-means clustering later on, and then group them by neighbourhood

In [12]:
# One indicator column per venue category, named exactly like the category
# (empty prefix and separator).
toronto_onehot = pd.get_dummies(toronto_final[['categories']], prefix="", prefix_sep="")

# Put the neighbourhood name in front of the indicator columns.
toronto_onehot.insert(0, 'Neighbourhood', toronto_final['Neighbourhood'])

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bar,Beach,Beer Bar,Brewery,Café,Chocolate Shop,Coffee Shop,Concert Hall,Cosmetics Shop,Dessert Shop,Diner,Farmers Market,Food Truck,French Restaurant,Gastropub,Grocery Store,Historic Site,Hostel,Hotel,Italian Restaurant,Japanese Restaurant,Liquor Store,Monument / Landmark,Organic Grocery,Park,Performing Arts Venue,Pizza Place,Plaza,Ramen Restaurant,Restaurant,Sandwich Place,Thai Restaurant,Theater,Train Station
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [13]:
# Averaging the one-hot columns per neighbourhood gives, for each category,
# the share of that neighbourhood's venues that belong to it.
neighbourhood_groups = toronto_onehot.groupby('Neighbourhood')
toronto_grouped = neighbourhood_groups.mean().reset_index()

def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of ``row``.

    The first element of ``row`` is skipped (in this notebook it holds the
    neighbourhood name). The remaining entries are sorted in descending order
    and the index labels of the first ``num_top_venues`` of them are returned
    as an array.
    """
    return (
        row.iloc[1:]
        .sort_values(ascending=False)
        .index.values[:num_top_venues]
    )

toronto_grouped

Unnamed: 0,Neighbourhood,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bar,Beach,Beer Bar,Brewery,Café,Chocolate Shop,Coffee Shop,Concert Hall,Cosmetics Shop,Dessert Shop,Diner,Farmers Market,Food Truck,French Restaurant,Gastropub,Grocery Store,Historic Site,Hostel,Hotel,Italian Restaurant,Japanese Restaurant,Liquor Store,Monument / Landmark,Organic Grocery,Park,Performing Arts Venue,Pizza Place,Plaza,Ramen Restaurant,Restaurant,Sandwich Place,Thai Restaurant,Theater,Train Station
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"CN Tower, King and Spadina, Railway Lands, Har...",0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.125,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
2,Caledonia-Fairbanks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Dufferin, Dovercourt Village",0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.333333,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Garden District, Ryerson",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0
7,"Harbourfront East, Union Station, Toronto Islands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333
8,"High Park, The Junction South",0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"India Bazaar, The Beaches West",0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 8. Here we select the top 10 venues for each neighbourhood of Toronto.

In [14]:
num_top_venues = 10

# Ordinal suffixes for the first three ranks; every later rank uses 'th'.
indicators = ['st', 'nd', 'rd']

# Create the column names according to the number of top venues. The suffix
# is chosen with an explicit bounds check rather than a bare `except:`, which
# would also have hidden unrelated errors.
columns = ['Neighbourhood']
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# Create a new dataframe with one row per neighbourhood of toronto_grouped.
toronto_venues_sorted = pd.DataFrame(columns=columns)
toronto_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

# Fill each row with the neighbourhood's venue categories, most frequent first.
for ind in range(toronto_grouped.shape[0]):
    toronto_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

toronto_venues_sorted

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Farmers Market,Train Station,Café,Diner,Dessert Shop,Cosmetics Shop,Concert Hall,Coffee Shop,Chocolate Shop,Brewery
1,"CN Tower, King and Spadina, Railway Lands, Har...",Sandwich Place,Art Gallery,Monument / Landmark,Diner,Dessert Shop,Cosmetics Shop,French Restaurant,Bar,Beach,Beer Bar
2,Caledonia-Fairbanks,Italian Restaurant,Train Station,French Restaurant,Farmers Market,Diner,Dessert Shop,Cosmetics Shop,Concert Hall,Coffee Shop,Chocolate Shop
3,Central Bay Street,Coffee Shop,Train Station,Café,Farmers Market,Diner,Dessert Shop,Cosmetics Shop,Concert Hall,Chocolate Shop,Brewery
4,Christie,Café,Grocery Store,Park,Train Station,Diner,Dessert Shop,Cosmetics Shop,Concert Hall,Coffee Shop,Chocolate Shop
5,"Dufferin, Dovercourt Village",Brewery,Bar,Beer Bar,Chocolate Shop,Farmers Market,Diner,Dessert Shop,Cosmetics Shop,Concert Hall,Coffee Shop
6,"Garden District, Ryerson",Café,Theater,Arts & Crafts Store,Asian Restaurant,Food Truck,Farmers Market,Diner,Dessert Shop,Cosmetics Shop,Concert Hall
7,"Harbourfront East, Union Station, Toronto Islands",Train Station,Performing Arts Venue,Park,Brewery,Diner,Dessert Shop,Cosmetics Shop,Concert Hall,Coffee Shop,Chocolate Shop
8,"High Park, The Junction South",Gastropub,Bar,Café,Farmers Market,Diner,Dessert Shop,Cosmetics Shop,Concert Hall,Coffee Shop,Chocolate Shop
9,"India Bazaar, The Beaches West",Brewery,Beach,Food Truck,Asian Restaurant,Athletics & Sports,Bakery,Bar,Arts & Crafts Store,Beer Bar,French Restaurant


### 9. Here we cluster neighbourhoods by the similarity of their venues

In [15]:
kclusters = 5

# Clustering features: every column of toronto_grouped except the
# neighbourhood name.
toronto_final_clust = toronto_grouped.drop(columns='Neighbourhood')

# random_state is fixed so that the cluster labels are reproducible.
kmeans = KMeans(n_clusters=kclusters, random_state=0)
kmeans.fit(toronto_final_clust)

kmeans.labels_[:10]

array([3, 0, 0, 4, 2, 0, 2, 2, 1, 0], dtype=int32)

In [16]:
# DataFrame.insert raises ValueError when the column already exists, so
# re-running this cell used to fail. Drop a previous 'Cluster Labels' column
# first to make the cell idempotent.
if 'Cluster Labels' in toronto_venues_sorted.columns:
    toronto_venues_sorted = toronto_venues_sorted.drop(columns='Cluster Labels')

# The k-means labels are in the row order of toronto_grouped, which is also
# the row order of toronto_venues_sorted.
toronto_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Add postal code, borough and coordinates to every clustered neighbourhood.
toronto_merged = pd.merge(left=toronto_coord, right=toronto_venues_sorted, how='right', left_on='Neighbourhood', right_on='Neighbourhood')

toronto_merged

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Chocolate Shop,Historic Site,Athletics & Sports,Bakery,Park,Train Station,Diner,Dessert Shop,Cosmetics Shop
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,2,Café,Theater,Arts & Crafts Store,Asian Restaurant,Food Truck,Farmers Market,Diner,Dessert Shop,Cosmetics Shop,Concert Hall
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,4,Coffee Shop,Train Station,Café,Farmers Market,Diner,Dessert Shop,Cosmetics Shop,Concert Hall,Chocolate Shop,Brewery
3,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,3,Farmers Market,Train Station,Café,Diner,Dessert Shop,Cosmetics Shop,Concert Hall,Coffee Shop,Chocolate Shop,Brewery
4,M6E,York,Caledonia-Fairbanks,43.689026,-79.453512,0,Italian Restaurant,Train Station,French Restaurant,Farmers Market,Diner,Dessert Shop,Cosmetics Shop,Concert Hall,Coffee Shop,Chocolate Shop
5,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,4,Coffee Shop,Train Station,Café,Farmers Market,Diner,Dessert Shop,Cosmetics Shop,Concert Hall,Chocolate Shop,Brewery
6,M6G,Downtown Toronto,Christie,43.669542,-79.422564,2,Café,Grocery Store,Park,Train Station,Diner,Dessert Shop,Cosmetics Shop,Concert Hall,Coffee Shop,Chocolate Shop
7,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,2,Thai Restaurant,Plaza,Park,Train Station,Brewery,Diner,Dessert Shop,Cosmetics Shop,Concert Hall,Coffee Shop
8,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,0,Brewery,Bar,Beer Bar,Chocolate Shop,Farmers Market,Diner,Dessert Shop,Cosmetics Shop,Concert Hall,Coffee Shop
9,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752,2,Train Station,Performing Arts Venue,Park,Brewery,Diner,Dessert Shop,Cosmetics Shop,Concert Hall,Coffee Shop,Chocolate Shop


### 10. Here we plot the clusters, by colour, on a map

In [17]:
toronto_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# One distinct hex colour per cluster label (labels run from 0 to kclusters - 1).
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add one marker per neighbourhood, coloured by its cluster.
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    # Index the palette with the label itself; the previous `cluster - 1`
    # only worked because index -1 wraps around to the last colour.
    cluster_color = rainbow[int(cluster)]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(toronto_clusters)
       
toronto_clusters

### 11. With that, we now know which neighbourhoods in Toronto have unique venues to visit!