In [102]:
import pandas as pd

### Reading tables and transforming the data into DataFrames using pandas

In [7]:
# Parse every HTML table found on the Wikipedia page into a list of DataFrames.
postal_codes_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
dfs = pd.read_html(postal_codes_url)

### Dropping rows with NaN

In [8]:
# Take the first table parsed from the page, discard every row that has a
# missing value, and renumber the remaining rows from zero.
df = dfs[0].dropna().reset_index(drop=True)
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


### Replacing the delimiter in the Neighborhood column

In [9]:
# Swap the ' / ' separator between neighbourhood names for ', '.
# The pattern is matched literally (it contains no regex metacharacters).
df = df.assign(
    Neighborhood=df['Neighborhood'].str.replace(' / ', ', ', regex=False)
)
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [10]:
# Number of rows left after cleaning.
len(df)

103

### Reading the coordinates for each postal code

In [12]:
# Load the coordinates file, using its first column as the row index so it can
# be joined against by key later.
df_geo = pd.read_csv(
    'Geospatial_Coordinates.csv',
    index_col=0,
)
df_geo.head()

Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476


### Renaming the column for postal codes

In [13]:
# Rename the postal-code column from 'Postal code' to 'Postal Code'.
df = df.rename({'Postal code': 'Postal Code'}, axis='columns')
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Combining the two dataframes

In [14]:
# Left-join the coordinates onto the neighbourhood table: the 'Postal Code'
# column of df is matched against the index of df_geo.
df_neighborhood = df.join(df_geo, on='Postal Code', how='left')
df_neighborhood.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### Selecting boroughs that contain 'Toronto'

In [15]:
# Keep only the rows whose borough name contains the literal text 'Toronto',
# then renumber the rows from zero.
in_toronto = df_neighborhood['Borough'].str.contains('Toronto', regex=False)
df_borough_T = df_neighborhood[in_toronto].reset_index(drop=True)
df_borough_T

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


In [16]:
import requests

In [17]:
import os
import warnings

# Foursquare API credentials are read from the environment so that they are
# never stored in the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hard-coded in this cell has been
# published with the notebook and should be rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'Foursquare API requests will be rejected.'
    )

VERSION = 20180605  # value sent as the `v` query parameter
RADIUS = 500        # value sent as the `radius` query parameter
LIMIT = 100         # value sent as the `limit` query parameter

In [117]:
# For every postal code, ask the Foursquare "explore" endpoint for venues
# around its coordinates and collect one row per venue:
# [postal code, neighbourhood, venue name, venue category, venue lat, venue lng].
explore_url = 'https://api.foursquare.com/v2/venues/explore'

venue_list = []
for pcode, neighborhood, lat, lng in zip(df_borough_T['Postal Code'], df_borough_T['Neighborhood'],
                                         df_borough_T['Latitude'], df_borough_T['Longitude']):

    # Let requests build and encode the query string instead of formatting it by hand.
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat, lng),
        'radius': RADIUS,
        'limit': LIMIT,
    }
    # A timeout stops a stalled connection from hanging the notebook, and
    # raise_for_status() surfaces HTTP errors (e.g. bad credentials) directly
    # instead of as a KeyError on the JSON payload further down.
    response = requests.get(explore_url, params=params, timeout=30)
    response.raise_for_status()

    groups = response.json().get('response', {}).get('groups', [])
    results = groups[0].get('items', []) if groups else []

    for v in results:
        venue = v['venue']
        # A venue may come back with an empty category list; record it with a
        # missing category rather than failing with IndexError.
        categories = venue.get('categories') or []
        category = categories[0]['name'] if categories else None
        venue_list.append([pcode, neighborhood, venue['name'], category,
                           venue['location']['lat'], venue['location']['lng']])

# Fixing the six positional column labels keeps the frame well-formed even
# when no venue was returned at all.
df_venues = pd.DataFrame(venue_list, columns=range(6))
df_venues.head()

Unnamed: 0,0,1,2,3,4,5
0,M5A,"Regent Park, Harbourfront",Roselle Desserts,Bakery,43.653447,-79.362017
1,M5A,"Regent Park, Harbourfront",Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,M5A,"Regent Park, Harbourfront",Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
3,M5A,"Regent Park, Harbourfront",Body Blitz Spa East,Spa,43.654735,-79.359874
4,M5A,"Regent Park, Harbourfront",Morning Glory Cafe,Breakfast Spot,43.653947,-79.361149


In [118]:
# Give the six positional columns descriptive names.
venue_columns = [
    'Postal Code',
    'Neighborhood',
    'Venue',
    'Venue Category',
    'Venue Latitude',
    'Venue Longitude',
]
df_venues.columns = venue_columns
df_venues.head()

Unnamed: 0,Postal Code,Neighborhood,Venue,Venue Category,Venue Latitude,Venue Longitude
0,M5A,"Regent Park, Harbourfront",Roselle Desserts,Bakery,43.653447,-79.362017
1,M5A,"Regent Park, Harbourfront",Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,M5A,"Regent Park, Harbourfront",Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
3,M5A,"Regent Park, Harbourfront",Body Blitz Spa East,Spa,43.654735,-79.359874
4,M5A,"Regent Park, Harbourfront",Morning Glory Cafe,Breakfast Spot,43.653947,-79.361149


In [119]:
# (number of venue rows, number of columns)
len(df_venues.index), len(df_venues.columns)

(1602, 6)

In [120]:
# Number of distinct venue categories, shown as a one-element tuple.
(df_venues['Venue Category'].nunique(dropna=False),)

(228,)

In [121]:
# One-hot encode the venue category of every venue and put the venue's
# neighbourhood in front as the first column. The column is called
# 'Neighborhood(s)' rather than 'Neighborhood' because 'Neighborhood' is also
# one of the venue categories.
category_indicators = pd.get_dummies(df_venues['Venue Category'])
neighborhood_column = df_venues['Neighborhood'].rename('Neighborhood(s)')
df_venue_dummies = pd.concat([neighborhood_column, category_indicators], axis=1)
df_venue_dummies.head()

Unnamed: 0,Neighborhood(s),Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [122]:
# Average the one-hot columns per neighbourhood: each value is the share of
# that neighbourhood's venues that fall into the category.
by_neighborhood = df_venue_dummies.groupby('Neighborhood(s)')
df_grouped = by_neighborhood.mean()
df_grouped.head()

Unnamed: 0_level_0,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
Neighborhood(s),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,...,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0
"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Business reply mail Processing CentrE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",0.055556,0.055556,0.055556,0.111111,0.166667,0.111111,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.016949


In [154]:
# For each neighbourhood, list the ten venue categories with the highest
# frequency, most frequent first.
top10_list = [
    [name] + list(freqs.sort_values(ascending=False)[:10].index)
    for name, freqs in df_grouped.iterrows()
]
df_top10 = pd.DataFrame(top10_list)

# Column headers: 'Neighborhood', then '1st Most Common Venue' through
# '10th Most Common Venue'.
ordinal_suffixes = ['st', 'nd', 'rd'] + ['th'] * 7
col_names = ['Neighborhood'] + [
    '{}{} Most Common Venue'.format(rank, suffix)
    for rank, suffix in enumerate(ordinal_suffixes, start=1)
]

df_top10.columns = col_names
df_top10.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Seafood Restaurant,Restaurant,Bakery,Beer Bar,Farmers Market,Cheese Shop,Italian Restaurant,Café
1,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Gym,Bakery,Nightclub,Convenience Store,Performing Arts Venue,Pet Store,Climbing Gym
2,Business reply mail Processing CentrE,Light Rail Station,Fast Food Restaurant,Brewery,Burrito Place,Auto Workshop,Spa,Garden,Restaurant,Garden Center,Pizza Place
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Lounge,Airport Terminal,Airport,Coffee Shop,Boutique,Boat or Ferry,Rental Car Location,Bar,Plane
4,Central Bay Street,Coffee Shop,Italian Restaurant,Sandwich Place,Café,Ice Cream Shop,Burger Joint,Salad Place,Bubble Tea Shop,Japanese Restaurant,Diner


In [155]:
from sklearn.cluster import KMeans

In [156]:
# Number of clusters; used as n_clusters for KMeans and as the number of map colours.
k = 5

In [157]:
# Cluster the neighbourhoods on their venue-category frequency profiles.
# random_state pins the centroid initialisation so that the cluster labels,
# and every per-cluster table built from them, are the same on each run.
# Outputs recorded before the seed was added may show a different labelling.
k_means = KMeans(n_clusters=k, n_init=12, random_state=0)
k_means.fit(df_grouped)
k_means.labels_

array([0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 2, 1, 3, 1,
       0, 0, 0, 0, 2, 4, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1])

In [158]:
# Attach the KMeans label of every neighbourhood as the second column.
# df_top10 was built by iterating df_grouped row by row, so its row order
# matches the order of k_means.labels_.
# DataFrame.insert raises if the column already exists, so drop a label column
# left over from an earlier run first; this keeps the cell safe to re-run.
if 'Cluster Label' in df_top10.columns:
    df_top10 = df_top10.drop(columns='Cluster Label')
df_top10.insert(1, 'Cluster Label', k_means.labels_)
df_top10.head()

Unnamed: 0,Neighborhood,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,0,Coffee Shop,Cocktail Bar,Seafood Restaurant,Restaurant,Bakery,Beer Bar,Farmers Market,Cheese Shop,Italian Restaurant,Café
1,"Brockton, Parkdale Village, Exhibition Place",1,Café,Coffee Shop,Breakfast Spot,Gym,Bakery,Nightclub,Convenience Store,Performing Arts Venue,Pet Store,Climbing Gym
2,Business reply mail Processing CentrE,0,Light Rail Station,Fast Food Restaurant,Brewery,Burrito Place,Auto Workshop,Spa,Garden,Restaurant,Garden Center,Pizza Place
3,"CN Tower, King and Spadina, Railway Lands, Har...",0,Airport Service,Airport Lounge,Airport Terminal,Airport,Coffee Shop,Boutique,Boat or Ferry,Rental Car Location,Bar,Plane
4,Central Bay Street,0,Coffee Shop,Italian Restaurant,Sandwich Place,Café,Ice Cream Shop,Burger Joint,Salad Place,Bubble Tea Shop,Japanese Restaurant,Diner


In [160]:
# Left-join the cluster label and top-10 venue columns onto the Toronto
# postal-code table, matching on the neighbourhood name.
top10_by_neighborhood = df_top10.set_index('Neighborhood')
df_combined = df_borough_T.join(top10_by_neighborhood, on='Neighborhood', how='left')
df_combined.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Pub,Bakery,Park,Breakfast Spot,Restaurant,Café,Theater,Mexican Restaurant,Shoe Store
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Diner,Sushi Restaurant,Yoga Studio,Park,Mexican Restaurant,Juice Bar,Italian Restaurant,Hobby Shop,Fried Chicken Joint
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Coffee Shop,Clothing Store,Café,Bubble Tea Shop,Japanese Restaurant,Cosmetics Shop,Middle Eastern Restaurant,Restaurant,Tea Room,Diner
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Café,Cocktail Bar,Gastropub,Italian Restaurant,American Restaurant,Seafood Restaurant,Farmers Market,Hotel,Department Store
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Neighborhood,Pub,Coffee Shop,Health Food Store,Trail,Asian Restaurant,Yoga Studio,Discount Store,Distribution Center,Dog Run


In [165]:
from geopy.geocoders import Nominatim
import folium

In [167]:
# Geocode the city to get the coordinates the map will be centred on.
address = 'City of Toronto, ON, Canada'

geolocator = Nominatim(user_agent="Toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.7170226, -79.41978303501344.


In [177]:
from matplotlib import cm
from matplotlib import colors
import numpy as np

In [179]:
# Sample k evenly spaced colours from the rainbow colormap (one per cluster)
# and convert each of them to a hex colour string.
colors_array = cm.rainbow(np.linspace(0, 1, k))
colors_list = list(map(colors.rgb2hex, colors_array))

In [188]:
# Draw one circle marker per postal code, coloured by the cluster of its
# neighbourhood.
Toronto_map = folium.Map(location=[latitude, longitude], zoom_start=11)

for neighborhood, label, lat, lng in zip(df_combined['Neighborhood'], df_combined['Cluster Label'],
                                         df_combined['Latitude'], df_combined['Longitude']):
    # df_combined comes from a left join, so a neighbourhood without a top-10
    # row has a missing label; it cannot be coloured and is skipped.
    if pd.isna(label):
        continue
    # A label column that contains missing values is stored as float; cast so
    # it can be used as a list index.
    cluster = int(label)
    # Show the actual neighbourhood name (previously the literal text
    # 'neighborhood' was shown). parse_html=True escapes the text so that
    # names containing quotes or other markup characters render safely.
    popup = folium.Popup('{} (Cluster {})'.format(neighborhood, cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        popup=popup,
        radius=5,
        color=colors_list[cluster],
        fill=True,
        fill_color=colors_list[cluster],
        fill_opacity=0.7).add_to(Toronto_map)

Toronto_map

In [205]:
# Neighbourhoods in cluster 0. Positional column 2 is 'Neighborhood' and
# columns 6-15 are the ten ranked "Most Common Venue" columns.
df_combined[df_combined['Cluster Label'].eq(0)].iloc[:, [2, *range(6, 16)]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Regent Park, Harbourfront",Coffee Shop,Pub,Bakery,Park,Breakfast Spot,Restaurant,Café,Theater,Mexican Restaurant,Shoe Store
1,"Queen's Park, Ontario Provincial Government",Coffee Shop,Diner,Sushi Restaurant,Yoga Studio,Park,Mexican Restaurant,Juice Bar,Italian Restaurant,Hobby Shop,Fried Chicken Joint
2,"Garden District, Ryerson",Coffee Shop,Clothing Store,Café,Bubble Tea Shop,Japanese Restaurant,Cosmetics Shop,Middle Eastern Restaurant,Restaurant,Tea Room,Diner
3,St. James Town,Coffee Shop,Café,Cocktail Bar,Gastropub,Italian Restaurant,American Restaurant,Seafood Restaurant,Farmers Market,Hotel,Department Store
4,The Beaches,Neighborhood,Pub,Coffee Shop,Health Food Store,Trail,Asian Restaurant,Yoga Studio,Discount Store,Distribution Center,Dog Run
5,Berczy Park,Coffee Shop,Cocktail Bar,Seafood Restaurant,Restaurant,Bakery,Beer Bar,Farmers Market,Cheese Shop,Italian Restaurant,Café
6,Central Bay Street,Coffee Shop,Italian Restaurant,Sandwich Place,Café,Ice Cream Shop,Burger Joint,Salad Place,Bubble Tea Shop,Japanese Restaurant,Diner
8,"Richmond, Adelaide, King",Coffee Shop,Café,Restaurant,Gym,Thai Restaurant,Hotel,American Restaurant,Deli / Bodega,Pizza Place,Sushi Restaurant
10,"Harbourfront East, Union Station, Toronto Islands",Coffee Shop,Aquarium,Hotel,Restaurant,Italian Restaurant,Café,Brewery,Sporting Goods Shop,Fried Chicken Joint,Scenic Lookout
12,"The Danforth West, Riverdale",Greek Restaurant,Coffee Shop,Italian Restaurant,Bookstore,Frozen Yogurt Shop,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Bubble Tea Shop,Spa


In [206]:
# Neighbourhoods in cluster 1. Positional column 2 is 'Neighborhood' and
# columns 6-15 are the ten ranked "Most Common Venue" columns.
df_combined[df_combined['Cluster Label'].eq(1)].iloc[:, [2, *range(6, 16)]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Christie,Grocery Store,Café,Park,Gas Station,Italian Restaurant,Candy Store,Restaurant,Diner,Athletics & Sports,Baby Store
9,"Dufferin, Dovercourt Village",Bakery,Pharmacy,Pizza Place,Gym,Grocery Store,Gym / Fitness Center,Middle Eastern Restaurant,Music Venue,Café,Brewery
11,"Little Portugal, Trinity",Bar,Restaurant,Asian Restaurant,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Café,Men's Store,Yoga Studio,Boutique,Brewery
14,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Gym,Bakery,Nightclub,Convenience Store,Performing Arts Venue,Pet Store,Climbing Gym
17,Studio District,Café,Coffee Shop,Brewery,Gastropub,Bakery,American Restaurant,Neighborhood,Sandwich Place,Cheese Shop,Clothing Store
21,Forest Hill North & West,Jewelry Store,Trail,Mexican Restaurant,Sushi Restaurant,Yoga Studio,Diner,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
22,"High Park, The Junction South",Mexican Restaurant,Café,Thai Restaurant,Bookstore,Speakeasy,Diner,Bar,Flea Market,Fried Chicken Joint,Italian Restaurant
23,North Toronto West,Clothing Store,Coffee Shop,Sporting Goods Shop,Fast Food Restaurant,Diner,Mexican Restaurant,Dessert Shop,Park,Chinese Restaurant,Café
24,"The Annex, North Midtown, Yorkville",Sandwich Place,Café,Coffee Shop,Park,History Museum,Donut Shop,Burger Joint,Liquor Store,Indian Restaurant,Pub
26,Davisville,Dessert Shop,Sandwich Place,Gym,Coffee Shop,Pizza Place,Italian Restaurant,Café,Sushi Restaurant,Brewery,Indian Restaurant


In [207]:
# Neighbourhoods in cluster 2. Positional column 2 is 'Neighborhood' and
# columns 6-15 are the ten ranked "Most Common Venue" columns.
df_combined[df_combined['Cluster Label'].eq(2)].iloc[:, [2, *range(6, 16)]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Lawrence Park,Park,Bus Line,Swim School,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
33,Rosedale,Park,Playground,Trail,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


In [208]:
# Neighbourhoods in cluster 3. Positional column 2 is 'Neighborhood' and
# columns 6-15 are the ten ranked "Most Common Venue" columns.
df_combined[df_combined['Cluster Label'].eq(3)].iloc[:, [2, *range(6, 16)]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,"Moore Park, Summerhill East",Playground,Restaurant,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


In [209]:
# Neighbourhoods in cluster 4. Positional column 2 is 'Neighborhood' and
# columns 6-15 are the ten ranked "Most Common Venue" columns.
df_combined[df_combined['Cluster Label'].eq(4)].iloc[:, [2, *range(6, 16)]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,Roselawn,Home Service,Garden,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
