# Import the necessary libraries

In [2]:
import pandas as pd
import numpy as np

# Read the html web page by using Pandas

In [3]:
df_list = pd.read_html(io='http://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
df = df_list[0]
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


# Create the Pandas Dataframe:
## Select just the records which have an associated borough

In [4]:
df = df[df['Borough']!='Not assigned']
df.reset_index(inplace=True,drop=True)
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


## Add the Latutude and Longitude of each postal code

In [5]:
import geocoder
import folium
from tqdm import tqdm

In [6]:
# Get the coordinates from the geocoder API in separate list (for latitude and longitude)
# Since this process can take some time, let's also add a progress bar by using tqdm
i = 0
lat = []
long = []

for address in tqdm(df['Neighbourhood'] + ', Toronto Ontario'):
    coordinates = None
    while (coordinates is None):
        g = geocoder.arcgis(address)
        coordinates = g.latlng
    
    lat.append(coordinates[i])
    long.append(coordinates[i + 1])

100%|████████████████████████████████████████████████████████████████████████████████| 103/103 [01:27<00:00,  1.17it/s]


In [7]:
#Add the latitude and longitude to the dataframe
df['Latitude'] = lat
df['Longitude'] = long
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.686588,-79.409996
1,M4A,North York,Victoria Village,43.731540,-79.314280
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.659743,-79.361561
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.723570,-79.437110
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.666622,-79.393264
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.651321,-79.523468
99,M4Y,Downtown Toronto,Church and Wellesley,43.665700,-79.380930
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.648690,-79.385440
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.650520,-79.484960


In [8]:
# Let's represent the neighbourhoods on a Map
map_toronto = folium.Map(location=[43.7612239, -79.3239857], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in tqdm(zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood'])):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

103it [00:00, 11441.03it/s]


# Explore and cluster the neighborhoods in Toronto.

## Request parameters for the first neighbourhood

In [112]:
CLIENT_ID = 'XZ00IYHHH1TQXGJPE44J24KGX2L00I4M4QNZ2QX3DSDWDUTN' # Foursquare ID
CLIENT_SECRET = 'HDXWM53CLLKAKRHUEH2KPCBHYJ2AHKD5PEI5WJF1DWA4JV0E' # Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # Foursquare API limit value
RADIUS = 500 # Foursquare API radious for exploration
LATITUDE = df.loc[50,'Latitude']
LONGITUDE = df.loc[50,'Longitude']

## Create a single request url

In [113]:
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    LATITUDE,
    LONGITUDE, 
    RADIUS, 
    LIMIT)
print(url)

https://api.foursquare.com/v2/venues/explore?&client_id=XZ00IYHHH1TQXGJPE44J24KGX2L00I4M4QNZ2QX3DSDWDUTN&client_secret=HDXWM53CLLKAKRHUEH2KPCBHYJ2AHKD5PEI5WJF1DWA4JV0E&v=20180605&ll=43.76016000000004,-79.55848999999995&radius=500&limit=100


## Make the request and explore answer

In [114]:
import requests

results = requests.get(url).json()
results.keys()

dict_keys(['meta', 'response'])

In [115]:
results['response'].keys()



In [116]:
results['response']['groups'][0]['items']

[{'reasons': {'count': 0,
   'items': [{'summary': 'This spot is popular',
     'type': 'general',
     'reasonName': 'globalInteractionReason'}]},
  'venue': {'id': '50f865c1067dcebf19c9ab03',
   'name': 'Bad Boy Furniture - Distribution Centre',
   'location': {'address': '500 Fenmar Drive',
    'lat': 43.762881017792985,
    'lng': -79.55644369125366,
    'labeledLatLngs': [{'label': 'display',
      'lat': 43.762881017792985,
      'lng': -79.55644369125366}],
    'distance': 344,
    'postalCode': 'M9L 2V5',
    'cc': 'CA',
    'city': 'North York',
    'state': 'ON',
    'country': 'Canada',
    'formattedAddress': ['500 Fenmar Drive',
     'North York ON M9L 2V5',
     'Canada']},
   'categories': [{'id': '4bf58dd8d48988d1f8941735',
     'name': 'Furniture / Home Store',
     'pluralName': 'Furniture / Home Stores',
     'shortName': 'Furniture / Home',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/shops/furniture_',
      'suffix': '.png'},
     'primary': Tru

## Create a Pandas dataframe including all the dictionaries in the json answer

In [117]:
results['response'].keys()



In [118]:
information = results['response']['groups'][0]['items']
information_df = pd.json_normalize(information)
information_df

Unnamed: 0,referralId,reasons.count,reasons.items,venue.id,venue.name,venue.location.address,venue.location.lat,venue.location.lng,venue.location.labeledLatLngs,venue.location.distance,venue.location.postalCode,venue.location.cc,venue.location.city,venue.location.state,venue.location.country,venue.location.formattedAddress,venue.categories,venue.photos.count,venue.photos.groups
0,e-0-50f865c1067dcebf19c9ab03-0,0,"[{'summary': 'This spot is popular', 'type': '...",50f865c1067dcebf19c9ab03,Bad Boy Furniture - Distribution Centre,500 Fenmar Drive,43.762881,-79.556444,"[{'label': 'display', 'lat': 43.76288101779298...",344,M9L 2V5,CA,North York,ON,Canada,"[500 Fenmar Drive, North York ON M9L 2V5, Canada]","[{'id': '4bf58dd8d48988d1f8941735', 'name': 'F...",0,[]
1,e-0-5e80da811a23b1000819da88-1,0,"[{'summary': 'This spot is popular', 'type': '...",5e80da811a23b1000819da88,NTL Contracting and Waterproofing Inc,99 Milvan Dr.,43.756253,-79.558974,"[{'label': 'display', 'lat': 43.75625262108667...",436,M9L 1Z7,CA,North York,ON,Canada,"[99 Milvan Dr., North York ON M9L 1Z7, Canada]","[{'id': '5454144b498ec1f095bff2f2', 'name': 'C...",0,[]


In [119]:
columns = ['venue.name','venue.categories','venue.location.lat','venue.location.lng']
information_df = information_df[columns]
new_columns_names = ['Name','Category','Latitude','Longitude']
information_df.columns = new_columns_names
information_df

Unnamed: 0,Name,Category,Latitude,Longitude
0,Bad Boy Furniture - Distribution Centre,"[{'id': '4bf58dd8d48988d1f8941735', 'name': 'F...",43.762881,-79.556444
1,NTL Contracting and Waterproofing Inc,"[{'id': '5454144b498ec1f095bff2f2', 'name': 'C...",43.756253,-79.558974


## Get category and create a venues dataframe

In [120]:
list_df = []
for lat, long, neighbourhood in tqdm(zip(df['Latitude'],df['Longitude'],df['Neighbourhood'])):
    
    # Built url to request information to the explore endpoint
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    lat,
    long, 
    RADIUS, 
    LIMIT)
    
    # Make the request
    results = requests.get(url).json()
    
    # Extrac key information from json request response
    for dict in range(len(results['response']['groups'][0]['items'])):
        venue_name = results['response']['groups'][0]['items'][dict]['venue']['name']
        venue_lat = results['response']['groups'][0]['items'][dict]['venue']['location']['lat']
        venue_lng = results['response']['groups'][0]['items'][dict]['venue']['location']['lng']
        venue_cat = results['response']['groups'][0]['items'][dict]['venue']['categories'][0]['name']
    
        # Create the list to build the dataframe
        list_df.append((neighbourhood, lat, lng, venue_name, venue_cat, venue_lat, venue_lng))
# Build the dataframe
toronto_venues = pd.DataFrame(list_df)
columns = ['Neighbourhood','Neighbourhood Latitude','Neighbourhood Longitude','Venue Name','Venue Category',' Venue Latitude','Venue Longitude']
toronto_venues.columns = columns

103it [00:40,  2.52it/s]


In [121]:
toronto_venues

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue Name,Venue Category,Venue Latitude,Venue Longitude
0,Parkwoods,43.686588,-79.49885,Aroma Espresso Bar,Café,43.688170,-79.412599
1,Parkwoods,43.686588,-79.49885,Sir Winston Churchill Park,Park,43.683732,-79.409881
2,Parkwoods,43.686588,-79.49885,Mashu Mashu Mediterranean Grill,Middle Eastern Restaurant,43.688297,-79.412563
3,Parkwoods,43.686588,-79.49885,What A Bagel,Bagel Shop,43.688079,-79.414544
4,Parkwoods,43.686588,-79.49885,Starbucks,Coffee Shop,43.688970,-79.413097
...,...,...,...,...,...,...,...
2461,"Mimico NW, The Queensway West, South of Bloor,...",43.617290,-79.49885,SanRemo Bakery,Bakery,43.618542,-79.499485
2462,"Mimico NW, The Queensway West, South of Bloor,...",43.617290,-79.49885,The Blue Goose,Bar,43.616789,-79.495870
2463,"Mimico NW, The Queensway West, South of Bloor,...",43.617290,-79.49885,Jimmy’s Coffee,Café,43.618715,-79.499506
2464,"Mimico NW, The Queensway West, South of Bloor,...",43.617290,-79.49885,Audley Street Studios,Performing Arts Venue,43.619189,-79.494390


## Check the number of venues of each neighbourhood

In [122]:
toronto_venues.groupby(by='Neighbourhood', dropna = False).count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue Name,Venue Category,Venue Latitude,Venue Longitude
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,17,17,17,17,17,17
"Alderwood, Long Branch",15,15,15,15,15,15
"Bathurst Manor, Wilson Heights, Downsview North",13,13,13,13,13,13
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",13,13,13,13,13,13
...,...,...,...,...,...,...
"Willowdale, Willowdale West",2,2,2,2,2,2
Woburn,7,7,7,7,7,7
Woodbine Heights,17,17,17,17,17,17
York Mills West,17,17,17,17,17,17


## Check in which neighbourhoods we have venues of the same type

In [123]:
toronto_venues_category = pd.get_dummies(toronto_venues[['Venue Category']], prefix = '', prefix_sep = '')
toronto_venues_category['Neighbourhood'] = toronto_venues['Neighbourhood']
columns_df = list(toronto_venues_category.columns)
toronto_venues_category = toronto_venues_category[[columns_df[-1]] + columns_df[:-2]]

In [124]:
toronto_venues_category

Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,American Restaurant,Animal Shelter,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2461,"Mimico NW, The Queensway West, South of Bloor,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2462,"Mimico NW, The Queensway West, South of Bloor,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2463,"Mimico NW, The Queensway West, South of Bloor,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2464,"Mimico NW, The Queensway West, South of Bloor,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Check how many venues of the same type we have in each neighbourhood

In [125]:
toronto_total = toronto_venues_category.groupby('Neighbourhood').sum()
toronto_total

Unnamed: 0_level_0,Accessories Store,Afghan Restaurant,American Restaurant,Animal Shelter,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Agincourt,0,0,0,0,0,0,0,0,3,0,...,0,0,0,0,0,1,0,0,0,0
"Alderwood, Long Branch",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
"Bathurst Manor, Wilson Heights, Downsview North",0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Bayview Village,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"Bedford Park, Lawrence Manor East",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Willowdale, Willowdale West",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Woburn,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Woodbine Heights,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
York Mills West,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Select the top-five categories of each neigbourhood

In [126]:
# Since qe cannot sort all Neighbourhoods at the same time, let's sort the first one
toronto_total.sort_values(by = toronto_total.index[0], ascending = False, axis=1)

Unnamed: 0_level_0,Chinese Restaurant,Asian Restaurant,Hong Kong Restaurant,Korean Restaurant,Food Court,Bakery,Rental Car Location,Shopping Mall,Intersection,Cantonese Restaurant,...,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food & Drink Shop,Women's Store
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Agincourt,4,3,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
"Alderwood, Long Branch",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"Bathurst Manor, Wilson Heights, Downsview North",0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
Bayview Village,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
"Bedford Park, Lawrence Manor East",1,0,0,0,0,1,0,0,0,0,...,0,2,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Willowdale, Willowdale West",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Woburn,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
Woodbine Heights,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
York Mills West,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [127]:
# Let's do the same for all colums in a loop and put them on a list to create a dataframe
i = 0
toronto_top_venues = []
for neighbourhood in toronto_total.index:
    sorted_df = toronto_total.sort_values(by = neighbourhood, ascending = False, axis=1)
    top_df = sorted_df.iloc[i,:5]
    i+=1
    toronto_top_venues.append(tuple(top_df.index))

toronto_top_venues_df = pd.DataFrame(toronto_top_venues)
toronto_top_venues_df.set_index(toronto_total.index, inplace=True)
toronto_top_venues_df.columns = ['1st Most Common Venue','2nd Most Common Venue','3rd Most Common Venue','4th Most Common Venue','5th Most Common Venue']
toronto_top_venues_df

Unnamed: 0_level_0,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Agincourt,Chinese Restaurant,Asian Restaurant,Hong Kong Restaurant,Korean Restaurant,Food Court
"Alderwood, Long Branch",Grocery Store,Bank,Pizza Place,Italian Restaurant,Sushi Restaurant
"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Pharmacy,Asian Restaurant,Sandwich Place,Restaurant
Bayview Village,Flower Shop,Construction & Landscaping,Golf Driving Range,Trail,Accessories Store
"Bedford Park, Lawrence Manor East",Fast Food Restaurant,Caribbean Restaurant,Fried Chicken Joint,Chinese Restaurant,Grocery Store
...,...,...,...,...,...
"Willowdale, Willowdale West",Park,Mobile Phone Shop,Accessories Store,Nightclub,New American Restaurant
Woburn,Indian Restaurant,Fish & Chips Shop,South Indian Restaurant,Construction & Landscaping,French Restaurant
Woodbine Heights,Coffee Shop,Café,Gastropub,Burger Joint,Sandwich Place
York Mills West,Coffee Shop,Bus Station,Pub,Food Court,Restaurant


## Cluster the Neighbourhoods

In [128]:
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_total)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 0, 1, 0, 4, 4, 4, 0, 0, 2, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 0, 0,
       4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 3, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 1, 1, 0, 4, 0, 0, 0, 0, 2, 4, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [130]:
# Let's add the cluster information to the toronto_top_venues_df
toronto_top_venues_df.insert(0, 'Cluster', kmeans.labels_)
toronto_top_venues_df

Unnamed: 0_level_0,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,0,Chinese Restaurant,Asian Restaurant,Hong Kong Restaurant,Korean Restaurant,Food Court
"Alderwood, Long Branch",0,Grocery Store,Bank,Pizza Place,Italian Restaurant,Sushi Restaurant
"Bathurst Manor, Wilson Heights, Downsview North",0,Coffee Shop,Pharmacy,Asian Restaurant,Sandwich Place,Restaurant
Bayview Village,0,Flower Shop,Construction & Landscaping,Golf Driving Range,Trail,Accessories Store
"Bedford Park, Lawrence Manor East",0,Fast Food Restaurant,Caribbean Restaurant,Fried Chicken Joint,Chinese Restaurant,Grocery Store
...,...,...,...,...,...,...
"Willowdale, Willowdale West",0,Park,Mobile Phone Shop,Accessories Store,Nightclub,New American Restaurant
Woburn,0,Indian Restaurant,Fish & Chips Shop,South Indian Restaurant,Construction & Landscaping,French Restaurant
Woodbine Heights,0,Coffee Shop,Café,Gastropub,Burger Joint,Sandwich Place
York Mills West,0,Coffee Shop,Bus Station,Pub,Food Court,Restaurant


In [9]:
df.shape

(103, 5)

In [132]:
# Let's complete the dataframe by adding the borought and lat, long information
toronto_final = toronto_top_venues_df

toronto_final = toronto_final.join(df.set_index('Neighbourhood'), on='Neighbourhood')

toronto_final.reset_index(inplace=True)
toronto_final

Unnamed: 0,Neighbourhood,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,Postal Code,Borough,Latitude,Longitude
0,Agincourt,0,Chinese Restaurant,Asian Restaurant,Hong Kong Restaurant,Korean Restaurant,Food Court,M1S,Scarborough,43.786260,-79.280840
1,"Alderwood, Long Branch",0,Grocery Store,Bank,Pizza Place,Italian Restaurant,Sushi Restaurant,M8W,Etobicoke,43.593540,-79.532750
2,"Bathurst Manor, Wilson Heights, Downsview North",0,Coffee Shop,Pharmacy,Asian Restaurant,Sandwich Place,Restaurant,M3H,North York,43.737370,-79.434170
3,Bayview Village,0,Flower Shop,Construction & Landscaping,Golf Driving Range,Trail,Accessories Store,M2K,North York,43.777100,-79.379570
4,"Bedford Park, Lawrence Manor East",0,Fast Food Restaurant,Caribbean Restaurant,Fried Chicken Joint,Chinese Restaurant,Grocery Store,M5M,North York,43.751459,-79.265483
...,...,...,...,...,...,...,...,...,...,...,...
97,"Willowdale, Willowdale West",0,Park,Mobile Phone Shop,Accessories Store,Nightclub,New American Restaurant,M2R,North York,43.771340,-79.428020
98,Woburn,0,Indian Restaurant,Fish & Chips Shop,South Indian Restaurant,Construction & Landscaping,French Restaurant,M1G,Scarborough,43.767480,-79.228290
99,Woodbine Heights,0,Coffee Shop,Café,Gastropub,Burger Joint,Sandwich Place,M4C,East York,43.687520,-79.320590
100,York Mills West,0,Coffee Shop,Bus Station,Pub,Food Court,Restaurant,M2P,North York,43.744159,-79.402843


## Let's represent the clusters

In [133]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
toronto_neighbourhood_clusters = folium.Map(location=[43.7612239, -79.3239857], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
i=0
for lat, lon, poi, cluster in zip(toronto_final['Latitude'], toronto_final['Longitude'], toronto_final['Neighbourhood'], toronto_final['Cluster']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(toronto_neighbourhood_clusters)
       
toronto_neighbourhood_clusters