# Toronto Neighborhoods Clusters

Scrapes the following Wikipedia page, https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M, in order to obtain the data that is in the table of postal codes and to transform the data into a pandas dataframe.

Adds the geographical coordinates of each postal code from: http://cocl.us/Geospatial_data

Explores and clusters the neighborhoods in Toronto; generates a map to visualize your neighborhoods and how they cluster together. 

### Import Libraries

In [14]:
import numpy as np

import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json

import requests
from urllib.request import urlopen

from geopy.geocoders import Nominatim 

import matplotlib.cm as cm
import matplotlib.colors as colors
import folium

from sklearn.cluster import KMeans

def BeautifulTablesFromPage(article):

    from pandas import DataFrame
    from bs4 import BeautifulSoup
    
    soup = BeautifulSoup(article, 'html.parser')
    tables = soup.find_all('table', class_='sortable')
    
    all_tables_content = []
    for table in tables:
        ths = table.find_all('th')
        table_headings = [th.text.strip() for th in ths]
    
        table_content = []
        for tr in table.find_all('tr'):
            tds = tr.find_all('td')
            if not tds:
                continue
            table_content.append([td.text.strip() for td in tds])
        
        df = DataFrame(table_content)
        df.columns = table_headings
        all_tables_content.append(df)
        
        return(all_tables_content)

#### Scraping and Data Manipulation

In [3]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# save the file locally
with open('List_of_postal_codes_of_Canada:_M', 'w') as art: 
    art.write(urlopen(url).read().decode())

# Load article
article = open('List_of_postal_codes_of_Canada:_M').read()

all_tables = BeautifulTablesFromPage(article)

postal_codes_of_Canada = all_tables[0].copy()

#If Borough is Not assigned then drop line 
postal_codes_of_Canada = postal_codes_of_Canada[postal_codes_of_Canada['Borough'] != 'Not assigned']
postal_codes_of_Canada.reset_index(drop=True,inplace=True)

#If Neighbourhood is Not assigned then replace it with Borough
for i, row in postal_codes_of_Canada.iterrows():
    if row['Neighbourhood'] == 'Not assigned':
        postal_codes_of_Canada.loc[i]['Neighbourhood'] = row['Borough']

#### Imports Geospatial Coordinate data

In [12]:
Geospatial_Coordinates = pd.read_csv('https://cocl.us/Geospatial_data')
target_locations = postal_codes_of_Canada.set_index('Postal Code').merge(Geospatial_Coordinates.set_index('Postal Code'), on='Postal Code')
target_locations.reset_index(inplace=True)

#### Neighborhoods Dataframe

In [16]:
# Filtering Toronto Locations
target_locations = target_locations[target_locations['Borough'].apply(lambda x: x.find('Toronto')) > 0]

target_locations.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031


## Foursquare API
### Get neighbourhoods venues

Defines a function to get the venues for a Toronto neighbourhood using the Foursquare API.

In [20]:
# Foursquare credentials

param_authentication = {'CLIENT_ID':'[REMOVED]',
                        'CLIENT_SECRET':'[REMOVED]'
                        }


In [22]:
def GetSurroundings(p_authentication, p_coordinates, param_radius, param_limit):
    
    url = 'https://api.foursquare.com/v2/venues/explore'
    
    params = dict(  client_id = p_authentication['CLIENT_ID'],
                    client_secret = p_authentication['CLIENT_SECRET'],
                    v = '20180604',
                    limit = param_limit,            
                    ll = p_coordinates,
                    radius = param_radius)
    
    raw_results = requests.get(url=url, params=params).json()
    
    df_results = pd.json_normalize(raw_results['response']['groups'][0]['items'])
    df_results = df_results.loc[:, ['venue.name','venue.location.lat', 'venue.location.lng','venue.categories']]
    df_results['venue.categories'] = df_results['venue.categories'].apply(lambda x: None if len(x) == 0 else x[0]['name'])
    df_results.columns = ['Venue','Venue Latitude','Venue Longitude','Venue Category']
        
    return(df_results)         

Uses the function in a loop to retrieve the venues for every Toronto neighbourhood and stores in a dataframe

In [107]:
nearby_venues = pd.DataFrame(columns = ['Venue',
                                        'Venue Latitude',
                                        'Venue Longitude',
                                        'Venue Category',
                                        'Borough',
                                        'Neighbourhood', 
                                        'Neighbourhood Latitude',
                                        'Neighbourhood Longitude'
                                        ])

for i, ith_neighbourhood in target_locations.iterrows():

    venues = GetSurroundings(p_authentication = param_authentication, 
                             p_coordinates = str(ith_neighbourhood['Latitude'])+', '+str(ith_neighbourhood['Longitude']), 
                             param_radius = 500, 
                             param_limit = 100)
    
    venues['Borough'] = ith_neighbourhood['Borough']
    venues['Neighbourhood'] = ith_neighbourhood['Neighbourhood']
    venues['Neighbourhood Latitude'] = ith_neighbourhood['Latitude']
    venues['Neighbourhood Longitude'] = ith_neighbourhood['Longitude']

    nearby_venues = nearby_venues.append(venues, ignore_index=True)

    del venues

Shows the number of venues retrieved for every Neighbourhood. The table shows how some areas have very few venues; 
all of those with less than 10 venues will be dropped and not used for the creation of the clusters

In [108]:
nearby_venues.groupby(['Neighbourhood'])['Venue'].count().sort_values()

Neighbourhood
Moore Park, Summerhill East                                                                                     2
Lawrence Park                                                                                                   3
Roselawn                                                                                                        3
The Beaches                                                                                                     4
Rosedale                                                                                                        5
Forest Hill North & West, Forest Hill Road Park                                                                 5
Davisville North                                                                                                9
Dufferin, Dovercourt Village                                                                                   13
Parkdale, Roncesvalles                                                    

In [109]:
few_venues_neigh = nearby_venues.groupby(['Neighbourhood'])['Venue'].count().sort_values()[0:7].index.values
nearby_venues = nearby_venues[~nearby_venues['Neighbourhood'].isin(few_venues_neigh)]

nearby_venues.head()

Unnamed: 0,Venue,Venue Latitude,Venue Longitude,Venue Category,Borough,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude
0,Roselle Desserts,43.653447,-79.362017,Bakery,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,Tandem Coffee,43.653559,-79.361809,Coffee Shop,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
2,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,Impact Kitchen,43.656369,-79.35698,Restaurant,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,Body Blitz Spa East,43.654735,-79.359874,Spa,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636


Shows the frequency of distinct types of venue (Venue Category):

In [110]:
nearby_venues.groupby(['Venue Category'])['Venue'].count().sort_values(ascending=False)

Venue Category
Coffee Shop                        159
Café                                84
Restaurant                          56
Hotel                               39
Italian Restaurant                  35
Japanese Restaurant                 32
Park                                31
Bakery                              30
Bar                                 29
Pizza Place                         28
Gym                                 26
Sushi Restaurant                    24
Sandwich Place                      23
Seafood Restaurant                  22
Pub                                 21
Bookstore                           20
American Restaurant                 20
Beer Bar                            19
Thai Restaurant                     19
Clothing Store                      19
Gastropub                           18
Breakfast Spot                      16
Vegetarian / Vegan Restaurant       16
Diner                               15
Grocery Store                       14
Steakhouse

### 2 main notes

- The number of venues per neighbourhood is unbalanced (some areas have more venues than other). This is an issue for the clustering algorithm as the clusters will be created by checking how frequently a type of venues (category) is present in a neighbourhood. Areas with fewer venues will have much fewer factors that can be used by the algorithm to determine the optimal cluster

- Although Venue Categories are unique, some of them are actually very related to each other (e.g. 'Pub' and 'Irish Pub'). Although semantlcally related, they will be treated as completely different entities by the clustering algorithm.

## Prepare Neighbourhoods/Venues Dataset for the clustering algorithm

In [111]:
nearby_venues = nearby_venues.groupby(['Neighbourhood','Venue Category'])['Venue'].count().reset_index()

nearby_venues = nearby_venues.pivot(index='Neighbourhood', columns='Venue Category', values='Venue')
nearby_venues.fillna(0, inplace=True)

for i, row in nearby_venues.iterrows():
    nearby_venues.loc[i] = nearby_venues.loc[i]/np.sum(row)
    
nearby_venues.head()

Venue Category,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Butcher,Café,Cajun / Creole Restaurant,Camera Store,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Auditorium,College Cafeteria,College Gym,College Rec Center,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hookah Bar,Hospital,Hostel,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indoor Play Area,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Kitchen Supply Store,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Malay Restaurant,Market,Martial Arts School,Massage Studio,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Museum,Music Venue,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Plane,Playground,Plaza,Poke Place,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Record Shop,Recording Studio,Rental Car Location,Restaurant,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soup Place,Southern / Soul Food Restaurant,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Sri Lankan Restaurant,Stadium,Stationery Store,Steakhouse,Strip Club,Supermarket,Sushi Restaurant,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1
Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.036364,0.0,0.0,0.0,0.018182,0.018182,0.0,0.036364,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.036364,0.0,0.0,0.0,0.0,0.0,0.036364,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.018182,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.036364,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.018182,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.018182,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.018182,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.036364,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.036364,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.018182,0.018182,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0
"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.04,0.0,0.12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",0.0,0.058824,0.058824,0.058824,0.117647,0.117647,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.029412,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.176471,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.014706,0.0,0.014706,0.014706,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.014706,0.0,0.0,0.0,0.0,0.044118,0.029412,0.0,0.0,0.014706,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.014706,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.014706,0.0,0.0,0.014706,0.0,0.0,0.0,0.014706,0.0,0.0,0.029412,0.0,0.044118,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.014706,0.029412,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.0,0.014706


## Clustering algorithm

In [116]:
kclusters = 2
kmeans = KMeans(n_clusters=kclusters, n_init=10).fit(nearby_venues)

nearby_venues['Cluster Labels'] = kmeans.labels_
locations_venues = target_locations.set_index('Neighbourhood').join(nearby_venues['Cluster Labels'], on='Neighbourhood', how='inner')

## Exploring the Clusters

#### Cluster Density:

In [117]:
locations_venues['Cluster Labels'].value_counts()

0    26
1     6
Name: Cluster Labels, dtype: int64

#### Clusters Composition:

In [122]:
Cluster_nr = 1
pd.DataFrame({'frequency':nearby_venues.groupby(['Cluster Labels']).mean().loc[Cluster_nr-1].sort_values(ascending=False)[1:10]})

Unnamed: 0_level_0,frequency
Venue Category,Unnamed: 1_level_1
Café,0.038958
Restaurant,0.037783
Italian Restaurant,0.023962
Park,0.023328
Pizza Place,0.022568
Sushi Restaurant,0.018065
Bar,0.017583
Hotel,0.016406
Pub,0.016027


Cluster 1 seems to be an office/business - afterwork/nightlife cluster. Its neighbourhoods are very central.

In [119]:
Cluster_nr = 2
pd.DataFrame({'frequency':nearby_venues.groupby(['Cluster Labels']).mean().loc[Cluster_nr-1].sort_values(ascending=False)[1:10]})

Unnamed: 0_level_0,frequency
Venue Category,Unnamed: 1_level_1
Grocery Store,0.067564
Park,0.048836
Bakery,0.048522
Coffee Shop,0.046196
Bar,0.042112
Sandwich Place,0.03612
Pharmacy,0.034413
Nightclub,0.028652
Italian Restaurant,0.028396


Cluster 2 seems to be a residential cluster. Its neighbourhoods are slighly far off from the city centre.

## Create map with Clusters

In [120]:
location = 'Toronto, Ontario, Canada'
map_centre = Nominatim(user_agent="ny_explorer").geocode(location)

map_clusters = folium.Map(location=[map_centre.latitude,
                                   map_centre.longitude],
                          tiles = 'Stamen Terrain',
                         zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
    
for name, row in locations_venues.iterrows():

    label = folium.Popup(str(name) + ' Cluster ' + str(row['Cluster Labels']), parse_html=True)
    folium.CircleMarker([row['Latitude'], row['Longitude']],
                        radius=5,
                        popup=label,
                        color=rainbow[row['Cluster Labels']-1],
                        fill=True,
                        fill_color=rainbow[row['Cluster Labels']-1],
                        fill_opacity=0.7).add_to(map_clusters)

In [121]:
map_clusters