In [1]:
import pandas as pd # import required library
import numpy as np
# read the data using pandas read_html
tables = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")

In [2]:
# since it's the first table, let's add it to a dataframe
df0 = tables[0]
df0.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [3]:
# remove cells with a borough that is Not assigned.
df0 = df0[df0['Borough'] != 'Not assigned']
df0[:10]

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Not assigned
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern
13,M3B,North York,Don Mills North


In [4]:
# now, let's create a new data frame with each postal code per row
# initialize the DF with unique postal codes
# note: this is an alternative to groupby

df = pd.DataFrame({'PostalCode' : list(df0['Postcode'].unique())})
df[:10]

Unnamed: 0,PostalCode
0,M3A
1,M4A
2,M5A
3,M6A
4,M7A
5,M9A
6,M1B
7,M3B
8,M4B
9,M5B


In [5]:
# helper functions for getting boroughs and neighborhoods

# the borough is located in the 3rd column of df0
def getBorough(code):
    return(df0[df0['Postcode'] == code].iloc[0,1])

# the neighborhood is located on the 4th column
# extract the values and join them using a comma
def getNeighborhood(code):
    tempList = df0[df0['Postcode'] == code].iloc[:,2].values
    return ','.join(tempList)

In [6]:
# apply the functions to df


# get the borough for each postal code
df['Borough'] = df['PostalCode'].apply(getBorough)

# get the neighborhood for each postal code
df['Neighborhood'] = df['PostalCode'].apply(getNeighborhood)

df[:10]

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park
5,M9A,Queen's Park,Not assigned
6,M1B,Scarborough,"Rouge,Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens,Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson,Garden District"


If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough

In [7]:
for i in range(len(df)):
    if df.iloc[i,2] == 'Not assigned': # if the value is Not assigned
        df.iloc[i,2] = df.iloc[i,1] # assign the value of the borough

df[:10]       

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park
5,M9A,Queen's Park,Queen's Park
6,M1B,Scarborough,"Rouge,Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens,Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson,Garden District"


In [8]:
df.shape

(103, 3)

In [9]:
# codes_data = pd.read_csv('https://cocl.us/Geospatial_data')
# codes_data.to_csv('./codes_data.csv')
codes_data = pd.read_csv('./codes_data.csv')
codes_data.head()

Unnamed: 0.1,Unnamed: 0,Postal Code,Latitude,Longitude
0,0,M1B,43.806686,-79.194353
1,1,M1C,43.784535,-79.160497
2,2,M1E,43.763573,-79.188711
3,3,M1G,43.770992,-79.216917
4,4,M1H,43.773136,-79.239476


In [10]:
df2 = pd.merge(df,codes_data,left_on='PostalCode', right_on='Postal Code')
df2.drop(columns=['Postal Code'], inplace=True)
df2.head()

Unnamed: 0.1,PostalCode,Borough,Neighborhood,Unnamed: 0,Latitude,Longitude
0,M3A,North York,Parkwoods,25,43.753259,-79.329656
1,M4A,North York,Victoria Village,34,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,53,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",71,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,85,43.662301,-79.389494


# Clusters

In [11]:
import json
from geopy.geocoders import Nominatim
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

In [12]:
neighborhoods = df2
neighborhoods.head()

Unnamed: 0.1,PostalCode,Borough,Neighborhood,Unnamed: 0,Latitude,Longitude
0,M3A,North York,Parkwoods,25,43.753259,-79.329656
1,M4A,North York,Victoria Village,34,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,53,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",71,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,85,43.662301,-79.389494


You can decide to work with only boroughs that contain the word Toronto and then replicate the same analysis we did to the New York City data. It is up to you. 

In [13]:
neighborhoods['Borough'].unique()

array(['North York', 'Downtown Toronto', "Queen's Park", 'Scarborough',
       'East York', 'Etobicoke', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga'], dtype=object)

we see that the following boroughs contain the word 'Toronto':
'Downtown Toronto',East Toronto,West Toronto and Central Toronto.
So let's use only these:

In [14]:
neighborhoods = neighborhoods[
    (neighborhoods['Borough'] == 'Downtown Toronto')|
    (neighborhoods['Borough'] == 'East Toronto')|
    (neighborhoods['Borough'] == 'West Toronto')|
    (neighborhoods['Borough'] == 'Central Toronto')
             ]
neighborhoods.head(5)

Unnamed: 0.1,PostalCode,Borough,Neighborhood,Unnamed: 0,Latitude,Longitude
2,M5A,Downtown Toronto,Harbourfront,53,43.65426,-79.360636
4,M7A,Downtown Toronto,Queen's Park,85,43.662301,-79.389494
9,M5B,Downtown Toronto,"Ryerson,Garden District",54,43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,55,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,37,43.676357,-79.293031


In [15]:
# address = 'Toronto, Canada'

# geolocator = Nominatim(user_agent="ny_explorer")
# location = geolocator.geocode(address)
# latitude = location.latitude
# longitude = location.longitude
# print('The geograpical coordinate of Torono are {}, {}.'.format(latitude, longitude))

# Nominatim is not working for me, let's hardcode lat and long
# 43.6532° N, 79.3832° W

latitude = 43.6532
longitude = -79.3832

In [16]:
# create map using latitude and longitude values
map = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map)  
    
map

In [17]:
CLIENT_ID = 'ZDBQLK3MNWFC2GWJ4YLR2ROADVTCLXJIEUPZBYHCCYU2E03U' # your Foursquare ID
CLIENT_SECRET = 'SZKUARM4PXEWLT21MLK1GFWXXUTWMXR5NS3TXHV2ZZCXGY4E' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

# Client Id
# ZDBQLK3MNWFC2GWJ4YLR2ROADVTCLXJIEUPZBYHCCYU2E03U

# Client Secret
# SZKUARM4PXEWLT21MLK1GFWXXUTWMXR5NS3TXHV2ZZCXGY4E

Your credentails:
CLIENT_ID: ZDBQLK3MNWFC2GWJ4YLR2ROADVTCLXJIEUPZBYHCCYU2E03U
CLIENT_SECRET:SZKUARM4PXEWLT21MLK1GFWXXUTWMXR5NS3TXHV2ZZCXGY4E


In [40]:
LIMIT = 100
radius = 500
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

NameError: name 'neighborhood_latitude' is not defined

In [21]:
results = requests.get(url).json()
results

NameError: name 'url' is not defined

In [None]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [None]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

In [None]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

In [None]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [22]:
toronto_venues = pd.read_csv('./toronto_venues.csv')
                             
# run only once to save time
# toronto_venues = getNearbyVenues(names=df2['Neighborhood'],
#                                    latitudes=df2['Latitude'],
#                                    longitudes=df2['Longitude']
#                                   )
# toronto_venues.to_csv('./toronto_venues.csv')

In [23]:
print(toronto_venues.shape)
toronto_venues.head()

(2214, 8)


Unnamed: 0.1,Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,2,Parkwoods,43.753259,-79.329656,TTC stop - 44 Valley Woods,43.755402,-79.333741,Bus Stop
3,3,Parkwoods,43.753259,-79.329656,GreenWin pool,43.756232,-79.333842,Pool
4,4,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena


In [24]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Unnamed: 0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Adelaide,King,Richmond",100,100,100,100,100,100,100
Agincourt,4,4,4,4,4,4,4
"Agincourt North,L'Amoreaux East,Milliken,Steeles East",2,2,2,2,2,2,2
"Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown",10,10,10,10,10,10,10
"Alderwood,Long Branch",11,11,11,11,11,11,11
...,...,...,...,...,...,...,...
Willowdale West,5,5,5,5,5,5,5
Woburn,3,3,3,3,3,3,3
"Woodbine Gardens,Parkview Hill",12,12,12,12,12,12,12
Woodbine Heights,10,10,10,10,10,10,10


## 3. Analyze Each Neighborhood

In [25]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
toronto_onehot.shape

(2214, 269)

Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [27]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,"Adelaide,King,Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.00
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.00
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.00
4,"Alderwood,Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Willowdale West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.00
96,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.00
97,"Woodbine Gardens,Parkview Hill",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.00
98,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.1,0.0,0.0,0.00,0.0,0.0,0.00


In [28]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond----
         venue  freq
0  Coffee Shop  0.07
1   Steakhouse  0.04
2         Café  0.04
3   Restaurant  0.03
4          Bar  0.03


----Agincourt----
                       venue  freq
0             Breakfast Spot  0.25
1  Latin American Restaurant  0.25
2               Skating Rink  0.25
3                     Lounge  0.25
4                Yoga Studio  0.00


----Agincourt North,L'Amoreaux East,Milliken,Steeles East----
                             venue  freq
0                             Park   0.5
1                       Playground   0.5
2                      Yoga Studio   0.0
3                    Metro Station   0.0
4  Molecular Gastronomy Restaurant   0.0


----Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown----
                 venue  freq
0        Grocery Store   0.2
1          Pizza Place   0.1
2  Fried Chicken Joint   0.1
3          Coffee Shop   0.1
4       Sandwich Place   0.1


----Alderwood,

                venue  freq
0      Sandwich Place  0.33
1                Park  0.33
2            Bus Line  0.33
3         Yoga Studio  0.00
4  Mexican Restaurant  0.00


----Kingsway Park South West,Mimico NW,The Queensway West,Royal York South West,South of Bloor----
                  venue  freq
0        Hardware Store  0.08
1        Discount Store  0.08
2  Fast Food Restaurant  0.08
3          Burger Joint  0.08
4         Burrito Place  0.08


----L'Amoreaux West----
                  venue  freq
0  Fast Food Restaurant   0.2
1    Chinese Restaurant   0.2
2        Breakfast Spot   0.1
3           Pizza Place   0.1
4           Coffee Shop   0.1


----Lawrence Heights,Lawrence Manor----
               venue  freq
0     Clothing Store  0.25
1  Accessories Store  0.12
2      Women's Store  0.06
3          Gift Shop  0.06
4        Coffee Shop  0.06


----Lawrence Park----
                venue  freq
0                Park  0.33
1         Swim School  0.33
2            Bus Line  0.33
3    


Let's put that into a pandas dataframe

First, let's write a function to sort the venues in descending order.


In [29]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [30]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Coffee Shop,Steakhouse,Café,Thai Restaurant,Restaurant,Asian Restaurant,Bakery,Bar,Cosmetics Shop,Pizza Place
1,Agincourt,Latin American Restaurant,Skating Rink,Lounge,Breakfast Spot,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Park,Playground,Doner Restaurant,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Grocery Store,Fried Chicken Joint,Beer Store,Fast Food Restaurant,Sandwich Place,Liquor Store,Coffee Shop,Pizza Place,Pharmacy,Colombian Restaurant
4,"Alderwood,Long Branch",Pizza Place,Gym,Sandwich Place,Dance Studio,Athletics & Sports,Pub,Pool,Skating Rink,Pharmacy,Coffee Shop


# Cluster Neighborhoods

Run k-means to cluster the neighborhood into 5 clusters.


In [31]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)


# check cluster labels generated for each row in the dataframe
# kmeans.labels_[0:10] 
for i in set(list(kmeans.labels_)):
    print (f'{i}  {list(kmeans.labels_).count(i)} ')

0  1 
1  13 
2  83 
3  2 
4  1 


In [32]:
toronto_data = neighborhoods.reset_index(drop=True)
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # 

Unnamed: 0.1,PostalCode,Borough,Neighborhood,Unnamed: 0,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,Harbourfront,53,43.65426,-79.360636,2,Coffee Shop,Park,Pub,Café,Bakery,Restaurant,Mexican Restaurant,Shoe Store,Breakfast Spot,Event Space
1,M7A,Downtown Toronto,Queen's Park,85,43.662301,-79.389494,2,Coffee Shop,Gym,Park,Fast Food Restaurant,Salad Place,Portuguese Restaurant,Nightclub,Music Venue,Mexican Restaurant,Juice Bar
2,M5B,Downtown Toronto,"Ryerson,Garden District",54,43.657162,-79.378937,2,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Bakery,Japanese Restaurant,Bookstore,Electronics Store,Tea Room,Restaurant
3,M5C,Downtown Toronto,St. James Town,55,43.651494,-79.375418,2,Coffee Shop,Café,Restaurant,Cocktail Bar,Clothing Store,Hotel,Italian Restaurant,Bakery,Beer Bar,Breakfast Spot
4,M4E,East Toronto,The Beaches,37,43.676357,-79.293031,2,Trail,Pub,Health Food Store,Women's Store,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run


In [33]:
toronto_merged['Cluster Labels'].unique()

array([2, 1])

looks like some boroughs don't have venues and are assigned to nan clusters, let's remove these

In [34]:
toronto_merged = toronto_merged[toronto_merged['Cluster Labels'] > 0]
toronto_merged['Cluster Labels'].unique()

array([2, 1])

In [35]:

toronto_merged['Cluster Labels'] = toronto_merged['Cluster Labels'].astype('int32')
toronto_merged['Cluster Labels'].unique()

array([2, 1])

In [36]:


# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters



In [37]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Central Toronto,-79.38879,1,Park,Swim School,Bus Line,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop,Department Store
33,Downtown Toronto,-79.377529,1,Park,Trail,Playground,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant


In [38]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,-79.360636,2,Coffee Shop,Park,Pub,Café,Bakery,Restaurant,Mexican Restaurant,Shoe Store,Breakfast Spot,Event Space
1,Downtown Toronto,-79.389494,2,Coffee Shop,Gym,Park,Fast Food Restaurant,Salad Place,Portuguese Restaurant,Nightclub,Music Venue,Mexican Restaurant,Juice Bar
2,Downtown Toronto,-79.378937,2,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Bakery,Japanese Restaurant,Bookstore,Electronics Store,Tea Room,Restaurant
3,Downtown Toronto,-79.375418,2,Coffee Shop,Café,Restaurant,Cocktail Bar,Clothing Store,Hotel,Italian Restaurant,Bakery,Beer Bar,Breakfast Spot
4,East Toronto,-79.293031,2,Trail,Pub,Health Food Store,Women's Store,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
5,Downtown Toronto,-79.373306,2,Coffee Shop,Cocktail Bar,Café,Bakery,Seafood Restaurant,Beer Bar,Steakhouse,Farmers Market,Cheese Shop,Comfort Food Restaurant
6,Downtown Toronto,-79.387383,2,Coffee Shop,Café,Italian Restaurant,Burger Joint,Juice Bar,Ice Cream Shop,Japanese Restaurant,Sandwich Place,Gym / Fitness Center,Spa
7,Downtown Toronto,-79.422564,2,Grocery Store,Café,Park,Gas Station,Diner,Italian Restaurant,Restaurant,Baby Store,Athletics & Sports,Candy Store
8,Downtown Toronto,-79.384568,2,Coffee Shop,Steakhouse,Café,Thai Restaurant,Restaurant,Asian Restaurant,Bakery,Bar,Cosmetics Shop,Pizza Place
9,West Toronto,-79.442259,2,Pharmacy,Bakery,Park,Fast Food Restaurant,Café,Supermarket,Bar,Bank,Pizza Place,Middle Eastern Restaurant


In [39]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,-79.360636,2,Coffee Shop,Park,Pub,Café,Bakery,Restaurant,Mexican Restaurant,Shoe Store,Breakfast Spot,Event Space
1,Downtown Toronto,-79.389494,2,Coffee Shop,Gym,Park,Fast Food Restaurant,Salad Place,Portuguese Restaurant,Nightclub,Music Venue,Mexican Restaurant,Juice Bar
2,Downtown Toronto,-79.378937,2,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Bakery,Japanese Restaurant,Bookstore,Electronics Store,Tea Room,Restaurant
3,Downtown Toronto,-79.375418,2,Coffee Shop,Café,Restaurant,Cocktail Bar,Clothing Store,Hotel,Italian Restaurant,Bakery,Beer Bar,Breakfast Spot
4,East Toronto,-79.293031,2,Trail,Pub,Health Food Store,Women's Store,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
5,Downtown Toronto,-79.373306,2,Coffee Shop,Cocktail Bar,Café,Bakery,Seafood Restaurant,Beer Bar,Steakhouse,Farmers Market,Cheese Shop,Comfort Food Restaurant
6,Downtown Toronto,-79.387383,2,Coffee Shop,Café,Italian Restaurant,Burger Joint,Juice Bar,Ice Cream Shop,Japanese Restaurant,Sandwich Place,Gym / Fitness Center,Spa
7,Downtown Toronto,-79.422564,2,Grocery Store,Café,Park,Gas Station,Diner,Italian Restaurant,Restaurant,Baby Store,Athletics & Sports,Candy Store
8,Downtown Toronto,-79.384568,2,Coffee Shop,Steakhouse,Café,Thai Restaurant,Restaurant,Asian Restaurant,Bakery,Bar,Cosmetics Shop,Pizza Place
9,West Toronto,-79.442259,2,Pharmacy,Bakery,Park,Fast Food Restaurant,Café,Supermarket,Bar,Bank,Pizza Place,Middle Eastern Restaurant
