In [1]:
# Capstone Week 3 Toronto Parts 2+3
# Goal: Read Toronto postal codes from Wikipedia, and create a dataframe for clustering

In [2]:
# import libraries
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim

In [3]:
# libary to handale JSON files
import json
from pandas.io.json import json_normalize

In [4]:
# libaries for plotting
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium
# Library for clustering
from sklearn.cluster import KMeans

In [5]:
# Scrape data fromfrom Wikipedia url
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Read the contents of webpage using GET
r = requests.get(url)
# Create Beautiful Soup object 
soup = BeautifulSoup(r.content,'html5lib')

In [13]:
# Some global variables
postcode = ''
borough = ''
neighborhood = ''

In [14]:
# Create a BLANK dataframe with the required columns
column_names = ['Postcode','Borough','Neighborhoods']
Tor_post_codes = pd.DataFrame(columns=column_names)

In [15]:
table = soup.find_all('table',{'class':'wikitable sortable'})

for tab in table:
# get the row of the table body
    for row in tab.tbody.find_all('tr'):
        try:
            # read cells 
            cells = row.find_all('td')

            postcode = cells[0].text
            # Some cells have hyperlink. The <a href> tag is present for them
            try:
                borough = cells[1].a.text
            except:
            # others do not have <a> tag    
                borough = cells[1].text
            
            neighborhood = cells[2].text.strip('\n')
            # if Neighbourhood is 'Not Assigned' neighbourhood = borough
            if neighborhood == 'Not assigned':
                neighborhood = borough
                
             # keep only entries that are not 'Not assigned'
            if borough != 'Not assigned':
                Tor_post_codes = Tor_post_codes.append( {
                    'Postcode':postcode,
                    'Borough': borough,
                    'Neighborhoods': neighborhood
                }, ignore_index = True)
        except:
            print('Ignore Headers')

# inspect the newly created data frame
Tor_post_codes.head()

Ignore Headers


Unnamed: 0,Postcode,Borough,Neighborhoods
0,M1A\n,Not assigned\n,Not assigned\n
1,M2A\n,Not assigned\n,Not assigned\n
2,M3A\n,North York\n,Parkwoods
3,M4A\n,North York\n,Victoria Village
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront"


In [16]:
# Group neighborhood
Tor_post_codes = Tor_post_codes.groupby(['Postcode','Borough'])['Neighborhoods'].apply(', '.join).reset_index()
Tor_post_codes.head()

Unnamed: 0,Postcode,Borough,Neighborhoods
0,M1A\n,Not assigned\n,Not assigned\n
1,M1B\n,Scarborough\n,"Malvern, Rouge"
2,M1C\n,Scarborough\n,"Rouge Hill, Port Union, Highland Creek"
3,M1E\n,Scarborough\n,"Guildwood, Morningside, West Hill"
4,M1G\n,Scarborough\n,Woburn


In [17]:
Tor_post_codes.shape

(180, 3)

In [46]:
# Use Geocoder to obtain the coordinates
Tor_map_coor = pd.read_csv('Toronto_PROJ_2_df.csv')
Tor_map_coor.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [47]:
# create a new test dataframe
column_names = ['Postal Code', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude']
test_df = pd.DataFrame(columns=column_names)

test_list = ['M5G', 'M2H', 'M4B', 'M1J', 'M4G', 'M4M', 'M1R', 'M9V', 'M9L', 'M5V', 'M1B', 'M5A']

for postcode in test_list:
    test_df = test_df.append(Tor_map_coor[Tor_map_coor['Postal Code']==postcode], ignore_index=True)
    
test_df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
1,M2H,North York,Hillcrest Village,43.803762,-79.363452
2,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
3,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
4,M4G,East York,Leaside,43.70906,-79.363452
5,M4M,East Toronto,Studio District,43.659526,-79.340923
6,M1R,Scarborough,"Wexford, Maryvale",43.750071,-79.295849
7,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
8,M9L,North York,Humber Summit,43.756303,-79.565963
9,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442


In [36]:
# Now, use GEOPY library to get the latitude and longitude values of Toronto

In [38]:
address = 'Toronto'

geolocator = Nominatim(user_agent='ira-deguzman')
location   = geolocator.geocode(address)
latitude   = location.latitude
longitude  = location.longitude
print('The geographical coorindate of Toronto are {}, {}.'.format(latitude,longitude))

The geographical coorindate of Toronto are 43.6534817, -79.3839347.


In [39]:
# Create a map of Toronto with neighborhoods superimposed on top

In [53]:
# Creating a map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude,longitude], zoom_start=10)

# add markers to map_toronto
for lat, lng, borough, neighborhood in zip(Tor_map_coor['Latitude'], Tor_map_coor['Longitude'], \
    Tor_map_coor['Borough'], Tor_map_coor['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat,lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

map_toronto

In [42]:
# Filter only boroughs that contain "Toronto"

In [49]:
# Filter borough names that contain the word 'Toronto'
borough_names = list(Tor_map_coor.Borough.unique())
borough_with_toronto = []

for x in borough_names:
    if 'toronto' in x.lower():
        borough_with_toronto.append(x)
    
borough_with_toronto

['East Toronto', 'Central Toronto', 'Downtown Toronto', 'West Toronto']

In [64]:
# Create a NEW DATAFRAME with only boroughs that containes 'Toronto'
Tor_map_coor2 = Tor_map_coor[Tor_map_coor['Borough'].isin(borough_with_toronto)].reset_index(drop=True)
print(Tor_map_coor.shape)
Tor_map_coor2.head()

(39, 5)


Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [62]:
# Create map that contains 'Toronto' 

In [74]:
# Create map using latitude and longitude values
map_toronto = folium.Map(location=[latitude,longitude], zoom_start=10)

# add markers to the map
for lat, lng, borough, neighborhood in zip(Tor_map_coor2['Latitude'], Tor_map_coor2['Longitude'], Tor_map_coor2['Borough'], \
                                  Tor_map_coor2['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat,lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)
    
map_toronto

In [75]:
# Using the Foursqure API to explore the neighborhoods

In [76]:
# Define Foursquare credentials and version
CLIENT_ID = 'YWKCLMLQSEGUK5P4U1ZHTLHOJC3N1UIZSCO3HUTT05MG4ZM1'
CLIENT_SECRET = 'RMUMRHTZMJZCF4JWMKFDJDEDHSAYNE1T0B3DSNJ3WBYRD4QF'
VERSION = '20180604'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: YWKCLMLQSEGUK5P4U1ZHTLHOJC3N1UIZSCO3HUTT05MG4ZM1
CLIENT_SECRET:RMUMRHTZMJZCF4JWMKFDJDEDHSAYNE1T0B3DSNJ3WBYRD4QF


In [78]:
# Lets get the Top 100 venues within a radius = 0.5 Km (500 m)
radius = 500
LIMIT = 100

venues = []

for lat, long, post, borough, neighborhood in zip(Tor_map_coor2['Latitude'], Tor_map_coor2['Longitude'], \
            Tor_map_coor2['Postal Code'], Tor_map_coor2['Borough'], Tor_map_coor2['Neighbourhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius,
        LIMIT)
    
    results = requests.get(url).json()['response']['groups'][0]['items']
    
    for venue in results:
        venues.append((
            post,
            borough,
            neighborhood,
            lat,
            long,
            venue['venue']['name'],
            venue['venue']['location']['lat'],
            venue['venue']['location']['lng'],
            venue['venue']['categories'][0]['name']))

In [80]:
# convert the venues list into a NEW DATAFRAME
venues_df = pd.DataFrame(venues)

# define new column names
# define the column names
venues_df.columns = ['PostalCode', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', \
                     'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
print(venues_df.shape)
venues_df.head()

(1638, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,M4E,East Toronto,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,M4E,East Toronto,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,MenEssentials,43.67782,-79.351265,Cosmetics Shop


In [82]:
venues_df.groupby(['PostalCode', 'Borough', 'Neighborhood']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
PostalCode,Borough,Neighborhood,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
M4E,East Toronto,The Beaches,4,4,4,4,4,4
M4K,East Toronto,"The Danforth West, Riverdale",42,42,42,42,42,42
M4L,East Toronto,"India Bazaar, The Beaches West",20,20,20,20,20,20
M4M,East Toronto,Studio District,41,41,41,41,41,41
M4N,Central Toronto,Lawrence Park,4,4,4,4,4,4
M4P,Central Toronto,Davisville North,7,7,7,7,7,7
M4R,Central Toronto,"North Toronto West, Lawrence Park",18,18,18,18,18,18
M4S,Central Toronto,Davisville,34,34,34,34,34,34
M4T,Central Toronto,"Moore Park, Summerhill East",4,4,4,4,4,4
M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park",17,17,17,17,17,17


In [83]:
# Find out how many unique categories can be curated
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 234 uniques categories.


In [84]:
# Select only 50
venues_df['VenueCategory'].unique()[:50]

array(['Trail', 'Health Food Store', 'Pub', 'Neighborhood',
       'Cosmetics Shop', 'Ice Cream Shop', 'Greek Restaurant',
       'Italian Restaurant', 'Brewery', 'Yoga Studio', 'Juice Bar',
       'Fruit & Vegetable Store', 'Dessert Shop', 'Pizza Place',
       'Restaurant', 'Bookstore', 'Furniture / Home Store', 'Café', 'Spa',
       'Bubble Tea Shop', 'Grocery Store', 'Coffee Shop', 'Bakery',
       'Caribbean Restaurant', 'Indian Restaurant', 'American Restaurant',
       'Lounge', 'Frozen Yogurt Shop', 'Liquor Store', 'Toy / Game Store',
       'Gym', 'Fast Food Restaurant', 'Fish & Chips Shop',
       'Sushi Restaurant', 'Park', 'Burrito Place', 'Pet Store',
       'Steakhouse', 'Movie Theater', 'Sandwich Place', 'Board Shop',
       'Intersection', 'Food & Drink Shop', 'Fish Market',
       'Seafood Restaurant', 'Gay Bar', 'Cheese Shop',
       'Middle Eastern Restaurant', 'Stationery Store',
       'Comfort Food Restaurant'], dtype=object)

In [86]:
# Analyze hot encoding
Toronto_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix='', prefix_sep='')

# Add postal code, borough, neighborhood column back to DataFrame
Toronto_onehot['PostalCode'] = venues_df['PostalCode']
Toronto_onehot['Borough'] = venues_df['Borough']
Toronto_onehot['Neighborhoods'] = venues_df['Neighborhood']

# move posta, borough, neighborhood columns to the first column
fixed_columns = list(Toronto_onehot.columns[-3:]) + list(Toronto_onehot.columns[:-3])
Toronto_onehot = Toronto_onehot[fixed_columns]

print(Toronto_onehot.shape)
Toronto_onehot.head()

(1638, 237)


Unnamed: 0,PostalCode,Borough,Neighborhoods,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M4K,East Toronto,"The Danforth West, Riverdale",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [109]:
# Group rows by neighborhoods, taking the MEAN OF FREQUENCY OF OCCURRENCE of each category
Toronto_grouped = Toronto_onehot.groupby(['PostalCode', 'Borough', 'Neighborhoods']).mean().reset_index()

print(Toronto_grouped.shape)
Toronto_grouped

(39, 237)


Unnamed: 0,PostalCode,Borough,Neighborhoods,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,M4E,East Toronto,The Beaches,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M4K,East Toronto,"The Danforth West, Riverdale",0.0,0.0,0.0,0.0,0.0,0.0,0.02381,...,0.0,0.02381,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.02381
2,M4L,East Toronto,"India Bazaar, The Beaches West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M4M,East Toronto,Studio District,0.0,0.0,0.0,0.0,0.0,0.0,0.04878,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.02439
4,M4N,Central Toronto,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,M4P,Central Toronto,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,M4R,Central Toronto,"North Toronto West, Lawrence Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556
7,M4S,Central Toronto,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M4T,Central Toronto,"Moore Park, Summerhill East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",0.0,0.0,0.0,0.0,0.0,0.0,0.058824,...,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0


In [107]:
# Create a NEW DATAFRAME, displaying the Top 10 venues for each 'PostalCode'
num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
areaColumns = ['PostalCode', 'Borough', 'Neighborhoods']
freqColumns = []
for ind in np.arange(num_top_venues):
    try:
        freqColumns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        freqColumns.append('{}th Most Common Venue'.format(ind+1))
columns = areaColumns+freqColumns

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = Toronto_grouped['PostalCode']
neighborhoods_venues_sorted['Borough'] = Toronto_grouped['Borough']
neighborhoods_venues_sorted['Neighborhoods'] = Toronto_grouped['Neighborhoods']

for ind in np.arange(Toronto_grouped.shape[0]):
    row_categories = Toronto_grouped.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighborhoods_venues_sorted.iloc[ind, 3:] = row_categories_sorted.index.values[0:num_top_venues]

# neighborhoods_venues_sorted.sort_values(freqColumns, inplace=True)
print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted

(39, 13)


Unnamed: 0,PostalCode,Borough,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,Neighborhood,Pub,Health Food Store,Trail,Yoga Studio,Dog Run,Diner,Discount Store,Distribution Center,Donut Shop
1,M4K,East Toronto,"The Danforth West, Riverdale",Greek Restaurant,Coffee Shop,Italian Restaurant,Furniture / Home Store,Ice Cream Shop,Restaurant,Yoga Studio,Liquor Store,Spa,Juice Bar
2,M4L,East Toronto,"India Bazaar, The Beaches West",Gym,Food & Drink Shop,Sandwich Place,Burrito Place,Board Shop,Restaurant,Italian Restaurant,Fast Food Restaurant,Intersection,Fish & Chips Shop
3,M4M,East Toronto,Studio District,Café,Coffee Shop,Gastropub,Bakery,American Restaurant,Brewery,Yoga Studio,Latin American Restaurant,Fish Market,Italian Restaurant
4,M4N,Central Toronto,Lawrence Park,Park,Photography Studio,Bus Line,Swim School,Department Store,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
5,M4P,Central Toronto,Davisville North,Park,Food & Drink Shop,Hotel,Sandwich Place,Department Store,Gym / Fitness Center,Breakfast Spot,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
6,M4R,Central Toronto,"North Toronto West, Lawrence Park",Coffee Shop,Clothing Store,Yoga Studio,Sporting Goods Shop,Grocery Store,Gym / Fitness Center,Fast Food Restaurant,Mexican Restaurant,Diner,Park
7,M4S,Central Toronto,Davisville,Pizza Place,Sandwich Place,Dessert Shop,Gym,Coffee Shop,Italian Restaurant,Café,Sushi Restaurant,Toy / Game Store,Brewery
8,M4T,Central Toronto,"Moore Park, Summerhill East",Gym,Restaurant,Park,Tennis Court,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run
9,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",Coffee Shop,Pub,Bagel Shop,Fried Chicken Joint,Liquor Store,Restaurant,Café,Sports Bar,Bank,Supermarket


In [110]:
# CLUSTER AREAS
# Run KMeans to cluster the Toronto areas into 5 clusters

In [112]:
# Set number of clusters
kclusters = 5
Toronto_grouped_clustering = Toronto_grouped.drop(['PostalCode', 'Borough', 'Neighborhoods'],1)

# Run KMeans Clustering
kmeans = KMeans(n_clusters = kclusters, random_state=0).fit(Toronto_grouped_clustering)

# Check cluster labels generated for each row in the DataFrame
kmeans.labels_[0:10]

array([1, 1, 1, 1, 4, 1, 1, 1, 1, 1])

In [116]:
# Create a  NEW DATAFRAME that includes the cluster as well as the Top 10 venues for each neighborhood
Toronto_merged = Tor_map_coor2.copy()

# Add clustering labels
Toronto_merged['Cluster Labels'] = kmeans.labels_

# Merge Toronto_grouped with Tor_map_coor to add latitude/longitude for each neighborhood
Toronto_merged = Toronto_merged.join(neighborhoods_venues_sorted.drop(['Borough', 'Neighborhoods'],1).set_index('PostalCode'),\
                 on='Postal Code')

print(Toronto_merged.shape)
Toronto_merged.head()

(39, 16)


Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,1,Neighborhood,Pub,Health Food Store,Trail,Yoga Studio,Dog Run,Diner,Discount Store,Distribution Center,Donut Shop
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,1,Greek Restaurant,Coffee Shop,Italian Restaurant,Furniture / Home Store,Ice Cream Shop,Restaurant,Yoga Studio,Liquor Store,Spa,Juice Bar
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572,1,Gym,Food & Drink Shop,Sandwich Place,Burrito Place,Board Shop,Restaurant,Italian Restaurant,Fast Food Restaurant,Intersection,Fish & Chips Shop
3,M4M,East Toronto,Studio District,43.659526,-79.340923,1,Café,Coffee Shop,Gastropub,Bakery,American Restaurant,Brewery,Yoga Studio,Latin American Restaurant,Fish Market,Italian Restaurant
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,4,Park,Photography Studio,Bus Line,Swim School,Department Store,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant


In [117]:
# Sort the results by Cluster Labels
print(Toronto_merged.shape)
Toronto_merged.sort_values(['Cluster Labels'], inplace=True)
Toronto_merged

(39, 16)


Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,M5P,Central Toronto,"Forest Hill North & West, Forest Hill Road Park",43.696948,-79.411307,0,Jewelry Store,Trail,Mexican Restaurant,Sushi Restaurant,Yoga Studio,Dessert Shop,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,1,Neighborhood,Pub,Health Food Store,Trail,Yoga Studio,Dog Run,Diner,Discount Store,Distribution Center,Donut Shop
21,M5L,Downtown Toronto,"Commerce Court, Victoria Hotel",43.648198,-79.379817,1,Coffee Shop,Restaurant,Café,Hotel,Gym,American Restaurant,Japanese Restaurant,Seafood Restaurant,Italian Restaurant,Beer Bar
24,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678,1,Café,Sandwich Place,Coffee Shop,Donut Shop,Pub,Middle Eastern Restaurant,Liquor Store,BBQ Joint,History Museum,Pizza Place
25,M5S,Downtown Toronto,"University of Toronto, Harbord",43.662696,-79.400049,1,Café,Bar,Japanese Restaurant,Bookstore,Sandwich Place,Restaurant,Bakery,Yoga Studio,Pub,Beer Bar
26,M5T,Downtown Toronto,"Kensington Market, Chinatown, Grange Park",43.653206,-79.400049,1,Café,Vegetarian / Vegan Restaurant,Coffee Shop,Mexican Restaurant,Bar,Vietnamese Restaurant,Burger Joint,Bakery,Dessert Shop,Pizza Place
27,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442,1,Airport Service,Airport Lounge,Boutique,Airport,Airport Food Court,Airport Terminal,Sculpture Garden,Harbor / Marina,Rental Car Location,Boat or Ferry
28,M5W,Downtown Toronto,Stn A PO Boxes,43.646435,-79.374846,1,Coffee Shop,Italian Restaurant,Café,Gym,Beer Bar,Japanese Restaurant,Pub,Seafood Restaurant,Hotel,Restaurant
29,M5X,Downtown Toronto,"First Canadian Place, Underground city",43.648429,-79.38228,1,Coffee Shop,Café,Hotel,Gym,Japanese Restaurant,Restaurant,Seafood Restaurant,American Restaurant,Steakhouse,Asian Restaurant
30,M6G,Downtown Toronto,Christie,43.669542,-79.422564,1,Grocery Store,Café,Park,Candy Store,Nightclub,Italian Restaurant,Diner,Restaurant,Baby Store,Coffee Shop


In [118]:
# FINALLY, visualize the resulting clusters

In [130]:
# Create MAP
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, post, bor, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], \
                                             Toronto_merged['Postal Code'], Toronto_merged['Borough'], \
                                             Toronto_merged['Neighbourhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup('{} ({}): {} - Cluster {}'.format(bor, post, poi, cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [122]:
# Examine the CLUSTERS

In [123]:
# Cluster 1 (Cluster Label = '0')
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,Central Toronto,0,Jewelry Store,Trail,Mexican Restaurant,Sushi Restaurant,Yoga Studio,Dessert Shop,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


In [124]:
# Cluster 2 (Cluster Label = '1')
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,1,Neighborhood,Pub,Health Food Store,Trail,Yoga Studio,Dog Run,Diner,Discount Store,Distribution Center,Donut Shop
21,Downtown Toronto,1,Coffee Shop,Restaurant,Café,Hotel,Gym,American Restaurant,Japanese Restaurant,Seafood Restaurant,Italian Restaurant,Beer Bar
24,Central Toronto,1,Café,Sandwich Place,Coffee Shop,Donut Shop,Pub,Middle Eastern Restaurant,Liquor Store,BBQ Joint,History Museum,Pizza Place
25,Downtown Toronto,1,Café,Bar,Japanese Restaurant,Bookstore,Sandwich Place,Restaurant,Bakery,Yoga Studio,Pub,Beer Bar
26,Downtown Toronto,1,Café,Vegetarian / Vegan Restaurant,Coffee Shop,Mexican Restaurant,Bar,Vietnamese Restaurant,Burger Joint,Bakery,Dessert Shop,Pizza Place
27,Downtown Toronto,1,Airport Service,Airport Lounge,Boutique,Airport,Airport Food Court,Airport Terminal,Sculpture Garden,Harbor / Marina,Rental Car Location,Boat or Ferry
28,Downtown Toronto,1,Coffee Shop,Italian Restaurant,Café,Gym,Beer Bar,Japanese Restaurant,Pub,Seafood Restaurant,Hotel,Restaurant
29,Downtown Toronto,1,Coffee Shop,Café,Hotel,Gym,Japanese Restaurant,Restaurant,Seafood Restaurant,American Restaurant,Steakhouse,Asian Restaurant
30,Downtown Toronto,1,Grocery Store,Café,Park,Candy Store,Nightclub,Italian Restaurant,Diner,Restaurant,Baby Store,Coffee Shop
31,West Toronto,1,Pharmacy,Bakery,Pet Store,Music Venue,Café,Middle Eastern Restaurant,Bar,Supermarket,Bank,Brewery


In [125]:
# Cluster 3 (Cluster Label = '2')
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Central Toronto,2,Home Service,Garden,Yoga Studio,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


In [126]:
# Cluster 4 (Cluster Label = '3')
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Downtown Toronto,3,Park,Playground,Trail,Yoga Studio,Deli / Bodega,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant


In [127]:
# Cluster 5 (Cluster Label = '4')
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 4, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Central Toronto,4,Park,Photography Studio,Bus Line,Swim School,Department Store,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant


In [None]:
# END!!
# by Ira de Guzman