In [1]:
#Import the pandas library.
import pandas as pd

In [2]:
#Set the wikipedia url for scraping the data.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
url

'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [3]:
#Use pandas read_html method to read data from the url that specified above. In this case, we read the first data set 
#and saved it to df
webdata = pd.read_html(url, header=0)
df = webdata[0]
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [4]:
#Clean up the data, ignore the data if Borough is not assigned.
df = df[df.Borough!='Not assigned']
df.sort_values(by='Borough', inplace=True)
df.groupby(by='Borough').size()
#df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Borough
Central Toronto      9
Downtown Toronto    19
East Toronto         5
East York            5
Etobicoke           12
Mississauga          1
North York          24
Scarborough         17
West Toronto         6
York                 5
dtype: int64

In [5]:
#Display the number of rows of the dataframe which is 103 rows and 3 columns
df.shape

(103, 3)

In [6]:
#Read the geographical coordinates from the csv file.

lat_lng_coords = pd.read_csv('Geospatial_Coordinates.csv')
lat_lng_coords.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [7]:
#Merge the original dataframe with the csv file based on the Postal Code column.
df_merge = df.merge(lat_lng_coords, on = 'Postal Code')

In [8]:
df_merge.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049
1,M4S,Central Toronto,Davisville,43.704324,-79.38879
2,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
3,M5P,Central Toronto,"Forest Hill North & West, Forest Hill Road Park",43.696948,-79.411307
4,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678


In [9]:
#After merge the dataframe, number of records reminds the same but 2 extra columns.
df_merge.shape

(103, 5)

In [10]:
import numpy as np

import json

#!conda install -c conda-forge geopy
#from geopy.geocoders import Nominatim

import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0
import folium

print('Libraries imported.')

Libraries imported.


In [11]:
#I am going to select Borough contains "Toronto" for this assignment
Toronto_data = df_merge[df_merge['Borough'].str.contains('Toronto', regex=False)]
Toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049
1,M4S,Central Toronto,Davisville,43.704324,-79.38879
2,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
3,M5P,Central Toronto,"Forest Hill North & West, Forest Hill Road Park",43.696948,-79.411307
4,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678


In [12]:
#Install the folium 
#pip install folium

In [13]:
import folium as folium

#Assign latitude and longitude of Central Toronto 
latitude = 43.686412
longitude = -79.400049

Toronto_data.shape

(39, 5)

In [14]:
#Create map of Toronto city using Latitude and Longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
                         
#Add markers to map
for lat, lng, borough, neighbourhood in zip(Toronto_data['Latitude'], Toronto_data['Longitude'], Toronto_data['Borough'], Toronto_data['Neighbourhood']):
    label ='{},{}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)

map_Toronto

In [15]:
#Define Foursquare Credentials and Version

CLIENT_ID = '2DNKE3IIDD0KWZVLRXDAFBYBCAXFOIL00NVVFUCZPD1DCC2I' # your Foursquare ID
CLIENT_SECRET = '004DNJPNQTBGZOJCJJN4GEERXGFQXFN5HHRAI2WN0JV50AJW' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)



Your credentails:
CLIENT_ID: 2DNKE3IIDD0KWZVLRXDAFBYBCAXFOIL00NVVFUCZPD1DCC2I
CLIENT_SECRET:004DNJPNQTBGZOJCJJN4GEERXGFQXFN5HHRAI2WN0JV50AJW


In [16]:
#Get the neighborthood's name
Toronto_data.loc[0, 'Neighbourhood']


'Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park'

In [17]:
neighbourhood_latitude = Toronto_data.loc[0, 'Latitude']
neighbourhood_longitude = Toronto_data.loc[0, 'Longitude']
neighbourhood_name = Toronto_data.loc[0, 'Neighbourhood']

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, neighbourhood_latitude, neighbourhood_longitude))

Latitude and longitude values of Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park are 43.68641229999999, -79.4000493.


In [18]:
#Get the top 100 venues that are in Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park within a radius of 500 meters
#First create the GET request URL and name it as url.

LIMIT = 100
radius = 500

url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, CLIENT_SECRET, VERSION, neighbourhood_latitude, neighbourhood_longitude, radius, LIMIT)

url


'https://api.foursquare.com/v2/venues/explore?client_id=2DNKE3IIDD0KWZVLRXDAFBYBCAXFOIL00NVVFUCZPD1DCC2I&client_secret=004DNJPNQTBGZOJCJJN4GEERXGFQXFN5HHRAI2WN0JV50AJW&v=20180605&ll=43.68641229999999,-79.4000493&radius=500&limit=100'

In [19]:
#Send the GET request and examine the results
results = requests.get(url).json()
results


{'meta': {'code': 200, 'requestId': '5f3f30ee82a8164a55b417db'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Deer Park',
  'headerFullLocation': 'Deer Park, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 16,
  'suggestedBounds': {'ne': {'lat': 43.690912304499996,
    'lng': -79.39383797359734},
   'sw': {'lat': 43.68191229549999, 'lng': -79.40626062640267}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5a67afb973fe2528841f60f3',
       'name': 'The Market By Longo’s',
       'location': {'address': '111 St Clair Ave W',
        'lat': 43.686711,
        'lng': -79.399536,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.686711,


In [20]:
#Create function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
    

In [21]:
#Ready to clean the json and structure it into a pandas dataframe.
venues = results['response']['groups'][0]['items']

nearby_venues = json_normalize(venues) #flatten JSON

#Filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

#Filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

#Clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,The Market By Longo’s,Supermarket,43.686711,-79.399536
1,LCBO,Liquor Store,43.686991,-79.399238
2,Union Social Eatery,American Restaurant,43.687895,-79.394916
3,Daeco Sushi,Sushi Restaurant,43.687838,-79.395652
4,Mary Be Kitchen,Restaurant,43.687708,-79.395062


In [22]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

16 venues were returned by Foursquare.


In [23]:
#Create a function to repeat the above same process to all the neighbourhoods in Toronto

def getNearbyVenues(names, latitudes, longitudes, radius=500):

    venues_list = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        
        #Create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
        
        #Make the GET request
        results = requests.get(url).json()['response']['groups'][0]['items']
        
        #Return only relevant information for each nearby venue
        venues_list.append([(
            name,
            lat,
            lng,
            v['venue']['name'],
            v['venue']['location']['lat'],
            v['venue']['location']['lng'],
            v['venue']['categories'][0]['name']) for v in results])
        
        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)
    

In [24]:
Toronto_venues = getNearbyVenues(names=Toronto_data['Neighbourhood'],
                                 latitudes=Toronto_data['Latitude'],
                                 longitudes=Toronto_data['Longitude']
                                )

Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
Davisville
Moore Park, Summerhill East
Forest Hill North & West, Forest Hill Road Park
The Annex, North Midtown, Yorkville
Davisville North
Roselawn
North Toronto West, Lawrence Park
Lawrence Park
Berczy Park
Christie
Central Bay Street
First Canadian Place, Underground city
Toronto Dominion Centre, Design Exchange
Church and Wellesley
University of Toronto, Harbord
Queen's Park, Ontario Provincial Government
Rosedale
Harbourfront East, Union Station, Toronto Islands
Stn A PO Boxes
St. James Town
Richmond, Adelaide, King
Regent Park, Harbourfront
Commerce Court, Victoria Hotel
St. James Town, Cabbagetown
Garden District, Ryerson
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Kensington Market, Chinatown, Grange Park
The Beaches
The Danforth West, Riverdale
India Bazaar, The Beaches West
Studio District
Business reply mail Processing Centre, South Central Letter 

In [25]:
#Check the size of ther resulting dataframe and the top 20 records of Toronto_venues
print(Toronto_venues.shape)
Toronto_venues.head(20)

(1637, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,The Market By Longo’s,43.686711,-79.399536,Supermarket
1,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,LCBO,43.686991,-79.399238,Liquor Store
2,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,Union Social Eatery,43.687895,-79.394916,American Restaurant
3,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,Daeco Sushi,43.687838,-79.395652,Sushi Restaurant
4,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,Mary Be Kitchen,43.687708,-79.395062,Restaurant
5,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,Tim Hortons,43.687682,-79.39684,Coffee Shop
6,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,Raiders E-Sports Centre,43.687683,-79.395944,Sports Bar
7,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,Fionn MacCool's,43.687921,-79.394783,Pub
8,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,Starbucks,43.686756,-79.398292,Coffee Shop
9,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,Popeyes Louisiana Kitchen,43.6893,-79.395302,Fried Chicken Joint


In [26]:
#Show how many venues were returned for each neighbourhood
Toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,57,57,57,57,57,57
"Brockton, Parkdale Village, Exhibition Place",24,24,24,24,24,24
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",18,18,18,18,18,18
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",17,17,17,17,17,17
Central Bay Street,63,63,63,63,63,63
Christie,16,16,16,16,16,16
Church and Wellesley,76,76,76,76,76,76
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,34,34,34,34,34,34
Davisville North,8,8,8,8,8,8


In [27]:
#Show how many unique categories can be curated from all the returned venues
print('There are {} uniques categories.'.format(len(Toronto_venues['Venue Category'].unique())))

There are 234 uniques categories.


In [28]:
Toronto_venues

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,The Market By Longo’s,43.686711,-79.399536,Supermarket
1,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,LCBO,43.686991,-79.399238,Liquor Store
2,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,Union Social Eatery,43.687895,-79.394916,American Restaurant
3,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,Daeco Sushi,43.687838,-79.395652,Sushi Restaurant
4,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,Mary Be Kitchen,43.687708,-79.395062,Restaurant
...,...,...,...,...,...,...,...
1632,"Dufferin, Dovercourt Village",43.669005,-79.442259,TD Canada Trust,43.667934,-79.441698,Bank
1633,"Dufferin, Dovercourt Village",43.669005,-79.442259,Rexall,43.667504,-79.442086,Pharmacy
1634,"Dufferin, Dovercourt Village",43.669005,-79.442259,Food Basics,43.666886,-79.446691,Supermarket
1635,"Dufferin, Dovercourt Village",43.669005,-79.442259,Shoppers Drug Mart,43.666745,-79.447446,Pharmacy


In [29]:
#One hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

Toronto_onehot  #1628 rows X 234 columns


Unnamed: 0,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1632,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1633,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1634,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1635,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
#Add neighbourhood column back to the new dataframe, Toronto_onehot
Toronto_onehot['Neighbourhood'] = Toronto_venues['Neighbourhood']

Toronto_onehot.columns.get_loc("Neighbourhood")
#Toronto_onehot.shape

234

In [31]:
Toronto_onehot.head()

Unnamed: 0,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio,Neighbourhood
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Summerhill West, Rathnelly, South Hill, Forest..."
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Summerhill West, Rathnelly, South Hill, Forest..."
2,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,"Summerhill West, Rathnelly, South Hill, Forest..."
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Summerhill West, Rathnelly, South Hill, Forest..."
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Summerhill West, Rathnelly, South Hill, Forest..."


In [32]:
mid = Toronto_onehot['Neighbourhood']
mid

0       Summerhill West, Rathnelly, South Hill, Forest...
1       Summerhill West, Rathnelly, South Hill, Forest...
2       Summerhill West, Rathnelly, South Hill, Forest...
3       Summerhill West, Rathnelly, South Hill, Forest...
4       Summerhill West, Rathnelly, South Hill, Forest...
                              ...                        
1632                         Dufferin, Dovercourt Village
1633                         Dufferin, Dovercourt Village
1634                         Dufferin, Dovercourt Village
1635                         Dufferin, Dovercourt Village
1636                         Dufferin, Dovercourt Village
Name: Neighbourhood, Length: 1637, dtype: object

In [33]:
Toronto_onehot.drop(labels=['Neighbourhood'], axis=1, inplace=True)
Toronto_onehot.insert(0, 'Neighbourhood', mid)
Toronto_onehot.head()

Unnamed: 0,Neighbourhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,"Summerhill West, Rathnelly, South Hill, Forest...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Summerhill West, Rathnelly, South Hill, Forest...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Summerhill West, Rathnelly, South Hill, Forest...",0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Summerhill West, Rathnelly, South Hill, Forest...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Summerhill West, Rathnelly, South Hill, Forest...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [34]:
Toronto_onehot.shape

(1637, 235)

In [35]:
#Let's group rows by neighbourhood and by taking the mean of the frequency of occurrence of each category
Toronto_grouped = Toronto_onehot.groupby('Neighbourhood').mean().reset_index()
Toronto_grouped

Unnamed: 0,Neighbourhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.058824,0.058824,0.058824,0.058824,0.176471,0.117647,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.015873,0.0,0.015873
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.013158,0.0,0.0,...,0.013158,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
#Confirm the new size
print('Toronto_group.shape: ', Toronto_grouped.shape)

tempCol = Toronto_grouped.columns.T
print('tempCol: ', tempCol)
print('tempCol.shape: ', tempCol.shape)
tempCol.columns = ['venue', 'freq']

print('tempCol: ', tempCol)

Toronto_group.shape:  (39, 235)
tempCol:  Index(['Neighbourhood', 'Airport', 'Airport Food Court', 'Airport Gate',
       'Airport Lounge', 'Airport Service', 'Airport Terminal',
       'American Restaurant', 'Antique Shop', 'Aquarium',
       ...
       'Theme Restaurant', 'Toy / Game Store', 'Trail', 'Train Station',
       'Vegetarian / Vegan Restaurant', 'Video Game Store',
       'Vietnamese Restaurant', 'Wine Bar', 'Women's Store', 'Yoga Studio'],
      dtype='object', length=235)
tempCol.shape:  (235,)
tempCol:  Index(['Neighbourhood', 'Airport', 'Airport Food Court', 'Airport Gate',
       'Airport Lounge', 'Airport Service', 'Airport Terminal',
       'American Restaurant', 'Antique Shop', 'Aquarium',
       ...
       'Theme Restaurant', 'Toy / Game Store', 'Trail', 'Train Station',
       'Vegetarian / Vegan Restaurant', 'Video Game Store',
       'Vietnamese Restaurant', 'Wine Bar', 'Women's Store', 'Yoga Studio'],
      dtype='object', length=235)


In [37]:
#Let's print each neighbourhood along with the top 5 most common venues
num_top_venues = 5

for hood in Toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')
    
    

----Berczy Park----
                venue  freq
0         Coffee Shop  0.09
1          Restaurant  0.04
2            Beer Bar  0.04
3        Cocktail Bar  0.04
4  Seafood Restaurant  0.04


----Brockton, Parkdale Village, Exhibition Place----
                   venue  freq
0                   Café  0.12
1         Breakfast Spot  0.08
2  Performing Arts Venue  0.08
3              Nightclub  0.08
4            Coffee Shop  0.08


----Business reply mail Processing Centre, South Central Letter Processing Plant Toronto----
                venue  freq
0  Light Rail Station  0.11
1         Yoga Studio  0.06
2       Auto Workshop  0.06
3             Brewery  0.06
4          Skate Park  0.06


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
              venue  freq
0   Airport Service  0.18
1  Airport Terminal  0.12
2           Airport  0.06
3               Bar  0.06
4  Sculpture Garden  0.06


----Central Bay Street----
      

In [38]:
#Create function to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [39]:
#Let's create the new dataframe and display the top 10 venues for each neighbourhood
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = Toronto_grouped['Neighbourhood']

for ind in np.arange(Toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Beer Bar,Cocktail Bar,Bakery,Seafood Restaurant,Farmers Market,Cheese Shop,Café,Restaurant,Japanese Restaurant
1,"Brockton, Parkdale Village, Exhibition Place",Café,Breakfast Spot,Nightclub,Performing Arts Venue,Coffee Shop,Convenience Store,Climbing Gym,Burrito Place,Restaurant,Italian Restaurant
2,"Business reply mail Processing Centre, South C...",Light Rail Station,Yoga Studio,Smoke Shop,Auto Workshop,Brewery,Burrito Place,Butcher,Comic Shop,Farmers Market,Fast Food Restaurant
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Terminal,Airport,Bar,Coffee Shop,Plane,Rental Car Location,Sculpture Garden,Boat or Ferry,Boutique
4,Central Bay Street,Coffee Shop,Sandwich Place,Italian Restaurant,Café,Burger Joint,Japanese Restaurant,Bubble Tea Shop,Salad Place,Restaurant,Ramen Restaurant


In [40]:
Toronto_grouped.head()

Unnamed: 0,Neighbourhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.058824,0.058824,0.058824,0.058824,0.176471,0.117647,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.015873,0.0,0.015873


In [41]:
#Run k-means clustering

#Set number of clusters
kclusters = 5

Toronto_grouped_clu = Toronto_grouped.drop('Neighbourhood', 1)
#print('Toronto_grouped_clu: ', Toronto_grouped_clu)

#Run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clu)

#Check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [42]:
#Create a new dataframe that includes the cluster as well as the top 10 venues

#Add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)


In [43]:
Toronto_merged = Toronto_data

#Merge Toronto_grouped with Toronto_data to add latitude/longitude for each neighbourhood
Toronto_merged = Toronto_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

#print('Toronto_merged: ', Toronto_merged.head())
#print('neighbourhoods_venues_sorted: ', neighbourhoods_venues_sorted.head())
Toronto_merged.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,0,Pub,Coffee Shop,Bagel Shop,Pizza Place,Liquor Store,Restaurant,Sports Bar,Bank,Supermarket,Sushi Restaurant
1,M4S,Central Toronto,Davisville,43.704324,-79.38879,0,Sandwich Place,Dessert Shop,Pizza Place,Italian Restaurant,Café,Gym,Coffee Shop,Sushi Restaurant,Pharmacy,Seafood Restaurant
2,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,2,Restaurant,Park,Tennis Court,Yoga Studio,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
3,M5P,Central Toronto,"Forest Hill North & West, Forest Hill Road Park",43.696948,-79.411307,0,Jewelry Store,Trail,Mexican Restaurant,Sushi Restaurant,Yoga Studio,Diner,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
4,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678,0,Café,Sandwich Place,Coffee Shop,Liquor Store,Pharmacy,Pub,Middle Eastern Restaurant,BBQ Joint,Pizza Place,History Museum


In [44]:
#Drop the Postal Code column
Toronto_merged = Toronto_merged.drop('Postal Code', 1)

In [45]:
Toronto_merged

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,0,Pub,Coffee Shop,Bagel Shop,Pizza Place,Liquor Store,Restaurant,Sports Bar,Bank,Supermarket,Sushi Restaurant
1,Central Toronto,Davisville,43.704324,-79.38879,0,Sandwich Place,Dessert Shop,Pizza Place,Italian Restaurant,Café,Gym,Coffee Shop,Sushi Restaurant,Pharmacy,Seafood Restaurant
2,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,2,Restaurant,Park,Tennis Court,Yoga Studio,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
3,Central Toronto,"Forest Hill North & West, Forest Hill Road Park",43.696948,-79.411307,0,Jewelry Store,Trail,Mexican Restaurant,Sushi Restaurant,Yoga Studio,Diner,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
4,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678,0,Café,Sandwich Place,Coffee Shop,Liquor Store,Pharmacy,Pub,Middle Eastern Restaurant,BBQ Joint,Pizza Place,History Museum
5,Central Toronto,Davisville North,43.712751,-79.390197,0,Gym / Fitness Center,Hotel,Pizza Place,Department Store,Sandwich Place,Breakfast Spot,Food & Drink Shop,Park,Gay Bar,Electronics Store
6,Central Toronto,Roselawn,43.711695,-79.416936,1,Home Service,Garden,Yoga Studio,Dim Sum Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
7,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678,0,Clothing Store,Coffee Shop,Yoga Studio,Sporting Goods Shop,Café,Chinese Restaurant,Diner,Fast Food Restaurant,Ice Cream Shop,Mexican Restaurant
8,Central Toronto,Lawrence Park,43.72802,-79.38879,3,Park,Bus Line,Swim School,Yoga Studio,Diner,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
9,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Beer Bar,Cocktail Bar,Bakery,Seafood Restaurant,Farmers Market,Cheese Shop,Café,Restaurant,Japanese Restaurant


In [46]:
#Drop the row with the NaN values.
Toronto_merged = Toronto_merged.dropna()

#Change Cluster Labels in Toronto_merged from float to int
Toronto_merged['Cluster Labels'] = Toronto_merged['Cluster Labels'].astype(int)

#Visualize the resulting clusters
map_clusters = folium.Map(location=[latitude, longitude], zoom_start = 11)

#Set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

#Add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Neighbourhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
    
map_clusters    


In [47]:
#Examine clusters

#Cluster 1
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Summerhill West, Rathnelly, South Hill, Forest...",Pub,Coffee Shop,Bagel Shop,Pizza Place,Liquor Store,Restaurant,Sports Bar,Bank,Supermarket,Sushi Restaurant
1,Davisville,Sandwich Place,Dessert Shop,Pizza Place,Italian Restaurant,Café,Gym,Coffee Shop,Sushi Restaurant,Pharmacy,Seafood Restaurant
3,"Forest Hill North & West, Forest Hill Road Park",Jewelry Store,Trail,Mexican Restaurant,Sushi Restaurant,Yoga Studio,Diner,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
4,"The Annex, North Midtown, Yorkville",Café,Sandwich Place,Coffee Shop,Liquor Store,Pharmacy,Pub,Middle Eastern Restaurant,BBQ Joint,Pizza Place,History Museum
5,Davisville North,Gym / Fitness Center,Hotel,Pizza Place,Department Store,Sandwich Place,Breakfast Spot,Food & Drink Shop,Park,Gay Bar,Electronics Store
7,"North Toronto West, Lawrence Park",Clothing Store,Coffee Shop,Yoga Studio,Sporting Goods Shop,Café,Chinese Restaurant,Diner,Fast Food Restaurant,Ice Cream Shop,Mexican Restaurant
9,Berczy Park,Coffee Shop,Beer Bar,Cocktail Bar,Bakery,Seafood Restaurant,Farmers Market,Cheese Shop,Café,Restaurant,Japanese Restaurant
10,Christie,Grocery Store,Café,Park,Nightclub,Italian Restaurant,Diner,Restaurant,Candy Store,Baby Store,Coffee Shop
11,Central Bay Street,Coffee Shop,Sandwich Place,Italian Restaurant,Café,Burger Joint,Japanese Restaurant,Bubble Tea Shop,Salad Place,Restaurant,Ramen Restaurant
12,"First Canadian Place, Underground city",Coffee Shop,Café,Hotel,Gym,Japanese Restaurant,Restaurant,Seafood Restaurant,American Restaurant,Steakhouse,Asian Restaurant


In [48]:
#Cluster 2
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Roselawn,Home Service,Garden,Yoga Studio,Dim Sum Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


In [49]:
#Cluster 3
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,"Moore Park, Summerhill East",Restaurant,Park,Tennis Court,Yoga Studio,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
17,Rosedale,Park,Playground,Trail,Yoga Studio,Dessert Shop,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


In [50]:
#Cluster 4
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Lawrence Park,Park,Bus Line,Swim School,Yoga Studio,Diner,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


In [51]:
#Cluster 5
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 4, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
28,The Beaches,Pub,Health Food Store,Neighborhood,Trail,Yoga Studio,Doner Restaurant,Discount Store,Distribution Center,Dog Run,Dumpling Restaurant
