In [1]:
#Import Library
import pandas as pd # For creating and manipulating dataframe
import numpy as np # For scientific compiting

import requests #  To handle requests


In [2]:
import urllib.request # For dowloading files from the internet
                        #Similar to wget

In [3]:
# Downloading the html code for the wiki page
filename='toronto.txt'

#New page updateed
path='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

#Using old page
#path='https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&direction=prev&oldid=926287641'
urllib.request.urlretrieve(path,filename)


('toronto.txt', <http.client.HTTPMessage at 0x1bd056b5790>)

In [4]:
# Reading the table from html(.txt) file
table_toronto=pd.read_html(filename)


In [5]:
#Selecting the desired table
df=table_toronto[0]
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 180 entries, 0 to 179
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Postal Code    180 non-null    object
 1   Borough        180 non-null    object
 2   Neighbourhood  180 non-null    object
dtypes: object(3)
memory usage: 4.3+ KB


In [7]:
#Check if the dataframe has miising values
df['Borough'].isnull().value_counts()

False    180
Name: Borough, dtype: int64

In [8]:
#Check if the dataframe has miising values
df['Neighbourhood'].isnull().value_counts()

False    180
Name: Neighbourhood, dtype: int64

In [9]:
#process the cells that have an assigned borough by filtering the dataframe
df=df[df['Borough']!='Not assigned']

In [10]:
df.head(3)

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [11]:
df['Borough'].value_counts()

North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
East Toronto         5
East York            5
York                 5
Mississauga          1
Name: Borough, dtype: int64

In [12]:
df['Borough'].unique()

array(['North York', 'Downtown Toronto', 'Etobicoke', 'Scarborough',
       'East York', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga'], dtype=object)

In [13]:
#Arranging neighborhood on basis of Postal Code and Borough
df=df.groupby(by=['Postal Code','Borough'])['Neighbourhood'].apply(list)

In [14]:
df

Postal Code  Borough    
M1B          Scarborough                                     [Malvern, Rouge]
M1C          Scarborough             [Rouge Hill, Port Union, Highland Creek]
M1E          Scarborough                  [Guildwood, Morningside, West Hill]
M1G          Scarborough                                             [Woburn]
M1H          Scarborough                                          [Cedarbrae]
                                                  ...                        
M9N          York                                                    [Weston]
M9P          Etobicoke                                            [Westmount]
M9R          Etobicoke      [Kingsview Village, St. Phillips, Martin Grove...
M9V          Etobicoke      [South Steeles, Silverstone, Humbergate, James...
M9W          Etobicoke                  [Northwest, West Humber - Clairville]
Name: Neighbourhood, Length: 103, dtype: object

In [15]:
df=df.to_frame()

In [16]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighbourhood
Postal Code,Borough,Unnamed: 2_level_1
M1B,Scarborough,"[Malvern, Rouge]"
M1C,Scarborough,"[Rouge Hill, Port Union, Highland Creek]"
M1E,Scarborough,"[Guildwood, Morningside, West Hill]"
M1G,Scarborough,[Woburn]
M1H,Scarborough,[Cedarbrae]


In [17]:
df=df.reset_index()

In [18]:
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"[Malvern, Rouge]"
1,M1C,Scarborough,"[Rouge Hill, Port Union, Highland Creek]"
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]"
3,M1G,Scarborough,[Woburn]
4,M1H,Scarborough,[Cedarbrae]


In [19]:
#Defining function to convert list to string
def string_op(list):
    string=''
    for i in list:
        string= string + ',' + i
    return(string[1:])

In [20]:
df['Neighbourhood']=df['Neighbourhood'].apply (string_op)

# 1-Answer to Question 1 Final Dataframe

In [21]:
df.head(20)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [22]:
print('The number of rows of dataframe = {}  '.format(df.shape[0]))

The number of rows of dataframe = 103  


In [23]:
df['Postal Code']

0      M1B
1      M1C
2      M1E
3      M1G
4      M1H
      ... 
98     M9N
99     M9P
100    M9R
101    M9V
102    M9W
Name: Postal Code, Length: 103, dtype: object

In [24]:
# Downloading the lat and long coordinates from the link
filename1='toronto_latlong.csv'

#New page updateed
path1='http://cocl.us/Geospatial_data'

urllib.request.urlretrieve(path1,filename1)

('toronto_latlong.csv', <http.client.HTTPMessage at 0x1bd05bdf580>)

In [25]:
#Couldnot get the data from the Google Maps Geocoding API,so using the link in the lab
#to get the information about latitude and longitude
df_latlong=pd.read_csv(filename1)
df_latlong.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [26]:
df_latlong.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103 entries, 0 to 102
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Postal Code  103 non-null    object 
 1   Latitude     103 non-null    float64
 2   Longitude    103 non-null    float64
dtypes: float64(2), object(1)
memory usage: 2.5+ KB


In [27]:
print('The number of rows of dataframe = {}  '.format(df_latlong.shape[0]))

The number of rows of dataframe = 103  


In [28]:
#Checking whteher the two dataframes df and df_latlong have same elecments in Postal Code
flag=0
for i,j in zip(df['Postal Code'],df_latlong['Postal Code']):
    if (i!= j):
        flag=1
if flag==0:
    print('All elemnets of postal code are same')
else:
    print('Some elemnets of postal code are not same')

All elemnets of postal code are same


# 2-Answer to Question 2 Final Dataframe

In [29]:
#Merging the dataframes
df_merge=pd.merge(df, df_latlong, on=['Postal Code', 'Postal Code'])
df_merge.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [30]:
df_merge.shape

(103, 5)

In [31]:
# Using geopy library to get the latitude and longitude values of Toronto City

#Import Library
from geopy.geocoders import Nominatim

In [32]:
# Finding lat and long of Toronto 

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode("Toronto, Canada")
latitude=location.latitude
longitude=location.longitude
print("The latitude={} and longitude={} for Toronto city".format(latitude,longitude))

The latitude=43.6534817 and longitude=-79.3839347 for Toronto city


In [33]:
#For using map import folium library
import folium

In [34]:
# create map of Toronto using latitude and longitude values
toronto_map= folium.Map((latitude,longitude),
                       zoom_start=10)
toronto_map

In [35]:
# create map of Toronto using latitude and longitude values
toronto_map1= folium.Map((latitude,longitude),
                       zoom_start=10)
# add markers to map
for lat, lng, borough, neighborhood in zip(df_merge["Latitude"],df_merge["Longitude"],df_merge['Borough'],df_merge["Neighbourhood"]):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(toronto_map1) 
    
toronto_map1

# Question 3 Segmenting
# 1-Explore the first neighborhood in our dataframe M1B,Scarborough	
Getting the top 100 venues that are in  Scarborough	within a radius of 500 meters

In [36]:
 df_merge.head

<bound method NDFrame.head of     Postal Code      Borough  \
0           M1B  Scarborough   
1           M1C  Scarborough   
2           M1E  Scarborough   
3           M1G  Scarborough   
4           M1H  Scarborough   
..          ...          ...   
98          M9N         York   
99          M9P    Etobicoke   
100         M9R    Etobicoke   
101         M9V    Etobicoke   
102         M9W    Etobicoke   

                                         Neighbourhood   Latitude  Longitude  
0                                       Malvern, Rouge  43.806686 -79.194353  
1               Rouge Hill, Port Union, Highland Creek  43.784535 -79.160497  
2                    Guildwood, Morningside, West Hill  43.763573 -79.188711  
3                                               Woburn  43.770992 -79.216917  
4                                            Cedarbrae  43.773136 -79.239476  
..                                                 ...        ...        ...  
98                              

In [37]:
Latitude=df_merge.loc[0,'Latitude']
Longitude=df_merge.loc[0,'Longitude']

In [38]:
#Define Foursquare Credentials and Version in hidden cell

In [40]:
#Creating url 
radius=1000
url= 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    Latitude,
    Longitude,
    radius,
    LIMIT)

url

'https://api.foursquare.com/v2/venues/explore?&client_id=VBJ0FUWSMBYSYYOP5ZZUVFINI0RANKLNXZGNUPXIODTBYMDR&client_secret=QS2ADGSYHEYX3E5O2M5KBH5XDJT4KDQWLIYXXVP0WXJ4YM0Y&v=20180604&ll=43.806686299999996,-79.19435340000001&radius=1000&limit=30'

In [41]:
# Library for hadling requests
import requests

In [42]:
#Make an API call to foursquare and det the response
result=requests.get(url)
result

<Response [200]>

In [43]:
result=requests.get(url).json()
result

{'meta': {'code': 200, 'requestId': '5fdaaffa9df2156adb6e0bb2'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Malvern',
  'headerFullLocation': 'Malvern, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 18,
  'suggestedBounds': {'ne': {'lat': 43.81568630900001,
    'lng': -79.18190576146081},
   'sw': {'lat': 43.797686290999984, 'lng': -79.20680103853921}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4d669cba83865481c948fa53',
       'name': 'Images Salon & Spa',
       'location': {'address': '8130 Sheppard Ave E',
        'crossStreet': 'Morningside Ave',
        'lat': 43.80228301948931,
        'lng': -79.19856472801668,
        'labeledLatLngs'

In [44]:
i=0
result['response']['groups'][0]['items'][i]['venue']

{'id': '4d669cba83865481c948fa53',
 'name': 'Images Salon & Spa',
 'location': {'address': '8130 Sheppard Ave E',
  'crossStreet': 'Morningside Ave',
  'lat': 43.80228301948931,
  'lng': -79.19856472801668,
  'labeledLatLngs': [{'label': 'display',
    'lat': 43.80228301948931,
    'lng': -79.19856472801668}],
  'distance': 595,
  'postalCode': 'M1B 3W3',
  'cc': 'CA',
  'city': 'Toronto',
  'state': 'ON',
  'country': 'Canada',
  'formattedAddress': ['8130 Sheppard Ave E (Morningside Ave)',
   'Toronto ON M1B 3W3',
   'Canada']},
 'categories': [{'id': '4bf58dd8d48988d1ed941735',
   'name': 'Spa',
   'pluralName': 'Spas',
   'shortName': 'Spa',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/shops/spa_',
    'suffix': '.png'},
   'primary': True}],
 'photos': {'count': 0, 'groups': []}}

In [45]:
type(result['response']['groups'][0]['items'])

list

In [46]:
print('length of the result list ={}'.format(len((result['response']['groups'][0]['items']))))

length of the result list =18


In [47]:
# From the result we need to create name, category,  latitude and longitude and create a dataframe
i=0
name= result['response']['groups'][0]['items'][i]['venue'] ['name']
#print(name)
category= result['response']['groups'][0]['items'][i]['venue']['categories'][0]['name']
#category
latitude= result['response']['groups'][0]['items'][i]['venue']['location']['lat']
#latitude
longitude=result['response']['groups'][0]['items'][i]['venue']['location']['lng'] 
longitude

-79.19856472801668

In [48]:
# Create a dataframe
column_name= ['Name','Category','Latitude','Longitude']
df_venue= pd.DataFrame (columns=column_name)

for i in range( len(result['response']['groups'][0]['items']) ):
               name= result['response']['groups'][0]['items'][i]['venue'] ['name']
               category= result['response']['groups'][0]['items'][i]['venue']['categories'][0]['name']
               latitude= result['response']['groups'][0]['items'][i]['venue']['location']['lat']
               longitude=result['response']['groups'][0]['items'][i]['venue']['location']['lng']
               df_venue=df_venue.append({'Name': name, 'Category':category , 'Latitude' : latitude, 'Longitude' : longitude},ignore_index=True)

               
df_venue.head()


Unnamed: 0,Name,Category,Latitude,Longitude
0,Images Salon & Spa,Spa,43.802283,-79.198565
1,Harvey's,Restaurant,43.80002,-79.198307
2,Wendy's,Fast Food Restaurant,43.802008,-79.19808
3,RBC Royal Bank,Bank,43.798782,-79.19709
4,Wendy’s,Fast Food Restaurant,43.807448,-79.199056


# Question 3 Segmenting
# 2-Explore all Neighborhoods in Totonto 

In [49]:
df_merge

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


In [50]:
# Create a function that gets the venues of particulat location based on lat and long
def getNearbyVenues(names, latitudes, longitudes, radius):
    # Create a dataframe
    column_name= ['Neighbourhood','Neighborhood Latitude','Neighborhood Longitude','Venue Name','Venue Category','Venue Latitude','Venue Longitude']
    #column_name= ['Venue Name','Venue Category','Venue Latitude','Venue Longitude']
    df_venue= pd.DataFrame (columns=column_name)
    
    for (name,latitude,longitude) in zip(names, latitudes, longitudes):
        #print(name,latitude,longitude)     
        #Creating url 
        url= 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,CLIENT_SECRET,VERSION,latitude,longitude,radius,LIMIT)
        #Make an API call to foursquare and det the response
        result=requests.get(url).json()
        
        
        for i in range( len(result['response']['groups'][0]['items']) ):
            neighbourhood_name= name
            neighborhood_latitude= latitude
            neighborhood_longitude= longitude
            
            venue_name= result['response']['groups'][0]['items'][i]['venue'] ['name']
            venue_category= result['response']['groups'][0]['items'][i]['venue']['categories'][0]['name']
            venue_latitude= result['response']['groups'][0]['items'][i]['venue']['location']['lat']
            venue_longitude=result['response']['groups'][0]['items'][i]['venue']['location']['lng']
            df_venue=df_venue.append({'Neighbourhood':name, 'Neighborhood Latitude' : latitude,
                                      'Neighborhood Longitude': longitude,
                                      'Venue Name': venue_name, 'Venue Category':venue_category,
                                      'Venue Latitude' : venue_latitude, 'Venue Longitude' : venue_longitude},
                                     ignore_index=True)
    return (df_venue)


    
            
 
        


In [52]:
#Calling the above function on each neighborhood and create a new dataframe called toronto_venues.
#toronto_venues= getNearbyVenues(df_merge['Borough'], df_merge['Latitude'], df_merge['Longitude'], radius=700) 
toronto_venues= getNearbyVenues(df_merge['Neighbourhood'], df_merge['Latitude'], df_merge['Longitude'], radius=700)

In [54]:
toronto_venues.head()

Unnamed: 0,Neighbourhood,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Category,Venue Latitude,Venue Longitude
0,"Malvern, Rouge",43.806686,-79.194353,Images Salon & Spa,Spa,43.802283,-79.198565
1,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,Fast Food Restaurant,43.807448,-79.199056
2,"Malvern, Rouge",43.806686,-79.194353,Wendy's,Fast Food Restaurant,43.802008,-79.19808
3,"Malvern, Rouge",43.806686,-79.194353,Tim Hortons,Coffee Shop,43.802,-79.198169
4,"Malvern, Rouge",43.806686,-79.194353,Lee Valley,Hobby Shop,43.803161,-79.199681


In [55]:
# Size of the resulting dataframe
toronto_venues.shape

(1847, 7)

In [56]:
#Total Venues that were returned for each neighborhood
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Category,Venue Latitude,Venue Longitude
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,13,13,13,13,13,13
"Alderwood, Long Branch",10,10,10,10,10,10
"Bathurst Manor, Wilson Heights, Downsview North",23,23,23,23,23,23
Bayview Village,8,8,8,8,8,8
"Bedford Park, Lawrence Manor East",29,29,29,29,29,29
...,...,...,...,...,...,...
"Willowdale, Willowdale West",6,6,6,6,6,6
Woburn,7,7,7,7,7,7
Woodbine Heights,10,10,10,10,10,10
York Mills West,8,8,8,8,8,8


In [57]:
#Unique categories that can be obtained from returned venues
print('There are {} unique venue categories.'.format(len(toronto_venues['Venue Category'].unique())))
print ('Examples of unique venue categories are={}'.format(toronto_venues['Venue Category'].unique()[0:10]))

There are 268 unique venue categories.
Examples of unique venue categories are=['Spa' 'Fast Food Restaurant' 'Coffee Shop' 'Hobby Shop' 'Burger Joint'
 'Breakfast Spot' 'Bar' 'Fried Chicken Joint' 'Restaurant' 'Bank']


# Question 3 Segmenting
# 3-Analyze Each Neighborhood by display the top 5 venues category for each neighborhood

In [58]:
toronto_venues.head()

Unnamed: 0,Neighbourhood,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Category,Venue Latitude,Venue Longitude
0,"Malvern, Rouge",43.806686,-79.194353,Images Salon & Spa,Spa,43.802283,-79.198565
1,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,Fast Food Restaurant,43.807448,-79.199056
2,"Malvern, Rouge",43.806686,-79.194353,Wendy's,Fast Food Restaurant,43.802008,-79.19808
3,"Malvern, Rouge",43.806686,-79.194353,Tim Hortons,Coffee Shop,43.802,-79.198169
4,"Malvern, Rouge",43.806686,-79.194353,Lee Valley,Hobby Shop,43.803161,-79.199681


In [59]:
#Converting  Categorical variable to numerical values
# one hot encoding
toronto_onehot=pd.get_dummies(toronto_venues[['Venue Category']],prefix='',prefix_sep='')
toronto_onehot.head()

Unnamed: 0,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [60]:
#Adding Neighbourhood to toronto_onehot dataframe
toronto_onehot[['Neighbourhood']]= toronto_venues[['Neighbourhood']]


#Assigning neighbourhood name as first column
fixed_columns= [(toronto_onehot.columns[-1])] + list(toronto_onehot.columns[0:len(toronto_onehot.columns)-1])
toronto_onehot=toronto_onehot[fixed_columns]

toronto_onehot


Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1842,"South Steeles, Silverstone, Humbergate, Jamest...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1843,"Northwest, West Humber - Clairville",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1844,"Northwest, West Humber - Clairville",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1845,"Northwest, West Humber - Clairville",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [61]:
#Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [62]:
#toronto_group=toronto_onehot.groupby(by=['Neighbourhood Name']).mean().reset_index()
toronto_group=toronto_onehot.groupby(by=['Neighbourhood']).mean()

toronto_group.head(10)

Unnamed: 0_level_0,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,...,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",0.0,0.0,0.038462,0.038462,0.038462,0.076923,0.076923,0.076923,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [63]:
#Let's print each neighborhood along with the top 5 most common venue category

for i,name in enumerate(toronto_group.index.values.tolist()):
    print("-----{}-{}-----".format(i,name))
    temp=toronto_group.loc[name,:].sort_values(ascending=False).reset_index().head()
    temp.columns=['venue','freq']
    print(temp)



-----0-Agincourt-----
                       venue      freq
0                     Lounge  0.076923
1                Coffee Shop  0.076923
2             Sandwich Place  0.076923
3        Shanghai Restaurant  0.076923
4  Latin American Restaurant  0.076923
-----1-Alderwood, Long Branch-----
               venue  freq
0  Convenience Store   0.2
1        Pizza Place   0.2
2                Gym   0.1
3        Gas Station   0.1
4        Coffee Shop   0.1
-----2-Bathurst Manor, Wilson Heights, Downsview North-----
              venue      freq
0              Bank  0.086957
1       Coffee Shop  0.086957
2          Pharmacy  0.043478
3     Deli / Bodega  0.043478
4  Sushi Restaurant  0.043478
-----3-Bayview Village-----
                 venue   freq
0                 Bank  0.250
1   Chinese Restaurant  0.125
2                 Café  0.125
3  Japanese Restaurant  0.125
4         Skating Rink  0.125
-----4-Bedford Park, Lawrence Manor East-----
                venue      freq
0         Coffee Shop

4  Mediterranean Restaurant  0.033333
-----65-Richmond, Adelaide, King-----
                 venue      freq
0          Coffee Shop  0.100000
1           Restaurant  0.066667
2                 Café  0.066667
3  American Restaurant  0.066667
4   Seafood Restaurant  0.033333
-----66-Rosedale-----
         venue  freq
0         Park   0.6
1   Playground   0.2
2        Trail   0.2
3  Escape Room   0.0
4      Dog Run   0.0
-----67-Roselawn-----
          venue      freq
0    Playground  0.166667
1  Home Service  0.166667
2   Music Venue  0.166667
3     Pet Store  0.166667
4        Garden  0.166667
-----68-Rouge Hill, Port Union, Highland Creek-----
                venue  freq
0      Breakfast Spot  0.50
1        Burger Joint  0.25
2                 Bar  0.25
3         Yoga Studio  0.00
4  Falafel Restaurant  0.00
-----69-Runnymede, Swansea-----
                venue      freq
0  Italian Restaurant  0.100000
1                Café  0.100000
2         Coffee Shop  0.066667
3  Falafel Restauran

In [64]:
 #let's create the new dataframe and display the top 10 venues for each neighborhood.
column_names=['Neighbourhood']
indicators=['st','nd','rd']

for i in range(10):
    
    try:
        column_names.append('{}{} most common venue'.format(i+1,indicators[i]))   
    except:
        column_names.append('{}th most common venue'.format(i+1))



#Create dataframe
Neighbourhood_venues_sorted=pd.DataFrame(columns=column_names)
Neighbourhood_venues_sorted['Neighbourhood']= toronto_group.index #toronto_group.index.values.tolist() returns list

Neighbourhood_venues_sorted

Unnamed: 0,Neighbourhood,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
0,Agincourt,,,,,,,,,,
1,"Alderwood, Long Branch",,,,,,,,,,
2,"Bathurst Manor, Wilson Heights, Downsview North",,,,,,,,,,
3,Bayview Village,,,,,,,,,,
4,"Bedford Park, Lawrence Manor East",,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
93,"Willowdale, Willowdale West",,,,,,,,,,
94,Woburn,,,,,,,,,,
95,Woodbine Heights,,,,,,,,,,
96,York Mills West,,,,,,,,,,


In [65]:
for i,name in enumerate(toronto_group.index.values.tolist()):
    # Get top 10 venues
    temp=toronto_group.loc[name,:].sort_values(ascending=False).reset_index().head(10)
    temp.columns=['venue','freq']
    
    #Filling the dataframe one row at a time
    Neighbourhood_venues_sorted.iloc[i][1:]=temp['venue']

#Displaying the dataframe
Neighbourhood_venues_sorted


Unnamed: 0,Neighbourhood,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
0,Agincourt,Lounge,Coffee Shop,Sandwich Place,Shanghai Restaurant,Latin American Restaurant,Newsagent,Motorcycle Shop,Clothing Store,Badminton Court,Skating Rink
1,"Alderwood, Long Branch",Convenience Store,Pizza Place,Gym,Gas Station,Coffee Shop,Sandwich Place,Pharmacy,Pub,Coworking Space,Distribution Center
2,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Pharmacy,Deli / Bodega,Sushi Restaurant,Diner,Fried Chicken Joint,Frozen Yogurt Shop,Bridal Shop,Chinese Restaurant
3,Bayview Village,Bank,Chinese Restaurant,Café,Japanese Restaurant,Skating Rink,Grocery Store,Intersection,Yoga Studio,Electronics Store,Drugstore
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Italian Restaurant,Sandwich Place,Cosmetics Shop,Sushi Restaurant,Restaurant,Bank,Bakery,Bagel Shop,Thai Restaurant
...,...,...,...,...,...,...,...,...,...,...,...
93,"Willowdale, Willowdale West",Coffee Shop,Butcher,Pizza Place,Pharmacy,Grocery Store,Park,Escape Room,Donut Shop,Drugstore,Eastern European Restaurant
94,Woburn,Coffee Shop,Park,Convenience Store,Business Service,Construction & Landscaping,Farm,Falafel Restaurant,Event Space,Farmers Market,Dog Run
95,Woodbine Heights,Park,Skating Rink,Beer Store,Intersection,Athletics & Sports,Curling Ice,Dance Studio,Bus Line,Electronics Store,Escape Room
96,York Mills West,Park,Convenience Store,Gym,Intersection,Pet Store,Bowling Alley,Curling Ice,Dog Run,Field,Fast Food Restaurant


# Question 3 Clustering Neighborhood

Run k-means to cluster the neighborhood into 5 clusters.

In [66]:
# Displaying the dataframe where we will run kmeans algorithm
toronto_group


Unnamed: 0_level_0,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,...,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Willowdale, Willowdale West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
York Mills West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [67]:
 #Dropping the Neighbourhood Name column
#toronto_df=toronto_group.drop('Neighbourhood Name',axis='columns')
#toronto_df

In [68]:
#Importing library
from sklearn.cluster import KMeans 

In [69]:
#Initialize KMeans with 5 clusters
kclusters = 5
k_means = KMeans(init="k-means++", n_clusters=kclusters, n_init=10, random_state=0)

#fit the KMeans model 
k_means.fit(toronto_group)

#grab the labels for each point in the model using KMeans .labels_ attribute 
k_means_labels = k_means.labels_
k_means_labels

array([4, 4, 4, 4, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 4,
       0, 0, 4, 0, 0, 4, 0, 0, 1, 0, 4, 4, 4, 0, 0, 4, 4, 4, 0, 0, 4, 0,
       0, 4, 0, 1, 0, 0, 0, 4, 0, 1, 0, 1, 0, 4, 0, 3, 0, 4, 1, 0, 0, 0,
       1, 4, 4, 0, 4, 4, 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 4, 4,
       4, 4, 4, 1, 0, 4, 1, 1, 1, 2])

In [70]:
#Find out the number of labels generated which must be same as number of rows of processed dataframe
len(k_means_labels)

98

In [71]:
# add clustering labels to Neighbourhood_venues_sorted
Neighbourhood_venues_sorted.insert(0, 'Cluster Labels', k_means_labels)



In [72]:

Neighbourhood_venues_sorted.head()

Unnamed: 0,Cluster Labels,Neighbourhood,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
0,4,Agincourt,Lounge,Coffee Shop,Sandwich Place,Shanghai Restaurant,Latin American Restaurant,Newsagent,Motorcycle Shop,Clothing Store,Badminton Court,Skating Rink
1,4,"Alderwood, Long Branch",Convenience Store,Pizza Place,Gym,Gas Station,Coffee Shop,Sandwich Place,Pharmacy,Pub,Coworking Space,Distribution Center
2,4,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Pharmacy,Deli / Bodega,Sushi Restaurant,Diner,Fried Chicken Joint,Frozen Yogurt Shop,Bridal Shop,Chinese Restaurant
3,4,Bayview Village,Bank,Chinese Restaurant,Café,Japanese Restaurant,Skating Rink,Grocery Store,Intersection,Yoga Studio,Electronics Store,Drugstore
4,0,"Bedford Park, Lawrence Manor East",Coffee Shop,Italian Restaurant,Sandwich Place,Cosmetics Shop,Sushi Restaurant,Restaurant,Bank,Bakery,Bagel Shop,Thai Restaurant


In [73]:
#Dataframe with lat and long values
df_merge.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [74]:
#Joining the two dataframes
toronto_merged=df_merge
toronto_merged=toronto_merged.join(Neighbourhood_venues_sorted.set_index('Neighbourhood'),on='Neighbourhood')
toronto_merged

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,0.0,Fast Food Restaurant,Coffee Shop,Hobby Shop,Spa,Ethiopian Restaurant,Donut Shop,Drugstore,Eastern European Restaurant,Electronics Store,Escape Room
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,4.0,Breakfast Spot,Burger Joint,Bar,Yoga Studio,Falafel Restaurant,Eastern European Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,4.0,Restaurant,Fast Food Restaurant,Park,Pizza Place,Breakfast Spot,Mexican Restaurant,Sports Bar,Electronics Store,Beer Store,Moving Target
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Park,Convenience Store,Business Service,Construction & Landscaping,Farm,Falafel Restaurant,Event Space,Farmers Market,Dog Run
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,Indian Restaurant,Coffee Shop,Thai Restaurant,Chinese Restaurant,Bank,Caribbean Restaurant,Athletics & Sports,Asian Restaurant,Gas Station,Bakery
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188,4.0,Diner,Pharmacy,Gift Shop,Fried Chicken Joint,Park,Grocery Store,Falafel Restaurant,Event Space,Ethiopian Restaurant,Dive Bar
99,M9P,Etobicoke,Westmount,43.696319,-79.532242,4.0,Golf Driving Range,Coffee Shop,Sandwich Place,Chinese Restaurant,Intersection,Golf Course,Supermarket,Ice Cream Shop,Discount Store,Flea Market
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724,4.0,Gas Station,American Restaurant,Bank,Chinese Restaurant,Intersection,Beer Store,Sandwich Place,Supermarket,Coffee Shop,Bus Line
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,4.0,Grocery Store,Pizza Place,Sandwich Place,Beer Store,Fried Chicken Joint,Liquor Store,Caribbean Restaurant,Fast Food Restaurant,Hardware Store,Pharmacy


In [75]:
toronto_merged.shape

(103, 16)

### Dealing with missing values in the dataframe by removing the whole row

In [76]:
missing_data = toronto_merged.isnull()
missing_data.head(5)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [77]:
for column in missing_data.columns.values.tolist():
    print(column)
    print (missing_data[column].value_counts())
    print("")   

Postal Code
False    103
Name: Postal Code, dtype: int64

Borough
False    103
Name: Borough, dtype: int64

Neighbourhood
False    103
Name: Neighbourhood, dtype: int64

Latitude
False    103
Name: Latitude, dtype: int64

Longitude
False    103
Name: Longitude, dtype: int64

Cluster Labels
False    102
True       1
Name: Cluster Labels, dtype: int64

1st most common venue
False    102
True       1
Name: 1st most common venue, dtype: int64

2nd most common venue
False    102
True       1
Name: 2nd most common venue, dtype: int64

3rd most common venue
False    102
True       1
Name: 3rd most common venue, dtype: int64

4th most common venue
False    102
True       1
Name: 4th most common venue, dtype: int64

5th most common venue
False    102
True       1
Name: 5th most common venue, dtype: int64

6th most common venue
False    102
True       1
Name: 6th most common venue, dtype: int64

7th most common venue
False    102
True       1
Name: 7th most common venue, dtype: int64

8th most c

In [78]:
# simply drop whole row with NaN in "Cluster Labels" column
toronto_merged.dropna(subset=["Cluster Labels"], axis=0, inplace=True)

In [79]:
toronto_merged.isnull().sum()

Postal Code               0
Borough                   0
Neighbourhood             0
Latitude                  0
Longitude                 0
Cluster Labels            0
1st most common venue     0
2nd most common venue     0
3rd most common venue     0
4th most common venue     0
5th most common venue     0
6th most common venue     0
7th most common venue     0
8th most common venue     0
9th most common venue     0
10th most common venue    0
dtype: int64

# Question 3 Clustering Neighborhood
### Finally, let's visualize the resulting clusters

In [80]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [81]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
kclusters=5
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine Clusters

In [82]:
toronto_merged

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,0.0,Fast Food Restaurant,Coffee Shop,Hobby Shop,Spa,Ethiopian Restaurant,Donut Shop,Drugstore,Eastern European Restaurant,Electronics Store,Escape Room
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,4.0,Breakfast Spot,Burger Joint,Bar,Yoga Studio,Falafel Restaurant,Eastern European Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,4.0,Restaurant,Fast Food Restaurant,Park,Pizza Place,Breakfast Spot,Mexican Restaurant,Sports Bar,Electronics Store,Beer Store,Moving Target
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Park,Convenience Store,Business Service,Construction & Landscaping,Farm,Falafel Restaurant,Event Space,Farmers Market,Dog Run
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,Indian Restaurant,Coffee Shop,Thai Restaurant,Chinese Restaurant,Bank,Caribbean Restaurant,Athletics & Sports,Asian Restaurant,Gas Station,Bakery
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188,4.0,Diner,Pharmacy,Gift Shop,Fried Chicken Joint,Park,Grocery Store,Falafel Restaurant,Event Space,Ethiopian Restaurant,Dive Bar
99,M9P,Etobicoke,Westmount,43.696319,-79.532242,4.0,Golf Driving Range,Coffee Shop,Sandwich Place,Chinese Restaurant,Intersection,Golf Course,Supermarket,Ice Cream Shop,Discount Store,Flea Market
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724,4.0,Gas Station,American Restaurant,Bank,Chinese Restaurant,Intersection,Beer Store,Sandwich Place,Supermarket,Coffee Shop,Bus Line
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,4.0,Grocery Store,Pizza Place,Sandwich Place,Beer Store,Fried Chicken Joint,Liquor Store,Caribbean Restaurant,Fast Food Restaurant,Hardware Store,Pharmacy


### Cluster 1

In [84]:
toronto_merged[toronto_merged['Cluster Labels'] == 0]

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,0.0,Fast Food Restaurant,Coffee Shop,Hobby Shop,Spa,Ethiopian Restaurant,Donut Shop,Drugstore,Eastern European Restaurant,Electronics Store,Escape Room
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,Indian Restaurant,Coffee Shop,Thai Restaurant,Chinese Restaurant,Bank,Caribbean Restaurant,Athletics & Sports,Asian Restaurant,Gas Station,Bakery
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029,0.0,Coffee Shop,Hobby Shop,Train Station,Department Store,Discount Store,Sandwich Place,Bus Line,Light Rail Station,Intersection,Convenience Store
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476,0.0,Restaurant,Wings Joint,Chinese Restaurant,Hardware Store,Yoga Studio,Dog Run,Donut Shop,Drugstore,Eastern European Restaurant,Electronics Store
10,M1P,Scarborough,"Dorset Park, Wexford Heights, Scarborough Town...",43.75741,-79.273304,0.0,Electronics Store,Indian Restaurant,Chinese Restaurant,Gym,Bakery,Restaurant,Coffee Shop,Vietnamese Restaurant,Pet Store,Fast Food Restaurant
18,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556,0.0,Clothing Store,Coffee Shop,Restaurant,Juice Bar,Bank,Shopping Mall,Liquor Store,Bakery,Movie Theater,Salon / Barbershop
22,M2N,North York,"Willowdale, Willowdale East",43.77012,-79.408493,0.0,Japanese Restaurant,Café,Ramen Restaurant,Coffee Shop,Pizza Place,Grocery Store,Steakhouse,Shopping Mall,Seafood Restaurant,Restaurant
26,M3B,North York,Don Mills,43.745906,-79.352188,0.0,Japanese Restaurant,Gym,Beer Store,Coffee Shop,Sandwich Place,Caribbean Restaurant,Supermarket,Bike Shop,Chinese Restaurant,Restaurant
27,M3C,North York,Don Mills,43.7259,-79.340923,0.0,Japanese Restaurant,Gym,Beer Store,Coffee Shop,Sandwich Place,Caribbean Restaurant,Supermarket,Bike Shop,Chinese Restaurant,Restaurant
29,M3J,North York,"Northwood Park, York University",43.76798,-79.487262,0.0,Metro Station,Coffee Shop,Furniture / Home Store,Bank,Bar,Japanese Restaurant,Road,Caribbean Restaurant,Massage Studio,Fast Food Restaurant


### Cluster 2

In [87]:
toronto_merged[toronto_merged['Cluster Labels'] == 1]

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Park,Convenience Store,Business Service,Construction & Landscaping,Farm,Falafel Restaurant,Event Space,Farmers Market,Dog Run
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848,1.0,Park,Café,Diner,College Stadium,General Entertainment,Thai Restaurant,Skating Rink,Eastern European Restaurant,Dog Run,Donut Shop
21,M2M,North York,"Willowdale, Newtonbrook",43.789053,-79.408493,1.0,Park,Trail,Coffee Shop,Ethiopian Restaurant,Donut Shop,Drugstore,Eastern European Restaurant,Electronics Store,Escape Room,Yoga Studio
23,M2P,North York,York Mills West,43.752758,-79.400049,1.0,Park,Convenience Store,Gym,Intersection,Pet Store,Bowling Alley,Curling Ice,Dog Run,Field,Fast Food Restaurant
25,M3A,North York,Parkwoods,43.753259,-79.329656,1.0,Park,Food & Drink Shop,Pet Store,Burger Joint,Ethiopian Restaurant,Donut Shop,Drugstore,Eastern European Restaurant,Electronics Store,Escape Room
36,M4C,East York,Woodbine Heights,43.695344,-79.318389,1.0,Park,Skating Rink,Beer Store,Intersection,Athletics & Sports,Curling Ice,Dance Studio,Bus Line,Electronics Store,Escape Room
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,1.0,Business Service,Swim School,Park,Bus Line,Yoga Studio,Ethiopian Restaurant,Drugstore,Eastern European Restaurant,Electronics Store,Escape Room
48,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,1.0,Park,Gym,Trail,Thai Restaurant,Grocery Store,Playground,Ethiopian Restaurant,Event Space,Falafel Restaurant,Escape Room
50,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,1.0,Park,Playground,Trail,Escape Room,Dog Run,Donut Shop,Drugstore,Eastern European Restaurant,Electronics Store,Yoga Studio
64,M5P,Central Toronto,"Forest Hill North & West, Forest Hill Road Park",43.696948,-79.411307,1.0,Gym / Fitness Center,Jewelry Store,Trail,Sushi Restaurant,Park,Falafel Restaurant,Farm,Event Space,Farmers Market,Distribution Center


### Cluster 3


In [88]:
toronto_merged[toronto_merged['Cluster Labels'] == 2]

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
20,M2L,North York,"York Mills, Silver Hills",43.75749,-79.374714,2.0,Martial Arts School,Yoga Studio,Event Space,Donut Shop,Drugstore,Eastern European Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant,Falafel Restaurant


### Cluster 4

In [89]:
toronto_merged[toronto_merged['Cluster Labels'] == 3]

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
91,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509,3.0,Construction & Landscaping,Baseball Field,Playground,Drugstore,Eastern European Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant


### Cluster 5

In [90]:
toronto_merged[toronto_merged['Cluster Labels'] == 4]

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,4.0,Breakfast Spot,Burger Joint,Bar,Yoga Studio,Falafel Restaurant,Eastern European Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,4.0,Restaurant,Fast Food Restaurant,Park,Pizza Place,Breakfast Spot,Mexican Restaurant,Sports Bar,Electronics Store,Beer Store,Moving Target
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,4.0,Ice Cream Shop,Convenience Store,Pizza Place,Coffee Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Field,Dive Bar
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577,4.0,Intersection,Bakery,Bus Line,Metro Station,Diner,Bus Station,Ice Cream Shop,Convenience Store,Park,Soccer Field
11,M1R,Scarborough,"Wexford, Maryvale",43.750072,-79.295849,4.0,Pizza Place,Burger Joint,Asian Restaurant,Rental Car Location,Gas Station,Seafood Restaurant,Bakery,Coffee Shop,Fish Market,Grocery Store
12,M1S,Scarborough,Agincourt,43.7942,-79.262029,4.0,Lounge,Coffee Shop,Sandwich Place,Shanghai Restaurant,Latin American Restaurant,Newsagent,Motorcycle Shop,Clothing Store,Badminton Court,Skating Rink
13,M1T,Scarborough,"Clarks Corners, Tam O'Shanter, Sullivan",43.781638,-79.304302,4.0,Pizza Place,Intersection,Pharmacy,Fast Food Restaurant,Bus Stop,Shopping Mall,Seafood Restaurant,Fried Chicken Joint,Bank,Sandwich Place
14,M1V,Scarborough,"Milliken, Agincourt North, Steeles East, L'Amo...",43.815252,-79.284577,4.0,Pizza Place,Noodle House,Intersection,Chinese Restaurant,Gym,Pharmacy,Malay Restaurant,Shop & Service,Caribbean Restaurant,Japanese Restaurant
15,M1W,Scarborough,"Steeles West, L'Amoreaux West",43.799525,-79.318389,4.0,Fast Food Restaurant,Indian Restaurant,Bank,Sandwich Place,Breakfast Spot,Other Great Outdoors,Coffee Shop,Pizza Place,Chinese Restaurant,Grocery Store
17,M2H,North York,Hillcrest Village,43.803762,-79.363452,4.0,Restaurant,Sandwich Place,Pharmacy,Bakery,Chinese Restaurant,Escape Room,Donut Shop,Drugstore,Eastern European Restaurant,Electronics Store


| Created By     |           Project                | Date       |
| ---------------| ---------------------------------| -----------|
| Ashish Sapkota | Applied Data Capstone Assignment | 18-12-2020 |