# Segmenting and Clustering Neighborhoods in Toronto

### Import Data from the Wikipedia website

In [1]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Solving environment: ...working... done

# All requested packages already installed.





  current version: 4.4.10
  latest version: 4.8.3

Please update conda by running

    $ conda update -n base conda




Solving environment: ...working... done

# All requested packages already installed.

Folium installed
Libraries imported.




  current version: 4.4.10
  latest version: 4.8.3

Please update conda by running

    $ conda update -n base conda




In [2]:
#! conda update -n base conda

In [3]:
import pandas as pd
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M' # link to the website
tables = pd.read_html(url)[0]                                         # Reading the table using pandas

In [4]:
df = pd.DataFrame(tables)  # Saving the table into a pandas data frame

### Table Exploration

In [5]:
# this cell displays the table 
df.columns = df.iloc[0]
df = df.drop(index=0)
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
1,M1A,Not assigned,
2,M2A,Not assigned,
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park, Harbourfront"


## Data Cleaning

### Get Rid of 'Not Assigned' Borough

In [6]:
df = df[df['Borough']!='Not assigned'].reset_index(drop=True)
df_clean = df
df_clean.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


#### Total number of Boroughs and Neighborhoods

In [7]:
print('There are', len(set(df_clean['Borough'])), 'Boroughs and', len(df_clean['Neighborhood']), 'Neighborhoods')

There are 10 Boroughs and 103 Neighborhoods


## Using geopy to get the longitude and latitude of Toronto

In [8]:
address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent='toronto_explorer')
toronto_location = geolocator.geocode(address)
latitude = toronto_location.latitude
longitude = toronto_location.longitude
print('The Latitude of Toronto is', latitude, 'and the longitude of Toronto is', longitude)

The Latitude of Toronto is 43.6534817 and the longitude of Toronto is -79.3839347


## Using pgeocode to get location coordinates of the Neighborhoods

In [9]:
! pip install pgeocode



You should consider upgrading via the 'c:\users\gideo\anaconda3\python.exe -m pip install --upgrade pip' command.


In [10]:
import pgeocode
nomi = pgeocode.Nominatim('ca')
nomi.query_postal_code('M7R')

postal_code       M7R
country code      NaN
place_name        NaN
state_name        NaN
state_code        NaN
county_name       NaN
county_code       NaN
community_name    NaN
community_code    NaN
latitude          NaN
longitude         NaN
accuracy          NaN
Name: 0, dtype: object

In [11]:
df_clean['Latitude'] = df_clean['Postal Code'].apply(lambda x: nomi.query_postal_code(x).latitude)
df_clean['Longitude'] = df_clean['Postal Code'].apply(lambda x: nomi.query_postal_code(x).longitude)

In [12]:
df_coord = df_clean
df_coord.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7545,-79.33
1,M4A,North York,Victoria Village,43.7276,-79.3148
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7223,-79.4504
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889


#### Drop Neigborhood without Location (Latitude and Longitude) Information

In [13]:
df_coord[df_coord['Latitude'].isnull()]

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
76,M7R,Mississauga,Canada Post Gateway Processing Centre,,


In [84]:
df_coord = df_coord.dropna(axis=0, how='any')
df_coord = df_coord.reset_index(drop=True)
print(len(df_coord['Neighborhood']))
df_coord.isnull().sum()

102


0
Postal Code     0
Borough         0
Neighborhood    0
Latitude        0
Longitude       0
dtype: int64

# Exploring the City of Toronto and its Neighborhoods with Folium and Foursquare API

In [15]:
import requests
CLIENT_ID = '' # your Foursquare ID
CLIENT_SECRET = '' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 
CLIENT_SECRET:


#### Overview of Toronto and its Neighborhoods

In [16]:
latitude = toronto_location.latitude
longitude = toronto_location.longitude
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

for latitude, longitude, borough, neighborhood in zip(df_coord['Latitude'], df_coord['Longitude'], df_coord['Borough'], df_coord['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        location = [latitude, longitude],
        radius = 5,
        popup = label, 
        color = '#3388ff',
        fill = True,
        fill_color = '#3389ff',
        fill_opacity = 0.2
        
    

    ).add_to(map_toronto)
    
map_toronto

## Function to explore all the neighborhoods in Toronto

#### First, Let's Explore Venues in Parkwoods Neighborhood

In [17]:
radius = 500
LIMIT = 100
neighborhood = df_coord['Neighborhood'][0]
latitude = df_coord['Latitude'][0]  
longitude = df_coord.Longitude[0]
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(
        CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)
print(neighborhood, 'is located at', latitude, 'latitude and', longitude,'longitude')
print(neighborhood, "Foursquare request url is:", url)

Parkwoods is located at 43.7545 latitude and -79.33 longitude
Parkwoods Foursquare request url is: https://api.foursquare.com/v2/venues/explore?client_id=P0KZIXISOSSPENEPUTXWKBAWKF4MCRM5DQGBYBDNKLCHYGHY&client_secret=AX5JNFAMEZBVJURNFXBYBHQOA5BGA1SQJQCIJ3IYYADA0AAC&ll=43.7545,-79.33&v=20180605&radius=500&limit=100


In [18]:
result = requests.get(url).json()

value_list = json_normalize(result)['response.groups'][0][0]['items']
venue_name = [lists['venue']['name'] for lists in value_list]
venue_categories = [lists['venue']['categories'][0]['name'] for lists in value_list]
venue_latitude = [lists['venue']['location']['lat'] for lists in value_list]
venue_longitude = [lists['venue']['location']['lng'] for lists in value_list]
venue_neighborhood = [neighborhood for name in venue_name ]

venues_df = pd.DataFrame([venue_neighborhood, venue_name, venue_categories, venue_latitude, venue_longitude]).T

venues_df.columns = ['Neighborhood', 'Venue_name', 'Venue_categories', 'Venue_latitude', 'Venue_longitude']
venues = pd.DataFrame(columns=venues_df.columns) # initialize the venues dataframe holder
pd.concat([venues,venues_df], axis=0).reset_index(drop=True)

Unnamed: 0,Neighborhood,Venue_name,Venue_categories,Venue_latitude,Venue_longitude
0,Parkwoods,Brookbanks Park,Park,43.752,-79.3321
1,Parkwoods,Variety Store,Food & Drink Shop,43.752,-79.3331


In [19]:
print('There are', len(venue_name), 'venues in Parkwoods')

There are 2 venues in Parkwoods


#### Let's Write a Function to Explore all Neighborhoods in Tronto

In [20]:
def get_Venues(neighborhood, latitude, longitude):
    cols = ['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue Name', 'Venue Latitude', 'Venue Longitude', 'Venue Category']
    venues_df = pd.DataFrame(columns=cols)
    print('The Neighborhoods are: \n')
    for neighborhood, latitude, longitude in zip(neighborhood, latitude, longitude):
        print(neighborhood)
        url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(
                CLIENT_ID, 
                CLIENT_SECRET, 
                latitude, 
                longitude, 
                VERSION, 
                radius, 
                LIMIT)

        result = requests.get(url).json()
        result = result['response']['groups'][0]['items']

        venues = [[
                    neighborhood,
                    latitude,
                    longitude,
                    result['venue']['name'], 
                    result['venue']['location']['lat'],
                    result['venue']['location']['lng'], 
                    result['venue']['categories'][0]['name']] for result in result]
        venues_df = pd.concat([venues_df, pd.DataFrame(venues,columns=cols)], axis=0)
    return venues_df

In [21]:
toronto_venues = get_Venues(neighborhood=df_coord['Neighborhood'],
                                   latitude=df_coord['Latitude'],
                                   longitude=df_coord['Longitude']
                                  )

The Neighborhoods are: 

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Pa

#### Check the size of the generated dataframe

In [22]:
cols = ['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue Name', 
        'Venue Latitude', 'Venue Longitude', 'Venue Category']
toronto_venues.columns = cols
print('The shape of the returned dataframe is: ', toronto_venues.shape)
toronto_venues.head()

The shape of the returned dataframe is:  (2141, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.7545,-79.33,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.7545,-79.33,Variety Store,43.751974,-79.333114,Food & Drink Shop
0,Victoria Village,43.7276,-79.3148,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
1,Victoria Village,43.7276,-79.3148,Tim Hortons,43.725517,-79.313103,Coffee Shop
2,Victoria Village,43.7276,-79.3148,Portugril,43.725819,-79.312785,Portuguese Restaurant


#### Let's group venues by neighborhood and get the total for each neighborhood

In [23]:
len(toronto_venues['Neighborhood'].unique())

96

In [24]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Alderwood, Long Branch",9,9,9,9,9,9
"Bathurst Manor, Wilson Heights, Downsview North",6,6,6,6,6,6
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",23,23,23,23,23,23
Berczy Park,89,89,89,89,89,89
"Birch Cliff, Cliffside West",4,4,4,4,4,4
"Brockton, Parkdale Village, Exhibition Place",38,38,38,38,38,38
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",14,14,14,14,14,14
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",57,57,57,57,57,57


#### Let's get the total number of unique venues

In [25]:
print('There are {} unique venue categories in the city of Toronto'.format(len(toronto_venues['Venue Category'].unique())))

There are 254 unique venue categories in the city of Toronto


#### Get one-hot encoding for the different venue categories

In [26]:
toronto_venuesOH = pd.get_dummies(toronto_venues['Venue Category'])
print(toronto_venuesOH.shape)
toronto_venuesOH.head()

(2141, 254)


Unnamed: 0,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Add the Neigbhorhood Column to the Toronto Venue Categories one-hot dataframe

In [27]:
new_col = toronto_venues[['Neighborhood']]
new_col.columns = ['N_hoods'] # renaming this because there is a venue category called Neighborhood
toronto_oneHot = pd.concat([new_col, toronto_venuesOH], axis=1).reset_index(drop=True)
print(toronto_oneHot.shape)
toronto_oneHot.head()

(2141, 255)


Unnamed: 0,N_hoods,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Group venues by Neighborhood taking the mean of the frequencies of occurrence

In [28]:
toronto_OHGrouped = toronto_oneHot.groupby('N_hoods').mean().reset_index()

In [29]:
print(toronto_OHGrouped.shape)
toronto_OHGrouped.head()

(96, 255)


Unnamed: 0,N_hoods,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Functions for the Most Common Venue Categories in each Neighborhood

In [30]:
def top_VenueColunms(top_number):
    """
    Function to get a list of the 'top_number' Most Common Venue Categories columns
    """
    top_cols = []
    for num in range(top_number):
        number_to_string = str(num+1)
        if number_to_string[-1] in ['1','2','3'] and len(number_to_string)>1 and number_to_string[-2] =='1':
            top_cols.append(number_to_string+'th'+' Most Common Venue')
        elif number_to_string[-1]=='1':
            top_cols.append(number_to_string+'st'+' Most Common Venue')
        elif number_to_string[-1]=='2':
            top_cols.append(number_to_string+'nd'+' Most Common Venue')
        elif number_to_string[-1]=='3':
            top_cols.append(number_to_string+'rd'+' Most Common Venue')
        else:
            top_cols.append(number_to_string+'th'+' Most Common Venue')

    return top_cols        

In [31]:
def getMostCommonVenues(top_number, neighborhoods):
    """
    This function returns the top Most Common Venues
    """
    topVenues_df = pd.DataFrame(columns=top_VenueColunms(top_number))
    for i, hood in zip(range(len(neighborhoods)), neighborhoods):
        venues = toronto_OHGrouped.iloc[i,1:].sort_values(ascending=False).reset_index()['index'].head(top_number)
        venues = pd.DataFrame(venues).T
        venues.columns = top_VenueColunms(top_number)
        topVenues_df = pd.concat([topVenues_df,venues]).reset_index(drop=True)
    return pd.concat([neighborhoods,topVenues_df], axis=1).reset_index(drop=True)

In [90]:
most_commonVenues = getMostCommonVenues(10, toronto_OHGrouped['N_hoods'])
most_commonVenues.head()

Unnamed: 0,N_hoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Breakfast Spot,Badminton Court,Skating Rink,Latin American Restaurant,Yoga Studio,Fast Food Restaurant,Event Space,Falafel Restaurant,Farmers Market,Financial or Legal Service
1,"Alderwood, Long Branch",Convenience Store,Athletics & Sports,Sandwich Place,Coffee Shop,Pub,Gym,Dance Studio,Pizza Place,Pharmacy,Farmers Market
2,"Bathurst Manor, Wilson Heights, Downsview North",Mediterranean Restaurant,Middle Eastern Restaurant,Coffee Shop,Fried Chicken Joint,Deli / Bodega,Pizza Place,Financial or Legal Service,Fish & Chips Shop,Field,Fish Market
3,Bayview Village,Construction & Landscaping,Park,Trail,Flower Shop,Cuban Restaurant,Creperie,Flea Market,Fish Market,Fish & Chips Shop,Financial or Legal Service
4,"Bedford Park, Lawrence Manor East",Sushi Restaurant,Restaurant,Sandwich Place,Coffee Shop,Italian Restaurant,Comfort Food Restaurant,Pizza Place,Pub,Juice Bar,Thai Restaurant


### Cluster Neighborhood Using KMean Clustering

#### Create KMeans Model

In [91]:
from sklearn.cluster import KMeans
cluster_size = 5
 
kModel = KMeans(n_clusters=cluster_size, init='k-means++', random_state=0)

K_cluster = kModel.fit(toronto_OHGrouped.drop(columns=['N_hoods']))
labels = K_cluster.labels_
print(labels)

[0 0 0 3 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 3 0 0 0 3 0 0 0 3 0 0 0 0 3 3 3
 4 0 0 0 0 0 0 0 0 3 0 0 2 0 3 0 0 3 0 0 3 0 0 3 0 0 0 3 0 0 0 0 3 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 3 1 0 3 0]


### Add the Labels to the data frame of the Most common venues

In [92]:
label_df = pd.DataFrame(labels)
label_df.columns = ['Label']
most_commonVenues = pd.concat([most_commonVenues,label_df], axis=1)
most_commonVenues.head()

Unnamed: 0,N_hoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Label
0,Agincourt,Breakfast Spot,Badminton Court,Skating Rink,Latin American Restaurant,Yoga Studio,Fast Food Restaurant,Event Space,Falafel Restaurant,Farmers Market,Financial or Legal Service,0
1,"Alderwood, Long Branch",Convenience Store,Athletics & Sports,Sandwich Place,Coffee Shop,Pub,Gym,Dance Studio,Pizza Place,Pharmacy,Farmers Market,0
2,"Bathurst Manor, Wilson Heights, Downsview North",Mediterranean Restaurant,Middle Eastern Restaurant,Coffee Shop,Fried Chicken Joint,Deli / Bodega,Pizza Place,Financial or Legal Service,Fish & Chips Shop,Field,Fish Market,0
3,Bayview Village,Construction & Landscaping,Park,Trail,Flower Shop,Cuban Restaurant,Creperie,Flea Market,Fish Market,Fish & Chips Shop,Financial or Legal Service,3
4,"Bedford Park, Lawrence Manor East",Sushi Restaurant,Restaurant,Sandwich Place,Coffee Shop,Italian Restaurant,Comfort Food Restaurant,Pizza Place,Pub,Juice Bar,Thai Restaurant,0


#### Add the latitude and longitude of the neighborhoods to the most common venues

In [93]:
# Get the latitudes and longitudes for the neighborhoods
coord = []
for hood in most_commonVenues['N_hoods']:
    Latitude = df_coord[df_coord['Neighborhood'] == hood]['Latitude'].reset_index(drop=True)[0]    # Extracts Latitude
    Longitude = df_coord[df_coord['Neighborhood'] == hood]['Longitude'].reset_index(drop=True)[0]  # Etracts Longitude
    coord.append([Latitude, Longitude])                                                            # Append to coord list
latlng = pd.DataFrame(coord, columns=['Latitude', 'Longitude'])                                    # Creat a datafram for latitude and longitude

toronto_mostCommonVenues = pd.concat([latlng,most_commonVenues], axis=1)                           # Concate with the most common venues
# Rearrange the columns
columns =[toronto_mostCommonVenues.columns[2]] + list(toronto_mostCommonVenues.columns.drop('N_hoods'))                                          # Right column order
toronto_mostCommonVenues = toronto_mostCommonVenues[columns]                                       # New Datafram from rearrange columns
toronto_mostCommonVenues

Unnamed: 0,N_hoods,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Label
0,Agincourt,43.7946,-79.2644,Breakfast Spot,Badminton Court,Skating Rink,Latin American Restaurant,Yoga Studio,Fast Food Restaurant,Event Space,Falafel Restaurant,Farmers Market,Financial or Legal Service,0
1,"Alderwood, Long Branch",43.6021,-79.5402,Convenience Store,Athletics & Sports,Sandwich Place,Coffee Shop,Pub,Gym,Dance Studio,Pizza Place,Pharmacy,Farmers Market,0
2,"Bathurst Manor, Wilson Heights, Downsview North",43.7535,-79.4472,Mediterranean Restaurant,Middle Eastern Restaurant,Coffee Shop,Fried Chicken Joint,Deli / Bodega,Pizza Place,Financial or Legal Service,Fish & Chips Shop,Field,Fish Market,0
3,Bayview Village,43.7797,-79.3813,Construction & Landscaping,Park,Trail,Flower Shop,Cuban Restaurant,Creperie,Flea Market,Fish Market,Fish & Chips Shop,Financial or Legal Service,3
4,"Bedford Park, Lawrence Manor East",43.7335,-79.4177,Sushi Restaurant,Restaurant,Sandwich Place,Coffee Shop,Italian Restaurant,Comfort Food Restaurant,Pizza Place,Pub,Juice Bar,Thai Restaurant,0
5,Berczy Park,43.6456,-79.3754,Coffee Shop,Café,Hotel,Beer Bar,Restaurant,Pub,Bakery,Japanese Restaurant,Cocktail Bar,Seafood Restaurant,0
6,"Birch Cliff, Cliffside West",43.6952,-79.2646,General Entertainment,Café,Skating Rink,College Stadium,Falafel Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Yoga Studio,0
7,"Brockton, Parkdale Village, Exhibition Place",43.6383,-79.4301,Café,Coffee Shop,Breakfast Spot,Gift Shop,Thrift / Vintage Store,Accessories Store,Pet Store,Brewery,Sandwich Place,Restaurant,0
8,"Business reply mail Processing Centre, South C...",43.7804,-79.2505,Coffee Shop,Restaurant,Italian Restaurant,Yoga Studio,Breakfast Spot,Bookstore,Sushi Restaurant,Bank,Japanese Restaurant,Martial Arts Dojo,0
9,"CN Tower, King and Spadina, Railway Lands, Har...",43.6404,-79.3995,Coffee Shop,Café,Italian Restaurant,Bar,Speakeasy,Gym / Fitness Center,Park,Bank,Bakery,French Restaurant,0


#### Plot a Folium map of the new cluster

In [94]:
import matplotlib.cm as cm
import matplotlib.colors as colors
from IPython.display import display
rainbow = ['#FF0000', '#0000FF','#FF00FF','#9400D3','#FF00FF']
latitude = toronto_location.latitude
longitude = toronto_location.longitude
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

for latitude, longitude, neighborhood, lab in zip(toronto_mostCommonVenues['Latitude'],toronto_mostCommonVenues['Longitude'],toronto_mostCommonVenues['N_hoods'],toronto_mostCommonVenues['Label']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        location = [latitude, longitude],
        radius = 5,
        popup = label, 
        color = rainbow[lab-1],
        fill = True,
        fill_color = rainbow[lab-1],
        fill_opacity = 0.7
    ).add_to(map_toronto)
    
display(map_toronto)

#### First Cluster

In [95]:
toronto_mostCommonVenues[toronto_mostCommonVenues['Label'] == 0][[toronto_mostCommonVenues.columns[0]] + list(toronto_mostCommonVenues.columns[3:])]

Unnamed: 0,N_hoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Label
0,Agincourt,Breakfast Spot,Badminton Court,Skating Rink,Latin American Restaurant,Yoga Studio,Fast Food Restaurant,Event Space,Falafel Restaurant,Farmers Market,Financial or Legal Service,0
1,"Alderwood, Long Branch",Convenience Store,Athletics & Sports,Sandwich Place,Coffee Shop,Pub,Gym,Dance Studio,Pizza Place,Pharmacy,Farmers Market,0
2,"Bathurst Manor, Wilson Heights, Downsview North",Mediterranean Restaurant,Middle Eastern Restaurant,Coffee Shop,Fried Chicken Joint,Deli / Bodega,Pizza Place,Financial or Legal Service,Fish & Chips Shop,Field,Fish Market,0
4,"Bedford Park, Lawrence Manor East",Sushi Restaurant,Restaurant,Sandwich Place,Coffee Shop,Italian Restaurant,Comfort Food Restaurant,Pizza Place,Pub,Juice Bar,Thai Restaurant,0
5,Berczy Park,Coffee Shop,Café,Hotel,Beer Bar,Restaurant,Pub,Bakery,Japanese Restaurant,Cocktail Bar,Seafood Restaurant,0
6,"Birch Cliff, Cliffside West",General Entertainment,Café,Skating Rink,College Stadium,Falafel Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Yoga Studio,0
7,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Gift Shop,Thrift / Vintage Store,Accessories Store,Pet Store,Brewery,Sandwich Place,Restaurant,0
8,"Business reply mail Processing Centre, South C...",Coffee Shop,Restaurant,Italian Restaurant,Yoga Studio,Breakfast Spot,Bookstore,Sushi Restaurant,Bank,Japanese Restaurant,Martial Arts Dojo,0
9,"CN Tower, King and Spadina, Railway Lands, Har...",Coffee Shop,Café,Italian Restaurant,Bar,Speakeasy,Gym / Fitness Center,Park,Bank,Bakery,French Restaurant,0
11,Cedarbrae,Gaming Cafe,Lounge,Trail,Fast Food Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Yoga Studio,0


#### Second Cluster

In [96]:
toronto_mostCommonVenues[toronto_mostCommonVenues['Label'] == 1][[toronto_mostCommonVenues.columns[0]] + list(toronto_mostCommonVenues.columns[3:])]

Unnamed: 0,N_hoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Label
92,Woburn,Korean Restaurant,Yoga Studio,Food Court,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Financial or Legal Service,Field,Fast Food Restaurant,1


#### Third Cluster

In [97]:
toronto_mostCommonVenues[toronto_mostCommonVenues['Label'] == 2][[toronto_mostCommonVenues.columns[0]] + list(toronto_mostCommonVenues.columns[3:])]

Unnamed: 0,N_hoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Label
49,"Milliken, Agincourt North, Steeles East, L'Amo...",Pharmacy,Yoga Studio,Doner Restaurant,Flea Market,Fish Market,Fish & Chips Shop,Financial or Legal Service,Field,Fast Food Restaurant,Farmers Market,2


#### Fourth Cluster

In [98]:
toronto_mostCommonVenues[toronto_mostCommonVenues['Label'] == 3][[toronto_mostCommonVenues.columns[0]] + list(toronto_mostCommonVenues.columns[3:])]

Unnamed: 0,N_hoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Label
3,Bayview Village,Construction & Landscaping,Park,Trail,Flower Shop,Cuban Restaurant,Creperie,Flea Market,Fish Market,Fish & Chips Shop,Financial or Legal Service,3
10,Caledonia-Fairbanks,Park,Women's Store,Bakery,Beer Store,Sporting Goods Shop,Mexican Restaurant,Gym,Yoga Studio,Farmers Market,Ethiopian Restaurant,3
21,Don Mills,Park,Construction & Landscaping,Gym,Pool,River,Field,Fast Food Restaurant,Financial or Legal Service,Farmers Market,Dog Run,3
25,"East Toronto, Broadview North (Old East York)",Convenience Store,Park,Coffee Shop,Farmers Market,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Yoga Studio,3
29,"Forest Hill North & West, Forest Hill Road Park",Business Service,Park,Trail,Yoga Studio,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,3
34,"Harbourfront East, Union Station, Toronto Islands",Harbor / Marina,Music Venue,Park,Café,Farmers Market,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Yoga Studio,3
35,"High Park, The Junction South",Residential Building (Apartment / Condo),Park,Yoga Studio,Farmers Market,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Fast Food Restaurant,3
36,Hillcrest Village,Residential Building (Apartment / Condo),Park,Yoga Studio,Farmers Market,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Fast Food Restaurant,3
46,Lawrence Park,Photography Studio,Park,Lawyer,Yoga Studio,Falafel Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Fast Food Restaurant,3
51,"Moore Park, Summerhill East",Thai Restaurant,Grocery Store,Playground,Park,Gym,Doner Restaurant,Donut Shop,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,3


#### Fifth Cluster

In [99]:
toronto_mostCommonVenues[toronto_mostCommonVenues['Label'] == 0][[toronto_mostCommonVenues.columns[0]] + list(toronto_mostCommonVenues.columns[3:])]

Unnamed: 0,N_hoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Label
0,Agincourt,Breakfast Spot,Badminton Court,Skating Rink,Latin American Restaurant,Yoga Studio,Fast Food Restaurant,Event Space,Falafel Restaurant,Farmers Market,Financial or Legal Service,0
1,"Alderwood, Long Branch",Convenience Store,Athletics & Sports,Sandwich Place,Coffee Shop,Pub,Gym,Dance Studio,Pizza Place,Pharmacy,Farmers Market,0
2,"Bathurst Manor, Wilson Heights, Downsview North",Mediterranean Restaurant,Middle Eastern Restaurant,Coffee Shop,Fried Chicken Joint,Deli / Bodega,Pizza Place,Financial or Legal Service,Fish & Chips Shop,Field,Fish Market,0
4,"Bedford Park, Lawrence Manor East",Sushi Restaurant,Restaurant,Sandwich Place,Coffee Shop,Italian Restaurant,Comfort Food Restaurant,Pizza Place,Pub,Juice Bar,Thai Restaurant,0
5,Berczy Park,Coffee Shop,Café,Hotel,Beer Bar,Restaurant,Pub,Bakery,Japanese Restaurant,Cocktail Bar,Seafood Restaurant,0
6,"Birch Cliff, Cliffside West",General Entertainment,Café,Skating Rink,College Stadium,Falafel Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Yoga Studio,0
7,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Gift Shop,Thrift / Vintage Store,Accessories Store,Pet Store,Brewery,Sandwich Place,Restaurant,0
8,"Business reply mail Processing Centre, South C...",Coffee Shop,Restaurant,Italian Restaurant,Yoga Studio,Breakfast Spot,Bookstore,Sushi Restaurant,Bank,Japanese Restaurant,Martial Arts Dojo,0
9,"CN Tower, King and Spadina, Railway Lands, Har...",Coffee Shop,Café,Italian Restaurant,Bar,Speakeasy,Gym / Fitness Center,Park,Bank,Bakery,French Restaurant,0
11,Cedarbrae,Gaming Cafe,Lounge,Trail,Fast Food Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Yoga Studio,0


#### It is obvious that classifying the neighhborhood into three clusters would be better

###  Re-Clusetering the neighborhoods into three clusters

In [100]:
# Clustering Model
cluster_size = 3   # Number of clusters
 
kModel = KMeans(n_clusters=cluster_size, init='k-means++', random_state=0) # KMeans model

K_cluster = kModel.fit(toronto_OHGrouped.drop(columns=['N_hoods'])) # Fitting the model to dataset
labels = K_cluster.labels_   # Model labels
print(labels)

## Add Labels to dataframe
label_df = pd.DataFrame(labels)
label_df.columns = ['Label']
most_commonVenues = getMostCommonVenues(10, toronto_OHGrouped['N_hoods']) # Re-initializing most_commonVenues
most_commonVenues = pd.concat([most_commonVenues,label_df], axis=1)       # Adding labels to dataframe
most_commonVenues.head()


# Get the latitudes and longitudes for the neighborhoods
coord = []
for hood in most_commonVenues['N_hoods']:
    Latitude = df_coord[df_coord['Neighborhood'] == hood]['Latitude'].reset_index(drop=True)[0]    # Extracts Latitude
    Longitude = df_coord[df_coord['Neighborhood'] == hood]['Longitude'].reset_index(drop=True)[0]  # Etracts Longitude
    coord.append([Latitude, Longitude])                                                            # Append to coord list
latlng = pd.DataFrame(coord, columns=['Latitude', 'Longitude'])                                    # Creat a datafram for latitude and longitude

toronto_mostCommonVenues = pd.concat([latlng,most_commonVenues], axis=1)                           # Concate with the most common venues
# Rearrange the columns
columns =[toronto_mostCommonVenues.columns[2]] + list(toronto_mostCommonVenues.columns.drop('N_hoods'))                                          # Right column order
toronto_mostCommonVenues = toronto_mostCommonVenues[columns]                                       # New Datafram from rearrange columns
toronto_mostCommonVenues

# Folium Map
rainbow = ['#FF0000', '#0000FF','#FF00FF']
latitude = toronto_location.latitude
longitude = toronto_location.longitude
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

for latitude, longitude, neighborhood, lab in zip(toronto_mostCommonVenues['Latitude'],toronto_mostCommonVenues['Longitude'],toronto_mostCommonVenues['N_hoods'],toronto_mostCommonVenues['Label']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        location = [latitude, longitude],
        radius = 5,
        popup = label, 
        color = rainbow[lab-1],
        fill = True,
        fill_color = rainbow[lab-1],
        fill_opacity = 0.7
    ).add_to(map_toronto)
    
display(map_toronto)

[2 2 2 0 2 2 2 2 2 2 0 2 2 2 2 2 2 2 2 2 2 0 1 2 2 0 2 2 2 0 2 2 2 2 0 0 0
 2 2 2 2 2 2 2 2 2 0 2 2 2 2 0 2 1 0 2 2 0 2 2 0 2 2 2 0 2 2 2 2 0 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 2 0 2 2 0 2]


#### First Cluster

In [101]:
toronto_mostCommonVenues[toronto_mostCommonVenues['Label'] == 0][[toronto_mostCommonVenues.columns[0]] + list(toronto_mostCommonVenues.columns[3:])]

Unnamed: 0,N_hoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Label
3,Bayview Village,Construction & Landscaping,Park,Trail,Flower Shop,Cuban Restaurant,Creperie,Flea Market,Fish Market,Fish & Chips Shop,Financial or Legal Service,0
10,Caledonia-Fairbanks,Park,Women's Store,Bakery,Beer Store,Sporting Goods Shop,Mexican Restaurant,Gym,Yoga Studio,Farmers Market,Ethiopian Restaurant,0
21,Don Mills,Park,Construction & Landscaping,Gym,Pool,River,Field,Fast Food Restaurant,Financial or Legal Service,Farmers Market,Dog Run,0
25,"East Toronto, Broadview North (Old East York)",Convenience Store,Park,Coffee Shop,Farmers Market,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Yoga Studio,0
29,"Forest Hill North & West, Forest Hill Road Park",Business Service,Park,Trail,Yoga Studio,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,0
34,"Harbourfront East, Union Station, Toronto Islands",Harbor / Marina,Music Venue,Park,Café,Farmers Market,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Yoga Studio,0
35,"High Park, The Junction South",Residential Building (Apartment / Condo),Park,Yoga Studio,Farmers Market,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Fast Food Restaurant,0
36,Hillcrest Village,Residential Building (Apartment / Condo),Park,Yoga Studio,Farmers Market,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Fast Food Restaurant,0
46,Lawrence Park,Photography Studio,Park,Lawyer,Yoga Studio,Falafel Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Fast Food Restaurant,0
51,"Moore Park, Summerhill East",Thai Restaurant,Grocery Store,Playground,Park,Gym,Doner Restaurant,Donut Shop,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,0


#### Second Cluster

In [102]:
toronto_mostCommonVenues[toronto_mostCommonVenues['Label'] == 1][[toronto_mostCommonVenues.columns[0]] + list(toronto_mostCommonVenues.columns[3:])]

Unnamed: 0,N_hoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Label
22,"Dorset Park, Wexford Heights, Scarborough Town...",Bakery,Asian Restaurant,Food & Drink Shop,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Financial or Legal Service,Donut Shop,Field,1
53,"North Park, Maple Leaf Park, Upwood Park",Bakery,Food Court,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Financial or Legal Service,Field,Fast Food Restaurant,Farmers Market,1


#### Third Cluster

In [103]:
toronto_mostCommonVenues[toronto_mostCommonVenues['Label'] == 2][[toronto_mostCommonVenues.columns[0]] + list(toronto_mostCommonVenues.columns[3:])]

Unnamed: 0,N_hoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Label
0,Agincourt,Breakfast Spot,Badminton Court,Skating Rink,Latin American Restaurant,Yoga Studio,Fast Food Restaurant,Event Space,Falafel Restaurant,Farmers Market,Financial or Legal Service,2
1,"Alderwood, Long Branch",Convenience Store,Athletics & Sports,Sandwich Place,Coffee Shop,Pub,Gym,Dance Studio,Pizza Place,Pharmacy,Farmers Market,2
2,"Bathurst Manor, Wilson Heights, Downsview North",Mediterranean Restaurant,Middle Eastern Restaurant,Coffee Shop,Fried Chicken Joint,Deli / Bodega,Pizza Place,Financial or Legal Service,Fish & Chips Shop,Field,Fish Market,2
4,"Bedford Park, Lawrence Manor East",Sushi Restaurant,Restaurant,Sandwich Place,Coffee Shop,Italian Restaurant,Comfort Food Restaurant,Pizza Place,Pub,Juice Bar,Thai Restaurant,2
5,Berczy Park,Coffee Shop,Café,Hotel,Beer Bar,Restaurant,Pub,Bakery,Japanese Restaurant,Cocktail Bar,Seafood Restaurant,2
6,"Birch Cliff, Cliffside West",General Entertainment,Café,Skating Rink,College Stadium,Falafel Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Yoga Studio,2
7,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Gift Shop,Thrift / Vintage Store,Accessories Store,Pet Store,Brewery,Sandwich Place,Restaurant,2
8,"Business reply mail Processing Centre, South C...",Coffee Shop,Restaurant,Italian Restaurant,Yoga Studio,Breakfast Spot,Bookstore,Sushi Restaurant,Bank,Japanese Restaurant,Martial Arts Dojo,2
9,"CN Tower, King and Spadina, Railway Lands, Har...",Coffee Shop,Café,Italian Restaurant,Bar,Speakeasy,Gym / Fitness Center,Park,Bank,Bakery,French Restaurant,2
11,Cedarbrae,Gaming Cafe,Lounge,Trail,Fast Food Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Yoga Studio,2
