## Segmenting and Clustering Neighborhoods in Toronto
### I have written the code for all the three parts in this single file.

In [1]:
# Code to display all the results for all the code in a cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Part 1
### Scraping the data from the wikipedia page and creating a dataframe with details "PostalCode, Borough, Neighborhood" in Canada

In [2]:
# Importing required libraries for scraping

import requests
import pandas as pd
import numpy as np

In [3]:
# Scarping the table from the wikipedia page

page_link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
page_response = requests.get(page_link)
df = pd.read_html(page_link,header = 0, na_values = ['Not Assigned'])[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [4]:
# Replacing "Not Assigned" with "NaN"

df = df.replace('Not assigned', np.nan)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [5]:
# Dropping the postcodes with NaN under Borough

df.dropna(subset = ['Borough'],inplace = True)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [6]:
# Only one Nan value under Neighborhood column

no_neighbor = df[df['Neighbourhood'].isna()].shape[0]
print('Number of rows on which Neighborhood column is empty: {}'.format(no_neighbor))

Number of rows on which Neighborhood column is empty: 1


In [7]:
df[df['Neighbourhood'].isna()]

Unnamed: 0,Postcode,Borough,Neighbourhood
8,M7A,Queen's Park,


In [8]:
# Replacing the NaN value under Neighborhood with value in Borough

df['Neighbourhood'].fillna(df['Borough'], inplace=True)
print('Number of rows on which Neighborhood column is empty: {}'.format(df[df['Neighbourhood'].isna()].shape[0]))

Number of rows on which Neighborhood column is empty: 0


In [9]:
df[df['Borough'] =='Queen\'s Park']

Unnamed: 0,Postcode,Borough,Neighbourhood
8,M7A,Queen's Park,Queen's Park


In [10]:
# Combining the different neighborhood in the same postcode into single row 

df_toronto = df.groupby(['Postcode','Borough']).Neighbourhood.agg([('Neighbourhood', ', '.join)])
df_toronto.reset_index(inplace=True)

In [11]:
df_toronto.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [12]:
df_toronto.rename(columns={'Postcode':'PostalCode'},inplace = True)
df_toronto.rename(columns={'Neighbourhood':'Neighborhood'},inplace = True)
df_toronto.shape

(103, 3)

# Part 2

## Using the csv file to find the coordinates for the postalcode in Canada

In [13]:
url="http://cocl.us/Geospatial_data"
df_coordinates = pd.read_csv(url)
df_coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [14]:
# Changing the column names so as to make the merging the dataframes possible

df_coordinates.rename(columns={'Postal Code':'PostalCode'},inplace = True)

In [15]:
# Merging the dataset containing postalcode, borough with their respective coordinates

df_toronto_coordinates = pd.merge(df_toronto, df_coordinates, on='PostalCode')
df_toronto_coordinates.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Part 3

### Exploring and clustering the neighborhoods in Toronto

In [16]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests  
from pandas.io.json import json_normalize 

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [17]:
address = 'Toronto'

geolocator = Nominatim(user_agent="toronto")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [18]:
# create map of Toronto using latitude and longitude values

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto_coordinates['Latitude'], df_toronto_coordinates['Longitude'], df_toronto['Borough'], df_toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

<folium.vector_layers.CircleMarker at 0x12426c663c8>

<folium.vector_layers.CircleMarker at 0x12426c66550>

<folium.vector_layers.CircleMarker at 0x12426c666a0>

<folium.vector_layers.CircleMarker at 0x12426c667f0>

<folium.vector_layers.CircleMarker at 0x12426c66940>

<folium.vector_layers.CircleMarker at 0x12426c66a90>

<folium.vector_layers.CircleMarker at 0x12426c66be0>

<folium.vector_layers.CircleMarker at 0x12426c66d30>

<folium.vector_layers.CircleMarker at 0x12426c66e80>

<folium.vector_layers.CircleMarker at 0x12426c66fd0>

<folium.vector_layers.CircleMarker at 0x12426c7d128>

<folium.vector_layers.CircleMarker at 0x12426c7d2b0>

<folium.vector_layers.CircleMarker at 0x12426c7d400>

<folium.vector_layers.CircleMarker at 0x12426c7d550>

<folium.vector_layers.CircleMarker at 0x12426c7d6a0>

<folium.vector_layers.CircleMarker at 0x12426c7d7f0>

<folium.vector_layers.CircleMarker at 0x12426c7d940>

<folium.vector_layers.CircleMarker at 0x12426c7da90>

<folium.vector_layers.CircleMarker at 0x12426c7dbe0>

<folium.vector_layers.CircleMarker at 0x12426c7dd30>

<folium.vector_layers.CircleMarker at 0x12426c7de80>

<folium.vector_layers.CircleMarker at 0x12426c7dfd0>

<folium.vector_layers.CircleMarker at 0x12426c90128>

<folium.vector_layers.CircleMarker at 0x12426c902b0>

<folium.vector_layers.CircleMarker at 0x12426c90400>

<folium.vector_layers.CircleMarker at 0x12426c90550>

<folium.vector_layers.CircleMarker at 0x12426c906a0>

<folium.vector_layers.CircleMarker at 0x12426c907f0>

<folium.vector_layers.CircleMarker at 0x12426c90940>

<folium.vector_layers.CircleMarker at 0x12426c90a90>

<folium.vector_layers.CircleMarker at 0x12426c90be0>

<folium.vector_layers.CircleMarker at 0x12426c90d30>

<folium.vector_layers.CircleMarker at 0x12426c90e80>

<folium.vector_layers.CircleMarker at 0x12426c90fd0>

<folium.vector_layers.CircleMarker at 0x12426c9e128>

<folium.vector_layers.CircleMarker at 0x12426c9e2b0>

<folium.vector_layers.CircleMarker at 0x12426c9e400>

<folium.vector_layers.CircleMarker at 0x12426c9e550>

<folium.vector_layers.CircleMarker at 0x12426c9e6a0>

<folium.vector_layers.CircleMarker at 0x12426c9e7f0>

<folium.vector_layers.CircleMarker at 0x12426c9e940>

<folium.vector_layers.CircleMarker at 0x12426c9ea58>

<folium.vector_layers.CircleMarker at 0x12426c9eb70>

<folium.vector_layers.CircleMarker at 0x12426c9ecc0>

<folium.vector_layers.CircleMarker at 0x12426c9ee10>

<folium.vector_layers.CircleMarker at 0x12426c9ef60>

<folium.vector_layers.CircleMarker at 0x12426c9efd0>

<folium.vector_layers.CircleMarker at 0x12426cb6208>

<folium.vector_layers.CircleMarker at 0x12426cb6390>

<folium.vector_layers.CircleMarker at 0x12426cb64e0>

<folium.vector_layers.CircleMarker at 0x12426cb6630>

<folium.vector_layers.CircleMarker at 0x12426cb6780>

<folium.vector_layers.CircleMarker at 0x12426cb68d0>

<folium.vector_layers.CircleMarker at 0x12426cb6a20>

<folium.vector_layers.CircleMarker at 0x12426cb6b70>

<folium.vector_layers.CircleMarker at 0x12426cb6cc0>

<folium.vector_layers.CircleMarker at 0x12426cb6e10>

<folium.vector_layers.CircleMarker at 0x12426cb6f60>

<folium.vector_layers.CircleMarker at 0x12426cb6fd0>

<folium.vector_layers.CircleMarker at 0x12426cc6208>

<folium.vector_layers.CircleMarker at 0x12426cc6390>

<folium.vector_layers.CircleMarker at 0x12426cc64e0>

<folium.vector_layers.CircleMarker at 0x12426cc6630>

<folium.vector_layers.CircleMarker at 0x12426cc6780>

<folium.vector_layers.CircleMarker at 0x12426cc68d0>

<folium.vector_layers.CircleMarker at 0x12426cc6a20>

<folium.vector_layers.CircleMarker at 0x12426cc6b70>

<folium.vector_layers.CircleMarker at 0x12426cc6cc0>

<folium.vector_layers.CircleMarker at 0x12426cc6e10>

<folium.vector_layers.CircleMarker at 0x12426cc6f60>

<folium.vector_layers.CircleMarker at 0x12426cc6e80>

<folium.vector_layers.CircleMarker at 0x12426cbe208>

<folium.vector_layers.CircleMarker at 0x12426cbe390>

<folium.vector_layers.CircleMarker at 0x12426cbe4e0>

<folium.vector_layers.CircleMarker at 0x12426cbe630>

<folium.vector_layers.CircleMarker at 0x12426cbe780>

<folium.vector_layers.CircleMarker at 0x12426cbe8d0>

<folium.vector_layers.CircleMarker at 0x12426cbea20>

<folium.vector_layers.CircleMarker at 0x12426cbeb70>

<folium.vector_layers.CircleMarker at 0x12426cbecc0>

<folium.vector_layers.CircleMarker at 0x12426cbee10>

<folium.vector_layers.CircleMarker at 0x12426cbef60>

<folium.vector_layers.CircleMarker at 0x12426cbef28>

<folium.vector_layers.CircleMarker at 0x12426ce5208>

<folium.vector_layers.CircleMarker at 0x12426ce5390>

<folium.vector_layers.CircleMarker at 0x12426ce54e0>

<folium.vector_layers.CircleMarker at 0x12426ce5630>

<folium.vector_layers.CircleMarker at 0x12426ce5780>

<folium.vector_layers.CircleMarker at 0x12426ce58d0>

<folium.vector_layers.CircleMarker at 0x12426ce5a20>

<folium.vector_layers.CircleMarker at 0x12426ce5b70>

<folium.vector_layers.CircleMarker at 0x12426ce5cc0>

<folium.vector_layers.CircleMarker at 0x12426ce5e10>

<folium.vector_layers.CircleMarker at 0x12426ce5f60>

<folium.vector_layers.CircleMarker at 0x12426ce5fd0>

<folium.vector_layers.CircleMarker at 0x12426cf4208>

<folium.vector_layers.CircleMarker at 0x12426cf4390>

<folium.vector_layers.CircleMarker at 0x12426cf44e0>

<folium.vector_layers.CircleMarker at 0x12426cf4630>

<folium.vector_layers.CircleMarker at 0x12426cf4780>

<folium.vector_layers.CircleMarker at 0x12426cf48d0>

<folium.vector_layers.CircleMarker at 0x12426cf4a20>

<folium.vector_layers.CircleMarker at 0x12426cf4b70>

# Filtering the borough containing the word "Toronto"

In [19]:
df_toronto_only = df_toronto_coordinates[df_toronto_coordinates['Borough'].str.contains('Toronto')]
df_toronto_only.reset_index(inplace=True)
df_toronto_only.drop('index', axis=1, inplace=True)
df_toronto_only.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [20]:
#Check the number of neighborhoods

print(df_toronto_only.groupby('Borough').count()['Neighborhood'])

Borough
Central Toronto      9
Downtown Toronto    18
East Toronto         5
West Toronto         6
Name: Neighborhood, dtype: int64


In [21]:
#Create list with the Boroughs

boroughs = df_toronto_only['Borough'].unique().tolist()
boroughs

['East Toronto', 'Central Toronto', 'Downtown Toronto', 'West Toronto']

In [22]:
borough_color = {}
for borough in boroughs:
    borough_color[borough]= '#%02X%02X%02X' % tuple(np.random.choice(range(256), size=3)) #Random color

In [50]:
map_toronto_only = folium.Map(location=[latitude, longitude], zoom_start=12);

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto_only['Latitude'], 
                                           df_toronto_only['Longitude'],
                                           df_toronto_only['Borough'], 
                                           df_toronto_only['Neighborhood']):
    label_text = borough + ' - ' + neighborhood
    label = folium.Popup(label_text);
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=borough_color[borough],
        fill_color=borough_color[borough],
        fill_opacity=0.7).add_to(map_toronto_only);
    
map_toronto_only

<folium.vector_layers.CircleMarker at 0x12426f1f908>

<folium.vector_layers.CircleMarker at 0x12426f1fa58>

<folium.vector_layers.CircleMarker at 0x12426f1fdd8>

<folium.vector_layers.CircleMarker at 0x12426f1fe80>

<folium.vector_layers.CircleMarker at 0x12426f1fb70>

<folium.vector_layers.CircleMarker at 0x12426f293c8>

<folium.vector_layers.CircleMarker at 0x12426f29550>

<folium.vector_layers.CircleMarker at 0x12426f29400>

<folium.vector_layers.CircleMarker at 0x12426f29048>

<folium.vector_layers.CircleMarker at 0x12426f29588>

<folium.vector_layers.CircleMarker at 0x12426f29828>

<folium.vector_layers.CircleMarker at 0x12426f29a90>

<folium.vector_layers.CircleMarker at 0x12426f29940>

<folium.vector_layers.CircleMarker at 0x12426f29da0>

<folium.vector_layers.CircleMarker at 0x12426f29ef0>

<folium.vector_layers.CircleMarker at 0x12426edfb70>

<folium.vector_layers.CircleMarker at 0x12426edf9e8>

<folium.vector_layers.CircleMarker at 0x12426edf5f8>

<folium.vector_layers.CircleMarker at 0x12426edff98>

<folium.vector_layers.CircleMarker at 0x12426edf400>

<folium.vector_layers.CircleMarker at 0x12426edfcf8>

<folium.vector_layers.CircleMarker at 0x12426edf390>

<folium.vector_layers.CircleMarker at 0x12426edf128>

<folium.vector_layers.CircleMarker at 0x12426edf898>

<folium.vector_layers.CircleMarker at 0x12426edf8d0>

<folium.vector_layers.CircleMarker at 0x12426edfb00>

<folium.vector_layers.CircleMarker at 0x12426f55b38>

<folium.vector_layers.CircleMarker at 0x12426f55e10>

<folium.vector_layers.CircleMarker at 0x12426f55358>

<folium.vector_layers.CircleMarker at 0x12426f553c8>

<folium.vector_layers.CircleMarker at 0x12426f554e0>

<folium.vector_layers.CircleMarker at 0x12426f558d0>

<folium.vector_layers.CircleMarker at 0x12426f556d8>

<folium.vector_layers.CircleMarker at 0x12426f557f0>

<folium.vector_layers.CircleMarker at 0x12426f551d0>

<folium.vector_layers.CircleMarker at 0x12426efeb00>

<folium.vector_layers.CircleMarker at 0x12426efe3c8>

<folium.vector_layers.CircleMarker at 0x12426efe630>

# Getting Venue Data using FourSquare

In [51]:
# @hidden_cell
CLIENT_ID = 'GRNJBBGR40SI3E1EFEZDSWN5W3FWYZKLOIBK0RMJQ4YYSB3T' # your Foursquare ID
CLIENT_SECRET = 'B0LAURPRNALT4VF0BYS41GAPF21RJLFMNK3NQCJOW4XOWPC1' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: GRNJBBGR40SI3E1EFEZDSWN5W3FWYZKLOIBK0RMJQ4YYSB3T
CLIENT_SECRET:B0LAURPRNALT4VF0BYS41GAPF21RJLFMNK3NQCJOW4XOWPC1


In [25]:
radius = 500
LIMIT = 100

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [26]:
# Venues around Toronto area

toronto_venues = getNearbyVenues(names=df_toronto_only['Neighborhood'],
                                   latitudes=df_toronto_only['Latitude'],
                                   longitudes=df_toronto_only['Longitude']
                                )

The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North, Forest Hill West
The Annex, North Midtown, Yorkville
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Park, The 

In [27]:
toronto_venues.head(10)

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,The Beaches,43.676357,-79.293031,Dip 'n Sip,43.678897,-79.297745,Coffee Shop
5,"The Danforth West, Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant
6,"The Danforth West, Riverdale",43.679557,-79.352188,Dolce Gelato,43.677773,-79.351187,Ice Cream Shop
7,"The Danforth West, Riverdale",43.679557,-79.352188,MenEssentials,43.67782,-79.351265,Cosmetics Shop
8,"The Danforth West, Riverdale",43.679557,-79.352188,Cafe Fiorentina,43.677743,-79.350115,Italian Restaurant
9,"The Danforth West, Riverdale",43.679557,-79.352188,La Diperie,43.67753,-79.352295,Ice Cream Shop


In [28]:
toronto_venues.shape

(1699, 7)

In [29]:
# Number of venues per neighborhood

toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,57,57,57,57,57,57
"Brockton, Exhibition Place, Parkdale Village",21,21,21,21,21,21
Business Reply Mail Processing Centre 969 Eastern,19,19,19,19,19,19
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",14,14,14,14,14,14
"Cabbagetown, St. James Town",45,45,45,45,45,45
Central Bay Street,84,84,84,84,84,84
"Chinatown, Grange Park, Kensington Market",100,100,100,100,100,100
Christie,15,15,15,15,15,15
Church and Wellesley,84,84,84,84,84,84


In [30]:
# Number of unique venue category around toronto

print("Number of unique venue category is " + str(len(toronto_venues['Venue Category'].unique())))
t_t=toronto_venues.groupby(['Venue Category']).count()
t_t.sort_values(['Venue'],ascending = False)['Venue']

# There are 34 pizza place around Toronto region.

Number of unique venue category is 239


Venue Category
Coffee Shop                        143
Café                                87
Restaurant                          53
Italian Restaurant                  47
Bakery                              43
Hotel                               39
Bar                                 37
Park                                35
Pizza Place                         34
Gym                                 25
Japanese Restaurant                 25
American Restaurant                 23
Gastropub                           23
Sandwich Place                      22
Seafood Restaurant                  22
Steakhouse                          21
Breakfast Spot                      21
Thai Restaurant                     21
Ice Cream Shop                      20
Pub                                 20
Sushi Restaurant                    19
Burger Joint                        19
Vegetarian / Vegan Restaurant       19
Clothing Store                      18
Diner                               18
Beer Bar  

# Preparing to analyze the Pizza place around Toronto

Starting with analysing the neighborhood

In [31]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

toronto_onehot.head(5)

Unnamed: 0,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio,Neighbourhood
0,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,The Beaches
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,The Beaches
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,The Beaches
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,The Beaches
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,The Beaches


In [32]:
toronto_onehot.shape

(1699, 240)

Grouping rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [33]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.071429,0.071429,0.071429,0.142857,0.142857,0.142857,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,...,0.0,0.0,0.0,0.011905,0.0,0.011905,0.0,0.011905,0.0,0.011905
7,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.06,0.0,0.0,0.04,0.01,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,...,0.0,0.0,0.0,0.0,0.011905,0.0,0.011905,0.0,0.0,0.011905


Since, we are analysing the pizza place. Creating a dataframe with pizza places in the Toronto neighbourhood

In [34]:
print("There are "+ str(len(toronto_grouped[toronto_grouped["Pizza Place"] > 0])) +" neighborhood with pizza place around toronto")

There are 21 neighborhood with pizza place around toronto


In [35]:
toronto_pizza_only = toronto_grouped[["Neighbourhood","Pizza Place"]]
toronto_pizza_only.head(5)
toronto_pizza_only.shape

Unnamed: 0,Neighbourhood,Pizza Place
0,"Adelaide, King, Richmond",0.02
1,Berczy Park,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.052632
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0


(38, 2)

# Clustering Neighborhood

Run k-means to cluster the neighborhoods with "Pizza place" in Toronto into 3 clusters.

In [36]:
# set number of clusters
kclusters = 3

toronto_grouped_clustering = toronto_pizza_only.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:30] 

array([0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 2, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 1])

In [37]:
# add clustering labels

toronto_pizza_only.insert(0, 'Cluster Labels', kmeans.labels_)
toronto_pizza_only.head(10)
toronto_pizza_only.shape

Unnamed: 0,Cluster Labels,Neighbourhood,Pizza Place
0,0,"Adelaide, King, Richmond",0.02
1,0,Berczy Park,0.0
2,0,"Brockton, Exhibition Place, Parkdale Village",0.0
3,1,Business Reply Mail Processing Centre 969 Eastern,0.052632
4,0,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0
5,1,"Cabbagetown, St. James Town",0.044444
6,0,Central Bay Street,0.011905
7,0,"Chinatown, Grange Park, Kensington Market",0.01
8,0,Christie,0.0
9,0,Church and Wellesley,0.011905


(38, 3)

In [38]:
toronto_venues_pizza = toronto_pizza_only.join(toronto_venues.set_index('Neighbourhood'), on='Neighbourhood')

toronto_venues_pizza.head()

Unnamed: 0,Cluster Labels,Neighbourhood,Pizza Place,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,0,"Adelaide, King, Richmond",0.02,43.650571,-79.384568,Four Seasons Centre for the Performing Arts,43.650592,-79.385806,Concert Hall
0,0,"Adelaide, King, Richmond",0.02,43.650571,-79.384568,The Keg Steakhouse & Bar,43.649937,-79.384196,Steakhouse
0,0,"Adelaide, King, Richmond",0.02,43.650571,-79.384568,Nathan Phillips Square,43.65227,-79.383516,Plaza
0,0,"Adelaide, King, Richmond",0.02,43.650571,-79.384568,Rosalinda,43.650252,-79.385156,Vegetarian / Vegan Restaurant
0,0,"Adelaide, King, Richmond",0.02,43.650571,-79.384568,Shangri-La Toronto,43.649129,-79.386557,Hotel


In [39]:
toronto_venues_pizza.shape

(1699, 9)

In [40]:
toronto_venues_pizza.sort_values(["Cluster Labels"], inplace=True)
toronto_venues_pizza

Unnamed: 0,Cluster Labels,Neighbourhood,Pizza Place,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,0,"Adelaide, King, Richmond",0.020000,43.650571,-79.384568,Four Seasons Centre for the Performing Arts,43.650592,-79.385806,Concert Hall
20,0,"Harbourfront, Regent Park",0.000000,43.654260,-79.360636,Alumnae Theatre,43.652756,-79.364753,Theater
20,0,"Harbourfront, Regent Park",0.000000,43.654260,-79.360636,Dark Horse Espresso Bar,43.653081,-79.357078,Coffee Shop
20,0,"Harbourfront, Regent Park",0.000000,43.654260,-79.360636,The Sweet Escape Patisserie,43.650632,-79.358709,Bakery
20,0,"Harbourfront, Regent Park",0.000000,43.654260,-79.360636,ODIN Cafe + Bar,43.656739,-79.356503,Café
20,0,"Harbourfront, Regent Park",0.000000,43.654260,-79.360636,Parliament Square Park,43.650264,-79.362195,Park
20,0,"Harbourfront, Regent Park",0.000000,43.654260,-79.360636,Cocina Economica,43.654959,-79.365657,Mexican Restaurant
20,0,"Harbourfront, Regent Park",0.000000,43.654260,-79.360636,Cluny Bistro & Boulangerie,43.650565,-79.357843,French Restaurant
20,0,"Harbourfront, Regent Park",0.000000,43.654260,-79.360636,El Catrin,43.650601,-79.358920,Mexican Restaurant
20,0,"Harbourfront, Regent Park",0.000000,43.654260,-79.360636,Caffe Furbo,43.649970,-79.358849,Café


In [41]:
toronto_venues_pizza_clusters = toronto_venues_pizza[toronto_venues_pizza['Venue Category'] == 'Pizza Place']
toronto_venues_pizza_clusters.shape
toronto_venues_pizza_clusters

(34, 9)

Unnamed: 0,Cluster Labels,Neighbourhood,Pizza Place,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
30,0,"Ryerson, Garden District",0.02,43.657162,-79.378937,Blaze Pizza,43.656518,-79.380015,Pizza Place
14,0,"Design Exchange, Toronto Dominion Centre",0.02,43.647177,-79.381576,Pi Co.,43.648651,-79.385874,Pizza Place
16,0,"First Canadian Place, Underground city",0.02,43.648429,-79.38228,Pizzeria Libretto,43.648334,-79.385111,Pizza Place
16,0,"First Canadian Place, Underground city",0.02,43.648429,-79.38228,Pi Co.,43.648651,-79.385874,Pizza Place
32,0,Stn A PO Boxes 25 The Esplanade,0.010417,43.646435,-79.374846,Pizzaiolo,43.650206,-79.376184,Pizza Place
37,0,"The Danforth West, Riverdale",0.02381,43.679557,-79.352188,Pizzeria Libretto,43.678489,-79.347576,Pizza Place
30,0,"Ryerson, Garden District",0.02,43.657162,-79.378937,Panago,43.658258,-79.384313,Pizza Place
31,0,St. James Town,0.01,43.651494,-79.375418,Pizzaiolo,43.650206,-79.376184,Pizza Place
14,0,"Design Exchange, Toronto Dominion Centre",0.02,43.647177,-79.381576,Pizzeria Libretto,43.648334,-79.385111,Pizza Place
6,0,Central Bay Street,0.011905,43.657952,-79.387383,Boston Pizza,43.659338,-79.38226,Pizza Place


Cluster Visualization

In [42]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_venues_pizza_clusters['Venue Latitude'], toronto_venues_pizza_clusters['Venue Longitude'], toronto_venues_pizza_clusters['Neighbourhood'], toronto_venues_pizza_clusters['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<folium.vector_layers.CircleMarker at 0x12426f78048>

<folium.vector_layers.CircleMarker at 0x12426e88c18>

<folium.vector_layers.CircleMarker at 0x12426e884e0>

<folium.vector_layers.CircleMarker at 0x12426e88ac8>

<folium.vector_layers.CircleMarker at 0x12426e88d30>

<folium.vector_layers.CircleMarker at 0x12426e88dd8>

<folium.vector_layers.CircleMarker at 0x12426e88b70>

<folium.vector_layers.CircleMarker at 0x12426e88828>

<folium.vector_layers.CircleMarker at 0x12426e882b0>

<folium.vector_layers.CircleMarker at 0x12426d6fe48>

<folium.vector_layers.CircleMarker at 0x12426d6f6d8>

<folium.vector_layers.CircleMarker at 0x12426d6fb00>

<folium.vector_layers.CircleMarker at 0x12426d6f710>

<folium.vector_layers.CircleMarker at 0x12426d6f6a0>

<folium.vector_layers.CircleMarker at 0x12426d6f400>

<folium.vector_layers.CircleMarker at 0x12426f46eb8>

<folium.vector_layers.CircleMarker at 0x12426f465c0>

<folium.vector_layers.CircleMarker at 0x12426f46240>

<folium.vector_layers.CircleMarker at 0x12426f46748>

<folium.vector_layers.CircleMarker at 0x12426f46438>

<folium.vector_layers.CircleMarker at 0x12426f469b0>

<folium.vector_layers.CircleMarker at 0x12426f46c50>

<folium.vector_layers.CircleMarker at 0x12426f46c88>

<folium.vector_layers.CircleMarker at 0x12426f464a8>

<folium.vector_layers.CircleMarker at 0x12426f46be0>

<folium.vector_layers.CircleMarker at 0x12426f31160>

<folium.vector_layers.CircleMarker at 0x12426f31a20>

<folium.vector_layers.CircleMarker at 0x12426f31550>

<folium.vector_layers.CircleMarker at 0x12426f31588>

<folium.vector_layers.CircleMarker at 0x12426f31fd0>

<folium.vector_layers.CircleMarker at 0x12426f31320>

<folium.vector_layers.CircleMarker at 0x12426f31e10>

<folium.vector_layers.CircleMarker at 0x12426f31c50>

<folium.vector_layers.CircleMarker at 0x12426f31c18>

In [43]:
# Merging tables to view the borough details of the clusters as well
df_toronto_only.rename(columns={'Neighborhood':'Neighbourhood'},inplace = True)
toronto_venues_pizza_borough = toronto_venues_pizza_clusters.join(df_toronto_only.set_index('Neighbourhood'), on='Neighbourhood')
toronto_venues_pizza_borough.shape
toronto_venues_pizza_borough.head(10)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


(34, 13)

Unnamed: 0,Cluster Labels,Neighbourhood,Pizza Place,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,PostalCode,Borough,Latitude,Longitude
30,0,"Ryerson, Garden District",0.02,43.657162,-79.378937,Blaze Pizza,43.656518,-79.380015,Pizza Place,M5B,Downtown Toronto,43.657162,-79.378937
14,0,"Design Exchange, Toronto Dominion Centre",0.02,43.647177,-79.381576,Pi Co.,43.648651,-79.385874,Pizza Place,M5K,Downtown Toronto,43.647177,-79.381576
16,0,"First Canadian Place, Underground city",0.02,43.648429,-79.38228,Pizzeria Libretto,43.648334,-79.385111,Pizza Place,M5X,Downtown Toronto,43.648429,-79.38228
16,0,"First Canadian Place, Underground city",0.02,43.648429,-79.38228,Pi Co.,43.648651,-79.385874,Pizza Place,M5X,Downtown Toronto,43.648429,-79.38228
32,0,Stn A PO Boxes 25 The Esplanade,0.010417,43.646435,-79.374846,Pizzaiolo,43.650206,-79.376184,Pizza Place,M5W,Downtown Toronto,43.646435,-79.374846
37,0,"The Danforth West, Riverdale",0.02381,43.679557,-79.352188,Pizzeria Libretto,43.678489,-79.347576,Pizza Place,M4K,East Toronto,43.679557,-79.352188
30,0,"Ryerson, Garden District",0.02,43.657162,-79.378937,Panago,43.658258,-79.384313,Pizza Place,M5B,Downtown Toronto,43.657162,-79.378937
31,0,St. James Town,0.01,43.651494,-79.375418,Pizzaiolo,43.650206,-79.376184,Pizza Place,M5C,Downtown Toronto,43.651494,-79.375418
14,0,"Design Exchange, Toronto Dominion Centre",0.02,43.647177,-79.381576,Pizzeria Libretto,43.648334,-79.385111,Pizza Place,M5K,Downtown Toronto,43.647177,-79.381576
6,0,Central Bay Street,0.011905,43.657952,-79.387383,Boston Pizza,43.659338,-79.38226,Pizza Place,M5G,Downtown Toronto,43.657952,-79.387383


In [44]:
# For convience sake, removing the columns "PostalCode" (as not needed), "Latitude" (repeated), "Longitude" (repeated)
# and "venue category" as we know they are pizza place

toronto_final = toronto_venues_pizza_borough.drop(['PostalCode', 'Latitude','Longitude','Venue Category'], axis = 1)
toronto_final.head(10)

Unnamed: 0,Cluster Labels,Neighbourhood,Pizza Place,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Borough
30,0,"Ryerson, Garden District",0.02,43.657162,-79.378937,Blaze Pizza,43.656518,-79.380015,Downtown Toronto
14,0,"Design Exchange, Toronto Dominion Centre",0.02,43.647177,-79.381576,Pi Co.,43.648651,-79.385874,Downtown Toronto
16,0,"First Canadian Place, Underground city",0.02,43.648429,-79.38228,Pizzeria Libretto,43.648334,-79.385111,Downtown Toronto
16,0,"First Canadian Place, Underground city",0.02,43.648429,-79.38228,Pi Co.,43.648651,-79.385874,Downtown Toronto
32,0,Stn A PO Boxes 25 The Esplanade,0.010417,43.646435,-79.374846,Pizzaiolo,43.650206,-79.376184,Downtown Toronto
37,0,"The Danforth West, Riverdale",0.02381,43.679557,-79.352188,Pizzeria Libretto,43.678489,-79.347576,East Toronto
30,0,"Ryerson, Garden District",0.02,43.657162,-79.378937,Panago,43.658258,-79.384313,Downtown Toronto
31,0,St. James Town,0.01,43.651494,-79.375418,Pizzaiolo,43.650206,-79.376184,Downtown Toronto
14,0,"Design Exchange, Toronto Dominion Centre",0.02,43.647177,-79.381576,Pizzeria Libretto,43.648334,-79.385111,Downtown Toronto
6,0,Central Bay Street,0.011905,43.657952,-79.387383,Boston Pizza,43.659338,-79.38226,Downtown Toronto


In [45]:
#Cluster 0
toronto_final.loc[toronto_final['Cluster Labels'] == 0].shape
toronto_final.loc[toronto_final['Cluster Labels'] == 0]

(15, 9)

Unnamed: 0,Cluster Labels,Neighbourhood,Pizza Place,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Borough
30,0,"Ryerson, Garden District",0.02,43.657162,-79.378937,Blaze Pizza,43.656518,-79.380015,Downtown Toronto
14,0,"Design Exchange, Toronto Dominion Centre",0.02,43.647177,-79.381576,Pi Co.,43.648651,-79.385874,Downtown Toronto
16,0,"First Canadian Place, Underground city",0.02,43.648429,-79.38228,Pizzeria Libretto,43.648334,-79.385111,Downtown Toronto
16,0,"First Canadian Place, Underground city",0.02,43.648429,-79.38228,Pi Co.,43.648651,-79.385874,Downtown Toronto
32,0,Stn A PO Boxes 25 The Esplanade,0.010417,43.646435,-79.374846,Pizzaiolo,43.650206,-79.376184,Downtown Toronto
37,0,"The Danforth West, Riverdale",0.02381,43.679557,-79.352188,Pizzeria Libretto,43.678489,-79.347576,East Toronto
30,0,"Ryerson, Garden District",0.02,43.657162,-79.378937,Panago,43.658258,-79.384313,Downtown Toronto
31,0,St. James Town,0.01,43.651494,-79.375418,Pizzaiolo,43.650206,-79.376184,Downtown Toronto
14,0,"Design Exchange, Toronto Dominion Centre",0.02,43.647177,-79.381576,Pizzeria Libretto,43.648334,-79.385111,Downtown Toronto
6,0,Central Bay Street,0.011905,43.657952,-79.387383,Boston Pizza,43.659338,-79.38226,Downtown Toronto


In [46]:
#Cluster 1
toronto_final.loc[toronto_final['Cluster Labels'] == 1].shape
toronto_final.loc[toronto_final['Cluster Labels'] == 1]

(12, 9)

Unnamed: 0,Cluster Labels,Neighbourhood,Pizza Place,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Borough
3,1,Business Reply Mail Processing Centre 969 Eastern,0.052632,43.662744,-79.321558,Queen Margherita Pizza,43.664685,-79.324164,East Toronto
13,1,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",0.066667,43.686412,-79.400049,Pizzaiolo,43.687991,-79.394634,Central Toronto
23,1,"Little Portugal, Trinity",0.030769,43.647927,-79.41975,Pizzeria Libretto,43.648979,-79.420604,West Toronto
15,1,"Dovercourt Village, Dufferin",0.0625,43.669005,-79.442259,North Of Brooklyn Pizzeria,43.669467,-79.439335,West Toronto
23,1,"Little Portugal, Trinity",0.030769,43.647927,-79.41975,Superpoint,43.648439,-79.420514,West Toronto
19,1,"Harbourfront East, Toronto Islands, Union Station",0.03,43.640816,-79.381752,Pie Bar,43.638174,-79.380546,Downtown Toronto
19,1,"Harbourfront East, Toronto Islands, Union Station",0.03,43.640816,-79.381752,Panago,43.642518,-79.383591,Downtown Toronto
5,1,"Cabbagetown, St. James Town",0.044444,43.667967,-79.367675,Pizza Pizza,43.667179,-79.369669,Downtown Toronto
5,1,"Cabbagetown, St. James Town",0.044444,43.667967,-79.367675,Pizza Pizza,43.66809,-79.370274,Downtown Toronto
19,1,"Harbourfront East, Toronto Islands, Union Station",0.03,43.640816,-79.381752,Pizza Pizza,43.639693,-79.3818,Downtown Toronto


In [47]:
#Cluster 2
toronto_final.loc[toronto_final['Cluster Labels'] == 2].shape
toronto_final.loc[toronto_final['Cluster Labels'] == 2]

(7, 9)

Unnamed: 0,Cluster Labels,Neighbourhood,Pizza Place,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Borough
36,2,"The Beaches West, India Bazaar",0.111111,43.668999,-79.315572,Pizza Nova,43.666462,-79.316485,East Toronto
36,2,"The Beaches West, India Bazaar",0.111111,43.668999,-79.315572,Pizzaiolo,43.668953,-79.311683,East Toronto
11,2,Davisville,0.085714,43.704324,-79.38879,Pizza Pizza,43.706138,-79.389292,Central Toronto
11,2,Davisville,0.085714,43.704324,-79.38879,Provocative Pizza Series,43.708293,-79.389546,Central Toronto
11,2,Davisville,0.085714,43.704324,-79.38879,Viva Napoli,43.705752,-79.389125,Central Toronto
34,2,"The Annex, North Midtown, Yorkville",0.083333,43.67271,-79.405678,Martino's Pizza,43.67556,-79.403558,Central Toronto
34,2,"The Annex, North Midtown, Yorkville",0.083333,43.67271,-79.405678,Magic Oven,43.674895,-79.406994,Central Toronto


# Observation obtained

## Most of the pizza place are located in Downtown toronto. There are very less pizza place in the East and West toronto regions. So, it would be best place to invest in opening pizza places in these regions 