# Gathering the Toronto Neighbourhoods

In [1]:
import numpy as np
import pandas as pd

# Parse every HTML table on the Wikipedia page; the first parsed table is used as the postal-code listing.
df_raw = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')

# Rename on a new frame (no inplace=True) so the parsed table in df_raw stays as downloaded.
df = df_raw[0].rename(columns={'Postcode': 'Postalcode'})

# The cleaning cells below index these three columns; fail here with a clear
# message if the page layout changed, rather than with a KeyError later on.
assert {'Postalcode', 'Borough', 'Neighbourhood'} <= set(df.columns), \
    'Unexpected table layout, got columns: {}'.format(list(df.columns))

1. Get rid of all rows for which **Borough** is 'Not assigned'

In [2]:
# Keep only the rows whose Borough is not 'Not assigned', then renumber the index from 0.
df = df[df['Borough'] != 'Not assigned'].reset_index(drop=True)

2. Set the **Neighbourhood** to be the same as **Borough** if the **Neighbourhood** is 'Not assigned'

In [3]:
# Where the Neighbourhood is 'Not assigned', fall back to the Borough value of the same row.
df['Neighbourhood'] = df['Neighbourhood'].mask(df['Neighbourhood'] == 'Not assigned', df['Borough'])

3. Merge all the rows for **Neighbourhood** into a single record joined by a comma, grouped by the **Postalcode** and **Borough**

In [4]:
# One row per (Postalcode, Borough) pair, with that pair's neighbourhood names
# concatenated into a single comma-separated string.
hoods = (
    df.groupby(['Postalcode', 'Borough'])['Neighbourhood']
    .apply(','.join)
    .reset_index()
)

In [5]:
hoods.head()

Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [6]:
hoods.shape

(103, 3)

# Adding the Latitude + Longitude to Toronto Neighbourhoods

1. Grabbing the geospatial data from CSV, rename the fields so they align

Had to re-do this as the publicly available CSV was returning 503, so found a different source.

In [7]:
import requests
import zipfile

# Download the GeoNames postal-code archive for Canada.
url = 'http://download.geonames.org/export/zip/CA.zip'
zipname = 'CA.zip'
r = requests.get(url, timeout=60)
# Stop on an HTTP error instead of saving an error page as CA.zip, which would
# only surface later as a confusing zip-format failure.
r.raise_for_status()

# Save the archive next to the notebook ...
with open(zipname, 'wb') as zip_file:
    zip_file.write(r.content)

# ... and unpack it into the working directory; CA.txt is read from there below.
with zipfile.ZipFile(zipname, 'r') as archive:
    archive.extractall()

# CA.txt is tab-separated with no header row, so the column names are supplied here.
lat_lon = pd.read_csv(
    'CA.txt',
    sep='\t',
    header=None,
    names=['Countrycode', 'Postalcode', 'Placename',
           'Adminname1', 'Admincode1', 'Adminname2', 'Admincode2',
           'Adminname3', 'Admincode3', 'Latitude', 'Longitude', 'Accuracy'],
)
lat_lon.head()


Unnamed: 0,Countrycode,Postalcode,Placename,Adminname1,Admincode1,Adminname2,Admincode2,Adminname3,Admincode3,Latitude,Longitude,Accuracy
0,CA,T0A,Eastern Alberta (St. Paul),Alberta,AB,,,,,54.766,-111.7174,6.0
1,CA,T0B,Wainwright Region (Tofield),Alberta,AB,,,,,53.0727,-111.5816,6.0
2,CA,T0C,Central Alberta (Stettler),Alberta,AB,,,,,52.1431,-111.6941,5.0
3,CA,T0E,Western Alberta (Jasper),Alberta,AB,,,,,53.6758,-115.0948,5.0
4,CA,T0G,North Central Alberta (Slave Lake),Alberta,AB,,,,,55.6993,-114.4529,6.0


2. Merging the geospatial data into the original list, joined by Postalcode

In [8]:
# Inner join on Postalcode: attach coordinates to each neighbourhood row and
# drop postal codes that have no entry in lat_lon.
hoods_lat_lon = pd.merge(
    hoods,
    lat_lon.loc[:, ['Postalcode', 'Latitude', 'Longitude']],
    how='inner',
    on='Postalcode',
)

In [9]:
hoods_lat_lon.head()

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.8113,-79.193
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.7878,-79.1564
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.7678,-79.1866
3,M1G,Scarborough,Woburn,43.7712,-79.2144
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389


In [10]:
hoods_lat_lon.shape

(102, 5)

Note that the resulting data frame has one fewer record.  The missing record is the postal code of a mail processing centre, which we can ignore for the purposes of this exercise.

In [11]:
# Rows of hoods whose postal code did not survive the inner join. Testing
# membership directly avoids building a second merged frame and relying on its
# row index lining up with hoods.
hoods[~hoods['Postalcode'].isin(hoods_lat_lon['Postalcode'])]

Unnamed: 0,Postalcode,Borough,Neighbourhood
86,M7R,Mississauga,Canada Post Gateway Processing Centre


# Creating some maps for Toronto

1. Grab all the libraries we're going to need

In [12]:
# Install geopy (geocoding) and folium (map rendering, pinned to 0.5.0) from conda-forge;
# --yes skips the interactive confirmation prompt.
!conda install -c conda-forge geopy --yes
!conda install -c conda-forge folium=0.5.0 --yes

Solving environment: done

# All requested packages already installed.

Solving environment: done

# All requested packages already installed.



2. Identify the overall lat/lon for Toronto for the purposes of mapping

In [13]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() yields None when the service finds no match; report that clearly
# instead of failing on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

# These coordinates are reused as the centre of every map drawn later on.
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


3. Build an initial map of Toronto

In [14]:
import folium # map rendering library

# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Draw one circle marker per row of hoods_lat_lon; its popup reads "<neighbourhoods>, <borough>".
for lat, lng, borough, neighborhood in zip(
        hoods_lat_lon['Latitude'],
        hoods_lat_lon['Longitude'],
        hoods_lat_lon['Borough'],
        hoods_lat_lon['Neighbourhood']):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    ).add_to(map_toronto)

# Last expression of the cell: renders the map inline.
map_toronto

# Exploring Toronto Neighbourhoods

Set our secrets...

In [15]:
import os

# Read the Foursquare credentials from the environment so real keys never have
# to be saved in the notebook. The 'secret' placeholder is kept as the fallback,
# so behaviour is unchanged when the variables are not set.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'secret') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'secret') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

Grab all the data from Foursquare...

In [16]:
import requests # library to handle requests

LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius (getNearbyVenues does not read this global; it has its own `radius` parameter)

def getNearbyVenues(codes, latitudes, longitudes, radius=500):
    """Collect the Foursquare "explore" venues around each postal code.

    Parameters
    ----------
    codes, latitudes, longitudes : iterables
        Iterated together with zip(); each (code, lat, lng) triple triggers
        one API request.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Postalcode', 'Latitude',
        'Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude' and
        'Venue Category'. The frame has these columns even when no venue was
        collected.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    KeyError, IndexError
        If a response lacks the expected fields (for example a venue with an
        empty category list).
    """
    column_names = ['Postalcode',
                    'Latitude',
                    'Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for code, lat, lng in zip(codes, latitudes, longitudes):

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; the timeout stops one stalled call from hanging
        # the whole loop, and raise_for_status reports HTTP errors directly
        # instead of as a KeyError on the payload
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        venue_rows.extend(
            (code,
             lat,
             lng,
             v['venue']['name'],
             v['venue']['location']['lat'],
             v['venue']['location']['lng'],
             v['venue']['categories'][0]['name'])
            for v in results)

    # Passing the column names to the constructor keeps the schema intact for
    # an empty result; assigning .columns afterwards raised a length mismatch.
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)

    return nearby_venues

In [17]:
# Fetch the nearby venues for every postal code in hoods_lat_lon (default radius).
venues = getNearbyVenues(
    hoods_lat_lon['Postalcode'],
    hoods_lat_lon['Latitude'],
    hoods_lat_lon['Longitude'],
)

In [18]:
venues.head()

Unnamed: 0,Postalcode,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M1C,43.7878,-79.1564,Fox and Fiddle,43.789082,-79.154459,Bar
1,M1E,43.7678,-79.1866,Chick-N-Joy,43.768752,-79.187982,Fried Chicken Joint
2,M1E,43.7678,-79.1866,Little Caesars Pizza,43.769046,-79.184386,Pizza Place
3,M1E,43.7678,-79.1866,LCBO,43.771462,-79.184384,Liquor Store
4,M1E,43.7678,-79.1866,Bulk Barn,43.771342,-79.184341,Food & Drink Shop


In [19]:
# Number of distinct venue categories across all collected venues.
print('There are {} uniques categories.'.format(venues['Venue Category'].unique().size))

There are 263 uniques categories.


In [20]:
# Categories whose name contains 'Restaurant': first how many distinct ones, then the five most frequent.
print('There are {} unique restaurant categories'.format(
    len(venues.loc[venues['Venue Category'].str.contains('Restaurant'), 'Venue Category'].unique())))
venues.loc[venues['Venue Category'].str.contains('Restaurant'), 'Venue Category'].value_counts().head()

There are 48 unique restaurant categories


Restaurant              73
Italian Restaurant      45
Japanese Restaurant     44
Fast Food Restaurant    39
American Restaurant     35
Name: Venue Category, dtype: int64

In [21]:
# Compare how many postal codes returned at least one venue with how many were queried.
print(
    'There are {} postal codes with venues.'.format(venues['Postalcode'].nunique()),
    'There are {} postal codes in the lat / lon list'.format(hoods_lat_lon['Postalcode'].nunique()),
    sep='\n',
)

There are 99 postal codes with venues.
There are 102 postal codes in the lat / lon list


Do the one-hot encoding on venue categories to see what kinds of venues are associated with each postal code.

In [22]:
# One indicator column per venue category (empty prefix, so each column is named
# after the category itself), with the Postalcode column placed in front.
onehot = venues[['Postalcode']].join(
    pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")
)

In [23]:
# Average the indicator columns per postal code: each value becomes the share
# of that postal code's venues falling in the category.
grouped = onehot.groupby('Postalcode', as_index=False).mean()

Now let's work out the **most common** venues in a particular postal code.  We'll start by building a function to help us out here.

In [24]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first element of ``row`` is skipped, entries that are not greater than
    zero are discarded, and the remaining labels are ordered by descending
    value. The result always has ``num_top_venues`` items: when fewer entries
    qualify, the tail is padded with NaN.
    """
    frequencies = row.iloc[1:]
    present = frequencies[frequencies > 0]

    ranked = present.sort_values(ascending=False)
    top_labels = ranked.index.values[0:num_top_venues]

    # Pad so that every row yields the same number of entries.
    padding = [np.nan] * (num_top_venues - len(top_labels))
    return np.append(top_labels, padding)

In [25]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Postalcode']
for ind in range(num_top_venues):
    # Ranks 1-3 take 'st'/'nd'/'rd', every later rank takes 'th'. An explicit
    # bounds check replaces the bare `except:`, which would also have hidden
    # any unrelated error.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe with one row per postal code in `grouped`
hood_venues_sorted = pd.DataFrame(columns=columns)
hood_venues_sorted['Postalcode'] = grouped['Postalcode']

# fill each row with that postal code's top categories (NaN-padded to num_top_venues)
for ind in range(grouped.shape[0]):
    hood_venues_sorted.iloc[ind, 1:] = return_most_common_venues(grouped.iloc[ind, :], num_top_venues)

hood_venues_sorted.head()

Unnamed: 0,Postalcode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1C,Bar,,,,,,,,,
1,M1E,Pizza Place,Grocery Store,Coffee Shop,Fast Food Restaurant,Greek Restaurant,Beer Store,Breakfast Spot,Burger Joint,Bus Line,Convenience Store
2,M1G,Korean Restaurant,,,,,,,,,
3,M1H,Trail,Lounge,Gaming Cafe,,,,,,,
4,M1J,Spa,Grocery Store,,,,,,,,


Now let's do some clustering!  Start with 4 clusters to see what we get.

In [26]:
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 4

# Features are every column of `grouped` except the postal code. The keyword
# form is used because passing the axis positionally (`drop('Postalcode', 1)`)
# is no longer accepted by pandas 2.x.
grouped_clustering = grouped.drop(columns='Postalcode')

# run k-means clustering (fixed random_state so re-runs give the same labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grouped_clustering)

# check cluster labels generated for the first rows of the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

Merge the cluster labels back into the venues data set...

In [27]:
# Attach the k-means label of each postal code to its row of top venues.
hood_venues_sorted['Cluster Labels'] = kmeans.labels_

# Combine location data with top venues and labels; the inner join keeps only
# postal codes present in both frames.
merged = hoods_lat_lon.join(
    hood_venues_sorted.set_index('Postalcode'),
    on='Postalcode',
    how='inner',
)

merged.head() # check the last columns!

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.7878,-79.1564,Bar,,,,,,,,,,1
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.7678,-79.1866,Pizza Place,Grocery Store,Coffee Shop,Fast Food Restaurant,Greek Restaurant,Beer Store,Breakfast Spot,Burger Joint,Bus Line,Convenience Store,1
3,M1G,Scarborough,Woburn,43.7712,-79.2144,Korean Restaurant,,,,,,,,,,1
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389,Trail,Lounge,Gaming Cafe,,,,,,,,1
5,M1J,Scarborough,Scarborough Village,43.7464,-79.2323,Spa,Grocery Store,,,,,,,,,1


In [28]:
hood_venues_sorted['Cluster Labels'].value_counts()

1    89
0     7
2     2
3     1
Name: Cluster Labels, dtype: int64

Make a pretty map to see what it looks like...

In [29]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster, as hex strings
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

print(rainbow)

# add markers to the map
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Postalcode'], merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels run from 0 to kclusters-1, so index the palette directly. The old
    # `cluster-1` index made cluster 0 wrap around to the last colour.
    cluster_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

['#8000ff', '#2adddd', '#d4dd80', '#ff0000']


Well, that's boring.  Toronto is pretty homogeneous, but it may just be that we have too many different features to play with.

Let's try something else.  Let's look for restaurant type density, total restaurant density and total restaurant count, and see if there's anything interesting there.

In [44]:
columns = ['Postalcode']

# Keep only the category columns whose name contains 'Restaurant'.
rest_columns = grouped.columns[grouped.columns.str.contains('Restaurant')]
columns.extend(rest_columns)
# .copy() so the two new columns are added to an independent frame, not to a slice.
grouped = grouped[columns].copy()

# Density: sum of the restaurant shares per postal code. Summing only
# rest_columns keeps the string Postalcode column out of the arithmetic.
grouped['Density'] = grouped[rest_columns].sum(axis=1)
# Count: number of restaurant categories with a non-zero share in that postal code.
grouped['Count'] = (grouped[rest_columns] > 0.0).sum(axis=1)




In [45]:
grouped.head()

Unnamed: 0,Postalcode,Afghan Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,Belgian Restaurant,Brazilian Restaurant,Caribbean Restaurant,Chinese Restaurant,Colombian Restaurant,...,Southern / Soul Food Restaurant,Sushi Restaurant,Taiwanese Restaurant,Thai Restaurant,Theme Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Density,Count
0,M1C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,M1E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,3
2,M1G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1
3,M1H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,M1J,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [46]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Postalcode']
for ind in range(num_top_venues):
    # Ranks 1-3 take 'st'/'nd'/'rd', every later rank takes 'th'. An explicit
    # bounds check replaces the bare `except:`, which would also have hidden
    # any unrelated error.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Restaurant'.format(ind + 1, suffix))

# create a new dataframe with one row per postal code in `grouped`
hood_rest_sorted = pd.DataFrame(columns=columns)
hood_rest_sorted['Postalcode'] = grouped['Postalcode']

# Rank on the per-category columns only; the two summary columns are not categories.
rest_grouped = grouped.drop(['Count', 'Density'], axis=1)

for ind in range(rest_grouped.shape[0]):
    hood_rest_sorted.iloc[ind, 1:] = return_most_common_venues(rest_grouped.iloc[ind, :], num_top_venues)

# Carry the summary columns over next to the ranked categories.
hood_rest_sorted['Count'] = grouped['Count']
hood_rest_sorted['Density'] = grouped['Density']

hood_rest_sorted.head()

Unnamed: 0,Postalcode,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,Count,Density
0,M1C,,,,,,0,0.0
1,M1E,Fast Food Restaurant,Mexican Restaurant,Greek Restaurant,,,3,0.117647
2,M1G,Korean Restaurant,,,,,1,1.0
3,M1H,,,,,,0,0.0
4,M1J,,,,,,0,0.0


In [47]:
# set number of clusters
kclusters = 6

# Features are every column of `grouped` except the postal code. The keyword
# form is used because passing the axis positionally (`drop('Postalcode', 1)`)
# is no longer accepted by pandas 2.x.
grouped_clustering = grouped.drop(columns='Postalcode')

# run k-means clustering (fixed random_state so re-runs give the same labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grouped_clustering)

hood_rest_sorted['Cluster Labels'] = kmeans.labels_

# Combine location data with the restaurant summary and labels; the inner join
# keeps only postal codes present in both frames.
merged = hoods_lat_lon.join(hood_rest_sorted.set_index('Postalcode'), on='Postalcode', how='inner')

merged.head() # check the last columns!

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,Count,Density,Cluster Labels
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.7878,-79.1564,,,,,,0,0.0,2
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.7678,-79.1866,Fast Food Restaurant,Mexican Restaurant,Greek Restaurant,,,3,0.117647,0
3,M1G,Scarborough,Woburn,43.7712,-79.2144,Korean Restaurant,,,,,1,1.0,5
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389,,,,,,0,0.0,2
5,M1J,Scarborough,Scarborough Village,43.7464,-79.2323,,,,,,0,0.0,2


Hey there are some good groups here!!!

In [48]:
hood_rest_sorted['Cluster Labels'].value_counts()

2    41
5    21
0    12
3    10
1    10
4     5
Name: Cluster Labels, dtype: int64

Let's have a look at the distribution...

In [35]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster, as hex strings
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

print(rainbow)

# add markers to the map
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Postalcode'], merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels run from 0 to kclusters-1, so index the palette directly. The old
    # `cluster-1` index made cluster 0 wrap around to the last colour.
    cluster_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

['#8000ff', '#1996f3', '#4df3ce', '#b2f396', '#ff964f', '#ff0000']



Let's have a look:

Label = 0 : Not many food options around, bit boring.

In [36]:
merged[merged['Cluster Labels']==0]

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,Count,Ratio,Cluster Labels
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.7678,-79.1866,Fast Food Restaurant,Mexican Restaurant,Greek Restaurant,,,3,0.117647,0
13,M1T,Scarborough,"Clarks Corners,Sullivan,Tam O'Shanter",43.7812,-79.3036,Thai Restaurant,Italian Restaurant,Fast Food Restaurant,Chinese Restaurant,,4,0.333333,0
33,M3N,North York,Downsview Northwest,43.7568,-79.521,Vietnamese Restaurant,Fast Food Restaurant,Caribbean Restaurant,,,3,0.15,0
38,M4G,East York,Leaside,43.7124,-79.3644,Restaurant,Sushi Restaurant,Portuguese Restaurant,Indian Restaurant,,4,0.192308,0
39,M4H,East York,Thorncliffe Park,43.7059,-79.3464,Indian Restaurant,Turkish Restaurant,Afghan Restaurant,Restaurant,,4,0.3,0
41,M4K,East Toronto,"The Danforth West,Riverdale",43.6803,-79.3538,Greek Restaurant,Restaurant,Italian Restaurant,American Restaurant,,4,0.405405,0
42,M4L,East Toronto,"The Beaches West,India Bazaar",43.6693,-79.3155,Sushi Restaurant,Italian Restaurant,Fast Food Restaurant,,,3,0.157895,0
53,M5A,Downtown Toronto,Harbourfront,43.6555,-79.3626,Restaurant,Thai Restaurant,Mexican Restaurant,Italian Restaurant,Greek Restaurant,5,0.25,0
72,M6B,North York,Glencairn,43.7081,-79.4479,Fast Food Restaurant,Mediterranean Restaurant,Latin American Restaurant,Japanese Restaurant,,4,0.5,0
76,M6H,West Toronto,"Dovercourt Village,Dufferin",43.6655,-79.4378,Portuguese Restaurant,Middle Eastern Restaurant,Brazilian Restaurant,,,3,0.157895,0


Label = 1 : Looks like lots of options, and particularly a high density of Japanese / Sushi and Seafood Restaurants, if you want some fish go here!

In [37]:
merged[merged['Cluster Labels']==1]

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,Count,Ratio,Cluster Labels
22,M2N,North York,Willowdale South,43.7673,-79.4111,Sushi Restaurant,Ramen Restaurant,Restaurant,Japanese Restaurant,Fast Food Restaurant,12,0.33871,1
55,M5C,Downtown Toronto,St. James Town,43.6513,-79.3756,Restaurant,Seafood Restaurant,Italian Restaurant,American Restaurant,Japanese Restaurant,14,0.26,1
56,M5E,Downtown Toronto,Berczy Park,43.6456,-79.3754,Restaurant,Seafood Restaurant,Japanese Restaurant,Vegetarian / Vegan Restaurant,Thai Restaurant,12,0.208791,1
60,M5K,Downtown Toronto,"Design Exchange,Toronto Dominion Centre",43.6469,-79.3823,Restaurant,Japanese Restaurant,Italian Restaurant,American Restaurant,Seafood Restaurant,13,0.24,1
61,M5L,Downtown Toronto,"Commerce Court,Victoria Hotel",43.6492,-79.3823,Restaurant,Japanese Restaurant,American Restaurant,Seafood Restaurant,Asian Restaurant,13,0.27,1
67,M5T,Downtown Toronto,"Chinatown,Grange Park,Kensington Market",43.6541,-79.3978,Chinese Restaurant,Dumpling Restaurant,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Mexican Restaurant,11,0.3125,1
68,M5V,Downtown Toronto,"CN Tower,Bathurst Quay,Island airport,Harbourf...",43.6404,-79.3995,Italian Restaurant,Restaurant,Vegetarian / Vegan Restaurant,Thai Restaurant,Sushi Restaurant,12,0.266667,1
69,M5W,Downtown Toronto,Stn A PO Boxes 25 The Esplanade,43.6437,-79.3787,Restaurant,Japanese Restaurant,Italian Restaurant,Seafood Restaurant,American Restaurant,11,0.23,1
70,M5X,Downtown Toronto,"First Canadian Place,Underground city",43.6492,-79.3823,Restaurant,Japanese Restaurant,American Restaurant,Seafood Restaurant,Asian Restaurant,13,0.27,1
85,M7A,Queen's Park,Queen's Park,43.6641,-79.3889,Italian Restaurant,Vegetarian / Vegan Restaurant,Sushi Restaurant,Seafood Restaurant,Restaurant,12,0.382353,1


Label = 2 : Looks like a restaurant wasteland, the worst of the group.  Terrible.  Just don't go here.

In [38]:
merged[merged['Cluster Labels']==2].head()

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,Count,Ratio,Cluster Labels
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.7878,-79.1564,,,,,,0,0.0,2
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389,,,,,,0,0.0,2
5,M1J,Scarborough,Scarborough Village,43.7464,-79.2323,,,,,,0,0.0,2
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.7298,-79.2639,,,,,,0,0.0,2
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.7122,-79.2843,,,,,,0,0.0,2


Label = 3 : Middling number of restaurants with some variety.

In [39]:
merged[merged['Cluster Labels']==3]

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,Count,Ratio,Cluster Labels
18,M2J,North York,"Fairview,Henry Farm,Oriole",43.7801,-79.3479,Fast Food Restaurant,Japanese Restaurant,Restaurant,Chinese Restaurant,Asian Restaurant,6,0.193548,3
47,M4S,Central Toronto,Davisville,43.702,-79.3853,Italian Restaurant,Thai Restaurant,Sushi Restaurant,Seafood Restaurant,Restaurant,8,0.409091,3
51,M4X,Downtown Toronto,"Cabbagetown,St. James Town",43.6684,-79.3689,Restaurant,Italian Restaurant,Thai Restaurant,Taiwanese Restaurant,Japanese Restaurant,9,0.255814,3
62,M5M,North York,"Bedford Park,Lawrence Manor East",43.7335,-79.4177,Italian Restaurant,Thai Restaurant,Sushi Restaurant,Restaurant,Indian Restaurant,9,0.47619,3
65,M5R,Central Toronto,"The Annex,North Midtown,Yorkville",43.6736,-79.4035,American Restaurant,Vegetarian / Vegan Restaurant,Restaurant,Mexican Restaurant,Italian Restaurant,7,0.32,3
66,M5S,Downtown Toronto,"Harbord,University of Toronto",43.6629,-79.3987,Restaurant,Japanese Restaurant,Italian Restaurant,French Restaurant,Comfort Food Restaurant,6,0.275862,3
71,M6A,North York,"Lawrence Heights,Lawrence Manor",43.7223,-79.4504,Fast Food Restaurant,Restaurant,Sushi Restaurant,Mexican Restaurant,Mediterranean Restaurant,7,0.135135,3
78,M6K,West Toronto,"Brockton,Exhibition Place,Parkdale Village",43.6383,-79.4301,Restaurant,Mexican Restaurant,Japanese Restaurant,Italian Restaurant,Hawaiian Restaurant,7,0.184211,3
83,M6R,West Toronto,"Parkdale,Roncesvalles",43.6469,-79.4521,Eastern European Restaurant,Sushi Restaurant,Restaurant,American Restaurant,Thai Restaurant,7,0.266667,3
84,M6S,West Toronto,"Runnymede,Swansea",43.6512,-79.4828,Sushi Restaurant,Restaurant,Italian Restaurant,Falafel Restaurant,Vegetarian / Vegan Restaurant,9,0.269231,3


Label = 4 : Foodie jackpot!

In [40]:
merged[merged['Cluster Labels']==4]

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,Count,Ratio,Cluster Labels
52,M4Y,Downtown Toronto,Church and Wellesley,43.6656,-79.383,Japanese Restaurant,Restaurant,Mediterranean Restaurant,Sushi Restaurant,American Restaurant,18,0.284091,4
54,M5B,Downtown Toronto,"Ryerson,Garden District",43.6572,-79.3783,Middle Eastern Restaurant,Fast Food Restaurant,Restaurant,Ramen Restaurant,Japanese Restaurant,16,0.24,4
57,M5G,Downtown Toronto,Central Bay Street,43.6564,-79.386,Middle Eastern Restaurant,Japanese Restaurant,Italian Restaurant,Fast Food Restaurant,Sushi Restaurant,15,0.273684,4
58,M5H,Downtown Toronto,"Adelaide,King,Richmond",43.6496,-79.3833,Restaurant,Sushi Restaurant,Japanese Restaurant,Asian Restaurant,American Restaurant,16,0.29,4
77,M6J,West Toronto,"Little Portugal,Trinity",43.648,-79.4177,Restaurant,Asian Restaurant,Vietnamese Restaurant,New American Restaurant,Vegetarian / Vegan Restaurant,16,0.4,4


Label = 5 : A small number of restaurants with a relatively low ratio vs. other venues.  Probably suburbia with not much to do in the area.

In [41]:
merged[merged['Cluster Labels']==5]

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,Count,Ratio,Cluster Labels
3,M1G,Scarborough,Woburn,43.7712,-79.2144,Korean Restaurant,,,,,1,1.0,5
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.7247,-79.2312,Fast Food Restaurant,,,,,1,0.222222,5
10,M1P,Scarborough,"Dorset Park,Scarborough Town Centre,Wexford He...",43.7612,-79.2707,Asian Restaurant,,,,,1,0.333333,5
12,M1S,Scarborough,Agincourt,43.7946,-79.2644,Shanghai Restaurant,Latin American Restaurant,,,,2,0.5,5
14,M1V,Scarborough,"Agincourt North,L'Amoreaux East,Milliken,Steel...",43.8177,-79.2819,Sushi Restaurant,,,,,1,0.5,5
15,M1W,Scarborough,L'Amoreaux West,43.8016,-79.3216,Fast Food Restaurant,Chinese Restaurant,,,,2,0.4,5
28,M3H,North York,"Bathurst Manor,Downsview North,Wilson Heights",43.7535,-79.4472,Middle Eastern Restaurant,Mediterranean Restaurant,,,,2,0.333333,5
29,M3J,North York,"Northwood Park,York University",43.7694,-79.4921,Middle Eastern Restaurant,,,,,1,0.2,5
34,M4A,North York,Victoria Village,43.7276,-79.3148,Portuguese Restaurant,French Restaurant,,,,2,0.285714,5
35,M4B,East York,"Woodbine Gardens,Parkview Hill",43.7063,-79.3094,Fast Food Restaurant,,,,,1,0.2,5
