In [1]:
#import the necessary libraries
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


import os

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib as mp
import re
import csv
%matplotlib inline


print('Libraries imported.')

Libraries imported.


In [2]:
#for scraping data off the internet
from bs4 import BeautifulSoup

In [3]:
import requests

In [4]:
#we import the list of Canada postal codes for reference throughout the analysis
response_obj = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(response_obj,'lxml')

table = soup.find('table')
table_rows = table.find_all('tr')

l = []
for tr in table_rows:
    td = tr.find_all('td')
    row = [tr.text.strip() for tr in td if tr.text.strip()]
    if row:
        l.append(row)

In [5]:
#build dataframe
df = pd.DataFrame(l, columns=["PostalCode", "Borough", "Neighbourhood"])
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
8,M8A,Not assigned,Not assigned
9,M9A,Queen's Park,Not assigned


In [6]:
# Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
df = df[df.Borough != 'Not assigned']
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Not assigned
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern
13,M3B,North York,Don Mills North


In [7]:
toronto_df_dropna = df[df.Borough != 'Not assigned'].reset_index(drop=True)
toronto_df_dropna.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


In [8]:
# If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
df.loc[df['Neighbourhood']=='Not assigned', ['Neighbourhood']] = 'Queen\'s Park'
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Queen's Park
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern
13,M3B,North York,Don Mills North


Group neighborhoods by postal and borough
There are some neighborhoods that belongs to the same postal code and borough.
We will concatenate them into same row, seperated by a colon.

In [9]:
df_grouped = df.groupby(['PostalCode','Borough'], as_index=False).agg(lambda x:
','.join(x))
df_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [10]:
df_grouped.shape

(103, 3)

In [11]:
import urllib

In [12]:
# Use the Notebook to build the code to scrape the following Wikipedia page, https://en.wikipedia.org
url='https://en.wikipedia.org/wiki/Demographics_of_Toronto_neighbourhoods'
skip_table = 1

source = urllib.request.urlopen(url).read()
soup = BeautifulSoup(source,'html.parser')

table = soup.find_all('table')[skip_table]
table_rows = table.find_all('tr')

l = []
for tr in table_rows:
    #print (tr)
    td = tr.find_all('td')
    row = [tr.text.strip() for tr in td if tr.text.strip()]
    if row:
        l.append(row)

In [13]:
df1 = pd.DataFrame(l, columns=["Neighbourhood", "FM", "Census", "Population", "Land Area", "Density", "Population %", "Income", "Commuting", "Renters", "2nd Language", "2nd Language %"])

# clean up the data and dropping unwanted columns
df1 = df1[df1.Neighbourhood != 'Toronto CMA Average']
df1 = df1.drop('FM', 1)
df1 = df1.drop('Census', 1)
df1 = df1.drop('Population', 1)
df1 = df1.drop('Land Area', 1)
df1 = df1.drop('Population %', 1)
df1 = df1.drop('Density', 1)
df1 = df1.drop('Commuting', 1)
df1 = df1.drop('Renters', 1)
df1 = df1.drop('2nd Language', 1)
df1 = df1.drop('2nd Language %', 1)

# change to the proper datatype
df1['Income'] = df1['Income'].str.replace(',','')
df1['Income'] = df1['Income'].apply(pd.to_numeric)
#print (df1.dtypes)

# display sample data
df1.head(10)

Unnamed: 0,Neighbourhood,Income
1,Agincourt,25750
2,Alderwood,35239
3,Alexandra Park,19687
4,Allenby,245592
5,Amesbury,27546
6,Armour Heights,116651
7,Banbury,92319
8,Bathurst Manor,34169
9,Bay Street Corridor,40598
10,Bayview Village,46752


In [14]:
# get the data size
df.shape
df1.shape

(174, 2)

In [16]:
export_csv = df1.to_csv (r'C:\Users\Lami Attah\.ipython\torincome.csv', index = None, header=True)

In [15]:
# the higher income neighbourhood will have the higher score
# sort the dataframe based on the descending population score
df1 = df1.sort_values('Income', ascending=False)
df1.head(10)

Unnamed: 0,Neighbourhood,Income
18,Bridle Path,314107
4,Allenby,245592
71,Hoggs Hollow,222560
90,Lawrence Park,214110
130,Rosedale,213941
104,Moore Park,154825
60,Governor's Bridge/Bennington Heights,129904
97,Lytton Park,127356
140,South Hill,120453
6,Armour Heights,116651


# We can see from the data that Bridle Path is the neighbourhood with the highest average income- Others that cna be considered are Allenby, Hoggs Hollow, Lawrence Park. If the crime rate shows any of these to be the safest, we will pick the one

We import the crime rate data from data.torontopolice.ca . We clean and drop the unnecessary columns. We are then left with the neighbourhood and crime rate columns

In [16]:
 #Import Crime data to select safest neighbourhoods
c = pd.read_csv("https://yes-office.com/wp-content/uploads/2020/01/crimerate-avg.csv")

print(c)

     OBJECTID                        Neighbourhood  Assault_AVG
0           1                       Yonge-St.Clair         50.8
1           2              York University Heights        109.6
2           3                     Lansing-Westgate        213.0
3           4                   Yorkdale-Glen Park        129.8
4           5                  Stonegate-Queensway        122.6
5           6               Tam O'Shanter-Sullivan         56.8
6           7                          The Beaches        402.6
7           8         Thistletown-Beaumond Heights         36.4
8           9                     Thorncliffe Park        134.8
9          10                   Danforth East York        212.2
10         11                   Humewood-Cedarvale        202.0
11         12           Islington-City Centre West        260.4
12         13                             Danforth         19.6
13         14                               Rustic         96.2
14         15                  Scarborou

Sort Dataframe to give us the safest neighbourhood(i.e with the least rate of assault(assault is the most common crime in Toronto))

In [17]:
df2 = c.sort_values('Assault_AVG', ascending =True)
df2.head(10)

Unnamed: 0,OBJECTID,Neighbourhood,Assault_AVG
128,129,Yonge-Eglinton,16.6
93,94,Bay Street Corridor,17.4
107,108,Parkwoods-Donalda,17.4
16,17,South Riverdale,18.8
12,13,Danforth,19.6
96,97,Willowdale West,22.4
77,78,Eglinton East,22.6
139,140,Mimico,22.6
102,103,Blake-Jones,26.2
111,112,Woburn,28.2


In [18]:
df2.shape

(140, 3)

#### From this table, Yonge-Eglinton is the safest neighbourhood. This corresponds with general internet data. Other neighbourhoods are Bay Street Corridor, Parkwoods.

Although we want a safe neighbourhood, we aim for a vibrant neighbourhood. We will now use foursquare to explore Toronto, taking into account the metro stations and clusters

Getting coordinates and add to the Toronto DataFrame

In [19]:
!wget -q -O "toronto_coordinates.csv" http://cocl.us/Geospatial_data
print('Coordinates downloaded!')
tor_coor = pd.read_csv('toronto_coordinates.csv')

Coordinates downloaded!


In [20]:
print(tor_coor.shape)
tor_coor.head()

(103, 3)


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


We will merge the two dataframes.
To this and get objective data, we will reset the index columns

In [21]:
df_temp = df_grouped.set_index('PostalCode')
tor_coor_temp = tor_coor.set_index('Postal Code')
df_coors = pd.concat([df_temp, tor_coor_temp], axis=1, join='inner')

In [22]:
#reset index
df_coors.index.name = 'PostalCode'
df_coors.reset_index(inplace=True)

In [23]:
#dataframe with coordinates
print(df_coors.shape)
df_coors.head(10)

(103, 5)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


First, explore and cluster neigbourhoods in Toronto

#### We will be exploring venues around Toronto. In addition to the transportation venues, we will also examine other venues which will form the surrounding of the chosen location

We scrape Wikipedia for list of Toronto Metrostations and their corresponding coordinates

In [24]:
# Use the Notebook to build the code to scrape the following Wikipedia page, https://en.wikipedia.org
url='https://en.wikipedia.org/wiki/List_of_Toronto_subway_stations'
skip_table = 4


source = urllib.request.urlopen(url).read()
soup = BeautifulSoup(source,'html.parser')

table = soup.find_all('table')[skip_table]
table_rows = table.find_all('tr')

l = []
for tr in table_rows:
    #print (tr)
    td = tr.find_all('td')
    row = [tr.text.strip() for tr in td if tr.text.strip()]
    if row:
        l.append(row)

In [25]:
df3 = pd.DataFrame(l, columns=["Station"])
df3.head(10)

Unnamed: 0,Station
0,Finch\nNorth York Centre\nSheppard–Yonge\nYork...
1,Kipling\nIslington\nRoyal York\nOld Mill\nJane...
2,Kennedy\nLawrence East\nEllesmere\nMidland\nSc...
3,Sheppard–Yonge\nBayview\nBessarion\nLeslie\nDo...
4,Mount Dennis\nKeelesdale\nCaledonia\nFairbank\...
5,Bingham Loop\nDistillery Loop\nDufferin Gate L...
6,Italics indicate a project under construction


We find that the table does not provide the coordinates. We add the coordinates manually. We then import the csv with station names and their coordinates

In [26]:
s = pd.read_csv("https://yes-office.com/wp-content/uploads/2020/02/stations-and-coords1-1.csv")

print(s)

                 Station    Latitude    Longitude
0                  Finch   43.780556   -79.414722
1      North York Centre   43.768333   -79.412778
2         Sheppard–Yonge   43.761389   -79.410833
3             York Mills   43.744167   -79.406667
4               Lawrence      43.725   -79.402222
5               Eglinton   43.705833   -79.398333
6             Davisville   43.697778   -79.397222
7              St. Clair   43.687778   -79.393056
8             Summerhill   43.682222   -79.390833
9               Rosedale   43.676944   -79.388889
10           Bloor–Yonge  43.671111    -79.385833
11             Wellesley  43.665278    -79.383889
12               College  43.661389    -79.383056
13                Dundas  43.656389    -79.380833
14                 Queen    43.6525    -79.379167
15                  King  43.649167    -79.377778
16                 Union  43.645556    -79.380556
17            St. Andrew   43.647778   -79.384722
18               Osgoode  43.650833    -79.386667


In [27]:
df4 = s
df4.head(10)

Unnamed: 0,Station,Latitude,Longitude
0,Finch,43.780556,-79.414722
1,North York Centre,43.768333,-79.412778
2,Sheppard–Yonge,43.761389,-79.410833
3,York Mills,43.744167,-79.406667
4,Lawrence,43.725,-79.402222
5,Eglinton,43.705833,-79.398333
6,Davisville,43.697778,-79.397222
7,St. Clair,43.687778,-79.393056
8,Summerhill,43.682222,-79.390833
9,Rosedale,43.676944,-79.388889


In [28]:
#Get Toronto coordinates to act as centre point
address = 'Toronto, Ontario'
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="tl-toronto-neigh")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.653963, -79.387207.


In [29]:
!pip install folium
import pandas as pd
import folium

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/fd/a0/ccb3094026649cda4acd55bf2c3822bb8c277eb11446d13d384e5be35257/folium-0.10.1-py2.py3-none-any.whl (91kB)
[K     |████████████████████████████████| 92kB 19.2MB/s eta 0:00:01
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/63/36/1c93318e9653f4e414a2e0c3b98fc898b4970e939afeedeee6075dd3b703/branca-0.3.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.3.1 folium-0.10.1


In [30]:
#visualize Toronto map
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, long, post, borough, neigh in zip(df_coors['Latitude'], df_coors['Longitude'], df_coors['PostalCode'], df_coors['Borough'], df_coors['Neighbourhood']):
    label = "{} ({}): {}".format(borough, post, neigh)
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=popup,
        color='red',
        fill=True,
        fill_color='#ffffff',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
    
map_toronto

In [31]:
#Visualize the Toronto metro station map. This helps us to see the spread of the stations 
map_metrotoronto = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, long, station in zip(df4['Latitude'], df4['Longitude'], df4['Station']):
    label = "{} ({}): {}".format(station, lat, long)
    label = folium.Popup(str(label), parse_html=True)
    folium.map.Marker(location=
        [lat, long,],
        radius=10,
        color='blue',
        fill=True,
        fill_color='#006633',
        fill_opacity=0.7,
        parse_html=False).add_to(map_metrotoronto)
    
map_metrotoronto

In [32]:
CLIENT_ID = 'K13XADWFZK3FGZKYEN2CJALSTAWZX4GJXOCLN2EYPJVCOULH' # my Foursquare ID
CLIENT_SECRET = 'HXES5YCXX04VJGRFGJTOGNY41NFEP0CUAUSSJQ0H2M5EGE4R' # my Foursquare Secret
VERSION = '20190215'
print('My credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

My credentails:
CLIENT_ID: K13XADWFZK3FGZKYEN2CJALSTAWZX4GJXOCLN2EYPJVCOULH
CLIENT_SECRET:HXES5YCXX04VJGRFGJTOGNY41NFEP0CUAUSSJQ0H2M5EGE4R


### We explore venues  in Toronto

In [45]:
Radius = 1000
Limit = 100

def getNearbyVenues(postalCodes, boroughs, neighbourhoods, latitudes, longitudes):
    
    venues_list=[]
    # Loop through each neighbourhood given in parameters
    for postalCode, borough, neighbourhood, lat, lng in zip(postalCodes, boroughs, neighbourhoods, latitudes, longitudes):
            
        # create the API request URL to explore the neighbourhood using FoursquareAPI
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            Radius, 
            Limit)

        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue : name, latitude, longitude, and the categories' names
        venues_list.append([(
            postalCode,
            borough,
            neighbourhood, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    # add the venues in the dataframe
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = [
                        'PostalCode',
                        'Borough',
                        'Neighbourhood', 
                        'Neighbourhood Latitude', 
                        'Neighbourhood Longitude', 
                        'Venue', 
                        'Venue Latitude', 
                        'Venue Longitude', 
                        'Venue Category'
    ]
    
    return(nearby_venues)

In [46]:
toronto_venues = getNearbyVenues(  
                                    postalCodes=df_coors['PostalCode'],
                                    boroughs=df_coors['Borough'],
                                    neighbourhoods=df_coors['Neighbourhood'],
                                    latitudes=df_coors['Latitude'],
                                    longitudes=df_coors['Longitude']
                                  )

toronto_venues.head()
print(toronto_venues.shape)

(4909, 9)


In [47]:
toronto_venues.groupby('Neighbourhood').count().head()

Unnamed: 0_level_0,PostalCode,Borough,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"Adelaide,King,Richmond",100,100,100,100,100,100,100,100
Agincourt,45,45,45,45,45,45,45,45
"Agincourt North,L'Amoreaux East,Milliken,Steeles East",29,29,29,29,29,29,29,29
"Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown",18,18,18,18,18,18,18,18
"Alderwood,Long Branch",28,28,28,28,28,28,28,28


In [48]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 333 uniques categories.


In [49]:
# one hot encoding to perform k-means clustering
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add postalCode, borough, and neighbourhood column back to dataframe
toronto_onehot['PostalCode'] = toronto_venues['PostalCode'] 
toronto_onehot['Borough'] = toronto_venues['Borough'] 

toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move postalCode, borough, neighbourhood column to the first column
for i in range(0, 3):
    toronto_onehot = toronto_onehot[[toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])]

toronto_onehot.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,ATM,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Garage,Auto Workshop,Automotive Shop,BBQ Joint,Baby Store,Badminton Court,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Beach,Beach Bar,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Board Shop,Bookstore,Boutique,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Buffet,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Café,Cajun / Creole Restaurant,Camera Store,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Castle,Cemetery,Cheese Shop,Chinese Restaurant,Chiropractor,Chocolate Shop,Church,Churrascaria,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,College Lab,College Quad,College Rec Center,College Stadium,College Theater,Comedy Club,Comfort Food Restaurant,Comic Shop,Community Center,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Curling Ice,Dance Studio,Deli / Bodega,Dentist's Office,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fireworks Store,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,Frame Store,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Golf Course,Golf Driving Range,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hakka Restaurant,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotpot Restaurant,Housing Development,IT Services,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Indie Theater,Indonesian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kids Store,Kitchen Supply Store,Korean Restaurant,Lake,Latin American Restaurant,Laundry Service,Light Rail Station,Lighting Store,Lingerie Store,Liquor Store,Lounge,Malay Restaurant,Market,Massage Studio,Mattress Store,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Museum,Music School,Music Store,Music Venue,Nail Salon,Neighborhood,New American Restaurant,Nightclub,Noodle House,North Indian Restaurant,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Other Repair Shop,Outdoors & Recreation,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Persian Restaurant,Pet Store,Pharmacy,Photography Lab,Pide Place,Pie Shop,Pilates Studio,Pizza Place,Playground,Plaza,Poke Place,Pool,Pool Hall,Portuguese Restaurant,Poutine Place,Print Shop,Pub,Ramen Restaurant,Record Shop,Recording Studio,Recreation Center,Rental Car Location,Residential Building (Apartment / Condo),Restaurant,River,Road,Rock Climbing Spot,Rock Club,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shanghai Restaurant,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Ski Area,Ski Chalet,Smoke Shop,Snack Place,Soccer Field,Soccer Stadium,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Souvlaki Shop,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Sri Lankan Restaurant,Stationery Store,Steakhouse,Storage Facility,Street Art,Supermarket,Supplement Shop,Sushi Restaurant,Syrian Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Tech Startup,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Toy / Game Store,Track,Trail,Train Station,Transportation Service,Tree,Turkish Restaurant,Udon Restaurant,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,M1B,Scarborough,"Rouge,Malvern",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,M1B,Scarborough,"Rouge,Malvern",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,M1B,Scarborough,"Rouge,Malvern",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,M1B,Scarborough,"Rouge,Malvern",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,M1B,Scarborough,"Rouge,Malvern",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [50]:
toronto_onehot.shape

(4909, 336)

In [51]:
#Group rows by neighbourhood, using means

toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighbourhood,ATM,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Garage,Auto Workshop,Automotive Shop,BBQ Joint,Baby Store,Badminton Court,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Beach,Beach Bar,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Board Shop,Bookstore,Boutique,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Buffet,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Café,Cajun / Creole Restaurant,Camera Store,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Castle,Cemetery,Cheese Shop,Chinese Restaurant,Chiropractor,Chocolate Shop,Church,Churrascaria,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,College Lab,College Quad,College Rec Center,College Stadium,College Theater,Comedy Club,Comfort Food Restaurant,Comic Shop,Community Center,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Curling Ice,Dance Studio,Deli / Bodega,Dentist's Office,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fireworks Store,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,Frame Store,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Golf Course,Golf Driving Range,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hakka Restaurant,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotpot Restaurant,Housing Development,IT Services,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Indie Theater,Indonesian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kids Store,Kitchen Supply Store,Korean Restaurant,Lake,Latin American Restaurant,Laundry Service,Light Rail Station,Lighting Store,Lingerie Store,Liquor Store,Lounge,Malay Restaurant,Market,Massage Studio,Mattress Store,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Museum,Music School,Music Store,Music Venue,Nail Salon,Neighborhood,New American Restaurant,Nightclub,Noodle House,North Indian Restaurant,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Other Repair Shop,Outdoors & Recreation,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Persian Restaurant,Pet Store,Pharmacy,Photography Lab,Pide Place,Pie Shop,Pilates Studio,Pizza Place,Playground,Plaza,Poke Place,Pool,Pool Hall,Portuguese Restaurant,Poutine Place,Print Shop,Pub,Ramen Restaurant,Record Shop,Recording Studio,Recreation Center,Rental Car Location,Residential Building (Apartment / Condo),Restaurant,River,Road,Rock Climbing Spot,Rock Club,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shanghai Restaurant,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Ski Area,Ski Chalet,Smoke Shop,Snack Place,Soccer Field,Soccer Stadium,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Souvlaki Shop,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Sri Lankan Restaurant,Stationery Store,Steakhouse,Storage Facility,Street Art,Supermarket,Supplement Shop,Sushi Restaurant,Syrian Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Tech Startup,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Toy / Game Store,Track,Trail,Train Station,Transportation Service,Tree,Turkish Restaurant,Udon Restaurant,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,"Adelaide,King,Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.044444,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.044444,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.022222,0.022222,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.044444,0.0,0.0,0.0,0.022222,0.022222,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.022222,0.022222,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.022222,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.172414,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.055556,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood,Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.107143,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.107143,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.035714,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [52]:
#Print each neighborhood along with the top 5 most common venues

num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond----
              venue  freq
0              Café  0.06
1             Hotel  0.05
2       Coffee Shop  0.05
3           Theater  0.04
4  Sushi Restaurant  0.03


----Agincourt----
                  venue  freq
0    Chinese Restaurant  0.13
1         Shopping Mall  0.07
2                Bakery  0.04
3           Pizza Place  0.04
4  Caribbean Restaurant  0.04


----Agincourt North,L'Amoreaux East,Milliken,Steeles East----
                venue  freq
0  Chinese Restaurant  0.17
1        Noodle House  0.07
2                Park  0.07
3           BBQ Joint  0.07
4         Pizza Place  0.07


----Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown----
            venue  freq
0     Pizza Place  0.17
1   Grocery Store  0.17
2  Sandwich Place  0.06
3    Liquor Store  0.06
4        Gym Pool  0.06


----Alderwood,Long Branch----
               venue  freq
0     Discount Store  0.11
1           Pharmacy  0.11
2  Convenie

In [53]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0 :num_top_venues]

In [54]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
toronto_venues_sorted = pd.DataFrame(columns=columns)
toronto_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    toronto_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

toronto_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Café,Coffee Shop,Hotel,Theater,Gym,Sushi Restaurant,Restaurant,Steakhouse,Movie Theater,Beer Bar
1,Agincourt,Chinese Restaurant,Shopping Mall,Bakery,Caribbean Restaurant,Pizza Place,Bank,Clothing Store,Discount Store,Sushi Restaurant,Supermarket
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Chinese Restaurant,Pizza Place,Noodle House,BBQ Joint,Park,Hobby Shop,Coffee Shop,Korean Restaurant,Bakery,Shop & Service
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Grocery Store,Pizza Place,Beer Store,Coffee Shop,Pharmacy,Fried Chicken Joint,Hardware Store,Sandwich Place,Gym Pool,Fast Food Restaurant
4,"Alderwood,Long Branch",Discount Store,Pharmacy,Convenience Store,Pizza Place,Print Shop,Intersection,Pool,Skating Rink,Market,Donut Shop


USING K-MEANS FOR CLUSTERING

In [55]:
from sklearn.cluster import KMeans 
from sklearn.datasets.samples_generator import make_blobs

In [60]:
kclusters = 5
toronto_data_cluster = toronto_grouped.drop('Neighbourhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_data_cluster)

kmeans.labels_[0:10]

array([3, 0, 0, 0, 0, 0, 0, 3, 3, 3], dtype=int32)

In [73]:
# add clustering labels
toronto_venues_sorted.insert(0, 'cluster Labels', kmeans.labels_)

# merge toronto_grouped with toronto coords to add latitude/longitude for each neighborhood
toronto_merged = df_coors
toronto_merged = toronto_merged.join(toronto_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,cluster Labels,Cluster Labels,klusters,labels,clusters labels,5,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fast Food Restaurant,Trail,Auto Workshop,Coffee Shop,Chinese Restaurant,Fruit & Vegetable Store,Caribbean Restaurant,Paper / Office Supplies Store,Gym,Bus Station
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,2.0,2.0,2.0,2.0,2.0,2.0,2.0,Italian Restaurant,Breakfast Spot,Park,Burger Joint,Playground,Zoo,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Pizza Place,Coffee Shop,Fast Food Restaurant,Sports Bar,Juice Bar,Pharmacy,Salon / Barbershop,Sandwich Place,Discount Store,Burger Joint
3,M1G,Scarborough,Woburn,43.770992,-79.216917,2.0,2.0,2.0,2.0,2.0,2.0,2.0,Park,Coffee Shop,Indian Restaurant,Fast Food Restaurant,Chinese Restaurant,Mobile Phone Shop,Fish & Chips Shop,Event Space,Donut Shop,Dumpling Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Bakery,Coffee Shop,Gas Station,Pharmacy,Indian Restaurant,Sporting Goods Shop,Intersection,Fast Food Restaurant,Music Store,Fried Chicken Joint


In [74]:
#The cluster labels are repeated above due to errors

In [75]:
toronto_venues_sorted.head()

Unnamed: 0,cluster Labels,Cluster Labels,klusters,labels,clusters labels,5,Cluster labels,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,3,3,3,3,3,3,3,"Adelaide,King,Richmond",Café,Coffee Shop,Hotel,Theater,Gym,Sushi Restaurant,Restaurant,Steakhouse,Movie Theater,Beer Bar
1,0,0,0,0,0,0,0,Agincourt,Chinese Restaurant,Shopping Mall,Bakery,Caribbean Restaurant,Pizza Place,Bank,Clothing Store,Discount Store,Sushi Restaurant,Supermarket
2,0,0,0,0,0,0,0,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Chinese Restaurant,Pizza Place,Noodle House,BBQ Joint,Park,Hobby Shop,Coffee Shop,Korean Restaurant,Bakery,Shop & Service
3,0,0,0,0,0,0,0,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Grocery Store,Pizza Place,Beer Store,Coffee Shop,Pharmacy,Fried Chicken Joint,Hardware Store,Sandwich Place,Gym Pool,Fast Food Restaurant
4,0,0,0,0,0,0,0,"Alderwood,Long Branch",Discount Store,Pharmacy,Convenience Store,Pizza Place,Print Shop,Intersection,Pool,Skating Rink,Market,Donut Shop


In [77]:
#Visualize the clusters to observe patterns
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'],kmeans.labels_):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Let's observe the clusters

In [91]:
#cluster 0
# Cluster 0:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,cluster Labels,Cluster Labels,klusters,labels,clusters labels,5,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Rouge,Malvern",0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fast Food Restaurant,Trail,Auto Workshop,Coffee Shop,Chinese Restaurant,Fruit & Vegetable Store,Caribbean Restaurant,Paper / Office Supplies Store,Gym,Bus Station
2,"Guildwood,Morningside,West Hill",0.0,0.0,0.0,0.0,0.0,0.0,0.0,Pizza Place,Coffee Shop,Fast Food Restaurant,Sports Bar,Juice Bar,Pharmacy,Salon / Barbershop,Sandwich Place,Discount Store,Burger Joint
4,Cedarbrae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Bakery,Coffee Shop,Gas Station,Pharmacy,Indian Restaurant,Sporting Goods Shop,Intersection,Fast Food Restaurant,Music Store,Fried Chicken Joint
5,Scarborough Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ice Cream Shop,Coffee Shop,Japanese Restaurant,Fast Food Restaurant,Restaurant,Sandwich Place,Train Station,Pizza Place,Bowling Alley,Convenience Store
6,"East Birchmount Park,Ionview,Kennedy Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Pizza Place,Discount Store,Grocery Store,Convenience Store,Asian Restaurant,Bus Line,Metro Station
7,"Clairlea,Golden Mile,Oakridge",0.0,0.0,0.0,0.0,0.0,0.0,0.0,Bakery,Intersection,Diner,Coffee Shop,Bus Line,Pizza Place,Fast Food Restaurant,Beer Store,Sandwich Place,Pub
8,"Cliffcrest,Cliffside,Scarborough Village West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,Pizza Place,Ice Cream Shop,Beach,Sports Bar,Park,Burger Joint,Cajun / Creole Restaurant,Hardware Store,Farm,Electronics Store
10,"Dorset Park,Scarborough Town Centre,Wexford He...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,Furniture / Home Store,Coffee Shop,Fast Food Restaurant,Bakery,Electronics Store,Chinese Restaurant,Burger Joint,Pharmacy,Indian Restaurant,Asian Restaurant
11,"Maryvale,Wexford",0.0,0.0,0.0,0.0,0.0,0.0,0.0,Middle Eastern Restaurant,Grocery Store,Pizza Place,Intersection,Burger Joint,Coffee Shop,Pharmacy,Bar,Gas Station,Korean Restaurant
12,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Chinese Restaurant,Shopping Mall,Bakery,Caribbean Restaurant,Pizza Place,Bank,Clothing Store,Discount Store,Sushi Restaurant,Supermarket


#### We see that cluster 0 has a variety of places with the most common being restaurants. Shopping, and family activities also seem to be very prevalent here. There are few transportation spots.

In [92]:
# Cluster 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,cluster Labels,Cluster Labels,klusters,labels,clusters labels,5,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
102,Northwest,1.0,1.0,1.0,1.0,1.0,1.0,1.0,Hotel,Coffee Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm


#### We also see that cluster 1 has a tendency to cater to the international market with hotels being the most common venue and foreign restaurants.

In [93]:
# Cluster 2
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,cluster Labels,Cluster Labels,klusters,labels,clusters labels,5,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,"Highland Creek,Rouge Hill,Port Union",2.0,2.0,2.0,2.0,2.0,2.0,2.0,Italian Restaurant,Breakfast Spot,Park,Burger Joint,Playground,Zoo,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space
3,Woburn,2.0,2.0,2.0,2.0,2.0,2.0,2.0,Park,Coffee Shop,Indian Restaurant,Fast Food Restaurant,Chinese Restaurant,Mobile Phone Shop,Fish & Chips Shop,Event Space,Donut Shop,Dumpling Restaurant
20,"Silver Hills,York Mills",2.0,2.0,2.0,2.0,2.0,2.0,2.0,Park,Pool,Farm,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space
31,Downsview West,2.0,2.0,2.0,2.0,2.0,2.0,2.0,Park,Coffee Shop,Spa,Bank,Shopping Mall,Pizza Place,Vietnamese Restaurant,Event Space,Dumpling Restaurant,Eastern European Restaurant
91,"Humber Bay,King's Mill Park,Kingsway Park Sout...",2.0,2.0,2.0,2.0,2.0,2.0,2.0,Park,Italian Restaurant,Shopping Mall,Eastern European Restaurant,Ice Cream Shop,Gym / Fitness Center,Event Space,Donut Shop,Dumpling Restaurant,Electronics Store


#### Cluster 2 is dominated by parks and recreational spots. It is multidimensional and may be a suitable place for the immigrants to look for their native food

In [94]:
# Cluster 3
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,cluster Labels,Cluster Labels,klusters,labels,clusters labels,5,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,"Birch Cliff,Cliffside West",3.0,3.0,3.0,3.0,3.0,3.0,3.0,General Entertainment,Gym,Thai Restaurant,Park,College Stadium,Ice Cream Shop,Diner,Restaurant,Skating Rink,Gym Pool
18,"Fairview,Henry Farm,Oriole",3.0,3.0,3.0,3.0,3.0,3.0,3.0,Clothing Store,Coffee Shop,Fast Food Restaurant,Japanese Restaurant,Bakery,Juice Bar,Sandwich Place,Thai Restaurant,Chocolate Shop,Bank
21,"Newtonbrook,Willowdale",3.0,3.0,3.0,3.0,3.0,3.0,3.0,Korean Restaurant,Café,Bus Line,Middle Eastern Restaurant,Pizza Place,Coffee Shop,Sandwich Place,Japanese Restaurant,Dessert Shop,Diner
22,Willowdale South,3.0,3.0,3.0,3.0,3.0,3.0,3.0,Bubble Tea Shop,Coffee Shop,Ramen Restaurant,Japanese Restaurant,Korean Restaurant,Pizza Place,Fast Food Restaurant,Sandwich Place,Café,Sushi Restaurant
23,York Mills West,3.0,3.0,3.0,3.0,3.0,3.0,3.0,Coffee Shop,Restaurant,Park,Gym,Convenience Store,Tennis Court,Bank,Gas Station,Grocery Store,Intersection
26,Don Mills North,3.0,3.0,3.0,3.0,3.0,3.0,3.0,Japanese Restaurant,Coffee Shop,Burger Joint,Pizza Place,Electronics Store,Restaurant,Thai Restaurant,Bank,Basketball Court,Salad Place
27,"Flemingdon Park,Don Mills South",3.0,3.0,3.0,3.0,3.0,3.0,3.0,Coffee Shop,Restaurant,Gym,American Restaurant,Asian Restaurant,Beer Store,Supermarket,Japanese Restaurant,Bike Shop,Chinese Restaurant
30,"CFB Toronto,Downsview East",3.0,3.0,3.0,3.0,3.0,3.0,3.0,Coffee Shop,Turkish Restaurant,Other Repair Shop,Café,Gas Station,Chinese Restaurant,Sandwich Place,Liquor Store,Electronics Store,Park
36,Woodbine Heights,3.0,3.0,3.0,3.0,3.0,3.0,3.0,Park,Coffee Shop,Pizza Place,Sandwich Place,Plaza,Dance Studio,Beer Store,Liquor Store,Thai Restaurant,Café
37,The Beaches,3.0,3.0,3.0,3.0,3.0,3.0,3.0,Pub,Coffee Shop,Pizza Place,Breakfast Spot,Japanese Restaurant,Beach,Bakery,Burger Joint,Bar,Park


#### Cluster 3 appears to be quite busy with foreign food restaurants, gyms,  bus lines, and shopping malls. It also has an active night life with the number of pubs located in this cluster. 

In [95]:
# Cluster 4
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,cluster Labels,Cluster Labels,klusters,labels,clusters labels,5,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,Downsview Central,4.0,4.0,4.0,4.0,4.0,4.0,4.0,Vietnamese Restaurant,Food Truck,Baseball Field,Farmers Market,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant


#### Cluster 4 appears to be a mid-level activity cluster. It does not appear to hold a strong place for a barbeque spot

## RESULTS REVIEW AND RECOMMENDATION

SAFETY AND SECURITY

We ranked the neighbourhoods according to the average assault per year and Yonge-Eglinton was the safest. As noted earlier, this corresponds with data we have from other surveys carried out online

From the clustering, we identified cluster 0 and 3 as clusters containing likely neighbourhoods to consider due to the nature of existing spots

We observe cluster 0 and 3 against the top neighbourhoods for safety and income range as classified in df1 and df2

We pick out neighbourhoods in this clusters which satisfy our crime and income average rates.


CONCLUSION

We zero in on clusters 0 and 3 to see how it may fall in line with our other factors

We pick out the neighbourhoods that fall into cluster 3 and yet satisfy the other factors above- safety and high income level

We pick out six neighbourhoods:
    
Lawrence Park  
Rosedale  
Parkwoods  
Willowdale West  
Riverdale West  
Mimico  


Other factors like rent and personal taste can be used to pick the location our of this six. 


On further observation, we narrow it down to three(3) neighbourhoods :

1.Lawrence Park satisfies all the three requirements and is close to a college. This will attract young adults who have an active nightlife.

2.Willowdale West is also a good idea as it satisfies the requirements.
The third option will be 3.Parkwoods, which also satisfies the requirement of safety and income. Further search reveals there are many immigrants.