# Capstone Project - Segmenting and Clustering Toronto Neighbourhoods

## Importing the libraries

In [2]:
import numpy as np # library to handle data in a vectorized manner
import requests
import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [3]:
import requests
from bs4 import BeautifulSoup

## Extracting the data from wikipedia page and updating in a Pandas DataFrame

In [4]:
url1 = 'https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=890001695'

In [5]:
soup = BeautifulSoup(requests.get (url1).text)

In [6]:
pretty_soup = soup.prettify ()

In [7]:
df_1 = pd.read_html (pretty_soup, header = 0)

In [8]:
Neighbourhoods = df_1[0]

## Data Cleaning and Organising

In [9]:
Neighbourhoods.shape

(288, 3)

In [10]:
Neighbourhoods.info ()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 288 entries, 0 to 287
Data columns (total 3 columns):
Postcode         288 non-null object
Borough          288 non-null object
Neighbourhood    288 non-null object
dtypes: object(3)
memory usage: 6.9+ KB


In [11]:
Neighbourhoods.head (3)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods


In [12]:
P = Neighbourhoods.Postcode.nunique ()
B = Neighbourhoods.Borough.nunique ()
N = Neighbourhoods.Neighbourhood.nunique ()

print (' Postcode      :', P, '\n','Borough       :', B, '\n','Neighbourhood :', N )

 Postcode      : 180 
 Borough       : 12 
 Neighbourhood : 209


### Removing 'Not assigned' from 'Borough'

In [13]:
Neighbourhoods [Neighbourhoods.Borough == 'Not assigned'].count ()

Postcode         77
Borough          77
Neighbourhood    77
dtype: int64

In [14]:
Neighbourhoods.Borough = Neighbourhoods.Borough.replace ('Not assigned', np.nan)

In [15]:
Neighbourhoods.head ()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,,Not assigned
1,M2A,,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [16]:
Neighbourhoods.dropna (inplace = True)

In [17]:
Neighbourhoods.head ()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [18]:
Neighbourhoods.shape

(211, 3)

### Substituting 'Not assigned' in 'Neighbourhood'

In [19]:
np.sum (Neighbourhoods.Neighbourhood == 'Not assigned')

1

In [20]:
Neighbourhoods[Neighbourhoods.Neighbourhood == 'Not assigned']

Unnamed: 0,Postcode,Borough,Neighbourhood
8,M7A,Queen's Park,Not assigned


In [21]:
Neighbourhoods [Neighbourhoods.Neighbourhood == 'Not assigned'] = "Queen's Park"

In [22]:
np.sum (Neighbourhoods.Neighbourhood == 'Not assigned')

0

In [23]:
Neighbourhoods.head ()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [24]:
Neighbourhoods.shape

(211, 3)

### Combine rows that have the same PostalCode and Borough

In [40]:
Grouped = Neighbourhoods.groupby(['Postcode','Borough'])['Neighbourhood'].apply(list)

In [41]:
Grouped=Grouped.sample(frac=1).reset_index()

In [42]:
Grouped['Neighbourhood']=Grouped['Neighbourhood'].str.join(', ')

In [43]:
Grouped.head ()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M4H,East York,Thorncliffe Park
1,M2P,North York,York Mills West
2,M4A,North York,Victoria Village
3,M1H,Scarborough,Cedarbrae
4,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi..."


In [44]:
Grouped.shape

(103, 3)

### Adding location coordinates

In [27]:
coord = pd.read_csv ('Geospatial_Coordinates.csv')

In [28]:
coord.head ()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [29]:
Merged = pd.merge (Neighbourhoods, coord, left_on = 'Postcode', right_on = 'Postal Code')

In [30]:
Merged.head ()

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M3A,North York,Parkwoods,M3A,43.753259,-79.329656
1,M4A,North York,Victoria Village,M4A,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,M5A,43.65426,-79.360636
3,M5A,Downtown Toronto,Regent Park,M5A,43.65426,-79.360636
4,M6A,North York,Lawrence Heights,M6A,43.718518,-79.464763


In [31]:
Merged.drop ('Postal Code', inplace = True, axis = 1)

### Dataframe with location coordinates

In [32]:
Merged.head () 

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
4,M6A,North York,Lawrence Heights,43.718518,-79.464763


In [33]:
Merged.isnull ().sum ().sum ()

0

In [34]:
P2 = Merged.Postcode.nunique ()
B2 = Merged.Borough.nunique ()
N2 = Merged.Neighbourhood.nunique ()

print (' Postcode      :', P2, '\n','Borough       :', B2, '\n','Neighbourhood :', N2 )
print ('Shape of dataframe: ',Merged.shape)

 Postcode      : 102 
 Borough       : 10 
 Neighbourhood : 208
Shape of dataframe:  (210, 5)


## Plotting Neighbourhoods on map of Toronto

In [43]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [44]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(Merged['Latitude'], Merged['Longitude'], Merged['Borough'], Merged['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## Exploring the Neighbourhoods

In [45]:
CLIENT_ID = '0QAXTTYPA0GE3XVMOMWJA30KKWQFONE4UKMMMKTSX4UONGWM' # your Foursquare ID
CLIENT_SECRET = 'RCSW0XRSYLZQSBOZGJZTEUJABYVGZPZV5G3JR2C5CNOUJVZI' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 0QAXTTYPA0GE3XVMOMWJA30KKWQFONE4UKMMMKTSX4UONGWM
CLIENT_SECRET:RCSW0XRSYLZQSBOZGJZTEUJABYVGZPZV5G3JR2C5CNOUJVZI


### Exploring the first Neighbourhood in the database

In [46]:
Merged.loc[0, 'Neighbourhood']

'Parkwoods'

In [47]:
Neighbourhood_latitude = Merged.loc [0,'Latitude']

In [48]:
Neighbourhood_longitude = Merged.loc [0,'Longitude']

In [49]:
Neighbourhood_name = Merged.loc[0, 'Neighbourhood']

In [50]:
print ('Latitude and Longitude is {} and {} for {}'.format (Neighbourhood_latitude,Neighbourhood_longitude,Neighbourhood_name))

Latitude and Longitude is 43.7532586 and -79.3296565 for Parkwoods


In [51]:
# type your answer here
LIMIT = 100
radius = 500

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    Neighbourhood_latitude, 
    Neighbourhood_longitude, 
    radius, 
    LIMIT)

In [52]:
url

'https://api.foursquare.com/v2/venues/explore?&client_id=0QAXTTYPA0GE3XVMOMWJA30KKWQFONE4UKMMMKTSX4UONGWM&client_secret=RCSW0XRSYLZQSBOZGJZTEUJABYVGZPZV5G3JR2C5CNOUJVZI&v=20180605&ll=43.7532586,-79.3296565&radius=500&limit=100'

In [53]:
results = requests.get (url).json ()

In [57]:
venues = results['response']['groups'][0]['items']

In [58]:
nearby_venues = json_normalize(venues) # flatten JSON

In [59]:
# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

In [60]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [61]:
# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis = 1)

In [62]:
nearby_venues.columns

Index(['venue.name', 'venue.categories', 'venue.location.lat',
       'venue.location.lng'],
      dtype='object')

In [63]:
# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

In [64]:
nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,Variety Store,Food & Drink Shop,43.751974,-79.333114
2,Bella Vita Catering & Private Chef Service,BBQ Joint,43.756651,-79.331524


In [65]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

3 venues were returned by Foursquare.


### Exploring all Neighbourhoods in Toronto Borough with Max. Neighbourhoods

In [68]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    LIMIT = 100
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
    print ('done')
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Extracting nearby venues for Neighbourhoods from Four Square

In [71]:
toronto_venues = getNearbyVenues(names=Merged['Neighbourhood'],latitudes=Merged['Latitude'],longitudes=Merged['Longitude'])

done


In [72]:
toronto_venues.shape

(4262, 7)

In [73]:
toronto_venues.head ()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Parkwoods,43.753259,-79.329656,Bella Vita Catering & Private Chef Service,43.756651,-79.331524,BBQ Joint
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


# Analysis of Neighbourhoods

In [107]:
toronto_onehot = pd.get_dummies (toronto_venues [['Venue Category']], prefix = "", prefix_sep = "")

In [108]:
toronto_onehot ['Neighbourhood'] = toronto_venues ['Neighbourhood']

In [109]:
toronto_onehot.columns

Index(['Accessories Store', 'Afghan Restaurant', 'Airport',
       'Airport Food Court', 'Airport Gate', 'Airport Lounge',
       'Airport Service', 'Airport Terminal', 'American Restaurant',
       'Antique Shop',
       ...
       'Vegetarian / Vegan Restaurant', 'Video Game Store',
       'Vietnamese Restaurant', 'Warehouse Store', 'Wine Bar', 'Wine Shop',
       'Wings Joint', 'Women's Store', 'Yoga Studio', 'Neighbourhood'],
      dtype='object', length=264)

In [117]:
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])

In [120]:
toronto_onehot.head ()

Unnamed: 0,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Camera Store,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,College Rec Center,College Stadium,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hakka Restaurant,Harbor / Marina,Hardware Store,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hospital,Hostel,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Mac & Cheese Joint,Market,Martial Arts Dojo,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Motel,Movie Theater,Museum,Music Venue,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Plane,Playground,Plaza,Poke Place,Polish Restaurant,Pool,Portuguese Restaurant,Post Office,Poutine Place,Pub,Ramen Restaurant,Record Shop,Recording Studio,Rental Car Location,Restaurant,River,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Snack Place,Soccer Field,Social Club,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Strip Club,Supermarket,Supplement Shop,Sushi Restaurant,Swim School,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Neighbourhood
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Victoria Village
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Victoria Village


In [121]:
toronto_onehot.shape

(4262, 264)

In [122]:
toronto_grouped = toronto_onehot.groupby ('Neighbourhood').mean ().reset_index ()

In [125]:
toronto_grouped.head ()

Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Camera Store,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,College Rec Center,College Stadium,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hakka Restaurant,Harbor / Marina,Hardware Store,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hospital,Hostel,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Mac & Cheese Joint,Market,Martial Arts Dojo,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Motel,Movie Theater,Museum,Music Venue,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Plane,Playground,Plaza,Poke Place,Polish Restaurant,Pool,Portuguese Restaurant,Post Office,Poutine Place,Pub,Ramen Restaurant,Record Shop,Recording Studio,Rental Car Location,Restaurant,River,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Snack Place,Soccer Field,Social Club,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Strip Club,Supermarket,Supplement Shop,Sushi Restaurant,Swim School,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Adelaide,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.02,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.07,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.06,0.0,0.0,0.0,0.02,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Agincourt North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Albion Gardens,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Alderwood,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.222222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [126]:
toronto_grouped.shape

(204, 264)

In [146]:
toronto_grouped_transpose = toronto_grouped.transpose ()

In [147]:
toronto_grouped_transpose.head ()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203
Neighbourhood,Adelaide,Agincourt,Agincourt North,Albion Gardens,Alderwood,Bathurst Manor,Bathurst Quay,Bayview Village,Beaumond Heights,Bedford Park,Berczy Park,Birch Cliff,Bloordale Gardens,Brockton,Business Reply Mail Processing Centre 969 Eastern,CFB Toronto,CN Tower,Cabbagetown,Caledonia-Fairbanks,Canada Post Gateway Processing Centre,Cedarbrae,Central Bay Street,Chinatown,Christie,Church and Wellesley,Clairlea,Clarks Corners,Cliffcrest,Cliffside,Cliffside West,Cloverdale,Commerce Court,Davisville,Davisville North,Deer Park,Del Ray,Design Exchange,Don Mills North,Don Mills South,Dorset Park,Dovercourt Village,Downsview,Downsview Central,Downsview East,Downsview North,Downsview Northwest,Downsview West,Dufferin,East Birchmount Park,East Toronto,Emery,Eringate,Exhibition Place,Fairview,First Canadian Place,Flemingdon Park,Forest Hill North,Forest Hill SE,Forest Hill West,Garden District,Glencairn,Golden Mile,Grange Park,Guildwood,Harbord,Harbourfront,Harbourfront East,Harbourfront West,Henry Farm,High Park,Highland Creek,Hillcrest Village,Humber Bay,Humber Bay Shores,Humber Summit,Humbergate,Humberlea,Humewood-Cedarvale,India Bazaar,Ionview,Island airport,Islington,Jamestown,Keelesdale,Kennedy Park,Kensington Market,King,King and Spadina,King's Mill Park,Kingsview Village,Kingsway Park South East,Kingsway Park South West,L'Amoreaux East,L'Amoreaux West,Lawrence Heights,Lawrence Manor,Lawrence Manor East,Lawrence Park,Leaside,Little Portugal,Long Branch,Malvern,Markland Wood,Martin Grove,Martin Grove Gardens,Maryvale,Milliken,Mimico NE,Mimico NW,Mimico South,Montgomery Road,Moore Park,Morningside,Mount Dennis,Mount Olive,New Toronto,North Midtown,North Park,North Toronto West,Northwest,Northwood Park,Oakridge,Old Burnhamthorpe,Old Mill North,Old Mill South,Oriole,Parkdale,Parkdale Village,Parkview Hill,Parkwoods,Port Union,Princess Gardens,Railway Lands,Rathnelly,Regent Park,Richmond,Richview Gardens,Riverdale,Roncesvalles,Rosedale,Roselawn,Rouge,Rouge Hill,Royal York South East,Royal York South West,Runnymede,Ryerson,Scarborough Town Centre,Scarborough Village,Scarborough Village West,Silver Hills,Silverstone,Silverthorn,South Hill,South Niagara,South Steeles,South of Bloor,St. James Town,St. Phillips,Steeles East,Stn A PO Boxes 25 The Esplanade,Studio District,Sullivan,Summerhill East,Summerhill West,Sunnylea,Swansea,Tam O'Shanter,The Annex,The Beaches,The Beaches West,The Danforth West,The Junction North,The Junction South,The Kingsway,The Queensway East,The Queensway West,Thistletown,Thorncliffe Park,Toronto Dominion Centre,Toronto Islands,Trinity,Underground city,Union Station,University of Toronto,Upwood Park,Victoria Hotel,Victoria Village,West Deane Park,West Hill,Westmount,Weston,Wexford,Wexford Heights,Willowdale South,Willowdale West,Wilson Heights,Woburn,Woodbine Gardens,Woodbine Heights,York Mills,York Mills West,York University,Yorkville
Accessories Store,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0714286,0.0714286,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Afghan Restaurant,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0120482,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Airport,0,0,0,0,0,0,0.0588235,0,0,0,0,0,0,0,0,0.5,0.0588235,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0588235,0,0,0,0,0,0,0,0,0,0,0,0,0.0588235,0,0,0,0,0,0,0.0588235,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0588235,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0588235,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Airport Food Court,0,0,0,0,0,0,0.0588235,0,0,0,0,0,0,0,0,0,0.0588235,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0588235,0,0,0,0,0,0,0,0,0,0,0,0,0.0588235,0,0,0,0,0,0,0.0588235,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0588235,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0588235,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [194]:
for i in toronto_grouped_transpose.columns:
    V = toronto_grouped_transpose.iloc [1:,i].sort_values (ascending = False).head ()
    print ('Neighbourhood :', toronto_grouped_transpose.iloc [0,i],'\n','Venue & Frequency','\n\n', V)
    print ('\n')

Neighbourhood : Adelaide 
 Venue & Frequency 

 Coffee Shop        0.07
Restaurant         0.06
Café               0.05
Thai Restaurant    0.03
Bar                0.03
Name: 0, dtype: object


Neighbourhood : Agincourt 
 Venue & Frequency 

 Lounge                       0.25
Latin American Restaurant    0.25
Breakfast Spot               0.25
Clothing Store               0.25
Donut Shop                      0
Name: 1, dtype: object


Neighbourhood : Agincourt North 
 Venue & Frequency 

 Park                  0.5
Playground            0.5
Yoga Studio             0
Doner Restaurant        0
Dim Sum Restaurant      0
Name: 2, dtype: object


Neighbourhood : Albion Gardens 
 Venue & Frequency 

 Grocery Store            0.25
Pharmacy                0.125
Beer Store              0.125
Sandwich Place          0.125
Fast Food Restaurant    0.125
Name: 3, dtype: object


Neighbourhood : Alderwood 
 Venue & Frequency 

 Pizza Place           0.222222
Coffee Shop           0.111111
Sandwich Plac

Name: 126, dtype: object


Neighbourhood : Parkdale Village 
 Venue & Frequency 

 Café                  0.125
Coffee Shop       0.0833333
Breakfast Spot    0.0833333
Nightclub         0.0833333
Yoga Studio       0.0416667
Name: 127, dtype: object


Neighbourhood : Parkview Hill 
 Venue & Frequency 

 Pizza Place              0.181818
Gym / Fitness Center    0.0909091
Athletics & Sports      0.0909091
Gastropub               0.0909091
Intersection            0.0909091
Name: 128, dtype: object


Neighbourhood : Parkwoods 
 Venue & Frequency 

 Food & Drink Shop    0.333333
Park                 0.333333
BBQ Joint            0.333333
Yoga Studio                 0
Diner                       0
Name: 129, dtype: object


Neighbourhood : Port Union 
 Venue & Frequency 

 Bar                     1
Yoga Studio             0
Dessert Shop            0
Ethiopian Restaurant    0
Empanada Restaurant     0
Name: 130, dtype: object


Neighbourhood : Princess Gardens 
 Venue & Frequency 

 Home Servic

In [225]:
toronto_dict = {}

In [256]:
for i in range(len(toronto_grouped_transpose.columns)):
    value = toronto_grouped_transpose.iloc [1:,i].sort_values (ascending = False).head (10).index.to_list ()
    key = toronto_grouped_transpose.iloc [0,i]
    toronto_dict.update ({key:value})

In [257]:
toronto_dict

{'Adelaide': ['Coffee Shop',
  'Restaurant',
  'Café',
  'Thai Restaurant',
  'Bar',
  'Bakery',
  'Concert Hall',
  'Cosmetics Shop',
  'Lounge',
  'Clothing Store'],
 'Agincourt': ['Lounge',
  'Latin American Restaurant',
  'Breakfast Spot',
  'Clothing Store',
  'Donut Shop',
  'Diner',
  'Discount Store',
  'Distribution Center',
  'Dog Run',
  'Doner Restaurant'],
 'Agincourt North': ['Park',
  'Playground',
  'Yoga Studio',
  'Doner Restaurant',
  'Dim Sum Restaurant',
  'Diner',
  'Discount Store',
  'Distribution Center',
  'Dog Run',
  'Drugstore'],
 'Albion Gardens': ['Grocery Store',
  'Pharmacy',
  'Beer Store',
  'Sandwich Place',
  'Fast Food Restaurant',
  'Fried Chicken Joint',
  'Pizza Place',
  'Dumpling Restaurant',
  'Eastern European Restaurant',
  'Drugstore'],
 'Alderwood': ['Pizza Place',
  'Coffee Shop',
  'Sandwich Place',
  'Pub',
  'Athletics & Sports',
  'Skating Rink',
  'Pharmacy',
  'Gym',
  'Comic Shop',
  'Eastern European Restaurant'],
 'Bathurst Mano

### Updating top venues per Neighbourhood to a dataframe

In [280]:
toronto_df = pd.DataFrame (toronto_dict).transpose ()

In [281]:
toronto_df.index.name = 'Neighbourhood'
toronto_df.columns = ['First', 'Second', 'Third', 'Fourth', 'Fifth', 'Sixth', 'Seventh', 'Eigth', 'Nineth', 'Tenth']

In [282]:
toronto_df.head (3)

Unnamed: 0_level_0,First,Second,Third,Fourth,Fifth,Sixth,Seventh,Eigth,Nineth,Tenth
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Adelaide,Coffee Shop,Restaurant,Café,Thai Restaurant,Bar,Bakery,Concert Hall,Cosmetics Shop,Lounge,Clothing Store
Agincourt,Lounge,Latin American Restaurant,Breakfast Spot,Clothing Store,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
Agincourt North,Park,Playground,Yoga Studio,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Drugstore


### Clustering Neighbourhoods

In [283]:
# set number of clusters 
kclusters = 5

In [284]:
toronto_grouped_clustering = toronto_grouped.drop ('Neighbourhood',1)

In [285]:
# run k-means clustering
kmeans = KMeans (n_clusters = kclusters, random_state = 0)

In [286]:
kmeans.fit (toronto_grouped_clustering)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=5, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=0, tol=0.0001, verbose=0)

In [318]:
kmeans.labels_ [0:10]

array([0, 0, 4, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [288]:
toronto_df.insert (0, 'Cluster Labels', kmeans.labels_)

In [289]:
toronto_df.tail (3)

Unnamed: 0_level_0,Cluster Labels,First,Second,Third,Fourth,Fifth,Sixth,Seventh,Eigth,Nineth,Tenth
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
York Mills West,4,Park,Bank,Convenience Store,Bar,Yoga Studio,Donut Shop,Discount Store,Distribution Center,Dog Run,Doner Restaurant
York University,0,Miscellaneous Shop,Coffee Shop,Massage Studio,Caribbean Restaurant,Bar,Yoga Studio,Doner Restaurant,Discount Store,Distribution Center,Dog Run
Yorkville,0,Café,Sandwich Place,Coffee Shop,BBQ Joint,Cosmetics Shop,Donut Shop,Burger Joint,Indian Restaurant,Middle Eastern Restaurant,Pub


In [291]:
toronto_merged = Merged.join (toronto_df, on = 'Neighbourhood')

In [293]:
toronto_merged.tail (3)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,First,Second,Third,Fourth,Fifth,Sixth,Seventh,Eigth,Nineth,Tenth
207,M8Z,Etobicoke,The Queensway West,43.628841,-79.520999,0.0,Fast Food Restaurant,Hardware Store,Discount Store,Burrito Place,Burger Joint,Bakery,Sandwich Place,Convenience Store,Social Club,Gym
208,M8Z,Etobicoke,Royal York South West,43.628841,-79.520999,0.0,Fast Food Restaurant,Hardware Store,Discount Store,Burrito Place,Burger Joint,Bakery,Sandwich Place,Convenience Store,Social Club,Gym
209,M8Z,Etobicoke,South of Bloor,43.628841,-79.520999,0.0,Fast Food Restaurant,Hardware Store,Discount Store,Burrito Place,Burger Joint,Bakery,Sandwich Place,Convenience Store,Social Club,Gym


In [297]:
toronto_merge = toronto_merged.dropna ()

In [298]:
toronto_merge.isnull ().sum ().sum ()

0

In [299]:
toronto_merge.info ()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 206 entries, 0 to 209
Data columns (total 16 columns):
Postcode          206 non-null object
Borough           206 non-null object
Neighbourhood     206 non-null object
Latitude          206 non-null float64
Longitude         206 non-null float64
Cluster Labels    206 non-null float64
First             206 non-null object
Second            206 non-null object
Third             206 non-null object
Fourth            206 non-null object
Fifth             206 non-null object
Sixth             206 non-null object
Seventh           206 non-null object
Eigth             206 non-null object
Nineth            206 non-null object
Tenth             206 non-null object
dtypes: float64(3), object(13)
memory usage: 27.4+ KB


In [311]:
toronto_merge.tail (3)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,First,Second,Third,Fourth,Fifth,Sixth,Seventh,Eigth,Nineth,Tenth
207,M8Z,Etobicoke,The Queensway West,43.628841,-79.520999,0.0,Fast Food Restaurant,Hardware Store,Discount Store,Burrito Place,Burger Joint,Bakery,Sandwich Place,Convenience Store,Social Club,Gym
208,M8Z,Etobicoke,Royal York South West,43.628841,-79.520999,0.0,Fast Food Restaurant,Hardware Store,Discount Store,Burrito Place,Burger Joint,Bakery,Sandwich Place,Convenience Store,Social Club,Gym
209,M8Z,Etobicoke,South of Bloor,43.628841,-79.520999,0.0,Fast Food Restaurant,Hardware Store,Discount Store,Burrito Place,Burger Joint,Bakery,Sandwich Place,Convenience Store,Social Club,Gym


### Visualizing the clusters

In [329]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

In [330]:
# add markers to the map
markers_colors = []

In [336]:
for lat, lon, poi, cluster in zip(toronto_merge['Latitude'], toronto_merge['Longitude'], toronto_merge['Neighbourhood'], toronto_merge['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow [int(cluster-1)],
        fill=True,
        fill_color=rainbow [int (cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)

In [337]:
map_clusters

### Checking a Cluster post analysis

In [353]:
toronto_merge.loc [toronto_merge['Cluster Labels'] == 1]

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,First,Second,Third,Fourth,Fifth,Sixth,Seventh,Eigth,Nineth,Tenth
97,M3M,North York,Downsview Central,43.728496,-79.495697,1.0,Food Truck,Baseball Field,Yoga Studio,Doner Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop,Falafel Restaurant
105,M9M,North York,Emery,43.724766,-79.532242,1.0,Baseball Field,Yoga Studio,Dessert Shop,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop
106,M9M,North York,Humberlea,43.724766,-79.532242,1.0,Baseball Field,Yoga Studio,Dessert Shop,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop
197,M8Y,Etobicoke,Humber Bay,43.636258,-79.498509,1.0,Baseball Field,Yoga Studio,Dessert Shop,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop
198,M8Y,Etobicoke,King's Mill Park,43.636258,-79.498509,1.0,Baseball Field,Yoga Studio,Dessert Shop,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop
199,M8Y,Etobicoke,Kingsway Park South East,43.636258,-79.498509,1.0,Baseball Field,Yoga Studio,Dessert Shop,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop
200,M8Y,Etobicoke,Mimico NE,43.636258,-79.498509,1.0,Baseball Field,Yoga Studio,Dessert Shop,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop
201,M8Y,Etobicoke,Old Mill South,43.636258,-79.498509,1.0,Baseball Field,Yoga Studio,Dessert Shop,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop
202,M8Y,Etobicoke,The Queensway East,43.636258,-79.498509,1.0,Baseball Field,Yoga Studio,Dessert Shop,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop
203,M8Y,Etobicoke,Royal York South East,43.636258,-79.498509,1.0,Baseball Field,Yoga Studio,Dessert Shop,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop
