# Segmenting and Clustering Neighborhoods in Toronto

In [88]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
import csv
import io

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [59]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [60]:
table = pd.read_html(url, header=0,keep_default_na=False) 

toronto_df = table[0]

toronto_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Reomove cells without assigned Borough:

In [61]:
toronto_df1 = toronto_df.query('Borough != "Not assigned"').reset_index(drop=True)
toronto_df1.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


### Rename columns:

In [62]:
toronto_df1.columns = ['PostalCode','Borough','Neighborhood']
toronto_df1.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


### Combine neighborhood with the same postalcode: 

In [63]:
toronto_df2=toronto_df1.groupby('PostalCode', as_index=False).agg(lambda x: ', '.join(set(x.dropna())))
toronto_df2.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Morningside, Guildwood, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Assign borough to neighborhood when neighborhood is not assigned

In [64]:
toronto_df2.loc[toronto_df2['Neighborhood'] == 'Not assigned', 'Neighborhood' ] = toronto_df2['Borough']
toronto_df2.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Morningside, Guildwood, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Print the number of rows of your dataframe

In [65]:
toronto_df2.shape[0], toronto_df2.shape[1]

(103, 3)

# Latitude and Longitude Coordinates for each Neighborhood:

### First step is to connect coordinates with Toronto Neighberhoods:

In [66]:
df = toronto_df2

In [67]:
url="http://cocl.us/Geospatial_data"
s=requests.get(url).content
c=pd.read_csv(io.StringIO(s.decode('utf-8')))

In [68]:
c.columns = ['PostalCode', 'Latitude', 'Longitude']
df = pd.merge(c, df, on='PostalCode')

In [69]:
df.head()

Unnamed: 0,PostalCode,Latitude,Longitude,Borough,Neighborhood
0,M1B,43.806686,-79.194353,Scarborough,"Malvern, Rouge"
1,M1C,43.784535,-79.160497,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,43.763573,-79.188711,Scarborough,"Morningside, Guildwood, West Hill"
3,M1G,43.770992,-79.216917,Scarborough,Woburn
4,M1H,43.773136,-79.239476,Scarborough,Cedarbrae


### Next step is to rearrange the columns:

In [70]:
df = df[['PostalCode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']]
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Morningside, Guildwood, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# We will explore and cluster the neighborhoods in Downtown Toronto

In [71]:
toronto_data = df[df['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
toronto_data

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
1,M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.667967,-79.367675
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
3,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
4,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
5,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
6,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
7,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
8,M5H,Downtown Toronto,"Adelaide, Richmond, King",43.650571,-79.384568
9,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752


In [120]:
address = 'Downtown Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 43.6541737, -79.3808116451341.


In [122]:
# create map of Manhattan using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## We will explore venues offering food:

In [110]:
CLIENT_ID = '4NZBFUVFOURXVTTMKUF1NWN0AO4UK3SIQLUEGHNBHTACDESA' # your Foursquare ID
CLIENT_SECRET = 'VL2SNBIVN0HVPRUFAGCE2R3PTTGEOGGVJ4H4QBGYT1JJHD5T' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 4NZBFUVFOURXVTTMKUF1NWN0AO4UK3SIQLUEGHNBHTACDESA
CLIENT_SECRET:VL2SNBIVN0HVPRUFAGCE2R3PTTGEOGGVJ4H4QBGYT1JJHD5T


In [111]:
LIMIT = 100

In [112]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL (here the &query=Food limits the search)
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&query=Food'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [113]:

toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Rosedale
St. James Town, Cabbagetown
Church and Wellesley
Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, Richmond, King
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
University of Toronto, Harbord
Kensington Market, Grange Park, Chinatown
South Niagara, Bathurst Quay, King and Spadina, Railway Lands, CN Tower, Harbourfront West, Island airport
Stn A PO Boxes 25 The Esplanade
Underground city, First Canadian Place
Christie


In [114]:
print(toronto_venues.shape)
toronto_venues.head()

(1519, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rosedale,43.679563,-79.377529,Black Camel,43.677016,-79.389367,BBQ Joint
1,Rosedale,43.679563,-79.377529,Tinuno,43.671281,-79.37492,Filipino Restaurant
2,Rosedale,43.679563,-79.377529,Nijo Japanese Restaurant,43.671849,-79.378824,Japanese Restaurant
3,Rosedale,43.679563,-79.377529,Eggsmart,43.671158,-79.37624,Breakfast Spot
4,Rosedale,43.679563,-79.377529,Subway,43.672168,-79.37776,Sandwich Place


In [115]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, Richmond, King",100,100,100,100,100,100
Berczy Park,100,100,100,100,100,100
Central Bay Street,100,100,100,100,100,100
Christie,89,89,89,89,89,89
Church and Wellesley,100,100,100,100,100,100
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
"Design Exchange, Toronto Dominion Centre",100,100,100,100,100,100
"Harbourfront East, Toronto Islands, Union Station",100,100,100,100,100,100
"Harbourfront, Regent Park",82,82,82,82,82,82
"Kensington Market, Grange Park, Chinatown",100,100,100,100,100,100


In [116]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 82 uniques categories.


In [118]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,American Restaurant,Arepa Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Belgian Restaurant,Bistro,Brazilian Restaurant,Breakfast Spot,Burger Joint,Burrito Place,Café,Caribbean Restaurant,Chinese Restaurant,Colombian Restaurant,Comfort Food Restaurant,Creperie,Deli / Bodega,Dim Sum Restaurant,Diner,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,Fish & Chips Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Gastropub,German Restaurant,Gluten-free Restaurant,Greek Restaurant,Hot Dog Joint,Hotpot Restaurant,Indian Restaurant,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewish Restaurant,Korean Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant,Moroccan Restaurant,New American Restaurant,Noodle House,Pakistani Restaurant,Persian Restaurant,Pizza Place,Poke Place,Portuguese Restaurant,Poutine Place,Ramen Restaurant,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Snack Place,Soup Place,South American Restaurant,Souvlaki Shop,Spanish Restaurant,Steakhouse,Sushi Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Theme Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint
0,Rosedale,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Rosedale,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Rosedale,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Rosedale,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Rosedale,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [119]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Afghan Restaurant,American Restaurant,Arepa Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Belgian Restaurant,Bistro,Brazilian Restaurant,Breakfast Spot,Burger Joint,Burrito Place,Café,Caribbean Restaurant,Chinese Restaurant,Colombian Restaurant,Comfort Food Restaurant,Creperie,Deli / Bodega,Dim Sum Restaurant,Diner,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,Fish & Chips Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Gastropub,German Restaurant,Gluten-free Restaurant,Greek Restaurant,Hot Dog Joint,Hotpot Restaurant,Indian Restaurant,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewish Restaurant,Korean Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant,Moroccan Restaurant,New American Restaurant,Noodle House,Pakistani Restaurant,Persian Restaurant,Pizza Place,Poke Place,Portuguese Restaurant,Poutine Place,Ramen Restaurant,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Snack Place,Soup Place,South American Restaurant,Souvlaki Shop,Spanish Restaurant,Steakhouse,Sushi Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Theme Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint
0,"Adelaide, Richmond, King",0.0,0.05,0.0,0.02,0.0,0.0,0.03,0.0,0.0,0.01,0.04,0.03,0.03,0.09,0.01,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.01,0.01,0.04,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.06,0.07,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.05,0.01,0.0,0.01,0.01,0.06,0.02,0.0,0.03,0.0,0.01,0.0,0.01,0.0,0.04,0.03,0.01,0.0,0.0,0.04,0.0,0.02,0.0,0.0
1,Berczy Park,0.0,0.05,0.0,0.0,0.02,0.01,0.04,0.01,0.02,0.0,0.04,0.0,0.0,0.11,0.0,0.01,0.0,0.01,0.02,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.01,0.01,0.05,0.0,0.01,0.01,0.0,0.0,0.01,0.01,0.08,0.04,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.02,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.11,0.02,0.01,0.05,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0
2,Central Bay Street,0.0,0.04,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.03,0.03,0.07,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.01,0.01,0.02,0.0,0.0,0.02,0.0,0.01,0.0,0.04,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.03,0.07,0.0,0.0,0.0,0.0,0.02,0.02,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.03,0.01,0.01,0.0,0.05,0.04,0.0,0.02,0.03,0.0,0.0,0.0,0.01,0.0,0.03,0.06,0.01,0.0,0.01,0.03,0.0,0.03,0.0,0.01
3,Christie,0.0,0.011236,0.0,0.0,0.0,0.0,0.033708,0.0,0.0,0.0,0.033708,0.011236,0.0,0.11236,0.011236,0.011236,0.0,0.0,0.0,0.011236,0.0,0.033708,0.0,0.0,0.0,0.011236,0.022472,0.0,0.022472,0.0,0.0,0.0,0.0,0.0,0.011236,0.011236,0.0,0.0,0.0,0.011236,0.0,0.044944,0.0,0.022472,0.033708,0.011236,0.269663,0.011236,0.011236,0.033708,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033708,0.011236,0.0,0.0,0.011236,0.022472,0.0,0.033708,0.0,0.0,0.0,0.022472,0.0,0.0,0.0,0.0,0.022472,0.0,0.0,0.0,0.0,0.022472,0.022472,0.0
4,Church and Wellesley,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.06,0.01,0.08,0.02,0.02,0.0,0.0,0.01,0.01,0.0,0.04,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.06,0.08,0.0,0.02,0.0,0.02,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.04,0.0,0.01,0.0,0.03,0.05,0.01,0.04,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.05,0.0,0.0,0.0,0.02,0.01,0.01,0.01,0.01
5,"Commerce Court, Victoria Hotel",0.0,0.05,0.0,0.02,0.01,0.0,0.05,0.0,0.01,0.01,0.03,0.03,0.01,0.1,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.01,0.01,0.01,0.05,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.06,0.05,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.1,0.03,0.01,0.05,0.0,0.0,0.0,0.0,0.0,0.04,0.01,0.0,0.0,0.0,0.04,0.0,0.02,0.0,0.0
6,"Design Exchange, Toronto Dominion Centre",0.0,0.06,0.0,0.02,0.0,0.0,0.03,0.0,0.01,0.01,0.04,0.03,0.01,0.1,0.0,0.0,0.01,0.0,0.01,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.01,0.01,0.01,0.04,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.08,0.05,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.02,0.01,0.0,0.01,0.0,0.06,0.03,0.01,0.04,0.0,0.0,0.0,0.0,0.0,0.05,0.02,0.01,0.0,0.0,0.04,0.0,0.02,0.0,0.0
7,"Harbourfront East, Toronto Islands, Union Station",0.0,0.03,0.0,0.01,0.0,0.0,0.02,0.01,0.02,0.01,0.03,0.02,0.01,0.11,0.0,0.02,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.02,0.02,0.0,0.02,0.01,0.0,0.0,0.01,0.01,0.07,0.05,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.05,0.0,0.0,0.01,0.0,0.08,0.04,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.05,0.04,0.01,0.0,0.0,0.04,0.0,0.02,0.0,0.0
8,"Harbourfront, Regent Park",0.0,0.0,0.0,0.012195,0.012195,0.0,0.060976,0.0,0.0,0.0,0.036585,0.036585,0.012195,0.097561,0.0,0.012195,0.0,0.0,0.0,0.012195,0.0,0.04878,0.0,0.0,0.0,0.0,0.012195,0.012195,0.04878,0.0,0.0,0.0,0.02439,0.012195,0.0,0.02439,0.012195,0.0,0.012195,0.0,0.0,0.02439,0.0,0.04878,0.02439,0.0,0.0,0.0,0.02439,0.02439,0.012195,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.060976,0.0,0.0,0.0,0.0,0.085366,0.0,0.060976,0.012195,0.0,0.0,0.0,0.0,0.012195,0.0,0.04878,0.0,0.0,0.0,0.036585,0.0,0.0,0.012195,0.0
9,"Kensington Market, Grange Park, Chinatown",0.0,0.0,0.01,0.0,0.0,0.01,0.04,0.01,0.02,0.0,0.02,0.03,0.01,0.13,0.02,0.03,0.0,0.02,0.01,0.0,0.02,0.01,0.01,0.01,0.02,0.0,0.0,0.0,0.01,0.02,0.01,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.02,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.05,0.0,0.0,0.0,0.02,0.03,0.0,0.05,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.03,0.0,0.02,0.01,0.0,0.09,0.04,0.0


In [124]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [126]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(25)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, Richmond, King",Café,Japanese Restaurant,Italian Restaurant,Restaurant,Pizza Place,American Restaurant,Breakfast Spot,Steakhouse,Thai Restaurant,Gastropub
1,Berczy Park,Café,Restaurant,Italian Restaurant,Gastropub,American Restaurant,Seafood Restaurant,Breakfast Spot,Japanese Restaurant,Bakery,Deli / Bodega
2,Central Bay Street,Japanese Restaurant,Café,Sushi Restaurant,Ramen Restaurant,American Restaurant,Restaurant,Gastropub,Diner,Burger Joint,Burrito Place
3,Christie,Korean Restaurant,Café,Indian Restaurant,Breakfast Spot,Mexican Restaurant,Japanese Restaurant,Diner,Pizza Place,Bakery,Sandwich Place
4,Church and Wellesley,Japanese Restaurant,Café,Burger Joint,Italian Restaurant,Sushi Restaurant,Restaurant,Sandwich Place,Gastropub,Pizza Place,Diner
5,"Commerce Court, Victoria Hotel",Café,Restaurant,Italian Restaurant,Bakery,American Restaurant,Seafood Restaurant,Japanese Restaurant,Gastropub,Thai Restaurant,Steakhouse
6,"Design Exchange, Toronto Dominion Centre",Café,Italian Restaurant,American Restaurant,Restaurant,Japanese Restaurant,Steakhouse,Gastropub,Thai Restaurant,Deli / Bodega,Breakfast Spot
7,"Harbourfront East, Toronto Islands, Union Station",Café,Restaurant,Italian Restaurant,Deli / Bodega,Pizza Place,Steakhouse,Japanese Restaurant,Salad Place,Sushi Restaurant,Thai Restaurant
8,"Harbourfront, Regent Park",Café,Restaurant,Sandwich Place,Bakery,Pizza Place,Diner,Italian Restaurant,Fast Food Restaurant,Sushi Restaurant,Burger Joint
9,"Kensington Market, Grange Park, Chinatown",Café,Vegetarian / Vegan Restaurant,Pizza Place,Sandwich Place,Bakery,Vietnamese Restaurant,Restaurant,Mexican Restaurant,Burger Joint,Chinese Restaurant


 ## Cluster of Food Venues in different Neighborhoods in Downtown Toronto

In [128]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 4, 3, 4, 1, 1, 1, 4, 4], dtype=int32)

In [129]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head(30) # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,2,Sandwich Place,Breakfast Spot,Café,Indian Restaurant,BBQ Joint,Japanese Restaurant,Fried Chicken Joint,Filipino Restaurant,Donut Shop,Dumpling Restaurant
1,M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.667967,-79.367675,1,Restaurant,Café,Japanese Restaurant,Gastropub,Indian Restaurant,Diner,Thai Restaurant,Italian Restaurant,Caribbean Restaurant,Filipino Restaurant
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,4,Japanese Restaurant,Café,Burger Joint,Italian Restaurant,Sushi Restaurant,Restaurant,Sandwich Place,Gastropub,Pizza Place,Diner
3,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,4,Café,Restaurant,Sandwich Place,Bakery,Pizza Place,Diner,Italian Restaurant,Fast Food Restaurant,Sushi Restaurant,Burger Joint
4,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,1,Café,Italian Restaurant,Japanese Restaurant,Restaurant,Gastropub,Diner,American Restaurant,Breakfast Spot,Seafood Restaurant,Steakhouse
5,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,1,Restaurant,Café,Italian Restaurant,Breakfast Spot,Seafood Restaurant,Gastropub,American Restaurant,Pizza Place,Bakery,Steakhouse
6,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,1,Café,Restaurant,Italian Restaurant,Gastropub,American Restaurant,Seafood Restaurant,Breakfast Spot,Japanese Restaurant,Bakery,Deli / Bodega
7,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,4,Japanese Restaurant,Café,Sushi Restaurant,Ramen Restaurant,American Restaurant,Restaurant,Gastropub,Diner,Burger Joint,Burrito Place
8,M5H,Downtown Toronto,"Adelaide, Richmond, King",43.650571,-79.384568,1,Café,Japanese Restaurant,Italian Restaurant,Restaurant,Pizza Place,American Restaurant,Breakfast Spot,Steakhouse,Thai Restaurant,Gastropub
9,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752,1,Café,Restaurant,Italian Restaurant,Deli / Bodega,Pizza Place,Steakhouse,Japanese Restaurant,Salad Place,Sushi Restaurant,Thai Restaurant


In [133]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=13)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters