# Segmenting and Clustering Neighborhoods in Toronto

## Web Scraping Toronto Postal Codes

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json # library to handle JSON files
import requests # library to handle requests
# import k-means from clustering stage
from sklearn.cluster import KMeans
!conda install -c conda-forge folium=0.5.0 
import folium # map rendering library

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    altair-3.3.0               |           py36_0         747 KB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.2 MB

The following NEW packages will be 

In [3]:
URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [4]:
# Fetch the Wikipedia page. A timeout keeps a stalled connection from hanging
# the kernel, and raise_for_status() stops here on an HTTP error instead of
# letting an error page be parsed as if it were the postal-code table.
page = requests.get(URL, timeout=30)
page.raise_for_status()

In [5]:
soup = BeautifulSoup(page.text, 'html.parser')

In [6]:
columns = ['PostalCode','Borough','Neighborhood']

In [7]:
postaltable = soup.find_all('table')[0]

### Converting Postal Codes html table info to Pandas Dataframe

In [8]:
df = pd.read_html(str(postaltable))

In [9]:
df[0].head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [80]:
df[0].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 287 entries, 0 to 286
Data columns (total 3 columns):
Postcode         287 non-null object
Borough          287 non-null object
Neighbourhood    287 non-null object
dtypes: object(3)
memory usage: 6.8+ KB


In [10]:
# Work on a copy: the next cell renames the columns, and without the copy that
# rename would also alter df[0], the frame parsed from the HTML table.
postalcodes = df[0].copy()

In [11]:
postalcodes.columns = columns

In [12]:
postalcodes.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Narrow down the dataframe to ignore/exclude Not Assigned Boroughs

In [13]:
# Keep only the rows whose Borough is something other than 'Not assigned'.
postalcodes = postalcodes.loc[postalcodes['Borough'].ne('Not assigned')]

### Grouping the Neighborhoods by Postal Code and Borough

In [14]:
# Collapse all rows that share the same PostalCode and Borough into one entry
# whose Neighborhood value is that group's neighborhood names joined with ', '.
# The result is a one-column frame indexed by (PostalCode, Borough).
toronto_pcodes = postalcodes.groupby(['PostalCode','Borough'])['Neighborhood'].agg(', '.join).to_frame()

In [15]:
toronto_pcodes = toronto_pcodes.reset_index()

In [16]:
toronto_pcodes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103 entries, 0 to 102
Data columns (total 3 columns):
PostalCode      103 non-null object
Borough         103 non-null object
Neighborhood    103 non-null object
dtypes: object(3)
memory usage: 2.5+ KB


In [17]:
toronto_pcodes.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [18]:
toronto_pcodes.shape

(103, 3)

#### overall 103 postal codes are identified and integrated in the dataframe

# Append Coordinates with help of GeoSpatial data

In [19]:
!wget -q -O 'geospatial_coordinates.csv' http://cocl.us/Geospatial_data
print('data downloaded')

data downloaded


In [20]:
geo_coord = pd.read_csv('geospatial_coordinates.csv')
geo_coord.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merging Coordinates with Toronto Postal Codes dataframe

In [21]:
toronto_postal_coord = pd.merge(left=toronto_pcodes, right=geo_coord, left_on = 'PostalCode', right_on = 'Postal Code', how='inner')

In [22]:
# 'Postal Code' duplicates 'PostalCode' after the merge. Rebinding the name and
# passing errors='ignore' makes this cell safe to re-run (the in-place drop
# raised KeyError the second time).
toronto_postal_coord = toronto_postal_coord.drop(columns=['Postal Code'], errors='ignore')

In [23]:
toronto_postal_coord.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Explore and cluster the neighborhoods in Toronto

In [24]:
toronto_postal_coord['Borough'].unique()

array(['Scarborough', 'North York', 'East York', 'East Toronto',
       'Central Toronto', 'Downtown Toronto', 'York', 'West Toronto',
       "Queen's Park", 'Mississauga', 'Etobicoke'], dtype=object)

In [25]:
toronto_postal_coord = toronto_postal_coord[toronto_postal_coord['Borough'].str.contains('Toronto')]

In [26]:
toronto_postal_coord = toronto_postal_coord.reset_index()

In [27]:
# Discard the old row labels that reset_index() stored in an 'index' column.
# errors='ignore' keeps the cell re-runnable once the column is already gone.
toronto_postal_coord = toronto_postal_coord.drop(columns='index', errors='ignore')

In [28]:
toronto_postal_coord.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [29]:
# Toronto Coordinates
Latitude = 43.6532
Longitude = -79.3832

### Toronto Neighborhood map view

In [30]:
# Base map centred on the Toronto coordinates defined in the previous cell.
map_toronto = folium.Map(location=[Latitude, Longitude], zoom_start = 12)

# Walk the three columns in lockstep: one circle marker per row, with the
# neighborhood name shown in the marker's popup.
marker_rows = zip(
    toronto_postal_coord['Latitude'],
    toronto_postal_coord['Longitude'],
    toronto_postal_coord['Neighborhood'],
)
for point_lat, point_lng, neighborhood_name in marker_rows:
    marker = folium.CircleMarker(
        [point_lat, point_lng],
        radius = 5,
        popup = folium.Popup(neighborhood_name, parse_html=True),
        fill = True,
        color = 'blue',
        fill_color = 'yellow',
        fill_opacity = 0.7,
        parse_html = False
    )
    marker.add_to(map_toronto)
map_toronto

### Define Foursquare Credentials and Version

In [227]:
# The code was removed by Watson Studio for sharing.

### Exploring the first neighborhood of Toronto via Foursquare API results

In [32]:
toronto_postal_coord.loc[0, 'Neighborhood']

'The Beaches'

In [33]:
first_neighbor_name = toronto_postal_coord.loc[0, 'Neighborhood']
first_neighbor_lat = toronto_postal_coord.loc[0, 'Latitude']
first_neighbor_long = toronto_postal_coord.loc[0, 'Longitude']

print('Latitude and longitude values of {} are {}, {}.'.format(first_neighbor_name, 
                                                               first_neighbor_lat, 
                                                               first_neighbor_long))

Latitude and longitude values of The Beaches are 43.67635739999999, -79.2930312.


#### Now, let's get the top 100 venues that are in The Beaches within a radius of 500 meters.

###### First, let's create the GET request URL. Name your URL **url**.

In [34]:

url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
CLIENT_ID,
CLIENT_SECRET,
VERSION,
first_neighbor_lat,
first_neighbor_long,
500,
100
)



###### Send the GET request and examine the results

In [35]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5de36e239da7ee001c4828fd'},
 'response': {'headerLocation': 'The Beaches',
  'headerFullLocation': 'The Beaches, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 6,
  'suggestedBounds': {'ne': {'lat': 43.680857404499996,
    'lng': -79.28682091449052},
   'sw': {'lat': 43.67185739549999, 'lng': -79.29924148550948}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bd461bc77b29c74a07d9282',
       'name': 'Glen Manor Ravine',
       'location': {'address': 'Glen Manor',
        'crossStreet': 'Queen St.',
        'lat': 43.67682094413784,
        'lng': -79.29394208780985,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.67682094413784,
          'lng': -79.29394208780985}],
        'distanc

In [36]:
results = results['response']['groups'][0]['items']

###### Transforming the json results to pandas dataframe 

In [37]:
# json_normalize lives at the top level of pandas from 1.0 onward; the
# pandas.io.json path is the older, deprecated location. Try the current
# location first and fall back so the cell runs on either generation.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize

# Flatten the nested venue records into one column per dotted key path.
venues = json_normalize(results)

In [38]:
print('{} venues were returned by Foursquare.'.format(venues.shape[0]))

6 venues were returned by Foursquare.


In [39]:
venues.head()

Unnamed: 0,reasons.count,reasons.items,referralId,venue.categories,venue.id,venue.location.address,venue.location.cc,venue.location.city,venue.location.country,venue.location.crossStreet,...,venue.location.formattedAddress,venue.location.labeledLatLngs,venue.location.lat,venue.location.lng,venue.location.postalCode,venue.location.state,venue.name,venue.photos.count,venue.photos.groups,venue.venuePage.id
0,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4bd461bc77b29c74a07d9282-0,"[{'id': '4bf58dd8d48988d159941735', 'name': 'T...",4bd461bc77b29c74a07d9282,Glen Manor,CA,Toronto,Canada,Queen St.,...,"[Glen Manor (Queen St.), Toronto ON, Canada]","[{'label': 'display', 'lat': 43.67682094413784...",43.676821,-79.293942,,ON,Glen Manor Ravine,0,[],
1,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4ad4c062f964a52011f820e3-1,"[{'id': '50aa9e744b90af0d42d5de0e', 'name': 'H...",4ad4c062f964a52011f820e3,125 Southwood Dr,CA,Toronto,Canada,,...,"[125 Southwood Dr, Toronto ON M4E 0B8, Canada]","[{'label': 'display', 'lat': 43.678879, 'lng':...",43.678879,-79.297734,M4E 0B8,ON,The Big Carrot Natural Food Market,0,[],75150878.0
2,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4b8daea1f964a520480833e3-2,"[{'id': '4bf58dd8d48988d11b941735', 'name': 'P...",4b8daea1f964a520480833e3,676 Kingston Rd.,CA,Toronto,Canada,at Main St.,...,"[676 Kingston Rd. (at Main St.), Toronto ON M4...","[{'label': 'display', 'lat': 43.67918143494101...",43.679181,-79.297215,M4E 1R4,ON,Grover Pub and Grub,0,[],
3,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-56afcad6498e05333bf42031-3,"[{'id': '4bf58dd8d48988d162941735', 'name': 'O...",56afcad6498e05333bf42031,,CA,,Canada,,...,[Canada],"[{'label': 'display', 'lat': 43.67629984029563...",43.6763,-79.294784,,,Glen Stewart Ravine,0,[],
4,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-50f9bbcc5d24acebc25935b5-4,"[{'id': '4bf58dd8d48988d1ca941735', 'name': 'P...",50f9bbcc5d24acebc25935b5,670 Kingston Road,CA,Toronto,Canada,,...,"[670 Kingston Road, Toronto ON M4E 1R4, Canada]","[{'label': 'display', 'lat': 43.67905757954377...",43.679058,-79.297382,M4E 1R4,ON,Domino's Pizza,0,[],


###### Cleaning up the Venues dataframe for easy reference

In [40]:
venues.columns

Index(['reasons.count', 'reasons.items', 'referralId', 'venue.categories',
       'venue.id', 'venue.location.address', 'venue.location.cc',
       'venue.location.city', 'venue.location.country',
       'venue.location.crossStreet', 'venue.location.distance',
       'venue.location.formattedAddress', 'venue.location.labeledLatLngs',
       'venue.location.lat', 'venue.location.lng', 'venue.location.postalCode',
       'venue.location.state', 'venue.name', 'venue.photos.count',
       'venue.photos.groups', 'venue.venuePage.id'],
      dtype='object')

In [41]:
filtered_columns = ['venue.categories','venue.name','venue.location.lat','venue.location.lng']
venues = venues.loc[:, filtered_columns]

In [42]:
venues.head()

Unnamed: 0,venue.categories,venue.name,venue.location.lat,venue.location.lng
0,"[{'id': '4bf58dd8d48988d159941735', 'name': 'T...",Glen Manor Ravine,43.676821,-79.293942
1,"[{'id': '50aa9e744b90af0d42d5de0e', 'name': 'H...",The Big Carrot Natural Food Market,43.678879,-79.297734
2,"[{'id': '4bf58dd8d48988d11b941735', 'name': 'P...",Grover Pub and Grub,43.679181,-79.297215
3,"[{'id': '4bf58dd8d48988d162941735', 'name': 'O...",Glen Stewart Ravine,43.6763,-79.294784
4,"[{'id': '4bf58dd8d48988d1ca941735', 'name': 'P...",Domino's Pizza,43.679058,-79.297382


In [43]:
venues.columns = [col.split('.')[-1] for col in venues.columns]
venues.head()

Unnamed: 0,categories,name,lat,lng
0,"[{'id': '4bf58dd8d48988d159941735', 'name': 'T...",Glen Manor Ravine,43.676821,-79.293942
1,"[{'id': '50aa9e744b90af0d42d5de0e', 'name': 'H...",The Big Carrot Natural Food Market,43.678879,-79.297734
2,"[{'id': '4bf58dd8d48988d11b941735', 'name': 'P...",Grover Pub and Grub,43.679181,-79.297215
3,"[{'id': '4bf58dd8d48988d162941735', 'name': 'O...",Glen Stewart Ravine,43.6763,-79.294784
4,"[{'id': '4bf58dd8d48988d1ca941735', 'name': 'P...",Domino's Pizza,43.679058,-79.297382


In [44]:
venues['categories'] = [None if len(row) == 0  else row[0]['name'] for row in venues['categories']] 

In [45]:
venues.head()

Unnamed: 0,categories,name,lat,lng
0,Trail,Glen Manor Ravine,43.676821,-79.293942
1,Health Food Store,The Big Carrot Natural Food Market,43.678879,-79.297734
2,Pub,Grover Pub and Grub,43.679181,-79.297215
3,Other Great Outdoors,Glen Stewart Ravine,43.6763,-79.294784
4,Pizza Place,Domino's Pizza,43.679058,-79.297382


## Explore Neighborhoods in Toronto

###### Let's create a function to repeat the Foursquare API fetch process to all the neighborhoods in Toronto

In [46]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=10):
    """Collect Foursquare 'explore' venues around each neighborhood.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Neighborhood label and the coordinates to search around. They are
        consumed in lockstep with zip(), so they should have equal length.
    radius : int, default 500
        Sent to the API as the ``radius`` query parameter.
    limit : int, default 10
        Sent to the API as the ``limit`` query parameter (previously a
        hard-coded constant of 10 inside the function).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was returned at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    Notes
    -----
    Reads the notebook-level CLIENT_ID, CLIENT_SECRET and VERSION values and
    prints each neighborhood name as it is processed.
    """
    explore_url = 'https://api.foursquare.com/v2/venues/explore'
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string instead of
        # formatting it by hand; the timeout avoids hanging on a stalled call.
        response = requests.get(
            explore_url,
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': limit,
            },
            timeout=30,
        )
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # A venue may come back without any category; record None rather
            # than failing on categories[0].
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works for zero rows,
    # where assigning .columns afterwards would raise a length mismatch.
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)

    return(nearby_venues)

###### Calling the function

In [47]:
toronto_venues = getNearbyVenues(names=toronto_postal_coord['Neighborhood'],
                                   latitudes=toronto_postal_coord['Latitude'],
                                   longitudes=toronto_postal_coord['Longitude']
                                  )

The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North, Forest Hill West
The Annex, North Midtown, Yorkville
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Park, The Junction Sout

In [48]:
print(toronto_venues.shape)
toronto_venues.head()

(340, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Glen Stewart Ravine,43.6763,-79.294784,Other Great Outdoors
4,The Beaches,43.676357,-79.293031,Domino's Pizza,43.679058,-79.297382,Pizza Place


In [210]:
toronto_venues.groupby(['Neighborhood'])['Venue'].count()

Neighborhood
Adelaide, King, Richmond                                                                                      10
Berczy Park                                                                                                   10
Brockton, Exhibition Place, Parkdale Village                                                                  10
Business Reply Mail Processing Centre 969 Eastern                                                             10
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara    10
Cabbagetown, St. James Town                                                                                   10
Central Bay Street                                                                                            10
Chinatown, Grange Park, Kensington Market                                                                     10
Christie                                                                           

In [49]:
print('Number of unique Categories of Venues present in Toronto : {}'.format(len(toronto_venues['Venue Category'].unique())))

Number of unique Categories of Venues present in Toronto : 115


## Analyze Neighborhood and its Venues 

In [185]:
# One indicator column per venue category. The empty prefix and separator
# leave the column names equal to the raw category names.
toronto_onehot = pd.get_dummies(
    toronto_venues[['Venue Category']],
    prefix="",
    prefix_sep="",
)

# Append each venue row's neighborhood. It is passed as a plain list, so it is
# assigned by position rather than aligned on the index.
toronto_onehot2 = toronto_onehot.assign(
    neighborhood = toronto_venues["Neighborhood"].to_list()
)

# Rotate the last column (the one just appended) to the front.
fixed_columns = list(toronto_onehot2.columns[-1:]) + list(toronto_onehot2.columns[:-1])
toronto_onehot2 = toronto_onehot2[fixed_columns]

toronto_onehot2

Unnamed: 0,neighborhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Terminal,American Restaurant,Arts & Crafts Store,Asian Restaurant,Auto Workshop,...,Swim School,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Trail,Vegetarian / Vegan Restaurant,Wine Bar,Yoga Studio
0,"Adelaide, King, Richmond",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,"Adelaide, King, Richmond",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Adelaide, King, Richmond",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Adelaide, King, Richmond",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Adelaide, King, Richmond",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,"Adelaide, King, Richmond",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,"Adelaide, King, Richmond",0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
7,"Adelaide, King, Richmond",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,"Adelaide, King, Richmond",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,"Adelaide, King, Richmond",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [186]:
toronto_onehot2.shape

(340, 116)

###### Group rows by neighborhood by taking the mean of the frequency of occurrence of each category

In [187]:
toronto_grouped = toronto_onehot2.groupby('neighborhood').mean().reset_index()

In [188]:
toronto_grouped.head()

Unnamed: 0,neighborhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Terminal,American Restaurant,Arts & Crafts Store,Asian Restaurant,Auto Workshop,...,Swim School,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Trail,Vegetarian / Vegan Restaurant,Wine Bar,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.1,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.1,0.1,0.1,0.2,0.1,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [189]:
toronto_grouped.shape

(38, 116)

new size after grouping

## Cluster Neighborhoods via KMeans Clustering

Finding the 10 most common venues for each Neighborhood

In [190]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of a row.

    The first element of `row` is skipped (in this notebook it holds the
    neighborhood name). The remaining entries are ordered from largest to
    smallest value, and the index labels of the first `num_top_venues`
    of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [193]:
num_top_venues = 10

# Ordinal suffixes for ranks 1-3; every later rank takes 'th'.
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
# (plain range() is used: numpy is not imported until a later cell, and the
# suffix is chosen with an explicit bounds check instead of a bare except)
columns = ['neighborhood']
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe
toronto_venue_sorted = pd.DataFrame(columns=columns)
toronto_venue_sorted['neighborhood'] = toronto_grouped['neighborhood']

# Fill the rank columns row by row from the per-neighborhood category means.
for ind in range(toronto_grouped.shape[0]):
    toronto_venue_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)



In [194]:
toronto_venue_sorted.head()

Unnamed: 0,neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Steakhouse,Concert Hall,Hotel,Speakeasy,Plaza,Asian Restaurant,Opera House,Coffee Shop,Vegetarian / Vegan Restaurant,Creperie
1,Berczy Park,Museum,French Restaurant,Park,Liquor Store,Steakhouse,Breakfast Spot,Farmers Market,Thai Restaurant,Concert Hall,Vegetarian / Vegan Restaurant
2,"Brockton, Exhibition Place, Parkdale Village",Coffee Shop,Gym,Italian Restaurant,Pet Store,Café,Bar,Bakery,Breakfast Spot,Furniture / Home Store,French Restaurant
3,Business Reply Mail Processing Centre 969 Eastern,Comic Shop,Brewery,Fast Food Restaurant,Farmers Market,Burrito Place,Restaurant,Auto Workshop,Pizza Place,Garden Center,Skate Park
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Lounge,Airport,Harbor / Marina,Boutique,Plane,Coffee Shop,Airport Terminal,Airport Gate,Airport Food Court,Dance Studio


In [195]:
# Leave only the numeric category columns for clustering. errors='ignore'
# keeps the cell re-runnable once 'neighborhood' has already been removed.
toronto_grouped = toronto_grouped.drop(columns='neighborhood', errors='ignore')

Applying KMeans Algorithm

In [196]:
k = 5

# n_init is pinned explicitly: its default differs between scikit-learn
# releases, so leaving it implicit can change the resulting labels even with a
# fixed random_state.
model = KMeans(n_clusters=k, random_state = 0, n_init=10)
model.fit(toronto_grouped)
model.labels_[0:10]

array([3, 3, 0, 3, 0, 2, 0, 2, 2, 0], dtype=int32)

Sorting the Toronto Postal Dataframe by Neighborhood in order to map the Clusters

In [118]:
toronto_postal_coord = toronto_postal_coord.sort_values('Neighborhood', ascending= True).reset_index(drop = True)

In [197]:
toronto_postal_coord.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568
1,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
2,M6K,West Toronto,"Brockton, Exhibition Place, Parkdale Village",43.636847,-79.428191
3,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558
4,M5V,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo...",43.628947,-79.39442


Cluster labels, listed in ascending order of Neighborhood name

In [198]:
model.labels_

array([3, 3, 0, 3, 0, 2, 0, 2, 2, 0, 2, 3, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
       4, 0, 0, 0, 0, 4, 1, 0, 3, 0, 3, 0, 3, 3, 0, 0], dtype=int32)

Appending the Clusters with respective Neighborhoods in Toronto Postal Dataframe

In [199]:
# Attach each label by neighborhood name instead of by row position.
# toronto_venue_sorted['neighborhood'] was copied from toronto_grouped, so it
# lists the neighborhoods in the same row order the model was fitted on.
# Joining on the name removes the dependence on toronto_postal_coord having
# been sorted beforehand and on both frames having the same number of rows.
neighborhood_clusters = pd.DataFrame({
    'Neighborhood': toronto_venue_sorted['neighborhood'].values,
    'Cluster': model.labels_,
})
toronto_cluster_draft = toronto_postal_coord.merge(neighborhood_clusters, on='Neighborhood', how='inner')

###### List of Neighborhoods and corresponding Cluster of Toronto location

In [212]:
toronto_cluster_draft.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster
0,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568,3
1,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,3
2,M6K,West Toronto,"Brockton, Exhibition Place, Parkdale Village",43.636847,-79.428191,0
3,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,3
4,M5V,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo...",43.628947,-79.39442,0


###### Merging the Clusters data with Common Venues data into single dataframe for easy reference

In [214]:
print(toronto_cluster_draft.shape)
print(toronto_venue_sorted.shape)

(38, 6)
(38, 11)


In [215]:
toronto_clusters = pd.merge(left=toronto_cluster_draft, right=toronto_venue_sorted, left_on='Neighborhood', right_on='neighborhood', how='inner')

In [216]:
# The lowercase 'neighborhood' join key duplicates 'Neighborhood' after the
# merge. errors='ignore' keeps the cell re-runnable once it has been removed.
toronto_clusters = toronto_clusters.drop(columns=['neighborhood'], errors='ignore')

In [217]:
toronto_clusters.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568,3,Steakhouse,Concert Hall,Hotel,Speakeasy,Plaza,Asian Restaurant,Opera House,Coffee Shop,Vegetarian / Vegan Restaurant,Creperie
1,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,3,Museum,French Restaurant,Park,Liquor Store,Steakhouse,Breakfast Spot,Farmers Market,Thai Restaurant,Concert Hall,Vegetarian / Vegan Restaurant
2,M6K,West Toronto,"Brockton, Exhibition Place, Parkdale Village",43.636847,-79.428191,0,Coffee Shop,Gym,Italian Restaurant,Pet Store,Café,Bar,Bakery,Breakfast Spot,Furniture / Home Store,French Restaurant
3,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,3,Comic Shop,Brewery,Fast Food Restaurant,Farmers Market,Burrito Place,Restaurant,Auto Workshop,Pizza Place,Garden Center,Skate Park
4,M5V,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo...",43.628947,-79.39442,0,Airport Lounge,Airport,Harbor / Marina,Boutique,Plane,Coffee Shop,Airport Terminal,Airport Gate,Airport Food Court,Dance Studio


#### Visualizing the Clusters

In [218]:
# Toronto Coordinates
Latitude = 43.6532
Longitude = -79.3832

In [219]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np

# create map
map_clusters = folium.Map(location=[Latitude, Longitude], zoom_start=12)

# Take the cluster count from the fitted model so the palette size cannot
# drift from the `k` used for clustering.
kclusters = model.n_clusters

# set color scheme for the clusters: one evenly spaced rainbow colour per label
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_clusters['Latitude'], toronto_clusters['Longitude'], toronto_clusters['Neighborhood'], toronto_clusters['Cluster']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels are 0-based, so they index the palette directly. The previous
    # `cluster-1` only worked for label 0 through negative-index wraparound.
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

###### Cluster 1

In [222]:
# Rows assigned to label 0, with every column kept in its existing order.
toronto_clusters.loc[toronto_clusters['Cluster'].eq(0), :]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M6K,West Toronto,"Brockton, Exhibition Place, Parkdale Village",43.636847,-79.428191,0,Coffee Shop,Gym,Italian Restaurant,Pet Store,Café,Bar,Bakery,Breakfast Spot,Furniture / Home Store,French Restaurant
4,M5V,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo...",43.628947,-79.39442,0,Airport Lounge,Airport,Harbor / Marina,Boutique,Plane,Coffee Shop,Airport Terminal,Airport Gate,Airport Food Court,Dance Studio
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Bubble Tea Shop,Sushi Restaurant,Gastropub,Park,Italian Restaurant,Modern European Restaurant,Ramen Restaurant,Eastern European Restaurant,Dog Run
9,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,0,Bookstore,Gastropub,Bubble Tea Shop,Theme Restaurant,Dance Studio,Restaurant,Tea Room,Breakfast Spot,Ramen Restaurant,Mexican Restaurant
12,M4P,Central Toronto,Davisville North,43.712751,-79.390197,0,Gym,Hotel,Breakfast Spot,Park,Food & Drink Shop,Asian Restaurant,Sandwich Place,Clothing Store,Dessert Shop,Fish & Chips Shop
13,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049,0,Coffee Shop,Supermarket,Liquor Store,Restaurant,Fried Chicken Joint,American Restaurant,Pub,Sports Bar,Sushi Restaurant,Dog Run
14,M5K,Downtown Toronto,"Design Exchange, Toronto Dominion Centre",43.647177,-79.381576,0,Coffee Shop,Beer Bar,Gastropub,Gym / Fitness Center,Hotel,Pub,Café,Restaurant,Gym,Auto Workshop
17,M5P,Central Toronto,"Forest Hill North, Forest Hill West",43.696948,-79.411307,0,Jewelry Store,Trail,Sushi Restaurant,Mexican Restaurant,Fish Market,Cosmetics Shop,Creperie,Cuban Restaurant,Dance Studio,Dessert Shop
18,M5S,Downtown Toronto,"Harbord, University of Toronto",43.662696,-79.400049,0,College Gym,Bakery,Dessert Shop,Restaurant,Italian Restaurant,Bookstore,Beer Bar,Bar,Japanese Restaurant,French Restaurant
19,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,0,Breakfast Spot,Spa,Historic Site,Coffee Shop,Restaurant,Farmers Market,Bakery,Park,Gym / Fitness Center,Fried Chicken Joint


###### Cluster 2

In [223]:
# Rows assigned to label 1, with every column kept in its existing order.
toronto_clusters.loc[toronto_clusters['Cluster'].eq(1), :]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
28,M5N,Central Toronto,Roselawn,43.711695,-79.416936,1,Garden,Yoga Studio,Fish Market,Cosmetics Shop,Creperie,Cuban Restaurant,Dance Studio,Dessert Shop,Diner,Dog Run


##### Cluster 3

In [224]:
# Rows assigned to label 2, with every column kept in its existing order.
toronto_clusters.loc[toronto_clusters['Cluster'].eq(2), :]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675,2,Café,General Entertainment,Indian Restaurant,Diner,Restaurant,Japanese Restaurant,Italian Restaurant,Bakery,Jewelry Store,Food Truck
7,M5T,Downtown Toronto,"Chinatown, Grange Park, Kensington Market",43.653206,-79.400049,2,Café,Organic Grocery,Coffee Shop,Cocktail Bar,Dessert Shop,Arts & Crafts Store,Bakery,Mexican Restaurant,Yoga Studio,Fish & Chips Shop
8,M6G,Downtown Toronto,Christie,43.669542,-79.422564,2,Café,Grocery Store,Coffee Shop,Diner,Candy Store,Restaurant,Italian Restaurant,Fish Market,Cuban Restaurant,Dance Studio
10,M5L,Downtown Toronto,"Commerce Court, Victoria Hotel",43.648198,-79.379817,2,Café,Gym,Restaurant,Gym / Fitness Center,Museum,Pub,Coffee Shop,American Restaurant,Arts & Crafts Store,Creperie


##### Cluster 4

In [225]:
# Rows assigned to label 3, with every column kept in its existing order.
toronto_clusters.loc[toronto_clusters['Cluster'].eq(3), :]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568,3,Steakhouse,Concert Hall,Hotel,Speakeasy,Plaza,Asian Restaurant,Opera House,Coffee Shop,Vegetarian / Vegan Restaurant,Creperie
1,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,3,Museum,French Restaurant,Park,Liquor Store,Steakhouse,Breakfast Spot,Farmers Market,Thai Restaurant,Concert Hall,Vegetarian / Vegan Restaurant
3,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,3,Comic Shop,Brewery,Fast Food Restaurant,Farmers Market,Burrito Place,Restaurant,Auto Workshop,Pizza Place,Garden Center,Skate Park
11,M4S,Central Toronto,Davisville,43.704324,-79.38879,3,Dessert Shop,Gym,Sushi Restaurant,Indian Restaurant,Park,Pizza Place,Café,Seafood Restaurant,Coffee Shop,Auto Workshop
15,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259,3,Bakery,Brewery,Gym / Fitness Center,Pharmacy,Music Venue,Café,Bar,Middle Eastern Restaurant,Supermarket,Fast Food Restaurant
16,M5X,Downtown Toronto,"First Canadian Place, Underground city",43.648429,-79.38228,3,Steakhouse,Café,Gym,Coffee Shop,Gym / Fitness Center,Pizza Place,American Restaurant,Restaurant,Diner,Eastern European Restaurant
30,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,3,Clothing Store,Tea Room,Burger Joint,Burrito Place,Ramen Restaurant,Plaza,Café,Comic Shop,Pizza Place,Theater
32,M5W,Downtown Toronto,Stn A PO Boxes 25 The Esplanade,43.646435,-79.374846,3,Park,Fountain,Café,Museum,Steakhouse,Food Truck,French Restaurant,Thai Restaurant,Cocktail Bar,Vegetarian / Vegan Restaurant
34,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678,3,Café,Coffee Shop,American Restaurant,Pub,Burger Joint,Indian Restaurant,BBQ Joint,Park,Vegetarian / Vegan Restaurant,Fried Chicken Joint
35,M4E,East Toronto,The Beaches,43.676357,-79.293031,3,Other Great Outdoors,Trail,Pizza Place,Neighborhood,Health Food Store,Pub,Farmers Market,Concert Hall,Cosmetics Shop,Creperie


##### Cluster 5

In [226]:
# Rows assigned to label 4, with every column kept in its existing order.
toronto_clusters.loc[toronto_clusters['Cluster'].eq(4), :]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,4,Park,Bus Line,Swim School,Yoga Studio,Fish & Chips Shop,Cosmetics Shop,Creperie,Cuban Restaurant,Dance Studio,Dessert Shop
27,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,4,Park,Trail,Playground,Fish & Chips Shop,Concert Hall,Cosmetics Shop,Creperie,Cuban Restaurant,Dance Studio,Dessert Shop
