## Toronto Neighborhood Clustering

**Loading Libraries**

In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0   conda-forge
    geopy:         1.17.0-py_0 conda-forge

geographiclib- 100% |################################| Time: 0:00:00  22.13 MB/s
geopy-1.17.0-p 100% |################################| Time: 0:00:00  34.47 MB/s
Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  51.93 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  35.16 MB/s
vincent-0.4.4- 100% |###################

**Load the Toronto Data Set**

In [19]:
df = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", header=0, keep_default_na=False)
pct = df[0]
pct.columns = ['PostalCode','Borough', 'Neighborhood']
pct1 = pct.query('Borough ! = "Not assigned"').reset_index(drop=True)
pct2=pct1.groupby('PostalCode', as_index=False).agg(lambda x: ', '.join(set(x.dropna())))
pct2.loc[pct2['Neighborhood'] == 'Not assigned', 'Neighborhood' ] = pct2['Borough']
df1 = pd.read_csv("https://cocl.us/Geospatial_data", header=0)
df1.columns = ['PostalCode','Latitude', 'Longitude']
df2 = pd.merge(df1, pct2, on='PostalCode')
df3 = df2[['PostalCode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']]
column_names = ['PostalCode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']
neighborhoods = pd.DataFrame(columns=column_names)
df3

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Port Union, Rouge Hill",43.784535,-79.160497
2,M1E,Scarborough,"West Hill, Guildwood, Morningside",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Oakridge, Clairlea",43.711112,-79.284577
8,M1M,Scarborough,"Scarborough Village West, Cliffcrest, Cliffside",43.716316,-79.239476
9,M1N,Scarborough,"Cliffside West, Birch Cliff",43.692657,-79.264848


**Use geopy library to get longitude and latitude values for Toronto, ON.**

In [7]:
address = 'Toronto, ON'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of Toronto are 43.653963, -79.387207.


**Create a map of Toronto with neighborhoods superimposed on top.**

In [30]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood, postalcode in zip(df3['Latitude'], df3['Longitude'], df3['Borough'], df3['Neighborhood'], df3['PostalCode']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

**Explore the borough of West Toronto**

In [31]:
westtoronto_data = df3[df3['Borough'] == 'West Toronto'].reset_index(drop=True)
westtoronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259
1,M6J,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975
2,M6K,West Toronto,"Brockton, Parkdale Village, Exhibition Place",43.636847,-79.428191
3,M6P,West Toronto,"High Park, The Junction South",43.661608,-79.464763
4,M6R,West Toronto,"Roncesvalles, Parkdale",43.64896,-79.456325


In [32]:
address = 'West Toronto, ON'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of West Toronto are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of West Toronto are 43.653963, -79.387207.


In [35]:
# create map of Toronto using latitude and longitude values
map_westtoronto = folium.Map(location=[latitude, longitude], zoom_start=13)

# add markers to map
for lat, lng, borough, neighborhood, postalcode in zip(westtoronto_data['Latitude'], westtoronto_data['Longitude'], westtoronto_data['Borough'], westtoronto_data['Neighborhood'], westtoronto_data['PostalCode']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_westtoronto)  
    
map_westtoronto

**Define Foursquare credentials and explore West Toronto more**

In [36]:
CLIENT_ID = '01LI5PMDR4JUIH03BSWHKV5PDWP2NA0AA502KV40ZEFRRI5F' # your Foursquare ID
CLIENT_SECRET = 'H5LD2541BHOWDKAL32S1O022JB124ATFEEWLHV1LVGV1NHMB' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 01LI5PMDR4JUIH03BSWHKV5PDWP2NA0AA502KV40ZEFRRI5F
CLIENT_SECRET:H5LD2541BHOWDKAL32S1O022JB124ATFEEWLHV1LVGV1NHMB


In [37]:
westtoronto_data.loc[0, 'Neighborhood']

'Dovercourt Village, Dufferin'

In [38]:
neighborhood_latitude = westtoronto_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = westtoronto_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = westtoronto_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Dovercourt Village, Dufferin are 43.66900510000001, -79.4422593.


In [39]:
LIMIT = 100 
radius = 500 

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=01LI5PMDR4JUIH03BSWHKV5PDWP2NA0AA502KV40ZEFRRI5F&client_secret=H5LD2541BHOWDKAL32S1O022JB124ATFEEWLHV1LVGV1NHMB&v=20180605&ll=43.66900510000001,-79.4422593&radius=500&limit=100'

In [40]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c119891351e3d2f8efacb57'},
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-5753753b498eeb535c53aed5-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/nightlife/pub_',
          'suffix': '.png'},
         'id': '4bf58dd8d48988d116941735',
         'name': 'Bar',
         'pluralName': 'Bars',
         'primary': True,
         'shortName': 'Bar'}],
       'id': '5753753b498eeb535c53aed5',
       'location': {'address': '229 Geary St',
        'cc': 'CA',
        'city': 'Toronto',
        'country': 'Canada',
        'crossStreet': 'at Dufferin St',
        'distance': 245,
        'formattedAddress': ['229 Geary St (at Dufferin St)',
         'Toronto ON M6H 2C1',
         'Canada'],
        'labeledLatLngs': [{'label': 'display',
   

In [41]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [42]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,The Greater Good Bar,Bar,43.669409,-79.439267
1,Parallel,Middle Eastern Restaurant,43.669516,-79.438728
2,Planet Fitness Toronto Galleria,Gym / Fitness Center,43.667588,-79.442574
3,Happy Bakery & Pastries,Bakery,43.66705,-79.441791
4,FreshCo,Supermarket,43.667918,-79.440754


In [43]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

18 venues were returned by Foursquare.


In [45]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [46]:
westtoronto_venues = getNearbyVenues(names=westtoronto_data['Neighborhood'],
                                   latitudes=westtoronto_data['Latitude'],
                                   longitudes=westtoronto_data['Longitude']
                                  )

Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Parkdale Village, Exhibition Place
High Park, The Junction South
Roncesvalles, Parkdale
Swansea, Runnymede


In [47]:
print(westtoronto_venues.shape)
westtoronto_venues.head()

(182, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Dovercourt Village, Dufferin",43.669005,-79.442259,The Greater Good Bar,43.669409,-79.439267,Bar
1,"Dovercourt Village, Dufferin",43.669005,-79.442259,Parallel,43.669516,-79.438728,Middle Eastern Restaurant
2,"Dovercourt Village, Dufferin",43.669005,-79.442259,Planet Fitness Toronto Galleria,43.667588,-79.442574,Gym / Fitness Center
3,"Dovercourt Village, Dufferin",43.669005,-79.442259,Happy Bakery & Pastries,43.66705,-79.441791,Bakery
4,"Dovercourt Village, Dufferin",43.669005,-79.442259,FreshCo,43.667918,-79.440754,Supermarket


In [48]:
print('There are {} uniques categories.'.format(len(westtoronto_venues['Venue Category'].unique())))

There are 89 uniques categories.


**Now we analyze each neighborhood**

In [49]:
# one hot encoding
westtoronto_onehot = pd.get_dummies(westtoronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
westtoronto_onehot['Neighborhood'] = westtoronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [westtoronto_onehot.columns[-1]] + list(westtoronto_onehot.columns[:-1])
westtoronto_onehot = westtoronto_onehot[fixed_columns]

westtoronto_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Bakery,Bank,Bar,Bookstore,Boutique,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Butcher,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Climbing Gym,Cocktail Bar,Coffee Shop,Convenience Store,Cuban Restaurant,Cupcake Shop,Dessert Shop,Diner,Discount Store,Dog Run,Eastern European Restaurant,Falafel Restaurant,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Food,Food & Drink Shop,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gastropub,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Ice Cream Shop,Indie Movie Theater,Italian Restaurant,Juice Bar,Korean Restaurant,Latin American Restaurant,Liquor Store,Mac & Cheese Joint,Malay Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Music Venue,New American Restaurant,Nightclub,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Pool,Pub,Record Shop,Restaurant,Salon / Barbershop,Sandwich Place,Smoothie Shop,Southern / Soul Food Restaurant,Speakeasy,Sports Bar,Stadium,Supermarket,Sushi Restaurant,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,"Dovercourt Village, Dufferin",0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Dovercourt Village, Dufferin",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Dovercourt Village, Dufferin",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Dovercourt Village, Dufferin",0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Dovercourt Village, Dufferin",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0


In [51]:
westtoronto_onehot.shape

(182, 90)

In [52]:
westtoronto_grouped = westtoronto_onehot.groupby('Neighborhood').mean().reset_index()
westtoronto_grouped

Unnamed: 0,Neighborhood,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Bakery,Bank,Bar,Bookstore,Boutique,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Butcher,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Climbing Gym,Cocktail Bar,Coffee Shop,Convenience Store,Cuban Restaurant,Cupcake Shop,Dessert Shop,Diner,Discount Store,Dog Run,Eastern European Restaurant,Falafel Restaurant,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Food,Food & Drink Shop,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gastropub,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Ice Cream Shop,Indie Movie Theater,Italian Restaurant,Juice Bar,Korean Restaurant,Latin American Restaurant,Liquor Store,Mac & Cheese Joint,Malay Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Music Venue,New American Restaurant,Nightclub,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Pool,Pub,Record Shop,Restaurant,Salon / Barbershop,Sandwich Place,Smoothie Shop,Southern / Soul Food Restaurant,Speakeasy,Sports Bar,Stadium,Supermarket,Sushi Restaurant,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.095238,0.0,0.0,0.047619,0.0,0.095238,0.0,0.047619,0.047619,0.0,0.142857,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.047619,0.047619,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.047619,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Dovercourt Village, Dufferin",0.0,0.0,0.0,0.0,0.0,0.111111,0.055556,0.055556,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.055556,0.0,0.0,0.055556,0.0,0.0,0.111111,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"High Park, The Junction South",0.0,0.04,0.0,0.04,0.0,0.04,0.0,0.08,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.0,0.0,0.04,0.04,0.04,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0
3,"Little Portugal, Trinity",0.015625,0.0,0.015625,0.0,0.03125,0.03125,0.0,0.125,0.0,0.03125,0.0,0.015625,0.0,0.0,0.0,0.046875,0.0,0.0,0.0,0.03125,0.046875,0.0,0.015625,0.015625,0.0,0.015625,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.015625,0.0,0.015625,0.0,0.0,0.015625,0.015625,0.0,0.015625,0.015625,0.015625,0.0,0.0,0.015625,0.015625,0.046875,0.0,0.0,0.015625,0.0,0.015625,0.03125,0.0,0.015625,0.0,0.0,0.0,0.03125,0.0,0.015625,0.015625,0.046875,0.015625,0.0,0.0,0.015625,0.0,0.015625,0.0,0.0,0.0,0.015625,0.0,0.0,0.015625,0.015625,0.03125,0.015625,0.015625
4,"Roncesvalles, Parkdale",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.066667,0.0,0.133333,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.066667,0.0,0.066667,0.0,0.0,0.066667,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Swansea, Runnymede",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.025641,0.0,0.0,0.0,0.025641,0.025641,0.076923,0.0,0.0,0.0,0.0,0.102564,0.0,0.0,0.0,0.025641,0.025641,0.0,0.0,0.0,0.025641,0.0,0.025641,0.025641,0.0,0.025641,0.025641,0.025641,0.0,0.0,0.025641,0.0,0.025641,0.0,0.0,0.025641,0.0,0.0,0.025641,0.051282,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.076923,0.0,0.025641,0.0,0.025641,0.0,0.025641,0.025641,0.0,0.0,0.0,0.0,0.0,0.051282,0.0,0.025641,0.0,0.0,0.025641,0.0,0.0,0.0


In [53]:
westtoronto_grouped.shape

(6, 90)

**Five most common venues**

In [54]:
num_top_venues = 5

for hood in westtoronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = westtoronto_grouped[westtoronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Brockton, Parkdale Village, Exhibition Place----
            venue  freq
0     Coffee Shop  0.14
1            Café  0.10
2  Breakfast Spot  0.10
3   Burrito Place  0.05
4       Pet Store  0.05


----Dovercourt Village, Dufferin----
            venue  freq
0     Supermarket  0.11
1        Pharmacy  0.11
2          Bakery  0.11
3         Brewery  0.06
4  Discount Store  0.06


----High Park, The Junction South----
                venue  freq
0                Café  0.08
1       Grocery Store  0.08
2                 Bar  0.08
3  Mexican Restaurant  0.08
4         Flea Market  0.04


----Little Portugal, Trinity----
         venue  freq
0          Bar  0.12
1   Restaurant  0.05
2  Coffee Shop  0.05
3  Men's Store  0.05
4         Café  0.05


----Roncesvalles, Parkdale----
              venue  freq
0         Gift Shop  0.13
1    Breakfast Spot  0.13
2           Dog Run  0.07
3      Dessert Shop  0.07
4  Cuban Restaurant  0.07


----Swansea, Runnymede----
                venue  freq
0    

**Create a pandas dataframe**

In [55]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [57]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = westtoronto_grouped['Neighborhood']

for ind in np.arange(westtoronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(westtoronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Brockton, Parkdale Village, Exhibition Place",Coffee Shop,Breakfast Spot,Café,Gym,Burrito Place,Grocery Store,Furniture / Home Store,Italian Restaurant,Falafel Restaurant,Nightclub
1,"Dovercourt Village, Dufferin",Supermarket,Bakery,Pharmacy,Discount Store,Café,Pool,Fast Food Restaurant,Brewery,Music Venue,Bar
2,"High Park, The Junction South",Mexican Restaurant,Grocery Store,Café,Bar,Flea Market,Music Venue,Park,Gastropub,Fast Food Restaurant,Cajun / Creole Restaurant
3,"Little Portugal, Trinity",Bar,Restaurant,Men's Store,Coffee Shop,Café,Cocktail Bar,New American Restaurant,Pizza Place,Boutique,Bakery
4,"Roncesvalles, Parkdale",Breakfast Spot,Gift Shop,Burger Joint,Cuban Restaurant,Movie Theater,Coffee Shop,Dog Run,Eastern European Restaurant,Restaurant,Dessert Shop
5,"Swansea, Runnymede",Coffee Shop,Café,Pizza Place,Sushi Restaurant,Italian Restaurant,Latin American Restaurant,Burrito Place,Butcher,French Restaurant,Dessert Shop


**Cluster Neighborhoods**

In [58]:
# set number of clusters
kclusters = 5

westtoronto_grouped_clustering = westtoronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(westtoronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 2, 4, 1, 0, 1], dtype=int32)

In [59]:
westtoronto_merged = westtoronto_data

# add clustering labels
westtoronto_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
westtoronto_merged = westtoronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

westtoronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259,3,Supermarket,Bakery,Pharmacy,Discount Store,Café,Pool,Fast Food Restaurant,Brewery,Music Venue,Bar
1,M6J,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975,2,Bar,Restaurant,Men's Store,Coffee Shop,Café,Cocktail Bar,New American Restaurant,Pizza Place,Boutique,Bakery
2,M6K,West Toronto,"Brockton, Parkdale Village, Exhibition Place",43.636847,-79.428191,4,Coffee Shop,Breakfast Spot,Café,Gym,Burrito Place,Grocery Store,Furniture / Home Store,Italian Restaurant,Falafel Restaurant,Nightclub
3,M6P,West Toronto,"High Park, The Junction South",43.661608,-79.464763,1,Mexican Restaurant,Grocery Store,Café,Bar,Flea Market,Music Venue,Park,Gastropub,Fast Food Restaurant,Cajun / Creole Restaurant
4,M6R,West Toronto,"Roncesvalles, Parkdale",43.64896,-79.456325,0,Breakfast Spot,Gift Shop,Burger Joint,Cuban Restaurant,Movie Theater,Coffee Shop,Dog Run,Eastern European Restaurant,Restaurant,Dessert Shop


In [62]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(westtoronto_merged['Latitude'], westtoronto_merged['Longitude'], westtoronto_merged['Neighborhood'], westtoronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [64]:
westtoronto_merged.loc[westtoronto_merged['Cluster Labels'] == 0, westtoronto_merged.columns[[1] + list(range(5, westtoronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,West Toronto,0,Breakfast Spot,Gift Shop,Burger Joint,Cuban Restaurant,Movie Theater,Coffee Shop,Dog Run,Eastern European Restaurant,Restaurant,Dessert Shop


In [65]:
westtoronto_merged.loc[westtoronto_merged['Cluster Labels'] == 1, westtoronto_merged.columns[[1] + list(range(5, westtoronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,West Toronto,1,Mexican Restaurant,Grocery Store,Café,Bar,Flea Market,Music Venue,Park,Gastropub,Fast Food Restaurant,Cajun / Creole Restaurant
5,West Toronto,1,Coffee Shop,Café,Pizza Place,Sushi Restaurant,Italian Restaurant,Latin American Restaurant,Burrito Place,Butcher,French Restaurant,Dessert Shop


In [66]:
westtoronto_merged.loc[westtoronto_merged['Cluster Labels'] == 2, westtoronto_merged.columns[[1] + list(range(5, westtoronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,West Toronto,2,Bar,Restaurant,Men's Store,Coffee Shop,Café,Cocktail Bar,New American Restaurant,Pizza Place,Boutique,Bakery


In [67]:
westtoronto_merged.loc[westtoronto_merged['Cluster Labels'] == 3, westtoronto_merged.columns[[1] + list(range(5, westtoronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,West Toronto,3,Supermarket,Bakery,Pharmacy,Discount Store,Café,Pool,Fast Food Restaurant,Brewery,Music Venue,Bar


In [68]:
westtoronto_merged.loc[westtoronto_merged['Cluster Labels'] == 4, westtoronto_merged.columns[[1] + list(range(5, westtoronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,West Toronto,4,Coffee Shop,Breakfast Spot,Café,Gym,Burrito Place,Grocery Store,Furniture / Home Store,Italian Restaurant,Falafel Restaurant,Nightclub
