## #3 Exploration and Clustering of Neighbourhoods in Toronto
### Using the Foursquare API to Segment and Cluster the Neighborhoods of Toronto
### By Oludare Bagbile

# Libraries & Packages

All required packages and libraries are imported in the following code cell.

In [1]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
from pandas.io.json import json_normalize
!pip install geopy==2.1.0
from geopy.geocoders import Nominatim
!pip install folium==0.5.0
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings('ignore')

print("Libraries Imported!")

Collecting geopy==2.1.0
  Downloading geopy-2.1.0-py3-none-any.whl (112 kB)
Collecting geographiclib<2,>=1.49
  Downloading geographiclib-1.50-py3-none-any.whl (38 kB)
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-2.1.0
Collecting folium==0.5.0
  Downloading folium-0.5.0.tar.gz (79 kB)
Building wheels for collected packages: folium
  Building wheel for folium (setup.py): started
  Building wheel for folium (setup.py): finished with status 'done'
  Created wheel for folium: filename=folium-0.5.0-py3-none-any.whl size=76256 sha256=52125872f3d5a2e83b8b0c51fc30add35f0ff772375c228423957c1e31d76cb9
  Stored in directory: c:\users\oluda\appdata\local\pip\cache\wheels\ef\4c\4a\17fd3d7fb7b6243d5a7a8d165870cd5c6ad2ec4c0582f039e4
Successfully built folium
Installing collected packages: folium
  Attempting uninstall: folium
    Found existing installation: folium 0.11.0
    Uninstalling folium-0.11.0:
      Successfully uninstalled folium-0.11.

# Getting Venues Data

## Coordinates for Toronto

In [7]:
# Create a Nominatim object for geolocation
geolocator = Nominatim(user_agent="ny_explorer")

# GEt latitue and longitude values for Toronto
location = geolocator.geocode('Toronto')
lat = location.latitude
lon = location.longitude

# Display latitude and longitude values for Toronto
print('The geograpical coordinate of Toronto are {}, {}.'.format(lat, lon))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


## Create Map for Toronto

In [8]:
# Create map of Toronto using latitude and longitude values
toronto_map = folium.Map(location=[lat, lon], zoom_start=10)

# Add markers to map
for lat, lon, borough, neighborhood in zip(canada_df['Latitude'], canada_df['Longitude'], canada_df['Borough'], canada_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat, lon], 
                        radius=5, 
                        popup=label, 
                        color='blue', 
                        fill=True, 
                        fill_color='#3186cc', 
                        fill_opacity=0.7, 
                        parse_html=False).add_to(toronto_map)  

# Display map for Toronto   
toronto_map

## Setup Foursquare API

In [11]:
# Your Foursqaure API Client ID
CLIENT_ID = 'ZLPAURQKU5T2ENSTGHMMLTRTSEHOR2PK3NMDFVY3C4KXYQY4'

# Your Foursquare API CLient secret
CLIENT_SECRET = 'ZI0U1EYG25XOY2KQCJ1XBULFDR50E44PQXMNIPJMTDWIHL4Q'

# Foursquare API version
VERSION = '20180604'

LIMIT = 100
radius = 500

# URL for getting data from Foursquare API
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, lat, lon, VERSION, radius, LIMIT)
print('URL: ', url)

URL:  https://api.foursquare.com/v2/venues/search?client_id=ZLPAURQKU5T2ENSTGHMMLTRTSEHOR2PK3NMDFVY3C4KXYQY4&client_secret=ZI0U1EYG25XOY2KQCJ1XBULFDR50E44PQXMNIPJMTDWIHL4Q&ll=43.6288408,-79.52099940000001&v=20180604&radius=500&limit=100


## Get Data for Venues

In [13]:
# Function to extract the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [14]:
# Send the GET request and examine the results
results = requests.get(url).json()

# Assign relevant part of JSON to venues
venues = results['response']['venues']

# Transform venues into a DataFrame
venues_df = json_normalize(venues)

# Keep only relevant columns
filtered_columns = ['name', 'categories'] + [col for col in venues_df.columns if col.startswith('location.')] + ['id']
venues_df = venues_df.loc[:, filtered_columns]

# Get venue category for each record
venues_df['categories'] = venues_df.apply(get_category_type, axis=1)

# Clean column names
venues_df.columns = [column.split('.')[-1] for column in venues_df.columns]

# Display first 10 records
venues_df.head(10)

Unnamed: 0,name,categories,lat,lng,labeledLatLngs,distance,cc,country,formattedAddress,address,city,state,postalCode,crossStreet,neighborhood,id
0,Holy Angels School,Elementary School,43.628304,-79.518308,"[{'label': 'display', 'lat': 43.62830425140846...",224,CA,Canada,[Canada],,,,,,,50ec1a3ee4b0ef749e77edea
1,Holy Angels' Catholic Church,Church,43.628135,-79.518762,"[{'label': 'display', 'lat': 43.62813532258767...",196,CA,Canada,"[61 Jutland Rd, Etobicoke ON, Canada]",61 Jutland Rd,Etobicoke,ON,,,,4eb72e602c5b53141b16605d
2,Tibetan Canadian Cultural Centre,Spiritual Center,43.630513,-79.521935,"[{'label': 'display', 'lat': 43.63051327764869...",200,CA,Canada,"[40 Titan Road, Toronto ON M6Z 2J8, Canada]",40 Titan Road,Toronto,ON,M6Z 2J8,,,4ba54082f964a520e1f138e3
3,Royal Canadian Legion #210,Social Club,43.628855,-79.518903,"[{'label': 'display', 'lat': 43.62885507709014...",168,CA,Canada,"[110 Jutland Rd (W of Islington Ave), Etobicok...",110 Jutland Rd,Etobicoke,ON,M8Z 2H1,W of Islington Ave,,50775788e4b0b61558fb5e57
4,Cinespace Studios,Design Studio,43.629867,-79.528353,"[{'label': 'display', 'lat': 43.62986663840240...",603,CA,Canada,"[777 Kipling Ave., Toronto ON M8Z 5Z4, Canada]",777 Kipling Ave.,Toronto,ON,M8Z 5Z4,,Islington - City Centre West,4de51e6645dd180ae5855f5e
5,Dollarama,Discount Store,43.629883,-79.518627,"[{'label': 'display', 'lat': 43.629883, 'lng':...",223,CA,Canada,"[1000 Islington Ave (Titan Avenue), Toronto ON...",1000 Islington Ave,Toronto,ON,M8Z 4P8,Titan Avenue,,510991cfe4b05035bb705499
6,Islington Florist & Nursery,Flower Shop,43.630156,-79.518718,"[{'label': 'display', 'lat': 43.63015614347047...",234,CA,Canada,"[Toronto ON, Canada]",,Toronto,ON,,,,4dfcc1cb7d8b30508015bef0
7,RONA,Hardware Store,43.629393,-79.51832,"[{'label': 'display', 'lat': 43.6293926, 'lng'...",224,CA,Canada,"[994 Islington Avenue, Etobicoke ON M8Z 4P8, C...",994 Islington Avenue,Etobicoke,ON,M8Z 4P8,,,51c45641498e31c12c9d726b
8,Healthy Planet,Supplement Shop,43.630214,-79.518495,"[{'label': 'display', 'lat': 43.63021350898869...",253,CA,Canada,"[1000 Islington Ave Unit 3, Etobicoke ON M8Z 4...",1000 Islington Ave Unit 3,Etobicoke,ON,M8Z 4P8,,Islington - City Centre West,4f04a3e1c2eec63e1e379900
9,McDonald's,Fast Food Restaurant,43.630007,-79.518041,"[{'label': 'display', 'lat': 43.6300066, 'lng'...",271,CA,Canada,[1001 Islington Ave (btwn Titan Rd & Jutland R...,1001 Islington Ave,Etobicoke,ON,M8Z 4P8,btwn Titan Rd & Jutland Rd,,4aec9552f964a52007c921e3


## Nearby Venues

In [15]:
# Function to get nearby venues

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # URL to get data about venues from Foursquare API
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, lon, radius, LIMIT)   
        
        # Get data through get request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # Add results to list of all venues
        venues_list.append([(name, lat, lon, v['venue']['name'], v['venue']['location']['lat'], v['venue']['location']['lng'], v['venue']['categories'][0]['name']) for v in results])

    # Create DataFrame for nearby venues
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category']
    
    # Return DataFrame of nearby venues
    return(nearby_venues)

In [19]:
# Get nearby venues for Toronto
toronto_venues = getNearbyVenues(names=canada_df['Neighborhood'], latitudes=canada_df['Latitude'], longitudes=canada_df['Longitude'])

# Print number of nearby venues retuirned
print('Foursquare retured {} nearby venues for Toronto.'.format(toronto_venues.shape[0]))

# Display first 10 records for nearby venues for Toronto
toronto_venues.head(10)

Foursquare retured 726 nearby venues for Toronto.


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.520999,Pho Com Viet Nam,43.756631,-79.518336,Vietnamese Restaurant
1,Parkwoods,43.753259,-79.520999,Pizza Hut,43.756169,-79.517983,Pizza Place
2,Parkwoods,43.753259,-79.520999,KFC,43.7566,-79.5181,Fast Food Restaurant
3,Parkwoods,43.753259,-79.520999,The Beer Store,43.756094,-79.516239,Beer Store
4,Parkwoods,43.753259,-79.520999,Subway,43.756171,-79.518251,Sandwich Place
5,Parkwoods,43.753259,-79.520999,Tim Hortons,43.754344,-79.527024,Coffee Shop
6,Parkwoods,43.753259,-79.520999,Tim Hortons,43.756128,-79.516266,Coffee Shop
7,Parkwoods,43.753259,-79.520999,Jian Hing Supermarket,43.756673,-79.518444,Grocery Store
8,Parkwoods,43.753259,-79.520999,Planet Fitness,43.757538,-79.51961,Gym / Fitness Center
9,Parkwoods,43.753259,-79.520999,Hwy 400 at Finch W.,43.754399,-79.526967,Intersection


In [20]:
# Print number of unique venue categories
print('There are {} uniques categories of venues.'.format(len(toronto_venues['Venue Category'].unique())))

There are 87 uniques categories of venues.


# Data Processing

## One-hot Encoding

In [23]:
# Perform one-hot encoding for Toronto venues DataFrame
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# Add neighborhood column back to DataFrame
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# Move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

# Display first 10 records
toronto_onehot.head(100)

Unnamed: 0,Neighborhood,Athletics & Sports,BBQ Joint,Bakery,Bank,Baseball Field,Beer Store,Breakfast Spot,Brewery,Burger Joint,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Camera Store,Caribbean Restaurant,Chinese Restaurant,Coffee Shop,Construction & Landscaping,Convenience Store,Department Store,Diner,Discount Store,Dog Run,Donut Shop,Dry Cleaner,Fabric Shop,Fast Food Restaurant,Fish & Chips Shop,Food & Drink Shop,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gas Station,Gastropub,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Hardware Store,Historic Site,History Museum,Home Service,Ice Cream Shop,Intersection,Italian Restaurant,Japanese Restaurant,Kebab Restaurant,Kids Store,Latin American Restaurant,Liquor Store,Medical Supply Store,Metro Station,Music School,Nightclub,Other Nightlife,Outdoors & Recreation,Park,Pharmacy,Pizza Place,Playground,Plaza,Pool,Print Shop,Pub,Restaurant,Salon / Barbershop,Sandwich Place,Shopping Mall,Skating Rink,Snack Place,Soccer Field,Spa,Sporting Goods Shop,Sports Bar,Supermarket,Supplement Shop,Sushi Restaurant,Tanning Salon,Tea Room,Thai Restaurant,Vietnamese Restaurant,Wings Joint,Yoga Studio,Zoo
0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Group by Neighborhoods

In [24]:
# Group all records by neighborhood
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()

# Display first 10 records
toronto_grouped.head(10)

Unnamed: 0,Neighborhood,Athletics & Sports,BBQ Joint,Bakery,Bank,Baseball Field,Beer Store,Breakfast Spot,Brewery,Burger Joint,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Camera Store,Caribbean Restaurant,Chinese Restaurant,Coffee Shop,Construction & Landscaping,Convenience Store,Department Store,Diner,Discount Store,Dog Run,Donut Shop,Dry Cleaner,Fabric Shop,Fast Food Restaurant,Fish & Chips Shop,Food & Drink Shop,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gas Station,Gastropub,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Hardware Store,Historic Site,History Museum,Home Service,Ice Cream Shop,Intersection,Italian Restaurant,Japanese Restaurant,Kebab Restaurant,Kids Store,Latin American Restaurant,Liquor Store,Medical Supply Store,Metro Station,Music School,Nightclub,Other Nightlife,Outdoors & Recreation,Park,Pharmacy,Pizza Place,Playground,Plaza,Pool,Print Shop,Pub,Restaurant,Salon / Barbershop,Sandwich Place,Shopping Mall,Skating Rink,Snack Place,Soccer Field,Spa,Sporting Goods Shop,Sports Bar,Supermarket,Supplement Shop,Sushi Restaurant,Tanning Salon,Tea Room,Thai Restaurant,Vietnamese Restaurant,Wings Joint,Yoga Studio,Zoo
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.111111,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.111111,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Berczy Park,0.0,0.0,0.045455,0.0,0.0,0.045455,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.136364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.045455,0.0,0.0,0.0,0.0,0.090909,0.045455,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.045455,0.0,0.0,0.0,0.0
6,"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.090909,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.181818,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0
8,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.076923,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.076923,0.0,0.0,0.0,0.076923,0.0,0.0
9,Caledonia-Fairbanks,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Exploring & Clustering

## Most Common Venues

In [25]:
# Funtion to get the most comkmong venues for each neighborhood
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [26]:
# Get top 5 venues for each neighborhodd
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# Create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# Create a new DataFrame for neighborhoods and most common venues
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

# Display first 10 records
neighborhoods_venues_sorted.head(10)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Agincourt,Intersection,Other Nightlife,Café,Shopping Mall,Zoo
1,"Alderwood, Long Branch",Coffee Shop,Breakfast Spot,Skating Rink,Soccer Field,Fish & Chips Shop
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Shopping Mall,Grocery Store,Pizza Place,Gas Station
3,Bayview Village,Bus Station,Metro Station,Department Store,Discount Store,Dog Run
4,"Bedford Park, Lawrence Manor East",Pool,Furniture / Home Store,Fish & Chips Shop,Department Store,Diner
5,Berczy Park,Coffee Shop,Grocery Store,Pub,Sushi Restaurant,Sandwich Place
6,"Birch Cliff, Cliffside West",Pizza Place,Park,Gas Station,Zoo,Fast Food Restaurant
7,"Brockton, Parkdale Village, Exhibition Place",Gym / Fitness Center,Park,Yoga Studio,Convenience Store,Bank
8,"CN Tower, King and Spadina, Railway Lands, Har...",Hardware Store,Supplement Shop,Discount Store,Gym,Fast Food Restaurant
9,Caledonia-Fairbanks,Pizza Place,Bakery,Restaurant,Café,Fish & Chips Shop


## Clustering Neighborhoods

In [27]:
# Set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# Run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# Check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 2, 2, 2, 0, 2, 3, 2, 2, 2])

In [28]:
# Add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = canada_df

# Merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
toronto_merged = toronto_merged.dropna()

# Display first 10 records
toronto_merged.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,2,Coffee Shop,Grocery Store,Vietnamese Restaurant,Gym / Fitness Center,Intersection
1,M4A,North York,Victoria Village,43.725882,-79.315572,1,Business Service,Discount Store,Bakery,Golf Course,Cafeteria
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,4,Italian Restaurant,History Museum,Dog Run,Food & Drink Shop,Diner
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0,Bakery,Coffee Shop,Intersection,Outdoors & Recreation,Furniture / Home Store
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494,2,Grocery Store,Spa,Convenience Store,Park,Pharmacy
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242,3,Park,Bus Stop,Skating Rink,Convenience Store,Bakery
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,2,Construction & Landscaping,Gas Station,Furniture / Home Store,Print Shop,Gym
7,M3B,North York,Don Mills North,43.745906,-79.352188,2,Food & Drink Shop,Tea Room,Fabric Shop,Zoo,Diner
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,3,Convenience Store,Park,Historic Site,Diner,Discount Store
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,2,Spa,Bank,Camera Store,Supermarket,Food & Drink Shop


## Plotting Map

In [29]:
# Create map
map_clusters = folium.Map(location=[lat, lon], zoom_start=10)

# Set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker([lat, lon], 
                        radius=5, 
                        popup=label, 
                        color=rainbow[cluster-1], 
                        fill=True, 
                        fill_color=rainbow[cluster-1], 
                        fill_opacity=0.7).add_to(map_clusters)

# Display map with clusters
map_clusters