In [1]:
import pandas as pd # library for data analysis
import numpy as np # library to handle data in a vectorized manner

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Libraries imported.


In [13]:
# we proceed to scrap wikipedia page: 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M' 
# in order to obtain the data that is in the table of postal codes & transform data in a new pandas dataframe
df = pd.read_html('https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M')[0]

# we drop any Boroughs with value 'Not assigned'
df = df[df.Borough!='Not assigned']
df = df.groupby(['Postal Code','Borough'], as_index=False).agg(lambda x: ','.join(x))
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [14]:
df_geo = pd.read_csv("https://cocl.us/Geospatial_data")
df = pd.merge(df, df_geo, left_on='Postal Code', right_on='Postal Code').drop(['Postal Code'], axis=1)
df

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,Scarborough,Woburn,43.770992,-79.216917
4,Scarborough,Cedarbrae,43.773136,-79.239476
5,Scarborough,Scarborough Village,43.744734,-79.239476
6,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [15]:
# get location of Toronto using geopy
address = 'Toronto'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [17]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighbourhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [18]:
# Segment and cluster only the neighborhoods in North York Borough
york_data = df[df['Borough']=='North York'].reset_index(drop=True)
york_data

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,North York,Hillcrest Village,43.803762,-79.363452
1,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
2,North York,Bayview Village,43.786947,-79.385975
3,North York,"York Mills, Silver Hills",43.75749,-79.374714
4,North York,"Willowdale, Newtonbrook",43.789053,-79.408493
5,North York,"Willowdale, Willowdale East",43.77012,-79.408493
6,North York,York Mills West,43.752758,-79.400049
7,North York,"Willowdale, Willowdale West",43.782736,-79.442259
8,North York,Parkwoods,43.753259,-79.329656
9,North York,Don Mills,43.745906,-79.352188


In [21]:
address = 'North York'
geolocator = Nominatim(user_agent='to_explorer')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

print("The geographical coordinates of North York are {}, {}.".format(latitude, longitude))

The geographical coordinates of North York are 43.7543263, -79.44911696639593.


In [32]:
# create map of North York using latitude and longitude values
map_north_york = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, borough, neighbourhood in zip(york_data['Latitude'], york_data['Longitude'], york_data['Borough'], york_data['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_north_york)  
    
map_north_york

In [29]:
# Define Foursquare Credentials and Version

CLIENT_ID = 'ESPT1Q0S12HZNN0Q0TXZKJKZAOOFAYOQXRVKXPRLRGM1D1E5' # my Foursquare ID
CLIENT_SECRET = 'GBDB1OTGR5FDLAIDC41VUXZ115GU13COU0CPDONYU2JUCOH5' # my Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('My credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

My credentials:
CLIENT_ID: ESPT1Q0S12HZNN0Q0TXZKJKZAOOFAYOQXRVKXPRLRGM1D1E5
CLIENT_SECRET:GBDB1OTGR5FDLAIDC41VUXZ115GU13COU0CPDONYU2JUCOH5


In [33]:
# Get the top 100 venues that are in North York within a radius of 1000 meters
# we define necessary functions

def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [34]:
# run the function above to each neighborhood and create a new dataframe called north_york__venues
north_york_venues = getNearbyVenues(names = york_data['Neighbourhood'],
                                 latitudes = york_data['Latitude'],
                                 longitudes = york_data['Longitude'])

north_york_venues

Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
York Mills, Silver Hills
Willowdale, Newtonbrook
Willowdale, Willowdale East
York Mills West
Willowdale, Willowdale West
Parkwoods
Don Mills
Don Mills
Bathurst Manor, Wilson Heights, Downsview North
Northwood Park, York University
Downsview
Downsview
Downsview
Downsview
Victoria Village
Bedford Park, Lawrence Manor East
Lawrence Manor, Lawrence Heights
Glencairn
North Park, Maple Leaf Park, Upwood Park
Humber Summit
Humberlea, Emery


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Hillcrest Village,43.803762,-79.363452,Tastee,43.807722,-79.356798,Bakery
1,Hillcrest Village,43.803762,-79.363452,고려삼계탕 Korean Ginseng Chicken Soup & Bibimbap,43.798391,-79.369187,Korean Restaurant
2,Hillcrest Village,43.803762,-79.363452,Cummer Park,43.799564,-79.371175,Park
3,Hillcrest Village,43.803762,-79.363452,Galati,43.797831,-79.36941,Grocery Store
4,Hillcrest Village,43.803762,-79.363452,Pizza Pizza,43.799079,-79.369449,Pizza Place
5,Hillcrest Village,43.803762,-79.363452,TD Canada Trust,43.798466,-79.368832,Bank
6,Hillcrest Village,43.803762,-79.363452,Subway,43.799059,-79.368946,Sandwich Place
7,Hillcrest Village,43.803762,-79.363452,Tim Hortons,43.798945,-79.369644,Coffee Shop
8,Hillcrest Village,43.803762,-79.363452,Shoppers Drug Mart,43.798341,-79.369804,Pharmacy
9,Hillcrest Village,43.803762,-79.363452,Cliffwood I.D.A. Pharmacy,43.807909,-79.357777,Pharmacy


In [36]:
# check size of resulting dataframe
north_york_venues.shape

(628, 7)

In [37]:
# let's check how many venues were returned for each neighborhood
north_york_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Wilson Heights, Downsview North",29,29,29,29,29,29
Bayview Village,14,14,14,14,14,14
"Bedford Park, Lawrence Manor East",39,39,39,39,39,39
Don Mills,75,75,75,75,75,75
Downsview,67,67,67,67,67,67
"Fairview, Henry Farm, Oriole",44,44,44,44,44,44
Glencairn,35,35,35,35,35,35
Hillcrest Village,21,21,21,21,21,21
Humber Summit,10,10,10,10,10,10
"Humberlea, Emery",8,8,8,8,8,8


In [38]:
# Let's find out how many unique categories can be curated from all the returned venues
print('There are {} unique categories.'.format(len(north_york_venues['Venue Category'].unique())))

There are 152 unique categories.


#### We proceed to Cluster, Segment & Analyze out Neighbourhoods

In [41]:
# one hot encoding
york_onehot = pd.get_dummies(north_york_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighbourhood column back to dataframe
york_onehot['Neighbourhood'] = north_york_venues['Neighbourhood'] 

# move neighbourhood column to the first column
fixed_columns = [york_onehot.columns[-1]] + list(york_onehot.columns[:-1])
york_onehot = york_onehot[fixed_columns]

york_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,Automotive Shop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Beer Store,Bike Shop,Boutique,Bowling Alley,Boxing Gym,Breakfast Spot,Bridal Shop,Bubble Tea Shop,Burger Joint,Bus Line,Bus Stop,Business Service,Butcher,Café,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Community Center,Convenience Store,Cosmetics Shop,Creperie,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space,Falafel Restaurant,Fast Food Restaurant,Fireworks Store,Fish & Chips Shop,Food & Drink Shop,Food Court,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gas Station,Gift Shop,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,History Museum,Hockey Arena,Hookah Bar,Hot Dog Joint,Hotel,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karaoke Bar,Kitchen Supply Store,Korean Restaurant,Latin American Restaurant,Laundry Service,Liquor Store,Locksmith,Lounge,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Movie Theater,New American Restaurant,Office,Other Repair Shop,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Photography Lab,Pizza Place,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Recreation Center,Rental Car Location,Residential Building (Apartment / Condo),Restaurant,Road,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shop & Service,Shopping Mall,Skating Rink,Ski Area,Ski Chalet,Snack Place,Soccer Field,Spa,Sporting Goods Shop,Sports Bar,Sports Club,Steakhouse,Storage Facility,Supermarket,Sushi Restaurant,Tennis Court,Thai Restaurant,Theater,Toy / Game Store,Trail,Train Station,Turkish Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store,Yoga Studio
0,Hillcrest Village,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Hillcrest Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Hillcrest Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Hillcrest Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Hillcrest Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [42]:
# check dataframe size
york_onehot.shape

(628, 153)

In [43]:
# let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
york_grouped = york_onehot.groupby('Neighbourhood').mean().reset_index()
york_grouped.head()

Unnamed: 0,Neighbourhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,Automotive Shop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Beer Store,Bike Shop,Boutique,Bowling Alley,Boxing Gym,Breakfast Spot,Bridal Shop,Bubble Tea Shop,Burger Joint,Bus Line,Bus Stop,Business Service,Butcher,Café,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Community Center,Convenience Store,Cosmetics Shop,Creperie,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space,Falafel Restaurant,Fast Food Restaurant,Fireworks Store,Fish & Chips Shop,Food & Drink Shop,Food Court,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gas Station,Gift Shop,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,History Museum,Hockey Arena,Hookah Bar,Hot Dog Joint,Hotel,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karaoke Bar,Kitchen Supply Store,Korean Restaurant,Latin American Restaurant,Laundry Service,Liquor Store,Locksmith,Lounge,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Movie Theater,New American Restaurant,Office,Other Repair Shop,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Photography Lab,Pizza Place,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Recreation Center,Rental Car Location,Residential Building (Apartment / Condo),Restaurant,Road,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shop & Service,Shopping Mall,Skating Rink,Ski Area,Ski Chalet,Snack Place,Soccer Field,Spa,Sporting Goods Shop,Sports Bar,Sports Club,Steakhouse,Storage Facility,Supermarket,Sushi Restaurant,Tennis Court,Thai Restaurant,Theater,Toy / Game Store,Trail,Train Station,Turkish Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store,Yoga Studio
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.068966,0.0,0.034483,0.034483,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.034483,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.034483,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.034483,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.034483,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.034483,0.0,0.0,0.034483,0.0,0.034483,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.034483,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.025641,0.051282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.0,0.0,0.0,0.0,0.0,0.076923,0.025641,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.076923,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.025641,0.0,0.025641,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.025641,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.025641,0.0,0.0
3,Don Mills,0.0,0.0,0.013333,0.013333,0.0,0.026667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026667,0.013333,0.0,0.026667,0.013333,0.0,0.0,0.0,0.013333,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.026667,0.013333,0.0,0.013333,0.0,0.013333,0.08,0.0,0.0,0.0,0.0,0.0,0.013333,0.0,0.0,0.0,0.013333,0.013333,0.013333,0.0,0.0,0.0,0.0,0.0,0.0,0.013333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013333,0.0,0.053333,0.0,0.0,0.013333,0.013333,0.0,0.0,0.0,0.013333,0.013333,0.013333,0.013333,0.066667,0.013333,0.0,0.0,0.0,0.0,0.0,0.0,0.013333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013333,0.0,0.026667,0.0,0.013333,0.013333,0.0,0.013333,0.013333,0.0,0.0,0.0,0.026667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.013333,0.013333,0.013333,0.0,0.013333,0.0,0.0,0.0,0.0,0.0,0.0,0.013333,0.013333,0.0,0.0,0.0,0.0,0.04,0.013333,0.0,0.013333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013333,0.0
4,Downsview,0.0,0.014925,0.014925,0.0,0.0,0.0,0.029851,0.0,0.0,0.0,0.0,0.0,0.014925,0.014925,0.0,0.014925,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.014925,0.0,0.014925,0.0,0.0,0.074627,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.014925,0.0,0.014925,0.029851,0.0,0.0,0.0,0.014925,0.0,0.014925,0.0,0.0,0.044776,0.0,0.0,0.0,0.059701,0.014925,0.014925,0.0,0.0,0.0,0.0,0.0,0.059701,0.0,0.0,0.029851,0.014925,0.0,0.0,0.0,0.0,0.014925,0.0,0.014925,0.0,0.029851,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.029851,0.0,0.044776,0.0,0.044776,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029851,0.0,0.0,0.029851,0.014925,0.0,0.0,0.014925,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029851,0.0,0.0,0.074627,0.0,0.0,0.0


In [44]:
# check dataframe size
york_grouped.shape

(20, 153)

In [45]:
# Let's print each neighborhood along with the top 5 most common venues
num_top_venues = 5

for hood in york_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = york_grouped[york_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor, Wilson Heights, Downsview North----
               venue  freq
0        Coffee Shop  0.07
1               Bank  0.07
2  Mobile Phone Shop  0.03
3         Ski Chalet  0.03
4           Ski Area  0.03


----Bayview Village----
                 venue  freq
0  Japanese Restaurant  0.14
1          Gas Station  0.14
2                 Bank  0.14
3        Grocery Store  0.14
4                Trail  0.07


----Bedford Park, Lawrence Manor East----
                venue  freq
0  Italian Restaurant  0.08
1         Coffee Shop  0.08
2                Bank  0.05
3      Sandwich Place  0.05
4             Butcher  0.03


----Don Mills----
                 venue  freq
0          Coffee Shop  0.08
1           Restaurant  0.08
2  Japanese Restaurant  0.07
3                  Gym  0.05
4          Supermarket  0.04


----Downsview----
                   venue  freq
0  Vietnamese Restaurant  0.07
1            Coffee Shop  0.07
2          Grocery Store  0.06
3                  Hotel  0.06
4

In [46]:
# Let's put that into a pandas dataframe
# we create a function to sort the venues in descending order.

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

# we create the new dataframe and display the top 10 venues for each neighborhood

num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = york_grouped['Neighbourhood']

for ind in np.arange(york_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(york_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Restaurant,Community Center,Shopping Mall,Dog Run,Sandwich Place,Bridal Shop,Diner,Chinese Restaurant
1,Bayview Village,Gas Station,Bank,Grocery Store,Japanese Restaurant,Trail,Café,Restaurant,Chinese Restaurant,Skating Rink,Park
2,"Bedford Park, Lawrence Manor East",Coffee Shop,Italian Restaurant,Sandwich Place,Bank,Bridal Shop,Restaurant,Butcher,Pub,Café,Pharmacy
3,Don Mills,Coffee Shop,Restaurant,Japanese Restaurant,Gym,Burger Joint,Supermarket,Mobile Phone Shop,Beer Store,Asian Restaurant,Pizza Place
4,Downsview,Coffee Shop,Vietnamese Restaurant,Hotel,Grocery Store,Gas Station,Pharmacy,Pizza Place,Intersection,Turkish Restaurant,Fast Food Restaurant
5,"Fairview, Henry Farm, Oriole",Coffee Shop,Clothing Store,Sandwich Place,Japanese Restaurant,Bank,Bakery,Restaurant,Juice Bar,Grocery Store,Movie Theater
6,Glencairn,Grocery Store,Fast Food Restaurant,Coffee Shop,Gas Station,Pizza Place,Italian Restaurant,Convenience Store,Pub,Photography Lab,Pharmacy
7,Hillcrest Village,Pharmacy,Park,Coffee Shop,Convenience Store,Chinese Restaurant,Sandwich Place,Restaurant,Residential Building (Apartment / Condo),Korean Restaurant,Recreation Center
8,Humber Summit,Electronics Store,Bank,Pizza Place,Pharmacy,Park,Italian Restaurant,Shopping Mall,Medical Center,Bakery,Yoga Studio
9,"Humberlea, Emery",Discount Store,Convenience Store,Park,Auto Workshop,Golf Course,Storage Facility,Gas Station,Bakery,Yoga Studio,Event Space


In [49]:
# Run k-means to cluster the neighborhood into 5 clusters
# set number of clusters
kclusters = 6

york_grouped_clustering = york_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(york_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 0, 2, 2, 1, 2, 1, 1, 5, 4])

In [50]:
# Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

york_merged = df

# merge toronto_grouped with df to add latitude/longitude for each neighbourhood
york_merged = york_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')
york_merged = york_merged.dropna()
york_merged = york_merged.astype({"Cluster Labels": int})
york_merged.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,North York,Hillcrest Village,43.803762,-79.363452,1,Pharmacy,Park,Coffee Shop,Convenience Store,Chinese Restaurant,Sandwich Place,Restaurant,Residential Building (Apartment / Condo),Korean Restaurant,Recreation Center
18,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556,2,Coffee Shop,Clothing Store,Sandwich Place,Japanese Restaurant,Bank,Bakery,Restaurant,Juice Bar,Grocery Store,Movie Theater
19,North York,Bayview Village,43.786947,-79.385975,0,Gas Station,Bank,Grocery Store,Japanese Restaurant,Trail,Café,Restaurant,Chinese Restaurant,Skating Rink,Park
20,North York,"York Mills, Silver Hills",43.75749,-79.374714,3,Park,Pool,Diner,Falafel Restaurant,Event Space,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Dog Run,Discount Store
21,North York,"Willowdale, Newtonbrook",43.789053,-79.408493,1,Korean Restaurant,Café,Pizza Place,Middle Eastern Restaurant,Park,Diner,Coffee Shop,Supermarket,Fried Chicken Joint,Shopping Mall


In [51]:
# Finally, let's visualize the resulting clusters
# create map
import matplotlib.cm as cm
import matplotlib.colors as colors

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(york_merged['Latitude'], york_merged['Longitude'], york_merged['Neighbourhood'], york_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [53]:
# Cluster 1
york_merged.loc[york_merged['Cluster Labels'] == 0]

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,North York,Bayview Village,43.786947,-79.385975,0,Gas Station,Bank,Grocery Store,Japanese Restaurant,Trail,Café,Restaurant,Chinese Restaurant,Skating Rink,Park


In [54]:
# Cluster 2
york_merged.loc[york_merged['Cluster Labels'] == 1]

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,North York,Hillcrest Village,43.803762,-79.363452,1,Pharmacy,Park,Coffee Shop,Convenience Store,Chinese Restaurant,Sandwich Place,Restaurant,Residential Building (Apartment / Condo),Korean Restaurant,Recreation Center
21,North York,"Willowdale, Newtonbrook",43.789053,-79.408493,1,Korean Restaurant,Café,Pizza Place,Middle Eastern Restaurant,Park,Diner,Coffee Shop,Supermarket,Fried Chicken Joint,Shopping Mall
23,North York,York Mills West,43.752758,-79.400049,1,Park,Restaurant,Coffee Shop,Playground,Gas Station,Dog Run,Business Service,Bowling Alley,French Restaurant,Chinese Restaurant
24,North York,"Willowdale, Willowdale West",43.782736,-79.442259,1,Pharmacy,Butcher,Convenience Store,Pizza Place,Eastern European Restaurant,Coffee Shop,Grocery Store,Park,Bus Line,Bakery
25,North York,Parkwoods,43.753259,-79.329656,1,Park,Convenience Store,Pharmacy,Shopping Mall,Bus Stop,Discount Store,Shop & Service,Fast Food Restaurant,Laundry Service,Café
28,North York,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259,1,Bank,Coffee Shop,Restaurant,Community Center,Shopping Mall,Dog Run,Sandwich Place,Bridal Shop,Diner,Chinese Restaurant
30,North York,Downsview,43.737473,-79.464763,1,Coffee Shop,Vietnamese Restaurant,Hotel,Grocery Store,Gas Station,Pharmacy,Pizza Place,Intersection,Turkish Restaurant,Fast Food Restaurant
31,North York,Downsview,43.739015,-79.506944,1,Coffee Shop,Vietnamese Restaurant,Hotel,Grocery Store,Gas Station,Pharmacy,Pizza Place,Intersection,Turkish Restaurant,Fast Food Restaurant
32,North York,Downsview,43.728496,-79.495697,1,Coffee Shop,Vietnamese Restaurant,Hotel,Grocery Store,Gas Station,Pharmacy,Pizza Place,Intersection,Turkish Restaurant,Fast Food Restaurant
33,North York,Downsview,43.761631,-79.520999,1,Coffee Shop,Vietnamese Restaurant,Hotel,Grocery Store,Gas Station,Pharmacy,Pizza Place,Intersection,Turkish Restaurant,Fast Food Restaurant


In [56]:
# Cluster 3
york_merged.loc[york_merged['Cluster Labels'] == 2]

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556,2,Coffee Shop,Clothing Store,Sandwich Place,Japanese Restaurant,Bank,Bakery,Restaurant,Juice Bar,Grocery Store,Movie Theater
22,North York,"Willowdale, Willowdale East",43.77012,-79.408493,2,Coffee Shop,Ramen Restaurant,Korean Restaurant,Bubble Tea Shop,Fast Food Restaurant,Pizza Place,Japanese Restaurant,Sandwich Place,Restaurant,Sushi Restaurant
26,North York,Don Mills,43.745906,-79.352188,2,Coffee Shop,Restaurant,Japanese Restaurant,Gym,Burger Joint,Supermarket,Mobile Phone Shop,Beer Store,Asian Restaurant,Pizza Place
27,North York,Don Mills,43.7259,-79.340923,2,Coffee Shop,Restaurant,Japanese Restaurant,Gym,Burger Joint,Supermarket,Mobile Phone Shop,Beer Store,Asian Restaurant,Pizza Place
29,North York,"Northwood Park, York University",43.76798,-79.487262,2,Coffee Shop,Furniture / Home Store,Pizza Place,Restaurant,Bank,Sandwich Place,Metro Station,Fast Food Restaurant,Middle Eastern Restaurant,Road
34,North York,Victoria Village,43.725882,-79.315572,2,Coffee Shop,Portuguese Restaurant,French Restaurant,Hockey Arena,Men's Store,Boxing Gym,Playground,Sporting Goods Shop,Pizza Place,Gym / Fitness Center
62,North York,"Bedford Park, Lawrence Manor East",43.733283,-79.41975,2,Coffee Shop,Italian Restaurant,Sandwich Place,Bank,Bridal Shop,Restaurant,Butcher,Pub,Café,Pharmacy
71,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,2,Clothing Store,Restaurant,Coffee Shop,Fast Food Restaurant,Dessert Shop,Vietnamese Restaurant,Furniture / Home Store,Fried Chicken Joint,Sushi Restaurant,Cheese Shop


In [57]:
# Cluster 4
york_merged.loc[york_merged['Cluster Labels'] == 3]

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,North York,"York Mills, Silver Hills",43.75749,-79.374714,3,Park,Pool,Diner,Falafel Restaurant,Event Space,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Dog Run,Discount Store


In [58]:
# Cluster 5
york_merged.loc[york_merged['Cluster Labels'] == 4]

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
97,North York,"Humberlea, Emery",43.724766,-79.532242,4,Discount Store,Convenience Store,Park,Auto Workshop,Golf Course,Storage Facility,Gas Station,Bakery,Yoga Studio,Event Space


In [59]:
# Cluster 6
york_merged.loc[york_merged['Cluster Labels'] == 5]

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
96,North York,Humber Summit,43.756303,-79.565963,5,Electronics Store,Bank,Pizza Place,Pharmacy,Park,Italian Restaurant,Shopping Mall,Medical Center,Bakery,Yoga Studio
