In [1]:
#-----------------------------------------------------------------------
# The scripts below will read the neighborhood data table from wiki page
# and save as a dataframe
# Next, it import the lat and long from a .csv file that cordinated with 
# the above dataframe then merged with the dataframe form wiki page.
# Next, create a dataframe that contain only Toronto in the borough
#----------------------------------------------------------------------

#!conda install -c conda-forge geocoder --yes 
#!conda install -c conda-forge folium=0.5.0 --yes
#import geocoder
from pandas.io.html import read_html
import pandas as pd
import numpy as np
import json
import folium
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
import requests
from pandas.io.json import json_normalize

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
page = read_html(url, attrs={'class':'wikitable'})
df1 = page[0]
df1.drop(df1[df1['Borough'] == 'Not assigned'].index, inplace = True)
df1.reset_index(drop=True, inplace=True)
df1.loc[df1['Neighborhood'] == 'Not assigned', 'Borough']
csv = 'https://cocl.us/Geospatial_data'
df2 = pd.read_csv(csv, index_col=0)
df = pd.merge(df1, df2, on= "Postal Code")
  
# Create a dataframe that only contain 'Toronto' in the Borough column
df_tor = df[df['Borough'].str.contains('Toronto')]
df_tor.reset_index(drop=True, inplace=True)

df_tor.groupby('Neighborhood').count()

tor_gp= df_tor.groupby('Neighborhood').mean().reset_index()

print('There are {} neighborhoods around Toronto'.format(tor_gp.shape[0]))



There are 39 neighborhoods around Toronto


In [2]:
#-----------------------------------------------------------------------
# Using foursquare API, the scripts below will explore the venues around 
# toronto borough which limit the search to 20 per neighborhood (neighborhoods
# with same lat and long are treated as one) to reduce the search time
#----------------------------------------------------------------------

CLIENT_ID = 'RSR2IPLPFCKEM15L3UAWFXDD2IGO3HP3U0FVZO5FZYRLO0BB' 
CLIENT_SECRET = 'DSQWHLPFDBQLTCUDDAHQ4LQXMWTZODYEH4UBDRBXWDJPOBAY' 
VERSION = '20180605' 

# only search for 10 venues first 5 neighborhood.
# change the constains below to expand the exploration
LIMIT = 50 

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        fqurl = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(fqurl).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

toronto_venues = getNearbyVenues(names=tor_gp['Neighborhood'],
                                   latitudes=tor_gp['Latitude'],
                                   longitudes=tor_gp['Longitude'])

print('There are {} explored venues'.format(toronto_venues.shape[0]))

toronto_venues.head()




There are 1184 explored venues


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Berczy Park,43.644771,-79.373306,LCBO,43.642944,-79.37244,Liquor Store
1,Berczy Park,43.644771,-79.373306,The Keg Steakhouse + Bar - Esplanade,43.646712,-79.374768,Restaurant
2,Berczy Park,43.644771,-79.373306,Meridian Hall,43.646292,-79.376022,Concert Hall
3,Berczy Park,43.644771,-79.373306,Fresh On Front,43.647815,-79.374453,Vegetarian / Vegan Restaurant
4,Berczy Park,43.644771,-79.373306,Hockey Hall Of Fame (Hockey Hall of Fame),43.646974,-79.377323,Museum


In [3]:
# This will count the number of explored venues for each neighborhood (neighborhoods
# with same lat and long are treated as one)
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,50,50,50,50,50,50
"Brockton, Parkdale Village, Exhibition Place",22,22,22,22,22,22
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",17,17,17,17,17,17
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",17,17,17,17,17,17
Central Bay Street,50,50,50,50,50,50
Christie,16,16,16,16,16,16
Church and Wellesley,50,50,50,50,50,50
"Commerce Court, Victoria Hotel",50,50,50,50,50,50
Davisville,33,33,33,33,33,33
Davisville North,8,8,8,8,8,8


In [4]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 215 uniques categories.


In [5]:
#-----------------------------------------------------------------------
# The scripts below will analyze each neighborhood (neighborhoods
# with same lat and long are treated as one) and then group the
# dataframe rows by neighborhood and count the average occurence
# of each category
#----------------------------------------------------------------------
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
#-----------------------------------------------------------------------
# The scripts below will list the top 10 venues for each neighborhood 
# (neighborhoods with same lat and long are treated as one)
#----------------------------------------------------------------------

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Beer Bar,Cheese Shop,Seafood Restaurant,Bakery,Restaurant,Café,Breakfast Spot,Pharmacy
1,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Gym,Stadium,Bakery,Intersection,Restaurant,Convenience Store,Italian Restaurant
2,"Business reply mail Processing Centre, South C...",Park,Spa,Garden,Gym / Fitness Center,Fast Food Restaurant,Farmers Market,Light Rail Station,Comic Shop,Pizza Place,Butcher
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Lounge,Airport Terminal,Boat or Ferry,Coffee Shop,Boutique,Rental Car Location,Sculpture Garden,Plane,Harbor / Marina
4,Central Bay Street,Coffee Shop,Italian Restaurant,Japanese Restaurant,Bubble Tea Shop,Burger Joint,Café,Sandwich Place,Comic Shop,Ramen Restaurant,Poke Place


In [7]:
#-----------------------------------------------------------------------
# The scripts below will run the k-means to clusters the neighborhood
# The number of clusters is defined in kclusters. The k-means is computed
# using toronto dataframe
# Next, the clusters are inserted into the asorted venues dataframe  
#----------------------------------------------------------------------

kclusters = 5

toronto_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(init='k-means++', n_clusters=kclusters, n_init=12).fit(toronto_clustering)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_tor

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head(20) # show the first 20 rows!



Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,4,Coffee Shop,Bakery,Park,Pub,Café,Theater,Breakfast Spot,Yoga Studio,Electronics Store,Hotel
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,4,Coffee Shop,Diner,Sushi Restaurant,Yoga Studio,Hobby Shop,Smoothie Shop,Beer Bar,Italian Restaurant,Sandwich Place,Distribution Center
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,4,Café,Coffee Shop,Ramen Restaurant,Bakery,Bookstore,Theater,Cosmetics Shop,Tea Room,Clothing Store,Sandwich Place
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,4,Café,Coffee Shop,Gastropub,Cosmetics Shop,Creperie,Restaurant,Japanese Restaurant,Diner,Hotel,Middle Eastern Restaurant
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Trail,Pub,Health Food Store,Wine Shop,Coworking Space,Distribution Center,Discount Store,Diner,Dessert Shop,Department Store
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,4,Coffee Shop,Cocktail Bar,Beer Bar,Cheese Shop,Seafood Restaurant,Bakery,Restaurant,Café,Breakfast Spot,Pharmacy
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,4,Coffee Shop,Italian Restaurant,Japanese Restaurant,Bubble Tea Shop,Burger Joint,Café,Sandwich Place,Comic Shop,Ramen Restaurant,Poke Place
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564,4,Grocery Store,Café,Park,Coffee Shop,Candy Store,Restaurant,Italian Restaurant,Diner,Nightclub,Baby Store
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,4,Coffee Shop,Café,Restaurant,Concert Hall,Pizza Place,Steakhouse,General Travel,Brazilian Restaurant,Gym,Gym / Fitness Center
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,4,Bakery,Pharmacy,Park,Supermarket,Bank,Bar,Middle Eastern Restaurant,Grocery Store,Brewery,Music Venue


In [8]:
#-------------------------------------------------------------
# create map at central toronto location
# Each cluster has its distinct color scheme generated
# and store in color array.
# The map is mark in circle with the color scheme generated above
# Each circle marker shown the location of each lat and long
# (neighborhood(s))
#-------------------------------------------------------------

map_clusters = folium.Map(location=[43.654260, -79.360636],
                          width= 1500,
                          height = 800,
                          zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
#markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker([lat, lon],
                        radius=5,
                        popup=label,
                        color=rainbow[cluster-1],
                        fill=True,
                        fill_color=rainbow[cluster-1],
                        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [None]:
# The next data is simply show the top 10 venues in each neighborhood(s) of each cluster

In [9]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,The Beaches,0,Trail,Pub,Health Food Store,Wine Shop,Coworking Space,Distribution Center,Discount Store,Diner,Dessert Shop,Department Store
21,"Forest Hill North & West, Forest Hill Road Park",0,Mexican Restaurant,Trail,Sushi Restaurant,Jewelry Store,College Rec Center,College Cafeteria,Doner Restaurant,Dog Run,Distribution Center,Discount Store
33,Rosedale,0,Park,Trail,Playground,Cosmetics Shop,Distribution Center,Discount Store,Diner,Dessert Shop,Department Store,Deli / Bodega


In [10]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,Roselawn,1,Garden,Music Venue,Wine Shop,Creperie,Dog Run,Distribution Center,Discount Store,Diner,Dessert Shop,Department Store


In [11]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,"Moore Park, Summerhill East",2,Trail,Wine Shop,Coworking Space,Dog Run,Distribution Center,Discount Store,Diner,Dessert Shop,Department Store,Deli / Bodega


In [12]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Lawrence Park,3,Park,Bus Line,Construction & Landscaping,Swim School,Dog Run,Distribution Center,Discount Store,Diner,Dessert Shop,Department Store


In [13]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Regent Park, Harbourfront",4,Coffee Shop,Bakery,Park,Pub,Café,Theater,Breakfast Spot,Yoga Studio,Electronics Store,Hotel
1,"Queen's Park, Ontario Provincial Government",4,Coffee Shop,Diner,Sushi Restaurant,Yoga Studio,Hobby Shop,Smoothie Shop,Beer Bar,Italian Restaurant,Sandwich Place,Distribution Center
2,"Garden District, Ryerson",4,Café,Coffee Shop,Ramen Restaurant,Bakery,Bookstore,Theater,Cosmetics Shop,Tea Room,Clothing Store,Sandwich Place
3,St. James Town,4,Café,Coffee Shop,Gastropub,Cosmetics Shop,Creperie,Restaurant,Japanese Restaurant,Diner,Hotel,Middle Eastern Restaurant
5,Berczy Park,4,Coffee Shop,Cocktail Bar,Beer Bar,Cheese Shop,Seafood Restaurant,Bakery,Restaurant,Café,Breakfast Spot,Pharmacy
6,Central Bay Street,4,Coffee Shop,Italian Restaurant,Japanese Restaurant,Bubble Tea Shop,Burger Joint,Café,Sandwich Place,Comic Shop,Ramen Restaurant,Poke Place
7,Christie,4,Grocery Store,Café,Park,Coffee Shop,Candy Store,Restaurant,Italian Restaurant,Diner,Nightclub,Baby Store
8,"Richmond, Adelaide, King",4,Coffee Shop,Café,Restaurant,Concert Hall,Pizza Place,Steakhouse,General Travel,Brazilian Restaurant,Gym,Gym / Fitness Center
9,"Dufferin, Dovercourt Village",4,Bakery,Pharmacy,Park,Supermarket,Bank,Bar,Middle Eastern Restaurant,Grocery Store,Brewery,Music Venue
10,"Harbourfront East, Union Station, Toronto Islands",4,Coffee Shop,Aquarium,Plaza,Park,Scenic Lookout,Café,Hotel,Bubble Tea Shop,Brewery,IT Services
