# Analyzing the neighborhoods in the city of Toronto

#### 1. Importing the necessary libraries for analysis

In [4]:
# Importing the necessary libraties for the project

import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import requests
import random
import json
from pandas.io.json import json_normalize
from sklearn import preprocessing
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors

#!conda install -c conda-forge geopy --yes
!pip install folium
import folium




#### 2. Importing open-source data from Wikipedia on Toronto's neighboorhoods and demography

In [5]:
# Link to wikipedia neighborhoods
url_nei = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
wiki_Toronto_nei = pd.read_html(url_nei)[:]
wiki_Toronto_nei = pd.DataFrame(wiki_Toronto_nei[0])
wiki_Toronto_nei.groupby(by='Borough', axis=0)

#Count the number of boroughs                               
wiki_Toronto_borough = pd.unique(wiki_Toronto_nei['Borough'])
wiki_Toronto_borough_count = wiki_Toronto_nei['Borough'].value_counts()
wiki_Toronto_borough_count

#Reorganize the dataframe and sort them in ascending order
wiki_Toronto_nei = wiki_Toronto_nei.sort_values(by=['Borough'], ascending = True).reset_index()
#wiki_Toronto_nei.reset_index(inplace=True)
wiki_Toronto_nei.drop(['index'], axis=1, inplace=True)
wiki_Toronto_nei.set_index
wiki_Toronto_nei.index.name = 'index'
#wiki_Toronto_nei.index = range(len(wiki_Toronto_nei['Borough']))

#wiki_Toronto_nei

In [6]:
#Getting Latitudes and Longtitudes from Postcodes and integrate it with the neighboorhood dataframe

#!pip install pgeocode
import pgeocode
Toronto_geocoder = pgeocode.Nominatim('ca')

Toronto_boroughs_LL = Toronto_geocoder.query_postal_code(i for i in wiki_Toronto_nei['Postal Code'])[['postal_code',
                                                                                                      'latitude',
                                                                                                      'longitude']]
Toronto_boroughs_LL
wiki_Toronto_nei[['latitude','longitude']] = Toronto_boroughs_LL[['latitude','longitude']]
wiki_Toronto_nei

Unnamed: 0_level_0,Postal Code,Borough,Neighbourhood,latitude,longitude
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.6736,-79.4035
1,M4N,Central Toronto,Lawrence Park,43.7301,-79.3935
2,M5N,Central Toronto,Roselawn,43.7113,-79.4195
3,M4P,Central Toronto,Davisville North,43.7135,-79.3887
4,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.6861,-79.4025
5,M5P,Central Toronto,"Forest Hill North & West, Forest Hill Road Park",43.6966,-79.412
6,M4S,Central Toronto,Davisville,43.702,-79.3853
7,M4R,Central Toronto,"North Toronto West, Lawrence Park",43.7143,-79.4065
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.6899,-79.3853
9,M5W,Downtown Toronto,Stn A PO Boxes,43.6437,-79.3787


#### 3. Import Toronto's demography dataset from Wikipedia, extract and clean the data



In [8]:


def extract_ethnicity(row): 
    y = wiki_Toronto_dem['Ethnicity'][row]
    y = re.search('\(([^)]+)', y).group(1)
    y =  float(y.strip('%'))
    #y =  y.strip('%')

    return y


## Query data of Toronto demograohy from Wikipedia

#!pip install wikipedia #Uncomment this line after installing the first installation of Wikipedia library
import wikipedia as wp
import pandas as pd
import re

url_dem = "https://en.wikipedia.org/wiki/Demographics_of_Toronto_neighbourhoods"
wiki_Toronto_dem = pd.read_html(url_dem)[:]
wiki_Toronto_dem = pd.DataFrame(wiki_Toronto_dem[1])    
#wiki_Toronto_dem.rename(columns={"Second most common language (after English) by name" : "Ethnicity"}, inplace = True)
#wiki_Toronto_dem.groupby(by='Ethnicity', axis=0)

for i in range(2,6):
    x = pd.read_html(url_dem)[:]
    x = pd.DataFrame(x[i])
    wiki_Toronto_dem = wiki_Toronto_dem.append(x, ignore_index=False)

wiki_Toronto_dem.rename(columns={"Second most common language (after English) by name" : "Ethnicity"}, inplace = True)
wiki_Toronto_dem['Ethnicity']
wiki_Toronto_dem.reset_index(inplace=True)

#----------------------------------------------------------------------------------------------------------------------


# Creating a new column and remove unused columns

wiki_Toronto_dem['Ethnicity Percentage (%)'] = ''

for i in range(wiki_Toronto_dem.shape[0]):
    if set(['Map','Census Tracts','Second most common language (after English) by percentage']).issubset(wiki_Toronto_dem):
        wiki_Toronto_dem.drop(columns = ['Map','Census Tracts','Second most common language (after English) by percentage'], axis = 1, inplace=True)
    else:
        pass
    
wiki_Toronto_dem = wiki_Toronto_dem.dropna()

wiki_Toronto_dem.drop(['index'], axis=1, inplace=True)
wiki_Toronto_dem.set_index
wiki_Toronto_dem.index.name = 'index'
wiki_Toronto_dem.index = range(len(wiki_Toronto_dem['Ethnicity']))
wiki_Toronto_dem

#-----------------------------------------------------------------------------------------------------------------------


# Extract the ethnicty and save it in Ethnicity percentage columnn

out = []
for i in range(len(wiki_Toronto_dem['Ethnicity'])):
    out.append(extract_ethnicity(i))

wiki_Toronto_dem['Ethnicity Percentage (%)'] = out
wiki_Toronto_dem['Ethnicity_new'] = wiki_Toronto_dem['Ethnicity'].str.split('(').str[0]

for i in range(len(wiki_Toronto_dem['Ethnicity_new'])):
     wiki_Toronto_dem['Ethnicity_new'][i] = wiki_Toronto_dem['Ethnicity_new'][i].strip()
        
wiki_Toronto_dem
#wiki_Toronto_dem['Ethnicity_new'].value_counts()


#------------------------------------------------------------------------------------------------------------------------

#Create an new column listing the names of neighborhood concatenated with "Toronto" to make the address search easier

temp = wiki_Toronto_dem['Name'] + ', Toronto'
wiki_Toronto_dem.insert(1,'Name-ccat',temp)
wiki_Toronto_dem


#------------------------------------------------------------------------------------------------------------------------

#Clean the neighborhood names (row 1, 55)

pd.set_option('display.max_rows', 500)
wiki_Toronto_dem['Name'][1] = wiki_Toronto_dem['Name'][1].split('/')[0]
wiki_Toronto_dem['Name'][55] = wiki_Toronto_dem['Name'][55].split('/')[0]

wiki_Toronto_dem


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,Name,Name-ccat,FM,Population,Land area (km2),Density (people/km2),% Change in Population since 2001,Average Income,Transit Commuting %,% Renters,Ethnicity,Ethnicity Percentage (%),Ethnicity_new
0,Crescent Town,"Crescent Town, Toronto",EY,8157,0.4,20393,-10.0,23021,24.5,20.3,Bengali (18.1%),18.1,Bengali
1,Governor's Bridge,"Governor's Bridge/Bennington Heights, Toronto",EY,2112,1.87,1129,4.0,129904,7.1,13.3,Polish (1.4%),1.4,Polish
2,Leaside,"Leaside, Toronto",EY,13876,2.81,4938,3.0,82670,9.7,10.5,Bulgarian (0.4%),0.4,Bulgarian
3,O'Connor–Parkview,"O'Connor–Parkview, Toronto",EY,17740,4.94,3591,-6.1,33517,15.8,19.4,Urdu (3.2%),3.2,Urdu
4,Old East York,"Old East York, Toronto",EY,52220,7.94,6577,-4.6,33172,22.0,19.1,Greek (4.3%),4.3,Greek
5,Thorncliffe Park,"Thorncliffe Park, Toronto",EY,17949,3.09,5809,9.1,25340,16.7,32.5,Urdu (21.5%),21.5,Urdu
6,Alderwood,"Alderwood, Toronto",E,11656,4.94,2360,-4.0,35239,8.8,8.5,Polish (6.2%),6.2,Polish
7,Centennial,"Centennial, Toronto",E,12565,4.94,2544,0.5,34867,11.5,8.8,Polish (2.7%),2.7,Polish
8,Clairville,"Clairville, Toronto",E,8506,6.71,1268,-3.3,26610,13.2,7.2,Punjabi (12.0%),12.0,Punjabi
9,Eatonville,"Eatonville, Toronto",E,19131,11.26,1699,4.3,36206,12.6,13.4,Serbian (3.2%),3.2,Serbian


#### 4. Query location data from Foursquare API, and generate  map of Toronto using neighborhood dataset



In [9]:

# Use Nominatim function to generate map of Toronto

address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent="Toronto_explorer")
Toronto_location = geolocator.geocode(address, timeout = None)
Toronto_latitude = Toronto_location.latitude
Toronto_longitude = Toronto_location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(Toronto_latitude, Toronto_longitude))


# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[Toronto_latitude, Toronto_longitude], zoom_start=11)

wiki_Toronto_nei = wiki_Toronto_nei.dropna()


# add markers to map
for lat, lng, label in zip(wiki_Toronto_nei['latitude'], wiki_Toronto_nei['longitude'], wiki_Toronto_nei['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


#### 5. Add Latitude, Longitude and Address of the neighborhoods to the demography dataframe

In [11]:

# Adding address to the Dataframe 

Toronto_locator =  Nominatim(user_agent="FourSquare_Toronto")
#Toronto_locator.geocode(Toronto_dem_LL[i])
wiki_Toronto_dem['Address'] = ''
wiki_Toronto_dem['Latitude'] = ''
wiki_Toronto_dem['Longitude'] = ''
    
for i in range(len(wiki_Toronto_dem['Name-ccat'])):
    if bool(Toronto_locator.geocode(wiki_Toronto_dem['Name-ccat'][i])) == True:
        wiki_Toronto_dem['Address'][i] = Toronto_locator.geocode(wiki_Toronto_dem['Name-ccat'][i])[0]
    else:
        #return None 
        wiki_Toronto_dem['Address'][i] = 0

#wiki_Toronto_dem



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [12]:
    
#Making a temporary copy of the Dataframe for ease of testing the code

x = wiki_Toronto_dem
x = x[x['Address'] != 0]
x.reset_index(drop=True, inplace=True)
x

def find_LL(row):
    latitude = Toronto_locator.geocode(x['Name-ccat'][row],timeout=None)[1][0]
    longitude = Toronto_locator.geocode(x['Name-ccat'][row],timeout=None)[1][1]
    return latitude, longitude


for i in range(len(x['Name'])):
    x['Latitude'].loc[i] = find_LL(i)[0]
    x['Longitude'].loc[i] = find_LL(i)[1]
    #lati = find_LL(i)[0]
    #long = find_LL(i)[1]
    #x.append({'Latitude':lati,'Longitude':long}, ignore_index=True)

wiki_Toronto_dem = x



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()


In [15]:
# Color code the neighborhoods based on the highest secondary ethnicty

neigh_labels = wiki_Toronto_dem['Ethnicity_new'].unique()
color_array = cm.rainbow(np.linspace(0, 1, len(neigh_labels)))
color_array = [colors.rgb2hex(i) for i in color_array]
#color_array

for i in range(len(neigh_labels)):
    wiki_Toronto_dem.loc[wiki_Toronto_dem['Ethnicity_new'] == neigh_labels[i] , "Color_ethnicity"] = color_array[i]
    
#wiki_Toronto_dem


#-----------------------------------------------------------------------------------------------------------------


# Color code the neighborhoods based on the neighborhoods

'''neigh_labels = wiki_Toronto_dem['FM'].unique()
color_array = cm.rainbow(np.linspace(0, 1, len(neigh_labels)))
color_array = [colors.rgb2hex(i) for i in color_array]
#color_array

for i in range(len(neigh_labels)):
    wiki_Toronto_dem.loc[wiki_Toronto_dem['FM'] == neigh_labels[i] , "Color_FM"] = color_array[i]
    
#wiki_Toronto_dem'''


'neigh_labels = wiki_Toronto_dem[\'FM\'].unique()\ncolor_array = cm.rainbow(np.linspace(0, 1, len(neigh_labels)))\ncolor_array = [colors.rgb2hex(i) for i in color_array]\n#color_array\n\nfor i in range(len(neigh_labels)):\n    wiki_Toronto_dem.loc[wiki_Toronto_dem[\'FM\'] == neigh_labels[i] , "Color_FM"] = color_array[i]\n    \n#wiki_Toronto_dem'

In [16]:

# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[Toronto_latitude, Toronto_longitude], zoom_start=11)

wiki_Toronto_nei = wiki_Toronto_nei.dropna()


# add markers to map
for lat, lng, label, area, color_ethnicity, color_FM in zip(wiki_Toronto_dem['Latitude'],
                                                      wiki_Toronto_dem['Longitude'],
                                                      wiki_Toronto_dem['Ethnicity_new'],
                                                      wiki_Toronto_dem['Name'],                                                            
                                                      wiki_Toronto_dem['Color_ethnicity'],
                                                      wiki_Toronto_dem['Color_FM']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=color_ethnicity,
        fill=True,
        fill_color=color_ethnicity,
        fill_opacity=1,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

## Analyze the neighborhood using Foursquare API

#### 1. Input user credentials to access Foursquare API

In [17]:

CLIENT_ID = 'RF0UYLVZDZ3W4IBJOOHUTN3LZZY1YTHCOHODTMLWHSN11HJQ' # your Foursquare ID
CLIENT_SECRET = 'Z1LAGZCWCJOMUQKQ53VXXBCPHGHZD1EOG3O4R0XIFK1RCZYB' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: RF0UYLVZDZ3W4IBJOOHUTN3LZZY1YTHCOHODTMLWHSN11HJQ
CLIENT_SECRET:Z1LAGZCWCJOMUQKQ53VXXBCPHGHZD1EOG3O4R0XIFK1RCZYB


#### 2. Define a function to query, sort data from Categories list and transfer them into a dataframe



In [44]:

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
    
#results['response']['groups'][0]['items']


In [67]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)



# type your answer here
Toronto_venues = getNearbyVenues(names=wiki_Toronto_dem['Name-ccat'],
                                   latitudes=wiki_Toronto_dem['Latitude'],
                                   longitudes=wiki_Toronto_dem['Longitude']
                                  )

Toronto_venues

Crescent Town, Toronto
Governor's Bridge/Bennington Heights, Toronto
Leaside, Toronto
O'Connor–Parkview, Toronto
Old East York, Toronto
Thorncliffe Park, Toronto
Alderwood, Toronto
Centennial, Toronto
Clairville, Toronto
Eatonville, Toronto
Humber Heights, Toronto
Humberwood, Toronto
Humber Valley Village, Toronto
Islington – Six Points, Toronto
Kingsview Village, Toronto
Long Branch, Toronto
Markland Wood, Toronto
Mimico, Toronto
New Toronto, Toronto
Princess Gardens, Toronto
Agincourt, Toronto
Alexandra Park, Toronto
Allenby, Toronto
Amesbury, Toronto
Armour Heights, Toronto
Banbury, Toronto
Bathurst Manor, Toronto
Bay Street Corridor, Toronto
Bayview Village, Toronto
Bayview Woods – Steeles, Toronto
Bedford Park, Toronto
Bendale, Toronto
Birch Cliff, Toronto
Bloor West Village, Toronto
Bracondale Hill, Toronto
Branson, Toronto
Bridle Path, Toronto
Brockton, Toronto
Cabbagetown, Toronto
Caribou Park, Toronto
Carleton Village, Toronto
Casa Loma, Toronto
Chaplin Estates, Toronto
Christ

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Crescent Town, Toronto",43.695403,-79.293099,Dentonia Park,43.692809,-79.295533,Park
1,"Crescent Town, Toronto",43.695403,-79.293099,Dentonia Park Golf Course,43.695326,-79.289115,Golf Course
2,"Crescent Town, Toronto",43.695403,-79.293099,Gateway Newstand,43.694477,-79.289194,Convenience Store
3,"Crescent Town, Toronto",43.695403,-79.293099,Victoria Park Subway Station,43.694883,-79.288735,Metro Station
4,"Governor's Bridge/Bennington Heights, Toronto",43.689423,-79.369426,Chorley Park,43.687163,-79.370538,Park
...,...,...,...,...,...,...,...
3659,"Yorkville, Toronto",43.671386,-79.390168,The Yorkville Club,43.671636,-79.395015,Gym
3660,"Yorkville, Toronto",43.671386,-79.390168,Sofia,43.670592,-79.391849,Italian Restaurant
3661,"Yorkville, Toronto",43.671386,-79.390168,The One Eighty,43.668575,-79.388210,American Restaurant
3662,"Yorkville, Toronto",43.671386,-79.390168,Whole Hearth Bakery & Cafe,43.672005,-79.395405,Bakery


In [68]:
#print(Toronto_venues.shape)
Toronto_venues.groupby('Neighborhood').count()


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Agincourt, Toronto",13,13,13,13,13,13
"Alderwood, Toronto",8,8,8,8,8,8
"Alexandra Park, Toronto",100,100,100,100,100,100
"Allenby, Toronto",4,4,4,4,4,4
"Amesbury, Toronto",6,6,6,6,6,6
"Armour Heights, Toronto",1,1,1,1,1,1
"Banbury, Toronto",4,4,4,4,4,4
"Bathurst Manor, Toronto",72,72,72,72,72,72
"Bay Street Corridor, Toronto",100,100,100,100,100,100
"Bayview Village, Toronto",11,11,11,11,11,11


#### 2. Apply one-hot encoding

In [133]:
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

Toronto_onehot = Toronto_onehot.drop(['Neighborhood'], axis=1)

# add neighborhood column back to dataframe
Toronto_onehot['Neighborhood'] = Toronto_venues['Neighborhood'] 


# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head()

Toronto_grouped = Toronto_onehot.groupby('Neighborhood').mean().reset_index()
Toronto_grouped

Unnamed: 0,Neighborhood,ATM,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Terminal,American Restaurant,Animal Shelter,Antique Shop,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"Agincourt, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Alexandra Park, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,...,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01
3,"Allenby, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Amesbury, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Armour Heights, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Banbury, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Bathurst Manor, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.013889,0.0,0.0,...,0.0,0.027778,0.013889,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Bay Street Corridor, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,...,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01
9,"Bayview Village, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [230]:
num_top_venues = 5

for hood in Toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

# Sort the venues in descending order 
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]


# ==================================================================================================================


num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
Toronto_neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
Toronto_neighborhoods_venues_sorted['Neighborhood'] = Toronto_grouped['Neighborhood']
for ind in np.arange(Toronto_grouped.shape[0]):
    Toronto_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

Toronto_neighborhoods_venues_sorted.head()

----Agincourt, Toronto----
                   venue  freq
0     Chinese Restaurant  0.15
1       Asian Restaurant  0.15
2      Korean Restaurant  0.08
3  Vietnamese Restaurant  0.08
4             Restaurant  0.08


----Alderwood, Toronto----
         venue  freq
0  Pizza Place  0.25
1  Coffee Shop  0.12
2          Gym  0.12
3          Pub  0.12
4     Pharmacy  0.12


----Alexandra Park, Toronto----
                    venue  freq
0                     Bar  0.10
1                    Café  0.05
2    Caribbean Restaurant  0.05
3  Furniture / Home Store  0.05
4     Arts & Crafts Store  0.02


----Allenby, Toronto----
            venue  freq
0    Skating Rink  0.25
1  Sandwich Place  0.25
2       Bookstore  0.25
3    Tennis Court  0.25
4             ATM  0.00


----Amesbury, Toronto----
                venue  freq
0                Bank  0.17
1                Park  0.17
2  Athletics & Sports  0.17
3        Intersection  0.17
4         Coffee Shop  0.17


----Armour Heights, Toronto----
     

4                Diner  0.14


----Humber Summit, Toronto----
                        venue  freq
0  Construction & Landscaping   0.2
1                  Restaurant   0.2
2                      Bakery   0.2
3                        Park   0.2
4                         Gym   0.2


----Humber Valley Village, Toronto----
               venue  freq
0  Convenience Store   0.2
1           Bus Stop   0.2
2             Bakery   0.2
3               Park   0.2
4       Skating Rink   0.2


----Humberlea, Toronto----
                venue  freq
0   Convenience Store   0.2
1        Home Service   0.2
2      Baseball Field   0.2
3         Gas Station   0.2
4  Italian Restaurant   0.2


----Humberwood, Toronto----
               venue  freq
0   Business Service  0.33
1  Mobile Phone Shop  0.33
2        Golf Course  0.33
3         Nail Salon  0.00
4        Opera House  0.00


----Humewood–Cedarvale, Toronto----
               venue  freq
0  Convenience Store   0.2
1              Trail   0.2
2     Socce

                 venue  freq
0                Beach  0.07
1               Bakery  0.04
2      Thai Restaurant  0.04
3          Pizza Place  0.04
4  Japanese Restaurant  0.04


----The Danforth, Toronto----
                        venue  freq
0               Grocery Store  0.07
1                 Pizza Place  0.07
2                    Bus Line  0.07
3                 Coffee Shop  0.07
4  Construction & Landscaping  0.03


----The Elms, Toronto----
           venue  freq
0  Grocery Store   0.2
1   Skating Rink   0.2
2    Pizza Place   0.2
3   Soccer Field   0.2
4    Gas Station   0.2


----The Junction, Toronto----
                venue  freq
0     Thai Restaurant  0.05
1  Italian Restaurant  0.05
2                Café  0.05
3         Coffee Shop  0.05
4                 Bar  0.05


----The Kingsway, Toronto----
                venue  freq
0         Coffee Shop  0.09
1                Bank  0.06
2  Italian Restaurant  0.06
3    Sushi Restaurant  0.06
4      Breakfast Spot  0.06


----The Qu

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Agincourt, Toronto",Asian Restaurant,Chinese Restaurant,Food Court,Peking Duck Restaurant,Restaurant,Cantonese Restaurant,Train Station,Korean Restaurant,Coffee Shop,Hong Kong Restaurant
1,"Alderwood, Toronto",Pizza Place,Pharmacy,Coffee Shop,Sandwich Place,Pool,Pub,Gym,Yoga Studio,Ethiopian Restaurant,Doner Restaurant
2,"Alexandra Park, Toronto",Bar,Café,Furniture / Home Store,Caribbean Restaurant,Gym / Fitness Center,Arepa Restaurant,Boutique,Poutine Place,Pizza Place,Bakery
3,"Allenby, Toronto",Tennis Court,Bookstore,Sandwich Place,Skating Rink,Yoga Studio,Falafel Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant
4,"Amesbury, Toronto",Intersection,Athletics & Sports,Coffee Shop,Bank,Gas Station,Park,Farmers Market,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant


### Implement k-means clusterning to develop activity based neighborhood clusters

In [461]:
#wiki_Toronto_dem
Toronto_grouped

# set number of clusters
kclusters = 5

Toronto_grouped_clustering = Toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

# Comment the following line of code if running this code for the second time
Toronto_neighborhoods_venues_sorted = Toronto_neighborhoods_venues_sorted.drop(['Cluster Labels'], axis=1) 
Toronto_neighborhoods_venues_sorted


# add clustering labels
Toronto_neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
Toronto_neighborhoods_venues_sorted.head()
Toronto_merged = wiki_Toronto_dem

Toronto_merged = Toronto_merged.rename(columns = {'Name':'Neighborhood'})
Toronto_merged = Toronto_merged.sort_values(['Neighborhood'], ascending=True).reset_index(drop=True)
Toronto_merged

# merge manhattan_grouped with Toronto_merged data to add latitude/longitude for each neighborhood
Toronto_merged = Toronto_merged.join(Toronto_neighborhoods_venues_sorted.set_index('Neighborhood'), on='Name-ccat')

Toronto_merged.head() # check the last columns!
Toronto_merged.rename(columns = {'FM':'Borough'})

Toronto_merged = Toronto_merged.dropna().reset_index(drop=True)

In [462]:
# create map
map_clusters = folium.Map(location=[Toronto_latitude, Toronto_longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'],
                                  Toronto_merged['Neighborhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)],
        fill=True,
        fill_color=rainbow[int(cluster)],
        fill_opacity=0.7).add_to(map_clusters)
    
map_clusters


In [404]:
Toronto_merged.shape

(166, 29)

In [463]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, Toronto_merged.columns[[1] + list(range(20, Toronto_merged.shape[1]))]]

Unnamed: 0,Name-ccat,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,"Amesbury, Toronto",Athletics & Sports,Coffee Shop,Bank,Gas Station,Park,Farmers Market,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant
6,"Banbury, Toronto",Tennis Court,Fast Food Restaurant,Park,Event Space,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant
15,"Bracondale Hill, Toronto",Coffee Shop,Bakery,Bar,Art Gallery,Café,Fish Market,Fish & Chips Shop,Filipino Restaurant,Field
20,"Caribou Park, Toronto",Gourmet Shop,Photography Studio,Coffee Shop,Pharmacy,Yoga Studio,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
21,"Carleton Village, Toronto",Coffee Shop,Jewelry Store,Pet Store,Dog Run,Yoga Studio,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
32,"Crescent Town, Toronto",Convenience Store,Park,Golf Course,Falafel Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant
33,"Davenport, Toronto",Dog Run,Convenience Store,Coffee Shop,Pet Store,Music Venue,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
51,"Forest Hill, Toronto",Bank,Arts & Crafts Store,Park,Yoga Studio,Falafel Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant
62,"Henry Farm, Toronto",Tennis Court,Restaurant,Intersection,Yoga Studio,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant
63,"High Park North, Toronto",Tennis Court,Convenience Store,Gym / Fitness Center,Baseball Field,Yoga Studio,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant


In [455]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3, Toronto_merged.columns[[1] + list(range(20, Toronto_merged.shape[1]))]]

Unnamed: 0,Name-ccat,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Agincourt, Toronto",Chinese Restaurant,Food Court,Peking Duck Restaurant,Restaurant,Cantonese Restaurant,Train Station,Korean Restaurant,Coffee Shop,Hong Kong Restaurant
1,"Alderwood, Toronto",Pharmacy,Coffee Shop,Sandwich Place,Pool,Pub,Gym,Yoga Studio,Ethiopian Restaurant,Doner Restaurant
2,"Alexandra Park, Toronto",Café,Furniture / Home Store,Caribbean Restaurant,Gym / Fitness Center,Arepa Restaurant,Boutique,Poutine Place,Pizza Place,Bakery
3,"Allenby, Toronto",Bookstore,Sandwich Place,Skating Rink,Yoga Studio,Falafel Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant
7,"Bathurst Manor, Toronto",Grocery Store,Coffee Shop,Ice Cream Shop,Café,Bakery,Eastern European Restaurant,Video Store,Mexican Restaurant,Middle Eastern Restaurant
8,"Bay Street Corridor, Toronto",Coffee Shop,Hotel,French Restaurant,Sushi Restaurant,Japanese Restaurant,Mediterranean Restaurant,Spa,Café,Italian Restaurant
9,"Bayview Village, Toronto",Breakfast Spot,Fish Market,Pizza Place,Sandwich Place,Sporting Goods Shop,Persian Restaurant,Fast Food Restaurant,Gas Station,Outdoor Supply Store
10,"Bayview Woods – Steeles, Toronto",Farmers Market,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Fast Food Restaurant
11,"Bedford Park, Toronto",Yoga Studio,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
12,"Bendale, Toronto",Fast Food Restaurant,Tennis Court,Grocery Store,Department Store,Dog Run,Filipino Restaurant,Field,Fish & Chips Shop,Farmers Market
