# Neighborhoods in Toronto

In [128]:
#Import the needed packages:
#!pip install wikipedia
#!conda install -c conda-forge geopy --yes
#!conda install -c conda-forge folium=0.5.0 --yes
import pandas as pd
import wikipedia as wp
import io
import requests
from bs4 import BeautifulSoup
import json
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
import numpy as np

Part 1: Pull in data from wikipedia and create a table from it.

In [54]:
#Pull in the wiki page.
html = wp.page("List of postal codes of Canada: M").html().encode("UTF-8")
df = pd.read_html(html, header = 0)[0]

df = df[df.Borough != 'Not assigned']

# Group Neighborhoods under one Borough
df = df.groupby(['Postal code', 'Borough'])['Neighborhood'].apply(list).apply(lambda x:', '.join(x)).to_frame().reset_index()

for index, row in df.iterrows():
    if row['Neighborhood'] == 'Not assigned':
        row['Neighborhood'] = row['Borough']

df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1B,Scarborough,Malvern / Rouge
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek
2,M1E,Scarborough,Guildwood / Morningside / West Hill
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [55]:
df.shape

(103, 3)

Part 2: Integrate the geocoder data into the dataframe.

In [56]:
#Pull in geospacial data.
url="https://cocl.us/Geospatial_data"
s=requests.get(url).content
c=pd.read_csv(io.StringIO(s.decode('utf-8')))
dfc=df.join(c.set_index('Postal Code'), on='Postal code')
dfc.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.784535,-79.160497
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


Part 3: Visualize the neighborhoods and how they cluster together.

In [57]:
address='Toronto'
geolocator=Nominatim(user_agent="ny_explorer")
loc=geolocator.geocode(address)
latitude=loc.latitude
longitude=loc.longitude
map_tor=folium.Map(location=[latitude,longitude], zoom_start=10)
for lat,lng,borough,neighborhood in zip(dfc['Latitude'],dfc['Longitude'],dfc['Borough'],dfc['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    lable = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat,lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.7,
        parse_html=False).add_to(map_tor)
    
map_tor

In [58]:
CLIENT_ID = 'OP2B2VRR31XUUOI4UH4D0UUPRFOKYMX41ZSOT3IEARQGNORB' # your Foursquare ID
CLIENT_SECRET = 'PGCLF1ZWRFFO4Y40SECHTSXS4Z4DGY3PI3YPINPS3FAAV2ZC' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 100
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)
radius=500

url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitudes, longitudes, VERSION, radius, LIMIT)
url

Your credentails:
CLIENT_ID: OP2B2VRR31XUUOI4UH4D0UUPRFOKYMX41ZSOT3IEARQGNORB
CLIENT_SECRET:PGCLF1ZWRFFO4Y40SECHTSXS4Z4DGY3PI3YPINPS3FAAV2ZC


'https://api.foursquare.com/v2/venues/search?client_id=OP2B2VRR31XUUOI4UH4D0UUPRFOKYMX41ZSOT3IEARQGNORB&client_secret=PGCLF1ZWRFFO4Y40SECHTSXS4Z4DGY3PI3YPINPS3FAAV2ZC&ll=43.6534817,-79.3839347&v=20180604&radius=500&limit=100'

In [59]:
def get_category_type(row):
    try:
        categories_list=row['categories']
    except:
        categories_list=row['venue.categories']
    if len(categories_list)==0:
        return None
    else:
        return categories_list[0]['name']
results=requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ea0c2280de0d90028a04ed1'},
 'response': {'venues': [{'id': '4c093ee0340720a153728493',
    'name': 'City Hall Council Chambers',
    'location': {'lat': 43.65182710471462,
     'lng': -79.38394893163043,
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.65182710471462,
       'lng': -79.38394893163043}],
     'distance': 184,
     'cc': 'CA',
     'country': 'Canada',
     'formattedAddress': ['Canada']},
    'categories': [{'id': '4bf58dd8d48988d129941735',
      'name': 'City Hall',
      'pluralName': 'City Halls',
      'shortName': 'City Hall',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/cityhall_',
       'suffix': '.png'},
      'primary': True}],
    'referralId': 'v-1587593784',
    'hasPerk': False},
   {'id': '4ad4c05ef964a5208ff620e3',
    'name': 'Toronto City Hall',
    'location': {'address': '100 Queen St. W.',
     'crossStreet': 'at Bay St.',
     'lat': 43.65313989695342,
     'lng': -79.

In [60]:
venues=results['response']['venues']
df2=json_normalize(venues)
df2.head()

Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.postalCode,location.state,name,referralId
0,"[{'id': '4bf58dd8d48988d129941735', 'name': 'C...",False,4c093ee0340720a153728493,,CA,,Canada,,184,[Canada],"[{'label': 'display', 'lat': 43.65182710471462...",43.651827,-79.383949,,,City Hall Council Chambers,v-1587593784
1,"[{'id': '4bf58dd8d48988d129941735', 'name': 'C...",False,4ad4c05ef964a5208ff620e3,100 Queen St. W.,CA,Toronto,Canada,at Bay St.,38,"[100 Queen St. W. (at Bay St.), Toronto ON M5H...","[{'label': 'display', 'lat': 43.65313989695342...",43.65314,-79.383967,M5H 2N2,ON,Toronto City Hall,v-1587593784
2,"[{'id': '4bf58dd8d48988d176941735', 'name': 'G...",False,50885719498ea7b5aab3a74c,483 Bay St,CA,Toronto,Canada,,130,"[483 Bay St, Toronto ON M5G 2C9, Canada]","[{'label': 'display', 'lat': 43.653436, 'lng':...",43.653436,-79.382314,M5G 2C9,ON,GoodLife Fitness Toronto Bell Trinity Centre,v-1587593784
3,"[{'id': '4bf58dd8d48988d129941735', 'name': 'C...",False,5b193c42598e64002ca79b96,100 Queen St W,CA,Toronto,Canada,,3,"[100 Queen St W, Toronto ON M5H 2N2, Canada]","[{'label': 'display', 'lat': 43.653454, 'lng':...",43.653454,-79.383952,M5H 2N2,ON,City of Toronto Civic Innovation Office,v-1587593784
4,"[{'id': '4bf58dd8d48988d1c4941735', 'name': 'R...",False,4b06ef4cf964a52072f322e3,100 Queen St W,CA,Toronto,Canada,,105,"[100 Queen St W, Toronto ON M5H 2N2, Canada]","[{'label': 'display', 'lat': 43.65263298456729...",43.652633,-79.383361,M5H 2N2,ON,Cafe On The Square,v-1587593784


In [61]:
df2.shape

(65, 17)

In [82]:
filtered_columns=['name','categories']+[col for col in df2.columns if col.startswith('location.')]+['id']
df_clean=df2.loc[:, filtered_columns]

def get_category_type(row):
    try:
        categories_list=row['categories']
    except:
        categories_list=row['venue.categories']
    if len(categories_list)==0:
        return None
    else:
        return categories_list[0]['name']
    
df_clean['categories']=df_clean.apply(get_category_type, axis=1)
df_clean.columns=[column.split('.')[-1] for column in df_clean.columns]
df_clean.head()

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,postalCode,state,id
0,City Hall Council Chambers,City Hall,,CA,,Canada,,184,[Canada],"[{'label': 'display', 'lat': 43.65182710471462...",43.651827,-79.383949,,,4c093ee0340720a153728493
1,Toronto City Hall,City Hall,100 Queen St. W.,CA,Toronto,Canada,at Bay St.,38,"[100 Queen St. W. (at Bay St.), Toronto ON M5H...","[{'label': 'display', 'lat': 43.65313989695342...",43.65314,-79.383967,M5H 2N2,ON,4ad4c05ef964a5208ff620e3
2,GoodLife Fitness Toronto Bell Trinity Centre,Gym,483 Bay St,CA,Toronto,Canada,,130,"[483 Bay St, Toronto ON M5G 2C9, Canada]","[{'label': 'display', 'lat': 43.653436, 'lng':...",43.653436,-79.382314,M5G 2C9,ON,50885719498ea7b5aab3a74c
3,City of Toronto Civic Innovation Office,City Hall,100 Queen St W,CA,Toronto,Canada,,3,"[100 Queen St W, Toronto ON M5H 2N2, Canada]","[{'label': 'display', 'lat': 43.653454, 'lng':...",43.653454,-79.383952,M5H 2N2,ON,5b193c42598e64002ca79b96
4,Cafe On The Square,Restaurant,100 Queen St W,CA,Toronto,Canada,,105,"[100 Queen St W, Toronto ON M5H 2N2, Canada]","[{'label': 'display', 'lat': 43.65263298456729...",43.652633,-79.383361,M5H 2N2,ON,4b06ef4cf964a52072f322e3


In [84]:
df_clean.shape

(65, 15)

In [107]:
def NearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [108]:
tor_ven=NearbyVenues(names=dfc['Neighborhood'], latitudes=dfc['Latitude'], longitudes=dfc['Longitude'])

tor_ven.head()

Malvern / Rouge
Rouge Hill / Port Union / Highland Creek
Guildwood / Morningside / West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park / Ionview / East Birchmount Park
Golden Mile / Clairlea / Oakridge
Cliffside / Cliffcrest / Scarborough Village West
Birch Cliff / Cliffside West
Dorset Park / Wexford Heights / Scarborough Town Centre
Wexford / Maryvale
Agincourt
Clarks Corners / Tam O'Shanter / Sullivan
Milliken / Agincourt North / Steeles East / L'Amoreaux East
Steeles West / L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview / Henry Farm / Oriole
Bayview Village
York Mills / Silver Hills
Willowdale / Newtonbrook
Willowdale
York Mills West
Willowdale
Parkwoods
Don Mills
Don Mills
Bathurst Manor / Wilson Heights / Downsview North
Northwood Park / York University
Downsview
Downsview
Downsview
Downsview
Victoria Village
Parkview Hill / Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West / Riverdale
India Bazaar / The Beaches 

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Malvern / Rouge,43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,Rouge Hill / Port Union / Highland Creek,43.784535,-79.160497,RIGHT WAY TO GOLF,43.785177,-79.161108,Golf Course
2,Rouge Hill / Port Union / Highland Creek,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,Guildwood / Morningside / West Hill,43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
4,Guildwood / Morningside / West Hill,43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant


In [109]:
tor_ven.shape

(2115, 7)

In [110]:
tor_ven.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
Alderwood / Long Branch,8,8,8,8,8,8
Bathurst Manor / Wilson Heights / Downsview North,21,21,21,21,21,21
Bayview Village,4,4,4,4,4,4
Bedford Park / Lawrence Manor East,22,22,22,22,22,22
Berczy Park,58,58,58,58,58,58
Birch Cliff / Cliffside West,4,4,4,4,4,4
Brockton / Parkdale Village / Exhibition Place,22,22,22,22,22,22
Business reply mail Processing CentrE,17,17,17,17,17,17
CN Tower / King and Spadina / Railway Lands / Harbourfront West / Bathurst Quay / South Niagara / Island airport,16,16,16,16,16,16


In [111]:
len(tor_ven['Venue Category'].unique())

267

In [113]:
tor_onehot=pd.get_dummies(tor_ven[['Venue Category']],prefix="",prefix_sep="")
tor_onehot['Neighborhood']=tor_ven['Neighborhood']
fixed_columns=[tor_onehot.columns[-1]]+list(tor_onehot.columns[:-1])
mh_onehot=tor_onehot[fixed_columns]
tor_onehot.head()

Unnamed: 0,Accessories Store,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [114]:
tor_group=tor_onehot.groupby('Neighborhood').mean().reset_index()
tor_group.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Alderwood / Long Branch,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bathurst Manor / Wilson Heights / Downsview North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Bedford Park / Lawrence Manor East,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [115]:
num_topven=5
for hood in tor_group['Neighborhood']:
    print("----"+hood+"----")
    temp=tor_group[tor_group['Neighborhood']==hood].T.reset_index()
    temp.columns=['venue','freq']
    temp=temp.iloc[1:]
    temp['freq']=temp['freq'].astype(float)
    temp=temp.round({'freq': 2})
    print(temp.sort_values('freq',ascending=False).reset_index(drop=True).head(num_topven))
    print('\n')

----Agincourt----
                       venue  freq
0                     Lounge  0.25
1             Breakfast Spot  0.25
2  Latin American Restaurant  0.25
3         Chinese Restaurant  0.25
4         Mexican Restaurant  0.00


----Alderwood / Long Branch----
            venue  freq
0     Pizza Place  0.25
1        Pharmacy  0.12
2             Gym  0.12
3  Sandwich Place  0.12
4    Skating Rink  0.12


----Bathurst Manor / Wilson Heights / Downsview North----
            venue  freq
0            Bank  0.10
1     Coffee Shop  0.10
2  Ice Cream Shop  0.05
3   Shopping Mall  0.05
4  Sandwich Place  0.05


----Bayview Village----
                 venue  freq
0                 Café  0.25
1                 Bank  0.25
2   Chinese Restaurant  0.25
3  Japanese Restaurant  0.25
4  Monument / Landmark  0.00


----Bedford Park / Lawrence Manor East----
                venue  freq
0      Sandwich Place  0.09
1         Coffee Shop  0.09
2  Italian Restaurant  0.09
3          Restaurant  0.09
4    

In [119]:
def mostcomven(row, num_topven):
    row_categories=row.iloc[1:]
    row_categories_sorted=row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_topven]

indicators =['st','nd','rd']
columns=['Neighborhood']
for ind in np.arange(num_topven):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1,indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
        
neighvensort=pd.DataFrame(columns=columns)
neighvensort['Neighborhood']=tor_group['Neighborhood']

for ind in np.arange(tor_group.shape[0]):
    neighvensort.iloc[ind,1:]=mostcomven(tor_group.iloc[ind, :],num_topven)
    
neighvensort.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Agincourt,Latin American Restaurant,Lounge,Breakfast Spot,Chinese Restaurant,Drugstore
1,Alderwood / Long Branch,Pizza Place,Gym,Coffee Shop,Pharmacy,Skating Rink
2,Bathurst Manor / Wilson Heights / Downsview North,Coffee Shop,Bank,Frozen Yogurt Shop,Bridal Shop,Sandwich Place
3,Bayview Village,Japanese Restaurant,Café,Bank,Chinese Restaurant,Discount Store
4,Bedford Park / Lawrence Manor East,Coffee Shop,Restaurant,Sandwich Place,Italian Restaurant,Juice Bar


In [131]:
kcl=2

tor_groupcl=tor_group.drop('Neighborhood',1)

kmean=KMeans(n_clusters=kcl,random_state=0).fit(tor_groupcl)

kmean.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [132]:
neighvensort.insert(0, 'ClusterLabels', kmean.labels_)
tor_merge=dfc
tor_merge=tor_merge.join(neighvensort.set_index('Neighborhood'), on='Neighborhood')
tor_merge=tor_merge.dropna()
tor_merge.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353,Fast Food Restaurant,Dim Sum Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.784535,-79.160497,Golf Course,Bar,Yoga Studio,Drugstore,Discount Store
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.763573,-79.188711,Medical Center,Intersection,Rental Car Location,Breakfast Spot,Electronics Store
3,M1G,Scarborough,Woburn,43.770992,-79.216917,Coffee Shop,Korean Restaurant,Convenience Store,Yoga Studio,Drugstore
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,Caribbean Restaurant,Bakery,Fried Chicken Joint,Thai Restaurant,Athletics & Sports


In [129]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kcl)
ys = [i + x + (i*x)**2 for i in range(kcl)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(tor_merge['Latitude'], tor_merge['Longitude'], tor_merge['Neighborhood'], tor_merge['ClusterLabels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='red',
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

KeyError: 'ClusterLabels'

In [None]:
tor_merge.loc[tor_merge['ClusterLabels']==1,tor_merge.columns[[1]+list(range(5,tor_merge.shape[1]))]]