# Segmenting and Clustering Neighborhoods in Toronto

#### Scraping the data from the Wikipedia and creating neighborhood table 

In [68]:
!pip install beautifulsoup4 #install beautifulsoup4



In [69]:
!pip install lxml



In [70]:
from bs4 import BeautifulSoup
import requests

import numpy as np
import pandas as pd
import lxml.html as lh
from geopy.geocoders import Nominatim

In [71]:
url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')

In [72]:
soup = BeautifulSoup(url.content, 'html.parser')

In [73]:
n_table = soup.find('table', class_='wikitable')
df = pd.read_html(str(n_table))
T_df = pd.DataFrame(df[0])

In [74]:
print(T_df.shape)
T_df.head(10)

(287, 3)


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
8,M8A,Not assigned,Not assigned
9,M9A,Queen's Park,Not assigned


In [75]:
dr_df = T_df.drop(T_df[T_df['Borough']=='Not assigned'].index)

In [76]:
dr_df.head(20)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Not assigned
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern
13,M3B,North York,Don Mills North


In [77]:
dr_df[dr_df['Neighbourhood']=='Not assigned']

Unnamed: 0,Postcode,Borough,Neighbourhood
9,M9A,Queen's Park,Not assigned


In [78]:
n_df = dr_df.groupby(['Postcode', 'Borough'])['Neighbourhood'].apply(list)

In [79]:
n_df

Postcode  Borough    
M1B       Scarborough                                     [Rouge, Malvern]
M1C       Scarborough             [Highland Creek, Rouge Hill, Port Union]
M1E       Scarborough                  [Guildwood, Morningside, West Hill]
M1G       Scarborough                                             [Woburn]
M1H       Scarborough                                          [Cedarbrae]
                                               ...                        
M9N       York                                                    [Weston]
M9P       Etobicoke                                            [Westmount]
M9R       Etobicoke      [Kingsview Village, Martin Grove Gardens, Rich...
M9V       Etobicoke      [Albion Gardens, Beaumond Heights, Humbergate,...
M9W       Etobicoke                                            [Northwest]
Name: Neighbourhood, Length: 103, dtype: object

In [80]:
n_df = n_df.reset_index()
n_df['Neighbourhood'] = n_df['Neighbourhood'].str.join(', ')
n_df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


In [81]:
#n_df[n_df[n_df['Neighbourhood']=='Not assigned'].index]
n_df.loc[(n_df.Neighbourhood=='Not assigned'), 'Neighbourhood'] = n_df.loc[(n_df.Neighbourhood=='Not assigned'), 'Borough']

In [82]:
#!pip install geocoder

Tried to use geocode but fail to pull the latitude and longitude. Decide to use the .csv data file.

In [83]:
'''
import geocoder

lat = []
lng = []

for postcode in n_df['Postcode']:
    lat_lng_coords = None
    print(postcode)
    while(lat_lng_coords is None):
        g = geocoder.google('{}, Toronto, Ontario'.format(postcode))
        lat_lng_coords = g.latlng
        
    lat.append(lat_lng_coords[0])
    lng.append(lat_lng_coords[1])
''' 
        

"\nimport geocoder\n\nlat = []\nlng = []\n\nfor postcode in n_df['Postcode']:\n    lat_lng_coords = None\n    print(postcode)\n    while(lat_lng_coords is None):\n        g = geocoder.google('{}, Toronto, Ontario'.format(postcode))\n        lat_lng_coords = g.latlng\n        \n    lat.append(lat_lng_coords[0])\n    lng.append(lat_lng_coords[1])\n"

Read csv file to get coordinates and use join function to combine the dataframes

In [84]:
n_df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


In [85]:
coord_df = pd.read_csv('Geospatial_Coordinates.csv')
coord_df.rename(columns={"Postal Code": "Postcode"}, inplace=True)
j_df = n_df
j_df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


In [86]:
tr_neigh = j_df.join(coord_df.set_index('Postcode'), on='Postcode')

Count the number of the total borough and neighborhoods in the data.

In [87]:
print('The dataframe has {} borough and {} neighborhoods.'.format(len(tr_neigh['Borough'].unique()),tr_neigh.shape[0]))

The dataframe has 11 borough and 103 neighborhoods.


#### Use geopy library to get the latitude and longitude valuese of the neighborhoods in Toronto

In [88]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="tr_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


#### Import and use folium and visualize at Toronto and its neighborhood in map.

In [89]:
import folium
print('folium imported')

folium imported


In [90]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=9)

# add neighborhood marker to map
for lat, lng, borough, neighborhood in zip(tr_neigh['Latitude'], tr_neigh['Longitude'], tr_neigh['Borough'], tr_neigh['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='#9dc209',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
    
map_toronto

#### Explore neighborhood using Foursquare API

In [91]:
CLIENT_ID = 'ODEVQ3PCQWU20LY3DMWPYE1M5IB1XBSF1EXYTNEBY3U20YXW' # your Foursquare ID
CLIENT_SECRET = 'NQSMVMMDYA13YF2BGEGWRX4QHUUALONGFCZEWD3LUCFYPUGU' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: ODEVQ3PCQWU20LY3DMWPYE1M5IB1XBSF1EXYTNEBY3U20YXW
CLIENT_SECRET:NQSMVMMDYA13YF2BGEGWRX4QHUUALONGFCZEWD3LUCFYPUGU


##### Pick One of the neigborhood in Downtown and explore

In [109]:
tr_neigh.loc[tr_neigh['Borough']=="Downtown Toronto"]

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
50,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
51,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675
52,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
53,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
54,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
55,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
56,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
57,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
58,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568
59,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752


In [110]:
example_neighbor = tr_neigh.loc[68, 'Neighbourhood']
example_neighbor

'CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara'

In [113]:
ex_nb_lat = tr_neigh.loc[68, 'Latitude']
ex_nb_lon = tr_neigh.loc[68, 'Longitude']

print('Latitude and longitude values of {} are {}, {}'.format(
    example_neighbor, 
    ex_nb_lat, 
    ex_nb_lon))

Latitude and longitude values of CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara are 43.6289467, -79.3944199


In [None]:
LIMIT = 100
radius = 500

url='https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.\
format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    neighborhood_latitude,
    neighborhood_longitude,
    radius,
    LIMIT)
url