# Segmenting and Clustering Neighborhoods in Toronto.ipynb

In [25]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


#### 1. Scrape the following Wikipedia page, https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M

#### Scraping the wikipedia site 

In [26]:
import requests
import pandas as pd 
from bs4 import BeautifulSoup

URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

res = requests.get(URL).text
soup = BeautifulSoup(res,'lxml')
# headers
headers = []
for head in soup.find('table', class_='wikitable').find_all('th')[0::1]:
    headers.append(head.get_text()[:-1])


right_table=soup.find('table', class_='wikitable sortable')

postals = []
borough = []
neignbourhood = []

for row in right_table.findAll('tr'):
    cells=row.findAll('td')
    if len(cells)==3:
        postals.append(cells[0].find(text=True)[:-1])
        borough.append(cells[1].find(text=True)[:-1])
        neignbourhood.append(cells[2].find(text=True)[:-1])


#### 2. The Dataset

In [27]:
# The Dataframe
df = pd.DataFrame(postals,columns=['Postal Code'])
df['Borough'] = borough
df['Neighbourhood'] = neignbourhood
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


#### 3. cleaning the dataset(Ignoring cells with a borough that is Not assigned.)

In [28]:
data = df[df['Borough'] != 'Not assigned']
data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


#### 4. The Shape of the dataset

In [29]:
data.shape

(103, 3)

#### 5. Getting the logitude and latitude through the 'Geospatial_Coordinates.csv' provided.

- I was having issues with geocoder that's why i used the csv file

In [30]:
geospatial_coordinates = pd.read_csv('Geospatial_Coordinates.csv')
geospatial_coordinates.shape

(103, 3)

In [31]:
geospatial_coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


##### 5.1 I merged the two dataset together using the Postal Code

In [32]:
new_data = pd.merge(data,geospatial_coordinates, on='Postal Code', how='inner')
new_data.shape

(103, 5)

In [33]:
new_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


#### 6. Generated maps to visualize the neighborhoods and how they are cluster together.

In [49]:
east_toronto_data = new_data[new_data['Borough'] == 'East Toronto'] 
downtown_toronto_data = new_data[new_data['Borough'] == 'Downtown Toronto'] 
west_toronto_data = new_data[new_data['Borough'] == 'West Toronto']


#### East Toronto clustering

In [64]:
east_toronto_data.head(2)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
19,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188


In [66]:
address = 'East Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location_1 = geolocator.geocode(address)
latitude_1 = location.latitude
longitude_1 = location.longitude
print('The geograpical coordinate of East Toronto are {}, {}.'.format(latitude_1, longitude_1))

# create map of Manhattan using latitude and longitude values
east_map = folium.Map(location=[latitude_1, longitude_1], zoom_start=11)

# add markers to map
for lat, lng, label in zip(east_toronto_data['Latitude'], east_toronto_data['Longitude'], east_toronto_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(east_map)  
    
east_map

The geograpical coordinate of East Toronto are 43.6534817, -79.3839347.



#### West Toronto clustering

In [61]:
west_toronto_data.head(2)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
31,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259
37,M6J,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975


In [67]:
address = 'West Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location_2 = geolocator.geocode(address)
latitude_2 = location.latitude
longitude_2 = location.longitude
print('The geograpical coordinate of West Toronto are {}, {}.'.format(latitude_2, longitude_2))

# create map of Manhattan using latitude and longitude values
west_map = folium.Map(location=[latitude_2, longitude_2], zoom_start=11)

# add markers to map
for lat, lng, label in zip(west_toronto_data['Latitude'], west_toronto_data['Longitude'], west_toronto_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(west_map)  
    
west_map

The geograpical coordinate of West Toronto are 43.6534817, -79.3839347.



#### Downtown Toronto clustering

In [60]:
downtown_toronto_data.head(2)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [68]:
address = 'Downtown Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location_3 = geolocator.geocode(address)
latitude_3 = location.latitude
longitude_3 = location.longitude
print('The geograpical coordinate of Downtown Toronto are {}, {}.'.format(latitude_3, longitude_3))

# create map of Manhattan using latitude and longitude values
downtown_map = folium.Map(location=[latitude_3, longitude_3], zoom_start=11)

# add markers to map
for lat, lng, label in zip(downtown_toronto_data['Latitude'], downtown_toronto_data['Longitude'], downtown_toronto_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(downtown_map)  
    
downtown_map

The geograpical coordinate of Downtown Toronto are 43.6534817, -79.3839347.
