# Lab: Segmenting and Clustering Neighborhoods in the City of Toronto, Canada
## by Christian Cisne

### Import libraries

In [1]:
import pandas as pd

### Load dataset

In [2]:
# Wikipedia page listing the Canadian postal codes that start with "M".
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# read_html parses the HTML tables found at the URL; the cells below use the
# first one, rawdata[0].
rawdata = pd.read_html(url)

In [3]:
# Preview the first table parsed from the page.
rawdata[0].head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


## Clean dataset

In [4]:
# Build the cleaned postal-code table from the first scraped table.
#
# Every step returns a new object, so rawdata[0] is never modified and this
# cell gives the same result each time it is re-run.
#
# Steps:
#   1. Ignore rows whose borough is "Not assigned".
#   2. If a row has a borough but no usable neighborhood (the literal
#      "Not assigned" or a missing value), use the borough as the neighborhood.
#      Series.mask aligns the Borough values row by row. The earlier
#      `df['Neighborhood'].replace(..., value=[df['Borough']], inplace=True)`
#      passed the whole Borough column as one replacement value and mutated a
#      column selection in place, which pandas does not guarantee will write
#      back to the frame.
#   3. Renumber the rows from 0 without keeping the old index as a column.
df_toronto = (
    rawdata[0]
    .loc[lambda table: table['Borough'] != "Not assigned"]
    .assign(
        Neighborhood=lambda table: table['Neighborhood'].mask(
            table['Neighborhood'].isna()
            | table['Neighborhood'].eq('Not assigned'),
            table['Borough'],
        )
    )
    .reset_index(drop=True)
)

df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
# Show the (rows, columns) size of the cleaned table.
df_toronto.shape

(103, 3)

### Get latitude and longitude

In [6]:
!pip install geocoder



In [7]:
import geocoder # import geocoder

In [8]:
# Quick trial of the geocoder library with a sample address.
# Author's note: even this simple example did not work, so the coordinates are
# loaded from Geospatial_Coordinates.csv in the next section instead.
# NOTE(review): presumably the Google provider needs an API key — confirm.
g = geocoder.google('Mountain View, CA')
g.latlng

### Load Geospatial_Coordinates.csv

In [9]:
# Download the postal-code coordinates and save them as Geospatial_Coordinates.csv.
!wget -O Geospatial_Coordinates.csv https://cocl.us/Geospatial_data

--2020-05-13 02:03:56--  https://cocl.us/Geospatial_data
Resolving cocl.us (cocl.us)... 169.48.113.194, 158.85.108.86, 158.85.108.83
Connecting to cocl.us (cocl.us)|169.48.113.194|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2020-05-13 02:03:59--  https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Resolving ibm.box.com (ibm.box.com)... 107.152.26.197
Connecting to ibm.box.com (ibm.box.com)|107.152.26.197|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2020-05-13 02:03:59--  https://ibm.box.com/public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Reusing existing connection to ibm.box.com:443.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.ent.box.com/public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr

In [10]:
# Read the downloaded coordinates file into a DataFrame.
df_geo = pd.read_csv('Geospatial_Coordinates.csv')

In [11]:
# Preview the coordinates table.
df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
# Left-join the coordinates onto the Toronto table, matching rows by Postal Code.
df_toronto_geo = df_toronto.join(
    other=df_geo.set_index(keys='Postal Code'),
    on='Postal Code',
    how='left',
)
df_toronto_geo.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
