# Part 1: Scrape Wikipedia Page

## Scrape the table from the Wikipedia page using pandas

In [74]:
import pandas as pd

In [75]:
# Wikipedia list of Canadian postal codes beginning with "M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# read_html returns one DataFrame per <table> found on the page; the
# postal-code table is the first one.
# header=0 makes pandas take the column names from the first row of the table,
# so the result is the same whether or not the parser already recognises that
# row as a header. Promoting row 0 by hand would discard the first data row
# (and use its values as column names) whenever the header was already detected.
df = pd.read_html(url, header=0)[0]
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park, Harbourfront"


## Clean scraped data

In [76]:
# Keep only the rows whose borough has actually been assigned
has_borough = df['Borough'].ne('Not assigned')
df = df.loc[has_borough]

In [77]:
# Collapse rows that share a postal code into a single row.
# The neighbourhoods have to be joined *before* any de-duplication: dropping
# duplicate postal codes first leaves one row per code, so the join would have
# nothing left to combine and the other neighbourhoods would be silently lost.
# The borough is taken from the first row of each postal code, and sort=False
# keeps the postal codes in their original order of appearance.
# Building a new frame also avoids mutating a filtered slice in place.
df = (
    df.groupby('Postal Code', sort=False, as_index=False)
      .agg({'Borough': 'first', 'Neighbourhood': ', '.join})
)

In [78]:
# Where no neighbourhood was assigned, fall back to the borough name
import numpy as np
no_neighbourhood = df['Neighbourhood'] == 'Not assigned'
df['Neighbourhood'] = np.where(no_neighbourhood, df['Borough'], df['Neighbourhood'])
df.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


## Print number of rows

In [79]:
# DataFrame.shape is a (rows, columns) tuple; the row count is its first element
df.shape

(103, 3)

# Part 2: Get the latitude and the longitude coordinates of each neighborhood

In [85]:
# Read the geographical coordinates CSV from the parent directory.
# A forward slash is used because it is accepted as a path separator on
# Windows, Linux and macOS alike; a backslash is only a separator on Windows
# and is otherwise treated as part of the file name.
df_geo = pd.read_csv("../Geospatial_Coordinates.csv")

In [81]:
# Attach the coordinates to each postal code; the left join keeps every row of df
df = pd.merge(df, df_geo, on='Postal Code', how='left')

In [82]:
# Preview the first five rows of the merged frame
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
