#### Do all of our imports to set up

In [1]:
import pandas as pd
import numpy as np

#### Download the dataframe directly from the wiki page

In [2]:
df1_array=pd.read_html(io='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',match='Postcode',skiprows=1)
df1=df1_array[0]

#### Rename the columns

In [3]:
df1.columns=['PostalCode','Borough','Neighborhood']
df1.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


#### Remove rows that have Not assigned as a Borough

In [4]:
df2 = df1[df1.Borough != 'Not assigned']

#### Assign Borough value to Neighborhood if it is Not assigned

In [6]:
df2.loc[:,'Neighborhood'] = df2.Borough.where(df2.Neighborhood == 'Not assigned',df2.Neighborhood)
df2.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


#### Merge Neighborhoods with same PostalCode

In [7]:
df3 = df2.groupby('PostalCode',as_index=False).agg(lambda x: ', '.join(set(x)))
df3.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Morningside, Guildwood, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [8]:
df3.shape

(103, 3)

#### Load a dataframe with the geo data from the csv file

In [9]:
geo_df1 = pd.read_csv('http://cocl.us/Geospatial_data')

#### Set the column names

In [10]:
geo_df1.columns=['PostalCode', 'Latitude', 'Longitude']
geo_df1.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Set the index to be PostalCode

In [11]:
geo_df2 = geo_df1.set_index('PostalCode')
geo_df2.head()

Unnamed: 0_level_0,Latitude,Longitude
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476


#### Merge latitude and longitude into dataframe using geo lookup

In [12]:
df4= df3.merge(geo_df2,left_on='PostalCode',right_on='PostalCode', how='inner')
df4.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Morningside, Guildwood, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
