In [1]:
# import libraries
from bs4 import BeautifulSoup
import pandas as pd
import requests

In [2]:
# getting webpage content and parse it to BeautifulSoup
source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
soup = BeautifulSoup(source, 'lxml')

In [3]:
# find table in content
postCodes_table = soup.find('table',{'class':'wikitable sortable'})

In [4]:
# create a list, find table rows content (tr), iterate over rows and append content into list
postal_codes = []
rows = postCodes_table.findAll('tr')
for row in rows:
    postal_codes.append(row.text.split('\n'))

In [5]:
# create dataframe from previous list
df = pd.DataFrame(postal_codes[1:],columns=postal_codes[0])

In [6]:
# eliminate values with value 'Not assinged' in column Borough
df = df[df.Borough != 'Not assigned']

In [7]:
df[df.Neighbourhood == 'Not assigned']

Unnamed: 0,Unnamed: 1,Postcode,Borough,Neighbourhood,Unnamed: 5
8,,M7A,Queen's Park,Not assigned,


In [8]:
# find values "Not assinged" in column Neighbourhood and substitue with value in column Borough
df.loc[df.Neighbourhood == 'Not assigned', 'Neighbourhood'] = df.Borough
df[df.Neighbourhood == 'Not assigned']

Unnamed: 0,Unnamed: 1,Postcode,Borough,Neighbourhood,Unnamed: 5


In [9]:
df[df.Neighbourhood == "Queen's Park"]

Unnamed: 0,Unnamed: 1,Postcode,Borough,Neighbourhood,Unnamed: 5
8,,M7A,Queen's Park,Queen's Park,


In [10]:
df[df.Postcode == 'M5A']

Unnamed: 0,Unnamed: 1,Postcode,Borough,Neighbourhood,Unnamed: 5
4,,M5A,Downtown Toronto,Harbourfront,
5,,M5A,Downtown Toronto,Regent Park,


In [11]:
# create a function to join values and group values in dataframe
join = lambda a: ", ".join(a)
df = df.groupby(by=['Postcode','Borough']).agg({'Neighbourhood': join}).reset_index()

In [12]:
df[df.Postcode == 'M5A']

Unnamed: 0,Postcode,Borough,Neighbourhood
53,M5A,Downtown Toronto,"Harbourfront, Regent Park"


In [13]:
df.shape

(103, 3)

In [14]:
coordinates = pd.read_csv("Geospatial_Coordinates.csv")

In [15]:
coordinates.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [18]:
merge_df = pd.merge(df,
                 coordinates[['Postcode','Latitude', 'Longitude']],
                 on='Postcode')
merge_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [19]:
merge_df.shape

(103, 5)