In [44]:
from io import StringIO

import pandas as pd
import requests

# Part 1: Get the data from Wikipedia

In [45]:
#Interrogate the target url once; the downloaded HTML is reused for parsing below
website_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
#requests has no default timeout, so set one to avoid hanging the kernel on a stalled connection
response = requests.get(website_url, timeout = 30)
#Stop here on HTTP errors (4xx/5xx) rather than trying to parse an error page
response.raise_for_status()
result = response.text

#Define the class of the element we're looking for [wikipedia table]
class_to_search = 'wikitable sortable'

#Retrieve all the matching tables from the HTML we already downloaded (no second request).
#The text is wrapped in StringIO so it is handed to pandas as a file-like object.
#Note: per the pandas documentation read_html itself raises ValueError when no table matches;
#the explicit check below is kept as a defensive guard.
tables = pd.read_html(StringIO(result), attrs = {'class': class_to_search})
if not len(tables) > 0:
    raise Exception('The source page contains no tables')

#Get the first table (there is only one in the source page)
df = tables[0]

Clean the imported data: rename the 'Postal Code' column to 'PostalCode' and drop the records whose borough is 'Not assigned'.

In [46]:
#Give the key column the space-free name 'PostalCode' (df is modified in place)
df.rename({'Postal Code': 'PostalCode'}, axis='columns', inplace=True)

#Look up the index labels of the rows whose Borough is 'Not assigned' and remove them in place
df.drop(index=df.loc[df['Borough'] == 'Not assigned'].index, inplace=True)

# Part 2: Add the geographical data

In [47]:
#Load the latitude/longitude lookup table from the remote csv
csv_source = 'https://cocl.us/Geospatial_data'
df_coordinates = pd.read_csv(filepath_or_buffer=csv_source)
#Preview the first five rows
df_coordinates.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [48]:
#Rename 'Postal Code' to 'PostalCode' in place so the column can be used as the join key
df_coordinates.rename({'Postal Code': 'PostalCode'}, axis='columns', inplace=True)
#Preview the first five rows to confirm the new column name
df_coordinates.head(n=5)

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Add longitude and latitude data to the original neighbourhood dataset

In [49]:
#Inner join on 'PostalCode': only postal codes present in both tables are kept
df = df.merge(df_coordinates, how='inner', on='PostalCode')
#Preview the first five rows of the combined table
df.head(n=5)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
