In [2]:
# import packages and set default displays
import json
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Raise the display cap so frames of up to 4000 rows render in full instead of being truncated.
pd.set_option("display.max_rows", 4000)

### First, let's scrape the postal-code table from Wikipedia and use BeautifulSoup and pandas to turn it into a clean DataFrame.

In [3]:
# Scrape the Toronto ("M") postal-code table from Wikipedia and clean it into `df`.
obj = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,  # fail instead of hanging forever if the server never answers
)
obj.raise_for_status() # stop on an HTTP error rather than parsing an error page
text = obj.text
soup = BeautifulSoup(text, 'html.parser') # parse the text with beautifulsoup
table = soup.find_all('table') # every <table> element in the parsed html
# Wrap the markup in StringIO: passing a literal HTML string to read_html is
# deprecated in pandas >= 2.1, while a file-like object works on all versions.
df = pd.read_html(StringIO(str(table)))[0] # the first table is the postal-code table
indexNames = df[df['Borough'] == 'Not assigned'].index # rows whose borough is not assigned
df = df.drop(indexNames) # drop the rows with borough not assigned
indexNames_N = df[df['Neighborhood'] == 'Not assigned'].index # remaining rows with neighborhood not assigned
# Fall back to the borough name where no neighborhood name is available; select
# the same rows on both sides so the assignment does not rely on implicit alignment.
df.loc[indexNames_N, "Neighborhood"] = df.loc[indexNames_N, "Borough"]
df = df.reset_index(drop = True) # renumber rows 0..n-1 after the drop
df.head(10) # show the first 10 rows of the df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


### Now, it's time to get down to the business of geocoding. We'll get our coordinates from a .csv first, then join the data to the borough/neighborhood dataframe we just created.

In [6]:
# Load the coordinates CSV from its hosted location into a pandas dataframe.
GEOSPATIAL_CSV_URL = "https://cocl.us/Geospatial_data"
coords = pd.read_csv(GEOSPATIAL_CSV_URL)

In [28]:
# Inner join on "Postal Code": keep only postal codes present in both the
# borough/neighborhood frame and the coordinates frame.
fulldata = df.merge(coords, on="Postal Code", how="inner")

In [30]:
# Preview the first 10 rows of the joined dataset.
fulldata.head(n=10)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
