In [170]:
######### PART 1 #########

In [23]:
import requests

#Wikipedia page that holds the table of Toronto ("M") postal codes
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

#A timeout keeps the cell from hanging forever on a stalled connection, and
#raise_for_status() stops here on an HTTP error instead of parsing an error page.
page = requests.get(url, timeout=30)
page.raise_for_status()
response = page.text

In [24]:
pip install beautifulsoup4

Note: you may need to restart the kernel to use updated packages.


In [25]:
from bs4 import BeautifulSoup

#Parse the downloaded HTML text using the lxml parser
soup = BeautifulSoup(markup=response, features='lxml')

In [26]:
#Scraping the Wikipedia page to get table data

table = soup.find_all('table', {'class':'wikitable sortable'})
if not table:
    #Give a clear error instead of an IndexError on table[0] below
    raise ValueError("No table with class 'wikitable sortable' was found on the page.")

#Column titles, with trailing whitespace removed
header = [th.text.rstrip() for th in table[0].find_all('th')]

col1 = [] #Postal Code
col2 = [] #Borough
col3 = [] #Neighborhood

#Adding the data from each column to the corresponding list
#Rows that do not have exactly three <td> cells are skipped
for row in table[0].find_all('tr'):
    cells = row.find_all('td')
    if len(cells) == 3:
        #find(string=True) returns the first text node of the cell unchanged,
        #so values keep any trailing whitespace from the HTML (e.g. 'M3A\n')
        col1.append(cells[0].find(string=True))
        col2.append(cells[1].find(string=True))
        col3.append(cells[2].find(string=True))

In [27]:
#Creating a dictionary for the data
#Every column title from the table header starts with a placeholder value of 0

d = dict.fromkeys(header, 0)
d

{'Postal Code': 0, 'Borough': 0, 'Neighbourhood': 0}

In [28]:
#Convert to pandas data frame

import pandas as pd

#Swap the placeholder values in d for the scraped column lists
d.update({
    'Postal Code': col1,
    'Borough': col2,
    'Neighbourhood': col3,
})

df = pd.DataFrame(data=d)
df.head(n=10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


In [29]:
#CLEANING
#Getting rid of cells with a borough that is Not assigned.
#The scraped text can carry trailing whitespace (e.g. 'Not assigned\n'), so the
#comparison is made on a stripped copy; the values stored in the frame are not changed.

is_assigned = df['Borough'].str.strip() != 'Not assigned'
df_can = df[is_assigned].reset_index(drop=True)
df_can.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [30]:
#Using the .shape attribute to show the (rows, columns) dimensions of my dataframe.

df_can.shape

(103, 3)

In [132]:
######### PART 2 #########

#I tried using the geocoder package, but it returned None for every
#test case I tried, no matter which city. I ran it in a while loop
#and it returned None at least 100 times, so I'm using the .csv
#file instead to get the lat/long coordinates for each postal code.

In [133]:
import os

#Location of the coordinates .csv file. The default is the original local download
#path; set the GEOSPATIAL_CSV environment variable to use the file from another location.
file_path = os.environ.get("GEOSPATIAL_CSV", "/Users/ellacathey/Downloads/Geospatial_Coordinates.csv")
data = pd.read_csv(file_path)

In [134]:
#Preview of the first five rows read from the .csv file

data.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [135]:
#Adding the latitude and longitude of each postal code to the df_can DataFrame

#The two tables are not in the same order, so coordinates are looked up by postal code.
#Scraped codes can carry trailing whitespace (e.g. 'M3A\n'), so matching is done on
#stripped copies; the 'Postal Code' column of df_can itself is left as it is.
can_codes = df_can['Postal Code'].str.strip()

#Coordinates from the .csv file, indexed by stripped postal code.
#If a code appears more than once in the file, its first row is used.
coords = (
    data.assign(**{'Postal Code': data['Postal Code'].str.strip()})
        .drop_duplicates(subset='Postal Code')
        .set_index('Postal Code')
)

#Stop with a clear message if a postal code has no coordinates,
#instead of ending up with columns of the wrong length or misaligned rows
missing = can_codes[~can_codes.isin(coords.index)].unique()
if len(missing) > 0:
    raise ValueError("No coordinates found in the .csv file for postal codes: " + ", ".join(map(str, missing)))

#One coordinate row per df_can row, in df_can's order
matched = coords.loc[can_codes]

#Lists to store the latitudes and longitudes of each postal code
lats = matched['Latitude'].tolist()
longs = matched['Longitude'].tolist()

#Adding latitude and longitude columns to the df_can DataFrame
df_can['Latitude'] = lats
df_can['Longitude'] = longs

#Displaying the first few rows of df_can which now includes lats/longs
#Rows keep the existing order of df_can (they are not re-sorted)
df_can.head(20)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
