In [1]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd

# Part 1

In [63]:
# Scrape wiki page
#
# Download the Wikipedia article and parse it with the lxml parser.
# The request is given a timeout so the cell cannot hang forever, and
# raise_for_status() turns an HTTP error (4xx/5xx) into an exception here
# instead of letting an error page be parsed as if it were the article.
wiki_response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
wiki_response.raise_for_status()

# NOTE: despite its name, website_url holds the page's HTML text, not a URL.
# The name is kept so any other cell that refers to it keeps working.
website_url = wiki_response.text
soup = bs(website_url, 'lxml')

In [64]:
# Isolate table of interest
#
# Look up the first <table> whose class attribute is 'wikitable sortable'
# and collect all of its <tr> rows.
canada_neigh = soup.find('table', {'class': 'wikitable sortable'})
if canada_neigh is None:
    # find() returns None when nothing matches; fail here with a clear
    # message rather than with an AttributeError on the next line.
    raise ValueError(
        "No <table class='wikitable sortable'> found in the scraped page; "
        "the page layout may have changed."
    )
rows = canada_neigh.find_all('tr')

In [65]:
# Build the neighborhood dataframe from the scraped wiki table rows.
#
# Every data row is read as three <td> cells: postal code, borough and
# neighborhood. The rows are collected in a list and converted to a
# DataFrame in one step, so the number of table rows does not have to be
# hard-coded and the frame never contains empty filler rows.
table_columns = ['Postal Code', 'Borough', 'Neighborhood']
records = []
for row in rows[1:]:  # first row is skipped (presumably the header row)
    cells = [cell.get_text().rstrip() for cell in row.find_all('td')]
    if len(cells) < len(table_columns):
        # Incomplete row: it cannot supply all three fields, so skip it.
        continue
    records.append(cells[:len(table_columns)])
can_neigh = pd.DataFrame(records, columns=table_columns)

# Drop rows where Borough is "Not assigned".
# .copy() makes the filtered frame independent so the assignment below
# writes to it directly instead of to a possible view of the original.
can_neigh = can_neigh[can_neigh['Borough'] != "Not assigned"].copy()

# Assign value of borough for neighborhoods with value "Not assigned".
# A single .loc assignment is used instead of chained indexing
# (df[col][row] = ...), which is not guaranteed to modify the frame.
unnamed_neigh = can_neigh['Neighborhood'] == 'Not assigned'
can_neigh.loc[unnamed_neigh, 'Neighborhood'] = can_neigh.loc[unnamed_neigh, 'Borough']

# Get rid of duplicate postal codes and merge neighborhoods:
# one row per (Postal Code, Borough), neighborhoods joined with commas.
can_neigh = (
    can_neigh
    .groupby(by=['Postal Code', 'Borough'])['Neighborhood']
    .agg(','.join)
    .reset_index()
)

In [66]:
# Find shape of can_neigh dataframe as (number of rows, number of columns)
can_neigh.shape

(103, 3)

# Part 2

In [67]:
# Preview the first rows of the can_neigh dataframe
can_neigh.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [68]:
#!conda install geocoder

In [69]:
# import geocoder

In [70]:
# lat_lng_coords = None
# while(lat_lng_coords is None):
#     g = geocoder.google('{}, Toronto, Ontario'.format('M5G'))
#     lat_lng_coords = g.latlng
# print(lat_lng_coords)

## Note: I could not get geocoder to work, so I decided to use the Geospatial_data .csv file instead.


In [71]:
# Load the postal-code coordinate table (Geospatial_data.csv) into a
# pandas dataframe straight from its URL, then preview the first rows.
geospatial_csv_url = 'https://cocl.us/Geospatial_data'
lat_lng_coords = pd.read_csv(geospatial_csv_url)
lat_lng_coords.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [80]:
# Combine the neighborhood table with the coordinate table.
# The 'Postal Code' column of both frames is cast to str first so the
# join keys have the same type, then the frames are inner-joined on it
# (inner is the default join type of merge).
join_key = 'Postal Code'
for frame in (can_neigh, lat_lng_coords):
    frame[join_key] = frame[join_key].astype(str)
can_neigh_latlng = pd.merge(can_neigh, lat_lng_coords, how='inner', on=join_key)

In [81]:
# Preview the first rows of the merged dataframe
can_neigh_latlng.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
