# Part 1: Dataframe: PostalCode, Borough, Neighborhood

In [124]:
import pandas as pd
import numpy as np
import json
from bs4 import BeautifulSoup

In [125]:
soup = BeautifulSoup(requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text, 'lxml')
table = soup.find('table',{'class':'wikitable sortable'})
table_rows = table.find_all('tr')
data = []
for row in table_rows:
    data.append([t.text.strip() for t in row.find_all('td')])

df = pandas.DataFrame(data, columns=['PostalCode', 'Borough', 'Neighbourhood'])
df = df[~df['PostalCode'].isnull()]

In [126]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [127]:
df.drop(df[df['Borough']=="Not assigned"].index,axis=0, inplace=True)

In [128]:
df.groupby('PostalCode').agg(lambda x: ','.join(x))

Unnamed: 0_level_0,Borough,Neighbourhood
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern, Rouge"
M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
...,...,...
M9N,York,Weston
M9P,Etobicoke,Westmount
M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [129]:
df.loc[df['Neighbourhood']=="Not assigned",'Neighbourhood']=df.loc[df['Neighbourhood']=="Not assigned",'Borough']

In [130]:
df['Borough']= df['Borough'].str.replace('nan|[{}\s]','').str.split(',').apply(set).str.join(',').str.strip(',').str.replace(",{2,}",",")

In [131]:
df.reset_index()

Unnamed: 0,index,PostalCode,Borough,Neighbourhood
0,3,M3A,NorthYork,Parkwoods
1,4,M4A,NorthYork,Victoria Village
2,5,M5A,DowntownToronto,"Regent Park, Harbourfront"
3,6,M6A,NorthYork,"Lawrence Manor, Lawrence Heights"
4,7,M7A,DowntownToronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...,...
98,161,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,166,M4Y,DowntownToronto,Church and Wellesley
100,169,M7Y,EastToronto,"Business reply mail Processing Centre, South C..."
101,170,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [132]:
df.shape

(103, 3)

# Part 2: added Longitude/Latitude

In [133]:
pip install geopy


Note: you may need to restart the kernel to use updated packages.


In [134]:
from  geopy.geocoders import Nominatim

In [135]:
df_geo = df

In [136]:
df_geo['address'] = df[['PostalCode', 'Borough', 'Neighbourhood']].apply(lambda x: ', '.join(x), axis=1 )

In [138]:
df_geo.head()
df_geo.shape

(103, 4)

In [139]:
!conda install -c conda-forge geocoder --yes
print("Installation Done!")
import geocoder # import geocoder
print("Geo Coder imported!")

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Installation Done!
Geo Coder imported!


In [140]:
def get_geocoder(postal_code_from_df):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code_from_df.strip()))
        lat_lng_coords = g.latlng
        latitude = lat_lng_coords[0]
        longitude = lat_lng_coords[1]
    return latitude,longitude

In [141]:
df['Latitude'], df['Longitude'] = zip(*df['PostalCode'].apply(get_geocoder))
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,address,Latitude,Longitude
3,M3A,NorthYork,Parkwoods,"M3A, NorthYork, Parkwoods",43.75188,-79.33036
4,M4A,NorthYork,Victoria Village,"M4A, NorthYork, Victoria Village",43.73042,-79.31282
5,M5A,DowntownToronto,"Regent Park, Harbourfront","M5A, DowntownToronto, Regent Park, Harbourfront",43.65514,-79.36265
6,M6A,NorthYork,"Lawrence Manor, Lawrence Heights","M6A, NorthYork, Lawrence Manor, Lawrence Heights",43.72321,-79.45141
7,M7A,DowntownToronto,"Queen's Park, Ontario Provincial Government","M7A, DowntownToronto, Queen's Park, Ontario Pr...",43.66449,-79.39302


# Part 3: Apply to Toronto

In [143]:
geolocator = Nominatim(user_agent="toronto_ontario")
location = geolocator.geocode('Toronto, Ontario')
latitude = location.latitude
longitude = location.longitude

In [151]:
torontomap = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, long, post, borough, neigh in zip(df['Latitude'], df['Longitude'], df['PostalCode'], df['Borough'], df['Neighbourhood']):
    label = "{} ({}): {}".format(borough, post, neigh)
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(torontomap)
    
torontomap