In [1]:
import bs4

In [2]:
import urllib3

In [3]:
import certifi

In [4]:
import pandas as pd

In [5]:
# Pooled HTTP client that verifies TLS certificates against certifi's CA bundle.
pm = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where())

In [6]:
# Wikipedia article listing the Canadian postal codes that start with "M".
myurl = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [7]:
# Download the page and fail fast on a non-200 answer, so that an error page is
# never handed to the parser as if it were the postal-code article.
response = pm.urlopen(url=myurl, method="GET")
if response.status != 200:
    raise RuntimeError("GET {} returned HTTP status {}".format(myurl, response.status))
html = response.data

In [8]:
# Parse the downloaded bytes into a navigable tree using the lxml parser backend.
soup = bs4.BeautifulSoup(html, features='lxml')

In [9]:
# First table whose class attribute is exactly "wikitable sortable".
table = soup.find('table', {'class': "wikitable sortable"})
# find() returns None when nothing matches; stop here with a clear message
# instead of letting a later attribute access on None fail obscurely.
if table is None:
    raise ValueError("No table with class 'wikitable sortable' found at " + myurl)

In [10]:
# Every <tr> of the table: index 0 is the header row, the rest are data rows.
results = table.find_all('tr')

In [11]:
# Header cells (<th>) taken from the table's first row.
title_list = results[0].find_all('th')
title_list

[<th>Postcode</th>, <th>Borough</th>, <th>Neighbourhood
 </th>]

In [12]:
# Column names are the whitespace-trimmed text of each header cell.
header = [th.text.strip() for th in title_list]

header

['Postcode', 'Borough', 'Neighbourhood']

In [13]:
# Empty frame that carries only the scraped column names (no rows yet).
df = pd.DataFrame(columns = header)
df

Unnamed: 0,Postcode,Borough,Neighbourhood


Create the raw table from the scraped rows

In [14]:
# Build the raw table in a single constructor call. Collecting plain lists first
# avoids DataFrame.append, which copies the whole frame on every iteration and
# was removed in pandas 2.0.
titles = ['Postcode', 'Borough', 'Neighbourhood']
rows = []
for result in results[1:]:
    cells = [element.text.strip() for element in result.find_all('td')]
    if cells:  # a row without any <td> carries no data, so it is skipped
        rows.append(cells)
df = pd.DataFrame(rows, columns=titles)

df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


Remove the rows whose Borough is "Not assigned"

In [15]:
# Keep only rows with an assigned borough. A boolean mask replaces dropping rows
# in place while iterating over the same frame. reset_index returns a new frame,
# so its result has to be assigned for df to actually get the fresh 0..n-1 index.
df = df[df['Borough'] != 'Not assigned'].reset_index(drop=True)
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
5,M6A,North York,Lawrence Manor
6,M7A,Queen's Park,Not assigned
7,M9A,Etobicoke,Islington Avenue
8,M1B,Scarborough,Rouge
9,M1B,Scarborough,Malvern


Merge the neighbourhoods that share a postcode into one comma-separated row

In [16]:
# One row per (Postcode, Borough) pair; the neighbourhood names of each pair
# are concatenated with ", ".
df_new = (
    df.groupby(['Postcode', 'Borough'])['Neighbourhood']
      .apply(', '.join)
      .reset_index()
)

In [17]:
df_new

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


Replace a "Not assigned" Neighbourhood with the name of its Borough

In [18]:
# Where the neighbourhood is "Not assigned", fall back to the borough name.
# The assignment goes through .loc on the frame itself: writing to the row
# object yielded by iterrows() may only change a temporary copy and leave
# df_new untouched.
unassigned = df_new['Neighbourhood'] == 'Not assigned'
df_new.loc[unassigned, 'Neighbourhood'] = df_new.loc[unassigned, 'Borough']

df_new

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [19]:
df_new.shape

(103, 3)

In [20]:
# Load the coordinates table from a CSV file; the relative path means the file
# must be present in the notebook's working directory.
coord = pd.read_csv('Geospatial_Coordinates.csv')

In [21]:
coord

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [22]:
# Left-join the coordinates onto the postcode table, keeping every postcode row.
new_df = pd.merge(df_new, coord, how='left', left_on='Postcode', right_on='Postal Code')
# drop() returns a new frame; assign it so new_df really loses the redundant
# join-key column instead of only the displayed copy doing so.
new_df = new_df.drop(columns='Postal Code')
new_df

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [23]:
! pip install folium



In [24]:
import folium

In [27]:
# Base map. The centre coordinates presumably point at Toronto (the latitudes
# and longitudes in the data lie in the same range) -- TODO confirm.
f_map = folium.Map(location=[43.7001100, -79.4163000], zoom_start=11)

In [28]:
f_map

In [36]:
# Draw one green 500 m circle per postcode. Rows that found no match in the
# left join have NaN coordinates and cannot be plotted, so they are skipped.
# Each popup names the area it marks rather than showing one fixed label for
# every circle.
for _, row in new_df.dropna(subset=['Latitude', 'Longitude']).iterrows():
    label = '{} ({})'.format(row['Neighbourhood'], row['Postcode'])
    folium.Circle(
        location=[row['Latitude'], row['Longitude']],
        color='green',
        radius=500,
        # parse_html=True escapes the text, so names containing quotes or
        # other markup characters are rendered literally.
        popup=folium.Popup(label, parse_html=True),
        fill=True,
        fill_opacity=0.5,
    ).add_to(f_map)

In [37]:
f_map