In [0]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

from geopy.geocoders import Nominatim

import folium

def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

In [2]:
# The CSV File file for Geospatial Coordinates.
Geospatial_Coordinates = pd.read_csv('Geospatial_Coordinates.csv') 
Geospatial_Coordinates.shape

(103, 3)

In [0]:
# URL of the Wikipedia Toronto page.
website_url = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text

In [0]:
# Beautiful soup object.
soup = BeautifulSoup(website_url)

In [0]:
# Find table.
My_table = soup.find('table',{'class':'wikitable sortable'})

In [0]:
# Extract the row to dictionary.
row_data = []
for row in My_table.find_all("tr"):
    cols = row.find_all("td")
    cols = [ele.text.strip() for ele in cols]
    row_data.append(cols)

In [7]:
# Pandas dataframe.
df1 = pd.DataFrame(row_data)
df1 = df1.rename(columns={0:"Postcode",1:"Borough",2:"Neighborhood"})
df1.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,,,
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


In [8]:
# Cleaning the dataframe by removing "Not assigned" values.
df2 = df1.copy()
df2 = df1.drop([0])
df2 = df2.drop(df2[df2['Borough']=='Not assigned'].index)
df2 = df2.reset_index(drop=True)
df2.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


In [9]:
# This is the row where 'Neighborhood' = 'Not assigned'
df2.loc[6]

Postcode                 M7A
Borough         Queen's Park
Neighborhood    Not assigned
Name: 6, dtype: object

In [10]:
# Replace values of 'Neighborhood' = 'Not assigned' with 'Bourough'
df3 = df2.copy()

df3['Neighborhood'] = df3.apply(
    lambda row: row['Borough'] if row['Neighborhood'] == 'Not assigned' else row['Neighborhood'],
    axis=1
)

df3.loc[6]

Postcode                 M7A
Borough         Queen's Park
Neighborhood    Queen's Park
Name: 6, dtype: object

In [11]:
# Processing and grouping the dataframe.
df4 = df3.copy()

df4 = (df4.groupby(['Postcode','Borough'])['Neighborhood']
       .apply(lambda x: ','.join(set(x.dropna())))
       .reset_index())

df4 = pd.DataFrame(df4)
df4.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Port Union,Highland Creek,Rouge Hill"
2,M1E,Scarborough,"Morningside,Guildwood,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [12]:
df4.shape

(103, 3)