## Part 1: Scrape the Wikipedia page, read the table into a pandas DataFrame, and clean the data.

In [1]:
import pandas as pd

In [2]:
pip install wikipedia

Note: you may need to restart the kernel to use updated packages.


In [3]:
import wikipedia as wp

In [4]:
# Fetch the rendered HTML of the Wikipedia article as UTF-8 encoded bytes
wiki_page = wp.page("List of postal codes of Canada: M")
html = wiki_page.html().encode("UTF-8")

# read_html returns one DataFrame per table found; keep the first one
tables = pd.read_html(html)
df = tables[0]

In [8]:
# 1. Rename the columns ('Borough' already carries the desired name,
#    so only the other two need to be mapped)
df = df.rename(columns={'Postcode': 'PostalCode', 'Neighbourhood': 'Neighborhood'})
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [9]:
# 2. Keep only the rows whose Borough has actually been assigned
has_borough = df['Borough'].ne('Not assigned')
df = df.loc[has_borough]
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [10]:
# 3. Collapse rows that share a PostalCode/Borough pair into a single row,
#    joining their Neighborhood values into one comma-separated string
neighborhoods_by_code = df.groupby(['PostalCode', 'Borough'])['Neighborhood']
df = neighborhoods_by_code.apply(
    lambda names: ', '.join(names.astype(str))
).reset_index()
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [12]:
# 4. Where a Borough is assigned but the Neighborhood is not, use the Borough
#    name as the Neighborhood.
#
# The rows yielded by DataFrame.iterrows() are generally copies, so assigning
# into them is not guaranteed to change the DataFrame. Select the affected
# rows with a boolean mask and write through .loc so the update actually
# lands in `df`.
needs_fill = (df['Borough'] != 'Not assigned') & (df['Neighborhood'] == 'Not assigned')
df.loc[needs_fill, 'Neighborhood'] = df.loc[needs_fill, 'Borough']

df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [13]:
# Show the (rows, columns) dimensions of the cleaned DataFrame
df.shape

(103, 3)

## Part 2: Get the latitude and the longitude coordinates of each neighborhood.