### 1. Import libraries

In [3]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

### 2. Get a DataFrame from the Wikipedia page with BeautifulSoup

In [12]:
# Download the Wikipedia page and turn the first table on it into a DataFrame.
html = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# requests applies no timeout by default, so set one to keep the cell from
# hanging, and fail loudly on an HTTP error instead of parsing an error page.
response = requests.get(html, timeout=30)
response.raise_for_status()
source = response.text

soup = BeautifulSoup(source, "lxml")
table = soup.find('table')  # first <table> element in the document
if table is None:
    raise ValueError(f"No <table> element found at {html}")
table_rows = table.find_all('tr')

res = []
for tr in table_rows:
    td = tr.find_all('td')
    # Strip each cell once and drop cells that are empty after stripping.
    row = [text for text in (cell.text.strip() for cell in td) if text]
    # Rows without any non-empty <td> text yield an empty list and are skipped.
    if row:
        res.append(row)


df = pd.DataFrame(res, columns=["PostalCode", "Borough", "Neighborhood"])
df.head()


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### 3. Remove all rows whose borough is Not assigned

In [13]:
# Keep only the rows whose borough is assigned. .copy() makes the result an
# independent frame, so assigning to its columns afterwards does not trigger
# pandas' SettingWithCopyWarning.
df = df[df.Borough != 'Not assigned'].copy()
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


### 4. Combine rows that share a postal code into one row, with the neighborhoods separated by commas

In [14]:
# Join the neighborhoods of each postal code into one comma-separated string.
# transform() returns one value per original row, so every row of a postal
# code receives the same joined string.
neighborhoods = df.groupby('PostalCode')['Neighborhood'].transform(
    lambda names: ', '.join(names)
)

# assign() builds a new frame instead of writing into `df` in place, which
# avoids SettingWithCopyWarning when `df` is a filtered selection.
df = df.assign(Neighborhood=neighborhoods)

# Rows that became exact duplicates are collapsed. reset_index() renumbers the
# rows and keeps the previous row labels in an `index` column.
df = df.drop_duplicates().reset_index()
df.head()

Unnamed: 0,index,PostalCode,Borough,Neighborhood
0,2,M3A,North York,Parkwoods
1,3,M4A,North York,Victoria Village
2,4,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,6,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,8,M7A,Queen's Park,Not assigned


### 5. If a row has a borough but a Not assigned neighborhood, set the neighborhood to the borough

In [15]:
# Where the neighborhood is 'Not assigned', fall back to that row's borough.
unassigned = df['Neighborhood'] == 'Not assigned'
df.loc[unassigned, 'Neighborhood'] = df.loc[unassigned, 'Borough']
df.head()

Unnamed: 0,index,PostalCode,Borough,Neighborhood
0,2,M3A,North York,Parkwoods
1,3,M4A,North York,Victoria Village
2,4,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,6,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,8,M7A,Queen's Park,Queen's Park


In [16]:
# Dimensions of df as a (rows, columns) tuple.
df.shape

(103, 4)