### Import packages

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

### Load data

In [2]:
# Download the Wikipedia page that lists Canadian postal codes beginning with "M".
# The timeout keeps this cell from hanging indefinitely on a stalled connection.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
# Stop on an HTTP error (4xx/5xx) instead of parsing an error page as if it were data.
response.raise_for_status()
# NOTE: `url` holds the page's HTML text, not a URL; the name is kept so that any
# later cell referring to it keeps working.
url = response.text
soup = BeautifulSoup(url, 'lxml')

### Quick view and examination

# Peek at the first 1000 characters of the pretty-printed HTML as a sanity check.
pretty_html = soup.prettify()
print(pretty_html[:1000])

In [11]:
# Take the first <table> element in the document (None when the page has no table).
wikitable = soup.table

In [12]:
# Display the raw <th> header cells so the column names can be checked by eye.
wikitable.find_all(name='th')

[<th>Postal code
 </th>, <th>Borough
 </th>, <th>Neighborhood
 </th>]

In [13]:
# Print each header's text with the surrounding whitespace/newlines removed.
for header_cell in wikitable.find_all('th'):
    header_text = header_cell.text.strip()
    print(header_text)

Postal code
Borough
Neighborhood


### Create columns and data

In [14]:
# Column names are the table's header-cell texts, with whitespace trimmed.
columns = [
    header_cell.text.strip()
    for header_cell in wikitable.find_all('th')
]

In [15]:
# Build one list of trimmed <td> texts per table row. The first <tr> is skipped
# (presumably the header row, whose cells are <th> rather than <td>).
data = [
    [cell.text.strip() for cell in row.find_all('td')]
    for row in wikitable.find_all("tr")[1:]
]

In [21]:
# Assemble the scraped rows into a DataFrame labelled with the scraped header names.
trtdata = pd.DataFrame(data=data, columns=columns)

In [23]:
# Preview the first five rows of the raw scraped table.
trtdata.head(n=5)

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


### Step 1: Remove rows whose Borough is 'Not assigned'

In [24]:
# Keep only the rows with an assigned borough, then renumber the index from 0.
has_borough = trtdata['Borough'] != 'Not assigned'
trtdata1 = trtdata[has_borough].reset_index(drop=True)

### Step 2: Fill unassigned Neighborhood values with the Borough name

In [25]:
# A neighborhood counts as unassigned when it is the literal 'Not assigned' or blank
# (the preview of the scraped table shows blank Neighborhood cells for unassigned rows).
unassigned = trtdata1['Neighborhood'].isin(['Not assigned', ''])
# Take the replacement values from trtdata1 itself. Its index was reset in Step 1, and
# pandas aligns an assigned Series on index labels, so reading from trtdata['Borough']
# would copy the borough of a different row.
trtdata1.loc[unassigned, 'Neighborhood'] = trtdata1.loc[unassigned, 'Borough']

In [26]:
# Preview the first five rows after filtering and filling.
trtdata1.head(n=5)

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


### Step 3: Merge neighborhoods that share a postal code

In [29]:
# Collapse rows that share a postal code and borough into a single row, joining the
# values of the remaining text column(s) with ', '.
by_code_and_borough = trtdata1.groupby(['Postal code', 'Borough'])
trtdata2 = by_code_and_borough.agg(', '.join).reset_index()

In [30]:
# Report the (rows, columns) size of the merged table.
merged_shape = trtdata2.shape
print(merged_shape)

(103, 3)
