### Import libraries

In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

### Create a function to concatenate Neighbourhood names

In [226]:
def concatNeighbourhood(x):
    """Join neighbourhood names into a single comma-separated string.

    Parameters
    ----------
    x : iterable of str
        The names to combine, e.g. the ``Neighbourhood`` values of one group
        passed in as a pandas Series by ``groupby(...).agg``. Order is kept.

    Returns
    -------
    str
        The names separated by ``', '``; an empty string for empty input.

    Raises
    ------
    TypeError
        If any element is not a string.
    """
    # str.join iterates the values directly, so no positional indexing or
    # manual separator bookkeeping is needed.
    return ', '.join(x)

### Parse the HTML page downloaded from Wikipedia and transform it into a DataFrame

In [227]:
# Parallel column buffers: each kept table row contributes one entry to each list.
pc_l, br_l, nb_l = [], [], []

html_doc = "List of postal codes of Canada  M - Wikipedia.html"
# Open in binary mode so BeautifulSoup detects the document's encoding itself
# instead of relying on the platform's default text encoding.
with open(html_doc, "rb") as fp:
    soup = BeautifulSoup(fp, "lxml")

table = soup.find('table', class_="wikitable sortable jquery-tablesorter")
if table is None:
    # Fail with a clear message rather than an AttributeError on None below.
    raise ValueError(f"No postal-code table found in {html_doc!r}")

for line in table.find_all('tr'):
    cells = line.find_all('td')
    # Rows without three <td> cells (such as rows made only of <th> header
    # cells) are skipped so the three column lists always keep equal length.
    if len(cells) < 3:
        continue
    # zip pairs the first three cells with the Postcode/Borough/Neighbourhood lists.
    for t, cell in zip([pc_l, br_l, nb_l], cells):
        t.append(cell.text.strip())

df = pd.DataFrame({'Postcode': pc_l, 'Borough': br_l, 'Neighbourhood': nb_l})

### Cleansing of the dataframe

In [228]:
# Turn the 'Not assigned' placeholder into a real missing value so fillna and
# dropna can act on it. The columns are reassigned rather than calling
# replace(..., inplace=True) on df['col']: that form mutates a temporary
# selection and leaves df untouched under pandas Copy-on-Write.
df['Borough'] = df['Borough'].replace('Not assigned', np.nan)
# A missing Neighbourhood falls back to the Borough value of the same row.
df['Neighbourhood'] = (
    df['Neighbourhood']
    .replace('Not assigned', np.nan)
    .fillna(df['Borough'])
)
# Drop every row that still has a missing value, i.e. rows with no Borough.
df = df.dropna()

### Group the DataFrame by Postcode and Borough, then call the function to concatenate Neighbourhood names

In [229]:
# One row per (Postcode, Borough) pair; the Neighbourhood values of each pair
# are merged into a single string by concatNeighbourhood, and the group keys
# are turned back into ordinary columns.
group_keys = ['Postcode', 'Borough']
df_grouped = (
    df.groupby(group_keys)
    .agg(concatNeighbourhood)
    .reset_index()
)
df_grouped.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


#### Shape of the final dataframe

In [230]:
# (rows, columns) of the grouped frame; each row is one distinct
# (Postcode, Borough) pair produced by the groupby above.
df_grouped.shape

(103, 3)