### Importing required libraries

In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import requests

### Sending an HTTP request to fetch the HTML page and parsing it with html.parser

In [2]:
# Download the Wikipedia page listing Canadian postal codes that start with "M".
# The timeout keeps a stalled connection from hanging the kernel indefinitely.
postal_html = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
# Fail here on an HTTP error status instead of silently parsing an error page.
postal_html.raise_for_status()
# Parse the raw response bytes with Python's built-in HTML parser.
soup = BeautifulSoup(postal_html.content,'html.parser')

In [3]:
# First element in the page body whose class attribute is 'wikitable sortable'.
raw_data = soup.body.find(class_='wikitable sortable')

# One list of whitespace-stripped cell texts per table row.
# Rows without any <td> cell are skipped.
postal_data = [
    [cell.text.strip() for cell in table_row.find_all('td')]
    for table_row in raw_data.find_all('tr')
    if table_row.find('td')
]

postal_data

[['M1A', 'Not assigned', 'Not assigned'],
 ['M2A', 'Not assigned', 'Not assigned'],
 ['M3A', 'North York', 'Parkwoods'],
 ['M4A', 'North York', 'Victoria Village'],
 ['M5A', 'Downtown Toronto', 'Harbourfront'],
 ['M5A', 'Downtown Toronto', 'Regent Park'],
 ['M6A', 'North York', 'Lawrence Heights'],
 ['M6A', 'North York', 'Lawrence Manor'],
 ['M7A', "Queen's Park", 'Not assigned'],
 ['M8A', 'Not assigned', 'Not assigned'],
 ['M9A', 'Etobicoke', 'Islington Avenue'],
 ['M1B', 'Scarborough', 'Rouge'],
 ['M1B', 'Scarborough', 'Malvern'],
 ['M2B', 'Not assigned', 'Not assigned'],
 ['M3B', 'North York', 'Don Mills North'],
 ['M4B', 'East York', 'Woodbine Gardens'],
 ['M4B', 'East York', 'Parkview Hill'],
 ['M5B', 'Downtown Toronto', 'Ryerson'],
 ['M5B', 'Downtown Toronto', 'Garden District'],
 ['M6B', 'North York', 'Glencairn'],
 ['M7B', 'Not assigned', 'Not assigned'],
 ['M8B', 'Not assigned', 'Not assigned'],
 ['M9B', 'Etobicoke', 'Cloverdale'],
 ['M9B', 'Etobicoke', 'Islington'],
 ['M9B', 

### Creating the postal DataFrame from the scraped HTML data

In [4]:
# Build the frame from the scraped rows (one row per postal code / neighborhood pair).
df = pd.DataFrame(postal_data, columns=['PostalCode','Borough','Neighborhood'])
# Discard postal codes that have no borough assigned and renumber the rows.
df = df[df.Borough != 'Not assigned'].reset_index(drop=True)
# Where only the neighborhood is unassigned, fall back to the borough name.
# A single .loc assignment is used instead of chained indexing
# (df.Neighborhood[mask] = ...), which triggers SettingWithCopyWarning and
# does not modify df when pandas Copy-on-Write is enabled.
no_neighborhood = df.Neighborhood == "Not assigned"
df.loc[no_neighborhood, 'Neighborhood'] = df.loc[no_neighborhood, 'Borough']
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


In [5]:
def fetch_borough(x):
    """Return the value shared by every entry of ``x``, or ``"error"``.

    Parameters
    ----------
    x : object supporting ``len()`` and positional ``.iloc`` access
        In this notebook, the ``Borough`` values of one ``PostalCode`` group.

    Returns
    -------
    The first entry of ``x`` when no later entry compares unequal to it;
    otherwise the string ``"error"``.
    """
    first = x.iloc[0]
    # Compare every later entry against the first one, by position.
    mismatch = any(x.iloc[pos] != first for pos in range(1, len(x)))
    return "error" if mismatch else first

def concat_neighbor(x):
    """Join the entries of ``x`` into one comma-separated string.

    Parameters
    ----------
    x : iterable of str
        In this notebook, the ``Neighborhood`` values of one ``PostalCode``
        group (a pandas Series).

    Returns
    -------
    str
        The entries of ``x`` in their original order, separated by ``", "``.
        An empty input yields ``""`` rather than raising ``IndexError``.
    """
    # str.join builds the result in one pass and handles the empty and
    # single-element cases without special-casing the last item.
    return ", ".join(x)

# Collapse the frame to one row per postal code: the borough is reduced with
# fetch_borough and the neighborhoods are merged with concat_neighbor.
df = df.groupby('PostalCode').agg(
    {
        'Borough': fetch_borough,
        'Neighborhood': concat_neighbor,
    }
)
df

Unnamed: 0_level_0,Borough,Neighborhood
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Rouge, Malvern"
M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
M1J,Scarborough,Scarborough Village
M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
M1N,Scarborough,"Birch Cliff, Cliffside West"


### Shape of DataFrame:

In [6]:
# df.shape is a (rows, columns) tuple. After the groupby on 'PostalCode' above,
# PostalCode is the index, so the columns counted are Borough and Neighborhood.
print(df.shape)

(103, 2)
