### Importing the needed libraries

In [1]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

### Getting the page into xml format

In [2]:
# Download the Wikipedia page and parse it with BeautifulSoup's lxml parser.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() stops the notebook on an HTTP error (4xx/5xx) instead
# of silently parsing an error page.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
# NOTE: despite its name, `url` holds the downloaded page text, not the address.
url = response.text
soup = BeautifulSoup(url, 'lxml')

### Finding the table tag, class tag and its value to get the desired table

In [3]:
# Pick the first <table> element whose class attribute is 'wikitable sortable'.
table = soup.find(name='table', attrs={'class': 'wikitable sortable'})

   ### Creating a list of headers. The headers are marked with the 'th' tag

In [4]:
# Collect every <th> cell of the table and keep its text with surrounding
# whitespace removed.
ths = table.find_all('th')
headings = list(map(lambda header_cell: header_cell.text.strip(), ths))


### Creating lists data for each header from the rows, the rows are marked with the 'tr' tag, and the data with 'td' tag

In [5]:
# Walk the table rows and collect the stripped text of the first three <td>
# cells of each row into three parallel lists.
Postcode, Borough, Neighbourhood = [], [], []
for row in table.find_all('tr'):
    cells = row.find_all('td')
    if cells:  # rows without any <td> cell (e.g. a header row) are skipped
        # Exactly three values are unpacked, so a row with only one or two
        # <td> cells raises ValueError here.
        code, borough, neighbourhood = (cell.text.strip() for cell in cells[:3])
        Postcode.append(code)
        Borough.append(borough)
        Neighbourhood.append(neighbourhood)

### Creating the data frame

In [6]:
# Start from an empty frame whose columns are the scraped headings, then fill
# the three columns from the lists built above. The assignment order
# (Postcode, Neighbourhood, Borough) is kept as in the original cell.
df = pd.DataFrame(columns=headings).assign(
    Postcode=Postcode,
    Neighbourhood=Neighbourhood,
    Borough=Borough,
)




### Removing the rows where the Borough is not assigned

In [7]:
# Keep only the rows whose Borough value differs from 'Not assigned'.
has_borough = df['Borough'] != 'Not assigned'
df = df.loc[has_borough]

### Replacing the Neighbourhoods that are not assigned with the Borough

In [8]:
# Fill each unassigned Neighbourhood with the Borough value of the SAME row.
# Series.replace() is not meant for this: it is not documented to take a Series
# as a row-aligned replacement for a scalar match (depending on the pandas
# version it raises ValueError or does not substitute row by row). A boolean
# mask with Series.mask() is index-aligned and does exactly what is intended.
# assign() returns a new frame, which also avoids a SettingWithCopyWarning
# from writing into a frame that was produced by boolean filtering.
neighbourhood_missing = df['Neighbourhood'] == 'Not assigned'
df = df.assign(
    Neighbourhood=df['Neighbourhood'].mask(neighbourhood_missing, df['Borough'])
)

### Combining the Neighbourhoods for the same Postcode and Borough

In [9]:
def _join_with_commas(values):
    """Concatenate the string values of one group, separated by ', '."""
    return ', '.join(values)


# Collapse rows that share the same Postcode and Borough into a single row;
# the values of every remaining column are joined with ', '.
grpby = df.groupby(by=['Postcode', 'Borough'], as_index=False)
df = grpby.agg(_join_with_commas)
df.head(20)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


### Shape of the DataFrame

In [10]:
# Display the (rows, columns) tuple of the grouped DataFrame.
df.shape

(103, 3)

In [11]:
# Load the CSV served at this address into a DataFrame and preview its first
# five rows.
geo_csv_url = 'http://cocl.us/Geospatial_data'
df_geo = pd.read_csv(geo_csv_url)
df_geo.head(5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
# Attach Latitude/Longitude by matching on the postal code itself.
# The previous pd.concat(axis=1) paired rows purely by index position, so any
# difference in ordering or row count between the two frames would silently
# assign coordinates to the wrong postal code. An inner merge on the key
# columns gives the same result when the frames line up and stays correct
# when they do not.
df = df.merge(df_geo, how='inner', left_on='Postcode', right_on='Postal Code')
# 'Postal Code' duplicates 'Postcode' after the merge; drop it without inplace
# mutation so re-running the cell does not depend on hidden state.
df = df.drop(columns='Postal Code')

In [13]:
# Preview up to the first 25 rows of the frame, now including Latitude and Longitude.
df.head(25)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
