Build the code to scrape the following Wikipedia page, https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M, in order to obtain the data that is in the table of postal codes and to transform the data into a pandas dataframe like the one shown on the link below:

https://d3c33hcgiwev3.cloudfront.net/imageAssetProxy.v1/7JXaz3NNEeiMwApe4i-fLg_40e690ae0e927abda2d4bde7d94ed133_Screen-Shot-2018-06-18-at-7.17.57-PM.png?expiry=1586649600000&hmac=26XuOyRvtw7utd7MkXAe6xKFo0_FvEh3AjroOrWDcgE

In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import requests

In [9]:
html = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup=BeautifulSoup(html.content)
table=soup.find('table')
rows=table.findAll('tr')
table_data={}
ix=0
for r in rows:
    row_data=r.findAll('td')
    if row_data:
        #assume that there are only 3 columns
        table_data.update({
            ix:[
                row_data[0].text.replace("\n",""), 
                row_data[1].text.replace("\n",""), 
                row_data[2].text.replace("/",","),
                
            ]
        })
        ix+=1

In [10]:
columns=['Postal Code', 'Borough', 'Neighborhoods']

In [11]:
df = pd.DataFrame.from_dict(data=table_data, orient='index', columns=columns)

In [12]:
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhoods
0,M1A,Not assigned,\n
1,M2A,Not assigned,\n
2,M3A,North York,Parkwoods\n
3,M4A,North York,Victoria Village\n
4,M5A,Downtown Toronto,"Regent Park , Harbourfront\n"


In [13]:
new_data={}
for index, row in df.iterrows():
    if row['Borough']=='Not assigned':
        continue
    
    if row['Neighborhoods']=='Not assigned':
        row['Neighborhoods']=row['Borough']
        
    if row['Postal Code'] in new_data.keys():
        if row['Neighborhoods'] not in new_data[row['Postal Code']][2]:
            row['Neighborhoods'] = new_data[row['Postal Code']][2] + " , " + row['Neighborhoods']
        
    new_data.update({
        row['Postal Code']: [row[0],row[1],row[2]]
    })
    
# steps to create dictionary for the DataFrame
new_data_new={}
xcount=0
for i in new_data.keys():
    new_data_new.update({
        xcount: new_data[i]
    })
    xcount+=1
new_df=pd.DataFrame.from_dict(data=new_data_new, orient='index', columns=columns)

In [14]:
new_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhoods
0,M3A,North York,Parkwoods\n
1,M4A,North York,Victoria Village\n
2,M5A,Downtown Toronto,"Regent Park , Harbourfront\n"
3,M6A,North York,"Lawrence Manor , Lawrence Heights\n"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government\n"


In [15]:
new_df.shape

(103, 3)

In [17]:
latlng_coords=pd.read_csv('https://cocl.us/Geospatial_data/Geospatial_Coordinates.csv')

In [19]:
latlng_coords.shape

(103, 3)

In [20]:
combined_df=pd.merge(new_df,latlng_coords,on='Postal Code')

In [21]:
combined_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhoods,Latitude,Longitude
0,M3A,North York,Parkwoods\n,43.753259,-79.329656
1,M4A,North York,Victoria Village\n,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront\n",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights\n",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government\n",43.662301,-79.389494


In [22]:
combined_df['Neighborhoods']

0                                            Parkwoods\n
1                                     Victoria Village\n
2                           Regent Park , Harbourfront\n
3                    Lawrence Manor , Lawrence Heights\n
4         Queen's Park , Ontario Provincial Government\n
5                                     Islington Avenue\n
6                                      Malvern , Rouge\n
7                                            Don Mills\n
8                     Parkview Hill , Woodbine Gardens\n
9                             Garden District, Ryerson\n
10                                           Glencairn\n
11     West Deane Park , Princess Gardens , Martin Gr...
12            Rouge Hill , Port Union , Highland Creek\n
13                                           Don Mills\n
14                                    Woodbine Heights\n
15                                      St. James Town\n
16                                  Humewood-Cedarvale\n
17     Eringate , Bloordale Gar