#### Importing Libraries

In [15]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

### Scraping data using Beautiful Soup

In [16]:
# Wikipedia page listing the Canadian postal codes that start with "M".
URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Bound the request with a timeout so the cell cannot hang forever, and fail
# loudly on an HTTP error instead of parsing an error page as if it were data.
response = requests.get(URL, timeout=30)
response.raise_for_status()
page = response.text

# The response is an HTML document, so parse it with an HTML parser; the 'xml'
# parser applies XML rules that HTML markup is not guaranteed to satisfy.
soup = BeautifulSoup(page, 'html.parser')

In [17]:
# Take the first <table> element of the parsed page.
tb = soup.find('table')

# find() returns None when nothing matches; stop here with a clear message
# rather than failing later with an AttributeError on `tb.find_all`.
if tb is None:
    raise ValueError("No <table> element found in the downloaded page")

#### Transforming the data into a pandas dataframe

In [18]:
# Column layout used for the scraped postal-code table.
col_names = [
    'Postal Code',
    'Borough',
    'Neighborhood',
]

# Empty frame carrying only the column labels.
neigh_df = pd.DataFrame(data=None, columns=col_names)

In [19]:
# Collect the stripped text of every <td> cell, one list per table row.
# Only rows with exactly one cell per expected column are kept; rows with any
# other cell count (for example rows that contain no <td> at all) are skipped.
records = []
for r in tb.find_all('tr'):
    row_data = [c.text.strip() for c in r.find_all('td')]
    if len(row_data) == len(col_names):
        records.append(row_data)

# Build the frame once from the collected rows instead of growing it one
# `.loc` insert at a time. Rebuilding from scratch also means re-running this
# cell does not append the same rows a second time.
neigh_df = pd.DataFrame(records, columns=col_names)

In [20]:
# Preview the first five scraped rows.
neigh_df.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


#### Dropping rows whose Borough is 'Not assigned'

In [21]:
# Row labels of the entries whose Borough is still the 'Not assigned' placeholder.
not_assigned = neigh_df['Borough'] == 'Not assigned'
dt = neigh_df.index[not_assigned]

# Remove those rows; the remaining rows keep their original index labels.
neigh_df = neigh_df.drop(index=dt)

#### Using the Borough name as the Neighborhood wherever the Neighborhood is 'Not assigned'

In [22]:
# Wherever the Neighborhood is the 'Not assigned' placeholder, reuse the
# Borough value of that same row.
unnamed = neigh_df['Neighborhood'] == 'Not assigned'
neigh_df.loc[unnamed, 'Neighborhood'] = neigh_df.loc[unnamed, 'Borough']

In [24]:
# Inspect the first ten rows after cleaning.
neigh_df.head(n=10)

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


In [25]:
# Size check of the cleaned frame as (rows, columns).
neigh_df.shape

(103, 3)

#### Accessing geographical coordinates of the neighborhood

In [26]:
# Load the per-postal-code coordinates from the remote CSV file.
geo_df = pd.read_csv(filepath_or_buffer='http://cocl.us/Geospatial_data')

In [27]:
# Inspect the first ten rows of the coordinate table.
geo_df.head(n=10)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


#### Merging the two dataframes on 'Postal Code'

In [32]:
# Attach the coordinate columns of geo_df to each row by matching on
# 'Postal Code'. This is an inner join, so a postal code that is missing from
# either frame does not appear in the result.
# Selecting the scraped columns first keeps the cell safe to re-run: merging
# the already-merged frame again would otherwise produce suffixed duplicate
# columns (Latitude_x / Latitude_y, ...).
neigh_df = neigh_df[col_names].merge(geo_df, on='Postal Code', how='inner')

In [33]:
# Inspect the first ten rows of the merged frame.
neigh_df.head(n=10)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
