# Capstone Project Week 3, Part 2/3

### Repeating steps in Part 1 

Importing the required libraries.

In [3]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

In [4]:
import geocoder

Opening the Wikipedia page and creating Beautiful Soup Object.

In [5]:
# Download the Wikipedia page that lists the postal codes starting with "M".
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# turns an HTTP error response into an exception instead of parsing an error page.
response = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", timeout=30)
response.raise_for_status()
webPage = response.text
# Name the parser explicitly: when it is omitted BeautifulSoup picks whichever
# parser happens to be installed (and warns about it), so the parse tree can
# differ from one machine to the next.
soup = BeautifulSoup(webPage, "html.parser")

Getting the tag for the required table.

In [6]:
# Locate the first <table> element carrying the CSS class "wikitable".
table_tag = soup.find("table", attrs={"class": "wikitable"})

Extracting the table.

In [7]:
# Turn every table row into a list of cell texts (the first row is the header).
tr_tags = table_tag.find_all('tr')
neighbourhoods = []
for tr_tag in tr_tags:
    # Children that are just the newline between two cells are skipped.
    # rstrip('\n') is applied unconditionally: it is a no-op when there is no
    # trailing newline, and unlike indexing with [-1] it does not raise
    # IndexError when a cell's text is empty.
    row_texts = [
        cell.text.rstrip('\n')
        for cell in tr_tag.children
        if cell.string != '\n'
    ]
    neighbourhoods.append(row_texts)
neighbourhoods[0:5]

[['Postcode', 'Borough', 'Neighbourhood'],
 ['M1A', 'Not assigned', 'Not assigned'],
 ['M2A', 'Not assigned', 'Not assigned'],
 ['M3A', 'North York', 'Parkwoods'],
 ['M4A', 'North York', 'Victoria Village']]

 
Creating a pandas DataFrame

In [8]:
# The first scraped row supplies the column names; the remaining rows are the data.
header_row = neighbourhoods[0]
data_rows = neighbourhoods[1:]
neighbourhoods_df = pd.DataFrame(data=data_rows, columns=header_row)
neighbourhoods_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


Dropping rows with Borough as Not assigned

In [9]:
# Keep only the rows whose Borough is something other than the 'Not assigned' placeholder.
has_borough = neighbourhoods_df['Borough'] != 'Not assigned'
neighbourhoods_df = neighbourhoods_df.loc[has_borough]

In [10]:
# Renumber the rows from 0 after the filtering; the old index is discarded.
neighbourhoods_df = neighbourhoods_df.reset_index(drop=True)
neighbourhoods_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


Grouping the neighbourhoods that share a postcode into a single row.

In [21]:
# Collapse the table to one row per postcode (the postcode becomes the index).
#  - Borough: take the group's first value, which is a plain scalar. The earlier
#    `x.unique()` returned an array, which is not a true aggregation and can end
#    up as an array inside the cell or be rejected, depending on the pandas version.
#    NOTE(review): assumes each postcode belongs to exactly one borough, as in the
#    rows shown above — confirm if the source table changes.
#  - Neighbourhood: join all names of the group into one comma-separated string.
postcode_df = (
    neighbourhoods_df
    .groupby('Postcode', sort=True)
    .agg({
        'Borough': 'first',
        'Neighbourhood': lambda names: ', '.join(names),
    })
)

In [22]:
# Preview the first rows of the grouped frame (indexed by Postcode).
postcode_df.head()

Unnamed: 0_level_0,Borough,Neighbourhood
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Rouge, Malvern"
M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


Where the Neighbourhood is 'Not assigned', use the Borough name instead.

In [23]:
# Rows whose Neighbourhood is still the 'Not assigned' placeholder take their Borough name.
unnamed = postcode_df['Neighbourhood'] == 'Not assigned'
postcode_df.loc[unnamed, 'Neighbourhood'] = postcode_df.loc[unnamed, 'Borough']

In [24]:
# Preview the frame after replacing 'Not assigned' neighbourhood names.
postcode_df.head()

Unnamed: 0_level_0,Borough,Neighbourhood
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Rouge, Malvern"
M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


In [25]:
# (rows, columns) of the grouped frame: one row per postcode.
postcode_df.shape

(103, 2)

### Part 2

The geocoder-based lookup below did not work, so it is kept commented out.

In [26]:
# NOTE(review): this geocoder attempt is disabled and kept for reference only.
# As written, the retry condition `(lat_lng_coords is None) or (i<10)` keeps
# looping for as long as no coordinates come back (there is no upper bound in
# that case) and always makes at least 10 requests even after a successful
# lookup; `and` was presumably intended — confirm before re-enabling.
#coords=[]
#for postal_code in postcode_df.index:

#    lat_lng_coords = None
#    i=0
#    while((lat_lng_coords is None) or (i<10)):
#        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
#        lat_lng_coords = g.latlng
#        i=i+1
#    #latitude = lat_lng_coords[0]
#    #longitude = lat_lng_coords[1]
#    coords.append(lat_lng_coords)
    
#coords

Reading Latitude and Longitude coordinates from the given csv

In [27]:
# Load the latitude/longitude of every postal code from the supplied CSV file.
coordinates_csv = 'Geospatial_Coordinates.csv'
geo_df = pd.read_csv(coordinates_csv)
geo_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Sorting by Postal codes so that both dataframes of Neighbourhoods and Coordinates have Postcodes in the same order.

In [28]:
# Order the coordinates by postal code (the original row labels are kept).
geo_df = geo_df.sort_values('Postal Code')

In [29]:
# Preview the coordinates after sorting by postal code.
geo_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [30]:
# (rows, columns) of the coordinates table.
geo_df.shape

(103, 3)

Combining the dataframes

In [41]:
# Attach the coordinates by matching on the postal code itself rather than by
# row position. Copying `.values` across only works if both frames hold the
# same postcodes in the same order; any difference would silently give
# postcodes the wrong coordinates.
coords_by_postcode = geo_df.set_index('Postal Code')
# Assigning a Series to a column aligns it on the index labels, so each
# postcode in postcode_df picks up the row with the same postal code.
postcode_df['Latitude'] = coords_by_postcode['Latitude']
postcode_df['Longitude'] = coords_by_postcode['Longitude']
# A postcode without a match in the CSV ends up as NaN; fail loudly if so.
unmatched = postcode_df[['Latitude', 'Longitude']].isna().any(axis=1)
assert not unmatched.any(), 'No coordinates for postcodes: {}'.format(list(postcode_df.index[unmatched]))
postcode_df.head()

Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
M1G,Scarborough,Woburn,43.770992,-79.216917
M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [42]:
# (rows, columns) after adding the Latitude and Longitude columns.
postcode_df.shape

(103, 4)