# Part 1 : Fetch web page and extract table data using BeautifulSoup

In [1]:
import pandas as pd
import requests 
from bs4 import BeautifulSoup 

In [2]:
URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Download the page. The timeout keeps the cell from hanging indefinitely on a
# stalled connection, and raise_for_status() turns an HTTP error response into
# an exception instead of letting an error page be parsed as if it were data.
r = requests.get(URL, timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.content, 'html5lib')

# Take the first <table> element in the document; the next cell reads its rows.
table = soup.find('table')
if table is None:
    # Fail here with a clear message rather than with an AttributeError on
    # `table.find_all` in the following cell.
    raise ValueError('No <table> element found at {}'.format(URL))

In [3]:
postal_codes = []
boroughs = []
neighborhoods = []

# Walk every table row and collect the text of its first three data cells.
for row in table.find_all('tr'):
    columns = row.find_all('td')
    # Rows with no <td> cells are skipped, as before. Requiring three cells
    # also skips short rows instead of raising IndexError on columns[1] or
    # columns[2].
    if len(columns) < 3:
        continue
    # strip() removes whitespace on both sides, so the 'Not assigned'
    # comparison below and any later key matching see clean values.
    postal_codes.append(columns[0].get_text().strip())
    boroughs.append(columns[1].get_text().strip())
    neighborhoods.append(columns[2].get_text().strip())

# Create dataframe
df_table = pd.DataFrame({
    'PostalCode': postal_codes,
    'Borough': boroughs,
    'Neighborhood': neighborhoods,
})

# Remove "Not assigned" entries and renumber the remaining rows from 0.
# reset_index(drop=True) returns a new frame, which avoids assigning a column
# onto a filtered frame (a pattern that can trigger SettingWithCopyWarning).
df_table = df_table[df_table['Borough'] != 'Not assigned'].reset_index(drop=True)

In [4]:
# Preview the first ten rows of df_table after the "Not assigned" filter.
df_table.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
,,,
0.0,M3A,North York,Parkwoods
1.0,M4A,North York,Victoria Village
2.0,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3.0,M6A,North York,"Lawrence Manor, Lawrence Heights"
4.0,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5.0,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6.0,M1B,Scarborough,"Malvern, Rouge"
7.0,M3B,North York,Don Mills
8.0,M4B,East York,"Parkview Hill, Woodbine Gardens"


In [5]:
# (rows, columns) of df_table.
df_table.shape

(103, 3)

# Part 2 : Append neighborhood coordinates to dataframe

In [7]:
# %pip install geocoder==1.38.1


Collecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 6.7 MB/s  eta 0:00:01
[?25hCollecting ratelim
  Downloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


## The geocoder lookup below failed to execute

In [None]:
import geocoder

# Upper bound on lookups per postal code, so a service that never returns
# coordinates cannot keep this cell spinning forever.
MAX_GEOCODE_ATTEMPTS = 5

latitude_list = []
longitude_list = []

for postal_code in df_table['PostalCode'].values:

    # Print entry to be analysed
    print(postal_code)

    # Stays None unless one of the attempts below returns coordinates
    lat_lng_coords = None

    # Retry until coordinates come back or the attempt budget is used up
    for _ in range(MAX_GEOCODE_ATTEMPTS):
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
        # Truthiness check: treats both None and an empty result as "no answer"
        if lat_lng_coords:
            break
    else:
        # Every attempt failed; stop with an explicit error instead of looping
        raise RuntimeError(
            'No coordinates returned for {} after {} attempts'.format(
                postal_code, MAX_GEOCODE_ATTEMPTS
            )
        )

    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]

    latitude_list.append(latitude)
    longitude_list.append(longitude)
    

## Loading csv file with coordinates

In [6]:
import types
import pandas as pd
from botocore.client import Config
import ibm_boto3

In [7]:
# The code was removed by Watson Studio for sharing.

In [8]:
# `body` is not defined in any visible cell; presumably it is created by the
# preceding cell whose code Watson Studio removed for sharing. TODO confirm.
df_data_1 = pd.read_csv(body)
# Preview the first rows of the loaded coordinate data.
df_data_1.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Now merge coordinate data with Toronto Neighborhood data

In [9]:
# Attach coordinates by matching df_table's 'PostalCode' against the CSV's
# 'Postal Code' (merge's default join), then drop the now-redundant key column.
df = (
    df_table
    .merge(df_data_1, left_on='PostalCode', right_on='Postal Code')
    .drop(columns=['Postal Code'])
)
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
