# Assignment: Using the DataFrame of Toronto neighbourhoods, populate latitudes and longitudes

#### Code from the previous assignment

In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

# Scrape the Wikipedia list of Canadian postal codes starting with "M" and
# build toronto_df with one row per postal code.
wiki_link='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_response = requests.get(wiki_link, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page.
wiki_response.raise_for_status()
raw_wiki_page_content = wiki_response.text

# NOTE(review): the 'xml' parser is kept from the original; Beautiful Soup's
# documentation suggests an HTML parser for HTML pages - confirm before changing.
raw_wiki_page_content_xml = BeautifulSoup(raw_wiki_page_content,'xml')

# The first <table> on the page is taken as the postal-code table.
table = raw_wiki_page_content_xml.find('table')
if table is None:
    raise ValueError('No <table> element found at ' + wiki_link)

Postcode      = []
Borough       = []
Neighbourhood = []

# The first <tr> is the header row, so data starts at the second row.
for tr_cell in table.find_all('tr')[1:]:
    td_cells = tr_cell.find_all('td')
    # A data row needs postcode, borough and neighbourhood cells; skip anything shorter.
    if len(td_cells) < 3:
        continue

    # Strip BEFORE comparing so surrounding whitespace/newlines in the cell
    # text cannot make 'Not assigned' slip through the filter.
    Postcode_temp = td_cells[0].text.strip()
    Borough_temp = td_cells[1].text.strip()
    # As before, the last cell of the row supplies the neighbourhood.
    Neighbourhood_temp = td_cells[-1].text.strip()

    #Ignore cells with a borough that is Not assigned
    if Borough_temp == 'Not assigned':
        continue

    #If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough
    if Neighbourhood_temp == 'Not assigned':
        Neighbourhood_temp = Borough_temp

    Postcode.append(Postcode_temp)
    Borough.append(Borough_temp)
    Neighbourhood.append(Neighbourhood_temp)

toronto_df = pd.DataFrame({"PostalCode":Postcode,"Borough":Borough,"Neighbourhood":Neighbourhood})

# Collapse duplicate postal codes: keep the first borough and join all of the
# neighbourhoods for that code into one comma-separated string.
toronto_df = toronto_df.groupby('PostalCode').agg({'Borough':'first', 
                             'Neighbourhood': ', '.join}).reset_index()
print("Shape of the Dataframe:"+str(toronto_df.shape))

Shape of the Dataframe:(103, 3)


# Using geocoder to get latitudes and longitudes

#### [ Installed geocoder using: `pip3 install geocoder` ]

In [2]:
# Backup table of coordinates, read from a CSV, for postal codes that the
# geocoder cannot resolve.
fallback_url = 'http://cocl.us/Geospatial_data'
fallback_coordinates_df = pd.read_csv(fallback_url, sep=',')

# Relabel the columns positionally so the key column is named like the one in toronto_df.
coordinate_columns = ['PostalCode', 'Latitude', 'Longitude']
fallback_coordinates_df.columns = coordinate_columns

fallback_coordinates_df.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [3]:
import geocoder

In [4]:
# Look up a latitude/longitude pair for every postal code in toronto_df.
# Each code is first tried against the online geocoder; if that yields nothing
# after MAX_GEOCODE_ATTEMPTS requests, the coordinates come from
# fallback_coordinates_df instead. Results are collected in parallel lists.

# Number of geocoder requests made per postal code before falling back to the csv.
MAX_GEOCODE_ATTEMPTS = 5

Postcode      = []
Borough       = []
Neighbourhood = []
Latitude = []
Longitude = []
for _, row in toronto_df.iterrows():
    Postcode_temp = row['PostalCode'].strip()
    Borough_temp = row['Borough'].strip()
    Neighbourhood_temp = row['Neighbourhood'].strip()

    # Ask the geocoder up to MAX_GEOCODE_ATTEMPTS times, stopping at the first answer.
    lat_lng_coords = None
    for _attempt in range(MAX_GEOCODE_ATTEMPTS):
        g = geocoder.google('{}, Toronto, Ontario'.format(Postcode_temp))
        lat_lng_coords = g.latlng
        if lat_lng_coords is not None:
            break

    if lat_lng_coords is not None:
        latitude_temp = lat_lng_coords[0]
        longitude_temp = lat_lng_coords[1]
        source_temp = ' from net'
    else:
        # Nothing came back from the net, so fetch the coordinates from the fallback csv.
        temp_df = fallback_coordinates_df[fallback_coordinates_df.PostalCode == Postcode_temp]
        if temp_df.empty:
            # Same exception type an empty .iloc[0] would raise, but with a
            # message that names the postal code that could not be resolved.
            raise IndexError('No coordinates available for postal code ' + Postcode_temp
                             + ' from the geocoder or the fallback csv')
        # Read the matching row once instead of re-indexing for every field.
        fallback_row = temp_df.iloc[0]
        latitude_temp = fallback_row['Latitude']
        longitude_temp = fallback_row['Longitude']
        source_temp = ' from csv'

    Latitude.append(latitude_temp)
    Longitude.append(longitude_temp)
    print('Lat/Long of ' + Postcode_temp + ' is ' + str(latitude_temp) + '/' + str(longitude_temp) + source_temp)

    Postcode.append(Postcode_temp)
    Borough.append(Borough_temp)
    Neighbourhood.append(Neighbourhood_temp)

Lat/Long of M1B is 43.806686299999996/-79.19435340000001 from csv
Lat/Long of M1C is 43.7845351/-79.16049709999999 from csv
Lat/Long of M1E is 43.7635726/-79.1887115 from csv
Lat/Long of M1G is 43.7709921/-79.21691740000001 from csv
Lat/Long of M1H is 43.773136/-79.23947609999999 from csv
Lat/Long of M1J is 43.7447342/-79.23947609999999 from csv
Lat/Long of M1K is 43.7279292/-79.26202940000002 from csv
Lat/Long of M1L is 43.711111700000004/-79.2845772 from csv
Lat/Long of M1M is 43.716316/-79.23947609999999 from csv
Lat/Long of M1N is 43.692657000000004/-79.2648481 from csv
Lat/Long of M1P is 43.7574096/-79.27330400000001 from csv
Lat/Long of M1R is 43.750071500000004/-79.2958491 from csv
Lat/Long of M1S is 43.7942003/-79.26202940000002 from csv
Lat/Long of M1T is 43.7816375/-79.3043021 from csv
Lat/Long of M1V is 43.8152522/-79.2845772 from csv
Lat/Long of M1W is 43.799525200000005/-79.3183887 from csv
Lat/Long of M1X is 43.836124700000006/-79.20563609999999 from csv
Lat/Long of M2H i

# Final Dataframe creation with Postal Code, Borough, Neighbourhood, Latitude and Longitude

In [5]:
# Assemble the final table from the parallel lists filled in by the geocoding
# loop; this rebinds toronto_df to the five-column version.
final_columns = {
    "PostalCode": Postcode,
    "Borough": Borough,
    "Neighbourhood": Neighbourhood,
    "Latitude": Latitude,
    "Longitude": Longitude,
}
toronto_df = pd.DataFrame(final_columns)
toronto_df

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
