<h1>Segmenting and Clustering Neighborhoods in Toronto</h1>
<h2>Part 2 - Geocoder</h2>

This part reuses the data from the previous notebook, so we'll quickly rebuild that dataframe first.

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd


# Browser-like request headers sent with the Wikipedia request.
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0",
    "Accept-Language": "en-US,en;q=0.5",
}

# Fetch the page. The timeout keeps the cell from hanging forever, and
# raise_for_status() stops us from parsing an HTTP error page as if it were data.
response = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    headers=headers,
    timeout=30,
)
response.raise_for_status()
text = response.text

soup = BeautifulSoup(text, features="html.parser")

# The postal-code table is taken to be the first <table> on the page.
table = soup.find("table")
if table is None:
    raise ValueError("No <table> element found in the downloaded page")

# Column names come from the <th> cells; only the first three are used.
header = [th.text.strip() for th in table.find_all("th")]

# One list of cell texts per row. Rows with fewer than three <td> cells
# (e.g. the header row, which has none) are skipped instead of being dropped
# by position, so a short or malformed row can no longer raise IndexError.
data = [
    cells[:3]
    for cells in (
        [td.text.strip() for td in tr.find_all("td")]
        for tr in table.find_all("tr")
    )
    if len(cells) >= 3
]

# Build the frame, drop postal codes without a borough, and renumber the rows.
df = pd.DataFrame(data, columns=header[:3])
df = df[df["Borough"] != "Not assigned"].reset_index(drop=True)
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


"Use the Geocoder package or the csv file to create the following dataframe:"
(dataframe containing Postal Code, Borough, Neighborhood, Latitude, and Longitude columns)

So we just have to figure out how to add Latitude and Longitude data to this.

In [6]:
import geocoder

# Attach a Latitude and a Longitude column to `df`, one value per postal code.

# First attempt, kept for reference only (not executed).
# Based on documentation, this should work, but doesn't.
# This is likely due to google's changes to their services.
# Note that the retry loop never terminates while g.latlng keeps coming back None.
#
# for postal_code in df['Postal Code']:
#
#     # loop until you get the coordinates
#     lat_lng_coords = None
#     while(lat_lng_coords is None):
#         g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
#         lat_lng_coords = g.latlng
#
#     lat_col.append(lat_lng_coords[0])
#     lng_col.append(lat_lng_coords[1])
#
# df['Latitude'] = lat_col
# df['Longitude'] = lng_col
# df

# So we'll go with the backup option
df_coordinates = pd.read_csv('Geospatial_Coordinates.csv')
# ['Postal Code', 'Latitude', 'Longitude']

# Index the coordinates by postal code so each lookup is a single vectorised
# map instead of filtering the whole coordinates frame once per row.
coords_by_code = df_coordinates.set_index('Postal Code')
if not coords_by_code.index.is_unique:
    raise ValueError("Geospatial_Coordinates.csv lists a postal code more than once")

# Fail loudly, and name the offending codes, if any postal code has no
# coordinates. The previous float(<Series>) lookup raised an unhelpful
# TypeError in that case, and calling float() on a one-row Series is
# deprecated in recent pandas.
postal_codes = df['Postal Code']
missing_codes = postal_codes[~postal_codes.isin(coords_by_code.index)]
if not missing_codes.empty:
    raise ValueError(
        "No coordinates found for postal codes: {}".format(sorted(set(missing_codes)))
    )

# Kept as plain lists of floats, as before, in case later cells read them.
lat_col = postal_codes.map(coords_by_code['Latitude']).astype(float).tolist()
lng_col = postal_codes.map(coords_by_code['Longitude']).astype(float).tolist()

df['Latitude'] = lat_col
df['Longitude'] = lng_col
df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509
