# Part 1

In [1]:
!pip install folium
!pip install geocoder
import folium
import geocoder
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

Collecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 7.8 MB/s  eta 0:00:01
Collecting ratelim
  Downloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [2]:
# Download the Wikipedia list of Toronto ("M") postal codes and turn its first
# table into `lst`, a list of [postalcode, borough, neighborhood] rows.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops us from silently parsing an HTTP error page.
extraction = requests.get(url, timeout=30)
extraction.raise_for_status()
wikiData = BeautifulSoup(extraction.content, 'html.parser')

# The first <table> in the document is taken to be the postal-code table.
table = wikiData.find('table')
if table is None:
    raise ValueError('No <table> element found at {}'.format(url))
trs = table.find_all('tr')

# Keep only rows that contain <td> cells; rows without any are skipped.
rows = []
for tr in trs:
    cells = tr.find_all('td')
    if cells:
        rows.append(cells)

lst = []
for row in rows:
    # Guard against short rows instead of failing with IndexError.
    if len(row) < 3:
        continue
    # strip() (not just rstrip()) so stray leading whitespace cannot defeat
    # the 'Not assigned' comparisons below.
    postalcode, borough, neighborhood = (cell.text.strip() for cell in row[:3])
    # Postal codes without a borough are not in use; leave them out.
    if borough == 'Not assigned':
        continue
    # An assigned borough with no named neighbourhood reuses the borough name.
    if neighborhood == 'Not assigned':
        neighborhood = borough
    lst.append([postalcode, borough, neighborhood])

In [3]:
#Converting to dataframe
columns = ['Postcode','Borough','Neighborhood']

content = wikiData.find('div', class_='mw-parser-output')
table = content.table.tbody

# Collect one record per data row and build the frame once at the end.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call, so growing `toronto` row by row is both broken and quadratic.
records = []
for tr in table.find_all('tr'):
    cells = tr.find_all('td')
    # Rows with fewer than three <td> cells (e.g. a header row) carry no
    # record; skipping them avoids adding a row of placeholder/stale values.
    if len(cells) < 3:
        continue
    postcode = cells[0].text
    borough = cells[1].text
    neighborhood = cells[2].text.strip('\n').replace(']','')
    records.append({'Postcode': postcode,'Borough': borough,'Neighborhood': neighborhood})

# Passing `columns` keeps the column order (and the columns themselves when
# no records were found).
toronto = pd.DataFrame(records, columns=columns)

# Build the working frame from the scraped rows and report its size before
# any aggregation.
cols = ['PostalCode', 'Borough', 'Neighborhood']
df = pd.DataFrame(data=lst, columns=cols)
print(df.shape)

# Collapse to one row per postal code: keep the first borough seen and join
# the neighbourhood names with ', '.
aggregations = {'Borough': 'first', 'Neighborhood': ', '.join}
grouped_by_code = df.groupby('PostalCode')
df = grouped_by_code.agg(aggregations).reset_index()
df.head()

(103, 3)


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [4]:
# Spot-check a single row: the entry whose PostalCode is M5A.
df.loc[df['PostalCode'] == 'M5A']

Unnamed: 0,PostalCode,Borough,Neighborhood
53,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [5]:
# Show the (rows, columns) shape of df.
df.shape

(103, 3)

# Part 2

In [6]:
def get_latilong(postal_code, max_attempts=10, retry_delay=1.0):
    """Return ``[latitude, longitude]`` for a Toronto postal code via ArcGIS.

    The lookup is retried when the geocoder reports no coordinates, but only
    a bounded number of times, so a postal code that can never be resolved
    (or a service outage) cannot hang the notebook.

    Parameters
    ----------
    postal_code : str
        Postal code to look up; it is queried as
        ``'<postal_code>, Toronto, Ontario'``.
    max_attempts : int, optional
        Maximum number of lookups before giving up (default 10).
    retry_delay : float, optional
        Seconds to wait between failed attempts (default 1.0).

    Returns
    -------
    list
        The ``latlng`` value reported by the geocoder result.

    Raises
    ------
    ValueError
        If ``max_attempts`` is smaller than 1.
    RuntimeError
        If no coordinates were obtained after ``max_attempts`` lookups.
    """
    # Imported here so the function does not rely on another cell's imports.
    import time

    if max_attempts < 1:
        raise ValueError('max_attempts must be at least 1')

    query = '{}, Toronto, Ontario'.format(postal_code)
    for attempt in range(1, max_attempts + 1):
        g = geocoder.arcgis(query)
        lati_long_coords = g.latlng
        if lati_long_coords is not None:
            return lati_long_coords
        # Pause before retrying rather than hammering the service; no wait is
        # needed once the final attempt has failed.
        if attempt < max_attempts:
            time.sleep(retry_delay)
    raise RuntimeError(
        'No coordinates found for {!r} after {} attempts'.format(query, max_attempts)
    )
    
# Try the helper on a single postal code.
get_latilong('M4G')

[43.709020000000066, -79.36348999999996]

In [9]:
# Look up coordinates for every postal code (one geocoder call per code).
postal_codes = df['PostalCode']
coords = list(map(get_latilong, postal_codes.tolist()))
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
# Copy both coordinate columns onto df; the assignment aligns on row index.
for axis in ('Latitude', 'Longitude'):
    df[axis] = df_coords[axis]

In [10]:
# Preview the first 15 rows of df.
df.head(15)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.81139,-79.19662
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.78574,-79.15875
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76575,-79.1747
3,M1G,Scarborough,Woburn,43.76812,-79.21761
4,M1H,Scarborough,Cedarbrae,43.76944,-79.23892
5,M1J,Scarborough,Scarborough Village,43.74446,-79.23117
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.72582,-79.26461
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.71289,-79.28506
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.7236,-79.23496
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.6951,-79.26466


# Part 3

In [17]:
# Re-geocode every row by neighbourhood name with Nominatim, then plot the
# rows that resolved on a folium map centred on Toronto.
to_drop_unknown = []
geolocator = Nominatim(user_agent="coursera")
# Nominatim's usage policy allows at most one request per second, so the
# calls are spaced out with RateLimiter. With its default settings it also
# retries failed requests and returns None when they keep failing, which is
# handled below exactly like an address that has no match.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

for index, row in df.iterrows():
    address = row['Neighborhood'] + ', Toronto'
    location = geocode(address)
    # Test for "no result" explicitly instead of waiting for an
    # AttributeError on None.
    if location is None:
        print('Cannot do: {}, will drop index: {}'.format(address, index))
        to_drop_unknown.append(index)
        continue
    latitude = location.latitude
    longitude = location.longitude
    print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))
    df.loc[index, 'Latitude'] = latitude
    df.loc[index, 'Longitude'] = longitude

# Rows that could not be geocoded are left out of the map.
clean_df = df.drop(to_drop_unknown)

# Map centre. This lookup must NOT write into df: `index` still refers to the
# last row of the loop above, so storing the city centre there would replace
# that row's coordinates.
address = 'Toronto'
location = geocode(address)
if location is not None:
    latitude = location.latitude
    longitude = location.longitude
    print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))
else:
    # Fall back to the centre of the points being plotted rather than reusing
    # whatever neighbourhood happened to be geocoded last.
    latitude = clean_df['Latitude'].mean()
    longitude = clean_df['Longitude'].mean()
    print('Cannot do: {}, centring the map on the mean of the plotted points.'.format(address))

my_map = folium.Map(location=[latitude, longitude], zoom_start=11)
# add markers to map
for lat, lng, label in zip(clean_df['Latitude'], clean_df['Longitude'], clean_df['Neighborhood']):
    # parse_html=True has folium escape the label as plain text, so quotes or
    # markup characters in a name cannot break the generated page.
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(my_map)

The geograpical coordinate of Malvern, Rouge, Toronto are 43.8091955, -79.2217008.
Cannot do: Rouge Hill, Port Union, Highland Creek, Toronto, will drop index: 1
The geograpical coordinate of Guildwood, Morningside, West Hill, Toronto are 43.7907925, -79.19369853569341.
The geograpical coordinate of Woburn, Toronto are 43.7598243, -79.2252908.
The geograpical coordinate of Cedarbrae, Toronto are 43.75646655, -79.22669244258802.
The geograpical coordinate of Scarborough Village, Toronto are 43.7437422, -79.2116324.
Cannot do: Kennedy Park, Ionview, East Birchmount Park, Toronto, will drop index: 6
Cannot do: Golden Mile, Clairlea, Oakridge, Toronto, will drop index: 7
Cannot do: Cliffside, Cliffcrest, Scarborough Village West, Toronto, will drop index: 8
Cannot do: Birch Cliff, Cliffside West, Toronto, will drop index: 9
Cannot do: Dorset Park, Wexford Heights, Scarborough Town Centre, Toronto, will drop index: 10
Cannot do: Wexford, Maryvale, Toronto, will drop index: 11
The geograpica

In [18]:
# Leave the map as the cell's last expression so the notebook displays it.
my_map