# Part 1 - Loading and Cleaning Data

## Import the Libraries

In [84]:
!pip install beautifulsoup4 
!pip install googlemaps

from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

Collecting googlemaps
  Downloading https://files.pythonhosted.org/packages/6f/b5/3a2e0b1d96d61b6739a98b37369cef4db7e97144fb90ce2e5684fbac4dde/googlemaps-4.4.0.tar.gz
Building wheels for collected packages: googlemaps
  Building wheel for googlemaps (setup.py) ... [?25ldone
[?25h  Stored in directory: /home/dsxuser/.cache/pip/wheels/b1/f3/2a/6b416bce171c73da2891978bb6efc5011f000e074e72f51ed8
Successfully built googlemaps
Installing collected packages: googlemaps
Successfully installed googlemaps-4.4.0


## Load the Data

In [40]:
# Download the Wikipedia list of Toronto ("M") postal codes and parse the
# first table on the page into a dataframe.
POSTAL_CODES_URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as data.
req = requests.get(POSTAL_CODES_URL, timeout=30)
req.raise_for_status()

soup = BeautifulSoup(req.content, 'lxml')

# find() returns None when the page has no table; fail with a clear message
# instead of an IndexError on an empty find_all() result.
table = soup.find('table')
if table is None:
    raise ValueError('No <table> element found at {}'.format(POSTAL_CODES_URL))

# read_html() returns a list of frames (one per table found). The markup is
# wrapped in StringIO because passing literal HTML strings is deprecated in
# recent pandas releases.
df = pd.read_html(StringIO(str(table)))[0]

df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


## Remove Rows Without Borough

In [41]:
# Tally how many postal codes are listed under each borough value
borough_counts = df['Borough'].value_counts()
borough_counts

Not assigned        77
North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
York                 5
East Toronto         5
East York            5
Mississauga          1
Name: Borough, dtype: int64

In [42]:
# Keep only the postal codes whose borough is something other than the
# 'Not assigned' placeholder, then re-check the per-borough tallies.
has_borough = df['Borough'].ne('Not assigned')
df = df[has_borough]
df['Borough'].value_counts()

North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
York                 5
East York            5
East Toronto         5
Mississauga          1
Name: Borough, dtype: int64

In [43]:
# (rows, columns) of the dataframe at this point in the notebook
df.shape

(103, 3)

# Part 2 - Add the Lat & Lng

In [98]:
# The code was removed by Watson Studio for sharing.

In [45]:
# Preview the dataframe produced in Part 1; the coordinates added below are
# looked up from its 'Postal Code' column.
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


## Testing the geocoder

In [115]:
# Smoke-test the Google geocoder with a single postal code before looping
# over the whole dataframe.
import googlemaps

# NOTE(review): google_key is not defined in any visible cell - presumably it
# is set by the hidden cell above; confirm before running on a fresh kernel.
gmaps = googlemaps.Client(key=google_key)

test_address = '{}, Toronto, Ontario'.format('M3A')
geocode_result = gmaps.geocode(test_address)


In [116]:
# Read the coordinates from the first geocoding match and print them
# rounded to two decimals.
location = geocode_result[0]['geometry']['location']
lat, lng = location['lat'], location['lng']

txt = 'Lat and Lon {lat:.2f} {lng:.2f}'
print(txt.format(lat=lat, lng=lng))

Lat and Lon 43.75 -79.33


## Create a new dataframe with the lat & lon

In [117]:
# Column layout of the geocoded table: the three Part 1 columns followed by
# the coordinates.
column_names = [
    'Postal Code',
    'Borough',
    'Neighborhood',
    'Latitude',
    'Longitude',
]

# Start from an empty frame that already carries those columns
neighborhoods = pd.DataFrame(columns=column_names)

In [118]:
# Geocode every postal code and rebuild `neighborhoods` with coordinates.
#
# Rows are collected as plain dicts and turned into a dataframe once at the
# end: DataFrame.append copies the whole frame on every call (quadratic) and
# was removed in pandas 2.0.
records = []
for postal_code, borough, neighborhood in zip(df['Postal Code'],
                                              df['Borough'],
                                              df['Neighborhood']):
    geocode_result = gmaps.geocode('{}, Toronto, Ontario'.format(postal_code))

    if geocode_result:
        # Use the first match
        location = geocode_result[0]['geometry']['location']
        lat, lng = location['lat'], location['lng']
    else:
        # No match for this postal code: keep the row with missing
        # coordinates instead of raising IndexError and losing every lookup
        # already made.
        lat = lng = float('nan')

    records.append({'Postal Code': postal_code,
                    'Borough': borough,
                    'Neighborhood': neighborhood,
                    'Latitude': lat,
                    'Longitude': lng})

# column_names fixes the column order (and the columns of an empty result)
neighborhoods = pd.DataFrame(records, columns=column_names)


In [120]:
# Display the geocoded dataframe (a bare last expression is rendered by the notebook)
neighborhoods

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


## Plot the neighbourhoods on a map

In [123]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

!pip install folium
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [122]:
# Geocode the city itself; lat/lng are used as the centre of the map below.
geocode_result = gmaps.geocode('Toronto, Ontario')
location = geocode_result[0]['geometry']['location']
lat = location['lat']
lng = location['lng']

# The query and the values are for Toronto, so the message says Toronto
# (it previously said "Ontario" and misspelled "geographical").
print('The geographical coordinates of Toronto are {}, {}.'.format(lat, lng))

The geograpical coordinate of Ontario are 43.653226, -79.3831843.


In [125]:
# Create a map of Toronto centred on the city coordinates (lat, lng)
map_toronto = folium.Map(location=[lat, lng], zoom_start=10)

# Only rows with both coordinates can be plotted.
# NOTE(review): folium is expected to reject NaN locations - confirm.
located = neighborhoods.dropna(subset=['Latitude', 'Longitude'])

# Add one circle marker per neighbourhood. The loop uses its own names for
# the marker coordinates so it does not overwrite lat/lng: re-running this
# cell would otherwise centre the map on the last neighbourhood.
for marker_lat, marker_lng, borough, neighborhood in zip(located['Latitude'],
                                                         located['Longitude'],
                                                         located['Borough'],
                                                         located['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    # NOTE(review): parse_html looks like a Popup option that CircleMarker
    # does not use - confirm and drop it here.
    folium.CircleMarker(
        [marker_lat, marker_lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)

map_toronto