# Applied Data Science Capstone Week 3 Lab

## Part 1

### Import pandas and read the data frames from the Wikipedia page

In [21]:
import pandas as pd

# Wikipedia page that holds the table of Canadian postal codes starting with "M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# read_html returns a list of DataFrames parsed from the page's HTML tables;
# print how many were found so the right one can be picked in the next step.
dfs = pd.read_html(io=url)
print(len(dfs))

3


### Select the appropriate data frame

In [23]:
# Preview the first parsed table. Leaving the frame as the cell's last
# expression (instead of print()) gives the notebook's rich table rendering,
# consistent with the other preview cells in this notebook.
dfs[0].head()

  Postal Code           Borough              Neighbourhood
0         M1A      Not assigned               Not assigned
1         M2A      Not assigned               Not assigned
2         M3A        North York                  Parkwoods
3         M4A        North York           Victoria Village
4         M5A  Downtown Toronto  Regent Park, Harbourfront


### Assign the data frame and filter out rows where Borough is not assigned

In [24]:
# The first table on the page is the postal-code table.
df1 = dfs[0]

# Boolean mask: True for every row whose Borough has been assigned.
mask = df1['Borough'].ne('Not assigned')

# Keep only the rows selected by the mask.
df2 = df1.loc[mask]
df2.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Group by postal code



In [26]:
# Collapse the table to one row per postal code.
# A plain .sum() concatenates strings with no separator, so a postal code that
# appeared on several rows would yield values such as "ScarboroughScarborough".
# Aggregate explicitly instead: keep the first Borough and join the
# Neighbourhood names with ", ". When every postal code occurs once the result
# is identical to the .sum() version.
df3 = df2.groupby("Postal Code").agg(
    {
        "Borough": "first",
        "Neighbourhood": lambda names: ", ".join(names.dropna().astype(str)),
    }
)
df3.head()

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern, Rouge"
M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


### Reset index



In [27]:
# Move "Postal Code" out of the index and back into a regular column,
# giving the frame a fresh 0..n-1 integer index.
df4 = df3.reset_index(drop=False)
df4.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Print the shape of the dataframe

In [28]:
# (rows, columns) of the grouped table: one row per postal code.
df4.shape

(103, 3)

# ========================================

## Part 2

### Get the geolocations and append them to the dataframe

In [None]:
import requests

# json_normalize lives at the top level of pandas since pandas 1.0; the old
# pandas.io.json location is deprecated and may be unavailable in newer
# releases. Try the current location first and fall back for old installs so
# the name `json_normalize` is defined either way.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize

### Single example of geolocation extraction, to be used in the for loop below

In [29]:
# Query the geolocation service for a single postal code (M1B).
# A timeout keeps the cell from hanging forever if the service does not answer,
# and raise_for_status() surfaces HTTP errors instead of failing later while
# parsing an error page.
resp = requests.get(
    'http://geogratis.gc.ca/services/geolocation/en/locate?q=M1B',
    timeout=10,
)
resp.raise_for_status()

# Flatten the JSON so the nested coordinates end up in 'geometry.coordinates'.
df_test = pd.json_normalize(resp.json())

# The coordinate pair of the first result is ordered [longitude, latitude].
coordinates = df_test['geometry.coordinates'][0]
print('M1B latitude = ' + str(coordinates[1]))
print('M1B longitude = ' + str(coordinates[0]))

M1B latitude = 43.809444
M1B longitude = -79.193321


### Loop through all postal codes to retrieve geolocations

The geolocation service returns an empty JSON response for M7R.  
Its latitude and longitude coordinates are 43.63705212384915, -79.61562720000305,  
so this postal code is handled "manually" through an `if`/`else` branch in the loop.

In [30]:
lat = []
long = []
url_base = "http://geogratis.gc.ca/services/geolocation/en/locate?q="

# Postal codes the service has no answer for, with hand-entered coordinates.
# They are stored as floats (not strings) so the Latitude/Longitude columns
# hold one numeric type, like the values returned by the service.
manual_coordinates = {'M7R': (43.637052, -79.615627)}

for postal_code in df4["Postal Code"]:
    if postal_code in manual_coordinates:
        point_lat, point_lng = manual_coordinates[postal_code]
    else:
        # Build the request URL inline so the notebook-level `url` variable
        # from Part 1 is not overwritten by this loop.
        resp = requests.get(url_base + postal_code, timeout=10)
        resp.raise_for_status()  # fail loudly on HTTP errors
        df = pd.json_normalize(resp.json())
        # The coordinate pair of the first result is [longitude, latitude].
        coordinates = df['geometry.coordinates'][0]
        point_lat, point_lng = coordinates[1], coordinates[0]
    lat.append(point_lat)
    long.append(point_lng)

df4['Latitude'] = lat
df4['Longitude'] = long

# Show a sample rather than dumping the whole table into the output.
df4.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.8094,-79.1933
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.788,-79.1587
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7697,-79.1731
3,M1G,Scarborough,Woburn,43.7724,-79.2151
4,M1H,Scarborough,Cedarbrae,43.7713,-79.2408
5,M1J,Scarborough,Scarborough Village,43.7456,-79.2312
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.7305,-79.2621
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.7172,-79.2862
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.7261,-79.2323
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.6993,-79.2567


# ========================================

## Part 3

### Download all the dependencies that will be needed

In [None]:
#!pip install folium

In [None]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests

# json_normalize lives at the top level of pandas since pandas 1.0; the old
# pandas.io.json location is deprecated and may be unavailable in newer
# releases. Try the current location first and fall back for old installs.
try:
    from pandas import json_normalize # transform JSON file into a pandas dataframe
except ImportError:
    from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

### Create map of Toronto using latitude and longitude values



In [31]:
address = 'Toronto, ON'

# Resolve the address to coordinates with the Nominatim geocoder.
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)

# geocode() yields None when nothing matches; stop with a clear message rather
# than an AttributeError on `location.latitude` below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


In [33]:
# Base map centred on the Toronto coordinates computed above.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# One circle marker per postal-code row, labelled "<neighbourhoods>, <borough>".
marker_columns = df4[['Latitude', 'Longitude', 'Borough', 'Neighbourhood']]
for lat, lng, borough, neighborhood in marker_columns.itertuples(index=False, name=None):
    label = folium.Popup(f'{neighborhood}, {borough}', parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

# Leave the map as the last expression so the notebook renders it.
map_toronto