In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# None removes the display limit, so DataFrames are shown with every column and row
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
# NOTE(review): pandas deprecated this import path in 1.0 in favour of pd.json_normalize -- confirm it still resolves on the installed pandas
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


## Web Scraping using pandas read_html method

In [2]:
# pd.read_html returns a list with one DataFrame per HTML table that matches
# the given attributes; header=0 uses the first table row as column names.
data = pd.read_html(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    header=0,
    attrs={"class": ["wikitable", "wikitable sortable"]},
)

# Keep the last matching table. Selecting it explicitly replaces a loop that
# rebound `df` on every iteration and only kept the final one by accident.
df = pd.DataFrame(data[-1])
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


## Dropping rows whose Borough is Not assigned

In [3]:
# Collect the index labels of rows with no borough, then drop exactly those rows
unassigned_rows = df.index[df['Borough'] == 'Not assigned']
df_new = df.drop(index=unassigned_rows)

In [4]:
# Preview the first ten rows that remain after the 'Not assigned' boroughs were dropped
df_new.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


## Replacing the Neighbourhood value in row 8 with Queen's Park

In [5]:
# Row label 8 (postcode M7A) has a borough but a 'Not assigned' neighbourhood.
# Assign with a single .loc[row, column] call: the chained form
# df_new.loc[8]['Neighbourhood'] = ... writes into a temporary Series and only
# reaches df_new when pandas happens to return a view.
df_new.loc[8, 'Neighbourhood'] = "Queen's Park"

In [6]:
# Show the first ten rows again to check the Neighbourhood value at row label 8
df_new.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


## Shape of Dataframe

In [7]:
# (number of rows, number of columns) of the filtered frame
df_new.shape

(211, 3)

## Rename Postcode to Postal Code

In [8]:
# Rename only the column label. Passing index=str as well would map every
# integer row label to a string, so label lookups such as df_new.loc[8] would
# raise KeyError if an earlier cell were run again.
df_new = df_new.rename(columns={"Postcode": "Postal Code"})
df_new.head(10)


Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


## Loading Lat Longs from http://cocl.us/Geospatial_data

In [9]:
# Download the CSV of per-postal-code coordinates and peek at its first rows
geo_csv_url = 'http://cocl.us/Geospatial_data'
dframe = pd.read_csv(geo_csv_url)
dframe.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Joining two Dataframes

In [10]:
# Index both frames by postal code so they can be aligned on that key.
# The membership checks keep the cell safe to re-run: once 'Postal Code' has
# moved into the index it is no longer a column, and calling set_index on it
# again would raise KeyError.
if 'Postal Code' in df_new.columns:
    df_new = df_new.set_index('Postal Code')
if 'Postal Code' in dframe.columns:
    dframe = dframe.set_index('Postal Code')

In [12]:

# Attach Latitude/Longitude to every neighbourhood row by matching on the
# 'Postal Code' index. df_new holds several rows per postal code (one per
# neighbourhood); an index join repeats the coordinates for each of them and
# keeps df_new's row order, whereas pd.concat(axis=1) has to reindex and
# rejects a duplicated index on current pandas versions.
df_mod = df_new.join(dframe, how='inner')
df_mod.head(100)

Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M3A,North York,Parkwoods,43.753259,-79.329656
M4A,North York,Victoria Village,43.725882,-79.315572
M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
M6A,North York,Lawrence Heights,43.718518,-79.464763
M6A,North York,Lawrence Manor,43.718518,-79.464763
M7A,Queen's Park,Queen's Park,43.662301,-79.389494
M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
M1B,Scarborough,Rouge,43.806686,-79.194353
M1B,Scarborough,Malvern,43.806686,-79.194353


## Selecting Downtown Toronto

In [15]:
# Boolean mask of the rows whose borough is Downtown Toronto
is_downtown = df_mod['Borough'].eq('Downtown Toronto')
dntoronto_data = df_mod[is_downtown]
dntoronto_data.head()

Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
M5B,Downtown Toronto,Ryerson,43.657162,-79.378937
M5B,Downtown Toronto,Garden District,43.657162,-79.378937
M5C,Downtown Toronto,St. James Town,43.651494,-79.375418


## Use geopy library to get the latitude and longitude values of Downtown Toronto

In [13]:
# Resolve a free-text address to coordinates through the Nominatim geocoder.
address = 'Downtown Toronto, TO'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Nominatim returned no result for address {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Downtown Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Downtown Toronto are 43.65145205, -79.3620078930752.


In [18]:
# Base map centred on the geocoded Downtown Toronto coordinates
map_dntoronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Styling shared by every neighbourhood marker
marker_style = dict(
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per row, with the neighbourhood name as its popup
marker_rows = zip(
    dntoronto_data['Latitude'],
    dntoronto_data['Longitude'],
    dntoronto_data['Neighbourhood'],
)
for lat, lng, label in marker_rows:
    label = folium.Popup(label, parse_html=True)
    marker = folium.CircleMarker([lat, lng], radius=5, popup=label, **marker_style)
    marker.add_to(map_dntoronto)

map_dntoronto