Imports

In [1]:
import pandas as pd
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import folium
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import numpy as np
# import k-means from clustering stage
from sklearn.cluster import KMeans
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

Get the table

In [2]:
# Scrape the Wikipedia page of Canadian postal codes starting with "M".
# read_html returns one DataFrame per HTML table found; the first is used.
link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_tables = pd.read_html(link)
df = wiki_tables[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


Drop the rows whose borough is not assigned, and replace unassigned neighbourhoods with the borough name

In [3]:
# Keep only the rows that have a borough assigned, renumbering them from 0.
df = df[df["Borough"] != "Not assigned"].reset_index(drop=True)

# Where the neighbourhood is unassigned, fall back to that row's borough name.
# A boolean mask with .loc writes to the frame itself and pairs each
# neighbourhood with the borough of the same row, instead of depending on an
# in-place replace() called through a column accessor with a Series as value.
unassigned = df["Neighbourhood"] == "Not assigned"
df.loc[unassigned, "Neighbourhood"] = df.loc[unassigned, "Borough"]

df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


Combine the neighbourhoods that share a postcode into a single row

In [4]:
# Group the rows by postal code so rows sharing a code can be handled together.
groups_postcodes = df.groupby(by="Postcode")

In [5]:
# For every row, build the comma-separated list of all neighbourhoods that
# share its postcode. transform() aligns its result on the row labels, so this
# does not assume the index is 0..n-1 or that "Neighbourhood" sits at a fixed
# column position. str.cat is kept so missing values are skipped as before.
df["Neighbourhood"] = (
    df.groupby("Postcode")["Neighbourhood"]
    .transform(lambda names: names.str.cat(sep=", "))
)

# Every row of a postcode now carries the same joined string; keep the first.
# Done in place so `df` stays the same frame object for the cells that follow.
df.drop_duplicates(subset="Postcode", keep="first", inplace=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
4,M6A,North York,"Lawrence Heights, Lawrence Manor"
6,M7A,Queen's Park,Queen's Park


In [6]:
# Report the table's dimensions; df.shape is (rows, columns).
print("This table has {} rows and {} columns".format(*df.shape))

This table has 103 rows and 3 columns


Get the coordinates

In [7]:
# Download the CSV that maps each postal code to a latitude/longitude pair.
# The file is comma-separated, which is already read_csv's default delimiter.
url = "http://cocl.us/Geospatial_data"
df2 = pd.read_csv(url)
df2.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Once both tables are sorted by postal code, their rows should be in the same order

In [8]:
# Put both tables in postal-code order and renumber their rows from 0, so the
# two postal-code columns can be compared position by position.
df = df.sort_values(by="Postcode").reset_index(drop=True)
df2 = df2.sort_values(by="Postal Code").reset_index(drop=True)

# Displays True only when both columns hold the same codes in the same order.
df["Postcode"].equals(df2["Postal Code"])

True

They are in the same order

In [9]:
# Attach the coordinates by matching on the postal code itself rather than on
# row position, so a mismatch between the two tables cannot silently pair a
# postcode with another postcode's coordinates.
df = (
    # Dropping any earlier coordinate columns keeps this cell safe to re-run.
    df.drop(columns=["Latitude", "Longitude"], errors="ignore")
    .merge(
        df2[["Postal Code", "Latitude", "Longitude"]],
        how="left",               # keep every postcode row of df, in order
        left_on="Postcode",
        right_on="Postal Code",
        validate="one_to_one",    # raise if a postal code is duplicated
    )
    .drop(columns="Postal Code")  # redundant copy of the join key
)

df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


Get coordinates for map

In [10]:
# Look up the coordinates of Toronto, used to centre the map.
address = 'Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when no result is found; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError("Could not geocode address: {!r}".format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


Keep only the boroughs whose name contains the word "Toronto"

In [11]:
# Select the rows whose borough name contains the literal text "Toronto"
# (missing borough values count as no match) and renumber them from 0.
df_map = (
    df.loc[df["Borough"].str.contains("Toronto", regex=False, na=False)]
    .reset_index(drop=True)
)

In [12]:
# Preview the first dozen Toronto rows.
df_map.head(n=12)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197
6,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
7,M4S,Central Toronto,Davisville,43.704324,-79.38879
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
9,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049
