# <font size="5">Creating a Table of Boroughs</font>

In [8]:
# Numerical and tabular data libraries
import numpy as np
import pandas as pd
# Passing None removes pandas' display limits, so frames are shown without
# column or row truncation.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
# NOTE(review): newer pandas releases expose this as `pandas.json_normalize`;
# confirm this import path works with the installed pandas version.
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

from bs4 import BeautifulSoup # HTML parsing of the downloaded page
import xml

# Confirmation message so the reader can see that the import cell completed
print('Libraries imported.')

Libraries imported.


# <font size="5">Scraping the Wikipedia Page</font>

In [9]:
# Download the Wikipedia page that lists the Canadian postal codes starting with "M".
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() makes an HTTP error fail here instead of letting an error
# page be parsed as if it were the article.
response = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
response.raise_for_status()

# NOTE: despite its name, `url` holds the HTML text of the page, not the address.
url = response.text

# Parse the HTML with the lxml backend; `soup` is what the table lookup reads.
soup = BeautifulSoup(url, 'lxml')

# <font size="5">Locating the Table and Building a DataFrame with Pandas</font>

In [11]:
# Take the first <table> element of the parsed page.
data = soup.find('table')
if data is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError('No <table> element was found in the downloaded page.')

# The cells are read as one flat sequence in which every three consecutive
# <td> elements form a (postcode, borough, neighbourhood) record.
fields = data.find_all('td')
if len(fields) % 3 != 0:
    # A cell count that is not a multiple of three means the layout assumption
    # is broken; stop here rather than build misaligned columns.
    raise ValueError(
        'Expected the number of table cells to be a multiple of 3, got {}.'.format(len(fields))
    )

cell_texts = [cell.text.strip() for cell in fields]

# A stride of three picks out each column of the flat cell sequence.
postcode = cell_texts[0::3]
borough = cell_texts[1::3]
neighbourhood = cell_texts[2::3]

# Build the frame directly from named columns (no transpose needed).
postal_code = pd.DataFrame(
    {'Postcode': postcode, 'Borough': borough, 'Neighbourhood': neighbourhood},
    columns=['Postcode', 'Borough', 'Neighbourhood'],
)
postal_code.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


# <font size='5'>Removing "Not assigned" Rows</font>

In [20]:
# Keep only the rows that have a real borough: drop rows whose Borough is
# missing or is the placeholder text 'Not assigned'.
# An explicit boolean filter is used instead of calling
# `replace(..., inplace=True)` on the selected column, because an in-place
# call on a column selection is not guaranteed to write back to the parent
# DataFrame (see pandas' Copy-on-Write behaviour). Re-running this cell is
# also harmless, since filtering an already-filtered frame changes nothing.
has_borough = postal_code['Borough'].notna() & (postal_code['Borough'] != 'Not assigned')
postal_code = postal_code.loc[has_borough].copy()
postal_code.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [21]:
# Collapse the table to one row per (Postcode, Borough) pair, joining the
# neighbourhood names of each pair into a single comma-separated string.
neighbourhoods_by_code = postal_code.groupby(['Postcode', 'Borough'])['Neighbourhood']
joined_neighbourhoods = neighbourhoods_by_code.apply(lambda names: ', '.join(names))

# Turn the group keys back into ordinary columns and set the column names.
postal_code_new = joined_neighbourhoods.reset_index()
postal_code_new.columns = ['Postcode', 'Borough', 'Neighbourhood']
postal_code_new

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [22]:
# Display the (rows, columns) size of the grouped table as a quick check.
postal_code_new.shape

(103, 3)

In [27]:
# Coordinates file, read from the notebook's working directory, and the
# column names the notebook uses for it.
geo_csv_path = 'Geospatial_Coordinates.csv'
geo_column_names = ['Postcode', 'Latitude', 'Longitude']

# Load the file, then rename its columns by position so the first one is
# called 'Postcode', the key used when joining with the postal code table.
pc_geo = pd.read_csv(geo_csv_path)
pc_geo.columns = geo_column_names

In [28]:
# Attach coordinates to each postal code. The inner join keeps only the
# postcodes that are present in both tables.
pc_pos = pd.merge(
    left=postal_code_new,
    right=pc_geo,
    how='inner',
    on=['Postcode'],
)

# Reorder the columns and take an independent copy for the mapping step.
toronto_columns = ['Borough', 'Neighbourhood', 'Postcode', 'Latitude', 'Longitude']
pc_tor = pc_pos[toronto_columns].copy()

pc_tor.head()


Unnamed: 0,Borough,Neighbourhood,Postcode,Latitude,Longitude
0,Scarborough,"Malvern, Rouge",M1B,43.806686,-79.194353
1,Scarborough,"Rouge Hill, Port Union, Highland Creek",M1C,43.784535,-79.160497
2,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
3,Scarborough,Woburn,M1G,43.770992,-79.216917
4,Scarborough,Cedarbrae,M1H,43.773136,-79.239476


In [36]:
# Look up the coordinates of the city centre, used to centre the map.
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent = "ny-explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() yields None when nothing matches; fail with a clear message
    # instead of an AttributeError on the attribute access below.
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of the City of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of the City of Toronto are 43.6534817, -79.3839347.


# <font size='5'>Clustering the Neighbourhoods in Toronto</font>

In [40]:
# Base map centred on the geocoded city coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of pc_tor.
# The loop variables use distinct names so they do not overwrite the
# `borough` list created in the table-parsing cell.
marker_rows = zip(
    pc_tor['Latitude'],
    pc_tor['Longitude'],
    pc_tor['Borough'],
    pc_tor['Neighbourhood'],
)
for lat, lng, borough_name, neighbourhood_name in marker_rows:
    popup_text = '{}, {}'.format(neighbourhood_name, borough_name)
    # parse_html is a Popup option, so it is passed to the Popup only
    # and not to CircleMarker.
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=folium.Popup(popup_text, parse_html=True),
        color='red',
        fill=True,
        fill_color='#3199cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto