In [40]:
# Install libraries 
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

!pip -q install geopy
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

! pip install geocoder # import geocoder to find latitude and longitude values
import geocoder 

import requests # library to handle requests

!pip -q install folium
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [41]:
# install beautifulsoup
!pip install beautifulsoup4
from bs4 import BeautifulSoup 

# processing XML and HTML with Python
!pip install lxml



In [65]:
# Create dataframe with: PostalCode, Borough, and Neighborhood
# Source: a pinned earlier revision of the Wikipedia page (oldid=890001695), so the
# table layout (one row per postcode/neighbourhood pair) does not change under us.
from io import StringIO  # local import: read_html wants a file-like object for literal HTML

WIKI_URL = "https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=890001695"

req = requests.get(WIKI_URL, timeout=30)
req.raise_for_status()  # stop on an HTTP error instead of parsing an error page
soup = BeautifulSoup(req.content,'lxml')
list1 = soup.find_all('table')[0]  # first <table> on the page is the one parsed below
# read_html returns a list of frames; the single table passed in yields one entry
tables = pd.read_html(StringIO(str(list1)))
df = tables[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [66]:
# Cleaning the list
# Rows whose 'Borough' is 'Not assigned' carry no usable data; drop them in place.
unassigned_borough = df['Borough'].eq('Not assigned')
df.drop(index=df.loc[unassigned_borough].index, inplace=True)
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [67]:
# Collapse rows that share a postal code into one row: keep the first borough
# and join the neighbourhood names into a single ' , '-separated string.
neighbourhood_separator = ' , '
df = (
    df.groupby('Postcode', as_index=False)
      .agg({'Borough': 'first', 'Neighbourhood': neighbourhood_separator.join})
)
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge , Malvern"
1,M1C,Scarborough,"Highland Creek , Rouge Hill , Port Union"
2,M1E,Scarborough,"Guildwood , Morningside , West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park , Ionview , Kennedy Park"
7,M1L,Scarborough,"Clairlea , Golden Mile , Oakridge"
8,M1M,Scarborough,"Cliffcrest , Cliffside , Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff , Cliffside West"


In [45]:
# If a row has a borough but a 'Not assigned' neighbourhood, use the borough name
# as the neighbourhood.
# Only M7A Queen's Park = Not assigned
needs_borough_name = df['Neighbourhood'] == 'Not assigned'
df['Neighbourhood'] = df['Neighbourhood'].mask(needs_borough_name, df['Borough'])
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge , Malvern"
1,M1C,Scarborough,"Highland Creek , Rouge Hill , Port Union"
2,M1E,Scarborough,"Guildwood , Morningside , West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park , Ionview , Kennedy Park"
7,M1L,Scarborough,"Clairlea , Golden Mile , Oakridge"
8,M1M,Scarborough,"Cliffcrest , Cliffside , Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff , Cliffside West"


In [52]:
# Report how many distinct boroughs the dataframe holds and how many rows it has
borough_count = len(df['Borough'].unique())
row_count = len(df)
print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, row_count))

The dataframe has 11 boroughs and 103 neighborhoods.


In [68]:
# finding coordinates from postal codes
# Each postal code is sent to the ArcGIS geocoder. A lookup can come back empty,
# so it is retried a bounded number of times; if it still fails, NaN is stored so
# the row keeps its place and the coordinate columns stay numeric.
MAX_GEOCODE_ATTEMPTS = 3

lat_lng_coords = {}
postal_code = df.Postcode
for row_label, code in postal_code.items():
    latlng = None
    for _ in range(MAX_GEOCODE_ATTEMPTS):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(code))
        latlng = g.latlng
        if latlng:  # non-empty [lat, lng] means the lookup succeeded
            break
    lat_lng_coords[row_label] = latlng if latlng else [np.nan, np.nan]

# putting coordinates in dataframe
# Rows are keyed by df's own index labels, so an index-aligned, column-wise
# combination with df pairs every coordinate with its postal code.
coords1 = pd.DataFrame.from_dict(lat_lng_coords, orient='index', columns=['Latitude', 'Longitude'])
coords1.head(10)

Unnamed: 0,Latitude,Longitude
0,43.811525,-79.195517
1,43.785665,-79.158725
2,43.765815,-79.175193
3,43.768369,-79.21759
4,43.769688,-79.23944
5,43.743125,-79.23175
6,43.726276,-79.263625
7,43.713054,-79.285055
8,43.724235,-79.227925
9,43.69677,-79.259967


In [54]:
# Put the Wikipedia-derived Toronto table and the geocoder coordinates side by side,
# matching rows on their index.
# not needed - http://cocl.us/Geospatial_data 
frames_to_combine = [df, coords1]
result = pd.concat(frames_to_combine, axis='columns', sort=False)
result

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge , Malvern",43.811525,-79.195517
1,M1C,Scarborough,"Highland Creek , Rouge Hill , Port Union",43.785665,-79.158725
2,M1E,Scarborough,"Guildwood , Morningside , West Hill",43.765815,-79.175193
3,M1G,Scarborough,Woburn,43.768369,-79.21759
4,M1H,Scarborough,Cedarbrae,43.769688,-79.23944
5,M1J,Scarborough,Scarborough Village,43.743125,-79.23175
6,M1K,Scarborough,"East Birchmount Park , Ionview , Kennedy Park",43.726276,-79.263625
7,M1L,Scarborough,"Clairlea , Golden Mile , Oakridge",43.713054,-79.285055
8,M1M,Scarborough,"Cliffcrest , Cliffside , Scarborough Village West",43.724235,-79.227925
9,M1N,Scarborough,"Birch Cliff , Cliffside West",43.69677,-79.259967


In [56]:
# convert Toronto address into latitude and longitude values
address = 'Toronto, Canada'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() gives None when nothing matches; fail with a clear message here
    # rather than with an AttributeError on the attribute access below
    raise ValueError('Nominatim returned no result for address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude

print(latitude, longitude)

43.6534817 -79.3839347


In [57]:
# Draw a Toronto base map and overlay one circle marker per row of `result`,
# positioned by that row's latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# appearance settings shared by every marker
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# add markers to map; each popup reads "<neighbourhood(s)>, <borough>"
marker_rows = zip(result['Latitude'], result['Longitude'], result['Borough'], result['Neighbourhood'])
for lat, lng, borough, neighborhood in marker_rows:
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map_toronto)

map_toronto

In [58]:
# Keep only the rows whose borough name contains the word Toronto;
# these are the boroughs used for clustering
is_toronto_borough = result['Borough'].str.contains("Toronto")
toronto_data = result.loc[is_toronto_borough]
toronto_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676531,-79.295425
41,M4K,East Toronto,"The Danforth West , Riverdale",43.683178,-79.355105
42,M4L,East Toronto,"The Beaches West , India Bazaar",43.667965,-79.314667
43,M4M,East Toronto,Studio District,43.660629,-79.334855
44,M4N,Central Toronto,Lawrence Park,43.72842,-79.387133


In [63]:
# Toronto's latitude and longitude were already looked up in the earlier
# "convert Toronto address into latitude and longitude values" cell.
# Reuse those values instead of sending Nominatim an identical second request.
print(latitude,longitude)

43.6534817 -79.3839347


In [64]:
# create map of the Toronto-named boroughs using latitude and longitude values
map_borough_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map, one per row of toronto_data, with the neighbourhood as popup text
borough_points = zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighbourhood'])
for lat, lng, label in borough_points:
    label = folium.Popup(label, parse_html=True)
    circle = folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    circle.add_to(map_borough_toronto)

map_borough_toronto