## Import Libraries

In [38]:
!pip install folium

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/fd/a0/ccb3094026649cda4acd55bf2c3822bb8c277eb11446d13d384e5be35257/folium-0.10.1-py2.py3-none-any.whl (91kB)
[K     |████████████████████████████████| 92kB 16.3MB/s eta 0:00:01
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/81/6d/31c83485189a2521a75b4130f1fee5364f772a0375f81afff619004e5237/branca-0.4.0-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.0 folium-0.10.1


In [39]:
import numpy as np
import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
import json
from geopy.geocoders import Nominatim
import requests
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

In [7]:
wikipedia_link='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
raw_wikipedia_page= requests.get(wikipedia_link).text
soup = BeautifulSoup(raw_wikipedia_page,'xml')

### Extracting Raw Table from Web Page

In [9]:
table = soup.find('table')
Postcode      = []
Borough       = []
Neighbourhood = []
for tr_cell in table.find_all('tr'):    
    counter = 1
    Postcode_var      = -1
    Borough_var       = -1
    Neighbourhood_var = -1    
    for td_cell in tr_cell.find_all('td'):
        if counter == 1: 
            Postcode_var = td_cell.text
        if counter == 2: 
            Borough_var = td_cell.text
            tag_a_Borough = td_cell.find('a')            
        if counter == 3: 
            Neighbourhood_var = str(td_cell.text).strip()
            tag_a_Neighbourhood = td_cell.find('a')            
        counter += 1        
        if (Postcode_var == 'Not assigned' or Borough_var == 'Not assigned' or Neighbourhood_var == 'Not assigned'):
            continue
    try:
        if ((tag_a_Borough is None) or (tag_a_Neighbourhood is None)):
            continue
    except:
        pass    
    if(Postcode_var == -1 or Borough_var == -1 or Neighbourhood_var == -1):
        continue    
    Postcode.append(Postcode_var)
    Borough.append(Borough_var)
    Neighbourhood.append(Neighbourhood_var)

### Integrating Postal codes with more than 1 neighbour

In [10]:
unique_p = set(Postcode)
print('num of unique Postal codes:', len(unique_p))
Postcode_u      = []
Borough_u       = []
Neighbourhood_u = []
for postcode_unique_element in unique_p:
    p_var = ''; b_var = ''; n_var = ''; 
    for postcode_idx, postcode_element in enumerate(Postcode):
        if postcode_unique_element == postcode_element:
            p_var = postcode_element;
            b_var = Borough[postcode_idx]
            if n_var == '': 
                n_var = Neighbourhood[postcode_idx]
            else:
                n_var = n_var + ', ' + Neighbourhood[postcode_idx]
    Postcode_u.append(p_var)
    Borough_u.append(b_var)
    Neighbourhood_u.append(n_var)

num of unique Postal codes: 78


### Creating the Dataframe

In [13]:
toronto_dict = {'Postcode':Postcode_u, 'Borough':Borough_u, 'Neighbourhood':Neighbourhood_u}
df = pd.DataFrame.from_dict(toronto_dict)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M5A,Downtown Toronto,Harbourfront
1,M5S,Downtown Toronto,University of Toronto
2,M4E,East Toronto,The Beaches
3,M4V,Central Toronto,"Deer Park, Rathnelly, South Hill"
4,M5C,Downtown Toronto,St. James Town


### Determining the Shape of the Created Dataframe

In [14]:
df.shape

(78, 3)

### Create the Dataframe for Latitudes and Longitudes

In [26]:
df.rename(columns = {'Postcode': 'PostalCode'}, inplace = True)
df2 = pd.read_csv('https://cocl.us/Geospatial_data')
df2.rename(columns = {'Postal Code': 'PostalCode'}, inplace = True)
df2.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merge the Two Created Dataframes

In [27]:
df3 = df.merge(df2, on = 'PostalCode', how = 'left')
df3.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
1,M5S,Downtown Toronto,University of Toronto,43.662696,-79.400049
2,M4E,East Toronto,The Beaches,43.676357,-79.293031
3,M4V,Central Toronto,"Deer Park, Rathnelly, South Hill",43.686412,-79.400049
4,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418


### Use geopy Library to get the latitude and longitude values of Toronto

In [32]:
address = 'Toronto'
geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


### Create a map of Toronto with neighborhoods superimposed on top

In [47]:
map1 = folium.Map(location = [latitude, longitude], zoom_start = 10)
for lat, lng, borough, neighborhood in zip(df3['Latitude'], df3['Longitude'], df3['Borough'], df3['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup = label,
        color = 'blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7).add_to(map1)    
map1

### Filter only boroughs that contain the word Toronto

In [44]:
borough_names = list(df3.Borough.unique())
borough_with_toronto = []
for x in borough_names:
    if "toronto" in x.lower():
        borough_with_toronto.append(x)        
borough_with_toronto

['Downtown Toronto', 'East Toronto', 'Central Toronto', 'West Toronto']

### Create a new DataFrame with only boroughs that contain the word Toronto

In [45]:
df3 = df3[df3['Borough'].isin(borough_with_toronto)].reset_index(drop=True)
print(df3.shape)
df3.head()

(28, 5)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
1,M5S,Downtown Toronto,University of Toronto,43.662696,-79.400049
2,M4E,East Toronto,The Beaches,43.676357,-79.293031
3,M4V,Central Toronto,"Deer Park, Rathnelly, South Hill",43.686412,-79.400049
4,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418


### Create map of Toronto using latitude and longitude values

In [49]:
map2 = folium.Map(location = [latitude, longitude], zoom_start = 10)
for lat, lng, borough, neighborhood in zip(df3['Latitude'], df3['Longitude'], df3['Borough'], df3['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup = label,
        color = 'blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7).add_to(map2)
map2