In [444]:
from bs4 import BeautifulSoup
import requests
import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
import folium
import json
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

# Wikipedia page listing the Canadian postal codes that start with "M" (Toronto).
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# requests waits indefinitely by default; a timeout keeps Run All from hanging
# on a stalled connection.
result = requests.get(url, timeout=30)
# Stop here on a 4xx/5xx answer instead of parsing an error page further down.
result.raise_for_status()

#### Loading Wikipedia table

In [445]:
# Parse the downloaded bytes with the built-in HTML parser, then keep the
# first <table> element found in the document.
soup = BeautifulSoup(markup=result.content, features='html.parser')
table = soup.find(name='table')

#### Extracting the data from Wikipedia

In [446]:
# Rows of [postal code, borough, neighborhood], one per table cell.
data = []

# Each <td> is read as "<3-character postal code><borough>(<neighborhoods>)";
# the parenthesised part is absent for some cells.
for tr in table.find_all('tr'):
    for td in tr.find_all('td'):
        td_text = td.get_text().strip()
        if not td_text:
            # An empty cell would only contribute a blank row.
            continue
        postalcode = td_text[:3]
        # Split once at the first "(" so nested parentheses stay in the
        # neighborhood text; without any "(" the neighborhood is empty.
        borough, _paren, rest = td_text[3:].partition("(")
        # Drop the closing ")" only when it is really there.
        neighborhood = rest[:-1] if rest.endswith(")") else rest
        data.append([postalcode, borough.strip(), neighborhood.strip()])

#### Building the dataframe with named columns

In [447]:
# Labels for the three fields collected from each table cell.
cols = [
    'PostalCode',
    'Borough',
    'Neighborhood',
]
df = pd.DataFrame(columns=cols, data=data)

# Show the first rows as a quick sanity check.
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


#### Replacing '/' values with ','

In [448]:
# Neighborhoods sharing a postal code are separated by " / " in the scraped
# text. Consume the whitespace on both sides of the slash so the result is
# "A, B" with exactly one space, and state regex=True explicitly because the
# default of str.replace differs between pandas versions.
df["Neighborhood"] = df["Neighborhood"].str.replace(r'\s*/\s*', ', ', regex=True)

df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


#### Removing 'Not assigned' values from the Borough column

In [449]:
# Keep only the rows whose borough is something other than 'Not assigned'.
df_drop = df.loc[df['Borough'].ne('Not assigned')]

#### Checking for empty cells in the 'Neighborhood' column

In [450]:
# List the remaining rows that have an empty neighborhood string.
df_drop[df_drop['Neighborhood'].eq('')]

Unnamed: 0,PostalCode,Borough,Neighborhood
6,M7A,Queen's Park / Ontario Provincial Government,


#### Resetting the index

In [451]:
# Renumber the remaining rows from 0; drop=True discards the old index
# instead of keeping it as an extra column.
df_drop = df_drop.reset_index(drop=True)

#### Filling empty 'Neighborhood' values with the 'Borough' value

In [452]:
# Where no neighborhood was listed, fall back to the borough of the same row.
# mask() substitutes values row by row wherever the condition is True, which
# is the supported way to fill from another column.
df_drop['Neighborhood'] = df_drop['Neighborhood'].mask(
    df_drop['Neighborhood'] == '',
    df_drop['Borough'],
)

#### Checking the dataframe shape

In [453]:
# (rows, columns) of the cleaned frame.
df_drop.shape

(103, 3)

#### Loading the CSV and renaming the columns

In [454]:
# Load the coordinates file and expose its 'Postal Code' column under the
# name 'PostalCode', matching the key column of the scraped frame.
dfgeo = (
    pd.read_csv("Geospatial_Coordinates.csv")
    .rename(columns={'Postal Code': 'PostalCode'})
)

#### Merging the dataframes

In [455]:
# Left join: every cleaned postal-code row is kept and gains the columns of
# the coordinates frame wherever the 'PostalCode' values match.
df2 = df_drop.merge(dfgeo, how='left', on="PostalCode")

In [456]:
# Preview the first rows of the merged frame.
df2.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park / Ontario Provincial Government,Queen's Park / Ontario Provincial Government,43.662301,-79.389494


#### Checking the geographical coordinates of the City of Toronto.

In [457]:
address = 'Toronto, Canada'

# Nominatim expects every client to identify itself; geopy warns when no
# user agent is given and newer releases refuse to build the geocoder.
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message rather than an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of the City of Toronto are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of the City of Toronto are 43.653963, -79.387207.


#### Create map of Toronto using latitude and longitude values

In [458]:
# Base map centred on the geocoded city coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# df2 comes from a left merge, so a postal code missing from the coordinates
# file has NaN latitude/longitude. A marker cannot be placed at NaN, so only
# rows with both coordinates are plotted.
df2_located = df2.dropna(subset=['Latitude', 'Longitude'])

# add markers to map: one small circle per postal code, labelled
# "<neighborhood>, <borough>" in its popup
for lat, lng, borough, neighborhood in zip(
        df2_located['Latitude'],
        df2_located['Longitude'],
        df2_located['Borough'],
        df2_located['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html=True makes the popup escape the label text.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3199cc',
        fill_opacity=0.3,
    ).add_to(map_toronto)

map_toronto