# Create Dataframe for Toronto

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Wikipedia page listing the postal codes that start with "M"
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# read_html returns a list with one DataFrame per HTML table found on the
# page; the postal-code table is taken to be the first entry.
wiki_tables = pd.read_html(url)
df = wiki_tables[0]

In [3]:
# Preview the first rows of the table exactly as scraped (columns not yet renamed)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [4]:
# Clean the scraped table in a single pass:
#   1. rename the columns to the names used in the rest of the notebook,
#   2. keep only postal codes whose Borough is assigned,
#   3. collapse rows sharing a postal code/borough into one row whose
#      neighborhoods are joined with ", ".
df = (
    df.rename(columns={"Postal Code": "PostalCode", "Neighbourhood": "Neighborhood"})
    .loc[lambda frame: frame["Borough"] != "Not assigned"]
    .groupby(["PostalCode", "Borough"])["Neighborhood"]
    .agg(", ".join)
    .reset_index()
)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [5]:
# Where a postal code has a borough but its Neighborhood is "Not assigned",
# use the borough name as the neighborhood.
#
# This is done with a boolean mask and .loc rather than by assigning into the
# rows yielded by iterrows(): those row objects are not guaranteed to be views
# of the frame, so writing to them can silently leave df unchanged.
neighborhood_missing = df["Neighborhood"] == "Not assigned"
df.loc[neighborhood_missing, "Neighborhood"] = df.loc[neighborhood_missing, "Borough"]

df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [6]:
# Check column dtypes and non-null counts of the cleaned table
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103 entries, 0 to 102
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   PostalCode    103 non-null    object
 1   Borough       103 non-null    object
 2   Neighborhood  103 non-null    object
dtypes: object(3)
memory usage: 2.5+ KB


In [8]:
# (rows, columns) of the cleaned table
df.shape

(103, 3)

# Add Geo-Data to existing Dataframe of Toronto

In [9]:
# NOTE: the triple-quoted block below is a bare string literal, so none of it
# is executed. It keeps a geocoder-based lookup for reference only; as written
# it refers to `postal_code`, which is not defined in the cells shown here.
"""
# Gecoder package
import geocoder

# Initialize variable
lat_lng_coords = None

# Loop until coordinates are received
while (lat_lng_coords is None):
    g = geocoder.google("{}, Toronto, Ontario".format(postal_code))
    lat_lng_coords = g.latlng
    
latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]
"""

# The coordinates are instead read from a CSV file; the relative path is
# resolved against the notebook's working directory.
filename = "Geospatial_Coordinates.csv"
geoData = pd.read_csv(filename)
# Preview the first rows of the coordinate table
geoData.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [10]:
# Align the key column name with the Wikipedia table, then attach the
# coordinates to each postal code. The join key and type are spelled out;
# they match what pd.merge picks by default for these two frames (inner join
# on the single shared column).
geoData = geoData.rename(columns={"Postal Code": "PostalCode"})
wikiDataGeo = df.merge(geoData, on="PostalCode", how="inner")

In [11]:
# Preview the merged table (up to the first 12 rows)
wikiDataGeo.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


# Analyze Clustering of Toronto

In [29]:
# Setup map of Toronto
import folium

# Centre point as [latitude, longitude] and the initial zoom level of the map
toronto_center = [43.651070, -79.347015]
initial_zoom = 10

# Create the empty base map; the last expression renders it in the notebook
toronto_map = folium.Map(location=toronto_center, zoom_start=initial_zoom)
toronto_map

In [30]:
# Draw one circle marker per row of the merged table, labelled
# "<neighborhood>, <borough>" in its popup.
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in wikiDataGeo[marker_columns].itertuples(index=False, name=None):
    popup_text = f'{neighborhood}, {borough}'
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # NOTE(review): parse_html looks like a Popup option rather than a
        # CircleMarker one; kept as in the original - confirm it can be dropped.
        parse_html=False,
    )
    marker.add_to(toronto_map)

# Render the map with all markers added
toronto_map