In [153]:
import time

import numpy as np
import pandas as pd

#### First, read the relevant Wikipedia table into a pandas DataFrame

In [154]:
# Source page listing Canadian postal codes that start with "M".
wikipedia_page_to_scrape_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# pd.read_html returns a list with one DataFrame per HTML table it finds; [0] keeps the first.
# NOTE(review): assumes the postal-code table is the first table on the page - confirm if the page layout changes.
df = pd.read_html(wikipedia_page_to_scrape_url)[0]

#### Filter out rows which have "Not assigned" as Borough

In [155]:
# Keep only the rows whose Borough has actually been assigned.
df = df.loc[df['Borough'].ne("Not assigned")]

#### Combine all rows with same Postcode into one row, with the different neighbourhoods separated with a comma


In [156]:
# Collapse each (Postcode, Borough) group into a single row whose Neighbourhood
# is the comma-joined list of that group's neighbourhood names.
df = (
    df.groupby(['Postcode', 'Borough'])['Neighbourhood']
    .agg(','.join)
    .reset_index()
)

#### We can see the result dataframe obtained by aforementioned steps:

In [157]:
# Show the grouped frame: one row per (Postcode, Borough) pair.
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie..."
101,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam..."


#### Using the `shape` attribute, we can show the number of rows and columns (respectively) in our dataframe

In [158]:
# (number of rows, number of columns) of the grouped dataframe.
df.shape

(103, 3)

### In this part we will use the geocoder library to find the coordinates for each postal code in our dataframe

In [159]:
import geocoder

#### The following function gets a postal code as string and gets its respective coordinates

In [160]:
def get_coordinates(postal_code, max_attempts=10, retry_delay=1.0):
    """
    Return the coordinates for the given postal_code (using geocoder's OSM provider).

    The lookup query is "<postal_code>, Toronto, Ontario". geocoder sometimes
    yields no coordinates on the first try but succeeds later, so the lookup is
    retried - but only a bounded number of times, with a pause in between, so a
    postal code that can never be resolved cannot hang the notebook or flood
    the geocoding service with back-to-back requests.

    Parameters
    ----------
    postal_code : str
        Postal code to look up.
    max_attempts : int, optional
        Maximum number of lookups before giving up (must be >= 1).
    retry_delay : float, optional
        Seconds to wait between two consecutive attempts; 0 disables the pause.

    Returns
    -------
    The ``latlng`` value of the first geocoder result that is not None,
    ordered as (lat, lng).

    Raises
    ------
    ValueError
        If max_attempts is smaller than 1.
    RuntimeError
        If none of the attempts produced coordinates.
    """
    if max_attempts < 1:
        raise ValueError(f"max_attempts must be at least 1, got {max_attempts}")

    query = f"{postal_code}, Toronto, Ontario"
    for attempt in range(1, max_attempts + 1):
        g = geocoder.osm(query)
        lat_lng_coords = g.latlng
        if lat_lng_coords is not None:
            return lat_lng_coords
        # Pause only when another attempt will follow.
        if attempt < max_attempts and retry_delay > 0:
            time.sleep(retry_delay)

    raise RuntimeError(
        f"Could not obtain coordinates for {postal_code!r} after {max_attempts} attempts"
    )

#### Unfortunately, for our example we will use the supplied CSV for coordinates, since geocoder is unreliable

In [161]:
# Use the precomputed coordinates CSV instead of calling get_coordinates(), since geocoder is unreliable.
# The path is relative, so the file must sit in the notebook's working directory.
# The merge step that follows joins on this file's "Postal Code" column.
coordinates_df = pd.read_csv("Geospatial_Coordinates.csv")

#### We will add the obtained coordinates to our existing dataframe, based on postal code

In [162]:
# Attach coordinates to the existing dataframe, matching Postcode against the CSV's "Postal Code".
# A left merge keeps exactly the postal codes already present in df: an outer merge would also
# append a row for every CSV postal code that df lacks, and since the "Postal Code" column is
# dropped afterwards those rows would be left with NaN Postcode/Borough/Neighbourhood.
# Postal codes missing from the CSV keep their row and get NaN Latitude/Longitude.
df = df.merge(coordinates_df, how='left', left_on="Postcode", right_on="Postal Code").drop("Postal Code", axis=1)
df

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie...",43.688905,-79.554724
101,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",43.739416,-79.588437


### Now we can visualize our postal codes on a Toronto map, using the Folium library

In [163]:
import folium
from IPython.display import display

#### We will use the center of Toronto for start coordinates, and generate a folium map object to begin with

In [164]:
# Coordinates on which the map is initially centred.
toronto_lat, toronto_lon = 43.653963, -79.387207

# Empty base map; markers are added to this object later on.
toronto_map = folium.Map(
    location=[toronto_lat, toronto_lon],
    zoom_start=11,
)

#### Now we will mark each borough with a unique color from the available folium color set

In [165]:
# Distinct borough names; pandas returns them in order of first appearance in the column.
boroughs = df['Borough'].unique()

In [166]:
# Palette of marker colour names; boroughs are assigned colours in this order.
folium_marker_colors = [
    'red', 'blue', 'green', 'purple', 
    'orange', 'darkred', 'lightred', 
    'beige', 'darkblue', 'darkgreen', 
    'cadetblue', 'darkpurple', 'white', 
    'pink', 'lightblue', 'lightgreen', 
    'gray', 'black', 'lightgray'
]
# Map each borough to a colour by position. Indexing (instead of pop()-ing from the list)
# leaves the palette intact, and the modulo wraps around so that having more boroughs than
# colours reuses colours rather than raising IndexError.
colordict = {
    borough: folium_marker_colors[position % len(folium_marker_colors)]
    for position, borough in enumerate(boroughs)
}

#### For each coordinate, we will generate a marker, with the respective post code as a popup and the borough color

In [179]:
# Add one marker per dataframe row, coloured by borough.
# Iterating the four columns in lockstep avoids rebuilding a row Series via df.iloc[i]
# several times per iteration.
# NOTE: markers accumulate on toronto_map, so re-running this cell without first
# re-creating the map adds a duplicate set of markers.
for postcode, borough, latitude, longitude in zip(
    df['Postcode'], df['Borough'], df['Latitude'], df['Longitude']
):
    folium.Marker(
        [latitude, longitude],
        icon=folium.Icon(color=colordict[borough]),
        # Pass an explicit string: a raw tuple is not a popup type folium documents and
        # is at best rendered as its Python repr, e.g. "('M1B', 'Scarborough')".
        popup=f"{postcode}, {borough}",
    ).add_to(toronto_map)

#### Finally, we can display the map and see how our neighbourhoods cluster together!

In [180]:
# Render the folium map (with all markers added so far) as this cell's rich output.
display(toronto_map)