# Clustering Toronto Neighborhoods

This notebook explores and clusters the neighborhoods of Toronto.

## **Import Necessary Libraries**

In [4]:
# pandas: table handling (read_html / read_csv and DataFrame operations below)
import pandas as pd
# numpy: not referenced in the cells visible here — presumably used later in the notebook; verify
import numpy as np
# folium: map rendering (folium.Map / folium.CircleMarker below)
import folium 


# Confirmation message only: at this point just the libraries above have been imported
print("Import Database success!")

Import Database success!


## **Import and process Database**

In [5]:
# Import the postal-code table from Wikipedia.
# pd.read_html returns a list with one DataFrame per HTML table on the page;
# the first table is taken as the postal-code listing.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
df = pd.read_html(url)
df_all = df[0]

# Remove rows whose Borough is "Not assigned", then renumber the index from 0
df_all = df_all[df_all["Borough"] != "Not assigned"].reset_index(drop=True)

# Count rows that have a borough but whose Neighbourhood still says "Not assigned"
na_count = df_all["Neighbourhood"].str.contains("Not assigned").sum()
if na_count == 0:
    print("There are {n} rows in the dataframe".format(n=df_all.shape[0]))
else:
    # Report the problem explicitly so unassigned neighbourhoods are not
    # silently carried into the later steps.
    print("{k} of {n} rows have a borough but a 'Not assigned' Neighbourhood".format(
        k=na_count, n=df_all.shape[0]))

df_all

There are 103 rows in the dataframe


Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


## **Get Geographical Coordinates of Postal Codes**

In [6]:
# The geocoder could not be used, so the coordinates are read from a CSV file instead
url2 = "https://cocl.us/Geospatial_data"
df_source = pd.read_csv(url2)

# Add geographical coordinates to the dataframe.
# reindex fixes the column set and order, creating Latitude/Longitude filled with 0.0
newcolumns = ["Postal Code", "Borough", "Neighbourhood", "Latitude", "Longitude"]
df_all = df_all.reindex(columns=newcolumns, fill_value=0.0)

# Lookup table with one row per postal code. keep="last" gives the same result
# as scanning the source row by row, where a later duplicate would overwrite
# an earlier one.
coords = (
    df_source
    .dropna(subset=["Postal Code"])
    .drop_duplicates(subset="Postal Code", keep="last")
    .set_index("Postal Code")
)

# Vectorised lookup instead of comparing every row pair with nested loops.
# Postal codes that are absent from the CSV keep the 0.0 placeholder.
has_coords = df_all["Postal Code"].isin(coords.index)
for column in ("Latitude", "Longitude"):
    df_all.loc[has_coords, column] = df_all.loc[has_coords, "Postal Code"].map(coords[column])

df_all

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


## **Explore and Cluster Neighbourhoods in Boroughs that contain the word "Toronto"**

**Initialize Data and Make an Initial Map of Neighbourhoods in Boroughs that contain the word "Toronto"**

In [18]:
# Keep only the rows whose Borough name includes "Toronto", renumbering the index from 0
in_toronto = df_all['Borough'].str.contains("Toronto")
toronto_data = df_all.loc[in_toronto].reset_index(drop=True)
toronto_data

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


In [43]:
# Hard-coded coordinates used to centre the Toronto map
latitude = 43.67
longitude = -79.35

# Base map centred on those coordinates
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# Add one circle marker per row of toronto_data, with the neighbourhood name as popup
for lat, lng, neighbourhood in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighbourhood']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        # parse_html is a Popup option, so it is set here and not on the marker itself
        popup=folium.Popup(neighbourhood, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto