# Segmenting and Clustering Neighborhoods in Toronto

## Part 1: Generate Table

### 1.1: Import libraries

In [1]:
import pandas as pd
import numpy as np
import json
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
!pip install folium
import folium

print ('Libraries imported')

Libraries imported


### 1.2: Scrape Data

In [2]:
# Download every HTML table found on the Wikipedia page; read_html returns a
# list of DataFrames, one per <table> element.
data = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')

# Header of the table we are after. The spelling must match the page exactly
# ("Postal code"), because the cleaning and merge cells key on these names.
table_col = ['Postal code', 'Borough', 'Neighborhood']

# Select the first table whose header matches exactly. The `break` belongs
# inside the `if`, so the search continues past non-matching tables, and the
# `else` branch fails loudly instead of silently using the wrong table.
for candidate in data:
    if list(candidate.columns) == table_col:
        table = candidate
        break
else:
    raise ValueError(
        "No table with columns {} was found on the page".format(table_col)
    )

table.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


### 1.3: Clean Data

In [3]:
# Keep only the rows whose Borough has actually been assigned
is_assigned = table['Borough'].ne("Not assigned")
table = table.loc[is_assigned]

# Collapse rows that share a postal code and borough into a single row,
# comma-joining the remaining text column; sort=False keeps the groups in
# order of first appearance
table_group = (
    table
    .groupby(['Postal code', 'Borough'], sort=False)
    .agg(','.join)
)
table = table_group.reset_index()

# Report the resulting dimensions, then preview the first rows
print("The shape of the dataframe is:", table.shape)
table.head(n=12)

The shape of the dataframe is: (103, 3)


Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,Malvern / Rouge
7,M3B,North York,Don Mills
8,M4B,East York,Parkview Hill / Woodbine Gardens
9,M5B,Downtown Toronto,"Garden District, Ryerson"


## Part 2: Get Coordinates

### 2.1: Use a CSV file to get the coordinates

In [4]:
# Read the postal-code coordinate lookup (one latitude/longitude per code)
geospatial_url = 'https://cocl.us/Geospatial_data'
cd = pd.read_csv(geospatial_url)

# Preview the first rows of the lookup
cd.head(n=12)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


### 2.2: Merge the coordinates into the previous dataframe

In [5]:
# Align the key column name with the scraped table. Renaming by label (not by
# position) cannot mislabel data if the CSV column order ever changes, and it
# is a no-op when this cell is run a second time.
cd = cd.rename(columns={'Postal Code': 'Postal code'})

# Drop coordinate columns left over from a previous run of this cell so that
# re-running it does not produce Latitude_x / Latitude_y duplicates.
coordinate_cols = ['Latitude', 'Longitude']
table = pd.merge(
    table.drop(columns=coordinate_cols, errors='ignore'),
    cd,
    on='Postal code',
)
table.head(12)

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
3,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,Parkview Hill / Woodbine Gardens,43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


## Part 3: Toronto Map

### 3.1: Find Coordinates

In [6]:
# Look up the coordinates of Toronto to use as the centre of the map.
address = 'Toronto, ON'
geolocator = Nominatim(user_agent = "Toronto")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError("Could not geocode address: {!r}".format(address))

lat = location.latitude
long = location.longitude

print("Toronto coordinates are:", lat, long)

Toronto coordinates are: 43.6534817 -79.3839347


### 3.2: Create Map

In [13]:
# Base map centred on the geocoded Toronto coordinates
toronto_map = folium.Map(location=[lat, long], zoom_start=10)

# Walk the four columns in lock-step and drop one circle marker per row
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for row_lat, row_lng, row_borough, row_neighborhood in zip(*(table[col] for col in marker_columns)):
    # Popup text shown on click: "<neighborhood>, <borough>"
    popup_text = '{}, {}'.format(row_neighborhood, row_borough)
    popup = folium.Popup(popup_text, parse_html=True)
    marker = folium.CircleMarker(
        [row_lat, row_lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # NOTE(review): parse_html looks like a Popup option rather than a
        # marker option; kept as-is to preserve behaviour — confirm it is needed.
        parse_html=False,
    )
    marker.add_to(toronto_map)

# Last expression of the cell: render the map inline
toronto_map