# Segmenting and Clustering Neighborhoods in Toronto

### Import modules

In [115]:
import numpy as np
import pandas as pd
import bs4 as bs
import urllib.request
from geopy.geocoders import Nominatim
import folium
from sklearn.cluster import KMeans
import matplotlib as plt

### Get HTML table for Toronto Postal Codes from wikipedia

In [56]:
wiki_link = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
wiki_source = urllib.request.urlopen(wiki_link).read()

wiki_table = bs.BeautifulSoup(wiki_source, "html.parser").table

### Parse into dataframe and clean

In [101]:
df = pd.read_html(str(wiki_table))
toronto_raw = df[0]
print(toronto_raw.shape)
toronto = toronto_raw.rename(columns={"Postal code": "PostalCode"})
toronto = toronto[toronto["Borough"] != "Not assigned"]
toronto = toronto.groupby(['PostalCode', 'Borough']).agg( Neighborhood=('Neighborhood', ', '.join) ).reset_index()
toronto['Neighborhood'] = toronto['Neighborhood'].str.replace(' /', ',')
toronto

(180, 3)


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [85]:
toronto.shape

(103, 3)

### Pull Lat/Lon for Toronto Postal Codes

In [102]:
latlon = pd.read_csv("https://cocl.us/Geospatial_data")
latlon = latlon.rename(columns={"Postal Code": "PostalCode"})
latlon.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Join Lat/Lon to Toronto Borough data

In [103]:
toronto = toronto.join(latlon, rsuffix = '_z').drop(['PostalCode_z'], axis=1)
toronto

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


In [105]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(toronto['Borough'].unique()),
        toronto.shape[0]
    )
)

The dataframe has 10 boroughs and 103 neighborhoods.


### Use geopy to get Lat/Lon of Toronto

In [111]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Create map of Toronto with neighborhoods

In [118]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto['Latitude'], toronto['Longitude'], toronto['Borough'], toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Test - Create map of Toronto boroughs containing 'Toronto'

In [None]:
tor_test = toronto[toronto['Borough'].str.contains('Toronto')].reset_index(drop=True)
etobicok.head()

0      False
1      False
2      False
3      False
4      False
       ...  
98     False
99     False
100    False
101    False
102    False
Name: Borough, Length: 103, dtype: bool

### Test - Create map of a neighborhood in Toronto

In [119]:
toronto['Borough'].value_counts()

North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
East York            5
York                 5
East Toronto         5
Mississauga          1
Name: Borough, dtype: int64

In [122]:
etobicoke = toronto[toronto['Borough'] == 'Etobicoke'].reset_index(drop=True)
etobicoke.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M8V,Etobicoke,"New Toronto, Mimico South, Humber Bay Shores",43.605647,-79.501321
1,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484
2,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
3,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509
4,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999


In [123]:
map_etobicoke = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(etobicoke['Latitude'], etobicoke['Longitude'], etobicoke['Borough'], etobicoke['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_etobicoke)  
    
map_etobicoke