## Segmenting and Clustering Neighborhoods in Toronto

In [49]:
import pandas as pd
import numpy as np

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

In [50]:
# Read the Wikipedia-sourced postal code / borough / neighbourhood table
# from its CSV file and preview the first few rows.
df = pd.read_csv(
    filepath_or_buffer='Toronto Post Code, Borough, Neighborhood.csv',
)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M9Z,Not assigned,Not assigned
1,M9Y,Not assigned,Not assigned
2,M9X,Not assigned,Not assigned
3,M9W,Etobicoke,"Northwest, West Humber - Clairville"
4,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [51]:
# Keep only the rows whose Borough is something other than 'Not assigned',
# then renumber the index from 0 so it is contiguous again.
df = df.loc[df['Borough'].ne('Not assigned')].reset_index(drop=True)
df.head(20)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M9W,Etobicoke,"Northwest, West Humber - Clairville"
1,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."
2,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
3,M9P,Etobicoke,Westmount
4,M9N,York,Weston
5,M9M,North York,"Humberlea, Emery"
6,M9L,North York,Humber Summit
7,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe..."
8,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov..."
9,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"


In [52]:
# Show (rows, columns) of df after the Borough filter above.
df.shape

(103, 3)

In [53]:
# A Neighbourhood of 'Not assigned' falls back to the row's Borough name;
# every other Neighbourhood value is kept as it is.
df['Neighbourhood'] = np.where(
    df['Neighbourhood'].eq('Not assigned'),
    df['Borough'],
    df['Neighbourhood'],
)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M9W,Etobicoke,"Northwest, West Humber - Clairville"
1,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."
2,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
3,M9P,Etobicoke,Westmount
4,M9N,York,Weston


In [54]:
# Show (rows, columns) of df after replacing 'Not assigned' neighbourhoods.
df.shape

(103, 3)

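The shape is still (103, 3) after this step. Note that `np.where` only replaces values and never drops rows, so the unchanged shape simply confirms that no rows were lost. In this data, wherever Neighbourhood = 'Not assigned', Borough is also 'Not assigned', so those rows were already removed by the Borough filter above.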

## Add the Latitude and Longitude

In [55]:
# Read the per-postal-code latitude/longitude table from its CSV file
# and display it.
geodf = pd.read_csv(filepath_or_buffer='Geospatial_Coordinates.csv')
geodf

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [56]:
# Attach Latitude and Longitude to each Neighbourhood/Borough row by matching
# on 'Postal Code'. A left join keeps every row of df even when geodf has no
# entry for its postal code.
df = df.merge(geodf, how='left', on='Postal Code')
df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M9W,Etobicoke,"Northwest, West Humber - Clairville",43.706748,-79.594054
1,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
2,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
3,M9P,Etobicoke,Westmount,43.696319,-79.532242
4,M9N,York,Weston,43.706876,-79.518188
...,...,...,...,...,...
98,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
99,M1G,Scarborough,Woburn,43.770992,-79.216917
100,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
101,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497


In [57]:
# Report the number of distinct boroughs and the number of rows in df.
print(
    'The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df['Borough'].unique()),  # distinct Borough values
        len(df),                      # row count of the dataframe
    )
)

The dataframe has 10 boroughs and 103 neighborhoods.


## Maps to visualize neighborhoods

In [58]:
# Find the coordinates of Toronto to use as the centre of the map.
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match for the address;
# fail here with a clear message instead of an AttributeError further down.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [60]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one circle marker per row of df, with a "<neighbourhood>, <borough>" popup
for lat, lng, borough, neighbourhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):
    # The popup text is built from this row's neighbourhood and borough names.
    label = '{}, {}'.format(neighbourhood, borough)
    # parse_html=True makes folium treat the label as text rather than raw HTML.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [62]:
# Narrow the analysis to a single borough: keep only the Scarborough rows
# (re-indexed from 0) for the segmenting and clustering that follows.
scarborough_data = df.loc[df['Borough'].eq('Scarborough')].reset_index(drop=True)
scarborough_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1X,Scarborough,Upper Rouge,43.836125,-79.205636
1,M1W,Scarborough,"Steeles West, L'Amoreaux West",43.799525,-79.318389
2,M1V,Scarborough,"Milliken, Agincourt North, Steeles East, L'Amo...",43.815252,-79.284577
3,M1T,Scarborough,"Clarks Corners, Tam O'Shanter, Sullivan",43.781638,-79.304302
4,M1S,Scarborough,Agincourt,43.7942,-79.262029


In [63]:
# Get coordinates of Scarborough to use as the centre of the borough map.
address = 'Scarborough, CA'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match for the address;
# fail here with a clear message instead of an AttributeError further down.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Scarborough are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Scarborough are 43.773077, -79.257774.


In [65]:
# Build the Scarborough map centred on the latitude/longitude found above.
map_scarborough = folium.Map(location=[latitude, longitude], zoom_start=11)

# Place one circle marker per row of scarborough_data; the popup shows the
# row's Neighbourhood value.
for lat, lng, name in zip(
    scarborough_data['Latitude'],
    scarborough_data['Longitude'],
    scarborough_data['Neighbourhood'],
):
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(name, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_scarborough)

map_scarborough