## **Segmenting and Clustering Neighborhoods in Toronto**

In [None]:
!pip install geocoder

In [82]:
import pandas as pd
import numpy as np
import requests
import geocoder
import folium
import matplotlib.pyplot as plt


In [None]:
wiki_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

Use pandas to get the tables from the site

In [28]:
web_table = pd.read_html(wiki_url, header=0)

Takes the table in index 0

In [41]:
df = pd.DataFrame(web_table[0])
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


Remove Borough values with "Not assigned" values

In [66]:
can_data = df[df['Borough']!='Not assigned']
print(can_data.head())
print(can_data.shape)
can_data = can_data.reset_index()
can_data = can_data.drop('index', axis=1)
can_data.head(5)

  Postal Code           Borough                                Neighbourhood
2         M3A        North York                                    Parkwoods
3         M4A        North York                             Victoria Village
4         M5A  Downtown Toronto                    Regent Park, Harbourfront
5         M6A        North York             Lawrence Manor, Lawrence Heights
6         M7A  Downtown Toronto  Queen's Park, Ontario Provincial Government
(103, 3)


Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [70]:
print(len(can_data))
print(can_data['Postal Code'].unique)

103
<bound method Series.unique of 0      M3A
1      M4A
2      M5A
3      M6A
4      M7A
      ... 
98     M8X
99     M4Y
100    M7Y
101    M8Y
102    M8Z
Name: Postal Code, Length: 103, dtype: object>


Confirming the data is in the correct format. M5A postal cost is listed once and includes the 2 neighborhoods

In [73]:
print(can_data.query("`Postal Code` == 'M5A'"))

  Postal Code           Borough              Neighbourhood
2         M5A  Downtown Toronto  Regent Park, Harbourfront


In [71]:
final_cd.shape

(103, 3)

Append the Latitude and Longitude for the entries

In [76]:
# get the geolocations
geo_csv = "https://cocl.us/Geospatial_data"
df_geo = pd.read_csv(geo_csv)

df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Merge the 2 data frames bases on the postal code

In [80]:
merged_df = can_data.merge(df_geo, how='left', left_on='Postal Code', right_on='Postal Code')
print(merged_df.head())
print(merged_df.shape)

  Postal Code           Borough  ...   Latitude  Longitude
0         M3A        North York  ...  43.753259 -79.329656
1         M4A        North York  ...  43.725882 -79.315572
2         M5A  Downtown Toronto  ...  43.654260 -79.360636
3         M6A        North York  ...  43.718518 -79.464763
4         M7A  Downtown Toronto  ...  43.662301 -79.389494

[5 rows x 5 columns]
(103, 5)


In [81]:
print(merged_df.columns) # confirmed all the columns are available. 

Index(['Postal Code', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude'], dtype='object')


Visualise the neighborhoods in Toronto

In [84]:
# get the latitude and longitude for Toronto

address = 'Toronto, Toronto'
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Downtown Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Downtown Toronto are 43.6534817, -79.3839347.


In [88]:
# create the map of Toronto
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers
for lat, lng, label in zip(merged_df['Latitude'], merged_df['Longitude'], merged_df['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto



Map for Downtown Toronto Neighbourhoods

In [90]:
# create DF for Toronto
toronto_df = merged_df.query("Borough =='Downtown Toronto'")
print(toronto_df.head())

   Postal Code           Borough  ...   Latitude  Longitude
2          M5A  Downtown Toronto  ...  43.654260 -79.360636
4          M7A  Downtown Toronto  ...  43.662301 -79.389494
9          M5B  Downtown Toronto  ...  43.657162 -79.378937
15         M5C  Downtown Toronto  ...  43.651494 -79.375418
20         M5E  Downtown Toronto  ...  43.644771 -79.373306

[5 rows x 5 columns]


In [93]:
# create the map of Toronto
map_dt_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lng, label in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_dt_toronto)  
    
map_dt_toronto