# Segmenting and Clustering Neighborhoods in Toronto

## Part 1: Creating and Cleaning dataframe

import dependencies

In [1]:
import pandas as pd
import numpy as np

read the url using the pandas library

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
handle = pd.read_html(url)

parse the html data as a pandas dataframe

In [3]:
headers = ['PostalCode', 'Borough', 'Neighborhood']
df = pd.DataFrame(handle[0])
df.columns = headers

Remove the 'Not assigned' values as per the instructions

In [4]:
df.replace('Not assigned',np.nan,inplace=True)
df.dropna(axis=0,inplace=True)

In [5]:
index = list(map(int,range(0,len(df))))
df.index = index

make multiple neighborhoos commma-separated  as per the instructions

In [6]:
neighborhood_new = list()
for n in df['Neighborhood']:
    n = n.split('/')
    if len(n) > 1:
        n_temp = str()
        for i in range(0,len(n)):
            if i == len(n)-1:
                n_temp = n_temp + n[i]
            else:
                n_temp = n_temp + n[i] + ','                    
        neighborhood_new.append(n_temp)
    else:
        neighborhood_new.append(n[0])
        
df['Neighborhood'] = neighborhood_new

Finally, display the shape of the final dataframe

In [7]:
print(df.shape)
df.head()

(103, 3)


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"


## Part 2: Assigning Latitude-Longitude Co-ordinates

importing the GeoCoordinates from the .csv file and create new pandas dataframe from them

In [8]:
hand_coords = pd.read_csv('Geospatial_Coordinates.csv')  
df_coords = pd.DataFrame(hand_coords)

headers = ['PostalCode', 'Latitude' ,'Longitude']
df_coords.columns = headers
df_coords.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Add the Lat/Long values ot the original dataframe, matching the values to postal codes by sorting both dataframes

In [9]:
df.sort_values(by=['PostalCode'],ascending=True,inplace=True)
df_coords.sort_values(by=['PostalCode'],ascending=True,inplace=True)

In [10]:
df['Latitude'] = list(map(float,df_coords['Latitude']))
df['Longitude'] = list(map(float,df_coords['Longitude']))

df.sort_index(inplace=True)

Display final dataframe, including the lat/log coordinates

In [11]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494


## Part 3: Cluster and Visualise Downtown Toronto

import dependecies for geoloaction and map plotting

In [12]:
#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim 

#!conda install -c conda-forge folium=0.5.0 --yes  
import folium 

Find Coordiates of Toronto to intialise map

In [35]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="Toronto")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

Create map of Toronto area using ALL data entries

In [38]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### For the assignment, I will concentrate on a cluster of locations Downtown

In [26]:
df_downtown = df[df['Borough'] == 'Downtown Toronto'].reset_index(drop=True)

In [33]:
# create map of Manhattan using latitude and longitude values
map_downtown = folium.Map(location=[latitude, longitude], zoom_start=13)

# add markers to map
for lat, lng, label in zip(df_downtown['Latitude'], df_downtown['Longitude'], df_downtown['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_downtown)  
    
map_downtown