# Segmenting and Clustering Neighborhoods in Toronto

#### Importing of packages

In [14]:
import requests
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

#### Fetching of webpage

In [15]:
url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(url,'lxml')

#### Find all the tables in the webpage

In [16]:
table = soup.find('table')

#### Make the table into pandas dataframe

In [17]:
df = pd.read_html(str(table))[0]
df = df.rename(columns=df.iloc[0]).drop(df.index[0])
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


#### Clean the data

In [18]:
df_filter = df[df.Borough != 'Not assigned']
df_clean = df_filter.groupby(['Postcode','Borough'])['Neighbourhood'].apply(', '.join).reset_index()
df_clean.replace({'Not assigned': np.nan }, inplace=True)
df_clean.Neighbourhood.fillna(df.Borough, inplace=True)

In [19]:
df_clean.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


#### Find the dimension of the dataframe

In [20]:
df_clean.shape

(103, 3)

In [21]:
df = df_clean.rename(columns = {'Postcode':'Postal Code'})

#### Read the geospatial coordinates csv file

In [22]:
new_df = pd.read_csv("Geospatial_Coordinates.csv")

#### Merge the two dataframes along 'Postal Code' value

In [23]:
data = pd.merge(df, new_df, on='Postal Code')

data.head()

#### Import folium

In [24]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium
print('Folium installed and imported!')

Solving environment: done

# All requested packages already installed.

Folium installed and imported!


#### Toronto latitude and longitude values

In [25]:
latitude = 43.6529
longitude = -79.3849
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=12)

In [43]:
import matplotlib.colors as colors
from sklearn.cluster import KMeans

X = data['Latitude']
Y = data['Longitude']
Z = np.stack((X, Y), axis=1)

kmeans = KMeans(n_clusters=4, random_state=0).fit(Z)

clusters = kmeans.labels_
colors = ['red', 'green', 'blue', 'yellow']
data['Cluster'] = clusters

for latitude, longitude, borough, cluster in zip(data['Latitude'], data['Longitude'], data['Borough'], data['Cluster']):
    label = folium.Popup(borough, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color=colors[cluster],
        fill_opacity=0.7,
        parse_html=False).add_to(toronto_map)  

In [44]:
toronto_map