# Segmenting and Clustering Neighborhoods in Toronto

#### Import packages

In [6]:
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

#### Fetch the webpage

In [7]:
# Download the Wikipedia page listing the "M" postal codes of Canada.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() makes an HTTP error fail here instead of letting an
# error page be parsed as if it were the article.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()

# `url` keeps its original meaning in this notebook: the page's HTML text
# (not the address), which is what BeautifulSoup parses below.
url = response.text
soup = BeautifulSoup(url, 'lxml')

#### Find the first table in the webpage

In [8]:
# Grab the first <table> element of the parsed page (None if there is none).
table = soup.find(name='table')

#### Convert the table into a pandas DataFrame

In [9]:
# Parse the scraped <table> element into a DataFrame. The HTML is wrapped in
# StringIO because passing a literal HTML string to read_html is deprecated
# in recent pandas releases.
df = pd.read_html(StringIO(str(table)))[0]

# Promote the first parsed row to column labels and drop it from the data.
# NOTE(review): this assumes read_html returned the header as an ordinary
# first data row; confirm against the pandas version in use.
df = df.rename(columns=df.iloc[0]).drop(df.index[0])
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


#### Clean the data

In [10]:
# Keep only postal codes that have a borough assigned.
df_filter = df[df.Borough != 'Not assigned']

# A neighbourhood marked 'Not assigned' takes the name of its own borough.
# Both Series come from df_filter, so they share an index and every row is
# filled from its own Borough value. Doing this before grouping also cleans
# the placeholder when it appears alongside real names for a postal code.
neighbourhood = df_filter.Neighbourhood.mask(
    df_filter.Neighbourhood == 'Not assigned',
    df_filter.Borough,
)

# One row per (Postcode, Borough); neighbourhood names joined with ', '.
df_clean = (
    df_filter.assign(Neighbourhood=neighbourhood)
    .groupby(['Postcode', 'Borough'])['Neighbourhood']
    .apply(', '.join)
    .reset_index()
)

In [11]:
# Preview the first five rows of the cleaned frame.
df_clean.head(n=5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


#### Find the dimensions of the dataframe

In [12]:
# (number of rows, number of columns) of the cleaned frame.
(len(df_clean.index), len(df_clean.columns))

(103, 3)

In [13]:
# Rename 'Postcode' to 'Postal Code' so the key column matches the one used
# for the merge below. Note: this rebinds `df`, which previously held the raw
# scraped table, to the cleaned frame.
df = df_clean.rename({'Postcode': 'Postal Code'}, axis='columns')

#### Read the geospatial coordinates csv file

In [14]:
# Load the coordinates table from the CSV file in the working directory.
new_df = pd.read_csv(filepath_or_buffer="Geospatial_Coordinates.csv")

#### Merge the two dataframes on the 'Postal Code' column

In [16]:
# Attach the coordinates to each postal code. This is an inner join (the
# pandas default), so a postal code missing from either frame is dropped.
data = df.merge(new_df, how='inner', on='Postal Code')

In [17]:
# Preview the first five rows of the merged frame.
data.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
