# Segmenting and Clustering Neighborhoods in Toronto

#### Importing of packages

In [5]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

#### Fetching of webpage

In [6]:
url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(url,'lxml')

#### Find all the tables in the webpage

In [7]:
table = soup.find('table')

#### Make the table into pandas dataframe

In [8]:
df = pd.read_html(str(table))[0]
df = df.rename(columns=df.iloc[0]).drop(df.index[0])
df

Unnamed: 0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park
7,M6A,North York,Lawrence Heights
8,M6A,North York,Lawrence Manor
9,M7A,Queen's Park,Not assigned
10,M8A,Not assigned,Not assigned


#### Drop the rows whose value is not assigned in Borough column

In [9]:
df = df[df.Borough != 'Not assigned']

In [10]:
df

Unnamed: 0,Postcode,Borough,Neighbourhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park
7,M6A,North York,Lawrence Heights
8,M6A,North York,Lawrence Manor
9,M7A,Queen's Park,Not assigned
11,M9A,Etobicoke,Islington Avenue
12,M1B,Scarborough,Rouge
13,M1B,Scarborough,Malvern


#### Find the dimension of the dataframe

In [11]:
df.shape

(212, 3)

In [12]:
df = df.rename(columns = {'Postcode':'Postal Code'})

#### Read the geospatial coordinates csv file

In [13]:
new_df = pd.read_csv("Geospatial_Coordinates.csv")

#### Merge the two dataframes along 'Postal Code' value

In [14]:
n = pd.merge(df, new_df, on='Postal Code')

In [15]:
n

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.654260,-79.360636
3,M5A,Downtown Toronto,Regent Park,43.654260,-79.360636
4,M6A,North York,Lawrence Heights,43.718518,-79.464763
5,M6A,North York,Lawrence Manor,43.718518,-79.464763
6,M7A,Queen's Park,Not assigned,43.662301,-79.389494
7,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
8,M1B,Scarborough,Rouge,43.806686,-79.194353
9,M1B,Scarborough,Malvern,43.806686,-79.194353


#### Import folium

In [16]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium
print('Folium installed and imported!')

Solving environment: done

# All requested packages already installed.

Folium installed and imported!


#### Toronto latitude and longitude values

In [17]:
latitude = 43.6529
longitude = -79.3849
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=12)
toronto_map