# Segmenting and Clustering Neighbourhoods in Toronto Part 2

In [11]:
# import all the dependencies needed

import pandas as pd
import numpy as np
import requests
import lxml
from bs4 import BeautifulSoup

In [15]:
# ! pip install lxml

### Get data from wiki page

In [14]:
# Download the Wikipedia page listing the Canadian postal codes starting with "M".
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# stops an HTTP error page from being parsed as if it were the real article.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
wiki = response.text
soup = BeautifulSoup(wiki,"html.parser")

table = soup.find('table', {'class': 'wikitable sortable'})
if table is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError("No table with class 'wikitable sortable' was found on the page")
table_rows = table.find_all('tr')

# One list of stripped <td> texts per <tr>; a row that has no <td> cells
# produces an empty list.
my_data = [[t.text.strip() for t in row.find_all('td')] for row in table_rows]

df = pd.DataFrame(my_data, columns=['PostalCode', 'Borough', 'Neighbourhood'])
# Drop the row built from the first <tr> (presumably the header row, which
# would carry <th> rather than <td> cells - confirm against the page markup).
df = df[1:]
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Remove rows whose Borough value is 'Not assigned'

In [4]:
# Keep only the rows whose Borough is something other than 'Not assigned',
# then renumber the index from 0.
has_borough = df['Borough'].ne('Not assigned')
df = df.loc[has_borough].reset_index(drop=True)
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
def change_value(data, col_1, col_2):
    """Replace 'Not assigned' entries in ``col_1`` with the value from ``col_2``.

    The frame is modified in place and nothing is returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to update.
    col_1 : str
        Column whose 'Not assigned' entries are overwritten
        (e.g. 'Neighbourhood').
    col_2 : str
        Column supplying the replacement value for the same row
        (e.g. 'Borough').
    """
    # Rows yielded by iterrows() are copies, so writing to them never reaches
    # the frame; select the affected rows with a mask and assign through .loc.
    not_assigned = data[col_1] == 'Not assigned'
    # to_numpy() makes the assignment positional, so it does not depend on
    # index alignment (safe even if the index contains duplicates).
    data.loc[not_assigned, col_1] = data.loc[not_assigned, col_2].to_numpy()

In [6]:
# Call change_value on df with col_1='Neighbourhood' and col_2='Borough'.
change_value(df, 'Neighbourhood', 'Borough')

In [9]:
# df.head()

In [11]:
# Build a small preview frame holding the rows of df for a fixed list of
# postal codes, in the order the codes are listed.
column_names = ["PostalCode", "Borough", "Neighbourhood"]

test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]

# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every iteration; collect the per-code matches and concatenate them once.
matches = [df[df["PostalCode"] == postcode] for postcode in test_list]
show_df = pd.concat(matches, ignore_index=True)[column_names]

show_df

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M5G,Downtown Toronto,Central Bay Street
1,M2H,North York,Hillcrest Village
2,M4B,East York,"Parkview Hill, Woodbine Gardens"
3,M1J,Scarborough,Scarborough Village
4,M4G,East York,Leaside
5,M4M,East Toronto,Studio District
6,M1R,Scarborough,"Wexford, Maryvale"
7,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."
8,M9L,North York,Humber Summit
9,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har..."


### Print the shape of the dataframe

In [8]:
# DataFrame.shape is a (number of rows, number of columns) tuple.
df.shape

(103, 3)

*****

### Load Geo coordinates data from csv file

In [19]:
# Read the coordinates file; header=0 treats the file's first line as a header
# row, and the names given here replace whatever labels that row contained.
geo_columns = ['PostalCode', 'Latitude', 'Longitude']
geo_df = pd.read_csv('Geospatial_Coordinates.csv', header=0, names=geo_columns)
geo_df.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merge two datasets 

In [20]:
# Left-join the coordinates onto the neighbourhood table by "PostalCode":
# every row of df is kept, and Latitude/Longitude are filled in where
# geo_df has a matching code.
new_df = pd.merge(df, geo_df, how="left", on="PostalCode")
new_df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [33]:
# Select the merged rows for a fixed list of postal codes. isin() keeps the
# rows in new_df's own order, not in the order of test_list.
test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]
# Assign the re-indexed frame: reset_index returns a new frame, so calling it
# without assignment would leave test_df with the old index.
test_df = new_df[new_df['PostalCode'].isin(test_list)].reset_index(drop=True)
test_df

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
2,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
3,M4G,East York,Leaside,43.70906,-79.363452
4,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
5,M2H,North York,Hillcrest Village,43.803762,-79.363452
6,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
7,M9L,North York,Humber Summit,43.756303,-79.565963
8,M4M,East Toronto,Studio District,43.659526,-79.340923
9,M1R,Scarborough,"Wexford, Maryvale",43.750072,-79.295849
