# Neighborhoods in Toronto

### 1 - Scrape Wikipedia page "Canada Postal Codes"

In [47]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [48]:
page = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(page.content, 'html.parser')

In [49]:
table = soup.find('tbody')
rows = table.select('tr')
row = [r.get_text() for r in rows]

### 2 - Create dataframe as shown in picture

In [50]:
df = pd.DataFrame(row)
df = df[0].str.split('\n', expand=True)
df = df.rename(columns=df.iloc[0])
df = df.drop(df.index[0])
df = df.rename(columns={'Postcode':'PostalCode'})
df.head()

Unnamed: 0,Unnamed: 1,PostalCode,Borough,Neighbourhood,Unnamed: 5
1,,M1A,Not assigned,Not assigned,
2,,M2A,Not assigned,Not assigned,
3,,M3A,North York,Parkwoods,
4,,M4A,North York,Victoria Village,
5,,M5A,Downtown Toronto,Harbourfront,


### 3 - Clean dataframe

#### Ignore cells with "Not assigned" Borough

In [51]:
df_clean = df[df.Borough != 'Not assigned']
df_clean.reset_index(inplace = True)
df_clean.head()

Unnamed: 0,index,Unnamed: 2,PostalCode,Borough,Neighbourhood,Unnamed: 6
0,3,,M3A,North York,Parkwoods,
1,4,,M4A,North York,Victoria Village,
2,5,,M5A,Downtown Toronto,Harbourfront,
3,6,,M5A,Downtown Toronto,Regent Park,
4,7,,M6A,North York,Lawrence Heights,


#### Combine neighbourhoods with same PostalCode

In [52]:
df_clean = df_clean.groupby(['PostalCode', 'Borough'], sort = False).agg(','.join)
df_clean.reset_index(inplace = True)
df_clean.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Not assigned


#### Change the value of the Neighbourhood to be like the Borough "Queen's Park"

In [53]:
df_clean = df_clean.replace("Not assigned", "Queen's Park")
df_clean.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


### 4 - Shape of the dataframe

In [54]:
df_clean.shape

(103, 3)

### 5 - Geospatial data

In [55]:
df_geo = pd.read_csv('http://cocl.us/Geospatial_data')
df_geo.columns = ['PostalCode', 'Latitude', 'Longitude']

#### Merge and create as in picture

In [56]:
df_pos = pd.merge(df_clean, df_geo, on=['PostalCode'], how='inner')
df_pos = df_pos[['Borough', 'Neighbourhood', 'PostalCode', 'Latitude', 'Longitude']]

In [57]:
df_pos.head()

Unnamed: 0,Borough,Neighbourhood,PostalCode,Latitude,Longitude
0,North York,Parkwoods,M3A,43.753259,-79.329656
1,North York,Victoria Village,M4A,43.725882,-79.315572
2,Downtown Toronto,"Harbourfront,Regent Park",M5A,43.65426,-79.360636
3,North York,"Lawrence Heights,Lawrence Manor",M6A,43.718518,-79.464763
4,Queen's Park,Queen's Park,M7A,43.662301,-79.389494
