# Toronto Neighborhoods

In [1]:
import pandas as pd

### Web Scraping

In [2]:
import requests
from bs4 import BeautifulSoup

In [3]:
#Wikipedia page scraping

webpage = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
scrapePage = BeautifulSoup(webpage.content, 'html.parser')

postCodesTable = scrapePage.find('tbody')

tabRows = postCodesTable.select('tr')
tabRow = [r.get_text() for r in tabRows]

### Dataframe Creation

In [4]:
#Postal Codes dataframe creation
postDf = pd.DataFrame(tabRow)
postDf = postDf[0].str.split('\n', expand = True)
postDf = postDf.rename(columns = postDf.iloc[0])
postDf = postDf.drop(postDf.index[0])

postDf.head()

Unnamed: 0,Unnamed: 1,Postcode,Borough,Neighbourhood,Unnamed: 5
1,,M1A,Not assigned,Not assigned,
2,,M2A,Not assigned,Not assigned,
3,,M3A,North York,Parkwoods,
4,,M4A,North York,Victoria Village,
5,,M5A,Downtown Toronto,Harbourfront,


### Dataframe Processing

In [5]:
#Ignoring rows with 'Not assigned' as 'Borough' value
postDf = postDf[postDf.Borough != 'Not assigned']

#Neighborhood aggregation (postal codes)
postDf = postDf.groupby(['Postcode', 'Borough'], sort = False).agg(','.join)
postDf.reset_index(inplace = True)

#Filling missing "Neighborhood" values
postDf = postDf.replace("Not assigned", "Queen's Park")

postDf.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


### Dataframe Rows Count

In [6]:
postDf.shape

(103, 3)