#### Importing modules for web scraping and for building the dataframe

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

#### Getting webpage content and putting it into a BeautifulSoup object

In [2]:
# Download the Wikipedia page that lists Canadian postal codes starting with "M".
# The parser name belongs to BeautifulSoup, not to requests.get: the second positional
# argument of requests.get is `params`, so passing 'html.parser' there would send it
# to the server as a query string instead of selecting a parser.
webpage_response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,  # fail instead of hanging forever if the server does not answer
)
webpage_response.raise_for_status()  # stop here on an HTTP error rather than parsing an error page
webpage = webpage_response.content
# Name the parser explicitly so the parse result does not depend on which parsers are installed.
soup = BeautifulSoup(webpage, 'html.parser')

#### The table headings on the webpage can be used for the columns for our dataframe.  

In [3]:
# The column names are the cells of the first row of the first <tbody> on the page.
# Only direct <th>/<td> children of that row are read (recursive=False), which skips
# the whitespace-only text nodes between cells.
# get_text() is used instead of .string because .string is None whenever a cell
# contains more than one child node (e.g. a link followed by a newline).
headings = [
    cell.get_text().strip()
    for cell in soup.tbody.tr.find_all(['th', 'td'], recursive=False)
]

headings

['Postal Code', 'Borough', 'Neighborhood']

#### After the tr tags in the html, the postal codes, boroughs, and neighborhoods are found in the td tags under the tbody
##### I needed to use mod to put each value into the correct list, as the data comes in groups of three

In [17]:
postal_codes, borough, neighborhood = [], [], []

# find_all("td") returns every data cell of the table as one flat sequence, so the
# cells arrive in repeating groups of three: postal code, borough, neighborhood.
# The cell's position modulo 3 therefore selects the list the value belongs to.
targets = (postal_codes, borough, neighborhood)
for position, cell in enumerate(soup.tbody.find_all("td")):
    # get_text() instead of .string: .string is None when a cell has more than one
    # child node (e.g. a link plus trailing newline), which would crash on .split().
    targets[position % 3].append(cell.get_text().strip())


#### This next cell combines the table data with the headings we found into a dataframe

In [36]:
# Zip the three parallel lists into one tuple per table row and label the
# resulting columns with the headings scraped from the page.
df_Toronto = pd.DataFrame(
    data=[*zip(postal_codes, borough, neighborhood)],
    columns=headings,
)
df_Toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


#### This next cell removes any rows in which a borough is not assigned

In [37]:
# Keep only the rows whose borough is assigned, then renumber the rows from 0.
# drop=True discards the old index directly, so no temporary 'index' column is
# created that would have to be dropped afterwards.
df_Toronto = df_Toronto[df_Toronto['Borough'] != 'Not assigned'].reset_index(drop=True)
df_Toronto.head()


Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


#### Then we check for any neighborhoods that are not assigned; I did not find any

In [38]:
# Select the rows whose neighborhood is 'Not assigned' and count the non-null
# entries per column; all zeros means no such rows exist.
df_Toronto.loc[df_Toronto['Neighborhood'].eq('Not assigned')].count()

Postal Code     0
Borough         0
Neighborhood    0
dtype: int64

#### Finally we print the shape of the dataframe and see that we have 103 rows

In [39]:
# .shape is a (number of rows, number of columns) tuple for df_Toronto.
df_Toronto.shape

(103, 3)