**Question 1: Use pandas, or the BeautifulSoup package, or any other way you are comfortable with to transform the data in the table on the Wikipedia page into the above pandas dataframe**

In [0]:
import pandas as pd
import requests
from bs4 import BeautifulSoup  

In [0]:
List_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
source = requests.get(List_url).text

In [0]:
soup = BeautifulSoup(source, 'xml')

In [0]:
table = soup.find('table')

In [0]:
# The dataframe will consist of three columns: PostalCode, Borough, and Neighborhood
column_names = ['PostalCode', 'Borough', 'Neighborhood']
df = pd.DataFrame(columns = column_names)

In [0]:
for tr_cell in table.find_all('tr'):
  row_data = []
  for td_cell in tr_cell.find_all('td'):
    row_data.append(td_cell.text.strip())
  if len(row_data)==3:
    df.loc[len(df)] = row_data


In [7]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


**Data Cleaning**

Remove cells with a borough that is Not assigned.

In [0]:
df = df[df['Borough'] != 'Not assigned']

In [9]:
df[df['Neighborhood']=='Not assigned'] = df['Borough']
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [0]:
temp_df = df.groupby('PostalCode')['Neighborhood'].apply(lambda x: "%s" % ','.join(x))
temp_df=temp_df.reset_index(drop=False)
temp_df.rename(columns={'Neighborhood':'Neighborhood_joined'}, inplace=True)

In [0]:
df_merge = pd.merge(df, temp_df, on = 'PostalCode')

In [0]:
df_merge.drop(['Neighborhood'], axis = 1, inplace = True)

In [0]:
df_merge.drop_duplicates(inplace=True)

In [0]:
df_merge.rename(columns={'Neighborhood_joined': 'Neighborhood'}, inplace = True)

In [15]:
df_merge.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
5,Queen's Park,Queen's Park,Queen's Park


In [16]:
df_merge.shape

(103, 3)