# Toronto Neighborhoods 

In [1]:
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

## Part 1/3

First, the data is parsed from the Wikipedia page and loaded into a pandas DataFrame.

In [51]:
# Download the Wikipedia page that lists the Canadian postal codes starting with "M".
WIKI_URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
res = requests.get(WIKI_URL, timeout=30)  # timeout so a stalled connection cannot hang the kernel
# Fail loudly on an HTTP error instead of parsing an error page as if it were data.
res.raise_for_status()

# Name the parser explicitly so the result does not depend on which optional
# parser libraries happen to be installed in the environment.
soup = BeautifulSoup(res.content, 'html.parser')

# Take the first <table> on the page, which is expected to hold the postal-code data.
table = soup.find_all('table')[0]

# read_html returns a list of DataFrames. The markup is wrapped in StringIO
# because passing a literal HTML string is deprecated in recent pandas releases.
df = pd.read_html(StringIO(str(table)))[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Structuring the dataframe
The DataFrame is modified to match the required structure.


In [62]:
# Rename the postal-code column in place; every other column keeps its name.
column_renames = {'Postcode': 'PostalCode'}
df.rename(mapper=column_renames, axis="columns", inplace=True)

In [53]:
# Preview the first rows of df.
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [54]:
# Remove, in place, every row whose Borough is the "Not assigned" placeholder.
unassigned_rows = df.index[df["Borough"] == "Not assigned"]
df.drop(index=unassigned_rows, inplace=True)

In [55]:
# Preview df after the "Not assigned" boroughs were dropped.
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [56]:
# Where the neighbourhood is the "Not assigned" placeholder, reuse the row's
# borough name as its neighbourhood.
needs_fill = df['Neighbourhood'].eq("Not assigned")
df.loc[needs_fill, "Neighbourhood"] = df.loc[needs_fill, 'Borough']

In [59]:
# Collapse rows that share a postal code and borough into a single row whose
# Neighbourhood value is the comma-separated list of the original names.
# The grouping key is 'PostalCode' (not 'Postcode') because the column was
# renamed earlier in the notebook; grouping on the old name raises KeyError
# when the notebook is run top to bottom on a fresh kernel.
group_keys = ['PostalCode', 'Borough']
df = (
    df.groupby(group_keys, sort=False)['Neighbourhood']  # sort=False keeps first-seen order
      .agg(','.join)
      .reset_index()  # turn the group keys back into ordinary columns
)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


In [60]:
# (rows, columns) of df after grouping.
df.shape

(103, 3)

## Part 2/3

Getting the latitude and longitude of each neighbourhood

In [64]:
# Preview df before the coordinates are added.
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


The geocoder did not work, so I used the alternative provided by the tutors:
I read the latitude and longitude for each postal code from the CSV file at the given link.

In [79]:
# Load the CSV of coordinates (Postal Code, Latitude, Longitude) from the course link.
url = "http://cocl.us/Geospatial_data"
latitud_longitud = pd.read_csv(filepath_or_buffer=url)

In [80]:
# Preview the coordinate table.
latitud_longitud.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


I merge the two DataFrames into one, joining on the postal code.

In [86]:
# Inner-join the neighbourhood table with the coordinate table on postal code,
# then discard the right-hand key column, which duplicates 'PostalCode'.
d_toronto = (
    df.merge(latitud_longitud, how='inner', left_on='PostalCode', right_on='Postal Code')
      .drop(columns=['Postal Code'])
)

In [85]:
# Preview the merged frame.
d_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M3A,North York,Parkwoods,M3A,43.753259,-79.329656
1,M4A,North York,Victoria Village,M4A,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",M5A,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",M6A,43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,M7A,43.662301,-79.389494


## Part 3/3

In [None]:
# @hidden cell