# Gathering the Toronto Neighbourhoods

In [143]:
import numpy as np
import pandas as pd

# Source page: Wikipedia's list of Canadian postal codes that start with "M".
WIKI_POSTAL_CODES_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# read_html returns a list of DataFrames, one per HTML table found on the page.
# df_raw keeps that list exactly as scraped so it can still be inspected later.
df_raw = pd.read_html(WIKI_POSTAL_CODES_URL)

# Work from the first table. rename() without inplace returns a new frame, so
# renaming 'Postcode' to 'Postalcode' no longer modifies df_raw[0].
df = df_raw[0].rename(columns={'Postcode': 'Postalcode'})

1. Drop every row whose **Borough** is 'Not assigned'

In [144]:
# Keep only rows whose Borough is not the literal 'Not assigned', then renumber
# the index from 0 so it is contiguous again.
has_borough = df['Borough'] != 'Not assigned'
df = df[has_borough].reset_index(drop=True)

2. Where the **Neighbourhood** is 'Not assigned', set it to the same value as the row's **Borough**

In [145]:
# Where Neighbourhood holds the literal 'Not assigned', overwrite it with the
# Borough value from the same row.
missing_hood = df['Neighbourhood'] == 'Not assigned'
df.loc[missing_hood, 'Neighbourhood'] = df.loc[missing_hood, 'Borough']

3. Combine the **Neighbourhood** values of all rows that share the same **Postalcode** and **Borough** into a single record, joined by commas

In [146]:
# One output row per (Postalcode, Borough) pair; the Neighbourhood strings of
# each group are concatenated with ',' (no space after the comma).
group_keys = ['Postalcode', 'Borough']
joined_hoods = df.groupby(group_keys)['Neighbourhood'].apply(','.join)

# reset_index turns the group keys back into ordinary columns; the selection
# pins the column order to Postalcode, Borough, Neighbourhood.
toronto_hoods = joined_hoods.reset_index()[group_keys + ['Neighbourhood']]

In [147]:
# Preview the first five rows of the grouped table.
toronto_hoods.head(n=5)

Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [148]:
# Show the (rows, columns) size of the grouped table.
toronto_hoods.shape

(103, 3)

# Adding the Latitude + Longitude to Toronto Neighbourhoods

In [150]:
# Download the geospatial CSV and rename its 'Postal Code' column to
# 'Postalcode', the key name the merge cell joins on.
lat_lon = (
    pd.read_csv('http://cocl.us/Geospatial_data')
    .rename(columns={'Postal Code': 'Postalcode'})
)

In [167]:
# Join the columns of lat_lon onto the grouped neighbourhood rows by postal code.
# how='inner' is the merge default, spelled out here: a postal code that is
# absent from either frame is dropped from the result.
# validate='one_to_one' makes pandas raise MergeError if 'Postalcode' is
# repeated in either frame, instead of silently multiplying rows.
toronto_hoods_lat_lon = toronto_hoods.merge(
    lat_lon,
    on='Postalcode',
    how='inner',
    validate='one_to_one',
)

In [169]:
# Preview the first five rows of the merged table.
toronto_hoods_lat_lon.head(n=5)

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [170]:
# Show the (rows, columns) size of the merged table; comparing the row count
# with toronto_hoods.shape reveals whether the inner merge dropped any rows.
toronto_hoods_lat_lon.shape

(103, 5)