# Toronto neighborhoods coordinates

### Creating the dataframe using BeautifulSoup (skip to the last cells to see the final dataframe)

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import numpy as np

In [2]:
# Address of the page to scrape, and an empty dataframe that already has
# the three target columns.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
column_names = ['PostalCode', 'Borough', 'Neighborhood']
toronto_hoods = pd.DataFrame(columns=column_names)

In [3]:
# download the page and parse it with BeautifulSoup
# - timeout: do not hang forever if the server never answers
# - raise_for_status: stop here on an HTTP error instead of parsing an error page
response = requests.get(url, timeout=30)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'html.parser')

In [4]:
# Take the first table of the parsed page and extract the text of its body.
table = soup.table
table_rows = table.tbody.text

# Table rows are separated by three consecutive newlines in that text.
# Slicing from index 1 discards the first piece, which is the table header.
toronto_list = table_rows.split('\n\n\n')[1:]
print(toronto_list)

['M1A\nNot assigned\nNot assigned', 'M2A\nNot assigned\nNot assigned', 'M3A\nNorth York\nParkwoods', 'M4A\nNorth York\nVictoria Village', 'M5A\nDowntown Toronto\nHarbourfront', 'M6A\nNorth York\nLawrence Heights', 'M6A\nNorth York\nLawrence Manor', "M7A\nDowntown Toronto\nQueen's Park", 'M8A\nNot assigned\nNot assigned', "M9A\nQueen's Park\nNot assigned", 'M1B\nScarborough\nRouge', 'M1B\nScarborough\nMalvern', 'M2B\nNot assigned\nNot assigned', 'M3B\nNorth York\nDon Mills North', 'M4B\nEast York\nWoodbine Gardens', 'M4B\nEast York\nParkview Hill', 'M5B\nDowntown Toronto\nRyerson', 'M5B\nDowntown Toronto\nGarden District', 'M6B\nNorth York\nGlencairn', 'M7B\nNot assigned\nNot assigned', 'M8B\nNot assigned\nNot assigned', 'M9B\nEtobicoke\nCloverdale', 'M9B\nEtobicoke\nIslington', 'M9B\nEtobicoke\nMartin Grove', 'M9B\nEtobicoke\nPrincess Gardens', 'M9B\nEtobicoke\nWest Deane Park', 'M1C\nScarborough\nHighland Creek', 'M1C\nScarborough\nRouge Hill', 'M1C\nScarborough\nPort Union', 'M2C\nNo

In [5]:
# Break every scraped row into its newline-separated fields and keep only the
# rows whose second field (the borough) is assigned.
assigned_rows = [
    fields
    for fields in (entry.split('\n') for entry in toronto_list)
    if fields[1] != 'Not assigned'
]

# Spread the kept rows over three parallel lists, one per column.
postalcode = [fields[0] for fields in assigned_rows]
borough = [fields[1] for fields in assigned_rows]
neighborhood = [fields[2] for fields in assigned_rows]

In [6]:
# Allocate the values of the three parallel lists into the dataframe.
# The frame is built in a single call: appending one row at a time copies the
# whole frame on every iteration, and DataFrame.append was removed in pandas 2.0.
toronto_hoods = pd.DataFrame(
    {'PostalCode': postalcode, 'Borough': borough, 'Neighborhood': neighborhood},
    columns=['PostalCode', 'Borough', 'Neighborhood'],
)

toronto_hoods.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor
5,M7A,Downtown Toronto,Queen's Park
6,M9A,Queen's Park,Not assigned
7,M1B,Scarborough,Rouge
8,M1B,Scarborough,Malvern
9,M3B,North York,Don Mills North


In [7]:
# Turn every remaining 'Not assigned' entry into a missing value, then count
# the missing values in each column.
toronto_hoods = toronto_hoods.replace(to_replace='Not assigned', value=np.nan)
toronto_hoods.isna().sum()

PostalCode      0
Borough         0
Neighborhood    1
dtype: int64

In [8]:
# Replace missing neighborhoods with the borough name of the same row.
# The result is assigned back to the column so the frame itself is updated:
# replace(..., inplace=True) on the row returned by .loc[6] may only change a
# temporary copy, and the hard-coded label 6 / literal value fit a single row.
toronto_hoods['Neighborhood'] = toronto_hoods['Neighborhood'].fillna(toronto_hoods['Borough'])

In [9]:
# sanity check: print the Python type of toronto_hoods
print(type(toronto_hoods))

<class 'pandas.core.frame.DataFrame'>


In [10]:
# Collapse the rows so that each (PostalCode, Borough) pair appears once, with
# all of its neighborhoods combined into a single comma-separated string.
def strJoin(x):
    """Return the values of x, cast to str, joined with ', '."""
    return ", ".join(x.astype(str))

# as_index=False keeps the keys as regular columns; sort=False keeps the
# groups in order of first appearance.
postal_groups = toronto_hoods.groupby(['PostalCode', 'Borough'], as_index=False, sort=False)
toronto_hoods = postal_groups.agg({'Neighborhood': strJoin})

In [11]:
# preview the first rows after the neighborhoods were aggregated per postal code
toronto_hoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park


In [12]:
# (rows, columns) of the aggregated dataframe
toronto_hoods.shape

(103, 3)

### Getting coordinates and creating final dataframe

In [13]:
# Read the latitude/longitude table and rename its 'Postal Code' column to
# 'PostalCode', the name used in toronto_hoods.
coordenates = 'https://cocl.us/Geospatial_data'
latlong = pd.read_csv(coordenates).rename(columns={'Postal Code': 'PostalCode'})
latlong.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [14]:
# (rows, columns) of the coordinates table
latlong.shape

(103, 3)

In [15]:
# Inner-join the neighborhoods with their coordinates on the PostalCode column
# the two frames share; sort=True orders the result by that key.
toronto_final = toronto_hoods.merge(latlong, how='inner', on='PostalCode', sort=True)

In [16]:
# preview the first rows of the merged dataframe
toronto_final.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [17]:
# (rows, columns) of the merged dataframe
toronto_final.shape

(103, 5)