# Neighborhoods in Toronto

### *Week 3 Assignment for IBM Coursera Applied Data Science Capstone*




### 1.  Bring in data from Wikipedia and clean it

In [148]:
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Download the Wikipedia page listing Canadian postal codes that start with "M".
# A timeout keeps the cell from hanging indefinitely, and raise_for_status()
# surfaces HTTP errors here instead of letting an error page reach the parser.
link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(link, timeout=30)
response.raise_for_status()
source = response.text

# Use BeautifulSoup to parse the html and find the first table with class "wikitable"
soup = BeautifulSoup(source, 'lxml')
match = soup.find('table', class_='wikitable')
if match is None:
    # find() returns None when nothing matches; without this check the string
    # 'None' would be handed to read_html and produce a misleading error.
    raise ValueError(f"No 'wikitable' table found at {link}")

# Convert the html table to a dataframe, taking the first row as the header.
# The markup is wrapped in StringIO because passing a literal html string to
# read_html is deprecated in recent pandas releases.
df = pd.read_html(StringIO(str(match)), header=0)[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [149]:
# Row and column counts of the scraped table before any cleaning, shown as a
# baseline to compare against the shapes reported after each cleaning step
df.shape

(289, 3)

In [150]:
# Keep only the rows whose Borough holds a real value, i.e. discard every
# record where it is the placeholder 'Not assigned'
df = df.loc[df['Borough'].ne('Not assigned')]
df.shape

(212, 3)

In [151]:
# One row per (Postcode, Borough) pair: the neighbourhood names that share a
# postal code are concatenated into a single comma-separated string
df = (
    df.groupby(['Postcode', 'Borough'])['Neighbourhood']
    .agg(', '.join)
    .reset_index()
)
df.shape

(103, 3)

In [152]:
# Where a Neighbourhood is 'Not assigned', fall back to the Borough name
# (this is the case for Queen's Park), then display that row as a check
df['Neighbourhood'] = df['Neighbourhood'].mask(
    df['Neighbourhood'].eq('Not assigned'),
    df['Borough'],
)
df.loc[df['Neighbourhood'] == "Queen's Park"]

Unnamed: 0,Postcode,Borough,Neighbourhood
85,M7A,Queen's Park,Queen's Park


In [153]:
# Keep the cleaned table under the name `toronto`. This binds a second name to
# the same DataFrame object (no copy is made). Then show its final shape.
toronto = df
toronto.shape

(103, 3)

### 2.  Collect Geospatial Data and add to DataFrame

In [154]:
# Read the geospatial CSV from its URL; the preview below shows the
# 'Postal Code', 'Latitude' and 'Longitude' columns
path = 'http://cocl.us/Geospatial_data'
df = pd.read_csv(filepath_or_buffer=path)
df.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [155]:
# Attach the coordinates to each postal code with a left join:
# toronto['Postcode'] is matched against the geo table's 'Postal Code' index.
geo = df.set_index('Postal Code')
# Drop any coordinate columns left over from a previous run of this cell;
# otherwise join() raises "columns overlap but no suffix specified" on re-run.
toronto = toronto.drop(columns=geo.columns, errors='ignore').join(geo, on='Postcode')
toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
