## Segmenting and Clustering Neighborhoods in Toronto

In [1]:
# Importing all dependencies we'll need
import numpy as np 

import pandas as pd 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json 

# !conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# !conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

import warnings
warnings.filterwarnings('ignore')

print('Libraries imported.')

Libraries imported.


In [2]:
import bs4 # BeautifulSoup Package
from bs4 import BeautifulSoup

#### From the Wikipedia Page - Toroto is made up of 140 Neighbours - We are going to create a dataset with all the 140 Neighbours alongside their Municipality Codes

In [3]:
wikipedia_page = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
results = requests.get(wikipedia_page)

In [4]:
wikipedia_html = results.text

In [5]:
soup = BeautifulSoup(wikipedia_html, 'html.parser')
for table in soup.find_all('table'):
    print(table.get('class'))

['wikitable', 'sortable']
['multicol']
None
['navbox']
None


In [6]:
neighborhood_html_table = soup.find_all('table')[0]
df = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", header=0)
df[0]

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


In [7]:
toronto_df = df[0]

### Remove the Not Assigned from the Borough Column of the DataFrame

In [8]:
toronto_df = toronto_df[toronto_df.Borough != "Not assigned"]

### Explore Postal Codes with More than one Neighbourhood

In [9]:
toronto_df.Postcode.value_counts().head()

M9V    8
M8Y    8
M5V    7
M9B    5
M4V    5
Name: Postcode, dtype: int64

### Combine All the Neighbourhoods with the Same Postal Code

In [32]:
toronto_df = toronto_df.groupby(["Postcode", "Borough"]).agg(lambda x: ', '.join(set(x))).reset_index()

### Replace all Not Assigned Neighbourhoods with the value of the Borough

In [33]:
toronto_df[toronto_df.Neighbourhood == 'Not assigned']

Unnamed: 0,Postcode,Borough,Neighbourhood


In [34]:
toronto_df.loc[8, "Neighbourhood"] = toronto_df.loc[8, "Borough"]
toronto_df[toronto_df.Neighbourhood == 'Not assigned']

Unnamed: 0,Postcode,Borough,Neighbourhood


### In this first notebook I have grabed the html table using pandas read_html() and made various transformation to the DataFrame. 

In [35]:
# Lets Look at the resultant Shape of the Data
toronto_df.shape

(103, 3)

## Joining the Geospatial Data with the Dataframe to Obtain the Latitude and Longitudes

In [24]:
# geospatial_df = pd.read_csv('Geospatial_Coordinates.csv')
import types
import pandas as pd
from ibm_botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share your notebook.
client_d6d706e7d1cd4cee888d0fb73e235a18 = ibm_boto3.client(service_name='s3',
    ibm_api_key_id='f2H2YVHLxULwDGzQl-JI0Dd9Xjwa2K-A6u6m3ouu37K7',
    ibm_auth_endpoint="https://iam.bluemix.net/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3-api.us-geo.objectstorage.service.networklayer.com')

body = client_d6d706e7d1cd4cee888d0fb73e235a18.get_object(Bucket='courseracapstone-donotdelete-pr-fyz0vujr4b5v6c',Key='Geospatial_Coordinates.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_data_1 = pd.read_csv(body)
df_data_1.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [28]:
geospatial_data = df_data_1
geospatial_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [30]:
geospatial_data.shape

(103, 3)

In [36]:
toronto_df.shape

(103, 3)

In [41]:
toronto_df_lat = pd.merge(toronto_df, geospatial_data, left_index=True, right_index=True, how='outer')
toronto_df_lat

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",M1C,43.784535,-79.160497
2,M1E,Scarborough,"West Hill, Morningside, Guildwood",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,M1J,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, East Birchmount Park, Ionview",M1K,43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",M1L,43.711112,-79.284577
8,M1M,Scarborough,Scarborough,M1M,43.716316,-79.239476
9,M1N,Scarborough,"Cliffside West, Birch Cliff",M1N,43.692657,-79.264848


In [46]:
# toronto_df_lat.drop(columns=['Postal Code'], inplace=True)
toronto_df_lat.head(13)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"West Hill, Morningside, Guildwood",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, East Birchmount Park, Ionview",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,Scarborough,43.716316,-79.239476
9,M1N,Scarborough,"Cliffside West, Birch Cliff",43.692657,-79.264848
