# APPLIED DATA SCIENCE CAPSTONE PROJECT
#### This notebook will be used for the IBM Applied Data Science Capstone Project course

### WEEK 3 - Segmenting and Clustering Neighborhoods in Toronto

In [62]:
import os

import numpy as np
import pandas as pd

#### QUESTION 1 - Create the Toronto neighborhood pandas DataFrame by scraping the Wikipedia page

First of all, let's scrape the HTML Wikipedia page and save it into a pandas dataframe

In [63]:
# Wikipedia page listing the postal codes that start with "M"
WIKI_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# pandas.read_html returns a list with one DataFrame per HTML table found
# on the page; the postal-code table is the first entry of that list
wiki_tables = pd.read_html(WIKI_URL)
wiki_table_df = wiki_tables[0]
wiki_table_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


Now let's process the dataframe as required by the assignment

In [64]:
# Keep only the rows whose Borough is assigned (rows with
# Borough == 'Not assigned' are dropped), then renumber the index from 0
has_borough = wiki_table_df['Borough'].ne('Not assigned')
neigh_df = wiki_table_df.loc[has_borough].reset_index(drop=True)
print('Toronto Boroughts:',neigh_df['Borough'].unique())

Toronto Boroughts: ['North York' 'Downtown Toronto' 'Etobicoke' 'Scarborough' 'East York'
 'York' 'East Toronto' 'West Toronto' 'Central Toronto' 'Mississauga']


In [65]:
# Sanity check: select any remaining rows whose Neighbourhood is still
# 'Not assigned' (an empty result means there is nothing left to fix)
unassigned_mask = neigh_df['Neighbourhood'].eq('Not assigned')
not_ass_neigh_df = neigh_df.loc[unassigned_mask]
not_ass_neigh_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood


In [66]:
# Relabel the columns positionally with the names used in the rest of the
# notebook, then preview the first rows of the result
neigh_df = neigh_df.set_axis(['PostalCode', 'Borough', 'Neighborhood'], axis=1)
neigh_df.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [67]:
# Report the (rows, columns) size of the cleaned neighborhood dataframe
neigh_shape = neigh_df.shape
print('Shape of the neighborhood dataframe:', neigh_shape)

Shape of the neighborhood dataframe: (103, 3)


#### QUESTION 2 - Retrieve latitudes and longitudes of each neighborhood

In [68]:
# CSV file holding the coordinates of each postal code
GEO_CSV_URL = 'http://cocl.us/Geospatial_data'

# Load the file and relabel its columns positionally so that the key column
# carries the same name ('PostalCode') as in the neighborhood dataframe
coord_df = pd.read_csv(GEO_CSV_URL).set_axis(['PostalCode', 'Latitude', 'Longitude'], axis=1)
coord_df.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [53]:
# Report the size of the coordinates dataframe so it can be compared with
# the size of the neighborhood dataframe
coord_shape = coord_df.shape
print('Shape of the coordinates dataframe:', coord_shape)

Shape of the coordinates dataframe: (103, 3)


In [69]:
# Index the coordinates by postal code so they can be used as a lookup table
coord_by_code = coord_df.set_index('PostalCode')

# Attach Latitude/Longitude to every neighborhood row via its PostalCode
# (DataFrame.join defaults to a left join, so all neighborhood rows are kept)
neigh_coord_df = neigh_df.join(coord_by_code, on='PostalCode')
neigh_coord_df.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [70]:
# Report the (rows, columns) size of the joined dataframe
merged_shape = neigh_coord_df.shape
print('Shape of the merged dataframe:', merged_shape)

Shape of the merged dataframe: (103, 5)


#### QUESTION 3 - Cluster Toronto neighborhoods

In [71]:
#!pip install folium
import folium
import requests

First create the Toronto map with all the neighborhoods

In [86]:
# Base folium map for Toronto
toronto_map = folium.Map(location=[43.7, -79.347015], zoom_start=11.5)

# One plain (lat, lng, borough, neighborhood) tuple per dataframe row
marker_rows = neigh_coord_df[['Latitude', 'Longitude', 'Borough', 'Neighborhood']].itertuples(index=False, name=None)

# Draw a circle marker for every row; its popup shows "<neighborhood> - <borough>"
for lat, lng, borough, neighborhood in marker_rows:
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup('{} - {}'.format(neighborhood, borough), parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(toronto_map)

# Last expression of the cell: renders the map
toronto_map

Let's explore the East York borough

In [87]:
# Foursquare API configuration.
#
# SECURITY: credentials must not be stored in the notebook. They are read
# from the environment variables FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET instead. The key pair that used to be hard-coded
# in this cell has been published and should be regenerated.
#
# The "hide-cell" tag belongs in the cell metadata, not in the code: as a
# bare dict literal it was an expression with no effect, so it was removed.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

# Warn early instead of failing later with an opaque API error
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before calling the Foursquare API')