Part 1 - Retrieving data on Toronto Neighborhoods from Wikipedia and cleaning the data

In [1]:
#Import python libraries
import os

import numpy as np
import pandas as pd
import requests

In [10]:
# Scrape every HTML table on the Wikipedia page; the table at index 0 is used as the postal-code list
WIKI_POSTAL_CODES_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_tables = pd.read_html(WIKI_POSTAL_CODES_URL)
df = wiki_tables[0]

In [11]:
# Display the scraped table (the rendered output abbreviates the middle rows of a long frame)
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [12]:
# Remove every row whose Borough is the placeholder 'Not assigned'
unassigned_rows = df.index[df['Borough'] == "Not assigned"]
df = df.drop(index=unassigned_rows)

In [13]:
# Show any remaining rows whose Neighborhood starts with 'Not assigned' (an empty result means none are left)
has_unassigned_neighborhood = df['Neighborhood'].str.match('Not assigned')
df[has_unassigned_neighborhood]

Unnamed: 0,Postal Code,Borough,Neighborhood


In [14]:
# Order the rows by postal code, ascending
df = df.sort_values(by='Postal Code')

In [15]:
# Reset index: drop=True discards the old row labels instead of keeping them as a column,
# so the rows are renumbered from 0 in their current order
df = df.reset_index(drop=True)

In [16]:
# Preview the cleaned table after filtering, sorting and re-indexing
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [17]:
# Display the (rows, columns) shape of the cleaned table
df.shape

(103, 3)

Part 2 - Retrieving Latitude and Longitude information for Toronto Neighborhoods

In [18]:
# Load the CSV of coordinates keyed by postal code, then preview it
GEOSPATIAL_CSV_URL = 'http://cocl.us/Geospatial_data'
tor_postcode_info = pd.read_csv(GEOSPATIAL_CSV_URL)
tor_postcode_info

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [19]:
# Left join: keep every row of the cleaned table and attach the coordinates that share its 'Postal Code'
df_lat_long = df.merge(tor_postcode_info, how='left', on='Postal Code')

In [20]:
# Preview the merged table (postal code, borough, neighborhood, latitude, longitude)
df_lat_long

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


Part 3 - Explore and Cluster Toronto Neighborhoods

* Objective: find places of interest in Westmount, in the borough of Etobicoke (postal code M9P)

In [21]:
# Foursquare credentials are read from the environment so that no secret is stored in the notebook.
# Export FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hardcoded here should be treated as leaked and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not (CLIENT_ID and CLIENT_SECRET):
    # Say so right away instead of letting the problem surface as a failed API call further down
    print('WARNING: FOURSQUARE_CLIENT_ID and/or FOURSQUARE_CLIENT_SECRET is not set; '
          'the Foursquare request below will likely fail.')

VERSION = '20180604'  # sent as the `v` query parameter of the explore request
LIMIT = 30  # sent as the `limit` query parameter
radius = 800  # sent as the `radius` query parameter (presumably metres - confirm against the Foursquare docs)

In [22]:
# Pick out the coordinate columns of the row(s) whose postal code is M9P
is_westmount = df_lat_long['Postal Code'] == 'M9P'
westmount_lat_long = df_lat_long.loc[is_westmount, ['Latitude', 'Longitude']]

# Take the first matching value of each coordinate column
latitude, longitude = (
    westmount_lat_long[column].values[0] for column in ('Latitude', 'Longitude')
)

In [23]:
# Template for the Foursquare "explore venues" request; placeholders are filled positionally
EXPLORE_URL_TEMPLATE = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'

# Real request URL, consumed by the request cell that follows
url = EXPLORE_URL_TEMPLATE.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)

# Display the request with the credentials masked, so the secret never lands in the saved cell output
EXPLORE_URL_TEMPLATE.format('<CLIENT_ID>', '<CLIENT_SECRET>', latitude, longitude, VERSION, radius, LIMIT)

'https://api.foursquare.com/v2/venues/explore?client_id=<CLIENT_ID>&client_secret=<CLIENT_SECRET>&ll=43.696319,-79.53224240000002&v=20180604&radius=800&limit=30'

In [24]:
# Call the Foursquare API. The timeout stops a stalled connection from hanging the kernel, and
# raise_for_status() reports an HTTP error here instead of as a confusing KeyError
# when the JSON body is unpacked afterwards.
response = requests.get(url, timeout=10)
response.raise_for_status()
westmount_results = response.json()

In [25]:
# Pull the venue records out of the payload: response -> groups -> first group -> items
items = westmount_results['response']['groups'][0]['items']

In [26]:
# Flatten the nested venue records into one column per (dotted) JSON field
dataframe = pd.json_normalize(items)

# Keep the venue name and categories, every 'venue.location.*' field, and the venue id
location_columns = [col for col in dataframe.columns if col.startswith('venue.location.')]
filtered_columns = ['venue.name', 'venue.categories', *location_columns, 'venue.id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category listed for a venue.

    Parameters
    ----------
    row : mapping-like (e.g. a dict or a pandas Series)
        Read under the key 'categories'; if that key is absent, under
        'venue.categories'. The value is expected to be a list of dicts
        that each carry a 'name' entry.

    Returns
    -------
    str or None
        The 'name' of the first category, or None when the category list is
        empty or is not a sized object at all (e.g. NaN or None).

    Raises
    ------
    KeyError
        If the row has neither 'categories' nor 'venue.categories'.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real problem and propagates
        categories_list = row['venue.categories']

    try:
        if len(categories_list) == 0:
            return None
    except TypeError:
        # A missing value (NaN / None) has no length: treat it like "no category"
        return None

    return categories_list[0]['name']

# Replace each venue's category list with the name of its first category
primary_category = dataframe_filtered.apply(get_category_type, axis=1)
dataframe_filtered['venue.categories'] = primary_category

# Keep only the last dotted segment of every column name ('venue.location.lat' -> 'lat')
dataframe_filtered.columns = dataframe_filtered.columns.map(lambda col: col.split('.')[-1])

# Preview the first ten venues
dataframe_filtered.head(10)

Unnamed: 0,name,categories,address,lat,lng,labeledLatLngs,distance,postalCode,cc,city,state,country,formattedAddress,crossStreet,id
0,Mayflower Chinese Food,Chinese Restaurant,1500 Royal York Rd,43.692753,-79.531566,"[{'label': 'display', 'lat': 43.69275315666511...",400,M9P 3B6,CA,Toronto,ON,Canada,"[1500 Royal York Rd, Toronto ON M9P 3B6, Canada]",,4c0bf5f5ffb8c9b6010b6e61
1,Weston Golf And Country Club,Golf Course,50 St Phillips Rd,43.701685,-79.533228,"[{'label': 'display', 'lat': 43.701685, 'lng':...",602,M9P 2N6,CA,Etobicoke,ON,Canada,"[50 St Phillips Rd (at Weston Rd), Etobicoke O...",at Weston Rd,4ade06a6f964a520796721e3
2,Pizza Hut,Pizza Place,1564 Royal York Rd,43.696431,-79.533233,"[{'label': 'display', 'lat': 43.69643127748791...",80,M9P 3C3,CA,Etobicoke,ON,Canada,"[1564 Royal York Rd, Etobicoke ON M9P 3C3, Can...",,4da8bc62fa8cc76497075931
3,Starbucks,Coffee Shop,1564 Royal York Road,43.696338,-79.533398,"[{'label': 'display', 'lat': 43.696338, 'lng':...",93,M9P 3C3,CA,Toronto,ON,Canada,[1564 Royal York Road (Royal York/The Westway)...,Royal York/The Westway,54089623498e19198cf69267
4,Subway,Sandwich Place,1500 Royal York Rd. Unit 35,43.692927,-79.531471,"[{'label': 'display', 'lat': 43.69292687319966...",382,M9P 3B6,CA,Toronto,ON,Canada,"[1500 Royal York Rd. Unit 35 (at The Westway),...",at The Westway,4b9b0499f964a52020ec35e3
5,Petro-Canada,Gas Station,,43.690165,-79.530166,"[{'label': 'display', 'lat': 43.690165, 'lng':...",705,,CA,,,Canada,[Canada],,4bc948ad3740b713b19b5e65
6,Metro,Supermarket,1500 Royal York Rd,43.691248,-79.53101,"[{'label': 'display', 'lat': 43.69124813823024...",573,M9P 3B6,CA,Etobicoke,ON,Canada,"[1500 Royal York Rd (Trehorne), Etobicoke ON M...",Trehorne,4e0b84a6fa76f9d12b92312c
7,Pizza Nova,Pizza Place,1500 Royal York Road,43.692817,-79.53157,"[{'label': 'display', 'lat': 43.69281687469844...",393,M9P 3B6,CA,Toronto,ON,Canada,"[1500 Royal York Road, Toronto ON M9P 3B6, Can...",,4c8c196bf87e224b56e93105
8,2 Bros Cuisine,Middle Eastern Restaurant,1500 Royal York Rd,43.692499,-79.531698,"[{'label': 'display', 'lat': 43.69249918005648...",427,M9P 3B6,CA,Toronto,ON,Canada,"[1500 Royal York Rd, Toronto ON M9P 3B6, Canada]",,52f9326e11d2c6755f57f596
9,Dixon & Royal York,Intersection,Dixon Rd.,43.700013,-79.534408,"[{'label': 'display', 'lat': 43.70001307201072...",446,,CA,Toronto,ON,Canada,"[Dixon Rd. (Royal York Rd.), Toronto ON, Canada]",Royal York Rd.,4f7c3852e4b0d04bd2a9feb7


In [27]:
import folium

# Base map centred on the latitude/longitude selected for postal code M9P
venues_map = folium.Map(location=[latitude, longitude], zoom_start=15)

# Shared appearance of every venue marker: a small, semi-transparent blue circle
marker_style = dict(radius=5, fill=True, color='blue', fill_color='blue', fill_opacity=0.6)

# One marker per venue, with the venue's category as the popup text
venue_points = zip(
    dataframe_filtered['lat'],
    dataframe_filtered['lng'],
    dataframe_filtered['categories'],
)
for lat, lng, label in venue_points:
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(venues_map)

# display map
venues_map