Create dataframe of Toronto Neighborhoods

In [1]:
#Import needed dependencies
import pandas as pd
import requests
import csv
import numpy as np

In [2]:
#Read table from Wikipedia page into pandas list
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
df=pd.read_html(url)

In [3]:
#create dataframe of neighborhood data
toronto_data=df[0]
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [4]:
#remove empty rows from dataset
toronto_data.dropna(axis=0, how='any',thresh=None, subset=None, inplace=True)
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
toronto_data.shape

(103, 3)

Add latitude and longitude values to the Toronto Dataframe

In [6]:
#Import CSV file with Latitude and Longitude for Toronto 
file_name='https://cocl.us/Geospatial_data'
toronto_latlng=pd.read_csv(file_name)
toronto_latlng.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [8]:
#Add latitude and Longitude to Toronto Dataframe by merging two dataframes where the Postal Code matches
toronto_merge = pd.merge(toronto_data, toronto_latlng, on="Postal Code", how="left")
toronto_merge.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


Explore and cluster the neighborhoods in Toronto

In [9]:
#Import dependencies
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!pip install folium # map rendering library
import folium

print('Libraries imported.')

Libraries imported.


In [10]:
#Define Foursquare credentials and version
CLIENT_ID = 'WI3VANFJXROYGCKHE5JDVDB3GPY4EBTF24IMDW1RBBJSRBSQ' # your Foursquare ID
CLIENT_SECRET = 'KJEHAZKT1JLOSJJML1II0EMAIDVZMFELUVWPHJWOL4FGBV0L' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)


Your credentails:
CLIENT_ID: WI3VANFJXROYGCKHE5JDVDB3GPY4EBTF24IMDW1RBBJSRBSQ
CLIENT_SECRET:KJEHAZKT1JLOSJJML1II0EMAIDVZMFELUVWPHJWOL4FGBV0L


In [11]:
#Use geopy library to get the latitude and longitude values of Toronto
address = 'Toronto, Canada'
geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))


The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [16]:
#Create a map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_merge['Latitude'], toronto_merge['Longitude'], toronto_merge['Borough'], toronto_merge['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
  
map_toronto


In [17]:
#Cluster the neighborhoods in Downtown Toronto
dwtn_Tor = toronto_merge[toronto_merge['Borough']=='Downtown Toronto'].reset_index(drop=True)
dwtn_Tor.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [18]:
#Explore the Berczy Park neighborhood
neighborhood_latitude = dwtn_Tor.loc[4,'Latitude']
neighborhood_longitude = dwtn_Tor.loc[4,'Longitude']

neighborhood_name = dwtn_Tor.loc[4,'Neighborhood']

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))


Latitude and longitude values of Berczy Park are 43.644770799999996, -79.3733064.


In [20]:
#Explore the top 20 venues that are in Berczy Park within a radius of 250 meters
LIMIT = 20
radius = 250
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)

results = requests.get(url).json()
results


{'meta': {'code': 200, 'requestId': '5eb70605ed78b8001bc87b27'},
 'response': {'headerLocation': 'Bay Street Corridor',
  'headerFullLocation': 'Bay Street Corridor, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 8,
  'suggestedBounds': {'ne': {'lat': 43.65573170225, 'lng': -79.38083074029716},
   'sw': {'lat': 43.65123169775, 'lng': -79.38703865970284}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5227bb01498e17bf485e6202',
       'name': 'Downtown Toronto',
       'location': {'lat': 43.65323167517444,
        'lng': -79.38529600606677,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.65323167517444,
          'lng': -79.38529600606677}],
        'distance': 113,
        'cc': 'CA',
        'city': 'Toronto',
        '

In [21]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']


In [23]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))


8 venues were returned by Foursquare.


In [25]:
nearby_venues.head(8)

Unnamed: 0,name,categories,lat,lng
0,Downtown Toronto,Neighborhood,43.653232,-79.385296
1,Nathan Phillips Square,Plaza,43.65227,-79.383516
2,Poke Guys,Poke Place,43.654895,-79.385052
3,Textile Museum of Canada,Art Museum,43.654396,-79.3865
4,Chatime 日出茶太,Bubble Tea Shop,43.655542,-79.384684
5,Japango,Sushi Restaurant,43.655268,-79.385165
6,DoubleTree by Hilton,Hotel,43.654608,-79.385942
7,Old City Hall,Monument / Landmark,43.652009,-79.381744


There are 8 nearby venues in downtown Toronto, they are all varied.