### Importing Libraries

In [8]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


### Loading our Dataset
Luckily for us, the data on Toronto is free for public use here: https://open.toronto.ca/dataset/neighbourhoods/

In [17]:
with open('C:/Users/User/Neighbourhoods.geojson') as json_data:
    toronto_data = json.load(json_data)
toronto_data

{'type': 'FeatureCollection',
 'crs': {'type': 'name',
  'properties': {'name': 'urn:ogc:def:crs:OGC:1.3:CRS84'}},
 'features': [{'type': 'Feature',
   'properties': {'_id': 5461,
    'AREA_ID': 25886861,
    'AREA_ATTR_ID': 25926662,
    'PARENT_AREA_ID': 49885,
    'AREA_SHORT_CODE': 94,
    'AREA_LONG_CODE': 94,
    'AREA_NAME': 'Wychwood (94)',
    'AREA_DESC': 'Wychwood (94)',
    'X': None,
    'Y': None,
    'LONGITUDE': -79.425514947,
    'LATITUDE': 43.6769192679,
    'OBJECTID': 16491505,
    'Shape__Area': 3217959.609375,
    'Shape__Length': 7515.779658331329},
   'geometry': {'type': 'Polygon',
    'coordinates': [[[-79.4359157087306, 43.6801533947749],
      [-79.4349150633973, 43.6803688699489],
      [-79.4339472722385, 43.6805785044903],
      [-79.433881624222, 43.6805899612147],
      [-79.4328154497888, 43.6808080444588],
      [-79.4326971769691, 43.6807965882232],
      [-79.4325306465987, 43.68082785962871],
      [-79.4324594444965, 43.680858827103606],
      [-

In [24]:
neighbourhoods_data = toronto_data['features']

In [64]:
neighbourhoods_data[117]

{'type': 'Feature',
 'properties': {'_id': 5578,
  'AREA_ID': 25886825,
  'AREA_ATTR_ID': 25926779,
  'PARENT_AREA_ID': 49885,
  'AREA_SHORT_CODE': 102,
  'AREA_LONG_CODE': 102,
  'AREA_NAME': 'Forest Hill North (102)',
  'AREA_DESC': 'Forest Hill North (102)',
  'X': None,
  'Y': None,
  'LONGITUDE': -79.42814316479999,
  'LATITUDE': 43.7042175596,
  'OBJECTID': 16493377,
  'Shape__Area': 3004590.06640625,
  'Shape__Length': 7873.295355458609},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-79.4255619572768, 43.7009925329741],
    [-79.4266864626875, 43.7007588213682],
    [-79.4276123994154, 43.7005666040414],
    [-79.4277657588352, 43.7005270054049],
    [-79.4288309874726, 43.7002970967576],
    [-79.4299028501137, 43.7000633116904],
    [-79.430906417128, 43.6998516904669],
    [-79.4310392605244, 43.69982615303681],
    [-79.4318953322216, 43.6996444533229],
    [-79.4327846195209, 43.6994554576857],
    [-79.4328683131534, 43.69943471390431],
    [-79.4338265021995, 43.6

In [65]:
# define the useful dataframe columns
column_names = ['Area_Name','Latitude', 'Longitude'] 

# instantiate the dataframe
neighbourhoods = pd.DataFrame(columns=column_names)

In [66]:
neighbourhoods

Unnamed: 0,Area_Name,Latitude,Longitude


In [67]:
for data in neighborhoods_data:
    Area_Name = data['properties']['AREA_NAME'] 
    neighbourhood_lat = data['properties']['LATITUDE']
    neighbourhood_lon = data['properties']['LONGITUDE']
    neighbourhoods = neighbourhoods.append({'Area_Name': Area_Name,
                                          'Latitude': neighbourhood_lat,
                                          'Longitude': neighbourhood_lon}, ignore_index=True)

In [68]:
neighbourhoods

Unnamed: 0,Area_Name,Latitude,Longitude
0,Wychwood (94),43.676919,-79.425515
1,Yonge-Eglinton (100),43.704689,-79.40359
2,Yonge-St.Clair (97),43.687859,-79.397871
3,York University Heights (27),43.765736,-79.488883
4,Yorkdale-Glen Park (31),43.714672,-79.457108
5,Lambton Baby Point (114),43.65742,-79.496045
6,Lansing-Westgate (38),43.754271,-79.424748
7,Lawrence Park North (105),43.73006,-79.403978
8,Lawrence Park South (103),43.717212,-79.406039
9,Leaside-Bennington (56),43.703797,-79.366072


In [69]:
print('The dataframe has {} neighborhoods.'.format(
        neighbourhoods.shape[0]
    )
)

The dataframe has 140 neighborhoods.


### Getting a map of Toronto

In [71]:
#From WIkipedia page on Toronto
Toronto_latitude = 43.741667
Toronto_longitude = -79.373333
print('The geograpical coordinates of Toronto are {}, {}.'.format(Toronto_latitude, Toronto_longitude))

The geograpical coordinates of Toronto are 43.741667, -79.373333.


In [73]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[Toronto_latitude, Toronto_longitude], zoom_start=10)

# add markers to map
for lat, lng,neighbourhood in zip(neighbourhoods['Latitude'], neighbourhoods['Longitude'], neighbourhoods['Area_Name']):
    label = neighbourhood
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Exploring Toronto in Detail

Now, we will use the FourSquare API to explore the venues around Toronto!

In [76]:
CLIENT_ID = 'EK1REWEV5DX3ZQYQS2WIE52WGLN5JPLXUDQN0CGWWFM2OMPO' # your Foursquare ID
CLIENT_SECRET = 'KX5UIJSX42SW4DSBXWSEYZKFOC0T4WCGOJUPIBDY01SMFDR0' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentials:
CLIENT_ID: EK1REWEV5DX3ZQYQS2WIE52WGLN5JPLXUDQN0CGWWFM2OMPO
CLIENT_SECRET:KX5UIJSX42SW4DSBXWSEYZKFOC0T4WCGOJUPIBDY01SMFDR0


Let's explore the first neighborhood in our dataframe.

In [77]:
neighbourhoods.head()

Unnamed: 0,Area_Name,Latitude,Longitude
0,Wychwood (94),43.676919,-79.425515
1,Yonge-Eglinton (100),43.704689,-79.40359
2,Yonge-St.Clair (97),43.687859,-79.397871
3,York University Heights (27),43.765736,-79.488883
4,Yorkdale-Glen Park (31),43.714672,-79.457108


Get Wynchwood's name.

In [79]:
neighbourhoods.loc[0, 'Area_Name']

'Wychwood (94)'

Get Wynchwood's latitude and longitude values.

In [82]:
neighbourhood_lat = neighbourhoods.loc[0, 'Latitude'] # neighborhood latitude value
neighbourhood_lon = neighbourhoods.loc[0, 'Longitude'] # neighborhood longitude value

neighbourhood_name = neighbourhoods.loc[0, 'Area_Name'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, 
                                                               neighbourhood_lat, 
                                                               neighbourhood_lon))

Latitude and longitude values of Wychwood (94) are 43.6769192679, -79.425514947.


Now, let's get the top 50 venues that are in Wynchwood within a radius of 400 meters.

In [83]:
radius = 400
limit = 100
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighbourhood_lat, 
    neighbourhood_lon, 
    radius, 
    limit)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=EK1REWEV5DX3ZQYQS2WIE52WGLN5JPLXUDQN0CGWWFM2OMPO&client_secret=KX5UIJSX42SW4DSBXWSEYZKFOC0T4WCGOJUPIBDY01SMFDR0&v=20180605&ll=43.6769192679,-79.425514947&radius=400&limit=100'

In [84]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5edb1b116001fe001b620fbf'},
 'response': {'headerLocation': 'Bracondale Hill',
  'headerFullLocation': 'Bracondale Hill, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 7,
  'suggestedBounds': {'ne': {'lat': 43.6805192715, 'lng': -79.42054667207172},
   'sw': {'lat': 43.67331926429999, 'lng': -79.43048322192827}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b86e89df964a52051a531e3',
       'name': "Wychwood Barns Farmers' Market",
       'location': {'address': '601 Christie Street',
        'crossStreet': 'St Clair Avenue West',
        'lat': 43.68001040153905,
        'lng': -79.42384857341463,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.68001040153905,
          'lng': -79.42

In [85]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [86]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,categories,lat,lng
0,Wychwood Barns Farmers' Market,Farmers Market,43.68001,-79.423849
1,Hillcrest Park,Park,43.676012,-79.424787
2,Wychwood Barns,Event Space,43.680028,-79.42381
3,Annabelle Pasta Bar,Italian Restaurant,43.675445,-79.423341
4,Bob Coffee Bar,Coffee Shop,43.675376,-79.423268


In [87]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

7 venues were returned by Foursquare.


In [88]:
nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Wychwood Barns Farmers' Market,Farmers Market,43.68001,-79.423849
1,Hillcrest Park,Park,43.676012,-79.424787
2,Wychwood Barns,Event Space,43.680028,-79.42381
3,Annabelle Pasta Bar,Italian Restaurant,43.675445,-79.423341
4,Bob Coffee Bar,Coffee Shop,43.675376,-79.423268
5,Wychwood Barns Community Gallery,Art Gallery,43.679386,-79.424254
6,Marian Engel Park,Park,43.673754,-79.423988


##### Ooh Wynchwood looks like a very pleasant place to visit!

### K-means clustering of neighbourhoods by location
##### How will we split Toronto into 5 sections?

In [90]:
neighbourhoods.drop('Area_Name',axis=1,inplace=True)
# set number of clusters
kclusters = 5
# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(neighbourhoods)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 3, 3, 0, 3, 4, 0, 0, 3, 1])

In [92]:
# add clustering labels
neighbourhoods.insert(0, 'Cluster Labels', kmeans.labels_)

ValueError: cannot insert Cluster Labels, already exists

In [94]:
# create map
map_clusters = folium.Map(location=[Toronto_latitude, Toronto_longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, cluster in zip(neighbourhoods['Latitude'], neighbourhoods['Longitude'], neighbourhoods['Cluster Labels']):
    label = folium.Popup( ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters