Importing all libraries I will need for this project

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    certifi-2019.9.11          |           py36_0         147 KB  conda-forge
    ca-certificates-2019.9.11  |       hecc5488_0         144 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1.20.0-py_0       conda-forge

The following packages will be UPDATED:

    cer

#### Creating a dataframe to work with

In [13]:
# Reading csv file with Toronto coordinates
toronto_lonlat=pd.read_csv("https://cocl.us/Geospatial_data")

# Renaming column Postal Code to Postcode to match other table
toronto_lonlat.rename(columns={'Postal Code': 'Postcode'}, inplace=True)

# Reading Wikipedia file to dataframe
toronto_df=pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")[0]

# Dropping rows where Borough and Neighbourhood are not assigned
drop_values = toronto_df[ (toronto_df['Borough'] == 'Not assigned') & (toronto_df['Neighbourhood'] == 'Not assigned') ].index
toronto_df.drop(drop_values , inplace=True)

# Merging rows with the same Postcode
merged_toronto=toronto_df.groupby(['Postcode','Borough'],as_index=False, sort=False).agg(','.join)
merged_toronto['Neighbourhood']=set(merged_toronto['Neighbourhood'])

# Copying Borough names to Neighbourhoods which are not assigned
merged_toronto['Neighbourhood']=merged_toronto['Neighbourhood'].replace('Not assigned', merged_toronto['Borough'])

# Creating new merged table with 5 colums
new_toronto = pd.merge(merged_toronto, toronto_lonlat)
new_toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,"Birch Cliff,Cliffside West",43.753259,-79.329656
1,M4A,North York,Scarborough Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Weston,43.65426,-79.360636
3,M6A,North York,Rosedale,43.718518,-79.464763
4,M7A,Queen's Park,Woodbine Heights,43.662301,-79.389494


Filtering dataframe by condition that Borough contains word Toronto

In [47]:
filtered = new_toronto.Borough.str.contains('Toronto')
toronto_by_borough=new_toronto[filtered]
toronto_by_borough.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,Weston,43.65426,-79.360636
9,M5B,Downtown Toronto,"Bathurst Manor,Downsview North,Wilson Heights",43.657162,-79.378937
15,M5C,Downtown Toronto,"The Kingsway,Montgomery Road,Old Mill North",43.651494,-79.375418
19,M4E,East Toronto,"Ryerson,Garden District",43.676357,-79.293031
20,M5E,Downtown Toronto,Leaside,43.644771,-79.373306


In [43]:
toronto_by_borough.shape

(38, 5)

Documenting credentials for Foursquare API

In [19]:
CLIENT_ID = 'FTDDUPPDRRYADLDFWIYVSI0WLNTMC4Y0RT2ILL1E4EVGKWGD' # your Foursquare ID
CLIENT_SECRET = 'L0FLQBN3OOK1O02NLOGLORIAZ35JBRGMV1CHCGTCXBY4F2NI' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: FTDDUPPDRRYADLDFWIYVSI0WLNTMC4Y0RT2ILL1E4EVGKWGD
CLIENT_SECRET:L0FLQBN3OOK1O02NLOGLORIAZ35JBRGMV1CHCGTCXBY4F2NI


Reseting index in the dataframe

In [60]:
toronto_by_borough = toronto_by_borough.reset_index(drop=True)
toronto_by_borough.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,Weston,43.65426,-79.360636
1,M5B,Downtown Toronto,"Bathurst Manor,Downsview North,Wilson Heights",43.657162,-79.378937
2,M5C,Downtown Toronto,"The Kingsway,Montgomery Road,Old Mill North",43.651494,-79.375418
3,M4E,East Toronto,"Ryerson,Garden District",43.676357,-79.293031
4,M5E,Downtown Toronto,Leaside,43.644771,-79.373306


#### Exploaring first Borough in the dataframe

In [49]:
toronto_by_borough.loc[0, 'Borough']

'Downtown Toronto'

In [50]:
borough_latitude = toronto_by_borough.loc[0, 'Latitude'] # neighborhood latitude value
borough_longitude = toronto_by_borough.loc[0, 'Longitude'] # neighborhood longitude value

borough_name = toronto_by_borough.loc[0, 'Borough'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(borough_name, 
                                                               borough_latitude, 
                                                               borough_longitude))

Latitude and longitude values of Downtown Toronto are 43.6542599, -79.3606359.


Creating the GET request URL

In [51]:
LIMIT = 20 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    borough_latitude, 
    borough_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=FTDDUPPDRRYADLDFWIYVSI0WLNTMC4Y0RT2ILL1E4EVGKWGD&client_secret=L0FLQBN3OOK1O02NLOGLORIAZ35JBRGMV1CHCGTCXBY4F2NI&v=20180605&ll=43.6542599,-79.3606359&radius=500&limit=20'

Sending the GET request and examining results

In [52]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5dc56e6266fc653db794c922'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 47,
  'suggestedBounds': {'ne': {'lat': 43.6587599045, 'lng': -79.3544279001486},
   'sw': {'lat': 43.6497598955, 'lng': -79.36684389985142}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label': 'display',
 

Borrowing the **get_category_type** function from the Foursquare lab

In [53]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

Cleaning the json and structuring it into a *pandas* dataframe

In [54]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues)

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Cooper Koo Family YMCA,Gym / Fitness Center,43.653191,-79.357947
3,Body Blitz Spa East,Spa,43.654735,-79.359874
4,Morning Glory Cafe,Breakfast Spot,43.653947,-79.361149


In [55]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

20 venues were returned by Foursquare.


#### Creating a function to repeat the same process for other boroughs

In [65]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Borough', 
                  'Borough Latitude', 
                  'Borough Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Using the above function to each borough and creating a new dataframe called *toronto_venues*.

In [66]:
toronto_venues = getNearbyVenues(names=toronto_by_borough['Borough'],
                                   latitudes=toronto_by_borough['Latitude'],
                                   longitudes=toronto_by_borough['Longitude']
                                  )

Downtown Toronto
Downtown Toronto
Downtown Toronto
East Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
West Toronto
Downtown Toronto
West Toronto
East Toronto
Downtown Toronto
West Toronto
East Toronto
Downtown Toronto
East Toronto
Central Toronto
Central Toronto
Central Toronto
Central Toronto
West Toronto
Central Toronto
Central Toronto
West Toronto
Central Toronto
Downtown Toronto
West Toronto
Central Toronto
Downtown Toronto
Central Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
East Toronto


In [67]:
print(toronto_venues.shape)
toronto_venues.head()

(622, 7)


Unnamed: 0,Borough,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Downtown Toronto,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Downtown Toronto,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Downtown Toronto,43.65426,-79.360636,Cooper Koo Family YMCA,43.653191,-79.357947,Gym / Fitness Center
3,Downtown Toronto,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,Downtown Toronto,43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


Checking how many venues there are for each borough

In [69]:
toronto_venues.groupby('Borough').count()

Unnamed: 0_level_0,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Central Toronto,95,95,95,95,95,95
Downtown Toronto,336,336,336,336,336,336
East Toronto,79,79,79,79,79,79
West Toronto,112,112,112,112,112,112


Unique categories for venues

In [70]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 163 uniques categories.


#### Analyzing boroughs

In [71]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Borough'] = toronto_venues['Borough'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Borough,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Beer Bar,Belgian Restaurant,Bistro,Boat or Ferry,Bookstore,Boutique,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Butcher,Café,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cuban Restaurant,Dance Studio,Deli / Bodega,Dessert Shop,Diner,Discount Store,Dog Run,Eastern European Restaurant,Ethiopian Restaurant,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gastropub,Gay Bar,General Entertainment,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health Food Store,Historic Site,History Museum,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Market,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Movie Theater,Museum,Music Venue,Neighborhood,New American Restaurant,Nightclub,Opera House,Organic Grocery,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Plane,Playground,Plaza,Pub,Ramen Restaurant,Record Shop,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Sculpture Garden,Seafood Restaurant,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Supermarket,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,Downtown Toronto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Downtown Toronto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Downtown Toronto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Downtown Toronto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Downtown Toronto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [72]:
toronto_onehot.shape

(622, 164)

Grouping rows by boroughs and by taking the mean of the frequency of occurrence of each category

In [73]:
toronto_grouped = toronto_onehot.groupby('Borough').mean().reset_index()
toronto_grouped

Unnamed: 0,Borough,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Beer Bar,Belgian Restaurant,Bistro,Boat or Ferry,Bookstore,Boutique,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Butcher,Café,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cuban Restaurant,Dance Studio,Deli / Bodega,Dessert Shop,Diner,Discount Store,Dog Run,Eastern European Restaurant,Ethiopian Restaurant,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gastropub,Gay Bar,General Entertainment,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health Food Store,Historic Site,History Museum,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Market,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Movie Theater,Museum,Music Venue,Neighborhood,New American Restaurant,Nightclub,Opera House,Organic Grocery,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Plane,Playground,Plaza,Pub,Ramen Restaurant,Record Shop,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Sculpture Garden,Seafood Restaurant,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Supermarket,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,Central Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.021053,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.010526,0.0,0.021053,0.0,0.010526,0.0,0.052632,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.031579,0.0,0.073684,0.0,0.0,0.0,0.0,0.0,0.0,0.021053,0.0,0.0,0.010526,0.0,0.042105,0.021053,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.031579,0.0,0.0,0.0,0.0,0.010526,0.010526,0.010526,0.010526,0.021053,0.0,0.0,0.031579,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.010526,0.021053,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.063158,0.0,0.0,0.010526,0.031579,0.0,0.010526,0.0,0.031579,0.0,0.0,0.031579,0.0,0.010526,0.052632,0.0,0.010526,0.0,0.0,0.0,0.0,0.010526,0.0,0.021053,0.010526,0.0,0.0,0.0,0.010526,0.042105,0.010526,0.0,0.0,0.0,0.0,0.010526,0.010526,0.0,0.0,0.010526,0.010526,0.010526,0.010526,0.0,0.010526
1,Downtown Toronto,0.002976,0.002976,0.002976,0.005952,0.005952,0.002976,0.014881,0.008929,0.002976,0.005952,0.002976,0.0,0.002976,0.002976,0.0,0.026786,0.0,0.011905,0.017857,0.002976,0.002976,0.002976,0.008929,0.002976,0.011905,0.0,0.011905,0.005952,0.002976,0.0,0.002976,0.077381,0.002976,0.008929,0.002976,0.002976,0.002976,0.002976,0.0,0.005952,0.011905,0.080357,0.002976,0.002976,0.002976,0.002976,0.008929,0.002976,0.002976,0.005952,0.0,0.002976,0.014881,0.011905,0.011905,0.0,0.0,0.0,0.002976,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.002976,0.005952,0.005952,0.008929,0.0,0.0,0.0,0.0,0.0,0.029762,0.0,0.002976,0.002976,0.0,0.002976,0.008929,0.011905,0.014881,0.002976,0.0,0.002976,0.0,0.0,0.020833,0.0,0.002976,0.0,0.0,0.020833,0.020833,0.002976,0.002976,0.002976,0.0,0.002976,0.0,0.0,0.002976,0.002976,0.002976,0.011905,0.002976,0.002976,0.002976,0.008929,0.002976,0.005952,0.002976,0.002976,0.002976,0.002976,0.035714,0.005952,0.002976,0.0,0.011905,0.002976,0.002976,0.011905,0.017857,0.008929,0.0,0.041667,0.008929,0.002976,0.005952,0.002976,0.014881,0.0,0.002976,0.0,0.0,0.005952,0.002976,0.002976,0.0,0.0,0.0,0.017857,0.002976,0.008929,0.0,0.002976,0.002976,0.002976,0.014881,0.0,0.008929,0.005952,0.002976,0.0,0.002976,0.008929,0.005952,0.002976,0.0
2,East Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.025316,0.0,0.0,0.0,0.0,0.0,0.0,0.025316,0.0,0.0,0.037975,0.012658,0.012658,0.025316,0.0,0.0,0.025316,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.063291,0.0,0.0,0.012658,0.012658,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.025316,0.012658,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.012658,0.012658,0.0,0.012658,0.0,0.0,0.0,0.050633,0.0,0.012658,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.050633,0.0,0.0,0.012658,0.050633,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.012658,0.012658,0.0,0.0,0.0,0.012658,0.0,0.012658,0.0,0.0,0.025316,0.0,0.0,0.0,0.0,0.025316,0.0,0.012658,0.0,0.037975,0.0,0.0,0.0,0.037975,0.0,0.0,0.025316,0.0,0.0,0.025316,0.0,0.012658,0.012658,0.0,0.012658,0.0,0.012658,0.0,0.0,0.0,0.0,0.012658,0.012658,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.012658
3,West Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.008929,0.008929,0.0,0.0,0.0,0.0,0.0,0.026786,0.017857,0.0625,0.0,0.0,0.0,0.0,0.026786,0.0,0.026786,0.017857,0.0,0.0,0.017857,0.0,0.0,0.0625,0.0,0.008929,0.0,0.0,0.0,0.0,0.008929,0.0,0.008929,0.044643,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.008929,0.008929,0.008929,0.008929,0.008929,0.0,0.008929,0.0,0.008929,0.008929,0.0,0.008929,0.008929,0.0,0.0,0.0,0.0,0.017857,0.008929,0.0,0.017857,0.0,0.0,0.008929,0.0,0.0,0.017857,0.008929,0.008929,0.017857,0.008929,0.008929,0.0,0.0,0.0,0.0,0.0,0.0,0.008929,0.0,0.008929,0.0,0.044643,0.008929,0.0,0.0,0.0,0.008929,0.0,0.008929,0.0,0.0,0.0,0.0,0.008929,0.008929,0.0,0.008929,0.0,0.017857,0.0,0.008929,0.0,0.0,0.0,0.017857,0.008929,0.008929,0.017857,0.026786,0.0,0.0,0.0,0.008929,0.0,0.008929,0.017857,0.0,0.0,0.008929,0.0,0.0,0.0,0.0,0.0,0.008929,0.0,0.008929,0.0,0.0,0.008929,0.0,0.0,0.017857,0.017857,0.0,0.0,0.0,0.0,0.008929,0.0,0.017857,0.008929,0.0,0.0,0.0,0.008929,0.008929,0.008929,0.008929


In [74]:
toronto_grouped.shape

(4, 164)

#### Each borough with the top 5 most common venues

In [87]:
num_top_venues = 5

for hood in toronto_grouped['Borough']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Borough'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Central Toronto----
            venue  freq
0     Coffee Shop  0.07
1            Park  0.06
2            Café  0.05
3  Sandwich Place  0.05
4    Dessert Shop  0.04


----Downtown Toronto----
         venue  freq
0         Café  0.08
1  Coffee Shop  0.08
2         Park  0.04
3   Restaurant  0.04
4    Gastropub  0.03


----East Toronto----
                venue  freq
0         Coffee Shop  0.06
1    Greek Restaurant  0.05
2      Ice Cream Shop  0.05
3  Italian Restaurant  0.05
4                 Pub  0.04


----West Toronto----
                venue  freq
0                Café  0.06
1                 Bar  0.06
2  Italian Restaurant  0.04
3         Coffee Shop  0.04
4              Bakery  0.03




Writing a function to sort the venues in descending order

In [88]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Creating the new dataframe and display the top 10 venues for each borough

In [110]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Borough']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
borough_venues_sorted = pd.DataFrame(columns=columns)
borough_venues_sorted['Borough'] = toronto_grouped['Borough']

for ind in np.arange(toronto_grouped.shape[0]):
    borough_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

borough_venues_sorted.head()

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Coffee Shop,Park,Sandwich Place,Café,Sushi Restaurant,Dessert Shop,Italian Restaurant,Pizza Place,Restaurant,Gym
1,Downtown Toronto,Coffee Shop,Café,Restaurant,Park,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant,Hotel,Pub
2,East Toronto,Coffee Shop,Greek Restaurant,Italian Restaurant,Ice Cream Shop,Pub,Pizza Place,Brewery,Bookstore,Burrito Place,Café
3,West Toronto,Café,Bar,Coffee Shop,Italian Restaurant,Breakfast Spot,Bookstore,Pizza Place,Bakery,Bank,Thai Restaurant


### Clustering boroughs
Runing *k*-means to cluster the boroughs into 4 clusters

In [111]:
# set number of clusters
kclusters = 4

toronto_grouped_clustering = toronto_grouped.drop('Borough', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 3, 2, 1], dtype=int32)

Creating a new dataframe that includes the cluster as well as the top 10 venues for each borough

In [112]:
# adding clustering labels
borough_venues_sorted.insert(0, 'Cluster labels', kmeans.labels_)

toronto_merged = toronto_by_borough

# merging toronto_grouped with toronto_data to add latitude/longitude for each borough
toronto_merged = toronto_merged.join(borough_venues_sorted.set_index('Borough'), on='Borough')

toronto_merged.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,Weston,43.65426,-79.360636,3,Coffee Shop,Café,Restaurant,Park,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant,Hotel,Pub
1,M5B,Downtown Toronto,"Bathurst Manor,Downsview North,Wilson Heights",43.657162,-79.378937,3,Coffee Shop,Café,Restaurant,Park,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant,Hotel,Pub
2,M5C,Downtown Toronto,"The Kingsway,Montgomery Road,Old Mill North",43.651494,-79.375418,3,Coffee Shop,Café,Restaurant,Park,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant,Hotel,Pub
3,M4E,East Toronto,"Ryerson,Garden District",43.676357,-79.293031,2,Coffee Shop,Greek Restaurant,Italian Restaurant,Ice Cream Shop,Pub,Pizza Place,Brewery,Bookstore,Burrito Place,Café
4,M5E,Downtown Toronto,Leaside,43.644771,-79.373306,3,Coffee Shop,Café,Restaurant,Park,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant,Hotel,Pub


Getting Toronto latitude and longitude

In [113]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


Visualizing clusters

In [117]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Borough'], toronto_merged['Cluster labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Examining clusters
4 clusters match different boroughs, maybe I should check different number of clusters

In [118]:
toronto_merged.loc[toronto_merged['Cluster labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Central Toronto,0,Coffee Shop,Park,Sandwich Place,Café,Sushi Restaurant,Dessert Shop,Italian Restaurant,Pizza Place,Restaurant,Gym
18,Central Toronto,0,Coffee Shop,Park,Sandwich Place,Café,Sushi Restaurant,Dessert Shop,Italian Restaurant,Pizza Place,Restaurant,Gym
19,Central Toronto,0,Coffee Shop,Park,Sandwich Place,Café,Sushi Restaurant,Dessert Shop,Italian Restaurant,Pizza Place,Restaurant,Gym
20,Central Toronto,0,Coffee Shop,Park,Sandwich Place,Café,Sushi Restaurant,Dessert Shop,Italian Restaurant,Pizza Place,Restaurant,Gym
22,Central Toronto,0,Coffee Shop,Park,Sandwich Place,Café,Sushi Restaurant,Dessert Shop,Italian Restaurant,Pizza Place,Restaurant,Gym
23,Central Toronto,0,Coffee Shop,Park,Sandwich Place,Café,Sushi Restaurant,Dessert Shop,Italian Restaurant,Pizza Place,Restaurant,Gym
25,Central Toronto,0,Coffee Shop,Park,Sandwich Place,Café,Sushi Restaurant,Dessert Shop,Italian Restaurant,Pizza Place,Restaurant,Gym
28,Central Toronto,0,Coffee Shop,Park,Sandwich Place,Café,Sushi Restaurant,Dessert Shop,Italian Restaurant,Pizza Place,Restaurant,Gym
30,Central Toronto,0,Coffee Shop,Park,Sandwich Place,Café,Sushi Restaurant,Dessert Shop,Italian Restaurant,Pizza Place,Restaurant,Gym


In [119]:
toronto_merged.loc[toronto_merged['Cluster labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,West Toronto,1,Café,Bar,Coffee Shop,Italian Restaurant,Breakfast Spot,Bookstore,Pizza Place,Bakery,Bank,Thai Restaurant
10,West Toronto,1,Café,Bar,Coffee Shop,Italian Restaurant,Breakfast Spot,Bookstore,Pizza Place,Bakery,Bank,Thai Restaurant
13,West Toronto,1,Café,Bar,Coffee Shop,Italian Restaurant,Breakfast Spot,Bookstore,Pizza Place,Bakery,Bank,Thai Restaurant
21,West Toronto,1,Café,Bar,Coffee Shop,Italian Restaurant,Breakfast Spot,Bookstore,Pizza Place,Bakery,Bank,Thai Restaurant
24,West Toronto,1,Café,Bar,Coffee Shop,Italian Restaurant,Breakfast Spot,Bookstore,Pizza Place,Bakery,Bank,Thai Restaurant
27,West Toronto,1,Café,Bar,Coffee Shop,Italian Restaurant,Breakfast Spot,Bookstore,Pizza Place,Bakery,Bank,Thai Restaurant


In [120]:
toronto_merged.loc[toronto_merged['Cluster labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,East Toronto,2,Coffee Shop,Greek Restaurant,Italian Restaurant,Ice Cream Shop,Pub,Pizza Place,Brewery,Bookstore,Burrito Place,Café
11,East Toronto,2,Coffee Shop,Greek Restaurant,Italian Restaurant,Ice Cream Shop,Pub,Pizza Place,Brewery,Bookstore,Burrito Place,Café
14,East Toronto,2,Coffee Shop,Greek Restaurant,Italian Restaurant,Ice Cream Shop,Pub,Pizza Place,Brewery,Bookstore,Burrito Place,Café
16,East Toronto,2,Coffee Shop,Greek Restaurant,Italian Restaurant,Ice Cream Shop,Pub,Pizza Place,Brewery,Bookstore,Burrito Place,Café
37,East Toronto,2,Coffee Shop,Greek Restaurant,Italian Restaurant,Ice Cream Shop,Pub,Pizza Place,Brewery,Bookstore,Burrito Place,Café


In [121]:
toronto_merged.loc[toronto_merged['Cluster labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,3,Coffee Shop,Café,Restaurant,Park,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant,Hotel,Pub
1,Downtown Toronto,3,Coffee Shop,Café,Restaurant,Park,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant,Hotel,Pub
2,Downtown Toronto,3,Coffee Shop,Café,Restaurant,Park,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant,Hotel,Pub
4,Downtown Toronto,3,Coffee Shop,Café,Restaurant,Park,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant,Hotel,Pub
5,Downtown Toronto,3,Coffee Shop,Café,Restaurant,Park,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant,Hotel,Pub
6,Downtown Toronto,3,Coffee Shop,Café,Restaurant,Park,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant,Hotel,Pub
7,Downtown Toronto,3,Coffee Shop,Café,Restaurant,Park,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant,Hotel,Pub
9,Downtown Toronto,3,Coffee Shop,Café,Restaurant,Park,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant,Hotel,Pub
12,Downtown Toronto,3,Coffee Shop,Café,Restaurant,Park,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant,Hotel,Pub
15,Downtown Toronto,3,Coffee Shop,Café,Restaurant,Park,Gastropub,Bakery,Japanese Restaurant,Italian Restaurant,Hotel,Pub
