# Analyzing the neighborhoods in the city of Toronto

#### ********************************************************************************************************* Bhuvanesh Selvakumar *********************

##### .....................................................................................................................................................................................................................................

#### 1. Importing the necessary libraries for analysis

In [1]:
# Importing the necessary libraties for the project

import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import requests
import random
import json
from pandas.io.json import json_normalize
from sklearn import preprocessing
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors

#!conda install -c conda-forge geopy --yes
!pip install folium
!pip install geocoder

import folium




#### 2. Importing open-source data from Wikipedia on Toronto's neighboorhoods and demography

In [3]:
# Link to wikipedia neighborhoods
url_nei = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
wiki_Toronto_nei = pd.read_html(url_nei)[:]
wiki_Toronto_nei = pd.DataFrame(wiki_Toronto_nei[0])
wiki_Toronto_nei.groupby(by='Borough', axis=0)

#Count the number of boroughs                               
wiki_Toronto_borough = pd.unique(wiki_Toronto_nei['Borough'])
wiki_Toronto_borough_count = wiki_Toronto_nei['Borough'].value_counts()
wiki_Toronto_borough_count

#Reorganize the dataframe and sort them in ascending order
wiki_Toronto_nei = wiki_Toronto_nei.sort_values(by=['Borough'], ascending = True).reset_index()
#wiki_Toronto_nei.reset_index(inplace=True)
wiki_Toronto_nei.drop(['index'], axis=1, inplace=True)
wiki_Toronto_nei.set_index
wiki_Toronto_nei.index.name = 'index'
#wiki_Toronto_nei.index = range(len(wiki_Toronto_nei['Borough']))

wiki_Toronto_nei
#wiki_Toronto_nei[wiki_Toronto_nei['Postal Code'] == 'M5A']



Unnamed: 0_level_0,Postal Code,Borough,Neighbourhood
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,M5R,Central Toronto,"The Annex, North Midtown, Yorkville"
1,M4N,Central Toronto,Lawrence Park
2,M5N,Central Toronto,Roselawn
3,M4P,Central Toronto,Davisville North
4,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest..."
...,...,...,...
175,M9N,York,Weston
176,M6E,York,Caledonia-Fairbanks
177,M6N,York,"Runnymede, The Junction North"
178,M6M,York,"Del Ray, Mount Dennis, Keelsdale and Silverthorn"


In [4]:
wiki_Toronto_nei.shape
print('There are {} rows and {} columns in the Toronto neighborhood dataframe'.format(wiki_Toronto_nei.shape[0],
                                                                                       wiki_Toronto_nei.shape[1]))

There are 180 rows and 3 columns in the Toronto neighborhood dataframe


In [5]:
#Getting Latitudes and Longtitudes from Postcodes and integrate it with the neighboorhood dataframe

#!pip install pgeocode
import pgeocode
Toronto_geocoder = pgeocode.Nominatim('ca')

Toronto_boroughs_LL = Toronto_geocoder.query_postal_code(i for i in wiki_Toronto_nei['Postal Code'])[['postal_code',
                                                                                                      'latitude',
                                                                                                      'longitude']]
Toronto_boroughs_LL
wiki_Toronto_nei[['latitude','longitude']] = Toronto_boroughs_LL[['latitude','longitude']]
wiki_Toronto_nei

Unnamed: 0_level_0,Postal Code,Borough,Neighbourhood,latitude,longitude
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.6736,-79.4035
1,M4N,Central Toronto,Lawrence Park,43.7301,-79.3935
2,M5N,Central Toronto,Roselawn,43.7113,-79.4195
3,M4P,Central Toronto,Davisville North,43.7135,-79.3887
4,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.6861,-79.4025
...,...,...,...,...,...
175,M9N,York,Weston,43.7068,-79.5170
176,M6E,York,Caledonia-Fairbanks,43.6889,-79.4507
177,M6N,York,"Runnymede, The Junction North",43.6748,-79.4839
178,M6M,York,"Del Ray, Mount Dennis, Keelsdale and Silverthorn",43.6934,-79.4857


#### 3. Import Toronto's demography dataset from Wikipedia, extract and clean the data



In [6]:


def extract_ethnicity(row): 
    y = wiki_Toronto_dem['Ethnicity'][row]
    y = re.search('\(([^)]+)', y).group(1)
    y =  float(y.strip('%'))
    #y =  y.strip('%')

    return y


## Query data of Toronto demograohy from Wikipedia

#!pip install wikipedia #Uncomment this line after installing the first installation of Wikipedia library
import wikipedia as wp
import pandas as pd
import re

url_dem = "https://en.wikipedia.org/wiki/Demographics_of_Toronto_neighbourhoods"
wiki_Toronto_dem = pd.read_html(url_dem)[:]
wiki_Toronto_dem = pd.DataFrame(wiki_Toronto_dem[1])    
#wiki_Toronto_dem.rename(columns={"Second most common language (after English) by name" : "Ethnicity"}, inplace = True)
#wiki_Toronto_dem.groupby(by='Ethnicity', axis=0)

for i in range(2,6):
    x = pd.read_html(url_dem)[:]
    x = pd.DataFrame(x[i])
    wiki_Toronto_dem = wiki_Toronto_dem.append(x, ignore_index=False)

wiki_Toronto_dem.rename(columns={"Second most common language (after English) by name" : "Ethnicity"}, inplace = True)
wiki_Toronto_dem['Ethnicity']
wiki_Toronto_dem.reset_index(inplace=True)

#----------------------------------------------------------------------------------------------------------------------


# Creating a new column and remove unused columns

wiki_Toronto_dem['Ethnicity Percentage (%)'] = ''

for i in range(wiki_Toronto_dem.shape[0]):
    if set(['Map','Census Tracts','Second most common language (after English) by percentage']).issubset(wiki_Toronto_dem):
        wiki_Toronto_dem.drop(columns = ['Map','Census Tracts','Second most common language (after English) by percentage'], axis = 1, inplace=True)
    else:
        pass
    
wiki_Toronto_dem = wiki_Toronto_dem.dropna()

wiki_Toronto_dem.drop(['index'], axis=1, inplace=True)
wiki_Toronto_dem.set_index
wiki_Toronto_dem.index.name = 'index'
wiki_Toronto_dem.index = range(len(wiki_Toronto_dem['Ethnicity']))
wiki_Toronto_dem

#-----------------------------------------------------------------------------------------------------------------------


# Extract the ethnicty and save it in Ethnicity percentage columnn

out = []
for i in range(len(wiki_Toronto_dem['Ethnicity'])):
    out.append(extract_ethnicity(i))

wiki_Toronto_dem['Ethnicity Percentage (%)'] = out
wiki_Toronto_dem['Ethnicity_new'] = wiki_Toronto_dem['Ethnicity'].str.split('(').str[0]

for i in range(len(wiki_Toronto_dem['Ethnicity_new'])):
     wiki_Toronto_dem['Ethnicity_new'][i] = wiki_Toronto_dem['Ethnicity_new'][i].strip()
        
wiki_Toronto_dem
#wiki_Toronto_dem['Ethnicity_new'].value_counts()


#------------------------------------------------------------------------------------------------------------------------

#Create an new column listing the names of neighborhood concatenated with "Toronto" to make the address search easier

temp = wiki_Toronto_dem['Name'] + ', Toronto'
wiki_Toronto_dem.insert(1,'Name-ccat',temp)
wiki_Toronto_dem


#------------------------------------------------------------------------------------------------------------------------

#Clean the neighborhood names (row 1, 55)

pd.set_option('display.max_rows', 500)
wiki_Toronto_dem['Name'][1] = wiki_Toronto_dem['Name'][1].split('/')[0]
wiki_Toronto_dem['Name'][55] = wiki_Toronto_dem['Name'][55].split('/')[0]

wiki_Toronto_dem


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,Name,Name-ccat,FM,Population,Land area (km2),Density (people/km2),% Change in Population since 2001,Average Income,Transit Commuting %,% Renters,Ethnicity,Ethnicity Percentage (%),Ethnicity_new
0,Crescent Town,"Crescent Town, Toronto",EY,8157,0.4,20393,-10.0,23021,24.5,20.3,Bengali (18.1%),18.1,Bengali
1,Governor's Bridge,"Governor's Bridge/Bennington Heights, Toronto",EY,2112,1.87,1129,4.0,129904,7.1,13.3,Polish (1.4%),1.4,Polish
2,Leaside,"Leaside, Toronto",EY,13876,2.81,4938,3.0,82670,9.7,10.5,Bulgarian (0.4%),0.4,Bulgarian
3,O'Connor–Parkview,"O'Connor–Parkview, Toronto",EY,17740,4.94,3591,-6.1,33517,15.8,19.4,Urdu (3.2%),3.2,Urdu
4,Old East York,"Old East York, Toronto",EY,52220,7.94,6577,-4.6,33172,22.0,19.1,Greek (4.3%),4.3,Greek
5,Thorncliffe Park,"Thorncliffe Park, Toronto",EY,17949,3.09,5809,9.1,25340,16.7,32.5,Urdu (21.5%),21.5,Urdu
6,Alderwood,"Alderwood, Toronto",E,11656,4.94,2360,-4.0,35239,8.8,8.5,Polish (6.2%),6.2,Polish
7,Centennial,"Centennial, Toronto",E,12565,4.94,2544,0.5,34867,11.5,8.8,Polish (2.7%),2.7,Polish
8,Clairville,"Clairville, Toronto",E,8506,6.71,1268,-3.3,26610,13.2,7.2,Punjabi (12.0%),12.0,Punjabi
9,Eatonville,"Eatonville, Toronto",E,19131,11.26,1699,4.3,36206,12.6,13.4,Serbian (3.2%),3.2,Serbian


#### 4. Query location data from Foursquare API, and generate  map of Toronto using neighborhood dataset



In [7]:

# Use Nominatim function to generate map of Toronto

address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent="Toronto_explorer")
Toronto_location = geolocator.geocode(address, timeout = None)
Toronto_latitude = Toronto_location.latitude
Toronto_longitude = Toronto_location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(Toronto_latitude, Toronto_longitude))


# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[Toronto_latitude, Toronto_longitude], zoom_start=11)

wiki_Toronto_nei = wiki_Toronto_nei.dropna()


# add markers to map
for lat, lng, label in zip(wiki_Toronto_nei['latitude'], wiki_Toronto_nei['longitude'], wiki_Toronto_nei['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


#### 5. Add Latitude, Longitude and Address of the neighborhoods to the demography dataframe

In [8]:

# Adding address to the Dataframe 

Toronto_locator =  Nominatim(user_agent="FourSquare_Toronto")
#Toronto_locator.geocode(Toronto_dem_LL[i])
wiki_Toronto_dem['Address'] = ''
wiki_Toronto_dem['Latitude'] = ''
wiki_Toronto_dem['Longitude'] = ''
    
for i in range(len(wiki_Toronto_dem['Name-ccat'])):
    if bool(Toronto_locator.geocode(wiki_Toronto_dem['Name-ccat'][i])) == True:
        wiki_Toronto_dem['Address'][i] = Toronto_locator.geocode(wiki_Toronto_dem['Name-ccat'][i])[0]
    else:
        #return None 
        wiki_Toronto_dem['Address'][i] = 0

#wiki_Toronto_dem



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [9]:
    
#Making a temporary copy of the Dataframe for ease of testing the code

x = wiki_Toronto_dem
x = x[x['Address'] != 0]
x.reset_index(drop=True, inplace=True)
x

def find_LL(row):
    latitude = Toronto_locator.geocode(x['Name-ccat'][row],timeout=None)[1][0]
    longitude = Toronto_locator.geocode(x['Name-ccat'][row],timeout=None)[1][1]
    return latitude, longitude


for i in range(len(x['Name'])):
    x['Latitude'].loc[i] = find_LL(i)[0]
    x['Longitude'].loc[i] = find_LL(i)[1]
    #lati = find_LL(i)[0]
    #long = find_LL(i)[1]
    #x.append({'Latitude':lati,'Longitude':long}, ignore_index=True)

wiki_Toronto_dem = x



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()


In [10]:
# Color code the neighborhoods based on the highest secondary ethnicty

neigh_labels = wiki_Toronto_dem['Ethnicity_new'].unique()
color_array = cm.rainbow(np.linspace(0, 1, len(neigh_labels)))
color_array = [colors.rgb2hex(i) for i in color_array]
#color_array

for i in range(len(neigh_labels)):
    wiki_Toronto_dem.loc[wiki_Toronto_dem['Ethnicity_new'] == neigh_labels[i] , "Color_ethnicity"] = color_array[i]
    
#wiki_Toronto_dem


#-----------------------------------------------------------------------------------------------------------------


# Color code the neighborhoods based on the neighborhoods

neigh_labels = wiki_Toronto_dem['FM'].unique()
color_array = cm.rainbow(np.linspace(0, 1, len(neigh_labels)))
color_array = [colors.rgb2hex(i) for i in color_array]
#color_array

for i in range(len(neigh_labels)):
    wiki_Toronto_dem.loc[wiki_Toronto_dem['FM'] == neigh_labels[i] , "Color_FM"] = color_array[i]
    
#wiki_Toronto_dem


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [11]:

# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[Toronto_latitude, Toronto_longitude], zoom_start=11)

wiki_Toronto_nei = wiki_Toronto_nei.dropna()


# add markers to map
for lat, lng, label, area, color_ethnicity, color_FM in zip(wiki_Toronto_dem['Latitude'],
                                                      wiki_Toronto_dem['Longitude'],
                                                      wiki_Toronto_dem['Ethnicity_new'],
                                                      wiki_Toronto_dem['Name'],                                                            
                                                      wiki_Toronto_dem['Color_ethnicity'],
                                                      wiki_Toronto_dem['Color_FM']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=color_FM,
        fill=True,
        fill_color=color_FM,
        fill_opacity=1,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

## Analyze the neighborhoods using Foursquare API

#### 1. Input user credentials to access Foursquare API

In [12]:

CLIENT_ID = 'RF0UYLVZDZ3W4IBJOOHUTN3LZZY1YTHCOHODTMLWHSN11HJQ' # your Foursquare ID
CLIENT_SECRET = 'Z1LAGZCWCJOMUQKQ53VXXBCPHGHZD1EOG3O4R0XIFK1RCZYB' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: RF0UYLVZDZ3W4IBJOOHUTN3LZZY1YTHCOHODTMLWHSN11HJQ
CLIENT_SECRET:Z1LAGZCWCJOMUQKQ53VXXBCPHGHZD1EOG3O4R0XIFK1RCZYB


#### 2. Define a function to query, sort data from Categories list and transfer them into a dataframe



In [13]:

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
    
#results['response']['groups'][0]['items']


In [14]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)



# type your answer here
Toronto_venues = getNearbyVenues(names=wiki_Toronto_dem['Name-ccat'],
                                   latitudes=wiki_Toronto_dem['Latitude'],
                                   longitudes=wiki_Toronto_dem['Longitude']
                                  )

#Toronto_venues

Crescent Town, Toronto
Governor's Bridge/Bennington Heights, Toronto
Leaside, Toronto
O'Connor–Parkview, Toronto
Old East York, Toronto
Thorncliffe Park, Toronto
Alderwood, Toronto
Centennial, Toronto
Clairville, Toronto
Eatonville, Toronto
Humber Heights, Toronto
Humberwood, Toronto
Humber Valley Village, Toronto
Islington – Six Points, Toronto
Kingsview Village, Toronto
Long Branch, Toronto
Markland Wood, Toronto
Mimico, Toronto
New Toronto, Toronto
Princess Gardens, Toronto
Agincourt, Toronto
Alexandra Park, Toronto
Allenby, Toronto
Amesbury, Toronto
Armour Heights, Toronto
Banbury, Toronto
Bathurst Manor, Toronto
Bay Street Corridor, Toronto
Bayview Village, Toronto
Bayview Woods – Steeles, Toronto
Bedford Park, Toronto
Bendale, Toronto
Birch Cliff, Toronto
Bloor West Village, Toronto
Bracondale Hill, Toronto
Branson, Toronto
Bridle Path, Toronto
Brockton, Toronto
Cabbagetown, Toronto
Caribou Park, Toronto
Carleton Village, Toronto
Casa Loma, Toronto
Chaplin Estates, Toronto
Christ

In [15]:
#print(Toronto_venues.shape)
Toronto_venues.groupby('Neighborhood').count()


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Agincourt, Toronto",11,11,11,11,11,11
"Alderwood, Toronto",9,9,9,9,9,9
"Alexandra Park, Toronto",100,100,100,100,100,100
"Allenby, Toronto",5,5,5,5,5,5
"Amesbury, Toronto",6,6,6,6,6,6
"Armour Heights, Toronto",3,3,3,3,3,3
"Banbury, Toronto",4,4,4,4,4,4
"Bathurst Manor, Toronto",72,72,72,72,72,72
"Bay Street Corridor, Toronto",100,100,100,100,100,100
"Bayview Village, Toronto",12,12,12,12,12,12


#### 2. Apply one-hot encoding

In [16]:
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

Toronto_onehot = Toronto_onehot.drop(['Neighborhood'], axis=1)

# add neighborhood column back to dataframe
Toronto_onehot['Neighborhood'] = Toronto_venues['Neighborhood'] 


# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head()

Toronto_grouped = Toronto_onehot.groupby('Neighborhood').mean().reset_index()
Toronto_grouped

Unnamed: 0,Neighborhood,ATM,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Terminal,American Restaurant,Animal Shelter,Antique Shop,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"Agincourt, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Alexandra Park, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,...,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01
3,"Allenby, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Amesbury, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Armour Heights, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Banbury, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Bathurst Manor, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.013889,0.0,0.0,...,0.0,0.027778,0.013889,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Bay Street Corridor, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,...,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01
9,"Bayview Village, Toronto",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
num_top_venues = 5

for hood in Toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

# Sort the venues in descending order 
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]


# ==================================================================================================================


num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
Toronto_neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
Toronto_neighborhoods_venues_sorted['Neighborhood'] = Toronto_grouped['Neighborhood']
for ind in np.arange(Toronto_grouped.shape[0]):
    Toronto_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

Toronto_neighborhoods_venues_sorted.head()

----Agincourt, Toronto----
                   venue  freq
0     Chinese Restaurant  0.27
1   Cantonese Restaurant  0.09
2             Food Court  0.09
3      Korean Restaurant  0.09
4  Vietnamese Restaurant  0.09


----Alderwood, Toronto----
            venue  freq
0     Pizza Place  0.22
1        Pharmacy  0.11
2  Sandwich Place  0.11
3    Skating Rink  0.11
4     Coffee Shop  0.11


----Alexandra Park, Toronto----
                    venue  freq
0                     Bar  0.10
1    Caribbean Restaurant  0.05
2  Furniture / Home Store  0.05
3                    Café  0.05
4      Italian Restaurant  0.02


----Allenby, Toronto----
            venue  freq
0  Sandwich Place   0.2
1       Bookstore   0.2
2  Tennis Stadium   0.2
3    Tennis Court   0.2
4    Skating Rink   0.2


----Amesbury, Toronto----
                venue  freq
0                Bank  0.17
1        Intersection  0.17
2         Gas Station  0.17
3                Park  0.17
4  Athletics & Sports  0.17


----Armour Heights,

                venue  freq
0  Italian Restaurant  0.22
1              Bakery  0.11
2                Café  0.11
3      Ice Cream Shop  0.06
4             Dog Run  0.06


----East Danforth, Toronto----
           venue  freq
0    Coffee Shop  0.11
1  Grocery Store  0.07
2       Bus Line  0.07
3       Pharmacy  0.07
4    Pizza Place  0.04


----Eatonville, Toronto----
                venue  freq
0  Mexican Restaurant  0.09
1      Clothing Store  0.09
2             Theater  0.09
3                 Gym  0.09
4       Grocery Store  0.09


----Eglinton East, Toronto----
               venue  freq
0     Ice Cream Shop  0.33
1  Indian Restaurant  0.17
2      Train Station  0.17
3         Restaurant  0.17
4     Sandwich Place  0.17


----Elia (Jane and Finch), Toronto----
                  venue  freq
0  Fast Food Restaurant  0.09
1           Pizza Place  0.09
2         Shopping Mall  0.09
3        Discount Store  0.09
4         Grocery Store  0.09


----Fashion District, Toronto----
           

                venue  freq
0          Hobby Shop   0.1
1        Burger Joint   0.1
2  Light Rail Station   0.1
3                 Bar   0.1
4        Liquor Store   0.1


----Lytton Park, Toronto----
                     venue  freq
0               Playground  0.25
1               Restaurant  0.25
2                   Garden  0.25
3       Photography Studio  0.25
4  North Indian Restaurant  0.00


----Malvern, Toronto----
                  venue  freq
0  Fast Food Restaurant  0.19
1           Pizza Place  0.12
2              Pharmacy  0.12
3       Bubble Tea Shop  0.06
4        Sandwich Place  0.06


----Maple Leaf, Toronto----
                     venue  freq
0                   Bakery  0.33
1                    Trail  0.33
2         Basketball Court  0.33
3                      ATM  0.00
4  New American Restaurant  0.00


----Markland Wood, Toronto----
            venue  freq
0     Golf Course  0.25
1       Piano Bar  0.25
2  Baseball Field  0.25
3            Park  0.25
4             A

                        venue  freq
0                  Playground  0.25
1               Poutine Place  0.25
2               Deli / Bodega  0.25
3  Construction & Landscaping  0.25
4         Monument / Landmark  0.00


----Swansea, Toronto----
                        venue  freq
0                        Park  0.29
1  Construction & Landscaping  0.14
2                Skating Rink  0.14
3                 Social Club  0.14
4                    Bus Line  0.14


----Tam O'Shanter – Sullivan, Toronto----
             venue  freq
0   Sandwich Place  0.11
1  Thai Restaurant  0.11
2             Bank  0.11
3      Gas Station  0.11
4     Intersection  0.11


----The Annex, Toronto----
             venue  freq
0      Pizza Place  0.08
1  Thai Restaurant  0.05
2              Gym  0.05
3    Grocery Store  0.05
4           Bistro  0.05


----The Beaches, Toronto----
            venue  freq
0           Beach  0.07
1     Pizza Place  0.05
2             Pub  0.05
3  Breakfast Spot  0.05
4            Park

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Agincourt, Toronto",Chinese Restaurant,Cantonese Restaurant,Vietnamese Restaurant,Korean Restaurant,Asian Restaurant,Train Station,Coffee Shop,Food Court,Hong Kong Restaurant,Deli / Bodega
1,"Alderwood, Toronto",Pizza Place,Pub,Coffee Shop,Gym,Skating Rink,Pool,Sandwich Place,Pharmacy,Ethiopian Restaurant,Doner Restaurant
2,"Alexandra Park, Toronto",Bar,Caribbean Restaurant,Café,Furniture / Home Store,Poutine Place,Boutique,Bakery,Asian Restaurant,Italian Restaurant,Arts & Crafts Store
3,"Allenby, Toronto",Skating Rink,Sandwich Place,Bookstore,Tennis Stadium,Tennis Court,Fast Food Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant
4,"Amesbury, Toronto",Park,Bank,Athletics & Sports,Coffee Shop,Gas Station,Intersection,Yoga Studio,Ethiopian Restaurant,Event Space,Falafel Restaurant


### Implement k-means clusterning to develop activity based neighborhood clusters

In [20]:
#wiki_Toronto_dem
Toronto_grouped

# set number of clusters
kclusters = 5

Toronto_grouped_clustering = Toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

# Uncomment the following line of code if running this code for the second time
#Toronto_neighborhoods_venues_sorted = Toronto_neighborhoods_venues_sorted.drop(['Cluster Labels'], axis=1) 
Toronto_neighborhoods_venues_sorted


# add clustering labels
Toronto_neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
Toronto_neighborhoods_venues_sorted.head()
Toronto_merged = wiki_Toronto_dem

Toronto_merged = Toronto_merged.rename(columns = {'Name':'Neighborhood'})
Toronto_merged = Toronto_merged.sort_values(['Neighborhood'], ascending=True).reset_index(drop=True)
Toronto_merged

# merge manhattan_grouped with Toronto_merged data to add latitude/longitude for each neighborhood
Toronto_merged = Toronto_merged.join(Toronto_neighborhoods_venues_sorted.set_index('Neighborhood'), on='Name-ccat')

Toronto_merged.head() # check the last columns!
Toronto_merged.rename(columns = {'FM':'Borough'})

Toronto_merged = Toronto_merged.dropna().reset_index(drop=True)

In [21]:
# create map
map_clusters = folium.Map(location=[Toronto_latitude, Toronto_longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'],
                                  Toronto_merged['Neighborhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color= rainbow[int(cluster)],
        fill=True,
        fill_color=rainbow[int(cluster)],
        fill_opacity=0.7).add_to(map_clusters)
    
map_clusters


In [22]:
Toronto_merged.shape

(165, 29)

### Analyze Cluster #1

In [23]:
#Analyse Cluser #1:

test = Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, Toronto_merged.columns[[1] + list(range(19, Toronto_merged.shape[1]))]]
test

#Find the top 3 places in each ranking list:

"""for column in test.columns[1:]:
    print('------------{}------------'.format(column))
    print(test[column].value_counts().head(3))
    print('\n')"""

#Extract columns 
x = test.iloc[:,1:]
x.mode().iloc[0,:]    

#Wuery the list of all activities
activities = Toronto_grouped.columns
activities

Cluster1_all = x.apply(pd.Series.value_counts).sum(axis=1).sort_values(ascending=False)
Cluster1_top10 = x.apply(pd.Series.value_counts).sum(axis=1).sort_values(ascending=False).head(10)
Cluster1_bottom10 = x.apply(pd.Series.value_counts).sum(axis=1).sort_values(ascending=False).tail(10)
Cluster1_top10


print('----------- CLUSTER#1 TOP 10 -----------')
#print('\n')
print(Cluster1_top10)
print('\n')


print('----------- CLUSTER#1 BOTTOM 10 -----------')
#print('\n')
print(Cluster1_bottom10)



# Count the frequency of occurence of every event
Cluster1_all
y = pd.DataFrame(Cluster1_all,columns =['Freq'])
y.index.name = 'Event'
y

# Filter the popular cuisine (restaurants)
filter_restaurants = y[y.index.str.contains('Rest')]
filter_restaurants

# Filter the popular shops 
filter_shops = y[y.index.str.contains('Shop','Store')]
#filter_shops

filter_restaurants
Cluster1_all

----------- CLUSTER#1 TOP 10 -----------
Park                           22.0
Ethiopian Restaurant           20.0
Event Space                    20.0
Electronics Store              15.0
Farmers Market                 15.0
Falafel Restaurant             15.0
Yoga Studio                    10.0
Fast Food Restaurant            9.0
Eastern European Restaurant     8.0
Field                           8.0
dtype: float64


----------- CLUSTER#1 BOTTOM 10 -----------
Piano Bar              1.0
Kids Store             1.0
Fish Market            1.0
Fish & Chips Shop      1.0
Filipino Restaurant    1.0
Liquor Store           1.0
Mattress Store         1.0
Music Venue            1.0
Pastry Shop            1.0
Art Gallery            1.0
dtype: float64


Event
Park                           22.0
Ethiopian Restaurant           20.0
Event Space                    20.0
Electronics Store              15.0
Farmers Market                 15.0
Falafel Restaurant             15.0
Yoga Studio                    10.0
Fast Food Restaurant            9.0
Eastern European Restaurant     8.0
Field                           8.0
Convenience Store               5.0
Café                            3.0
Skating Rink                    3.0
Bakery                          3.0
Tennis Court                    3.0
Golf Course                     2.0
Construction & Landscaping      2.0
Baseball Field                  2.0
Flower Shop                     2.0
Flea Market                     2.0
Playground                      2.0
Intersection                    2.0
Metro Station                   2.0
Restaurant                      2.0
Business Service                1.0
Doctor's Office                 1.0
Dessert Shop                    1.0
Deli / Bodega         

#### Analyzing cluster #1 we learn that:
1. Poeple in cluster #1 enjoy visiting park and event spaces the most.
2. Ethiopian and Falafel restaurants tend to be very popular among the populace, and there are limited dining options for Asian and Caribbean cuisine
3. Limited interest in fine arts: Galleries, music venues, piano bar
4. Electronics shopping tends to be popular among the popuation, though the overall interest in shopping is very limited. Therefore, we can safely assume that the neighborhoods are dominantly commercial-residential blocks
5. Good neighborhood for sports/outdoor activities: Skating rink, tennis, golf, baseball, playground

### Analyze Cluster #2

In [24]:


test = Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1, Toronto_merged.columns[[1] + list(range(19, Toronto_merged.shape[1]))]]

#Find the top 3 places in each ranking list
"""for column in test.columns[1:]:
    print('------------{}------------'.format(column))
    print(test[column].value_counts().head(3))
    print('\n')"""
    
    

#Extract columns 
x = test.iloc[:,1:]
x.mode().iloc[0,:]    

#Wuery the list of all activities
activities = Toronto_grouped.columns
activities

Cluster1_all = x.apply(pd.Series.value_counts).sum(axis=1).sort_values(ascending=False)
Cluster1_top10 = x.apply(pd.Series.value_counts).sum(axis=1).sort_values(ascending=False).head(10)
Cluster1_bottom10 = x.apply(pd.Series.value_counts).sum(axis=1).sort_values(ascending=False).tail(10)
Cluster1_top10


print('----------- CLUSTER#1 TOP 10 -----------')
#print('\n')
print(Cluster1_top10)
print('\n')


print('----------- CLUSTER#1 BOTTOM 10 -----------')
#print('\n')
print(Cluster1_bottom10)
print('\n')


# Count the frequency of occurence of every event
Cluster1_all
y = pd.DataFrame(Cluster1_all,columns =['Freq'])
y.index.name = 'Event'
y

# Filter the popular cuisine (restaurants)
filter_restaurants = y[y.index.str.contains('Rest')]
filter_restaurants

# Filter the popular shops 
filter_shops = y[y.index.str.contains('Shop','Store')]
#filter_shops

filter_restaurants
Cluster1_all


----------- CLUSTER#1 TOP 10 -----------
Yoga Studio             5.0
Park                    5.0
Fast Food Restaurant    5.0
Farmers Market          5.0
Falafel Restaurant      5.0
Event Space             5.0
Ethiopian Restaurant    5.0
Electronics Store       5.0
Field                   3.0
Dumpling Restaurant     3.0
dtype: float64


----------- CLUSTER#1 BOTTOM 10 -----------
Farmers Market                 5.0
Falafel Restaurant             5.0
Event Space                    5.0
Ethiopian Restaurant           5.0
Electronics Store              5.0
Field                          3.0
Dumpling Restaurant            3.0
Eastern European Restaurant    2.0
Trail                          1.0
River                          1.0
dtype: float64




Event
Yoga Studio                    5.0
Park                           5.0
Fast Food Restaurant           5.0
Farmers Market                 5.0
Falafel Restaurant             5.0
Event Space                    5.0
Ethiopian Restaurant           5.0
Electronics Store              5.0
Field                          3.0
Dumpling Restaurant            3.0
Eastern European Restaurant    2.0
Trail                          1.0
River                          1.0
dtype: float64

#### Analyzing cluster #2 we learn that:
1. Yoga studios, parks, Farmers market, event spaces tend to be the popular go-to spot
2. Access to trails and rivers
3. Fast food, Falafel and Ethiopian restaurants are popular; Eastern European restuarants are relatively less popular

### Analyze Cluster #3


In [25]:
#Analyse Cluser #3

test = Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2, Toronto_merged.columns[[1] + list(range(19, Toronto_merged.shape[1]))]]


#Find the top 3 places in each ranking list
"""for column in test.columns[1:]:
    print('------------{}------------'.format(column))
    print(test[column].value_counts().head(3))
    print('\n')"""
    
    

#Extract columns 
x = test.iloc[:,1:]
x.mode().iloc[0,:]    

#Wuery the list of all activities
activities = Toronto_grouped.columns
activities

Cluster1_all = x.apply(pd.Series.value_counts).sum(axis=1).sort_values(ascending=False)
Cluster1_top10 = x.apply(pd.Series.value_counts).sum(axis=1).sort_values(ascending=False).head(10)
Cluster1_bottom10 = x.apply(pd.Series.value_counts).sum(axis=1).sort_values(ascending=False).tail(10)
Cluster1_top10


print('----------- CLUSTER#1 TOP 10 -----------')
#print('\n')
print(Cluster1_top10)
#print('\n')


print('----------- CLUSTER#1 BOTTOM 10 -----------')
#print('\n')
print(Cluster1_bottom10)

# Count the frequency of occurence of every event
Cluster1_all
y = pd.DataFrame(Cluster1_all,columns =['Freq'])
y.index.name = 'Event'
y

# Filter the popular cuisine (restaurants)
filter_restaurants = y[y.index.str.contains('Rest')]
filter_restaurants

# Filter the popular shops 
filter_shops = y[y.index.str.contains('Shop','Store')]
filter_shops
Cluster1_all

----------- CLUSTER#1 TOP 10 -----------
Coffee Shop             45.0
Café                    44.0
Pizza Place             37.0
Fast Food Restaurant    37.0
Restaurant              34.0
Bakery                  29.0
Ethiopian Restaurant    29.0
Electronics Store       28.0
Falafel Restaurant      28.0
Italian Restaurant      27.0
dtype: float64
----------- CLUSTER#1 BOTTOM 10 -----------
Dog Run                  1.0
Donut Shop               1.0
Smoke Shop               1.0
Shoe Store               1.0
Science Museum           1.0
Salad Place              1.0
Rental Car Location      1.0
Portuguese Restaurant    1.0
Pool                     1.0
ATM                      1.0
dtype: float64


Event
Coffee Shop                      45.0
Café                             44.0
Pizza Place                      37.0
Fast Food Restaurant             37.0
Restaurant                       34.0
Bakery                           29.0
Ethiopian Restaurant             29.0
Electronics Store                28.0
Falafel Restaurant               28.0
Italian Restaurant               27.0
Event Space                      27.0
Grocery Store                    24.0
Sandwich Place                   23.0
Farmers Market                   20.0
Yoga Studio                      19.0
Sushi Restaurant                 18.0
Park                             18.0
Bank                             16.0
Pharmacy                         15.0
Field                            14.0
Pub                              14.0
Bar                              14.0
Japanese Restaurant              13.0
Gas Station                      12.0
Mexican Restaurant               11.0
Indian Restaurant                11.0
Ice Cr

#### Analyzing cluster #3 we learn that:
1. Coffee shops and cafes are the most popular eateries
2. Lot of options for dining and arts

### Analyze Cluster #4


In [26]:
#Analyse Cluser #4

test = Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3, Toronto_merged.columns[[1] + list(range(19, Toronto_merged.shape[1]))]]


#Find the top 3 places in each ranking list
"""for column in test.columns[1:]:
    print('------------{}------------'.format(column))
    print(test[column].value_counts().head(3))
    print('\n')"""
    
    

#Extract columns 
x = test.iloc[:,1:]
x.mode().iloc[0,:]    

#Wuery the list of all activities
activities = Toronto_grouped.columns
activities

Cluster1_all = x.apply(pd.Series.value_counts).sum(axis=1).sort_values(ascending=False)
Cluster1_top10 = x.apply(pd.Series.value_counts).sum(axis=1).sort_values(ascending=False).head(10)
Cluster1_bottom10 = x.apply(pd.Series.value_counts).sum(axis=1).sort_values(ascending=False).tail(10)
Cluster1_top10


print('----------- CLUSTER#1 TOP 10 -----------')
#print('\n')
print(Cluster1_top10)
#print('\n')


print('----------- CLUSTER#1 BOTTOM 10 -----------')
#print('\n')
print(Cluster1_bottom10)



# Count the frequency of occurence of every event
Cluster1_all
y = pd.DataFrame(Cluster1_all,columns =['Freq'])
y.index.name = 'Event'
y

# Filter the popular cuisine (restaurants)
filter_restaurants = y[y.index.str.contains('Rest')]
filter_restaurants

# Filter the popular shops 
filter_shops = y[y.index.str.contains('Shop','Store')]
filter_shops

filter_restaurants
Cluster1_all


----------- CLUSTER#1 TOP 10 -----------
Coffee Shop             27.0
Park                    10.0
Pizza Place             10.0
Grocery Store            9.0
Fast Food Restaurant     9.0
Ethiopian Restaurant     8.0
Pharmacy                 8.0
Restaurant               8.0
Sandwich Place           8.0
Falafel Restaurant       7.0
dtype: float64
----------- CLUSTER#1 BOTTOM 10 -----------
Trail                     1.0
Greek Restaurant          1.0
Gourmet Shop              1.0
Gastropub                 1.0
Furniture / Home Store    1.0
Fried Chicken Joint       1.0
French Restaurant         1.0
Food Truck                1.0
Plaza                     1.0
Arts & Crafts Store       1.0
dtype: float64


Event
Coffee Shop                    27.0
Park                           10.0
Pizza Place                    10.0
Grocery Store                   9.0
Fast Food Restaurant            9.0
Ethiopian Restaurant            8.0
Pharmacy                        8.0
Restaurant                      8.0
Sandwich Place                  8.0
Falafel Restaurant              7.0
Electronics Store               7.0
Bank                            7.0
Eastern European Restaurant     6.0
Gym                             6.0
Pub                             6.0
Dumpling Restaurant             5.0
Café                            5.0
Bar                             5.0
Bakery                          5.0
Yoga Studio                     5.0
Intersection                    4.0
Event Space                     4.0
Italian Restaurant              3.0
Japanese Restaurant             3.0
Bus Stop                        3.0
Discount Store                  3.0
Supermarket                     3.0
Farmers Market        

#### Analyzing cluster #4 we learn that:
1. Coffee shops tend to be the popular go-to spot
2. Pizza, fast-food and Ethiopian restaurants tend to be very popular among the populace, with limited interest in French, Mediterranean, Middle Eastern, Portugese and Ramen restaurants
4. Likely a residential cluster as there are manny grocery stores and pharmacies in this cluster (popular signatures of residential neighborhoods)

### Analyze Cluster #5


In [27]:
#Analyse Cluser #5

test = Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 4, Toronto_merged.columns[[1] + list(range(19, Toronto_merged.shape[1]))]]


#Find the top 3 places in each ranking list
"""for column in test.columns[1:]:
    print('------------{}------------'.format(column))
    print(test[column].value_counts().head(3))
    print('\n')"""
    
    

#Extract columns 
x = test.iloc[:,1:]
x.mode().iloc[0,:]    

#Wuery the list of all activities
activities = Toronto_grouped.columns
activities

Cluster1_all = x.apply(pd.Series.value_counts).sum(axis=1).sort_values(ascending=False)
Cluster1_top10 = x.apply(pd.Series.value_counts).sum(axis=1).sort_values(ascending=False).head(10)
Cluster1_bottom10 = x.apply(pd.Series.value_counts).sum(axis=1).sort_values(ascending=False).tail(10)
Cluster1_top10


print('----------- CLUSTER#1 TOP 10 -----------')
#print('\n')
print(Cluster1_top10)
#print('\n')


print('----------- CLUSTER#1 BOTTOM 10 -----------')
#print('\n')
print(Cluster1_bottom10)


# Count the frequency of occurence of every event
Cluster1_all
y = pd.DataFrame(Cluster1_all,columns =['Freq'])
y.index.name = 'Event'
y

# Filter the popular cuisine (restaurants)
filter_restaurants = y[y.index.str.contains('Rest')]
filter_restaurants

# Filter the popular shops 
filter_shops = y[y.index.str.contains('Shop','Store')]
filter_shops

filter_restaurants
Cluster1_all

----------- CLUSTER#1 TOP 10 -----------
Falafel Restaurant      4.0
Event Space             4.0
Field                   4.0
Trail                   4.0
Farmers Market          3.0
Electronics Store       3.0
Ethiopian Restaurant    3.0
Yoga Studio             3.0
Fast Food Restaurant    3.0
Filipino Restaurant     2.0
dtype: float64
----------- CLUSTER#1 BOTTOM 10 -----------
Yoga Studio                   3.0
Fast Food Restaurant          3.0
Filipino Restaurant           2.0
Financial or Legal Service    1.0
Hockey Arena                  1.0
Dumpling Restaurant           1.0
Dog Run                       1.0
Convenience Store             1.0
Basketball Court              1.0
Bakery                        1.0
dtype: float64


Event
Falafel Restaurant            4.0
Event Space                   4.0
Field                         4.0
Trail                         4.0
Farmers Market                3.0
Electronics Store             3.0
Ethiopian Restaurant          3.0
Yoga Studio                   3.0
Fast Food Restaurant          3.0
Filipino Restaurant           2.0
Financial or Legal Service    1.0
Hockey Arena                  1.0
Dumpling Restaurant           1.0
Dog Run                       1.0
Convenience Store             1.0
Basketball Court              1.0
Bakery                        1.0
dtype: float64

#### Analyzing cluster #5 we learn that:
1. Active for outdoor/sporting activities: Field, trail, Hockey arena, Basket ball court

In [67]:
#Toronto_grouped
#Toronto_venues['Venue Category'].value_counts()[Toronto_venues['Venue Category'].value_counts().index.str.contains('Rest')]

Toronto_grouped.corr()
wiki_Toronto_dem

Unnamed: 0,Name,Name-ccat,FM,Population,Land area (km2),Density (people/km2),% Change in Population since 2001,Average Income,Transit Commuting %,% Renters,Ethnicity,Ethnicity Percentage (%),Ethnicity_new,Address,Latitude,Longitude,Color_ethnicity,Color_FM
0,Crescent Town,"Crescent Town, Toronto",EY,8157,0.4,20393,-10.0,23021,24.5,20.3,Bengali (18.1%),18.1,Bengali,"Crescent Town, Beaches—East York, East York, T...",43.6954,-79.2931,#8000ff,#8000ff
1,Governor's Bridge,"Governor's Bridge/Bennington Heights, Toronto",EY,2112,1.87,1129,4.0,129904,7.1,13.3,Polish (1.4%),1.4,Polish,"Governors Road, Bennington Heights, University...",43.6894,-79.3694,#6e1cff,#8000ff
2,Leaside,"Leaside, Toronto",EY,13876,2.81,4938,3.0,82670,9.7,10.5,Bulgarian (0.4%),0.4,Bulgarian,"Leaside, Don Valley West, East York, Toronto, ...",43.7048,-79.3681,#5a3bfd,#8000ff
3,O'Connor–Parkview,"O'Connor–Parkview, Toronto",EY,17740,4.94,3591,-6.1,33517,15.8,19.4,Urdu (3.2%),3.2,Urdu,"O'Connor Drive, Woodbine Heights, Beaches—East...",43.7024,-79.3161,#4659fb,#8000ff
4,Old East York,"Old East York, Toronto",EY,52220,7.94,6577,-4.6,33172,22.0,19.1,Greek (4.3%),4.3,Greek,"East York, Toronto, Golden Horseshoe, Ontario,...",43.7,-79.3325,#3176f8,#8000ff
5,Thorncliffe Park,"Thorncliffe Park, Toronto",EY,17949,3.09,5809,9.1,25340,16.7,32.5,Urdu (21.5%),21.5,Urdu,"Thorncliffe Park, Don Valley West, East York, ...",43.7046,-79.3454,#4659fb,#8000ff
6,Alderwood,"Alderwood, Toronto",E,11656,4.94,2360,-4.0,35239,8.8,8.5,Polish (6.2%),6.2,Polish,"Alderwood, Etobicoke—Lakeshore, Etobicoke, Tor...",43.6017,-79.5452,#6e1cff,#1996f3
7,Centennial,"Centennial, Toronto",E,12565,4.94,2544,0.5,34867,11.5,8.8,Polish (2.7%),2.7,Polish,"Centennial, Scarborough—Rouge Park, Scarboroug...",43.7875,-79.1508,#6e1cff,#1996f3
8,Clairville,"Clairville, Toronto",E,8506,6.71,1268,-3.3,26610,13.2,7.2,Punjabi (12.0%),12.0,Punjabi,"Toronto Pearson International Airport, Disco R...",43.6785,-79.6291,#1e91f3,#1996f3
9,Eatonville,"Eatonville, Toronto",E,19131,11.26,1699,4.3,36206,12.6,13.4,Serbian (3.2%),3.2,Serbian,"Eatonville, Etobicoke Centre, Etobicoke, Toron...",43.6463,-79.56,#09a9ee,#1996f3
