# Toronto vs. London neighborhoods

In this project I will assess the similarity between the cities of Toronto, Canada and London, Great Britain. This assessment could help someone that is moving between the two cities in deciding the neighbourhoods that are similar to the one where they currently live. Because I am using crime data in the analysis, this information can also help security companies pursue their customers from high crime rate neighbourhoods and can also be used by landlords in implementing extra security measures for the buildings that they own in order to increase safety in and around their building.


I have started collecting my data with web scraping wikipedia pages for the neighbourhood names in each city, then organising the information into data frames. 
For the crime information I have downloaded crime datasets for each city from the respective police stations and I have resumed it to a data frame containing the neighbourhood name and total crimes in the years 2015-2016 in each neighbourhood, for each city. Afterwards I have combined the resulting data frames and created a map visualisation of each neighbourhood with its respective crime rates.

In [1]:
!conda install -c conda-forge folium=0.5.0 --yes 
import folium 

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim 

!pip install geocoder
import geocoder

import json
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import requests 
from pandas.io.json import json_normalize 

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans
from sklearn import preprocessing

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


## Wrangling the crime data of the cities
### Toronto

In [2]:
# Viewing the crime data of Toronto, downloaded from the police website 
crimeT=pd.read_csv('/Users/Elena/Downloads/TorontoCrime.csv')
crimeT.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,X,Y,Index_,event_unique_id,Division,occurrencedate,reporteddate,premises_type,ucr_code,ucr_ext,offence,reportedyear,reportedmonth,reportedday,reporteddayofyear,reporteddayofweek,reportedhour,occurrenceyear,occurrencemonth,occurrenceday,occurrencedayofyear,occurrencedayofweek,occurrencehour,MCI,Hood_ID,Neighbourhood,Long,Lat,ObjectId
0,-8859749.0,5423033.0,10182,GO-20142103239,D23,2014/05/18 04:00:00+00,2014/05/18 04:00:00+00,House,1430,100,Assault,2014,May,18,138,Sunday,12,2014.0,May,18.0,138.0,Sunday,12,Assault,1,West Humber-Clairville (1),-79.588477,43.725321,1
1,-8861050.0,5426864.0,10298,GO-20142111528,D23,2014/05/19 04:00:00+00,2014/05/19 04:00:00+00,Outside,1430,100,Assault,2014,May,19,139,Monday,21,2014.0,May,19.0,139.0,Monday,20,Assault,1,West Humber-Clairville (1),-79.600166,43.750187,2
2,-8861412.0,5422084.0,10300,GO-20142111859,D23,2014/05/19 04:00:00+00,2014/05/19 04:00:00+00,Apartment,2120,200,B&E,2014,May,19,139,Monday,22,2014.0,May,19.0,139.0,Monday,22,Break and Enter,1,West Humber-Clairville (1),-79.60342,43.719158,3
3,-8859955.0,5424372.0,10345,GO-20142116041,D23,2014/05/20 04:00:00+00,2014/05/20 04:00:00+00,Outside,1420,100,Assault With Weapon,2014,May,20,140,Tuesday,14,2014.0,May,20.0,140.0,Tuesday,14,Assault,1,West Humber-Clairville (1),-79.590332,43.734013,4
4,-8859955.0,5424372.0,10346,GO-20142116041,D23,2014/05/20 04:00:00+00,2014/05/20 04:00:00+00,Outside,1420,110,Assault Bodily Harm,2014,May,20,140,Tuesday,14,2014.0,May,20.0,140.0,Tuesday,14,Assault,1,West Humber-Clairville (1),-79.590332,43.734013,5


In [3]:
# Deleting the rows that are not necessary for the analysis. 
# I will be using the registered crimes for the years 2015-2016
crimeT=crimeT[crimeT['reportedyear']<=2016]
crimeT=crimeT[crimeT['reportedyear']>2014]

crimeT.drop(crimeT.iloc[:, 0:10], axis=1, inplace=True)
crimeT.drop(crimeT.iloc[:, 2:13], axis=1, inplace=True)
crimeT.drop(crimeT.iloc[:, 3:4], axis=1, inplace=True)
crimeT.drop(crimeT.iloc[:, 4:7], axis=1, inplace=True)

crimeT.head()

Unnamed: 0,offence,reportedyear,MCI,Neighbourhood
114,Assault,2015,Assault,West Humber-Clairville (1)
118,Assault,2015,Assault,West Humber-Clairville (1)
120,Assault,2015,Assault,West Humber-Clairville (1)
122,Assault,2015,Assault,West Humber-Clairville (1)
124,Assault,2015,Assault,West Humber-Clairville (1)


In [4]:
# Sorting the data by neighborhood
crimeT=crimeT['Neighbourhood'].value_counts()
crimeT=crimeT.to_frame().reset_index()
crimeT.columns=['Neighborhood', 'TotalCrimes']

split=pd.DataFrame()
split[['New1', 'New2']]=crimeT['Neighborhood'].str.split("(", 1, expand=True)

crimeT['Neighborhood']=split['New1']

crimeT=crimeT.sort_values('Neighborhood')
crimeT=crimeT.reset_index()
crimeT.drop(crimeT.iloc[:, 0:1], axis=1, inplace=True)

rowsC=crimeT.iloc[[28, 84, 116]]
rowsC

crimeT=crimeT.drop([crimeT.index[28], crimeT.index[84], crimeT.index[116]])
crimeT=crimeT.reset_index()
crimeT.drop(crimeT.iloc[:, 0:1], axis=1, inplace=True)

print("Shape of crimeT dataframe:", crimeT.shape)

crimeT.head()

Shape of crimeT dataframe: (138, 2)


Unnamed: 0,Neighborhood,TotalCrimes
0,Agincourt North,363
1,Agincourt South-Malvern West,478
2,Alderwood,204
3,Annex,943
4,Banbury-Don Mills,403


### London

In [5]:
# Viewing the crime data of London, downloaded from the police website 
crimeL=pd.read_csv('/Users/Elena/Downloads/Crime_London.csv')
crimeL.head()

Unnamed: 0,WardCode,Ward Name,Borough,Major Category,Minor Category,201004,201005,201006,201007,201008,201009,201010,201011,201012,201101,201102,201103,201104,201105,201106,201107,201108,201109,201110,201111,201112,201201,201202,201203,201204,201205,201206,201207,201208,201209,201210,201211,201212,201301,201302,201303,201304,201305,201306,201307,201308,201309,201310,201311,201312,201401,201402,201403,201404,201405,201406,201407,201408,201409,201410,201411,201412,201501,201502,201503,201504,201505,201506,201507,201508,201509,201510,201511,201512,201601,201602,201603,201604,201605,201606,201607,201608,201609,201610,201611,201612,201701,201702,201703,201704,201705,201706,201707,201708,201709,201710,201711,201712,201801,201802,201803,201804,201805,201806,201807,201808,201809,201810,201811,201812
0,E05000026,Abbey,Barking and Dagenham,Burglary,Burglary In A Dwelling,9,4,6,6,12,5,3,11,8,8,7,4,7,13,5,4,5,8,8,7,7,9,6,5,6,9,6,6,4,5,5,11,1,6,9,9,1,6,4,5,5,2,2,6,3,5,5,9,2,6,3,4,2,2,1,6,5,0,7,1,7,4,2,2,6,3,2,7,3,4,6,5,2,2,10,3,2,3,1,3,3,3,1,7,12,12,3,5,4,3,3,4,3,7,6,8,5,5,4,6,3,6,6,4,8
1,E05000027,Alibon,Barking and Dagenham,Burglary,Burglary In A Dwelling,7,3,8,12,12,14,4,8,7,8,8,10,12,8,8,11,9,5,3,9,4,11,15,17,10,9,8,6,5,3,6,7,5,10,14,4,2,6,3,3,2,1,6,3,5,7,4,11,7,4,4,5,5,0,6,10,7,9,4,5,3,1,1,5,2,5,4,3,8,6,5,7,2,5,4,5,0,0,2,2,5,4,8,3,5,5,11,9,4,0,4,1,1,8,8,7,1,5,3,2,4,4,8,4,10
2,E05000028,Becontree,Barking and Dagenham,Burglary,Burglary In A Dwelling,10,6,21,7,13,8,4,12,9,16,9,21,10,6,7,10,10,15,10,13,12,18,3,14,10,14,6,14,11,8,12,12,18,13,9,12,20,9,7,5,4,7,16,15,10,6,6,8,2,10,11,6,13,9,11,8,9,19,7,2,5,8,2,7,5,6,10,3,9,8,7,6,4,2,6,4,4,4,4,6,5,12,15,4,5,14,5,10,5,7,15,5,11,7,6,7,8,6,4,7,3,6,5,10,9
3,E05000029,Chadwell Heath,Barking and Dagenham,Burglary,Burglary In A Dwelling,10,10,11,8,13,7,12,16,4,18,8,18,9,6,6,7,13,7,10,11,15,14,9,13,10,15,8,14,10,7,10,14,13,13,10,8,11,10,10,5,8,7,11,10,7,12,7,6,7,6,5,11,6,5,7,13,6,6,10,9,3,2,7,7,6,4,6,12,10,7,3,4,1,7,2,5,2,4,3,4,11,10,6,7,7,4,2,2,4,3,5,4,11,12,10,7,5,8,6,5,4,8,5,10,8
4,E05000030,Eastbrook,Barking and Dagenham,Burglary,Burglary In A Dwelling,3,5,2,6,4,4,8,3,6,10,4,7,7,4,2,3,4,6,8,16,14,9,10,8,8,11,6,3,4,5,5,6,8,11,10,5,8,8,5,7,3,2,6,5,5,6,5,5,1,4,2,13,4,8,10,8,7,6,0,6,0,2,5,5,5,3,4,5,7,5,7,1,3,3,1,4,2,7,8,2,7,2,4,7,2,8,3,5,3,5,3,3,2,11,2,5,2,1,2,3,3,6,6,11,2


In [6]:
# Deleting the columns that are not necessary for the analysis. 
# I will be using the registered crimes for the years 2015-2016
crimeL.drop(crimeL.iloc[:, 5:62], axis=1, inplace=True)
crimeL.drop(crimeL.iloc[:, 29:56], axis=1, inplace=True)
crimeL.drop(crimeL.iloc[:, 0:1], axis=1, inplace=True)
crimeL.drop(crimeL.iloc[:, 2:4], axis=1, inplace=True)

# Creating a new column for the total number of crimes in 2015 and 2016
crimeL['TotalCrimes']=crimeL.iloc[:, 2:26].sum(axis=1)
crimeL.drop(crimeL.iloc[:, 2:26], axis=1, inplace=True)

crimeL.columns=['Neighborhood', 'Borough', 'TotalCrimes']
crimeL.head()

Unnamed: 0,Neighborhood,Borough,TotalCrimes
0,Abbey,Barking and Dagenham,88
1,Alibon,Barking and Dagenham,93
2,Becontree,Barking and Dagenham,143
3,Chadwell Heath,Barking and Dagenham,135
4,Eastbrook,Barking and Dagenham,98


In [7]:
# Organizing the data by neighborhood (before it was organized by type of crime)
crimeL=crimeL.groupby(['Neighborhood', 'Borough']).sum()
crimeL=crimeL.reset_index()
crimeL.head()

Unnamed: 0,Neighborhood,Borough,TotalCrimes
0,Abbey,Barking and Dagenham,4334
1,Abbey,Merton,1448
2,Abbey Road,Westminster,1215
3,Abbey Wood,Greenwich,2640
4,Abingdon,Kensington and Chelsea,1771


## Webscraping for the neighborhoods of the cities
### Toronto

In [8]:
# Webscraping the wikipedia page for the neighborhoods of Toronto
url = 'https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Toronto#Table'
r=requests.get(url)
toronto_list= pd.read_html(r.text)
Toronto=toronto_list[10]
Toronto.drop(Toronto.iloc[:, 3:6], axis=1, inplace=True)
Toronto.drop(Toronto.iloc[:, 0:1], axis=1, inplace=True)
Toronto.columns=['Neighborhood', 'Borough']

Toronto=Toronto.drop([Toronto.index[28], Toronto.index[30]])
Toronto=Toronto.reset_index()
Toronto.drop(Toronto.iloc[:, 0:1], axis=1, inplace=True)

print("Shape of Toronto dataframe:", Toronto.shape)

Toronto.head()

Shape of Toronto dataframe: (138, 2)


Unnamed: 0,Neighborhood,Borough
0,Agincourt North,Scarborough
1,Agincourt South-Malvern West,Scarborough
2,Alderwood,Etobicoke
3,Annex,Old City of Toronto
4,Banbury-Don Mills,North York


### London

In [9]:
# Webscraping the wikipedia page for the neighborhoods of London
url = 'https://en.wikipedia.org/wiki/List_of_areas_of_London'
r=requests.get(url)
london_list= pd.read_html(r.text)
London=london_list[1]
print("Shape of data frame:", London.shape)
London.head()

Shape of data frame: (531, 6)


Unnamed: 0,Location,London borough,Post town,Postcode district,Dial code,OS grid ref
0,Abbey Wood,"Bexley, Greenwich [7]",LONDON,SE2,20,TQ465785
1,Acton,"Ealing, Hammersmith and Fulham[8]",LONDON,"W3, W4",20,TQ205805
2,Addington,Croydon[8],CROYDON,CR0,20,TQ375645
3,Addiscombe,Croydon[8],CROYDON,CR0,20,TQ345665
4,Albany Park,Bexley,"BEXLEY, SIDCUP","DA5, DA14",20,TQ478728


In [10]:
# Organizing the data and leaving only the neighborhood and borough columns
London.columns=['Neighborhood', 'Borough', 'PostTown', 'PostCode', 'Dial', 'OS']
London=London.drop(['PostTown','PostCode','Dial', 'OS'], axis=1)

split=pd.DataFrame()
split[['New1', 'New2']]=London['Borough'].str.split("[", 1, expand=True)
London['Borough']=split['New1']

London.head()

Unnamed: 0,Neighborhood,Borough
0,Abbey Wood,"Bexley, Greenwich"
1,Acton,"Ealing, Hammersmith and Fulham"
2,Addington,Croydon
3,Addiscombe,Croydon
4,Albany Park,Bexley


## Merging the crime data with the neighborhoods and boroughs then getting the neighborhoods coordinates
### Toronto

In [11]:
# Merging the Toronto neighborhood data with the crime data
dfToronto=pd.merge(crimeT, Toronto, right_index=True, left_index=True)
dfToronto.drop(dfToronto.iloc[:, 0:1], axis=1, inplace=True)
dfToronto.columns=['TotalCrimes', 'Neighborhood', 'Borough']

dfToronto=dfToronto.filter(['Neighborhood', 'Borough', 'TotalCrimes'])
print("Shape of the final dfToronto:", dfToronto.shape)
dfToronto.head()

Shape of the final dfToronto: (138, 3)


Unnamed: 0,Neighborhood,Borough,TotalCrimes
0,Agincourt North,Scarborough,363
1,Agincourt South-Malvern West,Scarborough,478
2,Alderwood,Etobicoke,204
3,Annex,Old City of Toronto,943
4,Banbury-Don Mills,North York,403


In [12]:
neighborhood = dfToronto['Neighborhood']

# Empty lists that will store the coordinates information from the geocoder
latitude = []
longitude = []
n = 0

# Loop that keeps on trying to get the latitude and longitude for each neighborhood until it succeedes
while n < len(neighborhood):
    g = geocoder.arcgis('{}, Toronto, Ontario'.format(neighborhood[n]))
    lat_lng_coords = g.latlng
    print('The geograpical coordinate of {} are {}, {}.'.format(neighborhood[n], lat_lng_coords[0], lat_lng_coords[1]))
    latitude.append(lat_lng_coords[0])
    longitude.append(lat_lng_coords[1])
    n = n + 1

dfToronto['Longitude'] = longitude
dfToronto['Latitude'] = latitude
dfToronto.head()

The geograpical coordinate of Agincourt North are 43.809300000000064, -79.26706999999999.
The geograpical coordinate of Agincourt South-Malvern West are 43.787360000000035, -79.26934999999997.
The geograpical coordinate of Alderwood are 43.60496000000006, -79.54115999999993.
The geograpical coordinate of Annex are 43.66936000000004, -79.40279999999996.
The geograpical coordinate of Banbury-Don Mills are 43.740410000000054, -79.34851999999995.
The geograpical coordinate of Bathurst Manor are 43.763780000000054, -79.45476999999994.
The geograpical coordinate of Bay Street Corridor are 43.657710000000066, -79.38617999999997.
The geograpical coordinate of Bayview Village are 43.777100000000075, -79.37956999999994.
The geograpical coordinate of Bayview Woods-Steeles are 43.794850000000054, -79.38221999999996.
The geograpical coordinate of Bedford Park-Nortown are 43.73066000000006, -79.42449999999997.
The geograpical coordinate of Beechborough-Greenbrook are 43.69311000000005, -79.478309999

Unnamed: 0,Neighborhood,Borough,TotalCrimes,Longitude,Latitude
0,Agincourt North,Scarborough,363,-79.26707,43.8093
1,Agincourt South-Malvern West,Scarborough,478,-79.26935,43.78736
2,Alderwood,Etobicoke,204,-79.54116,43.60496
3,Annex,Old City of Toronto,943,-79.4028,43.66936
4,Banbury-Don Mills,North York,403,-79.34852,43.74041


#### Creating the map of Toronto with the neighborhoods

In [13]:
# Map of Toronto with the neighborhoods superimposed on it
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood, crimes in zip(dfToronto['Latitude'], dfToronto['Longitude'], dfToronto['Borough'], dfToronto['Neighborhood'], dfToronto['TotalCrimes']):
    label = '{}, {}, {}'.format(neighborhood, borough, crimes)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=6,
        popup=label,
        color='mediumorchid',
        fill=True,
        fill_color='#66CDAA',
        fill_opacity=0.6,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

In [14]:
borough_count=dfToronto['Borough'].value_counts()
borough_count
print("Number of boroughs in Toronto:", len(borough_count))
print("Neighborhood count for each borough:", borough_count)

Number of boroughs in Toronto: 6
Neighborhood count for each borough: Old City of Toronto    43
North York             33
Scarborough            25
Etobicoke              20
York                   10
East York               7
Name: Borough, dtype: int64


### London

In [15]:
# Merging the crime dataframe with the neighborhoods dataframe
dfLondon=pd.merge(London, crimeL, how='outer', on='Neighborhood')
dfLondon.sort_values('Neighborhood')

# Deleting the neighborhoods that were duplicates and merging the information from the borough columns
dfLondon=dfLondon.loc[dfLondon.index.drop([5, 25, 34, 37, 38, 40, 41, 47, 50, 53, 55, 55, 59, 65, 68, 69, 70, 71, 73,  77, 79, 82, 84, 87, 92, 94, 95, 96, 100, 102, 105, 115, 116, 136, 144, 145, 151, 154, 158, 164, 171, 173, 174, 175, 179, 186, 187, 197, 198, 199, 200, 203, 211, 212, 213, 228, 233, 234, 239, 241, 243, 244, 246, 255, 271, 273, 278, 284, 287, 289, 308, 312, 321, 323, 332, 340, 347, 351, 353, 360, 365, 367, 374, 375, 386, 387, 394, 395, 403, 408, 410, 434, 447, 453, 457, 459, 461, 469, 471, 472, 480, 482, 490, 493, 495, 497, 503, 520, 526, 527, 531, 534, 536, 595, 639, 644, 687, 707, 709, 739, 744, 766, 810, 830, 931])]
dfLondon['Borough_x']=dfLondon['Borough_x'].mask(pd.isnull, dfLondon['Borough_y'])
dfLondon=dfLondon.drop(['Borough_y'], axis=1)
dfLondon.columns=['Neighborhood', 'Borough', 'TotalCrimes']
dfLondon['TotalCrimes']=dfLondon['TotalCrimes'].fillna(0)

# The geocoder does not work if the borough is 'City'
dfLondon=dfLondon[dfLondon.Borough != 'City']

dfLondon['Borough']=dfLondon['Borough'].replace({'Lambeth, Southwark':'Lambeth', 'Barnet, Brent, Camden':'Camden', 'Camden, Islington':'Camden',
                                     'Brent, Camden':'Camden', 'Haringey, Islington':'Islington', 'City, Westminster':'Westminster', 'Greenwich, Lewisham':'Greenwich',
                                     'Bexley, Greenwich':'Greenwich','Islington, City':'Islington', 'Lewisham, Southwark':'Lewisham', 'Islington, Camden':'Islington'})

# Selecting only the boroughs that are in the metropolitan area of London
dfLondon=dfLondon[dfLondon['Borough'].isin(['Camden', 'Greenwich', 'Hackney', 'Hammersmith and Fulham', 'Islington', 'Kensington and Chelsea', 'Lambeth', 'Lewisham', 'Southwark', 'Tower Hamlets', 'Wandsworth', 'Westminster'])]
dfLondon['Neighborhood']=dfLondon['Neighborhood'].replace({'Town':'Town, Askew and Munster', 'Evelyn':'Evelyn and Lewisham Central', 'Dalgarno':'Kensington and Chelsea', 'Tooting':'Wandsworth', 'Chinatown':'Westminster'})
dfLondon=dfLondon.sort_values('Neighborhood').reset_index()
dfLondon.drop(dfLondon.iloc[:, 0:1], axis=1, inplace=True)  

# Based on the coordinates resulted from the geocoder, the neighborhoods with the index values below
#have the same coordinates so I have removed them for the analysis to be more accurate
dfLondon=dfLondon.loc[dfLondon.index.drop([1, 6, 13, 34, 39, 49, 62, 64, 85, 92, 99, 102, 117, 131, 133, 145, 149, 151, 153, 157, 163, 166, 170, 176, 178, 179, 189, 197, 203, 214, 217, 218, 229, 236, 243, 247, 255, 259, 263, 265, 275, 280, 284, 288, 289, 298])]
dfLondon=dfLondon.sort_values('Neighborhood').reset_index()
dfLondon.drop(dfLondon.iloc[:, 0:1], axis=1, inplace=True)

# In order not to lose any crime data by deleting the neighborhoods with the same coordinates,
#the below neighborhoods were updated with the sum of the crimes in the deleted ones
indexlist=[75, 118, 121, 230, 239, 252]
valuelist=[1526, 17296, 23374, 5767, 18435, 32207]
        
for i in indexlist:
        dfLondon.loc[i, 'TotalCrimes']=999999

result=dfLondon.where(dfLondon['TotalCrimes']==999999)
result=result.dropna(axis=0)  
result['TotalCrimes']=valuelist

dfLondon.drop(dfLondon[dfLondon.TotalCrimes == 999999].index, inplace=True)
dfLondon=pd.concat([dfLondon, result]) 

print("Shape of the final dfLondon:", dfLondon.shape)
dfLondon.head()

Shape of the final dfLondon: (260, 3)


Unnamed: 0,Neighborhood,Borough,TotalCrimes
0,Abbey Road,Westminster,1215.0
1,Addison,Hammersmith and Fulham,2634.0
2,Aldwych,Westminster,0.0
3,Angel,Islington,0.0
4,Archway,Islington,0.0


In [16]:
neighborhood = dfLondon['Neighborhood']
borough=dfLondon['Borough']

# Empty lists that will store the coordinates information from the geocoder
latitude = []
longitude = []
n = 0

# Loop that keeps on trying to get the latitude and longitude for each neighborhood until it succeedes
while n < len(neighborhood):
    g = geocoder.arcgis('{}, {}, London, England'.format(neighborhood[n], borough[n]))
    lat_lng_coords = g.latlng
    print('The geograpical coordinate of {}, {} are {}, {}.'.format(neighborhood[n], borough[n], lat_lng_coords[0], lat_lng_coords[1]))
    latitude.append(lat_lng_coords[0])
    longitude.append(lat_lng_coords[1])
    n = n + 1

dfLondon['Longitude'] = longitude
dfLondon['Latitude'] = latitude
dfLondon.head()

The geograpical coordinate of Abbey Road, Westminster are 51.49765730513152, -0.12719693873343343.
The geograpical coordinate of Addison, Hammersmith and Fulham are 51.48260000000005, -0.21287999999992735.
The geograpical coordinate of Aldwych, Westminster are 51.51265251684556, -0.11860743006660546.
The geograpical coordinate of Angel, Islington are 51.532020000000045, -0.10625999999996338.
The geograpical coordinate of Archway, Islington are 51.56574780726418, -0.13492240619480866.
The geograpical coordinate of Avonmore and Brook Green, Hammersmith and Fulham are 51.49620224997504, -0.2191129500854935.
The geograpical coordinate of Balham, Wandsworth are 51.44421949800005, -0.1505454249999616.
The geograpical coordinate of Bankside, Southwark are 51.50816355244705, -0.09521630696980721.
The geograpical coordinate of Barnsbury, Islington are 51.536488687022775, -0.11090694947026249.
The geograpical coordinate of Battersea, Wandsworth are 51.459230038551254, -0.1730798884169502.
The ge

Unnamed: 0,Neighborhood,Borough,TotalCrimes,Longitude,Latitude
0,Abbey Road,Westminster,1215.0,-0.127197,51.497657
1,Addison,Hammersmith and Fulham,2634.0,-0.21288,51.4826
2,Aldwych,Westminster,0.0,-0.118607,51.512653
3,Angel,Islington,0.0,-0.10626,51.53202
4,Archway,Islington,0.0,-0.134922,51.565748


#### Creating the map of London with the neighborhoods

In [17]:
# Map of London with the neighborhoods superimposed on it
map_London = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood, crimes in zip(dfLondon['Latitude'], dfLondon['Longitude'], dfLondon['Borough'], dfLondon['Neighborhood'], dfLondon['TotalCrimes']):
    label = '{}, {}, {}'.format(neighborhood, borough, crimes)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=6,
        popup=label,
        color='mediumorchid',
        fill=True,
        fill_color='#66CDAA',
        fill_opacity=0.6,
        parse_html=False).add_to(map_London)  
    
map_London

In [18]:
borough_count=dfLondon['Borough'].value_counts()
borough_count
print("Number of boroughs in metropolitan London:", len(borough_count))
print("Neighborhood count for each borough:", borough_count)

Number of boroughs in metropolitan London: 12
Neighborhood count for each borough: Hackney                   28
Tower Hamlets             28
Westminster               26
Camden                    25
Lewisham                  24
Lambeth                   22
Southwark                 21
Greenwich                 21
Islington                 18
Wandsworth                18
Hammersmith and Fulham    16
Kensington and Chelsea    13
Name: Borough, dtype: int64


## Setting up the Foursquare credentials

In [19]:
# Foursquare credentials
CLIENT_ID = 'T3THHYDPV3NSFOCGTSFMG2REPZK2L1FWNLZPUUTYHEIJDGIF' # your Foursquare ID
CLIENT_SECRET = 'UPBT2EZKN4L054EO2NR2I0RD5EHHZ1LMCOMXKF4WOUJCZHBW' # your Foursquare Secret
ACCESS_TOKEN = 'YZHHK4EQ4RNDT14K5ISYM4DBXSYJRO0C3HT0H0RDU2OVV1WA'
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: T3THHYDPV3NSFOCGTSFMG2REPZK2L1FWNLZPUUTYHEIJDGIF
CLIENT_SECRET:UPBT2EZKN4L054EO2NR2I0RD5EHHZ1LMCOMXKF4WOUJCZHBW


In [20]:
# IDs from Foursquare for the places that the analysis of the neighborhoods is based
id_ArtsEntertainment='4d4b7104d754a06370d81259'
id_Colleges='4d4b7105d754a06372d81259'
id_Restaurants='4d4b7105d754a06374d81259'
id_NightLife='4d4b7105d754a06376d81259'
id_OutdoorsRecreation='4d4b7105d754a06377d81259'
id_GovernmentBuilding='4bf58dd8d48988d126941735'
id_MedicalCenter='4bf58dd8d48988d104941735'
id_SpiritualCenter='4bf58dd8d48988d131941735'
id_FoodShop='4bf58dd8d48988d1f9941735'

In [21]:
# Function for retrieving the venues of each neighborhood, based on the coordinates of the 
#neighborhood and category ID of the venues, on a radius of 500m
def getNearbyVenues(names, latitudes, longitudes, categoryID, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        #API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&categoryId={}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng,
            categoryID,
            radius, 
            LIMIT)
            
        #GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        #Appending to the venues list only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

## Creating dataframes for each neighborhood number of venues in the above categories
### Toronto

In [22]:
# Dataframe consisting of the count of Arts&Entertainment venues in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Arts&Entertainment
#then assigning to a new column the number of venues returned for each neighborhood
Toronto_ArtsEntertainment=getNearbyVenues(names=dfToronto['Neighborhood'], latitudes=dfToronto['Latitude'], longitudes=dfToronto['Longitude'], categoryID=id_ArtsEntertainment)
print("Shape of Toronto_ArtsEntertainment", Toronto_ArtsEntertainment.shape)

df_ArtsEntertainment=pd.DataFrame()
df_ArtsEntertainment['ArtsEntertainmentCount']=Toronto_ArtsEntertainment['Neighborhood'].value_counts()
df_ArtsEntertainment=df_ArtsEntertainment.reset_index()
df_ArtsEntertainment=df_ArtsEntertainment.rename(columns={'index':'Neighborhood'})
df_ArtsEntertainment.head()

Agincourt North
Agincourt South-Malvern West
Alderwood
Annex
Banbury-Don Mills
Bathurst Manor
Bay Street Corridor
Bayview Village
Bayview Woods-Steeles
Bedford Park-Nortown
Beechborough-Greenbrook
Bendale
Birchcliffe-Cliffside
Black Creek
Blake-Jones
Briar Hill-Belgravia
Bridle Path-Sunnybrook-York Mills
Broadview North
Brookhaven-Amesbury
Cabbagetown-South St. James Town
Caledonia-Fairbank
Casa Loma
Centennial Scarborough
Church-Yonge Corridor
Clairlea-Birchmount
Clanton Park
Cliffcrest
Corso Italia-Davenport
Danforth - East York
Don Valley Village
Dorset Park
Dovercourt-Wallace Emerson-Junction
Downsview-Roding-CFB
Dufferin Grove
East End-Danforth
Edenbridge-Humber Valley
Eglinton East
Elms-Old Rexdale
Englemount-Lawrence
Eringate-Centennial-West Deane
Etobicoke West Mall
Flemingdon Park
Forest Hill North
Forest Hill South
Glenfield-Jane Heights
Greenwood-Coxwell
Guildwood
Henry Farm
High Park North
High Park-Swansea
Highland Creek
Hillcrest Village
Humber Heights-Westmount
Humber Su

Unnamed: 0,Neighborhood,ArtsEntertainmentCount
0,Kensington-Chinatown,17
1,Henry Farm,17
2,Church-Yonge Corridor,14
3,North St. James Town,13
4,Junction Area,9


In [23]:
# Dataframe consisting of the count of College venues in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Colleges
#then assigning to a new column the number of venues returned for each neighborhood
Toronto_Colleges=getNearbyVenues(names=dfToronto['Neighborhood'], latitudes=dfToronto['Latitude'], longitudes=dfToronto['Longitude'], categoryID=id_Colleges)
print("Shape of Toronto_Colleges", Toronto_Colleges.shape)

df_Colleges=pd.DataFrame()
df_Colleges['CollegesCount']=Toronto_Colleges['Neighborhood'].value_counts()
df_Colleges=df_Colleges.reset_index()
df_Colleges=df_Colleges.rename(columns={'index':'Neighborhood'})
df_Colleges.head()

Agincourt North
Agincourt South-Malvern West
Alderwood
Annex
Banbury-Don Mills
Bathurst Manor
Bay Street Corridor
Bayview Village
Bayview Woods-Steeles
Bedford Park-Nortown
Beechborough-Greenbrook
Bendale
Birchcliffe-Cliffside
Black Creek
Blake-Jones
Briar Hill-Belgravia
Bridle Path-Sunnybrook-York Mills
Broadview North
Brookhaven-Amesbury
Cabbagetown-South St. James Town
Caledonia-Fairbank
Casa Loma
Centennial Scarborough
Church-Yonge Corridor
Clairlea-Birchmount
Clanton Park
Cliffcrest
Corso Italia-Davenport
Danforth - East York
Don Valley Village
Dorset Park
Dovercourt-Wallace Emerson-Junction
Downsview-Roding-CFB
Dufferin Grove
East End-Danforth
Edenbridge-Humber Valley
Eglinton East
Elms-Old Rexdale
Englemount-Lawrence
Eringate-Centennial-West Deane
Etobicoke West Mall
Flemingdon Park
Forest Hill North
Forest Hill South
Glenfield-Jane Heights
Greenwood-Coxwell
Guildwood
Henry Farm
High Park North
High Park-Swansea
Highland Creek
Hillcrest Village
Humber Heights-Westmount
Humber Su

Unnamed: 0,Neighborhood,CollegesCount
0,University,83
1,Church-Yonge Corridor,68
2,Bay Street Corridor,46
3,Annex,27
4,Moss Park,18


In [24]:
# Dataframe consisting of the count of Restaurant venues in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Restaurants
#then assigning to a new column the number of venues returned for each neighborhood
Toronto_Restaurants=getNearbyVenues(names=dfToronto['Neighborhood'], latitudes=dfToronto['Latitude'], longitudes=dfToronto['Longitude'], categoryID=id_Restaurants)
print("Shape of Toronto_Restaurants", Toronto_Restaurants.shape)

df_Restaurants=pd.DataFrame()
df_Restaurants['RestaurantsCount']=Toronto_Restaurants['Neighborhood'].value_counts()
df_Restaurants=df_Restaurants.reset_index()
df_Restaurants=df_Restaurants.rename(columns={'index':'Neighborhood'})
df_Restaurants.head()

Agincourt North
Agincourt South-Malvern West
Alderwood
Annex
Banbury-Don Mills
Bathurst Manor
Bay Street Corridor
Bayview Village
Bayview Woods-Steeles
Bedford Park-Nortown
Beechborough-Greenbrook
Bendale
Birchcliffe-Cliffside
Black Creek
Blake-Jones
Briar Hill-Belgravia
Bridle Path-Sunnybrook-York Mills
Broadview North
Brookhaven-Amesbury
Cabbagetown-South St. James Town
Caledonia-Fairbank
Casa Loma
Centennial Scarborough
Church-Yonge Corridor
Clairlea-Birchmount
Clanton Park
Cliffcrest
Corso Italia-Davenport
Danforth - East York
Don Valley Village
Dorset Park
Dovercourt-Wallace Emerson-Junction
Downsview-Roding-CFB
Dufferin Grove
East End-Danforth
Edenbridge-Humber Valley
Eglinton East
Elms-Old Rexdale
Englemount-Lawrence
Eringate-Centennial-West Deane
Etobicoke West Mall
Flemingdon Park
Forest Hill North
Forest Hill South
Glenfield-Jane Heights
Greenwood-Coxwell
Guildwood
Henry Farm
High Park North
High Park-Swansea
Highland Creek
Hillcrest Village
Humber Heights-Westmount
Humber Su

Unnamed: 0,Neighborhood,RestaurantsCount
0,Church-Yonge Corridor,80
1,Bay Street Corridor,68
2,Kensington-Chinatown,51
3,Niagara,47
4,Yonge and Eglinton,47


In [25]:
# Dataframe consisting of the count of Night Life venues in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Night Life
#then assigning to a new column the number of venues returned for each neighborhood
Toronto_NightLife=getNearbyVenues(names=dfToronto['Neighborhood'], latitudes=dfToronto['Latitude'], longitudes=dfToronto['Longitude'], categoryID=id_NightLife)
print("Shape of Toronto_NightLife", Toronto_NightLife.shape)

df_NightLife=pd.DataFrame()
df_NightLife['NightLifeCount']=Toronto_NightLife['Neighborhood'].value_counts()
df_NightLife=df_NightLife.reset_index()
df_NightLife=df_NightLife.rename(columns={'index':'Neighborhood'})
df_NightLife.head()

Agincourt North
Agincourt South-Malvern West
Alderwood
Annex
Banbury-Don Mills
Bathurst Manor
Bay Street Corridor
Bayview Village
Bayview Woods-Steeles
Bedford Park-Nortown
Beechborough-Greenbrook
Bendale
Birchcliffe-Cliffside
Black Creek
Blake-Jones
Briar Hill-Belgravia
Bridle Path-Sunnybrook-York Mills
Broadview North
Brookhaven-Amesbury
Cabbagetown-South St. James Town
Caledonia-Fairbank
Casa Loma
Centennial Scarborough
Church-Yonge Corridor
Clairlea-Birchmount
Clanton Park
Cliffcrest
Corso Italia-Davenport
Danforth - East York
Don Valley Village
Dorset Park
Dovercourt-Wallace Emerson-Junction
Downsview-Roding-CFB
Dufferin Grove
East End-Danforth
Edenbridge-Humber Valley
Eglinton East
Elms-Old Rexdale
Englemount-Lawrence
Eringate-Centennial-West Deane
Etobicoke West Mall
Flemingdon Park
Forest Hill North
Forest Hill South
Glenfield-Jane Heights
Greenwood-Coxwell
Guildwood
Henry Farm
High Park North
High Park-Swansea
Highland Creek
Hillcrest Village
Humber Heights-Westmount
Humber Su

Unnamed: 0,Neighborhood,NightLifeCount
0,Church-Yonge Corridor,32
1,Kensington-Chinatown,21
2,Niagara,20
3,Bay Street Corridor,16
4,Little Portugal,14


In [26]:
# Dataframe consisting of the count of Outdoors&Recreational venues in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Outdoors&Recreation venues
#then assigning to a new column the number of venues returned for each neighborhood
Toronto_OutdoorsRecreation=getNearbyVenues(names=dfToronto['Neighborhood'], latitudes=dfToronto['Latitude'], longitudes=dfToronto['Longitude'], categoryID=id_OutdoorsRecreation)
print("Shape of Toronto_OutdoorsRecreation", Toronto_OutdoorsRecreation.shape)

df_OutdoorsRecreation=pd.DataFrame()
df_OutdoorsRecreation['OutdoorsRecreationCount']=Toronto_OutdoorsRecreation['Neighborhood'].value_counts()
df_OutdoorsRecreation=df_OutdoorsRecreation.reset_index()
df_OutdoorsRecreation=df_OutdoorsRecreation.rename(columns={'index':'Neighborhood'})
df_OutdoorsRecreation.head()

Agincourt North
Agincourt South-Malvern West
Alderwood
Annex
Banbury-Don Mills
Bathurst Manor
Bay Street Corridor
Bayview Village
Bayview Woods-Steeles
Bedford Park-Nortown
Beechborough-Greenbrook
Bendale
Birchcliffe-Cliffside
Black Creek
Blake-Jones
Briar Hill-Belgravia
Bridle Path-Sunnybrook-York Mills
Broadview North
Brookhaven-Amesbury
Cabbagetown-South St. James Town
Caledonia-Fairbank
Casa Loma
Centennial Scarborough
Church-Yonge Corridor
Clairlea-Birchmount
Clanton Park
Cliffcrest
Corso Italia-Davenport
Danforth - East York
Don Valley Village
Dorset Park
Dovercourt-Wallace Emerson-Junction
Downsview-Roding-CFB
Dufferin Grove
East End-Danforth
Edenbridge-Humber Valley
Eglinton East
Elms-Old Rexdale
Englemount-Lawrence
Eringate-Centennial-West Deane
Etobicoke West Mall
Flemingdon Park
Forest Hill North
Forest Hill South
Glenfield-Jane Heights
Greenwood-Coxwell
Guildwood
Henry Farm
High Park North
High Park-Swansea
Highland Creek
Hillcrest Village
Humber Heights-Westmount
Humber Su

Unnamed: 0,Neighborhood,OutdoorsRecreationCount
0,Bay Street Corridor,27
1,Church-Yonge Corridor,24
2,Niagara,22
3,Yonge and Eglinton,13
4,North St. James Town,12


In [27]:
# Dataframe consisting of the count of Government Buildings in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Government Buildings
#then assigning to a new column the number of venues returned for each neighborhood
Toronto_GovernmentBuilding=getNearbyVenues(names=dfToronto['Neighborhood'], latitudes=dfToronto['Latitude'], longitudes=dfToronto['Longitude'], categoryID=id_GovernmentBuilding)
print("Shape of Toronto_GovernmentBuilding", Toronto_GovernmentBuilding.shape)

df_GovernmentBuilding=pd.DataFrame()
df_GovernmentBuilding['GovernmentBuildingCount']=Toronto_GovernmentBuilding['Neighborhood'].value_counts()
df_GovernmentBuilding=df_GovernmentBuilding.reset_index()
df_GovernmentBuilding=df_GovernmentBuilding.rename(columns={'index':'Neighborhood'})
df_GovernmentBuilding.head()

Agincourt North
Agincourt South-Malvern West
Alderwood
Annex
Banbury-Don Mills
Bathurst Manor
Bay Street Corridor
Bayview Village
Bayview Woods-Steeles
Bedford Park-Nortown
Beechborough-Greenbrook
Bendale
Birchcliffe-Cliffside
Black Creek
Blake-Jones
Briar Hill-Belgravia
Bridle Path-Sunnybrook-York Mills
Broadview North
Brookhaven-Amesbury
Cabbagetown-South St. James Town
Caledonia-Fairbank
Casa Loma
Centennial Scarborough
Church-Yonge Corridor
Clairlea-Birchmount
Clanton Park
Cliffcrest
Corso Italia-Davenport
Danforth - East York
Don Valley Village
Dorset Park
Dovercourt-Wallace Emerson-Junction
Downsview-Roding-CFB
Dufferin Grove
East End-Danforth
Edenbridge-Humber Valley
Eglinton East
Elms-Old Rexdale
Englemount-Lawrence
Eringate-Centennial-West Deane
Etobicoke West Mall
Flemingdon Park
Forest Hill North
Forest Hill South
Glenfield-Jane Heights
Greenwood-Coxwell
Guildwood
Henry Farm
High Park North
High Park-Swansea
Highland Creek
Hillcrest Village
Humber Heights-Westmount
Humber Su

Unnamed: 0,Neighborhood,GovernmentBuildingCount
0,Bay Street Corridor,23
1,Church-Yonge Corridor,10
2,Beechborough-Greenbrook,5
3,Niagara,4
4,Islington-City Centre West,4


In [28]:
# Dataframe consisting of the count of Medical Centers in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Medical Center venues
#then assigning to a new column the number of venues returned for each neighborhood
Toronto_MedicalCenter=getNearbyVenues(names=dfToronto['Neighborhood'], latitudes=dfToronto['Latitude'], longitudes=dfToronto['Longitude'], categoryID=id_MedicalCenter)
print("Shape of Toronto_MedicalCenter", Toronto_MedicalCenter.shape)

df_MedicalCenter=pd.DataFrame()
df_MedicalCenter['MedicalCenterCount']=Toronto_MedicalCenter['Neighborhood'].value_counts()
df_MedicalCenter=df_MedicalCenter.reset_index()
df_MedicalCenter=df_MedicalCenter.rename(columns={'index':'Neighborhood'})
df_MedicalCenter.head()

Agincourt North
Agincourt South-Malvern West
Alderwood
Annex
Banbury-Don Mills
Bathurst Manor
Bay Street Corridor
Bayview Village
Bayview Woods-Steeles
Bedford Park-Nortown
Beechborough-Greenbrook
Bendale
Birchcliffe-Cliffside
Black Creek
Blake-Jones
Briar Hill-Belgravia
Bridle Path-Sunnybrook-York Mills
Broadview North
Brookhaven-Amesbury
Cabbagetown-South St. James Town
Caledonia-Fairbank
Casa Loma
Centennial Scarborough
Church-Yonge Corridor
Clairlea-Birchmount
Clanton Park
Cliffcrest
Corso Italia-Davenport
Danforth - East York
Don Valley Village
Dorset Park
Dovercourt-Wallace Emerson-Junction
Downsview-Roding-CFB
Dufferin Grove
East End-Danforth
Edenbridge-Humber Valley
Eglinton East
Elms-Old Rexdale
Englemount-Lawrence
Eringate-Centennial-West Deane
Etobicoke West Mall
Flemingdon Park
Forest Hill North
Forest Hill South
Glenfield-Jane Heights
Greenwood-Coxwell
Guildwood
Henry Farm
High Park North
High Park-Swansea
Highland Creek
Hillcrest Village
Humber Heights-Westmount
Humber Su

Unnamed: 0,Neighborhood,MedicalCenterCount
0,Bay Street Corridor,57
1,Yonge and Eglinton,27
2,Church-Yonge Corridor,17
3,Playter Estates-Danforth,16
4,L'Amoreaux,15


In [29]:
# Dataframe consisting of the count of Spiritual Centers in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Spiritual Center venues
#then assigning to a new column the number of venues returned for each neighborhood
Toronto_SpiritualCenter=getNearbyVenues(names=dfToronto['Neighborhood'], latitudes=dfToronto['Latitude'], longitudes=dfToronto['Longitude'], categoryID=id_SpiritualCenter)
print("Shape of Toronto_SpiritualCenter", Toronto_SpiritualCenter.shape)

df_SpiritualCenter=pd.DataFrame()
df_SpiritualCenter['SpiritualCenterCount']=Toronto_SpiritualCenter['Neighborhood'].value_counts()
df_SpiritualCenter=df_SpiritualCenter.reset_index()
df_SpiritualCenter=df_SpiritualCenter.rename(columns={'index':'Neighborhood'})
df_SpiritualCenter.head()

Agincourt North
Agincourt South-Malvern West
Alderwood
Annex
Banbury-Don Mills
Bathurst Manor
Bay Street Corridor
Bayview Village
Bayview Woods-Steeles
Bedford Park-Nortown
Beechborough-Greenbrook
Bendale
Birchcliffe-Cliffside
Black Creek
Blake-Jones
Briar Hill-Belgravia
Bridle Path-Sunnybrook-York Mills
Broadview North
Brookhaven-Amesbury
Cabbagetown-South St. James Town
Caledonia-Fairbank
Casa Loma
Centennial Scarborough
Church-Yonge Corridor
Clairlea-Birchmount
Clanton Park
Cliffcrest
Corso Italia-Davenport
Danforth - East York
Don Valley Village
Dorset Park
Dovercourt-Wallace Emerson-Junction
Downsview-Roding-CFB
Dufferin Grove
East End-Danforth
Edenbridge-Humber Valley
Eglinton East
Elms-Old Rexdale
Englemount-Lawrence
Eringate-Centennial-West Deane
Etobicoke West Mall
Flemingdon Park
Forest Hill North
Forest Hill South
Glenfield-Jane Heights
Greenwood-Coxwell
Guildwood
Henry Farm
High Park North
High Park-Swansea
Highland Creek
Hillcrest Village
Humber Heights-Westmount
Humber Su

Unnamed: 0,Neighborhood,SpiritualCenterCount
0,Trinity-Bellwoods,8
1,Annex,7
2,University,7
3,New Toronto,7
4,Moss Park,6


In [30]:
# Dataframe consisting of the count of Grocery Stores in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Grocery Stores
#then assigning to a new column the number of venues returned for each neighborhood
Toronto_FoodShop=getNearbyVenues(names=dfToronto['Neighborhood'], latitudes=dfToronto['Latitude'], longitudes=dfToronto['Longitude'], categoryID=id_FoodShop)
print("Shape of Toronto_FoodShop", Toronto_FoodShop.shape)

df_FoodShop=pd.DataFrame()
df_FoodShop['FoodShopCount']=Toronto_FoodShop['Neighborhood'].value_counts()
df_FoodShop=df_FoodShop.reset_index()
df_FoodShop=df_FoodShop.rename(columns={'index':'Neighborhood'})
df_FoodShop.head()

Agincourt North
Agincourt South-Malvern West
Alderwood
Annex
Banbury-Don Mills
Bathurst Manor
Bay Street Corridor
Bayview Village
Bayview Woods-Steeles
Bedford Park-Nortown
Beechborough-Greenbrook
Bendale
Birchcliffe-Cliffside
Black Creek
Blake-Jones
Briar Hill-Belgravia
Bridle Path-Sunnybrook-York Mills
Broadview North
Brookhaven-Amesbury
Cabbagetown-South St. James Town
Caledonia-Fairbank
Casa Loma
Centennial Scarborough
Church-Yonge Corridor
Clairlea-Birchmount
Clanton Park
Cliffcrest
Corso Italia-Davenport
Danforth - East York
Don Valley Village
Dorset Park
Dovercourt-Wallace Emerson-Junction
Downsview-Roding-CFB
Dufferin Grove
East End-Danforth
Edenbridge-Humber Valley
Eglinton East
Elms-Old Rexdale
Englemount-Lawrence
Eringate-Centennial-West Deane
Etobicoke West Mall
Flemingdon Park
Forest Hill North
Forest Hill South
Glenfield-Jane Heights
Greenwood-Coxwell
Guildwood
Henry Farm
High Park North
High Park-Swansea
Highland Creek
Hillcrest Village
Humber Heights-Westmount
Humber Su

Unnamed: 0,Neighborhood,FoodShopCount
0,Bay Street Corridor,30
1,Kensington-Chinatown,29
2,Church-Yonge Corridor,23
3,Yonge and Eglinton,14
4,Dufferin Grove,12


In [31]:
# Merging the categories dataframes with the Toronto dataframe and replacing the NaN values with 0
merged_dfToronto=pd.merge(left=dfToronto, right=df_ArtsEntertainment, how='left', left_on='Neighborhood', right_on='Neighborhood')
merged_dfToronto=pd.merge(left=merged_dfToronto, right=df_Colleges, how='left', left_on='Neighborhood', right_on='Neighborhood')
merged_dfToronto=pd.merge(left=merged_dfToronto, right=df_Restaurants, how='left', left_on='Neighborhood', right_on='Neighborhood')
merged_dfToronto=pd.merge(left=merged_dfToronto, right=df_NightLife, how='left', left_on='Neighborhood', right_on='Neighborhood')
merged_dfToronto=pd.merge(left=merged_dfToronto, right=df_OutdoorsRecreation, how='left', left_on='Neighborhood', right_on='Neighborhood')
merged_dfToronto=pd.merge(left=merged_dfToronto, right=df_GovernmentBuilding, how='left', left_on='Neighborhood', right_on='Neighborhood')
merged_dfToronto=pd.merge(left=merged_dfToronto, right=df_MedicalCenter, how='left', left_on='Neighborhood', right_on='Neighborhood')
merged_dfToronto=pd.merge(left=merged_dfToronto, right=df_SpiritualCenter, how='left', left_on='Neighborhood', right_on='Neighborhood')
merged_dfToronto=pd.merge(left=merged_dfToronto, right=df_FoodShop, how='left', left_on='Neighborhood', right_on='Neighborhood')

merged_dfToronto=merged_dfToronto.fillna(0)
merged_dfToronto.head()

Unnamed: 0,Neighborhood,Borough,TotalCrimes,Longitude,Latitude,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
0,Agincourt North,Scarborough,363,-79.26707,43.8093,1.0,0.0,14.0,1.0,2.0,0.0,6.0,0.0,4.0
1,Agincourt South-Malvern West,Scarborough,478,-79.26935,43.78736,1.0,6.0,20.0,3.0,3.0,1.0,8.0,2.0,4.0
2,Alderwood,Etobicoke,204,-79.54116,43.60496,1.0,1.0,4.0,0.0,4.0,0.0,4.0,0.0,1.0
3,Annex,Old City of Toronto,943,-79.4028,43.66936,4.0,27.0,26.0,7.0,9.0,3.0,10.0,7.0,6.0
4,Banbury-Don Mills,North York,403,-79.34852,43.74041,0.0,0.0,2.0,0.0,1.0,0.0,1.0,1.0,1.0


### London

In [32]:
# Dataframe consisting of the count of Arts&Entertainment venues in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Arts&Entertainment
#then assigning to a new column the number of venues returned for each neighborhood
London_ArtsEntertainment=getNearbyVenues(names=dfLondon['Neighborhood'], latitudes=dfLondon['Latitude'], longitudes=dfLondon['Longitude'], categoryID=id_ArtsEntertainment)
print("Shape of London_ArtsEntertainment", London_ArtsEntertainment.shape)

df_ArtsEntertainment=pd.DataFrame()
df_ArtsEntertainment['ArtsEntertainmentCount']=London_ArtsEntertainment['Neighborhood'].value_counts()
df_ArtsEntertainment=df_ArtsEntertainment.reset_index()
df_ArtsEntertainment=df_ArtsEntertainment.rename(columns={'index':'Neighborhood'})
df_ArtsEntertainment.head()

Abbey Road
Addison
Aldwych
Angel
Archway
Avonmore and Brook Green
Balham
Bankside
Barnsbury
Battersea
Bayswater
Belgravia
Bellingham
Belsize
Bermondsey
Bethnal Green
Bishop's
Blackheath
Blackheath Westcombe
Blackwall
Blackwall & Cubitt Town
Bloomsbury
Bow East
Bow West
Brixton Hill
Brockley
Bromley North
Bromley South
Brompton & Hans Town
Brownswood
Bryanston and Dorset Square
Caledonian
Camberwell Green
Cambridge Heath
Camden Town with Primrose Hill
Canary Wharf
Canonbury
Cantelowes
Catford South
Cathedrals
Cazenove
Chalk Farm
Charing Cross
Charlton
Chelsea Riverside
Chinbrook
Church Street
Churchill
Clapham Common
Clapham Town
Clerkenwell
Clissold
Coldharbour
Coldharbour and New Eltham
College
College Park and Old Oak
Courtfield
Cricklewood
Crofton Park
Cubitt Town
Dalston
De Beauvoir
Denmark Hill
Deptford
Downham
Dulwich
Earl's Court
Earlsfield
East Dulwich
East Putney
East Walworth
Elephant and Castle
Eltham North
Eltham South
Eltham West
Fairfield
Farringdon
Ferndale
Finsbury Park

Unnamed: 0,Neighborhood,ArtsEntertainmentCount
0,Charing Cross,75
1,Holland,70
2,St Luke's,68
3,Ravenscourt Park,45
4,Bloomsbury,44


In [33]:
# Dataframe consisting of the count of College venues in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Colleges
#then assigning to a new column the number of venues returned for each neighborhood
London_Colleges=getNearbyVenues(names=dfLondon['Neighborhood'], latitudes=dfLondon['Latitude'], longitudes=dfLondon['Longitude'], categoryID=id_Colleges)
print("Shape of London_Colleges", London_Colleges.shape)

df_Colleges=pd.DataFrame()
df_Colleges['CollegesCount']=London_Colleges['Neighborhood'].value_counts()
df_Colleges=df_Colleges.reset_index()
df_Colleges=df_Colleges.rename(columns={'index':'Neighborhood'})
df_Colleges.head()

Abbey Road
Addison
Aldwych
Angel
Archway
Avonmore and Brook Green
Balham
Bankside
Barnsbury
Battersea
Bayswater
Belgravia
Bellingham
Belsize
Bermondsey
Bethnal Green
Bishop's
Blackheath
Blackheath Westcombe
Blackwall
Blackwall & Cubitt Town
Bloomsbury
Bow East
Bow West
Brixton Hill
Brockley
Bromley North
Bromley South
Brompton & Hans Town
Brownswood
Bryanston and Dorset Square
Caledonian
Camberwell Green
Cambridge Heath
Camden Town with Primrose Hill
Canary Wharf
Canonbury
Cantelowes
Catford South
Cathedrals
Cazenove
Chalk Farm
Charing Cross
Charlton
Chelsea Riverside
Chinbrook
Church Street
Churchill
Clapham Common
Clapham Town
Clerkenwell
Clissold
Coldharbour
Coldharbour and New Eltham
College
College Park and Old Oak
Courtfield
Cricklewood
Crofton Park
Cubitt Town
Dalston
De Beauvoir
Denmark Hill
Deptford
Downham
Dulwich
Earl's Court
Earlsfield
East Dulwich
East Putney
East Walworth
Elephant and Castle
Eltham North
Eltham South
Eltham West
Fairfield
Farringdon
Ferndale
Finsbury Park

Unnamed: 0,Neighborhood,CollegesCount
0,Aldwych,77
1,Forest Hill,70
2,Thornton,69
3,Bloomsbury,62
4,Ravenscourt Park,56


In [34]:
# Dataframe consisting of the count of Restaurant venues in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Restaurants
#then assigning to a new column the number of venues returned for each neighborhood
London_Restaurants=getNearbyVenues(names=dfLondon['Neighborhood'], latitudes=dfLondon['Latitude'], longitudes=dfLondon['Longitude'], categoryID=id_Restaurants)
print("Shape of London_Restaurants", London_Restaurants.shape)

df_Restaurants=pd.DataFrame()
df_Restaurants['RestaurantsCount']=London_Restaurants['Neighborhood'].value_counts()
df_Restaurants=df_Restaurants.reset_index()
df_Restaurants=df_Restaurants.rename(columns={'index':'Neighborhood'})
df_Restaurants.head()

Abbey Road
Addison
Aldwych
Angel
Archway
Avonmore and Brook Green
Balham
Bankside
Barnsbury
Battersea
Bayswater
Belgravia
Bellingham
Belsize
Bermondsey
Bethnal Green
Bishop's
Blackheath
Blackheath Westcombe
Blackwall
Blackwall & Cubitt Town
Bloomsbury
Bow East
Bow West
Brixton Hill
Brockley
Bromley North
Bromley South
Brompton & Hans Town
Brownswood
Bryanston and Dorset Square
Caledonian
Camberwell Green
Cambridge Heath
Camden Town with Primrose Hill
Canary Wharf
Canonbury
Cantelowes
Catford South
Cathedrals
Cazenove
Chalk Farm
Charing Cross
Charlton
Chelsea Riverside
Chinbrook
Church Street
Churchill
Clapham Common
Clapham Town
Clerkenwell
Clissold
Coldharbour
Coldharbour and New Eltham
College
College Park and Old Oak
Courtfield
Cricklewood
Crofton Park
Cubitt Town
Dalston
De Beauvoir
Denmark Hill
Deptford
Downham
Dulwich
Earl's Court
Earlsfield
East Dulwich
East Putney
East Walworth
Elephant and Castle
Eltham North
Eltham South
Eltham West
Fairfield
Farringdon
Ferndale
Finsbury Park

Unnamed: 0,Neighborhood,RestaurantsCount
0,St Luke's,100
1,Cathedrals,100
2,Weavers,100
3,Homerton,80
4,Holland,78


In [35]:
# Dataframe consisting of the count of Night Life venues in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Night Life
#then assigning to a new column the number of venues returned for each neighborhood
London_NightLife=getNearbyVenues(names=dfLondon['Neighborhood'], latitudes=dfLondon['Latitude'], longitudes=dfLondon['Longitude'], categoryID=id_NightLife)
print("Shape of London_NightLife", London_NightLife.shape)

df_NightLife=pd.DataFrame()
df_NightLife['NightLifeCount']=London_NightLife['Neighborhood'].value_counts()
df_NightLife=df_NightLife.reset_index()
df_NightLife=df_NightLife.rename(columns={'index':'Neighborhood'})
df_NightLife.head()

Abbey Road
Addison
Aldwych
Angel
Archway
Avonmore and Brook Green
Balham
Bankside
Barnsbury
Battersea
Bayswater
Belgravia
Bellingham
Belsize
Bermondsey
Bethnal Green
Bishop's
Blackheath
Blackheath Westcombe
Blackwall
Blackwall & Cubitt Town
Bloomsbury
Bow East
Bow West
Brixton Hill
Brockley
Bromley North
Bromley South
Brompton & Hans Town
Brownswood
Bryanston and Dorset Square
Caledonian
Camberwell Green
Cambridge Heath
Camden Town with Primrose Hill
Canary Wharf
Canonbury
Cantelowes
Catford South
Cathedrals
Cazenove
Chalk Farm
Charing Cross
Charlton
Chelsea Riverside
Chinbrook
Church Street
Churchill
Clapham Common
Clapham Town
Clerkenwell
Clissold
Coldharbour
Coldharbour and New Eltham
College
College Park and Old Oak
Courtfield
Cricklewood
Crofton Park
Cubitt Town
Dalston
De Beauvoir
Denmark Hill
Deptford
Downham
Dulwich
Earl's Court
Earlsfield
East Dulwich
East Putney
East Walworth
Elephant and Castle
Eltham North
Eltham South
Eltham West
Fairfield
Farringdon
Ferndale
Finsbury Park

Unnamed: 0,Neighborhood,NightLifeCount
0,St Luke's,100
1,Cathedrals,73
2,Camden Town with Primrose Hill,65
3,Weavers,58
4,Holland,53


In [36]:
# Dataframe consisting of the count of Outdoors&Recreational venues in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Outdoors&Recreation venues
#then assigning to a new column the number of venues returned for each neighborhood
London_OutdoorsRecreation=getNearbyVenues(names=dfLondon['Neighborhood'], latitudes=dfLondon['Latitude'], longitudes=dfLondon['Longitude'], categoryID=id_OutdoorsRecreation)
print("Shape of London_OutdoorsRecreation", London_OutdoorsRecreation.shape)

df_OutdoorsRecreation=pd.DataFrame()
df_OutdoorsRecreation['OutdoorsRecreationCount']=London_OutdoorsRecreation['Neighborhood'].value_counts()
df_OutdoorsRecreation=df_OutdoorsRecreation.reset_index()
df_OutdoorsRecreation=df_OutdoorsRecreation.rename(columns={'index':'Neighborhood'})
df_OutdoorsRecreation.head()

Abbey Road
Addison
Aldwych
Angel
Archway
Avonmore and Brook Green
Balham
Bankside
Barnsbury
Battersea
Bayswater
Belgravia
Bellingham
Belsize
Bermondsey
Bethnal Green
Bishop's
Blackheath
Blackheath Westcombe
Blackwall
Blackwall & Cubitt Town
Bloomsbury
Bow East
Bow West
Brixton Hill
Brockley
Bromley North
Bromley South
Brompton & Hans Town
Brownswood
Bryanston and Dorset Square
Caledonian
Camberwell Green
Cambridge Heath
Camden Town with Primrose Hill
Canary Wharf
Canonbury
Cantelowes
Catford South
Cathedrals
Cazenove
Chalk Farm
Charing Cross
Charlton
Chelsea Riverside
Chinbrook
Church Street
Churchill
Clapham Common
Clapham Town
Clerkenwell
Clissold
Coldharbour
Coldharbour and New Eltham
College
College Park and Old Oak
Courtfield
Cricklewood
Crofton Park
Cubitt Town
Dalston
De Beauvoir
Denmark Hill
Deptford
Downham
Dulwich
Earl's Court
Earlsfield
East Dulwich
East Putney
East Walworth
Elephant and Castle
Eltham North
Eltham South
Eltham West
Fairfield
Farringdon
Ferndale
Finsbury Park

Unnamed: 0,Neighborhood,OutdoorsRecreationCount
0,Holland,35
1,St Luke's,34
2,Canary Wharf,33
3,Charing Cross,33
4,St John's Wood,32


In [37]:
# Dataframe consisting of the count of Government Buildings in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Government Buildings
#then assigning to a new column the number of venues returned for each neighborhood
London_GovernmentBuilding=getNearbyVenues(names=dfLondon['Neighborhood'], latitudes=dfLondon['Latitude'], longitudes=dfLondon['Longitude'], categoryID=id_GovernmentBuilding)
print("Shape of London_GovernmentBuilding", London_GovernmentBuilding.shape)

df_GovernmentBuilding=pd.DataFrame()
df_GovernmentBuilding['GovernmentBuildingCount']=London_GovernmentBuilding['Neighborhood'].value_counts()
df_GovernmentBuilding=df_GovernmentBuilding.reset_index()
df_GovernmentBuilding=df_GovernmentBuilding.rename(columns={'index':'Neighborhood'})
df_GovernmentBuilding.head()

Abbey Road
Addison
Aldwych
Angel
Archway
Avonmore and Brook Green
Balham
Bankside
Barnsbury
Battersea
Bayswater
Belgravia
Bellingham
Belsize
Bermondsey
Bethnal Green
Bishop's
Blackheath
Blackheath Westcombe
Blackwall
Blackwall & Cubitt Town
Bloomsbury
Bow East
Bow West
Brixton Hill
Brockley
Bromley North
Bromley South
Brompton & Hans Town
Brownswood
Bryanston and Dorset Square
Caledonian
Camberwell Green
Cambridge Heath
Camden Town with Primrose Hill
Canary Wharf
Canonbury
Cantelowes
Catford South
Cathedrals
Cazenove
Chalk Farm
Charing Cross
Charlton
Chelsea Riverside
Chinbrook
Church Street
Churchill
Clapham Common
Clapham Town
Clerkenwell
Clissold
Coldharbour
Coldharbour and New Eltham
College
College Park and Old Oak
Courtfield
Cricklewood
Crofton Park
Cubitt Town
Dalston
De Beauvoir
Denmark Hill
Deptford
Downham
Dulwich
Earl's Court
Earlsfield
East Dulwich
East Putney
East Walworth
Elephant and Castle
Eltham North
Eltham South
Eltham West
Fairfield
Farringdon
Ferndale
Finsbury Park

Unnamed: 0,Neighborhood,GovernmentBuildingCount
0,Churchill,70
1,Rotherhithe,61
2,Abbey Road,61
3,Maida Vale,57
4,Wormholt and White City,52


In [38]:
# Dataframe consisting of the count of Medical Centers in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Medical Center venues
#then assigning to a new column the number of venues returned for each neighborhood
London_MedicalCenter=getNearbyVenues(names=dfLondon['Neighborhood'], latitudes=dfLondon['Latitude'], longitudes=dfLondon['Longitude'], categoryID=id_MedicalCenter)
print("Shape of London_MedicalCenter", London_MedicalCenter.shape)

df_MedicalCenter=pd.DataFrame()
df_MedicalCenter['MedicalCenterCount']=London_MedicalCenter['Neighborhood'].value_counts()
df_MedicalCenter=df_MedicalCenter.reset_index()
df_MedicalCenter=df_MedicalCenter.rename(columns={'index':'Neighborhood'})
df_MedicalCenter.head()

Abbey Road
Addison
Aldwych
Angel
Archway
Avonmore and Brook Green
Balham
Bankside
Barnsbury
Battersea
Bayswater
Belgravia
Bellingham
Belsize
Bermondsey
Bethnal Green
Bishop's
Blackheath
Blackheath Westcombe
Blackwall
Blackwall & Cubitt Town
Bloomsbury
Bow East
Bow West
Brixton Hill
Brockley
Bromley North
Bromley South
Brompton & Hans Town
Brownswood
Bryanston and Dorset Square
Caledonian
Camberwell Green
Cambridge Heath
Camden Town with Primrose Hill
Canary Wharf
Canonbury
Cantelowes
Catford South
Cathedrals
Cazenove
Chalk Farm
Charing Cross
Charlton
Chelsea Riverside
Chinbrook
Church Street
Churchill
Clapham Common
Clapham Town
Clerkenwell
Clissold
Coldharbour
Coldharbour and New Eltham
College
College Park and Old Oak
Courtfield
Cricklewood
Crofton Park
Cubitt Town
Dalston
De Beauvoir
Denmark Hill
Deptford
Downham
Dulwich
Earl's Court
Earlsfield
East Dulwich
East Putney
East Walworth
Elephant and Castle
Eltham North
Eltham South
Eltham West
Fairfield
Farringdon
Ferndale
Finsbury Park

Unnamed: 0,Neighborhood,MedicalCenterCount
0,Mile End,65
1,Forest Hill,29
2,St Luke's,19
3,Bloomsbury,16
4,Paddington,15


In [39]:
# Dataframe consisting of the count of Spiritual Centers in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Spiritual Center venues
#then assigning to a new column the number of venues returned for each neighborhood
London_SpiritualCenter=getNearbyVenues(names=dfLondon['Neighborhood'], latitudes=dfLondon['Latitude'], longitudes=dfLondon['Longitude'], categoryID=id_SpiritualCenter)
print("Shape of London_SpiritualCenter", London_SpiritualCenter.shape)

df_SpiritualCenter=pd.DataFrame()
df_SpiritualCenter['SpiritualCenterCount']=London_SpiritualCenter['Neighborhood'].value_counts()
df_SpiritualCenter=df_SpiritualCenter.reset_index()
df_SpiritualCenter=df_SpiritualCenter.rename(columns={'index':'Neighborhood'})
df_SpiritualCenter.head()

Abbey Road
Addison
Aldwych
Angel
Archway
Avonmore and Brook Green
Balham
Bankside
Barnsbury
Battersea
Bayswater
Belgravia
Bellingham
Belsize
Bermondsey
Bethnal Green
Bishop's
Blackheath
Blackheath Westcombe
Blackwall
Blackwall & Cubitt Town
Bloomsbury
Bow East
Bow West
Brixton Hill
Brockley
Bromley North
Bromley South
Brompton & Hans Town
Brownswood
Bryanston and Dorset Square
Caledonian
Camberwell Green
Cambridge Heath
Camden Town with Primrose Hill
Canary Wharf
Canonbury
Cantelowes
Catford South
Cathedrals
Cazenove
Chalk Farm
Charing Cross
Charlton
Chelsea Riverside
Chinbrook
Church Street
Churchill
Clapham Common
Clapham Town
Clerkenwell
Clissold
Coldharbour
Coldharbour and New Eltham
College
College Park and Old Oak
Courtfield
Cricklewood
Crofton Park
Cubitt Town
Dalston
De Beauvoir
Denmark Hill
Deptford
Downham
Dulwich
Earl's Court
Earlsfield
East Dulwich
East Putney
East Walworth
Elephant and Castle
Eltham North
Eltham South
Eltham West
Fairfield
Farringdon
Ferndale
Finsbury Park

Unnamed: 0,Neighborhood,SpiritualCenterCount
0,St Luke's,16
1,Ferndale,14
2,Holland,11
3,Thornton,10
4,Herne Hill,9


In [40]:
# Dataframe consisting of the count of Grocery Stores in the neighborhoods, resulted 
#by using the getNearbyVenues function with the Foursquare category ID for Grocery Stores
#then assigning to a new column the number of venues returned for each neighborhood
London_FoodShop=getNearbyVenues(names=dfLondon['Neighborhood'], latitudes=dfLondon['Latitude'], longitudes=dfLondon['Longitude'], categoryID=id_FoodShop)
print("Shape of London_FoodShop", London_FoodShop.shape)

df_FoodShop=pd.DataFrame()
df_FoodShop['FoodShopCount']=London_FoodShop['Neighborhood'].value_counts()
df_FoodShop=df_FoodShop.reset_index()
df_FoodShop=df_FoodShop.rename(columns={'index':'Neighborhood'})
df_FoodShop.head()

Abbey Road
Addison
Aldwych
Angel
Archway
Avonmore and Brook Green
Balham
Bankside
Barnsbury
Battersea
Bayswater
Belgravia
Bellingham
Belsize
Bermondsey
Bethnal Green
Bishop's
Blackheath
Blackheath Westcombe
Blackwall
Blackwall & Cubitt Town
Bloomsbury
Bow East
Bow West
Brixton Hill
Brockley
Bromley North
Bromley South
Brompton & Hans Town
Brownswood
Bryanston and Dorset Square
Caledonian
Camberwell Green
Cambridge Heath
Camden Town with Primrose Hill
Canary Wharf
Canonbury
Cantelowes
Catford South
Cathedrals
Cazenove
Chalk Farm
Charing Cross
Charlton
Chelsea Riverside
Chinbrook
Church Street
Churchill
Clapham Common
Clapham Town
Clerkenwell
Clissold
Coldharbour
Coldharbour and New Eltham
College
College Park and Old Oak
Courtfield
Cricklewood
Crofton Park
Cubitt Town
Dalston
De Beauvoir
Denmark Hill
Deptford
Downham
Dulwich
Earl's Court
Earlsfield
East Dulwich
East Putney
East Walworth
Elephant and Castle
Eltham North
Eltham South
Eltham West
Fairfield
Farringdon
Ferndale
Finsbury Park

Unnamed: 0,Neighborhood,FoodShopCount
0,Cathedrals,49
1,Weavers,46
2,College,43
3,Holland,42
4,Bankside,37


In [41]:
# Merging the categories dataframes with the London dataframe and replacing the NaN values with 0
merged_dfLondon=pd.merge(left=dfLondon, right=df_ArtsEntertainment, how='left', left_on='Neighborhood', right_on='Neighborhood')
merged_dfLondon=pd.merge(left=merged_dfLondon, right=df_Colleges, how='left', left_on='Neighborhood', right_on='Neighborhood')
merged_dfLondon=pd.merge(left=merged_dfLondon, right=df_Restaurants, how='left', left_on='Neighborhood', right_on='Neighborhood')
merged_dfLondon=pd.merge(left=merged_dfLondon, right=df_NightLife, how='left', left_on='Neighborhood', right_on='Neighborhood')
merged_dfLondon=pd.merge(left=merged_dfLondon, right=df_OutdoorsRecreation, how='left', left_on='Neighborhood', right_on='Neighborhood')
merged_dfLondon=pd.merge(left=merged_dfLondon, right=df_GovernmentBuilding, how='left', left_on='Neighborhood', right_on='Neighborhood')
merged_dfLondon=pd.merge(left=merged_dfLondon, right=df_MedicalCenter, how='left', left_on='Neighborhood', right_on='Neighborhood')
merged_dfLondon=pd.merge(left=merged_dfLondon, right=df_SpiritualCenter, how='left', left_on='Neighborhood', right_on='Neighborhood')
merged_dfLondon=pd.merge(left=merged_dfLondon, right=df_FoodShop, how='left', left_on='Neighborhood', right_on='Neighborhood')

merged_dfLondon=merged_dfLondon.fillna(0)
merged_dfLondon.head()

Unnamed: 0,Neighborhood,Borough,TotalCrimes,Longitude,Latitude,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
0,Abbey Road,Westminster,1215.0,-0.127197,51.497657,17.0,5.0,50.0,20.0,19.0,61.0,2.0,8.0,7.0
1,Addison,Hammersmith and Fulham,2634.0,-0.21288,51.4826,3.0,3.0,15.0,4.0,8.0,0.0,10.0,4.0,3.0
2,Aldwych,Westminster,0.0,-0.118607,51.512653,39.0,77.0,61.0,46.0,26.0,12.0,11.0,8.0,24.0
3,Angel,Islington,0.0,-0.10626,51.53202,14.0,11.0,66.0,23.0,19.0,4.0,8.0,3.0,22.0
4,Archway,Islington,0.0,-0.134922,51.565748,2.0,4.0,21.0,10.0,4.0,5.0,4.0,5.0,9.0


## Normalizing the data of the venues dataframe for more accurate results
### Toronto

In [42]:
# Creating a new dataframe that will have only the neighborhoods and categories columns
new_Torontodf=merged_dfToronto
new_Torontodf.drop(['Borough', 'Longitude', 'Latitude'], axis='columns', inplace=True)
new_Torontodf.head()

Unnamed: 0,Neighborhood,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
0,Agincourt North,363,1.0,0.0,14.0,1.0,2.0,0.0,6.0,0.0,4.0
1,Agincourt South-Malvern West,478,1.0,6.0,20.0,3.0,3.0,1.0,8.0,2.0,4.0
2,Alderwood,204,1.0,1.0,4.0,0.0,4.0,0.0,4.0,0.0,1.0
3,Annex,943,4.0,27.0,26.0,7.0,9.0,3.0,10.0,7.0,6.0
4,Banbury-Don Mills,403,0.0,0.0,2.0,0.0,1.0,0.0,1.0,1.0,1.0


In [43]:
# Creating a dataframe that will store the neighborhood values from the Toronto dataframe
df_neigh=pd.DataFrame()
df_neigh['Neighborhood']=new_Torontodf['Neighborhood']
df_neigh.head()

Unnamed: 0,Neighborhood
0,Agincourt North
1,Agincourt South-Malvern West
2,Alderwood
3,Annex
4,Banbury-Don Mills


In [44]:
# Creating a new dataframe that has only the columns with category counts
df_cat=pd.DataFrame()
df_cat=new_Torontodf
df_cat.drop(['Neighborhood'], axis='columns', inplace=True)
df_cat.head()

Unnamed: 0,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
0,363,1.0,0.0,14.0,1.0,2.0,0.0,6.0,0.0,4.0
1,478,1.0,6.0,20.0,3.0,3.0,1.0,8.0,2.0,4.0
2,204,1.0,1.0,4.0,0.0,4.0,0.0,4.0,0.0,1.0
3,943,4.0,27.0,26.0,7.0,9.0,3.0,10.0,7.0,6.0
4,403,0.0,0.0,2.0,0.0,1.0,0.0,1.0,1.0,1.0


In [45]:
# Normalizing the values of the venue category counts for more accurate clustering results
norm=pd.DataFrame(df_cat)
min_max_scaler=preprocessing.MinMaxScaler()
x_scaled=min_max_scaler.fit_transform(norm)
Cat_normalized=pd.DataFrame(x_scaled, columns=norm.columns)
Cat_normalized.head()

Unnamed: 0,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
0,0.114524,0.058824,0.0,0.175,0.03125,0.074074,0.0,0.105263,0.0,0.133333
1,0.164985,0.058824,0.072289,0.25,0.09375,0.111111,0.043478,0.140351,0.25,0.133333
2,0.044756,0.058824,0.012048,0.05,0.0,0.148148,0.0,0.070175,0.0,0.033333
3,0.369022,0.235294,0.325301,0.325,0.21875,0.333333,0.130435,0.175439,0.875,0.2
4,0.132075,0.0,0.0,0.025,0.0,0.037037,0.0,0.017544,0.125,0.033333


In [46]:
# Remaking the dataframe of Toronto neighborhoods and the normalized venue categories columns
merged_dfT=pd.merge(left=df_neigh, right=Cat_normalized, how='left', left_on=df_neigh.index, right_on=Cat_normalized.index)
merged_dfT.drop(['key_0'], axis='columns', inplace=True)
merged_dfT.head()

Unnamed: 0,Neighborhood,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
0,Agincourt North,0.114524,0.058824,0.0,0.175,0.03125,0.074074,0.0,0.105263,0.0,0.133333
1,Agincourt South-Malvern West,0.164985,0.058824,0.072289,0.25,0.09375,0.111111,0.043478,0.140351,0.25,0.133333
2,Alderwood,0.044756,0.058824,0.012048,0.05,0.0,0.148148,0.0,0.070175,0.0,0.033333
3,Annex,0.369022,0.235294,0.325301,0.325,0.21875,0.333333,0.130435,0.175439,0.875,0.2
4,Banbury-Don Mills,0.132075,0.0,0.0,0.025,0.0,0.037037,0.0,0.017544,0.125,0.033333


### London

In [47]:
# Creating a new dataframe that will have only the neighborhoods and categories columns
new_Londondf=merged_dfLondon
new_Londondf.drop(['Borough', 'Longitude', 'Latitude'], axis='columns', inplace=True)
new_Londondf.head()

Unnamed: 0,Neighborhood,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
0,Abbey Road,1215.0,17.0,5.0,50.0,20.0,19.0,61.0,2.0,8.0,7.0
1,Addison,2634.0,3.0,3.0,15.0,4.0,8.0,0.0,10.0,4.0,3.0
2,Aldwych,0.0,39.0,77.0,61.0,46.0,26.0,12.0,11.0,8.0,24.0
3,Angel,0.0,14.0,11.0,66.0,23.0,19.0,4.0,8.0,3.0,22.0
4,Archway,0.0,2.0,4.0,21.0,10.0,4.0,5.0,4.0,5.0,9.0


In [48]:
# Creating a dataframe that will store the neighborhood values from the London dataframe
df_neigh=pd.DataFrame()
df_neigh['Neighborhood']=new_Londondf['Neighborhood']
df_neigh.head()

Unnamed: 0,Neighborhood
0,Abbey Road
1,Addison
2,Aldwych
3,Angel
4,Archway


In [49]:
# Creating a new dataframe that has only the columns with category counts
df_cat=pd.DataFrame()
df_cat=new_Londondf
df_cat.drop(['Neighborhood'], axis='columns', inplace=True)
df_cat.head()

Unnamed: 0,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
0,1215.0,17.0,5.0,50.0,20.0,19.0,61.0,2.0,8.0,7.0
1,2634.0,3.0,3.0,15.0,4.0,8.0,0.0,10.0,4.0,3.0
2,0.0,39.0,77.0,61.0,46.0,26.0,12.0,11.0,8.0,24.0
3,0.0,14.0,11.0,66.0,23.0,19.0,4.0,8.0,3.0,22.0
4,0.0,2.0,4.0,21.0,10.0,4.0,5.0,4.0,5.0,9.0


In [50]:
# Normalizing the values of the venue category counts for more accurate clustering results
norm=pd.DataFrame(df_cat)
min_max_scaler=preprocessing.MinMaxScaler()
x_scaled=min_max_scaler.fit_transform(norm)
Cat_normalized=pd.DataFrame(x_scaled, columns=norm.columns)
Cat_normalized.head()

Unnamed: 0,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
0,0.037725,0.226667,0.064935,0.5,0.2,0.542857,0.871429,0.030769,0.5,0.142857
1,0.081783,0.04,0.038961,0.15,0.04,0.228571,0.0,0.153846,0.25,0.061224
2,0.0,0.52,1.0,0.61,0.46,0.742857,0.171429,0.169231,0.5,0.489796
3,0.0,0.186667,0.142857,0.66,0.23,0.542857,0.057143,0.123077,0.1875,0.44898
4,0.0,0.026667,0.051948,0.21,0.1,0.114286,0.071429,0.061538,0.3125,0.183673


In [51]:
# Remaking the dataframe of London neighborhoods and the normalized venue categories columns
merged_dfL=pd.merge(left=df_neigh, right=Cat_normalized, how='left', left_on=df_neigh.index, right_on=Cat_normalized.index)
merged_dfL.drop(['key_0'], axis='columns', inplace=True)
merged_dfL.head()

Unnamed: 0,Neighborhood,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
0,Abbey Road,0.037725,0.226667,0.064935,0.5,0.2,0.542857,0.871429,0.030769,0.5,0.142857
1,Addison,0.081783,0.04,0.038961,0.15,0.04,0.228571,0.0,0.153846,0.25,0.061224
2,Aldwych,0.0,0.52,1.0,0.61,0.46,0.742857,0.171429,0.169231,0.5,0.489796
3,Angel,0.0,0.186667,0.142857,0.66,0.23,0.542857,0.057143,0.123077,0.1875,0.44898
4,Archway,0.0,0.026667,0.051948,0.21,0.1,0.114286,0.071429,0.061538,0.3125,0.183673


## Ordering the most common venues and the crimes data in each neighborhood

In [52]:
# Function for most common venues
def return_most_common_venues(row, top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:top_venues]

### Toronto

In [53]:
# Sorting the venue categories in each neighborhood
top_venues = 10
indicators = ['st', 'nd', 'rd']

# Create columns according to the number of venue categories
columns = ['Neighborhood']
for ind in np.arange(top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# New dataframe for sorted venues
Toronto_venues_sorted = pd.DataFrame(columns=columns)
Toronto_venues_sorted['Neighborhood'] = merged_dfT['Neighborhood']

for ind in np.arange(merged_dfT.shape[0]):
    Toronto_venues_sorted.iloc[ind, 1:] = return_most_common_venues(merged_dfT.iloc[ind, :], top_venues)

Toronto_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt North,RestaurantsCount,FoodShopCount,TotalCrimes,MedicalCenterCount,OutdoorsRecreationCount,ArtsEntertainmentCount,NightLifeCount,SpiritualCenterCount,GovernmentBuildingCount,CollegesCount
1,Agincourt South-Malvern West,SpiritualCenterCount,RestaurantsCount,TotalCrimes,MedicalCenterCount,FoodShopCount,OutdoorsRecreationCount,NightLifeCount,CollegesCount,ArtsEntertainmentCount,GovernmentBuildingCount
2,Alderwood,OutdoorsRecreationCount,MedicalCenterCount,ArtsEntertainmentCount,RestaurantsCount,TotalCrimes,FoodShopCount,CollegesCount,SpiritualCenterCount,GovernmentBuildingCount,NightLifeCount
3,Annex,SpiritualCenterCount,TotalCrimes,OutdoorsRecreationCount,CollegesCount,RestaurantsCount,ArtsEntertainmentCount,NightLifeCount,FoodShopCount,MedicalCenterCount,GovernmentBuildingCount
4,Banbury-Don Mills,TotalCrimes,SpiritualCenterCount,OutdoorsRecreationCount,FoodShopCount,RestaurantsCount,MedicalCenterCount,GovernmentBuildingCount,NightLifeCount,CollegesCount,ArtsEntertainmentCount


### London

In [54]:
# Sorting the venue categories in each neighborhood
top_venues = 10
indicators = ['st', 'nd', 'rd']

# Create columns according to the number of venue categories
columns = ['Neighborhood']
for ind in np.arange(top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# New dataframe for sorted venues
London_venues_sorted = pd.DataFrame(columns=columns)
London_venues_sorted['Neighborhood'] = merged_dfL['Neighborhood']

for ind in np.arange(merged_dfL.shape[0]):
    London_venues_sorted.iloc[ind, 1:] = return_most_common_venues(merged_dfL.iloc[ind, :], top_venues)

London_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Abbey Road,GovernmentBuildingCount,OutdoorsRecreationCount,SpiritualCenterCount,RestaurantsCount,ArtsEntertainmentCount,NightLifeCount,FoodShopCount,CollegesCount,TotalCrimes,MedicalCenterCount
1,Addison,SpiritualCenterCount,OutdoorsRecreationCount,MedicalCenterCount,RestaurantsCount,TotalCrimes,FoodShopCount,NightLifeCount,ArtsEntertainmentCount,CollegesCount,GovernmentBuildingCount
2,Aldwych,CollegesCount,OutdoorsRecreationCount,RestaurantsCount,ArtsEntertainmentCount,SpiritualCenterCount,FoodShopCount,NightLifeCount,GovernmentBuildingCount,MedicalCenterCount,TotalCrimes
3,Angel,RestaurantsCount,OutdoorsRecreationCount,FoodShopCount,NightLifeCount,SpiritualCenterCount,ArtsEntertainmentCount,CollegesCount,MedicalCenterCount,GovernmentBuildingCount,TotalCrimes
4,Archway,SpiritualCenterCount,RestaurantsCount,FoodShopCount,OutdoorsRecreationCount,NightLifeCount,GovernmentBuildingCount,MedicalCenterCount,CollegesCount,ArtsEntertainmentCount,TotalCrimes


## Using Kmeans algorithm to cluster the neighborhoods based on venues frequency
### Toronto

In [55]:
# Setting the number of clusters and performing the clustering operation
kclusters = 4
Toronto_grouped_clustering = merged_dfT.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=40).fit(Toronto_grouped_clustering)

In [56]:
# Adding clustering labels to the dataframe and merging dfToronto so we can add
#the coordinates of each neighborhood
merged_dfT.insert(0, 'Cluster Labels', kmeans.labels_)
dfToronto.columns=['Neighborhood', 'Borough', 'CrimesNumber', 'Longitude', 'Latitude']
Toronto_merged = dfToronto
Toronto_merged = Toronto_merged.join(merged_dfT.set_index('Neighborhood'), on='Neighborhood')
Toronto_merged.head()

Unnamed: 0,Neighborhood,Borough,CrimesNumber,Longitude,Latitude,Cluster Labels,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
0,Agincourt North,Scarborough,363,-79.26707,43.8093,3,0.114524,0.058824,0.0,0.175,0.03125,0.074074,0.0,0.105263,0.0,0.133333
1,Agincourt South-Malvern West,Scarborough,478,-79.26935,43.78736,1,0.164985,0.058824,0.072289,0.25,0.09375,0.111111,0.043478,0.140351,0.25,0.133333
2,Alderwood,Etobicoke,204,-79.54116,43.60496,3,0.044756,0.058824,0.012048,0.05,0.0,0.148148,0.0,0.070175,0.0,0.033333
3,Annex,Old City of Toronto,943,-79.4028,43.66936,0,0.369022,0.235294,0.325301,0.325,0.21875,0.333333,0.130435,0.175439,0.875,0.2
4,Banbury-Don Mills,North York,403,-79.34852,43.74041,3,0.132075,0.0,0.0,0.025,0.0,0.037037,0.0,0.017544,0.125,0.033333


In [57]:
# Cheching how many neighborhoods are in each cluster
Toronto_merged.groupby('Cluster Labels').count()

Unnamed: 0_level_0,Neighborhood,Borough,CrimesNumber,Longitude,Latitude,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
Cluster Labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17
1,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
3,72,72,72,72,72,72,72,72,72,72,72,72,72,72,72


### London

In [58]:
# Setting the number of clusters and performing the clustering operation
kclusters = 4
London_grouped_clustering = merged_dfL.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=40).fit(London_grouped_clustering)

In [59]:
# Adding clustering labels to the dataframe and merging dfLondon so we can add
#the coordinates of each neighborhood
merged_dfL.insert(0, 'Cluster Labels', kmeans.labels_)
dfLondon.columns=['Neighborhood', 'Borough', 'CrimesNumber', 'Longitude', 'Latitude']
London_merged = dfLondon
London_merged = London_merged.join(merged_dfL.set_index('Neighborhood'), on='Neighborhood')
London_merged.head()

Unnamed: 0,Neighborhood,Borough,CrimesNumber,Longitude,Latitude,Cluster Labels,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
0,Abbey Road,Westminster,1215.0,-0.127197,51.497657,3,0.037725,0.226667,0.064935,0.5,0.2,0.542857,0.871429,0.030769,0.5,0.142857
1,Addison,Hammersmith and Fulham,2634.0,-0.21288,51.4826,0,0.081783,0.04,0.038961,0.15,0.04,0.228571,0.0,0.153846,0.25,0.061224
2,Aldwych,Westminster,0.0,-0.118607,51.512653,1,0.0,0.52,1.0,0.61,0.46,0.742857,0.171429,0.169231,0.5,0.489796
3,Angel,Islington,0.0,-0.10626,51.53202,2,0.0,0.186667,0.142857,0.66,0.23,0.542857,0.057143,0.123077,0.1875,0.44898
4,Archway,Islington,0.0,-0.134922,51.565748,0,0.0,0.026667,0.051948,0.21,0.1,0.114286,0.071429,0.061538,0.3125,0.183673


In [60]:
# Checking how many neighborhoods are in each cluster
London_merged.groupby('Cluster Labels').count()

Unnamed: 0_level_0,Neighborhood,Borough,CrimesNumber,Longitude,Latitude,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
Cluster Labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,168,168,168,168,168,168,168,168,168,168,168,168,168,168,168
1,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13
2,73,73,73,73,73,73,73,73,73,73,73,73,73,73,73
3,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8


## Creating new dataframes based on the cluster the neighborhoods belong to
### Toronto

In [61]:
# Creating a label dataframe for each cluster and viewing the data in the cluster
Label0=Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, Toronto_merged.columns[[0] + list(range(5, Toronto_merged.shape[1]))]]
Label0.head()

Unnamed: 0,Neighborhood,Cluster Labels,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
3,Annex,0,0.369022,0.235294,0.325301,0.325,0.21875,0.333333,0.130435,0.175439,0.875,0.2
19,Cabbagetown-South St. James Town,0,0.1448,0.411765,0.012048,0.3,0.125,0.259259,0.0,0.070175,0.375,0.266667
47,Henry Farm,0,0.043879,1.0,0.144578,0.1,0.03125,0.111111,0.086957,0.157895,0.125,0.1
58,Junction Area,0,0.109258,0.529412,0.012048,0.325,0.125,0.333333,0.043478,0.035088,0.25,0.166667
61,Kensington-Chinatown,0,0.473892,1.0,0.13253,0.6375,0.65625,0.296296,0.086957,0.087719,0.75,0.966667


In [62]:
# Creating a label dataframe for each cluster and viewing the data in the cluster
Label1=Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1, Toronto_merged.columns[[0] + list(range(5, Toronto_merged.shape[1]))]]
Label1.head()

Unnamed: 0,Neighborhood,Cluster Labels,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
1,Agincourt South-Malvern West,1,0.164985,0.058824,0.072289,0.25,0.09375,0.111111,0.043478,0.140351,0.25,0.133333
5,Bathurst Manor,1,0.06538,0.0,0.0,0.0125,0.0,0.148148,0.0,0.0,0.25,0.0
13,Black Creek,1,0.286968,0.0,0.012048,0.0375,0.0,0.185185,0.0,0.035088,0.25,0.066667
15,Briar Hill-Belgravia,1,0.092146,0.176471,0.0,0.1,0.15625,0.074074,0.086957,0.070175,0.5,0.1
27,Corso Italia-Davenport,1,0.071961,0.0,0.012048,0.2125,0.28125,0.185185,0.043478,0.070175,0.5,0.1


In [63]:
# Creating a label dataframe for each cluster and viewing the data in the cluster
Label2=Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2, Toronto_merged.columns[[0] + list(range(5, Toronto_merged.shape[1]))]]
Label2.head()

Unnamed: 0,Neighborhood,Cluster Labels,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
6,Bay Street Corridor,2,0.795086,0.411765,0.554217,0.85,0.5,1.0,1.0,1.0,0.25,1.0
23,Church-Yonge Corridor,2,0.759105,0.823529,0.819277,1.0,1.0,0.888889,0.434783,0.298246,0.5,0.766667


In [64]:
# Creating a label dataframe for each cluster and viewing the data in the cluster
Label3=Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3, Toronto_merged.columns[[0] + list(range(5, Toronto_merged.shape[1]))]]
Label3.head()

Unnamed: 0,Neighborhood,Cluster Labels,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
0,Agincourt North,3,0.114524,0.058824,0.0,0.175,0.03125,0.074074,0.0,0.105263,0.0,0.133333
2,Alderwood,3,0.044756,0.058824,0.012048,0.05,0.0,0.148148,0.0,0.070175,0.0,0.033333
4,Banbury-Don Mills,3,0.132075,0.0,0.0,0.025,0.0,0.037037,0.0,0.017544,0.125,0.033333
7,Bayview Village,3,0.090391,0.0,0.0,0.0,0.0,0.148148,0.0,0.0,0.125,0.0
8,Bayview Woods-Steeles,3,0.032032,0.0,0.0,0.0,0.0,0.148148,0.0,0.017544,0.0,0.0


In [65]:
# Creating dataframes with the mean category values of each cluster
L0T=Label0.mean()
L0T=L0T.to_frame().reset_index().rename(columns={0:'Label 0'}, inplace=False)

L1T=Label1.mean()
L1T=L1T.to_frame().reset_index().rename(columns={0:'Label 1'}, inplace=False)

L2T=Label2.mean()
L2T=L2T.to_frame().reset_index().rename(columns={0:'Label 2'}, inplace=False)

L3T=Label3.mean()
L3T=L3T.to_frame().reset_index().rename(columns={0:'Label 3'}, inplace=False)

### London

In [66]:
# Creating a label dataframe for each cluster and viewing the data in the cluster
Label0=London_merged.loc[London_merged['Cluster Labels'] == 0, London_merged.columns[[0] + list(range(5, London_merged.shape[1]))]]
Label0.head()

Unnamed: 0,Neighborhood,Cluster Labels,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
1,Addison,0,0.081783,0.04,0.038961,0.15,0.04,0.228571,0.0,0.153846,0.25,0.061224
4,Archway,0,0.0,0.026667,0.051948,0.21,0.1,0.114286,0.071429,0.061538,0.3125,0.183673
5,Avonmore and Brook Green,0,0.057255,0.066667,0.025974,0.26,0.13,0.285714,0.042857,0.076923,0.25,0.122449
9,Battersea,0,0.0,0.04,0.0,0.28,0.17,0.142857,0.0,0.076923,0.25,0.163265
12,Bellingham,0,0.095166,0.0,0.0,0.02,0.0,0.142857,0.0,0.030769,0.0625,0.020408


In [67]:
# Creating a label dataframe for each cluster and viewing the data in the cluster
Label1=London_merged.loc[London_merged['Cluster Labels'] == 1, London_merged.columns[[0] + list(range(5, London_merged.shape[1]))]]
Label1.head()

Unnamed: 0,Neighborhood,Cluster Labels,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
2,Aldwych,1,0.0,0.52,1.0,0.61,0.46,0.742857,0.171429,0.169231,0.5,0.489796
7,Bankside,1,0.0,0.293333,0.077922,0.69,0.42,0.771429,0.071429,0.046154,0.1875,0.755102
21,Bloomsbury,1,0.243953,0.586667,0.805195,0.43,0.28,0.6,0.057143,0.246154,0.4375,0.387755
34,Camden Town with Primrose Hill,1,0.247865,0.093333,0.090909,0.78,0.65,0.714286,0.242857,0.107692,0.5625,0.244898
39,Cathedrals,1,0.20983,0.146667,0.311688,1.0,0.73,0.742857,0.057143,0.2,0.25,1.0


In [68]:
# Creating a label dataframe for each cluster and viewing the data in the cluster
Label2=London_merged.loc[London_merged['Cluster Labels'] == 2, London_merged.columns[[0] + list(range(5, London_merged.shape[1]))]]
Label2.head()

Unnamed: 0,Neighborhood,Cluster Labels,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
3,Angel,2,0.0,0.186667,0.142857,0.66,0.23,0.542857,0.057143,0.123077,0.1875,0.44898
6,Balham,2,0.075543,0.093333,0.025974,0.32,0.1,0.171429,0.028571,0.107692,0.25,0.285714
8,Barnsbury,2,0.115348,0.093333,0.051948,0.52,0.19,0.342857,0.042857,0.076923,0.125,0.285714
10,Bayswater,2,0.069798,0.066667,0.077922,0.46,0.17,0.342857,0.028571,0.107692,0.25,0.183673
11,Belgravia,2,0.0,0.133333,0.038961,0.41,0.12,0.457143,0.371429,0.046154,0.3125,0.102041


In [69]:
# Creating a label dataframe for each cluster and viewing the data in the cluster
Label3=London_merged.loc[London_merged['Cluster Labels'] == 3, London_merged.columns[[0] + list(range(5, London_merged.shape[1]))]]
Label3.head()

Unnamed: 0,Neighborhood,Cluster Labels,TotalCrimes,ArtsEntertainmentCount,CollegesCount,RestaurantsCount,NightLifeCount,OutdoorsRecreationCount,GovernmentBuildingCount,MedicalCenterCount,SpiritualCenterCount,FoodShopCount
0,Abbey Road,3,0.037725,0.226667,0.064935,0.5,0.2,0.542857,0.871429,0.030769,0.5,0.142857
42,Charing Cross,3,0.0,1.0,0.142857,0.63,0.36,0.942857,0.571429,0.076923,0.1875,0.489796
47,Churchill,3,0.069146,0.293333,0.025974,0.19,0.16,0.571429,1.0,0.092308,0.4375,0.102041
140,Maida Vale,3,0.043748,0.2,0.077922,0.47,0.23,0.542857,0.814286,0.184615,0.5,0.183673
149,New Eltham,3,0.0,0.133333,0.038961,0.31,0.16,0.514286,0.485714,0.061538,0.375,0.081633


In [70]:
# Creating dataframes with the mean category values of each cluster
L0L=Label0.mean()
L0L=L0L.to_frame().reset_index().rename(columns={0:'Label 0'}, inplace=False)

L1L=Label1.mean()
L1L=L1L.to_frame().reset_index().rename(columns={0:'Label 1'}, inplace=False)

L2L=Label2.mean()
L2L=L2L.to_frame().reset_index().rename(columns={0:'Label 2'}, inplace=False)

L3L=Label3.mean()
L3L=L3L.to_frame().reset_index().rename(columns={0:'Label 3'}, inplace=False)

## Comparing the clusters and drawing conclusions
### Toronto

In [71]:
# Merging the mean values dataframes so the cluster information can be easily compared
labels_df=pd.merge(left=L0T, right=L1T, how='left', left_on='index', right_on='index')
labels_df=pd.merge(left=labels_df, right=L2T, how='left', left_on='index', right_on='index')
labels_df=pd.merge(left=labels_df, right=L3T, how='left', left_on='index', right_on='index')
labels_df=labels_df.drop(index=0)
labels_df

Unnamed: 0,index,Label 0,Label 1,Label 2,Label 3
1,TotalCrimes,0.190822,0.138573,0.777095,0.15123
2,ArtsEntertainmentCount,0.411765,0.091364,0.617647,0.026144
3,CollegesCount,0.141035,0.017944,0.686747,0.008869
4,RestaurantsCount,0.372794,0.096011,0.925,0.04375
5,NightLifeCount,0.262868,0.06117,0.75,0.019097
6,OutdoorsRecreationCount,0.357298,0.124507,0.944444,0.102366
7,GovernmentBuildingCount,0.094629,0.027752,0.717391,0.019324
8,MedicalCenterCount,0.146543,0.061217,0.649123,0.042641
9,SpiritualCenterCount,0.580882,0.348404,0.375,0.039931
10,FoodShopCount,0.264706,0.08156,0.883333,0.036111


In [72]:
# Neighborhoods with significant amounts of Spiritual Centers, Entertainment venues and
#Outdoors&Recreational venues, few NighLife and Government Buildings.
# I would categorize this cluster as close to downtown areas with average crime rates.
labels_df.sort_values('Label 0', ascending=False)

Unnamed: 0,index,Label 0,Label 1,Label 2,Label 3
9,SpiritualCenterCount,0.580882,0.348404,0.375,0.039931
2,ArtsEntertainmentCount,0.411765,0.091364,0.617647,0.026144
4,RestaurantsCount,0.372794,0.096011,0.925,0.04375
6,OutdoorsRecreationCount,0.357298,0.124507,0.944444,0.102366
10,FoodShopCount,0.264706,0.08156,0.883333,0.036111
5,NightLifeCount,0.262868,0.06117,0.75,0.019097
1,TotalCrimes,0.190822,0.138573,0.777095,0.15123
8,MedicalCenterCount,0.146543,0.061217,0.649123,0.042641
3,CollegesCount,0.141035,0.017944,0.686747,0.008869
7,GovernmentBuildingCount,0.094629,0.027752,0.717391,0.019324


In [73]:
# Neighborhoods with many Spiritual Centers, Outdoors&Recreational venues and
#Restaurants, however not many Night Life venues.
# I would categorize this cluster as dense residential neighborhoods with high crime rates.
labels_df.sort_values('Label 1', ascending=False)

Unnamed: 0,index,Label 0,Label 1,Label 2,Label 3
9,SpiritualCenterCount,0.580882,0.348404,0.375,0.039931
1,TotalCrimes,0.190822,0.138573,0.777095,0.15123
6,OutdoorsRecreationCount,0.357298,0.124507,0.944444,0.102366
4,RestaurantsCount,0.372794,0.096011,0.925,0.04375
2,ArtsEntertainmentCount,0.411765,0.091364,0.617647,0.026144
10,FoodShopCount,0.264706,0.08156,0.883333,0.036111
8,MedicalCenterCount,0.146543,0.061217,0.649123,0.042641
5,NightLifeCount,0.262868,0.06117,0.75,0.019097
7,GovernmentBuildingCount,0.094629,0.027752,0.717391,0.019324
3,CollegesCount,0.141035,0.017944,0.686747,0.008869


In [74]:
# Neighborhoods with significant amounts of Recreational venues, Restaurants and
#Night Live venues venues, also significant amount of Govermnment Buildings.
# I would categorize this cluster as City Center/Downtown areas with average amount of crimes.
labels_df.sort_values('Label 2', ascending=False)

Unnamed: 0,index,Label 0,Label 1,Label 2,Label 3
6,OutdoorsRecreationCount,0.357298,0.124507,0.944444,0.102366
4,RestaurantsCount,0.372794,0.096011,0.925,0.04375
10,FoodShopCount,0.264706,0.08156,0.883333,0.036111
1,TotalCrimes,0.190822,0.138573,0.777095,0.15123
5,NightLifeCount,0.262868,0.06117,0.75,0.019097
7,GovernmentBuildingCount,0.094629,0.027752,0.717391,0.019324
3,CollegesCount,0.141035,0.017944,0.686747,0.008869
8,MedicalCenterCount,0.146543,0.061217,0.649123,0.042641
2,ArtsEntertainmentCount,0.411765,0.091364,0.617647,0.026144
9,SpiritualCenterCount,0.580882,0.348404,0.375,0.039931


In [75]:
# Neighborhoods with many Restaurants, Medical Centers and Outdoors&Recreational
#venues, only some Government Buildings and not many Night Life venues
# I would categorize this cluster as densely populated neighborhoods with very high crime rates.
labels_df.sort_values('Label 3', ascending=False)

Unnamed: 0,index,Label 0,Label 1,Label 2,Label 3
1,TotalCrimes,0.190822,0.138573,0.777095,0.15123
6,OutdoorsRecreationCount,0.357298,0.124507,0.944444,0.102366
4,RestaurantsCount,0.372794,0.096011,0.925,0.04375
8,MedicalCenterCount,0.146543,0.061217,0.649123,0.042641
9,SpiritualCenterCount,0.580882,0.348404,0.375,0.039931
10,FoodShopCount,0.264706,0.08156,0.883333,0.036111
2,ArtsEntertainmentCount,0.411765,0.091364,0.617647,0.026144
7,GovernmentBuildingCount,0.094629,0.027752,0.717391,0.019324
5,NightLifeCount,0.262868,0.06117,0.75,0.019097
3,CollegesCount,0.141035,0.017944,0.686747,0.008869


### London

In [76]:
# Merging the mean values dataframes so the cluster information can be easily compared
labels_df=pd.merge(left=L0L, right=L1L, how='left', left_on='index', right_on='index')
labels_df=pd.merge(left=labels_df, right=L2L, how='left', left_on='index', right_on='index')
labels_df=pd.merge(left=labels_df, right=L3L, how='left', left_on='index', right_on='index')
labels_df=labels_df.drop(index=0)
labels_df

Unnamed: 0,index,Label 0,Label 1,Label 2,Label 3
1,TotalCrimes,0.07692,0.091409,0.077995,0.039087
2,ArtsEntertainmentCount,0.037143,0.366154,0.118904,0.316667
3,CollegesCount,0.033472,0.497502,0.119374,0.063312
4,RestaurantsCount,0.119226,0.716923,0.439726,0.39625
5,NightLifeCount,0.05381,0.525385,0.199452,0.20125
6,OutdoorsRecreationCount,0.156973,0.738462,0.398434,0.610714
7,GovernmentBuildingCount,0.020238,0.120879,0.060274,0.739286
8,MedicalCenterCount,0.060714,0.190533,0.104953,0.092308
9,SpiritualCenterCount,0.142113,0.5,0.253425,0.40625
10,FoodShopCount,0.086127,0.613815,0.249371,0.163265


In [77]:
# Neighborhoods with significant amounts of Spiritual Centers, Outdoors&Recreational and
#Restaurant venues, very few Government Buildings and Colleges, however they have a significant amount of crimes.
# I would categorize this cluster as dense populated areas, prone to criminal activities. 
labels_df.sort_values('Label 0', ascending=False)

Unnamed: 0,index,Label 0,Label 1,Label 2,Label 3
6,OutdoorsRecreationCount,0.156973,0.738462,0.398434,0.610714
9,SpiritualCenterCount,0.142113,0.5,0.253425,0.40625
4,RestaurantsCount,0.119226,0.716923,0.439726,0.39625
10,FoodShopCount,0.086127,0.613815,0.249371,0.163265
1,TotalCrimes,0.07692,0.091409,0.077995,0.039087
8,MedicalCenterCount,0.060714,0.190533,0.104953,0.092308
5,NightLifeCount,0.05381,0.525385,0.199452,0.20125
2,ArtsEntertainmentCount,0.037143,0.366154,0.118904,0.316667
3,CollegesCount,0.033472,0.497502,0.119374,0.063312
7,GovernmentBuildingCount,0.020238,0.120879,0.060274,0.739286


In [78]:
# Neighborhoods with many Restaurants, Night Life venues and Outdoors&Recreational
#venues, not many Government Buildings.
# I would categorize this cluster as close do Downtown areas, with very few crimes reported.
labels_df.sort_values('Label 1', ascending=False)

Unnamed: 0,index,Label 0,Label 1,Label 2,Label 3
6,OutdoorsRecreationCount,0.156973,0.738462,0.398434,0.610714
4,RestaurantsCount,0.119226,0.716923,0.439726,0.39625
10,FoodShopCount,0.086127,0.613815,0.249371,0.163265
5,NightLifeCount,0.05381,0.525385,0.199452,0.20125
9,SpiritualCenterCount,0.142113,0.5,0.253425,0.40625
3,CollegesCount,0.033472,0.497502,0.119374,0.063312
2,ArtsEntertainmentCount,0.037143,0.366154,0.118904,0.316667
8,MedicalCenterCount,0.060714,0.190533,0.104953,0.092308
7,GovernmentBuildingCount,0.020238,0.120879,0.060274,0.739286
1,TotalCrimes,0.07692,0.091409,0.077995,0.039087


In [79]:
# Neighborhoods with significant amounts of Spiritual Centers, Restaurants and
#Outdoors&Recreational venues, not many Government Buildings.
# I would categorize this cluster as densely populated residential neighborhoods with low crime rates.
labels_df.sort_values('Label 2', ascending=False)

Unnamed: 0,index,Label 0,Label 1,Label 2,Label 3
4,RestaurantsCount,0.119226,0.716923,0.439726,0.39625
6,OutdoorsRecreationCount,0.156973,0.738462,0.398434,0.610714
9,SpiritualCenterCount,0.142113,0.5,0.253425,0.40625
10,FoodShopCount,0.086127,0.613815,0.249371,0.163265
5,NightLifeCount,0.05381,0.525385,0.199452,0.20125
3,CollegesCount,0.033472,0.497502,0.119374,0.063312
2,ArtsEntertainmentCount,0.037143,0.366154,0.118904,0.316667
8,MedicalCenterCount,0.060714,0.190533,0.104953,0.092308
1,TotalCrimes,0.07692,0.091409,0.077995,0.039087
7,GovernmentBuildingCount,0.020238,0.120879,0.060274,0.739286


In [80]:
# Neighborhoods with many Government Buildings, Outdoors&Recreational venues 
#and Spiritual Centers.
# I would categorize this cluster as City Center/Downtown neighborhoods with very low crime rates.
labels_df.sort_values('Label 3', ascending=False)

Unnamed: 0,index,Label 0,Label 1,Label 2,Label 3
7,GovernmentBuildingCount,0.020238,0.120879,0.060274,0.739286
6,OutdoorsRecreationCount,0.156973,0.738462,0.398434,0.610714
9,SpiritualCenterCount,0.142113,0.5,0.253425,0.40625
4,RestaurantsCount,0.119226,0.716923,0.439726,0.39625
2,ArtsEntertainmentCount,0.037143,0.366154,0.118904,0.316667
5,NightLifeCount,0.05381,0.525385,0.199452,0.20125
10,FoodShopCount,0.086127,0.613815,0.249371,0.163265
8,MedicalCenterCount,0.060714,0.190533,0.104953,0.092308
3,CollegesCount,0.033472,0.497502,0.119374,0.063312
1,TotalCrimes,0.07692,0.091409,0.077995,0.039087


## Map of Toronto with the neighborhoods clusters

In [81]:
#Creating the Toronto map with the neighborhoods clusters imposed on top
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# Color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Markers for the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Neighborhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Map of London with the neighborhoods clusters

In [82]:
#Creating the Toronto map with the neighborhoods clusters imposed on top
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# Color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Markers for the map
markers_colors = []
for lat, lon, poi, cluster in zip(London_merged['Latitude'], London_merged['Longitude'], London_merged['Neighborhood'], London_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters