### Importing Libraries

In [1]:
import folium
import requests
import numpy as np
import pandas as pd
import matplotlib.cm as cm
from collections import deque
import matplotlib.colors as colors
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim

In [2]:
toronto_data = pd.read_csv('Toronto_data.csv')
toronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [3]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(len(toronto_data['Borough'].unique()), toronto_data.shape[0]))

The dataframe has 11 boroughs and 103 neighborhoods.


In [4]:
address = 'Toronto'
geolocator = Nominatim(user_agent="cn_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude= location.longitude
print('The geographical coordinates of Toronto are {}, {}.'.format(latitude,longitude))

The geographical coordinates of Toronto are 43.653963, -79.387207.


In [5]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat,lng,borgh,neigh in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighborhood']):
    label = '{}, {}'.format(neigh, borgh)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat,lng], radius=5, popup=label, color='blue', fill=True,
                        fill_color='#3186cc', fill_opacity=0.7, parse_html=False).add_to(map_toronto)
    
map_toronto

### Configure Foursquare access

In [24]:
CLIENT_ID = 'XXXXXXXX' # your Foursquare ID
CLIENT_SECRET = 'XXXXXX' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: XXXXXXXX
CLIENT_SECRET:XXXXXX


In [7]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
    
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [8]:
toronto_venues = getNearbyVenues(toronto_data.Neighborhood, toronto_data.Latitude, toronto_data.Longitude)
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge, Malvern",43.806686,-79.194353,Interprovincial Group,43.80563,-79.200378,Print Shop
2,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Chris Effects Painting,43.784343,-79.163742,Construction & Landscaping
3,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place


In [9]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Agincourt,4,4,4,4,4,4
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",3,3,3,3,3,3
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",11,11,11,11,11,11
"Alderwood, Long Branch",10,10,10,10,10,10
"Bathurst Manor, Downsview North, Wilson Heights",18,18,18,18,18,18
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",25,25,25,25,25,25
Berczy Park,55,55,55,55,55,55
"Birch Cliff, Cliffside West",4,4,4,4,4,4


In [10]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 280 uniques categories.


### Performing One Hot Encoding for Venue Categories

In [11]:
toronto_OHE = pd.get_dummies(toronto_venues["Venue Category"], prefix = "", prefix_sep = "")
toronto_OHE["Neighborhood"] = toronto_venues["Neighborhood"]

nindex = list(toronto_OHE.columns).index("Neighborhood")
cols = deque(toronto_OHE.columns)
cols.rotate(-nindex)
cols = list(cols)
toronto_OHE = toronto_OHE[cols]
toronto_OHE.head()

Unnamed: 0,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Other Repair Shop,...,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Motel,Movie Theater,Moving Target,Museum,Music Store,Music Venue
0,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
toronto_grouped = toronto_OHE.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Other Repair Shop,...,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Motel,Movie Theater,Moving Target,Museum,Music Store,Music Venue
0,"Adelaide, King, Richmond",0.01,0.000000,0.01,0.000000,0.01,0.000000,0.00,0.00,0.000000,...,0.0,0.000000,0.000000,0.01,0.000000,0.000000,0.0,0.000000,0.000000,0.00
1,Agincourt,0.00,0.000000,0.00,0.000000,0.00,0.000000,0.00,0.00,0.000000,...,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.0,0.000000,0.000000,0.00
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.00,0.000000,0.00,0.000000,0.00,0.000000,0.00,0.00,0.000000,...,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.0,0.000000,0.000000,0.00
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.00,0.000000,0.00,0.000000,0.00,0.000000,0.00,0.00,0.000000,...,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.0,0.000000,0.000000,0.00
4,"Alderwood, Long Branch",0.00,0.000000,0.00,0.000000,0.00,0.000000,0.00,0.00,0.000000,...,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.0,0.000000,0.000000,0.00
5,"Bathurst Manor, Downsview North, Wilson Heights",0.00,0.000000,0.00,0.000000,0.00,0.000000,0.00,0.00,0.000000,...,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.0,0.000000,0.000000,0.00
6,Bayview Village,0.00,0.000000,0.00,0.000000,0.00,0.000000,0.00,0.00,0.000000,...,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.0,0.000000,0.000000,0.00
7,"Bedford Park, Lawrence Manor East",0.00,0.000000,0.00,0.000000,0.00,0.000000,0.00,0.00,0.000000,...,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.0,0.000000,0.000000,0.00
8,Berczy Park,0.00,0.018182,0.00,0.000000,0.00,0.000000,0.00,0.00,0.000000,...,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.0,0.018182,0.000000,0.00
9,"Birch Cliff, Cliffside West",0.00,0.000000,0.00,0.000000,0.00,0.000000,0.00,0.00,0.000000,...,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.0,0.000000,0.000000,0.00


In [13]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [14]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

toronto_venues_sorted = pd.DataFrame(columns=columns)
toronto_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    toronto_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

toronto_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,American Restaurant,Steakhouse,Bar,Cosmetics Shop,Thai Restaurant,Gym,Bakery,Restaurant
1,Agincourt,Sandwich Place,Lounge,Chinese Restaurant,Breakfast Spot,Antique Shop,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Park,Playground,Asian Restaurant,Music Venue,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Sandwich Place,Beer Store,Liquor Store,Pizza Place,Pharmacy,Coffee Shop,Fast Food Restaurant,Video Store,Fried Chicken Joint
4,"Alderwood, Long Branch",Pizza Place,Pool,Coffee Shop,Sandwich Place,Dance Studio,Pub,Gym,Pharmacy,Skating Rink,Wings Joint


### Performing Neighborhood Clustering
#### Running K-means Clustering to cluster neighborhoods into 5 clusters

In [15]:
kclusters = 5
toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)


In [16]:
toronto_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
toronto_venues_sorted.head()

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,"Adelaide, King, Richmond",Coffee Shop,Café,American Restaurant,Steakhouse,Bar,Cosmetics Shop,Thai Restaurant,Gym,Bakery,Restaurant
1,0,Agincourt,Sandwich Place,Lounge,Chinese Restaurant,Breakfast Spot,Antique Shop,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant
2,1,"Agincourt North, L'Amoreaux East, Milliken, St...",Park,Playground,Asian Restaurant,Music Venue,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop
3,0,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Sandwich Place,Beer Store,Liquor Store,Pizza Place,Pharmacy,Coffee Shop,Fast Food Restaurant,Video Store,Fried Chicken Joint
4,0,"Alderwood, Long Branch",Pizza Place,Pool,Coffee Shop,Sandwich Place,Dance Studio,Pub,Gym,Pharmacy,Skating Rink,Wings Joint


In [17]:
toronto_merged = toronto_data
toronto_merged = toronto_merged.join(toronto_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
toronto_merged["Cluster Labels"] = toronto_merged["Cluster Labels"].fillna(5).astype("int")
toronto_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,0,Fast Food Restaurant,Print Shop,Music Venue,Antique Shop,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Aquarium,Airport Food Court
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,0,Construction & Landscaping,Bar,Music Venue,Antique Shop,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Aquarium
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,0,Tech Startup,Mexican Restaurant,Rental Car Location,Electronics Store,Spa,Pizza Place,Medical Center,Breakfast Spot,Intersection,Warehouse Store
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,Coffee Shop,Convenience Store,Korean Restaurant,Music Venue,Antique Shop,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0,Caribbean Restaurant,Lounge,Fried Chicken Joint,Hakka Restaurant,Athletics & Sports,Bakery,Bank,Thai Restaurant,Burrito Place,Burger Joint


In [18]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Cluster 1

In [19]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,0,Fast Food Restaurant,Print Shop,Music Venue,Antique Shop,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Aquarium,Airport Food Court
1,Scarborough,0,Construction & Landscaping,Bar,Music Venue,Antique Shop,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Aquarium
2,Scarborough,0,Tech Startup,Mexican Restaurant,Rental Car Location,Electronics Store,Spa,Pizza Place,Medical Center,Breakfast Spot,Intersection,Warehouse Store
3,Scarborough,0,Coffee Shop,Convenience Store,Korean Restaurant,Music Venue,Antique Shop,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant
4,Scarborough,0,Caribbean Restaurant,Lounge,Fried Chicken Joint,Hakka Restaurant,Athletics & Sports,Bakery,Bank,Thai Restaurant,Burrito Place,Burger Joint
5,Scarborough,0,Playground,Antique Shop,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Aquarium,Afghan Restaurant
6,Scarborough,0,Coffee Shop,Discount Store,Playground,Department Store,Music Venue,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop
7,Scarborough,0,Bakery,Bus Line,Intersection,Bus Station,Soccer Field,Metro Station,Park,Fast Food Restaurant,Art Museum,Arts & Crafts Store
8,Scarborough,0,Motel,American Restaurant,Music Venue,Antique Shop,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,Aquarium
9,Scarborough,0,Café,General Entertainment,College Stadium,Skating Rink,Music Venue,American Restaurant,Airport Food Court,Airport Gate,Airport Lounge,Airport Service


### Cluster 2

In [20]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Scarborough,1,Park,Playground,Asian Restaurant,Music Venue,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop
23,North York,1,Park,Convenience Store,Bank,Bar,Music Venue,Aquarium,Airport Lounge,Airport Service,Airport Terminal,American Restaurant
25,North York,1,Fast Food Restaurant,Food & Drink Shop,Park,Music Venue,Airport,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant
30,North York,1,Other Repair Shop,Park,Airport,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium
40,East York,1,Park,Convenience Store,Coffee Shop,Music Venue,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop
44,Central Toronto,1,Park,Swim School,Bus Line,Music Venue,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop
50,Downtown Toronto,1,Park,Playground,Trail,Music Venue,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium
64,Central Toronto,1,Trail,Jewelry Store,Sushi Restaurant,Park,Music Venue,Aquarium,Airport Lounge,Airport Service,Airport Terminal,American Restaurant
72,North York,1,Park,Japanese Restaurant,Pizza Place,Pub,Aquarium,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop
74,York,1,Park,Women's Store,Fast Food Restaurant,Pharmacy,Market,Arts & Crafts Store,Art Museum,Art Gallery,Aquarium,Antique Shop


### Cluster 3

In [21]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,North York,3,Japanese Restaurant,Café,Chinese Restaurant,Bank,Antique Shop,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal
94,Etobicoke,3,Bank,Music Venue,Aquarium,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Art Gallery


### Cluster 4

In [22]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,North York,4,Cafeteria,Music Venue,Antique Shop,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Aquarium


### Cluster 5

In [23]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 5, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,Scarborough,5,,,,,,,,,,
21,North York,5,,,,,,,,,,
93,Etobicoke,5,,,,,,,,,,
