# Step C : Exploring Toronto Neighborhoods 

## Load the dataframe that has resulted from the previous steps of the assignment

In [6]:
import pandas as pd

In [7]:
merged = pd.read_csv('final_dataframe.csv')
merged.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


**Get the latitude and longitude of Toronto**

In [8]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

address = 'Toronto,Canada'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.653963, -79.387207.


**Create map of Toronto using latitude and longitude values**

In [11]:
# create map of Toronto using latitude and longitude values

import folium # map rendering library

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, bor, nei in zip(merged['Latitude'], merged['Longitude'], merged['Borough'], merged['Neighborhood']):
    label = '{}, {}'.format(nei, bor)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
    
map_toronto

## Explore North York borough since it contains many neighborhoods 

**Get the North York data from our dataframe**

In [12]:
exploring = merged[merged['Borough'].str.contains("North York")]
exploring

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
17,M2H,North York,Hillcrest Village,43.803762,-79.363452
18,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
19,M2K,North York,Bayview Village,43.786947,-79.385975
20,M2L,North York,"Silver Hills, York Mills",43.75749,-79.374714
21,M2M,North York,"Newtonbrook, Willowdale",43.789053,-79.408493
22,M2N,North York,Willowdale South,43.77012,-79.408493
23,M2P,North York,York Mills West,43.752758,-79.400049
24,M2R,North York,Willowdale West,43.782736,-79.442259
25,M3A,North York,Parkwoods,43.753259,-79.329656
26,M3B,North York,Don Mills North,43.745906,-79.352188


**Reset the index**

In [13]:
exploring.reset_index(drop=True, inplace=True)

In [14]:
exploring

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M2H,North York,Hillcrest Village,43.803762,-79.363452
1,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
2,M2K,North York,Bayview Village,43.786947,-79.385975
3,M2L,North York,"Silver Hills, York Mills",43.75749,-79.374714
4,M2M,North York,"Newtonbrook, Willowdale",43.789053,-79.408493
5,M2N,North York,Willowdale South,43.77012,-79.408493
6,M2P,North York,York Mills West,43.752758,-79.400049
7,M2R,North York,Willowdale West,43.782736,-79.442259
8,M3A,North York,Parkwoods,43.753259,-79.329656
9,M3B,North York,Don Mills North,43.745906,-79.352188


In [15]:
exploring.shape

(24, 5)

**Foursquare credentials**

In [16]:
# The code was removed by Watson Studio for sharing.

## Exploring the first neighbourhood of North York

In [17]:
exploring.loc[0, 'Neighborhood']

'Hillcrest Village'

In [18]:
neighborhood_latitude = exploring.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = exploring.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = exploring.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Hillcrest Village are 43.8037622, -79.3634517.


#### Let's get the top 100 venues that are in Hillcrest Village within a radius of 500 meters.

In [19]:
radius = 500
LIMIT = 100
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET,
    neighborhood_latitude,
    neighborhood_longitude,
    VERSION, 
    radius, 
    LIMIT, 
    )
url

'https://api.foursquare.com/v2/venues/explore?client_id=XJAZBCZCPKCHLOQSUFVE20CO41DOQDJTPIKXE2V3GXGPB4TU&client_secret=U1WJ3NOWKGDOUJ41YL5AE2MQQEM3XYTDCEOK3KP11TGAWPVW&ll=43.8037622,-79.3634517&v=20180604&radius=500&limit=100'

In [20]:
import json # library to handle JSON files
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c6179ba6a60712d3013b385'},
 'response': {'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 5,
  'suggestedBounds': {'ne': {'lat': 43.808262204500004,
    'lng': -79.3572281853783},
   'sw': {'lat': 43.7992621955, 'lng': -79.3696752146217}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4ad9dce6f964a520651b21e3',
       'name': "Eagle's Nest Golf Club",
       'location': {'address': '10000 Dufferin Rd',
        'lat': 43.805454826002794,
        'lng': -79.36418592243415,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.805454826002794,
          'lng': -79.36418592243415}],
        'distance': 197,
        'cc': 'CA',
        'city': 'Toronto

**Function that extracts the category of a venue**

In [21]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

**Getting the venues from the json file**

In [22]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Eagle's Nest Golf Club,Golf Course,43.805455,-79.364186
1,AY Jackson Pool,Pool,43.804515,-79.366138
2,Villa Madina,Mediterranean Restaurant,43.801685,-79.363938
3,Duncan Creek Park,Dog Run,43.805539,-79.360695
4,A.Y. Jackson Secondary School Track,Athletics & Sports,43.805068,-79.366677


In [23]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

5 venues were returned by Foursquare.


**The neighborhoods with postal codes M2L and M2M do not return any results even if the radius is set to 600
so I do not take them into account in the following analysis**

In [24]:
exploring.drop(exploring.index[3:5], inplace=True)
exploring

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M2H,North York,Hillcrest Village,43.803762,-79.363452
1,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
2,M2K,North York,Bayview Village,43.786947,-79.385975
5,M2N,North York,Willowdale South,43.77012,-79.408493
6,M2P,North York,York Mills West,43.752758,-79.400049
7,M2R,North York,Willowdale West,43.782736,-79.442259
8,M3A,North York,Parkwoods,43.753259,-79.329656
9,M3B,North York,Don Mills North,43.745906,-79.352188
10,M3C,North York,"Flemingdon Park, Don Mills South",43.7259,-79.340923
11,M3H,North York,"Bathurst Manor, Downsview North, Wilson Heights",43.754328,-79.442259


In [25]:
exploring.shape

(22, 5)

In [26]:
exploring.reset_index(drop=True, inplace=True)
exploring

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M2H,North York,Hillcrest Village,43.803762,-79.363452
1,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
2,M2K,North York,Bayview Village,43.786947,-79.385975
3,M2N,North York,Willowdale South,43.77012,-79.408493
4,M2P,North York,York Mills West,43.752758,-79.400049
5,M2R,North York,Willowdale West,43.782736,-79.442259
6,M3A,North York,Parkwoods,43.753259,-79.329656
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M3C,North York,"Flemingdon Park, Don Mills South",43.7259,-79.340923
9,M3H,North York,"Bathurst Manor, Downsview North, Wilson Heights",43.754328,-79.442259


### Explore all neighborhoods of North York

In [27]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [28]:
north_york_venues = getNearbyVenues(names=exploring['Neighborhood'],
                                   latitudes=exploring['Latitude'],
                                   longitudes=exploring['Longitude']
                                  )


Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Bedford Park, Lawrence Manor East
Lawrence Heights, Lawrence Manor
Glencairn
Maple Leaf Park, North Park, Upwood Park
Humber Summit
Emery, Humberlea


In [29]:
print(north_york_venues.shape)
north_york_venues.head()

(228, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Hillcrest Village,43.803762,-79.363452,Eagle's Nest Golf Club,43.805455,-79.364186,Golf Course
1,Hillcrest Village,43.803762,-79.363452,AY Jackson Pool,43.804515,-79.366138,Pool
2,Hillcrest Village,43.803762,-79.363452,Villa Madina,43.801685,-79.363938,Mediterranean Restaurant
3,Hillcrest Village,43.803762,-79.363452,Duncan Creek Park,43.805539,-79.360695,Dog Run
4,Hillcrest Village,43.803762,-79.363452,A.Y. Jackson Secondary School Track,43.805068,-79.366677,Athletics & Sports


In [30]:
north_york_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Downsview North, Wilson Heights",17,17,17,17,17,17
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",25,25,25,25,25,25
"CFB Toronto, Downsview East",3,3,3,3,3,3
Don Mills North,5,5,5,5,5,5
Downsview Central,3,3,3,3,3,3
Downsview Northwest,5,5,5,5,5,5
Downsview West,4,4,4,4,4,4
"Emery, Humberlea",2,2,2,2,2,2
"Fairview, Henry Farm, Oriole",60,60,60,60,60,60


### Unique categories

In [31]:
print('There are {} uniques categories.'.format(len(north_york_venues['Venue Category'].unique())))

There are 102 uniques categories.


## Analyze Each Neighborhood of North York

In [32]:
# one hot encoding
north_york_onehot = pd.get_dummies(north_york_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
north_york_onehot['Neighborhood'] = north_york_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [north_york_onehot.columns[-1]] + list(north_york_onehot.columns[:-1])
north_york_onehot = north_york_onehot[fixed_columns]

north_york_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,...,Tailor Shop,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Hillcrest Village,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [33]:
north_york_onehot.shape

(228, 103)

In [34]:
north_york_grouped = north_york_onehot.groupby('Neighborhood').mean().reset_index()
north_york_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,...,Tailor Shop,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,"Bathurst Manor, Downsview North, Wilson Heights",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CFB Toronto, Downsview East",0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Don Mills North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Downsview Central,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Downsview Northwest,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Downsview West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Emery, Humberlea",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Fairview, Henry Farm, Oriole",0.0,0.0,0.016667,0.0,0.033333,0.0,0.033333,0.016667,0.0,...,0.016667,0.016667,0.0,0.016667,0.016667,0.016667,0.0,0.0,0.016667,0.033333


In [35]:
north_york_grouped.shape

(22, 103)

#### Let's print each neighborhood along with the top 5 most common venues

In [36]:
num_top_venues = 5

for hood in north_york_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = north_york_grouped[north_york_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor, Downsview North, Wilson Heights----
                  venue  freq
0           Coffee Shop  0.12
1           Bridal Shop  0.06
2    Frozen Yogurt Shop  0.06
3   Fried Chicken Joint  0.06
4  Fast Food Restaurant  0.06


----Bayview Village----
                 venue  freq
0   Chinese Restaurant  0.25
1                 Café  0.25
2                 Bank  0.25
3  Japanese Restaurant  0.25
4    Accessories Store  0.00


----Bedford Park, Lawrence Manor East----
                  venue  freq
0           Coffee Shop  0.08
1    Italian Restaurant  0.08
2  Fast Food Restaurant  0.08
3           Pizza Place  0.08
4        Sandwich Place  0.04


----CFB Toronto, Downsview East----
         venue  freq
0      Airport  0.33
1         Park  0.33
2     Bus Stop  0.33
3  Pizza Place  0.00
4    Pet Store  0.00


----Don Mills North----
                  venue  freq
0  Gym / Fitness Center   0.2
1  Caribbean Restaurant   0.2
2                  Café   0.2
3   Japanese Restaurant   0.2


#### Let's put that into a *pandas* dataframe

First, let's write a function to sort the venues in descending order.

In [37]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 5 venues for each neighborhood.

In [39]:
import numpy as np
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = north_york_grouped['Neighborhood']

for ind in np.arange(north_york_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(north_york_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"Bathurst Manor, Downsview North, Wilson Heights",Coffee Shop,Frozen Yogurt Shop,Pharmacy,Restaurant,Diner
1,Bayview Village,Chinese Restaurant,Café,Bank,Japanese Restaurant,Dog Run
2,"Bedford Park, Lawrence Manor East",Fast Food Restaurant,Pizza Place,Italian Restaurant,Coffee Shop,Greek Restaurant
3,"CFB Toronto, Downsview East",Park,Airport,Bus Stop,Dog Run,Coffee Shop
4,Don Mills North,Japanese Restaurant,Caribbean Restaurant,Gym / Fitness Center,Café,Basketball Court


## Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 3 clusters.

In [40]:
# set number of clusters
kclusters = 3

north_york_grouped_clustering = north_york_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(north_york_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 2, 0, 2, 2, 1, 0, 0, 1, 0], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 5 venues for each neighborhood.

In [41]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

north_york_merged = exploring

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
north_york_merged = north_york_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

north_york_merged.head(23) # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M2H,North York,Hillcrest Village,43.803762,-79.363452,0,Golf Course,Dog Run,Pool,Athletics & Sports,Mediterranean Restaurant
1,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556,0,Clothing Store,Fast Food Restaurant,Coffee Shop,Restaurant,Asian Restaurant
2,M2K,North York,Bayview Village,43.786947,-79.385975,2,Chinese Restaurant,Café,Bank,Japanese Restaurant,Dog Run
3,M2N,North York,Willowdale South,43.77012,-79.408493,0,Restaurant,Ramen Restaurant,Sandwich Place,Café,Japanese Restaurant
4,M2P,North York,York Mills West,43.752758,-79.400049,2,Park,Bank,Women's Store,Dog Run,Coffee Shop
5,M2R,North York,Willowdale West,43.782736,-79.442259,0,Pharmacy,Grocery Store,Pizza Place,Coffee Shop,Butcher
6,M3A,North York,Parkwoods,43.753259,-79.329656,2,Park,Food & Drink Shop,Fast Food Restaurant,Women's Store,Discount Store
7,M3B,North York,Don Mills North,43.745906,-79.352188,2,Japanese Restaurant,Caribbean Restaurant,Gym / Fitness Center,Café,Basketball Court
8,M3C,North York,"Flemingdon Park, Don Mills South",43.7259,-79.340923,0,Coffee Shop,Asian Restaurant,Gym,Beer Store,Bike Shop
9,M3H,North York,"Bathurst Manor, Downsview North, Wilson Heights",43.754328,-79.442259,0,Coffee Shop,Frozen Yogurt Shop,Pharmacy,Restaurant,Diner


## Visualize the clusters

In [42]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(north_york_merged['Latitude'], north_york_merged['Longitude'], north_york_merged['Neighborhood'], north_york_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters

In [43]:
north_york_merged.loc[north_york_merged['Cluster Labels'] == 0, north_york_merged.columns[[2] + list(range(5, north_york_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Hillcrest Village,0,Golf Course,Dog Run,Pool,Athletics & Sports,Mediterranean Restaurant
1,"Fairview, Henry Farm, Oriole",0,Clothing Store,Fast Food Restaurant,Coffee Shop,Restaurant,Asian Restaurant
3,Willowdale South,0,Restaurant,Ramen Restaurant,Sandwich Place,Café,Japanese Restaurant
5,Willowdale West,0,Pharmacy,Grocery Store,Pizza Place,Coffee Shop,Butcher
8,"Flemingdon Park, Don Mills South",0,Coffee Shop,Asian Restaurant,Gym,Beer Store,Bike Shop
9,"Bathurst Manor, Downsview North, Wilson Heights",0,Coffee Shop,Frozen Yogurt Shop,Pharmacy,Restaurant,Diner
10,"Northwood Park, York University",0,Coffee Shop,Miscellaneous Shop,Massage Studio,Bar,Dog Run
12,Downsview West,0,Grocery Store,Moving Target,Bank,Women's Store,Electronics Store
14,Downsview Northwest,0,Liquor Store,Grocery Store,Gym / Fitness Center,Athletics & Sports,Discount Store
15,Victoria Village,0,Pizza Place,Coffee Shop,Portuguese Restaurant,Hockey Arena,Women's Store


In [44]:
north_york_merged.loc[north_york_merged['Cluster Labels'] == 1, north_york_merged.columns[[2] + list(range(5, north_york_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
13,Downsview Central,1,Business Service,Food Truck,Baseball Field,Women's Store,Electronics Store
21,"Emery, Humberlea",1,Furniture / Home Store,Baseball Field,Women's Store,Electronics Store,Coffee Shop


In [45]:
north_york_merged.loc[north_york_merged['Cluster Labels'] == 2, north_york_merged.columns[[2] + list(range(5, north_york_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,Bayview Village,2,Chinese Restaurant,Café,Bank,Japanese Restaurant,Dog Run
4,York Mills West,2,Park,Bank,Women's Store,Dog Run,Coffee Shop
6,Parkwoods,2,Park,Food & Drink Shop,Fast Food Restaurant,Women's Store,Discount Store
7,Don Mills North,2,Japanese Restaurant,Caribbean Restaurant,Gym / Fitness Center,Café,Basketball Court
11,"CFB Toronto, Downsview East",2,Park,Airport,Bus Stop,Dog Run,Coffee Shop
18,Glencairn,2,Japanese Restaurant,Pizza Place,Pub,Metro Station,Park
19,"Maple Leaf Park, North Park, Upwood Park",2,Park,Construction & Landscaping,Bakery,Basketball Court,Electronics Store
