In [251]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis

# 1. Explore Dataset

## Preprocessing the dataframe with Toronto postal codes.

In [252]:
toronto_geo = pd.read_csv('Geospatial_Coordinates.csv')
neighborhood_data = pd.read_csv('toronto.txt', sep="\t", header=None)
neighborhood_data.columns = ["Postal Code","Borough","Neighborhood"]
toronto=pd.DataFrame(neighborhood_data, index=neighborhood_data.index, columns=["Postal Code","Borough","Neighborhood"])
toronto=toronto[toronto['Borough']!='Not assigned']
toronto=toronto.set_index('Postal Code')
toronto=toronto.reset_index()
toronto.at[6,'Neighborhood']=toronto.at[6,'Borough']
toronto=toronto.groupby(['Postal Code','Borough'], as_index=False).agg(lambda Neighborhood: ', '.join(Neighborhood))
toronto=toronto.merge(toronto_geo,on='Postal Code')
toronto

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland_Creek, Rouge_Hill, Port_Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West_Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough_Village,43.744734,-79.239476
6,M1K,Scarborough,"East_Birchmount_Park, Ionview, Kennedy_Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden_Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough_Village_West",43.716316,-79.239476
9,M1N,Scarborough,"Birch_Cliff, Cliffside_West",43.692657,-79.264848


## Work with only boroughs that contain the word Toronto

In [253]:
toronto=toronto.join(toronto['Borough'].str.split(' ', expand=True).rename(columns={0:'A', 1:'B'}))
toronto=toronto[toronto['B']=='Toronto']
toronto=toronto.drop(['A','B'],axis=1)
toronto=toronto.set_index('Postal Code')
toronto=toronto.reset_index()
print(toronto.shape)
toronto.head()

# uncomment next lines to see all data
# pd.set_option('display.max_rows', toronto.shape[0])
# print(toronto)

(39, 5)


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The_Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The_Danforth_West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The_Beaches_West, India_Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio_District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence_Park,43.72802,-79.38879


## Create a map of Toronto with neighborhoods superimposed on top.

### Download some the dependencies that we will need.

In [254]:
#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

### Use geopy library to get the latitude and longitude values of Toronto.
### In order to define an instance of the geocoder, we need to define a user_agent. We will name our agent <em>toronto_explorer</em>, as shown below.

In [255]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto City are 43.653963, -79.387207.


### Create map of Toronto using latitude and longitude values

In [256]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood in zip(
    toronto['Latitude'], toronto['Longitude'], toronto['Borough'], toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## Explore the neighborhoods and segment them.

### Define Foursquare Credentials and Version Next to start utilizing the Foursquare API.

In [257]:
CLIENT_ID = 'HCWD2R5W5QVZLI3U04HATEW3SNDCIU3I0VZ53YLJYDL5CUCY' # your Foursquare ID
CLIENT_SECRET = 'YPLMU4SJBFDWIHOY2KVWYYEBU35H2HGQSXNDMZKYVYCLCYTP' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: HCWD2R5W5QVZLI3U04HATEW3SNDCIU3I0VZ53YLJYDL5CUCY
CLIENT_SECRET:YPLMU4SJBFDWIHOY2KVWYYEBU35H2HGQSXNDMZKYVYCLCYTP


### Explore the first neighborhood in our dataframe.

#### Get the neighborhood's name.

In [258]:
toronto.loc[0,'Neighborhood']

'The_Beaches'

#### Get the neighborhood's latitude and longitude values.

In [259]:
neighborhood_latitude = toronto.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = toronto.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = toronto.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of The_Beaches are 43.67635739999999, -79.2930312.


#### Get the top 100 venues that are in first neighborhood, The Beaches, within a radius of 500 meters.

In [260]:
import json # library to handle JSON files

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe


#### Create the GET request URL. Name your URL **url**.

In [261]:
radius = 500
LIMIT = 100
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)
results = requests.get(url).json()
results


{'meta': {'code': 200, 'requestId': '5e31a3cf949393001bde4a49'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Bay Street Corridor',
  'headerFullLocation': 'Bay Street Corridor, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 79,
  'suggestedBounds': {'ne': {'lat': 43.6584630045, 'lng': -79.38099903084075},
   'sw': {'lat': 43.649462995499995, 'lng': -79.39341496915925}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5227bb01498e17bf485e6202',
       'name': 'Downtown Toronto',
       'location': {'lat': 43.65323167517444,
        'lng': -79.38529600606677,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.65323167517444,
          

#### From the Foursquare lab in the previous module, we know that all the information is in the *items* key. Before we proceed, let's borrow the **get_category_type** function from the Foursquare lab.

In [262]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

#### Now we are ready to clean the json and structure it into a *pandas* dataframe.

In [263]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Downtown Toronto,Neighborhood,43.653232,-79.385296
1,Japango,Sushi Restaurant,43.655268,-79.385165
2,Rolltation,Japanese Restaurant,43.654918,-79.387424
3,Sansotei Ramen 三草亭,Ramen Restaurant,43.655157,-79.386501
4,Poke Guys,Poke Place,43.654895,-79.385052


#### And how many venues were returned by Foursquare?

In [264]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

79 venues were returned by Foursquare.


# 2. Explore Neighborhoods in Toronto

## Repeat the same process to all the neighborhoods in Toronto

### Create a function to repeat the same process

In [265]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Run the above function on each neighborhood and create a new dataframe called *toronto_venues*.

In [266]:
toronto_venues = getNearbyVenues(names=toronto['Neighborhood'],
                                   latitudes=toronto['Latitude'],
                                   longitudes=toronto['Longitude']
                                  )

The_Beaches
The_Danforth_West, Riverdale
The_Beaches_West, India_Bazaar
Studio_District
Lawrence_Park
Davisville_North
North_Toronto_West
Davisville
Moore_Park, Summerhill_East
Deer_Park, Forest_Hill_SE, Rathnelly, South_Hill, Summerhill_West
Rosedale
Cabbagetown, St._James_Town
Church_and_Wellesley
Harbourfront
Ryerson, Garden_District
St._James_Town
Berczy_Park
Central_Bay_Street
Adelaide, King, Richmond
Harbourfront_East, Toronto_Islands, Union_Station
Design_Exchange, Toronto_Dominion_Centre
Commerce_Court, Victoria_Hotel
Roselawn
Forest_Hill_North, Forest_Hill_West
The_Annex, North_Midtown, Yorkville
Harbord, University_of_Toronto
Chinatown, Grange_Park, Kensington_Market
CN_Tower, Bathurst_Quay, Island_airport, Harbourfront_West, King_and_Spadina, Railway_Lands, South_Niagara
Stn_A_PO_Boxes_25_The_Esplanade
First_Canadian_Place, Underground_city
Christie
Dovercourt_Village, Dufferin
Little_Portugal, Trinity
Brockton, Exhibition_Place, Parkdale_Village
High_Park, The_Junction_Sout

In [267]:
print(toronto_venues.shape)
toronto_venues.head()

(1699, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The_Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The_Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The_Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The_Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The_Danforth_West, Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


### Check how many venues were returned for each neighborhood

In [268]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy_Park,55,55,55,55,55,55
"Brockton, Exhibition_Place, Parkdale_Village",22,22,22,22,22,22
Business_Reply_Mail_Processing_Centre_969_Eastern,15,15,15,15,15,15
"CN_Tower, Bathurst_Quay, Island_airport, Harbourfront_West, King_and_Spadina, Railway_Lands, South_Niagara",17,17,17,17,17,17
"Cabbagetown, St._James_Town",43,43,43,43,43,43
Central_Bay_Street,83,83,83,83,83,83
"Chinatown, Grange_Park, Kensington_Market",86,86,86,86,86,86
Christie,18,18,18,18,18,18
Church_and_Wellesley,82,82,82,82,82,82


### Find out how many unique categories can be curated from all the returned venues

In [269]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 231 uniques categories.


# 3. Analyze Each Neighborhood

## One hot encoding

In [270]:
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
toronto_onehot.insert(0,'neighborhood',toronto_venues['Neighborhood'])
print(toronto_onehot.shape)
toronto_onehot.head()

(1699, 232)


Unnamed: 0,neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,The_Beaches,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,The_Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The_Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The_Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"The_Danforth_West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [271]:
toronto_grouped = toronto_onehot.groupby('neighborhood').mean().reset_index()
print(toronto_grouped.shape)
toronto_grouped

(39, 232)


Unnamed: 0,neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.0
1,Berczy_Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition_Place, Parkdale_Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business_Reply_Mail_Processing_Centre_969_Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN_Tower, Bathurst_Quay, Island_airport, Harbo...",0.0,0.058824,0.058824,0.058824,0.117647,0.117647,0.117647,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown, St._James_Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central_Bay_Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012048,0.0,...,0.0,0.0,0.0,0.012048,0.0,0.0,0.012048,0.0,0.0,0.012048
7,"Chinatown, Grange_Park, Kensington_Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.046512,0.0,0.069767,0.011628,0.0,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church_and_Wellesley,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,...,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.012195,0.0,0.012195


### Print each neighborhood along with the top 5 most common venues

In [272]:
num_top_venues = 5

for hood in toronto_grouped['neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
             venue  freq
0      Coffee Shop  0.07
1             Café  0.04
2       Steakhouse  0.04
3              Bar  0.04
4  Thai Restaurant  0.03


----Berczy_Park----
                venue  freq
0         Coffee Shop  0.07
1        Cocktail Bar  0.05
2            Beer Bar  0.04
3  Seafood Restaurant  0.04
4         Cheese Shop  0.04


----Brockton, Exhibition_Place, Parkdale_Village----
            venue  freq
0  Breakfast Spot  0.09
1            Café  0.09
2     Coffee Shop  0.09
3   Grocery Store  0.05
4    Intersection  0.05


----Business_Reply_Mail_Processing_Centre_969_Eastern----
              venue  freq
0        Comic Shop  0.07
1     Auto Workshop  0.07
2  Recording Studio  0.07
3        Restaurant  0.07
4    Farmers Market  0.07


----CN_Tower, Bathurst_Quay, Island_airport, Harbourfront_West, King_and_Spadina, Railway_Lands, South_Niagara----
              venue  freq
0    Airport Lounge  0.12
1   Airport Service  0.12
2  Airport Termin

### Display the top 10 venues for each neighborhood.

In [273]:
# function to sort the venues in descending order.
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['neighborhood'] = toronto_grouped['neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Steakhouse,Café,Bar,Bakery,Restaurant,Asian Restaurant,Cosmetics Shop,Thai Restaurant,Seafood Restaurant
1,Berczy_Park,Coffee Shop,Cocktail Bar,Steakhouse,Cheese Shop,Café,Farmers Market,Beer Bar,Bakery,Seafood Restaurant,Gourmet Shop
2,"Brockton, Exhibition_Place, Parkdale_Village",Coffee Shop,Café,Breakfast Spot,Grocery Store,Bakery,Office,Performing Arts Venue,Pet Store,Nightclub,Climbing Gym
3,Business_Reply_Mail_Processing_Centre_969_Eastern,Pizza Place,Auto Workshop,Brewery,Light Rail Station,Farmers Market,Spa,Fast Food Restaurant,Burrito Place,Restaurant,Recording Studio
4,"CN_Tower, Bathurst_Quay, Island_airport, Harbo...",Airport Lounge,Airport Service,Airport Terminal,Plane,Harbor / Marina,Coffee Shop,Rental Car Location,Sculpture Garden,Boat or Ferry,Bar


# 4. Cluster Neighborhoods

In [274]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 4

toronto_grouped_clustering = toronto_grouped.drop('neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:100] 

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       2, 1, 3, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

In [275]:
toronto.columns

Index(['Postal Code', 'Borough', 'Neighborhood', 'Latitude', 'Longitude'], dtype='object')

In [276]:
toronto.columns.values[2] = "neighborhood"
toronto.head()

Unnamed: 0,Postal Code,Borough,neighborhood,Latitude,Longitude
0,M4E,East Toronto,The_Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The_Danforth_West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The_Beaches_West, India_Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio_District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence_Park,43.72802,-79.38879


In [277]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto

# merge toronto_grouped with toronto to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The_Beaches,43.676357,-79.293031,1,Neighborhood,Health Food Store,Trail,Pub,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
1,M4K,East Toronto,"The_Danforth_West, Riverdale",43.679557,-79.352188,1,Greek Restaurant,Coffee Shop,Italian Restaurant,Restaurant,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Fruit & Vegetable Store,Pub,Pizza Place
2,M4L,East Toronto,"The_Beaches_West, India_Bazaar",43.668999,-79.315572,1,Park,Sandwich Place,Liquor Store,Fast Food Restaurant,Fish & Chips Shop,Sushi Restaurant,Brewery,Food & Drink Shop,Pub,Italian Restaurant
3,M4M,East Toronto,Studio_District,43.659526,-79.340923,1,Café,Coffee Shop,Gastropub,Brewery,Bakery,Italian Restaurant,American Restaurant,Yoga Studio,Comfort Food Restaurant,Sandwich Place
4,M4N,Central Toronto,Lawrence_Park,43.72802,-79.38879,2,Dim Sum Restaurant,Park,Bus Line,Swim School,Yoga Studio,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


### Visualize the resulting clusters

In [278]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Explore the characteristics of the clusters

In [279]:
toronto_merged[toronto_merged['Cluster Labels']==0]

Unnamed: 0,Postal Code,Borough,neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,M5N,Central Toronto,Roselawn,43.711695,-79.416936,0,Garden,Yoga Studio,Dessert Shop,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


In [280]:
toronto_merged[toronto_merged['Cluster Labels']==1]

Unnamed: 0,Postal Code,Borough,neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The_Beaches,43.676357,-79.293031,1,Neighborhood,Health Food Store,Trail,Pub,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
1,M4K,East Toronto,"The_Danforth_West, Riverdale",43.679557,-79.352188,1,Greek Restaurant,Coffee Shop,Italian Restaurant,Restaurant,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Fruit & Vegetable Store,Pub,Pizza Place
2,M4L,East Toronto,"The_Beaches_West, India_Bazaar",43.668999,-79.315572,1,Park,Sandwich Place,Liquor Store,Fast Food Restaurant,Fish & Chips Shop,Sushi Restaurant,Brewery,Food & Drink Shop,Pub,Italian Restaurant
3,M4M,East Toronto,Studio_District,43.659526,-79.340923,1,Café,Coffee Shop,Gastropub,Brewery,Bakery,Italian Restaurant,American Restaurant,Yoga Studio,Comfort Food Restaurant,Sandwich Place
5,M4P,Central Toronto,Davisville_North,43.712751,-79.390197,1,Department Store,Gym,Park,Breakfast Spot,Hotel,Sandwich Place,Food & Drink Shop,Dog Run,Doner Restaurant,Discount Store
6,M4R,Central Toronto,North_Toronto_West,43.715383,-79.405678,1,Coffee Shop,Sporting Goods Shop,Yoga Studio,Italian Restaurant,Salon / Barbershop,Restaurant,Park,Miscellaneous Shop,Mexican Restaurant,Fast Food Restaurant
7,M4S,Central Toronto,Davisville,43.704324,-79.38879,1,Pizza Place,Dessert Shop,Sandwich Place,Café,Italian Restaurant,Coffee Shop,Gym,Sushi Restaurant,Discount Store,Deli / Bodega
9,M4V,Central Toronto,"Deer_Park, Forest_Hill_SE, Rathnelly, South_Hi...",43.686412,-79.400049,1,Coffee Shop,Pub,Supermarket,Fried Chicken Joint,Light Rail Station,Vietnamese Restaurant,Sushi Restaurant,Pizza Place,Liquor Store,Restaurant
11,M4X,Downtown Toronto,"Cabbagetown, St._James_Town",43.667967,-79.367675,1,Coffee Shop,Italian Restaurant,Restaurant,Café,Pub,Bakery,Chinese Restaurant,Pizza Place,Grocery Store,Pharmacy
12,M4Y,Downtown Toronto,Church_and_Wellesley,43.66586,-79.38316,1,Coffee Shop,Japanese Restaurant,Restaurant,Sushi Restaurant,Gay Bar,Fast Food Restaurant,Hotel,Men's Store,Burger Joint,Café


In [281]:
toronto_merged[toronto_merged['Cluster Labels']==2]

Unnamed: 0,Postal Code,Borough,neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,M4N,Central Toronto,Lawrence_Park,43.72802,-79.38879,2,Dim Sum Restaurant,Park,Bus Line,Swim School,Yoga Studio,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
10,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,2,Park,Playground,Trail,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
23,M5P,Central Toronto,"Forest_Hill_North, Forest_Hill_West",43.696948,-79.411307,2,Park,Jewelry Store,Trail,Sushi Restaurant,Yoga Studio,Dim Sum Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant


In [282]:
toronto_merged[toronto_merged['Cluster Labels']==3]

Unnamed: 0,Postal Code,Borough,neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,M4T,Central Toronto,"Moore_Park, Summerhill_East",43.689574,-79.38316,3,Playground,Summer Camp,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


### <span style="color: red;"> Most neighborhoods are similar and are in cluster 1, which greatest interest is food.
### <span style="color: red;">   Cluster 2 greatest interest is Park.</span>