In [1]:
import pandas as pd
from pandas.io.json import json_normalize

Scrape the postal code table from Wikipedia with pandas.

In [2]:
# pd.read_html parses the page and returns a list of DataFrames (one per table found),
# so `df` is a list at this point, not a single frame.
df = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")

In [3]:
df # the page contains many tables, but the first one is the one we want

[    Postal Code           Borough  \
 0           M1A      Not assigned   
 1           M2A      Not assigned   
 2           M3A        North York   
 3           M4A        North York   
 4           M5A  Downtown Toronto   
 ..          ...               ...   
 175         M5Z      Not assigned   
 176         M6Z      Not assigned   
 177         M7Z      Not assigned   
 178         M8Z         Etobicoke   
 179         M9Z      Not assigned   
 
                                          Neighbourhood  
 0                                         Not assigned  
 1                                         Not assigned  
 2                                            Parkwoods  
 3                                     Victoria Village  
 4                            Regent Park, Harbourfront  
 ..                                                 ...  
 175                                       Not assigned  
 176                                       Not assigned  
 177                

In [4]:
df = df[0] # keep only the first table (the postal code list); `df` is a DataFrame from here on

In [5]:
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


Drop the rows whose borough is "Not assigned".

In [6]:
# Remove every row whose Borough is "Not assigned". The result is re-bound to `df`
# instead of using inplace=True, so the frame is never mutated behind the reader's back.
df = df.drop(df[df["Borough"] == "Not assigned"].index)

In [7]:
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [8]:
df.shape

(103, 3)

In [9]:
# Use the American spelling expected by the rest of the notebook; re-bind rather than
# mutate in place (inplace=True brings no benefit and prevents chaining).
df = df.rename(columns={"Neighbourhood": "Neighborhood"})

Get coordinates from CSV file.

In [10]:
# Load the postal-code coordinates from a local CSV (relative path, must sit next to the notebook).
# NOTE(review): judging by the merged output, the file holds "Postal Code", "Latitude" and
# "Longitude" columns — confirm against the actual file.
coords = pd.read_csv("Geospatial_Coordinates.csv")

In [11]:
# No `on=` is given, so pandas joins on the column names the two frames have in common
# (presumably just "Postal Code" — verify), using the default inner join.
df = pd.merge(df,coords)

In [89]:
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


Import geocoder library and get Toronto's coordinates.

In [13]:
import geocoder

# Geocode the city centre through the OSM provider (network call).
g = geocoder.osm("Toronto, CA")
if not g.ok:
    # Fail here with a clear message instead of an opaque indexing error on g.latlng.
    raise RuntimeError("Geocoding 'Toronto, CA' failed.")

# g.latlng is a [latitude, longitude] pair.
latitude, longitude = g.latlng

### Create a map of Toronto with neighborhoods superimposed on top.

In [50]:
import folium

# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one circle marker per postal-code row
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    # parse_html belongs to the Popup (it controls how the label text is rendered);
    # it is not a CircleMarker option, so it is passed only here.
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_Toronto)

map_Toronto

In [15]:
df["Borough"].value_counts()

North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
East York            5
York                 5
East Toronto         5
Mississauga          1
Name: Borough, dtype: int64

I decided to work with just the North York borough.

Slice the original dataframe and create a new dataframe of the North York data

In [56]:
# Keep only the North York rows and renumber them from 0.
is_north_york = df['Borough'].eq('North York')
ny_data = df.loc[is_north_york].reset_index(drop=True)
ny_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
3,M3B,North York,Don Mills,43.745906,-79.352188
4,M6B,North York,Glencairn,43.709577,-79.445073


In [17]:
import geocoder

# Geocode the borough centre. This overwrites the Toronto-wide latitude/longitude,
# so the maps created after this cell are centred on North York.
g = geocoder.osm("North York, CA")
if not g.ok:
    # Fail here with a clear message instead of an opaque indexing error on g.latlng.
    raise RuntimeError("Geocoding 'North York, CA' failed.")

latitude, longitude = g.latlng
print('The geograpical coordinate of North York are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of North York are 43.7543263, -79.44911696639593.


In [18]:
# create map of North York using latitude and longitude values
map_ny = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one circle marker per North York row
for lat, lng, neighborhood in zip(ny_data['Latitude'], ny_data['Longitude'], ny_data['Neighborhood']):
    # Build the popup under its own name instead of rebinding the loop variable;
    # parse_html is a Popup option, not a CircleMarker one, so it is passed only here.
    popup = folium.Popup(neighborhood, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_ny)

map_ny

Get the neighborhood's name, latitude and longitude.

In [20]:
ny_data.loc[0, 'Neighborhood']

'Parkwoods'

In [21]:
# Pull the fields of the first North York row (index label 0) one scalar at a time.
first_row = 0
df_name = ny_data.at[first_row, 'Neighborhood']    # neighborhood name
df_latitude = ny_data.at[first_row, 'Latitude']    # neighborhood latitude value
df_longitude = ny_data.at[first_row, 'Longitude']  # neighborhood longitude value

message = 'Latitude and longitude values of {} are {}, {}.'.format(df_name, df_latitude, df_longitude)
print(message)

Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


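### I have hidden my Foursquare API credentials for privacy :) Define `CLIENT_ID`, `CLIENT_SECRET`, `VERSION`, `radius` and `LIMIT` before running the cells below.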

In [59]:
# Build the Foursquare "explore" request for the neighborhood selected above
# (df_latitude / df_longitude).
# NOTE(review): CLIENT_ID, CLIENT_SECRET, VERSION, radius and LIMIT are not defined in any
# visible cell — presumably they come from the hidden credentials cell; confirm they are
# set before running this. The credentials end up in the query string, so avoid printing `url`.
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    df_latitude, 
    df_longitude, 
    radius, 
    LIMIT)

Send the GET request and examine the results.

In [60]:
import requests

# Use a timeout so a stalled connection cannot hang the notebook, and raise on HTTP
# errors so a rejected request fails here rather than as a KeyError further down.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5fa3bc3f6304202de9236db2'},
  'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 2,
  'suggestedBounds': {'ne': {'lat': 43.757758604500005,
    'lng': -79.32343823984928},
   'sw': {'lat': 43.7487585955, 'lng': -79.33587476015072}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
       'name': 'Brookbanks Park',
       'location': {'address': 'Toronto',
        'lat': 43.751976046055574,
        'lng': -79.33214044722958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.751976046055574,
          'lng': -79.33214044722958}],
        'distance': 245,
        'cc': 'CA',
        'c

Before we proceed, let's borrow the get_category_type function from the Foursquare lab.

In [61]:
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping or pandas.Series
        Record exposing the category list under ``'categories'`` or, when that
        key is absent, under ``'venue.categories'``.

    Returns
    -------
    str or None
        ``name`` of the first category, or ``None`` when the list is empty or missing.

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and must not be swallowed.
        categories_list = row['venue.categories']

    if categories_list is None or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

Now we are ready to clean the json and structure it into a pandas dataframe.

In [62]:
venues = results['response']['groups'][0]['items']

# flatten JSON; pd.json_normalize is the supported entry point
# (the pandas.io.json import path is deprecated)
nearby_venues = pd.json_normalize(venues)

# filter columns; copy so the column assignment below never writes into a slice
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns].copy()

# filter the category for each row: keep only the first category's name
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns: keep only the last dotted component ('venue.location.lat' -> 'lat')
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,Variety Store,Food & Drink Shop,43.751974,-79.333114


In [26]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

2 venues were returned by Foursquare.


### Create a function to repeat the same process to all the neighborhoods in North York

In [63]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    """Query the Foursquare explore endpoint once per location and tabulate the venues.

    Relies on the notebook-level names ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION``
    and ``LIMIT`` and on ``requests`` being imported.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location; they are iterated in parallel.
    radius : number, default 1000
        Value sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame is
        empty (but still has these columns) when no venue is returned.
        'Venue Category' is None for a venue that carries no category.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; time out instead of hanging and fail loudly on HTTP errors
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # A venue may come back without categories; record None rather than
            # failing on an empty list.
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # Passing the column names to the constructor keeps the schema even when `rows` is empty.
    return pd.DataFrame(rows, columns=columns)

Run the above function on each North York neighborhood and create a new dataframe called ny_venues.

In [64]:
# Collect the nearby venues of every North York row, using the function's default radius.
ny_venues = getNearbyVenues(
    names=ny_data['Neighborhood'],
    latitudes=ny_data['Latitude'],
    longitudes=ny_data['Longitude'],
)

Parkwoods
Victoria Village
Lawrence Manor, Lawrence Heights
Don Mills
Glencairn
Don Mills
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Fairview, Henry Farm, Oriole
Northwood Park, York University
Bayview Village
Downsview
York Mills, Silver Hills
Downsview
North Park, Maple Leaf Park, Upwood Park
Humber Summit
Willowdale, Newtonbrook
Downsview
Bedford Park, Lawrence Manor East
Humberlea, Emery
Willowdale, Willowdale East
Downsview
York Mills West
Willowdale, Willowdale West


In [93]:
print(ny_venues.shape)
ny_venues.head()

(494, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Allwyn's Bakery,43.75984,-79.324719,Caribbean Restaurant
1,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
2,Parkwoods,43.753259,-79.329656,Tim Hortons,43.760668,-79.326368,Café
3,Parkwoods,43.753259,-79.329656,Bruno's valu-mart,43.746143,-79.32463,Grocery Store
4,Parkwoods,43.753259,-79.329656,A&W,43.760643,-79.326865,Fast Food Restaurant


Check how many venues were returned for each neighborhood

In [65]:
ny_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Wilson Heights, Downsview North",29,29,29,29,29,29
Bayview Village,14,14,14,14,14,14
"Bedford Park, Lawrence Manor East",30,30,30,30,30,30
Don Mills,60,60,60,60,60,60
Downsview,62,62,62,62,62,62
"Fairview, Henry Farm, Oriole",30,30,30,30,30,30
Glencairn,30,30,30,30,30,30
Hillcrest Village,20,20,20,20,20,20
Humber Summit,10,10,10,10,10,10
"Humberlea, Emery",9,9,9,9,9,9


In [66]:
print('There are {} uniques categories.'.format(len(ny_venues['Venue Category'].unique())))

There are 140 uniques categories.


In [67]:
# one hot encoding: one indicator column per venue category
ny_onehot = pd.get_dummies(ny_venues[['Venue Category']], prefix="", prefix_sep="")

# If a venue category is itself called "Neighborhood", its indicator column would be
# silently overwritten by the key column added below; move it out of the way first.
if 'Neighborhood' in ny_onehot.columns:
    ny_onehot = ny_onehot.rename(columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# add the neighborhood column back as the first column
ny_onehot.insert(0, 'Neighborhood', ny_venues['Neighborhood'])

ny_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,Automotive Shop,...,Tennis Court,Thai Restaurant,Theater,Toy / Game Store,Trail,Train Station,Turkish Restaurant,Vietnamese Restaurant,Wings Joint,Women's Store
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [68]:
ny_onehot.shape

(494, 141)

Group rows by neighborhood, taking the mean of the frequency of occurrence of each category.

In [69]:
# Average every indicator column per neighborhood; keeping the key as a regular column
# (as_index=False) gives the same layout as grouping and then resetting the index.
ny_grouped = ny_onehot.groupby('Neighborhood', as_index=False).mean()
ny_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,Automotive Shop,...,Tennis Court,Thai Restaurant,Theater,Toy / Game Store,Trail,Train Station,Turkish Restaurant,Vietnamese Restaurant,Wings Joint,Women's Store
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0
3,Don Mills,0.0,0.0,0.0,0.016667,0.0,0.033333,0.0,0.0,0.0,...,0.0,0.016667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016667
4,Downsview,0.0,0.016129,0.016129,0.0,0.0,0.0,0.032258,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.080645,0.0,0.0
5,"Fairview, Henry Farm, Oriole",0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.0,0.0
6,Glencairn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Hillcrest Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Humber Summit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Humberlea, Emery",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [70]:
ny_grouped.shape

(20, 141)

#### Print each neighborhood along with its top 5 most common venues

In [71]:
num_top_venues = 5

for hood in ny_grouped['Neighborhood']:
    print("----"+hood+"----")

    # Turn this neighborhood's single row into a two-column (venue, freq) table.
    hood_row = ny_grouped.loc[ny_grouped['Neighborhood'] == hood]
    freq_table = hood_row.T.reset_index()
    freq_table.columns = ['venue', 'freq']

    # The first transposed row is the 'Neighborhood' label itself, so skip it,
    # then rank the remaining categories by rounded frequency.
    ranked = (
        freq_table.iloc[1:]
        .astype({'freq': float})
        .round({'freq': 2})
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
    )
    print(ranked.head(num_top_venues))
    print('\n')

----Bathurst Manor, Wilson Heights, Downsview North----
                       venue  freq
0                Coffee Shop  0.07
1                       Bank  0.07
2                 Ski Chalet  0.03
3  Middle Eastern Restaurant  0.03
4             Sandwich Place  0.03


----Bayview Village----
                 venue  freq
0        Grocery Store  0.14
1  Japanese Restaurant  0.14
2          Gas Station  0.14
3                 Bank  0.14
4                 Park  0.07


----Bedford Park, Lawrence Manor East----
                     venue  freq
0              Coffee Shop  0.10
1       Italian Restaurant  0.10
2           Sandwich Place  0.07
3          Thai Restaurant  0.03
4  Comfort Food Restaurant  0.03


----Don Mills----
                 venue  freq
0  Japanese Restaurant  0.08
1           Restaurant  0.08
2                  Gym  0.05
3          Coffee Shop  0.05
4         Burger Joint  0.05


----Downsview----
                   venue  freq
0  Vietnamese Restaurant  0.08
1            Cof

First, let's write a function to sort the venues in descending order.

In [72]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first element of ``row`` is skipped; the remaining values are sorted in
    descending order and the index labels of the first ``num_top_venues`` of
    them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

Create the new dataframe and display the top 10 venues for each neighborhood.

In [73]:
import numpy as np

num_top_venues = 10


def _ordinal(n):
    """Return ``n`` with its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st, ...)."""
    if 10 <= n % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return '{}{}'.format(n, suffix)


# create columns according to number of top venues
columns = ['Neighborhood'] + [
    '{} Most Common Venue'.format(_ordinal(rank)) for rank in range(1, num_top_venues + 1)
]

# create a new dataframe: one row per neighborhood, one column per rank
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = ny_grouped['Neighborhood']

# fill every rank column of each row with that neighborhood's top categories
for ind in np.arange(ny_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ny_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Dog Run,Mobile Phone Shop,Deli / Bodega,Sandwich Place,Diner,Mediterranean Restaurant,Shopping Mall,Ski Area
1,Bayview Village,Grocery Store,Gas Station,Japanese Restaurant,Bank,Café,Trail,Chinese Restaurant,Park,Skating Rink,Restaurant
2,"Bedford Park, Lawrence Manor East",Coffee Shop,Italian Restaurant,Sandwich Place,Grocery Store,Sports Club,Restaurant,Café,Liquor Store,Pub,Bridal Shop
3,Don Mills,Japanese Restaurant,Restaurant,Burger Joint,Coffee Shop,Gym,Bank,Supermarket,Beer Store,Pizza Place,Asian Restaurant
4,Downsview,Vietnamese Restaurant,Coffee Shop,Grocery Store,Pizza Place,Gas Station,Hotel,Pharmacy,Liquor Store,Fast Food Restaurant,Shopping Mall


### Run k-means to cluster the neighborhoods into 5 clusters.

In [74]:
#import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

# Feature matrix: every category-frequency column, without the neighborhood name.
# `columns=` replaces the positional axis argument, which newer pandas no longer accepts.
ny_grouped_clustering = ny_grouped.drop(columns='Neighborhood')

# run k-means clustering (random_state is fixed so the labels are reproducible)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ny_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 0, 1, 1, 1, 1, 1, 3, 3, 4])

Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [75]:
# add clustering labels as the first column; when the column already exists (the cell
# was run before) overwrite it, because DataFrame.insert would raise on a duplicate
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# attach the cluster label and top venues to every North York row (which already
# carries latitude/longitude), matching on the neighborhood name
ny_merged = ny_data.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

ny_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,3,Park,Pharmacy,Convenience Store,Bus Stop,Shopping Mall,Chinese Restaurant,Fish & Chips Shop,Pizza Place,Cosmetics Shop,Caribbean Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,3,Coffee Shop,Portuguese Restaurant,Sporting Goods Shop,Gym / Fitness Center,Grocery Store,Golf Course,Intersection,Men's Store,Pizza Place,Playground
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1,Coffee Shop,Fast Food Restaurant,Vietnamese Restaurant,Clothing Store,Dessert Shop,Restaurant,Sushi Restaurant,Fried Chicken Joint,Accessories Store,Cheese Shop
3,M3B,North York,Don Mills,43.745906,-79.352188,1,Japanese Restaurant,Restaurant,Burger Joint,Coffee Shop,Gym,Bank,Supermarket,Beer Store,Pizza Place,Asian Restaurant
4,M6B,North York,Glencairn,43.709577,-79.445073,1,Grocery Store,Fast Food Restaurant,Coffee Shop,Gas Station,Pizza Place,Convenience Store,Pub,Playground,Pet Store,Park


Visualize the resulting clusters.

In [76]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow color per cluster label
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map, colored by cluster so the groups can be told apart
for lat, lon, poi, cluster in zip(ny_merged['Latitude'], ny_merged['Longitude'], ny_merged['Neighborhood'],ny_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # A row without a cluster label (no match in the join) is drawn in grey.
    cluster_color = rainbow[int(cluster)] if pd.notna(cluster) else '#808080'
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

# Examine clusters

In [77]:
# Members of cluster 0: column 1 (Borough) plus every column from position 5 (Cluster Labels) onward.
in_cluster = ny_merged['Cluster Labels'] == 0
shown_columns = ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]
ny_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,North York,0,Grocery Store,Gas Station,Japanese Restaurant,Bank,Café,Trail,Chinese Restaurant,Park,Skating Rink,Restaurant


In [78]:
# Members of cluster 1: column 1 (Borough) plus every column from position 5 (Cluster Labels) onward.
in_cluster = ny_merged['Cluster Labels'] == 1
shown_columns = ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]
ny_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,North York,1,Coffee Shop,Fast Food Restaurant,Vietnamese Restaurant,Clothing Store,Dessert Shop,Restaurant,Sushi Restaurant,Fried Chicken Joint,Accessories Store,Cheese Shop
3,North York,1,Japanese Restaurant,Restaurant,Burger Joint,Coffee Shop,Gym,Bank,Supermarket,Beer Store,Pizza Place,Asian Restaurant
4,North York,1,Grocery Store,Fast Food Restaurant,Coffee Shop,Gas Station,Pizza Place,Convenience Store,Pub,Playground,Pet Store,Park
5,North York,1,Japanese Restaurant,Restaurant,Burger Joint,Coffee Shop,Gym,Bank,Supermarket,Beer Store,Pizza Place,Asian Restaurant
7,North York,1,Coffee Shop,Bank,Dog Run,Mobile Phone Shop,Deli / Bodega,Sandwich Place,Diner,Mediterranean Restaurant,Shopping Mall,Ski Area
8,North York,1,Clothing Store,Coffee Shop,Bakery,Restaurant,Juice Bar,Japanese Restaurant,Electronics Store,Shopping Mall,Department Store,Caribbean Restaurant
9,North York,1,Coffee Shop,Furniture / Home Store,Pizza Place,Restaurant,Japanese Restaurant,Caribbean Restaurant,Middle Eastern Restaurant,Sports Bar,Bar,Bank
11,North York,1,Vietnamese Restaurant,Coffee Shop,Grocery Store,Pizza Place,Gas Station,Hotel,Pharmacy,Liquor Store,Fast Food Restaurant,Shopping Mall
13,North York,1,Vietnamese Restaurant,Coffee Shop,Grocery Store,Pizza Place,Gas Station,Hotel,Pharmacy,Liquor Store,Fast Food Restaurant,Shopping Mall
16,North York,1,Korean Restaurant,Café,Diner,Middle Eastern Restaurant,Park,Pizza Place,Coffee Shop,Hot Dog Joint,Shopping Mall,Hookah Bar


In [79]:
# Members of cluster 2: column 1 (Borough) plus every column from position 5 (Cluster Labels) onward.
in_cluster = ny_merged['Cluster Labels'] == 2
shown_columns = ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]
ny_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,North York,2,Park,Pool,Dessert Shop,Electronics Store,Eastern European Restaurant,Dog Run,Discount Store,Diner,Dim Sum Restaurant,Department Store


In [80]:
# Members of cluster 3: column 1 (Borough) plus every column from position 5 (Cluster Labels) onward.
in_cluster = ny_merged['Cluster Labels'] == 3
shown_columns = ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]
ny_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,3,Park,Pharmacy,Convenience Store,Bus Stop,Shopping Mall,Chinese Restaurant,Fish & Chips Shop,Pizza Place,Cosmetics Shop,Caribbean Restaurant
1,North York,3,Coffee Shop,Portuguese Restaurant,Sporting Goods Shop,Gym / Fitness Center,Grocery Store,Golf Course,Intersection,Men's Store,Pizza Place,Playground
6,North York,3,Park,Coffee Shop,Pharmacy,Chinese Restaurant,Shopping Mall,Ice Cream Shop,Intersection,Korean Restaurant,Convenience Store,Pizza Place
14,North York,3,Coffee Shop,Gas Station,Pizza Place,Chinese Restaurant,Park,Dim Sum Restaurant,Mediterranean Restaurant,Bakery,Convenience Store,Athletics & Sports
15,North York,3,Electronics Store,Medical Center,Bank,Pizza Place,Pharmacy,Park,Shopping Mall,Italian Restaurant,Bakery,Eastern European Restaurant
22,North York,3,Park,Coffee Shop,Restaurant,French Restaurant,Pet Store,Chinese Restaurant,Business Service,Bubble Tea Shop,Dog Run,Playground
23,North York,3,Pharmacy,Coffee Shop,Bakery,Pizza Place,Park,Butcher,Bus Line,Eastern European Restaurant,Convenience Store,Grocery Store


In [81]:
# Members of cluster 4: column 1 (Borough) plus every column from position 5 (Cluster Labels) onward.
in_cluster = ny_merged['Cluster Labels'] == 4
shown_columns = ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]
ny_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,North York,4,Gas Station,Convenience Store,Intersection,Discount Store,Park,Auto Workshop,Golf Course,Storage Facility,Bakery,Dessert Shop
