In [1]:
import numpy as np 
import pandas as pd 
import os
import dotenv
import json 
#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests 
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
#!conda install -c conda-forge folium=0.5.0 --yes 
import folium 

In [2]:
# Load the New York neighborhoods GeoJSON file into a Python dict.
# The encoding is given explicitly so that decoding does not depend on the
# platform's default locale encoding.
with open('../dataBank/nygeojson.json', encoding='utf-8') as f:
    ny_data = json.load(f)

In [3]:
# The 'features' entry of the loaded GeoJSON holds the list of features used below
neighborhoods_data = ny_data['features']
# Display the first feature to inspect its structure
neighborhoods_data[0]

{'type': 'Feature',
 'id': 'nyu_2451_34572.1',
 'geometry': {'type': 'Point',
  'coordinates': [-73.84720052054902, 40.89470517661]},
 'geometry_name': 'geom',
 'properties': {'name': 'Wakefield',
  'stacked': 1,
  'annoline1': 'Wakefield',
  'annoline2': None,
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.84720052054902,
   40.89470517661,
   -73.84720052054902,
   40.89470517661]}}

In [4]:
# define the dataframe columns (one row per neighborhood will be added later)
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate an empty dataframe that only carries the column labels
neighborhoods = pd.DataFrame(columns=column_names)
# display it to confirm that it has the expected columns and no rows yet
neighborhoods

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude


In [5]:
# Build the neighborhoods dataframe from the GeoJSON features.
#
# One record is collected per feature and the dataframe is created in a single
# call. Growing a dataframe row by row with DataFrame.append copies the whole
# frame on every iteration, and the method was removed in pandas 2.0.
neighborhoods = pd.DataFrame(
    [
        {
            'Borough': feature['properties']['borough'],
            'Neighborhood': feature['properties']['name'],
            # coordinates are stored as [longitude, latitude]
            'Latitude': feature['geometry']['coordinates'][1],
            'Longitude': feature['geometry']['coordinates'][0],
        }
        for feature in neighborhoods_data
    ],
    columns=column_names,
)

neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [6]:
# Sanity check: report how many distinct boroughs and how many neighborhood
# rows ended up in the dataframe.
borough_count = len(neighborhoods['Borough'].unique())
neighborhood_count = neighborhoods.shape[0]

summary = 'The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, neighborhood_count)
print(summary)

The dataframe has 5 boroughs and 306 neighborhoods.


Use the geopy library to get the latitude and longitude values of New York City.
In order to define an instance of the geocoder, we need to define a user_agent. We will name our agent ny_explorer, as shown below.

In [7]:
# Resolve the address to coordinates with Nominatim.
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [8]:
# Base map centered on the geocoded New York City coordinates
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per neighborhood, with a "<neighborhood>, <borough>" popup
marker_rows = zip(
    neighborhoods['Latitude'],
    neighborhoods['Longitude'],
    neighborhoods['Borough'],
    neighborhoods['Neighborhood'],
)
for lat, lng, borough, neighborhood in marker_rows:
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_newyork)

# Last expression of the cell: render the map
map_newyork

However, for illustration purposes, let's simplify the above map and segment and cluster only the neighborhoods in Manhattan. So let's slice the original dataframe and create a new dataframe of the Manhattan data.

In [9]:
# Keep only the Manhattan rows and renumber them from 0
is_manhattan = neighborhoods['Borough'] == 'Manhattan'
manhattan_data = neighborhoods.loc[is_manhattan].reset_index(drop=True)
manhattan_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688


In [10]:
# Resolve the address to coordinates with Nominatim; these values replace the
# city-wide latitude/longitude and are used to center the Manhattan maps.
address = 'Manhattan, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 40.7900869, -73.9598295.


In [11]:
# Base map centered on the geocoded Manhattan coordinates
map_manhattan = folium.Map(location=[latitude, longitude], zoom_start=11)

# One circle marker per Manhattan neighborhood, with its name as the popup
marker_rows = zip(
    manhattan_data['Latitude'],
    manhattan_data['Longitude'],
    manhattan_data['Neighborhood'],
)
for lat, lng, label in marker_rows:
    label = folium.Popup(label, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_manhattan)

# Last expression of the cell: render the map
map_manhattan

In [23]:
# define Foursquare credentials
# Values are read from the environment (optionally populated from a .env file)
# so that no secret is hard-coded in the notebook.
dotenv.load_dotenv()

CLIENT_ID = os.getenv("FOURSQUARE_ID")
CLIENT_SECRET = os.getenv("FOURSQUARE_SECRET")
VERSION = '20180605' # Foursquare API version

# os.getenv returns None for unset variables; stop here with a clear message
# rather than sending requests that carry "None" as credentials.
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError('FOURSQUARE_ID and FOURSQUARE_SECRET must be set in the environment or in a .env file.')

Let's explore the first neighborhood in our dataframe.
Get the neighborhood's name.

In [38]:
# Show the identifying fields of the first Manhattan neighborhood
profile_columns = ['Borough','Neighborhood', 'Latitude', 'Longitude']
manhattan_data[profile_columns].loc[0]

Borough           Manhattan
Neighborhood    Marble Hill
Latitude            40.8766
Longitude          -73.9107
Name: 0, dtype: object

In [25]:
# Coordinates and name of the first neighborhood (row label 0)
neighborhood_latitude = manhattan_data.at[0, 'Latitude']
neighborhood_longitude = manhattan_data.at[0, 'Longitude']
neighborhood_name = manhattan_data.at[0, 'Neighborhood']

summary_values = (neighborhood_name, neighborhood_latitude, neighborhood_longitude)
print('Latitude and longitude values of {} are {}, {}.'.format(*summary_values))

Latitude and longitude values of Marble Hill are 40.87655077879964, -73.91065965862981.


Now, let's get the top 100 venues that are in Marble Hill within a radius of 500 meters.
First, let's create the GET request URL. Name your URL url.

In [39]:
LIMIT = 100   # maximum number of venues requested per call
radius = 500  # value passed as the API's radius parameter
# NOTE: the URL embeds the client secret; avoid printing or displaying it.
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

# send GET request and examine the results
# A timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status surfaces HTTP errors (e.g. bad credentials) right here
# instead of as a KeyError when the payload is unpacked later.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

In [40]:
# all the information is in the items key. Before we proceed, 
# let's borrow the get_category_type function from the Foursquare lab.

# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category attached to a venue row.

    The categories are looked up under the key 'categories' and, if that key
    is missing, under 'venue.categories'.

    Parameters
    ----------
    row : mapping-like (e.g. dict or pandas Series)
        Must provide one of the two keys above; the value is a list of dicts
        that each carry a 'name' entry.

    Returns
    -------
    The 'name' of the first category, or None when the list is empty or None.

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and must not be silently swallowed.
        categories_list = row['venue.categories']

    if not categories_list:
        return None
    return categories_list[0]['name']

In [41]:
# Turn the JSON response into a tidy dataframe of venues

# the venue items sit under response -> groups[0] -> items
venues = results['response']['groups'][0]['items']

# flatten the nested JSON into dotted column names, then keep only the
# columns needed for the analysis
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
flattened_venues = json_normalize(venues)
nearby_venues = flattened_venues.loc[:, filtered_columns]

# replace each row's category list with the name of its first category
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# keep only the last dotted component of every column name
short_names = []
for col in nearby_venues.columns:
    short_names.append(col.split(".")[-1])
nearby_venues.columns = short_names

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Arturo's,Pizza Place,40.874412,-73.910271
1,Bikram Yoga,Yoga Studio,40.876844,-73.906204
2,Tibbett Diner,Diner,40.880404,-73.908937
3,Starbucks,Coffee Shop,40.877531,-73.905582
4,Dunkin',Donut Shop,40.877136,-73.906666


In [43]:
# Report the number of venue rows in the cleaned dataframe
venue_count = nearby_venues.shape[0]
print('{} venues were returned by the Foursquare API.'.format(venue_count))

26 venues were returned by the Foursquare API.


Explore Neighborhoods in Manhattan
Let's create a function to repeat the same process for all the neighborhoods in Manhattan.

In [31]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare explore endpoint for every location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Name and coordinates of each location to query.
    radius : int, default 500
        Value sent as the API's ``radius`` parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was returned at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Relies on the notebook-level names CLIENT_ID, CLIENT_SECRET, VERSION and
    LIMIT being defined before the function is called.
    """
    column_labels = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)  # progress indicator, one line per queried location

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; the timeout prevents an indefinite hang and
        # raise_for_status reports HTTP errors before the payload is unpacked
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # a venue may come without any category; record None instead of
            # failing on the [0] lookup
            categories = venue.get('categories') or []
            category_name = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category_name))

    # Passing the labels to the constructor keeps the schema stable even when
    # venue_rows is empty (assigning .columns afterwards would raise then).
    return pd.DataFrame(venue_rows, columns=column_labels)

In [44]:
# Collect the venues around every Manhattan neighborhood into manhattan_venues
# (the function prints each neighborhood name as it is processed).
manhattan_venues = getNearbyVenues(
    names=manhattan_data['Neighborhood'],
    latitudes=manhattan_data['Latitude'],
    longitudes=manhattan_data['Longitude'],
)

Marble Hill
Chinatown
Washington Heights
Inwood
Hamilton Heights
Manhattanville
Central Harlem
East Harlem
Upper East Side
Yorkville
Lenox Hill
Roosevelt Island
Upper West Side
Lincoln Square
Clinton
Midtown
Murray Hill
Chelsea
Greenwich Village
East Village
Lower East Side
Tribeca
Little Italy
Soho
West Village
Manhattan Valley
Morningside Heights
Gramercy
Battery Park City
Financial District
Carnegie Hill
Noho
Civic Center
Midtown South
Sutton Place
Turtle Bay
Tudor City
Stuyvesant Town
Flatiron
Hudson Yards


Analyze Each Neighborhood

In [45]:
# one hot encoding: one indicator column per venue category
manhattan_onehot = pd.get_dummies(manhattan_venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category may itself be called "Neighborhood". Assigning the
# neighborhood names under that label would silently overwrite the indicator
# column, so rename the indicator first if the collision occurs.
if 'Neighborhood' in manhattan_onehot.columns:
    manhattan_onehot = manhattan_onehot.rename(columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# add the neighborhood column back as the first column
manhattan_onehot.insert(0, 'Neighborhood', manhattan_venues['Neighborhood'])

manhattan_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Animal Shelter,Antique Shop,Arcade,Arepa Restaurant,...,Volleyball Court,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [46]:
# Average the one-hot indicators per neighborhood: each value is the share of
# that neighborhood's venues belonging to the category.
category_means = manhattan_onehot.groupby('Neighborhood').mean()
manhattan_grouped = category_means.reset_index()
manhattan_grouped

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Animal Shelter,Antique Shop,Arcade,Arepa Restaurant,...,Volleyball Court,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Battery Park City,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.02,0.0
1,Carnegie Hill,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.01,0.03
2,Central Harlem,0.0,0.0,0.0,0.06383,0.042553,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Chelsea,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0
4,Chinatown,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Civic Center,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.01,0.0,0.03
6,Clinton,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.02,0.03,0.0,0.0,0.0
7,East Harlem,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,East Village,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.02,...,0.0,0.0,0.0,0.0,0.0,0.05,0.02,0.0,0.0,0.0
9,Financial District,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.01,0.04,0.0,0.01,0.0


In [47]:
# Print, for every neighborhood, its most frequent venue categories

num_top_venues = 5

for hood in manhattan_grouped['Neighborhood']:
    print("----"+hood+"----")
    # transpose the neighborhood's single row into (venue, freq) pairs
    hood_row = manhattan_grouped[manhattan_grouped['Neighborhood'] == hood]
    freq_table = hood_row.T.reset_index()
    freq_table.columns = ['venue','freq']
    top_venues = (
        freq_table.iloc[1:]  # the first entry is the neighborhood name itself
        .assign(freq=lambda frame: frame['freq'].astype(float))
        .round({'freq': 2})
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
        .head(num_top_venues)
    )
    print(top_venues)
    print('\n')

----Battery Park City----
            venue  freq
0            Park  0.08
1     Coffee Shop  0.07
2           Hotel  0.05
3   Memorial Site  0.04
4  Clothing Store  0.03


----Carnegie Hill----
                 venue  freq
0          Pizza Place  0.06
1          Coffee Shop  0.06
2                 Café  0.04
3  Japanese Restaurant  0.03
4    French Restaurant  0.03


----Central Harlem----
                  venue  freq
0    African Restaurant  0.06
1  Gym / Fitness Center  0.04
2           Art Gallery  0.04
3     French Restaurant  0.04
4        Cosmetics Shop  0.04


----Chelsea----
                venue  freq
0         Coffee Shop  0.06
1  Italian Restaurant  0.05
2      Ice Cream Shop  0.05
3              Bakery  0.04
4           Nightclub  0.04


----Chinatown----
                 venue  freq
0   Chinese Restaurant  0.08
1         Cocktail Bar  0.04
2  American Restaurant  0.04
3      Bubble Tea Shop  0.03
4                  Spa  0.03


----Civic Center----
                  venue 

In [48]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first element of ``row`` is skipped, the remaining values are sorted
    in descending order, and the index labels of the first ``num_top_venues``
    entries are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [49]:
#  let's create the new dataframe and display the top 10 venues for each neighborhood.

num_top_venues = 10

# ordinal suffixes for ranks 1-3; every later rank uses 'th'
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # explicit bounds check instead of a bare except around the list lookup,
    # so unrelated errors are not swallowed
    if ind < len(indicators):
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    else:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe with one row per neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = manhattan_grouped['Neighborhood']

# fill the ranked venue columns row by row
for ind in np.arange(manhattan_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(manhattan_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Battery Park City,Park,Coffee Shop,Hotel,Memorial Site,Wine Shop,Italian Restaurant,Clothing Store,Gym,Plaza,Men's Store
1,Carnegie Hill,Coffee Shop,Pizza Place,Café,Yoga Studio,Bookstore,Wine Shop,Cosmetics Shop,French Restaurant,Bar,Japanese Restaurant
2,Central Harlem,African Restaurant,Public Art,Art Gallery,Seafood Restaurant,Chinese Restaurant,Gym / Fitness Center,French Restaurant,American Restaurant,Cosmetics Shop,Liquor Store
3,Chelsea,Coffee Shop,Ice Cream Shop,Italian Restaurant,Bakery,Nightclub,Theater,Seafood Restaurant,American Restaurant,Hotel,Art Gallery
4,Chinatown,Chinese Restaurant,American Restaurant,Cocktail Bar,Spa,Dumpling Restaurant,Vietnamese Restaurant,Bubble Tea Shop,Optical Shop,Salon / Barbershop,Ice Cream Shop


Cluster Neighborhoods
Run k-means to cluster the neighborhoods into 5 clusters.

In [50]:
# set number of clusters
kclusters = 5

# k-means needs numeric features only, so drop the neighborhood name column.
# The keyword form is used because passing the axis positionally
# (drop('Neighborhood', 1)) is no longer accepted by pandas 2.x.
manhattan_grouped_clustering = manhattan_grouped.drop(columns='Neighborhood')

# run k-means clustering (fixed random_state for reproducible labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(manhattan_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 2, 2, 2, 2, 2, 1, 4, 2, 2], dtype=int32)

In [51]:
# add clustering labels as the first column
# DataFrame.insert raises if the column already exists, which made this cell
# fail when run a second time; overwrite the existing column in that case.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# join the cluster labels and ranked venues onto manhattan_data, so every
# neighborhood row also carries its latitude/longitude
manhattan_merged = manhattan_data.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

manhattan_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Manhattan,Marble Hill,40.876551,-73.91066,2,Coffee Shop,Discount Store,Sandwich Place,Yoga Studio,Tennis Stadium,Supplement Shop,Steakhouse,Spa,Seafood Restaurant,Clothing Store
1,Manhattan,Chinatown,40.715618,-73.994279,2,Chinese Restaurant,American Restaurant,Cocktail Bar,Spa,Dumpling Restaurant,Vietnamese Restaurant,Bubble Tea Shop,Optical Shop,Salon / Barbershop,Ice Cream Shop
2,Manhattan,Washington Heights,40.851903,-73.9369,4,Café,Mobile Phone Shop,Bakery,Deli / Bodega,Spanish Restaurant,Latin American Restaurant,New American Restaurant,Sandwich Place,Tapas Restaurant,Mexican Restaurant
3,Manhattan,Inwood,40.867684,-73.92121,4,Mexican Restaurant,Café,Lounge,Bakery,Pizza Place,Park,Frozen Yogurt Shop,Chinese Restaurant,Deli / Bodega,American Restaurant
4,Manhattan,Hamilton Heights,40.823604,-73.949688,4,Deli / Bodega,Café,Mexican Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Sushi Restaurant,Caribbean Restaurant,School,Bakery


In [52]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow color per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(manhattan_merged['Latitude'], manhattan_merged['Longitude'], manhattan_merged['Neighborhood'], manhattan_merged['Cluster Labels']):
    # A neighborhood without a match in the join has a missing label, which
    # also turns the whole column into floats. Skip those rows and cast the
    # rest back to int so the label can index the color list.
    if pd.isna(cluster):
        continue
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # cluster 0 maps to index -1, i.e. the last color in the list
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Examine Clusters
Now, you can examine each cluster and determine the discriminating venue categories that distinguish each cluster. Based on the defining categories, you can then assign a name to each cluster. 

In [53]:
# Cluster 0: neighborhood name (column 1) plus the ranked venue columns (5 onward)
manhattan_merged[manhattan_merged['Cluster Labels'] == 0].iloc[:, [1] + list(range(5, manhattan_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Roosevelt Island,Park,Sandwich Place,Coffee Shop,Deli / Bodega,Pizza Place,Greek Restaurant,Dry Cleaner,Bus Stop,Baseball Field,Liquor Store
26,Morningside Heights,Park,Coffee Shop,Bookstore,American Restaurant,Food Truck,New American Restaurant,Burger Joint,Deli / Bodega,Tennis Court,Outdoor Sculpture
28,Battery Park City,Park,Coffee Shop,Hotel,Memorial Site,Wine Shop,Italian Restaurant,Clothing Store,Gym,Plaza,Men's Store


In [54]:
# Cluster 1: neighborhood name (column 1) plus the ranked venue columns (5 onward)
manhattan_merged[manhattan_merged['Cluster Labels'] == 1].iloc[:, [1] + list(range(5, manhattan_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Lincoln Square,Theater,Gym / Fitness Center,Café,Plaza,Italian Restaurant,Concert Hall,French Restaurant,Performing Arts Venue,Indie Movie Theater,Park
14,Clinton,Theater,Italian Restaurant,Gym / Fitness Center,American Restaurant,Hotel,Coffee Shop,Sandwich Place,Wine Shop,Spa,New American Restaurant


In [55]:
# Cluster 2: neighborhood name (column 1) plus the ranked venue columns (5 onward)
manhattan_merged[manhattan_merged['Cluster Labels'] == 2].iloc[:, [1] + list(range(5, manhattan_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Marble Hill,Coffee Shop,Discount Store,Sandwich Place,Yoga Studio,Tennis Stadium,Supplement Shop,Steakhouse,Spa,Seafood Restaurant,Clothing Store
1,Chinatown,Chinese Restaurant,American Restaurant,Cocktail Bar,Spa,Dumpling Restaurant,Vietnamese Restaurant,Bubble Tea Shop,Optical Shop,Salon / Barbershop,Ice Cream Shop
6,Central Harlem,African Restaurant,Public Art,Art Gallery,Seafood Restaurant,Chinese Restaurant,Gym / Fitness Center,French Restaurant,American Restaurant,Cosmetics Shop,Liquor Store
8,Upper East Side,Italian Restaurant,Exhibit,Coffee Shop,Juice Bar,Bakery,Art Gallery,Gym / Fitness Center,French Restaurant,Spa,Hotel
9,Yorkville,Italian Restaurant,Gym,Bar,Coffee Shop,Pizza Place,Sushi Restaurant,Deli / Bodega,Japanese Restaurant,Ice Cream Shop,Mexican Restaurant
10,Lenox Hill,Coffee Shop,Italian Restaurant,Sushi Restaurant,Pizza Place,Gym,Café,Cosmetics Shop,Burger Joint,Sporting Goods Shop,Gym / Fitness Center
12,Upper West Side,Italian Restaurant,Wine Bar,Bar,Coffee Shop,Mediterranean Restaurant,Bakery,Indian Restaurant,Vegetarian / Vegan Restaurant,Yoga Studio,Pub
15,Midtown,Hotel,Coffee Shop,Theater,American Restaurant,Cocktail Bar,Clothing Store,Bakery,Sporting Goods Shop,Japanese Restaurant,Steakhouse
16,Murray Hill,Coffee Shop,Hotel,Sandwich Place,Japanese Restaurant,Gym,Italian Restaurant,French Restaurant,Cocktail Bar,Bagel Shop,Bar
17,Chelsea,Coffee Shop,Ice Cream Shop,Italian Restaurant,Bakery,Nightclub,Theater,Seafood Restaurant,American Restaurant,Hotel,Art Gallery


In [56]:
# Cluster 3: neighborhood name (column 1) plus the ranked venue columns (5 onward)
manhattan_merged[manhattan_merged['Cluster Labels'] == 3].iloc[:, [1] + list(range(5, manhattan_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,Stuyvesant Town,Bar,Park,Playground,Pet Service,Farmers Market,Baseball Field,Fountain,Harbor / Marina,Cocktail Bar,Coffee Shop


In [57]:
# Cluster 4: neighborhood name (column 1) plus the ranked venue columns (5 onward)
manhattan_merged[manhattan_merged['Cluster Labels'] == 4].iloc[:, [1] + list(range(5, manhattan_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Washington Heights,Café,Mobile Phone Shop,Bakery,Deli / Bodega,Spanish Restaurant,Latin American Restaurant,New American Restaurant,Sandwich Place,Tapas Restaurant,Mexican Restaurant
3,Inwood,Mexican Restaurant,Café,Lounge,Bakery,Pizza Place,Park,Frozen Yogurt Shop,Chinese Restaurant,Deli / Bodega,American Restaurant
4,Hamilton Heights,Deli / Bodega,Café,Mexican Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Sushi Restaurant,Caribbean Restaurant,School,Bakery
5,Manhattanville,Italian Restaurant,Mexican Restaurant,Deli / Bodega,Park,Coffee Shop,Seafood Restaurant,Beer Garden,Bike Trail,Lounge,Sushi Restaurant
7,East Harlem,Mexican Restaurant,Deli / Bodega,Bakery,Latin American Restaurant,Thai Restaurant,Convenience Store,Café,Gas Station,Taco Place,Steakhouse
25,Manhattan Valley,Indian Restaurant,Coffee Shop,Pizza Place,Yoga Studio,Mexican Restaurant,Café,Bar,Thai Restaurant,Deli / Bodega,Szechuan Restaurant
36,Tudor City,Park,Mexican Restaurant,Café,Greek Restaurant,Asian Restaurant,Deli / Bodega,Pizza Place,Hotel,Dog Run,Spa
