# Determining the Impact of Movie Theater Removal in NYC

#### Importing the code from the previous part of the assignment

In [3]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import pandas as pd

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

import json
import numpy as np

print('Libraries imported.')

Libraries imported.


#### Use geopy library to get the latitude and longitude values of New York City.

In order to define an instance of the geocoder, we need to define a user_agent. We will name our agent to_explorer, as shown below.

In [4]:
address = 'New York City'

# Nominatim needs an identifying user_agent for the client.
geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the service finds no match; stop here with a
    # clear message instead of an AttributeError on the attribute access below.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of NYC are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of NYC are 40.7127281, -74.0060152.


#### Define Foursquare Credentials and Version

Note: For security reasons, the real credentials must be removed before this notebook is shared on a repository.

In [5]:
import os  # imported in this cell because it is only needed to read the credentials

# Foursquare credentials are read from the environment so that the secrets never
# live in the notebook source. The keys that used to be hard-coded in this cell
# should be treated as compromised and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = '1000' # A default Foursquare API limit value

if not (CLIENT_ID and CLIENT_SECRET):
    # Say so here, where the cause is obvious, rather than letting the first
    # API request fail later with a less helpful error.
    print('WARNING: set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET '
          'in the environment before calling the Foursquare API.')

#### Getting NYC Data

In [9]:
# Fetch the neighborhood GeoJSON with requests instead of a `!wget` shell call:
# this also works where wget is not installed, and an HTTP failure raises here,
# so 'Data downloaded!' is only printed after the file has really been written.
newyork_data_url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs/newyork_data.json'
download = requests.get(newyork_data_url, timeout=60)
download.raise_for_status()
with open('newyork_data.json', 'wb') as json_file:
    json_file.write(download.content)
print('Data downloaded!')

# Read the file back as UTF-8 explicitly so parsing does not depend on the
# platform's default text encoding.
with open('newyork_data.json', encoding='utf-8') as json_data:
    newyork_data = json.load(json_data)

#Only including the relevant information from features column
neighborhoods_data = newyork_data['features']

# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# Collect one record per neighborhood and build the dataframe in a single step.
# Growing a dataframe with DataFrame.append inside a loop copies the whole frame
# on every iteration, and DataFrame.append no longer exists in pandas 2.x.
neighborhood_records = []
for data in neighborhoods_data:
    borough = data['properties']['borough']
    neighborhood_name = data['properties']['name']

    # the coordinate pair is stored as [longitude, latitude]
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]

    neighborhood_records.append({'Borough': borough,
                                 'Neighborhood': neighborhood_name,
                                 'Latitude': neighborhood_lat,
                                 'Longitude': neighborhood_lon})

# instantiate the dataframe (columns= fixes the column order, even with no records)
neighborhoods = pd.DataFrame(neighborhood_records, columns=column_names)

Data downloaded!


In [10]:
# Keep only the rows whose borough is Manhattan and renumber them from 0.
manhattan_data = neighborhoods.loc[neighborhoods['Borough'].eq('Manhattan')].reset_index(drop=True)

#### Now, let's get up to `LIMIT` venues for each Manhattan neighborhood, within a radius of 1000 meters of its coordinates.

In [11]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    """Collect the venues Foursquare returns around each named location.

    One GET request is sent to the Foursquare ``venues/explore`` endpoint per
    (name, latitude, longitude) triple, using the module-level ``CLIENT_ID``,
    ``CLIENT_SECRET``, ``VERSION`` and ``LIMIT`` settings.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences describing the locations to query. They are walked
        with ``zip``, so iteration stops at the shortest one.
    radius : int, optional
        Search radius (in meters) sent to the API. Defaults to 1000.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame is
        empty (but still has these columns) when no venue was collected.
        Venues that come back without any category are skipped, because the
        first category is what fills 'Venue Category'.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    venue_columns = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):

        # Let requests build and escape the query string instead of formatting
        # the credentials into the URL by hand.
        query = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request; the timeout keeps a stalled connection from
        # hanging the notebook, and raise_for_status reports HTTP errors
        # directly instead of as a KeyError on the JSON lookup below
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=query,
                                timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            categories = v['venue']['categories']
            if not categories:
                # no category to report; indexing [0] would raise IndexError
                continue
            venue_rows.append((
                name,
                lat,
                lng,
                v['venue']['name'],
                v['venue']['location']['lat'],
                v['venue']['location']['lng'],
                categories[0]['name']))

    # Passing columns= here (rather than assigning .columns afterwards) also
    # works when venue_rows is empty.
    return pd.DataFrame(venue_rows, columns=venue_columns)

In [12]:
# Pull the three parallel columns out of manhattan_data as plain Python lists.
neighborhood_name = manhattan_data['Neighborhood'].tolist() # neighborhood name
neighborhood_latitude = manhattan_data['Latitude'].tolist() # neighborhood latitude value
neighborhood_longitude = manhattan_data['Longitude'].tolist() # neighborhood longitude value

# Query the venues around every Manhattan neighborhood (default radius).
nyc_venues = getNearbyVenues(neighborhood_name, neighborhood_latitude, neighborhood_longitude)
nyc_venues.shape

(3979, 7)

Counting the number of movie venues in the NYC area, listed with their latitudes and longitudes

In [13]:
# Rows whose category name contains "Movie" (e.g. "Movie Theater", "Indie Movie Theater").
nyc_venues.loc[nyc_venues['Venue Category'].str.contains('Movie')]

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
87,Chinatown,40.715618,-73.994279,Metrograph,40.714999,-73.991035,Indie Movie Theater
164,Chinatown,40.715618,-73.994279,Regal Essex 14 & RPX,40.718133,-73.987895,Movie Theater
1150,Roosevelt Island,40.76216,-73.949168,CMX CineBistro,40.761338,-73.960406,Movie Theater
1180,Upper West Side,40.787658,-73.977059,AMC Loews 84th Street 6,40.78677,-73.977608,Movie Theater
1284,Lincoln Square,40.773529,-73.985338,Walter Reade Theater,40.773783,-73.983924,Indie Movie Theater
1288,Lincoln Square,40.773529,-73.985338,Film at Lincoln Center,40.773609,-73.983571,Indie Movie Theater
1299,Lincoln Square,40.773529,-73.985338,Elinor Bunin Munroe Film Center,40.773709,-73.983489,Indie Movie Theater
1307,Lincoln Square,40.773529,-73.985338,AMC Lincoln Square 13,40.775063,-73.982095,Movie Theater
1831,Greenwich Village,40.726933,-73.999914,Film Forum,40.728462,-74.004423,Indie Movie Theater
1849,Greenwich Village,40.726933,-73.999914,IFC Center,40.731167,-74.001339,Indie Movie Theater


Let's find out how many unique categories can be curated from all the returned venues

In [14]:
# Number of distinct values in the 'Venue Category' column.
print('There are {} uniques categories.'.format(len(pd.unique(nyc_venues['Venue Category']))))

There are 310 uniques categories.


## 3. Analyze Each Neighborhood

In [29]:
# one hot encoding: one indicator column per venue category
nyc_onehot = pd.get_dummies(nyc_venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category that is itself called 'Neighborhood' would collide with the
# label column added below and be silently overwritten, so rename it first.
if 'Neighborhood' in nyc_onehot.columns:
    nyc_onehot = nyc_onehot.rename(columns={'Neighborhood': 'Neighborhood (Category)'})

# add the neighborhood column back as the first column; inserting at position 0
# does not rely on the label column happening to be the last one
nyc_onehot.insert(0, 'Neighborhood', nyc_venues['Neighborhood'])

nyc_onehot.head(100)

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,African Restaurant,American Restaurant,Antique Shop,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,...,Video Game Store,Vietnamese Restaurant,Volleyball Court,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Chinatown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
96,Chinatown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
97,Chinatown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
98,Chinatown,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


And let's examine the new dataframe size.

In [16]:
# (rows, columns): one row per venue; the columns are the neighborhood label plus one indicator per category
nyc_onehot.shape

(3979, 311)

#### Next, let's group rows by neighborhood, taking the mean of the frequency of occurrence of each category

In [17]:
# Average every indicator column within each neighborhood, which gives the share
# of that neighborhood's venues falling in each category; 'Neighborhood' stays
# an ordinary column.
nyc_grouped = nyc_onehot.groupby('Neighborhood', as_index=False).mean()
nyc_grouped

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,African Restaurant,American Restaurant,Antique Shop,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,...,Video Game Store,Vietnamese Restaurant,Volleyball Court,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Battery Park City,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.0
1,Carnegie Hill,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.03,0.0,...,0.0,0.01,0.0,0.0,0.0,0.01,0.03,0.0,0.01,0.04
2,Central Harlem,0.0,0.0,0.03,0.03,0.0,0.0,0.01,0.01,0.02,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02
3,Chelsea,0.0,0.0,0.0,0.03,0.0,0.0,0.08,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01
4,Chinatown,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.04,0.01,0.0,0.0,0.0
5,Civic Center,0.0,0.0,0.0,0.02,0.01,0.0,0.01,0.0,0.0,...,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.01
6,Clinton,0.0,0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0
7,East Harlem,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0
8,East Village,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.01,...,0.0,0.02,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0
9,Financial District,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0


#### Let's confirm the new size

In [18]:
# (rows, columns): one row per neighborhood after grouping; the column count is unchanged
nyc_grouped.shape

(40, 311)

#### Let's print each neighborhood along with the top 5 most common venues

In [19]:
num_top_venues = 5

# For every neighborhood, print its most frequent venue categories.
for hood in nyc_grouped['Neighborhood']:
    print("----"+hood+"----")

    # Transpose the neighborhood's single row into a two-column (venue, freq) table.
    profile = nyc_grouped[nyc_grouped['Neighborhood'] == hood].T.reset_index()
    profile.columns = ['venue','freq']

    # The first row holds the neighborhood name, not a frequency, so drop it
    # before converting, rounding and ranking the frequencies.
    ranked = (
        profile.iloc[1:]
        .astype({'freq': float})
        .round({'freq': 2})
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
    )
    print(ranked.head(num_top_venues))
    print('\n')

----Battery Park City----
          venue  freq
0          Park  0.09
1   Coffee Shop  0.07
2         Hotel  0.03
3           Gym  0.03
4  Burger Joint  0.03


----Carnegie Hill----
                  venue  freq
0           Coffee Shop  0.06
1                  Café  0.06
2           Yoga Studio  0.04
3  Gym / Fitness Center  0.04
4                Bakery  0.04


----Central Harlem----
                             venue  freq
0  Southern / Soul Food Restaurant  0.05
1                             Café  0.04
2                      Pizza Place  0.03
3               African Restaurant  0.03
4              American Restaurant  0.03


----Chelsea----
                     venue  freq
0              Art Gallery  0.08
1              Coffee Shop  0.06
2                   Bakery  0.04
3  New American Restaurant  0.03
4       Seafood Restaurant  0.03


----Chinatown----
                venue  freq
0              Bakery  0.05
1      Ice Cream Shop  0.05
2  Chinese Restaurant  0.04
3                Ca

#### Let's put that into a _pandas_ dataframe

First, let's write a function to sort the venues in descending order.

In [20]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first element of ``row`` is skipped (in this notebook it holds the
    neighborhood name); the remaining values are ranked from largest to
    smallest and the labels of the first ``num_top_venues`` are returned as an
    array.
    """
    frequencies = row.iloc[1:]
    ranked = frequencies.sort_values(ascending=False)
    return ranked.index[:num_top_venues].values

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [21]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # Ranks 1-3 take 'st'/'nd'/'rd'; every later rank takes 'th'. An explicit
    # bounds check replaces the former bare `except:`, which would also have
    # swallowed unrelated errors (even KeyboardInterrupt).
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = nyc_grouped['Neighborhood']

# fill the ranked venue columns row by row (everything after 'Neighborhood')
for ind in np.arange(nyc_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(nyc_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Battery Park City,Park,Coffee Shop,Memorial Site,Wine Shop,Pizza Place,Burger Joint,Playground,Plaza,Sandwich Place,Hotel
1,Carnegie Hill,Café,Coffee Shop,Yoga Studio,Bakery,Gym / Fitness Center,Art Museum,Cocktail Bar,Bookstore,Pizza Place,Gym
2,Central Harlem,Southern / Soul Food Restaurant,Café,Seafood Restaurant,Gym / Fitness Center,Sushi Restaurant,Lounge,French Restaurant,Bar,Theater,Cocktail Bar
3,Chelsea,Art Gallery,Coffee Shop,Bakery,Seafood Restaurant,American Restaurant,Italian Restaurant,New American Restaurant,Cosmetics Shop,Bagel Shop,Salon / Barbershop
4,Chinatown,Bakery,Ice Cream Shop,Coffee Shop,Café,Wine Bar,Chinese Restaurant,Sandwich Place,Cocktail Bar,Optical Shop,Bar


## 4. Cluster Neighborhoods

Run _k_-means to cluster the neighborhood into 5 clusters.

In [22]:
# set number of clusters
kclusters = 5

# Cluster on the category frequencies only; the neighborhood name is a label,
# not a feature. The keyword form is used because the positional axis argument
# (`drop('Neighborhood', 1)`) is no longer accepted by pandas 2.x.
nyc_grouped_clustering = nyc_grouped.drop(columns='Neighborhood')

# run k-means clustering; n_init is spelled out (10 was the long-standing
# default) so the result does not shift with scikit-learn's default change
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(nyc_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 3, 0, 2, 2, 2, 0, 1, 3, 2], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [23]:
# add clustering labels as the first column; when the column is already there
# (the cell is being re-run) overwrite it instead of inserting, because
# DataFrame.insert raises on an existing column name
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# join the cluster label and ranked venues onto manhattan_data, which carries
# the borough and latitude/longitude of each neighborhood
nyc_merged = manhattan_data.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

nyc_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Manhattan,Marble Hill,40.876551,-73.91066,1,Park,Café,Pizza Place,Donut Shop,Spanish Restaurant,Sandwich Place,Mexican Restaurant,Bus Station,Scenic Lookout,Bakery
1,Manhattan,Chinatown,40.715618,-73.994279,2,Bakery,Ice Cream Shop,Coffee Shop,Café,Wine Bar,Chinese Restaurant,Sandwich Place,Cocktail Bar,Optical Shop,Bar
2,Manhattan,Washington Heights,40.851903,-73.9369,1,Pizza Place,Bakery,Latin American Restaurant,Café,Coffee Shop,Spanish Restaurant,Mexican Restaurant,Tapas Restaurant,Mobile Phone Shop,Bar
3,Manhattan,Inwood,40.867684,-73.92121,1,Latin American Restaurant,Deli / Bodega,Mexican Restaurant,Pizza Place,Restaurant,Wine Bar,Spanish Restaurant,Café,Park,Chinese Restaurant
4,Manhattan,Hamilton Heights,40.823604,-73.949688,1,Coffee Shop,Mexican Restaurant,Bar,Café,Chinese Restaurant,Park,Deli / Bodega,Cocktail Bar,Yoga Studio,Sushi Restaurant


Finally, let's visualize the resulting clusters

## 5. Examine Clusters

Now, you can examine each cluster and determine the discriminating venue categories that distinguish each cluster. Based on the defining categories, you can then assign a name to each cluster. I will leave this exercise to you.

#### Cluster 1

In [24]:
# Cluster label 0 (shown as "Cluster 1"): neighborhood name (column 1) plus the ranked venue columns (column 5 onward).
nyc_merged[nyc_merged['Cluster Labels'] == 0].iloc[:, [1] + list(range(5, nyc_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Manhattanville,Coffee Shop,Park,American Restaurant,Italian Restaurant,Mexican Restaurant,Theater,Café,Seafood Restaurant,Lounge,Tennis Court
6,Central Harlem,Southern / Soul Food Restaurant,Café,Seafood Restaurant,Gym / Fitness Center,Sushi Restaurant,Lounge,French Restaurant,Bar,Theater,Cocktail Bar
13,Lincoln Square,French Restaurant,Performing Arts Venue,Coffee Shop,Plaza,Theater,Jazz Club,Italian Restaurant,Gym / Fitness Center,Concert Hall,Indie Movie Theater
14,Clinton,Theater,Hotel,Coffee Shop,American Restaurant,Gym / Fitness Center,Bar,Gift Shop,Burger Joint,Wine Shop,Italian Restaurant
15,Midtown,Theater,Plaza,Steakhouse,Sushi Restaurant,Hotel,Cuban Restaurant,Gym,Coffee Shop,Pizza Place,Concert Hall
39,Hudson Yards,Gym / Fitness Center,Hotel,Coffee Shop,Theater,Lounge,Italian Restaurant,Gym,American Restaurant,Cocktail Bar,Art Gallery


#### Cluster 2

In [25]:
# Cluster label 1 (shown as "Cluster 2"): neighborhood name (column 1) plus the ranked venue columns (column 5 onward).
nyc_merged[nyc_merged['Cluster Labels'] == 1].iloc[:, [1] + list(range(5, nyc_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Marble Hill,Park,Café,Pizza Place,Donut Shop,Spanish Restaurant,Sandwich Place,Mexican Restaurant,Bus Station,Scenic Lookout,Bakery
2,Washington Heights,Pizza Place,Bakery,Latin American Restaurant,Café,Coffee Shop,Spanish Restaurant,Mexican Restaurant,Tapas Restaurant,Mobile Phone Shop,Bar
3,Inwood,Latin American Restaurant,Deli / Bodega,Mexican Restaurant,Pizza Place,Restaurant,Wine Bar,Spanish Restaurant,Café,Park,Chinese Restaurant
4,Hamilton Heights,Coffee Shop,Mexican Restaurant,Bar,Café,Chinese Restaurant,Park,Deli / Bodega,Cocktail Bar,Yoga Studio,Sushi Restaurant
7,East Harlem,Café,Pizza Place,Mexican Restaurant,Bakery,Deli / Bodega,Italian Restaurant,Thai Restaurant,Fountain,Gym,Cocktail Bar


#### Cluster 3

In [26]:
# Cluster label 2 (shown as "Cluster 3"): neighborhood name (column 1) plus the ranked venue columns (column 5 onward).
nyc_merged[nyc_merged['Cluster Labels'] == 2].iloc[:, [1] + list(range(5, nyc_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Chinatown,Bakery,Ice Cream Shop,Coffee Shop,Café,Wine Bar,Chinese Restaurant,Sandwich Place,Cocktail Bar,Optical Shop,Bar
8,Upper East Side,Exhibit,Italian Restaurant,Bakery,Gym / Fitness Center,Coffee Shop,Hotel,French Restaurant,Yoga Studio,Playground,Pizza Place
17,Chelsea,Art Gallery,Coffee Shop,Bakery,Seafood Restaurant,American Restaurant,Italian Restaurant,New American Restaurant,Cosmetics Shop,Bagel Shop,Salon / Barbershop
18,Greenwich Village,Italian Restaurant,Spa,Coffee Shop,Pizza Place,Sushi Restaurant,American Restaurant,Sandwich Place,Cosmetics Shop,Gym,Salon / Barbershop
21,Tribeca,Park,Hotel,Spa,Coffee Shop,Steakhouse,Men's Store,Art Gallery,Gym / Fitness Center,Sushi Restaurant,American Restaurant
22,Little Italy,Clothing Store,Bakery,Optical Shop,Café,Dessert Shop,Women's Store,Sandwich Place,Hotel,Asian Restaurant,Chinese Restaurant
23,Soho,Italian Restaurant,Bakery,Café,Clothing Store,Dessert Shop,Mediterranean Restaurant,Optical Shop,Cosmetics Shop,Women's Store,Jewelry Store
24,West Village,Italian Restaurant,American Restaurant,Park,Cocktail Bar,Wine Bar,Coffee Shop,Gym,New American Restaurant,French Restaurant,Café
28,Battery Park City,Park,Coffee Shop,Memorial Site,Wine Shop,Pizza Place,Burger Joint,Playground,Plaza,Sandwich Place,Hotel
29,Financial District,Coffee Shop,Pizza Place,Hotel,Cocktail Bar,Gym / Fitness Center,American Restaurant,Café,Memorial Site,Italian Restaurant,Jewelry Store


#### Cluster 4

In [27]:
# Cluster label 3 (shown as "Cluster 4"): neighborhood name (column 1) plus the ranked venue columns (column 5 onward).
nyc_merged[nyc_merged['Cluster Labels'] == 3].iloc[:, [1] + list(range(5, nyc_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Yorkville,Ice Cream Shop,Italian Restaurant,Japanese Restaurant,Gym,Bar,Pizza Place,Coffee Shop,Wine Shop,Indian Restaurant,Gym / Fitness Center
10,Lenox Hill,Italian Restaurant,Cocktail Bar,Sushi Restaurant,Gym / Fitness Center,Coffee Shop,Café,Burger Joint,French Restaurant,Wine Bar,Gym
11,Roosevelt Island,Park,Coffee Shop,Pizza Place,Cocktail Bar,Sushi Restaurant,Greek Restaurant,Café,Tennis Court,Deli / Bodega,Mexican Restaurant
12,Upper West Side,Bakery,Italian Restaurant,Coffee Shop,Café,Sushi Restaurant,Mediterranean Restaurant,American Restaurant,Wine Bar,Ice Cream Shop,Bar
19,East Village,Wine Bar,Coffee Shop,Cocktail Bar,Pizza Place,Bar,Korean Restaurant,Dessert Shop,Ice Cream Shop,Garden,Japanese Restaurant
20,Lower East Side,Café,French Restaurant,Pizza Place,Wine Bar,Italian Restaurant,Coffee Shop,Asian Restaurant,Art Gallery,Mexican Restaurant,Food Court
25,Manhattan Valley,Coffee Shop,Grocery Store,Park,Mexican Restaurant,Pizza Place,Playground,Bar,Wine Shop,Ice Cream Shop,Indian Restaurant
26,Morningside Heights,Coffee Shop,Italian Restaurant,Park,Mexican Restaurant,American Restaurant,Bakery,Chinese Restaurant,Playground,Bookstore,Wine Shop
27,Gramercy,American Restaurant,New American Restaurant,Coffee Shop,Juice Bar,Wine Shop,Indian Restaurant,Italian Restaurant,Mediterranean Restaurant,Hotel,Park
30,Carnegie Hill,Café,Coffee Shop,Yoga Studio,Bakery,Gym / Fitness Center,Art Museum,Cocktail Bar,Bookstore,Pizza Place,Gym


#### Cluster 5

In [28]:
# Cluster label 4 (shown as "Cluster 5"): neighborhood name (column 1) plus the ranked venue columns (column 5 onward).
nyc_merged[nyc_merged['Cluster Labels'] == 4].iloc[:, [1] + list(range(5, nyc_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,Murray Hill,Korean Restaurant,Gym / Fitness Center,Japanese Restaurant,Gourmet Shop,Pizza Place,Coffee Shop,Hotel,Bakery,Building,Steakhouse
33,Midtown South,Korean Restaurant,Gym / Fitness Center,Hotel,Café,Italian Restaurant,Coffee Shop,Dessert Shop,Bakery,Pizza Place,New American Restaurant


# Observations

It is interesting to note that the largest cluster is Cluster 1, which is composed mostly of places to hang out and spend time in (social areas). The other clusters have different types of venues as their most popular ones; for example, Cluster 5 is very popular for parks, trails and playgrounds.