<h1>Capstone Project - The Battle of Neighborhoods</h1>

### Importing the required libraries

In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
import urllib.request
import json
from bs4 import BeautifulSoup
from urllib.request import urlopen
import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import matplotlib.colors as colors
%matplotlib inline
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes
import folium

print('Libraries imported.')

Libraries imported.


### Downloading and exploring the dataset

In [6]:
def get_new_york_data():
    """Download the New York neighborhood dataset and return it as a DataFrame.

    The JSON document served at ``https://cocl.us/new_york_dataset`` keeps one
    entry per neighborhood under its ``features`` key. Each entry carries the
    borough and the neighborhood name in ``properties`` and a
    ``[longitude, latitude]`` pair in ``geometry.coordinates``.

    Returns:
        pandas.DataFrame: one row per feature with the columns ``Borough``,
        ``Neighborhood``, ``Latitude`` and ``Longitude``. The columns are
        present even when the document contains no features.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    url = 'https://cocl.us/new_york_dataset'
    reply = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    reply.raise_for_status()

    # all data is present in features label
    features = reply.json()['features']

    column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

    # Collect plain records first and build the frame once: DataFrame.append
    # copied the whole frame on every row and no longer exists in pandas 2.x.
    records = [
        {
            'Borough': feature['properties']['borough'],
            'Neighborhood': feature['properties']['name'],
            # coordinates are stored as [longitude, latitude]
            'Latitude': feature['geometry']['coordinates'][1],
            'Longitude': feature['geometry']['coordinates'][0],
        }
        for feature in features
    ]

    return pd.DataFrame(records, columns=column_names)

In [7]:
# Download the neighborhood table once and keep it for the cells below.
newyork_data = get_new_york_data()

In [8]:
# Preview the first rows of the downloaded table.
newyork_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


<b>Referring to the neighborhood dataframe as <code>neighborhoods</code></b>

In [11]:
# NOTE: this only binds a second name to the same DataFrame object; no copy
# or conversion takes place here.
neighborhoods = newyork_data

In [12]:
# Preview the first ten neighborhoods.
neighborhoods.head(10)

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585
5,Bronx,Kingsbridge,40.881687,-73.902818
6,Manhattan,Marble Hill,40.876551,-73.91066
7,Bronx,Woodlawn,40.898273,-73.867315
8,Bronx,Norwood,40.877224,-73.879391
9,Bronx,Williamsbridge,40.881039,-73.857446


<b>Use the geopy library to get the latitude and longitude values of New York City.</b>

In [14]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() yields None when the service finds no match; stop with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))

# These two values are the map centre used by the folium cells further down.
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


<b>Creating a map of New York with the Manhattan neighborhoods superimposed on top.</b>

In [15]:
# Keep only the Manhattan rows and renumber them from zero.
in_manhattan = neighborhoods['Borough'] == 'Manhattan'
manhattan_data = neighborhoods.loc[in_manhattan].reset_index(drop=True)
manhattan_data.head(10)

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688
5,Manhattan,Manhattanville,40.816934,-73.957385
6,Manhattan,Central Harlem,40.815976,-73.943211
7,Manhattan,East Harlem,40.792249,-73.944182
8,Manhattan,Upper East Side,40.775639,-73.960508
9,Manhattan,Yorkville,40.77593,-73.947118


In [16]:
# create map of New York using latitude and longitude values
# (folium is already imported in the first cell of the notebook)
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one marker per Manhattan neighborhood
for lat, lng, borough, neighborhood in zip(manhattan_data['Latitude'], manhattan_data['Longitude'], manhattan_data['Borough'], manhattan_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html is an option of Popup; it was also being passed to
    # CircleMarker, where it is not a marker option, so it is dropped there.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_newyork)

map_newyork

### Foursquare venues

In [17]:
import urllib
def getNearbyVenues(names, latitudes, longitudes, radius=5000, categoryIds=''):
    """Search Foursquare for venues around each neighborhood.

    One request is sent to the ``venues/search`` endpoint per
    (name, latitude, longitude) triple. The module-level names ``CLIENT_ID``,
    ``CLIENT_SECRET``, ``VERSION`` and ``LIMIT`` are read at call time.

    Args:
        names: iterable of neighborhood names.
        latitudes: iterable of latitudes, aligned with ``names``.
        longitudes: iterable of longitudes, aligned with ``names``.
        radius: value sent as the ``radius`` query parameter.
        categoryIds: value sent as the ``categoryId`` query parameter; the
            empty string (default) sends no category filter.

    Returns:
        pandas.DataFrame: one row per venue that has at least one category,
        with the columns ``Neighborhood``, ``Neighborhood Latitude``,
        ``Neighborhood Longitude``, ``Venue``, ``Venue Latitude``,
        ``Venue Longitude`` and ``Venue Category``. The columns are present
        even when no venue was found.

    Notes:
        A neighborhood whose request fails, or whose reply does not have the
        expected ``response -> venues`` structure, is reported on stdout and
        skipped. Only the neighborhood name and the error type are printed,
        because the request URL contains the client secret.
    """
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/search'

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and escape the query string instead of formatting
        # the URL by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        if categoryIds != '':
            params['categoryId'] = categoryIds

        try:
            reply = requests.get(endpoint, params=params, timeout=30)
            reply.raise_for_status()
            results = reply.json()['response']['venues']
        except (requests.RequestException, KeyError, TypeError, ValueError) as err:
            print('Skipping {}: venue search failed ({})'.format(name, type(err).__name__))
            continue

        # return only relevant information for each nearby venue
        for v in results:
            try:
                row = (name,
                       lat,
                       lng,
                       v['name'],
                       v['location']['lat'],
                       v['location']['lng'],
                       v['categories'][0]['name'])
            except (KeyError, IndexError, TypeError):
                # venue without a category (or without a required field)
                continue
            venue_rows.append(row)

    return pd.DataFrame(venue_rows, columns=column_names)

In [18]:
import os

# Settings read by getNearbyVenues for every Foursquare request.
LIMIT = 500
radius = 5000
VERSION = '20200202'

# SECURITY: API credentials must not be stored in the notebook. They are read
# from the environment instead; export FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel. The key pair that used
# to be written here has been exposed and should be revoked.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will be rejected.')

In [19]:
# Narrow the working frame to Manhattan, then query venues within 1000 m of
# each neighborhood for a single category id (judging by the results shown
# below, presumably the gym / fitness category).
is_manhattan = neighborhoods['Borough'] == 'Manhattan'
neighborhoods = neighborhoods[is_manhattan].reset_index(drop=True)
newyork_venues_gym = getNearbyVenues(
    names=neighborhoods['Neighborhood'],
    latitudes=neighborhoods['Latitude'],
    longitudes=neighborhoods['Longitude'],
    radius=1000,
    categoryIds='4bf58dd8d48988d175941735',
)
newyork_venues_gym.head(10)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Marble Hill,40.876551,-73.91066,Planet Fitness,40.874088,-73.909137,Gym / Fitness Center
1,Marble Hill,40.876551,-73.91066,Blink Fitness,40.877271,-73.905595,Gym
2,Marble Hill,40.876551,-73.91066,24 Hour Fitness,40.880592,-73.908255,Gym / Fitness Center
3,Marble Hill,40.876551,-73.91066,Bronx Boxing,40.876646,-73.905927,Boxing Gym
4,Marble Hill,40.876551,-73.91066,Build N Box,40.879847,-73.904302,Gym / Fitness Center
5,Marble Hill,40.876551,-73.91066,Bread and Yoga,40.868229,-73.918028,Yoga Studio
6,Marble Hill,40.876551,-73.91066,3210 Riverdale Avenue - Wellness Center & Gym,40.882746,-73.907625,Gym
7,Marble Hill,40.876551,-73.91066,TCR The Club of Riverdale,40.878628,-73.914568,Tennis Stadium
8,Marble Hill,40.876551,-73.91066,Astral Fitness & Wellness Center,40.876705,-73.906372,Gym
9,Marble Hill,40.876551,-73.91066,Abadá-Capoeira Bronx,40.879667,-73.906976,Martial Arts Dojo


In [20]:
# (rows, columns) of the venue table returned by getNearbyVenues.
newyork_venues_gym.shape

(1886, 7)

In [21]:
def addToMap(df, color, existingMap):
    """Draw one circle marker on ``existingMap`` for every venue row in ``df``.

    Args:
        df: DataFrame providing the columns 'Venue Latitude',
            'Venue Longitude', 'Neighborhood', 'Venue' and 'Venue Category'.
        color: colour used for both the outline and the fill of each marker.
        existingMap: object the markers are attached to through ``add_to``.

    Returns:
        None. ``existingMap`` is modified in place.
    """
    needed = ['Venue Latitude', 'Venue Longitude', 'Neighborhood', 'Venue', 'Venue Category']
    for venue_lat, venue_lng, hood, venue_name, category in df[needed].itertuples(index=False, name=None):
        # popup text: "<venue> (<category>) - <neighborhood>"
        popup = folium.Popup('{} ({}) - {}'.format(venue_name, category, hood), parse_html=True)
        marker = folium.CircleMarker(
            [venue_lat, venue_lng],
            radius=5,
            popup=popup,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7)
        marker.add_to(existingMap)

In [22]:
# Fresh map centred on the geocoded city coordinates, with every venue from
# newyork_venues_gym drawn in red.
map_newyork_gym = folium.Map(location=[latitude, longitude], zoom_start=10)
addToMap(newyork_venues_gym, 'red', map_newyork_gym)

map_newyork_gym

In [23]:
def addColumn(startDf, columnTitle, dataDf):
    """Add a per-neighborhood venue count to ``startDf`` in place.

    Args:
        startDf: DataFrame with a 'Neighborhood' column; receives the new
            column ``columnTitle``.
        columnTitle: name of the column to create (or overwrite).
        dataDf: DataFrame with 'Neighborhood' and 'Venue' columns, one row
            per venue.

    Returns:
        None. ``startDf[columnTitle]`` holds, for every row, the number of
        non-null 'Venue' entries ``dataDf`` has for that neighborhood, and 0
        for neighborhoods that do not occur in ``dataDf``.

    Raises:
        KeyError: if ``dataDf`` lacks the 'Neighborhood' or 'Venue' column.
            (Previously a bare ``except`` turned this into a column of zeros.)
    """
    venue_counts = dataDf.groupby('Neighborhood')['Venue'].count()

    # One vectorised lookup replaces the per-row masked assignment; only a
    # genuinely absent neighborhood falls back to 0.
    startDf[columnTitle] = (
        startDf['Neighborhood'].map(venue_counts).fillna(0).astype(int)
    )

In [24]:
# Number of venue rows per neighborhood (count of non-null values per column).
# NOTE(review): the name manhattan_grouped is rebound to a different table
# (per-category means) in a later cell.
manhattan_grouped = newyork_venues_gym.groupby('Neighborhood').count()
manhattan_grouped

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Battery Park City,50,50,50,50,50,50
Carnegie Hill,50,50,50,50,50,50
Central Harlem,49,49,49,49,49,49
Chelsea,50,50,50,50,50,50
Chinatown,50,50,50,50,50,50
Civic Center,50,50,50,50,50,50
Clinton,50,50,50,50,50,50
East Harlem,49,49,49,49,49,49
East Village,50,50,50,50,50,50
Financial District,50,50,50,50,50,50


### Analyze each neighborhood

In [25]:
# one hot encoding: one 0/1 indicator column per venue category
manhattan_onehot = pd.get_dummies(newyork_venues_gym[['Venue Category']], prefix="", prefix_sep="", dtype=int)

# Put the neighborhood name in front as the first column.
# insert() raises ValueError if a venue category is itself called
# 'Neighborhood'; plain assignment would silently overwrite that indicator
# column and the old "move the last column first" step would then move the
# wrong column.
manhattan_onehot.insert(0, 'Neighborhood', newyork_venues_gym['Neighborhood'])

manhattan_onehot.head()

Unnamed: 0,Neighborhood,Athletics & Sports,Basketball Court,Beer Garden,Bike Shop,Boxing Gym,Building,Climbing Gym,Club House,Community Center,Corporate Amenity,Cultural Center,Cycle Studio,Dance Studio,Doctor's Office,Dog Run,Exhibit,General College & University,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Martial Arts Dojo,Massage Studio,Medical Center,Non-Profit,Office,Outdoor Gym,Park,Physical Therapist,Pilates Studio,Playground,Pool,Recreation Center,Residential Building (Apartment / Condo),School,Spa,Spiritual Center,Sports Club,State / Provincial Park,Tennis Court,Tennis Stadium,Track,Weight Loss Center,Yoga Studio
0,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Marble Hill,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [26]:
# Average the one-hot rows per neighborhood: each value is the share of that
# neighborhood's venues that fall into the category.
manhattan_grouped = manhattan_onehot.groupby('Neighborhood').mean().reset_index()
manhattan_grouped

Unnamed: 0,Neighborhood,Athletics & Sports,Basketball Court,Beer Garden,Bike Shop,Boxing Gym,Building,Climbing Gym,Club House,Community Center,Corporate Amenity,Cultural Center,Cycle Studio,Dance Studio,Doctor's Office,Dog Run,Exhibit,General College & University,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Martial Arts Dojo,Massage Studio,Medical Center,Non-Profit,Office,Outdoor Gym,Park,Physical Therapist,Pilates Studio,Playground,Pool,Recreation Center,Residential Building (Apartment / Condo),School,Spa,Spiritual Center,Sports Club,State / Provincial Park,Tennis Court,Tennis Stadium,Track,Weight Loss Center,Yoga Studio
0,Battery Park City,0.04,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.3,0.36,0.06,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06
1,Carnegie Hill,0.0,0.0,0.0,0.0,0.02,0.02,0.02,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.32,0.38,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.14
2,Central Harlem,0.020408,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.061224,0.0,0.0,0.0,0.0,0.020408,0.367347,0.306122,0.0,0.0,0.040816,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857
3,Chelsea,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.16,0.0,0.0,0.0,0.0,0.0,0.16,0.48,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.12
4,Chinatown,0.04,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.22,0.34,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.18
5,Civic Center,0.02,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.02,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.3,0.26,0.02,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.12
6,Clinton,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.02,0.0,0.38,0.32,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.08
7,East Harlem,0.0,0.0,0.0,0.0,0.040816,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.469388,0.265306,0.0,0.020408,0.061224,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.040816,0.061224
8,East Village,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.28,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.1
9,Financial District,0.02,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.34,0.38,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04


In [27]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of ``row``, skipping its first entry.

    Args:
        row: pandas Series whose first entry is ignored (the neighborhood
            name in this notebook); the remaining entries are ranked by value.
        num_top_venues: how many labels to return.

    Returns:
        numpy array with the index labels of the ``num_top_venues`` largest
        remaining entries, largest first.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index[:num_top_venues]
    return top_labels.values

In [28]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    rank = ind + 1
    # 11, 12 and 13 are irregular; otherwise the last digit picks st/nd/rd
    # and every other rank takes "th". An explicit test replaces the bare
    # except that relied on an IndexError (and produced "21th").
    if rank % 100 in (11, 12, 13) or rank % 10 not in (1, 2, 3):
        suffix = 'th'
    else:
        suffix = indicators[rank % 10 - 1]
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# Rank the categories of every neighborhood, then build the table in one go
# instead of assigning into an empty frame row by row.
top_venues = [
    return_most_common_venues(manhattan_grouped.iloc[ind, :], num_top_venues)
    for ind in range(manhattan_grouped.shape[0])
]
neighborhoods_venues_sorted = pd.DataFrame(top_venues, columns=columns[1:], index=manhattan_grouped.index)
neighborhoods_venues_sorted.insert(0, 'Neighborhood', manhattan_grouped['Neighborhood'])

neighborhoods_venues_sorted.head(10)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Battery Park City,Gym / Fitness Center,Gym,Boxing Gym,Yoga Studio,Gym Pool,Cycle Studio,Athletics & Sports,Corporate Amenity,Doctor's Office,Medical Center
1,Carnegie Hill,Gym / Fitness Center,Gym,Yoga Studio,Pool,Boxing Gym,Building,Climbing Gym,Community Center,Cycle Studio,Martial Arts Dojo
2,Central Harlem,Gym,Gym / Fitness Center,Yoga Studio,Cycle Studio,Martial Arts Dojo,Athletics & Sports,General College & University,Climbing Gym,Pilates Studio,Corporate Amenity
3,Chelsea,Gym / Fitness Center,Gym,Cycle Studio,Yoga Studio,Spa,Recreation Center,Bike Shop,Boxing Gym,Dance Studio,Basketball Court
4,Chinatown,Gym / Fitness Center,Gym,Yoga Studio,Pilates Studio,Boxing Gym,Martial Arts Dojo,Athletics & Sports,Cycle Studio,Office,Corporate Amenity
5,Civic Center,Gym,Gym / Fitness Center,Yoga Studio,Boxing Gym,Cycle Studio,Pilates Studio,Corporate Amenity,Gym Pool,Office,Martial Arts Dojo
6,Clinton,Gym,Gym / Fitness Center,Yoga Studio,Cycle Studio,Exhibit,Boxing Gym,Building,Medical Center,Residential Building (Apartment / Condo),Track
7,East Harlem,Gym,Gym / Fitness Center,Yoga Studio,Martial Arts Dojo,Weight Loss Center,Boxing Gym,Track,Gymnastics Gym,Climbing Gym,Cycle Studio
8,East Village,Gym / Fitness Center,Gym,Yoga Studio,Pilates Studio,Cycle Studio,Track,Boxing Gym,Spa,Basketball Court,General College & University
9,Financial District,Gym / Fitness Center,Gym,Boxing Gym,Yoga Studio,Cycle Studio,Gym Pool,Medical Center,Corporate Amenity,Doctor's Office,Athletics & Sports


### Cluster Neighborhoods

In [29]:
# set number of clusters
kclusters = 5

# Feature matrix: every category share, without the neighborhood label.
# The axis is named by keyword because pandas 2.x no longer accepts it
# positionally.
manhattan_grouped_clustering = manhattan_grouped.drop(columns='Neighborhood')

# run k-means clustering; n_init is pinned because its default differs
# between scikit-learn versions, which would change the labels on re-run
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(manhattan_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 3, 1, 4, 4, 3, 2, 0, 3])

In [30]:
# add clustering labels as the first column; on a re-run of this cell the
# column already exists and is refreshed (insert() would raise ValueError)
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# attach the cluster label and the ranked venue categories to the
# coordinates of every Manhattan neighborhood
manhattan_merged = manhattan_data.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

manhattan_merged.head(10)

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Manhattan,Marble Hill,40.876551,-73.91066,4,Gym / Fitness Center,Gym,Yoga Studio,Pilates Studio,Martial Arts Dojo,Weight Loss Center,Boxing Gym,Tennis Stadium,State / Provincial Park,Cultural Center
1,Manhattan,Chinatown,40.715618,-73.994279,4,Gym / Fitness Center,Gym,Yoga Studio,Pilates Studio,Boxing Gym,Martial Arts Dojo,Athletics & Sports,Cycle Studio,Office,Corporate Amenity
2,Manhattan,Washington Heights,40.851903,-73.9369,2,Gym,Gym / Fitness Center,Yoga Studio,Pilates Studio,Gymnastics Gym,General College & University,Exhibit,Dog Run,Doctor's Office,Dance Studio
3,Manhattan,Inwood,40.867684,-73.92121,2,Gym,Gym / Fitness Center,Yoga Studio,Pilates Studio,Gymnastics Gym,General College & University,Exhibit,Dog Run,Doctor's Office,Dance Studio
4,Manhattan,Hamilton Heights,40.823604,-73.949688,0,Gym / Fitness Center,Gym,Yoga Studio,Cycle Studio,Martial Arts Dojo,State / Provincial Park,Basketball Court,Climbing Gym,Track,Cultural Center
5,Manhattan,Manhattanville,40.816934,-73.957385,2,Gym,Gym / Fitness Center,Yoga Studio,Pilates Studio,Track,Basketball Court,Climbing Gym,Cycle Studio,Gym Pool,Park
6,Manhattan,Central Harlem,40.815976,-73.943211,3,Gym,Gym / Fitness Center,Yoga Studio,Cycle Studio,Martial Arts Dojo,Athletics & Sports,General College & University,Climbing Gym,Pilates Studio,Corporate Amenity
7,Manhattan,East Harlem,40.792249,-73.944182,2,Gym,Gym / Fitness Center,Yoga Studio,Martial Arts Dojo,Weight Loss Center,Boxing Gym,Track,Gymnastics Gym,Climbing Gym,Cycle Studio
8,Manhattan,Upper East Side,40.775639,-73.960508,0,Gym / Fitness Center,Gym,Yoga Studio,Cycle Studio,Pilates Studio,Spa,Building,Club House,Martial Arts Dojo,Track
9,Manhattan,Yorkville,40.77593,-73.947118,0,Gym / Fitness Center,Gym,Yoga Studio,Pilates Studio,School,Building,Community Center,Cycle Studio,Martial Arts Dojo,Track


In [31]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one hex colour per cluster label,
# sampled evenly from the rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(manhattan_merged['Latitude'], manhattan_merged['Longitude'], manhattan_merged['Neighborhood'], manhattan_merged['Cluster Labels']):
    if pd.isna(cluster):
        # The join leaves no label for a neighborhood without venue data;
        # draw it in grey instead of failing on the colour lookup.
        marker_color = '#808080'
        cluster_text = 'n/a'
    else:
        # Labels turn into floats as soon as one of them is missing.
        cluster = int(cluster)
        # Index by the label itself; "cluster - 1" only worked because
        # label 0 wrapped around to the last colour.
        marker_color = rainbow[cluster]
        cluster_text = str(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + cluster_text, parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [32]:
# Cluster 0: neighborhood name (column 1) plus every column from position 5 on.
cluster_rows = manhattan_merged['Cluster Labels'] == 0
shown_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[cluster_rows, shown_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Hamilton Heights,Gym / Fitness Center,Gym,Yoga Studio,Cycle Studio,Martial Arts Dojo,State / Provincial Park,Basketball Court,Climbing Gym,Track,Cultural Center
8,Upper East Side,Gym / Fitness Center,Gym,Yoga Studio,Cycle Studio,Pilates Studio,Spa,Building,Club House,Martial Arts Dojo,Track
9,Yorkville,Gym / Fitness Center,Gym,Yoga Studio,Pilates Studio,School,Building,Community Center,Cycle Studio,Martial Arts Dojo,Track
10,Lenox Hill,Gym / Fitness Center,Gym,Yoga Studio,Cycle Studio,Pilates Studio,Spa,Club House,Martial Arts Dojo,Spiritual Center,Tennis Court
13,Lincoln Square,Gym / Fitness Center,Gym,Yoga Studio,Cycle Studio,Cultural Center,Climbing Gym,Spa,Residential Building (Apartment / Condo),Gym Pool,Pilates Studio
16,Murray Hill,Gym / Fitness Center,Gym,Yoga Studio,Cycle Studio,Boxing Gym,Pilates Studio,General College & University,Exhibit,Dog Run,Doctor's Office
18,Greenwich Village,Gym / Fitness Center,Gym,Cycle Studio,Yoga Studio,Pilates Studio,Pool,Boxing Gym,General College & University,Exhibit,Dog Run
19,East Village,Gym / Fitness Center,Gym,Yoga Studio,Pilates Studio,Cycle Studio,Track,Boxing Gym,Spa,Basketball Court,General College & University
22,Little Italy,Gym / Fitness Center,Gym,Yoga Studio,Cycle Studio,Pilates Studio,Boxing Gym,Office,Athletics & Sports,Spa,School
23,Soho,Gym / Fitness Center,Gym,Yoga Studio,Cycle Studio,Pilates Studio,Boxing Gym,Pool,Office,Martial Arts Dojo,Athletics & Sports


In [33]:
# Cluster 1: neighborhood name (column 1) plus every column from position 5 on.
cluster_rows = manhattan_merged['Cluster Labels'] == 1
shown_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[cluster_rows, shown_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Chelsea,Gym / Fitness Center,Gym,Cycle Studio,Yoga Studio,Spa,Recreation Center,Bike Shop,Boxing Gym,Dance Studio,Basketball Court
24,West Village,Gym / Fitness Center,Gym,Yoga Studio,Cycle Studio,Pilates Studio,Pool,Spa,Physical Therapist,Basketball Court,General College & University
27,Gramercy,Gym / Fitness Center,Gym,Cycle Studio,Yoga Studio,Pilates Studio,Bike Shop,Boxing Gym,Spa,General College & University,Exhibit
38,Flatiron,Gym / Fitness Center,Gym,Cycle Studio,Yoga Studio,Bike Shop,Boxing Gym,Pilates Studio,General College & University,Exhibit,Dog Run


In [34]:
# Cluster 2: neighborhood name (column 1) plus every column from position 5 on.
cluster_rows = manhattan_merged['Cluster Labels'] == 2
shown_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[cluster_rows, shown_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Washington Heights,Gym,Gym / Fitness Center,Yoga Studio,Pilates Studio,Gymnastics Gym,General College & University,Exhibit,Dog Run,Doctor's Office,Dance Studio
3,Inwood,Gym,Gym / Fitness Center,Yoga Studio,Pilates Studio,Gymnastics Gym,General College & University,Exhibit,Dog Run,Doctor's Office,Dance Studio
5,Manhattanville,Gym,Gym / Fitness Center,Yoga Studio,Pilates Studio,Track,Basketball Court,Climbing Gym,Cycle Studio,Gym Pool,Park
7,East Harlem,Gym,Gym / Fitness Center,Yoga Studio,Martial Arts Dojo,Weight Loss Center,Boxing Gym,Track,Gymnastics Gym,Climbing Gym,Cycle Studio


In [35]:
# Cluster 3: neighborhood name (column 1) plus every column from position 5 on.
cluster_rows = manhattan_merged['Cluster Labels'] == 3
shown_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[cluster_rows, shown_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Central Harlem,Gym,Gym / Fitness Center,Yoga Studio,Cycle Studio,Martial Arts Dojo,Athletics & Sports,General College & University,Climbing Gym,Pilates Studio,Corporate Amenity
11,Roosevelt Island,Gym / Fitness Center,Gym,Yoga Studio,Pilates Studio,School,Club House,Dance Studio,Non-Profit,Recreation Center,Martial Arts Dojo
14,Clinton,Gym,Gym / Fitness Center,Yoga Studio,Cycle Studio,Exhibit,Boxing Gym,Building,Medical Center,Residential Building (Apartment / Condo),Track
15,Midtown,Gym,Gym / Fitness Center,Cycle Studio,Yoga Studio,Boxing Gym,Gym Pool,General College & University,Exhibit,Dog Run,Doctor's Office
25,Manhattan Valley,Gym,Gym / Fitness Center,Yoga Studio,Martial Arts Dojo,Cycle Studio,Track,Medical Center,Playground,Corporate Amenity,General College & University
29,Financial District,Gym / Fitness Center,Gym,Boxing Gym,Yoga Studio,Cycle Studio,Gym Pool,Medical Center,Corporate Amenity,Doctor's Office,Athletics & Sports
34,Sutton Place,Gym,Gym / Fitness Center,Cycle Studio,Yoga Studio,Pilates Studio,Club House,Gym Pool,Physical Therapist,Martial Arts Dojo,Tennis Court
35,Turtle Bay,Gym / Fitness Center,Gym,Cycle Studio,Yoga Studio,Spa,Boxing Gym,Pilates Studio,Martial Arts Dojo,Spiritual Center,Sports Club
36,Tudor City,Gym,Gym / Fitness Center,Cycle Studio,Yoga Studio,Spa,Boxing Gym,Gym Pool,Pilates Studio,Martial Arts Dojo,School
37,Stuyvesant Town,Gym,Gym / Fitness Center,Pilates Studio,Yoga Studio,Outdoor Gym,Track,Gym Pool,Beer Garden,Basketball Court,General College & University


In [36]:
# Cluster 4: neighborhood name (column 1) plus every column from position 5 on.
cluster_rows = manhattan_merged['Cluster Labels'] == 4
shown_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[cluster_rows, shown_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Marble Hill,Gym / Fitness Center,Gym,Yoga Studio,Pilates Studio,Martial Arts Dojo,Weight Loss Center,Boxing Gym,Tennis Stadium,State / Provincial Park,Cultural Center
1,Chinatown,Gym / Fitness Center,Gym,Yoga Studio,Pilates Studio,Boxing Gym,Martial Arts Dojo,Athletics & Sports,Cycle Studio,Office,Corporate Amenity
12,Upper West Side,Gym,Gym / Fitness Center,Pilates Studio,Yoga Studio,Cycle Studio,Martial Arts Dojo,Track,Dog Run,Gym Pool,Weight Loss Center
20,Lower East Side,Gym / Fitness Center,Yoga Studio,Gym,Pilates Studio,Boxing Gym,Outdoor Gym,Pool,Martial Arts Dojo,Athletics & Sports,Track
21,Tribeca,Gym,Gym / Fitness Center,Yoga Studio,Boxing Gym,Cycle Studio,Pilates Studio,Gym Pool,Pool,Corporate Amenity,Office
32,Civic Center,Gym,Gym / Fitness Center,Yoga Studio,Boxing Gym,Cycle Studio,Pilates Studio,Corporate Amenity,Gym Pool,Office,Martial Arts Dojo
