# Capstone Project - The Battle of Neighborhoods

In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
import urllib.request
import json
from bs4 import BeautifulSoup
from urllib.request import urlopen
import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import matplotlib.colors as colors
%matplotlib inline
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes
import folium

print('Libraries imported.')

Libraries imported.


## 1. Download and Explore Dataset


New York City has a total of 5 boroughs and 306 neighborhoods. In order to segment the neighborhoods and explore them, we need a dataset that contains the 5 boroughs and the neighborhoods that exist in each borough, as well as the latitude and longitude coordinates of each neighborhood.

For your convenience, I downloaded the file and placed it on the server, so you can simply run a `wget` command to access the data. So let's go ahead and do that.

In [2]:

# Download the dataset with the shell's wget and store it as newyork_data.json
# in the current working directory.
!wget -q -O 'newyork_data.json' https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs/newyork_data.json
# NOTE: this message is printed unconditionally; nothing here checks that wget succeeded.
print('Data downloaded!')

Data downloaded!


In [3]:
# Parse the downloaded file into a Python dict.
# The encoding is stated explicitly because open() otherwise falls back to a
# platform-dependent default, while JSON documents are UTF-8.
with open('newyork_data.json', encoding='utf-8') as json_data:
    newyork_data = json.load(json_data)

In [4]:
# Display the whole parsed document (every feature is printed, so the output is long).
newyork_data

{'type': 'FeatureCollection',
 'totalFeatures': 306,
 'features': [{'type': 'Feature',
   'id': 'nyu_2451_34572.1',
   'geometry': {'type': 'Point',
    'coordinates': [-73.84720052054902, 40.89470517661]},
   'geometry_name': 'geom',
   'properties': {'name': 'Wakefield',
    'stacked': 1,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661]}},
  {'type': 'Feature',
   'id': 'nyu_2451_34572.2',
   'geometry': {'type': 'Point',
    'coordinates': [-73.82993910812398, 40.87429419303012]},
   'geometry_name': 'geom',
   'properties': {'name': 'Co-op City',
    'stacked': 2,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.87429419303012]}},
  {'type': 'Feature',
 

In [6]:
# The 'features' entry holds the list of per-neighborhood records.
neighborhoods_data = newyork_data['features']

In [9]:
# Inspect the first record to see which keys carry the name, borough and coordinates.
neighborhoods_data[0]

{'type': 'Feature',
 'id': 'nyu_2451_34572.1',
 'geometry': {'type': 'Point',
  'coordinates': [-73.84720052054902, 40.89470517661]},
 'geometry_name': 'geom',
 'properties': {'name': 'Wakefield',
  'stacked': 1,
  'annoline1': 'Wakefield',
  'annoline2': None,
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.84720052054902,
   40.89470517661,
   -73.84720052054902,
   40.89470517661]}}

Transform the data into a pandas DataFrame

In [10]:
# Column layout of the neighborhoods table
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# Start with an empty frame that already carries those columns
neighborhoods = pd.DataFrame(columns=column_names)

In [24]:
# Collect one record per feature and create the frame in a single call.
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration. Rebuilding from the records also means
# re-running this cell no longer duplicates rows.
neighborhood_records = []
for data in neighborhoods_data:
    properties = data['properties']
    # coordinates are stored as [longitude, latitude]
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_records.append({
        'Borough': properties['borough'],
        'Neighborhood': properties['name'],
        'Latitude': neighborhood_latlon[1],
        'Longitude': neighborhood_latlon[0],
    })

neighborhoods = pd.DataFrame(neighborhood_records, columns=column_names)
    

In [27]:
# List the distinct borough names present in the table.
neighborhoods["Borough"].unique()

array(['Manhattan', 'Bronx', 'Brooklyn', 'Queens', 'Staten Island'],
      dtype=object)

In [15]:
# Report the number of distinct boroughs and the number of neighborhood rows
borough_count = len(neighborhoods['Borough'].unique())
neighborhood_count = neighborhoods.shape[0]
print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, neighborhood_count))

The dataframe has 5 boroughs and 306 neighborhoods.


### Use geopy library to get the latitude and longitude values of New York City.

In [16]:
address = 'New York City, NY'

# Look the address up through the Nominatim geocoder
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the next line
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


### Extract the Queens neighborhoods for mapping

In [62]:
# Keep only the Queens rows and renumber them from zero
is_queens = neighborhoods['Borough'] == 'Queens'
queens_data = neighborhoods.loc[is_queens].reset_index(drop=True)
queens_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Queens,Astoria,40.768509,-73.915654
1,Queens,Woodside,40.746349,-73.901842
2,Queens,Jackson Heights,40.751981,-73.882821
3,Queens,Elmhurst,40.744049,-73.881656
4,Queens,Howard Beach,40.654225,-73.838138


### Map of Queens

In [65]:
# Create the base map, centred on the New York City coordinates held in
# `latitude` / `longitude`
map_queens = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per Queens neighborhood, labelled "<neighborhood>, <borough>"
for lat, lng, borough, neighborhood in zip(queens_data['Latitude'], queens_data['Longitude'], queens_data['Borough'], queens_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    popup = folium.Popup(label, parse_html=True)
    # parse_html is a Popup option, so it is passed to the Popup only and no
    # longer forwarded to CircleMarker as well
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_queens)

map_queens

### Foursquare Venues

In [18]:
def getNearbyVenues(names, latitudes, longitudes, radius=5000, categoryIds=''):
    """Query the Foursquare venue search once per location and tabulate the hits.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences; they are zipped together, so one request is made
        per (name, latitude, longitude) triple.
    radius : number, default 5000
        Sent as the ``radius`` query parameter (presumably metres - confirm
        against the Foursquare API documentation).
    categoryIds : str, default ''
        Sent as the ``categoryId`` query parameter; an empty string means no
        category filter is sent.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue that has at least one category, with the
        columns 'Neighborhood', 'Neighborhood Latitude',
        'Neighborhood Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. The columns are present even
        when no venue was found.

    Notes
    -----
    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and
    ``LIMIT`` at call time. A location whose request fails or whose payload
    lacks the expected keys is reported and skipped. The request URL is not
    printed because it contains the client secret.
    """
    venue_columns = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']
    search_url = 'https://api.foursquare.com/v2/venues/search'

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # let requests build and escape the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        if categoryIds != '':
            params['categoryId'] = categoryIds

        try:
            response = requests.get(search_url, params=params, timeout=30)
            response.raise_for_status()
            results = response.json()['response']['venues']
        except (requests.RequestException, KeyError, TypeError, ValueError) as err:
            # only the exception type is shown: its message may embed the full URL
            print('Skipping {}: venue search failed ({})'.format(name, type(err).__name__))
            continue

        # keep only the fields needed downstream
        for v in results:
            try:
                row = (
                    name,
                    lat,
                    lng,
                    v['name'],
                    v['location']['lat'],
                    v['location']['lng'],
                    v['categories'][0]['name'],
                )
            except (KeyError, IndexError, TypeError):
                # venue without a category (or another required field): skip it
                continue
            venue_rows.append(row)

    return pd.DataFrame(venue_rows, columns=venue_columns)

### Foursquare credentials

In [26]:
import os
from getpass import getpass

# Credentials must not live in the notebook. They are read from the environment,
# with an interactive prompt as fallback. Any key that was ever committed in
# plain text should be revoked and regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') or getpass('Foursquare client id: ')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET') or getpass('Foursquare client secret: ')
VERSION = '20181020'  # sent as the `v` query parameter by getNearbyVenues
# sent as the `limit` query parameter
# NOTE(review): the API presumably caps this well below 500 - confirm against the Foursquare docs
LIMIT = 500
radius = 5000  # module-level value; getNearbyVenues uses its own `radius` argument instead

In [66]:
# Category id used as the search filter (Indian restaurants, per the category
# list at https://developer.foursquare.com/docs/resources/categories - TODO confirm)
indian = "4bf58dd8d48988d10f941735"

# Queens rows only, renumbered from zero
queens = neighborhoods.loc[neighborhoods['Borough'] == 'Queens'].reset_index(drop=True)

# One venue search per Queens neighborhood, restricted to the category above
queens_venues_indian = getNearbyVenues(
    names=queens['Neighborhood'],
    latitudes=queens['Latitude'],
    longitudes=queens['Longitude'],
    radius=1000,
    categoryIds=indian,
)
queens_venues_indian.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Astoria,40.768509,-73.915654,The Gully,40.766719,-73.91241,Indian Restaurant
1,Astoria,40.768509,-73.915654,Tikka Indian Grill,40.765397,-73.918658,Indian Restaurant
2,Astoria,40.768509,-73.915654,Mysttik Masaala,40.770163,-73.906086,Food Truck
3,Astoria,40.768509,-73.915654,Alpha Restaurant & Lounge,40.768759,-73.910756,Hookah Bar
4,Astoria,40.768509,-73.915654,Taste of Bengal,40.767854,-73.920663,Indian Restaurant


In [67]:
def addToMap(df, color, existingMap):
    """Draw one circle marker per row of ``df`` onto ``existingMap``.

    ``df`` must provide the columns 'Venue Latitude', 'Venue Longitude',
    'Neighborhood', 'Venue' and 'Venue Category'. Each marker is placed at the
    venue coordinates, uses ``color`` for both outline and fill, and carries a
    popup reading "<venue> (<category>) - <neighborhood>". Returns None; the
    map is modified through ``add_to``.
    """
    label_template = '{} ({}) - {}'
    marker_fields = df[['Venue Latitude', 'Venue Longitude', 'Neighborhood', 'Venue', 'Venue Category']]

    # plain tuples, one per row, in the column order selected above
    for lat, lng, local, venue, venueCat in marker_fields.itertuples(index=False, name=None):
        popup = folium.Popup(label_template.format(venue, venueCat, local), parse_html=True)
        marker = folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=popup,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
        )
        marker.add_to(existingMap)

In [69]:
# New base map with the venue search results drawn in red
map_queens_indian = folium.Map(
    location=[latitude, longitude],
    zoom_start=10,
)
addToMap(df=queens_venues_indian, color='red', existingMap=map_queens_indian)

map_queens_indian

In [70]:
# Number of venue rows per neighborhood (count() reports it for every remaining column)
queens_grouped = queens_venues_indian.groupby('Neighborhood').count()

unique_categories = queens_venues_indian['Venue Category'].unique()
print('There are {} uniques categories.'.format(len(unique_categories)))
queens_grouped

There are 16 uniques categories.


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Astoria,15,15,15,15,15,15
Astoria Heights,3,3,3,3,3,3
Bay Terrace,1,1,1,1,1,1
Bayside,4,4,4,4,4,4
Bellaire,2,2,2,2,2,2
Bellerose,5,5,5,5,5,5
Blissville,4,4,4,4,4,4
Briarwood,8,8,8,8,8,8
Elmhurst,45,45,45,45,45,45
Floral Park,20,20,20,20,20,20


### Analyze each neighborhood

In [71]:
# One indicator column per venue category; the empty prefix keeps the bare category name
queens_onehot = pd.get_dummies(queens_venues_indian[['Venue Category']], prefix="", prefix_sep="")

# Attach the neighborhood of each venue row
queens_onehot['Neighborhood'] = queens_venues_indian['Neighborhood']

# Rotate the last column to the front and keep the others in their existing order
*leading_columns, last_column = queens_onehot.columns
queens_onehot = queens_onehot[[last_column, *leading_columns]]

queens_onehot.head()

Unnamed: 0,Neighborhood,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar,Indian Chinese Restaurant,Indian Restaurant,Middle Eastern Restaurant,North Indian Restaurant,Persian Restaurant,Snack Place,Tibetan Restaurant
0,Astoria,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
1,Astoria,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
2,Astoria,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
3,Astoria,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
4,Astoria,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0


### Count of each cuisine per neighborhood

In [72]:
# Sum the indicator columns per neighborhood, then turn the group key back into a column
category_totals = queens_onehot.groupby('Neighborhood').sum()
queens_grouped = category_totals.reset_index()
queens_grouped.head()

Unnamed: 0,Neighborhood,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar,Indian Chinese Restaurant,Indian Restaurant,Middle Eastern Restaurant,North Indian Restaurant,Persian Restaurant,Snack Place,Tibetan Restaurant
0,Astoria,0,0,0,0,0,1,0,0,1,0,13,0,0,0,0,0
1,Astoria Heights,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,0
2,Bay Terrace,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
3,Bayside,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
4,Bellaire,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0


In [73]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    Parameters
    ----------
    row : pandas.Series
        The first element is skipped (callers pass rows whose first entry is
        the neighborhood name); the remaining values are ranked.
    num_top_venues : int
        Maximum number of labels to return.

    Returns
    -------
    numpy.ndarray
        Index labels ordered by descending value. Fewer than
        ``num_top_venues`` labels are returned when the row has fewer entries.
    """
    row_categories = row.iloc[1:]
    # A stable sort keeps tied values in their original (column) order, so the
    # ranking no longer depends on the sorting algorithm's internals.
    row_categories_sorted = row_categories.sort_values(ascending=False, kind='mergesort')

    return row_categories_sorted.index.values[0:num_top_venues]

## Top 10 common venues in each neighborhood

In [92]:
num_top_venues = 10


def _ordinal(n):
    """Return ``n`` followed by its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st, ...)."""
    if 10 <= n % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return '{}{}'.format(n, suffix)


# create columns according to number of top venues
columns = ['Neighborhood'] + [
    '{} Most Common Venue'.format(_ordinal(rank))
    for rank in range(1, num_top_venues + 1)
]

# Build every row first and create the frame once, instead of assigning row by
# row through .iloc. Each row is the neighborhood name followed by its
# top-ranked categories.
top_venue_rows = [
    [row['Neighborhood'], *return_most_common_venues(row, num_top_venues)]
    for _, row in queens_grouped.iterrows()
]
common_venues_sorted = pd.DataFrame(top_venue_rows, columns=columns)

common_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Astoria,Indian Restaurant,Food Truck,Hookah Bar,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Halal Restaurant,Himalayan Restaurant
1,Astoria Heights,Indian Restaurant,Food Truck,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Halal Restaurant,Himalayan Restaurant,Hookah Bar
2,Bay Terrace,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
3,Bayside,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
4,Bellaire,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar


## Cluster Neighborhoods

In [75]:
# set number of clusters
kclusters = 5

# Use only the per-category counts as features. The positional axis argument
# of drop() was removed in pandas 2.0, so the column is named explicitly.
queens_grouped_clustering = queens_grouped.drop(columns='Neighborhood')

# Run k-means clustering. random_state fixes the initialisation; n_init=10
# pins the value that was the default before scikit-learn 1.4, so results do
# not shift with the installed version.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(queens_grouped_clustering)

# check cluster labels generated for the first rows of the dataframe
kmeans.labels_[0:10]

array([3, 0, 0, 4, 0, 4, 0, 4, 1, 2])

In [120]:
# Attach the cluster labels to a copy of the ranked-venue table. drop() returns
# a new frame, so common_venues_sorted itself is never modified and this cell
# can be re-run (DataFrame.insert raises if the column already exists).
# kmeans.labels_ has one entry per row of the table the model was fitted on,
# which is in the same neighborhood order as common_venues_sorted.
clustered_venues = common_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
clustered_venues.insert(0, 'Cluster Labels', kmeans.labels_)

# Left-join onto the Queens neighborhoods; neighborhoods without any venue get NaN
queens_merged = queens_data.join(clustered_venues.set_index('Neighborhood'), on='Neighborhood')
# nullable integer dtype so the labels stay integers despite the missing values
queens_merged['Cluster Labels'] = queens_merged['Cluster Labels'].astype('Int64')
# keep only neighborhoods that received a cluster label and venue ranking
queens_merged_1 = queens_merged.dropna()
queens_merged_1

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Queens,Astoria,40.768509,-73.915654,3,Indian Restaurant,Food Truck,Hookah Bar,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Halal Restaurant,Himalayan Restaurant
1,Queens,Woodside,40.746349,-73.901842,1,Indian Restaurant,Asian Restaurant,Food Court,Halal Restaurant,Himalayan Restaurant,Persian Restaurant,Snack Place,Tibetan Restaurant,Caribbean Restaurant,Chinese Restaurant
2,Queens,Jackson Heights,40.751981,-73.882821,1,Indian Restaurant,Asian Restaurant,Food Court,Halal Restaurant,Persian Restaurant,Snack Place,Tibetan Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place
3,Queens,Elmhurst,40.744049,-73.881656,1,Indian Restaurant,Asian Restaurant,Food Court,Halal Restaurant,Persian Restaurant,Snack Place,Tibetan Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place
6,Queens,Forest Hills,40.725264,-73.844475,4,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
7,Queens,Kew Gardens,40.705179,-73.829819,3,Indian Restaurant,Caribbean Restaurant,Asian Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
8,Queens,Richmond Hill,40.697947,-73.831833,2,Indian Restaurant,Caribbean Restaurant,Chinese Restaurant,Asian Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
10,Queens,Long Island City,40.750217,-73.939202,3,Indian Restaurant,North Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant
11,Queens,Sunnyside,40.740176,-73.926916,4,Indian Restaurant,Chinese Restaurant,North Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant
13,Queens,Maspeth,40.725427,-73.896217,0,Indian Restaurant,North Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant


In [125]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# One colour per cluster label; the label is used as an index into this list,
# so it needs at least as many entries as there are clusters.
rainbow = ["purple","blue","green","black","yellow"]

# add one marker per labelled neighborhood, coloured by its cluster
# (the unused `markers_colors` list was dropped)
for lat, lon, poi, cluster in zip(queens_merged_1['Latitude'], queens_merged_1['Longitude'], queens_merged_1['Neighborhood'], queens_merged_1['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    cluster_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [137]:
# NOTE(review): `queens_merged_2` is never assigned in the visible notebook, so this
# cell fails on a fresh kernel. `queens_merged_1` (the joined table without missing
# rows) is used instead - confirm it is the intended frame.
# Column 1 is Neighborhood; columns 5 onward are the ranked venue columns.
queens_merged_1.loc[queens_merged_1['Cluster Labels'] == 0, queens_merged_1.columns[[1] + list(range(5, queens_merged_1.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Maspeth,Indian Restaurant,North Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant
14,Ridgewood,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
17,Woodhaven,Indian Restaurant,Middle Eastern Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant
18,Ozone Park,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
21,Whitestone,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
28,Kew Gardens Hills,Indian Chinese Restaurant,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant
29,Fresh Meadows,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
33,Queens Village,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
34,Hollis,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
35,South Jamaica,Caribbean Restaurant,Asian Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar,Indian Chinese Restaurant


In [132]:
# NOTE(review): `queens_merged_2` is never assigned in the visible notebook;
# `queens_merged_1` is used instead - confirm it is the intended frame.
# Column 1 is Neighborhood; columns 5 onward are the ranked venue columns.
queens_merged_1.loc[queens_merged_1['Cluster Labels'] == 1, queens_merged_1.columns[[1] + list(range(5, queens_merged_1.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Woodside,Indian Restaurant,Asian Restaurant,Food Court,Halal Restaurant,Himalayan Restaurant,Persian Restaurant,Snack Place,Tibetan Restaurant,Caribbean Restaurant,Chinese Restaurant
2,Jackson Heights,Indian Restaurant,Asian Restaurant,Food Court,Halal Restaurant,Persian Restaurant,Snack Place,Tibetan Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place
3,Elmhurst,Indian Restaurant,Asian Restaurant,Food Court,Halal Restaurant,Persian Restaurant,Snack Place,Tibetan Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place


In [138]:
# NOTE(review): `queens_merged_2` is never assigned in the visible notebook;
# `queens_merged_1` is used instead - confirm it is the intended frame.
# Names of the neighborhoods assigned to cluster 1.
queens_merged_1.loc[queens_merged_1['Cluster Labels'] == 1, "Neighborhood"]

1           Woodside
2    Jackson Heights
3           Elmhurst
Name: Neighborhood, dtype: object

In [133]:
# NOTE(review): `queens_merged_2` is never assigned in the visible notebook;
# `queens_merged_1` is used instead - confirm it is the intended frame.
# Column 1 is Neighborhood; columns 5 onward are the ranked venue columns.
queens_merged_1.loc[queens_merged_1['Cluster Labels'] == 2, queens_merged_1.columns[[1] + list(range(5, queens_merged_1.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Richmond Hill,Indian Restaurant,Caribbean Restaurant,Chinese Restaurant,Asian Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
52,Floral Park,Indian Restaurant,Asian Restaurant,Dosa Place,Caribbean Restaurant,Chinese Restaurant,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar


In [139]:
# NOTE(review): `queens_merged_2` is never assigned in the visible notebook;
# `queens_merged_1` is used instead - confirm it is the intended frame.
# Names of the neighborhoods assigned to cluster 2.
queens_merged_1.loc[queens_merged_1['Cluster Labels'] == 2, "Neighborhood"]

8     Richmond Hill
52      Floral Park
Name: Neighborhood, dtype: object

In [134]:
# NOTE(review): `queens_merged_2` is never assigned in the visible notebook;
# `queens_merged_1` is used instead - confirm it is the intended frame.
# Column 1 is Neighborhood; columns 5 onward are the ranked venue columns.
queens_merged_1.loc[queens_merged_1['Cluster Labels'] == 3, queens_merged_1.columns[[1] + list(range(5, queens_merged_1.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Astoria,Indian Restaurant,Food Truck,Hookah Bar,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Halal Restaurant,Himalayan Restaurant
7,Kew Gardens,Indian Restaurant,Caribbean Restaurant,Asian Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
10,Long Island City,Indian Restaurant,North Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant
26,Glen Oaks,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
31,Jamaica Center,Indian Restaurant,Chinese Restaurant,Caribbean Restaurant,Asian Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
54,Jamaica Estates,Indian Restaurant,Chinese Restaurant,Asian Restaurant,Caribbean Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
57,Ravenswood,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
68,Jamaica Hills,Indian Restaurant,Chinese Restaurant,North Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant
80,Queensbridge,Indian Restaurant,North Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant


In [135]:
# NOTE(review): `queens_merged_2` is never assigned in the visible notebook;
# `queens_merged_1` is used instead - confirm it is the intended frame.
# Column 1 is Neighborhood; columns 5 onward are the ranked venue columns.
queens_merged_1.loc[queens_merged_1['Cluster Labels'] == 4, queens_merged_1.columns[[1] + list(range(5, queens_merged_1.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Forest Hills,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
11,Sunnyside,Indian Restaurant,Chinese Restaurant,North Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant
16,Rego Park,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
19,South Ozone Park,Indian Restaurant,Caribbean Restaurant,North Indian Restaurant,Asian Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant
22,Bayside,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
27,Bellerose,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
30,Briarwood,Indian Restaurant,North Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant
44,Steinway,Indian Restaurant,Food Truck,Hookah Bar,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Halal Restaurant,Himalayan Restaurant
51,Murray Hill,Indian Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant,Hookah Bar
55,Queensboro Hill,Indian Restaurant,Middle Eastern Restaurant,Asian Restaurant,Caribbean Restaurant,Chinese Restaurant,Dosa Place,Food Court,Food Truck,Halal Restaurant,Himalayan Restaurant


## Results and Discussions

* The clustering model has grouped the data into 5 clusters based on the count of each cuisine.
* All the clusters have Indian restaurants as the 1st most common venue, followed by Indian Chinese restaurants.
* We also have to consider that there are neighborhoods with no data on any restaurants situated near them.
* Clusters 1 and 2 have a total of 5 neighborhoods, and their common venues are related to Asian cuisines.
* Clusters 0, 3 and 4 contain the remaining neighborhoods, where Asian cuisines dominate the top 10 common venues.
* Based on this, we can recommend that the stakeholder open a restaurant in the neighborhoods in clusters 1 and 2.
* The stakeholder can open an Indian restaurant in the following neighborhoods:
    * Woodside
    * Jackson Heights
    * Elmhurst
    * Richmond Hill
    * Floral Park
    

## Conclusions

Although the final goal of the project is met, there is definitely room for further improvement and development where more features can be used to fit the model.