# Exploring the neighbourhoods of New York for a new Coffee Shop venue
## Importing Libraries

In [360]:
!pip install beautifulsoup4 lxml html5lib requests folium geopy
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import folium
import matplotlib.pyplot as plt
import seaborn as sns
from geopy.geocoders import Nominatim
import json
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors



## Loading in our location data for New York
### The file has been pre-downloaded to prevent problems when running the code

In [294]:
with open('nyneigh.json') as json_data:
    ny_data = json.load(json_data)

In [295]:
ny_data

{'type': 'FeatureCollection',
 'totalFeatures': 306,
 'features': [{'type': 'Feature',
   'id': 'nyu_2451_34572.1',
   'geometry': {'type': 'Point',
    'coordinates': [-73.84720052054902, 40.89470517661]},
   'geometry_name': 'geom',
   'properties': {'name': 'Wakefield',
    'stacked': 1,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661]}},
  {'type': 'Feature',
   'id': 'nyu_2451_34572.2',
   'geometry': {'type': 'Point',
    'coordinates': [-73.82993910812398, 40.87429419303012]},
   'geometry_name': 'geom',
   'properties': {'name': 'Co-op City',
    'stacked': 2,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.87429419303012]}},
  {'type': 'Feature',
 

In [296]:
ny_neighbors = ny_data["features"]

In [297]:
neighbors = pd.DataFrame(columns=['District', 'Neighborhood', 'Latitude', 'Longitude'])

In [298]:
neighbors

Unnamed: 0,District,Neighborhood,Latitude,Longitude


In [299]:
for data in ny_neighbors:
    district = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighbors = neighbors.append({'District': district,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [300]:
neighbors

Unnamed: 0,District,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585
...,...,...,...,...
301,Manhattan,Hudson Yards,40.756658,-74.000111
302,Queens,Hammels,40.587338,-73.805530
303,Queens,Bayswater,40.611322,-73.765968
304,Queens,Queensbridge,40.756091,-73.945631


In [301]:
print('The dataframe has {} districts and {} neighborhoods.'.format(
        len(neighbors['District'].unique()),
        neighbors.shape[0]
    )
)

The dataframe has 5 districts and 306 neighborhoods.


In [302]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="NY")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


## Create a map of NY with the neighborhoods

In [303]:
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, district, neighborhood in zip(neighbors['Latitude'], neighbors['Longitude'], neighbors['District'], neighbors['Neighborhood']):
    label = '{}, {}'.format(neighborhood, district)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='yellow',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

## Making preparations for the FourSquare api request

In [304]:
CLIENT_ID = 'IT0NFU2BRJ1WIDYJOAK2WPGIMWQQTZ5LFWPRQNAN3UVJOQGW' # your Foursquare ID
CLIENT_SECRET = 'N20TH1204TXX4TZ3BYX1NKEWLOCIGIJ2L2SOXDDIMCTUUEEB' # your Foursquare Secret
VERSION = '20200605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: IT0NFU2BRJ1WIDYJOAK2WPGIMWQQTZ5LFWPRQNAN3UVJOQGW
CLIENT_SECRET:N20TH1204TXX4TZ3BYX1NKEWLOCIGIJ2L2SOXDDIMCTUUEEB


In [305]:
neighborhood_latitude = neighbors.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = neighbors.loc[0, 'Longitude'] # neighborhood longitude value
neighborhood_name = neighbors.loc[0, 'Neighborhood'] # neighborhood name

In [306]:
neighborhood_latitude

40.89470517661

In [307]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
query = "food"
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&query={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT,
    query)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=IT0NFU2BRJ1WIDYJOAK2WPGIMWQQTZ5LFWPRQNAN3UVJOQGW&client_secret=N20TH1204TXX4TZ3BYX1NKEWLOCIGIJ2L2SOXDDIMCTUUEEB&v=20200605&ll=40.89470517661,-73.84720052054902&radius=500&limit=100&query=food'

## Making a test with a single neighborhood using foursquare api

In [308]:
results = requests.get(url).json()

In [309]:
#Getting the types of the venues
def venue_type(row):
    try:
        cat_list = row['categories']
    except:
        cat_list = row['venue.categories']
        
    if len(cat_list) == 0:
        return None
    else:
        return cat_list[0]['name']

In [310]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(venue_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,Dunkin',Donut Shop,40.890459,-73.849089
1,Subway,Sandwich Place,40.890468,-73.849152
2,Pitman Deli,Food,40.896744,-73.844398
3,Central Deli,Deli / Bodega,40.896728,-73.844387
4,Louis Pizza,Pizza Place,40.898399,-73.84881


## Now do it for all the neighborhoods in New York

In [311]:
def getNearbyVenues(names, latitudes, longitudes, query, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&query={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT,
            query)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [312]:
new_york_venues = getNearbyVenues(names=neighbors['Neighborhood'],
                                   latitudes=neighbors['Latitude'],
                                   longitudes=neighbors['Longitude'],
                                  query="food"
                                  )

Wakefield
Co-op City
Eastchester
Fieldston
Riverdale
Kingsbridge
Marble Hill
Woodlawn
Norwood
Williamsbridge
Baychester
Pelham Parkway
City Island
Bedford Park
University Heights
Morris Heights
Fordham
East Tremont
West Farms
High  Bridge
Melrose
Mott Haven
Port Morris
Longwood
Hunts Point
Morrisania
Soundview
Clason Point
Throgs Neck
Country Club
Parkchester
Westchester Square
Van Nest
Morris Park
Belmont
Spuyten Duyvil
North Riverdale
Pelham Bay
Schuylerville
Edgewater Park
Castle Hill
Olinville
Pelham Gardens
Concourse
Unionport
Edenwald
Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker

In [313]:
new_york_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Wakefield,40.894705,-73.847201,Dunkin',40.890459,-73.849089,Donut Shop
1,Wakefield,40.894705,-73.847201,Subway,40.890468,-73.849152,Sandwich Place
2,Wakefield,40.894705,-73.847201,Pitman Deli,40.896744,-73.844398,Food
3,Wakefield,40.894705,-73.847201,Central Deli,40.896728,-73.844387,Deli / Bodega
4,Wakefield,40.894705,-73.847201,Louis Pizza,40.898399,-73.848810,Pizza Place
...,...,...,...,...,...,...,...
8197,Fox Hills,40.617311,-74.081740,SUBWAY,40.618939,-74.082881,Sandwich Place
8198,Fox Hills,40.617311,-74.081740,Tropical Supermarket,40.619338,-74.081435,Deli / Bodega
8199,Fox Hills,40.617311,-74.081740,Mona's Cuisine,40.618282,-74.084975,African Restaurant
8200,Fox Hills,40.617311,-74.081740,Bums Chicken N Ribs Joint,40.618192,-74.085506,BBQ Joint


In [314]:
new_york_coffee = getNearbyVenues(names=neighbors['Neighborhood'],
                                   latitudes=neighbors['Latitude'],
                                   longitudes=neighbors['Longitude'],
                                  query="Coffee Shop"
                                  )

Wakefield
Co-op City
Eastchester
Fieldston
Riverdale
Kingsbridge
Marble Hill
Woodlawn
Norwood
Williamsbridge
Baychester
Pelham Parkway
City Island
Bedford Park
University Heights
Morris Heights
Fordham
East Tremont
West Farms
High  Bridge
Melrose
Mott Haven
Port Morris
Longwood
Hunts Point
Morrisania
Soundview
Clason Point
Throgs Neck
Country Club
Parkchester
Westchester Square
Van Nest
Morris Park
Belmont
Spuyten Duyvil
North Riverdale
Pelham Bay
Schuylerville
Edgewater Park
Castle Hill
Olinville
Pelham Gardens
Concourse
Unionport
Edenwald
Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker

## Export our DataFrame to a csv file to be able to load in data without foursquare call limitations

In [315]:
#new_york_coffee.to_csv("nycoffee.csv")

In [316]:
#new_york_venues.to_csv("nyvenues.csv")

In [317]:
ny_venues = pd.read_csv("nyvenues.csv")
ny_coffee = pd.read_csv("nycoffee.csv")

In [318]:
ny_venues.head()

Unnamed: 0.1,Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,0,Wakefield,40.894705,-73.847201,Dunkin',40.890459,-73.849089,Donut Shop
1,1,Wakefield,40.894705,-73.847201,Subway,40.890468,-73.849152,Sandwich Place
2,2,Wakefield,40.894705,-73.847201,Pitman Deli,40.896744,-73.844398,Food
3,3,Wakefield,40.894705,-73.847201,Central Deli,40.896728,-73.844387,Deli / Bodega
4,4,Wakefield,40.894705,-73.847201,Louis Pizza,40.898399,-73.84881,Pizza Place


In [319]:
ny_coffee.head()

Unnamed: 0.1,Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,0,Kingsbridge,40.881687,-73.902818,Mon Amour Coffee & Wine,40.885009,-73.900332,Coffee Shop
1,1,Kingsbridge,40.881687,-73.902818,Gold Mine Cafe,40.878916,-73.904698,Café
2,2,Kingsbridge,40.881687,-73.902818,Tony's Cafe,40.87928,-73.905228,Café
3,3,Marble Hill,40.876551,-73.91066,Starbucks,40.877531,-73.905582,Coffee Shop
4,4,Marble Hill,40.876551,-73.91066,Starbucks,40.873755,-73.908613,Coffee Shop


In [320]:
ny_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Unnamed: 0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Allerton,24,24,24,24,24,24,24
Annadale,15,15,15,15,15,15,15
Arden Heights,2,2,2,2,2,2,2
Arlington,3,3,3,3,3,3,3
Arrochar,15,15,15,15,15,15,15
...,...,...,...,...,...,...,...
Woodhaven,20,20,20,20,20,20,20
Woodlawn,15,15,15,15,15,15,15
Woodrow,10,10,10,10,10,10,10
Woodside,71,71,71,71,71,71,71


In [321]:
print('There are {} uniques food places.'.format(len(ny_venues['Venue Category'].unique())))
print('There are {} uniques food places.'.format(len(ny_coffee['Venue Category'].unique())))

There are 137 uniques food places.
There are 3 uniques food places.


In [322]:
ny_venues['Venue Category'].unique()

array(['Donut Shop', 'Sandwich Place', 'Food', 'Deli / Bodega',
       'Pizza Place', 'Bagel Shop', 'Fast Food Restaurant', 'Restaurant',
       'Chinese Restaurant', 'Fried Chicken Joint', 'Diner',
       'Caribbean Restaurant', 'Seafood Restaurant', 'Bakery',
       'Food Truck', 'Latin American Restaurant', 'Burger Joint',
       'Spanish Restaurant', 'Mexican Restaurant', 'Wings Joint', 'Café',
       'Breakfast Spot', 'Asian Restaurant',
       'Vegetarian / Vegan Restaurant', 'Snack Place', 'Steakhouse',
       'American Restaurant', 'Indian Restaurant', 'Italian Restaurant',
       'Soup Place', 'Sushi Restaurant', 'French Restaurant',
       'Tapas Restaurant', 'African Restaurant', 'Empanada Restaurant',
       'Burrito Place', 'Greek Restaurant', 'Buffet',
       'Puerto Rican Restaurant', 'Fish & Chips Shop',
       'Peruvian Restaurant', 'BBQ Joint', 'South American Restaurant',
       'Southern / Soul Food Restaurant', 'Middle Eastern Restaurant',
       'Arepa Restaurant'

## Convert non-coffee shop places into "food places" and dropping cafe and pet cafe from nyvenues to get rid of duplicates

In [323]:
ny_venues = ny_venues[~ny_venues["Venue Category"].isin(["Café", "Pet Café"])]

In [324]:
print('There are {} uniques food places.'.format(len(ny_venues['Venue Category'].unique())))

There are 135 uniques food places.


In [325]:
ny_venues["Venue Category"] = "Food Place"

In [326]:
ny_venues["Venue Category"].unique()

array(['Food Place'], dtype=object)

In [327]:
ny_coffee["Venue Category"] = "Coffee Shop"

In [328]:
ny_coffee["Venue Category"].unique()

array(['Coffee Shop'], dtype=object)

## Merge the two dataframes, basic sanity checks for correct merge, and cleaning up

In [329]:
ny_merged = pd.concat([ny_venues, ny_coffee])

In [330]:
ny_merged.shape

(10052, 8)

In [331]:
print(ny_venues.shape,"\n",ny_coffee.shape)

(7908, 8) 
 (2144, 8)


In [332]:
ny_merged

Unnamed: 0.1,Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,0,Wakefield,40.894705,-73.847201,Dunkin',40.890459,-73.849089,Food Place
1,1,Wakefield,40.894705,-73.847201,Subway,40.890468,-73.849152,Food Place
2,2,Wakefield,40.894705,-73.847201,Pitman Deli,40.896744,-73.844398,Food Place
3,3,Wakefield,40.894705,-73.847201,Central Deli,40.896728,-73.844387,Food Place
4,4,Wakefield,40.894705,-73.847201,Louis Pizza,40.898399,-73.848810,Food Place
...,...,...,...,...,...,...,...,...
2139,2139,Hudson Yards,40.756658,-74.000111,Pier Cafe,40.759625,-74.004162,Coffee Shop
2140,2140,Hudson Yards,40.756658,-74.000111,Granja,40.752507,-74.001879,Coffee Shop
2141,2141,Queensbridge,40.756091,-73.945631,Mama’s Coffee Shop & Restaurant,40.755649,-73.941760,Coffee Shop
2142,2142,Queensbridge,40.756091,-73.945631,Carlos Coffee Shop,40.754051,-73.941643,Coffee Shop


In [348]:
ny_merged = ny_merged.drop(["Unnamed: 0"], 1)

In [349]:
# one hot encoding
ny_hotcoded = pd.get_dummies(ny_merged[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
ny_hotcoded['Neighborhood'] = ny_merged['Neighborhood'] 

# move neighborhood column to the first column
temp_columns = [ny_hotcoded.columns[-1]] + list(ny_hotcoded.columns[:-1])
ny_hotcoded = ny_hotcoded[temp_columns]

ny_hotcoded.head()

Unnamed: 0,Neighborhood,Coffee Shop,Food Place
0,Wakefield,0,1
1,Wakefield,0,1
2,Wakefield,0,1
3,Wakefield,0,1
4,Wakefield,0,1


In [350]:
ny_proportions = ny_hotcoded.groupby('Neighborhood').mean().reset_index()
ny_proportions.head(10)

Unnamed: 0,Neighborhood,Coffee Shop,Food Place
0,Allerton,0.0,1.0
1,Annadale,0.0,1.0
2,Arden Heights,0.333333,0.666667
3,Arlington,0.5,0.5
4,Arrochar,0.0625,0.9375
5,Arverne,0.222222,0.777778
6,Astoria,0.142857,0.857143
7,Astoria Heights,0.1,0.9
8,Auburndale,0.166667,0.833333
9,Bath Beach,0.043478,0.956522


In [351]:
# print the places with the highest food place ratio
ny_proportions.sort_values(by="Food Place", ascending=False)

Unnamed: 0,Neighborhood,Coffee Shop,Food Place
0,Allerton,0.000000,1.000000
158,Malba,0.000000,1.000000
1,Annadale,0.000000,1.000000
139,Jamaica Estates,0.000000,1.000000
133,Huguenot,0.000000,1.000000
...,...,...,...
152,Little Italy,0.505495,0.494505
172,Midtown South,0.507614,0.492386
171,Midtown,0.510204,0.489796
242,Soho,0.529101,0.470899


## Clustering neighborhoods

In [352]:
from sklearn.cluster import KMeans

In [353]:
# set number of clusters
clusters = 4

ny_clustering = ny_proportions.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=clusters, random_state=4).fit(ny_clustering)

# check cluster labels generated
kmeans.labels_[0:10]

array([2, 2, 3, 1, 2, 3, 0, 0, 0, 2], dtype=int32)

In [354]:
kmeans

KMeans(n_clusters=4, random_state=4)

In [393]:
ny_merge = ny_proportions

# add clustering labels
ny_merge['Cluster Labels'] = kmeans.labels_
ny_merge = ny_merged.merge(ny_merge, on="Neighborhood")
ny_merge = ny_merge.drop_duplicates(subset=["Neighborhood"]).reset_index()
ny_merge = ny_merge.drop(["index", "Venue", "Venue Latitude", "Venue Longitude", "Venue Category"], 1)
ny_merge.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Coffee Shop,Food Place,Cluster Labels
0,Wakefield,40.894705,-73.847201,0.0,1.0,2
1,Co-op City,40.874294,-73.829939,0.0,1.0,2
2,Eastchester,40.887556,-73.827806,0.0,1.0,2
3,Riverdale,40.890834,-73.912585,0.0,1.0,2
4,Kingsbridge,40.881687,-73.902818,0.050847,0.949153,2


In [394]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(clusters)
ys = [i+x+(i*x)**2 for i in range(clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers
markers_colors = []
for lat, lon, poi, cluster in zip(ny_merge["Neighborhood Latitude"], ny_merge["Neighborhood Longitude"], ny_merge['Neighborhood'], ny_merge['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Explore the clusters further

In [397]:
# Cluster 1
ny_merge.loc[ny_merge['Cluster Labels'] == 0].sort_values(by="Food Place", ascending=False)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Coffee Shop,Food Place,Cluster Labels
240,Shore Acres,40.609719,-74.066678,0.066667,0.933333,0
155,Kew Gardens Hills,40.722578,-73.820878,0.066667,0.933333,0
33,Belmont,40.857277,-73.888452,0.068966,0.931034,0
57,Windsor Terrace,40.656946,-73.980073,0.068966,0.931034,0
201,New Springville,40.594252,-74.164960,0.071429,0.928571,0
...,...,...,...,...,...,...
292,Fox Hills,40.617311,-74.081740,0.166667,0.833333,0
138,Sunnyside,40.740176,-73.926916,0.169492,0.830508,0
61,Bushwick,40.698116,-73.925258,0.174603,0.825397,0
101,Hamilton Heights,40.823604,-73.949688,0.174603,0.825397,0


In [398]:
# Cluster 2
ny_merge.loc[ny_merge['Cluster Labels'] == 1].sort_values(by="Food Place", ascending=False)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Coffee Shop,Food Place,Cluster Labels
289,Hudson Yards,40.756658,-74.000111,0.363636,0.636364,1
114,Chelsea,40.744035,-74.003116,0.367188,0.632812,1
98,Chinatown,40.715618,-73.994279,0.379747,0.620253,1
236,Noho,40.723259,-73.988434,0.396226,0.603774,1
239,Richmond Town,40.569606,-74.134057,0.4,0.6,1
210,Silver Lake,40.619193,-74.09629,0.4,0.6,1
264,Flatiron,40.739673,-73.990947,0.402516,0.597484,1
271,Dumbo,40.703176,-73.988753,0.413793,0.586207,1
110,Lincoln Square,40.773529,-73.985338,0.414286,0.585714,1
123,Morningside Heights,40.808,-73.963896,0.424242,0.575758,1


In [399]:
# Cluster 3
ny_merge.loc[ny_merge['Cluster Labels'] == 2].sort_values(by="Food Place", ascending=False)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Coffee Shop,Food Place,Cluster Labels
0,Wakefield,40.894705,-73.847201,0.000000,1.000000,2
156,Fresh Meadows,40.734394,-73.782713,0.000000,1.000000,2
215,Westerleigh,40.621090,-74.133041,0.000000,1.000000,2
208,Tottenville,40.505334,-74.246569,0.000000,1.000000,2
206,Annadale,40.538114,-74.178549,0.000000,1.000000,2
...,...,...,...,...,...,...
175,Rockaway Beach,40.582802,-73.822361,0.060606,0.939394,2
42,Concourse,40.834284,-73.915589,0.060606,0.939394,2
218,Arrochar,40.596313,-74.067124,0.062500,0.937500,2
278,Middle Village,40.716415,-73.881143,0.062500,0.937500,2


In [400]:
# Cluster 3
ny_merge.loc[ny_merge['Cluster Labels'] == 3].sort_values(by="Food Place", ascending=False)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Coffee Shop,Food Place,Cluster Labels
135,Richmond Hill,40.697947,-73.831833,0.181818,0.818182,3
86,Prospect Lefferts Gardens,40.658420,-73.954899,0.183673,0.816327,3
36,Pelham Bay,40.850641,-73.832074,0.184211,0.815789,3
93,East Williamsburg,40.708492,-73.938858,0.186441,0.813559,3
170,Steinway,40.775923,-73.902290,0.187500,0.812500,3
...,...,...,...,...,...,...
199,Mariner's Harbor,40.632546,-74.150085,0.333333,0.666667,3
62,Bedford Stuyvesant,40.687232,-73.941785,0.333333,0.666667,3
75,Manhattan Beach,40.577914,-73.943537,0.333333,0.666667,3
81,Marine Park,40.609748,-73.931344,0.333333,0.666667,3
