In [1]:
import pandas as pd
import numpy as np
import requests
import json
import folium
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans
print('Libraries imported.')

Libraries imported.


In [2]:
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded!')

Data downloaded!


In [3]:
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

In [4]:
neighborhoods_data = newyork_data['features']


In [5]:
neighborhoods_data[0]

{'type': 'Feature',
 'id': 'nyu_2451_34572.1',
 'geometry': {'type': 'Point',
  'coordinates': [-73.84720052054902, 40.89470517661]},
 'geometry_name': 'geom',
 'properties': {'name': 'Wakefield',
  'stacked': 1,
  'annoline1': 'Wakefield',
  'annoline2': None,
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.84720052054902,
   40.89470517661,
   -73.84720052054902,
   40.89470517661]}}

In [6]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

In [7]:
for data in neighborhoods_data:
    borough = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [8]:
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [9]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        neighborhoods.shape[0]
    )
)

The dataframe has 5 boroughs and 306 neighborhoods.


In [10]:
address = 'New York City, NY'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7308619, -73.9871558.


### Define Fousquare Credentials and Versions 

In [11]:
CLIENT_ID = 'FWRAAQDILXRO0OK05ZKNO4JN1WERLDNU20NAQSRBXEEZIOGB' # your Foursquare ID
CLIENT_SECRET = 'VD4HC2UPTO3VZSYPZZEPNPHO2YS4IJBN14NMZEUAY2PZI2QX' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: FWRAAQDILXRO0OK05ZKNO4JN1WERLDNU20NAQSRBXEEZIOGB
CLIENT_SECRET:VD4HC2UPTO3VZSYPZZEPNPHO2YS4IJBN14NMZEUAY2PZI2QX


In [12]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [13]:
limit=200
ny_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                                   latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude']
                                  )


Wakefield
Co-op City
Eastchester
Fieldston
Riverdale
Kingsbridge
Marble Hill
Woodlawn
Norwood
Williamsbridge
Baychester
Pelham Parkway
City Island
Bedford Park
University Heights
Morris Heights
Fordham
East Tremont
West Farms
High  Bridge
Melrose
Mott Haven
Port Morris
Longwood
Hunts Point
Morrisania
Soundview
Clason Point
Throgs Neck
Country Club
Parkchester
Westchester Square
Van Nest
Morris Park
Belmont
Spuyten Duyvil
North Riverdale
Pelham Bay
Schuylerville
Edgewater Park
Castle Hill
Olinville
Pelham Gardens
Concourse
Unionport
Edenwald
Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker

In [14]:
ny_venues.shape

(20483, 7)

In [15]:
ny_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Wakefield,40.894705,-73.847201,Lollipops Gelato,40.894123,-73.845892,Dessert Shop
1,Wakefield,40.894705,-73.847201,Ripe Kitchen & Bar,40.898152,-73.838875,Caribbean Restaurant
2,Wakefield,40.894705,-73.847201,Jackie's West Indian Bakery,40.889283,-73.84331,Caribbean Restaurant
3,Wakefield,40.894705,-73.847201,Rite Aid,40.896521,-73.84468,Pharmacy
4,Wakefield,40.894705,-73.847201,Ali's Roti Shop,40.894036,-73.856935,Caribbean Restaurant


In [16]:
x=pd.DataFrame(ny_venues['Neighborhood'].value_counts())

In [17]:
ny_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Allerton,62,62,62,62,62,62
Annadale,16,16,16,16,16,16
Arden Heights,19,19,19,19,19,19
Arlington,19,19,19,19,19,19
Arrochar,21,21,21,21,21,21
Arverne,34,34,34,34,34,34
Astoria,100,100,100,100,100,100
Astoria Heights,70,70,70,70,70,70
Auburndale,100,100,100,100,100,100
Bath Beach,97,97,97,97,97,97


In [18]:
print('There are {} uniques categories.'.format(len(ny_venues['Venue Category'].unique())))

There are 458 uniques categories.


## Analyze each Neighborhood

In [19]:
# one hot encoding
ny_onehot = pd.get_dummies(ny_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
ny_onehot['Neighborhood'] = ny_venues['Neighborhood'] 

ny_onehot.head()

Unnamed: 0,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Lounge,Airport Service,Airport Terminal,Airport Tram,American Restaurant,Amphitheater,...,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
i=tuple(ny_onehot.columns.values)
p=i.index('Neighborhood')

In [21]:
p

280

In [22]:
fixed_columns = [ny_onehot.columns[p]] + list(ny_onehot.columns[:(p-1)]) + list(ny_onehot.columns[(p+1):])
ny_onehot = ny_onehot[fixed_columns]

In [23]:
ny_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Lounge,Airport Service,Airport Terminal,Airport Tram,American Restaurant,...,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,Wakefield,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Wakefield,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Wakefield,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Wakefield,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Wakefield,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
ny_onehot.shape

(20483, 457)

In [25]:
ny_grouped = ny_onehot.groupby('Neighborhood').mean().reset_index()
ny_grouped

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Lounge,Airport Service,Airport Terminal,Airport Tram,American Restaurant,...,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,Allerton,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.016129,...,0.000000,0.00,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000
1,Annadale,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.125000,...,0.000000,0.00,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000
2,Arden Heights,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,...,0.000000,0.00,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000
3,Arlington,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.052632,...,0.000000,0.00,0.000000,0.000000,0.00,0.052632,0.000000,0.000000,0.000000,0.000000
4,Arrochar,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,...,0.000000,0.00,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000
5,Arverne,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,...,0.000000,0.00,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000
6,Astoria,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,...,0.000000,0.00,0.000000,0.020000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000
7,Astoria Heights,0.000000,0.0,0.000000,0.000000,0.0,0.014286,0.057143,0.0,0.000000,...,0.000000,0.00,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000
8,Auburndale,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.020000,...,0.000000,0.00,0.000000,0.000000,0.00,0.010000,0.000000,0.000000,0.000000,0.000000
9,Bath Beach,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.010309,...,0.000000,0.00,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000


In [26]:
col_name=[]
for col in ny_grouped.columns.values:
    if "Restaurant" in col:
        col_name.append(col)
    

In [27]:
col_name[0:10]

['Afghan Restaurant',
 'African Restaurant',
 'American Restaurant',
 'Arepa Restaurant',
 'Argentinian Restaurant',
 'Asian Restaurant',
 'Australian Restaurant',
 'Austrian Restaurant',
 'Brazilian Restaurant',
 'Burmese Restaurant']

In [28]:
ny_grouped_1 =  ny_grouped.loc[:,col_name]

In [29]:
print(ny_grouped_1.shape)
ny_grouped_1.head()

(302, 92)


Unnamed: 0,Afghan Restaurant,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,Austrian Restaurant,Brazilian Restaurant,Burmese Restaurant,...,Tex-Mex Restaurant,Thai Restaurant,Theme Restaurant,Tibetan Restaurant,Turkish Restaurant,Udon Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Vietnamese Restaurant
0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
ny_grouped_1['Neighborhood'] = ny_grouped['Neighborhood']

In [31]:
fixed_columns = [ny_grouped_1.columns[-1]] + list(ny_grouped_1.columns[:-1])
ny_grouped_1 = ny_grouped_1[fixed_columns]

ny_grouped_1.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,Austrian Restaurant,Brazilian Restaurant,...,Tex-Mex Restaurant,Thai Restaurant,Theme Restaurant,Tibetan Restaurant,Turkish Restaurant,Udon Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Vietnamese Restaurant
0,Allerton,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Annadale,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Arden Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Arlington,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Arrochar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
num_top_venues = 5

for hood in ny_grouped_1['Neighborhood']:
    print("----"+hood+"----")
    temp = ny_grouped_1[ny_grouped_1['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Allerton----
                  venue  freq
0  Fast Food Restaurant  0.08
1    Chinese Restaurant  0.05
2  Caribbean Restaurant  0.03
3   American Restaurant  0.02
4    Mexican Restaurant  0.02


----Annadale----
                 venue  freq
0  American Restaurant  0.12
1           Restaurant  0.12
2    Afghan Restaurant  0.00
3    Paella Restaurant  0.00
4   Russian Restaurant  0.00


----Arden Heights----
                  venue  freq
0    Mexican Restaurant  0.11
1    Italian Restaurant  0.05
2      Sushi Restaurant  0.05
3     Afghan Restaurant  0.00
4  Pakistani Restaurant  0.00


----Arlington----
                  venue  freq
0  Fast Food Restaurant  0.11
1   American Restaurant  0.05
2     Afghan Restaurant  0.00
3  Pakistani Restaurant  0.00
4    Russian Restaurant  0.00


----Arrochar----
                       venue  freq
0         Italian Restaurant  0.10
1   Mediterranean Restaurant  0.05
2  Middle Eastern Restaurant  0.05
3         Chinese Restaurant  0.05
4          A

In [33]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

#### Create the dataframe and display 5 most visited restaurant in each Neighborhood

In [34]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Restaurant'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Restaurant'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = ny_grouped_1['Neighborhood']

for ind in np.arange(ny_grouped_1.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ny_grouped_1.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted


Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant
0,Allerton,Fast Food Restaurant,Chinese Restaurant,Caribbean Restaurant,Eastern European Restaurant,American Restaurant
1,Annadale,Restaurant,American Restaurant,Vietnamese Restaurant,Greek Restaurant,English Restaurant
2,Arden Heights,Mexican Restaurant,Italian Restaurant,Sushi Restaurant,Vietnamese Restaurant,Gluten-free Restaurant
3,Arlington,Fast Food Restaurant,American Restaurant,Vietnamese Restaurant,Halal Restaurant,English Restaurant
4,Arrochar,Italian Restaurant,Mediterranean Restaurant,Middle Eastern Restaurant,Chinese Restaurant,Vietnamese Restaurant
5,Arverne,Chinese Restaurant,Thai Restaurant,Caribbean Restaurant,Vietnamese Restaurant,Gluten-free Restaurant
6,Astoria,Greek Restaurant,Middle Eastern Restaurant,Italian Restaurant,Seafood Restaurant,Mediterranean Restaurant
7,Astoria Heights,Italian Restaurant,Greek Restaurant,Chinese Restaurant,Middle Eastern Restaurant,Sushi Restaurant
8,Auburndale,Korean Restaurant,Greek Restaurant,Sushi Restaurant,American Restaurant,Italian Restaurant
9,Bath Beach,Japanese Restaurant,Chinese Restaurant,Cantonese Restaurant,Sushi Restaurant,Italian Restaurant


## Clustering

In [35]:
# set number of clusters
kclusters = 5

ny_grouped_clustering = ny_grouped_1.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ny_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 3, 3, 1, 2, 0, 3, 3, 3, 0], dtype=int32)

In [36]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

ny_merged = neighborhoods

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
ny_merged = ny_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

ny_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant
0,Bronx,Wakefield,40.894705,-73.847201,4,Caribbean Restaurant,Fast Food Restaurant,Chinese Restaurant,Southern / Soul Food Restaurant,Vietnamese Restaurant
1,Bronx,Co-op City,40.874294,-73.829939,3,Restaurant,Caribbean Restaurant,Mexican Restaurant,Dumpling Restaurant,Fast Food Restaurant
2,Bronx,Eastchester,40.887556,-73.827806,4,Caribbean Restaurant,Fast Food Restaurant,Asian Restaurant,Seafood Restaurant,Mexican Restaurant
3,Bronx,Fieldston,40.895437,-73.905643,1,Mexican Restaurant,Greek Restaurant,Fast Food Restaurant,Chinese Restaurant,Tapas Restaurant
4,Bronx,Riverdale,40.890834,-73.912585,3,Japanese Restaurant,Mexican Restaurant,Greek Restaurant,Italian Restaurant,Thai Restaurant


In [45]:
new_col=['Borough','1st Most Common Restaurant','2nd Most Common Restaurant','3rd Most Common Restaurant','4th Most Common Restaurant','5th Most Common Restaurant']
ny_merged_new=ny_merged.loc[:,new_col]

In [46]:
ny_merged_new.head()

Unnamed: 0,Borough,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant
0,Bronx,Caribbean Restaurant,Fast Food Restaurant,Chinese Restaurant,Southern / Soul Food Restaurant,Vietnamese Restaurant
1,Bronx,Restaurant,Caribbean Restaurant,Mexican Restaurant,Dumpling Restaurant,Fast Food Restaurant
2,Bronx,Caribbean Restaurant,Fast Food Restaurant,Asian Restaurant,Seafood Restaurant,Mexican Restaurant
3,Bronx,Mexican Restaurant,Greek Restaurant,Fast Food Restaurant,Chinese Restaurant,Tapas Restaurant
4,Bronx,Japanese Restaurant,Mexican Restaurant,Greek Restaurant,Italian Restaurant,Thai Restaurant


In [54]:
nybg=ny_merged_new.groupby('Borough')


In [59]:
nybg['1st Most Common Restaurant'].value_counts()

Borough        1st Most Common Restaurant     
Bronx          Fast Food Restaurant               15
               Italian Restaurant                 12
               Caribbean Restaurant                4
               Chinese Restaurant                  4
               Mexican Restaurant                  4
               Japanese Restaurant                 3
               Seafood Restaurant                  3
               Spanish Restaurant                  3
               Restaurant                          2
               Latin American Restaurant           1
               South American Restaurant           1
Brooklyn       Caribbean Restaurant               16
               Italian Restaurant                 12
               American Restaurant                10
               Chinese Restaurant                  6
               Japanese Restaurant                 5
               Fast Food Restaurant                4
               Mexican Restaurant                  3

#### Let's Visualize

In [37]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [j + x + (j*x)**2 for j in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(j) for j in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(ny_merged['Latitude'],ny_merged['Longitude'], ny_merged['Neighborhood'], ny_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine Clusters

In [38]:
ny_merged.loc[ny_merged['Cluster Labels'] == 0, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant
10,Baychester,Chinese Restaurant,Spanish Restaurant,Italian Restaurant,Fast Food Restaurant,American Restaurant
11,Pelham Parkway,Chinese Restaurant,Italian Restaurant,Sushi Restaurant,Eastern European Restaurant,Vietnamese Restaurant
13,Bedford Park,Chinese Restaurant,Mexican Restaurant,Spanish Restaurant,New American Restaurant,Fast Food Restaurant
15,Morris Heights,Chinese Restaurant,Spanish Restaurant,Fast Food Restaurant,Seafood Restaurant,Asian Restaurant
47,Bensonhurst,Chinese Restaurant,Japanese Restaurant,Italian Restaurant,Vietnamese Restaurant,Cantonese Restaurant
72,East New York,Chinese Restaurant,Spanish Restaurant,Caribbean Restaurant,Vietnamese Restaurant,Egyptian Restaurant
73,Starrett City,Chinese Restaurant,Seafood Restaurant,Sushi Restaurant,Italian Restaurant,American Restaurant
79,Bath Beach,Japanese Restaurant,Chinese Restaurant,Cantonese Restaurant,Sushi Restaurant,Italian Restaurant
85,Sea Gate,Chinese Restaurant,Vietnamese Restaurant,Greek Restaurant,English Restaurant,Ethiopian Restaurant
94,Georgetown,Chinese Restaurant,Italian Restaurant,American Restaurant,Japanese Restaurant,Caribbean Restaurant


In [39]:
ny_merged.loc[ny_merged['Cluster Labels'] == 1, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant
3,Fieldston,Mexican Restaurant,Greek Restaurant,Fast Food Restaurant,Chinese Restaurant,Tapas Restaurant
8,Norwood,Fast Food Restaurant,Chinese Restaurant,Spanish Restaurant,Mexican Restaurant,Caribbean Restaurant
14,University Heights,Spanish Restaurant,African Restaurant,Latin American Restaurant,Fast Food Restaurant,Mexican Restaurant
17,East Tremont,Fast Food Restaurant,American Restaurant,Latin American Restaurant,Chinese Restaurant,Paella Restaurant
18,West Farms,Fast Food Restaurant,Latin American Restaurant,Chinese Restaurant,Paella Restaurant,Vietnamese Restaurant
21,Mott Haven,Fast Food Restaurant,Mexican Restaurant,Italian Restaurant,Latin American Restaurant,Asian Restaurant
23,Longwood,Fast Food Restaurant,Chinese Restaurant,Latin American Restaurant,Mexican Restaurant,American Restaurant
25,Morrisania,Fast Food Restaurant,Mexican Restaurant,American Restaurant,Italian Restaurant,Latin American Restaurant
26,Soundview,Fast Food Restaurant,Chinese Restaurant,American Restaurant,Southern / Soul Food Restaurant,Vietnamese Restaurant
30,Parkchester,Spanish Restaurant,Latin American Restaurant,Restaurant,American Restaurant,Caribbean Restaurant


In [40]:
ny_merged.loc[ny_merged['Cluster Labels'] == 2, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant
12,City Island,Seafood Restaurant,Italian Restaurant,American Restaurant,French Restaurant,Japanese Restaurant
16,Fordham,Italian Restaurant,Spanish Restaurant,Chinese Restaurant,Mexican Restaurant,Fast Food Restaurant
28,Throgs Neck,Italian Restaurant,Restaurant,American Restaurant,Asian Restaurant,Chinese Restaurant
29,Country Club,Italian Restaurant,Fast Food Restaurant,American Restaurant,Asian Restaurant,Thai Restaurant
33,Morris Park,Italian Restaurant,Spanish Restaurant,Fast Food Restaurant,Restaurant,Arepa Restaurant
34,Belmont,Italian Restaurant,Latin American Restaurant,Mexican Restaurant,Spanish Restaurant,African Restaurant
37,Pelham Bay,Italian Restaurant,Fast Food Restaurant,Asian Restaurant,Thai Restaurant,Mexican Restaurant
39,Edgewater Park,Italian Restaurant,Chinese Restaurant,American Restaurant,Asian Restaurant,Latin American Restaurant
52,Sheepshead Bay,Sushi Restaurant,Italian Restaurant,Russian Restaurant,Turkish Restaurant,Restaurant
77,Manhattan Beach,Italian Restaurant,Turkish Restaurant,Restaurant,Seafood Restaurant,Russian Restaurant


In [41]:
ny_merged.loc[ny_merged['Cluster Labels'] == 3, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant
1,Co-op City,Restaurant,Caribbean Restaurant,Mexican Restaurant,Dumpling Restaurant,Fast Food Restaurant
4,Riverdale,Japanese Restaurant,Mexican Restaurant,Greek Restaurant,Italian Restaurant,Thai Restaurant
5,Kingsbridge,Mexican Restaurant,Spanish Restaurant,Latin American Restaurant,Japanese Restaurant,American Restaurant
6,Marble Hill,Spanish Restaurant,Mexican Restaurant,Latin American Restaurant,Caribbean Restaurant,Mediterranean Restaurant
7,Woodlawn,Italian Restaurant,Caribbean Restaurant,American Restaurant,Fast Food Restaurant,Indian Restaurant
19,High Bridge,Seafood Restaurant,Theme Restaurant,Caribbean Restaurant,Gluten-free Restaurant,Empanada Restaurant
20,Melrose,Mexican Restaurant,Italian Restaurant,Fast Food Restaurant,Chinese Restaurant,American Restaurant
22,Port Morris,Restaurant,Latin American Restaurant,Peruvian Restaurant,Vietnamese Restaurant,Gluten-free Restaurant
24,Hunts Point,Seafood Restaurant,Spanish Restaurant,Dumpling Restaurant,Egyptian Restaurant,Empanada Restaurant
27,Clason Point,South American Restaurant,Vietnamese Restaurant,Gluten-free Restaurant,Empanada Restaurant,English Restaurant


In [42]:
ny_merged.loc[ny_merged['Cluster Labels'] == 4, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant
0,Wakefield,Caribbean Restaurant,Fast Food Restaurant,Chinese Restaurant,Southern / Soul Food Restaurant,Vietnamese Restaurant
2,Eastchester,Caribbean Restaurant,Fast Food Restaurant,Asian Restaurant,Seafood Restaurant,Mexican Restaurant
9,Williamsbridge,Caribbean Restaurant,Fast Food Restaurant,Restaurant,Vegetarian / Vegan Restaurant,American Restaurant
45,Edenwald,Fast Food Restaurant,Caribbean Restaurant,Chinese Restaurant,Vietnamese Restaurant,Egyptian Restaurant
54,Flatbush,Caribbean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Indian Restaurant,Thai Restaurant
55,Crown Heights,Caribbean Restaurant,Japanese Restaurant,Southern / Soul Food Restaurant,Sushi Restaurant,Vegetarian / Vegan Restaurant
56,East Flatbush,Caribbean Restaurant,Indian Restaurant,Chinese Restaurant,Vietnamese Restaurant,Gluten-free Restaurant
74,Canarsie,Caribbean Restaurant,Fast Food Restaurant,Mexican Restaurant,Vietnamese Restaurant,Greek Restaurant
75,Flatlands,Caribbean Restaurant,Fast Food Restaurant,Chinese Restaurant,Restaurant,American Restaurant
88,Prospect Lefferts Gardens,Caribbean Restaurant,Indian Restaurant,Italian Restaurant,Tapas Restaurant,Chinese Restaurant
