# Introduction/Business Problem

In [3]:
# As more and more people are considering immigration to other countries
# this project would wish to give suggestions to whom would try to start their business in other countries
# in order to immigrate 
# So,
# lets see which place in New York would be recommended to start business

# Package needed

In [4]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

#!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Folium installed
Libraries imported.


# Download and Explore Dataset

In [23]:
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded!')

/bin/sh: wget: command not found
Data downloaded!


In [27]:
import json # library to handle JSON files
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

In [28]:
neighborhoods_data = newyork_data['features']

In [29]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

In [31]:
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [36]:
neighborhoods.head(100)

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585
...,...,...,...,...
95,Brooklyn,East Williamsburg,40.708492,-73.938858
96,Brooklyn,North Side,40.714823,-73.958809
97,Brooklyn,South Side,40.710861,-73.958001
98,Brooklyn,Ocean Parkway,40.613060,-73.968367


In [33]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        neighborhoods.shape[0]
    )
)

The dataframe has 5 boroughs and 306 neighborhoods.


In [34]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [35]:
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

In [37]:
b_data = neighborhoods[neighborhoods['Borough'] == 'Brooklyn'].reset_index(drop=True)
b_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Brooklyn,Bay Ridge,40.625801,-74.030621
1,Brooklyn,Bensonhurst,40.611009,-73.99518
2,Brooklyn,Sunset Park,40.645103,-74.010316
3,Brooklyn,Greenpoint,40.730201,-73.954241
4,Brooklyn,Gravesend,40.59526,-73.973471


In [39]:
address = 'Brooklyn, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Brooklyn are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Brooklyn are 40.6501038, -73.9495823.


In [41]:
# create map of Brooklyn using latitude and longitude values
map_b = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(b_data['Latitude'], b_data['Longitude'], b_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_b)  
    
map_b

In [42]:
CLIENT_ID = '320KARHINECZ0UGN4DLW01ZC1PCQX0FLJWH222UHX4DQRACT' # your Foursquare ID
CLIENT_SECRET = 'IXGI2KILN3LYRO4HDIU2FCRYKTCBKLESRNPSXYUBDRFIASAW' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 320KARHINECZ0UGN4DLW01ZC1PCQX0FLJWH222UHX4DQRACT
CLIENT_SECRET:IXGI2KILN3LYRO4HDIU2FCRYKTCBKLESRNPSXYUBDRFIASAW


In [43]:
b_data.loc[0, 'Neighborhood']

'Bay Ridge'

In [45]:
b_latitude = b_data.loc[0, 'Latitude'] # neighborhood latitude value
b_longitude = b_data.loc[0, 'Longitude'] # neighborhood longitude value

b_name = b_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(b_name, 
                                                               b_latitude, 
                                                               b_longitude))

Latitude and longitude values of Bay Ridge are 40.625801065010656, -74.03062069353813.


In [47]:

LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    b_latitude, 
    b_longitude, 
    radius, 
    LIMIT)
url


'https://api.foursquare.com/v2/venues/explore?&client_id=320KARHINECZ0UGN4DLW01ZC1PCQX0FLJWH222UHX4DQRACT&client_secret=IXGI2KILN3LYRO4HDIU2FCRYKTCBKLESRNPSXYUBDRFIASAW&v=20180605&ll=40.625801065010656,-74.03062069353813&radius=500&limit=100'

In [48]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5eda0c4195feaf001b862d18'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': '$-$$$$', 'key': 'price'}]},
  'headerLocation': 'Bay Ridge',
  'headerFullLocation': 'Bay Ridge, Brooklyn',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 87,
  'suggestedBounds': {'ne': {'lat': 40.63030106951066,
    'lng': -74.02470273356597},
   'sw': {'lat': 40.62130106051065, 'lng': -74.03653865351028}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b895827f964a5206c2d32e3',
       'name': 'Pilo Arts Day Spa and Salon',
       'location': {'address': '8412 3rd Ave',
        'lat': 40.62474788273414,
        'lng': -74.03059056940135,
        'labeledLatLngs': [{'label': 'display',
          'lat':

In [49]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [50]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,categories,lat,lng
0,Pilo Arts Day Spa and Salon,Spa,40.624748,-74.030591
1,Bagel Boy,Bagel Shop,40.627896,-74.029335
2,Leo's Casa Calamari,Pizza Place,40.6242,-74.030931
3,Pegasus Cafe,Breakfast Spot,40.623168,-74.031186
4,The Bookmark Shoppe,Bookstore,40.624577,-74.030562


In [51]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

87 venues were returned by Foursquare.


# Explore Neighborhoods in Brooklyn

In [52]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [53]:
b_venues = getNearbyVenues(names=b_data['Neighborhood'],
                                   latitudes=b_data['Latitude'],
                                   longitudes=b_data['Longitude']
                                  )


Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker Heights
Gerritsen Beach
Marine Park
Clinton Hill
Sea Gate
Downtown
Boerum Hill
Prospect Lefferts Gardens
Ocean Hill
City Line
Bergen Beach
Midwood
Prospect Park South
Georgetown
East Williamsburg
North Side
South Side
Ocean Parkway
Fort Hamilton
Ditmas Park
Wingate
Rugby
Remsen Village
New Lots
Paerdegat Basin
Mill Basin
Fulton Ferry
Vinegar Hill
Weeksville
Broadway Junction
Dumbo
Homecrest
Highland Park
Madison
Erasmus


In [55]:
print(b_venues.shape)
b_venues.head()

(2740, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Bay Ridge,40.625801,-74.030621,Pilo Arts Day Spa and Salon,40.624748,-74.030591,Spa
1,Bay Ridge,40.625801,-74.030621,Bagel Boy,40.627896,-74.029335,Bagel Shop
2,Bay Ridge,40.625801,-74.030621,Leo's Casa Calamari,40.6242,-74.030931,Pizza Place
3,Bay Ridge,40.625801,-74.030621,Pegasus Cafe,40.623168,-74.031186,Breakfast Spot
4,Bay Ridge,40.625801,-74.030621,The Bookmark Shoppe,40.624577,-74.030562,Bookstore


In [56]:
b_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bath Beach,47,47,47,47,47,47
Bay Ridge,87,87,87,87,87,87
Bedford Stuyvesant,27,27,27,27,27,27
Bensonhurst,31,31,31,31,31,31
Bergen Beach,6,6,6,6,6,6
...,...,...,...,...,...,...
Vinegar Hill,29,29,29,29,29,29
Weeksville,14,14,14,14,14,14
Williamsburg,32,32,32,32,32,32
Windsor Terrace,27,27,27,27,27,27


In [57]:
print('There are {} uniques categories.'.format(len(b_venues['Venue Category'].unique())))

There are 288 uniques categories.


# Analyze Each Neighborhood

In [59]:
# one hot encoding
b_onehot = pd.get_dummies(b_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
b_onehot['Neighborhood'] = b_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [b_onehot.columns[-1]] + list(b_onehot.columns[:-1])
b_onehot = b_onehot[fixed_columns]

b_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Airport Terminal,American Restaurant,Animal Shelter,Antique Shop,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Arts & Crafts Store,...,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [60]:
b_grouped = b_onehot.groupby('Neighborhood').mean().reset_index()
b_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Airport Terminal,American Restaurant,Animal Shelter,Antique Shop,Arepa Restaurant,Argentinian Restaurant,Art Gallery,...,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Bath Beach,0.00000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,...,0.0,0.021277,0.021277,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0
1,Bay Ridge,0.00000,0.0,0.0,0.034483,0.0,0.000000,0.0,0.0,0.000000,...,0.0,0.011494,0.000000,0.011494,0.0,0.000000,0.000000,0.000000,0.0,0.0
2,Bedford Stuyvesant,0.00000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.037037,0.037037,0.0,0.0
3,Bensonhurst,0.00000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0
4,Bergen Beach,0.00000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,Vinegar Hill,0.00000,0.0,0.0,0.034483,0.0,0.034483,0.0,0.0,0.068966,...,0.0,0.000000,0.000000,0.000000,0.0,0.034483,0.034483,0.034483,0.0,0.0
66,Weeksville,0.00000,0.0,0.0,0.071429,0.0,0.000000,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0
67,Williamsburg,0.03125,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.031250,...,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.031250,0.000000,0.0,0.0
68,Windsor Terrace,0.00000,0.0,0.0,0.037037,0.0,0.037037,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.037037,0.0,0.0


In [61]:
num_top_venues = 5

for hood in b_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = b_grouped[b_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bath Beach----
                  venue  freq
0           Pizza Place  0.09
1    Chinese Restaurant  0.09
2              Pharmacy  0.06
3       Bubble Tea Shop  0.04
4  Fast Food Restaurant  0.04


----Bay Ridge----
                 venue  freq
0                  Spa  0.07
1   Italian Restaurant  0.07
2          Pizza Place  0.05
3     Greek Restaurant  0.05
4  American Restaurant  0.03


----Bedford Stuyvesant----
           venue  freq
0  Deli / Bodega  0.07
1    Pizza Place  0.07
2    Coffee Shop  0.07
3           Café  0.07
4            Bar  0.07


----Bensonhurst----
                venue  freq
0  Chinese Restaurant  0.10
1    Sushi Restaurant  0.06
2          Donut Shop  0.06
3      Ice Cream Shop  0.06
4  Italian Restaurant  0.06


----Bergen Beach----
                venue  freq
0     Harbor / Marina  0.33
1  Athletics & Sports  0.17
2          Playground  0.17
3      Baseball Field  0.17
4          Donut Shop  0.17


----Boerum Hill----
            venue  freq
0     Coffee 

                venue  freq
0  Child Care Service  0.14
1         Auto Garage  0.14
2       Moving Target  0.14
3                Food  0.14
4            Bus Line  0.14


----Park Slope----
                 venue  freq
0          Coffee Shop  0.08
1         Burger Joint  0.07
2  American Restaurant  0.05
3               Bakery  0.03
4            Bookstore  0.03


----Prospect Heights----
                venue  freq
0                 Bar  0.09
1  Mexican Restaurant  0.07
2     Thai Restaurant  0.04
3        Cocktail Bar  0.04
4        Gourmet Shop  0.04


----Prospect Lefferts Gardens----
                  venue  freq
0                  Café  0.08
1           Pizza Place  0.08
2                Bakery  0.08
3  Caribbean Restaurant  0.06
4         Deli / Bodega  0.04


----Prospect Park South----
                  venue  freq
0  Caribbean Restaurant  0.10
1  Fast Food Restaurant  0.06
2           Pizza Place  0.06
3         Grocery Store  0.06
4     Mobile Phone Shop  0.06


----Red Hook--

In [62]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [65]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = b_grouped['Neighborhood']

for ind in np.arange(b_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(b_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bath Beach,Chinese Restaurant,Pizza Place,Pharmacy,Italian Restaurant,Bubble Tea Shop,Fast Food Restaurant,Gas Station,Park,Cantonese Restaurant,Surf Spot
1,Bay Ridge,Spa,Italian Restaurant,Greek Restaurant,Pizza Place,Pharmacy,American Restaurant,Chinese Restaurant,Bar,Bagel Shop,Mediterranean Restaurant
2,Bedford Stuyvesant,Pizza Place,Coffee Shop,Bar,Café,Deli / Bodega,Park,Discount Store,Cocktail Bar,New American Restaurant,Tiki Bar
3,Bensonhurst,Chinese Restaurant,Italian Restaurant,Sushi Restaurant,Donut Shop,Ice Cream Shop,Liquor Store,Russian Restaurant,Cha Chaan Teng,Noodle House,Sporting Goods Shop
4,Bergen Beach,Harbor / Marina,Playground,Donut Shop,Athletics & Sports,Baseball Field,Fish Market,Fish & Chips Shop,Filipino Restaurant,Field,Fast Food Restaurant


# Cluster Neighborhoods

In [67]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

In [68]:
# set number of clusters
kclusters = 5

b_grouped_clustering = b_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(b_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 3, 3, 3, 0, 3, 1, 3, 1, 3], dtype=int32)

In [69]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

b_merged = b_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
b_merged = b_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

b_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Brooklyn,Bay Ridge,40.625801,-74.030621,3,Spa,Italian Restaurant,Greek Restaurant,Pizza Place,Pharmacy,American Restaurant,Chinese Restaurant,Bar,Bagel Shop,Mediterranean Restaurant
1,Brooklyn,Bensonhurst,40.611009,-73.99518,3,Chinese Restaurant,Italian Restaurant,Sushi Restaurant,Donut Shop,Ice Cream Shop,Liquor Store,Russian Restaurant,Cha Chaan Teng,Noodle House,Sporting Goods Shop
2,Brooklyn,Sunset Park,40.645103,-74.010316,1,Pizza Place,Latin American Restaurant,Bank,Bakery,Mexican Restaurant,Mobile Phone Shop,Pharmacy,Fried Chicken Joint,Gym,Italian Restaurant
3,Brooklyn,Greenpoint,40.730201,-73.954241,3,Bar,Pizza Place,Coffee Shop,Cocktail Bar,Yoga Studio,Café,Sushi Restaurant,French Restaurant,Deli / Bodega,Tea Room
4,Brooklyn,Gravesend,40.59526,-73.973471,3,Pizza Place,Italian Restaurant,Lounge,Chinese Restaurant,Bakery,Breakfast Spot,Donut Shop,Fish Market,Music Venue,Men's Store


In [71]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [72]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(b_merged['Latitude'], b_merged['Longitude'], b_merged['Neighborhood'], b_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine Clusters

In [73]:
# 1st cluster
b_merged.loc[b_merged['Cluster Labels'] == 0, b_merged.columns[[1] + list(range(5, b_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
45,Bergen Beach,Harbor / Marina,Playground,Donut Shop,Athletics & Sports,Baseball Field,Fish Market,Fish & Chips Shop,Filipino Restaurant,Field,Fast Food Restaurant


In [74]:
# 2nd cluster
b_merged.loc[b_merged['Cluster Labels'] == 1, b_merged.columns[[1] + list(range(5, b_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Sunset Park,Pizza Place,Latin American Restaurant,Bank,Bakery,Mexican Restaurant,Mobile Phone Shop,Pharmacy,Fried Chicken Joint,Gym,Italian Restaurant
7,Manhattan Terrace,Pizza Place,Donut Shop,Ice Cream Shop,Pharmacy,Japanese Restaurant,Mobile Phone Shop,Steakhouse,Organic Grocery,Liquor Store,Bank
8,Flatbush,Pharmacy,Coffee Shop,Caribbean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Juice Bar,Pizza Place,Donut Shop,Chinese Restaurant,Sandwich Place
25,Cypress Hills,Fried Chicken Joint,Latin American Restaurant,Pizza Place,Ice Cream Shop,Chinese Restaurant,Donut Shop,Fast Food Restaurant,Women's Store,Metro Station,Gas Station
26,East New York,Deli / Bodega,Bus Station,Caribbean Restaurant,Plaza,Pizza Place,Event Service,Fast Food Restaurant,Music Venue,Pharmacy,Salon / Barbershop
27,Starrett City,Pharmacy,Gym Pool,Supermarket,Donut Shop,Bus Stop,River,Caribbean Restaurant,Pizza Place,American Restaurant,Fast Food Restaurant
29,Flatlands,Pharmacy,Caribbean Restaurant,Fast Food Restaurant,Fried Chicken Joint,Paper / Office Supplies Store,Park,Lounge,Arts & Crafts Store,Bus Station,Chinese Restaurant
33,Bath Beach,Chinese Restaurant,Pizza Place,Pharmacy,Italian Restaurant,Bubble Tea Shop,Fast Food Restaurant,Gas Station,Park,Cantonese Restaurant,Surf Spot
34,Borough Park,Bank,Pizza Place,Pharmacy,Fast Food Restaurant,Hotel,Grocery Store,Coffee Shop,Restaurant,Chinese Restaurant,Café
44,City Line,Donut Shop,Cosmetics Shop,Liquor Store,Fried Chicken Joint,Mobile Phone Shop,Grocery Store,Coffee Shop,Bus Line,Pizza Place,Cuban Restaurant


In [75]:
# 3rd cluster
b_merged.loc[b_merged['Cluster Labels'] == 2, b_merged.columns[[1] + list(range(5, b_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
30,Mill Island,Pool,Women's Store,Farmers Market,Ethiopian Restaurant,Event Service,Event Space,Factory,Falafel Restaurant,Farm,Fast Food Restaurant


In [76]:
# 4th cluster
b_merged.loc[b_merged['Cluster Labels'] == 3, b_merged.columns[[1] + list(range(5, b_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bay Ridge,Spa,Italian Restaurant,Greek Restaurant,Pizza Place,Pharmacy,American Restaurant,Chinese Restaurant,Bar,Bagel Shop,Mediterranean Restaurant
1,Bensonhurst,Chinese Restaurant,Italian Restaurant,Sushi Restaurant,Donut Shop,Ice Cream Shop,Liquor Store,Russian Restaurant,Cha Chaan Teng,Noodle House,Sporting Goods Shop
3,Greenpoint,Bar,Pizza Place,Coffee Shop,Cocktail Bar,Yoga Studio,Café,Sushi Restaurant,French Restaurant,Deli / Bodega,Tea Room
4,Gravesend,Pizza Place,Italian Restaurant,Lounge,Chinese Restaurant,Bakery,Breakfast Spot,Donut Shop,Fish Market,Music Venue,Men's Store
5,Brighton Beach,Russian Restaurant,Restaurant,Beach,Eastern European Restaurant,Mobile Phone Shop,Sushi Restaurant,Gourmet Shop,Bank,Fried Chicken Joint,Supermarket
6,Sheepshead Bay,Turkish Restaurant,Dessert Shop,Sandwich Place,Hotel,Italian Restaurant,Restaurant,Pizza Place,Outlet Store,Miscellaneous Shop,Karaoke Bar
9,Crown Heights,Pizza Place,Museum,Café,Playground,Bagel Shop,Moving Target,Bus Station,Bookstore,Supermarket,Candy Store
10,East Flatbush,Bakery,Food & Drink Shop,Chinese Restaurant,Park,Supermarket,Pharmacy,Fast Food Restaurant,Caribbean Restaurant,Department Store,Print Shop
11,Kensington,Thai Restaurant,Grocery Store,Pizza Place,Ice Cream Shop,Sandwich Place,Furniture / Home Store,Spa,Bakery,Japanese Restaurant,Mobile Phone Shop
12,Windsor Terrace,Park,Diner,Plaza,Grocery Store,Café,Butcher,Bar,Coffee Shop,Bakery,Bagel Shop


In [77]:
# 5th cluster
b_merged.loc[b_merged['Cluster Labels'] == 4, b_merged.columns[[1] + list(range(5, b_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
35,Dyker Heights,Golf Course,Bagel Shop,Cosmetics Shop,Burger Joint,Grocery Store,Farm,Ethiopian Restaurant,Event Service,Event Space,Factory
