## Introduction:

I am planning to open a restaurant in New York and want to find an area that has all the necessities to make it a success.

## Problem Statement:

I am looking for the location that best suits my restaurant's requirements and will generate the best possible sales, given the area and its environment.

## Requirements:

1. Analyze and discover the areas of New York that are densely populated.
2. Make sure that the average per capita income of the area is above the poverty line and that the area is thriving.
3. Easy access for customers and great visibility.
4. The cheapest and most readily available resources in the area.


## Install required libraries

In [None]:
!pip install geopy
!pip install BeautifulSoup4

import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
import urllib.request
import json
from bs4 import BeautifulSoup
from urllib.request import urlopen
import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import matplotlib.colors as colors
%matplotlib inline
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes
import folium
import urllib

print('Libraries imported.')

In [2]:
# Fetch the New York neighbourhood dataset with the standard library instead
# of shelling out to `wget`, which is not installed on every platform.
# urlretrieve raises on a network/HTTP failure, so the success message below
# is only printed once the file has actually been written.
urllib.request.urlretrieve('https://cocl.us/new_york_dataset', 'newyork_data.json')
print('Data downloaded!')

# Decode explicitly as UTF-8 so the platform's default encoding cannot change
# how the JSON file is read.
with open('newyork_data.json', encoding='utf-8') as json_data:
    newyork_data = json.load(json_data)

Data downloaded!


## Data Transformation and Wrangling

In [3]:
neighborhoods_data = newyork_data['features']
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# Collect one record per feature and build the dataframe in a single call.
# Growing a dataframe row by row with DataFrame.append copies the whole frame
# on every iteration, and the method no longer exists in pandas 2.0.
neighborhood_records = []
for data in neighborhoods_data:
    properties = data['properties']
    # The coordinate pair is ordered [longitude, latitude].
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_records.append({'Borough': properties['borough'],
                                 'Neighborhood': properties['name'],
                                 'Latitude': neighborhood_latlon[1],
                                 'Longitude': neighborhood_latlon[0]})

# Passing `columns` fixes the column order and keeps the expected (empty)
# schema even when the feature list is empty.
neighborhoods = pd.DataFrame(neighborhood_records, columns=column_names)

In [4]:
# Preview the first rows to confirm the frame was assembled as expected.
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


## Geocoding through Geopy

In [5]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
# An explicit timeout gives the public geocoding service more time to answer
# than the library default before the lookup is abandoned.
location = geolocator.geocode(address, timeout=10)
# geocode() returns None when no match is found; fail with a clear message
# instead of an AttributeError on the attribute reads below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [6]:
# Keep only the Manhattan rows and renumber them from zero.
manhattan_data = (
    neighborhoods
    .loc[neighborhoods['Borough'].eq('Manhattan')]
    .reset_index(drop=True)
)
manhattan_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688


## Display Locations on Map

In [7]:
# Base map centred on the geocoded New York City coordinates.
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# Draw one circle marker per Manhattan neighbourhood; the popup shows
# "<neighbourhood>, <borough>".
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in manhattan_data[marker_columns].itertuples(index=False, name=None):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # NOTE(review): parse_html is already set on the Popup above; confirm
        # whether CircleMarker does anything with this keyword.
        parse_html=False,
    )
    marker.add_to(map_newyork)

# Last expression of the cell, so the notebook renders the map.
map_newyork

In [8]:
def getNearbyVenues(names, latitudes, longitudes, radius=5000, categoryIds=''):
    """Search Foursquare for venues around each given location.

    One request is sent to the ``venues/search`` endpoint per
    (name, latitude, longitude) triple. The module-level ``CLIENT_ID``,
    ``CLIENT_SECRET``, ``VERSION`` and ``LIMIT`` values are read at call time
    and sent as query parameters.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences describing the locations to search around.
    radius : number, default 5000
        Sent unchanged as the ``radius`` query parameter.
    categoryIds : str, default ''
        When non-empty, sent as the ``categoryId`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per venue that has at least one category, with the columns
        'Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude',
        'Venue', 'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame has these columns even when no venue was found.

    Notes
    -----
    A location whose request fails, or whose reply lacks the expected
    ``response.venues`` payload, is reported on stdout and skipped so the
    remaining locations are still collected. The request URL is never printed
    because it carries the client secret.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/search'

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and escape the query string instead of
        # formatting it by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        if categoryIds != '':
            params['categoryId'] = categoryIds

        try:
            reply = requests.get(endpoint, params=params, timeout=30)
            reply.raise_for_status()
            results = reply.json()['response']['venues']
        except (requests.RequestException, KeyError, TypeError, ValueError) as err:
            # Report only the exception type: HTTP error messages embed the
            # full URL, credentials included.
            print('Venue search failed for {} ({}); skipping.'.format(name, type(err).__name__))
            continue

        for v in results:
            try:
                # The tuple is fully built before append runs, so an
                # incomplete venue never leaves a partial row behind.
                venue_rows.append((
                    name,
                    lat,
                    lng,
                    v['name'],
                    v['location']['lat'],
                    v['location']['lng'],
                    v['categories'][0]['name'],
                ))
            except (KeyError, IndexError, TypeError):
                # Venue without a category (or missing a required field).
                continue

    return pd.DataFrame(venue_rows, columns=columns)

## Using the Foursquare API to get the desired location details

In [9]:
import os

# Venue-search settings used by the Foursquare requests.
# NOTE(review): the recorded run returned 50 venues per neighbourhood, so the
# API appears to cap results below this value - confirm against the API docs.
LIMIT = 500
# NOTE(review): the visible venue-search call passes its own literal radius
# rather than this variable - confirm whether this is still needed.
radius = 1000
VERSION = '20210317'

# Credentials are read from the environment so that secrets never live in the
# notebook source or its history. Any key pair that was previously committed
# in this cell should be treated as compromised and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET')
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        'Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET '
        'environment variables before running this notebook.'
    )

In [10]:
# Foursquare category reference: https://developer.foursquare.com/docs/resources/categories
# Category id used below for "Food": 4d4b7105d754a06374d81259
neighborhoods = (
    neighborhoods
    .loc[neighborhoods['Borough'].eq('Manhattan')]
    .reset_index(drop=True)
)
newyork_venues_food = getNearbyVenues(
    names=neighborhoods['Neighborhood'],
    latitudes=neighborhoods['Latitude'],
    longitudes=neighborhoods['Longitude'],
    radius=1000,
    categoryIds='4d4b7105d754a06374d81259',
)
newyork_venues_food.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Marble Hill,40.876551,-73.91066,Dunkin',40.879296,-73.905135,Café
1,Marble Hill,40.876551,-73.91066,Bello Deli,40.869624,-73.917666,Deli / Bodega
2,Marble Hill,40.876551,-73.91066,Twin Donut,40.871077,-73.914474,Donut Shop
3,Marble Hill,40.876551,-73.91066,Dunkin',40.877136,-73.906666,Donut Shop
4,Marble Hill,40.876551,-73.91066,Saber Halal Food,40.872161,-73.908727,Food Truck


In [11]:
# (rows, columns) of the collected food-venue table.
newyork_venues_food.shape

(2000, 7)

In [12]:
def addToMap(df, color, existingMap):
    """Draw one circle marker per row of ``df`` onto ``existingMap``.

    ``df`` must provide the columns 'Venue Latitude', 'Venue Longitude',
    'Neighborhood', 'Venue' and 'Venue Category'. Each marker is outlined and
    filled with ``color`` and carries a popup reading
    "<venue> (<category>) - <neighborhood>". The map is modified in place and
    nothing is returned.
    """
    venue_fields = zip(
        df['Venue Latitude'],
        df['Venue Longitude'],
        df['Neighborhood'],
        df['Venue'],
        df['Venue Category'],
    )
    for venue_lat, venue_lng, area, venue_name, category in venue_fields:
        popup = folium.Popup(
            '{} ({}) - {}'.format(venue_name, category, area),
            parse_html=True,
        )
        marker = folium.CircleMarker(
            [venue_lat, venue_lng],
            radius=5,
            popup=popup,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
        )
        marker.add_to(existingMap)

In [13]:
# Fresh base map centred on the geocoded coordinates, with every collected
# food venue overlaid as a red marker.
map_newyork_food = folium.Map(location=[latitude, longitude], zoom_start=10)
addToMap(newyork_venues_food, 'red', map_newyork_food)
# Last expression of the cell, so the notebook renders the map.
map_newyork_food

In [14]:
def addColumn(startDf, columnTitle, dataDf):
    """Add a per-neighbourhood venue-count column to ``startDf`` in place.

    Parameters
    ----------
    startDf : pandas.DataFrame
        Frame with a 'Neighborhood' column; it receives the new column.
    columnTitle : str
        Name of the column to create (or overwrite) on ``startDf``.
    dataDf : pandas.DataFrame
        Venue table with 'Neighborhood' and 'Venue' columns. Non-null 'Venue'
        entries are counted per neighbourhood.

    Returns
    -------
    None
        ``startDf`` is modified in place.

    Raises
    ------
    KeyError
        If a required column is missing. Previously a bare ``except`` turned
        such mistakes into a column of zeros.

    Neighbourhoods that do not occur in ``dataDf`` get a count of 0.
    """
    venue_counts = dataDf.groupby('Neighborhood')['Venue'].count()
    # A single vectorised lookup replaces the row-by-row loop; unmatched
    # neighbourhoods come back as NaN and are filled with 0.
    startDf[columnTitle] = (
        startDf['Neighborhood'].map(venue_counts).fillna(0).astype(int)
    )

In [15]:
# Count the collected venues per neighbourhood (non-null entries per column).
manhattan_grouped = newyork_venues_food.groupby('Neighborhood').count()
manhattan_grouped
# NOTE(review): the disabled line below refers to `newyork_venues_sushi`,
# which is not defined in the visible notebook - likely a leftover; confirm
# and delete.
#print('There are {} uniques categories.'.format(len(newyork_venues_sushi['Venue Category'].unique())))

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Battery Park City,50,50,50,50,50,50
Carnegie Hill,50,50,50,50,50,50
Central Harlem,50,50,50,50,50,50
Chelsea,50,50,50,50,50,50
Chinatown,50,50,50,50,50,50
Civic Center,50,50,50,50,50,50
Clinton,50,50,50,50,50,50
East Harlem,50,50,50,50,50,50
East Village,50,50,50,50,50,50
Financial District,50,50,50,50,50,50


## One Hot Encoding for Location Analysis

In [16]:
# One-hot encode the venue category: one indicator column per category,
# named after the category itself (no prefix).
manhattan_onehot = pd.get_dummies(
    newyork_venues_food[['Venue Category']],
    prefix="",
    prefix_sep="",
)

# Re-attach the neighbourhood each venue row belongs to.
manhattan_onehot['Neighborhood'] = newyork_venues_food['Neighborhood']

# Rotate the last column to the front so the neighbourhood reads first.
fixed_columns = list(manhattan_onehot.columns)
fixed_columns.insert(0, fixed_columns.pop())
manhattan_onehot = manhattan_onehot[fixed_columns]

manhattan_onehot.head()

Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Asian Restaurant,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bar,Beer Garden,Bookstore,Breakfast Spot,Bubble Tea Shop,Burger Joint,Cafeteria,Café,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Caucasian Restaurant,Chinese Restaurant,Churrascaria,Club House,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Convenience Store,Cuban Restaurant,Deli / Bodega,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop,Dumpling Restaurant,Egyptian Restaurant,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Fast Food Restaurant,Filipino Restaurant,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gastropub,German Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Halal Restaurant,Health Food Store,Hobby Shop,Hot Dog Joint,Hotel,Ice Cream Shop,Indian Restaurant,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Korean Restaurant,Latin American Restaurant,Lounge,Mac & Cheese Joint,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Moroccan Restaurant,New American Restaurant,Noodle House,Office,Paella Restaurant,Pastry Shop,Peruvian Restaurant,Pet Café,Pizza Place,Poke Place,Pub,Ramen Restaurant,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Snack Place,South Indian Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Steakhouse,Supermarket,Sushi Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Tex-Mex Restaurant,Thai Restaurant,Theme Restaurant,Turkish Restaurant,Udon Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wings Joint
0,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [17]:
# Average the one-hot rows per neighbourhood: each value becomes the share of
# that neighbourhood's venues that fall in the category.
# Note: this rebinds `manhattan_grouped`, which an earlier cell assigned the
# per-neighbourhood counts.
manhattan_grouped = manhattan_onehot.groupby('Neighborhood').mean().reset_index()
manhattan_grouped

Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Asian Restaurant,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bar,Beer Garden,Bookstore,Breakfast Spot,Bubble Tea Shop,Burger Joint,Cafeteria,Café,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Caucasian Restaurant,Chinese Restaurant,Churrascaria,Club House,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Convenience Store,Cuban Restaurant,Deli / Bodega,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop,Dumpling Restaurant,Egyptian Restaurant,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Fast Food Restaurant,Filipino Restaurant,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gastropub,German Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Halal Restaurant,Health Food Store,Hobby Shop,Hot Dog Joint,Hotel,Ice Cream Shop,Indian Restaurant,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Korean Restaurant,Latin American Restaurant,Lounge,Mac & Cheese Joint,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Moroccan Restaurant,New American Restaurant,Noodle House,Office,Paella Restaurant,Pastry Shop,Peruvian Restaurant,Pet Café,Pizza Place,Poke Place,Pub,Ramen Restaurant,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Snack Place,South Indian Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Steakhouse,Supermarket,Sushi Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Tex-Mex Restaurant,Thai Restaurant,Theme Restaurant,Turkish Restaurant,Udon Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wings Joint
0,Battery Park City,0.0,0.02,0.0,0.0,0.02,0.02,0.04,0.02,0.02,0.0,0.0,0.02,0.06,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.18,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.06,0.0,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0
1,Carnegie Hill,0.0,0.02,0.0,0.0,0.0,0.08,0.06,0.08,0.0,0.0,0.0,0.0,0.02,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Central Harlem,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.04,0.0,0.02,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.08,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.04,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02
3,Chelsea,0.0,0.1,0.0,0.0,0.0,0.04,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.02,0.24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.06,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0
4,Chinatown,0.0,0.0,0.02,0.02,0.0,0.0,0.14,0.0,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.06,0.0,0.02,0.04,0.12,0.0,0.0,0.0,0.0,0.02,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.04,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Civic Center,0.0,0.02,0.02,0.0,0.0,0.02,0.1,0.0,0.0,0.0,0.0,0.04,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.02,0.14,0.0,0.0,0.0,0.0,0.04,0.06,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.04,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0
6,Clinton,0.0,0.06,0.0,0.0,0.0,0.06,0.06,0.02,0.0,0.0,0.0,0.0,0.04,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.28,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.02,0.0,0.02,0.02,0.0,0.0,0.0,0.02,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,East Harlem,0.0,0.02,0.0,0.0,0.02,0.04,0.08,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.08,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.12,0.0,0.0,0.0,0.06,0.02,0.0,0.04,0.1,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,East Village,0.0,0.02,0.0,0.0,0.0,0.04,0.08,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.02,0.14,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.06,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.02,0.0,0.02,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0
9,Financial District,0.0,0.0,0.0,0.0,0.02,0.02,0.02,0.02,0.02,0.02,0.0,0.02,0.06,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.18,0.0,0.0,0.0,0.02,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.08,0.0,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first element of ``row`` is skipped (the caller passes a row whose
    first entry is the neighbourhood name). The remaining entries are sorted
    in descending order and the index labels of the first ``num_top_venues``
    are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [19]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in range(num_top_venues):
    # Ranks 1-3 take their own suffix and every later rank uses 'th'. An
    # explicit bounds check replaces the former bare `except`, which would
    # have hidden any unrelated error as well.
    # NOTE(review): ranks such as 21 or 22 would still read '21th'/'22th';
    # irrelevant while num_top_venues stays at 10.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = manhattan_grouped['Neighborhood']

# Fill each row with that neighbourhood's top categories, most frequent first.
for ind in range(manhattan_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(manhattan_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Battery Park City,Coffee Shop,Mexican Restaurant,Burger Joint,Food Court,Fast Food Restaurant,Bakery,Grocery Store,Taco Place,Salad Place,New American Restaurant
1,Carnegie Hill,Coffee Shop,Café,Bagel Shop,Bar,Bakery,Ice Cream Shop,German Restaurant,Dessert Shop,Deli / Bodega,Gourmet Shop
2,Central Harlem,Coffee Shop,Donut Shop,American Restaurant,Fried Chicken Joint,Burger Joint,Mexican Restaurant,Fast Food Restaurant,Seafood Restaurant,Caribbean Restaurant,Southern / Soul Food Restaurant
3,Chelsea,Coffee Shop,American Restaurant,Italian Restaurant,Bakery,Café,French Restaurant,New American Restaurant,Bagel Shop,Seafood Restaurant,Gourmet Shop
4,Chinatown,Bakery,Coffee Shop,Dim Sum Restaurant,Chinese Restaurant,Bookstore,Italian Restaurant,Bubble Tea Shop,Food Court,Pizza Place,Thai Restaurant


## Forming Clusters of Neighborhoods

In [20]:
# set number of clusters
kclusters = 5

# K-means needs purely numeric features, so drop the label column. The
# `columns=` keyword is used because pandas 2.0 no longer accepts the axis as
# a second positional argument.
manhattan_grouped_clustering = manhattan_grouped.drop(columns='Neighborhood')

# run k-means clustering
# n_init is pinned to 10 (scikit-learn's long-standing default) so the labels
# do not change on releases where the default became 'auto'.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(manhattan_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([4, 1, 0, 1, 2, 2, 1, 0, 0, 4], dtype=int32)

In [21]:
# add clustering labels
# DataFrame.insert raises when the column already exists, which made this
# cell fail on a second run; overwrite the labels in that case instead.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Left-join the cluster label and ranked venue columns onto the Manhattan
# coordinates. A neighbourhood with no row in the venue table keeps NaN in
# the joined columns.
manhattan_merged = manhattan_data.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

manhattan_merged.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Manhattan,Marble Hill,40.876551,-73.91066,0,Donut Shop,Deli / Bodega,Bagel Shop,Diner,Pizza Place,Café,Fried Chicken Joint,Coffee Shop,Mexican Restaurant,Food Truck
1,Manhattan,Chinatown,40.715618,-73.994279,2,Bakery,Coffee Shop,Dim Sum Restaurant,Chinese Restaurant,Bookstore,Italian Restaurant,Bubble Tea Shop,Food Court,Pizza Place,Thai Restaurant
2,Manhattan,Washington Heights,40.851903,-73.9369,0,Deli / Bodega,Café,Pizza Place,New American Restaurant,Latin American Restaurant,Donut Shop,Coffee Shop,Sandwich Place,Ramen Restaurant,Seafood Restaurant
3,Manhattan,Inwood,40.867684,-73.92121,0,Deli / Bodega,Restaurant,Fast Food Restaurant,Café,Bakery,Latin American Restaurant,Spanish Restaurant,Mexican Restaurant,Donut Shop,Coffee Shop
4,Manhattan,Hamilton Heights,40.823604,-73.949688,0,Coffee Shop,American Restaurant,Fried Chicken Joint,Fast Food Restaurant,Ethiopian Restaurant,Pizza Place,Cocktail Bar,Italian Restaurant,Donut Shop,Spanish Restaurant


In [22]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour each
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
# Rows without a cluster label (NaN after the left join) cannot be coloured,
# so they are left off the map instead of crashing the list lookup below.
clustered = manhattan_merged.dropna(subset=['Cluster Labels'])
for lat, lon, poi, cluster in zip(clustered['Latitude'], clustered['Longitude'], clustered['Neighborhood'], clustered['Cluster Labels']):
    # Labels may arrive as floats once NaN has been present in the column.
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # Index the palette by the label itself; the former `cluster-1` only
        # worked for cluster 0 through negative-index wraparound.
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [23]:
# Neighbourhoods in cluster 0: keep the name column (position 1) and every
# ranked-venue column (position 5 onwards).
manhattan_merged.loc[
    manhattan_merged['Cluster Labels'].eq(0),
    manhattan_merged.columns[[1, *range(5, manhattan_merged.shape[1])]],
]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Marble Hill,Donut Shop,Deli / Bodega,Bagel Shop,Diner,Pizza Place,Café,Fried Chicken Joint,Coffee Shop,Mexican Restaurant,Food Truck
2,Washington Heights,Deli / Bodega,Café,Pizza Place,New American Restaurant,Latin American Restaurant,Donut Shop,Coffee Shop,Sandwich Place,Ramen Restaurant,Seafood Restaurant
3,Inwood,Deli / Bodega,Restaurant,Fast Food Restaurant,Café,Bakery,Latin American Restaurant,Spanish Restaurant,Mexican Restaurant,Donut Shop,Coffee Shop
4,Hamilton Heights,Coffee Shop,American Restaurant,Fried Chicken Joint,Fast Food Restaurant,Ethiopian Restaurant,Pizza Place,Cocktail Bar,Italian Restaurant,Donut Shop,Spanish Restaurant
5,Manhattanville,Coffee Shop,American Restaurant,Fried Chicken Joint,Sandwich Place,Deli / Bodega,Spanish Restaurant,Italian Restaurant,Fast Food Restaurant,Bakery,Donut Shop
6,Central Harlem,Coffee Shop,Donut Shop,American Restaurant,Fried Chicken Joint,Burger Joint,Mexican Restaurant,Fast Food Restaurant,Seafood Restaurant,Caribbean Restaurant,Southern / Soul Food Restaurant
7,East Harlem,Coffee Shop,Donut Shop,Bakery,Fast Food Restaurant,Café,Deli / Bodega,Sandwich Place,Diner,Bagel Shop,Pizza Place
19,East Village,Coffee Shop,Pizza Place,Bakery,Deli / Bodega,Italian Restaurant,Fast Food Restaurant,Bagel Shop,Sandwich Place,Café,French Restaurant
25,Manhattan Valley,Coffee Shop,Café,Bagel Shop,Pizza Place,Italian Restaurant,Donut Shop,Bakery,Indian Restaurant,Diner,Food Truck
37,Stuyvesant Town,Coffee Shop,Deli / Bodega,Bagel Shop,Bakery,Seafood Restaurant,Korean Restaurant,American Restaurant,Fast Food Restaurant,Italian Restaurant,Pizza Place


In [24]:
# Neighbourhoods in cluster 1: keep the name column (position 1) and every
# ranked-venue column (position 5 onwards).
manhattan_merged.loc[
    manhattan_merged['Cluster Labels'].eq(1),
    manhattan_merged.columns[[1, *range(5, manhattan_merged.shape[1])]],
]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Upper East Side,Coffee Shop,Bakery,Café,French Restaurant,Burger Joint,American Restaurant,Thai Restaurant,Italian Restaurant,Salad Place,Health Food Store
9,Yorkville,Coffee Shop,Bagel Shop,Bar,Bakery,Café,American Restaurant,Burger Joint,Ice Cream Shop,Food Truck,French Restaurant
10,Lenox Hill,Coffee Shop,Bakery,French Restaurant,Café,American Restaurant,Food Truck,Thai Restaurant,Burger Joint,Salad Place,Dessert Shop
12,Upper West Side,Coffee Shop,Café,American Restaurant,Bagel Shop,Salad Place,Bakery,Dumpling Restaurant,French Restaurant,Comfort Food Restaurant,Italian Restaurant
13,Lincoln Square,Coffee Shop,Bakery,Café,Mexican Restaurant,American Restaurant,Salad Place,Fast Food Restaurant,Taco Place,Bagel Shop,French Restaurant
14,Clinton,Coffee Shop,Café,American Restaurant,Bagel Shop,Bakery,Burger Joint,Taco Place,Food Court,Poke Place,Salad Place
17,Chelsea,Coffee Shop,American Restaurant,Italian Restaurant,Bakery,Café,French Restaurant,New American Restaurant,Bagel Shop,Seafood Restaurant,Gourmet Shop
24,West Village,Coffee Shop,Italian Restaurant,Bakery,French Restaurant,American Restaurant,New American Restaurant,Café,Donut Shop,Cafeteria,Bagel Shop
26,Morningside Heights,Coffee Shop,American Restaurant,Pizza Place,Café,Bagel Shop,Bakery,Italian Restaurant,Bar,Burger Joint,Salad Place
30,Carnegie Hill,Coffee Shop,Café,Bagel Shop,Bar,Bakery,Ice Cream Shop,German Restaurant,Dessert Shop,Deli / Bodega,Gourmet Shop


In [25]:
# Neighbourhoods in cluster 2: keep the name column (position 1) and every
# ranked-venue column (position 5 onwards).
manhattan_merged.loc[
    manhattan_merged['Cluster Labels'].eq(2),
    manhattan_merged.columns[[1, *range(5, manhattan_merged.shape[1])]],
]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Chinatown,Bakery,Coffee Shop,Dim Sum Restaurant,Chinese Restaurant,Bookstore,Italian Restaurant,Bubble Tea Shop,Food Court,Pizza Place,Thai Restaurant
18,Greenwich Village,Coffee Shop,Italian Restaurant,Bakery,Café,Bagel Shop,Cocktail Bar,New American Restaurant,Cafeteria,French Restaurant,Thai Restaurant
20,Lower East Side,Bakery,Pizza Place,Vietnamese Restaurant,Bagel Shop,Coffee Shop,Chinese Restaurant,Food Court,Fast Food Restaurant,Italian Restaurant,Thai Restaurant
21,Tribeca,Coffee Shop,Bakery,Italian Restaurant,Japanese Restaurant,Cocktail Bar,French Restaurant,Bubble Tea Shop,New American Restaurant,Dessert Shop,Cafeteria
22,Little Italy,Coffee Shop,Bakery,Chinese Restaurant,Thai Restaurant,Food Court,Dessert Shop,French Restaurant,Bookstore,Cafeteria,Cocktail Bar
23,Soho,Coffee Shop,Bakery,Italian Restaurant,French Restaurant,Chinese Restaurant,Cocktail Bar,Dessert Shop,Cafeteria,Japanese Restaurant,Dim Sum Restaurant
31,Noho,Coffee Shop,Bakery,Pizza Place,Italian Restaurant,Cocktail Bar,Chinese Restaurant,Fast Food Restaurant,Food Court,Bookstore,French Restaurant
32,Civic Center,Coffee Shop,Bakery,Chinese Restaurant,Dim Sum Restaurant,Italian Restaurant,Bubble Tea Shop,Restaurant,French Restaurant,Food Court,Thai Restaurant


In [26]:
# Neighbourhoods in cluster 3: keep the name column (position 1) and every
# ranked-venue column (position 5 onwards).
manhattan_merged.loc[
    manhattan_merged['Cluster Labels'].eq(3),
    manhattan_merged.columns[[1, *range(5, manhattan_merged.shape[1])]],
]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,Midtown,Coffee Shop,Korean Restaurant,Bakery,American Restaurant,Steakhouse,Bagel Shop,Donut Shop,Burger Joint,Fast Food Restaurant,Churrascaria
16,Murray Hill,Korean Restaurant,Coffee Shop,Café,American Restaurant,Bakery,Steakhouse,Burger Joint,Japanese Restaurant,Pastry Shop,Pizza Place
27,Gramercy,Coffee Shop,Korean Restaurant,American Restaurant,Bakery,Deli / Bodega,Italian Restaurant,Chinese Restaurant,Pizza Place,Café,Fast Food Restaurant
33,Midtown South,Coffee Shop,Korean Restaurant,Café,Bakery,Burger Joint,Fast Food Restaurant,Italian Restaurant,Steakhouse,Japanese Restaurant,Bagel Shop
36,Tudor City,Coffee Shop,Korean Restaurant,American Restaurant,Bakery,Donut Shop,Café,Burger Joint,Mexican Restaurant,Fast Food Restaurant,Chinese Restaurant
38,Flatiron,Coffee Shop,Bakery,American Restaurant,Korean Restaurant,Italian Restaurant,Donut Shop,French Restaurant,Café,Bagel Shop,Thai Restaurant


In [27]:
# Neighbourhoods in cluster 4: keep the name column (position 1) and every
# ranked-venue column (position 5 onwards).
manhattan_merged.loc[
    manhattan_merged['Cluster Labels'].eq(4),
    manhattan_merged.columns[[1, *range(5, manhattan_merged.shape[1])]],
]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Roosevelt Island,Coffee Shop,Sushi Restaurant,Mexican Restaurant,Chinese Restaurant,Diner,Café,Deli / Bodega,Burger Joint,Thai Restaurant,Pizza Place
28,Battery Park City,Coffee Shop,Mexican Restaurant,Burger Joint,Food Court,Fast Food Restaurant,Bakery,Grocery Store,Taco Place,Salad Place,New American Restaurant
29,Financial District,Coffee Shop,Mexican Restaurant,Burger Joint,Fast Food Restaurant,Food Court,New American Restaurant,Taco Place,Grocery Store,Salad Place,Cocktail Bar
