# Where to open a new Chinese Restaurant in New York

## Import libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# Show every column and every row when a DataFrame is displayed (no truncation).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

# NOTE(review): the install below is active, so it runs on every execution of this cell;
# comment it out once geopy is available in the environment.
!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

# Confirmation message once every import above has succeeded.
print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2020.4.5.2 |       hecda079_0         147 KB  conda-forge
    certifi-2020.4.5.2         |   py36h9f0ad1d_0         152 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.22.0               |     pyh9f0ad1d_0          63 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         395 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-1.22.0-pyh9f0ad1d_0

The following packages will b

## Open and clean the New York borough/ZIP code mapping file
### Data downloaded from Kaggle

In [2]:
# Read the ZIP-to-borough lookup table from the local CSV and preview it.
zip_borough_csv = 'datasets_41589_68656_zip_borough.csv'
df = pd.read_csv(zip_borough_csv)
df.head()

Unnamed: 0,zip,borough
0,10001,Manhattan
1,10002,Manhattan
2,10003,Manhattan
3,10004,Manhattan
4,10005,Manhattan


In [3]:
# Keep only the borough and ZIP columns (borough first) and rename the ZIP
# column to 'ZIPCodes' so it matches the key used for the later merge.
df = df[['borough', 'zip']].rename(columns={'zip': 'ZIPCodes'})
df.head()

Unnamed: 0,borough,ZIPCodes
0,Manhattan,10001
1,Manhattan,10002
2,Manhattan,10003
3,Manhattan,10004
4,Manhattan,10005


## Download and clean New York demographic information by ZIP code

In [4]:
# Fetch the demographics JSON from data.cityofnewyork.us quietly (-q) and
# save it locally as 'newyork_data.json' (-O).
!wget -q -O 'newyork_data.json' https://data.cityofnewyork.us/resource/kku6-nxdu.json
print('Data downloaded!') 

Data downloaded!


In [5]:
# Parse the downloaded demographics file into Python objects.
with open('newyork_data.json') as demo_file:
    newyorkdemo = json.load(demo_file)

In [6]:
# Turn the parsed JSON into a DataFrame; the name `newyorkdemo` is rebound
# from the parsed JSON object to the resulting frame.
newyorkdemo=pd.DataFrame(newyorkdemo)

In [7]:
# Preview the first rows of the demographics table.
newyorkdemo.head()

Unnamed: 0,jurisdiction_name,count_participants,count_female,percent_female,count_male,percent_male,count_gender_unknown,percent_gender_unknown,count_gender_total,percent_gender_total,count_pacific_islander,percent_pacific_islander,count_hispanic_latino,percent_hispanic_latino,count_american_indian,percent_american_indian,count_asian_non_hispanic,percent_asian_non_hispanic,count_white_non_hispanic,percent_white_non_hispanic,count_black_non_hispanic,percent_black_non_hispanic,count_other_ethnicity,percent_other_ethnicity,count_ethnicity_unknown,percent_ethnicity_unknown,count_ethnicity_total,percent_ethnicity_total,count_permanent_resident_alien,percent_permanent_resident_alien,count_us_citizen,percent_us_citizen,count_other_citizen_status,percent_other_citizen_status,count_citizen_status_unknown,percent_citizen_status_unknown,count_citizen_status_total,percent_citizen_status_total,count_receives_public_assistance,percent_receives_public_assistance,count_nreceives_public_assistance,percent_nreceives_public_assistance,count_public_assistance_unknown,percent_public_assistance_unknown,count_public_assistance_total,percent_public_assistance_total
0,10001,44,22,0.5,22,0.5,0,0,44,100,0,0,16,0.36,0,0,3,0.07,1,0.02,21,0.48,3,0.07,0,0,44,100,2,0.05,42,0.95,0,0,0,0,44,100,20,0.45,24,0.55,0,0,44,100
1,10002,35,19,0.54,16,0.46,0,0,35,100,0,0,1,0.03,0,0,28,0.8,6,0.17,0,0.0,0,0.0,0,0,35,100,2,0.06,33,0.94,0,0,0,0,35,100,2,0.06,33,0.94,0,0,35,100
2,10003,1,1,1.0,0,0.0,0,0,1,100,0,0,0,0.0,0,0,1,1.0,0,0.0,0,0.0,0,0.0,0,0,1,100,0,0.0,1,1.0,0,0,0,0,1,100,0,0.0,1,1.0,0,0,1,100
3,10004,0,0,0.0,0,0.0,0,0,0,0,0,0,0,0.0,0,0,0,0.0,0,0.0,0,0.0,0,0.0,0,0,0,0,0,0.0,0,0.0,0,0,0,0,0,0,0,0.0,0,0.0,0,0,0,0
4,10005,2,2,1.0,0,0.0,0,0,2,100,0,0,0,0.0,0,0,1,0.5,0,0.0,1,0.5,0,0.0,0,0,2,100,1,0.5,1,0.5,0,0,0,0,2,100,0,0.0,2,1.0,0,0,2,100


In [8]:
# Keep the ZIP identifier, the participant total and the per-ethnicity counts.
ethnicity_count_columns = [
    'jurisdiction_name',
    'count_participants',
    'count_pacific_islander',
    'count_hispanic_latino',
    'count_american_indian',
    'count_asian_non_hispanic',
    'count_white_non_hispanic',
    'count_black_non_hispanic',
    'count_other_ethnicity',
    'count_ethnicity_unknown',
]
newyorkdemo1 = newyorkdemo[ethnicity_count_columns]

In [9]:
# Align the key name with the borough table and cast every column to int
# (the JSON values arrive as strings), then show the resulting dtypes.
newyorkdemo1 = (
    newyorkdemo1
    .rename(columns={'jurisdiction_name': 'ZIPCodes'})
    .astype(int)
)
newyorkdemo1.dtypes

ZIPCodes                    int64
count_participants          int64
count_pacific_islander      int64
count_hispanic_latino       int64
count_american_indian       int64
count_asian_non_hispanic    int64
count_white_non_hispanic    int64
count_black_non_hispanic    int64
count_other_ethnicity       int64
count_ethnicity_unknown     int64
dtype: object

In [10]:
# Attach the borough to every ZIP-level demographics row (inner join on ZIPCodes).
df_merge = df.merge(newyorkdemo1, on='ZIPCodes')
df_merge.shape

(176, 11)

In [11]:
# Aggregate the ZIP-level counts to one row per borough.
# Note: the ZIPCodes column is summed as well; that total has no meaning.
df_merge = df_merge.groupby('borough').sum()
df_merge

Unnamed: 0_level_0,ZIPCodes,count_participants,count_pacific_islander,count_hispanic_latino,count_american_indian,count_asian_non_hispanic,count_white_non_hispanic,count_black_non_hispanic,count_other_ethnicity,count_ethnicity_unknown
borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Bronx,251105,680,2,306,3,13,14,309,29,4
Brooklyn,426403,1236,0,48,0,44,943,123,70,8
Manhattan,461960,224,0,66,0,62,11,76,8,1
Queens,648938,83,0,11,0,3,53,12,3,1
Staten,113373,1,0,0,0,1,0,0,0,0


In [12]:
# Remove the meaningless summed ZIP column. errors='ignore' keeps this cell
# safe to re-run after the column has already been dropped.
df_merge = df_merge.drop(columns='ZIPCodes', errors='ignore')

In [13]:
# Express every count as a share of the borough's participant total
# (row-wise division; both operands share the borough index).
df_per = df_merge.div(df_merge['count_participants'], axis='index')

In [14]:
# Rank boroughs from highest to lowest Asian (non-Hispanic) share.
df_per.sort_values('count_asian_non_hispanic', ascending=False)

Unnamed: 0_level_0,count_participants,count_pacific_islander,count_hispanic_latino,count_american_indian,count_asian_non_hispanic,count_white_non_hispanic,count_black_non_hispanic,count_other_ethnicity,count_ethnicity_unknown
borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Staten,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
Manhattan,1.0,0.0,0.294643,0.0,0.276786,0.049107,0.339286,0.035714,0.004464
Queens,1.0,0.0,0.13253,0.0,0.036145,0.638554,0.144578,0.036145,0.012048
Brooklyn,1.0,0.0,0.038835,0.0,0.035599,0.762945,0.099515,0.056634,0.006472
Bronx,1.0,0.002941,0.45,0.004412,0.019118,0.020588,0.454412,0.042647,0.005882


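### Staten Island's 100% Asian share comes from a single participant, so it is not meaningful. Manhattan has the next-highest share (about 28% of 224 participants), so we will pick Manhattan.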

In [15]:
# Fetch the New York neighborhood dataset quietly (-q) and save it locally (-O).
# NOTE(review): this reuses the file name 'newyork_data.json', so it overwrites
# the demographics file downloaded earlier in the notebook.
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded!')

Data downloaded!


In [16]:
# Parse the downloaded neighborhood file into Python objects.
with open('newyork_data.json') as neighborhoods_file:
    newyork_data = json.load(neighborhoods_file)

In [17]:
# The individual neighborhood entries are stored under the top-level 'features' key.
neighborhoods_data = newyork_data['features']

In [18]:
# Inspect the first feature to see which fields each entry carries.
neighborhoods_data[0]

{'type': 'Feature',
 'id': 'nyu_2451_34572.1',
 'geometry': {'type': 'Point',
  'coordinates': [-73.84720052054902, 40.89470517661]},
 'geometry_name': 'geom',
 'properties': {'name': 'Wakefield',
  'stacked': 1,
  'annoline1': 'Wakefield',
  'annoline2': None,
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.84720052054902,
   40.89470517661,
   -73.84720052054902,
   40.89470517661]}}

In [19]:
# Column layout of the neighborhood table: borough, neighborhood name and
# the latitude/longitude of the neighborhood's point.
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# Start with an empty frame that has just these columns; a later cell populates it.
neighborhoods = pd.DataFrame(columns=column_names)

In [20]:
# Display the (still empty) frame to confirm its columns.
neighborhoods

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude


In [22]:
# Build one record per feature, then create the frame in a single step.
# This replaces DataFrame.append inside the loop, which was removed in
# pandas 2.0, copies the frame on every iteration, and duplicated rows
# whenever the cell was re-run.
neighborhood_records = []
for data in neighborhoods_data:
    properties = data['properties']
    # Coordinates are stored as [longitude, latitude].
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_records.append({'Borough': properties['borough'],
                                 'Neighborhood': properties['name'],
                                 'Latitude': neighborhood_latlon[1],
                                 'Longitude': neighborhood_latlon[0]})

# Explicit columns keep the layout stable even if there are no records.
neighborhoods = pd.DataFrame(
    neighborhood_records,
    columns=['Borough', 'Neighborhood', 'Latitude', 'Longitude'],
)

In [23]:
# Restrict the table to Manhattan and renumber the rows from zero.
neighborhoods = (
    neighborhoods
    .loc[neighborhoods['Borough'] == 'Manhattan']
    .reset_index(drop=True)
)
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688


In [24]:
# Report how many distinct neighborhood names remain after filtering.
manhattan_neighborhood_count = len(neighborhoods['Neighborhood'].unique())
print('Manhattan has {} neighborhoods.'.format(manhattan_neighborhood_count))

Manhattan has 40 neighborhoods.


## Analyze venues in the different neighborhoods

In [21]:
import os

# Foursquare API credentials are read from the environment so that no secret
# is stored in the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hardcoded here was committed in
# plain text and should be rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20200604'  # API version date sent as the `v` request parameter

if not (CLIENT_ID and CLIENT_SECRET):
    # Warn early instead of failing later inside the first API request.
    print('Warning: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will fail.')

In [25]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=100):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Neighborhood name and the coordinates to search around.
    radius : int, default 500
        Value sent as the ``radius`` request parameter.
    limit : int, default 100
        Value sent as the ``limit`` request parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but has these columns) when nothing is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': limit,
        }
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params,
                                timeout=30)
        # Surface quota/credential problems as a clear HTTP error rather than
        # a KeyError while digging through the payload.
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0]['items'] if groups else []

        for item in items:
            venue = item['venue']
            # A venue may come back without any category; record None
            # instead of raising IndexError.
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            venue_rows.append((name,
                               lat,
                               lng,
                               venue['name'],
                               venue['location']['lat'],
                               venue['location']['lng'],
                               category))

    # Passing the columns up front keeps this valid when no venues were found.
    return pd.DataFrame(venue_rows, columns=columns)

In [26]:
# Collect nearby venues for every Manhattan neighborhood
# (arguments in order: names, latitudes, longitudes).
manhattan_venues = getNearbyVenues(
    neighborhoods['Neighborhood'],
    neighborhoods['Latitude'],
    neighborhoods['Longitude'],
)

Marble Hill
Chinatown
Washington Heights
Inwood
Hamilton Heights
Manhattanville
Central Harlem
East Harlem
Upper East Side
Yorkville
Lenox Hill
Roosevelt Island
Upper West Side
Lincoln Square
Clinton
Midtown
Murray Hill
Chelsea
Greenwich Village
East Village
Lower East Side
Tribeca
Little Italy
Soho
West Village
Manhattan Valley
Morningside Heights
Gramercy
Battery Park City
Financial District
Carnegie Hill
Noho
Civic Center
Midtown South
Sutton Place
Turtle Bay
Tudor City
Stuyvesant Town
Flatiron
Hudson Yards


In [27]:
# Preview the first venues that were returned.
manhattan_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Marble Hill,40.876551,-73.91066,Arturo's,40.874412,-73.910271,Pizza Place
1,Marble Hill,40.876551,-73.91066,Bikram Yoga,40.876844,-73.906204,Yoga Studio
2,Marble Hill,40.876551,-73.91066,Tibbett Diner,40.880404,-73.908937,Diner
3,Marble Hill,40.876551,-73.91066,Starbucks,40.877531,-73.905582,Coffee Shop
4,Marble Hill,40.876551,-73.91066,Dunkin',40.877136,-73.906666,Donut Shop


In [75]:
# Count the returned venues per neighborhood and show the result as a flat table.
venues_per_neighborhood = manhattan_venues.groupby('Neighborhood').count()
venues_per_neighborhood.reset_index()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Battery Park City,66,66,66,66,66,66
1,Carnegie Hill,87,87,87,87,87,87
2,Central Harlem,45,45,45,45,45,45
3,Chelsea,100,100,100,100,100,100
4,Chinatown,100,100,100,100,100,100
5,Civic Center,99,99,99,99,99,99
6,Clinton,100,100,100,100,100,100
7,East Harlem,40,40,40,40,40,40
8,East Village,100,100,100,100,100,100
9,Financial District,100,100,100,100,100,100


In [45]:
# One indicator column per venue category (column name = category name).
manhattan_onehot = pd.get_dummies(manhattan_venues[['Venue Category']], prefix="", prefix_sep="")

# If a venue category is itself called 'Neighborhood', inserting the
# neighborhood column below would raise "already exists"; rename the category
# column first so both can coexist.
if 'Neighborhood' in manhattan_onehot.columns:
    manhattan_onehot = manhattan_onehot.rename(
        columns={'Neighborhood': 'Neighborhood (Venue Category)'}
    )

# Put the neighborhood name in the first column; later cells rely on that position.
manhattan_onehot.insert(loc=0, column='Neighborhood', value=manhattan_venues['Neighborhood'])
manhattan_onehot.shape

(3121, 332)

In [46]:
# The mean of the indicator columns is each category's share of a neighborhood's venues.
category_share_by_neighborhood = manhattan_onehot.groupby('Neighborhood').mean()
manhattan_grouped = category_share_by_neighborhood.reset_index()
manhattan_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auditorium,Australian Restaurant,Austrian Restaurant,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Basketball Stadium,Beer Bar,Beer Garden,Beer Store,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bike Trail,Bistro,Board Shop,Boat or Ferry,Bookstore,Boutique,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Bridal Shop,Bridge,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Station,Bus Stop,Butcher,Cafeteria,Café,Cambodian Restaurant,Camera Store,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Caucasian Restaurant,Cha Chaan Teng,Cheese Shop,Chinese Restaurant,Chocolate Shop,Circus,Climbing Gym,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Academic Building,College Arts Building,College Bookstore,College Cafeteria,College Theater,Comedy Club,Comfort Food Restaurant,Community Center,Concert Hall,Convenience Store,Cooking School,Cosmetics Shop,Creperie,Cuban Restaurant,Cultural Center,Cupcake Shop,Cycle Studio,Czech Restaurant,Dance Studio,Daycare,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Doctor's Office,Dog Run,Donut Shop,Drugstore,Dry Cleaner,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space,Exhibit,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Financial or Legal Service,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,General Entertainment,German Restaurant,Gift Shop,Golf 
Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Heliport,High School,Himalayan Restaurant,Historic Site,History Museum,Hobby Shop,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Kitchen Supply Store,Korean Restaurant,Kosher Restaurant,Latin American Restaurant,Laundry Service,Lebanese Restaurant,Library,Lingerie Store,Liquor Store,Lounge,Mac & Cheese Joint,Malay Restaurant,Market,Martial Arts Dojo,Massage Studio,Mattress Store,Medical Center,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Moving Target,Museum,Music School,Music Venue,Nail Salon,New American Restaurant,Newsstand,Nightclub,Non-Profit,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Outdoor Sculpture,Outdoors & Recreation,Paella Restaurant,Paper / Office Supplies Store,Park,Pedestrian Plaza,Performing Arts Venue,Perfume Shop,Persian Restaurant,Peruvian Restaurant,Pet Café,Pet Service,Pet Store,Pharmacy,Photography Studio,Physical Therapist,Pie Shop,Pier,Pilates Studio,Pizza Place,Playground,Plaza,Poke Place,Pool,Pub,Public Art,Ramen Restaurant,Record Shop,Rental Car Location,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,River,Rock Club,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shanghai Restaurant,Shipping Store,Shoe 
Repair,Shoe Store,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soba Restaurant,Soccer Field,Soup Place,South American Restaurant,South Indian Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Steakhouse,Street Art,Strip Club,Supermarket,Supplement Shop,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Tech Startup,Temple,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Tiki Bar,Tourist Information Center,Toy / Game Store,Track,Trail,Train Station,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Volleyball Court,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Battery Park City,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.015152,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.015152,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.015152,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.060606,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.045455,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.121212,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.030303,0.030303,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,0.0,0.0
1,Carnegie Hill,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.011494,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.022989,0.0,0.022989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.045977,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022989,0.091954,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.022989,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.022989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.011494,0.0,0.022989,0.034483,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.011494,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.034483,0.0,0.022989,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.011494,0.0,0.0,0.0,0.011494,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.011494,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.022989,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.022989,0.0,0.0,0.0,0.011494,0.034483,0.0,0.0,0.034483
2,Central Harlem,0.0,0.0,0.0,0.066667,0.044444,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.022222,0.0,0.022222,0.0,0.0,0.044444,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.022222,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.044444,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.044444,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.044444,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.044444,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Chelsea,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.02,0.08,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.01,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0
4,Chinatown,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.04,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.07,0.0,0.0,0.0,0.0,0.0,0.04,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.03,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01


In [54]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the ``num_top_venues`` largest values in ``row``.

    The first entry of ``row`` is skipped (it holds the neighborhood name);
    the remaining entries are ranked in descending order and the labels of
    the top ones are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [55]:
num_top_venues = 15

# Ordinal suffixes for ranks 1-3; every later rank in this table uses 'th'.
indicators = ['st', 'nd', 'rd']

# Column headers: 'Neighborhood' plus one ranked column per top venue.
# An explicit bounds check replaces the former bare `except:`, which would
# also have hidden unrelated errors.
columns = ['Neighborhood']
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# Build all rows first and create the frame once, instead of assigning into
# an empty frame row by row.
top_venue_rows = [
    [row['Neighborhood'], *return_most_common_venues(row, num_top_venues)]
    for _, row in manhattan_grouped.iterrows()
]
neighborhoods_venues_sorted = pd.DataFrame(top_venue_rows, columns=columns)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
0,Battery Park City,Park,Hotel,Coffee Shop,Gym,Memorial Site,Playground,Gourmet Shop,Food Court,Mexican Restaurant,Shopping Mall,Boat or Ferry,Plaza,Electronics Store,Garden,Steakhouse
1,Carnegie Hill,Coffee Shop,Café,Yoga Studio,Bookstore,Gym / Fitness Center,Gym,Italian Restaurant,Pizza Place,Wine Shop,Vietnamese Restaurant,Bakery,Bar,Shipping Store,Cocktail Bar,Grocery Store
2,Central Harlem,African Restaurant,Chinese Restaurant,Seafood Restaurant,Bar,French Restaurant,Gym / Fitness Center,American Restaurant,Park,Cafeteria,Library,Bookstore,Boutique,Market,Cycle Studio,Spa
3,Chelsea,Coffee Shop,Art Gallery,Ice Cream Shop,Café,Bakery,American Restaurant,Cocktail Bar,Theater,Italian Restaurant,Bar,Market,Pizza Place,Nightclub,Cycle Studio,Cupcake Shop
4,Chinatown,Chinese Restaurant,Bakery,Cocktail Bar,Bubble Tea Shop,Spa,Bar,Ice Cream Shop,Coffee Shop,American Restaurant,Optical Shop,Malay Restaurant,Mexican Restaurant,Salon / Barbershop,Sandwich Place,Noodle House


In [49]:
# Number of clusters to split the neighborhoods into.
kclusters = 5

# Features are the category shares only. The keyword form is required because
# pandas 2.0 no longer accepts the axis as a positional argument to drop().
manhattan_grouped_clustering = manhattan_grouped.drop(columns='Neighborhood')

# run k-means clustering; n_init is pinned to the historical default so the
# labels do not change with the installed scikit-learn version
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(manhattan_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([1, 1, 0, 0, 0, 1, 1, 2, 0, 1, 1, 0, 3, 2, 3, 2, 3, 3, 1, 0, 0, 2,
       2, 1, 1, 2, 1, 0, 2, 3, 4, 0, 3, 2, 3, 3, 0, 2, 3, 0], dtype=int32)

In [56]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

manhattan_merged = neighborhoods

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
manhattan_merged = manhattan_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

manhattan_merged.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
0,Manhattan,Marble Hill,40.876551,-73.91066,2,Sandwich Place,Gym,Coffee Shop,Yoga Studio,Pharmacy,Supplement Shop,Steakhouse,Seafood Restaurant,Pizza Place,Deli / Bodega,Bank,Department Store,Diner,Discount Store,Donut Shop
1,Manhattan,Chinatown,40.715618,-73.994279,0,Chinese Restaurant,Bakery,Cocktail Bar,Bubble Tea Shop,Spa,Bar,Ice Cream Shop,Coffee Shop,American Restaurant,Optical Shop,Malay Restaurant,Mexican Restaurant,Salon / Barbershop,Sandwich Place,Noodle House
2,Manhattan,Washington Heights,40.851903,-73.9369,2,Café,Bakery,Mobile Phone Shop,Mexican Restaurant,Donut Shop,Latin American Restaurant,Supermarket,Tapas Restaurant,Sandwich Place,Bank,Chinese Restaurant,Pizza Place,Coffee Shop,Spanish Restaurant,Grocery Store
3,Manhattan,Inwood,40.867684,-73.92121,2,Lounge,Mexican Restaurant,Restaurant,Bakery,Café,Frozen Yogurt Shop,Spanish Restaurant,Caribbean Restaurant,Chinese Restaurant,Park,Deli / Bodega,Wine Bar,American Restaurant,Pizza Place,Diner
4,Manhattan,Hamilton Heights,40.823604,-73.949688,2,Pizza Place,Coffee Shop,Deli / Bodega,Café,Mexican Restaurant,Sandwich Place,Sushi Restaurant,Cocktail Bar,Bakery,Yoga Studio,Chinese Restaurant,Caribbean Restaurant,Indian Restaurant,School,Seafood Restaurant


In [57]:
address = 'Manhattan,NY'

# Look up the map center for Manhattan.
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 40.7896239, -73.9598939.


In [58]:
# Base map centered on the geocoded Manhattan coordinates.
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# One distinct hex color per cluster label (labels run from 0 to kclusters - 1).
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(manhattan_merged['Latitude'], manhattan_merged['Longitude'], manhattan_merged['Neighborhood'], manhattan_merged['Cluster Labels']):
    # A neighborhood without venues has no label after the join; skip it.
    if pd.isna(cluster):
        continue
    # The join turns the column into floats when any label is missing.
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # Index by the label itself; the former `cluster-1` sent label 0 to
        # the last color through negative indexing.
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

# Cluster 1 (neighborhoods with cluster label 0)

In [59]:
# Rows with cluster label 0; keep the neighborhood name (column 1) and every
# column from position 5 onward (the ranked venue columns).
in_first_cluster = manhattan_merged['Cluster Labels'] == 0
venue_column_positions = [1] + list(range(5, manhattan_merged.shape[1]))
cluster1 = manhattan_merged.loc[in_first_cluster, manhattan_merged.columns[venue_column_positions]]
cluster1

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
1,Chinatown,Chinese Restaurant,Bakery,Cocktail Bar,Bubble Tea Shop,Spa,Bar,Ice Cream Shop,Coffee Shop,American Restaurant,Optical Shop,Malay Restaurant,Mexican Restaurant,Salon / Barbershop,Sandwich Place,Noodle House
6,Central Harlem,African Restaurant,Chinese Restaurant,Seafood Restaurant,Bar,French Restaurant,Gym / Fitness Center,American Restaurant,Park,Cafeteria,Library,Bookstore,Boutique,Market,Cycle Studio,Spa
9,Yorkville,Coffee Shop,Italian Restaurant,Gym,Bar,Deli / Bodega,Sushi Restaurant,Pizza Place,Wine Shop,Mexican Restaurant,Japanese Restaurant,Ice Cream Shop,Bakery,Sandwich Place,Chinese Restaurant,Park
12,Upper West Side,Italian Restaurant,Bar,Indian Restaurant,Wine Bar,Dessert Shop,Vegetarian / Vegan Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Pizza Place,Pub,Ice Cream Shop,Coffee Shop,Seafood Restaurant,Café
17,Chelsea,Coffee Shop,Art Gallery,Ice Cream Shop,Café,Bakery,American Restaurant,Cocktail Bar,Theater,Italian Restaurant,Bar,Market,Pizza Place,Nightclub,Cycle Studio,Cupcake Shop
19,East Village,Bar,Mexican Restaurant,Cocktail Bar,Coffee Shop,Pizza Place,Speakeasy,Wine Bar,Juice Bar,Ice Cream Shop,Seafood Restaurant,Italian Restaurant,Bakery,Bagel Shop,Gourmet Shop,Greek Restaurant
20,Lower East Side,Chinese Restaurant,Cocktail Bar,Bakery,Café,Ramen Restaurant,Art Gallery,Coffee Shop,Yoga Studio,French Restaurant,Mediterranean Restaurant,Bubble Tea Shop,Flower Shop,Filipino Restaurant,Sandwich Place,Clothing Store
25,Manhattan Valley,Coffee Shop,Yoga Studio,Pizza Place,Mexican Restaurant,Bar,Park,Spa,Bubble Tea Shop,Café,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Playground,Peruvian Restaurant,Ethiopian Restaurant
27,Gramercy,Bar,Pizza Place,Coffee Shop,Bagel Shop,Mexican Restaurant,Cocktail Bar,American Restaurant,Italian Restaurant,Grocery Store,Playground,Sandwich Place,Taco Place,Comedy Club,Spa,Diner
31,Noho,Pizza Place,Coffee Shop,Grocery Store,Japanese Restaurant,Italian Restaurant,Sushi Restaurant,Mexican Restaurant,Wine Bar,Wine Shop,Hotel,Candy Store,Rock Club,Sandwich Place,Southern / Soul Food Restaurant,Café


# cluster 2

In [60]:
# Cluster label 1: keep the Neighborhood column (position 1) plus the ranked
# "Most Common Venue" columns (position 5 onward).
is_label1 = manhattan_merged['Cluster Labels'] == 1
shown_cols = manhattan_merged.columns[[1, *range(5, manhattan_merged.shape[1])]]
cluster2 = manhattan_merged.loc[is_label1, shown_cols]
cluster2

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
14,Clinton,Theater,Gym / Fitness Center,Italian Restaurant,Coffee Shop,Hotel,Gym,Cocktail Bar,Pizza Place,Thai Restaurant,American Restaurant,Sandwich Place,Wine Shop,Performing Arts Venue,Spa,Lounge
15,Midtown,Hotel,Coffee Shop,Bakery,Theater,Pizza Place,Cuban Restaurant,Sushi Restaurant,Japanese Restaurant,Clothing Store,Burger Joint,Tailor Shop,Cosmetics Shop,Sporting Goods Shop,Salon / Barbershop,Steakhouse
16,Murray Hill,Hotel,Sandwich Place,Pizza Place,Coffee Shop,Gym / Fitness Center,Mediterranean Restaurant,Japanese Restaurant,Jewish Restaurant,Deli / Bodega,Bar,Chinese Restaurant,Juice Bar,Café,Grocery Store,American Restaurant
22,Little Italy,Bakery,Spa,Bubble Tea Shop,Italian Restaurant,Ice Cream Shop,Pizza Place,Mediterranean Restaurant,Salon / Barbershop,Thai Restaurant,Cosmetics Shop,Café,Sandwich Place,Seafood Restaurant,Japanese Restaurant,Chinese Restaurant
28,Battery Park City,Park,Hotel,Coffee Shop,Gym,Memorial Site,Playground,Gourmet Shop,Food Court,Mexican Restaurant,Shopping Mall,Boat or Ferry,Plaza,Electronics Store,Garden,Steakhouse
29,Financial District,Coffee Shop,Pizza Place,Hotel,American Restaurant,Italian Restaurant,Café,Mexican Restaurant,Sandwich Place,Cocktail Bar,Steakhouse,Event Space,Falafel Restaurant,Bar,Juice Bar,Park
30,Carnegie Hill,Coffee Shop,Café,Yoga Studio,Bookstore,Gym / Fitness Center,Gym,Italian Restaurant,Pizza Place,Wine Shop,Vietnamese Restaurant,Bakery,Bar,Shipping Store,Cocktail Bar,Grocery Store
32,Civic Center,Coffee Shop,Hotel,Cocktail Bar,Spa,Park,American Restaurant,Café,French Restaurant,Gym / Fitness Center,Bakery,Sporting Goods Shop,Hotel Bar,Gym,Yoga Studio,Laundry Service
33,Midtown South,Korean Restaurant,Hotel,Dessert Shop,Burger Joint,Coffee Shop,Japanese Restaurant,Café,American Restaurant,Gym / Fitness Center,Hotel Bar,New American Restaurant,Spa,Scenic Lookout,Sandwich Place,Cocktail Bar
38,Flatiron,Gym / Fitness Center,Italian Restaurant,Mediterranean Restaurant,Café,Coffee Shop,New American Restaurant,Spa,Japanese Restaurant,Gym,Toy / Game Store,Yoga Studio,Park,Furniture / Home Store,Gift Shop,Juice Bar


# cluster 3

In [61]:
# Cluster label 2: keep the Neighborhood column (position 1) plus the ranked
# "Most Common Venue" columns (position 5 onward).
is_label2 = manhattan_merged['Cluster Labels'] == 2
shown_cols = manhattan_merged.columns[[1, *range(5, manhattan_merged.shape[1])]]
cluster3 = manhattan_merged.loc[is_label2, shown_cols]
cluster3

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
0,Marble Hill,Sandwich Place,Gym,Coffee Shop,Yoga Studio,Pharmacy,Supplement Shop,Steakhouse,Seafood Restaurant,Pizza Place,Deli / Bodega,Bank,Department Store,Diner,Discount Store,Donut Shop
2,Washington Heights,Café,Bakery,Mobile Phone Shop,Mexican Restaurant,Donut Shop,Latin American Restaurant,Supermarket,Tapas Restaurant,Sandwich Place,Bank,Chinese Restaurant,Pizza Place,Coffee Shop,Spanish Restaurant,Grocery Store
3,Inwood,Lounge,Mexican Restaurant,Restaurant,Bakery,Café,Frozen Yogurt Shop,Spanish Restaurant,Caribbean Restaurant,Chinese Restaurant,Park,Deli / Bodega,Wine Bar,American Restaurant,Pizza Place,Diner
4,Hamilton Heights,Pizza Place,Coffee Shop,Deli / Bodega,Café,Mexican Restaurant,Sandwich Place,Sushi Restaurant,Cocktail Bar,Bakery,Yoga Studio,Chinese Restaurant,Caribbean Restaurant,Indian Restaurant,School,Seafood Restaurant
5,Manhattanville,Coffee Shop,Deli / Bodega,Seafood Restaurant,Italian Restaurant,Park,Chinese Restaurant,Mexican Restaurant,Bus Station,Gym / Fitness Center,Café,American Restaurant,Bus Stop,Climbing Gym,Spanish Restaurant,Falafel Restaurant
7,East Harlem,Mexican Restaurant,Bakery,Thai Restaurant,Sandwich Place,Latin American Restaurant,Deli / Bodega,Pharmacy,Steakhouse,Cocktail Bar,French Restaurant,Beer Bar,Grocery Store,Liquor Store,Street Art,Seafood Restaurant
11,Roosevelt Island,Deli / Bodega,Park,Farmers Market,Food & Drink Shop,Metro Station,Supermarket,School,Scenic Lookout,Sandwich Place,Gym / Fitness Center,Dry Cleaner,Liquor Store,Soccer Field,Coffee Shop,Kosher Restaurant
26,Morningside Heights,Park,Coffee Shop,American Restaurant,Bookstore,Deli / Bodega,Burger Joint,Sandwich Place,Paper / Office Supplies Store,Seafood Restaurant,Optical Shop,Salad Place,Garden,New American Restaurant,Supermarket,Greek Restaurant
36,Tudor City,Café,Park,Mexican Restaurant,Pizza Place,Sushi Restaurant,Deli / Bodega,Thai Restaurant,Dog Run,Seafood Restaurant,Garden,Greek Restaurant,Vietnamese Restaurant,Gym,Gym / Fitness Center,Shanghai Restaurant


# cluster 4

In [62]:
# Cluster label 3: keep the Neighborhood column (position 1) plus the ranked
# "Most Common Venue" columns (position 5 onward).
is_label3 = manhattan_merged['Cluster Labels'] == 3
shown_cols = manhattan_merged.columns[[1, *range(5, manhattan_merged.shape[1])]]
cluster4 = manhattan_merged.loc[is_label3, shown_cols]
cluster4

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
8,Upper East Side,Italian Restaurant,Coffee Shop,Gym / Fitness Center,Bakery,French Restaurant,Spa,Juice Bar,Yoga Studio,American Restaurant,Wine Shop,Hotel,Sushi Restaurant,Pizza Place,Exhibit,Women's Store
10,Lenox Hill,Coffee Shop,Pizza Place,Italian Restaurant,Sushi Restaurant,Café,Cocktail Bar,Gym / Fitness Center,Gym,Burger Joint,Art Gallery,Bakery,Turkish Restaurant,Cycle Studio,Thai Restaurant,Salad Place
13,Lincoln Square,Plaza,Italian Restaurant,Café,Gym / Fitness Center,Theater,Performing Arts Venue,Concert Hall,Wine Shop,French Restaurant,Indie Movie Theater,American Restaurant,Coffee Shop,Gym,Bakery,Clothing Store
18,Greenwich Village,Italian Restaurant,Café,Sushi Restaurant,Bar,Dessert Shop,Seafood Restaurant,Caribbean Restaurant,Sandwich Place,Spa,Chinese Restaurant,Clothing Store,Coffee Shop,Pizza Place,Comedy Club,Burger Joint
21,Tribeca,Park,American Restaurant,Italian Restaurant,Café,Wine Bar,Greek Restaurant,Spa,Coffee Shop,Scenic Lookout,Skate Park,Bakery,Bar,Hotel,Playground,Steakhouse
23,Soho,Italian Restaurant,Sandwich Place,Coffee Shop,Mediterranean Restaurant,Spa,Clothing Store,Salon / Barbershop,Bakery,Café,Ice Cream Shop,Seafood Restaurant,Gym,Dessert Shop,Wine Bar,Sushi Restaurant
24,West Village,Italian Restaurant,Wine Bar,American Restaurant,Pizza Place,Park,Jazz Club,Cocktail Bar,New American Restaurant,Bakery,Coffee Shop,Sandwich Place,Gourmet Shop,Theater,Ice Cream Shop,Cosmetics Shop
35,Turtle Bay,Italian Restaurant,Café,Park,Sushi Restaurant,Coffee Shop,Seafood Restaurant,French Restaurant,Wine Bar,Deli / Bodega,Karaoke Bar,Garden,American Restaurant,Plaza,Japanese Restaurant,Pharmacy
39,Hudson Yards,Hotel,Italian Restaurant,American Restaurant,Gym / Fitness Center,Park,Burger Joint,Coffee Shop,Dog Run,Gym,Nightclub,Bar,Peruvian Restaurant,Comedy Club,Residential Building (Apartment / Condo),Building


# cluster 5

In [63]:
# Cluster label 4: keep the Neighborhood column (position 1) plus the ranked
# "Most Common Venue" columns (position 5 onward).
is_label4 = manhattan_merged['Cluster Labels'] == 4
shown_cols = manhattan_merged.columns[[1, *range(5, manhattan_merged.shape[1])]]
cluster5 = manhattan_merged.loc[is_label4, shown_cols]
cluster5

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
37,Stuyvesant Town,Park,Fountain,Heliport,Pet Service,Coffee Shop,Cocktail Bar,Skating Rink,Farmers Market,Gas Station,Bistro,Baseball Field,Bar,Gym / Fitness Center,Harbor / Marina,Playground


## Pick a neighborhood similar to Chinatown

In [64]:
# Names of the neighborhoods that fall in the same cluster as Chinatown.
cluster1_nei = cluster1.loc[:, 'Neighborhood']

In [65]:
# Non-null entries per column for every neighborhood (i.e. venues returned).
count = manhattan_venues.groupby(by='Neighborhood').count()

In [67]:
# Attach the per-neighborhood counts to the cluster-1 neighborhood names.
nei_count = pd.merge(left=cluster1_nei, right=count, how='inner', on='Neighborhood')
nei_count

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Chinatown,100,100,100,100,100,100
1,Central Harlem,45,45,45,45,45,45
2,Yorkville,100,100,100,100,100,100
3,Upper West Side,82,82,82,82,82,82
4,Chelsea,100,100,100,100,100,100
5,East Village,100,100,100,100,100,100
6,Lower East Side,48,48,48,48,48,48
7,Manhattan Valley,40,40,40,40,40,40
8,Gramercy,82,82,82,82,82,82
9,Noho,100,100,100,100,100,100


In [68]:
# Rank the cluster-1 neighborhoods by venue count, highest first, and
# renumber the rows from 0.
For_ANA = (
    nei_count[['Neighborhood', 'Venue']]
    .sort_values(by='Venue', ascending=False)
    .reset_index(drop=True)
)
# head() keeps the first five rows by default.
For_ANA1 = For_ANA.head()
For_ANA1

Unnamed: 0,Neighborhood,Venue
0,Chinatown,100
1,Yorkville,100
2,Chelsea,100
3,East Village,100
4,Noho,100


In [69]:
# Restrict the venue table to the top-ranked neighborhoods in For_ANA1.
# The list is derived from For_ANA1 rather than typed by hand so that it
# stays in sync with the ranking if the underlying venue data changes.
target = For_ANA1['Neighborhood'].tolist()
manhattan_venues1 = manhattan_venues[manhattan_venues['Neighborhood'].isin(target)]
manhattan_venues1.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
25,Chinatown,40.715618,-73.994279,Cheeky Sandwiches,40.715821,-73.99183,Sandwich Place
26,Chinatown,40.715618,-73.994279,Kiki's,40.714476,-73.992036,Greek Restaurant
27,Chinatown,40.715618,-73.994279,Hotel 50 Bowery NYC,40.715936,-73.996789,Hotel
28,Chinatown,40.715618,-73.994279,Renew Day Spa,40.715559,-73.996747,Spa
29,Chinatown,40.715618,-73.994279,Michaeli Bakery,40.714704,-73.991847,Bakery


In [70]:
# Keep only venues whose category name contains "Restaurant".
is_restaurant = manhattan_venues1['Venue Category'].str.contains('Restaurant')
manhattan_venues2 = manhattan_venues1.loc[is_restaurant]
manhattan_venues2.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
26,Chinatown,40.715618,-73.994279,Kiki's,40.714476,-73.992036,Greek Restaurant
33,Chinatown,40.715618,-73.994279,The Fat Radish,40.715323,-73.99195,English Restaurant
35,Chinatown,40.715618,-73.994279,Da Yu Hot Pot 大渝火锅,40.716735,-73.995752,Hotpot Restaurant
36,Chinatown,40.715618,-73.994279,Forgtmenot,40.714459,-73.991546,New American Restaurant
41,Chinatown,40.715618,-73.994279,Cervo's,40.714763,-73.991455,Spanish Restaurant


In [72]:
# Number of restaurant venues per target neighborhood.
manhattan_venues2.groupby(by='Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Chelsea,17,17,17,17,17,17
Chinatown,36,36,36,36,36,36
East Village,35,35,35,35,35,35
Noho,32,32,32,32,32,32
Yorkville,29,29,29,29,29,29


In [73]:
# One-hot encode the restaurant categories; the empty prefix keeps the
# category names themselves as column names.
manhattan_onehot2 = pd.get_dummies(
    manhattan_venues2[['Venue Category']],
    prefix="",
    prefix_sep="",
)
# Put the neighborhood name back as the first column.
manhattan_onehot2.insert(0, 'Neighborhood', manhattan_venues2['Neighborhood'])
manhattan_onehot2.shape

(149, 47)

In [74]:
# Sum the one-hot columns per neighborhood to count restaurants by category.
category_totals = manhattan_onehot2.groupby('Neighborhood').sum()
manhattan_grouped2 = category_totals.reset_index()
manhattan_grouped2.head()

Unnamed: 0,Neighborhood,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,Austrian Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,English Restaurant,Filipino Restaurant,French Restaurant,German Restaurant,Greek Restaurant,Himalayan Restaurant,Hotpot Restaurant,Indian Restaurant,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Moroccan Restaurant,New American Restaurant,Peruvian Restaurant,Ramen Restaurant,Restaurant,Scandinavian Restaurant,Seafood Restaurant,Shanghai Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Sushi Restaurant,Swiss Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Vietnamese Restaurant
0,Chelsea,3,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,2,0,0,0,0,1,0,1,0,1,0,1,1,0,2,0,0,0,1,0,0,0,1,0,0,0,0
1,Chinatown,3,0,0,2,1,1,0,7,2,1,1,0,0,0,2,0,1,0,0,1,0,1,0,2,0,2,0,0,1,0,0,0,0,0,2,0,1,0,0,1,0,1,0,1,0,2
2,East Village,1,1,1,0,0,0,1,1,0,1,0,2,0,0,2,0,0,0,0,2,2,1,0,0,0,5,0,1,1,0,1,0,1,2,0,0,0,1,1,1,1,1,0,2,0,2
3,Noho,1,0,1,2,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,4,4,0,1,0,0,3,0,0,2,0,0,0,0,1,0,2,0,3,0,0,0,2,0,1,1,0
4,Yorkville,0,0,0,1,0,0,0,2,0,0,0,0,1,1,0,0,0,1,0,6,3,0,1,0,0,3,0,0,1,1,0,0,0,0,0,0,0,4,0,0,0,1,1,0,0,2


### Choose Noho to open a new Chinese restaurant