In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [2]:
# Download the GeoJSON file from geo.nyu.edu and save it as newyork_data.json
# (wget flags: -q suppresses progress output, -O names the output file).
!wget -q -O 'newyork_data.json' https://geo.nyu.edu/download/file/nyu-2451-34572-geojson.json
# NOTE: this message is printed unconditionally; it does not confirm that wget succeeded.
print('Data downloaded!')

Data downloaded!


In [3]:
# Parse the downloaded GeoJSON. The encoding is stated explicitly because JSON
# is exchanged as UTF-8, while open() otherwise falls back to the platform's
# default encoding and could mis-decode non-ASCII names.
with open('newyork_data.json', encoding='utf-8') as json_data:
    newyork_data = json.load(json_data)

# Each entry of 'features' carries the 'properties' and 'geometry' that are
# read when the neighborhood table is built.
neighborhoods_data = newyork_data['features']

In [4]:
# Column layout of the neighborhood table, in display order.
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# Empty frame that carries only that column layout.
neighborhoods = pd.DataFrame(data=None, columns=column_names)

In [5]:
# Collect one record per GeoJSON feature and build the frame in a single step.
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration. Rebuilding from scratch also keeps this cell
# safe to re-run (appending again would duplicate every row).
neighborhood_records = []
for data in neighborhoods_data:
    properties = data['properties']
    # coordinates are stored as [longitude, latitude]
    neighborhood_lon, neighborhood_lat = data['geometry']['coordinates'][:2]
    neighborhood_records.append({'Borough': properties['borough'],
                                 'Neighborhood': properties['name'],
                                 'Latitude': neighborhood_lat,
                                 'Longitude': neighborhood_lon})

neighborhoods = pd.DataFrame(neighborhood_records, columns=column_names)

In [6]:
#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

In [7]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [8]:
# Base map centred on the current `latitude` / `longitude`.
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle per neighborhood: Manhattan in red, every other borough in blue.
marker_rows = zip(neighborhoods['Latitude'], neighborhoods['Longitude'],
                  neighborhoods['Borough'], neighborhoods['Neighborhood'])
for lat, lng, borough, neighborhood in marker_rows:
    color = 'red' if borough == 'Manhattan' else 'blue'
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color=None,
        fill=True,
        fill_color=color,
        fill_opacity=0.8,
        parse_html=False)
    marker.add_to(map_newyork)

# Write the map to map.html, then show it as the cell output.
map_newyork.save('map.html')
map_newyork

In [9]:
# Keep only the Manhattan rows and renumber them from 0.
is_manhattan = neighborhoods['Borough'] == 'Manhattan'
manhattan_data = neighborhoods[is_manhattan].reset_index(drop=True)
manhattan_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688


In [11]:
address = 'Manhattan, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 40.7896239, -73.9598939.


In [12]:
import os
# Credentials are read from the environment so they never appear in the
# notebook; os.environ[...] raises KeyError when a variable is not set.
CLIENT_ID = os.environ['foursquare_ClientId'] # your Foursquare ID
CLIENT_SECRET = os.environ['foursquare_ClientSecret'] # your Foursquare Secret
# VERSION and LIMIT fill the `v` and `limit` query parameters of the request URLs.
VERSION = '20180604'
LIMIT = 50

In [13]:
# Use the Manhattan row labelled 0 as the worked example.
neighborhood_name = manhattan_data.at[0, 'Neighborhood']
neighborhood_latitude = manhattan_data.at[0, 'Latitude']
neighborhood_longitude = manhattan_data.at[0, 'Longitude']

message = 'Latitude and longitude values of {} are {}, {}.'.format(
    neighborhood_name, neighborhood_latitude, neighborhood_longitude)
print(message)

Latitude and longitude values of Marble Hill are 40.87655077879964, -73.91065965862981.


In [14]:
radius = 500  # value sent as the `radius` query parameter

# Template first, values second, so the positional placeholders are easy to match up.
url_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&section=trending&categoryId=4d4b7105d754a06374d81259'
url = url_template.format(
    CLIENT_ID, CLIENT_SECRET, VERSION,
    neighborhood_latitude, neighborhood_longitude,
    radius, LIMIT)



In [15]:
# A timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() reports an HTTP error response here rather than as a
# KeyError when the payload is unpacked later.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

In [16]:
def get_category_type(row):
    """Return the short name of a venue's first category, or None if it has none.

    `row` is indexed by key: the category list is read from 'categories' and,
    if that key is missing, from 'venue.categories' (the name the flattened
    explore response uses).
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and must not be silently swallowed.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['shortName']

In [17]:
# The venue records are the 'items' of the first group in the response.
explore_groups = results['response']['groups']
venues = explore_groups[0]['items']

In [18]:
venues = results['response']['groups'][0]['items']

# Flatten the nested venue records into a table with dotted column names.
venues_flat = json_normalize(venues)

# Keep only the name, category list and coordinates.
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = venues_flat.loc[:, filtered_columns]

# Collapse each category list to the short name of its first entry.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Keep only the last dotted component of each column name
# ('venue.location.lat' -> 'lat'), then round numeric values to 2 decimals.
nearby_venues.columns = [col.rsplit('.', 1)[-1] for col in nearby_venues.columns]
nearby_venues = np.round(nearby_venues, 2)
nearby_venues.head(5)

Unnamed: 0,name,categories,lat,lng
0,Arturo's,Pizza,40.87,-73.91
1,Pick Up Six: Asian Kitchen,Asian,40.88,-73.91
2,Boston Market,American,40.88,-73.91
3,SUBWAY,Sandwiches,40.88,-73.91
4,Dunkin',Donuts,40.88,-73.91


In [19]:
# Export the first rows of the venue table as a LaTeX tabular, without the index.
with open('marble_hill_venues.tex','w') as tex_file:
    latex_table = nearby_venues.head().to_latex(index=False)
    tex_file.write(latex_table)

In [20]:
# The number of rows equals the number of venues in the response.
venue_count = nearby_venues.shape[0]
print('{} venues were returned by Foursquare.'.format(venue_count))

13 venues were returned by Foursquare.


In [33]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, timeout=30):
    """Query the Foursquare explore endpoint around each location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        One label and coordinate pair per location; they are consumed in
        lock-step with zip().
    radius : number, default 500
        Forwarded as the `radius` query parameter.
    timeout : number, default 30
        Seconds passed to requests.get() so a stalled connection cannot hang
        the loop forever.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame is
        empty (but still has these columns) when nothing is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    column_labels = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)  # progress indicator: one line per location

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&section=trending&categoryId=4d4b7105d754a06374d81259'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; surface HTTP errors here instead of as a
        # KeyError while unpacking the payload
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # a venue may come without categories; record None rather than
            # failing on the [0] lookup
            categories = venue.get('categories')
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['shortName'] if categories else None))

    # Passing the labels to the constructor also works when no venue was
    # returned at all; assigning `.columns` to an empty frame raises ValueError.
    return pd.DataFrame(venue_rows, columns=column_labels)

In [34]:
# Run one venue query per Manhattan neighborhood, using the function's default radius.
manhattan_venues = getNearbyVenues(
    manhattan_data['Neighborhood'],
    manhattan_data['Latitude'],
    manhattan_data['Longitude'],
)



Marble Hill
Chinatown
Washington Heights
Inwood
Hamilton Heights
Manhattanville
Central Harlem
East Harlem
Upper East Side
Yorkville
Lenox Hill
Roosevelt Island
Upper West Side
Lincoln Square
Clinton
Midtown
Murray Hill
Chelsea
Greenwich Village
East Village
Lower East Side
Tribeca
Little Italy
Soho
West Village
Manhattan Valley
Morningside Heights
Gramercy
Battery Park City
Financial District
Carnegie Hill
Noho
Civic Center
Midtown South
Sutton Place
Turtle Bay
Tudor City
Stuyvesant Town
Flatiron
Hudson Yards


In [35]:
# Size of the combined venue table, followed by a preview of its first rows.
n_rows, n_cols = manhattan_venues.shape
print((n_rows, n_cols))
manhattan_venues.head()

(1830, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Marble Hill,40.876551,-73.91066,Arturo's,40.874412,-73.910271,Pizza
1,Marble Hill,40.876551,-73.91066,Pick Up Six: Asian Kitchen,40.878075,-73.907033,Asian
2,Marble Hill,40.876551,-73.91066,Boston Market,40.87743,-73.905412,American
3,Marble Hill,40.876551,-73.91066,SUBWAY,40.878493,-73.905385,Sandwiches
4,Marble Hill,40.876551,-73.91066,Dunkin',40.877136,-73.906666,Donuts


In [36]:
# Per-neighborhood count of entries in every remaining column.
venues_by_neighborhood = manhattan_venues.groupby('Neighborhood')
venues_by_neighborhood.count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Battery Park City,32,32,32,32,32,32
Carnegie Hill,50,50,50,50,50,50
Central Harlem,46,46,46,46,46,46
Chelsea,50,50,50,50,50,50
Chinatown,50,50,50,50,50,50
Civic Center,50,50,50,50,50,50
Clinton,50,50,50,50,50,50
East Harlem,49,49,49,49,49,49
East Village,50,50,50,50,50,50
Financial District,50,50,50,50,50,50


In [25]:
# Distinct values of the category column.
distinct_categories = manhattan_venues['Venue Category'].unique()
print('There are {} uniques categories.'.format(len(distinct_categories)))

There are 104 uniques categories.


In [37]:
# One indicator column per venue category; the empty prefix leaves the bare
# category name as the column label.
manhattan_onehot = pd.get_dummies(manhattan_venues[['Venue Category']], prefix="", prefix_sep="")

# Re-attach the neighborhood of each venue. The column ends up last,
# assuming no category is itself called 'Neighborhood'.
manhattan_onehot['Neighborhood'] = manhattan_venues['Neighborhood']

# Rotate the last column to the front; the other columns keep their order.
*leading_columns, last_column = manhattan_onehot.columns
manhattan_onehot = manhattan_onehot[[last_column] + leading_columns]

manhattan_onehot.head()

Unnamed: 0,Neighborhood,African,American,Argentinian,Asian,Australian,Austrian,BBQ,Bagels,Bakery,Belgian,Bistro,Brazilian,Breakfast,Burgers,Burritos,Cafeteria,Café,Cajun / Creole,Caribbean,Caucasian,Chinese,Creperie,Cuban,Czech,Deli / Bodega,Dim Sum,Diner,Donuts,Dumplings,Eastern European,Egyptian Restaurant,Empanada,English,Ethiopian,Falafel,Fast Food,Filipino,Food,Food Court,Food Truck,French,Fried Chicken,Gastropub,German,Gluten-free,Greek,Hawaiian,Himalayan,Hot Dogs,Hotpot,Indian,Indonesian,Irish,Israeli,Italian,Japanese,Japanese Curry,Korean,Kosher,Latin American,Mac & Cheese,Malay,Mediterranean,Mexican,Middle Eastern,Modern European,Molecular Gastronomy,Moroccan,New American,Noodles,North Indian,Paella,Pakistani,Peruvian,Pet Café,Pizza,Poke Place,Ramen,Restaurant,Russian,Salad,Sandwiches,Scandinavian,Seafood,Shanghai,Snacks,Soup,South Indian,Southern / Soul,Spanish,Steakhouse,Sushi,Szechuan,Tacos,Taiwanese,Tapas,Tex-Mex,Thai,Tonkatsu,Turkish,Udon,Vegetarian / Vegan,Vietnamese,Wings
0,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Marble Hill,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Marble Hill,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [38]:
# (rows, columns) of the one-hot table.
manhattan_onehot.shape

(1830, 105)

In [39]:
# Average every indicator column per neighborhood, then turn the group label
# back into an ordinary 'Neighborhood' column.
category_means = manhattan_onehot.groupby('Neighborhood').mean()
manhattan_grouped = category_means.reset_index()
manhattan_grouped

Unnamed: 0,Neighborhood,African,American,Argentinian,Asian,Australian,Austrian,BBQ,Bagels,Bakery,Belgian,Bistro,Brazilian,Breakfast,Burgers,Burritos,Cafeteria,Café,Cajun / Creole,Caribbean,Caucasian,Chinese,Creperie,Cuban,Czech,Deli / Bodega,Dim Sum,Diner,Donuts,Dumplings,Eastern European,Egyptian Restaurant,Empanada,English,Ethiopian,Falafel,Fast Food,Filipino,Food,Food Court,Food Truck,French,Fried Chicken,Gastropub,German,Gluten-free,Greek,Hawaiian,Himalayan,Hot Dogs,Hotpot,Indian,Indonesian,Irish,Israeli,Italian,Japanese,Japanese Curry,Korean,Kosher,Latin American,Mac & Cheese,Malay,Mediterranean,Mexican,Middle Eastern,Modern European,Molecular Gastronomy,Moroccan,New American,Noodles,North Indian,Paella,Pakistani,Peruvian,Pet Café,Pizza,Poke Place,Ramen,Restaurant,Russian,Salad,Sandwiches,Scandinavian,Seafood,Shanghai,Snacks,Soup,South Indian,Southern / Soul,Spanish,Steakhouse,Sushi,Szechuan,Tacos,Taiwanese,Tapas,Tex-Mex,Thai,Tonkatsu,Turkish,Udon,Vegetarian / Vegan,Vietnamese,Wings
0,Battery Park City,0.0,0.03125,0.0,0.0,0.0,0.0,0.0625,0.0,0.03125,0.0,0.03125,0.0,0.0,0.0625,0.03125,0.0,0.03125,0.0,0.0,0.0,0.09375,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.03125,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Carnegie Hill,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.12,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.0,0.0,0.04,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.12,0.0,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0
2,Central Harlem,0.065217,0.043478,0.0,0.0,0.0,0.0,0.021739,0.021739,0.021739,0.0,0.0,0.0,0.021739,0.021739,0.0,0.021739,0.021739,0.0,0.065217,0.0,0.086957,0.0,0.0,0.0,0.108696,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.086957,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.065217,0.0,0.0,0.021739,0.0,0.0,0.043478,0.0,0.065217,0.0,0.0,0.0,0.0,0.065217,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.021739,0.0,0.0,0.0,0.0,0.021739,0.0,0.0
3,Chelsea,0.0,0.06,0.0,0.02,0.0,0.0,0.0,0.02,0.1,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.08,0.0,0.02,0.0,0.02,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.1,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.06,0.02,0.02,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.06,0.0,0.0,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.02,0.02,0.0
4,Chinatown,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.24,0.0,0.0,0.0,0.04,0.02,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.02,0.0,0.0,0.0,0.06,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.06,0.02,0.0
5,Civic Center,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.06,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.1,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.06,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02
6,Clinton,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.02,0.0,0.0,0.06,0.0,0.02,0.02,0.0,0.02,0.0,0.0,0.16,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.04,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.1,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.04,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.02,0.0,0.04,0.0,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0
7,East Harlem,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.102041,0.0,0.0,0.0,0.0,0.040816,0.0,0.0,0.061224,0.0,0.0,0.0,0.020408,0.0,0.020408,0.0,0.102041,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.040816,0.0,0.0,0.0,0.0,0.020408,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.081633,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.102041,0.0,0.0,0.020408,0.0,0.0,0.020408,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.040816,0.040816,0.0,0.0,0.020408,0.0,0.0,0.0,0.061224,0.0,0.0,0.0,0.0,0.0,0.0
8,East Village,0.0,0.06,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.08,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.12,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.02,0.0,0.04,0.0,0.0,0.02,0.06,0.04,0.0
9,Financial District,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.06,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.06,0.06,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.04,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.02,0.02,0.06,0.1,0.0,0.02,0.0,0.02,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [40]:
num_top_venues = 5

# Print the highest-frequency categories of every neighborhood.
for hood in manhattan_grouped['Neighborhood']:
    print("----"+hood+"----")

    # Transpose the neighborhood's single row into (venue, freq) pairs; the
    # first pair holds the 'Neighborhood' label itself and is dropped.
    freq_table = manhattan_grouped[manhattan_grouped['Neighborhood'] == hood].T.reset_index()
    freq_table.columns = ['venue','freq']
    freq_table = freq_table.iloc[1:]
    freq_table = freq_table.assign(freq=freq_table['freq'].astype(float)).round({'freq': 2})

    ranked = freq_table.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Battery Park City----
        venue  freq
0       Pizza  0.12
1     Italian  0.12
2     Chinese  0.09
3  Food Court  0.06
4     Burgers  0.06


----Carnegie Hill----
    venue  freq
0  Bakery  0.12
1   Pizza  0.12
2    Café  0.10
3  French  0.06
4   Sushi  0.06


----Central Harlem----
           venue  freq
0  Deli / Bodega  0.11
1  Fried Chicken  0.09
2        Chinese  0.09
3        African  0.07
4          Pizza  0.07


----Chelsea----
      venue  freq
0    Bakery  0.10
1   Italian  0.10
2      Café  0.08
3     Sushi  0.06
4  American  0.06


----Chinatown----
                venue  freq
0             Chinese  0.24
1           Dumplings  0.08
2  Vegetarian / Vegan  0.06
3             Italian  0.06
4              Bakery  0.06


----Civic Center----
        venue  freq
0  Sandwiches  0.12
1     Italian  0.10
2      French  0.08
3    American  0.08
4      Bakery  0.06


----Clinton----
           venue  freq
0  Deli / Bodega  0.16
1        Italian  0.10
2           Café  0.06
3   

In [41]:
def return_most_common_venues(row, num_top_venues):
    """Return the index labels of the `num_top_venues` largest entries of `row`.

    The first element of `row` is skipped before ranking; the remaining
    entries are sorted in descending order and the leading labels are
    returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [81]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']


def _ordinal_label(rank):
    """Format a 1-based rank as '1st', '2nd', '3rd', '4th', ... ('11th', '21st', ...)."""
    last_two, last_one = rank % 100, rank % 10
    # 11, 12 and 13 take 'th' even though they end in 1, 2 and 3
    if 1 <= last_one <= len(indicators) and not 11 <= last_two <= 13:
        return '{}{}'.format(rank, indicators[last_one - 1])
    return '{}th'.format(rank)


# create columns according to number of top venues; the suffix is chosen
# explicitly instead of relying on a bare `except:` around an out-of-range lookup
columns = ['Neighborhood'] + [_ordinal_label(rank) for rank in range(1, num_top_venues + 1)]

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = manhattan_grouped['Neighborhood']

# fill the ranking columns of each row with that neighborhood's top categories
for ind in np.arange(manhattan_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(manhattan_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st,2nd,3rd,4th,5th
0,Battery Park City,Pizza,Italian,Chinese,Sandwiches,BBQ
1,Carnegie Hill,Bakery,Pizza,Café,French,Sushi
2,Central Harlem,Deli / Bodega,Fried Chicken,Chinese,African,Southern / Soul
3,Chelsea,Italian,Bakery,Café,Pizza,Sushi
4,Chinatown,Chinese,Dumplings,Malay,Vegetarian / Vegan,Italian


In [45]:
# Export the first rows of the ranking table as a LaTeX tabular, without the index.
with open('neighborhoods_venues.tex','w') as tex_file:
    latex_table = neighborhoods_venues_sorted.head().to_latex(index=False)
    tex_file.write(latex_table)

In [82]:
# set number of clusters
kclusters = 4

# Feature matrix: everything except the label column. `columns=` replaces the
# positional axis argument (`drop('Neighborhood', 1)`), which pandas 2.0 no
# longer accepts.
manhattan_grouped_clustering = manhattan_grouped.drop(columns='Neighborhood')

# run k-means clustering; n_init is set explicitly so the number of restarts
# does not depend on the installed scikit-learn's default
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(manhattan_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 1, 1, 0, 1, 2, 2, 1, 1, 2], dtype=int32)

In [83]:
# add clustering labels; a column left over from an earlier run of this cell is
# dropped first, because DataFrame.insert raises if the name already exists
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.predict(manhattan_grouped_clustering))

manhattan_merged = manhattan_data

# join the cluster label and ranked venue columns onto manhattan_data, matching on 'Neighborhood'
manhattan_merged = manhattan_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

manhattan_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st,2nd,3rd,4th,5th
0,Manhattan,Marble Hill,40.876551,-73.91066,1,Sandwiches,Deli / Bodega,Donuts,Seafood,Diner
1,Manhattan,Chinatown,40.715618,-73.994279,1,Chinese,Dumplings,Malay,Vegetarian / Vegan,Italian
2,Manhattan,Washington Heights,40.851903,-73.9369,1,Deli / Bodega,Pizza,Mexican,Chinese,Restaurant
3,Manhattan,Inwood,40.867684,-73.92121,1,Pizza,Café,Restaurant,Mexican,Bakery
4,Manhattan,Hamilton Heights,40.823604,-73.949688,1,Pizza,Deli / Bodega,Café,Mexican,Chinese


In [84]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour each
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(manhattan_merged['Latitude'], manhattan_merged['Longitude'], manhattan_merged['Neighborhood'], manhattan_merged['Cluster Labels']):
    # A neighborhood without a match in the join has a NaN label, which also
    # turns the whole column into floats; skip those rows and index with an int.
    if pd.isna(cluster):
        continue
    cluster = int(cluster)
    # cluster - 1 is the original colour assignment (cluster 0 wraps around to
    # the last colour); it is kept so existing maps keep their colours
    marker_color = rainbow[cluster - 1]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [85]:
# Rows of cluster 0, showing column 1 plus every column from position 5 onwards.
in_cluster = manhattan_merged['Cluster Labels'] == 0
shown_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[in_cluster, shown_columns].head()

Unnamed: 0,Neighborhood,1st,2nd,3rd,4th,5th
8,Upper East Side,Italian,American,French,Burgers,Sushi
10,Lenox Hill,Italian,Deli / Bodega,Pizza,Greek,Restaurant
12,Upper West Side,Italian,Bakery,Café,Mexican,French
17,Chelsea,Italian,Bakery,Café,Pizza,Sushi
18,Greenwich Village,Italian,French,Café,Sushi,Vietnamese


In [86]:
# Rows of cluster 1, showing column 1 plus every column from position 5 onwards.
in_cluster = manhattan_merged['Cluster Labels'] == 1
shown_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Neighborhood,1st,2nd,3rd,4th,5th
0,Marble Hill,Sandwiches,Deli / Bodega,Donuts,Seafood,Diner
1,Chinatown,Chinese,Dumplings,Malay,Vegetarian / Vegan,Italian
2,Washington Heights,Deli / Bodega,Pizza,Mexican,Chinese,Restaurant
3,Inwood,Pizza,Café,Restaurant,Mexican,Bakery
4,Hamilton Heights,Pizza,Deli / Bodega,Café,Mexican,Chinese
5,Manhattanville,Chinese,Deli / Bodega,Seafood,Mexican,Sandwiches
6,Central Harlem,Deli / Bodega,Fried Chicken,Chinese,African,Southern / Soul
7,East Harlem,Mexican,Pizza,Deli / Bodega,Bakery,Latin American
9,Yorkville,Italian,Pizza,Deli / Bodega,Sandwiches,Chinese
11,Roosevelt Island,Deli / Bodega,Sandwiches,Café,Pizza,Greek


In [90]:
# Export the cluster-1 rows (column 1 plus columns from position 5 onwards)
# as a LaTeX tabular, without the index.
with open('mexican_cluster.tex','w') as tex_file:
    in_cluster = manhattan_merged['Cluster Labels'] == 1
    shown_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
    tex_file.write(manhattan_merged.loc[in_cluster, shown_columns].to_latex(index=False))

In [87]:
# Rows of cluster 2, showing column 1 plus every column from position 5 onwards.
in_cluster = manhattan_merged['Cluster Labels'] == 2
shown_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Neighborhood,1st,2nd,3rd,4th,5th
13,Lincoln Square,Café,Food Truck,American,Italian,French
14,Clinton,Deli / Bodega,Italian,Café,Sandwiches,Restaurant
15,Midtown,Sandwiches,Food Truck,Deli / Bodega,American,Burgers
16,Murray Hill,American,Sandwiches,Chinese,Japanese,Scandinavian
21,Tribeca,Italian,American,Bakery,Café,Burgers
24,West Village,Italian,American,Gastropub,Burgers,Steakhouse
29,Financial District,Sandwiches,Café,American,Salad,Food Truck
32,Civic Center,Sandwiches,Italian,American,French,Bakery
34,Sutton Place,Bagels,American,Italian,Chinese,Pizza
35,Turtle Bay,Italian,Café,Deli / Bodega,Food Truck,Sandwiches


In [88]:
# Rows of cluster 3, showing column 1 plus every column from position 5 onwards.
in_cluster = manhattan_merged['Cluster Labels'] == 3
shown_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Neighborhood,1st,2nd,3rd,4th,5th
37,Stuyvesant Town,Deli / Bodega,Sandwiches,German,Bistro,Sushi
