# NYC Clustering Analysis

## Zachary Avant

### Libraries

In [1]:
%%capture installation_output 
# Assumed that common data science libraries are already installed.
import sys

import numpy as np

import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json 

!{sys.executable} -m pip install geopy;
!{sys.executable} -m pip install geopandas;
!{sys.executable} -m pip install geocoder;
from geopy.geocoders import Nominatim;
import geopandas
import geocoder

import requests 
from pandas.io.json import json_normalize
from bs4 import BeautifulSoup 

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

!{sys.executable} -m pip install folium;
import folium


### Read and Clean Data

In [65]:
nyc_df = pd.read_csv('Median_Incomes.csv',header=4)
nyc_df.head()

Unnamed: 0,Location,Household Type,TimeFrame,DataFormat,Data,Fips
0,New York City,All Households,2005,Dollars,56998.60927,3651000
1,New York City,Families,2005,Dollars,64793.69467,3651000
2,New York City,Families with Children,2005,Dollars,55448.77832,3651000
3,New York City,Families without Children,2005,Dollars,72309.25932,3651000
4,New York City,All Households,2006,Dollars,59080.75625,3651000


In [66]:
nyc_df_neighborhoods = pd.read_csv('https://data.beta.nyc/dataset/0ff93d2d-90ba-457c-9f7e-39e47bf2ac5f/resource/7caac650-d082-4aea-9f9b-3681d568e8a5/download/nyc_zip_borough_neighborhoods_pop.csv')
nyc_df_neighborhoods.head()

Unnamed: 0,zip,borough,post_office,neighborhood,population,density
0,10001,Manhattan,"New York, NY",Chelsea and Clinton,21102,33959
1,10002,Manhattan,"New York, NY",Lower East Side,81410,92573
2,10003,Manhattan,"New York, NY",Lower East Side,56024,97188
3,10004,Manhattan,"New York, NY",Lower Manhattan,3089,5519
4,10005,Manhattan,"New York, NY",Lower Manhattan,7135,97048


In [67]:
nyc_df = nyc_df.loc[3900:]
nyc_df = nyc_df.loc[(nyc_df['TimeFrame'] == 2019) & (nyc_df['Household Type'] == 'All Households')]
nyc_df = nyc_df.reset_index().drop(['index','Household Type','TimeFrame', 'DataFormat','Fips'], axis=1)
nyc_df.rename(columns={'Location':'Zip Code', 'Data':'Median Income'},inplace=True)
for index, zip_code in enumerate(nyc_df['Zip Code']):
    nyc_df['Zip Code'][index] = int(zip_code.split()[2])
nyc_df.head()

Unnamed: 0,Zip Code,Median Income
0,10001,92840
1,10002,36982
2,10003,118161
3,10004,190223
4,10005,189702


In [68]:
nyc_df_neighborhoods.drop(['post_office'], axis=1, inplace=True)
nyc_df_neighborhoods.rename(columns={'zip':'Zip Code', 'borough':'Borough','neighborhood':'Neighborhood','population':'Population','density':'Density'}, inplace=True)
nyc_df_neighborhoods.head()

Unnamed: 0,Zip Code,Borough,Neighborhood,Population,Density
0,10001,Manhattan,Chelsea and Clinton,21102,33959
1,10002,Manhattan,Lower East Side,81410,92573
2,10003,Manhattan,Lower East Side,56024,97188
3,10004,Manhattan,Lower Manhattan,3089,5519
4,10005,Manhattan,Lower Manhattan,7135,97048


In [69]:
nyc_df = nyc_df.join(nyc_df_neighborhoods.set_index('Zip Code'), on='Zip Code')

In [70]:
nyc_df.head()

Unnamed: 0,Zip Code,Median Income,Borough,Neighborhood,Population,Density
0,10001,92840,Manhattan,Chelsea and Clinton,21102.0,33959.0
1,10002,36982,Manhattan,Lower East Side,81410.0,92573.0
2,10003,118161,Manhattan,Lower East Side,56024.0,97188.0
3,10004,190223,Manhattan,Lower Manhattan,3089.0,5519.0
4,10005,189702,Manhattan,Lower Manhattan,7135.0,97048.0


In [71]:
# Calculate average density to fill NaN
total = 0
for density in nyc_df['Density']:
    try:
        int(density) # Trying to cast NaN will raise an error and "skip" that entry, as intended
        total+=density
    except:
        pass
avg_density = total/len(nyc_df['Density'])
print(avg_density)

42245.6408839779


In [72]:
nyc_df['Density'].fillna(avg_density, inplace=True)

In [73]:
nyc_df.shape

(181, 6)

### Find Geospatial Data using ArcGIS

In [74]:
nyc_df['Latitude'] = None
nyc_df['Longitude'] = None

In [75]:
for index, zip_code in enumerate(nyc_df['Zip Code']):
    lat_long_coordinates = None
    loop_counter = 0
    
    while(lat_long_coordinates == None):
        address = '{}, New York, New York'.format(zip_code)
        location = geocoder.arcgis(address)
        lat_long_coordinates = location.latlng
        
        if (loop_counter >= 10):
            print('Infinite loop -- aborting')
            break
        else:
            print(' .', end='')
        loop_counter += 1
    latitude, longitude = lat_long_coordinates[0], lat_long_coordinates[1]
    nyc_df['Latitude'][index] = latitude
    nyc_df['Longitude'][index] = longitude
print('\nGeospatial data downloaded.')

 .

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nyc_df['Latitude'][index] = latitude
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nyc_df['Longitude'][index] = longitude


 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
Geospatial data downloaded.


In [76]:
nyc_df.head()

Unnamed: 0,Zip Code,Median Income,Borough,Neighborhood,Population,Density,Latitude,Longitude
0,10001,92840,Manhattan,Chelsea and Clinton,21102.0,33959.0,40.751,-73.9981
1,10002,36982,Manhattan,Lower East Side,81410.0,92573.0,40.7165,-73.9884
2,10003,118161,Manhattan,Lower East Side,56024.0,97188.0,40.7324,-73.9891
3,10004,190223,Manhattan,Lower Manhattan,3089.0,5519.0,40.704,-74.0122
4,10005,189702,Manhattan,Lower Manhattan,7135.0,97048.0,40.7058,-74.0077


### Map of NYC using Folium

In [77]:
nyc_location = geocoder.arcgis('New York, New York')

In [78]:
print("New York City's latitude and longitude:", nyc_location.latlng)

New York City's latitude and longitude: [40.71455000000003, -74.00713999999994]


In [382]:
# Create map
map_nyc = folium.Map(location= nyc_location.latlng, zoom_start=10)

# Populate map with markers
for latitude, longitude, zip_code, borough, neighborhood in zip(nyc_df['Latitude'], nyc_df['Longitude'],nyc_df['Zip Code'], nyc_df['Borough'], nyc_df['Neighborhood']):
    label = '{}, {}, {}'.format(zip_code, neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude], 
        radius=4,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=.7,
        parse_html=False).add_to(map_nyc)
# Display map
map_nyc

## Foursquare

#### Import and inspect data acquired via Foursquare

In [123]:
nyc_grouped = pd.read_csv('nyc_foursquare_data')
nyc_grouped.head()

Unnamed: 0,Zip Code,Median Income,ATM,Accessories Store,Adult Boutique,African Restaurant,American Restaurant,Animal Shelter,Antique Shop,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auditorium,Australian Restaurant,Austrian Restaurant,Auto Dealership,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Basketball Stadium,Bath House,Beach,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bike Trail,Bistro,Board Shop,Boat or Ferry,Bookstore,Border Crossing,Boutique,Bowling Alley,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Buffet,Building,Burger Joint,Burmese Restaurant,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Camera Store,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Caucasian Restaurant,Check Cashing Service,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Circus,Climbing Gym,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Academic Building,College Arts Building,College Bookstore,College Cafeteria,Colombian Restaurant,Comedy Club,Comfort Food Restaurant,Comic Shop,Community Center,Concert Hall,Construction & Landscaping,Convenience Store,Cooking School,Cosmetics Shop,Costume Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Cycle Studio,Czech Restaurant,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Doctor's Office,Dog Run,Donut Shop,Dosa Place,Dry Cleaner,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Service,Event Space,Exhibit,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Service,Food Stand,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Gift Shop,Gluten-free Restaurant,Golf Course,Golf Driving Range,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gymnastics Gym,Halal Restaurant,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Heliport,High School,Historic Site,History Museum,Hockey Field,Home Service,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,IT Services,Ice Cream Shop,Imported Food Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Indonesian Restaurant,Intersection,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Kitchen Supply Store,Korean Restaurant,Kosher Restaurant,Lake,Latin American Restaurant,Laundromat,Laundry Service,Lawyer,Leather Goods Store,Lebanese Restaurant,Library,Lingerie Store,Liquor Store,Locksmith,Lounge,Luggage Store,Malay Restaurant,Market,Martial Arts School,Massage Studio,Mattress Store,Medical Center,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Motel,Motorcycle Shop,Movie Theater,Moving Target,Multiplex,Museum,Music School,Music Store,Music Venue,Nail Salon,New American Restaurant,Nightclub,Non-Profit,Noodle House,North Indian Restaurant,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Other Nightlife,Other Repair Shop,Outdoor Gym,Outdoor Sculpture,Outdoor Supply Store,Outdoors & Recreation,Outlet Store,Paella Restaurant,Pakistani Restaurant,Paper / Office Supplies Store,Park,Parking,Pedestrian Plaza,Peking Duck Restaurant,Performing Arts Venue,Persian Restaurant,Peruvian Restaurant,Peruvian Roast Chicken Joint,Pet Café,Pet Service,Pet Store,Pharmacy,Photography Studio,Physical Therapist,Piano Bar,Pie Shop,Pier,Pilates Studio,Pizza Place,Platform,Playground,Plaza,Poke Place,Pool,Pool Hall,Post Office,Pub,Public Art,Puerto Rican Restaurant,Ramen Restaurant,Record Shop,Recording Studio,Recreation Center,Rental Car Location,Rental Service,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,River,Roller Rink,Roof Deck,Russian Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shabu-Shabu Restaurant,Shanghai Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Ski Lodge,Smoke Shop,Smoothie Shop,Snack Place,Soba Restaurant,Soccer Field,Social Club,Soup Place,South American Restaurant,South Indian Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Spiritual Center,Sporting Goods Shop,Sports Bar,Sports Club,Stables,State / Provincial Park,Stationery Store,Steakhouse,Storage Facility,Street Art,Strip Club,Supermarket,Supplement Shop,Surf Spot,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Tech Startup,Tennis Court,Tennis Stadium,Tex-Mex Restaurant,Thai Restaurant,Theater,Theme Park,Theme Restaurant,Thrift / Vintage Store,Tiki Bar,Tourist Information Center,Toy / Game Store,Track,Trail,Train,Train Station,Tree,Tunnel,Turkish Restaurant,Udon Restaurant,Ukrainian Restaurant,Used Bookstore,Vape Store,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Waste Facility,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo
0,10001,92840,0.0,0.0,0.0,0.0,0.022989,0.0,0.0,0.0,0.0,0.011494,0.045977,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022989,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045977,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.057471,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.022989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.022989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.022989,0.011494,0.034483,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057471,0.011494,0.0,0.0,0.011494,0.0,0.0,0.0,0.022989,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.011494,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022989,0.0,0.011494,0.0,0.011494,0.0,0.011494,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.022989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.011494,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.011494,0.0,0.0,0.011494,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.011494,0.0,0.0,0.011494,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0
1,10002,36982,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.05,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.04,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0
2,10003,118161,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.03,0.01,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.05,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0
3,10004,190223,0.0,0.010526,0.0,0.0,0.031579,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.063158,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.042105,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021053,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.042105,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.010526,0.010526,0.021053,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021053,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.010526,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.010526,0.010526,0.0,0.021053,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021053,0.0,0.021053,0.0,0.0,0.0,0.0,0.0,0.0,0.031579,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.010526,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.031579,0.0,0.0,0.0,0.0,0.0,0.0,0.021053,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.031579,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.021053,0.0,0.010526,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021053,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.010526,0.0,0.0
4,10005,189702,0.0,0.01,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.03,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.02,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0


In [124]:
neighborhood_venues_sorted = pd.read_csv('nyc_top_10_venues')
neighborhood_venues_sorted.head()

Unnamed: 0,Zip Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,10001,Hotel,Coffee Shop,Art Gallery,Café,Music Venue,Gym / Fitness Center,Burger Joint,Pizza Place,Furniture / Home Store,Indie Theater
1,10002,Bakery,Coffee Shop,American Restaurant,Café,Mexican Restaurant,Bar,Cocktail Bar,Chinese Restaurant,Boutique,Asian Restaurant
2,10003,Dessert Shop,Japanese Restaurant,Coffee Shop,Mediterranean Restaurant,Grocery Store,Ice Cream Shop,Pizza Place,Tea Room,Cosmetics Shop,Speakeasy
3,10004,Bar,Pizza Place,Mexican Restaurant,Coffee Shop,Boat or Ferry,Cocktail Bar,Hotel,Park,American Restaurant,Monument / Landmark
4,10005,Coffee Shop,American Restaurant,Italian Restaurant,Cocktail Bar,Falafel Restaurant,Pizza Place,Gym / Fitness Center,Mexican Restaurant,Juice Bar,Bar


## KMeans Clustering

#### Preprocess / Standardize

In [125]:
#nyc_grouped.insert(1, 'Median Income', nyc_df['Median Income'])
nyc_grouped.insert(2, 'Density', nyc_df['Density'])

In [281]:
from sklearn.preprocessing import StandardScaler

features = nyc_grouped.values[:,1:]
cluster_dataset = StandardScaler().fit_transform(features)
cluster_dataset

array([[ 0.39717142, -0.3144607 , -0.07474351, ..., -0.29577186,
        -0.34862321, -0.07474351],
       [-1.07797166,  1.63601085, -0.07474351, ..., -0.29577186,
        -0.34862321, -0.07474351],
       [ 1.0658688 ,  1.78958211, -0.07474351, ..., -0.29577186,
        -0.34862321, -0.07474351],
       ...,
       [-0.81818824, -0.8266866 , -0.07474351, ..., -0.29577186,
        -0.34862321, -0.07474351],
       [-0.45506715, -1.04684424, -0.07474351, ..., -0.29577186,
         3.73296534, -0.07474351],
       [ 0.17652585, -0.94721425, -0.07474351, ..., -0.29577186,
        -0.34862321, -0.07474351]])

#### Find KMeans Clusters

In [397]:
k = 8

kmeans = KMeans(init='k-means++', n_clusters=k, random_state = 0).fit(cluster_dataset)
#kmeans = KMeans(n_clusters=k, random_state = 0).fit(cluster_dataset)
kmeans.labels_

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 7, 1, 7, 7, 7, 7, 7, 1, 1, 1, 4, 7, 7, 6, 1, 1, 1, 1, 1,
       7, 7, 7, 7, 7, 4, 7, 7, 4, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 7, 7,
       7, 7, 4, 7, 7, 7, 7, 4, 7, 4, 7, 7, 7, 4, 7, 4, 7, 7, 1, 5, 7, 4,
       7, 1, 4, 1, 1, 7, 7, 1, 1, 4, 7, 1, 0, 1, 7, 7, 7, 1, 1, 3, 7, 7,
       2, 4, 1, 7, 7, 7, 7, 4, 7, 4, 1, 1, 7, 4, 7, 7, 7, 1, 7, 7, 7, 7,
       4, 4, 4, 7, 4, 7, 7, 4, 7, 7, 4, 7, 4, 1, 1, 7, 7, 7, 4, 7, 4, 7,
       7, 4, 7, 7, 7, 7, 7, 7, 4, 4, 4, 7, 4, 7, 7, 7, 4, 7, 7, 7, 4, 7,
       4, 4, 7, 7], dtype=int32)

## Map the Clusters

#### Preprocess Data

In [398]:
# Add KMeans cluster labels
neighborhood_venues_sorted.drop('Cluster Labels', axis=1, inplace=True)
neighborhood_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Merge nyc_df with neighborhood_venues_sorted to get the necessary feature set for mapping
nyc_merged = nyc_df.join(neighborhood_venues_sorted.set_index('Zip Code'), on='Zip Code')

In [399]:
nyc_merged.dropna(subset=['Cluster Labels'], inplace=True)
nyc_merged = nyc_merged.astype({'Cluster Labels': int})

In [404]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster, neighborhood, borough in zip(nyc_merged['Latitude'], nyc_merged['Longitude'], nyc_merged['Zip Code'], nyc_merged['Cluster Labels'], nyc_merged['Neighborhood'], nyc_merged['Borough']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster) +', {}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Results and Median Income by Cluster

In [405]:
Cluster_0 = nyc_merged.loc[nyc_merged['Cluster Labels']==0]
Cluster_0.head()

Unnamed: 0,Zip Code,Median Income,Borough,Neighborhood,Population,Density,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
101,11210,69385,Brooklyn,Flatbush,62008.0,37785.0,40.6216,-73.9505,0,Deli / Bodega,Sushi Restaurant,Pizza Place,Ice Cream Shop,Boutique,Pharmacy,Shipping Store,Coffee Shop,Bus Station,Kosher Restaurant


In [406]:
total = 0
for n in Cluster_0['Median Income']:
    n = float(n)
    total += n
print("Average median income of Cluster 0:", total/len(Cluster_0))

Average median income of Cluster 0: 69385.0


In [358]:
# Most of Manhattan
Cluster_1 = nyc_merged.loc[nyc_merged['Cluster Labels']==1]
Cluster_1.head()

Unnamed: 0,Zip Code,Median Income,Borough,Neighborhood,Population,Density,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,10001,92840,Manhattan,Chelsea and Clinton,21102.0,33959.0,40.751,-73.9981,1,Hotel,Coffee Shop,Art Gallery,Café,Music Venue,Gym / Fitness Center,Burger Joint,Pizza Place,Furniture / Home Store,Indie Theater
1,10002,36982,Manhattan,Lower East Side,81410.0,92573.0,40.7165,-73.9884,1,Bakery,Coffee Shop,American Restaurant,Café,Mexican Restaurant,Bar,Cocktail Bar,Chinese Restaurant,Boutique,Asian Restaurant
2,10003,118161,Manhattan,Lower East Side,56024.0,97188.0,40.7324,-73.9891,1,Dessert Shop,Japanese Restaurant,Coffee Shop,Mediterranean Restaurant,Grocery Store,Ice Cream Shop,Pizza Place,Tea Room,Cosmetics Shop,Speakeasy
3,10004,190223,Manhattan,Lower Manhattan,3089.0,5519.0,40.704,-74.0122,1,Bar,Pizza Place,Mexican Restaurant,Coffee Shop,Boat or Ferry,Cocktail Bar,Hotel,Park,American Restaurant,Monument / Landmark
4,10005,189702,Manhattan,Lower Manhattan,7135.0,97048.0,40.7058,-74.0077,1,Coffee Shop,American Restaurant,Italian Restaurant,Cocktail Bar,Falafel Restaurant,Pizza Place,Gym / Fitness Center,Mexican Restaurant,Juice Bar,Bar


In [359]:
total = 0
for n in Cluster_1['Median Income']:
    n = float(n)
    total += n
print("Average median income of Cluster 1:", total/len(Cluster_1))

Average median income of Cluster 1: 110665.76


In [360]:
Cluster_2 = nyc_merged.loc[nyc_merged['Cluster Labels']==2]
Cluster_2.head()

Unnamed: 0,Zip Code,Median Income,Borough,Neighborhood,Population,Density,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
111,11220,50191,Brooklyn,Sunset Park,99598.0,55603.0,40.6583,-74.0036,2,Furniture / Home Store,Coffee Shop,Bakery,Bar,Pizza Place,Sushi Restaurant,Pet Store,Restaurant,Food Court,Brewery


In [361]:
total = 0
for n in Cluster_2['Median Income']:
    n = float(n)
    total += n
print("Average median income of Cluster 2:", total/len(Cluster_2))

Average median income of Cluster 2: 50191.0


In [353]:
Cluster_3 = nyc_merged.loc[nyc_merged['Cluster Labels']==3]
Cluster_3.head()

Unnamed: 0,Zip Code,Median Income,Borough,Neighborhood,Population,Density,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
108,11217,119375,Brooklyn,Northwest Brooklyn,35881.0,47796.0,40.6869,-73.9755,3,Coffee Shop,Bar,Lounge,Flower Shop,Italian Restaurant,Cosmetics Shop,Opera House,Burger Joint,Performing Arts Venue,Playground


In [362]:
total = 0
for n in Cluster_3['Median Income']:
    n = float(n)
    total += n
print("Average median income of Cluster 3:", total/len(Cluster_3))

Average median income of Cluster 3: 119375.0


In [363]:
Cluster_4 = nyc_merged.loc[nyc_merged['Cluster Labels']==4]
Cluster_4.head()

Unnamed: 0,Zip Code,Median Income,Borough,Neighborhood,Population,Density,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
35,10039,42278,Manhattan,Central Harlem,24527.0,52965.0,40.8263,-73.9385,4,Deli / Bodega,American Restaurant,Garden,Coffee Shop,Ethiopian Restaurant,Basketball Court,Performing Arts Venue,Pool,Historic Site,Burger Joint
49,10306,80834,Staten Island,South Shore,55909.0,7481.0,40.5692,-74.1331,4,Bagel Shop,Zoo,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market
52,10309,109457,Staten Island,South Shore,32519.0,4840.0,40.522,-74.2067,4,Pet Store,Beach,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market
64,10459,29284,Bronx,Hunts Point and Mott Haven,47308.0,57907.0,40.8322,-73.8836,4,Metro Station,Mexican Restaurant,Fried Chicken Joint,Shopping Mall,Bus Station,Park,Playground,Pizza Place,Flower Shop,Farmers Market
69,10464,96366,Bronx,Southeast Bronx,4534.0,1295.0,40.8695,-73.8035,4,History Museum,Sculpture Garden,Zoo,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market


In [364]:
total = 0
for n in Cluster_4['Median Income']:
    n = float(n)
    total += n
print("Average median income of Cluster 4:", total/len(Cluster_4))

Average median income of Cluster 4: 72350.29411764706


In [366]:
Cluster_5 = nyc_merged.loc[nyc_merged['Cluster Labels']==5]
Cluster_5.head()

Unnamed: 0,Zip Code,Median Income,Borough,Neighborhood,Population,Density,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
86,11101,75581,Queens,Northwest Queens,25484.0,9744.0,40.7529,-73.9432,5,Café,Bar,Coffee Shop,Deli / Bodega,Bubble Tea Shop,Indian Restaurant,Gym,Dessert Shop,Sandwich Place,Convenience Store


In [367]:
total = 0
for n in Cluster_5['Median Income']:
    n = float(n)
    total += n
print("Average median income of Cluster 5:", total/len(Cluster_5))

Average median income of Cluster 5: 75581.0


In [368]:
Cluster_6 = nyc_merged.loc[nyc_merged['Cluster Labels']==6]
Cluster_6.head()

Unnamed: 0,Zip Code,Median Income,Borough,Neighborhood,Population,Density,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
38,10065,148441,Manhattan,Upper East Side,32270.0,83533.0,40.765,-73.964,6,Italian Restaurant,Gym,Coffee Shop,French Restaurant,Hotel,Boutique,Sporting Goods Shop,Burger Joint,Café,Clothing Store


In [369]:
total = 0
for n in Cluster_6['Median Income']:
    n = float(n)
    total += n
print("Average median income of Cluster 6:", total/len(Cluster_6))

Average median income of Cluster 6: 148441.0


In [370]:
# Upper Manhattan and most of the rest of NYC
Cluster_7 = nyc_merged.loc[nyc_merged['Cluster Labels']==7]
Cluster_7.head()

Unnamed: 0,Zip Code,Median Income,Borough,Neighborhood,Population,Density,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
25,10029,33720,Manhattan,East Harlem,76003.0,92116.0,40.7914,-73.9448,7,Thai Restaurant,Mexican Restaurant,Bakery,Park,Spa,Café,Gym,Pizza Place,Restaurant,Sandwich Place
27,10031,53660,Manhattan,Inwood and Washington Heights,56438.0,86722.0,40.8247,-73.948,7,Coffee Shop,Pizza Place,Deli / Bodega,Bar,Café,Seafood Restaurant,Juice Bar,Chinese Restaurant,Mexican Restaurant,Caribbean Restaurant
28,10032,53525,Manhattan,Inwood and Washington Heights,57331.0,87199.0,40.8379,-73.9419,7,Pizza Place,Mexican Restaurant,Coffee Shop,Bakery,Deli / Bodega,Sandwich Place,Fried Chicken Joint,Latin American Restaurant,Bank,History Museum
29,10033,63093,Manhattan,Inwood and Washington Heights,53926.0,89201.0,40.8498,-73.936,7,Grocery Store,Bakery,Café,Pizza Place,Mobile Phone Shop,Spanish Restaurant,Park,Donut Shop,Chinese Restaurant,Lounge
30,10034,54406,Manhattan,Inwood and Washington Heights,38908.0,34544.0,40.867,-73.9199,7,Mexican Restaurant,Café,Restaurant,Lounge,Park,Bakery,Chinese Restaurant,Caribbean Restaurant,Pizza Place,Wine Bar


In [371]:
total = 0
for n in Cluster_7['Median Income']:
    n = float(n)
    total += n
print("Average median income of Cluster 7:", total/len(Cluster_7))

Average median income of Cluster 7: 61058.10989010989
