# Capstone Project - The Battle of the Neighborhoods
#### Applied Data Science Capstone 

In [1]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
import json # library to handle JSON files
#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
# import k-means from clustering stage
from sklearn.cluster import KMeans
#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    openssl-1.1.1f             |       h516909a_0         2.1 MB  conda-forge
    branca-0.4.0               |             py_0          26 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    ------------------------------------------------------------
                       

## Step1 Download and Process Neighborhood Data for Toronto, Yorkville

In [2]:
import html5lib
import lxml

In [3]:
df = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", header=0)[0]
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [4]:
df1 = df[df.Borough != 'Not assigned']
df1

Unnamed: 0,Postal code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,Malvern / Rouge
11,M3B,North York,Don Mills
12,M4B,East York,Parkview Hill / Woodbine Gardens
13,M5B,Downtown Toronto,"Garden District, Ryerson"


In [5]:
func = {'Postal code':'first', 'Borough': 'sum', 'Neighborhood': 'sum'}
df2 = df1.groupby(df1['Postal code']).aggregate(func).reindex(columns=df1.columns)
df2.reset_index(drop=True, inplace=True)
df2 = df2.rename(columns={'Postal code': 'Postal Code', 'Borough': 'Borough', 'Neighborhood': 'Neighborhood'})
df2.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,Malvern / Rouge
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek
2,M1E,Scarborough,Guildwood / Morningside / West Hill
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [6]:
df2.shape

(103, 3)

In [7]:
df_geo = pd.read_csv('https://cocl.us/Geospatial_data')

In [8]:
neighborhoods_tr = pd.merge(left=df2, right=df_geo, on='Postal Code')
neighborhoods_tr

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.784535,-79.160497
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,Kennedy Park / Ionview / East Birchmount Park,43.727929,-79.262029
7,M1L,Scarborough,Golden Mile / Clairlea / Oakridge,43.711112,-79.284577
8,M1M,Scarborough,Cliffside / Cliffcrest / Scarborough Village West,43.716316,-79.239476
9,M1N,Scarborough,Birch Cliff / Cliffside West,43.692657,-79.264848


In [9]:
address = 'Yorkville, CA'

geolocator = Nominatim(user_agent="tr_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Yorkville are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Yorkville are 43.6713861, -79.3901677.


In [10]:
# create map of Manhattan using latitude and longitude values
map_yorkville = folium.Map(location=[latitude, longitude], zoom_start=15)  
    
map_yorkville

In [11]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods_yk = pd.DataFrame(columns=column_names)

In [12]:
borough = 'Central Toronto'
neighborhood_name = 'Yorkville_CN'
neighborhood_lat = location.latitude
neighborhood_lon = location.longitude
    
neighborhoods_yk = neighborhoods_yk.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [13]:
neighborhoods_yk

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Central Toronto,Yorkville_CN,43.671386,-79.390168


## Step2 Download Neighborhood Data for New York

In [14]:
# Down load the neighborhood data for New York
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded!')

Data downloaded!


In [15]:
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

In [16]:
neighborhoods_data_ny = newyork_data['features']

In [17]:
# define the dataframe columns
column_names_ny = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods_ny = pd.DataFrame(columns=column_names)

In [18]:
for data in neighborhoods_data_ny:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods_ny = neighborhoods_ny.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [19]:
neighborhoods_ny.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [20]:
neighborhoods_ny.shape

(306, 4)

In [21]:
# Since we are interested in Manhattan, we create a new dataframe of the Manhattan data.
neighborhoods_mh = neighborhoods_ny[neighborhoods_ny['Borough'] == 'Manhattan'].reset_index(drop=True)
neighborhoods_mh.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688


In [22]:
neighborhoods_mh.shape

(40, 4)

## Step3 Define Foursquare Credentials and Version

In [23]:
CLIENT_ID = 'S11VNEPJROJK3W11FZZAEAZ5YGPHWFADCVN2WLR02DS1NAFO' # your Foursquare ID
CLIENT_SECRET = 'SAMJUM4ABLYDECVRUVJNEHXFHBITZZEQYSQVCYKLQJO1D03Q' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: S11VNEPJROJK3W11FZZAEAZ5YGPHWFADCVN2WLR02DS1NAFO
CLIENT_SECRET:SAMJUM4ABLYDECVRUVJNEHXFHBITZZEQYSQVCYKLQJO1D03Q


# Step4 Explore Neighborhoods in Manhatten Together with Yorkville in Canada

We use Manhatten data since Manhatten is the central of New York City and it has the highest posibbility to find a neighborhood likes Yorkville in Canada.

In [24]:
# Define function to get the top 100 venues with in radius in the neighborhood list
LIMIT = 100
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [25]:
# Test the getNearbyVenues function by getting the venues in Yorkville Canada
yorkville_venues = getNearbyVenues(names=['Yorkville_CA'],
                                   latitudes=[latitude],
                                   longitudes=[longitude]
                                  )

Yorkville_CA


In [26]:
yorkville_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Yorkville_CA,43.671386,-79.390168,Trattoria Nervosa,43.671019,-79.391081,Italian Restaurant
1,Yorkville_CA,43.671386,-79.390168,Paramount Fine Foods,43.670677,-79.389865,Middle Eastern Restaurant
2,Yorkville_CA,43.671386,-79.390168,Four Seasons Hotel Toronto,43.671796,-79.389457,Hotel
3,Yorkville_CA,43.671386,-79.390168,Summer's Homemade Ice Cream,43.670903,-79.392299,Ice Cream Shop
4,Yorkville_CA,43.671386,-79.390168,Pi Co.,43.670107,-79.389852,Pizza Place


In [27]:
yorkville_venues.shape

(100, 7)

Now, lets merge the neighborhood of Yorkville Canada with New York City

In [28]:
neighborhoods_yk

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Central Toronto,Yorkville_CN,43.671386,-79.390168


In [29]:
neighborhoods_yk.shape

(1, 4)

In [30]:
neighborhoods_mh.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688


In [31]:
neighborhoods_mh.shape

(40, 4)

In [32]:
frames = [neighborhoods_yk, neighborhoods_mh]
neighborhoods_ykmh = pd.concat(frames)
neighborhoods_ykmh.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Central Toronto,Yorkville_CN,43.671386,-79.390168
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121


In [33]:
neighborhoods_ykmh.shape

(41, 4)

In [34]:
# Run the above function on each neighborhood and create a new dataframe called ykmh_venues
ykmh_venues = getNearbyVenues(names=neighborhoods_ykmh['Neighborhood'],
                                   latitudes=neighborhoods_ykmh['Latitude'],
                                   longitudes=neighborhoods_ykmh['Longitude']
                                  )

Yorkville_CN
Marble Hill
Chinatown
Washington Heights
Inwood
Hamilton Heights
Manhattanville
Central Harlem
East Harlem
Upper East Side
Yorkville
Lenox Hill
Roosevelt Island
Upper West Side
Lincoln Square
Clinton
Midtown
Murray Hill
Chelsea
Greenwich Village
East Village
Lower East Side
Tribeca
Little Italy
Soho
West Village
Manhattan Valley
Morningside Heights
Gramercy
Battery Park City
Financial District
Carnegie Hill
Noho
Civic Center
Midtown South
Sutton Place
Turtle Bay
Tudor City
Stuyvesant Town
Flatiron
Hudson Yards


In [35]:
# check the size of the result dataframe
print(ykmh_venues.shape)
ykmh_venues.head()

(3089, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Yorkville_CN,43.671386,-79.390168,Trattoria Nervosa,43.671019,-79.391081,Italian Restaurant
1,Yorkville_CN,43.671386,-79.390168,Paramount Fine Foods,43.670677,-79.389865,Middle Eastern Restaurant
2,Yorkville_CN,43.671386,-79.390168,Four Seasons Hotel Toronto,43.671796,-79.389457,Hotel
3,Yorkville_CN,43.671386,-79.390168,Summer's Homemade Ice Cream,43.670903,-79.392299,Ice Cream Shop
4,Yorkville_CN,43.671386,-79.390168,Pi Co.,43.670107,-79.389852,Pizza Place


## Step5 Analyze Each Neighborhood in Manhatten Together with Yorkville Canada

In [36]:
# one hot encoding
ykmh_onehot = pd.get_dummies(ykmh_venues[['Venue Category']], prefix="", prefix_sep="")
# add neighborhood column back to dataframe
ykmh_onehot['Neighborhood'] = ykmh_venues['Neighborhood'] 
# move neighborhood column to the first column
fixed_columns = [ykmh_onehot.columns[-1]] + list(ykmh_onehot.columns[:-1])
ykmh_onehot = ykmh_onehot[fixed_columns]
ykmh_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,African Restaurant,American Restaurant,Antique Shop,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,...,Video Store,Vietnamese Restaurant,Volleyball Court,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Yorkville_CN,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Yorkville_CN,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Yorkville_CN,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Yorkville_CN,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Yorkville_CN,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [37]:
ykmh_onehot.shape

(3089, 319)

In [38]:
# Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
ykmh_grouped = ykmh_onehot.groupby('Neighborhood').mean().reset_index()
ykmh_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,African Restaurant,American Restaurant,Antique Shop,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,...,Video Store,Vietnamese Restaurant,Volleyball Court,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Battery Park City,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0
1,Carnegie Hill,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.011628,0.0,...,0.0,0.011628,0.0,0.0,0.0,0.011628,0.034884,0.0,0.0,0.034884
2,Central Harlem,0.0,0.0,0.044444,0.044444,0.0,0.0,0.0,0.0,0.022222,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Chelsea,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.13,...,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0
4,Chinatown,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,...,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01


In [39]:
ykmh_grouped.shape

(41, 319)

## Step6 Cluster Neighborhood

In [40]:
# Run k-means to cluster the neighborhood into 5 clusters.
# set number of clusters
kclusters = 5

ykmh_grouped_clustering = ykmh_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ykmh_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([3, 2, 1, 2, 1, 1, 1, 0, 2, 1, 1, 2, 2, 0, 1, 0, 2, 1, 1, 1, 1, 2,
       2, 1, 1, 0, 2, 2, 3, 2, 4, 2, 1, 0, 2, 1, 2, 0, 2, 2, 1],
      dtype=int32)

In [48]:
# add clustering labels
neighborhoods_ykmh.insert(0, 'Cluster Labels', kmeans.labels_)

neighborhoods_ykmh

Unnamed: 0,Cluster Labels,Borough,Neighborhood,Latitude,Longitude
0,3,Central Toronto,Yorkville_CN,43.671386,-79.390168
0,2,Manhattan,Marble Hill,40.876551,-73.91066
1,1,Manhattan,Chinatown,40.715618,-73.994279
2,2,Manhattan,Washington Heights,40.851903,-73.9369
3,1,Manhattan,Inwood,40.867684,-73.92121
4,1,Manhattan,Hamilton Heights,40.823604,-73.949688
5,1,Manhattan,Manhattanville,40.816934,-73.957385
6,0,Manhattan,Central Harlem,40.815976,-73.943211
7,2,Manhattan,East Harlem,40.792249,-73.944182
8,1,Manhattan,Upper East Side,40.775639,-73.960508


In [49]:
neighborhoods_ykmh.shape

(41, 5)

In [51]:
#ykmh_grouped.insert(0, 'Cluster Labels', kmeans.labels_)
ykmh_grouped

Unnamed: 0,Cluster Labels,Neighborhood,Accessories Store,Adult Boutique,African Restaurant,American Restaurant,Antique Shop,Arcade,Arepa Restaurant,Argentinian Restaurant,...,Video Store,Vietnamese Restaurant,Volleyball Court,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,3,Battery Park City,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0
1,2,Carnegie Hill,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.011628,...,0.0,0.011628,0.0,0.0,0.0,0.011628,0.034884,0.0,0.0,0.034884
2,1,Central Harlem,0.0,0.0,0.044444,0.044444,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2,Chelsea,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0
4,1,Chinatown,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,...,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01
5,1,Civic Center,0.0,0.0,0.0,0.022472,0.011236,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.011236,0.022472,0.0,0.0,0.033708
6,1,Clinton,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.01,0.04,0.0,0.0,0.0
7,0,East Harlem,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,2,East Village,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,...,0.0,0.03,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0
9,1,Financial District,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01


Let's create a map of Manhattan.

In [52]:
address = 'Manhattan, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 40.7896239, -73.9598939.


In [53]:
neighborhoods_mh2 = neighborhoods_ykmh.copy()
neighborhoods_mh2.shape

(41, 5)

In [54]:
neighborhoods_mh2 = neighborhoods_mh2[neighborhoods_mh2.Neighborhood != 'Yorkville_CN']
neighborhoods_mh2

Unnamed: 0,Cluster Labels,Borough,Neighborhood,Latitude,Longitude
0,2,Manhattan,Marble Hill,40.876551,-73.91066
1,1,Manhattan,Chinatown,40.715618,-73.994279
2,2,Manhattan,Washington Heights,40.851903,-73.9369
3,1,Manhattan,Inwood,40.867684,-73.92121
4,1,Manhattan,Hamilton Heights,40.823604,-73.949688
5,1,Manhattan,Manhattanville,40.816934,-73.957385
6,0,Manhattan,Central Harlem,40.815976,-73.943211
7,2,Manhattan,East Harlem,40.792249,-73.944182
8,1,Manhattan,Upper East Side,40.775639,-73.960508
9,1,Manhattan,Yorkville,40.77593,-73.947118


In [55]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(neighborhoods_mh2['Latitude'], neighborhoods_mh2['Longitude'], neighborhoods_mh2['Neighborhood'], neighborhoods_mh2['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Step7 Print Out the Results

In [56]:
neighborhoods_mh2.loc[neighborhoods_mh2['Cluster Labels'] == 2, neighborhoods_mh2.columns[[2] + list(range(5, neighborhoods_mh2.shape[1]))]]

Unnamed: 0,Neighborhood
0,Marble Hill
2,Washington Heights
7,East Harlem
10,Lenox Hill
11,Roosevelt Island
15,Midtown
20,Lower East Side
21,Tribeca
25,Manhattan Valley
26,Morningside Heights
