# A Comparison of Manhattan Neighborhoods and San Francisco Neighborhoods Based on Their Venues

<a id='table of contents'></a>

## Table of Contents
<div class="alert alert-block alert-info" style="margin-top: 20px">

<font size = 3>
    
<a href="#Initialization">A. Initialize the Notebook</a>    
<a href="#Introduction">B. Introduction / Business Problem</a>    
<a href="#Methodology and Data">C. Methodology and Data</a>    
<a href="#Results">D. Results</a>    
<a href="#Discussion">E. Discussion</a>    
<a href="#Conclusion">F. Conclusion</a>    
<a href="#Appendix">G. Appendix: <i>k</i>-means analysis with n_clusters = 2</a>  

</font>
</div>

<a id='Initialization'></a>

# A. Initialize the Notebook

## Set up Foursquare credentials and other constants

In [1]:
# Credentials
CLIENT_ID = 'YOUR-FOURSQUARE-CLIENT-ID'
CLIENT_SECRET = 'YOUR-FOURSQUARE-CLIENT-SECRET'

# Foursquare version used in this notebook
VERSION = '20180604'

# Other constants
# Foursquare API URI base address
URI_BASE = 'https://api.foursquare.com/v2/'
URI_AUTH = '?client_id={}&client_secret={}&v={}'.format(CLIENT_ID,
                                                        CLIENT_SECRET,
                                                        VERSION)

## Import libraries
<b>Important:</b> Make sure to uncomment the two "!conda ..." lines below if you have not yet installed the geopy or folium libraries.

In [2]:
import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation
import json
import operator # Used for operator.itemgetter()

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# Uncomment the following line if you have not already run it at least once.
#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values
from geopy import distance # module to compute great-circle distance between two locations

# Unomment the following line if you have not already run it at least once.
#!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

import requests # library to handle requests

print('Libraries imported.')

Libraries imported.


## Define utility functions

In [3]:
# For the given address, return a tuple of the latitude and longitude
# for that address. If address is None, then return (None, None).
def get_geo_coords(address=None):
    latitude, longitude = (None, None)

    if address is not None:
        geolocator = Nominatim(user_agent='my-capstone')
        location = geolocator.geocode(address)
        latitude = location.latitude
        longitude = location.longitude

    return latitude, longitude

In [4]:
# Given an address or geographical coordinates, return a folium map centered
# on the address or coordinates. Markes are added if a Pandas DataFrame with
# individual neighborhood names, latitudes, and longitudes is provided.
#
# If both address and geographical coordinates (latitude and longitude) are
# provided, the address is used, and the coordinates ignored.
def get_map_with_markers(address=None,
                         latitude=None,
                         longitude=None,
                         df_neighborhoods=None,
                         zoom=10):
    if address is None and (latitude is None or longitude is None):
        print('No address or geographical coordinages provided, a map cannot be created.')
        return None

    if address is not None:
        # Any latitude and longitude passed as arguments to this method
        # are ignored in favor of the address.
        latitude, longitude = get_geo_coords(address)
        if latitude is None or longitude is None:
            print('Geographical coordinates could not be found, a map cannot be created.')
            return None

    folium_map = folium.Map(location=[latitude, longitude], zoom_start=zoom)

    # Add a marker for the address.
    label = folium.Popup(address, parse_html=True)
    folium.CircleMarker([latitude, longitude],
                        radius=8,
                        popup=label,
                        color='red',
                        fill=True,
                        fill_color='#cc0000',
                        fill_opacity=0.7,
                        parse_html=False).add_to(folium_map)  

    # If a dataframe was provided, add markers
    if df_neighborhoods is not None:
        # Add markers to the map
        for latitude, longitude, neighborhood in zip(df_neighborhoods['Latitude'],
                                                     df_neighborhoods['Longitude'],
                                                     df_neighborhoods['Neighborhood']):
            label = '{}'.format(neighborhood)
            label = folium.Popup(label, parse_html=True)
            folium.CircleMarker([latitude, longitude],
                                radius=5,
                                popup=label,
                                color='blue',
                                fill=True,
                                fill_color='#3186cc',
                                fill_opacity=0.7,
                                parse_html=False).add_to(folium_map)  

    return folium_map

In [5]:
def get_neighborhood_venues(neighborhoods, latitudes, longitudes, radius=500):
    limit = 100
    venues_list=[]
    radii = []

    if type(radius) is int:
        # Use a list of the same provided radius
        radii = [radius] * len(neighborhoods)
    else:
        # Use the provided list of radii
        radii = radius

    for name, lat, lng, rad in zip(neighborhoods, latitudes, longitudes, radii):
        print('Exploring venues in {} with search radius {} meters'.format(name, rad))

        # create the API request URL
        url = '{}venues/explore{}&ll={},{}&radius={}&limit={}'.format(URI_BASE,
                                                                      URI_AUTH,
                                                                      lat, lng,
                                                                      rad,
                                                                      limit)

        # make the GET request
        # results = requests.get(url).json()["response"]['groups'][0]['items']
        results_temp = requests.get(url).json()
        results = results_temp['response']['groups'][0]['items']

        # return only relevant information for each nearby venue
        venues_list.append([(name,
                             lat,
                             lng,
                             v['venue']['name'],
                             v['venue']['id'],
                             v['venue']['location']['lat'],
                             v['venue']['location']['lng'],
                             v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood',
                             'Neighborhood Latitude',
                             'Neighborhood Longitude',
                             'Venue',
                             'Id',
                             'Venue Latitude',
                             'Venue Longitude',
                             'Venue Category']

    return nearby_venues, results

In [6]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [7]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

<a href="#table of contents">Go back to top</a>

<a id='Introduction'></a>

# B. Introduction / Business Problem
I work for a company that specializes in helping clients relocate from one city to another city within the United States.

Our latest client is relocating from Manhattan, a borough of New York City, to San Francisco, a city in northern California. My assignment is to research venues in the neighborhoods of each city, and identify similar neighborhoods, based on their venues.

My company will use this analysis to help their client find a San Francisco neighborhood they might like to live in, based on the neighborhoods of Manhattan that the client is already familiar with. In other words, my company wants to be able to tell our client, "if you like the venues in Manhattan neighborhood <i>A</i>, you will probably like San Francisco neighborhood <i>B</i>." Of course, this could also work the other way, for someone relocating from San Francisco to Manhattan.

<a href="#table of contents">Go back to top</a>

<a id='Methodology and Data'></a>

# C. Methodology and Data
I will identify the types of venues in each neighborhood of Manhattan and San Francisco, then run a <i>k</i>-means cluster analysis to identify similarities between the neighborhoods. Based on the results, I will assess whether we can make meaningful statements to our client, "if you like the venues in Manhattan neighborhood <i>A</i>, you will probably like San Francisco neighborhood <i>B</i>."

Approach:

1. Identify the neighborhoods in each city and their geographical coordinates. The geographical coordinates are necessary to use Foursquare to find venues in that neighborhood. The geographical coordinates are also used to display illustrative maps of each city.

    Geographical coordinates for Manhattan were taken from the GeoJSON file that I obtaind from here:

        https://geo.nyu.edu/catalog/nyu_2451_34572

    The data is published by the <b>New York City Department of City Planning</b>. The JSON file includes latitude and longitude that (presumably) represent the centroid of each neighborhood.

    Geographical coordinates for San Francisco were derived from the GeoJSON file that I exported from here:

        https://data.sfgov.org/Geographic-Locations-and-Boundaries/Planning-Neighborhood-Groups-Map/iacs-ws63

    The data.sfgov.org site has several different versions of neighborhood maps, such as neighborhoods defined by the <b>San Francisco Association of Realtors</b> and <b>The Department of Public Health and the Mayor’s Office of Housing and Community Development</b>. I used the version of the map from the <b>Department of City Planning</b>, since the Manhattan neighborhoods map is from the <b>New York City Department of City Planning</b>.
    
    The San Francicso map data is in the form of multipolygon coordinates for each neighborhood. I will estimate the centroid for each neighborhood by averaging the latitudes and longitudes of each neighborhoods coordinates.
    

2. Identify nearby venues in each neighborhood, using the Foursquare API. The results will be cleaned and wranged into a single dataframe with rows that consists of a neighborhood name and top ten venue categories for that neighborhood.


3. Group neighborhoods into like clusters, based on the commonality of the venues. <i>k</i>-means clustering will be used to partition the neighborhoods into like groups.

## 1. Identify the neighborhoods in Manhattan and San Francisco
Determine the neighborhoods and their geographical coordinates, and wrangle them into Pandas dataframes. Display maps of each city, including markers for their respective neighborhoods.

### Download the Manhattan and San Francisco GeoJSON data files

In [8]:
# For brevity, "ny" will be used in variable names to refer to Manhattan.

# Get the data for Manhattan
print('Starting download for the Manhattan neighborhood geographical locations.')
!wget -nv -O 'ny_geo_data.json' 'https://geo.nyu.edu/download/file/nyu-2451-34572-geojson.json'
!ls -l 'ny_geo_data.json'

print()
print('Check the above messages. No error should be reported, '
      'and the JSON file should be more than 100 KB in size.')
print()

# Get the data for San Francisco
print('Starting download for the San Francisco neighborhood geographical locations.')
!wget -nv -O 'sf_geo_data.json' 'https://data.sfgov.org/api/geospatial/iacs-ws63?method=export&format=GeoJSON'
!ls -l 'sf_geo_data.json'
print()
print('Check the above messages. No error should be reported, '
      'and the JSON file should be more than 500 KB size.')


Starting download for the Manhattan neighborhood geographical locations.
2018-11-20 00:07:47 URL:https://geo.nyu.edu/download/file/nyu-2451-34572-geojson.json [115774] -> "ny_geo_data.json" [1]
-rw-r--r--@ 1 andy  staff  115774 Nov 20 00:07 ny_geo_data.json

Check the above messages. No error should be reported, and the JSON file should be more than 100 KB in size.

Starting download for the San Francisco neighborhood geographical locations.
2018-11-20 00:07:47 URL:https://data.sfgov.org/api/geospatial/iacs-ws63?method=export&format=GeoJSON [535545] -> "sf_geo_data.json" [1]
-rw-r--r--@ 1 andy  staff  535545 Nov 20 00:07 sf_geo_data.json

Check the above messages. No error should be reported, and the JSON file should be more than 500 KB size.


### Load the GeoJSON files for each city into Python data dictionaries

In [9]:
# Load the Manhattan JSON file a into dictionary.
with open('ny_geo_data.json') as json_data:
    ny_geo_data = json.load(json_data)

In [10]:
# Load the San Francisco JSON file into a dictionary.
with open('sf_geo_data.json') as json_data:
    sf_geo_data = json.load(json_data)

### Use the dictionaries to create Pandas DataFrames for each city

#### First we will do Manhattan

In [11]:
# Pandas dataframe column names for the Manhattan and San Francisco neighborhood dataframes.
column_names = ['Neighborhood', 'Latitude', 'Longitude']

In [12]:
# Use the 'features' data to create a Pandas dataframe for Manhattan.
# Each row consists of a Manhattan neighborhood name, its latitude, and its longitude.

# Instantiate a dataframe with the desired column names.
ny_nhoods = pd.DataFrame(columns=column_names)

# Loop through the neighborhood geographical data, and add rows to the
# dataframe for each neighborhood in Manhattan.
for data in ny_geo_data['features']:
    if data['properties']['borough'] == 'Manhattan':
        ny_nhoods = ny_nhoods.append({'Neighborhood': data['properties']['name'],
                                      'Latitude': data['geometry']['coordinates'][1],
                                      'Longitude': data['geometry']['coordinates'][0]},
                                     ignore_index=True)

print('Manhattan has {} neighborhoods'.format(len(ny_nhoods)))
ny_nhoods

Manhattan has 40 neighborhoods


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Marble Hill,40.876551,-73.91066
1,Chinatown,40.715618,-73.994279
2,Washington Heights,40.851903,-73.9369
3,Inwood,40.867684,-73.92121
4,Hamilton Heights,40.823604,-73.949688
5,Manhattanville,40.816934,-73.957385
6,Central Harlem,40.815976,-73.943211
7,East Harlem,40.792249,-73.944182
8,Upper East Side,40.775639,-73.960508
9,Yorkville,40.77593,-73.947118


In [13]:
# For each neighborhood, find the closest neighborhood, then use half of the
# distance between the neighborhoods as the radius for exploring venues

# Create an array to hold the nearest neighborhood names and search radii
# for each neighborhood.
ny_search_radii = []
ny_nearest_nhoods = []

# Implementation note: The algorithm used here has a running time of O(n^2), which
# is not optimal. Faster algorithms or libraries to do this work are available.
# However, given the relatively small number of neighborhoods involved and the
# short elapsed run-time, I used this approach rather than complicating the
# notebook with additional libraries that might not be installed on the user's
# system. For analyses done on much larger scale, a more optimal approach to
# solving this form of the "Nearest Neighbor Search" problem can be implemented.
for current_nhood in ny_nhoods.iterrows():
    nearest_distance = float("inf")
    nearest_neighborhood = ""
    current_index = current_nhood[0]
    current_location = (current_nhood[1]['Latitude'], current_nhood[1]['Longitude'])

    for other_nhood in ny_nhoods.iterrows():
        other_index = other_nhood[0]
        if current_index == other_index:
            # Do not compare the neighborhood to itself
            continue

        other_location = (other_nhood[1]['Latitude'], other_nhood[1]['Longitude'])
        distance_to_other = distance.distance(current_location, other_location).meters

        if distance_to_other < nearest_distance:
            nearest_distance = distance_to_other
            nearest_neighborhood = other_nhood[1]['Neighborhood']

    ny_search_radii.append(0.5 * nearest_distance)
    ny_nearest_nhoods.append(nearest_neighborhood)
    print('Nearest neighborhood to {} is {}, {} meters away.'.format(current_nhood[1]['Neighborhood'],
                                                                     nearest_neighborhood,
                                                                     nearest_distance))

# Append the search radii and nearest neighborhoods to the neighborhood dataframe
ny_nhoods['Search Radius'] = ny_search_radii
ny_nhoods['Nearest Neighborhood'] = ny_nearest_nhoods

Nearest neighborhood to Marble Hill is Inwood, 1326.883278533227 meters away.
Nearest neighborhood to Chinatown is Little Italy, 484.4102255435927 meters away.
Nearest neighborhood to Washington Heights is Inwood, 2195.7660333032018 meters away.
Nearest neighborhood to Inwood is Marble Hill, 1326.883278533227 meters away.
Nearest neighborhood to Hamilton Heights is Manhattanville, 985.0494353952133 meters away.
Nearest neighborhood to Manhattanville is Hamilton Heights, 985.0494353952133 meters away.
Nearest neighborhood to Central Harlem is Hamilton Heights, 1008.0516743252322 meters away.
Nearest neighborhood to East Harlem is Carnegie Hill, 1309.7008586992301 meters away.
Nearest neighborhood to Upper East Side is Lenox Hill, 847.2501855995193 meters away.
Nearest neighborhood to Yorkville is Carnegie Hill, 911.512139092511 meters away.
Nearest neighborhood to Lenox Hill is Upper East Side, 847.2501855995193 meters away.
Nearest neighborhood to Roosevelt Island is Lenox Hill, 1051.9

In [14]:
# Display the Manhattan dataframe
ny_nhoods

Unnamed: 0,Neighborhood,Latitude,Longitude,Search Radius,Nearest Neighborhood
0,Marble Hill,40.876551,-73.91066,663.441639,Inwood
1,Chinatown,40.715618,-73.994279,242.205113,Little Italy
2,Washington Heights,40.851903,-73.9369,1097.883017,Inwood
3,Inwood,40.867684,-73.92121,663.441639,Marble Hill
4,Hamilton Heights,40.823604,-73.949688,492.524718,Manhattanville
5,Manhattanville,40.816934,-73.957385,492.524718,Hamilton Heights
6,Central Harlem,40.815976,-73.943211,504.025837,Hamilton Heights
7,East Harlem,40.792249,-73.944182,654.850429,Carnegie Hill
8,Upper East Side,40.775639,-73.960508,423.625093,Lenox Hill
9,Yorkville,40.77593,-73.947118,455.75607,Carnegie Hill


#### Now we will do San Francisco

In [15]:
# Use the 'features' data to create a Pandas dataframe for San Francisco.

# The JSON data does not provide a single set of coordinates for each neightborhood.
# Rather, the coordinate data is in the form of MultiPolygons, one for each
# neighborhood.

# Each MultiPolygon is a list of Polygons. Each Polygon is a list of linear rings,
# which are represented as LineStrings. The first linear ring is the exterior ring,
# which is the outer boundary of the Polygon. Other linear rings represent "holes"
# inside the Polygon.

# The centroid of each neighborhood is computed as the averages of all the latitudes
# and longitudes in each exterior linear ring (the first LineString in each Polygon
# that comprises the MultiPolygon). The first and last values in each linear ring
# are identical, but should be used one time only for computing the average latitudes
# and longitudes.

# I used RFC 7946 as the reference for the MultiPolygon structure:
#           https://tools.ietf.org/html/rfc7946#section-3.1.7

# List for "staging" the neighborhoods and geographical coordinates.
nhoods = []

for feature in sf_geo_data['features']:
    sum_latitudes = 0
    sum_longitudes = 0
    num_geo_coords = 0
    nhood = feature['properties']['neighborho']

    # feature['geometry']['coordinates'] is a MultiPolygon, so each item
    # in the MultiPolygon is a Polygon.
    for polygon in feature['geometry']['coordinates']:
        # Each item in the polygon is a linear_ring.
        for linear_ring in polygon:
            # Since the linear ring's first and last pairs of coordinates are
            # the same, I use a set to ensure they are counted only once.
            geo_coords = set(tuple(coords) for coords in linear_ring)
            num_geo_coords += len(geo_coords)

            sum_longitudes += sum(coords[0] for coords in geo_coords)
            sum_latitudes += sum(coords[1] for coords in geo_coords)

    average_latitude = sum_latitudes / num_geo_coords
    average_longitude = sum_longitudes / num_geo_coords

    nhoods += [[nhood, average_latitude, average_longitude]]

sf_nhoods = pd.DataFrame(nhoods, columns=column_names)

print('San Francisco has {} neighborhoods'.format(len(sf_nhoods)))
sf_nhoods

San Francisco has 37 neighborhoods


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Seacliff,37.786519,-122.501281
1,Haight Ashbury,37.765925,-122.444278
2,Outer Mission,37.723424,-122.445313
3,Inner Sunset,37.756437,-122.45907
4,Downtown/Civic Center,37.786657,-122.412221
5,Diamond Heights,37.741023,-122.442018
6,Lakeshore,37.726754,-122.501586
7,Russian Hill,37.808851,-122.424953
8,Noe Valley,37.748368,-122.437959
9,Treasure Island/YBI,37.815175,-122.367598


In [16]:
# For each neighborhood, find the closest neighborhood, then use half of the
# distance between the neighborhoods as the radius for exploring venues

# Create an array to hold the nearest neighborhood names and search radii
# for each neighborhood
sf_search_radii = []
sf_nearest_nhoods = []

# Implementation note: The algorithm used here has a running time of O(n^2), which
# is not optimal. Faster algorithms or libraries to do this work are available.
# However, given the relatively small number of neighborhoods involved and the
# short elapsed run-time, I used this approach rather than complicating the
# notebook with additional libraries that might not be installed on the user's
# system. For analyses done on much larger scale, a more optimal approach to
# solving this form of the "Nearest Neighbor Search" problem can be implemented.
for current_nhood in sf_nhoods.iterrows():
    nearest_distance = float("inf")
    nearest_neighborhood = ""
    current_index = current_nhood[0]
    current_location = (current_nhood[1]['Latitude'], current_nhood[1]['Longitude'])

    for other_nhood in sf_nhoods.iterrows():
        other_index = other_nhood[0]
        if current_index == other_index:
            # Do not compare the neighborhood to itself
            continue

        other_location = (other_nhood[1]['Latitude'], other_nhood[1]['Longitude'])
        distance_to_other = distance.distance(current_location, other_location).meters

        if distance_to_other < nearest_distance:
            nearest_distance = distance_to_other
            nearest_neighborhood = other_nhood[1]['Neighborhood']

    sf_search_radii.append(0.5 * nearest_distance)
    sf_nearest_nhoods.append(nearest_neighborhood)
    print('Nearest neighborhood to {} is {}, {} meters away.'.format(current_nhood[1]['Neighborhood'],
                                                                     nearest_neighborhood,
                                                                     nearest_distance))

# Append the search radii and nearest neighborhoods to the neighborhood dataframe
sf_nhoods['Search Radius'] = sf_search_radii
sf_nhoods['Nearest Neighborhood'] = sf_nearest_nhoods

Nearest neighborhood to Seacliff is Outer Richmond, 1478.141897556772 meters away.
Nearest neighborhood to Haight Ashbury is Castro/Upper Market, 510.7938056450166 meters away.
Nearest neighborhood to Outer Mission is Crocker Amazon, 1552.8556956554105 meters away.
Nearest neighborhood to Inner Sunset is Twin Peaks, 852.1541984173886 meters away.
Nearest neighborhood to Downtown/Civic Center is Nob Hill, 528.1025270422847 meters away.
Nearest neighborhood to Diamond Heights is Glen Park, 723.0307941825782 meters away.
Nearest neighborhood to Lakeshore is Parkside, 1597.6781928136065 meters away.
Nearest neighborhood to Russian Hill is Marina, 1322.528975944624 meters away.
Nearest neighborhood to Noe Valley is Diamond Heights, 890.2497477108994 meters away.
Nearest neighborhood to Treasure Island/YBI is Financial District, 3219.764468071559 meters away.
Nearest neighborhood to Outer Richmond is Golden Gate Park, 917.8267089650642 meters away.
Nearest neighborhood to Crocker Amazon is O

In [17]:
# Display the San Francisco dataframe
sf_nhoods

Unnamed: 0,Neighborhood,Latitude,Longitude,Search Radius,Nearest Neighborhood
0,Seacliff,37.786519,-122.501281,739.070949,Outer Richmond
1,Haight Ashbury,37.765925,-122.444278,255.396903,Castro/Upper Market
2,Outer Mission,37.723424,-122.445313,776.427848,Crocker Amazon
3,Inner Sunset,37.756437,-122.45907,426.077099,Twin Peaks
4,Downtown/Civic Center,37.786657,-122.412221,264.051264,Nob Hill
5,Diamond Heights,37.741023,-122.442018,361.515397,Glen Park
6,Lakeshore,37.726754,-122.501586,798.839096,Parkside
7,Russian Hill,37.808851,-122.424953,661.264488,Marina
8,Noe Valley,37.748368,-122.437959,445.124874,Diamond Heights
9,Treasure Island/YBI,37.815175,-122.367598,1609.882234,Financial District


### Display maps of each city with markers for each neighborhood in the city

#### Map of Manhattan neighborhoods

In [18]:
# Now that we have the neighborhoods in Manhattan, let's see how they look on a map.
# The map is centered on Manhattan proper.
address = 'Manhattan, New York'

ny_nhoods_map = get_map_with_markers(address, ny_nhoods, 11)
ny_nhoods_map

#### Map of San Francisco neighborhoods

In [19]:
# Show a similar map for San Francisco.
address = 'San Francisco, California'

sf_nhoods_map = get_map_with_markers(address, sf_nhoods, 12)
sf_nhoods_map

## 2. Identify nearby venues in each neighborhood using the Foursquare API
Clean and wrange the results into a dataframe for each city, with rows that consists of a neighborhood name and top ten venue categories for that neighborhood.

### Obtain information about nearby venues in each city's neighborhoods

In [20]:
print('Getting Manhattan neighborhood venues:')
ny_venues, ny_full_results = get_neighborhood_venues(ny_nhoods['Neighborhood'],
                                                     ny_nhoods['Latitude'],
                                                     ny_nhoods['Longitude'],
                                                     ny_nhoods['Search Radius'])

print('Done!')

Getting Manhattan neighborhood venues:
Exploring venues in Marble Hill with search radius 663.4416392666135 meters
Exploring venues in Chinatown with search radius 242.20511277179634 meters
Exploring venues in Washington Heights with search radius 1097.8830166516009 meters
Exploring venues in Inwood with search radius 663.4416392666135 meters
Exploring venues in Hamilton Heights with search radius 492.52471769760666 meters
Exploring venues in Manhattanville with search radius 492.52471769760666 meters
Exploring venues in Central Harlem with search radius 504.0258371626161 meters
Exploring venues in East Harlem with search radius 654.8504293496151 meters
Exploring venues in Upper East Side with search radius 423.62509279975967 meters
Exploring venues in Yorkville with search radius 455.7560695462555 meters
Exploring venues in Lenox Hill with search radius 423.62509279975967 meters
Exploring venues in Roosevelt Island with search radius 525.9959776453348 meters
Exploring venues in Upper 

In [21]:
print('Getting San Francisco neighborhood venues:')
sf_venues, sf_full_results = get_neighborhood_venues(sf_nhoods['Neighborhood'],
                                                     sf_nhoods['Latitude'],
                                                     sf_nhoods['Longitude'],
                                                     sf_nhoods['Search Radius'])
print('Done!')

Getting San Francisco neighborhood venues:
Exploring venues in Seacliff with search radius 739.070948778386 meters
Exploring venues in Haight Ashbury with search radius 255.3969028225083 meters
Exploring venues in Outer Mission with search radius 776.4278478277052 meters
Exploring venues in Inner Sunset with search radius 426.0770992086943 meters
Exploring venues in Downtown/Civic Center with search radius 264.05126352114235 meters
Exploring venues in Diamond Heights with search radius 361.5153970912891 meters
Exploring venues in Lakeshore with search radius 798.8390964068033 meters
Exploring venues in Russian Hill with search radius 661.264487972312 meters
Exploring venues in Noe Valley with search radius 445.1248738554497 meters
Exploring venues in Treasure Island/YBI with search radius 1609.8822340357794 meters
Exploring venues in Outer Richmond with search radius 458.9133544825321 meters
Exploring venues in Crocker Amazon with search radius 776.4278478277052 meters
Exploring venues

### Remove any duplicate venues
It is conceivable that if two neighborhoods radii touch each other, and a venue exist at that location, then the same venue could be added more than once. But do not be surprised if there are no duplicates.

In [22]:
print('In Manhattan, there are {} venues and {} venue categories before removing duplicates.'
      .format(ny_venues.shape[0], len(ny_venues['Venue Category'].unique())))
ny_venues.drop_duplicates('Id', inplace=True)
ny_venues.reset_index(drop=True, inplace=True)

print('In Manhattan, there are {} venues and {} venue categories after removing duplicates.'
      .format(ny_venues.shape[0], len(ny_venues['Venue Category'].unique())))

In Manhattan, there are 2930 venues and 326 venue categories before removing duplicates.
In Manhattan, there are 2930 venues and 326 venue categories after removing duplicates.


In [23]:
print('In San Francisco, there are {} venues and {} venue categories before removing duplicates.'
      .format(sf_venues.shape[0], len(sf_venues['Venue Category'].unique())))
sf_venues.drop_duplicates('Id', inplace=True)
sf_venues.reset_index(drop=True, inplace=True)
print('In San Francisco, there are {} venues and {} venue categories after removing duplicates.'
      .format(sf_venues.shape[0], len(sf_venues['Venue Category'].unique())))

In San Francisco, there are 1559 venues and 299 venue categories before removing duplicates.
In San Francisco, there are 1559 venues and 299 venue categories after removing duplicates.


### Examine the number of venues reported for each neighborhood

#### Manhattan

In [24]:
print('Number of venues per neighborhood in Manhattan:')
ny_venues.groupby('Neighborhood').count()['Venue']

Number of venues per neighborhood in Manhattan:


Neighborhood
Battery Park City       62
Carnegie Hill           84
Central Harlem          43
Chelsea                100
Chinatown               48
Civic Center           100
Clinton                 38
East Harlem             76
East Village           100
Financial District     100
Flatiron               100
Gramercy                92
Greenwich Village      100
Hamilton Heights        54
Hudson Yards            18
Inwood                  99
Lenox Hill             100
Lincoln Square         100
Little Italy           100
Lower East Side         48
Manhattan Valley        88
Manhattanville          40
Marble Hill             51
Midtown                100
Midtown South          100
Morningside Heights     51
Murray Hill             50
Noho                    92
Roosevelt Island        30
Soho                    36
Stuyvesant Town          5
Sutton Place           100
Tribeca                 66
Tudor City              17
Turtle Bay              58
Upper East Side         91
Upper West Side

#### San Francisco

In [25]:
print('Number of venues per neighborhood in San Francisco:')
sf_venues.groupby('Neighborhood').count()['Venue']

Number of venues per neighborhood in San Francisco:


Neighborhood
Bayview                   36
Bernal Heights            86
Castro/Upper Market        5
Chinatown                 45
Crocker Amazon            34
Diamond Heights            4
Downtown/Civic Center     69
Excelsior                  6
Financial District       100
Glen Park                  8
Golden Gate Park           6
Haight Ashbury             4
Inner Richmond            45
Inner Sunset               9
Lakeshore                 39
Marina                    20
Mission                  100
Nob Hill                  25
Noe Valley                24
North Beach               95
Ocean View                16
Outer Mission             45
Outer Richmond            13
Outer Sunset              24
Pacific Heights          100
Parkside                  44
Potrero Hill              87
Presidio                  77
Presidio Heights          19
Russian Hill              96
Seacliff                  20
South of Market          100
Treasure Island/YBI       26
Twin Peaks                 8
V

#### Observation
Some neighborhoods in each city have very few venues.

San Francisco seems to have fewer venues per neighborhood.

### Create a new dataframe for both city's neighborhoods that includes the neighborhood name and top ten most common venues

#### Append ' (NY)' or ' (SF)' to each neighborhood name
This makes it easier to distinguish which neighborhood belongs to which city in the combined dataframe.

In [26]:
# Make sure we append the city abbreviation one time only.
if ny_venues.iloc[0]['Neighborhood'][-4:] != '(NY)':
    ny_venues['Neighborhood'] = ny_venues['Neighborhood'] + ' (NY)'
display(ny_venues.head())
if sf_venues.iloc[0]['Neighborhood'][-4:] != '(SF)':
    sf_venues['Neighborhood'] = sf_venues['Neighborhood'] + ' (SF)'
display(sf_venues.head())

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Id,Venue Latitude,Venue Longitude,Venue Category
0,Marble Hill (NY),40.876551,-73.91066,Arturo's,4b4429abf964a52037f225e3,40.874412,-73.910271,Pizza Place
1,Marble Hill (NY),40.876551,-73.91066,Bikram Yoga,4baf59e8f964a520a6f93be3,40.876844,-73.906204,Yoga Studio
2,Marble Hill (NY),40.876551,-73.91066,Tibbett Diner,4b79cc46f964a520c5122fe3,40.880404,-73.908937,Diner
3,Marble Hill (NY),40.876551,-73.91066,Sam's Pizza,4bb114c4f964a520b9783ce3,40.879435,-73.905859,Pizza Place
4,Marble Hill (NY),40.876551,-73.91066,Starbucks,55f81cd2498ee903149fcc64,40.877531,-73.905582,Coffee Shop


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Id,Venue Latitude,Venue Longitude,Venue Category
0,Seacliff (SF),37.786519,-122.501281,Legion of Honor,44d344bef964a52041361fe3,37.785063,-122.500202,Art Museum
1,Seacliff (SF),37.786519,-122.501281,Lincoln Park,4af782def964a520a60922e3,37.785436,-122.502022,Park
2,Seacliff (SF),37.786519,-122.501281,The Thinker (Rodin),4e597f46e4cd875e8eafcae9,37.784716,-122.50053,Sculpture Garden
3,Seacliff (SF),37.786519,-122.501281,Lands End Trail,53a6a1ca498ee55d8d1f5db4,37.7865,-122.505144,Trail
4,Seacliff (SF),37.786519,-122.501281,Mile Rock Beach,4ba55e00f964a520d5ff38e3,37.787268,-122.506294,Beach


#### Create a combined venues dataframe

In [27]:
combined_venues = ny_venues.append(sf_venues, ignore_index=True)
display(combined_venues.shape)
display(combined_venues.head())
display(combined_venues.tail())

(4489, 8)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Id,Venue Latitude,Venue Longitude,Venue Category
0,Marble Hill (NY),40.876551,-73.91066,Arturo's,4b4429abf964a52037f225e3,40.874412,-73.910271,Pizza Place
1,Marble Hill (NY),40.876551,-73.91066,Bikram Yoga,4baf59e8f964a520a6f93be3,40.876844,-73.906204,Yoga Studio
2,Marble Hill (NY),40.876551,-73.91066,Tibbett Diner,4b79cc46f964a520c5122fe3,40.880404,-73.908937,Diner
3,Marble Hill (NY),40.876551,-73.91066,Sam's Pizza,4bb114c4f964a520b9783ce3,40.879435,-73.905859,Pizza Place
4,Marble Hill (NY),40.876551,-73.91066,Starbucks,55f81cd2498ee903149fcc64,40.877531,-73.905582,Coffee Shop


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Id,Venue Latitude,Venue Longitude,Venue Category
4484,Golden Gate Park (SF),37.767262,-122.504648,Murphy Windmill,4bb1706df964a52089933ce3,37.764831,-122.508519,Monument / Landmark
4485,Golden Gate Park (SF),37.767262,-122.504648,Golden Gate Trail Run,4bafb73df964a5205f1a3ce3,37.765191,-122.506772,Trail
4486,Golden Gate Park (SF),37.767262,-122.504648,Bercut Equitation Field,4d56e5b7fb65236a68a50bb4,37.766102,-122.500927,Park
4487,Golden Gate Park (SF),37.767262,-122.504648,Chain of Lakes,4aff2496f964a520de3422e3,37.767039,-122.50044,Lake
4488,Golden Gate Park (SF),37.767262,-122.504648,Middle Lake,4fc69958e4b0e5b2f2b8281e,37.766096,-122.499776,Lake


#### Remove rows for irrelevant venue categories
Some of the venues are not relevant, such as 'Office', 'Building', 'Neighborhood', and 'Toll Plaza'. After reviewing the different venue types, I decided to remove these from the dataframe as not being relevant, or as being too vaguely defined. If I was not sure whether a venue category was relevant, I did not remove it.

In [28]:
irrelevant_venue_categories = ['Building', 'College Academic Building', 'College Cafeteria',
                               'College Gym', 'Event Space', 'Exhibit', 'Field', 'Fountain',
                               'General Entertainment', 'Hostel', 'Insurance Office',
                               'Intersection', 'Lawyer', 'Locksmith', 'Neighborhood',
                               'Non-Profit', 'Office', 'Residential Building (Apartment / Condo)',
                               'Rest Area', 'Tech Startup', 'Toll Plaza', 'Tunnel']

for category in irrelevant_venue_categories:
    combined_venues = combined_venues.loc[combined_venues['Venue Category'] != category]

combined_venues.reset_index(drop=True, inplace=True)
combined_venues.tail()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Id,Venue Latitude,Venue Longitude,Venue Category
4418,Golden Gate Park (SF),37.767262,-122.504648,Murphy Windmill,4bb1706df964a52089933ce3,37.764831,-122.508519,Monument / Landmark
4419,Golden Gate Park (SF),37.767262,-122.504648,Golden Gate Trail Run,4bafb73df964a5205f1a3ce3,37.765191,-122.506772,Trail
4420,Golden Gate Park (SF),37.767262,-122.504648,Bercut Equitation Field,4d56e5b7fb65236a68a50bb4,37.766102,-122.500927,Park
4421,Golden Gate Park (SF),37.767262,-122.504648,Chain of Lakes,4aff2496f964a520de3422e3,37.767039,-122.50044,Lake
4422,Golden Gate Park (SF),37.767262,-122.504648,Middle Lake,4fc69958e4b0e5b2f2b8281e,37.766096,-122.499776,Lake


#### Rename similar venue categories
Rename venue categories of similar types to the same name. I had to use some judgment about which categories were similar and which were not. For example, 'Pub' is similar to 'Bar', but 'Whisky Bar' seems rather specialized, so I did not rename it to 'Bar'.

When I was not sure, such as whether 'Public Art' is the same as 'Art Museum', I erred on the side of caution and did not rename it.

In [29]:
print('Before combining like venue categories, there are', len(combined_venues['Venue Category'].unique()), 'venues')

Before combining like venue categories, there are 373 venues


In [30]:
similar_venues = [('Used Bookstore', 'Bookstore'),
                  ('Burrito Place', 'Mexican Restaurant'),
                  ('Bus Line', 'Bus Station'),
                  ('Bus Stop', 'Bus Station'),
                  ('Cocktail Bar', 'Bar'),
                  ('Concert Hall', 'Music Venue'),
                  ('Drugstore', 'Pharmacy'),
                  ('Gym / Fitness Center', 'Gym'),
                  ('Gym Pool', 'Pool'),
                  ('Historic Site', 'Monument / Landmark'),
                  ('History Museum', 'Museum'),
                  ('Japanese Curry Restaurant', 'Japanese Restaurant'),
                  ('Light Rail Station', 'Train Station'),
                  ('Metro Station', 'Train Station'),
                  ('Pub', 'Bar'),
                  ('Speakeasy', 'Bar'),
                  ('Taco Place', 'Mexican Restaurant'),
                  ('Vineyard', 'Winery'),
                  ('Zoo Exhibit', 'Zoo')]

for similar_venue in similar_venues:
    mask = combined_venues['Venue Category'] == similar_venue[0]
    combined_venues.loc[mask, 'Venue Category'] = similar_venue[1]

In [31]:
print('After combining like venue categories, there are', len(combined_venues['Venue Category'].unique()), 'venues')

After combining like venue categories, there are 354 venues


#### Create a one-hot encoding of the venue categories

In [32]:
# Create the one-hot encoding dataframe
combined_onehot = pd.get_dummies(combined_venues[['Venue Category']], prefix="", prefix_sep="")

# Add the Neighborhood column to the end of the one-hot dataframe
combined_onehot.insert(loc=0, column='Neighborhood', value=combined_venues['Neighborhood'])

# Move the Neighborhood column to the front, so it is the first column
#fixed_columns = [combined_onehot.columns[-1]] + list(combined_onehot.columns[:-1])
#combined_onehot = combined_onehot[fixed_columns]

# 'Neighborhood' is not a venue category, so subtract 1 from the number of
# venue categories.
print('The one-hot dataframe includes {} '
      'venues and {} unique venue categories'.format(combined_onehot.shape[0],
                                                     combined_onehot.shape[1] - 1))

display(combined_onehot.head())

The one-hot dataframe includes 4423 venues and 354 unique venue categories


Unnamed: 0,Neighborhood,ATM,Acai House,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auditorium,Australian Restaurant,Austrian Restaurant,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Beach,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Belgian Restaurant,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bike Trail,Bistro,Board Shop,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Bowling Green,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Buffet,Burger Joint,Burmese Restaurant,Bus Station,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Cambodian Restaurant,Camera Store,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Cha Chaan Teng,Cheese Shop,Chinese Restaurant,Chocolate Shop,Christmas Market,Church,Climbing Gym,Clothing Store,Club House,Coffee Shop,College Theater,Comedy Club,Comfort Food Restaurant,Comic Shop,Community Center,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cultural Center,Cupcake Shop,Cycle Studio,Czech Restaurant,Dance Studio,Daycare,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distillery,Dive Bar,Dive Shop,Dog Run,Donut Shop,Dosa Place,Dry Cleaner,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish Market,Fishing Spot,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,Football Stadium,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General College & University,German Restaurant,Gift Shop,Gluten-free Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gun Range,Gym,Gymnastics Gym,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Herbs & Spices Store,High School,Hobby Shop,Home Service,Hot Dog Joint,Hotel,Hotel Bar,Hotpot Restaurant,Hunan Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Indonesian Restaurant,Irish Pub,Island,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kids Store,Kitchen Supply Store,Korean Restaurant,Kosher Restaurant,Lake,Latin American Restaurant,Lebanese Restaurant,Library,Lighthouse,Lingerie Store,Liquor Store,Lounge,Malay Restaurant,Marijuana Dispensary,Market,Martial Arts Dojo,Massage Studio,Medical Center,Mediterranean Restaurant,Memorial Site,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Mobile Phone Shop,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Motel,Motorcycle Shop,Mountain,Movie Theater,Moving Target,Museum,Music School,Music Store,Music Venue,Nail Salon,National Park,New American Restaurant,Newsstand,Nightclub,Noodle House,North Indian Restaurant,Opera House,Optical Shop,Organic Grocery,Other Nightlife,Outdoor Sculpture,Outdoor Supply Store,Outdoors & Recreation,Paella Restaurant,Pakistani Restaurant,Paper / Office Supplies Store,Park,Performing Arts Venue,Perfume Shop,Persian Restaurant,Peruvian Restaurant,Pet Café,Pet Store,Pharmacy,Piano Bar,Pier,Pilates Studio,Pizza Place,Playground,Plaza,Poke Place,Pool,Pool Hall,Pop-Up Shop,Portuguese Restaurant,Public Art,Ramen Restaurant,Record Shop,Recreation Center,Rental Car Location,Restaurant,Rock Climbing Spot,Rock Club,Roller Rink,Roof Deck,Rugby Pitch,Russian Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Salvadoran Restaurant,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,School,Science Museum,Sculpture Garden,Seafood Restaurant,Shanghai Restaurant,Shipping Store,Shoe Repair,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Smoke Shop,Snack Place,Soccer Field,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Souvlaki Shop,Spa,Spanish Restaurant,Spiritual Center,Sporting Goods Shop,Sports Bar,Sports Club,State / Provincial Park,Stationery Store,Steakhouse,Street Art,Street Food Gathering,Strip Club,Supermarket,Supplement Shop,Surf Spot,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tattoo Parlor,Tea Room,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Park Ride / Attraction,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Tiki Bar,Tour Provider,Tourist Information Center,Toy / Game Store,Track,Trail,Train,Train Station,Tree,Turkish Restaurant,Tuscan Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Volleyball Court,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo
0,Marble Hill (NY),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Marble Hill (NY),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2,Marble Hill (NY),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Marble Hill (NY),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Marble Hill (NY),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### Group the rows by neighborhood, taking the mean of the frequency of occurrence of each category for that neighborhood

In [33]:
# After the groupyby, the index becomes Neighborhood, so reset_index()
# moves the Neighborhood back to a regular column, and the DataFrame is
# 0..nnn indexed.
combined_grouped = combined_onehot.groupby('Neighborhood').mean().reset_index()

print('The combined_grouped dataframe includes {} '
      'neighborhoods and {} unique venue categories'.format(combined_grouped.shape[0],
                                                            combined_grouped.shape[1]))

combined_grouped

The combined_grouped dataframe includes 77 neighborhoods and 355 unique venue categories


Unnamed: 0,Neighborhood,ATM,Acai House,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auditorium,Australian Restaurant,Austrian Restaurant,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Beach,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Belgian Restaurant,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bike Trail,Bistro,Board Shop,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Bowling Green,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Buffet,Burger Joint,Burmese Restaurant,Bus Station,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Cambodian Restaurant,Camera Store,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Cha Chaan Teng,Cheese Shop,Chinese Restaurant,Chocolate Shop,Christmas Market,Church,Climbing Gym,Clothing Store,Club House,Coffee Shop,College Theater,Comedy Club,Comfort Food Restaurant,Comic Shop,Community Center,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cultural Center,Cupcake Shop,Cycle Studio,Czech Restaurant,Dance Studio,Daycare,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distillery,Dive Bar,Dive Shop,Dog Run,Donut Shop,Dosa Place,Dry Cleaner,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish Market,Fishing Spot,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,Football Stadium,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General College & University,German Restaurant,Gift Shop,Gluten-free Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gun Range,Gym,Gymnastics Gym,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Herbs & Spices Store,High School,Hobby Shop,Home Service,Hot Dog Joint,Hotel,Hotel Bar,Hotpot Restaurant,Hunan Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Indonesian Restaurant,Irish Pub,Island,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kids Store,Kitchen Supply Store,Korean Restaurant,Kosher Restaurant,Lake,Latin American Restaurant,Lebanese Restaurant,Library,Lighthouse,Lingerie Store,Liquor Store,Lounge,Malay Restaurant,Marijuana Dispensary,Market,Martial Arts Dojo,Massage Studio,Medical Center,Mediterranean Restaurant,Memorial Site,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Mobile Phone Shop,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Motel,Motorcycle Shop,Mountain,Movie Theater,Moving Target,Museum,Music School,Music Store,Music Venue,Nail Salon,National Park,New American Restaurant,Newsstand,Nightclub,Noodle House,North Indian Restaurant,Opera House,Optical Shop,Organic Grocery,Other Nightlife,Outdoor Sculpture,Outdoor Supply Store,Outdoors & Recreation,Paella Restaurant,Pakistani Restaurant,Paper / Office Supplies Store,Park,Performing Arts Venue,Perfume Shop,Persian Restaurant,Peruvian Restaurant,Pet Café,Pet Store,Pharmacy,Piano Bar,Pier,Pilates Studio,Pizza Place,Playground,Plaza,Poke Place,Pool,Pool Hall,Pop-Up Shop,Portuguese Restaurant,Public Art,Ramen Restaurant,Record Shop,Recreation Center,Rental Car Location,Restaurant,Rock Climbing Spot,Rock Club,Roller Rink,Roof Deck,Rugby Pitch,Russian Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Salvadoran Restaurant,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,School,Science Museum,Sculpture Garden,Seafood Restaurant,Shanghai Restaurant,Shipping Store,Shoe Repair,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Smoke Shop,Snack Place,Soccer Field,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Souvlaki Shop,Spa,Spanish Restaurant,Spiritual Center,Sporting Goods Shop,Sports Bar,Sports Club,State / Provincial Park,Stationery Store,Steakhouse,Street Art,Street Food Gathering,Strip Club,Supermarket,Supplement Shop,Surf Spot,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tattoo Parlor,Tea Room,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Park Ride / Attraction,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Tiki Bar,Tour Provider,Tourist Information Center,Toy / Game Store,Track,Trail,Train,Train Station,Tree,Turkish Restaurant,Tuscan Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Volleyball Court,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo
0,Battery Park City (NY),0.0,0.0,0.0,0.0,0.0,0.0,0.033898,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.033898,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016949,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.084746,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033898,0.0,0.0,0.0,0.0,0.0,0.033898,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016949,0.0,0.033898,0.0,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.016949,0.0,0.0,0.0,0.016949,0.0,0.0,0.016949,0.0,0.0,0.0,0.033898,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033898,0.016949,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.016949,0.016949,0.016949,0.033898,0.0,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.101695,0.016949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033898,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0,0.016949,0.0,0.0
1,Bayview (SF),0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.085714,0.0,0.0,0.0,0.028571,0.0,0.028571,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.028571,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.085714,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.114286,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bernal Heights (SF),0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035294,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.011765,0.011765,0.0,0.023529,0.0,0.011765,0.0,0.0,0.0,0.011765,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.047059,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.011765,0.0,0.0,0.023529,0.0,0.023529,0.0,0.023529,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.011765,0.023529,0.0,0.0,0.0,0.0,0.0,0.0,0.035294,0.023529,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047059,0.0,0.0,0.0,0.0,0.023529,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035294,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.047059,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.011765,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.0,0.023529,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.0
3,Carnegie Hill (NY),0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.02381,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.011905,0.0,0.059524,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.011905,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.011905,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.011905,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.071429,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.011905,0.02381,0.0,0.0,0.0,0.02381,0.0
4,Castro/Upper Market (SF),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Central Harlem (NY),0.0,0.0,0.0,0.0,0.0,0.071429,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.02381,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.02381,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.02381,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Chelsea (NY),0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.030303,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.010101,0.0,0.020202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.060606,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.010101,0.0,0.0,0.020202,0.020202,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.0,0.010101,0.0,0.020202,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.040404,0.010101,0.0,0.010101,0.0,0.0,0.0,0.010101,0.060606,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.020202,0.020202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.010101,0.0,0.040404,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.010101,0.0,0.0,0.0,0.030303,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.010101,0.0,0.0,0.010101,0.0,0.0
7,Chinatown (NY),0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06383,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06383,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.085106,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.042553,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.042553,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.042553,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.021277,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.021277,0.0,0.042553,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.042553,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0
8,Chinatown (SF),0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.069767,0.0,0.023256,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.023256,0.0,0.046512,0.0,0.0,0.0,0.023256,0.0,0.0,0.023256,0.0,0.0,0.0,0.093023,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.023256,0.0,0.0,0.0,0.046512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.046512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.093023,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.046512,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.046512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Civic Center (NY),0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.010204,0.0,0.0,0.0,0.0,0.020408,0.010204,0.0,0.0,0.010204,0.0,0.0,0.010204,0.0,0.0,0.0,0.010204,0.010204,0.05102,0.0,0.05102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.010204,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.102041,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.040816,0.010204,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.010204,0.0,0.0,0.010204,0.010204,0.0,0.0,0.0,0.0,0.010204,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030612,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030612,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030612,0.0,0.0,0.030612,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.030612,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.020408,0.0,0.010204,0.0,0.030612,0.0


#### "Sanity" check: Verify the sum of all values in a given row is 1.0
This was actually helpful to me in cleaning the data, as it was here that I noticed a problem: one of the rows, for Twin Peaks in San Francisco, only summed to 0.875. This led me to a discovery: Twin Peaks (in San Francisco) has a venue named 'Twin Peaks' with venue category 'Neighborhood'. This is apparently a self-referential venue in Foursquare. After creating the one-hot encoding dataframe, this venue category caused a problem when I tried to insert the 'Neighborhoods' column into the dataframe. Beause the loss of one venue is negligible, I modified the notebook, as described a few cells above, to get rid of rows with venue category of 'Neighborhood'.

In [34]:
print('Sum of the values in each row is {}'.format(sum(combined_grouped.sum(axis=1))))
print('This should match the number of neighborhoods, which is {}'.format(combined_grouped.shape[0]))
print('Detail below: Each row should sum to 1.0')
combined_grouped.sum(axis=1)

Sum of the values in each row is 77.0
This should match the number of neighborhoods, which is 77
Detail below: Each row should sum to 1.0


0     1.0
1     1.0
2     1.0
3     1.0
4     1.0
5     1.0
6     1.0
7     1.0
8     1.0
9     1.0
10    1.0
11    1.0
12    1.0
13    1.0
14    1.0
15    1.0
16    1.0
17    1.0
18    1.0
19    1.0
20    1.0
21    1.0
22    1.0
23    1.0
24    1.0
25    1.0
26    1.0
27    1.0
28    1.0
29    1.0
30    1.0
31    1.0
32    1.0
33    1.0
34    1.0
35    1.0
36    1.0
37    1.0
38    1.0
39    1.0
40    1.0
41    1.0
42    1.0
43    1.0
44    1.0
45    1.0
46    1.0
47    1.0
48    1.0
49    1.0
50    1.0
51    1.0
52    1.0
53    1.0
54    1.0
55    1.0
56    1.0
57    1.0
58    1.0
59    1.0
60    1.0
61    1.0
62    1.0
63    1.0
64    1.0
65    1.0
66    1.0
67    1.0
68    1.0
69    1.0
70    1.0
71    1.0
72    1.0
73    1.0
74    1.0
75    1.0
76    1.0
dtype: float64

#### Now put this into a dataframe, using the ten most common venues in each neighborhood

In [35]:
num_top_venues = 20

# Ordinal suffices for 1, 2, 3 (1st, 2nd, 3rd)
indicators = ['st', 'nd', 'rd']

# create columns for 1st - 10th most common venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        # Prints 'st', 'nd', and 'rd' for 1st, 2nd and 3rd
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        # Prints 'th' for 4th, 5th, etc.
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
combined_venues_sorted = pd.DataFrame(columns=columns)
combined_venues_sorted['Neighborhood'] = combined_grouped['Neighborhood']

for ind in np.arange(combined_grouped.shape[0]):
    combined_venues_sorted.iloc[ind, 1:] = return_most_common_venues(combined_grouped.iloc[ind, :],
                                                                     num_top_venues)

combined_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Battery Park City (NY),Park,Coffee Shop,American Restaurant,Mexican Restaurant,Department Store,Cupcake Shop,Food Truck,Hotel,Sandwich Place,BBQ Joint,Gym,Garden,Gastropub,Ice Cream Shop,Men's Store,Monument / Landmark,Memorial Site,Mediterranean Restaurant,Sushi Restaurant,Food Court
1,Bayview (SF),Southern / Soul Food Restaurant,Brewery,Park,Football Stadium,Grocery Store,Outdoor Sculpture,Dance Studio,Theater,Gym,Mexican Restaurant,Café,Pharmacy,Garden Center,Bus Station,Spa,Burger Joint,Harbor / Marina,Bakery,State / Provincial Park,Home Service
2,Bernal Heights (SF),Pizza Place,Latin American Restaurant,Coffee Shop,Bar,Italian Restaurant,Park,Mexican Restaurant,Playground,Bakery,Café,Trail,Ramen Restaurant,Art Gallery,Liquor Store,Gourmet Shop,Grocery Store,Gym,Yoga Studio,Japanese Restaurant,Indian Restaurant
3,Carnegie Hill (NY),Pizza Place,Bar,Cosmetics Shop,Coffee Shop,Gym,Spa,Café,Bookstore,French Restaurant,Wine Shop,Yoga Studio,Italian Restaurant,Sports Bar,Bakery,Dance Studio,Ramen Restaurant,Community Center,Restaurant,Salon / Barbershop,Chinese Restaurant
4,Castro/Upper Market (SF),Park,Grocery Store,Shoe Store,Trail,Zoo,Farmers Market,Electronics Store,Elementary School,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,Fish Market,Fishing Spot,Flea Market,Flower Shop,Food,Food & Drink Shop
5,Central Harlem (NY),African Restaurant,Gym,French Restaurant,Chinese Restaurant,American Restaurant,Cosmetics Shop,Seafood Restaurant,Caribbean Restaurant,Beer Bar,Park,Spa,Bar,Market,Bagel Shop,Bookstore,Ethiopian Restaurant,BBQ Joint,Boutique,Dessert Shop,Tapas Restaurant
6,Chelsea (NY),Italian Restaurant,Coffee Shop,Nightclub,Ice Cream Shop,Hotel,Seafood Restaurant,Bakery,American Restaurant,Bar,Theater,Art Gallery,Gift Shop,Mexican Restaurant,Café,Tapas Restaurant,Gym,Bookstore,Men's Store,Health & Beauty Service,Asian Restaurant
7,Chinatown (NY),Chinese Restaurant,Bar,Bubble Tea Shop,Sandwich Place,Hotpot Restaurant,Japanese Restaurant,Noodle House,Vietnamese Restaurant,English Restaurant,Malay Restaurant,Greek Restaurant,Bookstore,Spa,Dumpling Restaurant,Cantonese Restaurant,Museum,Monument / Landmark,Szechuan Restaurant,Tea Room,Bike Shop
8,Chinatown (SF),Gym,Coffee Shop,Bubble Tea Shop,Cantonese Restaurant,Dive Bar,Hotel,Tea Room,French Restaurant,Camera Store,Café,Monument / Landmark,Ramen Restaurant,Chinese Restaurant,Grocery Store,Church,Bar,Mediterranean Restaurant,Bakery,Burger Joint,Massage Studio
9,Civic Center (NY),Gym,Bar,Bakery,Italian Restaurant,Spa,Yoga Studio,Sporting Goods Shop,Sandwich Place,Sushi Restaurant,Park,Coffee Shop,American Restaurant,Falafel Restaurant,Wine Shop,Dance Studio,Art Gallery,Martial Arts Dojo,French Restaurant,Medical Center,Bookstore


## 3. Group neighborhoods into like clusters, based on the commonality of the venues
<i>k</i>-means clustering will be used for this task.

### Choosing the <i>k</i>-means n_clusters value
I had to experiment with n_clusters to find the "right" number of clusters. I found no value that created nice, evenly sized clusters. Some n_clusters values led to one large cluster, maybe one medium-sized cluster, and lots of very small clusters. I wanted to find an n_clusters value that yielded at least one or two medium-sized clusters that were somewhat balanced between number of Manhattan and San Francisco neighborhoods. For example, a cluster of four neighborhoods that are all in Manhattan are not very helpful with regard to the purpose of this analysis.

I used the code in the cell below to help identify a "good" n_clusters value. It runs the <i>k</i>-means clustering for n_clusters ranging from 2 to 40, and for each run, displays the number of neighborhoods in each cluster. My thoughts:

  * Very small n_clusters values (2 or 3, for example) are just too small to create meaningful clusters, so their inclusion in the code below is merely academic.
  * Arguably, if larger n_clusters values led to a lot of clusters with two or three neighborhoods that were from Manhattan and San Francisco, we might have a good mapping between the neighborhoods of each city ("if you like A, you might like B"). However, such a cluster pattern never emerged. Rather, the larger n_clusters values led to more one-neighborhood clusters, which is not helpful.
  * The n_clusters values that lead to a cluster of 50 or more neighborhoods could mean the cluster is too generic. Or it could simply imply that most of the neighborhoods are very similar to each other.
  * In general, the tendency is for any n_clusters value to create no more than three clusters of significant size.

In [36]:
print('An asterisk (*) to the left of the cluster size list indicates a potentially')
print('good n_clusters value.')
print()
print('n_clusters value (number of one-neighborhood clusters): list of cluster sizes')
print('-----------------------------------------------------------------------------')

candidate_n_clusters = []
candidate_threshold = 0.5 * combined_grouped.shape[0]

for i in range(2, 40):
    kclusters = i

    # Get rid of 'Neighborhood' because it is not a venue category.
    combined_grouped_clustering = combined_grouped.drop('Neighborhood', 1)

    # Now run the k-means clustering.
    kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(combined_grouped_clustering)

    # Combine the ny_nhoods and sf_nhoods dataframes into a combined dataframe.
    column_names = ['Neighborhood', 'Latitude', 'Longitude']
    combined_merged = ny_nhoods[column_names]
    combined_merged = combined_merged.append(sf_nhoods[column_names], ignore_index=True)

    # Add the k-means labels
    combined_merged['Cluster Labels'] = kmeans.labels_

    # merge combined_grouped with combined_merged to add latitude and longitude for each neighborhood.
    combined_merged = combined_merged.join(combined_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

    combined_merged.head() # check the last columns!

    # Display the data for each cluster
    cluster_list = []
    for cluster in range(kclusters):
        cluster_df = combined_merged.loc[combined_merged['Cluster Labels'] == cluster,
                     combined_merged.columns[[0] + list(range(4, combined_merged.shape[1]))]]
        cluster_list.append(cluster_df.shape[0])

    # Check if this is a potentially good n_clusters value.
    copy_cluster_list = cluster_list[:]
    copy_cluster_list.sort(reverse=True)
    num_one_neighborhood_clusters = copy_cluster_list.count(1)
    candidate_indicator = ' '

    # Looking for the largest cluster size to be less than half the total
    # neighborhoods, and the number of one-neighborhood clusters to be small
    # (less than 10)
    if copy_cluster_list[0] < candidate_threshold and num_one_neighborhood_clusters < 10:
        candidate_indicator = '*'
        candidate_n_clusters.append(kclusters)

    print('n_clusters {} ({}): {}{}'.format(i,
                                            cluster_list.count(1),
                                            candidate_indicator,
                                            cluster_list))

An asterisk (*) to the left of the cluster size list indicates a potentially
good n_clusters value.

n_clusters value (number of one-neighborhood clusters): list of cluster sizes
-----------------------------------------------------------------------------
n_clusters 2 (0):  [67, 10]
n_clusters 3 (1):  [9, 67, 1]
n_clusters 4 (1):  [54, 5, 1, 17]
n_clusters 5 (3):  [70, 1, 1, 1, 4]
n_clusters 6 (4):  [12, 1, 1, 61, 1, 1]
n_clusters 7 (5):  [68, 1, 4, 1, 1, 1, 1]
n_clusters 8 (6):  [67, 1, 1, 4, 1, 1, 1, 1]
n_clusters 9 (5):  [12, 4, 54, 1, 1, 2, 1, 1, 1]
n_clusters 10 (7):  [1, 61, 1, 3, 1, 1, 1, 1, 1, 6]
n_clusters 11 (8):  [7, 1, 58, 1, 1, 4, 1, 1, 1, 1, 1]
n_clusters 12 (9):  [52, 4, 1, 1, 1, 1, 1, 12, 1, 1, 1, 1]
n_clusters 13 (8):  [2, 2, 14, 1, 1, 1, 1, 1, 3, 1, 48, 1, 1]
n_clusters 14 (9): *[31, 1, 1, 2, 1, 16, 1, 14, 5, 1, 1, 1, 1, 1]
n_clusters 15 (12):  [53, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1]
n_clusters 16 (11):  [1, 25, 2, 1, 1, 1, 1, 1, 1, 1, 1, 19, 4, 1, 1, 16]
n_cl

Display the computed n_clusters candidate values. Be sure to review the above data, too, in case other candidates look good.

In [37]:
print('Potentially good n_clusters values: {}'.format(candidate_n_clusters))

Potentially good n_clusters values: [14]


#### After reviewing the above results for different n_clusters value, I set kclusters to the suggested value.

In [38]:
kclusters = 14

Append <b>(NY)</b> or <b>(SF)</b> to each neighborhood name in ny_nhoods and sf_nhoods, to make it easier to distinguish the city to which each neighborhood belongs.

In [39]:
# Make sure we append the city abbreviation one time only.
if ny_nhoods.iloc[0]['Neighborhood'][-4:] != '(NY)':
    ny_nhoods['Neighborhood'] = ny_nhoods['Neighborhood'] + ' (NY)'
if sf_nhoods.iloc[0]['Neighborhood'][-4:] != '(SF)':
    sf_nhoods['Neighborhood'] = sf_nhoods['Neighborhood'] + ' (SF)'

display(ny_nhoods.head())
display(sf_nhoods.head())

Unnamed: 0,Neighborhood,Latitude,Longitude,Search Radius,Nearest Neighborhood
0,Marble Hill (NY),40.876551,-73.91066,663.441639,Inwood
1,Chinatown (NY),40.715618,-73.994279,242.205113,Little Italy
2,Washington Heights (NY),40.851903,-73.9369,1097.883017,Inwood
3,Inwood (NY),40.867684,-73.92121,663.441639,Marble Hill
4,Hamilton Heights (NY),40.823604,-73.949688,492.524718,Manhattanville


Unnamed: 0,Neighborhood,Latitude,Longitude,Search Radius,Nearest Neighborhood
0,Seacliff (SF),37.786519,-122.501281,739.070949,Outer Richmond
1,Haight Ashbury (SF),37.765925,-122.444278,255.396903,Castro/Upper Market
2,Outer Mission (SF),37.723424,-122.445313,776.427848,Crocker Amazon
3,Inner Sunset (SF),37.756437,-122.45907,426.077099,Twin Peaks
4,Downtown/Civic Center (SF),37.786657,-122.412221,264.051264,Nob Hill


Initialize and run the <i>k</i>-means clustering on the neighborhood venues dataframe.

In [40]:
# Get rid of 'Neighborhood' because it is not a venue category.
combined_grouped_clustering = combined_grouped.drop('Neighborhood', 1)

# Now run the k-means clustering.
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(combined_grouped_clustering)

# Look at the cluster labels generated for each row in the dataframe.
print('Cluster labels: {}'.format(kmeans.labels_))

# Combine the ny_nhoods and sf_nhoods dataframes into a combined dataframe.
column_names = ['Neighborhood', 'Latitude', 'Longitude']
combined_merged = ny_nhoods[column_names]
combined_merged = combined_merged.append(sf_nhoods[column_names], ignore_index=True)

# Add the k-means labels
combined_merged['Cluster Labels'] = kmeans.labels_

# merge combined_grouped with combined_merged to add latitude and longitude for each neighborhood.
combined_merged = combined_merged.join(combined_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

combined_merged.head()

Cluster labels: [ 0  5  7  5  3  0  0  0  5  5  0  7  1  0  7  0  2  0  0  5  3 13  0  0
  9  7 11  7 10  7  4  5  5  0  7  7  0  7  5  0  5  0  0  0  0  5  0  0
  8  7  8  7  5  7  5  0  0  8  0  0  0  5  6  5  5  0  8  0 12  0  0  7
  7  0  8  0  5]


Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Marble Hill (NY),40.876551,-73.91066,0,Pizza Place,Bar,Pharmacy,Supermarket,Shoe Store,Bank,Sandwich Place,Donut Shop,Coffee Shop,Athletics & Sports,Discount Store,Spanish Restaurant,Deli / Bodega,Kids Store,Grocery Store,Steakhouse,Café,Supplement Shop,Candy Store,Department Store
1,Chinatown (NY),40.715618,-73.994279,5,Chinese Restaurant,Bar,Bubble Tea Shop,Sandwich Place,Hotpot Restaurant,Japanese Restaurant,Noodle House,Vietnamese Restaurant,English Restaurant,Malay Restaurant,Greek Restaurant,Bookstore,Spa,Dumpling Restaurant,Cantonese Restaurant,Museum,Monument / Landmark,Szechuan Restaurant,Tea Room,Bike Shop
2,Washington Heights (NY),40.851903,-73.9369,7,Pizza Place,Café,Bakery,Latin American Restaurant,Bar,Park,Grocery Store,Tapas Restaurant,Mexican Restaurant,Deli / Bodega,Wine Shop,Chinese Restaurant,Gym,Coffee Shop,Caribbean Restaurant,Spa,Sandwich Place,Scenic Lookout,Lounge,Market
3,Inwood (NY),40.867684,-73.92121,5,Mexican Restaurant,Pizza Place,Café,Spanish Restaurant,Wine Bar,Lounge,Bar,Restaurant,Latin American Restaurant,Park,Bakery,Deli / Bodega,Basketball Court,Coffee Shop,American Restaurant,Bank,Pharmacy,Seafood Restaurant,Caribbean Restaurant,Wine Shop
4,Hamilton Heights (NY),40.823604,-73.949688,3,Mexican Restaurant,Bar,Coffee Shop,Café,Pizza Place,Bakery,Indian Restaurant,Deli / Bodega,Liquor Store,Sandwich Place,Yoga Studio,School,Chinese Restaurant,Caribbean Restaurant,Mediterranean Restaurant,Park,Monument / Landmark,Gastropub,Smoke Shop,Sushi Restaurant


<a href="#table of contents">Go back to top</a>

<a id='Results'></a>

# D. Results
Now that the data has been collected and partitioned into clusters, we can look at it in different ways.

### Visualize the resulting clusters
Display a map of the clusters in each city.

Due to the distance between Manhattan and San Francisco, there is no benefit to displaying a single map. Thus two maps will be displayed, one for each city.

In [41]:
# Create the maps for Manhattan and San Francisco, with markers for the clusters.

# Set a color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Manhattan map
ny_merged = combined_merged[combined_merged['Neighborhood'].str[-4:] == '(NY)']
ny_lat_long = get_geo_coords('Manhattan, New York')
ny_cluster_map = folium.Map(location=ny_lat_long, zoom_start=11)

# add markers to the map
for lat, lon, poi, cluster in zip(ny_merged['Latitude'],
                                  ny_merged['Longitude'],
                                  ny_merged['Neighborhood'],
                                  ny_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker([lat, lon],
                        radius=7,
                        popup=label,
                        color=rainbow[cluster-1],
                        fill=True,
                        fill_color=rainbow[cluster-1],
                        fill_opacity=0.7).add_to(ny_cluster_map)

# San Francisco map
sf_merged = combined_merged[combined_merged['Neighborhood'].str[-4:] == '(SF)']
sf_lat_long = get_geo_coords('San Francisco, California')
sf_cluster_map = folium.Map(location=sf_lat_long, zoom_start=12)

# add markers to the map
for lat, lon, poi, cluster in zip(sf_merged['Latitude'],
                                  sf_merged['Longitude'],
                                  sf_merged['Neighborhood'],
                                  sf_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker([lat, lon],
                        radius=7,
                        popup=label,
                        color=rainbow[cluster-1],
                        fill=True,
                        fill_color=rainbow[cluster-1],
                        fill_opacity=0.7).add_to(sf_cluster_map)

#### Display the Manhattan cluster map

In [42]:
print('Manhattan cluster map')
ny_cluster_map

Manhattan cluster map


#### Display the San Francisco cluster map

In [43]:
print('San Francisco cluster map')
sf_cluster_map

San Francisco cluster map


### Examine the clusters
First, let us look at the clusters that have at least one neighborhood from each of Manhattan and San Francisco.

  * Cluster 0 has 31 neighborhoods (14 from Manhattan, 17 from San Francisco)
  * Cluster 5 has 16 neighborhoods (8 from Manhattan, 8 from San Francisco)
  * Cluster 7 has 14 neighborhoods (9 from Manhattan, 5 from San Francisco)

These represent 61 of the 77 total neighborhoods, which is 79%.

From this data, we can infer that any neighborhood in a given cluster is similar to another neighborhood in the same cluster.

In [44]:
# Display the data for each cluster if it has at least one neighborhood from each city.
clusters_with_one_city_only = []

for cluster in range(kclusters):
    cluster_nhoods = combined_merged.loc[combined_merged['Cluster Labels'] == cluster,
                     combined_merged.columns[[0] + list(range(4, combined_merged.shape[1]))]]
    ny_count = cluster_nhoods['Neighborhood'].str.count('NY').sum()
    sf_count = cluster_nhoods['Neighborhood'].str.count('SF').sum()

    if ny_count == 0 or sf_count == 0:
        clusters_with_one_city_only.append(cluster)
    else:
        print('*' * 12)
        print(' Cluster {} has {} neighborhoods'.format(cluster, cluster_nhoods.shape[0]))
        print('*' * 12)
        display(cluster_nhoods)
        print()


************
 Cluster 0 has 31 neighborhoods
************


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Marble Hill (NY),Pizza Place,Bar,Pharmacy,Supermarket,Shoe Store,Bank,Sandwich Place,Donut Shop,Coffee Shop,Athletics & Sports,Discount Store,Spanish Restaurant,Deli / Bodega,Kids Store,Grocery Store,Steakhouse,Café,Supplement Shop,Candy Store,Department Store
5,Manhattanville (NY),Deli / Bodega,Seafood Restaurant,Mexican Restaurant,Italian Restaurant,Bus Station,Bike Trail,Sushi Restaurant,Supermarket,Fried Chicken Joint,Museum,Music School,Burger Joint,Spanish Restaurant,Other Nightlife,Food & Drink Shop,Bar,Falafel Restaurant,Chinese Restaurant,Climbing Gym,Park
6,Central Harlem (NY),African Restaurant,Gym,French Restaurant,Chinese Restaurant,American Restaurant,Cosmetics Shop,Seafood Restaurant,Caribbean Restaurant,Beer Bar,Park,Spa,Bar,Market,Bagel Shop,Bookstore,Ethiopian Restaurant,BBQ Joint,Boutique,Dessert Shop,Tapas Restaurant
7,East Harlem (NY),Mexican Restaurant,Bakery,Thai Restaurant,Deli / Bodega,Gym,Sandwich Place,Latin American Restaurant,Pharmacy,Art Museum,Chinese Restaurant,Pizza Place,Park,Restaurant,Fast Food Restaurant,Liquor Store,Steakhouse,Spanish Restaurant,Spa,Burger Joint,Café
10,Lenox Hill (NY),Gym,Italian Restaurant,Coffee Shop,Sushi Restaurant,Pizza Place,Mexican Restaurant,Deli / Bodega,Bar,Wine Shop,Burger Joint,Cosmetics Shop,Café,Chinese Restaurant,Turkish Restaurant,Thai Restaurant,Bakery,Gift Shop,Cycle Studio,Salon / Barbershop,Sandwich Place
13,Lincoln Square (NY),Gym,Italian Restaurant,French Restaurant,Jazz Club,Bakery,Theater,Plaza,Opera House,Grocery Store,Yoga Studio,Music Venue,Indie Movie Theater,Hotel,Performing Arts Venue,Mexican Restaurant,Park,Mediterranean Restaurant,Coffee Shop,Spa,American Restaurant
15,Midtown (NY),Hotel,Coffee Shop,Clothing Store,Steakhouse,Bar,Theater,Sporting Goods Shop,Food Truck,Bakery,Bookstore,Spa,Park,Gym,Sushi Restaurant,French Restaurant,Tailor Shop,Greek Restaurant,Sandwich Place,Massage Studio,Women's Store
17,Chelsea (NY),Italian Restaurant,Coffee Shop,Nightclub,Ice Cream Shop,Hotel,Seafood Restaurant,Bakery,American Restaurant,Bar,Theater,Art Gallery,Gift Shop,Mexican Restaurant,Café,Tapas Restaurant,Gym,Bookstore,Men's Store,Health & Beauty Service,Asian Restaurant
18,Greenwich Village (NY),Italian Restaurant,Clothing Store,Cosmetics Shop,French Restaurant,Art Gallery,Sushi Restaurant,Juice Bar,Café,Bar,Boutique,Mexican Restaurant,Shoe Store,Jewelry Store,Sandwich Place,Gourmet Shop,Seafood Restaurant,Chinese Restaurant,Accessories Store,Electronics Store,Optical Shop
22,Little Italy (NY),Italian Restaurant,Bar,Bakery,Café,Coffee Shop,Ice Cream Shop,Chinese Restaurant,Sandwich Place,Grocery Store,Optical Shop,Massage Studio,Bubble Tea Shop,Wine Bar,Gourmet Shop,Malay Restaurant,American Restaurant,Bike Shop,Sushi Restaurant,Design Studio,Market



************
 Cluster 5 has 16 neighborhoods
************


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
1,Chinatown (NY),Chinese Restaurant,Bar,Bubble Tea Shop,Sandwich Place,Hotpot Restaurant,Japanese Restaurant,Noodle House,Vietnamese Restaurant,English Restaurant,Malay Restaurant,Greek Restaurant,Bookstore,Spa,Dumpling Restaurant,Cantonese Restaurant,Museum,Monument / Landmark,Szechuan Restaurant,Tea Room,Bike Shop
3,Inwood (NY),Mexican Restaurant,Pizza Place,Café,Spanish Restaurant,Wine Bar,Lounge,Bar,Restaurant,Latin American Restaurant,Park,Bakery,Deli / Bodega,Basketball Court,Coffee Shop,American Restaurant,Bank,Pharmacy,Seafood Restaurant,Caribbean Restaurant,Wine Shop
8,Upper East Side (NY),Italian Restaurant,Art Gallery,Boutique,Coffee Shop,Juice Bar,Hotel,French Restaurant,Gym,Bakery,American Restaurant,Shoe Store,Sushi Restaurant,Gourmet Shop,Bar,Mexican Restaurant,Cosmetics Shop,Wine Shop,Spa,Burger Joint,Seafood Restaurant
9,Yorkville (NY),Gym,Coffee Shop,Italian Restaurant,Bar,Wine Shop,Sushi Restaurant,Mexican Restaurant,Deli / Bodega,Pizza Place,Pharmacy,Nail Salon,Bagel Shop,Wine Bar,Park,Japanese Restaurant,Thai Restaurant,Ice Cream Shop,Rental Car Location,Cosmetics Shop,Pool
19,East Village (NY),Bar,Ice Cream Shop,Wine Bar,Mexican Restaurant,Vegetarian / Vegan Restaurant,Japanese Restaurant,Italian Restaurant,Chinese Restaurant,Pizza Place,Seafood Restaurant,Arepa Restaurant,Korean Restaurant,American Restaurant,Coffee Shop,Bagel Shop,Wine Shop,Tapas Restaurant,Vietnamese Restaurant,Café,Spa
31,Noho (NY),Bar,Italian Restaurant,Pizza Place,Mexican Restaurant,French Restaurant,Boutique,Gift Shop,Hotel,Coffee Shop,Sushi Restaurant,Bookstore,Deli / Bodega,Grocery Store,Wine Shop,Rock Club,Sandwich Place,Candy Store,Furniture / Home Store,Tattoo Parlor,Comedy Club
32,Civic Center (NY),Gym,Bar,Bakery,Italian Restaurant,Spa,Yoga Studio,Sporting Goods Shop,Sandwich Place,Sushi Restaurant,Park,Coffee Shop,American Restaurant,Falafel Restaurant,Wine Shop,Dance Studio,Art Gallery,Martial Arts Dojo,French Restaurant,Medical Center,Bookstore
38,Flatiron (NY),Gym,American Restaurant,Italian Restaurant,Yoga Studio,Women's Store,New American Restaurant,Cycle Studio,Cosmetics Shop,Bakery,Japanese Restaurant,Vegetarian / Vegan Restaurant,Clothing Store,Salon / Barbershop,Sporting Goods Shop,Mediterranean Restaurant,Burger Joint,Coffee Shop,Lingerie Store,Furniture / Home Store,Mexican Restaurant
40,Seacliff (SF),Trail,Sculpture Garden,Scenic Lookout,Park,Golf Course,Pharmacy,Beach,Café,Art Museum,Monument / Landmark,Cafeteria,Bus Station,Outdoor Sculpture,Farmers Market,Elementary School,Fast Food Restaurant,Filipino Restaurant,Fish Market,Fishing Spot,Falafel Restaurant
45,Diamond Heights (SF),Trail,Salon / Barbershop,Playground,Shipping Store,Farmers Market,Duty-free Shop,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Falafel Restaurant,Zoo,Dumpling Restaurant,Filipino Restaurant,Fish Market,Fishing Spot,Flea Market,Flower Shop



************
 Cluster 7 has 14 neighborhoods
************


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
2,Washington Heights (NY),Pizza Place,Café,Bakery,Latin American Restaurant,Bar,Park,Grocery Store,Tapas Restaurant,Mexican Restaurant,Deli / Bodega,Wine Shop,Chinese Restaurant,Gym,Coffee Shop,Caribbean Restaurant,Spa,Sandwich Place,Scenic Lookout,Lounge,Market
11,Roosevelt Island (NY),Park,Gym,Bus Station,Sandwich Place,Dry Cleaner,Pharmacy,Train,Train Station,Liquor Store,School,Coffee Shop,Farmers Market,Playground,Bubble Tea Shop,Greek Restaurant,Waterfront,Outdoors & Recreation,Supermarket,Deli / Bodega,Indie Theater
14,Clinton (NY),Theater,Restaurant,Hotel,Bar,Spa,Gym,Breakfast Spot,Comedy Club,Café,Park,Performing Arts Venue,Bank,Dog Run,Peruvian Restaurant,Lounge,Movie Theater,Sandwich Place,Food Court,Sporting Goods Shop,Steakhouse
25,Manhattan Valley (NY),Coffee Shop,Deli / Bodega,Pizza Place,Indian Restaurant,Mexican Restaurant,Chinese Restaurant,Thai Restaurant,Grocery Store,Bar,French Restaurant,Latin American Restaurant,Café,Italian Restaurant,Gastropub,American Restaurant,Park,Ice Cream Shop,Playground,Wine Shop,Sports Bar
27,Gramercy (NY),Bar,Mexican Restaurant,Wine Shop,Bagel Shop,Italian Restaurant,Coffee Shop,Thrift / Vintage Store,Restaurant,Pizza Place,Spa,Grocery Store,Diner,Playground,Bank,Hotel,Bike Rental / Bike Share,Supplement Shop,Bed & Breakfast,Beer Bar,Sandwich Place
29,Financial District (NY),Coffee Shop,Gym,Bar,Hotel,Food Truck,Steakhouse,Juice Bar,Pizza Place,Wine Shop,Monument / Landmark,Italian Restaurant,Park,Jewelry Store,Falafel Restaurant,Spa,Chinese Restaurant,Vegetarian / Vegan Restaurant,Japanese Restaurant,Mexican Restaurant,American Restaurant
34,Sutton Place (NY),Gym,Italian Restaurant,Furniture / Home Store,Indian Restaurant,Juice Bar,Dessert Shop,Mexican Restaurant,Pizza Place,American Restaurant,Bar,Grocery Store,French Restaurant,Department Store,Bakery,Coffee Shop,Sushi Restaurant,Yoga Studio,Boutique,Cupcake Shop,Ice Cream Shop
35,Turtle Bay (NY),Café,Hotel,Italian Restaurant,Noodle House,Bar,Sushi Restaurant,Turkish Restaurant,Karaoke Bar,Coffee Shop,Deli / Bodega,Gift Shop,Tennis Court,Sandwich Place,Grocery Store,Park,Bank,Donut Shop,Steakhouse,Lounge,Liquor Store
37,Stuyvesant Town (NY),Park,Playground,Baseball Field,Farmers Market,Zoo,Filipino Restaurant,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Fish Market,Electronics Store,Fishing Spot,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Stand
49,Treasure Island/YBI (SF),Food Truck,Museum,Island,Brewery,Harbor / Marina,Bagel Shop,Breakfast Spot,Grocery Store,Park,Athletics & Sports,Baseball Field,Moving Target,Flea Market,Bus Station,American Restaurant,Rugby Pitch,Gym,Diner,Fish Market,Filipino Restaurant





If you look at the above cluster information, you can search for a particular neighborhood, such as 'Flatiron' in Manhattan in cluster 5. Then you can make a statement like, "If you like the venues in 'Flatiron of Manhattan', you will also probably like the venues of 'Financial District of San Francisco'.

Note that the above display shows only the top 20 venues for each neighborhood, but the clusters were created based on all venues in all neighborhoods.

### Now we can look at the other clusters, which have neighborhoods in either Manhattan or in San Francisco, but not in both
The cities that appear in these clusters are:

  * Manhattan
    * Battery Park City
    * Carnegie Hill
    * Hamilton Heights
    * Lower East Side
    * Morningside Heights
    * Murray Hill
    * Tribeca
    * Upper West Side
    * West Village


  * San Francisco
    * Noe Valley
    * Outer Sunset
    * Pacific Heights
    * Potrero Hill
    * Outer Richmond
    * Twin Peaks
    * West of Twin Peaks

We can infer that none of these neighborhoods from one city is sufficiently similar to another neighborhood in the other city. However, see the 'Discussion' and 'Appendix' sections below for an alternative way to find some similarities between neighborhoods.

In [45]:
# clusters_with_one_city_only is a list created in the preceding [code] cell.
for cluster in clusters_with_one_city_only:
    cluster_nhoods = combined_merged.loc[combined_merged['Cluster Labels'] == cluster,
                     combined_merged.columns[[0] + list(range(4, combined_merged.shape[1]))]]
    print('*' * 12)
    print(' Cluster {} has {} neighborhoods'.format(cluster, cluster_nhoods.shape[0]))
    print('*' * 12)
    display(cluster_nhoods)
    print()

************
 Cluster 1 has 1 neighborhoods
************


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
12,Upper West Side (NY),Bar,Italian Restaurant,Gym,Wine Bar,Bakery,Bookstore,Coffee Shop,American Restaurant,Ice Cream Shop,Indian Restaurant,Park,Cosmetics Shop,Sushi Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Breakfast Spot,Burger Joint,Dessert Shop,Seafood Restaurant,Monument / Landmark



************
 Cluster 2 has 1 neighborhoods
************


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
16,Murray Hill (NY),Hotel,Bar,Burger Joint,Sandwich Place,Bagel Shop,Italian Restaurant,Japanese Restaurant,Coffee Shop,Tea Room,Lounge,Salon / Barbershop,Scandinavian Restaurant,Mediterranean Restaurant,Seafood Restaurant,Mexican Restaurant,Gastropub,Shanghai Restaurant,Sushi Restaurant,French Restaurant,Museum



************
 Cluster 3 has 2 neighborhoods
************


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
4,Hamilton Heights (NY),Mexican Restaurant,Bar,Coffee Shop,Café,Pizza Place,Bakery,Indian Restaurant,Deli / Bodega,Liquor Store,Sandwich Place,Yoga Studio,School,Chinese Restaurant,Caribbean Restaurant,Mediterranean Restaurant,Park,Monument / Landmark,Gastropub,Smoke Shop,Sushi Restaurant
20,Lower East Side (NY),Bar,Chinese Restaurant,Café,Shoe Store,Pizza Place,Ramen Restaurant,Japanese Restaurant,Coffee Shop,Art Gallery,Pool,Clothing Store,Performing Arts Venue,Park,Music Venue,Sandwich Place,Tailor Shop,Mediterranean Restaurant,Track,Bike Rental / Bike Share,Mexican Restaurant



************
 Cluster 4 has 1 neighborhoods
************


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
30,Carnegie Hill (NY),Pizza Place,Bar,Cosmetics Shop,Coffee Shop,Gym,Spa,Café,Bookstore,French Restaurant,Wine Shop,Yoga Studio,Italian Restaurant,Sports Bar,Bakery,Dance Studio,Ramen Restaurant,Community Center,Restaurant,Salon / Barbershop,Chinese Restaurant



************
 Cluster 6 has 1 neighborhoods
************


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
62,Pacific Heights (SF),Cosmetics Shop,Italian Restaurant,Gym,Bakery,Spa,Sandwich Place,Salon / Barbershop,Sushi Restaurant,Mexican Restaurant,Juice Bar,Yoga Studio,Thai Restaurant,Café,Grocery Store,Arts & Crafts Store,Bar,Boutique,Vietnamese Restaurant,Coffee Shop,Ice Cream Shop



************
 Cluster 8 has 5 neighborhoods
************


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
48,Noe Valley (SF),Italian Restaurant,Park,Mexican Restaurant,Video Store,Gift Shop,Café,Dog Run,Coffee Shop,Gym,Library,Playground,Sushi Restaurant,Optical Shop,Burger Joint,Spanish Restaurant,Bar,Indian Restaurant,American Restaurant,Yoga Studio,Deli / Bodega
50,Outer Richmond (SF),Bus Station,Chinese Restaurant,Liquor Store,Surf Spot,Board Shop,Deli / Bodega,Grocery Store,Café,Park,Zoo,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish Market,Filipino Restaurant,English Restaurant,Fishing Spot,Flea Market,Flower Shop,Food
57,West of Twin Peaks (SF),Trail,Monument / Landmark,Park,Mexican Restaurant,Convenience Store,Tree,Pharmacy,Sandwich Place,Burger Joint,Bus Station,Dive Bar,Playground,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Falafel Restaurant,Farmers Market,Electronics Store,Fast Food Restaurant,Filipino Restaurant
66,Potrero Hill (SF),Gym,Park,Café,Bar,Brewery,Bus Station,Coffee Shop,Art Gallery,Sandwich Place,Bakery,Dessert Shop,Train Station,Dog Run,Restaurant,Gift Shop,Yoga Studio,Rental Car Location,Hardware Store,Climbing Gym,Hawaiian Restaurant
74,Outer Sunset (SF),Trail,Café,Coffee Shop,Art Gallery,Mexican Restaurant,Grocery Store,Bar,Chinese Restaurant,Bakery,Market,Train Station,Pet Store,Liquor Store,Board Shop,Miscellaneous Shop,Pizza Place,Dive Bar,Convenience Store,Korean Restaurant,Surf Spot



************
 Cluster 9 has 1 neighborhoods
************


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
24,West Village (NY),Italian Restaurant,Cosmetics Shop,Gastropub,Japanese Restaurant,Jazz Club,New American Restaurant,Clothing Store,Coffee Shop,Wine Bar,Bar,French Restaurant,Bakery,Gym,Men's Store,Boutique,Mexican Restaurant,Gourmet Shop,Park,Gay Bar,Chinese Restaurant



************
 Cluster 10 has 1 neighborhoods
************


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
28,Battery Park City (NY),Park,Coffee Shop,American Restaurant,Mexican Restaurant,Department Store,Cupcake Shop,Food Truck,Hotel,Sandwich Place,BBQ Joint,Gym,Garden,Gastropub,Ice Cream Shop,Men's Store,Monument / Landmark,Memorial Site,Mediterranean Restaurant,Sushi Restaurant,Food Court



************
 Cluster 11 has 1 neighborhoods
************


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
26,Morningside Heights (NY),Coffee Shop,Sandwich Place,Park,Bookstore,American Restaurant,Deli / Bodega,Burger Joint,Food Truck,Tennis Court,Dog Run,Grocery Store,Greek Restaurant,Bank,Bar,Supermarket,Chinese Restaurant,Outdoor Sculpture,Café,Ethiopian Restaurant,Farmers Market



************
 Cluster 12 has 1 neighborhoods
************


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
68,Twin Peaks (SF),Scenic Lookout,Trail,Lake,Zoo,Fast Food Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Falafel Restaurant,Farmers Market,Fish Market,Filipino Restaurant,Fishing Spot,Flea Market,Flower Shop,Food,Food & Drink Shop



************
 Cluster 13 has 1 neighborhoods
************


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
21,Tribeca (NY),American Restaurant,Greek Restaurant,Bar,Boutique,Spa,Café,Gym,Chinese Restaurant,Steakhouse,Men's Store,Coffee Shop,Hotel,Wine Bar,Wine Shop,Skate Park,Furniture / Home Store,Sushi Restaurant,Basketball Court,Bakery,Mini Golf





### Examine the venue frequencies, in descending order, of each cluster
This gives us some insight into how <i>k</i>-means partitioned the neighborhoods into the resulting clusters. You can see that clusters 0, 3, and 8 all have multiple common venues, while the other neighborhoods have only one or two "common" venues.

In [47]:
# Display frequency of venues (just the top 20), in descending order, for each cluster
for cluster in range(kclusters):
    print('*' * 12)
    print(' Cluster {}'.format(cluster))
    print('*' * 12)
    venues = combined_merged.loc[combined_merged['Cluster Labels'] == cluster,
                                 combined_merged.columns[[0] + list(range(4, combined_merged.shape[1]))]]

    venues.columns = ['Neighborhood'] + [str(n) for n in range(1, num_top_venues + 1)]
    freqs = {}

    for i in range(1, num_top_venues + 1):
        counts = venues[str(i)].value_counts()
        for j in range(len(counts)):
            key = counts.index[j]
            value = counts.values[j]
            if key in freqs:
                freqs[key] += value
            else:
                freqs[key] = value

    sorted_freqs = sorted(freqs.items(), reverse=True, key=operator.itemgetter(1))

    # Show the top 20 items in each cluster, to keep the output to a manageable size.
    # The 'if' and 'break' below can be removed if you want to see all the items.
    for n, item in enumerate(sorted_freqs):
        print(item)
        if n > 20:
            break
    print()

************
 Cluster 0
************
('Park', 19)
('Bar', 16)
('Mexican Restaurant', 15)
('Café', 15)
('Coffee Shop', 15)
('Gym', 14)
('Bakery', 13)
('Italian Restaurant', 11)
('Grocery Store', 11)
('American Restaurant', 9)
('Art Gallery', 9)
('Farmers Market', 9)
('Ethiopian Restaurant', 9)
('Deli / Bodega', 8)
('Chinese Restaurant', 8)
('English Restaurant', 8)
('Burger Joint', 8)
('Falafel Restaurant', 8)
('Fast Food Restaurant', 8)
('Pizza Place', 7)
('Seafood Restaurant', 7)
('Trail', 7)

************
 Cluster 1
************
('Bar', 1)
('Italian Restaurant', 1)
('Gym', 1)
('Wine Bar', 1)
('Bakery', 1)
('Bookstore', 1)
('Coffee Shop', 1)
('American Restaurant', 1)
('Ice Cream Shop', 1)
('Indian Restaurant', 1)
('Park', 1)
('Cosmetics Shop', 1)
('Sushi Restaurant', 1)
('Thai Restaurant', 1)
('Vegetarian / Vegan Restaurant', 1)
('Breakfast Spot', 1)
('Burger Joint', 1)
('Dessert Shop', 1)
('Seafood Restaurant', 1)
('Monument / Landmark', 1)

************
 Cluster 2
************
('Ho

<a href="#table of contents">Go back to top</a>

<a id='Discussion'></a>

# E. Discussion

The results suggest that overall, Manhattan and San Francisco have much in common in terms of the venues available in each, even though a relatively small percentage of the neighborhoods remain unique (do not share sufficiently similar types of venues).

Even in the three major clusters, there is much in common. In each of those clusters there are three or more of these kinds of venues:

  * Bar
  * Café
  * Coffee Shop
  * Mexican Restaurant
  * Park

For the 16 neighborhoods that do not seem sufficiently similar to other neighborhoods, a "compromise" can be reached: Re-run the <i>k</i>-means analysis with an n_clusters value of 2. See the appendix for the data. The analysis yields one cluster with 67 neighborhoods and a another cluster with 10 neighborhoods. While the similarities between neighborhoods are not as granular, it does provide a small degree of similarity that could be used to make statements like, "If you like neighborhood <i>A</i>, you might like neighborhood <i>B</i>, for the 16 neighborhoods that did not otherwise fit into the primary analysis in this notebook.

This study is focused only on venues, and only on the venues reported by Foursquare. My experience is that the number of venues reported by the Foursquare API varied throughout the day. It is possible that there are other data sources that provide more comprehensive and consistent information about venues in a neighborhood, but this assignment required the use Foursquare. Nonetheless, despite some of the differences in the data returned by Foursquare with each call to the API, the overall results are rather consistent.

Note: This study used Foursquare's "explore" endpoint. I also tested with the "search" endpoint, and that produced very similar results.

<a href="#table of contents">Go back to top</a>

<a id='Conclusion'></a>

# F. Conclusion

This study shows that we can find enough similarities between the neighborhoods of Manhattan and the neighborhoods of San Francisco, so we can say "If you like neighborhood <i>A</i>, you might like neighborhood <i>B</i>." An example is given above, where the neighborhood clusters are analyzed in the 'Results' section of this notebook.

The scope of this study was to find similarity based on the available venues in each neighborhood. When making a more general comparison of neighborhoods, other factors should also be considered, such as population demographics, cost of living, "character" (urban or suburban, residential or mixed-use), and so on.

It is worth pointing out that Manhattan has a much higher population and population density than San Francisco. According to Wikipedia articles:

    https://en.wikipedia.org/wiki/Manhattan  
    https://en.wikipedia.org/wiki/San_Francisco

Manhattan's land area is 59.1 square kilometers.
San Francisco's land area is 121.46 square kilometers, twice as large as Manhattan.

Manhattan's population, as of 2017, is 1,664,727, with a population density of 28,154 per square kilometer.
San Francisco's population, as of 2017, is 884,363, with a population density of 7,282 per square kilometer, about 25% that of Manhattan.

<a href="#table of contents">Go back to top</a>

<a id='Appendix'></a>

# G. Appendix: <i>k</i>-means analysis with n_clusters = 2

In [48]:
app_kclusters = 2

# Get rid of 'Neighborhood' because it is not a venue category.
app_combined_grouped_clustering = combined_grouped.drop('Neighborhood', 1)

# Now run the k-means clustering.
app_kmeans = KMeans(n_clusters=app_kclusters, random_state=0).fit(app_combined_grouped_clustering)

# Look at the cluster labels generated for each row in the dataframe.
print('Cluster labels: {}'.format(app_kmeans.labels_))

# Combine the ny_nhoods and sf_nhoods dataframes into a combined dataframe.
app_column_names = ['Neighborhood', 'Latitude', 'Longitude']
app_combined_merged = ny_nhoods[app_column_names]
app_combined_merged = app_combined_merged.append(sf_nhoods[column_names], ignore_index=True)

# Add the k-means labels
app_combined_merged['Cluster Labels'] = app_kmeans.labels_

# merge combined_grouped with combined_merged to add latitude and longitude for each neighborhood.
app_combined_merged = app_combined_merged.join(combined_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

app_combined_merged.head()

Cluster labels: [0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 1 0 0 0 0 0
 0 0 0]


Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Marble Hill (NY),40.876551,-73.91066,0,Pizza Place,Bar,Pharmacy,Supermarket,Shoe Store,Bank,Sandwich Place,Donut Shop,Coffee Shop,Athletics & Sports,Discount Store,Spanish Restaurant,Deli / Bodega,Kids Store,Grocery Store,Steakhouse,Café,Supplement Shop,Candy Store,Department Store
1,Chinatown (NY),40.715618,-73.994279,0,Chinese Restaurant,Bar,Bubble Tea Shop,Sandwich Place,Hotpot Restaurant,Japanese Restaurant,Noodle House,Vietnamese Restaurant,English Restaurant,Malay Restaurant,Greek Restaurant,Bookstore,Spa,Dumpling Restaurant,Cantonese Restaurant,Museum,Monument / Landmark,Szechuan Restaurant,Tea Room,Bike Shop
2,Washington Heights (NY),40.851903,-73.9369,0,Pizza Place,Café,Bakery,Latin American Restaurant,Bar,Park,Grocery Store,Tapas Restaurant,Mexican Restaurant,Deli / Bodega,Wine Shop,Chinese Restaurant,Gym,Coffee Shop,Caribbean Restaurant,Spa,Sandwich Place,Scenic Lookout,Lounge,Market
3,Inwood (NY),40.867684,-73.92121,0,Mexican Restaurant,Pizza Place,Café,Spanish Restaurant,Wine Bar,Lounge,Bar,Restaurant,Latin American Restaurant,Park,Bakery,Deli / Bodega,Basketball Court,Coffee Shop,American Restaurant,Bank,Pharmacy,Seafood Restaurant,Caribbean Restaurant,Wine Shop
4,Hamilton Heights (NY),40.823604,-73.949688,1,Mexican Restaurant,Bar,Coffee Shop,Café,Pizza Place,Bakery,Indian Restaurant,Deli / Bodega,Liquor Store,Sandwich Place,Yoga Studio,School,Chinese Restaurant,Caribbean Restaurant,Mediterranean Restaurant,Park,Monument / Landmark,Gastropub,Smoke Shop,Sushi Restaurant


#### There are two clusters: one with 67 neighborhoods, the other with 10 neighborhoods

In [49]:
# Display the data for each cluster.
for cluster in range(app_kclusters):
    cluster_nhoods = app_combined_merged.loc[app_combined_merged['Cluster Labels'] == cluster,
                     app_combined_merged.columns[[0] + list(range(4, app_combined_merged.shape[1]))]]

    print('*' * 12)
    print(' Cluster {} has {} neighborhoods'.format(cluster, cluster_nhoods.shape[0]))
    print('*' * 12)
    display(cluster_nhoods)
    print()

************
 Cluster 0 has 67 neighborhoods
************


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Marble Hill (NY),Pizza Place,Bar,Pharmacy,Supermarket,Shoe Store,Bank,Sandwich Place,Donut Shop,Coffee Shop,Athletics & Sports,Discount Store,Spanish Restaurant,Deli / Bodega,Kids Store,Grocery Store,Steakhouse,Café,Supplement Shop,Candy Store,Department Store
1,Chinatown (NY),Chinese Restaurant,Bar,Bubble Tea Shop,Sandwich Place,Hotpot Restaurant,Japanese Restaurant,Noodle House,Vietnamese Restaurant,English Restaurant,Malay Restaurant,Greek Restaurant,Bookstore,Spa,Dumpling Restaurant,Cantonese Restaurant,Museum,Monument / Landmark,Szechuan Restaurant,Tea Room,Bike Shop
2,Washington Heights (NY),Pizza Place,Café,Bakery,Latin American Restaurant,Bar,Park,Grocery Store,Tapas Restaurant,Mexican Restaurant,Deli / Bodega,Wine Shop,Chinese Restaurant,Gym,Coffee Shop,Caribbean Restaurant,Spa,Sandwich Place,Scenic Lookout,Lounge,Market
3,Inwood (NY),Mexican Restaurant,Pizza Place,Café,Spanish Restaurant,Wine Bar,Lounge,Bar,Restaurant,Latin American Restaurant,Park,Bakery,Deli / Bodega,Basketball Court,Coffee Shop,American Restaurant,Bank,Pharmacy,Seafood Restaurant,Caribbean Restaurant,Wine Shop
5,Manhattanville (NY),Deli / Bodega,Seafood Restaurant,Mexican Restaurant,Italian Restaurant,Bus Station,Bike Trail,Sushi Restaurant,Supermarket,Fried Chicken Joint,Museum,Music School,Burger Joint,Spanish Restaurant,Other Nightlife,Food & Drink Shop,Bar,Falafel Restaurant,Chinese Restaurant,Climbing Gym,Park
6,Central Harlem (NY),African Restaurant,Gym,French Restaurant,Chinese Restaurant,American Restaurant,Cosmetics Shop,Seafood Restaurant,Caribbean Restaurant,Beer Bar,Park,Spa,Bar,Market,Bagel Shop,Bookstore,Ethiopian Restaurant,BBQ Joint,Boutique,Dessert Shop,Tapas Restaurant
7,East Harlem (NY),Mexican Restaurant,Bakery,Thai Restaurant,Deli / Bodega,Gym,Sandwich Place,Latin American Restaurant,Pharmacy,Art Museum,Chinese Restaurant,Pizza Place,Park,Restaurant,Fast Food Restaurant,Liquor Store,Steakhouse,Spanish Restaurant,Spa,Burger Joint,Café
8,Upper East Side (NY),Italian Restaurant,Art Gallery,Boutique,Coffee Shop,Juice Bar,Hotel,French Restaurant,Gym,Bakery,American Restaurant,Shoe Store,Sushi Restaurant,Gourmet Shop,Bar,Mexican Restaurant,Cosmetics Shop,Wine Shop,Spa,Burger Joint,Seafood Restaurant
9,Yorkville (NY),Gym,Coffee Shop,Italian Restaurant,Bar,Wine Shop,Sushi Restaurant,Mexican Restaurant,Deli / Bodega,Pizza Place,Pharmacy,Nail Salon,Bagel Shop,Wine Bar,Park,Japanese Restaurant,Thai Restaurant,Ice Cream Shop,Rental Car Location,Cosmetics Shop,Pool
10,Lenox Hill (NY),Gym,Italian Restaurant,Coffee Shop,Sushi Restaurant,Pizza Place,Mexican Restaurant,Deli / Bodega,Bar,Wine Shop,Burger Joint,Cosmetics Shop,Café,Chinese Restaurant,Turkish Restaurant,Thai Restaurant,Bakery,Gift Shop,Cycle Studio,Salon / Barbershop,Sandwich Place



************
 Cluster 1 has 10 neighborhoods
************


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
4,Hamilton Heights (NY),Mexican Restaurant,Bar,Coffee Shop,Café,Pizza Place,Bakery,Indian Restaurant,Deli / Bodega,Liquor Store,Sandwich Place,Yoga Studio,School,Chinese Restaurant,Caribbean Restaurant,Mediterranean Restaurant,Park,Monument / Landmark,Gastropub,Smoke Shop,Sushi Restaurant
12,Upper West Side (NY),Bar,Italian Restaurant,Gym,Wine Bar,Bakery,Bookstore,Coffee Shop,American Restaurant,Ice Cream Shop,Indian Restaurant,Park,Cosmetics Shop,Sushi Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Breakfast Spot,Burger Joint,Dessert Shop,Seafood Restaurant,Monument / Landmark
16,Murray Hill (NY),Hotel,Bar,Burger Joint,Sandwich Place,Bagel Shop,Italian Restaurant,Japanese Restaurant,Coffee Shop,Tea Room,Lounge,Salon / Barbershop,Scandinavian Restaurant,Mediterranean Restaurant,Seafood Restaurant,Mexican Restaurant,Gastropub,Shanghai Restaurant,Sushi Restaurant,French Restaurant,Museum
20,Lower East Side (NY),Bar,Chinese Restaurant,Café,Shoe Store,Pizza Place,Ramen Restaurant,Japanese Restaurant,Coffee Shop,Art Gallery,Pool,Clothing Store,Performing Arts Venue,Park,Music Venue,Sandwich Place,Tailor Shop,Mediterranean Restaurant,Track,Bike Rental / Bike Share,Mexican Restaurant
21,Tribeca (NY),American Restaurant,Greek Restaurant,Bar,Boutique,Spa,Café,Gym,Chinese Restaurant,Steakhouse,Men's Store,Coffee Shop,Hotel,Wine Bar,Wine Shop,Skate Park,Furniture / Home Store,Sushi Restaurant,Basketball Court,Bakery,Mini Golf
28,Battery Park City (NY),Park,Coffee Shop,American Restaurant,Mexican Restaurant,Department Store,Cupcake Shop,Food Truck,Hotel,Sandwich Place,BBQ Joint,Gym,Garden,Gastropub,Ice Cream Shop,Men's Store,Monument / Landmark,Memorial Site,Mediterranean Restaurant,Sushi Restaurant,Food Court
59,Marina (SF),Harbor / Marina,Gym,Coffee Shop,Café,Vegetarian / Vegan Restaurant,Grocery Store,Martial Arts Dojo,Bike Rental / Bike Share,Athletics & Sports,Lighthouse,Bank,Monument / Landmark,Park,Food Stand,English Restaurant,Ethiopian Restaurant,Falafel Restaurant,Food Court,Farmers Market,Food & Drink Shop
62,Pacific Heights (SF),Cosmetics Shop,Italian Restaurant,Gym,Bakery,Spa,Sandwich Place,Salon / Barbershop,Sushi Restaurant,Mexican Restaurant,Juice Bar,Yoga Studio,Thai Restaurant,Café,Grocery Store,Arts & Crafts Store,Bar,Boutique,Vietnamese Restaurant,Coffee Shop,Ice Cream Shop
66,Potrero Hill (SF),Gym,Park,Café,Bar,Brewery,Bus Station,Coffee Shop,Art Gallery,Sandwich Place,Bakery,Dessert Shop,Train Station,Dog Run,Restaurant,Gift Shop,Yoga Studio,Rental Car Location,Hardware Store,Climbing Gym,Hawaiian Restaurant
68,Twin Peaks (SF),Scenic Lookout,Trail,Lake,Zoo,Fast Food Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Falafel Restaurant,Farmers Market,Fish Market,Filipino Restaurant,Fishing Spot,Flea Market,Flower Shop,Food,Food & Drink Shop





<a href="#table of contents">Go back to top</a>