## 1. Install libraries and load data

#### Install libraries for the project

In [2]:
# Install libraries

!pip install geopy

import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print('Libraries imported.')

Libraries imported.


#### Preparation - load 3 datasets and pre-processing: 
#### San Francisco zipcode and neighborhood, 
http://www.healthysf.org/bdi/outcomes/zipmap.htm
#### Zipcode and coordinate database
https://www.unitedstateszipcodes.org/zip-code-database/
#### Chinese population in San Francisco by zipcode
http://zipatlas.com/us/ca/san-francisco/zip-code-comparison/percentage-chinese-population.htm


In [4]:
SF_zipcode = pd.read_csv("SF zipcode.csv")
SF_zipcode.head()

Unnamed: 0,Zip Code,Neighborhood
0,94102,Hayes Valley/Tenderloin/North of Market
1,94103,South of Market
2,94107,Potrero Hill
3,94108,Chinatown
4,94109,Polk/Russian Hill (Nob Hill)


In [5]:
Zipcode=pd.read_csv("zip_code_database.csv")
Zipcode.head()

Unnamed: 0,zip,type,decommissioned,primary_city,acceptable_cities,unacceptable_cities,state,county,timezone,area_codes,world_region,country,latitude,longitude,irs_estimated_population_2015
0,501,UNIQUE,0,Holtsville,,I R S Service Center,NY,Suffolk County,America/New_York,631,,US,40.81,-73.04,562
1,544,UNIQUE,0,Holtsville,,Irs Service Center,NY,Suffolk County,America/New_York,631,,US,40.81,-73.04,0
2,601,STANDARD,0,Adjuntas,,"Colinas Del Gigante, Jard De Adjuntas, Urb San...",PR,Adjuntas Municipio,America/Puerto_Rico,787939,,US,18.16,-66.72,0
3,602,STANDARD,0,Aguada,,"Alts De Aguada, Bo Guaniquilla, Comunidad Las ...",PR,Aguada Municipio,America/Puerto_Rico,787939,,US,18.38,-67.18,0
4,603,STANDARD,0,Aguadilla,Ramey,"Bda Caban, Bda Esteves, Bo Borinquen, Bo Ceiba...",PR,Aguadilla Municipio,America/Puerto_Rico,787,,US,18.43,-67.15,0


In [6]:
SF_Chinese=pd.read_csv("Chinese percentage of SF.csv")
SF_Chinese.head()

Unnamed: 0.1,Unnamed: 0,Zip Code,Location,City,Population,% Chinese,National Rank
0,1,94104,"37.791222, -122.402241","San Francisco, California",374,58.28%,#1
1,2,94133,"37.802071, -122.411004","San Francisco, California",26827,51.31%,#3
2,3,94108,"37.791998, -122.408653","San Francisco, California",13716,50.75%,#4
3,4,94116,"37.744410, -122.486764","San Francisco, California",42958,39.50%,#10
4,5,94122,"37.760412, -122.484966","San Francisco, California",55492,34.10%,#13


In [16]:
# Preprocessing Zipcode database 
# Select only 'zip', 'latitude' and 'longtitude', create a new dataframe 
latlng=Zipcode[['zip','latitude','longitude']]
latlng.head()

Unnamed: 0,zip,latitude,longitude
0,501,40.81,-73.04
1,544,40.81,-73.04
2,601,18.16,-66.72
3,602,18.38,-67.18
4,603,18.43,-67.15


In [17]:
# Merge 'SF_zipcode' dataset and 'latlng' dataset by zip code
df_SFdata = SF_zipcode.merge(latlng, left_on="Zip Code", right_on="zip", how="left")

# Remove 'zip' column
df_SFdata = df_SFdata.drop('zip', 1)

df_SFdata.head()

Unnamed: 0,Zip Code,Neighborhood,latitude,longitude
0,94102,Hayes Valley/Tenderloin/North of Market,37.78,-122.42
1,94103,South of Market,37.77,-122.41
2,94107,Potrero Hill,37.76,-122.39
3,94108,Chinatown,37.79,-122.41
4,94109,Polk/Russian Hill (Nob Hill),37.79,-122.42


In [18]:
# Pre-processing 'SF_Chinese' dataset: same way as with Zipcode database
# Select zipcode and Chinese percentage as a new dataframe, merge with 'df_SFdata'

Chinese_zip=SF_Chinese[['Zip Code','Population','% Chinese']]
df_SFdata = df_SFdata.merge(Chinese_zip, on="Zip Code", how="left")

df_SFdata

Unnamed: 0,Zip Code,Neighborhood,latitude,longitude,Population,% Chinese
0,94102,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,28991,8.08%
1,94103,South of Market,37.77,-122.41,23016,6.00%
2,94107,Potrero Hill,37.76,-122.39,17368,7.25%
3,94108,Chinatown,37.79,-122.41,13716,50.75%
4,94109,Polk/Russian Hill (Nob Hill),37.79,-122.42,56322,16.40%
5,94110,Inner Mission/Bernal Heights,37.74,-122.41,74633,4.72%
6,94112,Ingelside-Excelsior/Crocker-Amazon,37.72,-122.44,73104,25.03%
7,94114,Castro/Noe Valley,37.75,-122.43,30574,3.06%
8,94115,Western Addition/Japantown,37.78,-122.43,33115,5.99%
9,94116,Parkside/Forest Hill,37.74,-122.48,42958,39.50%


Data loading and preparation is completed
A dataframe with zipcode, neighborhood, coordinates, population and Chinese percentage has been formed

## 2. Create a map of San Francisco and use FourSquare to get venues of all neighborhoods
#### Create a map of SF with neighborhoods superimposed on top.

In [19]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="SF_explorer")
location = geolocator.geocode("Union Square, San Francisco")
print(location.address)
print('The geograpical coordinate of San Francisco, CA are {}, {}.'.format(location.latitude, location.longitude))

Union Square, San Francisco, San Francisco City and County, California, United States of America
The geograpical coordinate of San Francisco, CA are 37.7879363, -122.40751740318035.


In [20]:
# create map of San Francisco using latitude and longitude values
map_sf = folium.Map(location=[location.latitude, location.longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_SFdata['latitude'], df_SFdata['longitude'], df_SFdata['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_sf)  
    
map_sf

#### Define Foursquare Credentials and Version

In [21]:
CLIENT_ID = 'K24VV2FJTUBULJ3N0PVBMNHY5GQD3WUO0ELJA0WN0L1NWBK5' # your Foursquare ID
CLIENT_SECRET = 'DTLBHE5ZL2VXBYEQV00UYTAAN01AUIZ4ADDGEWQL1RZ5HRS4' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: K24VV2FJTUBULJ3N0PVBMNHY5GQD3WUO0ELJA0WN0L1NWBK5
CLIENT_SECRET:DTLBHE5ZL2VXBYEQV00UYTAAN01AUIZ4ADDGEWQL1RZ5HRS4


Create the GET request URL

In [22]:
LIMIT=100
radius=500
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, df_SFdata["latitude"], df_SFdata["longitude"], VERSION, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?client_id=K24VV2FJTUBULJ3N0PVBMNHY5GQD3WUO0ELJA0WN0L1NWBK5&client_secret=DTLBHE5ZL2VXBYEQV00UYTAAN01AUIZ4ADDGEWQL1RZ5HRS4&ll=0     37.78\n1     37.77\n2     37.76\n3     37.79\n4     37.79\n5     37.74\n6     37.72\n7     37.75\n8     37.78\n9     37.74\n10    37.77\n11    37.78\n12    37.77\n13    37.76\n14    37.80\n15    37.73\n16    37.74\n17    37.74\n18    37.72\n19    37.80\n20    37.72\nName: latitude, dtype: float64,0    -122.42\n1    -122.41\n2    -122.39\n3    -122.41\n4    -122.42\n5    -122.41\n6    -122.44\n7    -122.43\n8    -122.43\n9    -122.48\n10   -122.45\n11   -122.46\n12   -122.49\n13   -122.48\n14   -122.43\n15   -122.38\n16   -122.46\n17   -122.44\n18   -122.48\n19   -122.41\n20   -122.41\nName: longitude, dtype: float64&v=20180605&radius=500&limit=100'

#### Create a function to get veneus of different zipcode with radius of 500m

In [23]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [24]:
SF_venues = getNearbyVenues(names=df_SFdata['Neighborhood'],
                                   latitudes=df_SFdata['latitude'],
                                   longitudes=df_SFdata['longitude']
                                  )

Hayes Valley/Tenderloin/North of Market
South of Market
Potrero Hill
Chinatown
Polk/Russian Hill (Nob Hill)
Inner Mission/Bernal Heights
Ingelside-Excelsior/Crocker-Amazon
Castro/Noe Valley
Western Addition/Japantown
Parkside/Forest Hill
Haight-Ashbury
Inner Richmond
Outer Richmond
Sunset
Marina
Bayview-Hunters Point
St. Francis Wood/Miraloma/West Portal
Twin Peaks-Glen Park
Lake Merced
North Beach/Chinatown
Visitacion Valley/Sunnydale


In [25]:
# Check the size of the resulting dataframe
print(SF_venues.shape)
SF_venues.head()

(1048, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Herbst Theater,37.779548,-122.420953,Concert Hall
1,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,War Memorial Opera House,37.778601,-122.420816,Opera House
2,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,San Francisco Ballet,37.77858,-122.420798,Dance Studio
3,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Louise M. Davies Symphony Hall,37.777976,-122.420157,Concert Hall
4,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,War Memorial Court,37.779042,-122.420971,Park


In [26]:
# Check how many venues were returned for each neighborhood

SF_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bayview-Hunters Point,6,6,6,6,6,6
Castro/Noe Valley,85,85,85,85,85,85
Chinatown,84,84,84,84,84,84
Haight-Ashbury,79,79,79,79,79,79
Hayes Valley/Tenderloin/North of Market,96,96,96,96,96,96
Ingelside-Excelsior/Crocker-Amazon,44,44,44,44,44,44
Inner Mission/Bernal Heights,38,38,38,38,38,38
Inner Richmond,58,58,58,58,58,58
Lake Merced,15,15,15,15,15,15
Marina,54,54,54,54,54,54


## 3. Analyze Each Neighborhood

In [27]:
# one hot encoding
SF_onehot = pd.get_dummies(SF_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
SF_onehot['Neighborhood'] = SF_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [SF_onehot.columns[-1]] + list(SF_onehot.columns[:-1])
SF_onehot = SF_onehot[fixed_columns]

SF_onehot.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,American Restaurant,Antique Shop,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Beer Bar,Bike Shop,Board Shop,Bookstore,Boutique,Boxing Gym,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burmese Restaurant,Burrito Place,Bus Line,Bus Station,Bus Stop,Café,Camera Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Concert Hall,Convenience Store,Cosmetics Shop,Credit Union,Creperie,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Disc Golf,Discount Store,Dive Bar,Dog Run,Donut Shop,Dry Cleaner,Dumpling Restaurant,Electronics Store,Event Space,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gymnastics Gym,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Herbs & Spices Store,History Museum,Hobby Shop,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hunan Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Kitchen Supply Store,Korean Restaurant,Lake,Latin American Restaurant,Laundromat,Library,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Malay Restaurant,Marijuana Dispensary,Market,Martial Arts Dojo,Massage Studio,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Monument / Landmark,Motorcycle Shop,Mountain,Moving Target,Museum,Music Store,Music Venue,Nail Salon,New American Restaurant,Nightclub,Noodle House,North Indian Restaurant,Opera House,Optical Shop,Other Repair Shop,Outdoor Supply Store,Paper / Office Supplies Store,Park,Pedestrian Plaza,Performing Arts Venue,Persian Restaurant,Peruvian Restaurant,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Poke Place,Pool,Pool Hall,Pop-Up Shop,Pub,Ramen Restaurant,Record Shop,Recording Studio,Rental Car Location,Restaurant,Rock Club,Sake Bar,Salon / Barbershop,Salvadoran Restaurant,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shipping Store,Shoe Repair,Shoe Store,Shopping Mall,Sicilian Restaurant,Smoke Shop,Snack Place,South American Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Steakhouse,Street Food Gathering,Supermarket,Sushi Restaurant,Szechuan Restaurant,Taco Place,Tanning Salon,Tapas Restaurant,Tattoo Parlor,Tea Room,Tennis Court,Thai Restaurant,Theater,Thrift / Vintage Store,Tiki Bar,Toy / Game Store,Track,Trail,Trattoria/Osteria,Tree,Tuscan Restaurant,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Warehouse,Wine Bar,Wine Shop,Winery,Women's Store,Yoga Studio
0,Hayes Valley/Tenderloin/North of Market,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Hayes Valley/Tenderloin/North of Market,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Hayes Valley/Tenderloin/North of Market,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Hayes Valley/Tenderloin/North of Market,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Hayes Valley/Tenderloin/North of Market,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [28]:
# View the size of dataset
SF_onehot.shape

(1048, 233)

#### Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [29]:
SF_grouped = SF_onehot.groupby('Neighborhood').mean().reset_index()
SF_grouped.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,American Restaurant,Antique Shop,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Beer Bar,Bike Shop,Board Shop,Bookstore,Boutique,Boxing Gym,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burmese Restaurant,Burrito Place,Bus Line,Bus Station,Bus Stop,Café,Camera Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Concert Hall,Convenience Store,Cosmetics Shop,Credit Union,Creperie,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Disc Golf,Discount Store,Dive Bar,Dog Run,Donut Shop,Dry Cleaner,Dumpling Restaurant,Electronics Store,Event Space,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gymnastics Gym,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Herbs & Spices Store,History Museum,Hobby Shop,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hunan Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Kitchen Supply Store,Korean Restaurant,Lake,Latin American Restaurant,Laundromat,Library,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Malay Restaurant,Marijuana Dispensary,Market,Martial Arts Dojo,Massage Studio,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Monument / Landmark,Motorcycle Shop,Mountain,Moving Target,Museum,Music Store,Music Venue,Nail Salon,New American Restaurant,Nightclub,Noodle House,North Indian Restaurant,Opera House,Optical Shop,Other Repair Shop,Outdoor Supply Store,Paper / Office Supplies Store,Park,Pedestrian Plaza,Performing Arts Venue,Persian Restaurant,Peruvian Restaurant,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Poke Place,Pool,Pool Hall,Pop-Up Shop,Pub,Ramen Restaurant,Record Shop,Recording Studio,Rental Car Location,Restaurant,Rock Club,Sake Bar,Salon / Barbershop,Salvadoran Restaurant,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shipping Store,Shoe Repair,Shoe Store,Shopping Mall,Sicilian Restaurant,Smoke Shop,Snack Place,South American Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Steakhouse,Street Food Gathering,Supermarket,Sushi Restaurant,Szechuan Restaurant,Taco Place,Tanning Salon,Tapas Restaurant,Tattoo Parlor,Tea Room,Tennis Court,Thai Restaurant,Theater,Thrift / Vintage Store,Tiki Bar,Toy / Game Store,Track,Trail,Trattoria/Osteria,Tree,Tuscan Restaurant,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Warehouse,Wine Bar,Wine Shop,Winery,Women's Store,Yoga Studio
0,Bayview-Hunters Point,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Castro/Noe Valley,0.0,0.0,0.0,0.0,0.011765,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.035294,0.011765,0.011765,0.0,0.0,0.0,0.0,0.035294,0.011765,0.0,0.023529,0.0,0.0,0.023529,0.011765,0.0,0.0,0.0,0.023529,0.0,0.0,0.0,0.011765,0.023529,0.011765,0.0,0.0,0.0,0.0,0.035294,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.011765,0.0,0.0,0.0,0.0,0.0,0.035294,0.011765,0.011765,0.011765,0.023529,0.011765,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.011765,0.011765,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.011765,0.0,0.0,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.011765,0.011765,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.023529,0.0,0.023529,0.011765
2,Chinatown,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.02381,0.0,0.0,0.011905,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0,0.011905,0.0,0.0,0.0,0.059524,0.0,0.011905,0.0,0.02381,0.011905,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.035714,0.047619,0.035714,0.0,0.011905,0.011905,0.02381,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.011905,0.011905,0.0,0.0,0.0,0.011905,0.0,0.130952,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.02381,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.011905,0.011905,0.011905,0.011905,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.011905,0.0,0.0,0.0,0.011905,0.011905,0.011905,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.011905,0.011905,0.0,0.0,0.011905,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.011905
3,Haight-Ashbury,0.0,0.025316,0.0,0.0,0.012658,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.012658,0.037975,0.063291,0.0,0.025316,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.050633,0.0,0.012658,0.012658,0.0,0.0,0.0,0.0,0.037975,0.012658,0.037975,0.0,0.0,0.025316,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.025316,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025316,0.012658,0.0,0.0,0.012658,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.012658,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037975,0.0,0.0,0.0,0.0,0.0,0.0,0.025316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.037975,0.0,0.0,0.012658,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.012658,0.012658,0.0,0.0,0.012658,0.0,0.0,0.012658,0.0,0.012658,0.012658,0.0,0.0,0.012658,0.0,0.025316,0.0,0.012658,0.0,0.0,0.0,0.012658,0.0,0.012658,0.0,0.012658,0.0,0.012658,0.0,0.0,0.012658,0.025316
4,Hayes Valley/Tenderloin/North of Market,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.010417,0.0,0.020833,0.0,0.0,0.010417,0.03125,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.010417,0.0,0.041667,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.020833,0.041667,0.0,0.020833,0.0,0.0,0.010417,0.0,0.0,0.010417,0.010417,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.03125,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.010417,0.0,0.0,0.010417,0.010417,0.0,0.010417,0.0,0.010417,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.010417,0.010417,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.020833,0.0,0.0,0.0,0.0,0.0,0.010417,0.020833,0.0,0.0,0.0,0.020833,0.0,0.020833,0.0,0.0,0.0,0.010417,0.020833,0.010417,0.010417,0.010417,0.0,0.0,0.0,0.0,0.010417,0.0,0.010417,0.0,0.020833,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.03125,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.041667,0.0,0.03125,0.010417,0.0,0.0,0.0


Select venues of restaurants to form a new dataframe 'SF_dine'

In [71]:
SF_restaurant=SF_grouped.loc[:, SF_grouped.columns.str.contains('Restaurant')]
SF_bar=SF_grouped.loc[:, SF_grouped.columns.str.contains('Bar')]
SF_food=SF_grouped.loc[:, SF_grouped.columns.str.contains('Food')]

In [72]:
SF_dine=pd.concat([SF_restaurant, SF_bar, SF_food], axis=1)

SF_dine.head()

Unnamed: 0,Afghan Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,Burmese Restaurant,Caribbean Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,Greek Restaurant,Hawaiian Restaurant,Hunan Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Kebab Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,North Indian Restaurant,Persian Restaurant,Peruvian Restaurant,Ramen Restaurant,Restaurant,Salvadoran Restaurant,Seafood Restaurant,Sicilian Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Tapas Restaurant,Thai Restaurant,Tuscan Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Bar,Beer Bar,Cocktail Bar,Dive Bar,Gay Bar,Hotel Bar,Juice Bar,Karaoke Bar,Sake Bar,Salon / Barbershop,Sports Bar,Tiki Bar,Wine Bar,Fast Food Restaurant.1,Food,Food & Drink Shop,Food Court,Food Truck,Southern / Soul Food Restaurant.1,Street Food Gathering
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0
1,0.0,0.011765,0.0,0.0,0.011765,0.0,0.023529,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.011765,0.0,0.035294,0.0,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.02381,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0,0.0,0.02381,0.0,0.0,0.011905,0.0,0.0,0.011905,0.011905,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.011905,0.011905,0.011905,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.012658,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.0,0.012658,0.012658,0.012658,0.0,0.012658,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.0,0.010417,0.020833,0.0,0.0,0.0,0.0,0.020833,0.0,0.020833,0.0,0.0,0.020833,0.0,0.020833,0.041667,0.010417,0.020833,0.020833,0.0,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.010417,0.03125,0.0,0.0,0.010417,0.0,0.0,0.020833,0.0


In [73]:
# Add the column 'Neighborhood'
SF_dine['Neighborhood'] = SF_grouped['Neighborhood']

# Move neighborhood column to the first column
fixed_columns_1 = [SF_dine.columns[-1]] + list(SF_dine.columns[:-1])
SF_dine = SF_dine[fixed_columns_1]

# There is a column named 'Food' which does not have specific indication, so better to drop this column
SF_dine = SF_dine.drop('Food', 1)

In [33]:
SF_dine.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,Burmese Restaurant,Caribbean Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fast Food Restaurant,Fast Food Restaurant.1,Filipino Restaurant,French Restaurant,Greek Restaurant,Hawaiian Restaurant,Hunan Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Kebab Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,North Indian Restaurant,Persian Restaurant,Peruvian Restaurant,Ramen Restaurant,Restaurant,Salvadoran Restaurant,Seafood Restaurant,Sicilian Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Southern / Soul Food Restaurant.1,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Tapas Restaurant,Thai Restaurant,Tuscan Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Bar,Beer Bar,Cocktail Bar,Dive Bar,Gay Bar,Hotel Bar,Juice Bar,Karaoke Bar,Sake Bar,Salon / Barbershop,Sports Bar,Tiki Bar,Wine Bar,Fast Food Restaurant.2,Fast Food Restaurant.3,Food & Drink Shop,Food Court,Food Truck,Southern / Soul Food Restaurant.2,Southern / Soul Food Restaurant.3,Street Food Gathering
0,Bayview-Hunters Point,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Castro/Noe Valley,0.0,0.011765,0.0,0.0,0.011765,0.0,0.023529,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.011765,0.0,0.035294,0.0,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Chinatown,0.0,0.02381,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0,0.0,0.02381,0.0,0.0,0.011905,0.0,0.0,0.011905,0.011905,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.011905,0.011905,0.011905,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Haight-Ashbury,0.0,0.012658,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.0,0.012658,0.012658,0.012658,0.0,0.012658,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Hayes Valley/Tenderloin/North of Market,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.0,0.010417,0.020833,0.0,0.0,0.0,0.0,0.020833,0.020833,0.0,0.020833,0.0,0.0,0.020833,0.0,0.020833,0.041667,0.010417,0.020833,0.020833,0.0,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.010417,0.03125,0.0,0.0,0.010417,0.0,0.0,0.020833,0.020833,0.0


In [161]:
SF_dine_percentage = SF_dine.merge(df_SFdata, on="Neighborhood", how="left")
SF_dine_percentage.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,Burmese Restaurant,Caribbean Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fast Food Restaurant,Fast Food Restaurant.1,Filipino Restaurant,French Restaurant,Greek Restaurant,Hawaiian Restaurant,Hunan Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Kebab Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,North Indian Restaurant,Persian Restaurant,Peruvian Restaurant,Ramen Restaurant,Restaurant,Salvadoran Restaurant,Seafood Restaurant,Sicilian Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Southern / Soul Food Restaurant.1,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Tapas Restaurant,Thai Restaurant,Tuscan Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Bar,Beer Bar,Cocktail Bar,Dive Bar,Gay Bar,Hotel Bar,Juice Bar,Karaoke Bar,Sake Bar,Salon / Barbershop,Sports Bar,Tiki Bar,Wine Bar,Fast Food Restaurant.2,Fast Food Restaurant.3,Food & Drink Shop,Food Court,Food Truck,Southern / Soul Food Restaurant.2,Southern / Soul Food Restaurant.3,Street Food Gathering,Zip Code,latitude,longitude,Population,% Chinese
0,Bayview-Hunters Point,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,94124,37.73,-122.38,33170,16.96%
1,Castro/Noe Valley,0.0,0.011765,0.0,0.0,0.011765,0.0,0.023529,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.011765,0.0,0.035294,0.0,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,94114,37.75,-122.43,30574,3.06%
2,Chinatown,0.0,0.02381,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0,0.0,0.02381,0.0,0.0,0.011905,0.0,0.0,0.011905,0.011905,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.011905,0.011905,0.011905,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,94108,37.79,-122.41,13716,50.75%
3,Haight-Ashbury,0.0,0.012658,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.0,0.012658,0.012658,0.012658,0.0,0.012658,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,94117,37.77,-122.45,38738,3.00%
4,Hayes Valley/Tenderloin/North of Market,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.0,0.010417,0.020833,0.0,0.0,0.0,0.0,0.020833,0.020833,0.0,0.020833,0.0,0.0,0.020833,0.0,0.020833,0.041667,0.010417,0.020833,0.020833,0.0,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.010417,0.03125,0.0,0.0,0.010417,0.0,0.0,0.020833,0.020833,0.0,94102,37.78,-122.42,28991,8.08%


In [162]:
# Change percentage string to float for normalization
SF_dine_percentage['% Chinese']=SF_dine_percentage['% Chinese'].str.replace(r'%', r'0').astype('float') / 100.0
SF_dine_percentage['Population']=SF_dine_percentage['Population'].replace({',':''},regex=True).apply(pd.to_numeric,1)


In [163]:
#Calculate Chinese population as the target customer and use the number to normalize venus

SF_dine_percentage['Chinese Population']=SF_dine_percentage['Population']*SF_dine_percentage['% Chinese']
SF_dine_percentage.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,Burmese Restaurant,Caribbean Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fast Food Restaurant,Fast Food Restaurant.1,Filipino Restaurant,French Restaurant,Greek Restaurant,Hawaiian Restaurant,Hunan Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Kebab Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,North Indian Restaurant,Persian Restaurant,Peruvian Restaurant,Ramen Restaurant,Restaurant,Salvadoran Restaurant,Seafood Restaurant,Sicilian Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Southern / Soul Food Restaurant.1,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Tapas Restaurant,Thai Restaurant,Tuscan Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Bar,Beer Bar,Cocktail Bar,Dive Bar,Gay Bar,Hotel Bar,Juice Bar,Karaoke Bar,Sake Bar,Salon / Barbershop,Sports Bar,Tiki Bar,Wine Bar,Fast Food Restaurant.2,Fast Food Restaurant.3,Food & Drink Shop,Food Court,Food Truck,Southern / Soul Food Restaurant.2,Southern / Soul Food Restaurant.3,Street Food Gathering,Zip Code,latitude,longitude,Population,% Chinese,Chinese Population
0,Bayview-Hunters Point,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,94124,37.73,-122.38,33170,0.1696,5625.632
1,Castro/Noe Valley,0.0,0.011765,0.0,0.0,0.011765,0.0,0.023529,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.011765,0.0,0.035294,0.0,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,94114,37.75,-122.43,30574,0.0306,935.5644
2,Chinatown,0.0,0.02381,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0,0.0,0.02381,0.0,0.0,0.011905,0.0,0.0,0.011905,0.011905,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.011905,0.011905,0.011905,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,94108,37.79,-122.41,13716,0.5075,6960.87
3,Haight-Ashbury,0.0,0.012658,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.012658,0.0,0.012658,0.012658,0.012658,0.0,0.012658,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,94117,37.77,-122.45,38738,0.03,1162.14
4,Hayes Valley/Tenderloin/North of Market,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.0,0.010417,0.020833,0.0,0.0,0.0,0.0,0.020833,0.020833,0.0,0.020833,0.0,0.0,0.020833,0.0,0.020833,0.041667,0.010417,0.020833,0.020833,0.0,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.010417,0.03125,0.0,0.0,0.010417,0.0,0.0,0.020833,0.020833,0.0,94102,37.78,-122.42,28991,0.0808,2342.4728


In [164]:
# Drop columns that will not be analyzed in this step
SF_dine_percentage=SF_dine_percentage.drop(['Zip Code','latitude','longitude','% Chinese'], 1)

In [194]:
# Create the normalizer data series 
divider=SF_dine_percentage['Chinese Population']

In [195]:
# To perform division on the entire dataframe, we need to create a temporary dataset and drop the 'Neighborhood' column to make all data numeric
temp_df=SF_dine_percentage
temp_df=temp_df.drop('Neighborhood',1)

In [196]:
temp_df=temp_df*10000

In [197]:
temp_df=temp_df.divide(divider, 0)

In [198]:
#Add back the 'Neighborhood' column
temp_df=pd.concat([temp_df, SF_dine_percentage['Neighborhood']], axis=1)

In [199]:
# Move neighborhood column to the first column
fixed_columns_2 = [temp_df.columns[-1]] + list(temp_df.columns[:-1])
SF_dine_final = temp_df[fixed_columns_2]
SF_dine_final = SF_dine_final.drop(['Chinese Population','Population'],1)
SF_dine_final.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,Burmese Restaurant,Caribbean Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fast Food Restaurant,Fast Food Restaurant.1,Fast Food Restaurant.2,Fast Food Restaurant.3,Fast Food Restaurant.4,Fast Food Restaurant.5,Fast Food Restaurant.6,Fast Food Restaurant.7,Filipino Restaurant,French Restaurant,Greek Restaurant,Hawaiian Restaurant,Hunan Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Kebab Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,North Indian Restaurant,Persian Restaurant,Peruvian Restaurant,Ramen Restaurant,Restaurant,Salvadoran Restaurant,Seafood Restaurant,Sicilian Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Southern / Soul Food Restaurant.1,Southern / Soul Food Restaurant.2,Southern / Soul Food Restaurant.3,Southern / Soul Food Restaurant.4,Southern / Soul Food Restaurant.5,Southern / Soul Food Restaurant.6,Southern / Soul Food Restaurant.7,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Tapas Restaurant,Thai Restaurant,Tuscan Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Bar,Beer Bar,Cocktail Bar,Dive Bar,Gay Bar,Hotel Bar,Juice Bar,Karaoke Bar,Sake Bar,Salon / Barbershop,Sports Bar,Tiki Bar,Wine Bar,Fast Food Restaurant.8,Fast Food Restaurant.9,Fast Food Restaurant.10,Fast Food Restaurant.11,Fast Food Restaurant.12,Fast Food Restaurant.13,Fast Food Restaurant.14,Fast Food Restaurant.15,Food & Drink Shop,Food Court,Food Truck,Southern / Soul Food Restaurant.8,Southern / Soul Food Restaurant.9,Southern / Soul Food Restaurant.10,Southern / Soul Food Restaurant.11,Southern / Soul Food Restaurant.12,Southern / Soul Food Restaurant.13,Southern / Soul Food Restaurant.14,Southern / Soul Food Restaurant.15,Street Food Gathering
0,Bayview-Hunters Point,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Castro/Noe Valley,0.0,0.12575,0.0,0.0,0.12575,0.0,0.2515,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.12575,0.0,0.0,0.0,0.12575,0.0,0.12575,0.0,0.0,0.12575,0.0,0.377249,0.0,0.0,0.0,0.0,0.12575,0.0,0.12575,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.377249,0.0,0.0,0.0,0.0,0.0,0.0,0.12575,0.0,0.0,0.0,0.0,0.0,0.12575,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Chinatown,0.0,0.034205,0.0,0.017102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017102,0.0,0.0,0.017102,0.0,0.0,0.034205,0.0,0.0,0.017102,0.0,0.0,0.017102,0.017102,0.0,0.0,0.017102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034205,0.0,0.0,0.0,0.0,0.0,0.017102,0.017102,0.017102,0.06841,0.0,0.0,0.0,0.0,0.0,0.0,0.017102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Haight-Ashbury,0.0,0.108922,0.0,0.0,0.0,0.108922,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.108922,0.108922,0.0,0.0,0.0,0.0,0.0,0.0,0.108922,0.108922,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.108922,0.108922,0.0,0.108922,0.108922,0.108922,0.0,0.108922,0.0,0.108922,0.0,0.0,0.0,0.0,0.0,0.108922,0.0,0.108922,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Hayes Valley/Tenderloin/North of Market,0.0,0.0,0.0,0.0,0.0,0.0,0.044469,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133406,0.0,0.0,0.0,0.044469,0.044469,0.0,0.0,0.0,0.0,0.0,0.044469,0.044469,0.0,0.0,0.0,0.0,0.0,0.044469,0.088937,0.0,0.0,0.0,0.0,0.088937,0.088937,0.088937,0.088937,0.088937,0.088937,0.088937,0.088937,0.0,0.088937,0.0,0.0,0.088937,0.0,0.088937,0.177875,0.044469,0.088937,0.088937,0.0,0.0,0.044469,0.044469,0.0,0.0,0.0,0.0,0.044469,0.133406,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.044469,0.0,0.0,0.088937,0.088937,0.088937,0.088937,0.088937,0.088937,0.088937,0.088937,0.0


#### Print each neighborhood along with the top 5 most common venues

In [200]:
num_top_venues = 5

for hood in SF_dine_final['Neighborhood']:
    print("----"+hood+"----")
    temp = SF_dine_final[SF_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bayview-Hunters Point----
               venue  freq
0  Afghan Restaurant   0.0
1         Sports Bar   0.0
2           Sake Bar   0.0
3        Karaoke Bar   0.0
4          Juice Bar   0.0


----Castro/Noe Valley----
                 venue  freq
0     Sushi Restaurant  0.38
1   Mexican Restaurant  0.38
2   Chinese Restaurant  0.25
3     Kebab Restaurant  0.13
4  Peruvian Restaurant  0.13


----Chinatown----
                 venue  freq
0         Cocktail Bar  0.07
1     Sushi Restaurant  0.03
2   Italian Restaurant  0.03
3  American Restaurant  0.03
4             Beer Bar  0.02


----Haight-Ashbury----
                       venue  freq
0                   Wine Bar  0.11
1  Middle Eastern Restaurant  0.11
2                    Gay Bar  0.11
3               Cocktail Bar  0.11
4                 Sports Bar  0.11


----Hayes Valley/Tenderloin/North of Market----
                             venue  freq
0            Vietnamese Restaurant  0.18
1                         Wine Bar  0.13
2   

Write a function to sort the venues in descending order.

In [201]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Create the new dataframe and display the top 10 venues for each neighborhood

In [202]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = SF_dine_final['Neighborhood']

for ind in np.arange(SF_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(SF_dine_final.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bayview-Hunters Point,Street Food Gathering,Indian Restaurant,Japanese Restaurant,Kebab Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant
1,Castro/Noe Valley,Mexican Restaurant,Sushi Restaurant,Chinese Restaurant,Kebab Restaurant,American Restaurant,Italian Restaurant,Burmese Restaurant,Malay Restaurant,Restaurant,Greek Restaurant
2,Chinatown,Cocktail Bar,American Restaurant,Italian Restaurant,Sushi Restaurant,Korean Restaurant,Vietnamese Restaurant,Asian Restaurant,Salon / Barbershop,Hawaiian Restaurant,Beer Bar
3,Haight-Ashbury,Indian Restaurant,Mexican Restaurant,Cocktail Bar,Sports Bar,Bar,Wine Bar,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Thai Restaurant,Tapas Restaurant
4,Hayes Valley/Tenderloin/North of Market,Vietnamese Restaurant,French Restaurant,Wine Bar,Southern / Soul Food Restaurant,Southern / Soul Food Restaurant,Vegetarian / Vegan Restaurant,Thai Restaurant,Sushi Restaurant,Southern / Soul Food Restaurant,Southern / Soul Food Restaurant
5,Ingelside-Excelsior/Crocker-Amazon,Chinese Restaurant,Mexican Restaurant,Vietnamese Restaurant,Bar,Fast Food Restaurant,Fast Food Restaurant,Japanese Restaurant,Latin American Restaurant,Hunan Restaurant,Filipino Restaurant
6,Inner Mission/Bernal Heights,Mexican Restaurant,New American Restaurant,Fast Food Restaurant,Fast Food Restaurant,Indian Restaurant,Fast Food Restaurant,Fast Food Restaurant,Fast Food Restaurant,Fast Food Restaurant,Fast Food Restaurant
7,Inner Richmond,Sushi Restaurant,Bar,Vietnamese Restaurant,Japanese Restaurant,Korean Restaurant,Burmese Restaurant,Southern / Soul Food Restaurant,Italian Restaurant,Thai Restaurant,Southern / Soul Food Restaurant
8,Lake Merced,Juice Bar,Cocktail Bar,Mexican Restaurant,Street Food Gathering,Middle Eastern Restaurant,Japanese Restaurant,Kebab Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant
9,Marina,Italian Restaurant,Vegetarian / Vegan Restaurant,American Restaurant,Caribbean Restaurant,Mediterranean Restaurant,Mexican Restaurant,Sushi Restaurant,Bar,New American Restaurant,Kebab Restaurant


## 4. Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.
Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [203]:
# set number of clusters
kclusters = 5

SF_grouped_clustering = SF_dine_final.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(SF_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

SF_merged = df_SFdata

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
SF_merged = SF_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

In [204]:
SF_merged

Unnamed: 0,Zip Code,Neighborhood,latitude,longitude,Population,% Chinese,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,94102,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,28991,8.08%,0,Vietnamese Restaurant,French Restaurant,Wine Bar,Southern / Soul Food Restaurant,Southern / Soul Food Restaurant,Vegetarian / Vegan Restaurant,Thai Restaurant,Sushi Restaurant,Southern / Soul Food Restaurant,Southern / Soul Food Restaurant
1,94103,South of Market,37.77,-122.41,23016,6.00%,2,Gay Bar,American Restaurant,Cocktail Bar,Thai Restaurant,Food Truck,Street Food Gathering,Middle Eastern Restaurant,Japanese Restaurant,Bar,Sports Bar
2,94107,Potrero Hill,37.76,-122.39,17368,7.25%,1,Wine Bar,Cocktail Bar,Food Truck,Restaurant,Southern / Soul Food Restaurant,Southern / Soul Food Restaurant,Bar,Italian Restaurant,Sushi Restaurant,Juice Bar
3,94108,Chinatown,37.79,-122.41,13716,50.75%,0,Cocktail Bar,American Restaurant,Italian Restaurant,Sushi Restaurant,Korean Restaurant,Vietnamese Restaurant,Asian Restaurant,Salon / Barbershop,Hawaiian Restaurant,Beer Bar
4,94109,Polk/Russian Hill (Nob Hill),37.79,-122.42,56322,16.40%,0,Sushi Restaurant,Wine Bar,Vietnamese Restaurant,Bar,Thai Restaurant,Cocktail Bar,American Restaurant,Mexican Restaurant,Seafood Restaurant,Gay Bar
5,94110,Inner Mission/Bernal Heights,37.74,-122.41,74633,4.72%,0,Mexican Restaurant,New American Restaurant,Fast Food Restaurant,Fast Food Restaurant,Indian Restaurant,Fast Food Restaurant,Fast Food Restaurant,Fast Food Restaurant,Fast Food Restaurant,Fast Food Restaurant
6,94112,Ingelside-Excelsior/Crocker-Amazon,37.72,-122.44,73104,25.03%,0,Chinese Restaurant,Mexican Restaurant,Vietnamese Restaurant,Bar,Fast Food Restaurant,Fast Food Restaurant,Japanese Restaurant,Latin American Restaurant,Hunan Restaurant,Filipino Restaurant
7,94114,Castro/Noe Valley,37.75,-122.43,30574,3.06%,3,Mexican Restaurant,Sushi Restaurant,Chinese Restaurant,Kebab Restaurant,American Restaurant,Italian Restaurant,Burmese Restaurant,Malay Restaurant,Restaurant,Greek Restaurant
8,94115,Western Addition/Japantown,37.78,-122.43,33115,5.99%,0,Karaoke Bar,New American Restaurant,American Restaurant,Seafood Restaurant,Japanese Restaurant,Kebab Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant
9,94116,Parkside/Forest Hill,37.74,-122.48,42958,39.50%,0,Chinese Restaurant,Sushi Restaurant,Thai Restaurant,American Restaurant,Dive Bar,Dim Sum Restaurant,Food Truck,Vietnamese Restaurant,Japanese Restaurant,Korean Restaurant


Visualize the resulting clusters

In [205]:
# create map
map_clusters = folium.Map(location=[location.latitude, location.longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(SF_merged['latitude'], SF_merged['longitude'], SF_merged['Neighborhood'], SF_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [206]:
SF_merged.loc[SF_merged['Cluster Labels'] == 0, SF_merged.columns[[1] + list(range(5, SF_merged.shape[1]))]]

Unnamed: 0,Neighborhood,% Chinese,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Hayes Valley/Tenderloin/North of Market,8.08%,0,Vietnamese Restaurant,French Restaurant,Wine Bar,Southern / Soul Food Restaurant,Southern / Soul Food Restaurant,Vegetarian / Vegan Restaurant,Thai Restaurant,Sushi Restaurant,Southern / Soul Food Restaurant,Southern / Soul Food Restaurant
3,Chinatown,50.75%,0,Cocktail Bar,American Restaurant,Italian Restaurant,Sushi Restaurant,Korean Restaurant,Vietnamese Restaurant,Asian Restaurant,Salon / Barbershop,Hawaiian Restaurant,Beer Bar
4,Polk/Russian Hill (Nob Hill),16.40%,0,Sushi Restaurant,Wine Bar,Vietnamese Restaurant,Bar,Thai Restaurant,Cocktail Bar,American Restaurant,Mexican Restaurant,Seafood Restaurant,Gay Bar
5,Inner Mission/Bernal Heights,4.72%,0,Mexican Restaurant,New American Restaurant,Fast Food Restaurant,Fast Food Restaurant,Indian Restaurant,Fast Food Restaurant,Fast Food Restaurant,Fast Food Restaurant,Fast Food Restaurant,Fast Food Restaurant
6,Ingelside-Excelsior/Crocker-Amazon,25.03%,0,Chinese Restaurant,Mexican Restaurant,Vietnamese Restaurant,Bar,Fast Food Restaurant,Fast Food Restaurant,Japanese Restaurant,Latin American Restaurant,Hunan Restaurant,Filipino Restaurant
8,Western Addition/Japantown,5.99%,0,Karaoke Bar,New American Restaurant,American Restaurant,Seafood Restaurant,Japanese Restaurant,Kebab Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant
9,Parkside/Forest Hill,39.50%,0,Chinese Restaurant,Sushi Restaurant,Thai Restaurant,American Restaurant,Dive Bar,Dim Sum Restaurant,Food Truck,Vietnamese Restaurant,Japanese Restaurant,Korean Restaurant
10,Haight-Ashbury,3.00%,0,Indian Restaurant,Mexican Restaurant,Cocktail Bar,Sports Bar,Bar,Wine Bar,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Thai Restaurant,Tapas Restaurant
11,Inner Richmond,24.51%,0,Sushi Restaurant,Bar,Vietnamese Restaurant,Japanese Restaurant,Korean Restaurant,Burmese Restaurant,Southern / Soul Food Restaurant,Italian Restaurant,Thai Restaurant,Southern / Soul Food Restaurant
12,Outer Richmond,31.78%,0,Food Truck,Street Food Gathering,North Indian Restaurant,Japanese Restaurant,Kebab Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant


In [207]:
SF_merged.loc[SF_merged['Cluster Labels'] == 1, SF_merged.columns[[1] + list(range(5, SF_merged.shape[1]))]]

Unnamed: 0,Neighborhood,% Chinese,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Potrero Hill,7.25%,1,Wine Bar,Cocktail Bar,Food Truck,Restaurant,Southern / Soul Food Restaurant,Southern / Soul Food Restaurant,Bar,Italian Restaurant,Sushi Restaurant,Juice Bar


In [208]:
SF_merged.loc[SF_merged['Cluster Labels'] == 2, SF_merged.columns[[1] + list(range(5, SF_merged.shape[1]))]]

Unnamed: 0,Neighborhood,% Chinese,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,South of Market,6.00%,2,Gay Bar,American Restaurant,Cocktail Bar,Thai Restaurant,Food Truck,Street Food Gathering,Middle Eastern Restaurant,Japanese Restaurant,Bar,Sports Bar


In [209]:
SF_merged.loc[SF_merged['Cluster Labels'] == 3, SF_merged.columns[[1] + list(range(5, SF_merged.shape[1]))]]

Unnamed: 0,Neighborhood,% Chinese,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Castro/Noe Valley,3.06%,3,Mexican Restaurant,Sushi Restaurant,Chinese Restaurant,Kebab Restaurant,American Restaurant,Italian Restaurant,Burmese Restaurant,Malay Restaurant,Restaurant,Greek Restaurant


In [210]:
SF_merged.loc[SF_merged['Cluster Labels'] == 4, SF_merged.columns[[1] + list(range(5, SF_merged.shape[1]))]]

Unnamed: 0,Neighborhood,% Chinese,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Marina,5.41%,4,Italian Restaurant,Vegetarian / Vegan Restaurant,American Restaurant,Caribbean Restaurant,Mediterranean Restaurant,Mexican Restaurant,Sushi Restaurant,Bar,New American Restaurant,Kebab Restaurant
