In [86]:
# standard library
import json
import os
from io import StringIO

# data analysis libraries
import numpy as np
import pandas as pd

# website scraping library
import requests
from bs4 import BeautifulSoup

# lat and long library
from uszipcode import SearchEngine

# mapping libraries
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
from geopy.geocoders import Nominatim

# clustering libraries
from sklearn.cluster import KMeans

# 1. Create a SF Map with neighborhoods

## Get SF neighborhoods and zip codes

In [87]:
# Download the healthysf.org page that lists SF zip codes and neighborhoods.
# The timeout and explicit status check make a dead or failing link stop the
# notebook here instead of surfacing later as a confusing parse/index error.
response = requests.get("http://www.healthysf.org/bdi/outcomes/zipmap.htm", timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.text, "lxml")
table = soup.find_all("table")

# Recent pandas versions deprecate passing literal HTML to read_html, so the
# markup is wrapped in a file-like object. The table at position 4 of the
# parsed list is the one used as the neighborhood table.
page_tables = pd.read_html(StringIO(str(table)))
df = page_tables[4].copy()

In [88]:
# Promote the first scraped row to column headers, then discard that header
# row and the row at position 21 of what remains (the table's last row).
df.columns = df.iloc[0]
df = df.drop(index=df.index[0])
df = df.drop(index=df.index[21])

In [89]:
# Look up every zip code once and attach its coordinates to the neighborhood table.
search = SearchEngine(simple_zipcode=True)

zip_records = [search.by_zipcode(zip_code).to_dict() for zip_code in df["Zip Code"]]

latitude = [record.get("lat") for record in zip_records]
longitude = [record.get("lng") for record in zip_records]

df["Latitude"] = latitude
df["Longitude"] = longitude

df

Unnamed: 0,Zip Code,Neighborhood,Population (Census 2000),Latitude,Longitude
1,94102,Hayes Valley/Tenderloin/North of Market,28991,37.78,-122.42
2,94103,South of Market,23016,37.78,-122.41
3,94107,Potrero Hill,17368,37.77,-122.39
4,94108,Chinatown,13716,37.791,-122.409
5,94109,Polk/Russian Hill (Nob Hill),56322,37.79,-122.42
6,94110,Inner Mission/Bernal Heights,74633,37.75,-122.42
7,94112,Ingelside-Excelsior/Crocker-Amazon,73104,37.72,-122.44
8,94114,Castro/Noe Valley,30574,37.76,-122.44
9,94115,Western Addition/Japantown,33115,37.79,-122.44
10,94116,Parkside/Forest Hill,42958,37.74,-122.48


In [90]:
# Hand-corrected coordinates for two neighborhoods, keyed by their row label in df.
coordinate_overrides = {
    13: (37.781, -122.498),  # Outer Richmond
    15: (37.802, -122.438),  # Marina
}

for row_label, (fixed_lat, fixed_lng) in coordinate_overrides.items():
    df.at[row_label, 'Latitude'] = fixed_lat
    df.at[row_label, 'Longitude'] = fixed_lng

In [91]:
# find lat and long of SF
address = 'San Francisco, CA, USA'

geolocator = Nominatim(user_agent = "san_francisco_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; without this check
# the failure would show up as an opaque AttributeError on `.latitude` below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

# These scalars are the map center used by the folium maps further down.
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of San Francisco are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of San Francisco are 37.7790262, -122.4199061.


## Add neighborhood markers to SF map

In [92]:
# Base map centered on the San Francisco coordinates.
sf_map = folium.Map(location=[latitude, longitude], zoom_start=12)

# Styling shared by every neighborhood marker.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One clickable circle per neighborhood; the popup shows the neighborhood name.
for lat, lng, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    popup = folium.Popup('{}'.format(neighborhood), parse_html=True)
    folium.CircleMarker([lat, lng], popup=popup, **marker_style).add_to(sf_map)

sf_map

# 2. Pull Data from Foursquare

In [93]:
## foursquare credentials and version
# Credentials are read from the environment so they are never committed with
# the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# starting the kernel; a missing variable raises KeyError right here.
# NOTE(review): the key pair that used to be hardcoded in this cell should be
# treated as leaked and rotated in the Foursquare developer console.
CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] # your Foursquare ID
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

## Getting venues for each SF neighborhood from Foursquare

In [94]:
## note: tried the Trending API endpoint as opposed to Explore API. 
## Trending API will only return venues where people are currently checked in.
## The response was consistently empty
## Test API response (credentials redacted; substitute your own): https://foursquare.com/developers/explore#req=venues%2Fexplore%3F%26client_id%3D<CLIENT_ID>%26client_secret%3D<CLIENT_SECRET>%26v%3D20180605%26ll%3D37.78%2C-122.42%26radius%3D1609%26limit%3D100%26categoryId%3D4bf58dd8d48988d142941735

In [95]:
LIMIT = 200 # value sent as the `limit` query parameter of every explore request
radius = 1609 # given in meters. 1609m is roughly 1 mile.

def getNearbyVenues(names, latitudes, longitudes, radius=1609):
    """Fetch Foursquare "explore" venues around each neighborhood.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Neighborhood names and their coordinates, consumed in parallel
        (iteration stops at the shortest one).
    radius : int, default 1609
        Search radius in meters sent to the API.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        keeps these columns even when no venue is returned at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT, and
    prints each neighborhood name as a simple progress indicator.
    """
    endpoint = 'https://api.foursquare.com/v2/venues/explore'
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and escape the query string
        query = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # fail loudly on HTTP errors instead of with a KeyError while parsing
        response = requests.get(endpoint, params=query, timeout=30)
        response.raise_for_status()

        groups = response.json()["response"].get('groups') or []
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            # a venue without any category gets None rather than an IndexError
            category_name = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category_name))

    # passing `columns` here (instead of assigning afterwards) keeps the
    # function working when venue_rows is empty
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)

    return(nearby_venues)

In [98]:
# Query Foursquare once per neighborhood row of df (default 1609 m radius).
sf_venues = getNearbyVenues(
    names=df['Neighborhood'],
    latitudes=df['Latitude'],
    longitudes=df['Longitude'],
)

Hayes Valley/Tenderloin/North of Market
South of Market
Potrero Hill
Chinatown
Polk/Russian Hill (Nob Hill)
Inner Mission/Bernal Heights
Ingelside-Excelsior/Crocker-Amazon
Castro/Noe Valley
Western Addition/Japantown
Parkside/Forest Hill
Haight-Ashbury
Inner Richmond
Outer Richmond
Sunset
Marina
Bayview-Hunters Point
St. Francis Wood/Miraloma/West Portal
Twin Peaks-Glen Park
Lake Merced
North Beach/Chinatown
Visitacion Valley/Sunnydale


In [99]:
# show the venue table built by getNearbyVenues (one row per venue)
sf_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Herbst Theater,37.779548,-122.420953,Concert Hall
1,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,War Memorial Opera House,37.778601,-122.420816,Opera House
2,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,San Francisco Ballet,37.778580,-122.420798,Dance Studio
3,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Louise M. Davies Symphony Hall,37.777976,-122.420157,Concert Hall
4,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Asian Art Museum,37.780178,-122.416505,Art Museum
5,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Whitechapel,37.782230,-122.418884,Cocktail Bar
6,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Philz Coffee,37.781433,-122.417073,Coffee Shop
7,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Robin,37.779127,-122.423378,Sushi Restaurant
8,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Brenda's French Soul Food,37.782896,-122.418897,Southern / Soul Food Restaurant
9,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Maker & Moss,37.777144,-122.422368,Furniture / Home Store


### Only pull Asian restaurants and boba shops

In [100]:
## only interested in Asian Restaurants and Bubble Tea Shops
## category ids come from the Foursquare category list:
## https://developer.foursquare.com/docs/build-with-foursquare/categories/
# Maps a readable label to the id sent as `categoryId` in the explore request.
VenueCats = {
    'Bubble Tea Shop':'52e81612bcbc57f1066b7a0c',
    'Asian Restaurant':'4bf58dd8d48988d142941735',
}

In [101]:
LIMIT = 200 # value sent as the `limit` query parameter of every explore request
radius = 1609 # given in meters. 1609m is roughly 1 mile.

def getNearbyAsianVenues(names, latitudes, longitudes, radius=1609, cat = VenueCats):
    """Fetch Foursquare "explore" venues per neighborhood, filtered by category.

    One request is made per neighborhood and per entry of `cat`.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Neighborhood names and their coordinates, consumed in parallel
        (iteration stops at the shortest one).
    radius : int, default 1609
        Search radius in meters sent to the API.
    cat : dict, default VenueCats
        Maps a label to the Foursquare category id sent as `categoryId`.
        The ids are taken from this argument; previously they were always
        looked up in the global VenueCats, so a custom mapping was ignored
        or raised KeyError.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. A venue
        returned for more than one category query appears once per query.
        The frame keeps these columns even when no venue is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT, and
    prints each neighborhood name as a simple progress indicator.
    """
    endpoint = 'https://api.foursquare.com/v2/venues/explore'
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        for category_id in cat.values():

            # let requests build and escape the query string
            query = {
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
                'categoryId': category_id,
            }

            # fail loudly on HTTP errors instead of with a KeyError while parsing
            response = requests.get(endpoint, params=query, timeout=30)
            response.raise_for_status()

            groups = response.json()["response"].get('groups') or []
            items = groups[0]['items'] if groups else []

            # keep only the relevant information for each nearby venue
            for item in items:
                venue = item['venue']
                categories = venue.get('categories') or []
                # a venue without any category gets None rather than an IndexError
                category_name = categories[0]['name'] if categories else None
                venue_rows.append((
                    name,
                    lat,
                    lng,
                    venue['name'],
                    venue['location']['lat'],
                    venue['location']['lng'],
                    category_name))

    # passing `columns` here (instead of assigning afterwards) keeps the
    # function working when venue_rows is empty
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)

    return(nearby_venues)

In [102]:
# Query Foursquare per neighborhood row of df, once for each category in VenueCats.
sf_venues_asian = getNearbyAsianVenues(
    names=df['Neighborhood'],
    latitudes=df['Latitude'],
    longitudes=df['Longitude'],
)

Hayes Valley/Tenderloin/North of Market
South of Market
Potrero Hill
Chinatown
Polk/Russian Hill (Nob Hill)
Inner Mission/Bernal Heights
Ingelside-Excelsior/Crocker-Amazon
Castro/Noe Valley
Western Addition/Japantown
Parkside/Forest Hill
Haight-Ashbury
Inner Richmond
Outer Richmond
Sunset
Marina
Bayview-Hunters Point
St. Francis Wood/Miraloma/West Portal
Twin Peaks-Glen Park
Lake Merced
North Beach/Chinatown
Visitacion Valley/Sunnydale


In [103]:
# show the category-filtered venue table built by getNearbyAsianVenues
sf_venues_asian

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Boba Guys,37.772907,-122.423507,Bubble Tea Shop
1,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,SimplexiTea,37.775960,-122.417007,Bubble Tea Shop
2,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Urban Ritual,37.775595,-122.425863,Bubble Tea Shop
3,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Black Sugar,37.786135,-122.409948,Bubble Tea Shop
4,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Sunday at the Museum,37.780025,-122.416066,Bubble Tea Shop
5,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Boba Guys,37.783657,-122.432662,Bubble Tea Shop
6,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Boba Guys,37.789899,-122.407077,Bubble Tea Shop
7,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Sharetea,37.784581,-122.403183,Bubble Tea Shop
8,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Sharetea,37.784000,-122.407425,Bubble Tea Shop
9,Hayes Valley/Tenderloin/North of Market,37.78,-122.42,Belly Good Cafe & Crepes,37.785006,-122.430666,Creperie


### Compare size of SF venues and SF asian venues

In [104]:
# DataFrame.size counts cells (rows x columns), not rows; the frame has
# 7 columns, so divide by 7 for the number of venue rows.
sf_venues.size

13993

In [105]:
# DataFrame.size counts cells (rows x columns), not rows; the frame has
# 7 columns, so divide by 7 for the number of venue rows.
sf_venues_asian.size

12320

# 3. Compare neighborhoods based on Asian venues

### The number of Asian venues by neighborhood

In [106]:
# Non-null count of every column per neighborhood, largest 'Venue' count first.
sf_venues_asian.groupby('Neighborhood').count().sort_values('Venue', ascending=False)

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Polk/Russian Hill (Nob Hill),144,144,144,144,144,144
Chinatown,139,139,139,139,139,139
South of Market,131,131,131,131,131,131
Hayes Valley/Tenderloin/North of Market,122,122,122,122,122,122
Sunset,119,119,119,119,119,119
Parkside/Forest Hill,116,116,116,116,116,116
Inner Richmond,109,109,109,109,109,109
Western Addition/Japantown,106,106,106,106,106,106
Inner Mission/Bernal Heights,98,98,98,98,98,98
Haight-Ashbury,93,93,93,93,93,93


### Analyze each neighborhood

In [107]:
# One indicator column per venue category; empty prefix keeps the bare category names.
category_dummies = pd.get_dummies(sf_venues_asian[['Venue Category']], prefix="", prefix_sep="")

# Tag every venue row with its neighborhood again.
category_dummies['Neighborhood'] = sf_venues_asian['Neighborhood']

# Round-tripping through the index moves 'Neighborhood' to the first column.
sf_asian_onehot = category_dummies.set_index('Neighborhood').reset_index()
sf_asian_onehot.head()

Unnamed: 0,Neighborhood,Asian Restaurant,BBQ Joint,Bakery,Boutique,Breakfast Spot,Bubble Tea Shop,Burmese Restaurant,Café,Cambodian Restaurant,...,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tea Room,Thai Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Yoshoku Restaurant
0,Hayes Valley/Tenderloin/North of Market,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Hayes Valley/Tenderloin/North of Market,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Hayes Valley/Tenderloin/North of Market,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Hayes Valley/Tenderloin/North of Market,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Hayes Valley/Tenderloin/North of Market,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [108]:
# Average the one-hot rows per neighborhood: each value becomes the share of
# that neighborhood's venue rows that fall in the category.
sf_asian_g = sf_asian_onehot.groupby('Neighborhood').mean().reset_index()
sf_asian_g

Unnamed: 0,Neighborhood,Asian Restaurant,BBQ Joint,Bakery,Boutique,Breakfast Spot,Bubble Tea Shop,Burmese Restaurant,Café,Cambodian Restaurant,...,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tea Room,Thai Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Yoshoku Restaurant
0,Bayview-Hunters Point,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.181818,0.0,0.0
1,Castro/Noe Valley,0.082192,0.0,0.0,0.0,0.027397,0.054795,0.013699,0.0,0.0,...,0.219178,0.013699,0.0,0.0,0.260274,0.0,0.0,0.082192,0.0,0.0
2,Chinatown,0.172662,0.0,0.007194,0.0,0.0,0.258993,0.0,0.021583,0.0,...,0.093525,0.0,0.0,0.0,0.115108,0.0,0.007194,0.021583,0.0,0.0
3,Haight-Ashbury,0.064516,0.0,0.0,0.0,0.021505,0.075269,0.010753,0.0,0.0,...,0.215054,0.010753,0.0,0.0,0.204301,0.0,0.0,0.053763,0.0,0.0
4,Hayes Valley/Tenderloin/North of Market,0.122951,0.0,0.0,0.0,0.0,0.139344,0.0,0.02459,0.0,...,0.155738,0.0,0.0,0.0,0.155738,0.0,0.008197,0.098361,0.008197,0.0
5,Ingelside-Excelsior/Crocker-Amazon,0.071429,0.0,0.0,0.0,0.0,0.107143,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.053571,0.0,0.0,0.142857,0.0,0.0
6,Inner Mission/Bernal Heights,0.091837,0.0,0.0,0.0,0.010204,0.061224,0.020408,0.0,0.020408,...,0.193878,0.0,0.0,0.010204,0.102041,0.0,0.010204,0.102041,0.0,0.0
7,Inner Richmond,0.183486,0.018349,0.009174,0.0,0.0,0.073394,0.009174,0.018349,0.0,...,0.091743,0.0,0.0,0.0,0.082569,0.0,0.0,0.055046,0.0,0.0
8,Lake Merced,0.095238,0.0,0.0,0.0,0.0,0.238095,0.0,0.0,0.0,...,0.071429,0.0,0.02381,0.0,0.095238,0.02381,0.0,0.047619,0.0,0.0
9,Marina,0.153846,0.0,0.015385,0.015385,0.0,0.046154,0.015385,0.015385,0.0,...,0.230769,0.0,0.0,0.0,0.076923,0.0,0.0,0.076923,0.0,0.0


In [109]:
# For every neighborhood, print its five most frequent Asian venue categories.
num_top_venues = 5

for hood in sf_asian_g['Neighborhood']:
    print("----"+hood+"----")

    # one row for this neighborhood, flipped so categories run down the rows
    hood_row = sf_asian_g[sf_asian_g['Neighborhood'] == hood]
    freq_table = hood_row.T.reset_index()
    freq_table.columns = ['venue','freq']

    # skip the leading 'Neighborhood' entry, then rank the frequencies
    freq_table = (
        freq_table.iloc[1:]
        .assign(freq=lambda t: t['freq'].astype(float))
        .round({'freq': 2})
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
    )
    print(freq_table.head(num_top_venues))
    print('\n')

----Bayview-Hunters Point----
                   venue  freq
0     Chinese Restaurant  0.36
1  Vietnamese Restaurant  0.18
2       Asian Restaurant  0.09
3    Japanese Restaurant  0.09
4    Hawaiian Restaurant  0.09


----Castro/Noe Valley----
                   venue  freq
0        Thai Restaurant  0.26
1       Sushi Restaurant  0.22
2  Vietnamese Restaurant  0.08
3       Asian Restaurant  0.08
4     Chinese Restaurant  0.05


----Chinatown----
                venue  freq
0     Bubble Tea Shop  0.26
1    Asian Restaurant  0.17
2     Thai Restaurant  0.12
3  Chinese Restaurant  0.10
4    Sushi Restaurant  0.09


----Haight-Ashbury----
                venue  freq
0    Sushi Restaurant  0.22
1     Thai Restaurant  0.20
2     Bubble Tea Shop  0.08
3    Asian Restaurant  0.06
4  Chinese Restaurant  0.06


----Hayes Valley/Tenderloin/North of Market----
                   venue  freq
0       Sushi Restaurant  0.16
1        Thai Restaurant  0.16
2        Bubble Tea Shop  0.14
3       Asian R

In [110]:
# put into df with top 5 venues for each neighborhood
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of a row.

    The first entry of `row` is skipped (the caller passes rows whose first
    entry is the neighborhood name); the rest are sorted in descending order.

    Parameters
    ----------
    row : pandas.Series
        Leading identifier followed by one value per venue category.
    num_top_venues : int
        Maximum number of labels to return.

    Returns
    -------
    numpy.ndarray
        Index labels of the top entries, highest value first.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # ranks 1-3 take 'st'/'nd'/'rd' and every later rank takes 'th'; an explicit
    # bounds check replaces the bare `except:` that also hid unrelated errors
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighborhood
hood_asian_venue_sorted = pd.DataFrame(columns=columns)
hood_asian_venue_sorted['Neighborhood'] = sf_asian_g['Neighborhood']

# fill the ranking columns of each row from the matching row of sf_asian_g
for ind in np.arange(sf_asian_g.shape[0]):
    hood_asian_venue_sorted.iloc[ind, 1:] = return_most_common_venues(sf_asian_g.iloc[ind, :], num_top_venues)

hood_asian_venue_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Bayview-Hunters Point,Chinese Restaurant,Vietnamese Restaurant,Asian Restaurant,Sushi Restaurant,Japanese Restaurant
1,Castro/Noe Valley,Thai Restaurant,Sushi Restaurant,Asian Restaurant,Vietnamese Restaurant,Chinese Restaurant
2,Chinatown,Bubble Tea Shop,Asian Restaurant,Thai Restaurant,Chinese Restaurant,Sushi Restaurant
3,Haight-Ashbury,Sushi Restaurant,Thai Restaurant,Bubble Tea Shop,Asian Restaurant,Chinese Restaurant
4,Hayes Valley/Tenderloin/North of Market,Sushi Restaurant,Thai Restaurant,Bubble Tea Shop,Asian Restaurant,Vietnamese Restaurant


# 4. Cluster Neighborhoods

In [111]:
# set number of clusters
kclusters = 4

# features only: drop the name column by keyword, because the positional
# axis form `drop('Neighborhood', 1)` is rejected by pandas 2.x
sf_asian_g_c = sf_asian_g.drop(columns='Neighborhood')

# run k-means clustering
# n_init is pinned to 10 (the historical default) so results stay comparable
# across scikit-learn versions, whose default later changed to 'auto'
kmeans = KMeans(n_clusters = kclusters, random_state=0, n_init=10).fit(sf_asian_g_c)

# check cluster labels generated for the first ten rows of sf_asian_g
kmeans.labels_[0:10]

array([0, 1, 2, 1, 3, 0, 3, 0, 2, 3], dtype=int32)

In [112]:
# add clustering labels
# Remove labels left by an earlier run of this cell first: insert() raises
# ValueError when the column already exists, which made re-running fail.
if 'Cluster Labels' in hood_asian_venue_sorted.columns:
    hood_asian_venue_sorted = hood_asian_venue_sorted.drop(columns='Cluster Labels')
hood_asian_venue_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# attach each neighborhood's cluster label and top venues to the zip code,
# population and coordinates held in df (matched on 'Neighborhood')
sf_asian_merged = df.join(hood_asian_venue_sorted.set_index('Neighborhood'), on='Neighborhood')

sf_asian_merged.head()

Unnamed: 0,Zip Code,Neighborhood,Population (Census 2000),Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,94102,Hayes Valley/Tenderloin/North of Market,28991,37.78,-122.42,3,Sushi Restaurant,Thai Restaurant,Bubble Tea Shop,Asian Restaurant,Vietnamese Restaurant
2,94103,South of Market,23016,37.78,-122.41,2,Bubble Tea Shop,Thai Restaurant,Asian Restaurant,Sushi Restaurant,Vietnamese Restaurant
3,94107,Potrero Hill,17368,37.77,-122.39,3,Japanese Restaurant,Sushi Restaurant,Food Truck,Chinese Restaurant,Asian Restaurant
4,94108,Chinatown,13716,37.791,-122.409,2,Bubble Tea Shop,Asian Restaurant,Thai Restaurant,Chinese Restaurant,Sushi Restaurant
5,94109,Polk/Russian Hill (Nob Hill),56322,37.79,-122.42,2,Bubble Tea Shop,Asian Restaurant,Thai Restaurant,Sushi Restaurant,Chinese Restaurant


In [113]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters: one evenly spaced rainbow color per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# The join that builds sf_asian_merged leaves NaN labels for any neighborhood
# without a match; skip those rows since they have no cluster color.
labelled_hoods = sf_asian_merged.dropna(subset=['Cluster Labels'])

# add markers to the map
for lat, lon, poi, cluster in zip(labelled_hoods['Latitude'], labelled_hoods['Longitude'], labelled_hoods['Neighborhood'], labelled_hoods['Cluster Labels']):
    # labels are 0-based, so they index `rainbow` directly; the previous
    # `cluster-1` only worked because index -1 wraps around to the last color
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

# 5. Examine Neighborhood Clusters 

### Cluster 1: Neighborhoods with mainly Chinese restaurants and low density of Bubble Tea Shops

In [114]:
# Rows with k-means label 0; keeps column 1 (Neighborhood) plus everything
# from column 5 (Cluster Labels and the ranked venue columns) onward.
sf_asian_merged.loc[sf_asian_merged['Cluster Labels'] == 0, sf_asian_merged.columns[[1] + list(range(5, sf_asian_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
7,Ingelside-Excelsior/Crocker-Amazon,0,Chinese Restaurant,Vietnamese Restaurant,Bubble Tea Shop,Filipino Restaurant,Asian Restaurant
10,Parkside/Forest Hill,0,Chinese Restaurant,Bubble Tea Shop,Sushi Restaurant,Asian Restaurant,Japanese Restaurant
12,Inner Richmond,0,Chinese Restaurant,Asian Restaurant,Sushi Restaurant,Thai Restaurant,Bubble Tea Shop
13,Outer Richmond,0,Chinese Restaurant,Sushi Restaurant,Vietnamese Restaurant,Japanese Restaurant,Asian Restaurant
14,Sunset,0,Chinese Restaurant,Bubble Tea Shop,Japanese Restaurant,Asian Restaurant,Vietnamese Restaurant
16,Bayview-Hunters Point,0,Chinese Restaurant,Vietnamese Restaurant,Asian Restaurant,Sushi Restaurant,Japanese Restaurant
21,Visitacion Valley/Sunnydale,0,Vietnamese Restaurant,Chinese Restaurant,Asian Restaurant,Bubble Tea Shop,Dim Sum Restaurant


### Cluster 2: Neighborhoods with high density of Thai and Sushi restaurants

In [115]:
# Rows with k-means label 1; keeps column 1 (Neighborhood) plus everything
# from column 5 (Cluster Labels and the ranked venue columns) onward.
sf_asian_merged.loc[sf_asian_merged['Cluster Labels'] == 1, sf_asian_merged.columns[[1] + list(range(5, sf_asian_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
8,Castro/Noe Valley,1,Thai Restaurant,Sushi Restaurant,Asian Restaurant,Vietnamese Restaurant,Chinese Restaurant
11,Haight-Ashbury,1,Sushi Restaurant,Thai Restaurant,Bubble Tea Shop,Asian Restaurant,Chinese Restaurant
18,Twin Peaks-Glen Park,1,Sushi Restaurant,Thai Restaurant,Chinese Restaurant,Japanese Restaurant,Bubble Tea Shop


### Cluster 3: Neighborhoods with high density of bubble tea shops!

In [116]:
# Rows with k-means label 2; keeps column 1 (Neighborhood) plus everything
# from column 5 (Cluster Labels and the ranked venue columns) onward.
sf_asian_merged.loc[sf_asian_merged['Cluster Labels'] == 2, sf_asian_merged.columns[[1] + list(range(5, sf_asian_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,South of Market,2,Bubble Tea Shop,Thai Restaurant,Asian Restaurant,Sushi Restaurant,Vietnamese Restaurant
4,Chinatown,2,Bubble Tea Shop,Asian Restaurant,Thai Restaurant,Chinese Restaurant,Sushi Restaurant
5,Polk/Russian Hill (Nob Hill),2,Bubble Tea Shop,Asian Restaurant,Thai Restaurant,Sushi Restaurant,Chinese Restaurant
17,St. Francis Wood/Miraloma/West Portal,2,Bubble Tea Shop,Chinese Restaurant,Japanese Restaurant,Asian Restaurant,Vietnamese Restaurant
19,Lake Merced,2,Bubble Tea Shop,Chinese Restaurant,Japanese Restaurant,Asian Restaurant,Thai Restaurant


### Cluster 4: Neighborhoods with high density of Japanese/Sushi restaurants 

In [117]:
# Rows with k-means label 3; keeps column 1 (Neighborhood) plus everything
# from column 5 (Cluster Labels and the ranked venue columns) onward.
sf_asian_merged.loc[sf_asian_merged['Cluster Labels'] == 3, sf_asian_merged.columns[[1] + list(range(5, sf_asian_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,Hayes Valley/Tenderloin/North of Market,3,Sushi Restaurant,Thai Restaurant,Bubble Tea Shop,Asian Restaurant,Vietnamese Restaurant
3,Potrero Hill,3,Japanese Restaurant,Sushi Restaurant,Food Truck,Chinese Restaurant,Asian Restaurant
6,Inner Mission/Bernal Heights,3,Sushi Restaurant,Chinese Restaurant,Thai Restaurant,Vietnamese Restaurant,Asian Restaurant
9,Western Addition/Japantown,3,Sushi Restaurant,Asian Restaurant,Japanese Restaurant,Chinese Restaurant,Thai Restaurant
15,Marina,3,Sushi Restaurant,Asian Restaurant,Chinese Restaurant,Thai Restaurant,Japanese Restaurant
20,North Beach/Chinatown,3,Sushi Restaurant,Asian Restaurant,Thai Restaurant,Japanese Restaurant,Chinese Restaurant
