In [186]:
import pandas as pd
import numpy as np

import requests
from bs4 import BeautifulSoup
import json  
from pandas.io.json import json_normalize
from geopy.geocoders import Nominatim

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors

import seaborn as sns

#!conda install -c conda-forge folium=0.10.1 --yes 
#uncomment if you need to install folium
import folium #maps library

from sklearn.cluster import KMeans

In [187]:
link = requests.get("https://en.wikipedia.org/wiki/List_of_neighborhoods_in_San_Francisco")
soup = BeautifulSoup(link.text, "lxml")

neighborhoods_list = []
ignore = [
    'See also',
    'References',
    'External links',
    'Specific neighborhoods',
]
for article in soup.find_all(class_="mw-headline"):
    if not article.string in ignore:
        neighborhoods_list.append(article.string)

In [188]:
neighborhoods_list

['Alamo Square',
 'Anza Vista',
 'Ashbury Heights',
 'Balboa Park',
 'Balboa Terrace',
 'Bayview',
 'Belden Place',
 'Bernal Heights',
 'Buena Vista',
 'Butchertown (Old and New)',
 'Castro',
 'Cathedral Hill',
 'Cayuga Terrace',
 'China Basin',
 'Chinatown',
 'Civic Center',
 'Clarendon Heights',
 'Cole Valley',
 'Corona Heights',
 'Cow Hollow',
 'Crocker-Amazon',
 'Design District',
 'Diamond Heights',
 'Dogpatch',
 'Dolores Heights',
 'Duboce Triangle',
 'Embarcadero',
 'Eureka Valley',
 'Excelsior',
 'Fillmore',
 'Financial District',
 'Financial District South',
 "Fisherman's Wharf",
 'Forest Hill',
 'Forest Knolls',
 'Glen Park',
 'Golden Gate Heights',
 'Haight-Ashbury',
 'Hayes Valley',
 'Hunters Point',
 'India Basin',
 'Ingleside',
 'Ingleside Terraces',
 'Inner Sunset',
 'Irish Hill',
 'Islais Creek',
 'Jackson Square',
 'Japantown',
 'Jordan Park',
 'Laguna Honda',
 'Lake Street',
 'Lakeside',
 'Lakeshore',
 'Laurel Heights',
 'Lincoln Manor',
 'Little Hollywood',
 'Little 

In [189]:
df = pd.DataFrame(neighborhoods_list)

In [190]:
df.rename(columns={0:'Neighborhood'}, inplace = True)

In [191]:
df

Unnamed: 0,Neighborhood
0,Alamo Square
1,Anza Vista
2,Ashbury Heights
3,Balboa Park
4,Balboa Terrace
...,...
114,West Portal
115,Western Addition
116,Westwood Highlands
117,Westwood Park


In [192]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(df)

                  Neighborhood
0                 Alamo Square
1                   Anza Vista
2              Ashbury Heights
3                  Balboa Park
4               Balboa Terrace
5                      Bayview
6                 Belden Place
7               Bernal Heights
8                  Buena Vista
9    Butchertown (Old and New)
10                      Castro
11              Cathedral Hill
12              Cayuga Terrace
13                 China Basin
14                   Chinatown
15                Civic Center
16           Clarendon Heights
17                 Cole Valley
18              Corona Heights
19                  Cow Hollow
20              Crocker-Amazon
21             Design District
22             Diamond Heights
23                    Dogpatch
24             Dolores Heights
25             Duboce Triangle
26                 Embarcadero
27               Eureka Valley
28                   Excelsior
29                    Fillmore
30          Financial District
31    Fi

In [193]:
#correct mistakes
df.loc[118, "Neighborhood"] = "Butchertown"
df.loc[98, "Neighborhood"] = "South End"
df.loc[99, "Neighborhood"] = "South of Market"
df.loc[68, "Neighborhood"] = "Mission"

In [194]:
#!pip install opencage
from opencage.geocoder import OpenCageGeocode

In [195]:
KEY = 'DELETED'
geocoder = OpenCageGeocode(KEY)

In [196]:
list_lat = []   # create empty lists

list_lng = []

for index, row in df.iterrows(): # iterate over rows in dataframe
    Neigborhood = row['Neighborhood']
    City = 'San Francisco'
    State = 'CA'
    query = str(Neigborhood)+', '+str(City)+', '+str(State)

    results = geocoder.geocode(query)   
    lat = results[0]['geometry']['lat']
    lng = results[0]['geometry']['lng']

    list_lat.append(lat)
    list_lng.append(lng)


# create new columns from lists    

df['lat'] = list_lat   

df['lng'] = list_lng

In [197]:
df

Unnamed: 0,Neighborhood,lat,lng
0,Alamo Square,37.776360,-122.434688
1,Anza Vista,37.780836,-122.443149
2,Ashbury Heights,37.775599,-122.448068
3,Balboa Park,37.721427,-122.447547
4,Balboa Terrace,32.809471,-117.208557
...,...,...,...
114,West Portal,37.741141,-122.465634
115,Western Addition,37.779559,-122.429810
116,Westwood Highlands,37.725726,-122.458199
117,Westwood Park,37.725726,-122.458199


In [198]:
df = df.loc[df['lng'] < -122]


In [199]:
df

Unnamed: 0,Neighborhood,lat,lng
0,Alamo Square,37.776360,-122.434688
1,Anza Vista,37.780836,-122.443149
2,Ashbury Heights,37.775599,-122.448068
3,Balboa Park,37.721427,-122.447547
5,Bayview,37.728889,-122.392500
...,...,...,...
114,West Portal,37.741141,-122.465634
115,Western Addition,37.779559,-122.429810
116,Westwood Highlands,37.725726,-122.458199
117,Westwood Park,37.725726,-122.458199


In [200]:
df = df.reset_index(drop=True)
df
                    

Unnamed: 0,Neighborhood,lat,lng
0,Alamo Square,37.776360,-122.434688
1,Anza Vista,37.780836,-122.443149
2,Ashbury Heights,37.775599,-122.448068
3,Balboa Park,37.721427,-122.447547
4,Bayview,37.728889,-122.392500
...,...,...,...
101,West Portal,37.741141,-122.465634
102,Western Addition,37.779559,-122.429810
103,Westwood Highlands,37.725726,-122.458199
104,Westwood Park,37.725726,-122.458199


In [201]:
sf_map = folium.Map(
    location=[37.7647993, -122.4629897],  
    zoom_start=12, 
)


# add markers to map
for lat, lng, neighborhood in zip(df['lat'], df['lng'], df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(sf_map)
sf_map

In [202]:
CLIENT_ID = '3V0D42QQYPYGM0CSXWBVPA3IY4LOETYJ1CUAOHIBAHEQQOTR' # your Foursquare ID
CLIENT_SECRET = 'PR3RJSSBXWPCBSDI1MVIDYMFRPB2S0RMPBTHQDNTS3COUI0X' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 3V0D42QQYPYGM0CSXWBVPA3IY4LOETYJ1CUAOHIBAHEQQOTR
CLIENT_SECRET:PR3RJSSBXWPCBSDI1MVIDYMFRPB2S0RMPBTHQDNTS3COUI0X


In [203]:
df.loc[0,'Neighborhood']
neighborhood_latitude = df.loc[0, 'lat'] # neighborhood latitude value
neighborhood_longitude = df.loc[0, 'lng'] # neighborhood longitude value

neighborhood_name = 'Alamo Square' # Neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Alamo Square are 37.7763598, -122.4346885.


In [204]:
LIMIT = 200
radius = 1000
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=3V0D42QQYPYGM0CSXWBVPA3IY4LOETYJ1CUAOHIBAHEQQOTR&client_secret=PR3RJSSBXWPCBSDI1MVIDYMFRPB2S0RMPBTHQDNTS3COUI0X&v=20180605&ll=37.7763598,-122.4346885&radius=1000&limit=200'

In [205]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ecd90990be7b4001b27207c'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': '$-$$$$', 'key': 'price'},
    {'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Alamo Square',
  'headerFullLocation': 'Alamo Square, San Francisco',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 127,
  'suggestedBounds': {'ne': {'lat': 37.78535980900001,
    'lng': -122.42332322170856},
   'sw': {'lat': 37.76735979099999, 'lng': -122.44605377829143}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4460d38bf964a5200a331fe3',
       'name': 'Alamo Square',
       'location': {'address': 'Steiner St',
        'crossStreet': 'btwn Fulton & Hayes St',
        'lat': 37.77604493890036,
        '

In [206]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [207]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Alamo Square,Park,37.776045,-122.434363
1,Alamo Square Dog Park,Dog Run,37.775878,-122.435740
2,Painted Ladies,Historic Site,37.776120,-122.433389
3,The Independent,Rock Club,37.775573,-122.437835
4,The Mill,Bakery,37.776425,-122.437970
...,...,...,...,...
95,Mazzat,Mediterranean Restaurant,37.775514,-122.426140
96,Marine Layer,Clothing Store,37.776689,-122.424547
97,El Castillito,Mexican Restaurant,37.768764,-122.429308
98,Sheba Piano Lounge,Jazz Club,37.783107,-122.432823


In [208]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))


100 venues were returned by Foursquare.


In [209]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [210]:
sf_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['lat'],
                                   longitudes=df['lng']
                                  )

Alamo Square
Anza Vista
Ashbury Heights
Balboa Park
Bayview
Belden Place
Bernal Heights
Buena Vista
Butchertown (Old and New)
Castro
Cayuga Terrace
China Basin
Chinatown
Civic Center
Clarendon Heights
Cole Valley
Corona Heights
Cow Hollow
Crocker-Amazon
Design District
Diamond Heights
Dogpatch
Dolores Heights
Duboce Triangle
Embarcadero
Eureka Valley
Excelsior
Fillmore
Financial District
Financial District South
Fisherman's Wharf
Forest Hill
Forest Knolls
Glen Park
Golden Gate Heights
Haight-Ashbury
Hayes Valley
Hunters Point
India Basin
Ingleside
Ingleside Terraces
Inner Sunset
Irish Hill
Islais Creek
Japantown
Jordan Park
Laguna Honda
Lake Street
Laurel Heights
Little Hollywood
Little Russia
Little Saigon
Lone Mountain
Lower Haight
Lower Pacific Heights
Lower Nob Hill
Marina District
Merced Heights
Merced Manor
Midtown Terrace
Mid-Market
Miraloma Park
Mission
Mission District
Mission Dolores
Monterey Heights
Mount Davidson
Nob Hill
Noe Valley
North Beach
North of Panhandle
Oceanview


In [211]:
print(sf_venues.shape)
sf_venues

(5107, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Alamo Square,37.776360,-122.434688,Alamo Square,37.776045,-122.434363,Park
1,Alamo Square,37.776360,-122.434688,Alamo Square Dog Park,37.775878,-122.435740,Dog Run
2,Alamo Square,37.776360,-122.434688,Painted Ladies,37.776120,-122.433389,Historic Site
3,Alamo Square,37.776360,-122.434688,The Independent,37.775573,-122.437835,Rock Club
4,Alamo Square,37.776360,-122.434688,The Mill,37.776425,-122.437970,Bakery
...,...,...,...,...,...,...,...
5102,Westwood Park,37.725726,-122.458199,Old Balboa Reservoir Berm,37.727362,-122.453289,Dog Run
5103,Westwood Park,37.725726,-122.458199,Orchids Cafe,37.723141,-122.453710,Cha Chaan Teng
5104,Westwood Park,37.725726,-122.458199,Wiley's No Limit Liquor & Food Mart,37.723354,-122.453505,Liquor Store
5105,Butchertown,37.784827,-122.727802,JazzDeck,37.784800,-122.727800,Music Venue


In [212]:
sf_venues.groupby('Neighborhood')['Venue'].count().reset_index(name='count').sort_values(['count'], ascending=False).head()


Unnamed: 0,Neighborhood,count
89,South of Market,196
37,Hayes Valley,100
35,Golden Gate Heights,100
13,Chinatown,100
9,Butchertown (Old and New),100


In [213]:
print('There are {} uniques categories.'.format(len(sf_venues['Venue Category'].unique())))


There are 338 uniques categories.


In [214]:
# one hot encoding
sf_onehot = pd.get_dummies(sf_venues[['Venue Category']], prefix="", prefix_sep="")

# add city column back to dataframe
sf_onehot['Neighborhood'] = sf_venues['Neighborhood'] 

# move city column to the first column
fixed_columns = [sf_onehot.columns[-1]] + list(sf_onehot.columns[:-1])
sf_onehot = sf_onehot[fixed_columns]

sf_onehot.head()

Unnamed: 0,Yoga Studio,ATM,Acai House,Accessories Store,Acupuncturist,Adult Boutique,Alternative Healer,American Restaurant,Animal Shelter,Antique Shop,...,Video Store,Vietnamese Restaurant,Vineyard,Warehouse,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [215]:
sf_grouped = sf_onehot.groupby('Neighborhood').mean().reset_index()
sf_grouped

Unnamed: 0,Neighborhood,Yoga Studio,ATM,Acai House,Accessories Store,Acupuncturist,Adult Boutique,Alternative Healer,American Restaurant,Animal Shelter,...,Video Store,Vietnamese Restaurant,Vineyard,Warehouse,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store
0,Alamo Square,0.013514,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.027027,0.000000,0.0,0.0,0.0
1,Anza Vista,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0
2,Ashbury Heights,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.038462,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0
3,Balboa Park,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.055556,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0
4,Bayview,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,Vista del Mar,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.031579,0.010526,0.0,0.0,0.0
101,West Portal,0.023810,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.047619,0.023810,0.0,0.0,0.0
102,Western Addition,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.038462,0.000000,0.0,0.0,0.0
103,Westwood Highlands,0.056604,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.000000,0.037736,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0


In [216]:
num_top_venues = 5

for hood in sf_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = sf_grouped[sf_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Alamo Square----
                venue  freq
0                 Bar  0.05
1         Record Shop  0.03
2  Seafood Restaurant  0.03
3           BBQ Joint  0.03
4               Hotel  0.03


----Anza Vista----
                     venue  freq
0                     Café  0.16
1             Burger Joint  0.05
2            Big Box Store  0.05
3  Health & Beauty Service  0.05
4       Mexican Restaurant  0.05


----Ashbury Heights----
                         venue  freq
0                         Café  0.15
1                         Bank  0.08
2                  Coffee Shop  0.08
3                  Yoga Studio  0.04
4  Eastern European Restaurant  0.04


----Balboa Park----
              venue  freq
0    Baseball Field  0.17
1              Café  0.11
2  Asian Restaurant  0.06
3       Bus Station  0.06
4    Breakfast Spot  0.06


----Bayview----
                             venue  freq
0  Southern / Soul Food Restaurant  0.13
1                           Bakery  0.13
2                     Hom

In [217]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [218]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = sf_grouped['Neighborhood']

for ind in np.arange(sf_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(sf_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Alamo Square,Bar,Hotel,Café,Record Shop,Sushi Restaurant,Liquor Store,BBQ Joint,Seafood Restaurant,Park,Wine Bar
1,Anza Vista,Café,Cosmetics Shop,Tunnel,Burger Joint,Southern / Soul Food Restaurant,Big Box Store,Mexican Restaurant,Grocery Store,Health & Beauty Service,Liquor Store
2,Ashbury Heights,Café,Bank,Coffee Shop,Dog Run,Supermarket,Middle Eastern Restaurant,Mexican Restaurant,Outdoor Sculpture,Sculpture Garden,Massage Studio
3,Balboa Park,Baseball Field,Café,Breakfast Spot,Bus Station,Flower Shop,BBQ Joint,Asian Restaurant,Light Rail Station,Skate Park,Bus Stop
4,Bayview,Bakery,Southern / Soul Food Restaurant,Café,Mexican Restaurant,Dumpling Restaurant,Pharmacy,Light Rail Station,Home Service,Coffee Shop,Piercing Parlor


In [236]:
kclusters = 4

sf_grouped_clustering = sf_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(sf_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 3, 1], dtype=int32)

In [237]:
neighborhoods_venues_sorted.drop(['Labels'],axis=1,inplace = True)

In [238]:
neighborhoods_venues_sorted.insert(0, 'Labels', kmeans.labels_)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(neighborhoods_venues_sorted)

     Labels               Neighborhood       1st Most Common Venue  \
0         1               Alamo Square                         Bar   
1         1                 Anza Vista                        Café   
2         1            Ashbury Heights                        Café   
3         1                Balboa Park              Baseball Field   
4         1                    Bayview                      Bakery   
5         1               Belden Place                 Coffee Shop   
6         1             Bernal Heights                 Coffee Shop   
7         1                Buena Vista               Historic Site   
8         3                Butchertown                 Music Venue   
9         1  Butchertown (Old and New)                 Coffee Shop   
10        1                     Castro                     Gay Bar   
11        3             Cayuga Terrace                 Music Venue   
12        1                China Basin            Baseball Stadium   
13        1         

In [239]:
# # add clustering labels

sf_merged = df

# merge bay_area_grouped with df_merged to add latitude/longitude for each city
sf_merged = sf_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

sf_merged # check the last columns!

Unnamed: 0,Neighborhood,lat,lng,Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Alamo Square,37.776360,-122.434688,1,Bar,Hotel,Café,Record Shop,Sushi Restaurant,Liquor Store,BBQ Joint,Seafood Restaurant,Park,Wine Bar
1,Anza Vista,37.780836,-122.443149,1,Café,Cosmetics Shop,Tunnel,Burger Joint,Southern / Soul Food Restaurant,Big Box Store,Mexican Restaurant,Grocery Store,Health & Beauty Service,Liquor Store
2,Ashbury Heights,37.775599,-122.448068,1,Café,Bank,Coffee Shop,Dog Run,Supermarket,Middle Eastern Restaurant,Mexican Restaurant,Outdoor Sculpture,Sculpture Garden,Massage Studio
3,Balboa Park,37.721427,-122.447547,1,Baseball Field,Café,Breakfast Spot,Bus Station,Flower Shop,BBQ Joint,Asian Restaurant,Light Rail Station,Skate Park,Bus Stop
4,Bayview,37.728889,-122.392500,1,Bakery,Southern / Soul Food Restaurant,Café,Mexican Restaurant,Dumpling Restaurant,Pharmacy,Light Rail Station,Home Service,Coffee Shop,Piercing Parlor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101,West Portal,37.741141,-122.465634,1,Italian Restaurant,Wine Bar,Burger Joint,Mexican Restaurant,Chinese Restaurant,Coffee Shop,Pizza Place,Yoga Studio,Diner,Bookstore
102,Western Addition,37.779559,-122.429810,1,Liquor Store,Boutique,Grocery Store,Park,Playground,Seafood Restaurant,Farmers Market,Theater,Historic Site,German Restaurant
103,Westwood Highlands,37.725726,-122.458199,1,Asian Restaurant,Yoga Studio,Chinese Restaurant,Café,Pharmacy,Coffee Shop,Bubble Tea Shop,Mexican Restaurant,Grocery Store,Bank
104,Westwood Park,37.725726,-122.458199,1,Asian Restaurant,Yoga Studio,Chinese Restaurant,Café,Pharmacy,Coffee Shop,Bubble Tea Shop,Mexican Restaurant,Grocery Store,Bank


In [240]:
sf_merged.loc[sf_merged['Labels'] == 0, sf_merged.columns[[0] + list(range(4, sf_merged.shape[1]))]]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Clarendon Heights,Trail,Park,Art Gallery,Wine Bar,Garden,Reservoir,Monument / Landmark,Playground,Bus Stop,Eye Doctor
31,Forest Hill,Japanese Restaurant,Playground,Park,Hotpot Restaurant,French Restaurant,Football Stadium,Food Truck,Eye Doctor,Falafel Restaurant,Farmers Market
46,Laguna Honda,Trail,Light Rail Station,Jewelry Store,Art Gallery,Hotpot Restaurant,Lake,French Restaurant,Park,Event Space,Bus Stop
58,Merced Manor,Trail,Light Rail Station,Jewelry Store,Art Gallery,Hotpot Restaurant,Lake,French Restaurant,Park,Event Space,Bus Stop
61,Miraloma Park,Bus Stop,Jewelry Store,Park,Trail,Gym,Mountain,Monument / Landmark,Farmers Market,Fast Food Restaurant,Filipino Restaurant
96,Twin Peaks,Trail,Scenic Lookout,Hill,Bus Stop,Bus Station,Reservoir,Food Truck,Filipino Restaurant,Eye Doctor,Fountain


In [241]:
sf_merged.loc[sf_merged['Labels'] == 1, sf_merged.columns[[0] + list(range(4, sf_merged.shape[1]))]]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Alamo Square,Bar,Hotel,Café,Record Shop,Sushi Restaurant,Liquor Store,BBQ Joint,Seafood Restaurant,Park,Wine Bar
1,Anza Vista,Café,Cosmetics Shop,Tunnel,Burger Joint,Southern / Soul Food Restaurant,Big Box Store,Mexican Restaurant,Grocery Store,Health & Beauty Service,Liquor Store
2,Ashbury Heights,Café,Bank,Coffee Shop,Dog Run,Supermarket,Middle Eastern Restaurant,Mexican Restaurant,Outdoor Sculpture,Sculpture Garden,Massage Studio
3,Balboa Park,Baseball Field,Café,Breakfast Spot,Bus Station,Flower Shop,BBQ Joint,Asian Restaurant,Light Rail Station,Skate Park,Bus Stop
4,Bayview,Bakery,Southern / Soul Food Restaurant,Café,Mexican Restaurant,Dumpling Restaurant,Pharmacy,Light Rail Station,Home Service,Coffee Shop,Piercing Parlor
...,...,...,...,...,...,...,...,...,...,...,...
100,Vista del Mar,Coffee Shop,Café,Hotel,Park,Wine Bar,Theater,Cocktail Bar,Gym,Sushi Restaurant,Juice Bar
101,West Portal,Italian Restaurant,Wine Bar,Burger Joint,Mexican Restaurant,Chinese Restaurant,Coffee Shop,Pizza Place,Yoga Studio,Diner,Bookstore
102,Western Addition,Liquor Store,Boutique,Grocery Store,Park,Playground,Seafood Restaurant,Farmers Market,Theater,Historic Site,German Restaurant
103,Westwood Highlands,Asian Restaurant,Yoga Studio,Chinese Restaurant,Café,Pharmacy,Coffee Shop,Bubble Tea Shop,Mexican Restaurant,Grocery Store,Bank


In [242]:
sf_merged.loc[sf_merged['Labels'] == 2, sf_merged.columns[[0] + list(range(4, sf_merged.shape[1]))]]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
38,India Basin,Home Service,Park,Women's Store,Flower Shop,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish Market,Flea Market
55,Lower Nob Hill,Park,Convenience Store,Trail,Road,Dog Run,Scenic Lookout,Shoe Store,Food,Monument / Landmark,Szechuan Restaurant
66,Mount Davidson,Park,Playground,Bus Line,Monument / Landmark,Tree,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish Market
79,Potrero Hill,Grocery Store,Park,Hill,Japanese Restaurant,Gym / Fitness Center,Deli / Bodega,Cosmetics Shop,Convenience Store,Café,Liquor Store
86,Silver Terrace,Grocery Store,Park,Athletics & Sports,Dessert Shop,Soccer Field,Women's Store,Flea Market,Farmers Market,Fast Food Restaurant,Filipino Restaurant


In [243]:
sf_merged.loc[sf_merged['Labels'] == 3, sf_merged.columns[[0] + list(range(4, sf_merged.shape[1]))]]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Cayuga Terrace,Music Venue,Park,Food,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish Market,Flea Market,Flower Shop
59,Midtown Terrace,Music Venue,Park,Food,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish Market,Flea Market,Flower Shop
60,Mid-Market,Music Venue,Park,Food,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish Market,Flea Market,Flower Shop
105,Butchertown,Music Venue,Park,Food,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish Market,Flea Market,Flower Shop


In [244]:
# create map
map_clusters = folium.Map(
    location=[37.7647993, -122.4629897],
    tiles='Stamen Toner',
    zoom_start=12, 
)
# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(sf_merged['lat'], sf_merged['lng'], sf_merged['Neighborhood'], sf_merged['Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

In [245]:
map_clusters