# IBM Capstone project

In [1]:
# import required libraries

import numpy as np

import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from bs4 import BeautifulSoup

import requests # library to handle requests

# import k-means from clustering stage
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

import folium # map rendering library

import matplotlib.cm as cm
import matplotlib.colors as colors

# import geocoder
%pip install geocoder
import geocoder 
from geopy.geocoders import Nominatim

print('Libraries imported.')

Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |███▎                            | 10kB 16.0MB/s eta 0:00:01[K     |██████▋                         | 20kB 6.5MB/s eta 0:00:01[K     |██████████                      | 30kB 6.7MB/s eta 0:00:01[K     |█████████████▎                  | 40kB 5.5MB/s eta 0:00:01[K     |████████████████▋               | 51kB 4.8MB/s eta 0:00:01[K     |████████████████████            | 61kB 4.6MB/s eta 0:00:01[K     |███████████████████████▎        | 71kB 4.7MB/s eta 0:00:01[K     |██████████████████████████▋     | 81kB 4.9MB/s eta 0:00:01[K     |██████████████████████████████  | 92kB 4.6MB/s eta 0:00:01[K     |████████████████████████████████| 102kB 3.7MB/s 
Collecting ratelim
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad4

# Get the list of San Francisco neighborhoods from Wiki

In [103]:
# Extract list of San Francisco areas using BeautifullSoup
source = requests.get('https://en.wikipedia.org/wiki/List_of_neighborhoods_in_San_Francisco').text

soup = BeautifulSoup(source, 'html5lib')
neibname = []
for name in soup.find_all('span', class_='mw-headline'):
  neibname += [name.text]
neibname = neibname[0:-4]
print(neibname)

['Alamo Square', 'Anza Vista', 'Ashbury Heights', 'Balboa Park', 'Balboa Terrace', 'Bayview', 'Belden Place', 'Bernal Heights', 'Buena Vista', 'Butchertown (Old and New)', 'Castro', 'Cathedral Hill', 'Cayuga Terrace', 'China Basin', 'Chinatown', 'Civic Center', 'Clarendon Heights', 'Cole Valley', 'Corona Heights', 'Cow Hollow', 'Crocker-Amazon', 'Design District', 'Diamond Heights', 'Dogpatch', 'Dolores Heights', 'Duboce Triangle', 'Embarcadero', 'Eureka Valley', 'Excelsior', 'Fillmore', 'Financial District', 'Financial District South', "Fisherman's Wharf", 'Forest Hill', 'Forest Knolls', 'Glen Park', 'Golden Gate Heights', 'Haight-Ashbury', 'Hayes Valley', 'Hunters Point', 'India Basin', 'Ingleside', 'Ingleside Terraces', 'Inner Sunset', 'Irish Hill', 'Islais Creek', 'Jackson Square', 'Japantown', 'Jordan Park', 'Laguna Honda', 'Lake Street', 'Lakeside', 'Lakeshore', 'Laurel Heights', 'Lincoln Manor', 'Little Hollywood', 'Little Russia', 'Little Saigon', 'Lone Mountain', 'Lower Haight

# Coordinates of neibs from geocoder

In [104]:
# define the dataframe columns
column_names = ['Neiborhood', 'Latitude', 'Longitude'] 

# initiate the dataframe
san_fran = pd.DataFrame(columns=column_names)

san_fran

Unnamed: 0,Neiborhood,Latitude,Longitude


In [105]:
# extract coordonates from geocoder

Latitude = []
Longitude = []

for neib in neibname:
  lat_lng_coords = None

  while(lat_lng_coords is None):
    g = geocoder.arcgis('{}, San Francisco, CA, USA'.format(neib))
    lat_lng_coords = g.latlng
    Latitude.append(g.latlng[0])
    Longitude.append(g.latlng[1])
    
san_fran['Neiborhood'] = neibname
san_fran['Latitude'] = Latitude
san_fran['Longitude'] = Longitude
san_fran.head()


Unnamed: 0,Neiborhood,Latitude,Longitude
0,Alamo Square,37.77722,-122.43146
1,Anza Vista,37.78048,-122.44358
2,Ashbury Heights,37.76467,-122.44587
3,Balboa Park,37.72493,-122.44314
4,Balboa Terrace,37.7318,-122.4674


In [106]:
san_fran.shape

(119, 3)

In [107]:
address = 'San Francisco, USA'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of San Francisco, USA are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of San Francisco, USA are 37.7790262, -122.4199061.


In [0]:
# specify details on some areas (geocoder has a bad day sometime)
san_fran.loc[113:113, ('Latitude')] = 37.779005
san_fran.loc[113:113, ('Longitude')] = -122.507642
san_fran.loc[57:57, ('Latitude')] = 37.784260
san_fran.loc[57:57, ('Longitude')] = -122.417724
san_fran.loc[11:11, ('Latitude')] = 37.784344
san_fran.loc[11:11, ('Longitude')] = -122.424496


In [111]:
# map it
map_san_fran = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, n in zip(san_fran['Latitude'], san_fran['Longitude'], san_fran['Neiborhood']):
    label = '{}'.format(n)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_san_fran)  
    
map_san_fran

# Get venue list from Foursquare

In [0]:
# Foursquare ID
CLIENT_ID = 'O1ABNSSJ1HBKFPCMYENYNATS44ADNBZZW5Y2PU1U2JLF5POV' 
CLIENT_SECRET = 'NW3UZGYEJ41WWQFBVEPHZQZSDCJE5R2BSTKD2KXO5VPGXBZO' 
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: O1ABNSSJ1HBKFPCMYENYNATS44ADNBZZW5Y2PU1U2JLF5POV
CLIENT_SECRET:NW3UZGYEJ41WWQFBVEPHZQZSDCJE5R2BSTKD2KXO5VPGXBZO


In [0]:
# function to get venues with it's location and id to all the neighborhoods of SF
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['id'],
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue id',
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [0]:
# and get the venues
LIMIT = 100
radius = 500

venues = getNearbyVenues(names=san_fran['Neiborhood'],
                                   latitudes=san_fran['Latitude'],
                                   longitudes=san_fran['Longitude']
                                  )

Alamo Square
Anza Vista
Ashbury Heights
Balboa Park
Balboa Terrace
Bayview
Belden Place
Bernal Heights
Buena Vista
Butchertown (Old and New)
Castro
Cathedral Hill
Cayuga Terrace
China Basin
Chinatown
Civic Center
Clarendon Heights
Cole Valley
Corona Heights
Cow Hollow
Crocker-Amazon
Design District
Diamond Heights
Dogpatch
Dolores Heights
Duboce Triangle
Embarcadero
Eureka Valley
Excelsior
Fillmore
Financial District
Financial District South
Fisherman's Wharf
Forest Hill
Forest Knolls
Glen Park
Golden Gate Heights
Haight-Ashbury
Hayes Valley
Hunters Point
India Basin
Ingleside
Ingleside Terraces
Inner Sunset
Irish Hill
Islais Creek
Jackson Square
Japantown
Jordan Park
Laguna Honda
Lake Street
Lakeside
Lakeshore
Laurel Heights
Lincoln Manor
Little Hollywood
Little Russia
Little Saigon
Lone Mountain
Lower Haight
Lower Pacific Heights
Lower Nob Hill
Marina District
Merced Heights
Merced Manor
Midtown Terrace
Mid-Market
Miraloma Park
Mission Bay
Mission District
Mission Dolores
Mission Ter

In [0]:
print(venues.shape)
venues.head(10)

(4783, 8)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue id,Venue Latitude,Venue Longitude,Venue Category
0,Alamo Square,37.77722,-122.43146,Painted Ladies,4b9afa7ef964a520c1e835e3,37.77612,-122.433389,Historic Site
1,Alamo Square,37.77722,-122.43146,Alamo Square,4460d38bf964a5200a331fe3,37.776045,-122.434363,Park
2,Alamo Square,37.77722,-122.43146,Church of 8 Wheels,5296faaf498ea202f87be0f8,37.774733,-122.430862,Roller Rink
3,Alamo Square,37.77722,-122.43146,The Center SF,522926b5498e3269edeba28d,37.774545,-122.43073,Spiritual Center
4,Alamo Square,37.77722,-122.43146,Originals Vinyl,5802ebd1d67c1420ae082a52,37.775835,-122.431227,Record Shop
5,Alamo Square,37.77722,-122.43146,Kebab King,56fef705498ed9078efe13ac,37.779786,-122.431589,Pakistani Restaurant
6,Alamo Square,37.77722,-122.43146,Alamo Square Dog Park,4c2f7b013896e21e7efee390,37.775878,-122.43574,Dog Run
7,Alamo Square,37.77722,-122.43146,African American Art & Culture Center,4b03763ef964a520824f22e3,37.778329,-122.429306,Museum
8,Alamo Square,37.77722,-122.43146,Petit Crenn,55c29cba498e3e3e7a929bb7,37.776344,-122.426455,French Restaurant
9,Alamo Square,37.77722,-122.43146,Suppenküche,42c5d900f964a520d5251fe3,37.776324,-122.426382,German Restaurant


In [0]:
# save to file
from google.colab import files

venues.to_csv('sf-venues.csv')
files.download('sf-venues.csv')

# Clean the data

In [112]:
# upload from file saved on previous step
url = 'https://raw.githubusercontent.com/aka-tori/Coursera_Capstone/master/sf-venues.csv'
venues = pd.read_csv(url)
venues.drop('Unnamed: 0', axis=1, inplace=True)
venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue id,Venue Latitude,Venue Longitude,Venue Category
0,Alamo Square,37.77722,-122.43146,Painted Ladies,4b9afa7ef964a520c1e835e3,37.77612,-122.433389,Historic Site
1,Alamo Square,37.77722,-122.43146,Alamo Square,4460d38bf964a5200a331fe3,37.776045,-122.434363,Park
2,Alamo Square,37.77722,-122.43146,Church of 8 Wheels,5296faaf498ea202f87be0f8,37.774733,-122.430862,Roller Rink
3,Alamo Square,37.77722,-122.43146,The Center SF,522926b5498e3269edeba28d,37.774545,-122.43073,Spiritual Center
4,Alamo Square,37.77722,-122.43146,Originals Vinyl,5802ebd1d67c1420ae082a52,37.775835,-122.431227,Record Shop


In [14]:
print('There are {} uniques venues.'.format(len(venues['Venue id'].unique())))
print('There are {} uniques categories.'.format(len(venues['Venue Category'].unique())))


There are 3091 uniques venues.
There are 355 uniques categories.


In [15]:
# We are looking for restaurants category only, so clean our dataframe

# filter for restaurants 
sfv = venues[venues['Venue Category'].str.contains('Restaurant')]

# get unique venue id's only
sfv = sfv.sort_values('Venue id')
sfv.reset_index(drop=True, inplace=True)

indlist = []
prev = 0
for i, vid in enumerate(sfv['Venue id']):
  if vid != prev:
    indlist.append(i)
    prev = vid
  else:
    prev = vid

sfv = sfv.loc[indlist]
sfv.head()

print('There are {} uniques venues.'.format(len(sfv['Venue id'].unique())))
print('There are {} uniques categories.'.format(len(sfv['Venue Category'].unique())))

There are 630 uniques venues.
There are 69 uniques categories.


In [16]:
sfv.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue id,Venue Latitude,Venue Longitude,Venue Category
0,Nob Hill,37.79336,-122.41787,Acquerello,3fd66200f964a52013ed1ee3,37.791669,-122.421407,Italian Restaurant
1,Inner Sunset,37.76211,-122.46886,Ebisu,3fd66200f964a52017ed1ee3,37.764354,-122.466358,Sushi Restaurant
2,Jackson Square,37.79708,-122.39841,Slanted Door,3fd66200f964a52018ed1ee3,37.796222,-122.393895,Vietnamese Restaurant
4,South Park,37.78145,-122.39386,Fringale,3fd66200f964a5201bed1ee3,37.778731,-122.397105,French Restaurant
5,Russian Hill,37.80203,-122.41963,Gary Danko,3fd66200f964a52025ee1ee3,37.805751,-122.420488,New American Restaurant


# Get additional information from Foursquare

Given rather strict limit from Foursquare API, let's do what we can with regular access and grab the amount of likes. For our purposes we may consider amount of likes to indicate popularity of venues.



In [0]:
# request for likes

likes = []

for vid in sfv['Venue id']:
  venue_id = vid
  
  #make url for request
  url = 'https://api.foursquare.com/v2/venues/{}/likes?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)

  # get responce
  result = requests.get(url).json()

  try:
    a = result['response']['likes']['count']
  except:
    a = 0

  likes.append(a)

sfv['likes'] = likes
sfv.head()



Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue id,Venue Latitude,Venue Longitude,Venue Category,likes
0,Nob Hill,37.79336,-122.41787,Acquerello,3fd66200f964a52013ed1ee3,37.791669,-122.421407,Italian Restaurant,82
1,Inner Sunset,37.76211,-122.46886,Ebisu,3fd66200f964a52017ed1ee3,37.764354,-122.466358,Sushi Restaurant,264
2,Jackson Square,37.79708,-122.39841,Slanted Door,3fd66200f964a52018ed1ee3,37.796222,-122.393895,Vietnamese Restaurant,1200
4,South Park,37.78145,-122.39386,Fringale,3fd66200f964a5201bed1ee3,37.778731,-122.397105,French Restaurant,60
5,Russian Hill,37.80203,-122.41963,Gary Danko,3fd66200f964a52025ee1ee3,37.805751,-122.420488,New American Restaurant,293


In [0]:
#save to the file for technical access reasons
sfv.to_csv('sfv.csv')
files.download('sfv.csv')

In [113]:
# upload back from the file
url = 'https://raw.githubusercontent.com/aka-tori/Coursera_Capstone/master/sfv.csv'
sfv = pd.read_csv(url)
sfv.drop('Unnamed: 0', axis=1, inplace=True)
sfv.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue id,Venue Latitude,Venue Longitude,Venue Category,likes
0,Nob Hill,37.79336,-122.41787,Acquerello,3fd66200f964a52013ed1ee3,37.791669,-122.421407,Italian Restaurant,82
1,Inner Sunset,37.76211,-122.46886,Ebisu,3fd66200f964a52017ed1ee3,37.764354,-122.466358,Sushi Restaurant,264
2,Jackson Square,37.79708,-122.39841,Slanted Door,3fd66200f964a52018ed1ee3,37.796222,-122.393895,Vietnamese Restaurant,1200
3,South Park,37.78145,-122.39386,Fringale,3fd66200f964a5201bed1ee3,37.778731,-122.397105,French Restaurant,60
4,Russian Hill,37.80203,-122.41963,Gary Danko,3fd66200f964a52025ee1ee3,37.805751,-122.420488,New American Restaurant,293


In [114]:
# find the most popular category
df = sfv.groupby('Venue Category').count().reset_index()
df['likes'] = sfv[['Venue Category', 'likes']].groupby('Venue Category').sum().reset_index()['likes']
df = df.sort_values('likes', ascending = False).reset_index()

print('Most popular category is', df['Venue Category'][0], 'with total', df['likes'][0], 'likes')

Most popular category is New American Restaurant with total 11199 likes


# Take a look at most popular category

In [118]:
df = sfv[sfv['Venue Category'].str.contains('New American Restaurant')].sort_values('likes', ascending = False)
df.shape

(28, 9)

In [119]:
map_sfv = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, n in zip(df['Venue Latitude'], df['Venue Longitude'], df['Venue']):
    label = '{}'.format(n)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_sfv)  
    
map_sfv

# Cluster all restaurant by area

Looking for place to open new venue, we do not want to find an empty area only. But rather prefer popular "food place" i.e. area wich have concentration of sought-for venues and good attendance.


In [120]:
# cluster all restaurants to define "food place" areas

# drop unnecessary
sfvclu = sfv.drop(columns=['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue', 'Venue id', 'Venue Category', 'likes'], axis=1)
# sfvclu.head()

# normalize
X = sfvclu.values[:,:]
X = np.nan_to_num(X)
cluster_dataset = StandardScaler().fit_transform(X)
cluster_dataset

# get clusters
num_clusters = 10

k_means = KMeans(init="k-means++", n_clusters=num_clusters, n_init=12)
k_means.fit(cluster_dataset)
sfv['labels'] = k_means.labels_
sfv.head()


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue id,Venue Latitude,Venue Longitude,Venue Category,likes,labels
0,Nob Hill,37.79336,-122.41787,Acquerello,3fd66200f964a52013ed1ee3,37.791669,-122.421407,Italian Restaurant,82,4
1,Inner Sunset,37.76211,-122.46886,Ebisu,3fd66200f964a52017ed1ee3,37.764354,-122.466358,Sushi Restaurant,264,9
2,Jackson Square,37.79708,-122.39841,Slanted Door,3fd66200f964a52018ed1ee3,37.796222,-122.393895,Vietnamese Restaurant,1200,0
3,South Park,37.78145,-122.39386,Fringale,3fd66200f964a5201bed1ee3,37.778731,-122.397105,French Restaurant,60,7
4,Russian Hill,37.80203,-122.41963,Gary Danko,3fd66200f964a52025ee1ee3,37.805751,-122.420488,New American Restaurant,293,5


In [121]:
# define clusters by attendance

cl = sfv[['likes', 'labels']].groupby('labels', as_index = False).mean()

catname = []
for i in cl['likes']:
  a1 = cl.likes.max() - ((cl.likes.max()-cl.likes.min())/3)
  a2 = cl.likes.min() + ((cl.likes.max()-cl.likes.min())/3)
  if i > a1:
    catname.append('Very popular')
  elif a1 > i > a2:
    catname.append('Somewhat popular')
  else:
    catname.append('Not really popular')
cl['catname'] = catname
cl

Unnamed: 0,labels,likes,catname
0,0,249.415929,Very popular
1,1,244.516854,Very popular
2,2,82.641791,Not really popular
3,3,38.039216,Not really popular
4,4,195.77907,Very popular
5,5,116.40678,Somewhat popular
6,6,17.46,Not really popular
7,7,172.784314,Very popular
8,8,31.157895,Not really popular
9,9,135.066667,Somewhat popular


In [122]:
# add cluster category to venues list
dict_l = dict(zip(cl.labels, cl.catname))
sfv['ccat'] = [dict_l[i] for i in sfv.labels]
sfv.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue id,Venue Latitude,Venue Longitude,Venue Category,likes,labels,ccat
0,Nob Hill,37.79336,-122.41787,Acquerello,3fd66200f964a52013ed1ee3,37.791669,-122.421407,Italian Restaurant,82,4,Very popular
1,Inner Sunset,37.76211,-122.46886,Ebisu,3fd66200f964a52017ed1ee3,37.764354,-122.466358,Sushi Restaurant,264,9,Somewhat popular
2,Jackson Square,37.79708,-122.39841,Slanted Door,3fd66200f964a52018ed1ee3,37.796222,-122.393895,Vietnamese Restaurant,1200,0,Very popular
3,South Park,37.78145,-122.39386,Fringale,3fd66200f964a5201bed1ee3,37.778731,-122.397105,French Restaurant,60,7,Very popular
4,Russian Hill,37.80203,-122.41963,Gary Danko,3fd66200f964a52025ee1ee3,37.805751,-122.420488,New American Restaurant,293,5,Somewhat popular


In [123]:
# create map with all our food clusters
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(num_clusters)
ys = [i + x + (i*x)**2 for i in range(num_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, ccat, cluster in zip(sfv['Venue Latitude'], sfv['Venue Longitude'], sfv['Venue'], sfv['ccat'], sfv['labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster) + ' ' + ccat, parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [124]:
# define areas with does not have our sought-for category

l1 = set(sfv['labels'])
df = sfv[sfv['Venue Category'].str.contains('New American Restaurant')]
l2 = set(df['labels'])
area = l1-l2

print('Areas', area, 'does not have "New American Restaurant" category')

Areas {9, 3, 6} does not have "New American Restaurant" category


In [125]:
# map only clusters that fit our conditions

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(num_clusters)
ys = [i + x + (i*x)**2 for i in range(num_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, ccat, cluster in zip(sfv['Venue Latitude'], sfv['Venue Longitude'], sfv['Venue'], sfv['ccat'], sfv['labels']):
    if cluster in area:
      label = folium.Popup(str(poi) + ' Cl ' + str(cluster) + ' ' + ccat, parse_html=True)
      folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
        
map_clusters

We can see 3 cluster which fits our needs and can choose a more suitable one taking into account area popularity.
Let's map neighborhoods that may become our final choise.

In [128]:
# map it
map_san_fran = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, n, cluster in zip(sfv['Neighborhood Latitude'], sfv['Neighborhood Longitude'], sfv['Neighborhood'], sfv['labels']):
    if cluster == 9:
      label = '{}'.format(n)
      label = folium.Popup(label, parse_html=True)
      folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_san_fran)  
    
map_san_fran