In [91]:
# The code was removed by Watson Studio for sharing.

# 1. Geographical Coordinates of the Neighborhoods of San Diego

Wikipedia lists all the neighborhoods of San Diego: https://en.wikipedia.org/wiki/List_of_communities_and_neighborhoods_of_San_Diego

Web scraping --> I will use the "Beautiful Soup" python library to get the data from the table.

## 1.1 Extract the list of neighborhoods of San Diego from Wikipedia

In [1]:
# import the request library
import numpy as np
import requests
from bs4 import BeautifulSoup

url   = 'https://en.wikipedia.org/wiki/List_of_communities_and_neighborhoods_of_San_Diego'

# Download the page. Fail loudly on an HTTP error or a hung connection instead of
# silently handing an error page to the parser.
response = requests.get(url, timeout=30)
response.raise_for_status()
website_url = response.text  # NOTE: despite its name, this holds the page HTML, not a URL

# Parse the HTML with the lxml parser; the following cells search `soup` for the table
soup = BeautifulSoup(website_url,'lxml')

In [325]:
# extract the first table of the page
mytable  = soup.find('table')
if mytable is None:
    raise ValueError("No <table> found in the page: the Wikipedia layout may have changed")

# all the anchors of the table that have an href ...
all_href = mytable.find_all('a',href=True)

# ... and the ones that wrap an image (these must not end up in all_text)
image    = mytable.find_all('a',{'class':'image'},href=True)
img_text = [str(tt) for tt in image]

# Keep every anchor (as an HTML string) that is not an image anchor.
# Filtering by membership works for zero, one or several image anchors, whereas
# comparing the array against the list only works when there is exactly one.
all_text = np.array([str(aa) for aa in all_href if str(aa) not in img_text])
all_text

array([ '<a class="mw-redirect" href="/wiki/Balboa_Park,_San_Diego" title="Balboa Park, San Diego">Balboa Park</a>',
       '<a href="/wiki/Bankers_Hill,_San_Diego" title="Bankers Hill, San Diego">Bankers Hill</a>',
       '<a href="/wiki/Barrio_Logan,_San_Diego" title="Barrio Logan, San Diego">Barrio Logan</a>',
       '<a class="mw-redirect" href="/wiki/Bay_Ho,_San_Diego" title="Bay Ho, San Diego">Bay Ho</a>',
       '<a class="mw-redirect" href="/wiki/Bay_Park,_San_Diego" title="Bay Park, San Diego">Bay Park</a>',
       '<a href="/wiki/Birdland,_San_Diego" title="Birdland, San Diego">Birdland</a>',
       '<a href="/wiki/Black_Mountain_Ranch,_San_Diego" title="Black Mountain Ranch, San Diego">Black Mountain Ranch</a>',
       '<a class="mw-redirect" href="/wiki/Border,_San_Diego" title="Border, San Diego">Border</a>',
       '<a href="/wiki/Burlingame,_San_Diego" title="Burlingame, San Diego">Burlingame</a>',
       '<a href="/wiki/Carmel_Mountain_Ranch,_San_Diego" title="Carmel Mo

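From the anchors extracted above, I keep only those that have a `title` attribute and take their link text as the neighborhood name. (An alternative, kept in the cell below for reference only, takes all the href lines of another website, selects only those with "../zip-code/california/san_diego/" in them, and splits each row to get only the postal code.)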

In [326]:
# Pull the link text out of every anchor that carries a title attribute.
neighs = []
for aa in all_text:
    if 'title' not in aa:
        continue
    try:
        # take what follows the first 'title', then the text after its first '>'
        # up to the next '<'
        neighs.append(aa.split('title')[1].split('>')[1].split('<')[0])
    except IndexError:
        # no '>' in that piece of the anchor, so there is no link text to read
        print("no suburb here")

# Alternative kept for reference only (not executed): post codes from this website:
# url = 'https://www.bestplaces.net/find/zip.aspx?st=ca&city=0666000'
# tmp = soup.find_all('a', href=True)
# post_codes = []
# for tt in tmp:
#     if "../zip-code/california/san_diego/" in str(tt):
#         post_codes.append(str(tt).split('<u>')[1].split()[0])

no suburb here
no suburb here
no suburb here
no suburb here
no suburb here


'\n# post codes from this website:\nurl = \'https://www.bestplaces.net/find/zip.aspx?st=ca&city=0666000\'\ntmp = soup.find_all(\'a\', href=True)\npost_codes = []\nfor tt in tmp:\n    if "../zip-code/california/san_diego/" in str(tt):\n        post_codes.append(str(tt).split(\'<u>\')[1].split()[0])'

In [327]:
# Number of names scraped from the table (the clean-up cells further down remove the entries that are not neighborhoods)
print("There are %i neighborhoods in San Diego" %len(neighs))

There are 122 neighborhoods in San Diego


Now I can use the list that I just found to create a dataframe.

In [341]:
# Build the working table: one row per scraped name, with empty (NaN)
# Latitude and Longitude columns that the geocoding step fills in later.
import pandas as pd
import numpy as np

df = pd.DataFrame({'Neighborhood': neighs}).assign(Latitude=np.nan, Longitude=np.nan)
df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Balboa Park,,
1,Bankers Hill,,
2,Barrio Logan,,
3,Bay Ho,,
4,Bay Park,,


I need to clean the table, because there are rows with missing neighborhoods and the first row has a neighborhood called "neighborhoods"... very unlikely...

In [342]:
# Remove the rows whose name is empty or is one of the header/city labels listed here
to_drop = ['San Diego', 'neighborhoods','Neighborhood','']
keep_row = ~df['Neighborhood'].isin(to_drop)
df = df[keep_row]
df.reset_index(drop=True,inplace=True)
df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Balboa Park,,
1,Bankers Hill,,
2,Barrio Logan,,
3,Bay Ho,,
4,Bay Park,,


So, now the dataframe has the following shape:

In [343]:
# (rows, columns) of df after the clean-up above
df.shape

(121, 3)

## 1.2 Get the coordinates of the neighborhoods using the library _geopy.geocoders.Nominatim_

In [82]:
import json # library to handle JSON files
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  55.58 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  35.24 MB/s
vincent-0.4.4- 100% |################################| Time: 0:00:00  40.19 MB/s
folium-0.5.0-p 100% |################################| Time: 0:00:00  44.49 MB/s
Libraries imported.


In [344]:
geolocator  = Nominatim(user_agent="ny_explorer")

# Geocode every distinct name once and remember the hits; the dataframe is only
# modified after the loop, so it is never changed while being iterated over.
coords = {}
for neigh in df['Neighborhood'].unique():
    address = '%s, San Diego, California ' %(neigh)
    try:
        location = geolocator.geocode(address)
    except Exception as err:
        # best effort: a failed request costs only this neighborhood, not the whole run
        print('Geocoding %s failed (%s); I will drop it from the dataframe' %(neigh, err))
        continue
    if location is None:
        # geocode() returns None when the address is unknown
        print('%s is not a neighborhood in the library... Hence, I cannot get the coordinates, and I will drop it from the dataframe' %neigh)
        continue
    coords[neigh] = (location.latitude, location.longitude)

# keep only the neighborhoods that were found and fill in their coordinates
df = df[df['Neighborhood'].isin(list(coords))].copy()
df['Latitude']  = [coords[name][0] for name in df['Neighborhood']]
df['Longitude'] = [coords[name][1] for name in df['Neighborhood']]
df.reset_index(drop=True,inplace=True)

College Area is not a neighborhood in the library... Hence, I cannot get the coordinates, and I will drop it from the dataframe
Downtown San Diego (Centre City) is not a neighborhood in the library... Hence, I cannot get the coordinates, and I will drop it from the dataframe
East Elliott is not a neighborhood in the library... Hence, I cannot get the coordinates, and I will drop it from the dataframe
Egger Highlands is not a neighborhood in the library... Hence, I cannot get the coordinates, and I will drop it from the dataframe
Marston Hills is not a neighborhood in the library... Hence, I cannot get the coordinates, and I will drop it from the dataframe
Ocean Crest is not a neighborhood in the library... Hence, I cannot get the coordinates, and I will drop it from the dataframe
Ocean View Hills is not a neighborhood in the library... Hence, I cannot get the coordinates, and I will drop it from the dataframe
Point Loma Heights is not a neighborhood in the library... Hence, I cannot ge

There was still some rubbish, and getting the coordinates allowed me to further clean the dataset.

In [345]:
# first rows of df, now with Latitude and Longitude filled in
df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Balboa Park,32.731357,-117.146527
1,Bankers Hill,32.728293,-117.162105
2,Barrio Logan,32.693886,-117.138007
3,Bay Ho,32.879353,-117.2311
4,Bay Park,32.784638,-117.202605


Now the size of the dataframe is:

In [346]:
# (rows, columns) of df after dropping the names that could not be geocoded
df.shape

(111, 3)

## 2. Create a map of the Neighborhoods of San Diego

In [419]:
# Centre the map on the geocoded position of the city itself
address     = 'San Diego, California'
location    = geolocator.geocode(address)
latitudeSD, longitudeSD = location.latitude, location.longitude
map_SD      = folium.Map(location=[latitudeSD, longitudeSD], zoom_start=10)

# One blue circle per neighborhood, with the neighborhood name as its popup
marker_rows = zip(df['Latitude'],df['Longitude'],df['Neighborhood'])
for lat, lng, neighborhood in marker_rows:
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(str(neighborhood), parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_SD)

map_SD

## !!! THE MAP DOES NOT SHOW ON GITHUB.. To visualize the whole notebook with the folium map, Please, copy the link of this notebook and paste it on https://nbviewer.jupyter.org/ !!! Thank you!!

## 3. Explore the neighborhoods using the _Foursquare API_

In [350]:
# The code was removed by Watson Studio for sharing.

### 3.1 Extract up to 100 venues in each neighborhood, within a radius of 500 m

In [357]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each neighborhood.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Neighborhood names and the coordinates to search around.
    radius : int, default 500
        Search radius passed to the API (the notebook uses it as metres).

    Returns
    -------
    pandas.DataFrame
        One row per venue returned, with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty, but still has these columns, when nothing is found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    Notes
    -----
    CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT are read from notebook-level
    globals when a request is made.
    """
    venue_columns = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']
    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        # let requests build (and URL-encode) the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params, timeout=30)
        response.raise_for_status()

        # a reply without any group simply contributes no venues
        groups = response.json()["response"].get('groups') or []
        results = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without any category gets None instead of raising IndexError
                categories[0]['name'] if categories else None))

    # passing the column names here keeps the schema even when `rows` is empty
    return pd.DataFrame(rows, columns=venue_columns)

In [364]:
# Ask for at most LIMIT venues within 500 of every geocoded neighborhood in df
LIMIT  = 100
neighs_venues = getNearbyVenues(df['Neighborhood'], df['Latitude'], df['Longitude'], radius=500)


Balboa Park
Bankers Hill
Barrio Logan
Bay Ho
Bay Park
Birdland
Black Mountain Ranch
Border
Burlingame
Carmel Mountain Ranch
Carmel Valley
City Heights
Clairemont
Del Mar Heights
Del Mar Mesa
Columbia
Core
Cortez Hill
East Village
Gaslamp Quarter
Little Italy
Marina
El Cerrito
Gateway
Golden Hill
Grant Hill
Harbor Island
Harborview
Hillcrest
Kearny Mesa
Kensington
La Jolla
La Jolla Village
Torrey Pines
Village of La Jolla
Linda Vista
Logan Heights
Memorial
Midtown
Mira Mesa
Miramar
Mission Beach
Mission Hills
Mission Valley
Civita
Morena
Navajo
Allied Gardens
Del Cerro
Grantville
San Carlos
Nestor
Normal Heights
North City
North Park
North Clairemont
Oak Park
Ocean Beach
Old Town
Otay Mesa
Otay Mesa West
Pacific Beach
Pacific Highlands Ranch
Palm City
Point Loma
La Playa
Loma Portal
Midway
Roseville-Fleetridge
Sunset Cliffs
Wooded Area
Rancho Bernardo
Rancho Encantada
Rancho Peñasquitos
Rolando
Rolando Park
Sabre Springs
San Pasqual Valley
San Ysidro
Scripps Ranch
Serra Mesa
Shelter Isl

In [365]:
# first rows of the venue table returned by getNearbyVenues
neighs_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Balboa Park,32.731357,-117.146527,San Diego Natural History Museum,32.732239,-117.147395,History Museum
1,Balboa Park,32.731357,-117.146527,San Diego Model Railroad Museum,32.731132,-117.148365,Museum
2,Balboa Park,32.731357,-117.146527,Balboa Park Fountain,32.731453,-117.146809,Fountain
3,Balboa Park,32.731357,-117.146527,San Diego History Center,32.731205,-117.148279,History Museum
4,Balboa Park,32.731357,-117.146527,Botanical Building & Lily Pond,32.732237,-117.149288,Botanical Garden


In [366]:
# (rows, columns) of the venue table: one row per venue returned
print(neighs_venues.shape)

(2693, 7)


### 3.2 Extract the popular neighborhoods

In [399]:
# Count the venues returned for each neighborhood.
# size() counts the rows of every group and avoids selecting columns with a bare
# tuple (['Neighborhood','Venue']), which modern pandas rejects.
df2 = neighs_venues.groupby('Neighborhood').size().reset_index(name='count')

# let's use only those neighborhoods that have at least 20 venues (i.e. likely there are more people visiting them)
popular_df = neighs_venues[neighs_venues['Neighborhood'].isin(df2.loc[df2['count']>=20,'Neighborhood'])]
popular_df.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Balboa Park,32.731357,-117.146527,San Diego Natural History Museum,32.732239,-117.147395,History Museum
1,Balboa Park,32.731357,-117.146527,San Diego Model Railroad Museum,32.731132,-117.148365,Museum
2,Balboa Park,32.731357,-117.146527,Balboa Park Fountain,32.731453,-117.146809,Fountain
3,Balboa Park,32.731357,-117.146527,San Diego History Center,32.731205,-117.148279,History Museum
4,Balboa Park,32.731357,-117.146527,Botanical Building & Lily Pond,32.732237,-117.149288,Botanical Garden


In [403]:
# number of distinct neighborhoods and of distinct venue categories left in popular_df
print('There are a total of {} neighborhoods, with {} uniques categories.'.format(len(popular_df['Neighborhood'].unique()),len(popular_df['Venue Category'].unique())))

There are a total of 43 neighborhoods, with 254 uniques categories.


### 3.3 Analyze each popular neighborhood

In [408]:
# one hot encoding (since I want to use the k-mean clustering algorithm, I have to transform my categorical data into numerical)
neigh_onehot   = pd.get_dummies(popular_df[['Venue Category']], prefix="", prefix_sep="")

# If a venue category is itself called "Neighborhood", its dummy column would be
# silently overwritten by the name column added below; rename it first so that the
# category is kept.
if 'Neighborhood' in neigh_onehot.columns:
    neigh_onehot = neigh_onehot.rename(columns={'Neighborhood': 'Neighborhood (venue category)'})

# add the neighborhood name as the first column (aligned on the row index of popular_df)
neigh_onehot.insert(0, 'Neighborhood', popular_df['Neighborhood'])

In [409]:
# (rows, columns) of the one-hot table: one row per venue, one column per category plus the name
neigh_onehot.shape

(2182, 254)

In [410]:
# first rows of the one-hot encoded venue table
neigh_onehot.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,American Restaurant,Amphitheater,Antique Shop,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,...,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,Balboa Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Balboa Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Balboa Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Balboa Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Balboa Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Group the rows by neighborhood, taking the mean of the frequency of occurrence of each category.

In [411]:
# Average the one-hot columns per neighborhood: each value becomes the share of
# that neighborhood's venues that belong to the category.
SD_grouped = neigh_onehot.groupby('Neighborhood', as_index=False).mean()
SD_grouped.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,American Restaurant,Amphitheater,Antique Shop,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,...,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo Exhibit
0,Allied Gardens,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Balboa Park,0.0,0.0,0.020408,0.020408,0.0,0.0,0.0,0.0,0.081633,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.102041
2,Bankers Hill,0.0,0.0,0.074074,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bay Ho,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Border,0.017544,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.035088,0.0,0.0


In [412]:
# (rows, columns) of the grouped table: one row per neighborhood
SD_grouped.shape

(43, 254)

Let's print each neighborhood along with the top 10 most common venues

In [413]:
num_top_venues = 10

# For every neighborhood, list its categories by decreasing frequency and print the top ones
for _, grouped_row in SD_grouped.iterrows():
    hood = grouped_row['Neighborhood']
    print("----"+hood+"----")
    # everything after the first column (the name) is a category frequency
    freqs = grouped_row.iloc[1:].astype(float).round(2)
    temp = pd.DataFrame({'venue': freqs.index, 'freq': freqs.values})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Allied Gardens----
                venue  freq
0  Mexican Restaurant  0.10
1      Discount Store  0.05
2         Pizza Place  0.05
3        Optical Shop  0.05
4              Office  0.05
5        Burger Joint  0.05
6         Gas Station  0.05
7                Park  0.05
8         Flower Shop  0.05
9       Grocery Store  0.05


----Balboa Park----
                          venue  freq
0                   Zoo Exhibit  0.10
1                        Garden  0.10
2                    Art Museum  0.08
3                       Theater  0.06
4  Theme Park Ride / Attraction  0.04
5                History Museum  0.04
6         Performing Arts Venue  0.04
7                        Museum  0.04
8                       Exhibit  0.04
9                          Park  0.04


----Bankers Hill----
                 venue  freq
0  American Restaurant  0.07
1                  Spa  0.04
2                 Park  0.04
3       Cosmetics Shop  0.04
4       Breakfast Spot  0.04
5                Motel  0.04
6  

Save what we just found in a dataframe

In [414]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest entries of `row`.

    The first entry of `row` is skipped (in this notebook it holds the
    neighborhood name); the remaining entries are sorted in descending order
    and the index labels of the leading ones are returned as a numpy array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [423]:
# then, create the dataframe
indicators = ['st', 'nd', 'rd']

# One column per rank: "1st", "2nd", "3rd", then "4th", "5th", ...
# The suffix is chosen with an explicit test instead of a bare except around the lookup.
columns = ['Neighborhood']
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = SD_grouped['Neighborhood']

# fill each row with the most common venue categories of that neighborhood
for ind in np.arange(SD_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(SD_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allied Gardens,Mexican Restaurant,Donut Shop,Shipping Store,Massage Studio,Optical Shop,Office,Grocery Store,Liquor Store,Park,Convenience Store
1,Balboa Park,Zoo Exhibit,Garden,Art Museum,Theater,Performing Arts Venue,Park,Exhibit,Theme Park Ride / Attraction,History Museum,Museum
2,Bankers Hill,American Restaurant,Lounge,Bank,Sushi Restaurant,Taco Place,Breakfast Spot,Coffee Shop,Gastropub,Gay Bar,Sports Club
3,Bay Ho,Indian Restaurant,Baseball Stadium,Greek Restaurant,Frozen Yogurt Shop,Food Truck,Music Venue,New American Restaurant,Snack Place,Fast Food Restaurant,Farmers Market
4,Border,Clothing Store,Shoe Store,Accessories Store,Outlet Store,Men's Store,Women's Store,ATM,Sandwich Place,Food Court,Snack Place


## 4. Cluster the neighborhoods in _5_ groups, using the k-Means algorithm

### 4.1 Find the clusters

In [424]:
# set number of clusters
kclusters = 5

# drop the Neighborhood column, since we will just need numbers for the model
# (columns= is used because pandas 2 no longer accepts the axis as a positional argument)
SD_grouped_clustering = SD_grouped.drop(columns='Neighborhood')

# run k-means clustering (fixed random_state so that the labels are reproducible)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(SD_grouped_clustering)

# add clustering labels as the first column; remove a column left over from a
# previous run of this cell first, otherwise insert() raises on re-execution
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# check cluster labels generated for the first rows in the dataframe
kmeans.labels_[0:10] 

array([1, 0, 0, 0, 2, 1, 4, 4, 4, 0], dtype=int32)

### 4.2 Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [425]:
# neighborhoods with at least 20 venues (the same selection used to build popular_df)
popular_names = df2.loc[df2['count']>=20,'Neighborhood']

# attach the cluster label and the most common venues to the coordinates of each of them
SD_merged  = (
    df[df['Neighborhood'].isin(popular_names)]
    .join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
)

SD_merged.head() 

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Balboa Park,32.731357,-117.146527,0,Zoo Exhibit,Garden,Art Museum,Theater,Performing Arts Venue,Park,Exhibit,Theme Park Ride / Attraction,History Museum,Museum
1,Bankers Hill,32.728293,-117.162105,0,American Restaurant,Lounge,Bank,Sushi Restaurant,Taco Place,Breakfast Spot,Coffee Shop,Gastropub,Gay Bar,Sports Club
3,Bay Ho,32.879353,-117.2311,0,Indian Restaurant,Baseball Stadium,Greek Restaurant,Frozen Yogurt Shop,Food Truck,Music Venue,New American Restaurant,Snack Place,Fast Food Restaurant,Farmers Market
7,Border,32.543817,-117.046425,2,Clothing Store,Shoe Store,Accessories Store,Outlet Store,Men's Store,Women's Store,ATM,Sandwich Place,Food Court,Snack Place
8,Burlingame,32.73166,-117.129321,1,Bookstore,Bar,Café,Boutique,Mexican Restaurant,Kids Store,Nail Salon,Convenience Store,Pizza Place,Food Truck


## 5. Visualize the results using _Folium_ to create a map

In [427]:
# create map
map_clusters = folium.Map(location=[latitudeSD, longitudeSD], zoom_start=11)

# one evenly spaced rainbow colour per cluster, as hex strings
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add one marker per neighborhood, coloured by its cluster
for lat, lon, poi, cluster in zip(SD_merged['Latitude'], SD_merged['Longitude'], SD_merged['Neighborhood'], SD_merged['Cluster Labels']):
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # labels start at 0, so index the palette directly
        # (cluster-1 made cluster 0 wrap around to the last colour)
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7,
        parse_html=False).add_to(map_clusters)
       
map_clusters

## !!! THE MAP DOES NOT SHOW ON GITHUB.. To visualize the whole notebook with the folium map, Please, copy the link of this notebook and paste it on https://nbviewer.jupyter.org/ !!! Thank you!!

### Look at the first clusters

#### Cluster 0

In [439]:
# rows of cluster 0: keep column 0 (the neighborhood name) and the columns from position 4 onwards
SD_merged[SD_merged['Cluster Labels'] == 0].iloc[:, [0] + list(range(4, SD_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Balboa Park,Zoo Exhibit,Garden,Art Museum,Theater,Performing Arts Venue,Park,Exhibit,Theme Park Ride / Attraction,History Museum,Museum
1,Bankers Hill,American Restaurant,Lounge,Bank,Sushi Restaurant,Taco Place,Breakfast Spot,Coffee Shop,Gastropub,Gay Bar,Sports Club
3,Bay Ho,Indian Restaurant,Baseball Stadium,Greek Restaurant,Frozen Yogurt Shop,Food Truck,Music Venue,New American Restaurant,Snack Place,Fast Food Restaurant,Farmers Market
15,Columbia,Hotel,Coffee Shop,Italian Restaurant,New American Restaurant,Café,Gym,Sandwich Place,Sushi Restaurant,Mexican Restaurant,French Restaurant
16,Core,Hotel,Italian Restaurant,Café,Mexican Restaurant,Bar,Coffee Shop,Sushi Restaurant,American Restaurant,Lingerie Store,Seafood Restaurant
17,Cortez Hill,Coffee Shop,Hotel,Café,Mexican Restaurant,Park,Taco Place,Pizza Place,Deli / Bodega,South American Restaurant,Concert Hall
18,East Village,Mexican Restaurant,Gastropub,Sandwich Place,Bagel Shop,Bar,Grocery Store,Breakfast Spot,Brewery,Fast Food Restaurant,Deli / Bodega
19,Gaslamp Quarter,Bar,Hotel,Mexican Restaurant,Italian Restaurant,Café,Steakhouse,American Restaurant,Lounge,Breakfast Spot,Gastropub
20,Little Italy,Italian Restaurant,Hotel,American Restaurant,Coffee Shop,Wine Bar,Pizza Place,Brewery,ATM,Convenience Store,Café
22,El Cerrito,Hotel,Coffee Shop,Sushi Restaurant,Mexican Restaurant,New American Restaurant,Lingerie Store,American Restaurant,Seafood Restaurant,Italian Restaurant,Café


_It looks like Cluster 0 is popular for restaurants, hotels, breweries... lots of activities._

#### Cluster 1

In [438]:
# rows of cluster 1: keep column 0 (the neighborhood name) and the columns from position 4 onwards
SD_merged[SD_merged['Cluster Labels'] == 1].iloc[:, [0] + list(range(4, SD_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Burlingame,Bookstore,Bar,Café,Boutique,Mexican Restaurant,Kids Store,Nail Salon,Convenience Store,Pizza Place,Food Truck
45,Morena,Sandwich Place,Mexican Restaurant,Café,Convenience Store,Gym,Brewery,Donut Shop,Outlet Store,Deli / Bodega,Massage Studio
47,Allied Gardens,Mexican Restaurant,Donut Shop,Shipping Store,Massage Studio,Optical Shop,Office,Grocery Store,Liquor Store,Park,Convenience Store
49,Grantville,Sandwich Place,Mexican Restaurant,Fast Food Restaurant,Coffee Shop,Brewery,Pet Store,Convenience Store,Salad Place,Pharmacy,Greek Restaurant
52,Normal Heights,Liquor Store,Convenience Store,Taco Place,ATM,Pizza Place,Ice Cream Shop,Park,Sandwich Place,Café,Food Truck
58,Old Town,Mexican Restaurant,History Museum,Gift Shop,Hobby Shop,Hotel,Arts & Crafts Store,Café,Rental Car Location,Gourmet Shop,Bar
78,San Ysidro,Mexican Restaurant,Motel,Insurance Office,Financial or Legal Service,Convenience Store,Food,Motorsports Shop,Gas Station,Market,Liquor Store
80,Serra Mesa,Smoke Shop,ATM,Bakery,Pharmacy,Convenience Store,Park,Donut Shop,Salon / Barbershop,Sandwich Place,Sports Bar


#### Cluster 2

In [437]:
# rows of cluster 2: keep column 0 (the neighborhood name) and the columns from position 4 onwards
SD_merged[SD_merged['Cluster Labels'] == 2].iloc[:, [0] + list(range(4, SD_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Border,Clothing Store,Shoe Store,Accessories Store,Outlet Store,Men's Store,Women's Store,ATM,Sandwich Place,Food Court,Snack Place


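_This cluster has a little bit of everything, from restaurants and cafes, to stores, gyms and movie theaters._ (Note: in the output shown above, Cluster 2 contains only Border, whose most common venues are clothing, shoe and accessories stores, so this description may come from an earlier run.)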

#### Cluster 3

In [436]:
# rows of cluster 3: keep column 0 (the neighborhood name) and the columns from position 4 onwards
SD_merged[SD_merged['Cluster Labels'] == 3].iloc[:, [0] + list(range(4, SD_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,Memorial,Boat or Ferry,Seafood Restaurant,Pier,Harbor / Marina,Pizza Place,Tour Provider,Museum,Fish Market,Café,Cheese Shop


#### Cluster 4

In [435]:
# rows of cluster 4: keep column 0 (the neighborhood name) and the columns from position 4 onwards
SD_merged[SD_merged['Cluster Labels'] == 4].iloc[:, [0] + list(range(4, SD_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Carmel Mountain Ranch,Fast Food Restaurant,Mexican Restaurant,Coffee Shop,Chinese Restaurant,Greek Restaurant,Grocery Store,Burger Joint,Mobile Phone Shop,Juice Bar,Cosmetics Shop
11,City Heights,Mexican Restaurant,Vietnamese Restaurant,Grocery Store,Pizza Place,Pharmacy,Sandwich Place,Chinese Restaurant,Farmers Market,Taco Place,Snack Place
12,Clairemont,Mexican Restaurant,Bakery,Grocery Store,Burger Joint,Supplement Shop,Mediterranean Restaurant,Pizza Place,Coffee Shop,Big Box Store,Martial Arts Dojo
23,Gateway,Coffee Shop,Cosmetics Shop,Gym,Clothing Store,Sandwich Place,New American Restaurant,Food Truck,Furniture / Home Store,Skating Rink,Restaurant
24,Golden Hill,Coffee Shop,Mexican Restaurant,Pizza Place,Taco Place,Mediterranean Restaurant,Sandwich Place,Market,Thai Restaurant,Grocery Store,Liquor Store
28,Hillcrest,Mexican Restaurant,Restaurant,Coffee Shop,Pizza Place,Italian Restaurant,Sushi Restaurant,Chinese Restaurant,Pharmacy,Taco Place,Thai Restaurant
30,Kensington,Pizza Place,Bank,Spa,Burger Joint,Café,French Restaurant,Shipping Store,Mexican Restaurant,Sculpture Garden,Organic Grocery
38,Midtown,Mexican Restaurant,Sushi Restaurant,Coffee Shop,Pizza Place,Chinese Restaurant,Restaurant,Gay Bar,Greek Restaurant,Bakery,Italian Restaurant
39,Mira Mesa,Seafood Restaurant,Dessert Shop,Grocery Store,Fast Food Restaurant,Smoothie Shop,Tex-Mex Restaurant,Mexican Restaurant,Breakfast Spot,Bubble Tea Shop,Burger Joint
41,Mission Beach,Breakfast Spot,Beach,Falafel Restaurant,Board Shop,Greek Restaurant,Chinese Restaurant,Gym / Fitness Center,Surf Spot,Park,Restaurant
