In [None]:
#!conda install -c anaconda xlrd --yes

import requests # library to handle requests
from bs4 import BeautifulSoup as bs
import re

import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis

import json # library to handle JSON files

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!pip install geocoder
import geocoder

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

!pip install folium
import folium

print('Libraries imported.')

In [2]:
# Use the Notebook to build the code to scrape the following Wikipedia page, https://en.wikipedia.org

# Assign the link of the website to variable named url
url = requests.get('https://en.wikipedia.org/wiki/List_of_California_locations_by_income').text

# Load url, turn into soup and get the tables
soup = bs(url,'lxml')
tables = soup.find_all('table',{'class':'wikitable sortable'})

#Extract the first table
table1 = tables[0].find_all("tr")

#Extract the columns
data   = [[td.findAll(text=True)[0].strip() for td in tr.find_all("td")]
              for tr in table1]
data    = [row for row in data if len(row) == 6]

In [3]:
#Convert the data into Pandas DataFrame
df = pd.DataFrame(data, columns = ['County', 'Population', 'Population Density', 'Per Capita Income', 'Median Household Income', 'Median Family Income']) 
df

Unnamed: 0,County,Population,Population Density,Per Capita Income,Median Household Income,Median Family Income
0,Alameda,1559308,2109.8,"$36,439","$73,775","$90,822"
1,Alpine,1202,1.6,"$24,375","$61,343","$71,932"
2,Amador,37159,62.5,"$27,373","$52,964","$68,765"
3,Butte,221578,135.4,"$24,430","$43,165","$56,934"
4,Calaveras,44921,44.0,"$29,296","$54,936","$67,100"
5,Colusa,21424,18.6,"$22,211","$50,503","$56,472"
6,Contra Costa,1081232,1496.0,"$38,770","$79,799","$95,087"
7,Del Norte,28066,27.9,"$19,424","$39,302","$52,452"
8,El Dorado,181465,106.3,"$35,128","$68,507","$84,690"
9,Fresno,948844,159.2,"$20,231","$45,201","$50,046"


In [4]:
# remove unused column
df = df.drop(['Population Density', 'Per Capita Income', 'Median Household Income', 'Median Family Income'], axis=1)

# remove ',' from the value
df['Population'] = df['Population'].str.replace(r',', '')

# convert Population column to 'int' dtype 
df['Population'] = np.asarray(df['Population'], dtype='int')

df

Unnamed: 0,County,Population
0,Alameda,1559308
1,Alpine,1202
2,Amador,37159
3,Butte,221578
4,Calaveras,44921
5,Colusa,21424
6,Contra Costa,1081232
7,Del Norte,28066
8,El Dorado,181465
9,Fresno,948844


In [5]:
# get coordinate of California
address = 'California, US'

geolocator2 = Nominatim(user_agent="CA_explorer")
location2 = geolocator2.geocode(address)
latitude2 = location2.latitude
longitude2 = location2.longitude
print('The geograpical coordinate of California are {}, {}.'.format(latitude2, longitude2))

The geograpical coordinate of California are 36.7014631, -118.7559974.


In [6]:
#download California's geojson
!wget --quiet https://gist.githubusercontent.com/threestory/ed0f322d7bb2e3be8ded/raw/2aa8df2f15817985c60b67d726e6d13197e8ffb6/cb_2014_us_county_5m.json -O california.json
world_geo = r'california.json' # geojson file
print('GeoJSON file downloaded!')

GeoJSON file downloaded!


In [7]:
cal_choropleth = folium.Map(location = [latitude2, longitude2], zoom_start=6)

# create a numpy array of length 6 and has linear spacing from the minium population to the maximum population
threshold_scale = np.linspace(df['Population'].min(),
                              df['Population'].max(),
                              8, dtype=int)
threshold_scale = threshold_scale.tolist() # change the numpy array to a list
threshold_scale[-1] = threshold_scale[-1] + 1 # make sure that the last value of the list is greater than the maximum immigration

# generate choropleth map using the population of each County in California
cal_choropleth.choropleth(
    geo_data=world_geo,
    data=df,
    columns=['County','Population'],
    key_on='feature.properties.NAME',
    threshold_scale=threshold_scale,
    fill_color='YlOrRd', 
    fill_opacity=0.7, 
    line_opacity=0.2,
    legend_name='Population of Each Counties in California',
    reset=True
)
cal_choropleth



From the map we can conclude that Los Angeles is the county with highest population in California.

I will choose Los Angeles to further investigate the number of venues and find out which is the best place to open Restaurant.

Get the second table in wikipedia page and extract cities for Los Angeles

In [8]:
#Extract the second table
table2 = tables[1].find_all("tr")

#Extract the columns
data_la   = [[td.findAll(text=True)[0].strip() for td in tr.find_all("td")]
              for tr in table2]
data_la    = [row for row in data_la if len(row) == 7]
    
df_la = pd.DataFrame(data_la, columns = ['City', 'County', 'Population', 'Population Density', 'Per Capita Income', 'Median Household Income', 'Median Family Income']) 

# remove unused column
df_la = df_la.drop(['Population', 'Population Density', 'Per Capita Income', 'Median Household Income', 'Median Family Income'], axis=1)

df_la.loc[df_la['County'] == 'Los Angeles']

Unnamed: 0,City,County
2,Acton,Los Angeles
5,Agoura Hills,Los Angeles
6,Agua Dulce,Los Angeles
15,Alhambra,Los Angeles
22,Alondra Park,Los Angeles
27,Altadena,Los Angeles
47,Arcadia,Los Angeles
54,Artesia,Los Angeles
66,Avalon,Los Angeles
70,Avocado Heights,Los Angeles


Get coordinates for each cities of Los Angeles

In [9]:
#https://drive.google.com/uc?authuser=0&id=1xj-hMq0h_wcnyUZIoJzeGOxnDFDNLJiG&export=download
url='https://docs.google.com/spreadsheets/d/1hOuAeqMuu6IJwKrtTepyHdk0ljAouGr_BIWtCTHEvmM/export?format=csv&gid=2047060771'

df_la_coors = pd.read_csv(url)
df_new = pd.concat([df_la, df_la_coors], axis=1)
df_new = pd.merge(df_la, df_la_coors, on = 'City')

#remove duplicate row
df_new = df_new.drop_duplicates(subset='City', keep='first')

#rename City column to Neighborhood
df_new.rename(columns = {"City": "Neighborhood"}, inplace = True) 
df_new

Unnamed: 0,Neighborhood,County,Latitude,Longitude
0,Acton,Los Angeles,34.467170,-118.197180
1,Agoura Hills,Los Angeles,34.147980,-118.781242
2,Agua Dulce,Los Angeles,34.496320,-118.326200
3,Alhambra,Los Angeles,34.095287,-118.127014
4,Alondra Park,Los Angeles,33.887980,-118.330580
5,Altadena,Los Angeles,34.185680,-118.131508
6,Arcadia,Los Angeles,34.127361,-118.045868
7,Artesia,Los Angeles,33.861370,-118.082161
8,Avalon,Los Angeles,33.342800,-118.327900
9,Avocado Heights,Los Angeles,34.036300,-117.991050


In [10]:
# get coordinate of California
address = 'Los Angeles, California, US'

geolocator_la = Nominatim(user_agent="CA_explorer")
location_la = geolocator_la.geocode(address)
latitude_la = location_la.latitude
longitude_la = location_la.longitude
print('The geograpical coordinate of Los Angeles are {}, {}.'.format(latitude_la, longitude_la))

The geograpical coordinate of Los Angeles are 34.0536909, -118.2427666.


Use Foursquare to get nearby venues under food category

In [11]:
#Foursquare API credential
# @hidden_cell
CLIENT_ID = '0NGYIP4TSICNSCBWACDBYBGRKFB5PT10QJTUHW1OAHCIMCTO' # your Foursquare ID
CLIENT_SECRET = 'BEJEOWDI1A4UXBK23ESQSTLYODA5KJQA0GHT1B302XUDI5XW' # your Foursquare Secret
SECTION = 'food'
VERSION = '20190610'
LIMIT = 500

get the restaurant list from Foursquare API and clean the json and structure it into a pandas dataframe.

In [12]:
def getNearbyRestaurant(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&section={}&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            SECTION,
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [13]:
la_venues = getNearbyRestaurant(names=df_new['Neighborhood'],
                                latitudes=df_new['Latitude'],
                                longitudes=df_new['Longitude']
                                )

Acton
Agoura Hills
Agua Dulce
Alhambra
Alondra Park
Altadena
Arcadia
Artesia
Avalon
Avocado Heights
Azusa
Baldwin Park
Bell
Bellflower
Bell Gardens
Beverly Hills
Bradbury
Burbank
Calabasas
Carson
Castaic
Cerritos
Charter Oak
Citrus
Claremont
Commerce
Compton
Covina
Cudahy
Culver City
Del Aire
Desert View Highlands
Diamond Bar
Downey
Duarte
East Los Angeles
East Pasadena
East Rancho Dominguez
East San Gabriel
East Whittier
Elizabeth Lake
El Monte
El Segundo
Florence-Graham
Gardena
Glendale
Glendora
Green Valley
Hacienda Heights
Hasley Canyon
Hawaiian Gardens
Hawthorne
Hermosa Beach
Hidden Hills
Huntington Park
Industry
Inglewood
Irwindale
La Cañada Flintridge
La Crescenta-Montrose
Ladera Heights
La Habra Heights
Lake Hughes
Lake Los Angeles
Lakewood
La Mirada
Lancaster
La Puente
La Verne
Lawndale
Lennox
Leona Valley
Littlerock
Lomita
Long Beach
Los Angeles
Lynwood
Malibu
Manhattan Beach
Marina del Rey
Mayflower Village
Maywood
Monrovia
Montebello
Monterey Park
North El Monte
Norwalk
Pal

In [14]:
la_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Acton,34.467170,-118.197180,High Mesa,34.468089,-118.195704,Food
1,Acton,34.467170,-118.197180,Wences Bistro,34.468023,-118.196026,Café
2,Agoura Hills,34.147980,-118.781242,Pizza Nosh,34.148311,-118.782181,Pizza Place
3,Agoura Hills,34.147980,-118.781242,Twisted Oak Tavern,34.145308,-118.778679,Gastropub
4,Agoura Hills,34.147980,-118.781242,Grissini Ristorante,34.145815,-118.778534,Italian Restaurant
5,Agoura Hills,34.147980,-118.781242,Niroj Kurdish Cuisine,34.148437,-118.782240,Mediterranean Restaurant
6,Agoura Hills,34.147980,-118.781242,Matana Sushi And Grill,34.148395,-118.782365,Sushi Restaurant
7,Agoura Hills,34.147980,-118.781242,Naked Ramen,34.148031,-118.782556,Ramen Restaurant
8,Agoura Hills,34.147980,-118.781242,Cafe 14,34.148196,-118.782743,Café
9,Agoura Hills,34.147980,-118.781242,Cafe Bizou,34.148410,-118.782587,Café


In [15]:
print('{} restaurants were returned by Foursquare.'.format(la_venues.shape[0]))

1896 restaurants were returned by Foursquare.


In [16]:
print('There are {} uniques categories.'.format(len(la_venues['Venue Category'].unique())))

There are 87 uniques categories.


In [17]:
#sort the venue category in descending order
category = la_venues.groupby('Venue Category').count()
category.sort_values('Neighborhood', ascending  = False)

Unnamed: 0_level_0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Mexican Restaurant,192,192,192,192,192,192
Pizza Place,126,126,126,126,126,126
Chinese Restaurant,116,116,116,116,116,116
American Restaurant,96,96,96,96,96,96
Sandwich Place,87,87,87,87,87,87
Fast Food Restaurant,86,86,86,86,86,86
Bakery,71,71,71,71,71,71
Burger Joint,68,68,68,68,68,68
Asian Restaurant,58,58,58,58,58,58
Italian Restaurant,55,55,55,55,55,55


#### Analyze Each Neighborhood

In [18]:
# one hot encoding
la_onehot = pd.get_dummies(la_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
la_onehot['Neighborhood'] = la_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [la_onehot.columns[-1]] + list(la_onehot.columns[:-1])
la_onehot = la_onehot[fixed_columns]

la_onehot.head()

Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bistro,...,Szechuan Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Tex-Mex Restaurant,Thai Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint
0,Acton,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Acton,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Agoura Hills,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Agoura Hills,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Agoura Hills,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [19]:
la_grouped = la_onehot.groupby('Neighborhood').mean().reset_index()
la_grouped

Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bistro,...,Szechuan Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Tex-Mex Restaurant,Thai Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint
0,Acton,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
1,Agoura Hills,0.0,0.111111,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
2,Agua Dulce,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.200000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
3,Alhambra,0.0,0.018519,0.000000,0.037037,0.0,0.018519,0.000000,0.037037,0.0,...,0.000000,0.000000,0.000000,0.018519,0.000000,0.018519,0.0,0.000000,0.055556,0.018519
4,Alondra Park,0.0,0.000000,0.000000,0.083333,0.0,0.000000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
5,Altadena,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.083333,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
6,Arcadia,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
7,Artesia,0.0,0.000000,0.000000,0.028169,0.0,0.000000,0.000000,0.056338,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.014085,0.0,0.014085,0.056338,0.000000
8,Avalon,0.0,0.147059,0.000000,0.000000,0.0,0.000000,0.000000,0.058824,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
9,Azusa,0.0,0.090909,0.000000,0.000000,0.0,0.045455,0.000000,0.000000,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000


print each neighborhood along with the top 5 most common venues

In [20]:
num_top_venues = 5

for hood in la_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = la_grouped[la_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Acton----
                venue  freq
0                Café   0.5
1                Food   0.5
2  African Restaurant   0.0
3        Noodle House   0.0
4    Ramen Restaurant   0.0


----Agoura Hills----
                      venue  freq
0                      Café  0.22
1          Sushi Restaurant  0.11
2                 Gastropub  0.11
3               Pizza Place  0.11
4  Mediterranean Restaurant  0.11


----Agua Dulce----
                venue  freq
0          Restaurant   0.2
1         Pizza Place   0.2
2              Bakery   0.2
3                Café   0.2
4  Mexican Restaurant   0.2


----Alhambra----
                   venue  freq
0     Chinese Restaurant  0.07
1       Sushi Restaurant  0.07
2     Seafood Restaurant  0.06
3  Vietnamese Restaurant  0.06
4                  Diner  0.06


----Alondra Park----
                      venue  freq
0      Fast Food Restaurant  0.17
1            Breakfast Spot  0.17
2            Sandwich Place  0.08
3  Mediterranean Restaurant  0.08
4   

Write a function to sort the venues in descending order

In [21]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Create the new dataframe and display the top 10 venues for each neighborhood

In [22]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Restaurant'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Restaurant'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = la_grouped['Neighborhood']

for ind in np.arange(la_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(la_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
0,Acton,Café,Food,Wings Joint,Eastern European Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place
1,Agoura Hills,Café,Ramen Restaurant,Sushi Restaurant,Italian Restaurant,Mediterranean Restaurant,Pizza Place,Gastropub,American Restaurant,Bagel Shop,Fast Food Restaurant
2,Agua Dulce,Restaurant,Pizza Place,Café,Bakery,Mexican Restaurant,Eastern European Restaurant,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop
3,Alhambra,Chinese Restaurant,Sushi Restaurant,Vietnamese Restaurant,Burger Joint,Diner,Seafood Restaurant,Korean Restaurant,Asian Restaurant,Bakery,Japanese Curry Restaurant
4,Alondra Park,Fast Food Restaurant,Breakfast Spot,Mediterranean Restaurant,Restaurant,Asian Restaurant,Sandwich Place,Donut Shop,Pizza Place,Burger Joint,Mexican Restaurant
5,Altadena,Pizza Place,Food,Bakery,Diner,Sandwich Place,Mexican Restaurant,Burger Joint,Breakfast Spot,Fast Food Restaurant,Food Truck
6,Arcadia,Restaurant,Himalayan Restaurant,Cuban Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place,Dumpling Restaurant
7,Artesia,Indian Restaurant,Chinese Restaurant,Vietnamese Restaurant,Bakery,Korean Restaurant,Seafood Restaurant,Sushi Restaurant,Japanese Restaurant,Pizza Place,Filipino Restaurant
8,Avalon,American Restaurant,Seafood Restaurant,Mexican Restaurant,Restaurant,Bakery,Deli / Bodega,Pizza Place,Breakfast Spot,Burger Joint,Caribbean Restaurant
9,Azusa,Mexican Restaurant,Pizza Place,Italian Restaurant,American Restaurant,Burger Joint,Japanese Restaurant,Café,Sandwich Place,Sushi Restaurant,Burrito Place


Cluster Neighborhoods

Run k-means to cluster the neighborhood into 5 clusters

In [23]:
kclusters = 5

grouped_clustering = la_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 3, 3, 3, 3, 3, 3, 3, 3, 0], dtype=int32)

In [24]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

la_merged = df_new

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
la_merged = la_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

#remove row with NaN
la_merged['Cluster Labels']= la_merged['Cluster Labels'].fillna(0).astype(int)
la_merged = la_merged.dropna()

la_merged.head(10)

Unnamed: 0,Neighborhood,County,Latitude,Longitude,Cluster Labels,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
0,Acton,Los Angeles,34.46717,-118.19718,1,Café,Food,Wings Joint,Eastern European Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place
1,Agoura Hills,Los Angeles,34.14798,-118.781242,3,Café,Ramen Restaurant,Sushi Restaurant,Italian Restaurant,Mediterranean Restaurant,Pizza Place,Gastropub,American Restaurant,Bagel Shop,Fast Food Restaurant
2,Agua Dulce,Los Angeles,34.49632,-118.3262,3,Restaurant,Pizza Place,Café,Bakery,Mexican Restaurant,Eastern European Restaurant,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop
3,Alhambra,Los Angeles,34.095287,-118.127014,3,Chinese Restaurant,Sushi Restaurant,Vietnamese Restaurant,Burger Joint,Diner,Seafood Restaurant,Korean Restaurant,Asian Restaurant,Bakery,Japanese Curry Restaurant
4,Alondra Park,Los Angeles,33.88798,-118.33058,3,Fast Food Restaurant,Breakfast Spot,Mediterranean Restaurant,Restaurant,Asian Restaurant,Sandwich Place,Donut Shop,Pizza Place,Burger Joint,Mexican Restaurant
5,Altadena,Los Angeles,34.18568,-118.131508,3,Pizza Place,Food,Bakery,Diner,Sandwich Place,Mexican Restaurant,Burger Joint,Breakfast Spot,Fast Food Restaurant,Food Truck
6,Arcadia,Los Angeles,34.127361,-118.045868,3,Restaurant,Himalayan Restaurant,Cuban Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place,Dumpling Restaurant
7,Artesia,Los Angeles,33.86137,-118.082161,3,Indian Restaurant,Chinese Restaurant,Vietnamese Restaurant,Bakery,Korean Restaurant,Seafood Restaurant,Sushi Restaurant,Japanese Restaurant,Pizza Place,Filipino Restaurant
8,Avalon,Los Angeles,33.3428,-118.3279,3,American Restaurant,Seafood Restaurant,Mexican Restaurant,Restaurant,Bakery,Deli / Bodega,Pizza Place,Breakfast Spot,Burger Joint,Caribbean Restaurant
10,Azusa,Los Angeles,34.13348,-117.907539,0,Mexican Restaurant,Pizza Place,Italian Restaurant,American Restaurant,Burger Joint,Japanese Restaurant,Café,Sandwich Place,Sushi Restaurant,Burrito Place


In [25]:
# create map
map_clusters = folium.Map(location=[latitude_la, longitude_la], zoom_start=9)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(la_merged['Latitude'], la_merged['Longitude'], la_merged['Neighborhood'], la_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

Examine the Cluster

Cluster 0

In [26]:
la_merged.loc[la_merged['Cluster Labels'] == 0, la_merged.columns[[0] + list(range(5, la_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
10,Azusa,Mexican Restaurant,Pizza Place,Italian Restaurant,American Restaurant,Burger Joint,Japanese Restaurant,Café,Sandwich Place,Sushi Restaurant,Burrito Place
11,Baldwin Park,Mexican Restaurant,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Bakery,Sandwich Place,Burger Joint,Japanese Restaurant,Buffet,Taco Place
12,Bell,Mexican Restaurant,Food,Buffet,Cuban Restaurant,Chinese Restaurant,Restaurant,Donut Shop,Mediterranean Restaurant,Latin American Restaurant,South American Restaurant
14,Bell Gardens,Mexican Restaurant,Seafood Restaurant,Burger Joint,Fried Chicken Joint,Donut Shop,Fast Food Restaurant,Latin American Restaurant,New American Restaurant,Wings Joint,Dumpling Restaurant
32,Desert View Highlands,Mexican Restaurant,Chinese Restaurant,Mediterranean Restaurant,Restaurant,Sandwich Place,Diner,Donut Shop,Pizza Place,American Restaurant,Fish & Chips Shop
35,Duarte,Mexican Restaurant,Mediterranean Restaurant,Fast Food Restaurant,Breakfast Spot,Deli / Bodega,Vegetarian / Vegan Restaurant,Food Truck,Dim Sum Restaurant,Diner,Donburi Restaurant
36,East Los Angeles,Mexican Restaurant,Southern / Soul Food Restaurant,Food,Deli / Bodega,Fast Food Restaurant,Sushi Restaurant,Chinese Restaurant,Food Court,Dosa Place,Cuban Restaurant
37,East Pasadena,Mexican Restaurant,Szechuan Restaurant,Wings Joint,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place,Dumpling Restaurant
38,East Rancho Dominguez,Food,Mexican Restaurant,Food Truck,Bagel Shop,Bakery,Wings Joint,Fast Food Restaurant,Dim Sum Restaurant,Diner,Donburi Restaurant
52,Hawaiian Gardens,Mexican Restaurant,Sandwich Place,Fried Chicken Joint,Wings Joint,Asian Restaurant,Buffet,Fast Food Restaurant,Korean Restaurant,Noodle House,Persian Restaurant


Cluster 1

In [27]:
la_merged.loc[la_merged['Cluster Labels'] == 1, la_merged.columns[[0] + list(range(5, la_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
0,Acton,Café,Food,Wings Joint,Eastern European Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place
22,Cerritos,Food Truck,Food,Wings Joint,Eastern European Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place
44,Florence-Graham,Mexican Restaurant,Food,Fast Food Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place,Dumpling Restaurant
48,Green Valley,Food,Wings Joint,Fast Food Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place,Dumpling Restaurant
114,South El Monte,Food,Mexican Restaurant,Fast Food Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place,Dumpling Restaurant
127,Val Verde,Food,Wings Joint,Fast Food Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place,Dumpling Restaurant


Cluster 2

In [28]:
la_merged.loc[la_merged['Cluster Labels'] == 2, la_merged.columns[[0] + list(range(5, la_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
57,Industry,Café,Burger Joint,Wings Joint,Cuban Restaurant,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place,Dumpling Restaurant
67,La Mirada,Diner,Café,Wings Joint,Creperie,Deli / Bodega,Dim Sum Restaurant,Donburi Restaurant,Donut Shop,Dosa Place,Dumpling Restaurant
90,Palos Verdes Estates,Café,Breakfast Spot,French Restaurant,Fast Food Restaurant,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place,Dumpling Restaurant
137,Westlake Village,Burger Joint,Wings Joint,Creperie,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place,Dumpling Restaurant


Cluster 3

In [29]:
la_merged.loc[la_merged['Cluster Labels'] == 3, la_merged.columns[[0] + list(range(5, la_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
1,Agoura Hills,Café,Ramen Restaurant,Sushi Restaurant,Italian Restaurant,Mediterranean Restaurant,Pizza Place,Gastropub,American Restaurant,Bagel Shop,Fast Food Restaurant
2,Agua Dulce,Restaurant,Pizza Place,Café,Bakery,Mexican Restaurant,Eastern European Restaurant,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop
3,Alhambra,Chinese Restaurant,Sushi Restaurant,Vietnamese Restaurant,Burger Joint,Diner,Seafood Restaurant,Korean Restaurant,Asian Restaurant,Bakery,Japanese Curry Restaurant
4,Alondra Park,Fast Food Restaurant,Breakfast Spot,Mediterranean Restaurant,Restaurant,Asian Restaurant,Sandwich Place,Donut Shop,Pizza Place,Burger Joint,Mexican Restaurant
5,Altadena,Pizza Place,Food,Bakery,Diner,Sandwich Place,Mexican Restaurant,Burger Joint,Breakfast Spot,Fast Food Restaurant,Food Truck
6,Arcadia,Restaurant,Himalayan Restaurant,Cuban Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place,Dumpling Restaurant
7,Artesia,Indian Restaurant,Chinese Restaurant,Vietnamese Restaurant,Bakery,Korean Restaurant,Seafood Restaurant,Sushi Restaurant,Japanese Restaurant,Pizza Place,Filipino Restaurant
8,Avalon,American Restaurant,Seafood Restaurant,Mexican Restaurant,Restaurant,Bakery,Deli / Bodega,Pizza Place,Breakfast Spot,Burger Joint,Caribbean Restaurant
13,Bellflower,Mexican Restaurant,BBQ Joint,Food,Italian Restaurant,Southern / Soul Food Restaurant,Diner,Café,Korean Restaurant,Sandwich Place,Snack Place
15,Beverly Hills,Breakfast Spot,Creperie,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place,Dumpling Restaurant,Eastern European Restaurant


Cluster 4

In [30]:
la_merged.loc[la_merged['Cluster Labels'] == 4, la_merged.columns[[1] + list(range(5, la_merged.shape[1]))]]

Unnamed: 0,County,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
21,Los Angeles,Taco Place,Wings Joint,Fast Food Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place,Dumpling Restaurant
29,Los Angeles,Taco Place,Wings Joint,Fast Food Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place,Dumpling Restaurant
113,Los Angeles,Fast Food Restaurant,Taco Place,Wings Joint,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dosa Place,Dumpling Restaurant
118,Los Angeles,Pizza Place,Food,Taco Place,Wings Joint,Eastern European Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop
