<h1>Determining the Best Place for Restaurants in California<\h1>

Installation and imports

In [1]:
pip install lxml

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import requests
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium 
import lxml
print('Libraries imported.')

Libraries imported.


In [3]:
#Setting view options for dataframes
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)


The first step is to define a dataframe with the zipcodes and the relevent cities with population data.

In [4]:
dfTemp = pd.read_html("https://worldpopulationreview.com/zips/california")

In [5]:
#Data Cleaning and preparing
df1 = dfTemp[0].drop("County", axis = 1)
df1.dropna(axis = 0, inplace = True)
df1.reset_index(inplace = True)
df1.shape

(1720, 4)

The initial dataframe

In [6]:
df1

Unnamed: 0,index,Zip Code,City,Population
0,0,90011,Los Angeles,109414.0
1,1,90650,Norwalk,105886.0
2,2,91331,Pacoima,105799.0
3,3,90201,Bell,102433.0
4,4,92335,Fontana,99284.0
...,...,...,...,...
1715,1715,95735,Twin Bridges,10.0
1716,1716,95680,Ryde,10.0
1717,1717,95430,Duncans Mills,7.0
1718,1718,95375,Strawberry,5.0


Now that the intial dataset is complete, the geo data of the zipcodes will be inserted in order to use foursquare data

In [7]:
dfGeo = pd.read_csv('https://gist.githubusercontent.com/erichurst/7882666/raw/5bdc46db47d9515269ab12ed6fb2850377fd869e/US%2520Zip%2520Codes%2520from%25202013%2520Government%2520Data')
dfGeo.set_index("ZIP", inplace = True)
dfGeo

Unnamed: 0_level_0,LAT,LNG
ZIP,Unnamed: 1_level_1,Unnamed: 2_level_1
601,18.180555,-66.749961
602,18.361945,-67.175597
603,18.455183,-67.119887
606,18.158345,-66.932911
610,18.295366,-67.125135
...,...,...
99923,56.002315,-130.041026
99925,55.550204,-132.945933
99926,55.138352,-131.470424
99927,56.239062,-133.457924


Merging the two datasets

In [8]:
dfMer = df1.join(dfGeo, on= "Zip Code", how='left', lsuffix='nk', rsuffix='', sort=False)
dfMer

Unnamed: 0,index,Zip Code,City,Population,LAT,LNG
0,0,90011,Los Angeles,109414.0,34.007090,-118.258681
1,1,90650,Norwalk,105886.0,33.906956,-118.082640
2,2,91331,Pacoima,105799.0,34.255442,-118.421314
3,3,90201,Bell,102433.0,33.970343,-118.171368
4,4,92335,Fontana,99284.0,34.085972,-117.464597
...,...,...,...,...,...,...
1715,1715,95735,Twin Bridges,10.0,38.840282,-120.150225
1716,1716,95680,Ryde,10.0,38.240419,-121.587535
1717,1717,95430,Duncans Mills,7.0,38.461425,-123.047186
1718,1718,95375,Strawberry,5.0,38.186595,-120.026235


In [9]:
dfMer.dropna(axis = 0, inplace = True)
dfMer.drop("index", axis = 1, inplace = True)

In [10]:
dfMer

Unnamed: 0,Zip Code,City,Population,LAT,LNG
0,90011,Los Angeles,109414.0,34.007090,-118.258681
1,90650,Norwalk,105886.0,33.906956,-118.082640
2,91331,Pacoima,105799.0,34.255442,-118.421314
3,90201,Bell,102433.0,33.970343,-118.171368
4,92335,Fontana,99284.0,34.085972,-117.464597
...,...,...,...,...,...
1715,95735,Twin Bridges,10.0,38.840282,-120.150225
1716,95680,Ryde,10.0,38.240419,-121.587535
1717,95430,Duncans Mills,7.0,38.461425,-123.047186
1718,95375,Strawberry,5.0,38.186595,-120.026235


The merged dataset

In [11]:
dfMer.reset_index(inplace = True)
dfMer.drop("index", axis = 1, inplace = True)
dfMer

Unnamed: 0,Zip Code,City,Population,LAT,LNG
0,90011,Los Angeles,109414.0,34.007090,-118.258681
1,90650,Norwalk,105886.0,33.906956,-118.082640
2,91331,Pacoima,105799.0,34.255442,-118.421314
3,90201,Bell,102433.0,33.970343,-118.171368
4,92335,Fontana,99284.0,34.085972,-117.464597
...,...,...,...,...,...
1715,95735,Twin Bridges,10.0,38.840282,-120.150225
1716,95680,Ryde,10.0,38.240419,-121.587535
1717,95430,Duncans Mills,7.0,38.461425,-123.047186
1718,95375,Strawberry,5.0,38.186595,-120.026235


In [12]:
dfMer.shape

(1720, 5)

Take the population in to consideration and select the 100 countries with most population

In [13]:
dfMer.sort_values("Population", ascending=False, inplace = True)

In [14]:
dfPop = dfMer.iloc[0:100]

The final dataframe ready to be evaluated

In [15]:
dfPop

Unnamed: 0,Zip Code,City,Population,LAT,LNG
0,90011,Los Angeles,109414.0,34.00709,-118.258681
1,90650,Norwalk,105886.0,33.906956,-118.08264
2,91331,Pacoima,105799.0,34.255442,-118.421314
3,90201,Bell,102433.0,33.970343,-118.171368
4,92335,Fontana,99284.0,34.085972,-117.464597
5,90250,Hawthorne,96836.0,33.914775,-118.348083
6,90805,Long Beach,96708.0,33.864617,-118.180567
7,92336,Fontana,96575.0,34.147718,-117.464506
8,91342,Sylmar,96487.0,34.326584,-118.382261
9,90044,Los Angeles,96436.0,33.952725,-118.291904


Importing Visualization tools

In [16]:
from sklearn.cluster import KMeans
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

The initial map

In [17]:
map_Cal = folium.Map(location=[34.007090, -118.082640], zoom_start=10)

# add markers to map
for lat, lng, zipcode, city in zip(dfPop['LAT'], dfPop['LNG'], dfPop['Zip Code'], dfPop['City']):
    label = '{}, {}'.format(city, zipcode)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='bluetrus',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Cal)  
    
map_Cal

Foursquare Details

In [18]:
CLIENT_ID = '41C35DWGPTRVQJXE5O21I0EMF3AALOFDUHAVKBDWVMP0QUMD' # your Foursquare ID
CLIENT_SECRET = '22MVGBEFOQ4SCZTH32D5WYQT1CBLV35ZRGI1KL2TZX5KSYDU' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 41C35DWGPTRVQJXE5O21I0EMF3AALOFDUHAVKBDWVMP0QUMD
CLIENT_SECRET:22MVGBEFOQ4SCZTH32D5WYQT1CBLV35ZRGI1KL2TZX5KSYDU


Calling the API

In [19]:
def getNearbyVenues(zipcodes, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for zipcode, lat, lng in zip(zipcodes, latitudes, longitudes):
        print(zipcode)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            500, 
            50)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            zipcode, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Zip', 
                  'Zip Latitude', 
                  'Zip Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [20]:
cal_venues = getNearbyVenues(zipcodes=dfPop['Zip Code'],
                                   latitudes=dfPop['LAT'],
                                   longitudes=dfPop['LNG']
                                  )

90011
90650
91331
90201
92335
90250
90805
92336
91342
90044
94565
90280
92503
92683
91710
92704
92804
91744
92376
92154
95076
91911
94112
93307
92592
93033
93722
93727
92126
92509
92345
91335
91709
95823
94544
95035
90706
91706
93550
95630
93257
92553
94533
90255
94587
92805
91910
94110
94536
93274
93306
93535
92105
90003
93536
91766
93065
92882
91730
92021
90262
91402
92703
92114
92880
90026
90631
92407
94558
92563
94538
95687
94080
95123
92801
94509
93230
91343
92677
90019
94541
90022
92115
95624
95127
92201
95747
93906
90037
94015
94806
92562
95206
90042
95758
95020
94501
91732
91770
91977


In [21]:
print(cal_venues.shape)
cal_venues.head()

(697, 7)


Unnamed: 0,Zip,Zip Latitude,Zip Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,90011,34.00709,-118.258681,Cascada Refrescante,34.005618,-118.256094,Ice Cream Shop
1,90011,34.00709,-118.258681,Super Tortas,34.009264,-118.256799,Mexican Restaurant
2,90011,34.00709,-118.258681,Dollar Tree,34.007595,-118.256371,Discount Store
3,90011,34.00709,-118.258681,Pizza Hut,34.004515,-118.256699,Pizza Place
4,90011,34.00709,-118.258681,KFC,34.005971,-118.256253,Fried Chicken Joint


In [22]:
cal_venues["Venue Category"].unique()

array(['Ice Cream Shop', 'Mexican Restaurant', 'Discount Store',
       'Pizza Place', 'Fried Chicken Joint', 'Fast Food Restaurant',
       'Video Game Store', 'Frozen Yogurt Shop', 'Coffee Shop',
       'Wings Joint', 'Movie Theater', 'Supplement Shop', 'Video Store',
       'ATM', 'Buffet', 'Gym / Fitness Center',
       'Construction & Landscaping', 'Pharmacy', 'Grocery Store',
       'Intersection', 'Convenience Store', 'Taco Place',
       'Martial Arts Dojo', 'Latin American Restaurant',
       'Thai Restaurant', 'Market', 'Indian Restaurant',
       'Mobile Phone Shop', 'Big Box Store', 'Sandwich Place',
       'Cosmetics Shop', 'Burger Joint', 'Donut Shop', 'Food',
       'Insurance Office', 'Peruvian Restaurant', 'Pool', 'Park',
       'Basketball Court', 'Home Service', 'Garden', 'Dessert Shop',
       'Café', 'Bank', 'Bakery', 'Organic Grocery', 'Bar', 'Smoke Shop',
       'Automotive Shop', 'BBQ Joint', 'Dive Bar', 'Gas Station',
       'Dance Studio', 'Hot Dog Joint', 'Mi

Categorizing Data

In [89]:
dfs = cal_venues[cal_venues['Venue Category'].str.contains("Restaurant")] 
dft = cal_venues[cal_venues['Venue Category'].str.contains("Coffee")] 
dfu = cal_venues[cal_venues['Venue Category'].str.contains("Food")] 
dfv = cal_venues[cal_venues['Venue Category'].str.contains("Diner")] 
dfw = cal_venues[cal_venues['Venue Category'].str.contains("Tea")] 
dfx = cal_venues[cal_venues['Venue Category'].str.contains("Cafe")] 
dfy = cal_venues[cal_venues['Venue Category'].str.contains("Burger")] 
dfz = cal_venues[cal_venues['Venue Category'].str.contains("Pizza")] 
dfa = cal_venues[cal_venues['Venue Category'].str.contains("Taco")] 
dfb = cal_venues[cal_venues['Venue Category'].str.contains("Joint")] 

In [90]:
cal_venues['Venue Category'] = cal_venues['Venue Category'].replace(dfs["Venue Category"],'Restaurant')
cal_venues['Venue Category'] = cal_venues['Venue Category'].replace(dft["Venue Category"],'Cafe')
cal_venues['Venue Category'] = cal_venues['Venue Category'].replace(dfv["Venue Category"],'Restaurant')
cal_venues['Venue Category'] = cal_venues['Venue Category'].replace(dfu["Venue Category"],'Restaurant')
cal_venues['Venue Category'] = cal_venues['Venue Category'].replace(dfy["Venue Category"],'Restaurant')
cal_venues['Venue Category'] = cal_venues['Venue Category'].replace(dfz["Venue Category"],'Restaurant')
cal_venues['Venue Category'] = cal_venues['Venue Category'].replace(dfa["Venue Category"],'Restaurant')
cal_venues['Venue Category'] = cal_venues['Venue Category'].replace(dfw["Venue Category"],'Restaurant')
cal_venues['Venue Category'] = cal_venues['Venue Category'].replace(dfx["Venue Category"],'Restaurant')
cal_venues['Venue Category'] = cal_venues['Venue Category'].replace(dfb["Venue Category"],'Restaurant')

cal_venues.head(40)

Unnamed: 0,Zip,Zip Latitude,Zip Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,90011,34.00709,-118.258681,Cascada Refrescante,34.005618,-118.256094,Ice Cream Shop
1,90011,34.00709,-118.258681,Super Tortas,34.009264,-118.256799,Restaurant
2,90011,34.00709,-118.258681,Dollar Tree,34.007595,-118.256371,Discount Store
3,90011,34.00709,-118.258681,Pizza Hut,34.004515,-118.256699,Restaurant
4,90011,34.00709,-118.258681,KFC,34.005971,-118.256253,Restaurant
5,90011,34.00709,-118.258681,Taco Bell,34.006014,-118.256228,Restaurant
6,90011,34.00709,-118.258681,Jack in the Box,34.003654,-118.256806,Restaurant
7,90011,34.00709,-118.258681,McDonald's,34.010537,-118.256903,Restaurant
8,90011,34.00709,-118.258681,Lousianna Famous Fried Chicken,34.003757,-118.256417,Restaurant
9,90650,33.906956,-118.08264,GameStop,33.90486,-118.083344,Video Game Store


In [91]:
#one hot encoding
cal_onehot = pd.get_dummies(cal_venues[['Venue Category']], prefix="", prefix_sep="")


cal_onehot['Zip'] = cal_venues['Zip'] 


fixed_columns = [cal_onehot.columns[-1]] + list(cal_onehot.columns[:-1])
cal_onehot = cal_onehot[fixed_columns]

cal_onehot.head()

Unnamed: 0,Zip,ATM,Arcade,Art Gallery,Arts & Crafts Store,Auto Garage,Auto Workshop,Automotive Shop,Bakery,Bank,Bar,Basketball Court,Big Box Store,Bookstore,Breakfast Spot,Brewery,Buffet,Burrito Place,Bus Station,Business Service,Café,Candy Store,City Hall,Clothing Store,Cocktail Bar,Comic Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Discount Store,Dive Bar,Dog Run,Donut Shop,Dry Cleaner,Electronics Store,Event Space,Farm,Farmers Market,Fish Market,Flower Shop,Football Stadium,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Golf Course,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hardware Store,Health & Beauty Service,Home Service,Hotel,IT Services,Ice Cream Shop,Indie Movie Theater,Insurance Office,Intersection,Juice Bar,Karaoke Bar,Kids Store,Laundromat,Lawyer,Liquor Store,Lounge,Market,Martial Arts Dojo,Massage Studio,Medical Center,Metro Station,Miscellaneous Shop,Mobile Phone Shop,Motel,Motorcycle Shop,Mountain,Movie Theater,Moving Target,Music Store,Music Venue,Optical Shop,Organic Grocery,Other Repair Shop,Park,Performing Arts Venue,Pet Store,Pharmacy,Playground,Pool,Pool Hall,Record Shop,Recreation Center,Rental Car Location,Residential Building (Apartment / Condo),Resort,Restaurant,Road,Rock Club,Salon / Barbershop,Sandwich Place,Scenic Lookout,Shipping Store,Shoe Store,Shopping Mall,Shopping Plaza,Smoke Shop,Snack Place,Spa,Sporting Goods Shop,Stables,Storage Facility,Street Art,Supermarket,Supplement Shop,Theater,Thrift / Vintage Store,Toy / Game Store,Track,Trail,Train Station,Video Game Store,Video Store,Warehouse Store,Weight Loss Center,Well,Wine Bar,Yoga Studio
0,90011,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,90011,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,90011,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,90011,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,90011,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [92]:
cal_grouped = cal_onehot.groupby('Zip').mean().reset_index()
cal_grouped = cal_grouped.astype({"Zip" :str})

Top Venues in each zip code

In [93]:
num_top_venues = 5

for hood in cal_grouped['Zip']:
    print("----"+hood+"----")
    temp = cal_grouped[cal_grouped['Zip'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----90003----
                   venue  freq
0             Restaurant   1.0
1                    ATM   0.0
2  Performing Arts Venue   0.0
3            Music Store   0.0
4            Music Venue   0.0


----90011----
            venue  freq
0      Restaurant  0.78
1  Discount Store  0.11
2  Ice Cream Shop  0.11
3       Pet Store  0.00
4     Music Venue  0.00


----90019----
                    venue  freq
0              Restaurant  0.24
1  Furniture / Home Store  0.07
2          Sandwich Place  0.07
3                    Bank  0.07
4          Hardware Store  0.03


----90022----
               venue  freq
0         Restaurant  0.52
1  Mobile Phone Shop  0.10
2           Pharmacy  0.10
3               Bank  0.05
4         Donut Shop  0.05


----90026----
                 venue  freq
0           Restaurant  0.41
1                 Café  0.10
2  Arts & Crafts Store  0.05
3            Pet Store  0.05
4           Donut Shop  0.02


----90037----
           venue  freq
0     Restaurant  0.25
1 

Tabulating Data

In [94]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [95]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Zip']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Zip'] = cal_grouped['Zip']

for ind in np.arange(cal_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(cal_grouped.iloc[ind, :], num_top_venues)

    
neighborhoods_venues_sorted.head()

Unnamed: 0,Zip,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,90003,Restaurant,Yoga Studio,Farm,Fruit & Vegetable Store,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Event Space
1,90011,Restaurant,Discount Store,Ice Cream Shop,Farm,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Electronics Store
2,90019,Restaurant,Sandwich Place,Furniture / Home Store,Bank,Supermarket,Supplement Shop,Shipping Store,Mobile Phone Shop,Shopping Mall,Dessert Shop
3,90022,Restaurant,Mobile Phone Shop,Pharmacy,Shoe Store,Music Store,Convenience Store,Bank,Video Game Store,Donut Shop,Electronics Store
4,90026,Restaurant,Café,Arts & Crafts Store,Pet Store,Record Shop,Indie Movie Theater,Wine Bar,Gym / Fitness Center,Gastropub,Music Venue


K means clustering Algorithms

In [96]:

kclusters = 8

cal_grouped_clustering = cal_grouped.drop('Zip', 1)


kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(cal_grouped_clustering)


kmeans.labels_


array([0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 3,
       0, 3, 0, 0, 3, 2, 0, 3, 3, 3, 3, 3, 0, 3, 3, 4, 3, 7, 3, 3, 2, 0,
       3, 2, 3, 0, 3, 2, 5, 4, 6, 3, 0, 3, 3, 0, 0, 0, 0, 0, 3, 0, 0, 3,
       3, 3, 3, 3, 1, 3, 3, 2, 0, 0], dtype=int32)

Results in Tabular Form

In [97]:

neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

cal_merged = dfMer.astype({"Zip Code":str})

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
cal_merged = cal_merged.join(neighborhoods_venues_sorted.set_index('Zip'), on='Zip Code')

cal_merged.head(10)


Unnamed: 0,Zip Code,City,Population,LAT,LNG,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,90011,Los Angeles,109414.0,34.00709,-118.258681,0.0,Restaurant,Discount Store,Ice Cream Shop,Farm,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Electronics Store
1,90650,Norwalk,105886.0,33.906956,-118.08264,3.0,Discount Store,Restaurant,Gym / Fitness Center,Ice Cream Shop,Movie Theater,Frozen Yogurt Shop,Supplement Shop,ATM,Buffet,Video Game Store
2,91331,Pacoima,105799.0,34.255442,-118.421314,0.0,Construction & Landscaping,Restaurant,Yoga Studio,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Farm,Event Space
3,90201,Bell,102433.0,33.970343,-118.171368,3.0,Intersection,Video Store,Grocery Store,Pharmacy,Convenience Store,Ice Cream Shop,Donut Shop,Dog Run,Dry Cleaner,Electronics Store
4,92335,Fontana,99284.0,34.085972,-117.464597,,,,,,,,,,,
5,90250,Hawthorne,96836.0,33.914775,-118.348083,0.0,Restaurant,Mobile Phone Shop,Market,Convenience Store,Discount Store,Martial Arts Dojo,Insurance Office,Donut Shop,Sandwich Place,Big Box Store
6,90805,Long Beach,96708.0,33.864617,-118.180567,0.0,Restaurant,Pharmacy,Yoga Studio,Farm,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Electronics Store
7,92336,Fontana,96575.0,34.147718,-117.464506,3.0,Park,Pool,Home Service,Basketball Court,Farm,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market
8,91342,Sylmar,96487.0,34.326584,-118.382261,,,,,,,,,,,
9,90044,Los Angeles,96436.0,33.952725,-118.291904,0.0,Restaurant,Pharmacy,Yoga Studio,Farm,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Electronics Store


In [98]:
cal_merged.dropna(axis = 0, inplace = True)

In [99]:
cal_merged = cal_merged.astype({"Cluster Labels" : int})

In [100]:
cal_merged.reset_index(inplace = True, drop = True)

In [101]:
cal_merged.head(10)

Unnamed: 0,Zip Code,City,Population,LAT,LNG,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,90011,Los Angeles,109414.0,34.00709,-118.258681,0,Restaurant,Discount Store,Ice Cream Shop,Farm,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Electronics Store
1,90650,Norwalk,105886.0,33.906956,-118.08264,3,Discount Store,Restaurant,Gym / Fitness Center,Ice Cream Shop,Movie Theater,Frozen Yogurt Shop,Supplement Shop,ATM,Buffet,Video Game Store
2,91331,Pacoima,105799.0,34.255442,-118.421314,0,Construction & Landscaping,Restaurant,Yoga Studio,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Farm,Event Space
3,90201,Bell,102433.0,33.970343,-118.171368,3,Intersection,Video Store,Grocery Store,Pharmacy,Convenience Store,Ice Cream Shop,Donut Shop,Dog Run,Dry Cleaner,Electronics Store
4,90250,Hawthorne,96836.0,33.914775,-118.348083,0,Restaurant,Mobile Phone Shop,Market,Convenience Store,Discount Store,Martial Arts Dojo,Insurance Office,Donut Shop,Sandwich Place,Big Box Store
5,90805,Long Beach,96708.0,33.864617,-118.180567,0,Restaurant,Pharmacy,Yoga Studio,Farm,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Electronics Store
6,92336,Fontana,96575.0,34.147718,-117.464506,3,Park,Pool,Home Service,Basketball Court,Farm,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market
7,90044,Los Angeles,96436.0,33.952725,-118.291904,0,Restaurant,Pharmacy,Yoga Studio,Farm,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Electronics Store
8,94565,Pittsburg,96081.0,38.014576,-121.906255,3,Dessert Shop,Café,Garden,Donut Shop,Dry Cleaner,Electronics Store,Event Space,Farm,Discount Store,Fish Market
9,90280,South Gate,95103.0,33.944159,-118.192761,3,Bank,Restaurant,Mobile Phone Shop,Video Game Store,Pharmacy,Bakery,Convenience Store,Donut Shop,Fish Market,Fruit & Vegetable Store


Results on the map

In [102]:
# create map
map_clusters = folium.Map(location=[32.726237,-117.293031], zoom_start=6)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi,city, cluster in zip(cal_merged['LAT'], cal_merged['LNG'], cal_merged['Zip Code'],cal_merged['City'], cal_merged['Cluster Labels']):
    label = folium.Popup(str(city) +str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Cluster 0

In [103]:
cal_merged.loc[cal_merged['Cluster Labels'] == 0, cal_merged.columns[[0] + list(range(5, cal_merged.shape[1]))]]

Unnamed: 0,Zip Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,90011,0,Restaurant,Discount Store,Ice Cream Shop,Farm,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Electronics Store
2,91331,0,Construction & Landscaping,Restaurant,Yoga Studio,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Farm,Event Space
4,90250,0,Restaurant,Mobile Phone Shop,Market,Convenience Store,Discount Store,Martial Arts Dojo,Insurance Office,Donut Shop,Sandwich Place,Big Box Store
5,90805,0,Restaurant,Pharmacy,Yoga Studio,Farm,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Electronics Store
7,90044,0,Restaurant,Pharmacy,Yoga Studio,Farm,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Electronics Store
10,92683,0,Restaurant,Smoke Shop,Convenience Store,Automotive Shop,Organic Grocery,Bar,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market
13,92804,0,Restaurant,Department Store,Bar,Laundromat,Liquor Store,Grocery Store,Pharmacy,Discount Store,IT Services,Automotive Shop
17,94112,0,Restaurant,Bus Station,Sandwich Place,Liquor Store,Cosmetics Shop,Flower Shop,Gas Station,Metro Station,Dessert Shop,Gym / Fitness Center
19,93727,0,Convenience Store,Restaurant,Event Space,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Farm,Yoga Studio
20,92126,0,Restaurant,Park,Bakery,Farm,Fruit & Vegetable Store,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market


Cluster 1

In [104]:
cal_merged.loc[cal_merged['Cluster Labels'] == 1, cal_merged.columns[[0] + list(range(5, cal_merged.shape[1]))]]

Unnamed: 0,Zip Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
62,95127,1,Resort,Yoga Studio,Farm,Fruit & Vegetable Store,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Event Space


Cluster 2

In [105]:
cal_merged.loc[cal_merged['Cluster Labels'] == 2, cal_merged.columns[[0] + list(range(5, cal_merged.shape[1]))]]

Unnamed: 0,Zip Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,92704,2,Construction & Landscaping,Park,Fruit & Vegetable Store,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Yoga Studio,Discount Store
40,91766,2,Park,Yoga Studio,Fruit & Vegetable Store,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Farm,Insurance Office
48,92880,2,Park,Construction & Landscaping,Furniture / Home Store,Donut Shop,Dry Cleaner,Electronics Store,Event Space,Farm,Discount Store,Dog Run
57,92677,2,Park,Hotel,Health & Beauty Service,Yoga Studio,Farm,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market
64,95747,2,Park,Yoga Studio,Fruit & Vegetable Store,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Farm,Insurance Office


Cluster 3

In [106]:
cal_merged.loc[cal_merged['Cluster Labels'] == 3, cal_merged.columns[[0] + list(range(5, cal_merged.shape[1]))]]

Unnamed: 0,Zip Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,90650,3,Discount Store,Restaurant,Gym / Fitness Center,Ice Cream Shop,Movie Theater,Frozen Yogurt Shop,Supplement Shop,ATM,Buffet,Video Game Store
3,90201,3,Intersection,Video Store,Grocery Store,Pharmacy,Convenience Store,Ice Cream Shop,Donut Shop,Dog Run,Dry Cleaner,Electronics Store
6,92336,3,Park,Pool,Home Service,Basketball Court,Farm,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market
8,94565,3,Dessert Shop,Café,Garden,Donut Shop,Dry Cleaner,Electronics Store,Event Space,Farm,Discount Store,Fish Market
9,90280,3,Bank,Restaurant,Mobile Phone Shop,Video Game Store,Pharmacy,Bakery,Convenience Store,Donut Shop,Fish Market,Fruit & Vegetable Store
11,91710,3,Sandwich Place,Restaurant,Dance Studio,Gas Station,Convenience Store,Dive Bar,Dry Cleaner,Electronics Store,Donut Shop,Fruit & Vegetable Store
14,91744,3,Smoke Shop,Warehouse Store,Home Service,Auto Workshop,Trail,Farm,Fruit & Vegetable Store,Frozen Yogurt Shop,Football Stadium,Flower Shop
16,91911,3,Home Service,Yoga Studio,Farm,Fruit & Vegetable Store,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Event Space
18,92592,3,Breakfast Spot,Yoga Studio,Fish Market,Furniture / Home Store,Fruit & Vegetable Store,Frozen Yogurt Shop,Football Stadium,Flower Shop,Farmers Market,Garden Center
21,92509,3,Donut Shop,Auto Garage,Electronics Store,Construction & Landscaping,Yoga Studio,Fish Market,Furniture / Home Store,Fruit & Vegetable Store,Frozen Yogurt Shop,Football Stadium


Cluster 4

In [107]:
cal_merged.loc[cal_merged['Cluster Labels'] == 4, cal_merged.columns[[0] + list(range(5, cal_merged.shape[1]))]]

Unnamed: 0,Zip Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,92376,4,Dog Run,Yoga Studio,Farmers Market,Furniture / Home Store,Fruit & Vegetable Store,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farm
41,93065,4,Dog Run,Yoga Studio,Farmers Market,Furniture / Home Store,Fruit & Vegetable Store,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farm


Cluster 5

In [108]:
cal_merged.loc[cal_merged['Cluster Labels'] == 5, cal_merged.columns[[0] + list(range(5, cal_merged.shape[1]))]]

Unnamed: 0,Zip Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
42,92882,5,Trail,Mountain,Farmers Market,Furniture / Home Store,Fruit & Vegetable Store,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Yoga Studio


Cluster 6

In [109]:
cal_merged.loc[cal_merged['Cluster Labels'] == 6, cal_merged.columns[[0] + list(range(5, cal_merged.shape[1]))]]

Unnamed: 0,Zip Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
55,93230,6,Well,Yoga Studio,Farm,Fruit & Vegetable Store,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Event Space


Cluster 7

In [110]:
cal_merged.loc[cal_merged['Cluster Labels'] == 7, cal_merged.columns[[0] + list(range(5, cal_merged.shape[1]))]]

Unnamed: 0,Zip Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
30,92553,7,Playground,Yoga Studio,Farm,Fruit & Vegetable Store,Frozen Yogurt Shop,Football Stadium,Flower Shop,Fish Market,Farmers Market,Event Space


By observing the clusters, we an say that Cluster 2 and 7 can be identified as the best zip codes to start a restaurant/cafe in California