## The Restaurantour - Ratings Adjustment

Use Bayesian Smoothing to adjust restaurant ratings based on clustering

In [17]:
import pickle
fpath = 'drlaurenmac/the-restaurantour/data-wrangling/datasets/'

#load clusters:
with open(fpath + 'clusters_dict.pkl','rb') as f:
    clusters_dict = pickle.load(f)

#load restaurant info:
with open(fpath + 'df_rests_all.pkl','rb') as f:
    df_rests_all = pickle.load(f)

In [2]:
df_rests_all.head()

Unnamed: 0,yelp_id,fsq_id,yelp_name,fsq_name,yelp_categories,fsq_categories,neighborhood,latitude,longitude,census_block,...,fsq_popularity,fsq_price,fsq_rating,fsq_total_photos,fsq_total_ratings,fsq_total_tips,fsq_tastes,fsq_features,website,formatted_address
0,Iue8gacs66BJeFRG83RLEA,593e7324e7a23754a3d8e6fd,Halal Kitchen Cafe,Halal Kitchen Cafe,"[Halal, Middle Eastern, Afghan]",[Restaurant],[Northridge],34.228354,-118.528353,60371154031000,...,0.922282,2.0,,,,,,[{'services': {'delivery': True}}],http://www.halalkc.com,
1,B8MrNvNvvCtRVx4VqypLOQ,4f45397e19836ed0019960dc,Nan Dagh Kabob Dagh,Nan Dagh Kabob Dagh,"[Halal, Afghan]","[Pizzeria, Mediterranean Restaurant, Middle Ea...",[Reseda],34.200769,-118.527648,60371323012000,...,0.409895,1.0,,,,,,,http://naan-persian.keeq.io/,
2,-YE-UnethM8jbYFWl9ktRQ,6057e66e84fc74200d24489e,Flavors From Afar,Flavors From Afar,"[Middle Eastern, Halal, African]","[African Restaurant, Middle Eastern Restaurant...",[Mid-Wilshire],34.056837,-118.364192,60372169022000,...,0.913157,,,,,,,"[{'food_and_drink': {'meals': {'lunch': True, ...",http://www.flavorsfromafar.co,
3,Mj2BC82wH_oy1YC9PanJXw,15cff4ee744c4f0870ec98b4,Mama D's African Cuisine,Mama D's African Cuisine,[African],[African Restaurant],[Boyle Heights],34.026788,-118.219327,60372051102000,...,,,,,,,,,https://www.mamadsfood.com/,
4,Tr4E7gyf2UUNkk3piQFdGg,619ac68726c0a15634e46af4,Jaliz Cuisine of East Africa,Jaliz Cuisine of East Africa,[African],"[Coffee Shop, African Restaurant]",[Van Nuys],34.19105,-118.454684,60371278053004,...,0.916832,2.0,,,,,,[{'payment': {'credit_cards': {'accepts_credit...,http://www.jalizcuisine.com,


In [3]:
#choose a subset of the columns:
rest_cols = [
    'yelp_id',
    'yelp_name',
    'yelp_categories',
    'neighborhood',
    'latitude',
    'longitude',
    'display_address',
    'display_phone',
    'is_chain',
    'yelp_is_claimed',
    'yelp_price',
    'yelp_rating',
    'yelp_review_count',
    'fsq_popularity',
    'fsq_price',
    'fsq_rating',
    'fsq_total_photos',
    'fsq_total_ratings',
    'fsq_total_tips',
    'website'
]

In [5]:
df_rests = df_rests_all[rest_cols]
df_rests.shape

(22201, 20)

In [7]:
#remove restaurants without a neighborhood:
df_rests = df_rests.assign(neighborhood_rem = df_rests['neighborhood'].apply(lambda x: x if len(x) > 0 else float('NaN')))
df_rests = df_rests.loc[df_rests['neighborhood_rem'].notna(),rest_cols]
df_rests.shape

(20312, 20)

In [8]:
#remove restaurants that are not claimed on yelp:
df_rests = df_rests[df_rests['yelp_is_claimed'] == True]
df_rests.shape

(17221, 20)

In [58]:
#select only restaurants with >= 4.0 yelp rating
df_rests_selected = df_rests.query('yelp_rating >= 4.0')
df_rests_selected.shape

(10168, 20)

In [59]:
#remove chain restaurants:
df_rests_selected = df_rests_selected[df_rests_selected['is_chain'] != 1]
df_rests_selected.shape

(10013, 20)

In [60]:
#remove restaurants that do not have at least 20 reviews
df_rests_selected = df_rests_selected.query('yelp_review_count > 20')
df_rests_selected.shape

(8708, 20)

In [61]:
#create an additional column of a distribution of yelp ratings (since do not have this info):
import numpy as np
df_rests_selected.loc[:,'yelp_rating_dist'] = ''

for ind in df_rests_selected.index:
    ratings = np.random.normal(loc=df_rests_selected.loc[ind,'yelp_rating'],scale=0.5,size=df_rests_selected.loc[ind,'yelp_review_count'])
    thresh_ratings = []
    for rating in ratings:
        if rating > 5:
            thresh_ratings.append(5)
        elif rating < 0:
            thresh_ratings.append(0)
        else:
            thresh_ratings.append(rating)
    df_rests_selected.loc[:,'yelp_rating_dist'].loc[ind] = thresh_ratings

df_rests_selected.head()

Unnamed: 0,yelp_id,yelp_name,yelp_categories,neighborhood,latitude,longitude,display_address,display_phone,is_chain,yelp_is_claimed,...,yelp_rating,yelp_review_count,fsq_popularity,fsq_price,fsq_rating,fsq_total_photos,fsq_total_ratings,fsq_total_tips,website,yelp_rating_dist
0,Iue8gacs66BJeFRG83RLEA,Halal Kitchen Cafe,"[Halal, Middle Eastern, Afghan]",[Northridge],34.228354,-118.528353,"[18112 Parthenia St, Ste B, Northridge, CA 91325]",(818) 886-6106,0,True,...,4.0,182,0.922282,2.0,,,,,http://www.halalkc.com,"[2.9497774452882055, 4.147791424236157, 3.9281..."
2,-YE-UnethM8jbYFWl9ktRQ,Flavors From Afar,"[Middle Eastern, Halal, African]",[Mid-Wilshire],34.056837,-118.364192,"[1046 S Fairfax, Los Angeles, CA 90019]",(323) 879-9778,0,True,...,5.0,113,0.913157,,,,,,http://www.flavorsfromafar.co,"[4.370065654437283, 4.812044260796708, 4.10354..."
3,Mj2BC82wH_oy1YC9PanJXw,Mama D's African Cuisine,[African],[Boyle Heights],34.026788,-118.219327,"[1240 S Soto St, Los Angeles, CA 90023]",(562) 269-9818,0,True,...,4.5,57,,,,,,,https://www.mamadsfood.com/,"[4.748355119565722, 4.25590733351322, 5, 4.691..."
4,Tr4E7gyf2UUNkk3piQFdGg,Jaliz Cuisine of East Africa,[African],[Van Nuys],34.19105,-118.454684,"[14747 Kittridge St, Van Nuys, CA 91405]",(818) 571-2188,0,True,...,4.5,40,0.916832,2.0,,,,,http://www.jalizcuisine.com,"[4.371011025338866, 4.1840064065837215, 4.9456..."
5,u8U8Zc1Se_gY30zNeOTSqg,Moun Of Tunis Restaurant,"[Moroccan, African, Halal]",[Hollywood],34.098451,-118.352524,"[7445 1/2 W Sunset Blvd, Los Angeles, CA 90046]",(323) 874-3333,0,True,...,4.0,294,0.902242,2.0,8.2,26.0,34.0,10.0,http://www.mounoftunis.com/,"[4.286687403425167, 3.977754792037238, 4.13492..."


In [62]:
#use the clusters_dict to alter the ratings differently in different clusters
#in this version of the cluster dict, 0 = tourist, 1 = trendy, 2 = hipster, 3 = other wealthy, 4 = waterfront, etc
#in less traffic areas, restaurants with a lot of reviews might not be the best (bias towards low quality)
#also, can use price to sort differently. Starting mean is lower for $ and $$$$ priced restaurants
cluster_list = []
for index, row in df_rests_selected.iterrows():
    clusts = []
    for neigh in row['neighborhood']:
        if neigh in clusters_dict['cluster']:
            clusts.append(clusters_dict['cluster'][neigh])
        elif neigh == 'East La Mirada':
            clusts.append(clusters_dict['cluster']['East Whittier'])
        else:
            print(neigh) #check to see which neighborhoods present that are not in clusters
    if clusts:
        cluster_list.append(min(clusts)) #take the min cluster value
    else:
        cluster_list.append(float('NaN'))

df_rests_selected['cluster'] = cluster_list

Universal City
Griffith Park
Universal City
Griffith Park
Sepulveda Basin
Sepulveda Basin
Ramona
Unincorporated Santa Monica Mountains
Unincorporated Santa Monica Mountains
Unincorporated Santa Monica Mountains
Unincorporated Santa Susana Mountains
Ramona
Chatsworth Reservoir
Lancaster
Lancaster
Palmdale
Unincorporated Santa Susana Mountains
Castaic Canyons


In [69]:
#use the clusters to create distributions of the fsq ratings (since so many NaNs)
#calculate the averages before assigning nan ratings to -1
avg_fsq_rating = np.nanmean(list(df_rests_selected['fsq_rating']))
avg_fsq_total_rating = np.nanmean(list(df_rests_selected['fsq_total_ratings']))

rest_clusters = df_rests_selected.groupby('cluster')
avg_fsq_ratings = {}
avg_fsq_total_ratings = {}
for clust, inds in rest_clusters.groups.items():
    avg_fsq_ratings[clust] = np.nanmean(list(df_rests_selected.loc[inds,'fsq_rating']))
    avg_fsq_total_ratings[clust] = np.nanmean(list(df_rests_selected.loc[inds,'fsq_total_ratings']))

#assign nan ratings to -1 (to fix DataSpell glitch when running code below):
df_rests_selected.loc[df_rests_selected['fsq_rating'].isna(),'fsq_rating'] = -1

In [None]:
#create an additional column of a distribution of yelp ratings:
df_rests_selected.loc[:,'fsq_rating_dist'] = ''

for ind in df_rests_selected.index:
    if df_rests_selected.loc[ind,'fsq_rating'] == -1: #there is no rating, so increase the uncertainty
        clust = df_rests_selected.loc[ind,'cluster']
        if clust >= 0.0:
            ratings = np.random.normal(loc=avg_fsq_ratings[clust],scale=0.4,size=int(np.round(avg_fsq_total_ratings[clust])))
        else:
            ratings = np.random.normal(loc=avg_fsq_rating,scale=0.4,size=int(np.round(avg_fsq_total_rating)))
    else:
        ratings = np.random.normal(loc=df_rests_selected.loc[ind,'fsq_rating'],scale=0.2,size=df_rests_selected.loc[ind,'fsq_total_ratings'])
    thresh_ratings = []
    for rating in ratings:
        if rating > 10:
            thresh_ratings.append(10)
        elif rating < 0:
            thresh_ratings.append(0)
        else:
            thresh_ratings.append(rating)
    df_rests_selected.loc[:,'fsq_rating_dist'].loc[ind] = thresh_ratings

#convert -1 back to NaNs:
df_rests_selected.loc[df_rests_selected['fsq_rating'] == -1,'fsq_rating'] = float('NaN')

In [26]:
#Bayesian smoothing function
def bayes_sum(x, N=100, mu=3):
    return (np.sum(x) + mu*N) / (len(x) + N)

In [135]:
#using the price and cluster information, use bayesian smoothing to adjust the restaurants:
#this is a simple version that will be adjusted later (requires more research into clusters)
bayes_ratings_yelp = []
bayes_ratings_fsq = []
for index, row in df_rests_selected.iterrows():
    if row['yelp_price'] == '$' or row['yelp_price'] == '$$$$':
        mu_yelp = 2
        mu_fsq = 4
    else:
        mu_yelp = 3
        mu_fsq = 7

    #add negative bias for cafes and sandwiches (tend to be lunch spots):
    if 'Sandwiches' in row['yelp_categories'] or 'Cafes' in row['yelp_categories']:
        mu_yelp -= 1
        mu_fsq -= 1

    if row['cluster'] < 4: #restaurant hotspots have more reviews
        N_yelp = 50
        N_fsq = 25 #bias towards places with less fsq reviews
    else:
        N_yelp = 25
        N_fsq = 10
    bayes_ratings_yelp.append(bayes_sum(row['yelp_rating_dist'], N_yelp, mu_yelp))
    bayes_ratings_fsq.append(bayes_sum(row['fsq_rating_dist'], N_fsq, mu_fsq))

df_rests_selected['bayes_yelp_rating'] = bayes_ratings_yelp
df_rests_selected['bayes_fsq_rating'] = bayes_ratings_fsq
df_rests_selected.head()

Unnamed: 0,yelp_id,yelp_name,yelp_categories,neighborhood,latitude,longitude,display_address,display_phone,is_chain,yelp_is_claimed,...,fsq_total_photos,fsq_total_ratings,fsq_total_tips,website,yelp_rating_dist,cluster,fsq_rating_dist,bayes_yelp_rating,bayes_fsq_rating,metric
0,Iue8gacs66BJeFRG83RLEA,Halal Kitchen Cafe,"[Halal, Middle Eastern, Afghan]",[Northridge],34.228354,-118.528353,"[18112 Parthenia St, Ste B, Northridge, CA 91325]",(818) 886-6106,0,True,...,,,,http://www.halalkc.com,"[2.9497774452882055, 4.147791424236157, 3.9281...",7.0,"[7.403515952622316, 7.8253898574929615, 7.6631...",3.812923,7.442284,6.753657
2,-YE-UnethM8jbYFWl9ktRQ,Flavors From Afar,"[Middle Eastern, Halal, African]",[Mid-Wilshire],34.056837,-118.364192,"[1046 S Fairfax, Los Angeles, CA 90019]",(323) 879-9778,0,True,...,,,,http://www.flavorsfromafar.co,"[4.370065654437283, 4.812044260796708, 4.10354...",1.0,"[8.018558927385394, 8.195349944035662, 7.86164...",4.263252,7.676177,7.343778
3,Mj2BC82wH_oy1YC9PanJXw,Mama D's African Cuisine,[African],[Boyle Heights],34.026788,-118.219327,"[1240 S Soto St, Los Angeles, CA 90023]",(562) 269-9818,0,True,...,,,,https://www.mamadsfood.com/,"[4.748355119565722, 4.25590733351322, 5, 4.691...",8.0,"[7.191225952107329, 7.5795151030085, 7.4688658...",4.036243,7.503887,6.917342
4,Tr4E7gyf2UUNkk3piQFdGg,Jaliz Cuisine of East Africa,[African],[Van Nuys],34.19105,-118.454684,"[14747 Kittridge St, Van Nuys, CA 91405]",(818) 571-2188,0,True,...,,,,http://www.jalizcuisine.com,"[4.371011025338866, 4.1840064065837215, 4.9456...",2.0,"[7.718855460334855, 7.896433987712421, 7.35014...",3.62033,7.465515,6.258716
5,u8U8Zc1Se_gY30zNeOTSqg,Moun Of Tunis Restaurant,"[Moroccan, African, Halal]",[Hollywood],34.098451,-118.352524,"[7445 1/2 W Sunset Blvd, Los Angeles, CA 90046]",(323) 874-3333,0,True,...,26.0,34.0,10.0,http://www.mounoftunis.com/,"[4.286687403425167, 3.977754792037238, 4.13492...",0.0,"[8.205236312969054, 7.9854174572136785, 8.1547...",3.84294,7.6695,6.973409


In [136]:
#create an overall metric, taking into account both yelp and fsq ratings:
import pandas as pd
metric_cols = [
    'bayes_yelp_rating',
    'bayes_fsq_rating'
]

df_metric = df_rests_selected[metric_cols]

#scale these columns
from sklearn.preprocessing import MinMaxScaler
max_scaler = MinMaxScaler()
df_metric = pd.DataFrame(max_scaler.fit_transform(df_metric),columns=df_metric.columns,index=df_metric.index)

df_metric.head()

Unnamed: 0,bayes_yelp_rating,bayes_fsq_rating
0,0.654256,0.675818
2,0.815541,0.717598
3,0.734238,0.686822
4,0.585278,0.679967
5,0.665006,0.716406


In [137]:
#average the scaled columns and mult by 10:
df_rests_selected['metric'] = df_metric.mean(axis=1) * 10
df_rests_selected.head()

Unnamed: 0,yelp_id,yelp_name,yelp_categories,neighborhood,latitude,longitude,display_address,display_phone,is_chain,yelp_is_claimed,...,fsq_total_photos,fsq_total_ratings,fsq_total_tips,website,yelp_rating_dist,cluster,fsq_rating_dist,bayes_yelp_rating,bayes_fsq_rating,metric
0,Iue8gacs66BJeFRG83RLEA,Halal Kitchen Cafe,"[Halal, Middle Eastern, Afghan]",[Northridge],34.228354,-118.528353,"[18112 Parthenia St, Ste B, Northridge, CA 91325]",(818) 886-6106,0,True,...,,,,http://www.halalkc.com,"[2.9497774452882055, 4.147791424236157, 3.9281...",7.0,"[7.403515952622316, 7.8253898574929615, 7.6631...",3.812923,7.442284,6.650367
2,-YE-UnethM8jbYFWl9ktRQ,Flavors From Afar,"[Middle Eastern, Halal, African]",[Mid-Wilshire],34.056837,-118.364192,"[1046 S Fairfax, Los Angeles, CA 90019]",(323) 879-9778,0,True,...,,,,http://www.flavorsfromafar.co,"[4.370065654437283, 4.812044260796708, 4.10354...",1.0,"[8.018558927385394, 8.195349944035662, 7.86164...",4.263252,7.676177,7.665696
3,Mj2BC82wH_oy1YC9PanJXw,Mama D's African Cuisine,[African],[Boyle Heights],34.026788,-118.219327,"[1240 S Soto St, Los Angeles, CA 90023]",(562) 269-9818,0,True,...,,,,https://www.mamadsfood.com/,"[4.748355119565722, 4.25590733351322, 5, 4.691...",8.0,"[7.191225952107329, 7.5795151030085, 7.4688658...",4.036243,7.503887,7.105299
4,Tr4E7gyf2UUNkk3piQFdGg,Jaliz Cuisine of East Africa,[African],[Van Nuys],34.19105,-118.454684,"[14747 Kittridge St, Van Nuys, CA 91405]",(818) 571-2188,0,True,...,,,,http://www.jalizcuisine.com,"[4.371011025338866, 4.1840064065837215, 4.9456...",2.0,"[7.718855460334855, 7.896433987712421, 7.35014...",3.62033,7.465515,6.326229
5,u8U8Zc1Se_gY30zNeOTSqg,Moun Of Tunis Restaurant,"[Moroccan, African, Halal]",[Hollywood],34.098451,-118.352524,"[7445 1/2 W Sunset Blvd, Los Angeles, CA 90046]",(323) 874-3333,0,True,...,26.0,34.0,10.0,http://www.mounoftunis.com/,"[4.286687403425167, 3.977754792037238, 4.13492...",0.0,"[8.205236312969054, 7.9854174572136785, 8.1547...",3.84294,7.6695,6.907061


In [173]:
#create dictionaries with the restaurant info
from collections import defaultdict
rest_neigh_dict = defaultdict(list)
rest_cuisine_dict = defaultdict(list)
rest_price_dict = defaultdict(list)

for row in df_rests_selected.index:
    this_rest = {
        'yelp_id': df_rests_selected.loc[row,'yelp_id'],
        'name': df_rests_selected.loc[row,'yelp_name'],
        'neighborhood': df_rests_selected.loc[row,'neighborhood'],
        'categories': df_rests_selected.loc[row,'yelp_categories'],
        'price': df_rests_selected.loc[row,'yelp_price'],
        'yelp_rating': df_rests_selected.loc[row,'yelp_rating'],
        'fsq_rating': df_rests_selected.loc[row,'fsq_rating'],
        'yelp_review_count': df_rests_selected.loc[row,'yelp_review_count'],
        'fsq_total_ratings': df_rests_selected.loc[row,'fsq_total_ratings'],
        'bayes_yelp_rating': df_rests_selected.loc[row,'bayes_yelp_rating'],
        'bayes_fsq_rating': df_rests_selected.loc[row,'bayes_fsq_rating'],
        'metric': df_rests_selected.loc[row,'metric'],
        'lat': df_rests_selected.loc[row,'latitude'],
        'long': df_rests_selected.loc[row,'longitude'],
        'display_address': df_rests_selected.loc[row,'display_address'],
        'display_phone': df_rests_selected.loc[row,'display_phone'],
        'website': df_rests_selected.loc[row,'website']
    }
    for neigh in df_rests_selected.loc[row,'neighborhood']:
        rest_neigh_dict[neigh].append(this_rest)
    for cat in df_rests_selected.loc[row,'yelp_categories']:
        rest_cuisine_dict[cat].append(this_rest)
    if '$' in str(df_rests_selected.loc[row,'yelp_price']):
        price = df_rests_selected.loc[row,'yelp_price']
        rest_price_dict[price].append(this_rest)

In [139]:
top_rests = pd.DataFrame(rest_neigh_dict['West Hollywood'])
top_rests.sort_values(by=['metric'],axis=0,ascending=False)
#it appears that people using fsq have bias towards fast casual
#do not use fsq ratings, instead just use the bayes_yelp_rating

Unnamed: 0,yelp_id,name,categories,price,yelp_rating,fsq_rating,yelp_review_count,fsq_total_ratings,bayes_yelp_rating,bayes_fsq_rating,metric,lat,long,display_address,display_phone,website
88,mfRAMQGOdQk_v1CmjXq3Eg,Soho House West Hollywood,"[Middle Eastern, Cocktail Bars, Venues & Event...",$$$$,4.5,9.1,408,873.0,4.173791,8.961024,8.653073,34.089801,-118.392811,"[9200 Sunset Blvd, West Hollywood, CA 90069]",(310) 432-9200,http://www.sohohousewh.com
92,Su2ts0CfPazGJe6zWNb12A,Fresh Corn Grill,"[Pizza, Sandwiches, Salad]",$$,4.5,8.8,1100,234.0,4.369628,8.530342,8.619098,34.086900,-118.380350,"[8714 Santa Monica Blvd, West Hollywood, CA 90...",(310) 855-9592,http://www.freshcorngrill.com
32,5miNhLxuxaW5zLEchv6oPw,Dialog Cafe,"[Cafes, Breakfast & Brunch]",$$,4.5,8.7,1953,184.0,4.391599,8.379129,8.523384,34.090509,-118.382850,"[8766 Holloway Dr, West Hollywood, CA 90069]",(310) 289-1630,http://www.dialog-cafe.com
0,cal0Wpupxj9c_AV7WzDXsw,GRANVILLE,"[American (New), Cocktail Bars]",$$,4.5,8.5,2171,73.0,4.433676,8.096241,8.346069,34.077130,-118.380680,"[8701 Beverly Blvd, West Hollywood, CA 90048]",(424) 522-5161,http://www.granville.net/
104,QLXO-aOTh06CaA8_E9wt2w,Otus Thai Kitchen & Coffee,"[Thai, Coffee & Tea, Breakfast & Brunch]",$$,4.5,8.6,615,86.0,4.365808,8.193647,8.311534,34.093785,-118.344225,"[1253 N La Brea Ave, West Hollywood, CA 90038]",(323) 969-8611,https://www.otusthaikitchen.com/
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39,oVL1_eSh_gSOMKpDIkEg4Q,Coffee Coffee,"[Coffee & Tea, Breakfast & Brunch, Creperies]",$,4.0,8.0,165,36.0,3.483393,6.368078,5.100821,34.089808,-118.361236,"[1040 N Fairfax Ave, West Hollywood, CA 90046]",(323) 952-6590,http://www.coffeecoffeela.com
66,Qs_XdaqCGWU8CF5_Y_gKwA,Sushi Ginza Onodera,"[Sushi Bars, Japanese]",$$$$,4.5,8.2,176,12.0,3.968693,5.365808,5.074680,34.082126,-118.376655,"[609 North La Cienega Blvd, West Hollywood, CA...",(323) 792-7776,https://onodera-group.com/en/
36,UdY7cSva5qr15zy8c3t6fw,The Artist Tree Studio Dispensary Lounge,"[Cafes, Cannabis Dispensaries, Lounges]",,4.0,,25,,2.639879,7.484812,4.587724,34.087960,-118.379970,"[8625 Santa Monica Blvd, West Hollywood, CA 90...",,https://www.theartisttree.com/studio-cannabis-...
42,qNB9rcprlKr87SKC6RutJQ,Mechta Deli,[Delis],$,4.0,,33,,2.822555,7.093512,4.565357,34.090510,-118.357020,"[7712 Santa Monica Blvd, West Hollywood, CA 90...",(323) 654-2893,


In [157]:
top_rests = pd.DataFrame(rest_neigh_dict['West Hollywood'])
top_rests.sort_values(by=['bayes_yelp_rating'],axis=0,ascending=False)

Unnamed: 0,yelp_id,name,neighborhood,categories,price,yelp_rating,fsq_rating,yelp_review_count,fsq_total_ratings,bayes_yelp_rating,bayes_fsq_rating,metric,lat,long,display_address,display_phone,website
103,pl5_OLDDXMOdJneeTya48A,LOVE,[West Hollywood],"[Vegan, Vegetarian, Thai]",$$,5.0,,214,,4.459488,7.763852,8.095414,34.091105,-118.367844,"[8205 Santa Monica Blvd, Unit 5, 6, West Holl...",(323) 688-2065,http://www.loveinweho.com/
0,cal0Wpupxj9c_AV7WzDXsw,GRANVILLE,[West Hollywood],"[American (New), Cocktail Bars]",$$,4.5,8.5,2171,73.0,4.433676,8.096241,8.346069,34.077130,-118.380680,"[8701 Beverly Blvd, West Hollywood, CA 90048]",(424) 522-5161,http://www.granville.net/
2,1YedTUOBgR5XmJ1BMlv70Q,La Bohème,[West Hollywood],"[Bars, American (New), Venues & Event Spaces]",$$$,4.5,8.0,2249,89.0,4.432827,7.772212,8.055138,34.090363,-118.373489,"[8400 Santa Monica Blvd, West Hollywood, CA 90...",(323) 848-2360,http://www.globaldiningca.com/laboheme/
32,5miNhLxuxaW5zLEchv6oPw,Dialog Cafe,[West Hollywood],"[Cafes, Breakfast & Brunch]",$$,4.5,8.7,1953,184.0,4.391599,8.379129,8.523384,34.090509,-118.382850,"[8766 Holloway Dr, West Hollywood, CA 90069]",(310) 289-1630,http://www.dialog-cafe.com
55,_DiPHeKsR9Pzk-lrAiwv-Q,Pura Vita,[West Hollywood],"[Italian, Vegan, Wine Bars]",$$,4.5,8.7,715,15.0,4.375980,7.627332,7.823938,34.090440,-118.369670,"[8274 Santa Monica Blvd, West Hollywood, CA 90...",(323) 688-2303,http://www.puravitalosangeles.com/
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,xj7b4ueNQed23k7qHRIg-A,Birdies,[West Hollywood],"[Donuts, Breakfast & Brunch, Sandwiches]",,4.0,,88,,3.320805,7.497558,5.818477,34.090540,-118.361740,"[7900 Santa Monica Blvd, West Hollywood, CA 90...",(323) 848-7161,
99,lH5_ACeOSEOKdVMWeHnZcw,Tasty Donuts and Cafe,[West Hollywood],"[Donuts, Coffee & Tea, Sandwiches]",$,4.0,,171,,3.306074,6.943392,5.297136,34.091073,-118.367701,"[8205 Santa Monica Blvd, Ste 8, West Hollywood...",(323) 650-1325,https://www.groupon.com/biz/west-hollywood-ca/...
34,X9EPhqojom3ha6QLq5POCg,Citrus On Sunset,[West Hollywood],"[Cafes, Breakfast & Brunch, Pizza]",$,4.0,5.9,147,12.0,3.192965,3.946329,2.417722,34.089882,-118.392856,"[9200 Sunset Blvd, Ste 102, West Hollywood, CA...",(310) 273-6900,http://www.citrusonsunset.com
42,qNB9rcprlKr87SKC6RutJQ,Mechta Deli,[West Hollywood],[Delis],$,4.0,,33,,2.822555,7.093512,4.565357,34.090510,-118.357020,"[7712 Santa Monica Blvd, West Hollywood, CA 90...",(323) 654-2893,


In [175]:
#save the rest dicts:
with open(fpath + 'rest_neigh_dict.pkl','wb') as f:
    pickle.dump(rest_neigh_dict,f)

with open(fpath + 'rest_cuisine_dict.pkl','wb') as f:
    pickle.dump(rest_cuisine_dict,f)

with open(fpath + 'rest_price_dict.pkl','wb') as f:
    pickle.dump(rest_price_dict,f)

In [182]:
#create a dictionary with all cuisines by neighborhood
neigh_cuisines_dict = defaultdict(list)

cuisines = ['Acai Bowls',
            'Afghan',
            'African',
            'American (New)',
            'American (Traditional)',
            'Arabic',
            'Argentine',
            'Armenian',
            'Asian Fusion',
            'Australian',
            'Bagels',
            'Bakeries',
            'Bangladeshi',
            'Barbeque',
            'Bars',
            'Basque',
            'Beer Gardens',
            'Belgian',
            'Brasseries',
            'Brazilian',
            'Breakfast & Brunch',
            'Breweries',
            'Brewpubs',
            'British',
            'Bubble Tea',
            'Buffets',
            'Burgers',
            'Burmese',
            'Cafes',
            'Cajun/Creole',
            'Cambodian',
            'Cantonese',
            'Caribbean',
            'Cheesesteaks',
            'Chicken Shop',
            'Chicken Wings',
            'Chinese',
            'Cocktail Bars',
            'Coffee & Tea',
            'Coffee Roasteries',
            'Colombian',
            'Comfort Food',
            'Conveyor Belt Sushi',
            'Creperies',
            'Cuban',
            'Cupcakes',
            'Delis',
            'Desserts',
            'Dim Sum',
            'Diners',
            'Dinner Theater',
            'Dive Bars',
            'Donuts',
            'Empanadas',
            'Ethiopian',
            'Falafel',
            'Farmers Market',
            'Fast Food',
            'Filipino',
            'Fish & Chips',
            'Fondue',
            'Food Stands',
            'Food Trucks',
            'French',
            'Gastropubs',
            'Gelato',
            'Georgian',
            'German',
            'Gluten-Free',
            'Greek',
            'Hainan',
            'Halal',
            'Hawaiian',
            'Himalayan/Nepalese',
            'Honduran',
            'Hong Kong Style Cafe',
            'Hookah Bars',
            'Hot Dogs',
            'Hot Pot',
            'Ice Cream & Frozen Yogurt',
            'Indian',
            'Indonesian',
            'Irish',
            'Irish Pub',
            'Italian',
            'Izakaya',
            'Japanese',
            'Japanese Curry',
            'Juice Bars & Smoothies',
            'Karaoke',
            'Kebab',
            'Korean',
            'Kosher',
            'Laotian',
            'Latin American',
            'Lebanese',
            'Live/Raw Food',
            'Malaysian',
            'Mediterranean',
            'Mexican',
            'Middle Eastern',
            'Modern European',
            'Mongolian',
            'Moroccan',
            'New Mexican Cuisine',
            'Nicaraguan',
            'Noodles',
            'Pakistani',
            'Pan Asian',
            'Pancakes',
            'Pasta Shops',
            'Patisserie/Cake Shop',
            'Persian/Iranian',
            'Peruvian',
            'Piano Bars',
            'Pizza',
            'Poke',
            'Polish',
            'Polynesian',
            'Pop-Up Restaurants',
            'Portuguese',
            'Poutineries',
            'Pretzels',
            'Pubs',
            'Puerto Rican',
            'Ramen',
            'Russian',
            'Salad',
            'Salvadoran',
            'Sandwiches',
            'Sardinian',
            'Scandinavian',
            'Seafood',
            'Seafood Markets',
            'Shanghainese',
            'Shaved Ice',
            'Sicilian',
            'Singaporean',
            'Smokehouse',
            'Somali',
            'Soul Food',
            'Soup',
            'South African',
            'Southern',
            'Spanish',
            'Speakeasies',
            'Sports Bars',
            'Sri Lankan',
            'Steakhouses',
            'Street Vendors',
            'Supper Clubs',
            'Sushi Bars',
            'Syrian',
            'Szechuan',
            'Tacos',
            'Taiwanese',
            'Tapas Bars',
            'Tapas/Small Plates',
            'Tea Rooms',
            'Teppanyaki',
            'Tex-Mex',
            'Thai',
            'Themed Cafes',
            'Tiki Bars',
            'Trinidadian',
            'Turkish',
            'Tuscan',
            'Ukrainian',
            'Uzbek',
            'Vegan',
            'Vegetarian',
            'Venezuelan',
            'Vietnamese',
            'Waffles',
            'Whiskey Bars',
            'Wine Bars',
            'Wine Tasting Room',
            'Wineries',
            'Wraps']
neigh_cuisines_dict['All'] = cuisines

for neigh in rest_neigh_dict:
    rests = pd.DataFrame(rest_neigh_dict[neigh])
    for ind in rests.index:
        for cat in rests.loc[ind,'categories']:
            if cat in cuisines:
                if cat not in neigh_cuisines_dict[neigh]:
                    neigh_cuisines_dict[neigh].append(cat)
    neigh_cuisines_dict[neigh] = sorted(neigh_cuisines_dict[neigh])

neigh_cuisines_dict

defaultdict(list,
            {'All': ['Acai Bowls',
              'Afghan',
              'African',
              'American (New)',
              'American (Traditional)',
              'Arabic',
              'Argentine',
              'Armenian',
              'Asian Fusion',
              'Australian',
              'Bagels',
              'Bakeries',
              'Bangladeshi',
              'Barbeque',
              'Bars',
              'Basque',
              'Beer Gardens',
              'Belgian',
              'Brasseries',
              'Brazilian',
              'Breakfast & Brunch',
              'Breweries',
              'Brewpubs',
              'British',
              'Bubble Tea',
              'Buffets',
              'Burgers',
              'Burmese',
              'Cafes',
              'Cajun/Creole',
              'Cambodian',
              'Cantonese',
              'Caribbean',
              'Cheesesteaks',
              'Chicken Shop',
              'Chic

In [183]:
#save the neigh_cuisines_dict
with open(fpath + 'neigh_cuisines_dict.pkl','wb') as f:
    pickle.dump(neigh_cuisines_dict,f)

In [190]:
#create a dictionary with all prices by neighborhood
neigh_prices_dict = defaultdict(list)

prices = ['$','$$','$$$','$$$$']
neigh_prices_dict['All'] = prices

for neigh in rest_neigh_dict:
    rests = pd.DataFrame(rest_neigh_dict[neigh])
    for ind in rests.index:
        if rests.loc[ind,'price'] in prices:
            if rests.loc[ind,'price'] not in neigh_prices_dict[neigh]:
                neigh_prices_dict[neigh].append(rests.loc[ind,'price'])
    neigh_prices_dict[neigh] = sorted(neigh_prices_dict[neigh])

neigh_prices_dict

defaultdict(list,
            {'All': ['$', '$$', '$$$', '$$$$'],
             'Northridge': ['$', '$$', '$$$'],
             'Mid-Wilshire': ['$', '$$'],
             'Boyle Heights': ['$', '$$', '$$$'],
             'Van Nuys': ['$', '$$'],
             'Hollywood': ['$', '$$', '$$$', '$$$$'],
             'Inglewood': ['$', '$$'],
             'Westlake': ['$', '$$', '$$$', '$$$$'],
             'Culver City': ['$', '$$', '$$$', '$$$$'],
             'Pasadena': ['$', '$$', '$$$', '$$$$'],
             'Sherman Oaks': ['$', '$$', '$$$', '$$$$'],
             'Toluca Lake': ['$', '$$', '$$$'],
             'Downtown': ['$', '$$', '$$$', '$$$$'],
             'Arlington Heights': ['$', '$$', '$$$'],
             'Beverly Hills': ['$', '$$', '$$$', '$$$$'],
             'Glendale': ['$', '$$', '$$$', '$$$$'],
             'East Hollywood': ['$', '$$', '$$$'],
             'Larchmont': ['$', '$$', '$$$', '$$$$'],
             'Studio City': ['$', '$$', '$$$', '$$$$'],
             'Shad

In [191]:
#save the neigh_prices_dict
with open(fpath + 'neigh_prices_dict.pkl','wb') as f:
    pickle.dump(neigh_prices_dict,f)

In [192]:
#create a dictionary with all prices by cuisine
cuisine_prices_dict = defaultdict(list)

prices = ['$','$$','$$$','$$$$']
cuisine_prices_dict['All'] = prices

for cuisine in neigh_cuisines_dict['All']:
    rests = df_rests_selected
    for ind in rests.index:
        for cat in rests.loc[ind,'yelp_categories']:
            if cat in cuisine and rests.loc[ind,'yelp_price'] in prices:
                if rests.loc[ind,'yelp_price'] not in cuisine_prices_dict[cuisine]:
                    cuisine_prices_dict[cuisine].append(rests.loc[ind,'yelp_price'])
    cuisine_prices_dict[cuisine] = sorted(cuisine_prices_dict[cuisine])

cuisine_prices_dict

defaultdict(list,
            {'All': ['$', '$$', '$$$', '$$$$'],
             'Acai Bowls': ['$', '$$'],
             'Afghan': ['$$'],
             'African': ['$$'],
             'American (New)': ['$', '$$', '$$$', '$$$$'],
             'American (Traditional)': ['$', '$$', '$$$', '$$$$'],
             'Arabic': ['$$'],
             'Argentine': ['$', '$$', '$$$'],
             'Armenian': ['$', '$$', '$$$'],
             'Asian Fusion': ['$', '$$', '$$$', '$$$$'],
             'Australian': ['$$'],
             'Bagels': ['$', '$$'],
             'Bakeries': ['$', '$$', '$$$'],
             'Bangladeshi': ['$', '$$'],
             'Barbeque': ['$', '$$', '$$$', '$$$$'],
             'Bars': ['$$', '$$$', '$$$$'],
             'Basque': ['$$', '$$$'],
             'Beer Gardens': ['$', '$$'],
             'Belgian': ['$$'],
             'Brasseries': ['$$$', '$$$$'],
             'Brazilian': ['$', '$$', '$$$', '$$$$'],
             'Breakfast & Brunch': ['$', '$$', '$$$', '$$$$'],

In [193]:
#save the cuisine_prices_dict
with open(fpath + 'cuisine_prices_dict.pkl','wb') as f:
    pickle.dump(cuisine_prices_dict,f)

In [196]:
#create a dictionary with all prices by neigh & cuisine choice
neigh_cuisine_prices_dict = defaultdict(list)

prices = ['$','$$','$$$','$$$$']
neigh_cuisine_prices_dict['All'] = prices

for neigh in rest_neigh_dict:
    rests = pd.DataFrame(rest_neigh_dict[neigh])
    for ind in rests.index:
        for cat in rests.loc[ind,'categories']:
            if cat in neigh_cuisines_dict['All'] and rests.loc[ind,'price'] in prices:
                if rests.loc[ind,'price'] not in neigh_cuisine_prices_dict[neigh + '_' + cat]:
                    neigh_cuisine_prices_dict[neigh + '_' + cat].append(rests.loc[ind,'price'])
#sort:
for key in neigh_cuisine_prices_dict:
    neigh_cuisine_prices_dict[key] = sorted(neigh_cuisine_prices_dict[key])

neigh_cuisine_prices_dict

defaultdict(list,
            {'All': ['$', '$$', '$$$', '$$$$'],
             'Northridge_Halal': ['$', '$$'],
             'Northridge_Middle Eastern': ['$$'],
             'Northridge_Afghan': ['$$'],
             'Northridge_Burgers': ['$', '$$'],
             'Northridge_American (Traditional)': ['$', '$$'],
             'Northridge_Gastropubs': ['$$'],
             'Northridge_Bars': ['$$'],
             'Northridge_Sports Bars': ['$$'],
             'Northridge_Asian Fusion': ['$', '$$', '$$$'],
             'Northridge_Ramen': ['$$'],
             'Northridge_Fast Food': ['$', '$$'],
             'Northridge_Sandwiches': ['$', '$$'],
             'Northridge_Caribbean': ['$$'],
             'Northridge_Latin American': ['$$'],
             'Northridge_Desserts': ['$', '$$'],
             'Northridge_Comfort Food': ['$'],
             'Northridge_Bakeries': ['$', '$$'],
             'Northridge_Delis': ['$', '$$'],
             'Northridge_Food Trucks': ['$', '$$'],
            

In [197]:
#save the neigh_cuisine_prices_dict
with open(fpath + 'neigh_cuisine_prices_dict.pkl','wb') as f:
    pickle.dump(neigh_cuisine_prices_dict,f)