In [6]:
from IPython.display import HTML

# Render a small HTML/JavaScript snippet as this cell's output.
# The script registers code_toggle() to run once the document is ready, which
# hides every `div.input` element, and the link below calls it again to flip
# visibility on each click.
# NOTE(review): the script uses `$`, so it presumably relies on jQuery being
# provided by the notebook front end — confirm for the target viewer.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')


# Best-Value Restaurants in Berlin

There are so many good restaurants in Berlin, but it is sometimes hard to find places that are both cheap and good. It turns out that the Yelp database can help. I looked at 5,000 restaurants in Berlin and would like to share the results.

**Dark green** = Highest value restaurants (Highest rating, lowest prices and many reviews)

**Dark red** = Lowest value restaurants (Lower rating, higher prices and fewer reviews)

**Bubble size** = Relative number of reviews on Yelp
<br><br>


*Note: The following weights apply:*

*45% Rating, 36% Price & 19% Number of reviews*

In [7]:
import gmaps
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

%matplotlib inline

# The API key is an empty string in this copy of the notebook.
# NOTE(review): presumably a real Google Maps key has to be supplied for the
# map at the end to render — confirm.
gmaps.configure(api_key="")

# Read the restaurant export and drop repeated rows that share the same "id".
df = pd.read_csv("BER_restaurants.csv").drop_duplicates(subset="id")

In [8]:
def _rescale(series, low, high):
    """Min-max scale ``series`` linearly so its minimum maps to ``low`` and its maximum to ``high``."""
    smallest = series.min(axis=0)
    largest = series.max(axis=0)
    return ((series - smallest) / (largest - smallest)) * (high - low) + low


# Bring rating and price onto the same 1-4 range so they can be combined.
rating = _rescale(df["rating"], 1., 4.)
price = _rescale(df["price"], 1., 4.)


# Review counts for plotting: raw copy, natural log, and two rescaled variants.
df["true_review_count"] = df["review_count"]
# NOTE(review): np.log(0) is -inf, so a restaurant with zero reviews would
# presumably break the min-max scaling of the log column below — confirm the
# data contains no zero counts.
review_count = np.log(df["true_review_count"])
df["log_true_review_count"] = review_count
# Raw counts mapped to 4-8; used later as the bubble size on the map.
df["scaled_true_review_count"] = _rescale(df["true_review_count"], 4., 8.)

log_count = df["log_true_review_count"]
# Log counts mapped to 1-4, the same range as rating and price.
df["scaled_log_true_review_count"] = _rescale(log_count, 1., 4.)


# Calculate value (weighting: 40% = price, 60% = quality)
rat_weig = 0.6
price_weig = 0.4
df["value"] = rat_weig * rating - price_weig * price

# Calculate hype value: same idea, plus a bonus for the (log-scaled) number of reviews.
rat_weig = 0.5
price_weig = 0.4
hype_weig = 0.2
df["hype_value"] = (
    rat_weig * rating
    - price_weig * price
    + hype_weig * df["scaled_log_true_review_count"]
)

In [9]:
##########################################
# Keep restaurants with at least 50 reviews, then rank them by hype value
##########################################

# Author's remark: this threshold removes all 5 star restaurants.
df = df[df["true_review_count"] >= 50]

# Rank by hype value, breaking ties by review count (both descending).
# The sorted frame is reassigned rather than sorted in place, so the filtered
# selection above is never mutated.
df = df.sort_values(["hype_value", "true_review_count"], ascending=[False, False])
df = df.reset_index(drop=True)

# Reference vars
num_display = 500  ###### <<<----- Display Top X restaurants

# head() returns exactly num_display rows (or fewer if the frame is shorter).
# A label slice such as .loc[0:num_display] includes its end label and would
# therefore select num_display + 1 rows.
_top = df.head(num_display)

locations = _top[["lat", "long"]]
name = _top[["id"]]
value = _top[["value"]]
hype_value = _top[["hype_value"]]
size = _top["scaled_true_review_count"].astype(int)  ###### <<<----- bubble size by parameter

In [10]:
# Echo the configured cap (num_display) together with the 50-review threshold used for filtering.
print ("Number of top restaurants displayed on the map: ", num_display, "(all with at least 50 reviews)")

Number of top restaurants displayed on the map:  500 (all with at least 50 reviews)


In [11]:
import matplotlib.cm as cm
import matplotlib 

def return_colors(data_series):
    """Map numeric values onto hex colours of the ``RdYlGn`` colormap.

    Values are normalised linearly between the minimum and maximum of the
    input before the colormap is applied, so the colours are relative to the
    data passed in.

    Parameters
    ----------
    data_series : pandas.Series or pandas.DataFrame
        Values to colour. For a DataFrame only the first column is used.

    Returns
    -------
    list of str
        One ``#rrggbb`` string per row, in row order. Empty input gives ``[]``.
    """
    # Callers in this notebook pass a one-column DataFrame; a plain Series is
    # accepted as well, since that is what the parameter name suggests.
    if getattr(data_series, "ndim", 1) == 2:
        column = data_series.iloc[:, 0]
    else:
        column = data_series

    if len(column) == 0:
        return []

    # Scalar bounds (not one-element Series) for the normaliser.
    norm = matplotlib.colors.Normalize(vmin=column.min(), vmax=column.max(), clip=True)
    mapper = cm.ScalarMappable(norm=norm, cmap=cm.RdYlGn)

    # Convert all values to RGBA in one call; only the hex formatting is per row.
    rgba_rows = mapper.to_rgba(column.to_numpy())
    return [matplotlib.colors.rgb2hex(rgba) for rgba in rgba_rows]

In [12]:
import sys  

#reload(sys)  
#sys.setdefaultencoding('utf8')

# One hex colour per restaurant, derived from its hype value.
colors = return_colors(hype_value) ####### <<<----- Color by parameter

# The restaurant ids serve as both hover text and info-box content.
name2 = list(name["id"].to_numpy())

# Fill and outline share the same colours and opacity.
marker_style = dict(
    fill_color=colors,
    fill_opacity=0.6,
    stroke_color=colors,
    stroke_opacity=0.6,
)

fig = gmaps.figure()
symbols = gmaps.symbol_layer(
    locations,
    hover_text=name2,
    info_box_content=name2,
    display_info_box=True,
    scale=size.tolist(),
    **marker_style
)
fig.add_layer(symbols)
fig