# Plotly Figures for the Website:

This document contains the Plotly figures used for the part of the project that investigates the influence of economic factors on beer ratings. <br>
The code is only lightly commented, because the graphs show the same information as the ones in the notebook. <br>
It is included for completeness.

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import pickle
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import datetime

In [2]:
# Load the US reviews, restricted to the six columns listed in `usecols`.
df_US_reviews_dates = pd.read_csv('../DataframeStorage/df_US_reviews.csv', usecols = ['beer_name','text','date','user_location', 'month_year', 'year'])


In [122]:
# Load the pickled row labels that are later passed to `.loc` to select the
# price-related reviews.
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
with open("../DataframeStorage/index_price.bin", "rb") as data:
    index_price = pickle.load(data)

In [169]:
# Reviews that mention price: selected by the pre-computed row labels and
# ordered chronologically.
df_price_related = df_US_reviews_dates.loc[index_price].sort_values('date', ascending=True)

# Per-month counts of the price-mentioning reviews. `count()` stores the number
# of non-null entries in every remaining column, so 'beer_name' holds the
# number of reviews of that month.
monthly_counts_price_reviews = df_price_related.groupby('month_year').count().reset_index()

# Total number of reviews of US users per month.
# Only months with more than 50 reviews are kept. The explicit copy makes the
# filtered frame independent of its parent, so columns can be added to it later
# without pandas raising a SettingWithCopyWarning.
monthly_counts_US_reviews = df_US_reviews_dates.groupby('month_year').count().reset_index()
monthly_counts_US_reviews = monthly_counts_US_reviews[monthly_counts_US_reviews['beer_name'] > 50].copy()

# Define a dataframe that contains the percentage of reviews mentioning price per month.
# First extract the monthly counts together with the corresponding month/year.
price_count = monthly_counts_price_reviews[['month_year', 'beer_name']]
review_count = monthly_counts_US_reviews[['month_year', 'beer_name']]

# The two counts are merged on the month (inner join, so only months present in
# both frames survive) and the suffixed count columns get descriptive names.
df_ratio = price_count.merge(review_count, on='month_year').rename(
    columns={'beer_name_x': 'price_related', 'beer_name_y': 'all_reviews'}
)
# Share of price-mentioning reviews in percent, rounded to two decimals.
df_ratio['ratio'] = (df_ratio['price_related'] / df_ratio['all_reviews'] * 100).round(2)

# Define ticks: 17 evenly spaced major positions from 9 to 201 (step 12);
# the minor ticks are all remaining positions in 0..207.
major_ticks = np.linspace(9, 201, 17).astype(int)
minor_ticks = list(np.setdiff1d(np.arange(208), major_ticks))

--------------------------------------------------------------

# Figure 1

In [376]:
# Parse the 'YYYY-MM' month labels into timestamps (first day of the month).
# The result replaces the existing 'date' column, which until now only held the
# per-month count produced by groupby().count().
# `assign` returns a new frame instead of writing into a possibly filtered
# slice, which avoids pandas' SettingWithCopyWarning.
monthly_counts_price_reviews = monthly_counts_price_reviews.assign(
    date=pd.to_datetime(monthly_counts_price_reviews['month_year'], format="%Y-%m")
)
monthly_counts_US_reviews = monthly_counts_US_reviews.assign(
    date=pd.to_datetime(monthly_counts_US_reviews['month_year'], format="%Y-%m")
)

# Keep only months strictly after the cutoff; because the comparison is strict,
# the month 2000-04 itself is excluded.
cutoff = datetime.datetime(2000, 4, 1)
monthly_counts_price_reviews = monthly_counts_price_reviews[monthly_counts_price_reviews['date'] > cutoff]
monthly_counts_US_reviews = monthly_counts_US_reviews[monthly_counts_US_reviews['date'] > cutoff]

In [379]:
# Figure 1: monthly number of price-mentioning reviews next to the total number
# of reviews, with buttons to show either series or both.

# Bar trace with the number of price-mentioning reviews per month.
fig_1 = px.bar(monthly_counts_price_reviews, x='month_year', y='beer_name', log_y=True)
fig_1.update_traces(marker_color='green')

# Bar trace with the total number of reviews per month.
fig_2 = px.bar(monthly_counts_US_reviews, x='month_year', y='beer_name', log_y=True)
fig_2.update_traces(marker_color='red')

# Put both traces into the same figure. Only the traces are taken over from the
# two express figures, so the axis settings are applied again on the new figure.
fig2 = go.Figure(data = fig_1.data + fig_2.data)
# For better visualization a logarithmic axis is used.
fig2.update_yaxes(
    type="log",
    dtick=2)
fig2.update_xaxes(ticks="outside",)
fig2.update_layout(
    xaxis_title="Date", yaxis_title="Count",
    title= "Number of reviews mentioning price compared to total count of ratings",
    #position title in the center
    title_x=0.5,
)

# Buttons that switch the trace visibility and the title at the same time.
# The visibility lists follow the trace order: [price-mentioning, total].
fig2.update_layout(
    updatemenus=[
        dict(
            type="buttons",
            #If buttons are arranged on top or beside
            direction="down",
            #set positions
            x=-0.08,
            y= 1.12,
            buttons=[
                dict(
                    label="Price-mentioning",
                    method="update",
                    args=[{"visible": [True, False]},
                          {"title": "Number of reviews mentioning price related words"}]),
                dict(
                    label="Total",
                    method="update",
                    args=[{"visible": [False, True]},
                          {"title": "Total number of reviews"}]),
                dict(
                    label="Both",
                    method="update",
                    args=[{"visible": [True, True]},
                          {"title": "Number of reviews mentioning price compared to total count of ratings"}]),
            ],
        )
    ])

# Fully transparent paper background.
fig2.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
)
fig2.show()

In [380]:
# Export Figure 1 as an HTML file for the website.
fig2.write_html("./plots for html/figure1.html")

# Figure 2:

In [381]:
# Figure 2: percentage of reviews per month that mention price, drawn as blue
# bars with an optional red line through the bar tops.
fig = px.bar(df_ratio, x='month_year', y='ratio')
fig.update_traces(marker_color='blue')

# Titles, centred heading and margins; x ticks are drawn outside the plot area.
fig.update_layout(
    title="Ratio of reviews that mention at least one price related word:",
    title_x=0.5,
    xaxis_title="Date",
    yaxis_title="Percent of reviews mentionning price",
    margin={"l": 30, "r": 190},
)
fig.update_xaxes(ticks="outside")

# Line trace that connects the tops of the bars.
connecting_line = go.Scatter(
    x=df_ratio['month_year'],
    y=df_ratio['ratio'],
    name="Graph connecting <br> the bars",
    line={"color": "#f6111c"},
)
fig.add_trace(connecting_line)

# Buttons to choose whether the connecting line is shown.
# The visibility lists follow the trace order: [bars, line].
toggle_buttons = [
    {"label": "Graph", "method": "update", "args": [{"visible": [True, True]}]},
    {"label": "Hide <br> Graph", "method": "update", "args": [{"visible": [True, False]}]},
]
fig.update_layout(
    updatemenus=[
        {
            "type": "buttons",
            "direction": "down",
            "x": -0.08,
            "y": 1.03,
            "buttons": toggle_buttons,
        }
    ],
    # Fully transparent paper background.
    paper_bgcolor='rgba(0,0,0,0)',
)

fig.show()

In [382]:
# Export Figure 2 as an HTML file for the website.
fig.write_html("./plots for html/figure2.html")

In [187]:
# Disabled cell: everything below is wrapped in a string literal, so none of it
# is executed; the notebook merely echoes the string.
'''#Change xticks 
datelist = df_ratio['month_year'][0::12]
fig.update_xaxes(tickangle=30,
                 tickmode = 'array',
                 tickvals = df_ratio['month_year'][0::24],
                 ticktext= datelist)

#change size and margins
fig.update_layout(
    autosize=False,
    width=900,
    height=500,
    margin=dict(l=60, r=20, t=40, b=20))

fig.show()'''

"#Change xticks \ndatelist = df_ratio['month_year'][0::12]\nfig.update_xaxes(tickangle=30,\n                 tickmode = 'array',\n                 tickvals = df_ratio['month_year'][0::24],\n                 ticktext= datelist)\n\n#change size and margins\nfig.update_layout(\n    autosize=False,\n    width=900,\n    height=500,\n    margin=dict(l=60, r=20, t=40, b=20))\n\nfig.show()"

## Add interpolation

In [383]:
# Build a new figure from Figure 2 so that further traces can be added to it.
fig_1a = go.Figure(fig)

In [230]:
# Least-squares fit of a degree-14 polynomial to the monthly ratio, using the
# row index of df_ratio as the x coordinate.
# NOTE(review): a fit of this degree on unscaled x values can be poorly
# conditioned; np.polyfit may emit a RankWarning.
poly = np.polyfit(df_ratio.index, df_ratio['ratio'], 14)
# Evaluate the fitted polynomial at every row position.
poly_y = np.poly1d(poly)(df_ratio.index)
# Bind the values to df_ratio's own index so that the column assignment cannot
# misalign (a Series with a default index is aligned by label on assignment).
poly_y_series = pd.Series(poly_y, index=df_ratio.index)
df_ratio['interpolate'] = poly_y_series

In [384]:
# Add the fitted polynomial (column 'interpolate') as an additional green line trace.
fig_1a.add_trace(go.Scatter(
    x=df_ratio['month_year'],
    y=df_ratio['interpolate'],
    name="Interpolation <br> degree 14",
    line=dict(color="#05e80d"),
))

fig_1a.show()

In [385]:
# Add one toggle button per trace. Each button restyles only the trace with the
# matching index: `args` hides it, and `args2` (used when the button is clicked
# again while active) shows it again.
# The labels are listed in trace order: bars, connecting line, interpolation.
toggle_labels = ["Bar Plot", "Graph <br>connecting bars", "Interpolation"]
fig_1a.update_layout(
    updatemenus=[
        dict(
            type="buttons",
            #If buttons are arranged on top or beside
            direction="down",
            #set positions
            x=-0.08,
            y= 1.03,
            buttons=[
                dict(
                    label=label,
                    method="restyle",
                    visible=True,
                    args=[{"visible": False}, [trace_idx]],
                    args2=[{"visible": True}, [trace_idx]],
                )
                for trace_idx, label in enumerate(toggle_labels)
            ],
        )
    ])
# Fix the y range so the axis does not rescale when traces are toggled.
fig_1a.update_yaxes(
    range =[0,7],
)

In [386]:
# Export the interpolation figure as an HTML file for the website.
fig_1a.write_html("./plots for html/figure3.html")

## Derivative of interpolation:

In [387]:
# Build a new figure from the interpolation figure; the shaded spans are added to this one.
fig_1b = go.Figure(fig_1a)

In [192]:
def derivative(f):
    """Return the coefficients of the derivative of a polynomial.

    Args:
        f: Sequence of polynomial coefficients ordered from the highest
            power down to the constant term (the order produced by
            ``np.polyfit`` and consumed by ``np.poly1d``).

    Returns:
        numpy.ndarray with ``len(f) - 1`` coefficients in the same ordering.
        A constant polynomial (one coefficient) or an empty input yields an
        empty array.
    """
    coefficients = np.asarray(f)
    # The coefficient at position k multiplies x**(n - k) with n = len(f) - 1,
    # so its derivative contributes (n - k) * coefficient. The constant term
    # (last entry) drops out.
    powers = np.arange(len(coefficients) - 1, 0, -1)
    return coefficients[:-1] * powers

In [193]:
# Coefficients of the derivative of the fitted polynomial.
d_f = derivative(poly)
# Evaluate the derivative at every row position of df_ratio.
d_f_poly_y = np.poly1d(d_f)(df_ratio.index)
# Bind the values to df_ratio's own index so that the column assignment cannot
# misalign (a Series with a default index is aligned by label on assignment).
d_f_poly_y_series = pd.Series(d_f_poly_y, index=df_ratio.index)
df_ratio['derivative'] = d_f_poly_y_series

In [204]:
# Collect the row positions that delimit runs of equal sign in the derivative.
# The list starts with the first position, contains the last position of every
# run that is followed by a run of opposite sign, and ends with the last row.
derivative_ = df_ratio['derivative']
changes_in_sign_positions = []
# Sign of the current run: 0 = not determined yet, -1 = negative, 1 = positive.
sign = 0

for index, value in enumerate(derivative_):
    # No sign determined yet: record this position as a boundary and take the
    # sign from the current value. A value of exactly 0 leaves `sign` at 0, so
    # the next position is recorded as well.
    if (sign == 0):
        changes_in_sign_positions.append(index)
        if(value < 0):
            sign = -1
        if(value > 0):
            sign = 1
    # The run switched to negative: the previous position closes the old run.
    if ((sign != -1) & (value < 0)):
        sign = -1
        changes_in_sign_positions.append(index-1)
    # The run switched to positive: the previous position closes the old run.
    if ((sign != 1) & (value > 0)):
        sign = 1
        changes_in_sign_positions.append(index-1)
        
# Add the last position so that the final run can be shaded too.
changes_in_sign_positions.append(len(derivative_)-1)

In [300]:
# Translate the boundary row positions into their 'month_year' labels and
# renumber the result from 0 so it can be indexed consecutively.
x_positions = df_ratio['month_year'].iloc[changes_in_sign_positions].reset_index(drop=True)
x_positions

0     2000-04
1     2000-07
2     2001-07
3     2003-02
4     2005-12
5     2008-07
6     2011-03
7     2016-01
8     2016-07
9     2017-06
10    2017-07
Name: month_year, dtype: object

In [388]:
# Shade every span between two consecutive boundaries. The colours alternate
# by span number (red, green, red, ...); they are not derived from the sign of
# the derivative inside the span.
colors = ['red', 'green']
# Labels are attached to the spans with number 2 and 3 only; a slot is blanked
# after it has been used, so each label appears once.
annotations = ['', '', 'decline', 'increase']
annotation_positions = ["top right", "top right"]

span_edges = [str(edge) for edge in x_positions]
for span_idx, (span_start, span_end) in enumerate(zip(span_edges, span_edges[1:])):
    label_slot = span_idx % 4
    fig_1b.add_vrect(
        x0=span_start,
        x1=span_end,
        annotation_text=annotations[label_slot],
        annotation_position=annotation_positions[0],
        fillcolor=colors[span_idx % 2],
        opacity=0.25,
        line_width=0,
    )
    annotations[label_slot] = ''

fig_1b.show()

In [389]:
# Export the figure with the shaded increase/decline spans as an HTML file for the website.
fig_1b.write_html("./plots for html/figure4.html")

## Add the p-values

In [390]:
# Build a new figure from the interpolation figure; the significance spans are added to this one.
fig_1c = go.Figure(fig_1a)

In [270]:
# Load the stored p-values; the column 'p_value' is read from this frame below.
# NOTE(review): the file name suggests chi-square tests — confirm against the notebook that wrote it.
p_values = pd.read_csv("../DataframeStorage/p_values_chi_squares.csv")

In [325]:
# Collect the positions at which the p-values cross the 0.05 threshold, and
# translate them into 'month_year' labels of df_ratio.
p_values_series = p_values['p_value']
changes_in_significance_positions = []
# State of the current run: 0 = not determined yet, 1 = p < 0.05,
# -1 = p > 0.05. A value of exactly 0.05 never changes the state.
sign = 0

#store all the positions where the significance level changes:
for index, value in enumerate(p_values_series):
    # No state determined yet: record this position and take the state from
    # the current value.
    if (sign == 0):
        changes_in_significance_positions.append(index)
        if(value < 0.05):
            sign = 1
        if(value > 0.05):
            sign = -1
    # The run switched to "p > 0.05": the previous position closes the old run.
    if ((sign != -1) & (value > 0.05)):
        sign = -1
        changes_in_significance_positions.append(index-1)
    # The run switched to "p < 0.05": the previous position closes the old run.
    if ((sign != 1) & (value < 0.05)):
        sign = 1
        changes_in_significance_positions.append(index-1)


#add the last index to be able to color the last part too
changes_in_significance_positions.append(len(p_values_series) - 1)
# Drop the first recorded position, so the shading starts at the first change.
changes_in_significance_positions.pop(0)

# Scale the p-value positions to row positions of df_ratio.
# NOTE(review): presumably there is one p-value per 6 monthly rows — confirm.
# iloc below raises if a scaled position exceeds the number of rows in df_ratio.
changes_in_significance_positions = [i * 6 for i in changes_in_significance_positions]

x_positions_2 = df_ratio.iloc[changes_in_significance_positions]['month_year']
x_positions_2.reset_index(inplace=True, drop=True)

0

In [391]:
# Shade the spans between consecutive significance boundaries. Spans with an
# even number are drawn in green with opacity 0.25, spans with an odd number
# are fully transparent (opacity 0).
colors=['green','red']
# Only the span with number 10 carries a label.
annotations=['','','','','','','','','','','significant']
annotation_positions = ["top right", "top right"]
opacities = [0.25, 0]
for i in range(len(x_positions_2)-1):
    label_slot = i % len(annotations)
    fig_1c.add_vrect(x0=str(x_positions_2[i]), x1=str(x_positions_2[i+1]),
              annotation_text=annotations[label_slot], annotation_position=annotation_positions[0],
              fillcolor=colors[i%2], opacity=opacities[i%2], line_width=0)
    # Blank the slot that was just used so the label is drawn only once.
    # (The slot has to be computed with the same modulus as the lookup above.)
    annotations[label_slot] = ''

fig_1c.show()

In [392]:
# Export the figure with the significance spans as an HTML file for the website.
fig_1c.write_html("./plots for html/figure5.html")

## Add unemployment rate:

In [393]:
# Build a new figure from the interpolation figure; the unemployment curve is added to it below.
fig_1d = go.Figure(fig_1a)

In [340]:
# Load the stored ratio table; the columns 'month_year', 'interpolate_unemployment'
# and 'derivative_unemployment' are read from it below.
df_ratio_MS = pd.read_csv("../DataframeStorage/df_ratio.csv")

In [342]:
# Collect the row positions that delimit runs of equal sign in the derivative
# of the unemployment curve.
derivative_unemp = df_ratio_MS['derivative_unemployment']
changes_in_sign_positions_unemp = []
# Sign of the current run: 0 = not determined yet, -1 = negative, 1 = positive.
sign = 0

#store all the positions at which the sign of the derivative changes.
for index, value in enumerate(derivative_unemp):
    # No sign determined yet: record this position and take the sign from the
    # current value (a value of exactly 0 leaves the sign undetermined).
    if (sign == 0):
        changes_in_sign_positions_unemp.append(index)
        if(value < 0):
            sign = -1
        if(value > 0):
            sign = 1
    # The run switched to negative: the previous position closes the old run.
    if ((sign != -1) & (value < 0)):
        sign = -1
        changes_in_sign_positions_unemp.append(index-1)
    # The run switched to positive: the previous position closes the old run.
    if ((sign != 1) & (value > 0)):
        sign = 1
        changes_in_sign_positions_unemp.append(index-1)


#add the last index to be able to color the last part too
# The last valid row position is len - 1; appending len itself would point one
# row past the end and make a positional lookup with iloc fail.
changes_in_sign_positions_unemp.append(len(derivative_unemp) - 1)

In [394]:
# Start from the interpolation figure and overlay the unemployment curve as a
# fourth, near-black line trace.
fig_1d = go.Figure(fig_1a)
unemployment_trace = go.Scatter(
    x=df_ratio_MS['month_year'],
    y=df_ratio_MS['interpolate_unemployment'],
    name="unemployment <br> rate",
    line={"color": "#101211"},
)
fig_1d.add_trace(unemployment_trace)
fig_1d.update_yaxes(range=[0, 10])

# One toggle button per trace, listed in trace order. `args` hides the trace
# with the matching index, `args2` (second click while active) shows it again.
toggle_labels = ["Bar Plot", "Graph <br>connecting bars", "Interpolation", "Unemployment"]
toggle_buttons = [
    {
        "label": label,
        "method": "restyle",
        "visible": True,
        "args": [{"visible": False}, [trace_idx]],
        "args2": [{"visible": True}, [trace_idx]],
    }
    for trace_idx, label in enumerate(toggle_labels)
]

# Fixed figure size plus the button column placed left of the plot.
fig_1d.update_layout(
    autosize=False,
    width=1000,
    height=600,
    updatemenus=[
        {
            "type": "buttons",
            "direction": "down",
            "x": -0.08,
            "y": 1.03,
            "buttons": toggle_buttons,
        }
    ],
)

fig_1d.show()

In [395]:
# Build a new figure from the unemployment figure and shade the spans between
# consecutive boundaries in x_positions. The rectangles cover only the lower
# part of the plot (y0=0 to y1=0.45) and carry no text labels.
fig_1e = go.Figure(fig_1d)

# Colours alternate by span number (red, green, red, ...).
colors=['red','green']
span_edges = [str(edge) for edge in x_positions]
for span_idx, (span_start, span_end) in enumerate(zip(span_edges, span_edges[1:])):
    fig_1e.add_vrect(x0=span_start, x1=span_end, y0=0, y1= 0.45,
              fillcolor=colors[span_idx % 2], opacity=0.4, line_width=0)

fig_1e.show()

In [360]:
# Collect the row positions that delimit runs of equal sign in the derivative
# of the unemployment curve, then translate them into 'month_year' labels.
derivative_unemp = df_ratio_MS['derivative_unemployment']
changes_in_sign_positions_unemp = []
# Sign of the current run: 0 = not determined yet, -1 = negative, 1 = positive.
sign = 0

#store all the positions at which the sign of the derivative changes.
for index, value in enumerate(derivative_unemp):
    # No sign determined yet: record this position and take the sign from the
    # current value (a value of exactly 0 leaves the sign undetermined).
    if (sign == 0):
        changes_in_sign_positions_unemp.append(index)
        if(value < 0):
            sign = -1
        if(value > 0):
            sign = 1
    # The run switched to negative: the previous position closes the old run.
    if ((sign != -1) & (value < 0)):
        sign = -1
        changes_in_sign_positions_unemp.append(index-1)
    # The run switched to positive: the previous position closes the old run.
    if ((sign != 1) & (value > 0)):
        sign = 1
        changes_in_sign_positions_unemp.append(index-1)


#add the last index to be able to color the last part too
changes_in_sign_positions_unemp.append(len(derivative_unemp)-1)

# NOTE(review): the positions were computed on df_ratio_MS but are looked up in
# df_ratio; this assumes both frames have the same rows in the same order — confirm.
x_positions_3 = df_ratio.iloc[changes_in_sign_positions_unemp]['month_year']
x_positions_3.reset_index(inplace=True, drop=True)

In [396]:
# Build a new figure from fig_1e and shade the spans between consecutive
# boundaries in x_positions_3. These rectangles cover the upper part of the
# plot (y0=0.48 to y1=1).
fig_1f = go.Figure(fig_1e)

# Colours alternate by span number (green, red, green, ...); they are not
# derived from the sign of the derivative inside the span.
colors = ['green', 'red']
# Labels are attached to the spans with number 2 and 3 only; a slot is blanked
# after it has been used, so each label appears once.
annotations = ['', '', 'increase', 'decline']
annotation_positions = ["top right", "top right"]

span_edges = [str(edge) for edge in x_positions_3]
for span_idx, (span_start, span_end) in enumerate(zip(span_edges, span_edges[1:])):
    label_slot = span_idx % 4
    fig_1f.add_vrect(
        x0=span_start,
        x1=span_end,
        y0=0.48,
        y1=1,
        annotation_text=annotations[label_slot],
        annotation_position=annotation_positions[0],
        fillcolor=colors[span_idx % 2],
        opacity=0.25,
        line_width=0,
    )
    annotations[label_slot] = ''

fig_1f.show()

In [397]:
# Export the combined price/unemployment figure as an HTML file for the website.
fig_1f.write_html("./plots for html/figure6.html")