# Seasonal Number of Ratings

In [None]:
# Loading Libraries

from pathlib import Path

import ipywidgets as widgets
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.io as pio
import plotly.offline as py
import seaborn as sns
from IPython.display import display, clear_output

from src.utils.jeanneHelper import JeanneHelper

# Notebook-wide setup, kept after the imports so the dependency list reads in one block
helper = JeanneHelper()
py.init_notebook_mode(connected=True)

pio.renderers.default = 'notebook'



In [4]:
# Read the review-level ratings table and preview its first rows
reviews_csv_path = 'data/beerAdvocateReviewsUSA.csv'
df = pd.read_csv(reviews_csv_path)

df.head()

Unnamed: 0.1,Unnamed: 0,rating,appearance,aroma,palate,taste,overall,country_name,country_code3,state,date_object,month,year,style,beer_name,beer_id
0,0,2.88,3.25,2.75,3.25,2.75,3.0,United States,USA,Washington,2015-08-20 12:00:00,8,2015,Euro Pale Lager,Régab,142544
1,1,3.67,3.0,3.5,3.5,4.0,3.5,United States,USA,New York,2009-02-20 12:00:00,2,2009,English Pale Ale,Barelegs Brew,19590
2,5,3.91,4.25,4.5,3.25,3.75,3.75,United States,USA,North Carolina,2013-02-13 12:00:00,2,2013,English Pale Ale,Legbiter,19827
3,6,3.64,4.0,3.75,3.5,3.5,3.75,United States,USA,New York,2013-01-09 12:00:00,1,2013,English Pale Ale,Legbiter,19827
4,7,2.77,3.0,3.5,2.5,2.5,2.5,United States,USA,Illinois,2012-11-17 12:00:00,11,2012,English Pale Ale,Legbiter,19827


## 0 - Intro

We define a beer style as seasonal in its number of ratings if its share of all ratings submitted in a month varies strongly from month to month.

TODO: maybe run a statistical test to check whether the month-to-month differences are significant?

In [5]:
# For every month, the 10 most-rated styles together with their share of that
# month's ratings (columns: month, style, rating_count, total_ratings, percentage).
top_styles_per_month_percentage = helper.top_k_beer_styles_percentage(df = df, k = 10, group_by = 'month')

# One fixed colour per style so a style looks the same in every chart.
# Colours are stored as hex strings, the same representation the Plotly
# figure in this notebook uses for its marker colours.
unique_styles = top_styles_per_month_percentage['style'].unique()
palette = sns.color_palette("tab20", len(unique_styles)).as_hex()
style_colors = dict(zip(unique_styles, palette))

In [8]:
# Preview the per-month style shares computed in the previous cell; the table
# is not recomputed here because the call and its arguments would be identical.
top_styles_per_month_percentage.head()

Unnamed: 0,month,style,rating_count,total_ratings,percentage
0,1,American IPA,18626,207511,8.97591
1,1,American Double / Imperial IPA,14424,207511,6.950957
2,1,American Double / Imperial Stout,11703,207511,5.639701
3,1,Russian Imperial Stout,8187,207511,3.945333
4,1,American Pale Ale (APA),7838,207511,3.777149


In [62]:
# Animated horizontal bar chart of the most-rated styles, one frame per month,
# exported as a standalone HTML file.

# Within each month, order the styles from the largest to the smallest share.
top_styles_per_month_percentage_sorted = top_styles_per_month_percentage.sort_values(
    by=['month', 'percentage'], ascending=[True, False]
)

# One fixed colour per style, as hex strings for the Plotly marker colours.
# NOTE(review): "tab20" offers 20 colours; with more styles, colours would repeat.
unique_styles = top_styles_per_month_percentage['style'].unique()
palette = sns.color_palette("tab20", len(unique_styles)).as_hex()
style_colors = dict(zip(unique_styles, palette))

# Order of the animation frames and of the slider steps.
category_orders = {
    'month': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
}

# Frame durations are in milliseconds. The Play button previously used 1 ms,
# which ran through all twelve months as fast as the browser could redraw;
# one second per month keeps each frame readable. Slider steps stay immediate.
PLAY_FRAME_DURATION_MS = 1000
SLIDER_FRAME_DURATION_MS = 1
OUTPUT_HTML = Path("illustrations/Jeanne/top_styles_per_month_percentage.html")


def month_bar_trace(share_by_month, month, colors):
    """Build the horizontal bar trace for a single month.

    Parameters
    ----------
    share_by_month : pandas.DataFrame
        Table with at least the columns 'month', 'style' and 'percentage',
        already sorted in the order the bars should be drawn.
    month : int
        Value of the 'month' column to select.
    colors : dict
        Mapping from style name to the colour of its bar.

    Returns
    -------
    plotly.graph_objs.Bar
        Bars with 'percentage' on the x axis and 'style' on the y axis.

    Raises
    ------
    KeyError
        If a style of the selected month has no entry in ``colors``.
    """
    # Filter once and reuse the slice for x, y and the colours.
    month_rows = share_by_month[share_by_month['month'] == month]
    return go.Bar(
        x=month_rows['percentage'],
        y=month_rows['style'],
        orientation='h',
        marker_color=[colors[style] for style in month_rows['style'].to_list()],
    )


# One frame per month; the frame name is what the slider steps refer to.
frames = [
    go.Frame(
        data=[month_bar_trace(top_styles_per_month_percentage_sorted, month, style_colors)],
        name=str(month),
    )
    for month in category_orders['month']
]

play_button = dict(
    label="Play",
    method="animate",
    args=[None, dict(frame=dict(duration=PLAY_FRAME_DURATION_MS, redraw=True), fromcurrent=True)],
)

slider_steps = [
    {
        'args': [
            [str(month)],
            {'frame': {'duration': SLIDER_FRAME_DURATION_MS, 'redraw': True}, 'mode': 'immediate'}
        ],
        'label': str(month),
        'method': 'animate',
    }
    for month in category_orders['month']
]

fig = go.Figure(
    # The figure opens on the first month of the sequence.
    data=[month_bar_trace(
        top_styles_per_month_percentage_sorted, category_orders['month'][0], style_colors
    )],
    layout=go.Layout(
        title="Top Beer Styles by Percentage of Reviews per Month",
        xaxis=dict(title="Percentage of Ratings"),
        yaxis=dict(title="Beer Style", automargin=True),
        updatemenus=[dict(
            type="buttons",
            buttons=[play_button],
            x=1.15,  # Position the button on the top right
            xanchor='right',
            y=1.15,
            yanchor='top'
        )],
        sliders=[{
            'steps': slider_steps,
            'transition': {'duration': 1},
            'x': 0.1,
            'len': 0.9,
            'currentvalue': {
                'prefix': 'month: ',
                'font': {'size': 20, 'color': '#666'}
            }
        }]
    ),
    frames=frames
)

# Create the target folder first so the export also works on a fresh checkout.
OUTPUT_HTML.parent.mkdir(parents=True, exist_ok=True)
fig.write_html(str(OUTPUT_HTML))

### Analysis
In the plot below, we can observe the 10 most frequently reviewed beer styles for each month, expressed as a percentage of the total number of ratings in that month. 

We can observe that certain beer styles appear only during specific periods, such as Pumpkin Ale in the fall, Fruit and Vegetable beers in late spring and summer, Märzen/Oktoberfest beers in September and October, and Winter Warmer in December.   

Other beer styles appear to maintain consistent popularity throughout the year. American IPA is the most rated beer type each month, with American Double/Imperial IPA consistently in second place. Saison/Farmhouse Ale tends to occupy a middle position, while American Amber/Red Ale often ranks near the bottom of the set.  

## 1 - Define seasonal beer styles in Nr of ratings

TODO: use Jakob's metrics

## 2 - Apply metrics

TODO: define a new set of beers by removing the seasonal beers, then apply Jakob's metrics to the new set to see whether the seasonality has flattened.