# Load libraries

In [1]:
import src.utils.seasonalityMetrics as sm # calls helper file

import pandas as pd
import numpy as np
from statsmodels.tsa.seasonal import STL

from plotly.subplots import make_subplots
import plotly.graph_objects as go


## data preprocessing functions

In [2]:

df_ratings = pd.read_csv(r"src/data/beerAdvocateReviewsUSA.csv") # df_ratings needs to contain 'rating', 'month', and 'year' columns.

In [3]:
df_processed_mean = sm.data_preprocessing_mean_rating_per_month(df_ratings, 'mean_rating') # evaluates based on average of rating score
df_processed_number = sm.data_preprocessing_number_of_ratings_per_month(df_ratings) # based on number of ratings in a month

## plot and report

note that since the FFT is discrete, we dont have a value for frequency 0.083, 
but only 0.085. This for a year-periodic signal it will give 11.75 months as main period, instead of 12.


In [None]:
# plot 
plot = sm.seasonality_report_plot(df_processed_mean, "Seasonality Report - Mean Ratings of all Beers")
plot.show()

      
# metrics

peak_ratio, avg_amplitude = sm.timeseries_seasonality_metric(df_processed_mean)


print(f"peak_ratio: {peak_ratio}.      avg_amplitude: {avg_amplitude}")

(188, 1)


peak_ratio: 3.7528.      avg_amplitude: 0.0276


## test on filtered dataset

In [6]:

df_pale_ales = df_ratings[
    (df_ratings['style'] == 'English Pale Ale')
]

df_pale_ales.head(100)
df_processed_pale_ale = sm.data_preprocessing_mean_rating_per_month(df_pale_ales, 'mean_rating')
sm.seasonality_report_plot(df_processed_pale_ale)

(187, 1)


# Metric Function


input timeseries with dates (either ratings or numbers of ratings)
gives seasonality scores

- fourier transform 12 month peak ratio to second highest peak
- seasonality amplitude above a certain threshold


In [7]:

df_ratings = pd.read_csv(r"src\data\beerAdvocateReviews.csv") # df_ratings needs to contain 'rating', 'month', and 'year' columns.

timeseries_data  = sm.data_preprocessing_mean_rating_per_month(df_ratings, 'mean_rating')
timeseries_data.head()

FileNotFoundError: [Errno 2] No such file or directory: 'src\\data\\beerAdvocateReviews.csv'

In [8]:
peak_ratio, avg_amplitude = sm.timeseries_seasonality_metric(timeseries_data)

print(f"peak_ratio: {peak_ratio}. avg_amplitude: {avg_amplitude}")

peak_ratio: 3.7551. avg_amplitude: 0.0276


## On a filtered dataset

In [None]:
import src.utils.seasonalityMetrics as sm # calls helper file

# on filtered dataset

df_kölsch = df_ratings[
    (df_ratings['style'] == 'Kölsch')
]


df_kölsch_number_ratings_timeseries = sm.data_preprocessing_number_of_ratings_per_month(df_kölsch)
peak_ratio_number_rartings, avg_amplitude_number_ratings = sm.timeseries_seasonality_metric(df_kölsch_number_ratings_timeseries)

df_kölsch_mean_rating_timeseries = sm.data_preprocessing_mean_rating_per_month(df_kölsch, data_column_name= 'mean_rating')
peak_ratio_mean_rartings, avg_amplitude_mean_ratings = sm.timeseries_seasonality_metric(df_kölsch_mean_rating_timeseries)

print("Seasonality for number of ratings per monthv:")
print(f"peak_ratio_number_rartings: {peak_ratio_number_rartings}.    avg_amplitude_number_ratings: {avg_amplitude_number_ratings}")
print()
print("Seasnoality for mean rating score per monthv:")
print(f"peak_ratio_mean_rartings: {peak_ratio_mean_rartings}.    avg_amplitude_mean_ratings: {avg_amplitude_number_ratings}")


Seasonality for number of ratings per monthv:
peak_ratio_number_rartings: 4.8269.    avg_amplitude_number_ratings: 31.6891

Seasnoality for mean rating score per monthv:
peak_ratio_mean_rartings: 1.2002.    avg_amplitude_mean_ratings: 31.6891
