In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file under the Kaggle input directory,
# then print them one per line.
input_paths = (
    os.path.join(root, name)
    for root, _dirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_squared_error
from statsmodels.tools.eval_measures import rmse
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
%matplotlib inline

This notebook was 'copied' alongside the notebook of the same name by Caner Dabkoglu. Generally speaking, the code is the same; however, I occasionally modify the dataset to practice Pandas, and I change hyperparameters where I see fit to practice writing my own code. Otherwise, I do not claim the work below as my own.

In [None]:
# Load the monthly beer production dataset from the Kaggle input directory.

beer_csv_path = '../input/monthly-beer-production-in-austria/monthly-beer-production-in-austr.csv'
df = pd.read_csv(beer_csv_path)

In [None]:
# Normalise the column name, parse the Month column as datetimes and use it
# as the index; then display the resulting frame.
df = (
    df.rename(columns = {'Monthly beer production': 'Monthly Beer Production'})
      .assign(Month = lambda frame: pd.to_datetime(frame.Month))
      .set_index("Month")
)
df

In [None]:
# Tag the index with the 'MS' (month-start) frequency so downstream
# time-series tools see an explicitly monthly series.
df.index.freq = 'MS'

In [None]:
# Draw the full production series as a green line on an 18x9 figure with
# labelled axes, then render it.
fig, ax = plt.subplots(figsize=(18,9))
ax.plot(df.index,df["Monthly Beer Production"],color = 'green',linestyle = "-")
ax.set_xlabel("Dates")
ax.set_ylabel("Monthly Beer Production")
plt.show()

In [None]:
# Additive seasonal decomposition of the series; the plot shows the observed,
# trend, seasonal and residual components.
beer_series = df["Monthly Beer Production"]
sd = seasonal_decompose(beer_series,model = 'additive')

sd.plot();

In [None]:
# Plot only the seasonal component extracted by the decomposition.
fig, ax = plt.subplots(figsize = (17,9))
ax.plot(sd.seasonal,color = 'green')
ax.set_xlabel("Dates")
ax.set_ylabel("Seasonal Trend of Production")
plt.show()

In [None]:
# Hold out the final 10 rows as the test set; everything before is training data.
split_at = len(df)-10
train_data = df.iloc[:split_at]
test_data = df.iloc[split_at:]

In [None]:
# Configure and fit a SARIMA model on the training series, then show the fit summary.
sarima_order = (2,1,1)              # non-seasonal (p, d, q)
sarima_seasonal_order = (4,0,3,12)  # seasonal (P, D, Q, s), s = 12 periods
arima_model = SARIMAX(
    train_data['Monthly Beer Production'],
    order = sarima_order,
    seasonal_order = sarima_seasonal_order,
)
arima_result = arima_model.fit()
arima_result.summary()

In [None]:
# Forecast the held-out positions (first row after the training data through
# the last row of df) and label the resulting series.
forecast_start = len(train_data)
forecast_end = len(df)-1
arima_pred = arima_result.predict(start = forecast_start,end = forecast_end,typ = 'levels')
arima_pred = arima_pred.rename("ARIMA Predictions")
arima_pred

In [None]:
# Draw the held-out observations and the SARIMA forecast on the same axes.
ax = test_data['Monthly Beer Production'].plot(figsize = (16,5),legend = True)
arima_pred.plot(ax = ax,legend = True)

In [None]:
# Score the SARIMA forecast against the held-out months. The series mean is
# printed alongside to give the error a sense of scale.
observed = test_data["Monthly Beer Production"]
arima_rmse_error = rmse(observed,arima_pred)
arima_mse_error = arima_rmse_error**2  # MSE is the square of RMSE
mean_value = df["Monthly Beer Production"].mean()
print(f'MSE Error : {arima_mse_error}\nRMSE Error: {arima_rmse_error}\nMean: {mean_value}')

In [None]:
# Start a comparison frame: a copy of the last 10 rows of df with the SARIMA
# forecast attached as a new column (aligned on the date index).
predictions = df.iloc[-10:].assign(ARIMA_Predictions = arima_pred)

In [None]:
# Next, forecast the same series with an LSTM.
# The data is scaled with a MinMaxScaler first, so create the scaler here.

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [None]:
# Learn the scaling from the training data only, then apply that same
# scaling to the test data.
scaled_train_data = scaler.fit_transform(train_data)
scaled_test_data = scaler.transform(test_data)


In [None]:
# The LSTM is trained from a TimeseriesGenerator built over the scaled
# training series, using the series as both input data and targets.

from keras.preprocessing.sequence import TimeseriesGenerator

n_input = 10      # time steps per input window
n_features = 1    # features per time step (used for the LSTM input shape)
generator = TimeseriesGenerator(
    data = scaled_train_data,
    targets = scaled_train_data,
    length = n_input,
    batch_size = 1,
)


In [None]:
# Import the Keras building blocks and assemble the network:
# one 300-unit LSTM layer followed by a single-unit dense output,
# trained with Adam on mean squared error.
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

lstm_model = Sequential([
    LSTM(300,activation='relu',input_shape=(n_input,n_features)),
    Dense(1),
])
lstm_model.compile(optimizer='adam',loss='mse')

lstm_model.summary()

In [None]:
# Train the LSTM on the windowed training sequences for 20 epochs.
# Model.fit accepts generator / Sequence inputs directly; fit_generator is
# deprecated and has been removed from newer Keras releases.
lstm_model.fit(generator,epochs=20)

In [None]:
# Plot the training loss recorded for each epoch.
losses_lstm = lstm_model.history.history['loss']
fig, ax = plt.subplots(figsize = (12,4))
ax.set_xticks(np.arange(0,21,1))
ax.plot(range(len(losses_lstm)),losses_lstm)


In [None]:
# Forecast the test horizon one step at a time, feeding each prediction back
# into the input window. Predictions are collected in scaled units and are
# converted back to the original scale later.
lstm_predictions_scaled = []

# Seed the window with the last n_input scaled training observations.
current_batch = scaled_train_data[-n_input:].reshape((1,n_input,n_features))

for _ in range(len(test_data)):
    lstm_pred = lstm_model.predict(current_batch)[0]
    lstm_predictions_scaled.append(lstm_pred)
    # Slide the window: drop the oldest step and append the newest prediction.
    remaining_steps = current_batch[:,1:,:]
    current_batch = np.append(remaining_steps,[[lstm_pred]],axis=1)
    

In [None]:
# Undo the scaling so the LSTM forecasts are back in the original units.
lstm_predictions = scaler.inverse_transform(lstm_predictions_scaled)

In [None]:
# Display the rescaled LSTM forecasts.
lstm_predictions

In [None]:
# Add the LSTM forecasts to the comparison frame as a new column.
predictions["LSTM_Predictions"] = lstm_predictions

In [None]:
# Draw the held-out observations and the LSTM forecast on the same axes.
ax = predictions['Monthly Beer Production'].plot(figsize = (16,5),legend = True)
predictions['LSTM_Predictions'].plot(ax = ax,legend = True)

In [None]:
# Score the LSTM forecast against the held-out months; the series mean is
# printed alongside to give the error a sense of scale.
observed = test_data['Monthly Beer Production']
lstm_forecast = predictions['LSTM_Predictions']
lstm_rmse_error = rmse(observed,lstm_forecast)
lstm_mse_error = lstm_rmse_error**2  # MSE is the square of RMSE
mean_value = df['Monthly Beer Production'].mean()
print(f'MSE Error: {lstm_mse_error}\nRMSE Error:{lstm_rmse_error}\nMean: {mean_value}')

In [None]:
# Print a summary of df (index, columns, dtypes, non-null counts).
df.info()

In [None]:
# Now forecast with Prophet.
# Prophet expects a frame with a date column named 'ds' and a value column
# named 'y'. reset_index() already returns a new DataFrame (df itself is left
# untouched), so no separate copy is needed before renaming the columns.
df_pr = df.reset_index()
df_pr.columns = ['ds','y']

In [None]:
# Split the Prophet-formatted frame: the final 10 rows are held out for
# forecasting, everything before them is used for training.
split_at = len(df)-10
train_data_pr = df_pr.iloc[:split_at]
test_data_pr = df_pr.iloc[split_at:]

In [None]:
from prophet import Prophet

In [None]:
# Fit Prophet on the training months and forecast exactly the held-out period.
# The horizon must equal the test length: later cells keep the trailing rows
# of the forecast and compare them positionally with the test months, so a
# longer horizon (e.g. 12 against a 10-month test set) would shift every
# prediction away from the month it is compared to.
prophet_model = Prophet()
prophet_model.fit(train_data_pr)
prophet_future = prophet_model.make_future_dataframe(periods=len(test_data_pr),freq='MS')
prophet_pred = prophet_model.predict(prophet_future)

In [None]:
# Keep only the last 10 forecast rows, reduced to two columns:
# the date ('ds' -> 'Date') and the point forecast ('yhat' -> 'Pred').
prophet_pred = (
    prophet_pred[-10:][['ds','yhat']]
    .rename(columns = {'ds': 'Date','yhat': 'Pred'})
)

In [None]:
# Use the forecast dates as the index of the Prophet prediction frame.
prophet_pred = prophet_pred.set_index("Date")

In [None]:
# Tag the index with the 'MS' (month-start) frequency, as was done for df.
prophet_pred.index.freq = 'MS'

In [None]:
# Add the Prophet forecasts to the comparison frame. `.values` drops the
# index, so the assignment is positional rather than aligned on dates.
predictions['Prophet_Predictions'] = prophet_pred['Pred'].values

In [None]:
# Import seaborn and overlay the held-out observations and the Prophet
# forecast as two line plots on one set of axes.
import seaborn as sns
fig, ax = plt.subplots(figsize = (16,5))
sns.lineplot(x = test_data.index,y = test_data["Monthly Beer Production"],ax = ax)
sns.lineplot(x = test_data.index,y = predictions["Prophet_Predictions"],ax = ax)


In [None]:
# Score the Prophet forecast against the held-out months; the series mean is
# printed alongside to give the error a sense of scale.
observed = test_data['Monthly Beer Production']
prophet_forecast = predictions["Prophet_Predictions"]
prophet_rmse_error = rmse(observed,prophet_forecast)
prophet_mse_error = prophet_rmse_error**2  # MSE is the square of RMSE
mean_value = df['Monthly Beer Production'].mean()

print(f'MSE Error: {prophet_mse_error}\nRMSE Error: {prophet_rmse_error}\nMean: {mean_value}')

In [None]:
# Overlay the held-out actuals and all three forecasts on one figure.
# plt.plot is used instead of plt.plot_date, which is deprecated in recent
# Matplotlib releases; marker 'o' reproduces plot_date's default point style,
# and plot handles datetime x-values directly. Each line carries its own
# label so the legend cannot drift out of order with the plotting calls.
plt.figure(figsize = (18,10))
forecast_lines = [
    ('Monthly Beer Production', test_data["Monthly Beer Production"], '-'),
    ('ARIMA', predictions['ARIMA_Predictions'], '-.'),
    ('LSTM', predictions["LSTM_Predictions"], '--'),
    ('Prophet', predictions['Prophet_Predictions'], ':'),
]
for label, series, linestyle in forecast_lines:
    plt.plot(test_data.index,series,marker = 'o',linestyle = linestyle,label = label)
plt.legend()
plt.show()

In [None]:
#In hindsight, the issue with the predictions from the latter two methods (LSTM and Prophet) may well be that I decided
#to hold out only the last 10 months, because I wanted to switch up the numbers and get creative.
#The next time I do something like this, I will size the test set as a multiple of the
#seasonal period of the data (e.g. 12 months for monthly data).