In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from arch import arch_model
from statsmodels.tsa.vector_ar.var_model import VAR
from statsmodels.tsa.regime_switching.markov_regression import MarkovRegression
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [10]:
# Load data
data = pd.read_csv("../src/data/merged_data.csv", parse_dates=["Date"], index_col="Date")

In [11]:
# --------------------------
# 1. Data Preprocessing
# --------------------------
scaler = MinMaxScaler()
data_scaled = pd.DataFrame(scaler.fit_transform(data), columns=data.columns, index=data.index)

# Split data into train and test
train_size = int(len(data) * 0.8)
train, test = data_scaled.iloc[:train_size], data_scaled.iloc[train_size:]


In [12]:
# --------------------------
# 2. ARIMA Model
# --------------------------
arima_model = ARIMA(train["Price"], order=(5,1,0)).fit()
arima_pred = arima_model.forecast(steps=len(test))

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


In [16]:
# --------------------------
# 3. GARCH Model
# --------------------------
scaled_price = train["Price"] * 10  # Rescale Price
garch_model = arch_model(scaled_price, vol='Garch', p=1, q=1, rescale=False).fit()
garch_pred = garch_model.forecast(start=len(train), horizon=len(test)).variance.mean(axis=1) / 10  # Rescale back


Iteration:      1,   Func. Count:      6,   Neg. LLF: 441576962561.84973
Iteration:      2,   Func. Count:     16,   Neg. LLF: 71146720105.34631
Iteration:      3,   Func. Count:     23,   Neg. LLF: 856978871.3776429
Iteration:      4,   Func. Count:     29,   Neg. LLF: 10487.884962238506
Iteration:      5,   Func. Count:     34,   Neg. LLF: 10304.295870362965
Iteration:      6,   Func. Count:     46,   Neg. LLF: 9655.979252262961
Iteration:      7,   Func. Count:     51,   Neg. LLF: 179770390409.35413
Iteration:      8,   Func. Count:     66,   Neg. LLF: 753112.0553529745
Iteration:      9,   Func. Count:     79,   Neg. LLF: 55849.04185920563
Iteration:     10,   Func. Count:     94,   Neg. LLF: 10953.940485327956
Iteration:     11,   Func. Count:     99,   Neg. LLF: 6559068.932206704
Iteration:     12,   Func. Count:    106,   Neg. LLF: 1025108.5184233341
Iteration:     13,   Func. Count:    112,   Neg. LLF: 10431.087184883369
Iteration:     14,   Func. Count:    118,   Neg. LLF: 886

In [19]:
# --------------------------
# 4. VAR Model
# --------------------------
var_model = VAR(train)
var_fitted = var_model.fit(5)
var_pred = var_fitted.forecast(train.values[-5:], steps=len(test))

  self._init_dates(dates, freq)


In [23]:
# --------------------------
# 5. Markov-Switching Model
# --------------------------
markov_model = MarkovRegression(train["Price"], k_regimes=2, trend='c', switching_variance=True).fit()
markov_pred = markov_model.smoothed_marginal_probabilities

  self._init_dates(dates, freq)


In [24]:
# --------------------------
# 6. LSTM Model
# --------------------------
X_train, y_train = train["Price"].values[:-1], train["Price"].values[1:]
X_test, y_test = test["Price"].values[:-1], test["Price"].values[1:]
X_train, X_test = X_train.reshape(-1,1,1), X_test.reshape(-1,1,1)

lstm_model = Sequential([
    LSTM(50, activation='relu', return_sequences=True, input_shape=(1,1)),
    LSTM(50, activation='relu'),
    Dense(1)
])
lstm_model.compile(optimizer='adam', loss='mse')
lstm_model.fit(X_train, y_train, epochs=20, batch_size=16, verbose=1)
lstm_pred = lstm_model.predict(X_test)

  super().__init__(**kwargs)


Epoch 1/20
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.0571
Epoch 2/20
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 4.0648e-04
Epoch 3/20
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 9.9402e-05
Epoch 4/20
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 9.2388e-05
Epoch 5/20
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 8.5305e-05
Epoch 6/20
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 8.4879e-05
Epoch 7/20
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 7.6621e-05
Epoch 8/20
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 7.7379e-05
Epoch 9/20
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 8.0521e-05
Epoch 10/20
[1m457/457[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[