In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.keras import TqdmCallback

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, GRU, Bidirectional, Embedding
from tensorflow.keras.utils import plot_model

import os
import sys
import warnings
from dotenv import load_dotenv

# NOTE(review): this silences every warning for the whole kernel, including
# deprecation notices from the libraries imported above.
warnings.filterwarnings("ignore")

# Load variables from a .env file (python-dotenv) before reading REPO_PATH.
load_dotenv()
REPO_PATH: str = os.getenv('REPO_PATH')
if REPO_PATH is None:
    # Without this guard the paths built from REPO_PATH silently become
    # 'None...' and only fail later with a misleading import/file error.
    raise EnvironmentError(
        "REPO_PATH is not set; define it in the environment or in a .env file."
    )

# Make the project's helper modules importable. os.path.join works whether
# or not REPO_PATH ends with a path separator.
sys.path.insert(0, os.path.join(REPO_PATH, 'src_HF'))
# NOTE(review): the wildcard import hides which helper names this notebook
# relies on; prefer explicit imports once they are known.
from utils import *

### Load Data

In [None]:
# Build the path with os.path.join so it does not depend on Windows-style
# backslash separators.
csv_path = os.path.join(REPO_PATH, 'data', 'time_series', 'CLc1_processed.csv')

# Drop incomplete rows first, then remove the 'Date.1' column. Chaining
# (instead of inplace=True) keeps the cell a single, re-runnable expression.
df = pd.read_csv(csv_path).dropna().drop(columns=['Date.1'])
df.head(3)

### Train/test split and feature scaling

In [None]:
TEST_FRACTION: float = 0.2

# Previous-row value of the target; shift(1) leaves NaN in the first row.
df['GARCH_1'] = df['GARCH'].shift(1)

# Use the lagged 'GARCH_1' as a feature instead of 'GARCH' itself: with
# 'GARCH' in both X and y the target is handed to the model as an input.
X_VARIABLES = ['VOLUME', 'GARCH_1', 'YEAR', 'MONTH', 'DAY', 'WEEKDAY', 'WEEK']
Y_VARIABLE = ['GARCH']

# Rows without a lagged value cannot be used (NaN would propagate through
# scaling into the network), so exclude them without mutating `df`.
model_df = df.dropna(subset=['GARCH_1'])

X = model_df[X_VARIABLES]
y = model_df[Y_VARIABLE]

# shuffle=False keeps row order, so the test set is the final
# TEST_FRACTION of the rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_FRACTION, shuffle=False)

# Separate scalers for features and target: re-fitting one scaler on y
# would discard the feature scaling parameters. SCALER stays bound to the
# target scaler, which is the state it ended in before this change.
X_SCALER = MinMaxScaler()
SCALER = MinMaxScaler()

# Fit on the training portion only; the test portion reuses those parameters.
X_train_scaled = X_SCALER.fit_transform(X_train)
X_test_scaled = X_SCALER.transform(X_test)

y_train_scaled = SCALER.fit_transform(y_train)
y_test_scaled = SCALER.transform(y_test)

# Add a length-1 middle axis: (samples, features) -> (samples, 1, features).
X_train_scaled = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
X_test_scaled = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])


### Build and compile model

In [None]:

TIMESTEPS: int = 60  # Number of timesteps
N_FEATURES: int = 1  # Number of features
# NOTE(review): the split cell reshapes X_train_scaled to
# (samples, 1, n_columns) with seven feature columns, which does not match
# (TIMESTEPS, N_FEATURES) = (60, 1) — confirm which input the training cell
# actually feeds this model.

# Two stacked bidirectional LSTM layers followed by a small dense head with
# a single output unit.
layers: list = [
    Bidirectional(LSTM(64, return_sequences=True, input_shape=(TIMESTEPS, N_FEATURES))),
    Bidirectional(LSTM(64, return_sequences=False)),
    Dense(25),
    Dense(1)
]

model: Sequential = Sequential(layers)

# Compile once for regression. A second compile() without `metrics` would
# replace this configuration and drop the MAE metric.
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mae'],  # Mean Absolute Error as an additional metric
)

# Build with an explicit input shape so summary() can report layer shapes.
model.build(input_shape=(None, TIMESTEPS, N_FEATURES))
model.summary()

# Save the architecture diagram. os.path.join keeps the path portable, and
# the output directory is created if it does not exist yet.
plot_dir = 'model_vizualizations'
os.makedirs(plot_dir, exist_ok=True)
fig = plot_model(
    model,
    to_file=os.path.join(plot_dir, 'BI_LSTM.png'),
    show_shapes=True,
    show_layer_names=False,
    dpi=200,
)


<!-- ### Train model -->

### Train model