## IMPORTS

In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, precision_score, recall_score, f1_score

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense, LSTM, RepeatVector, TimeDistributed)
from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.python.client import device_lib

import os

## DEVICE SETUP

In [26]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        tf.config.set_visible_devices(gpus[0], 'GPU')
        print("✅ GPU is available and will be used.")
    except RuntimeError as e:
        print(e)
else:
    print("⚠️ No GPU detected, running on CPU.")

✅ GPU is available and will be used.


## LOAD AND PREPROCESS DATA

In [27]:
file_path = '../forecast_datasets/training_set_labeled.csv'
df = pd.read_csv(file_path, delimiter=',')
df['DateTime'] = pd.to_datetime(df['DateTime'], errors='coerce')
df.set_index('DateTime', inplace=True)
df.drop(columns=['labels'], inplace=True)

# Normalize training set
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df.values)
df_scaled = pd.DataFrame(scaled_data, index=df.index, columns=df.columns).astype(np.float32)

print(f"✅ Scaled dataset shape: {df_scaled.shape}")


file_path2 = '../forecast_datasets/test_set_labeled.csv'
df_test = pd.read_csv(file_path2, delimiter=',')
df_test['DateTime'] = pd.to_datetime(df_test['DateTime'], errors='coerce')
df_test.set_index('DateTime', inplace=True)
df_test.drop(columns=['labels'], inplace=True)

# Normalize test set
scaled_test_data = scaler.transform(df_test.values)
df_test_scaled = pd.DataFrame(scaled_test_data, index=df_test.index, columns=df_test.columns).astype(np.float32)

print(f"✅ Scaled dataset shape: {df_test_scaled.shape}")

✅ Scaled dataset shape: (62174, 26)
✅ Scaled dataset shape: (40436, 26)


## CONFIGURATION

In [28]:
INPUT_STEPS = 10
FORECAST_STEPS = 10
UNITS_LIST= [128]

# Tuning parameters
EPOCHS_LIST = [20]
BATCH_SIZES = [1024]
TEST_SIZE_POURCENTAGE = 0.4
WINDOW_SIZE_SIMULATION = 10  # 30 mins window

SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Split for test set
split_idx_test = int(TEST_SIZE_POURCENTAGE * len(df_test_scaled))
test_set_intermediaire = df_test_scaled.iloc[:split_idx_test]

test_data = test_set_intermediaire.tail(15600).reset_index(drop=True)

print(f"✅ Forecast model-Testing samples: {test_data.shape}")

✅ Forecast model-Testing samples: (15600, 26)


## SEQUENCE CREATION

In [30]:
# ========================
# 4. SEQUENTIAL TRAIN/TEST SPLIT
# ========================
train_data = df_scaled
print(f"✅ Training samples: {len(train_data)}, Testing samples: {len(test_data)}")

# ========================
# 5. CREATE SEQUENCES
# ========================
def create_sequences(data, input_steps, forecast_steps):
    X, y = [], []
    for i in range(len(data) - input_steps - forecast_steps):
        X.append(data[i:i+input_steps])
        y.append(data[i+input_steps:i+input_steps+forecast_steps])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

X_train_seq, y_train_seq = create_sequences(train_data.values, INPUT_STEPS, FORECAST_STEPS)
X_test_seq, y_test_seq = create_sequences(test_data.values, INPUT_STEPS, FORECAST_STEPS)

print(f"✅ Training sequences: {X_train_seq.shape}, Testing sequences: {X_test_seq.shape}")

✅ Training samples: 62174, Testing samples: 15600
✅ Training sequences: (62154, 10, 26), Testing sequences: (15580, 10, 26)


## BUILD LSTM SEQ2SEQ MODEL

In [31]:
def build_lstm_seq2seq(input_steps, forecast_steps, input_dim, units=128):
    inputs = Input(shape=(input_steps, input_dim))
    encoded = LSTM(units)(inputs)
    repeated = RepeatVector(forecast_steps)(encoded)
    decoded = LSTM(units, return_sequences=True)(repeated)
    outputs = TimeDistributed(Dense(input_dim))(decoded)
    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='mse')
    return model

In [32]:
def build_lstm_seq2seq_layered(input_steps, forecast_steps, input_dim, units=128):
    inputs = Input(shape=(input_steps, input_dim))
    
    # encoder
    layer1 = LSTM(units, return_sequences=True)(inputs)
    encoded = LSTM(units)(layer1)

    repeated = RepeatVector(forecast_steps)(encoded)

    #decoder
    decoded = LSTM(units, return_sequences=True)(repeated)
    outputs = TimeDistributed(Dense(input_dim))(decoded)
    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='mse')
    return model

## TRAINING + TUNING

In [33]:
best_val_mse = np.inf
best_model = None
history_records = []

for epochs in EPOCHS_LIST:
    for batch_size in BATCH_SIZES:
        for units in UNITS_LIST:
            print(f"\n🔵 Training LSTM Seq2Seq with epochs={epochs}, batch_size={batch_size}")
            
            model = build_lstm_seq2seq(INPUT_STEPS, FORECAST_STEPS, X_train_seq.shape[2], units)
            es = EarlyStopping(patience=5, restore_best_weights=True)

            history = model.fit(X_train_seq, y_train_seq,
                                validation_split=0.1,
                                epochs=epochs,
                                batch_size=batch_size,
                                callbacks=[es],
                                verbose=1,
                                shuffle=False)
            
            val_preds = model.predict(X_test_seq, batch_size=batch_size)
            val_rmse = np.sqrt(mean_squared_error(y_test_seq.reshape(-1), val_preds.reshape(-1)))
            val_mae = mean_absolute_error(y_test_seq.reshape(-1), val_preds.reshape(-1))
            val_mse = mean_squared_error(y_test_seq.reshape(-1), val_preds.reshape(-1))

            print(f"✅ Validation MSE: {val_mse:.5f}, MAE: {val_mae:.5f}")


            history_records.append({
                "epochs": epochs,
                "batch_size": batch_size,
                "LSTM-units": units,
                "val_mse": val_mse,
                "val_mae": val_mae
            })

            if val_mse < best_val_mse:
                best_val_mse = val_mse
                best_model = model

# Save tuning history
history_df = pd.DataFrame(history_records)
history_df.to_csv("lstm_seq2seq_tuning_history.csv", index=False)
print("\n📋 Tuning Results Summary:")
print(history_df)

# Save best model
best_model.save("best_lstm_seq2seq_forecaster.h5")
print("\n✅ Best LSTM Seq2Seq model saved.")


🔵 Training LSTM Seq2Seq with epochs=20, batch_size=1024
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
✅ Validation MSE: 0.07581, MAE: 0.20040

📋 Tuning Results Summary:
   epochs  batch_size  LSTM-units   val_mse  val_mae
0      20        1024         128  0.075812   0.2004

✅ Best LSTM Seq2Seq model saved.


### REAL-TIME SIMULATION ON TEST SET

In [35]:
simulation_X, simulation_y = create_sequences(test_data.values, INPUT_STEPS, FORECAST_STEPS)

forecast_list = []
true_windows = []

for i in range(0, len(simulation_X), WINDOW_SIZE_SIMULATION):
    window_X = simulation_X[i:i+1]
    window_y_true = simulation_y[i]

    y_pred_future = best_model.predict(window_X,batch_size=1024, verbose=1)[0]

    forecast_list.append(y_pred_future)
    true_windows.append(window_y_true)

print("\n✅ Real-time simulation complete.")


✅ Real-time simulation complete.


### EVALUATION

In [36]:
# Forecasting metrics
y_pred_all = np.vstack(forecast_list)
y_true_all = np.vstack(true_windows)

forecast_mse = mean_squared_error(y_true_all.reshape(-1), y_pred_all.reshape(-1))
forecast_mae = mean_absolute_error(y_true_all.reshape(-1), y_pred_all.reshape(-1))

print(f"\n📈 Forecasting Evaluation on Test:")
print(f"MSE: {forecast_mse:.5f}")
print(f"MAE:  {forecast_mae:.5f}")

# ========================
# SAVE METRICS
# ========================

metrics_results = {
    "Model": "LSTM Seq2Seq",
    "Forecast_MSE": forecast_mse,
    "Forecast_MAE": forecast_mae
}


# Save tuning history
metrics_df = pd.DataFrame([metrics_results])
metrics_df.to_csv("lstm_seq2seq_test_evaluation.csv", index=False)
print("\n📋 Test Results Summary:")
print(metrics_df)


📈 Forecasting Evaluation on Test:
MSE: 0.07583
MAE:  0.20041

📋 Test Results Summary:
          Model  Forecast_MSE  Forecast_MAE
0  LSTM Seq2Seq       0.07583      0.200415
