In [1]:
# ========================
# 0. IMPORTS
# ========================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, precision_score, recall_score, f1_score

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense, LSTM, RepeatVector, TimeDistributed,Dropout)
from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.python.client import device_lib

import os

# ========================
# 1. CONFIGURATION
# ========================
INPUT_STEPS = 10
FORECAST_STEPS = 10
TEST_RATIO = 0.05

# Tuning parameters
EPOCHS_LIST = [20]
BATCH_SIZES = [128]

WINDOW_SIZE_SIMULATION = 10  # 6h window
THRESHOLD_PERCENTILE = 90

SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# ========================
# 2. DEVICE SETUP
# ========================
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        tf.config.set_visible_devices(gpus[0], 'GPU')
        print("‚úÖ GPU is available and will be used.")
    except RuntimeError as e:
        print(e)
else:
    print("‚ö†Ô∏è No GPU detected, running on CPU.")

# ========================
# 3. LOAD AND PREPROCESS DATA
# ========================
file_path = '../../data/preprocessed_data.csv'
df = pd.read_csv(file_path, delimiter=',')
df['DateTime'] = pd.to_datetime(df['DateTime'], errors='coerce')
df.set_index('DateTime', inplace=True)
df = df.drop(columns=['labels'])
# Clean
df.dropna(axis=1, thresh=int(0.7 * len(df)), inplace=True)
df.ffill(inplace=True)
df.bfill(inplace=True)

‚úÖ GPU is available and will be used.


In [2]:
file_path = '../../data/test_set.csv'
df_test = pd.read_csv(file_path, delimiter=',')
df_test['DateTime'] = pd.to_datetime(df_test['DateTime'], errors='coerce')
df_test.set_index('DateTime', inplace=True)
df_test = df_test.drop(columns=['labels'])
# Clean
df_test.dropna(axis=1, thresh=int(0.7 * len(df_test)), inplace=True)
df_test.ffill(inplace=True)
df_test.bfill(inplace=True)
# Normalize

scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df.values)
scaled_data_test = scaler.transform(df_test.values)

df_scaled = pd.DataFrame(scaled_data, index=df.index, columns=df.columns).astype(np.float32)
df_scaled_test = pd.DataFrame(scaled_data_test, index=df_test.index, columns=df_test.columns).astype(np.float32)
print(f"‚úÖ Scaled dataset shape: {df_scaled.shape}")

# ========================
# 4. SEQUENTIAL TRAIN/TEST SPLIT
# ========================
split_idx = int((1 - TEST_RATIO) * len(df_scaled))
train_data = df_scaled.iloc[:split_idx]
#test_data = df_scaled_test.iloc[10000:11000]

print(f"‚úÖ Training samples: {len(train_data)}, Testing samples: {len(df_scaled_test)}")

‚úÖ Scaled dataset shape: (30103, 26)
‚úÖ Training samples: 21072, Testing samples: 10861


In [3]:
# Split for test set
split_idx_test = int(0.4 * len(df_scaled_test))
test_set_intermediaire = df_scaled_test.iloc[:split_idx_test]

#test_data = test_set_intermediaire.tail(1000)
test_data = df_scaled.iloc[split_idx:]
print(f"‚úÖ Forecast model-Testing samples: {test_data.shape}")

‚úÖ Forecast model-Testing samples: (9031, 26)


In [4]:
# ========================
# 5. CREATE SEQUENCES
# ========================
def create_sequences(data, input_steps, forecast_steps):
    X, y = [], []
    for i in range(len(data) - input_steps - forecast_steps):
        X.append(data[i:i+input_steps])
        y.append(data[i+input_steps:i+input_steps+forecast_steps])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

X_train_seq, y_train_seq = create_sequences(train_data.values, INPUT_STEPS, FORECAST_STEPS)
X_test_seq, y_test_seq = create_sequences(test_data.values, INPUT_STEPS, FORECAST_STEPS)

print(f"‚úÖ Training sequences: {X_train_seq.shape}, Testing sequences: {X_test_seq.shape}")


‚úÖ Training sequences: (21052, 10, 26), Testing sequences: (9011, 10, 26)


In [5]:
# ========================
# 6. BUILD LSTM SEQ2SEQ MODEL
# ========================
def build_lstm_seq2seq(input_steps, forecast_steps, input_dim, units=128):
    inputs = Input(shape=(input_steps, input_dim))
    encoded = LSTM(units)(inputs)
    repeated = RepeatVector(forecast_steps)(encoded)
    decoded = LSTM(units, return_sequences=True)(repeated)
    outputs = TimeDistributed(Dense(input_dim))(decoded)
    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='mse')
    return model



# ========================
# 7. TRAINING + TUNING
# ========================
best_val_rmse = np.inf
best_model = None
history_records = []

for epochs in EPOCHS_LIST:
    for batch_size in BATCH_SIZES:
        print(f"\nüîµ Training LSTM Seq2Seq with epochs={epochs}, batch_size={batch_size}")
        
        model = build_lstm_seq2seq(INPUT_STEPS, FORECAST_STEPS, X_train_seq.shape[2])
        es = EarlyStopping(patience=5, restore_best_weights=True)

        history = model.fit(X_train_seq, y_train_seq,
                            validation_split=0.1,
                            epochs=epochs,
                            batch_size=batch_size,
                            callbacks=[es],
                            verbose=1,
                            shuffle=False)
        
        val_preds = model.predict(X_test_seq, batch_size=batch_size)
        val_rmse = np.sqrt(mean_squared_error(y_test_seq.reshape(-1), val_preds.reshape(-1)))
        val_mae = mean_absolute_error(y_test_seq.reshape(-1), val_preds.reshape(-1))

        print(f"‚úÖ Validation RMSE: {val_rmse:.5f}, MAE: {val_mae:.5f}")

        history_records.append({
            "epochs": epochs,
            "batch_size": batch_size,
            "val_rmse": val_rmse,
            "val_mae": val_mae
        })

        if val_rmse < best_val_rmse:
            best_val_rmse = val_rmse
            best_model = model

# Save tuning history
history_df = pd.DataFrame(history_records)
history_df.to_csv("lstm_seq2seq_tuning_history.csv", index=False)
print("\nüìã Tuning Results Summary:")
print(history_df)

# Save best model
best_model.save("best_lstm_seq2seq_forecaster.keras")
print("\n‚úÖ Best LSTM Seq2Seq model saved.")



üîµ Training LSTM Seq2Seq with epochs=20, batch_size=128
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
‚úÖ Validation RMSE: 0.09723, MAE: 0.05650

üìã Tuning Results Summary:
   epochs  batch_size  val_rmse   val_mae
0      20         128  0.097227  0.056501

‚úÖ Best LSTM Seq2Seq model saved.


In [6]:
#y_pred_reshaped2 = X_test_seq[6197].reshape(-1, X_test_seq[6197].shape[2])
denorm2 = scaler.inverse_transform(X_train_seq[10000])
denorm_df2 = pd.DataFrame(denorm2, columns=df.columns)

In [7]:
denorm_df2

Unnamed: 0,500UZ0009E01.LZI0012A.MEAS,505D002D01.TI0012.MEAS,520MX051D01.FIC0028.MEAS,520D007D02.TI0058.MEAS,530K001S01.FI0007.MEAS,530R001D01.FI0043.MEAS,530E001D01.FIC0015.MEAS,530R001D01.FIC0029.MEAS,530R001D01.FIC0030.MEAS,530R001D01.FIC0047.MEAS,...,530F001D01.PIC0023.MEAS,530R002D02.TI0037.MEAS,530F001D01.TIC0012.MEAS,530M105D01.TIC0022.MEAS,530UZ1099E01.TZI0068A.MEAS,530M103D01.ZI2103A.MEAS,530M104D01.ZI2104A.MEAS,530M105D01.ZI2105A.MEAS,535D005D01.LI0011.MEAS,535INT920D01.TI0046.MEAS
0,0.0,24.105469,4.465376,129.263672,89986.195312,2655.308105,1199.138672,-40.662701,2562.602051,310.002991,...,1.495781,722.640625,141.421875,682.921875,155.992065,20.251953,18.851562,61.791012,13.23925,58.064453
1,0.0,24.0,4.465376,129.029297,90182.5625,2645.722412,1198.278931,-40.446575,2556.857178,309.892548,...,1.495781,722.606262,141.390625,682.936523,155.860809,20.601561,18.851562,61.783207,13.135653,57.962891
2,0.0,24.0,4.465376,128.8125,90063.109375,2654.899658,1202.272461,-40.806171,2564.078857,310.314087,...,1.495781,722.657837,141.296875,682.579102,155.598312,20.203125,18.851562,62.294918,13.236738,58.070316
3,0.0,24.0,4.465376,128.8125,90188.679688,2644.483887,1200.261353,-40.455685,2552.957764,309.714935,...,1.495781,722.760925,141.0625,682.710938,155.204529,20.337889,18.851562,61.931641,13.247351,57.865231
4,0.0,24.109375,4.465376,128.935547,90325.789062,2649.131348,1198.304565,-40.611515,2559.112305,309.986176,...,1.495781,722.846863,141.078125,682.359375,155.204529,20.0,18.851562,62.257812,13.342425,57.865231
5,0.0,24.109375,4.465376,129.164062,90146.492188,2648.185059,1201.266968,-40.747894,2557.558105,309.92688,...,1.495781,723.173462,141.03125,682.757812,155.33577,19.728516,18.851562,62.019535,13.241167,57.865231
6,0.0,24.117188,4.465376,129.380859,90203.1875,2651.395996,1199.411499,-40.744907,2566.471191,310.178711,...,1.495781,723.431274,140.890625,682.705078,155.33577,20.505859,18.851562,62.298828,13.230665,57.966797
7,0.0,24.117188,4.465376,129.492188,90296.609375,2647.02002,1199.782837,-40.698368,2563.441162,309.81839,...,1.495781,723.414062,140.671875,683.162109,155.33577,19.931641,18.851562,62.230469,13.227235,57.966797
8,0.0,24.117188,4.465376,129.492188,90182.960938,2652.313477,1200.217285,-40.757545,2558.457764,309.866211,...,1.495781,723.173462,140.703125,682.693359,155.598312,20.451172,18.851562,62.121098,13.113352,58.068359
9,0.0,24.011719,4.465376,129.263672,90037.546875,2651.382812,1201.240967,-40.867073,2563.063965,309.924225,...,1.495781,723.173462,140.828125,682.412109,155.729538,20.134766,18.851562,61.880859,13.220572,58.068359


In [8]:
 input_seq1 = np.expand_dims(X_train_seq[10000], axis=0)
val_preds2 = model.predict(input_seq1, batch_size=128)
y_pred_reshaped = val_preds2.reshape(-1, val_preds2.shape[2])
denorm = scaler.inverse_transform(y_pred_reshaped)
denorm_df = pd.DataFrame(denorm, columns=df.columns)



In [9]:
denorm_df

Unnamed: 0,500UZ0009E01.LZI0012A.MEAS,505D002D01.TI0012.MEAS,520MX051D01.FIC0028.MEAS,520D007D02.TI0058.MEAS,530K001S01.FI0007.MEAS,530R001D01.FI0043.MEAS,530E001D01.FIC0015.MEAS,530R001D01.FIC0029.MEAS,530R001D01.FIC0030.MEAS,530R001D01.FIC0047.MEAS,...,530F001D01.PIC0023.MEAS,530R002D02.TI0037.MEAS,530F001D01.TIC0012.MEAS,530M105D01.TIC0022.MEAS,530UZ1099E01.TZI0068A.MEAS,530M103D01.ZI2103A.MEAS,530M104D01.ZI2104A.MEAS,530M105D01.ZI2105A.MEAS,535D005D01.LI0011.MEAS,535INT920D01.TI0046.MEAS
0,-0.000549,16.236654,4.516489,123.519699,90793.132812,2647.489014,1194.808594,82.140625,2570.171631,309.883392,...,1.506208,722.683655,144.444427,683.246643,153.526184,20.46447,13.795214,62.307137,13.301111,57.882587
1,-0.012995,14.945056,4.52644,122.025459,90647.125,2647.051514,1189.330078,25.017721,2567.322266,309.515778,...,1.505186,722.29657,140.12146,682.368164,153.643524,20.376234,13.831427,61.52623,13.268514,57.811844
2,-0.012989,14.71288,4.524982,123.466591,91008.398438,2648.747559,1195.302002,10.307064,2570.90918,309.682648,...,1.505384,722.777283,138.747543,682.474548,153.720428,20.456865,13.915933,61.830078,13.296455,57.867706
3,-0.01305,14.735068,4.523589,123.405388,91046.382812,2648.730225,1194.841797,2.613378,2569.560059,309.680573,...,1.50607,722.698853,138.245926,682.480042,153.663528,20.432051,13.928574,61.78878,13.298195,57.861149
4,-0.01291,14.788752,4.523014,123.52121,91088.171875,2648.873047,1195.147583,0.328965,2569.388916,309.707703,...,1.50661,722.712585,138.126038,682.524414,153.642075,20.427689,13.943923,61.799332,13.301736,57.864895
5,-0.012873,14.837499,4.522808,123.546913,91104.453125,2648.920166,1195.207153,-0.387395,2569.216553,309.722656,...,1.506971,722.712036,138.088531,682.555176,153.62709,20.422394,13.951812,61.797153,13.303014,57.866146
6,-0.012853,14.875781,4.522782,123.573448,91117.117188,2648.969727,1195.32373,-0.47027,2569.217041,309.735931,...,1.507205,722.720642,138.076965,682.580627,153.61882,20.419992,13.957911,61.79842,13.30398,57.868
7,-0.012853,14.903897,4.52282,123.585846,91125.195312,2649.00293,1195.409424,-0.363072,2569.239258,309.745239,...,1.507357,722.7276,138.069748,682.599487,153.613342,20.418261,13.962096,61.79855,13.304522,57.869354
8,-0.012858,14.924623,4.522869,123.592445,91130.992188,2649.028076,1195.478638,-0.212588,2569.274414,309.752075,...,1.507458,722.733643,138.062576,682.613525,153.609375,20.417059,13.965095,61.798515,13.304875,57.870422
9,-0.012868,14.939612,4.522914,123.591225,91134.429688,2649.041992,1195.5177,-0.07389,2569.288086,309.756592,...,1.507528,722.736877,138.055405,682.623535,153.606125,20.415918,13.967061,61.797459,13.305032,57.87104


In [10]:
input_seq3 = X_train_seq[1:2]
y_true3 = y_train_seq[1]
y_pred3 = model.predict(input_seq3)[0]



In [11]:
y_true3[3]

array([0.0000000e+00, 7.8223214e-02, 1.4264606e-01, 2.4271001e-01,
       1.5437941e-01, 8.2945883e-01, 4.2575714e-01, 3.5945050e-04,
       7.5413567e-01, 7.9479456e-01, 3.7808505e-01, 8.9620426e-03,
       5.0918389e-02, 5.6991690e-01, 5.0452828e-01, 9.8857856e-01,
       7.7378440e-01, 5.2777845e-01, 9.2596360e-02, 3.6307204e-01,
       3.1417617e-01, 2.0365700e-01, 6.6152251e-01, 2.1282369e-01,
       6.8390602e-01, 9.4280642e-01], dtype=float32)

In [12]:
y_pred3[3]

array([-0.01422784,  0.2275099 ,  0.08431946,  0.5535095 ,  0.5717354 ,
        0.7093936 ,  0.77143115,  0.02682186,  0.6604152 ,  0.62538654,
        0.71407783,  0.6246385 ,  0.5642347 ,  0.3770187 ,  0.34792998,
        0.4155381 , -0.00672259,  0.6990679 ,  0.07589823,  0.10891727,
        0.39818904,  0.5647566 ,  0.4270487 ,  0.572025  ,  0.47351575,
        0.678601  ], dtype=float32)