## IMPORTS

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, precision_score, recall_score, f1_score

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense, LSTM, RepeatVector, TimeDistributed,
                                     Dropout, LayerNormalization)
from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.python.client import device_lib

import os
import gc
import tensorflow.keras.backend as K

## DEVICE SETUP

In [2]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        tf.config.set_visible_devices(gpus[0], 'GPU')
        print("✅ GPU is available and will be used.")
    except RuntimeError as e:
        print(e)
else:
    print("⚠️ No GPU detected, running on CPU.")

✅ GPU is available and will be used.


## LOAD AND PREPROCESS DATA

In [3]:
file_path = '../forecast_datasets/training_set_labeled.csv'
df = pd.read_csv(file_path, delimiter=',')
df['DateTime'] = pd.to_datetime(df['DateTime'], errors='coerce')
df.set_index('DateTime', inplace=True)
df.drop(columns=['labels'], inplace=True)

# Normalize training set
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df.values)
df_scaled = pd.DataFrame(scaled_data, index=df.index, columns=df.columns).astype(np.float32)

print(f"✅ Scaled dataset shape: {df_scaled.shape}")


file_path2 = '../forecast_datasets/test_set_labeled.csv'
df_test = pd.read_csv(file_path2, delimiter=',')
df_test['DateTime'] = pd.to_datetime(df_test['DateTime'], errors='coerce')
df_test.set_index('DateTime', inplace=True)
df_test.drop(columns=['labels'], inplace=True)

# Normalize test set
scaled_test_data = scaler.transform(df_test.values)
df_test_scaled = pd.DataFrame(scaled_test_data, index=df_test.index, columns=df_test.columns).astype(np.float32)

print(f"✅ Scaled dataset shape: {df_test_scaled.shape}")

✅ Scaled dataset shape: (62174, 26)
✅ Scaled dataset shape: (40436, 26)


## CONFIGURATION

In [4]:
INPUT_STEPS = 10
FORECAST_STEPS = 10
EMBED_DIMS = [128]
NB_HEADS = [4]
FF_DIMS = [128]

# Tuning parameters
EPOCHS_LIST = [20]
BATCH_SIZES = [1024]
TEST_SIZE_POURCENTAGE = 0.4
WINDOW_SIZE_SIMULATION = 10  # 30 mins window

SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

## SEQUENCE CREATION

In [5]:
# Split for test set
split_idx_test = int(TEST_SIZE_POURCENTAGE * len(df_test_scaled))
test_set_intermediaire = df_test_scaled.iloc[:split_idx_test]

test_data = test_set_intermediaire.tail(15600).reset_index(drop=True)

print(f"✅ Forecast model-Testing samples: {test_data.shape}")

# ========================
# 4. SEQUENTIAL TRAIN/TEST SPLIT
# ========================
train_data = df_scaled
print(f"✅ Training samples: {len(train_data)}, Testing samples: {len(test_data)}")

# ========================
# 5. CREATE SEQUENCES
# ========================
def create_sequences(data, input_steps, forecast_steps):
    X, y = [], []
    for i in range(len(data) - input_steps - forecast_steps):
        X.append(data[i:i+input_steps])
        y.append(data[i+input_steps:i+input_steps+forecast_steps])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

X_train_seq, y_train_seq = create_sequences(train_data.values, INPUT_STEPS, FORECAST_STEPS)
X_test_seq, y_test_seq = create_sequences(test_data.values, INPUT_STEPS, FORECAST_STEPS)

print(f"✅ Training sequences: {X_train_seq.shape}, Testing sequences: {X_test_seq.shape}")

✅ Forecast model-Testing samples: (15600, 26)
✅ Training samples: 62174, Testing samples: 15600
✅ Training sequences: (62154, 10, 26), Testing sequences: (15580, 10, 26)


## BUILD TRANSFORMER MODEL

In [6]:
class PositionalEmbedding(tf.keras.layers.Layer):
    def __init__(self, sequence_length, input_dim, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.sequence_length = sequence_length
        self.input_dim = input_dim
        self.embed_dim = embed_dim
        self.token_dense = Dense(embed_dim)
        self.pos_embedding = self.add_weight("pos_embedding", shape=[1, sequence_length, embed_dim])

    def call(self, x):
        x = self.token_dense(x)
        return x + self.pos_embedding

    def get_config(self):
        config = super().get_config()
        config.update({
            "sequence_length": self.sequence_length,
            "input_dim": self.input_dim,
            "embed_dim": self.embed_dim,
        })
        return config


class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=False):
        attn_output = self.att(inputs, inputs)
        out1 = self.layernorm1(inputs + self.dropout1(attn_output, training=training))
        ffn_output = self.ffn(out1)
        return self.layernorm2(out1 + self.dropout2(ffn_output, training=training))

    def get_config(self):
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config


def build_transformer_model(input_steps, input_dim, forecast_steps, embed_dim, num_heads, ff_dim):
    inputs = Input(shape=(input_steps, input_dim))
    x = PositionalEmbedding(input_steps, input_dim, embed_dim)(inputs)
    x = TransformerBlock(embed_dim, num_heads, ff_dim)(x)
    x = TransformerBlock(embed_dim, num_heads, ff_dim)(x)
    x = Dense(embed_dim, activation='relu')(x)
    x = Dense(input_dim)(x)
    outputs = x[:, -forecast_steps:, :]
    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='mse')
    return model

## TRAINING + TUNING

In [7]:
best_val_mse = np.inf
best_model = None
history_records = []

for epochs in EPOCHS_LIST:
    for batch_size in BATCH_SIZES:
        for embed_dim in EMBED_DIMS:
            for nb_head in NB_HEADS:
                for ff_dim in FF_DIMS:
                    print(f"\n🔵 Training Transformer with epochs={epochs}, batch_size={batch_size}")
                    
                    model = build_transformer_model(INPUT_STEPS, X_train_seq.shape[2], FORECAST_STEPS, embed_dim, nb_head, ff_dim)
                    es = EarlyStopping(patience=5, restore_best_weights=True)
                    history = model.fit(X_train_seq, y_train_seq,
                                        validation_split=0.1,
                                        epochs=epochs,
                                        batch_size=batch_size,
                                        callbacks=[es],
                                        verbose=1,
                                        shuffle=False)
                    
                    val_preds = model.predict(X_test_seq, batch_size=batch_size)
                    val_mse = mean_squared_error(y_test_seq.reshape(-1), val_preds.reshape(-1))
                    val_mae = mean_absolute_error(y_test_seq.reshape(-1), val_preds.reshape(-1))

                    print(f"✅ Validation MSE: {val_mse:.5f}, MAE: {val_mae:.5f}")

                    history_records.append({
                            "epochs": epochs,
                            "batch_size": batch_size,
                            "EMBED_DIMS": embed_dim, 
                            "NB_HEADS": nb_head,
                            "FF_DIMS": ff_dim,
                            "val_mse": val_mse,
                            "val_mae": val_mae
                        })

                    if val_mse < best_val_mse:
                        best_val_mse = val_mse
                        best_model = model

# Save tuning history
history_df = pd.DataFrame(history_records)
history_df.to_csv("transformer_tuning_history.csv", index=False)
print("\n📋 Tuning Results Summary:")
print(history_df)

# Save best model
best_model.save("best_transformer_forecaster.h5")
print("\n✅ Best transformer model saved.")


🔵 Training Transformer with epochs=20, batch_size=1024
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
✅ Validation MSE: 0.10784, MAE: 0.23213

📋 Tuning Results Summary:
   epochs  batch_size  EMBED_DIMS  NB_HEADS  FF_DIMS   val_mse   val_mae
0      20        1024         128         4      128  0.107835  0.232134

✅ Best transformer model saved.


In [8]:
K.clear_session()
gc.collect()

2144

### REAL-TIME SIMULATION ON TEST SET

In [9]:
simulation_X, simulation_y = create_sequences(test_data.values, INPUT_STEPS, FORECAST_STEPS)

forecast_list = []
true_windows = []

for i in range(0, len(simulation_X), WINDOW_SIZE_SIMULATION):
    window_X = simulation_X[i:i+1]
    window_y_true = simulation_y[i]

    y_pred_future = best_model.predict(window_X,batch_size=1024, verbose=1)[0]

    forecast_list.append(y_pred_future)
    true_windows.append(window_y_true)

print("\n✅ Real-time simulation complete.")


✅ Real-time simulation complete.


### EVALUATION

In [11]:
# Forecasting metrics
y_pred_all = np.vstack(forecast_list)
y_true_all = np.vstack(true_windows)

forecast_mse = mean_squared_error(y_true_all.reshape(-1), y_pred_all.reshape(-1))
forecast_mae = mean_absolute_error(y_true_all.reshape(-1), y_pred_all.reshape(-1))

print(f"\n📈 Forecasting Evaluation on Test:")
print(f"MSE: {forecast_mse:.5f}")
print(f"MAE:  {forecast_mae:.5f}")

# ========================
# SAVE METRICS
# ========================

metrics_results = {
    "Model": "Transformer",
    "Forecast_MSE": forecast_mse,
    "Forecast_MAE": forecast_mae
}


# Save tuning history
metrics_df = pd.DataFrame([metrics_results])
metrics_df.to_csv("transformer_test_evaluation.csv", index=False)
print("\n📋 Test Results Summary:")
print(metrics_df)


📈 Forecasting Evaluation on Test:
MSE: 0.10786
MAE:  0.23217

📋 Test Results Summary:
         Model  Forecast_MSE  Forecast_MAE
0  Transformer       0.10786      0.232172
