### IMPORTS


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Input, LSTM, Dense, RepeatVector, TimeDistributed, Conv1D, LayerNormalization, MultiHeadAttention, Add
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import gc
import tensorflow.keras.backend as K

### DEVICE SETUP


In [2]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        tf.config.set_visible_devices(gpus[0], 'GPU')
        print("✅ GPU is available and will be used.")
    except RuntimeError as e:
        print(e)
else:
    print("⚠️ No GPU detected, running on CPU.")

✅ GPU is available and will be used.


### LOAD AND PREPROCESS DATA


In [3]:
file_path = '../forecast_datasets/training_set_labeled.csv'
df = pd.read_csv(file_path, delimiter=',')
df['DateTime'] = pd.to_datetime(df['DateTime'], errors='coerce')
df.set_index('DateTime', inplace=True)
df.drop(columns=['labels'], inplace=True)

# Normalize training set
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df.values)
df_scaled = pd.DataFrame(scaled_data, index=df.index, columns=df.columns).astype(np.float32)

print(f"✅ Scaled dataset shape: {df_scaled.shape}")


file_path2 = '../forecast_datasets/test_set_labeled.csv'
df_test = pd.read_csv(file_path2, delimiter=',')
df_test['DateTime'] = pd.to_datetime(df_test['DateTime'], errors='coerce')
df_test.set_index('DateTime', inplace=True)
df_test.drop(columns=['labels'], inplace=True)

# Normalize test set
scaled_test_data = scaler.transform(df_test.values)
df_test_scaled = pd.DataFrame(scaled_test_data, index=df_test.index, columns=df_test.columns).astype(np.float32)

print(f"✅ Scaled dataset shape: {df_test_scaled.shape}")

✅ Scaled dataset shape: (62174, 26)
✅ Scaled dataset shape: (40436, 26)


# PREDICTION INFORMER


### CONFIGURATION


In [4]:
INPUT_STEPS = 10
FORECAST_STEPS = 10
EMBED_DIMS = [128]
NB_HEADS = [4]
TOP_K_LIST = [5]

# Tuning parameters
EPOCHS_LIST = [20]
BATCH_SIZES = [1024]
TEST_SIZE_POURCENTAGE = 0.4
WINDOW_SIZE_SIMULATION = 10  # 30 mins window

SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

### SEQUENTIAL TRAIN/TEST SPLIT


In [5]:
# Split for test set
split_idx_test = int(TEST_SIZE_POURCENTAGE * len(df_test_scaled))
test_set_intermediaire = df_test_scaled.iloc[:split_idx_test]

test_data = test_set_intermediaire.tail(15600).reset_index(drop=True)

print(f"✅ Forecast model-Testing samples: {test_data.shape}")

# ========================
# 4. SEQUENTIAL TRAIN/TEST SPLIT
# ========================
train_data = df_scaled
print(f"✅ Training samples: {len(train_data)}, Testing samples: {len(test_data)}")

# ========================
# 5. CREATE SEQUENCES
# ========================
def create_sequences(data, input_steps, forecast_steps):
    X, y = [], []
    for i in range(len(data) - input_steps - forecast_steps):
        X.append(data[i:i+input_steps])
        y.append(data[i+input_steps:i+input_steps+forecast_steps])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

X_train_seq, y_train_seq = create_sequences(train_data.values, INPUT_STEPS, FORECAST_STEPS)
X_test_seq, y_test_seq = create_sequences(test_data.values, INPUT_STEPS, FORECAST_STEPS)

print(f"✅ Training sequences: {X_train_seq.shape}, Testing sequences: {X_test_seq.shape}")

✅ Forecast model-Testing samples: (15600, 26)
✅ Training samples: 62174, Testing samples: 15600
✅ Training sequences: (62154, 10, 26), Testing sequences: (15580, 10, 26)


### BUILD INFORMER-LIKE MODEL


In [6]:
class MultiHeadProbSparseAttention(Layer):
    def __init__(self, num_heads=4, k=5, sampling_factor=5, **kwargs):
        super(MultiHeadProbSparseAttention, self).__init__(**kwargs)
        self.num_heads = num_heads
        self.k = k  # top-k values to keep
        self.sampling_factor = sampling_factor  # c in ln(L), typiquement 5

    def build(self, input_shape):
        self.embed_dim = input_shape[0][-1]
        if self.embed_dim % self.num_heads != 0:
            raise ValueError("embed_dim must be divisible by num_heads")
        self.depth = self.embed_dim // self.num_heads

        self.wq = tf.keras.layers.Dense(self.embed_dim)
        self.wk = tf.keras.layers.Dense(self.embed_dim)
        self.wv = tf.keras.layers.Dense(self.embed_dim)
        self.dense = tf.keras.layers.Dense(self.embed_dim)
        super().build(input_shape)

    def split_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(x, perm=[0, 2, 1, 3])  # (batch, heads, seq_len, depth)

    def compute_sparsity_measure(self, scores):
        """Calcule M(q_i) = max_j(qi.kj/sqrt(d)) - mean_j(qi.kj/sqrt(d))"""
        max_scores = tf.reduce_max(scores, axis=-1)  # (batch, heads, seq_len)
        mean_scores = tf.reduce_mean(scores, axis=-1)  # (batch, heads, seq_len)
        return max_scores - mean_scores  # (batch, heads, seq_len)

    def call(self, inputs):
        q_input, k_input, v_input = inputs
        batch_size = tf.shape(q_input)[0]
        seq_len = tf.shape(q_input)[1]

        # Projections linéaires et split des heads
        q = self.wq(q_input)
        k = self.wk(k_input)
        v = self.wv(v_input)
        
        q = self.split_heads(q, batch_size)  # (batch, heads, seq_len, depth)
        k = self.split_heads(k, batch_size)
        v = self.split_heads(v, batch_size)

        # Calcul des scores d'attention
        scores = tf.matmul(q, k, transpose_b=True) / tf.sqrt(tf.cast(self.depth, tf.float32))  # (batch, heads, seq_len, seq_len)

        # Étape clé: ProbSparse via max-mean measurement
        sparsity_measure = self.compute_sparsity_measure(scores)  # (batch, heads, seq_len)
        u = self.sampling_factor * tf.math.log(tf.cast(seq_len, tf.float32))  # c * ln(L)
        u = tf.cast(u, tf.int32)
        u = tf.minimum(u, seq_len)
        
        # Sélection des top-u queries les plus "sparses"
        _, top_u_indices = tf.math.top_k(sparsity_measure, k=u)  # (batch, heads, u)
        top_u_indices = tf.sort(top_u_indices)  # Tri pour maintenir l'ordre temporel

        # Création d'un masque pour ne garder que les top-u queries
        mask = tf.reduce_sum(
            tf.one_hot(top_u_indices, depth=seq_len),
            axis=-2
        )  # (batch, heads, seq_len)
        mask = tf.cast(mask, tf.bool)

        # Application du masque aux scores
        sparse_scores = tf.where(
            mask[..., tf.newaxis],  # Étendre pour (batch, heads, seq_len, 1)
            scores,
            -1e9 * tf.ones_like(scores)
        )

        # Softmax et calcul de l'attention
        attention_weights = tf.nn.softmax(sparse_scores, axis=-1)
        scaled_attention = tf.matmul(attention_weights, v)  # (batch, heads, seq_len, depth)

        # Concaténation et projection finale
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.embed_dim))
        output = self.dense(concat_attention)
        return output

    def get_config(self):
        config = super().get_config()
        config.update({
            "num_heads": self.num_heads,
            "k": self.k,
            "sampling_factor": self.sampling_factor
        })
        return config

In [7]:
def build_informer_like_model(input_steps, forecast_steps, input_dim, embed_dim, num_heads, top_k):
    inputs = tf.keras.Input(shape=(input_steps, input_dim))
    x = tf.keras.layers.Dense(embed_dim)(inputs)

    sparse_attention = MultiHeadProbSparseAttention(num_heads=num_heads, k=top_k, sampling_factor=5)
    attn_out = sparse_attention([x, x, x])

    x = tf.keras.layers.Add()([x, attn_out])
    x = tf.keras.layers.LayerNormalization()(x)

    x = tf.keras.layers.Conv1D(filters=embed_dim, kernel_size=3, padding='same', activation='relu')(x)
    x = tf.keras.layers.Conv1D(filters=embed_dim, kernel_size=3, padding='same', activation='relu')(x)

    x = tf.keras.layers.RepeatVector(forecast_steps)(x[:, -1])
    x = tf.keras.layers.LSTM(embed_dim, return_sequences=True)(x)
    outputs = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(input_dim))(x)

    model = tf.keras.Model(inputs, outputs)
    model.compile(optimizer='adam', loss='mse')
    return model

### TRAINING + TUNING LOOP


In [9]:
best_val_mse = np.inf
best_model = None
history_records = []

for epochs in EPOCHS_LIST:
    for batch_size in BATCH_SIZES:
        for embed_dim in EMBED_DIMS:
            for num_heads in NB_HEADS:
                for top_k in TOP_K_LIST:
                    print(f"\n🔵 Training model with epochs={epochs}, batch_size={batch_size}")
                    
                    model = build_informer_like_model(INPUT_STEPS, FORECAST_STEPS, X_train_seq.shape[2], embed_dim, num_heads, top_k)
                    es = EarlyStopping(patience=5, restore_best_weights=True)

                    history = model.fit(X_train_seq, y_train_seq, 
                                        validation_split=0.1,
                                        epochs=epochs, 
                                        batch_size=batch_size,
                                        callbacks=[es],
                                        verbose=1,
                                    shuffle=False)
                    
                    # Evaluate on validation
                    val_preds = model.predict(X_test_seq, batch_size=batch_size)
                    val_mse = mean_squared_error(y_test_seq.reshape(-1), val_preds.reshape(-1))
                    val_mae = mean_absolute_error(y_test_seq.reshape(-1), val_preds.reshape(-1))

                    print(f"✅ Validation MSE: {val_mse:.5f}, MAE: {val_mae:.5f}")

                    history_records.append({
                            "epochs": epochs,
                            "batch_size": batch_size,
                            "EMBED_DIMS": embed_dim, 
                            "NB_HEADS": num_heads,
                            "TOP_K_LIST": top_k,
                            "val_mse": val_mse,
                            "val_mae": val_mae
                        })
                    
                    # Save the best model
                    if val_mse < best_val_mse:
                        best_val_mse = val_mse
                        best_model = model

# Save all results
history_df = pd.DataFrame(history_records)
history_df.to_csv("informer_tuning_history.csv", index=False)
print("\n📋 Tuning Results Summary:")
print(history_df)

# Save best model
best_model.save("best_informer_forecaster.h5")
print("\n✅ Best model saved: best_informer_forecaster.h5")


🔵 Training model with epochs=20, batch_size=1024
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
✅ Validation MSE: 0.07891, MAE: 0.20379

📋 Tuning Results Summary:
   epochs  batch_size  EMBED_DIMS  NB_HEADS  TOP_K_LIST   val_mse   val_mae
0      20        1024         128         4           5  0.078906  0.203791

✅ Best model saved: best_informer_forecaster.h5


In [10]:
K.clear_session()
gc.collect()

27866

### REAL-TIME SIMULATION ON TEST SET

In [11]:
simulation_X, simulation_y = create_sequences(test_data.values, INPUT_STEPS, FORECAST_STEPS)

forecast_list = []
true_windows = []

for i in range(0, len(simulation_X), WINDOW_SIZE_SIMULATION):
    window_X = simulation_X[i:i+1]
    window_y_true = simulation_y[i]

    y_pred_future = best_model.predict(window_X,batch_size=1024, verbose=1)[0]

    forecast_list.append(y_pred_future)
    true_windows.append(window_y_true)

print("\n✅ Real-time simulation complete.")


✅ Real-time simulation complete.


### EVALUATION

In [13]:
# Forecasting metrics
y_pred_all = np.vstack(forecast_list)
y_true_all = np.vstack(true_windows)

forecast_mse = mean_squared_error(y_true_all.reshape(-1), y_pred_all.reshape(-1))
forecast_mae = mean_absolute_error(y_true_all.reshape(-1), y_pred_all.reshape(-1))

print(f"\n📈 Forecasting Evaluation on Test:")
print(f"MSE: {forecast_mse:.5f}")
print(f"MAE:  {forecast_mae:.5f}")

# ========================
# SAVE METRICS
# ========================

metrics_results = {
    "Model": "Informer",
    "Forecast_MSE": forecast_mse,
    "Forecast_MAE": forecast_mae
}


# Save tuning history
metrics_df = pd.DataFrame([metrics_results])
metrics_df.to_csv("informer_test_evaluation.csv", index=False)
print("\n📋 Test Results Summary:")
print(metrics_df)


📈 Forecasting Evaluation on Test:
MSE: 0.07892
MAE:  0.20381

📋 Test Results Summary:
      Model  Forecast_MSE  Forecast_MAE
0  Informer      0.078921      0.203807
