# Step 3: LSTM Forecasting + Residual Anomaly Detection

In this section we:

1. Load our scaled data and the original `MinMaxScaler`  
2. Build Keras `TimeseriesGenerator`s  
3. Define & train an LSTM model  
4. Forecast & evaluate (MAE, RMSE, sMAPE, directional accuracy)  
5. Flag anomalies via a 3σ threshold on residuals

In [1]:
# ─────────────────────────────────────────────────────────────────────────────
# 3.1 Imports & Data Loading

import os
import sys
import joblib
from tqdm import tqdm
import itertools
import numpy as np
import pandas as pd
from datetime import datetime

import warnings

# Silence the Keras PyDataset warning.
# NOTE(review): this filter ignores *every* UserWarning raised from here on,
# not only the PyDataset one, so unrelated warnings are hidden as well.
warnings.filterwarnings(
    "ignore",
    category=UserWarning
)

# Attempt to suppress GPU / TensorFlow start-up logs.
# Python's stderr is replaced for the rest of the session with a handle on the
# null device; the handle is never closed and anything written to sys.stderr
# afterwards is discarded.
# NOTE(review): the cuDNN/cuBLAS registration messages still show up in this
# cell's recorded output, so the redirect evidently does not silence them —
# confirm whether it is still wanted.
sys.stderr = open(os.devnull, "w")
# Both variables are set before the `import tensorflow` further down —
# presumably they have to be in place at import time; keep this ordering.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"

from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error
)

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.feature_selection import mutual_info_regression
from sklearn.ensemble import RandomForestRegressor

import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.callbacks import EarlyStopping, Callback
from tensorflow.keras.utils import to_categorical, Sequence
import tensorflow.keras.backend as K

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from utils.utilities import save_model_outputs



# Project root. Defaults to the original author's checkout; set the
# MARTIAN_WEATHER_ROOT environment variable to run the notebook from another
# machine or location without editing this cell.
ABS_PATH      = os.environ.get(
    "MARTIAN_WEATHER_ROOT",
    "/home/tristan/Desktop/SpringBoard/Projects/Martian-Weather-Analysis/",
)
# Directories derived from the project root.
DATA_PATH     = os.path.join(ABS_PATH, "data/processed/")
MODEL_PATH    = os.path.join(ABS_PATH, "models/")
LOG_PATH_ROOT = os.path.join(ABS_PATH, "logs/")
lstm_log_path = os.path.join(LOG_PATH_ROOT, "lstm_training.log")

E0000 00:00:1746699399.035441 2251644 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746699399.039142 2251644 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1746699399.048894 2251644 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746699399.048901 2251644 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746699399.048902 2251644 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746699399.048903 2251644 computation_placer.cc:177] computation placer already registered. Please check linka

In [2]:
class CombinedSequence(Sequence):
    """Pair a regression generator with a classification generator.

    Both generators are indexed with the same batch index. The inputs are
    taken from ``reg_gen``; the inputs yielded by ``clf_gen`` are discarded and
    only its targets are used.

    Args:
        reg_gen: Indexable batch source yielding ``(x, y_reg)`` per batch.
        clf_gen: Indexable batch source yielding ``(x, y_clf)`` per batch.
        **kwargs: Forwarded unchanged to the base ``Sequence`` constructor.

    Raises:
        ValueError: If the two generators do not have the same number of
            batches.
    """

    def __init__(self, reg_gen, clf_gen, **kwargs):
        # Initialise the base class so it can set up its own state; skipping
        # this call is what triggers the PyDataset constructor warning.
        super().__init__(**kwargs)
        if len(reg_gen) != len(clf_gen):
            raise ValueError(
                "reg_gen and clf_gen must have the same number of batches "
                f"(got {len(reg_gen)} and {len(clf_gen)})"
            )
        self.reg_gen = reg_gen
        self.clf_gen = clf_gen

    def __len__(self):
        """Return the number of batches."""
        return len(self.reg_gen)

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(x, targets)``.

        ``targets`` is a dict with the keys ``"forecast"`` and ``"direction"``.
        """
        x, y_reg = self.reg_gen[idx]
        _, y_clf = self.clf_gen[idx]
        return x, {"forecast": y_reg, "direction": y_clf}

In [3]:
# Load scaled data
# NOTE: these data sets were scaled on all the features at once, which could
# potentially pose an issue later on. It might be worth reprocessing this data
# and moving that step into the preprocessing-phase notebook.
X_scaled_train = pd.read_csv(os.path.join(DATA_PATH, "scaled_train.csv"), index_col="sol_number")
X_scaled_test  = pd.read_csv(os.path.join(DATA_PATH, "scaled_test.csv"),  index_col="sol_number")
X_unscaled_test= pd.read_csv(os.path.join(DATA_PATH, "unscaled_test.csv"),index_col="sol_number")

# Raw train/test target for metrics & direction labels
y_train_raw = pd.read_csv(os.path.join(DATA_PATH, "unscaled_train.csv"),
                          index_col="sol_number")["avg_ground_temp"].values
y_test_raw  = X_unscaled_test["avg_ground_temp"].values


def _direction_labels(series):
    """Return one-hot rise/fall/no-change labels for every step of *series*.

    Each value is compared with the one before it: class 0 when it is higher,
    class 1 when it is lower and class 2 otherwise. The first element has no
    predecessor and is compared with itself, so it always lands in class 2.
    """
    step = np.diff(series, prepend=series[0])
    classes = np.where(step > 0, 0, np.where(step < 0, 1, 2))
    return to_categorical(classes, num_classes=3)


# Precompute rise/fall/no-change labels for the ENTIRE train & test series
train_dir_lbl = _direction_labels(y_train_raw)
test_dir_lbl  = _direction_labels(y_test_raw)

# Load target variable scaler object
y_scaler_target = joblib.load(os.path.join(MODEL_PATH, "y_target_scaler.pkl"))


In [4]:
# Prepare feature and target arrays
DROPPED = ["avg_ground_temp"]
FEATURE_COLS = [c for c in X_scaled_train.columns if c not in DROPPED]

# Candidate input column sets, keyed by a short name used in the log.
# An empty list marks the univariate run.
feature_sets = {
    "uni":      [],  # pure univariate
    "multi":    FEATURE_COLS
}

# Every combination of window length, LSTM width, dropout rate and feature
# set; each tuple is (seq_len, units, dropout, feature_set_name, columns).
param_grid = [
    (seq, units, drp, name, cols)
    for seq in (10,15,20,25,30,60)
    for units in (5,10,15,20,25,32,50,75)
    for drp in (0.1,0.2,0.3)
    for name, cols in feature_sets.items()
]

n_models = len(param_grid)

# Prepare log. The grid and the log used to be set up twice in this cell,
# which left the first file handle open and unreferenced; it is now opened
# once. The handle is deliberately kept open here: the grid-search cell
# writes one line per model and closes it when the search is finished.
os.makedirs(LOG_PATH_ROOT, exist_ok=True)
# Explicit UTF-8: the per-model lines contain non-ASCII symbols ("→", "°").
grid_log = open(
    os.path.join(LOG_PATH_ROOT, "lstm_grid_search.log"), "w", encoding="utf-8"
)
grid_log.write("LSTM Grid Search Log\n" + "="*60 + "\n")
grid_log.write(f"Started: {datetime.now():%Y-%m-%d %H:%M:%S}\n")
grid_log.write(f"Total models: {n_models}\n\n")

19

In [5]:
# Track best model. Every name the grid search assigns on improvement is
# initialised here so that none of them is undefined if no candidate is ever
# selected.
best_model      = None
best_params     = None
best_mae        = np.inf
best_rmse       = np.inf
best_smape      = np.inf
best_dir_acc    = 0
best_score      = np.inf  # Combined score: lower is better
best_y_pred     = None    # predictions of the best model
best_y_test     = None    # matching ground-truth values

# Symmetric mean absolute percentage error (sMAPE), reported for each model
def smape(y_true, y_pred):
    """Return the symmetric mean absolute percentage error, in percent.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, same shape as ``y_true``.

    Returns:
        ``100 * mean(2 * |y_pred - y_true| / (|y_true| + |y_pred| + 1e-8))``.
    """
    # Coerce so that plain lists and tuples work as well as NumPy arrays.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # The small constant keeps the ratio finite when both values are zero.
    denom = np.abs(y_true) + np.abs(y_pred) + 1e-8
    return 100 * np.mean(2 * np.abs(y_pred - y_true) / denom)

# Grid search
#
# For every hyper-parameter combination: build a two-headed LSTM (temperature
# forecast + rise/fall/no-change direction), train it with early stopping,
# score it on the test split and remember the best candidate.
#
# NOTE(review): the test split is passed as `validation_data`, so early
# stopping and the model selection below both see the same data the metrics
# are reported on. A separate validation split would give a cleaner estimate.

# The scaled targets do not depend on any hyper-parameter, so they are
# computed once instead of once per model.
y_train_scaled = y_scaler_target.transform(y_train_raw.reshape(-1, 1)).flatten()
y_test_scaled  = y_scaler_target.transform(y_test_raw.reshape(-1, 1)).flatten()

for seq_len, units, dropout, fs_name, cols in tqdm(
    param_grid,
    desc="LSTM Grid Search",
    ncols=80,
    unit="model",
    leave=True,
    file=sys.stdout  # progress bar goes to stdout
):
    tf.keras.backend.clear_session()

    # Prepare feature arrays and enforce float32. An empty column list is the
    # univariate case, which uses avg_ground_temp itself as the only input.
    feature_cols = cols if cols else ["avg_ground_temp"]
    X_tr = X_scaled_train[feature_cols].values.astype(np.float32)
    X_te = X_scaled_test[feature_cols].values.astype(np.float32)
    assert X_tr.dtype == np.float32 and X_te.dtype == np.float32, "Data must be float32"
    assert not np.isnan(X_tr).any() and not np.isnan(X_te).any(), "Input data contains NaNs"

    # Timeseries Generators + CombinedSequence: the regression and the
    # classification generator share inputs, window length and batch size, so
    # their batches line up index by index.
    reg_train = TimeseriesGenerator(X_tr, y_train_scaled, length=seq_len, batch_size=32)
    clf_train = TimeseriesGenerator(X_tr, train_dir_lbl,  length=seq_len, batch_size=32)
    reg_test  = TimeseriesGenerator(X_te, y_test_scaled,  length=seq_len, batch_size=32)
    clf_test  = TimeseriesGenerator(X_te, test_dir_lbl,   length=seq_len, batch_size=32)
    train_seq = CombinedSequence(reg_train, clf_train)
    test_seq  = CombinedSequence(reg_test,  clf_test)

    # Build model: shared LSTM trunk feeding a regression and a softmax head
    inp     = Input(shape=(seq_len, X_tr.shape[1]))
    x       = LSTM(
        units,
        activation="tanh",
        recurrent_activation="sigmoid",
        use_bias=True,
        unroll=False,
        return_sequences=False,
        dropout=0.0  # regularisation comes from the separate Dropout layer below
    )(inp)
    x       = Dropout(dropout)(x)  # External dropout layer
    out_reg = Dense(1, activation=None,      name="forecast")(x)
    out_clf = Dense(3, activation="softmax", name="direction")(x)
    model   = Model(inp, [out_reg, out_clf])
    model.compile(
        optimizer="adam",
        loss={"forecast": "mse", "direction": "categorical_crossentropy"},
        metrics={"forecast": "mae", "direction": "accuracy"}
    )

    es = EarlyStopping(monitor="val_forecast_loss", mode="min", patience=3, restore_best_weights=True)
    model.fit(train_seq, validation_data=test_seq, epochs=30, callbacks=[es], verbose=0)

    # Evaluate: one prediction pass yields both heads
    y_pred_scaled, dir_probs = model.predict(test_seq, verbose=0)
    y_pred = y_scaler_target.inverse_transform(y_pred_scaled).flatten()
    # The first seq_len observations have no full input window before them,
    # so the ground truth starts at index seq_len.
    y_true = y_test_raw[seq_len:]
    mae    = mean_absolute_error(y_true, y_pred)
    rmse   = np.sqrt(np.mean((y_true - y_pred) ** 2))
    smape_ = smape(y_true, y_pred)

    preds_dir = np.argmax(dir_probs, axis=1)
    true_dir  = np.argmax(test_dir_lbl[seq_len:], axis=1)
    dir_acc   = (preds_dir == true_dir).mean() * 100

    # Combined selection score (lower is better): MAE plus a penalty of 0.02
    # per percentage point of directional error.
    score = mae + 0.02 * (100 - dir_acc)

    # Log the result
    grid_log.write(
        f"{fs_name:6s} | seq={seq_len:<2d} | units={units:<3d} | drop={dropout:<4.2f} "
        f"→ MAE={mae:.3f} °C | RMSE={rmse:.3f} °C | DirAcc={dir_acc:5.1f}% | Score={score:.3f}\n"
    )
    grid_log.flush()

    # Save best if improved
    if score < best_score:
        best_score   = score
        best_model   = model
        best_mae     = mae
        best_rmse    = rmse
        best_dir_acc = dir_acc
        best_smape   = smape_
        best_params  = (seq_len, units, dropout, fs_name, cols)
        best_y_pred  = y_pred
        best_y_test  = y_true


# Finalize log
grid_log.close()

# Fail with a clear message instead of an AttributeError on None when the
# search did not select any model (e.g. every score was NaN).
if best_model is None:
    raise RuntimeError(
        "LSTM grid search finished without selecting a model; "
        "check the grid-search log for the per-model scores."
    )

# Save best model
best_model_path = os.path.join(MODEL_PATH, "best_lstm_model.keras")
best_model.save(best_model_path)

# Print best config
seq_len, units, dropout, fs_name, _ = best_params
print("\nBest LSTM Configuration")
print("="*40)
print(f"Feature Set     : {fs_name}")
print(f"Sequence Length : {seq_len}")
print(f"LSTM Units      : {units}")
print(f"Dropout Rate    : {dropout:.2f}")
print(f"MAE             : {best_mae:.3f} °C")
print(f"RMSE            : {best_rmse:.3f} °C")
print(f"sMAPE           : {best_smape:.2f} %")
print(f"Dir. Accuracy   : {best_dir_acc:.2f} %")
print(f"Selection Score : {best_score:.3f}")
print("="*40)


LSTM Grid Search: 100%|████████████████████| 288/288 [26:57<00:00,  5.62s/model]

Best LSTM Configuration
Feature Set     : uni
Sequence Length : 15
LSTM Units      : 25
Dropout Rate    : 0.10
MAE             : 1.707 °C
RMSE            : 2.870 °C
sMAPE           : 4.05 %
Dir. Accuracy   : 46.56 %
Selection Score : 2.776


In [8]:
# Persist the winning run's ground truth, predictions and metrics for later use.
# NOTE(review): the "Dir. Accuracy " key ends with a space and is styled
# differently from the other keys; it is kept as-is because anything reading
# the saved metrics may look it up by this exact name — confirm before renaming.
lstm_metrics = {
    "mae":            best_mae,
    "rmse":           best_rmse,
    "smape":          best_smape,
    "Dir. Accuracy ": best_dir_acc,
}
save_model_outputs(
    model_name="LSTM",
    y_true=best_y_test,
    y_pred=best_y_pred,
    metrics=lstm_metrics,
)

Saved outputs for LSTM to model_outputs/


## 3.4 Forecasting & Metrics

We’ll predict on `test_gen`, invert the scaling, then compute MAE, RMSE, MAPE, and directional accuracy.
