# Deep Learning for Anomaly Detection on Machine Temperature Data

This notebook demonstrates how to build and evaluate an LSTM Autoencoder for anomaly detection using the NAB machine temperature dataset.  
We train the model on normal operating data and use reconstruction errors to flag anomalies.  
The evaluation covers both point-level detection accuracy and anomaly window–based metrics, reflecting industrial needs for early fault detection.


## Setup

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import sys
import os
import pathlib

# Make the repo's packages importable: take the directory two levels above the
# current working directory and put it at the front of sys.path (only once).
repo_root = pathlib.Path.cwd().resolve().parents[1]
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

# utils from this repo
from configs.config import DEFAULTS
from utils.data_loader import load_dataset
from utils.detectors_dl import train_autoencoder, ae_scores, windows_to_pointwise_last
from utils.visualizations import plot_methods_subplots, shade_windows
from utils.evaluation import evaluate_predictions

def adapt_config(defaults):
    """Collect the enabled entries of the statistical-detector sections.

    Only the sections ``stats_global``, ``stats_local``, ``stats_sequential``
    and ``stats_trend`` are considered; every other section of ``defaults`` is
    ignored. Within a section, an entry is kept when it is a dict that has an
    ``"enabled"`` key with a truthy value; the kept copy omits that key.

    Args:
        defaults: Mapping of section name -> mapping of entry name -> options.

    Returns:
        A dict with the four section names as keys (always present), each
        mapping entry names to their options without the ``"enabled"`` flag.
    """
    section_names = ("stats_global", "stats_local", "stats_sequential", "stats_trend")
    adapted = {name: {} for name in section_names}

    for section, entries in defaults.items():
        if section not in adapted:
            continue
        for entry_name, options in entries.items():
            # Entries without an explicit "enabled" flag are skipped, not kept.
            if not isinstance(options, dict) or "enabled" not in options:
                continue
            if not options["enabled"]:
                continue
            adapted[section][entry_name] = {
                opt: value for opt, value in options.items() if opt != "enabled"
            }
    return adapted

# Keep only the enabled statistical-detector settings from the shared defaults.
config = adapt_config(DEFAULTS)

In [2]:
# Seed both RNGs the notebook imports so Restart & Run All gives the same results.
np.random.seed(42)
torch.manual_seed(42)

In [3]:
# load_dataset returns three objects; `windows` is iterated later as (start, end) pairs.
# NOTE(review): `merged` is not referenced by any other visible cell.
dfs, merged, windows = load_dataset("machine_temperature")
# Work with the first frame; later cells read its "timestamp" column and the
# "machine_temperature_system_failure" value column.
df = dfs[0]

## Configuration
Hyperparameters are read from `DEFAULTS` in `configs/config.py` (section `dl` → `autoencoder`).
Tune the sequence length, model size, number of epochs, and threshold quantile there if needed.

In [4]:
# Pull the autoencoder hyperparameters from the shared config defaults.
cfg = DEFAULTS
ae_cfg = cfg["dl"]["autoencoder"]

SEQ_LEN = ae_cfg["seq_len"]
BATCH = ae_cfg["batch_size"]
EPOCHS = ae_cfg["epochs"]
HID = ae_cfg["hidden"]
LAYERS = ae_cfg["layers"]
DROP = ae_cfg["dropout"]
DECAY = ae_cfg["weight_decay"]
LR = ae_cfg["lr"]
PATIENCE = ae_cfg["patience"]

# Adjacent f-strings instead of a backslash continuation, so the indentation of
# the second line does not end up inside the printed text.
print(
    f"seq_len={SEQ_LEN}, batch={BATCH}, epochs={EPOCHS}, hidden={HID}, layers={LAYERS}, "
    f"dropout={DROP}, weight_decay={DECAY}, lr={LR}, patience={PATIENCE}"
)


## Preprocessing

The model is trained only on normal segments (outside failure windows). This ensures the autoencoder is trained solely on normal, healthy behavior, preventing it from learning patterns associated with failures and preserving the model’s ability to generalize to unseen anomalies.



In [5]:
def in_any_failure_window(ts, windows):
    """Return True when ``ts`` lies inside at least one window.

    Args:
        ts: Value to test; it must be comparable with the window bounds.
        windows: Iterable of ``(start, end)`` pairs. Both bounds are inclusive.

    Returns:
        True if ``start <= ts <= end`` holds for some pair, otherwise False.
    """
    return any(start <= ts <= end for start, end in windows)

# Mark timestamps inside windows: adds a boolean-valued "in_window" column to df,
# computed row by row from the "timestamp" column.
# NOTE(review): this writes into df in place, and df is dfs[0] from the loader.
df["in_window"] = df["timestamp"].apply(lambda t: in_any_failure_window(t, windows))

In [6]:
# Chronological 70/15/15 split (no shuffling) to simulate realistic streaming.
n = len(df)
t1, t2 = int(0.7 * n), int(0.85 * n)  # positional boundaries train|val and val|test

# Copies, so adding columns to one split later does not touch df.
train_df, val_df, test_df = (
    part.copy() for part in (df.iloc[:t1], df.iloc[t1:t2], df.iloc[t2:])
)

In [7]:
# Number of rows flagged as inside a failure window, one line per split
# (train, validation, test).
print(
    train_df["in_window"].sum(),
    val_df["in_window"].sum(),
    test_df["in_window"].sum(),
    sep="\n",
)

Observation: all three splits contain timestamps that fall inside a failure window.

In [8]:
# First and last timestamp of each split. The pandas reductions replace the
# Python-level min()/max(), which iterate over the Series element by element.
print(train_df['timestamp'].min(), train_df['timestamp'].max())
print(val_df['timestamp'].min(), val_df['timestamp'].max())
print(test_df['timestamp'].min(), test_df['timestamp'].max())

In [9]:
def make_sequences(df, seq_len, value_col, use_only_normal=True):
    """Slice one value column of a frame into overlapping fixed-length windows.

    Windows advance one row at a time. With ``use_only_normal=True`` any window
    that contains at least one row whose ``"in_window"`` flag is set is dropped.

    Args:
        df: Frame with the column ``value_col`` and a boolean ``"in_window"`` column.
        seq_len: Number of consecutive rows per window (must be >= 1).
        value_col: Name of the column to window.
        use_only_normal: Drop windows that touch a flagged row when True.

    Returns:
        A tuple ``(X, idx)``: ``X`` is a float32 array of shape
        ``(n_windows, seq_len, 1)``, and ``idx`` is an integer array holding, for
        each window, the position within ``df`` of its last row.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")

    x = df[value_col].to_numpy(dtype=np.float32)
    inw = df["in_window"].to_numpy(dtype=bool)

    seqs = []
    idxs = []
    # The range is empty when the frame has fewer than seq_len rows.
    for start in range(len(df) - seq_len + 1):
        stop = start + seq_len
        if use_only_normal and inw[start:stop].any():
            continue
        seqs.append(x[start:stop, None].copy())  # shape (seq_len, 1)
        idxs.append(stop - 1)  # align to last timestamp for scoring

    if seqs:
        X = np.stack(seqs)
    else:
        X = np.empty((0, seq_len, 1), dtype=np.float32)
    # Explicit integer dtype: an empty list would otherwise become a float array,
    # which cannot be used for indexing.
    return X, np.array(idxs, dtype=int)

In [10]:
# Column holding the temperature readings.
VALUE_COL = 'machine_temperature_system_failure'

# Train and validation use only windows free of failure-window rows; the test
# set keeps every window so that anomalous stretches get scored as well.
Xtr, tr_idx = make_sequences(train_df, SEQ_LEN, value_col=VALUE_COL, use_only_normal=True)
Xva, va_idx = make_sequences(val_df, SEQ_LEN, value_col=VALUE_COL, use_only_normal=True)
Xte, te_idx = make_sequences(test_df, SEQ_LEN, value_col=VALUE_COL, use_only_normal=False)

print("Train seqs:", Xtr.shape, "Val seqs:", Xva.shape, "Test seqs:", Xte.shape)

### Normalization
To avoid data leakage, the scaling statistics (mean and standard deviation) are computed only on the clean training sequences. The same statistics are then applied to the training, validation, and test sets to keep them on a consistent scale.

In [11]:
# Standardize using training stats
mu = Xtr.mean()
sd = Xtr.std() if Xtr.std() > 0 else 1.0
Xtr_n = (Xtr - mu)/sd
Xva_n = (Xva - mu)/sd
Xte_n = (Xte - mu)/sd
mu, sd

## Training Autoencoder 

In [12]:
# One feature per time step: make_sequences builds windows of shape (seq_len, 1).
n_features = 1
# Train on the normalized train windows with the normalized validation windows
# as the second data argument; the autoencoder section of the config is passed as-is.
model, history = train_autoencoder(Xtr_n, Xva_n, n_features, cfg["dl"]["autoencoder"], verbose=True)

# Optional: quick look at losses (None is printed when a history list is empty)
print("Last train loss:", history["train_loss"][-1] if history["train_loss"] else None)
print("Last val   loss:", history["val_loss"][-1] if history["val_loss"] else None)


## Anomaly detection

In [13]:
# 1) Choose a reduction (passed to ae_scores as `reduce`)
REDUCE = "last_t"  # or 'time_feature_mean' / 'time_feature_max' / 'time_feature_q…' (last name was truncated in the original note — check ae_scores)

# 2) Score validation windows and pick a high-quantile threshold.
#    The quantile is a named constant so it can be tuned in one place.
THRESHOLD_Q = 0.995
val_scores = ae_scores(model, Xva_n, cfg, reduce=REDUCE)
thr = torch.quantile(val_scores, THRESHOLD_Q).item()

# 3) Score test windows
test_scores = ae_scores(model, Xte_n, cfg, reduce=REDUCE)

# 4) Map window scores to the original timeline (assign to last index of each window);
#    offset=t2 is the position in df where the test split starts.
point_scores = windows_to_pointwise_last(test_scores, SEQ_LEN, total_len=len(df), offset=t2)

# 5) Threshold → boolean anomaly series
point_mask = (point_scores > thr).astype(bool)

In [None]:
# Preview the first few test-window scores instead of dumping all of them.
test_scores[:10]

In [None]:
# Re-score the validation windows. `dl_val` is not defined anywhere in this
# notebook; the normalized validation windows are Xva_n. REDUCE is "last_t".
val_scores = ae_scores(model, Xva_n, cfg, reduce=REDUCE)

In [None]:
# Run the model over the normalized test windows.
# NOTE(review): `predict_batched` is not imported in the setup cell — this raises
# NameError on a fresh kernel; import it from the module that defines it.
preds = predict_batched(model, Xte_n, cfg, dtype=torch.float32)

In [None]:
# One value per window: average over axes 1 and 2 of the model output.
preds.mean(dim=(1,2))

In [None]:
# Average each output window over axes 1 and 2 (time and feature, assuming preds
# has the same layout as Xte_n — TODO confirm), then undo the standardization.
# detach().cpu() makes the conversion work for tensors that live on a GPU or
# still carry gradients; it changes nothing for plain CPU tensors.
window_mean = preds.mean(dim=(1, 2)).detach().cpu().numpy() * sd + mu

# One value per row of test_df; the first SEQ_LEN-1 rows end no full window and stay NaN.
pred_temp = np.full(len(test_df), np.nan, dtype=float)
pred_temp[SEQ_LEN-1:] = window_mean
test_df['preds'] = pred_temp

In [None]:
# plt.figure() returns a single Figure and cannot be unpacked into (fig, ax);
# plt.subplots() returns the (Figure, Axes) pair needed here.
fig, ax = plt.subplots(figsize=(14, 3))
ax.plot(test_df['timestamp'], test_df['machine_temperature_system_failure'], label='Temperature')
ax.plot(test_df['timestamp'], test_df['preds'], label='Predicted Temperature')
shade_windows(ax, windows, color="red", alpha=0.25, first_label="Failure Time")
ax.set_xlabel("Timestamp")
ax.set_ylabel("Temperature")
ax.set_title("Test split: measured vs. reconstructed temperature")
ax.legend();

## Anomaly Detection

In [None]:
# NOTE(review): `reconstruct` and `reconstruction_error` are not imported in the
# setup cell — import them from the module that defines them before running this.
ae_device = cfg['dl']["autoencoder"]["device"]

# Training: the error must compare the model output with the *normalized* input
# it was produced from (Xtr_n), not with the raw windows (Xtr).
X_train_pred = reconstruct(model, Xtr_n, device=ae_device)
train_err = reconstruction_error(Xtr_n, X_train_pred, reduce="time_feature_mean")

# Validation
X_val_pred = reconstruct(model, Xva_n, device=ae_device)
val_err = reconstruction_error(Xva_n, X_val_pred, reduce="time_feature_mean")

# Test: later cells read `test_err`, so it is computed here the same way.
X_test_pred = reconstruct(model, Xte_n, device=ae_device)
test_err = reconstruction_error(Xte_n, X_test_pred, reduce="time_feature_mean")

In [None]:
# Single cut-off: the 0.9 quantile of the pooled validation and test errors.
# NOTE(review): pooling test_err into the threshold lets test data influence the
# decision rule; the earlier scoring cell derives its threshold from validation only.
# NOTE(review): `test_err` must have been computed before this cell runs.
threshold = np.quantile(list(val_err)+list(test_err), 0.9)
# Element-wise flags for errors above the cut-off.
mask_val  = (val_err  > threshold)
mask_test = (test_err > threshold)

print(threshold)


In [None]:
# Rows of the validation + test splits that are not the last row of any scored
# window. The splits start at position t1 (df.iloc[t1:]), so row t1 is counted
# too; `df.index > t1` left it out.
len(df) - t1 - len(va_idx) - len(te_idx)

In [None]:
# Quick inspection: the validation errors and the number of training errors.
print(val_err)
print(len(train_err))

In [None]:
# One boolean per row of df (positional); True where a window ending on that row
# has an error above the threshold.
mask = np.zeros(len(df), dtype=bool)

# va_idx / te_idx hold positions *within* val_df / test_df (the last row of each
# window). Shifting by the split boundaries t1 / t2 turns them into positions in
# df; the number of train/validation sequences is not a valid offset because
# sequences are fewer than rows.
for pos, err in zip(va_idx, val_err):
    if err > threshold:
        mask[t1 + pos] = True

for pos, err in zip(te_idx, test_err):
    if err > threshold:
        mask[t2 + pos] = True


In [None]:
# Map method name -> boolean prediction array covering every row of df.
methods_preds = {'LSTM Autoencoder': mask}

# Plot the temperature series with the predictions and the labelled windows.
plot_methods_subplots(df, methods_preds, value_col="machine_temperature_system_failure", windows=windows)

In [None]:
# Evaluate on everything after the training split. The split is positional
# (df.iloc[t1:]), so rows and predictions are both cut at t1; this keeps row t1
# and gives the frame and the prediction array the same length.
val_test_df = df.iloc[t1:]
val_test_pred = methods_preds['LSTM Autoencoder'][t1:]

In [None]:
# Score the predictions against the labelled windows; early_tolerance="5h" is
# passed straight to evaluate_predictions.
eval_results = {'LSTM Autoencoder': evaluate_predictions(val_test_df, val_test_pred , windows, early_tolerance="5h")}

# One row per method, values rounded to three decimals.
print(pd.DataFrame(eval_results).T.round(3))