In [None]:
# === 0. Imports ===
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import joblib

# === 1. Load Models ===
# Both files are Keras HDF5 checkpoints resolved relative to the working dir.
forecast_model = tf.keras.models.load_model("best_lstm_seq2seq_forecaster.h5")
detection_model = tf.keras.models.load_model("best_transformer_autoencoder.h5")

# === 2. Load Scalers ===
# SECURITY: joblib.load unpickles arbitrary Python objects. Only load scaler
# files that come from a trusted source (see scikit-learn model persistence).
forecast_scaler = joblib.load("forecast_scaler.pkl")
detection_scaler = joblib.load("detection_scaler.pkl")

# === 3. Load Full Test Set ===
file_path = "../../data/test_sequences/full_test.csv"
df = pd.read_csv(file_path)

# Use the timestamp column as the index so it is not treated as a feature.
if 'DateTime' in df.columns:
    df['DateTime'] = pd.to_datetime(df['DateTime'])
    df.set_index('DateTime', inplace=True)

# === 4. Save and Drop Labels ===
INPUT_STEPS = 10
FORECAST_STEPS = 10

if 'labels' in df.columns:
    # Keep the label of the LAST step of every (input + forecast) window:
    # window i ends at row i + INPUT_STEPS + FORECAST_STEPS - 1.
    ground_truth_labels = df['labels'].values[INPUT_STEPS + FORECAST_STEPS - 1:]
    df = df.drop(columns=['labels'])
else:
    ground_truth_labels = None

# === 5. Normalize for Forecast ===
# The scaler rejects input whose column count differs from what it was fitted
# on ("X has 26 features, but MinMaxScaler is expecting 25 features").
# Align the frame to the fitted feature set before transforming.
expected_names = getattr(forecast_scaler, "feature_names_in_", None)
expected_count = getattr(forecast_scaler, "n_features_in_", None)

if expected_names is not None:
    # Scaler was fitted on a named DataFrame: select exactly those columns,
    # in the fitted order, dropping any extras present in the CSV.
    missing_columns = [name for name in expected_names if name not in df.columns]
    if missing_columns:
        raise ValueError(
            f"Test set is missing columns the forecast scaler was fitted on: "
            f"{missing_columns}"
        )
    df = df[list(expected_names)]
elif expected_count is not None and df.shape[1] != expected_count:
    # No fitted names to align on; fail early with the column list so the
    # extra/missing column can be identified.
    raise ValueError(
        f"Forecast scaler expects {expected_count} features but the test set "
        f"has {df.shape[1]} after dropping labels: {list(df.columns)}"
    )

scaled_forecast = forecast_scaler.transform(df.values)
df_scaled_forecast = pd.DataFrame(scaled_forecast, index=df.index, columns=df.columns)

# === 6. Create Forecasting Sequences ===
def create_forecast_sequences(data, input_steps, forecast_steps):
    """Slice a time-ordered array into sliding (history, horizon) windows.

    Window ``i`` pairs ``data[i : i + input_steps]`` with the immediately
    following ``data[i + input_steps : i + input_steps + forecast_steps]``.

    Args:
        data: Array-like whose first axis is time, e.g. ``(n_rows, n_features)``.
        input_steps: Number of rows in each history window.
        forecast_steps: Number of rows in each horizon window.

    Returns:
        Tuple ``(X, y)`` of arrays shaped ``(n_windows, input_steps, ...)`` and
        ``(n_windows, forecast_steps, ...)`` where
        ``n_windows = len(data) - input_steps - forecast_steps + 1``.
        When the data is too short for a single window, both arrays are empty
        but keep their trailing dimensions (``(0, input_steps, ...)``), so
        downstream ``.shape[2]`` accesses stay valid.
    """
    data = np.asarray(data)
    n_windows = max(len(data) - input_steps - forecast_steps + 1, 0)

    # Row index of every element of every window, built by broadcasting:
    # (n_windows, 1) start offsets + (steps,) in-window offsets.
    starts = np.arange(n_windows)[:, None]
    history_idx = starts + np.arange(input_steps)
    horizon_idx = starts + input_steps + np.arange(forecast_steps)

    return data[history_idx], data[horizon_idx]

# Window the scaled test set into (history, horizon) pairs.
X_forecast_seq, y_true_forecast_seq = create_forecast_sequences(
    df_scaled_forecast.values,
    INPUT_STEPS,
    FORECAST_STEPS,
)

# === 7. Forecast ===
y_pred = forecast_model.predict(X_forecast_seq)

# === 8. Evaluate Forecasting ===
# Collapse the window and step axes so each row is one forecasted time step;
# the metric functions expect 2-D (samples, features) input.
y_pred_flat = np.reshape(y_pred, (-1, y_pred.shape[2]))
y_true_flat = np.reshape(y_true_forecast_seq, (-1, y_true_forecast_seq.shape[2]))

# Errors are measured on the scaled values, i.e. before inverse_transform.
forecast_mse = mean_squared_error(y_true_flat, y_pred_flat)
forecast_mae = mean_absolute_error(y_true_flat, y_pred_flat)

print("📊 Forecast Evaluation (on normalized data):")
for metric_name, metric_value in (("MSE", forecast_mse), ("MAE", forecast_mae)):
    print(f"   - {metric_name}: {metric_value:.5f}")

# === 9. De-normalize Forecast Output for Detection Phase ===
# Undo the forecast scaling, then re-scale with the detector's own scaler.
y_pred_denorm = forecast_scaler.inverse_transform(y_pred_flat)

# === 9b. Detection Phase on Forecast Output ===
scaled_for_detection = detection_scaler.transform(y_pred_denorm)

# y_pred_flat is a row-major flattening of (n_windows, FORECAST_STEPS,
# n_features), and consecutive forecast windows overlap in time. Sliding a
# window over the flattened rows would therefore mix steps from different
# forecasts and produce roughly FORECAST_STEPS times more windows than there
# are labels. Instead, score each forecast window as one detector input: this
# yields exactly one reconstruction error per forecast window, aligned with
# ground_truth_labels (the label at each window's last step).
# NOTE(review): assumes the detector was trained on windows of INPUT_STEPS
# rows, as the previous windowing implied — confirm against training code.
if FORECAST_STEPS < INPUT_STEPS:
    raise ValueError(
        f"FORECAST_STEPS ({FORECAST_STEPS}) must be >= INPUT_STEPS "
        f"({INPUT_STEPS}) to build one detector window per forecast."
    )

forecast_windows = scaled_for_detection.reshape(
    -1, FORECAST_STEPS, scaled_for_detection.shape[1]
)
# Keep the trailing INPUT_STEPS rows so each window still ends on the step
# whose label was retained.
X_detect_seq = forecast_windows[:, -INPUT_STEPS:, :]

reconstructed = detection_model.predict(X_detect_seq)
# Mean squared reconstruction error per window (averaged over steps, features).
reconstruction_errors = np.mean((X_detect_seq - reconstructed) ** 2, axis=(1, 2))

# === 10. Thresholding ===
# NOTE(review): the threshold is a percentile of the test errors themselves,
# so about (100 - MANUAL_PERCENTILE)% of windows are flagged by construction.
MANUAL_PERCENTILE = 90
threshold = np.percentile(reconstruction_errors, MANUAL_PERCENTILE)
anomaly_flags = (reconstruction_errors > threshold).astype(int)

print(f"\n🚨 Anomaly Detection Summary:")
print(f"   - Threshold: {threshold:.6f}")
print(f"   - Detected Anomalies: {anomaly_flags.sum()} / {len(anomaly_flags)}")

# === 11. Evaluate Detection (if ground truth exists) ===
# Metrics are only meaningful when there is exactly one label per flag.
labels_usable = (
    ground_truth_labels is not None
    and len(ground_truth_labels) == len(anomaly_flags)
)

if not labels_usable:
    print("\n⚠️ Ground truth labels not available or length mismatch for detection evaluation.")
else:
    precision = precision_score(ground_truth_labels, anomaly_flags)
    recall = recall_score(ground_truth_labels, anomaly_flags)
    f1 = f1_score(ground_truth_labels, anomaly_flags)
    report_lines = [
        "\n🎯 Detection Evaluation:",
        f"   - Precision: {precision:.4f}",
        f"   - Recall:    {recall:.4f}",
        f"   - F1-score:  {f1:.4f}",
    ]
    print("\n".join(report_lines))

# === 12. Optional Plot ===
# Per-window reconstruction error with the anomaly threshold overlaid.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(reconstruction_errors, label="Reconstruction Error")
ax.axhline(y=threshold, color='r', linestyle='--', label="Threshold")
ax.set_title("Reconstruction Error over Sliding Windows")
ax.set_xlabel("Sliding Window Index")
ax.set_ylabel("MSE Error")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations




ValueError: X has 26 features, but MinMaxScaler is expecting 25 features as input.

In [None]:
# NOTE(review): `input_seq` is not assigned in any cell visible here —
# presumably left over from an earlier session; verify before re-running.
input_seq.shape