In [2]:
# === 0. Imports ===
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import joblib

# === 1. Load Models ===
forecast_model = tf.keras.models.load_model("best_lstm_seq2seq_forecaster.h5")
detection_model = tf.keras.models.load_model("best_transformer_autoencoder.h5")

# === 2. Load Scalers ===
forecast_scaler = joblib.load("forecast_scaler.pkl")
detection_scaler = joblib.load("detection_scaler.pkl")

# === 3. Load Input CSV ===
file_path = "../../data/test_sequences/sequence_1_len10.csv"  # <-- your input file
df = pd.read_csv(file_path)

if 'DateTime' in df.columns:
    df['DateTime'] = pd.to_datetime(df['DateTime'])
    df.set_index('DateTime', inplace=True)

if 'labels' in df.columns:
    df = df.drop(columns=['labels'])

# === 4. Normalize Input for Forecast Model ===
scaled_forecast = forecast_scaler.transform(df.values)
df_scaled_forecast = pd.DataFrame(scaled_forecast, index=df.index, columns=df.columns)y

# === 5. Create Input Sequence ===
INPUT_STEPS = 10
FORECAST_STEPS = 10

input_seq = df_scaled_forecast.values[-INPUT_STEPS:]  # last 10
input_seq = np.expand_dims(input_seq, axis=0)

# === 6. Forecast ===
y_pred = forecast_model.predict(input_seq)

# === 7. De-normalize Forecast to Real World ===
y_pred_reshaped = y_pred.reshape(-1, y_pred.shape[2])
denorm = forecast_scaler.inverse_transform(y_pred_reshaped)
denorm_df = pd.DataFrame(denorm, columns=df.columns)

# === 8. Re-normalize for Detection Model ===
scaled_for_detection = detection_scaler.transform(denorm)
scaled_for_detection = np.expand_dims(scaled_for_detection, axis=0)

# === 9. Anomaly Detection ===
reconstructed = detection_model.predict(scaled_for_detection)
reconstruction_errors = np.mean((scaled_for_detection - reconstructed) ** 2, axis=(1, 2))

# === 10. Thresholding ===
MANUAL_PERCENTILE = 90
threshold = np.percentile(reconstruction_errors, MANUAL_PERCENTILE)
anomaly_flag = int(reconstruction_errors[0] > threshold)

# === 11. Add Timestamps ===
time_step_minutes = int((df.index[1] - df.index[0]).total_seconds() // 60)
start_time = df.index[-1] + pd.Timedelta(minutes=time_step_minutes)
timestamps = [start_time + pd.Timedelta(minutes=i * time_step_minutes) for i in range(FORECAST_STEPS)]
denorm_df.insert(0, "DateTime", timestamps)

# === 12. Display Results ===
print("📈 Forecasted Values:")
display(denorm_df)

print(f"🔍 Reconstruction Error: {reconstruction_errors[0]:.6f}")
print(f"🚨 Anomaly Detected: {'Yes' if anomaly_flag else 'No'} (Threshold: {threshold:.6f})")

# === 13. Optional Plot ===
plt.figure(figsize=(14, 5))
for col in df.columns[:3]:  # first 3 variables
    plt.plot(denorm_df['DateTime'], denorm_df[col], label=col)
plt.title("Forecasted Sequence")
plt.xlabel("Time")
plt.ylabel("Value")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations




ValueError: X has 26 features, but MinMaxScaler is expecting 25 features as input.

In [None]:
input_seq.shape