# 🔍 Hybrid State-Based LSTM Inference Pipeline

This notebook loads separate LSTM models trained per machine state (`run`, `idle`, `off`), runs reconstruction-based anomaly detection, and combines the results across all states.

## 📦 Step 1: Import Libraries

In [7]:

import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt


## 📥 Step 2: Load Reshaped Sensor Data

In [8]:

df = pd.read_csv("../test_csv/motor_monitor_0_reshaped.csv")
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.head()


Unnamed: 0,timestamp,power,rpm,temperature,vibration,is_anomaly,state,label
0,2025-04-07 00:00:00,5.022329,2.653643,22.0,7.523581,False,off,Normal
1,2025-04-07 00:01:00,5.077981,2.22689,22.0,7.32466,False,off,Normal
2,2025-04-07 00:02:00,4.833379,2.293508,22.0,7.432946,False,off,Power_outage
3,2025-04-07 00:03:00,6.071584,2.802438,22.0,7.827574,False,off,Normal
4,2025-04-07 00:04:00,4.644964,2.028206,22.0,7.241896,False,off,Power_outage


## 🧰 Step 3: Utility Functions

In [9]:

def sliding_windows(data, seq_length=30):
    return np.array([data.iloc[i:(i + seq_length)].values for i in range(len(data) - seq_length)])

def detect_anomalies(df_subset, model_path, feature_cols, threshold_percentile=95):
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(df_subset[feature_cols])
    sequences = sliding_windows(pd.DataFrame(scaled))
    if len(sequences) == 0:
        return pd.Series([False] * len(df_subset))
    
    model = load_model(model_path, compile=False)
    reconstructions = model.predict(sequences)
    loss = np.mean((reconstructions - sequences) ** 2, axis=(1, 2))
    threshold = np.percentile(loss, threshold_percentile)
    
    # create anomaly flags aligned with original dataframe
    flags = np.array([False] * len(df_subset))
    flags[30:len(loss)+30] = loss > threshold
    return pd.Series(flags, index=df_subset.index)


## 🧠 Step 4: Run Anomaly Detection Per State

In [10]:

df['is_anomaly'] = False
feature_cols = ['power', 'rpm', 'temperature', 'vibration']

state_model_map = {
    'run': "../test_cases/saved_models/lstm_run.h5",
    'idle': "../test_cases/saved_models/lstm_idle.h5",
    'off': "../test_cases/saved_models/lstm_off.h5"
}

for state, model_path in state_model_map.items():
    print(f"Processing state: {state}")
    df_state = df[df['state'] == state].copy()
    df.loc[df_state.index, 'is_anomaly'] = detect_anomalies(df_state, model_path, feature_cols)


Processing state: run
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
Processing state: idle
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 133ms/step
Processing state: off
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 93ms/step


## 📊 Step 5: Review or Save Results

In [12]:

print("Anomaly counts by state:")
print(df.groupby('state')['is_anomaly'].sum())

# Optionally, save results
df.to_csv("../test_cases/results/results_state_lstm_inference.csv", index=False)


Anomaly counts by state:
state
idle     4
off      6
run     58
Name: is_anomaly, dtype: int64


## 🤖 Step 6: Classify Anomalies Using Random Forest

In [13]:

import joblib
from sklearn.metrics import classification_report

# Load trained Random Forest model
rf_model = joblib.load("../test_cases/saved_models/random_forest.pkl")

# One-hot encode 'state' again to match training format
df_encoded = pd.get_dummies(df, columns=["state"])
for col in ["state_off", "state_run", "state_idle"]:
    if col not in df_encoded.columns:
        df_encoded[col] = 0

# Filter anomaly rows
anomaly_df = df_encoded[df_encoded["is_anomaly"] == True].copy()

# Features used by the Random Forest model
features = ['power', 'rpm', 'temperature', 'vibration', 'state_off', 'state_run', 'state_idle']

# Check if label exists for evaluation
if 'label' in anomaly_df.columns:
    X_anomaly = anomaly_df[features]
    y_true = anomaly_df['label']
    y_pred = rf_model.predict(X_anomaly)

    print("🔍 Classification Report (for detected anomalies):")
    print(classification_report(y_true, y_pred))
else:
    print("⚠️ 'label' column not found in data. Cannot compute classification report.")


🔍 Classification Report (for detected anomalies):
                    precision    recall  f1-score   support

            Normal       0.98      1.00      0.99        63
      Power_outage       1.00      1.00      1.00         1
Vibration_increase       1.00      0.75      0.86         4

          accuracy                           0.99        68
         macro avg       0.99      0.92      0.95        68
      weighted avg       0.99      0.99      0.98        68



Feature names must be in the same order as they were in fit.

