# 🔍 Inference with Saved State-LSTM + Random Forest Classification

This notebook loads previously saved per-state LSTM models (one each for the `run`, `idle`, and `off` motor states), detects anomalies from each model's reconstruction error, and classifies the detected anomalies using a pre-trained Random Forest model.

## 📦 Step 1: Import Libraries

In [None]:

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report
import joblib
from tensorflow.keras.models import load_model


## 📥 Step 2: Load and Prepare Reshaped Data

In [None]:

# Read the reshaped motor-monitor readings and convert the timestamp column
# from text to datetime values.
data_path = "../test_csv/motor_monitor_0_reshaped.csv"
df = pd.read_csv(data_path)
df = df.assign(timestamp=pd.to_datetime(df['timestamp']))
df.head()


## ⚙️ Step 3: Define Sliding Window Anomaly Detector

In [None]:

def create_sequences(data, seq_length=30):
    """Stack overlapping row windows of ``data`` into one array.

    Window ``k`` holds rows ``k`` .. ``k + seq_length - 1``.  Exactly
    ``len(data) - seq_length`` windows are produced, so the final complete
    window (the one ending on the last row) is not included.

    Args:
        data: DataFrame to slice; rows are taken positionally via ``iloc``.
        seq_length: Number of consecutive rows per window.

    Returns:
        Array of shape ``(len(data) - seq_length, seq_length, n_columns)``.
        When ``len(data) <= seq_length`` no window is produced and the result
        is an empty array of shape ``(0,)``.
    """
    window_count = len(data) - seq_length
    windows = []
    for start in range(window_count):
        stop = start + seq_length
        windows.append(data.iloc[start:stop].values)
    return np.array(windows)

def detect_state_anomalies(df_state, model_path, feature_cols, seq_length=30, percentile=95):
    """Flag anomalous rows of one operating state using a saved LSTM model.

    The feature columns are min-max scaled, cut into sliding windows of
    ``seq_length`` rows and passed through the model loaded from
    ``model_path``.  Each window's mean squared reconstruction error is
    compared with the ``percentile``-th percentile of all window errors
    computed in this call; windows above that value are flagged.

    Args:
        df_state: DataFrame with the rows of a single state.  It is modified
            in place (two columns are added) and also returned.
        model_path: Path of the saved Keras model to load.
        feature_cols: Names of the columns fed to the model.
        seq_length: Rows per sliding window.  Presumably has to equal the
            window length the saved model was trained with -- confirm against
            the training code before changing it.
        percentile: Percentile of the window errors used as the threshold.

    Returns:
        ``df_state`` with two added columns: ``reconstruction_error`` (0.0 for
        the first ``seq_length`` rows, which have no window assigned) and
        ``is_anomaly`` (bool).  If there are not enough rows to build a single
        window, the model is not loaded and every row gets 0.0 / False.
    """
    n_rows = len(df_state)
    reconstruction_error = np.zeros(n_rows)
    flags = np.zeros(n_rows, dtype=bool)

    # create_sequences yields len - seq_length windows, so anything with
    # seq_length rows or fewer gives the model nothing to score. Previously
    # this crashed on the empty prediction / column length mismatch.
    if n_rows <= seq_length:
        print(
            f"Skipping {model_path}: {n_rows} rows is not enough "
            f"for a window of {seq_length} rows."
        )
        df_state['reconstruction_error'] = reconstruction_error
        df_state['is_anomaly'] = flags
        return df_state

    # NOTE(review): the scaler is fit on the data being scored, so the scaled
    # values depend on this batch's min/max. Confirm this matches how the
    # model was trained, or load the scaler saved at training time instead.
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(df_state[feature_cols])
    sequences = create_sequences(pd.DataFrame(scaled), seq_length)

    model = load_model(model_path)
    reconstructions = model.predict(sequences)
    # One error per window: mean over both the time and the feature axis.
    errors = np.mean((reconstructions - sequences) ** 2, axis=(1, 2))

    # NOTE(review): the threshold is a percentile of this batch's own errors,
    # so roughly (100 - percentile)% of windows are flagged on every run,
    # whether or not the data is actually abnormal.
    threshold = np.percentile(errors, percentile)
    print(f"Threshold for {model_path}: {threshold:.6f}")

    # The error of window k (rows k .. k+seq_length-1) is stored on row
    # k+seq_length, i.e. the row right after the window; the leading
    # seq_length rows keep the 0.0 / False padding.
    # NOTE(review): consider whether the error should instead be attributed
    # to a row inside the window.
    reconstruction_error[seq_length:] = errors
    flags[seq_length:] = errors > threshold

    df_state['reconstruction_error'] = reconstruction_error
    df_state['is_anomaly'] = flags
    return df_state


## 🧠 Step 4: Apply Detection by State

In [None]:

# Score each operating state with its own saved LSTM model, then merge the
# per-state results back into one chronologically ordered frame.
feature_cols = ['power', 'rpm', 'temperature', 'vibration']
model_paths = {
    'run': "../test_cases/saved_models/lstm_run.h5",
    'idle': "../test_cases/saved_models/lstm_idle.h5",
    'off': "../test_cases/saved_models/lstm_off.h5"
}

# Collect the per-state frames and concatenate once at the end instead of
# growing a DataFrame inside the loop, which re-copies all accumulated rows
# on every iteration.
state_frames = []
for state in df['state'].unique():
    model_path = model_paths.get(state)
    if not model_path:
        # Rows of states without a saved model are dropped from the result;
        # say so instead of losing them silently.
        print(f"⚠️ No saved model for state {state!r}; its rows are skipped.")
        continue
    df_state = df[df['state'] == state].copy()
    state_frames.append(detect_state_anomalies(df_state, model_path, feature_cols))

if not state_frames:
    # Without this check the failure would surface later as an unrelated
    # KeyError when sorting an empty frame by 'timestamp'.
    raise ValueError(
        "None of the states in the data has a saved model; "
        f"expected one of {sorted(model_paths)}."
    )

df_all = pd.concat(state_frames)
df = df_all.sort_values('timestamp').reset_index(drop=True)


## 🤖 Step 5: Classify Detected Anomalies with Random Forest

In [None]:

# One-hot encode state to match training
df_encoded = pd.get_dummies(df, columns=["state"])
# A state that never occurs in this file produces no dummy column, so add it
# as all zeros to keep the feature set complete.
for col in ["state_off", "state_run", "state_idle"]:
    if col not in df_encoded.columns:
        df_encoded[col] = 0

# Load RF model
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load model files from a trusted source.
rf_model = joblib.load("../test_cases/saved_models/random_forest.pkl")

# Filter anomaly rows (element-wise comparison, so `== True` is intentional)
anomaly_df = df_encoded[df_encoded["is_anomaly"] == True].copy()  # noqa: E712
# NOTE(review): column order presumably has to match the order used when the
# Random Forest was trained -- confirm against the training code.
features = ['power', 'rpm', 'temperature', 'vibration', 'state_off', 'state_run', 'state_idle']

if anomaly_df.empty:
    # predict() rejects an input with zero rows, so stop here with a message
    # instead of failing.
    print("⚠️ No anomalies were detected. Nothing to classify.")
else:
    # Classify every detected anomaly and keep the result, whether or not
    # ground-truth labels are available for evaluation.
    X_anomaly = anomaly_df[features]
    y_pred = rf_model.predict(X_anomaly)
    anomaly_df['predicted_label'] = y_pred

    # Evaluate
    if 'label' in anomaly_df.columns:
        y_true = anomaly_df['label']

        print("🔍 Classification Report (for detected anomalies):")
        print(classification_report(y_true, y_pred))
    else:
        print("⚠️ 'label' column not found. Cannot compute classification report.")
