# In [None]:
import numpy as np
import pandas as pd
import joblib
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import MinMaxScaler

# ==== Training ====
def train_stall_if_model(df, model_path="stall_if_model.pkl", scaler_path="stall_if_scaler.pkl", stall_threshold=8):
    """Fit an IsolationForest on the stall difference and persist it with a severity scaler.

    The single training feature is ``df["steam_diff"] - df["condensate_diff"]``.
    After fitting, the negated ``decision_function`` values of the training
    rows are used to fit a ``MinMaxScaler`` with range ``(0, 5)``, so the
    lowest training score maps to 0 and the highest to 5.

    Args:
        df: DataFrame containing ``steam_diff`` and ``condensate_diff``
            columns. It is read only; no column is added to it.
        model_path: Destination file for the fitted IsolationForest.
        scaler_path: Destination file for the fitted MinMaxScaler.
        stall_threshold: Not used during training; accepted so the signature
            mirrors ``predict_stall_severity``.

    Returns:
        Tuple ``(model, scaler)`` of the fitted estimator and scaler.

    Raises:
        KeyError: If a required column is missing from ``df``.
        ValueError: If ``df`` has no rows.
    """
    # Derive the feature locally rather than writing a "stall_diff" column
    # into the caller's frame (which also triggered SettingWithCopy warnings
    # when ``df`` was a slice of another frame).
    stall_diff = df["steam_diff"] - df["condensate_diff"]
    features = stall_diff.to_numpy().reshape(-1, 1)

    if features.shape[0] == 0:
        raise ValueError("Cannot train stall model: input DataFrame has no rows.")

    # Fixed random_state keeps the fitted forest reproducible across runs.
    model = IsolationForest(
        n_estimators=242,
        max_samples="auto",
        contamination=0.05,
        random_state=42,
        n_jobs=-1,
    )
    model.fit(features)

    # decision_function is lower for more abnormal samples; negate it so a
    # larger score means "more anomalous" before fitting the 0-5 scaler.
    scores = -model.decision_function(features)
    scaler = MinMaxScaler(feature_range=(0, 5))
    scaler.fit(scores.reshape(-1, 1))

    # Persist both artifacts so inference can reload them later.
    joblib.dump(model, model_path)
    joblib.dump(scaler, scaler_path)

    print("[INFO] Stall IF model and scaler saved.")
    return model, scaler


# ==== Real-time inference ====
def predict_stall_severity(new_data, model_path="stall_if_model.pkl", scaler_path="stall_if_scaler.pkl", stall_threshold=8):
    """Score one stall-difference reading and map it to an integer severity.

    The persisted IsolationForest produces an anomaly score (negated
    ``decision_function``; larger means more anomalous). The persisted
    MinMaxScaler maps that score onto its fitted range, and the result is
    rounded to an integer. Readings whose absolute value is below
    ``stall_threshold`` always get severity 0.

    Args:
        new_data: A single value convertible to ``float`` (the stall
            difference, e.g. ``steam_diff - condensate_diff``).
        model_path: File holding the fitted IsolationForest.
        scaler_path: File holding the fitted MinMaxScaler.
        stall_threshold: Minimum ``abs(stall_diff)`` for a non-zero severity.

    Returns:
        Tuple ``(severity, score)``: ``severity`` is an ``int`` within the
        scaler's ``feature_range``; ``score`` is the raw anomaly score.

    Raises:
        ValueError: If ``new_data`` is NaN or infinite.
    """
    # NOTE: joblib.load unpickles the files; only point these paths at
    # artifacts produced by a trusted training run.
    model = joblib.load(model_path)
    scaler = joblib.load(scaler_path)

    # Ensure input format (single value: stall_diff)
    stall_diff = float(new_data)
    if not np.isfinite(stall_diff):
        raise ValueError(f"stall_diff must be a finite number, got {new_data!r}")
    features = np.array([[stall_diff]])

    # Negate so that a larger score means "more anomalous".
    score = -model.decision_function(features)[0]

    # The scaler was fitted on training scores only and does not clip, so a
    # live score outside that range would extrapolate below/above the
    # intended severity band. Clamp to the scaler's own range before rounding.
    scaled = scaler.transform([[score]])[0][0]
    low, high = scaler.feature_range
    severity = int(np.round(np.clip(scaled, low, high)))

    # Small differences are never reported as a stall, whatever the model says.
    if abs(stall_diff) < stall_threshold:
        severity = 0

    return severity, score


# ==== Example usage ====
# Fit the detector on the historical dataset and persist model + scaler
df = pd.read_csv(
    "/home/marcius/mb_files/19.python/ssd/systems2.csv",
    parse_dates=["timestamp"],
)
train_stall_if_model(
    df,
    model_path="stall_if_model.pkl",
    scaler_path="stall_if_scaler.pkl",
)

# Score a single reading as it would arrive in real time
example_stall_diff = 12.3  # stands in for steam_diff - condensate_diff
severity, score = predict_stall_severity(new_data=example_stall_diff)
print(f"Stall Severity={severity}, IF score={score:.4f}")
