# In [None]:
import numpy as np
import pandas as pd
import joblib
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import MinMaxScaler

# ==== Training ====
def train_if_model(df, model_path="if_model.pkl", scaler_path="if_scaler.pkl"):
    """Fit an IsolationForest plus a score scaler and persist both with joblib.

    Args:
        df: Table exposing ``steam_diff`` and ``condensate_diff`` columns;
            only these two columns are used as features.
        model_path: File the fitted IsolationForest is dumped to.
        scaler_path: File the fitted MinMaxScaler is dumped to.

    Returns:
        The ``(model, scaler)`` pair that was just fitted and saved.
    """
    samples = df[["steam_diff", "condensate_diff"]].values

    # Fixed hyper-parameters; random_state pins the forest for repeatability.
    forest_params = {
        "n_estimators": 158,
        "max_samples": "auto",
        "contamination": 0.04,
        "random_state": 42,
        "n_jobs": -1,
    }
    model = IsolationForest(**forest_params).fit(samples)

    # decision_function is negated so the scaler is fitted on the same
    # sign convention that predict-time code feeds into it.
    anomaly_scores = -model.decision_function(samples)
    scaler = MinMaxScaler(feature_range=(0, 5)).fit(anomaly_scores.reshape(-1, 1))

    # Persist the model first, then the scaler.
    for artifact, destination in ((model, model_path), (scaler, scaler_path)):
        joblib.dump(artifact, destination)

    print("[INFO] Model and scaler saved.")
    return model, scaler


# ==== Real-time inference ====
def predict_severity(new_data, model_path="if_model.pkl", scaler_path="if_scaler.pkl"):
    """Score a single observation and map it to an integer severity level.

    Args:
        new_data: One observation as a flat sequence of feature values, in the
            order the model was fitted on (``[steam_diff, condensate_diff]``
            for artifacts written by ``train_if_model``).
        model_path: Path of the joblib-serialised anomaly model.
        scaler_path: Path of the joblib-serialised scaler that maps negated
            decision scores onto the severity range.

    Returns:
        ``(severity, score)`` where ``score`` is the negated
        ``decision_function`` value and ``severity`` is that score scaled,
        clamped to the scaler's ``feature_range`` (0-5 by default) and rounded
        to an int.
    """
    # NOTE: joblib.load unpickles its input; only point these paths at
    # trusted files.
    model = joblib.load(model_path)
    scaler = joblib.load(scaler_path)

    # Shape the single observation as one row: [[f1, f2, ...]].
    features = np.asarray(new_data).reshape(1, -1)

    # Negate so the sign matches what the scaler was fitted on.
    score = -model.decision_function(features)[0]

    scaled = scaler.transform([[score]])[0][0]

    # MinMaxScaler extrapolates linearly for scores outside the range seen at
    # fit time, so without clamping a live point could yield a severity below
    # the minimum or above the maximum of the documented scale.
    low, high = getattr(scaler, "feature_range", (0, 5))
    severity = int(np.round(np.clip(scaled, low, high)))
    return severity, score


# ==== Example usage ====
# Runs at cell/module execution time: loads the CSV, trains, writes the two
# artifact files, then scores one sample point.
# NOTE(review): the CSV path is absolute and machine-specific; adjust it before
# running elsewhere. The "timestamp" column is parsed as dates here, but
# train_if_model only reads steam_diff and condensate_diff.
df = pd.read_csv("/home/marcius/mb_files/19.python/ssd/systems2.csv", parse_dates=["timestamp"])
# The returned (model, scaler) pair is discarded; predict_severity below
# reloads both from the files written by this call.
train_if_model(df, model_path="if_model.pkl", scaler_path="if_scaler.pkl")

# Real-time scoring: relies on predict_severity's default paths, which are the
# same file names passed to train_if_model above.
example_point = [3.2, -1.5]  # steam_diff, condensate_diff
severity, score = predict_severity(example_point)
print(f"Severity={severity}, IF score={score:.4f}")
