In [None]:
import numpy as np
import pandas as pd
import joblib
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import MinMaxScaler


# ==== Training ====
def train_if_model(df,
                   model_path="/home/marcius/mb_files/19.python/sss/stress_model/if_model.pkl",
                   scaler_path="/home/marcius/mb_files/19.python/sss/stress_model/if_scaler.pkl"):
    """Fit an IsolationForest on the diff columns and persist it with a score scaler.

    Args:
        df: DataFrame providing the ``steam_diff`` and ``condensate_diff``
            columns used as the two training features.
        model_path: Destination file for the fitted forest (written with joblib).
        scaler_path: Destination file for the fitted score scaler (written
            with joblib).

    Returns:
        Tuple ``(model, scaler)`` holding the fitted IsolationForest and the
        MinMaxScaler fitted on the negated training scores.
    """
    train_matrix = df[["steam_diff", "condensate_diff"]].to_numpy()

    # Fixed hyper-parameters; random_state pins the fit for a given dataset.
    forest_params = {
        "n_estimators": 158,
        "max_samples": "auto",
        "contamination": 0.04,
        "random_state": 42,
        "n_jobs": -1,
    }
    forest = IsolationForest(**forest_params).fit(train_matrix)

    # The sign of decision_function is flipped before scaling; the scaler
    # maps the range of the training scores onto the 0-5 severity scale.
    anomaly_scores = -forest.decision_function(train_matrix)
    severity_scaler = MinMaxScaler(feature_range=(0, 5)).fit(
        anomaly_scores.reshape(-1, 1)
    )

    # Persist the model first, then the scaler.
    for artefact, destination in ((forest, model_path), (severity_scaler, scaler_path)):
        joblib.dump(artefact, destination)

    print("[INFO] Model and scaler saved.")
    return forest, severity_scaler


# ==== Real-time inference (single point) ====
def predict_severity(new_data, threshold=3,
                     model_path="/home/marcius/mb_files/19.python/sss/stress_model/if_model.pkl",
                     scaler_path="/home/marcius/mb_files/19.python/sss/stress_model/if_scaler.pkl"):
    """Score a single reading and map it to an integer severity.

    Args:
        new_data: Two numeric values, ``[steam_diff, condensate_diff]``.
        threshold: Limit applied directly to the absolute value of each raw
            input. When both inputs are strictly below it, the reading is
            reported as normal and the model is not consulted.
        model_path: joblib file holding the fitted model (as written by
            ``train_if_model``).
        scaler_path: joblib file holding the fitted score scaler.

    Returns:
        Tuple ``(severity, score, anomaly_flag)``:
        ``(0, 0.0, False)`` when the raw-threshold gate passes; otherwise the
        severity is the scaled score rounded and clipped to 1-5, ``score`` is
        the negated ``decision_function`` value as a plain float, and
        ``anomaly_flag`` is ``True``.

    Raises:
        ValueError: If ``new_data`` does not hold exactly two numeric values.
    """
    # Ensure a (1, n_features) float layout, then validate the feature count
    # explicitly instead of relying on an opaque tuple-unpacking error.
    features = np.asarray(new_data, dtype=float).reshape(1, -1)
    if features.shape[1] != 2:
        raise ValueError(
            "new_data must contain exactly two values "
            f"[steam_diff, condensate_diff], got {features.shape[1]}"
        )
    steam_diff, condensate_diff = features[0]

    # Raw-data gate: small movements on both channels are never anomalies.
    # Returning here avoids deserialising the model for readings that do
    # not need it.
    if abs(steam_diff) < threshold and abs(condensate_diff) < threshold:
        return 0, 0.0, False

    # NOTE: joblib.load unpickles the files; only point these paths at
    # artefacts from a trusted source.
    model = joblib.load(model_path)
    scaler = joblib.load(scaler_path)

    # Negated decision_function, matching the scores the scaler was fitted on.
    score = float(-model.decision_function(features)[0])

    # Scale into an integer severity; the clip keeps out-of-range scores
    # within 1-5.
    scaled = scaler.transform([[score]])[0][0]
    severity = int(np.clip(np.round(scaled), 1, 5))

    return severity, score, True


# ==== Batch inference (label full dataset) ====
def label_dataset_with_severity(df, threshold=3,
                                model_path="/home/marcius/mb_files/19.python/sss/stress_model/if_model.pkl",
                                scaler_path="/home/marcius/mb_files/19.python/sss/stress_model/if_scaler.pkl",
                                save_path=None):
    """Label every row of a dataset with a score, a severity and an anomaly flag.

    Args:
        df: DataFrame providing ``steam_diff`` and ``condensate_diff`` columns.
        threshold: Limit applied to the absolute raw values; rows where both
            columns are strictly below it get severity 0.
        model_path: joblib file holding the fitted model.
        scaler_path: joblib file holding the fitted score scaler.
        save_path: Optional CSV destination for the labeled frame; nothing is
            written when it is falsy.

    Returns:
        A copy of ``df`` with ``if_score``, ``severity`` and ``anomaly_flag``
        columns added. The input frame is not modified.
    """
    # Load the persisted artefacts (model first, then scaler).
    forest = joblib.load(model_path)
    score_scaler = joblib.load(scaler_path)

    feature_matrix = df[["steam_diff", "condensate_diff"]].values
    if_scores = -forest.decision_function(feature_matrix)

    # Scaled scores -> rounded integers -> clipped into the 1-5 band.
    scaled = score_scaler.transform(if_scores.reshape(-1, 1))
    rounded = np.round(scaled).astype(int)
    severity_levels = np.clip(rounded, 1, 5).flatten()

    # Rows that are quiet on both raw channels are forced to severity 0.
    steam_quiet = df["steam_diff"].abs() < threshold
    condensate_quiet = df["condensate_diff"].abs() < threshold
    severity_levels[steam_quiet & condensate_quiet] = 0

    # Anything left with a positive severity counts as an anomaly.
    is_anomaly = severity_levels > 0

    labeled = df.assign(
        if_score=if_scores,
        severity=severity_levels,
        anomaly_flag=is_anomaly,
    )

    if save_path:
        labeled.to_csv(save_path, index=False)
        print(f"[INFO] Labeled dataset saved to {save_path}")

    print(f"[INFO] Labeled dataset created: {is_anomaly.sum()} anomalies out of {len(labeled)} rows.")
    return labeled


# ==== Example usage ====
# Script entry point: load the source CSV and train/persist the model.
if __name__ == "__main__":
    # "timestamp" is parsed as datetimes; train_if_model itself only reads
    # the steam_diff and condensate_diff columns.
    df = pd.read_csv("/home/marcius/mb_files/19.python/sss/systems2.csv", parse_dates=["timestamp"])

    # Train the model; it and its score scaler are written to the default
    # paths declared on train_if_model.
    # NOTE(review): the recorded cell output also shows a labeling/CSV-save
    # step, but no such call is present in this block - confirm whether a
    # label_dataset_with_severity(...) call was removed after the run.
    train_if_model(df)

   


[INFO] Model and scaler saved.
[INFO] Labeled dataset saved to /home/marcius/mb_files/19.python/sss/stress_3.csv
[INFO] Labeled dataset created: 773568 anomalies out of 6383697 rows.
     system           timestamp  steam  condensate  steam_diff  \
0  system_1 2023-11-29 22:01:00  120.2        89.5         3.1   
1  system_1 2023-11-29 22:02:00  122.8        89.8         2.6   
2  system_1 2023-11-29 22:03:00  124.4        88.9         1.6   
3  system_1 2023-11-29 22:04:00  124.5        87.5         0.1   
4  system_1 2023-11-29 22:05:00  124.4        85.8        -0.1   

   condensate_diff  stall  thermal_stress  y  if_score  severity  anomaly_flag  
0              1.5      1               0  1 -0.177209         1          True  
1              0.3      1               0  1 -0.220124         0         False  
2             -0.9      1               0  1 -0.230159         0         False  
3             -1.4      1               0  1 -0.249099         0         False  
4             -