In [None]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report


In [None]:
# Read the raw synthetic dataset into a DataFrame.
# The path is relative to the notebook's working directory; adjust it if the
# notebook is launched from somewhere else.
dataset_path = "../data/raw/k8_synthetic_dataset.csv"
data = pd.read_csv(dataset_path)

# Preview the first rows as the cell output.
data.head()


In [None]:
# Split the frame into the model inputs and the ground-truth column.
# The labels are only used for evaluation later; the model never sees them.
y_true = data['label']

feature_columns = ['cpu_usage', 'memory_usage', 'network_io', 'disk_io']
X = data[feature_columns]


In [None]:
# Build the Isolation Forest baseline.
# contamination=0.05 tells the model to flag roughly 5% of samples as outliers;
# random_state is fixed so repeated runs give the same trees.
iso_forest = IsolationForest(random_state=42, contamination=0.05)

# Fit on the feature matrix and score the same rows in one chained call
# (fit returns the fitted estimator). Raw output is -1 for outliers, 1 for inliers.
y_pred = iso_forest.fit(X).predict(X)


In [None]:
# Convert the model's raw output to the label encoding used for evaluation:
#   -1 (outlier) -> 1 (Anomaly),  1 (inlier) -> 0 (Normal)
#
# The mapping is derived from a fresh predict() call instead of from y_pred
# itself. Rewriting y_pred in terms of its own previous value made this cell
# non-idempotent: on a second run no element equals -1 any more, so every
# prediction silently became 0 (Normal).
y_pred = (iso_forest.predict(X) == -1).astype(int).tolist()


In [None]:
# Build the per-class precision / recall / F1 report.
# labels=[0, 1] pins the class order so that target_names always lines up as
# 0 -> 'Normal', 1 -> 'Anomaly', even if one of the classes happens to be
# missing from y_true or y_pred (without it, sklearn infers the classes from
# the data and rejects a target_names list of a different length).
report = classification_report(
    y_true,
    y_pred,
    labels=[0, 1],
    target_names=['Normal', 'Anomaly'],
    digits=4,
)

# The report is a pre-formatted string, so print it rather than showing its repr.
print("📊 Isolation Forest Baseline Performance:\n")
print(report)
