In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_blobs
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report

# Configuration
n_samples = 300
outliers_fraction = 0.15
n_outliers = int(outliers_fraction * n_samples)
n_inliers = n_samples

# Generate blob dataset (2D for visualization and testing)
X_inliers, _ = make_blobs(n_samples=n_inliers, centers=[[0, 0]], cluster_std=0.5, random_state=42)
X_outliers = np.random.uniform(low=-6, high=6, size=(n_outliers, 2))
X = np.vstack((X_inliers, X_outliers))
y_true = np.hstack((np.zeros(n_inliers), np.ones(n_outliers)))  # 0 = normal, 1 = anomaly

# Fit Isolation Forest
iso_forest = IsolationForest(contamination=outliers_fraction, random_state=42)
iso_forest.fit(X)
y_pred = iso_forest.predict(X)

# Convert predictions: -1 = anomaly → 1, 1 = normal → 0
y_pred = [1 if pred == -1 else 0 for pred in y_pred]

# Evaluation
report = classification_report(y_true, y_pred, target_names=['Normal', 'Anomaly'], digits=4)
print("📊 Isolation Forest (with make_blobs) Performance:\n")
print(report)


📊 Isolation Forest (with make_blobs) Performance:

              precision    recall  f1-score   support

      Normal     0.9863    0.9633    0.9747       300
     Anomaly     0.7885    0.9111    0.8454        45

    accuracy                         0.9565       345
   macro avg     0.8874    0.9372    0.9100       345
weighted avg     0.9605    0.9565    0.9578       345

