# Isolation Forest

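This notebook applies the Isolation Forest algorithm to semi-supervised anomaly detection on features extracted from thermal images of directed energy deposition (DED) processes. The model is trained only on normal samples and then scores every sample. Isolation Forest isolates observations by randomly selecting a feature and a split value, which makes it efficient for high-dimensional datasets.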


In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix, classification_report, f1_score

# Load dataset
# The relative path is resolved against the kernel's current working directory.
df = pd.read_csv("labeled_thermal_features.csv")

# Use correct top 5 features from the feature importance chart
top_5_features = ["IQR", "Std_Temp", "Q1", "Min_Temp", "Median_Temp"]

# Ensure all features exist
missing = [col for col in top_5_features if col not in df.columns]
if missing:
    raise ValueError(f"Missing expected features: {missing}")

# The label column is required just as much as the features; fail here with a
# clear message instead of a bare KeyError when the label is read later.
if "Porosity Label" not in df.columns:
    raise ValueError("Missing expected label column: 'Porosity Label'")

# Prepare data
X = df[top_5_features]
y = df["Porosity Label"]

# Boolean mask of the normal samples (label 0); these are the only rows the
# detector is trained on.
is_normal = (y == 0).to_numpy()

# Normalize
# Fit the scaler on the normal samples only, so the min/max used for scaling
# are not influenced by the anomalous rows the model never trains on, then
# apply that same transform to every row for scoring. MinMaxScaler does not
# clip by default, so rows outside the normal range may fall outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(X.loc[is_normal])
X_scaled = scaler.transform(X)

# Train only on normal samples
X_train = X_scaled[is_normal]

# Expected share of anomalies. Defined once so that the model's `contamination`
# and the percentile cut-off below cannot drift apart.
CONTAMINATION = 0.045

# Train Isolation Forest
iso_forest = IsolationForest(
    n_estimators=100,
    contamination=CONTAMINATION,
    random_state=42,  # fixed seed so the fitted trees are reproducible
)
iso_forest.fit(X_train)

# Predict
# decision_function: the lower the score, the more anomalous the sample.
scores = iso_forest.decision_function(X_scaled)

# Flag the lowest-scoring CONTAMINATION share of all scored samples as
# anomalies (1); everything else is labelled normal (0).
# NOTE(review): the cut-off is computed over every row, including the normal
# rows used for training -- confirm this is the intended evaluation protocol.
threshold = np.percentile(scores, 100 * CONTAMINATION)
anomaly_predictions = (scores < threshold).astype(int)

# Evaluate
# Compare the thresholded predictions with the ground-truth labels. In the
# confusion matrix, rows are the true class and columns the predicted class.
f1 = f1_score(y_true=y, y_pred=anomaly_predictions)
report = classification_report(y_true=y, y_pred=anomaly_predictions)
conf_matrix = confusion_matrix(y_true=y, y_pred=anomaly_predictions)

print("\n✅ Isolation Forest Evaluation (Top 5 Features):")
for heading, value in (
    ("Confusion Matrix:\n", conf_matrix),
    ("Classification Report:\n", report),
):
    print(heading, value)
print(f"F1 Score: {f1:.4f}")



✅ Isolation Forest Evaluation (Top 5 Features):
Confusion Matrix:
 [[1475   18]
 [  18   53]]
Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99      1493
           1       0.75      0.75      0.75        71

    accuracy                           0.98      1564
   macro avg       0.87      0.87      0.87      1564
weighted avg       0.98      0.98      0.98      1564

F1 Score: 0.7465
