In [1]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, confusion_matrix

In [21]:
# Read the labelled thermal feature table from disk.
df = pd.read_csv("labeled_thermal_features.csv")

# Separate ground truth from model inputs. Both "Frame" (noted in the original
# as not being a numerical feature) and "Porosity Label" are removed from the
# inputs; the label is held aside purely for scoring and never seen by the model.
y = df["Porosity Label"]
X = df.drop(["Frame", "Porosity Label"], axis=1)

# Build and fit the Isolation Forest in a single expression
# (fit() hands back the fitted estimator itself).
iso_forest = IsolationForest(
    n_estimators=100,
    contamination=0.05,  # fraction of rows the model is told to treat as outliers
    random_state=42,     # fixed seed so repeated runs give the same forest
).fit(X)

# predict() labels anomalies as -1 and normal rows as 1;
# remap so that anomaly -> 1 and normal -> 0 before comparing against y.
predictions = iso_forest.predict(X)
anomaly_predictions = (predictions == -1).astype(int)

# Score the unsupervised predictions against the held-out labels.
conf_mat = confusion_matrix(y, anomaly_predictions)
report = classification_report(y, anomaly_predictions)

print("\nIsolation Forest Evaluation:")
print("Confusion Matrix:\n", conf_mat)
print("Classification Report:\n", report)



Isolation Forest Evaluation:
Confusion Matrix:
 [[1466   27]
 [  19   52]]
Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.98      0.98      1493
           1       0.66      0.73      0.69        71

    accuracy                           0.97      1564
   macro avg       0.82      0.86      0.84      1564
weighted avg       0.97      0.97      0.97      1564

