In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# === Load datasets ===
labeled_df = pd.read_csv('labeled_thermal_features.csv')
unlabeled_df = pd.read_csv('thermal_frame_features.csv')

# === Features to use (drop labels, frame info) ===
feature_cols = [col for col in labeled_df.columns if col not in ['Frame', 'Porosity Label']]
X_labeled = labeled_df[feature_cols]
y_labeled = labeled_df['Porosity Label']

X_unlabeled = unlabeled_df[feature_cols]

# === Normalize the data ===
scaler = StandardScaler()
X_labeled_scaled = scaler.fit_transform(X_labeled)
X_unlabeled_scaled = scaler.transform(X_unlabeled)

# === Train One-Class SVM on NORMAL data (label 0 only) ===
X_train = X_labeled_scaled[y_labeled == 0]
oc_svm = OneClassSVM(kernel='rbf', gamma='auto', nu=0.05)  # nu is roughly the % of outliers
oc_svm.fit(X_train)

# === Predict anomalies on ALL data ===
# Returns: 1 for normal, -1 for anomaly → we convert to 0 (normal), 1 (anomaly)
pred_labeled = oc_svm.predict(X_labeled_scaled)
pred_unlabeled = oc_svm.predict(X_unlabeled_scaled)

pred_labeled = np.where(pred_labeled == -1, 1, 0)
pred_unlabeled = np.where(pred_unlabeled == -1, 1, 0)

# === Evaluate predictions on labeled set ===
print("Evaluation on labeled data:")
print(confusion_matrix(y_labeled, pred_labeled))
print(classification_report(y_labeled, pred_labeled))
print("Accuracy:", accuracy_score(y_labeled, pred_labeled))

# === Save predictions for unlabeled data ===
unlabeled_df['SVM_Anomaly'] = pred_unlabeled
unlabeled_df.to_csv('svm_unlabeled_predictions.csv', index=False)
print("SVM predictions saved to svm_unlabeled_predictions.csv")


Evaluation on labeled data:
[[1416   77]
 [   2   69]]
              precision    recall  f1-score   support

           0       1.00      0.95      0.97      1493
           1       0.47      0.97      0.64        71

    accuracy                           0.95      1564
   macro avg       0.74      0.96      0.80      1564
weighted avg       0.97      0.95      0.96      1564

Accuracy: 0.9494884910485933
SVM predictions saved to svm_unlabeled_predictions.csv
