In [1]:
# Enable IPython's autoreload extension so edits to imported project modules
# are picked up without restarting the kernel (mode 2 = reload all modules).
%load_ext autoreload
%autoreload 2

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as scio
import joblib
from typing import List

import os

# Work from the repository root so the project-local packages imported in the
# next cell and relative paths such as './data/project_data.mat' resolve.
# The guard makes the cell idempotent: an unconditional os.chdir('..') climbs
# one more directory every time the cell is re-run (or when the kernel already
# starts at the root), which then breaks every relative path that follows.
if not os.path.isdir('utility_functions'):
    os.chdir('..')
# print(f'Current Directory: {os.getcwd()}')

import logging

# INFO level is what surfaces the progress messages emitted by the
# utility_functions helpers used throughout this notebook.
logging.basicConfig(
    level=logging.INFO
)
logger = logging.getLogger(__name__)

Current Directory: C:\Users\user\Desktop\Masters\ISYE-6740 Computational Data Analytics\project\Anomaly-Detection-in-Industrial-Inspection


In [4]:
from utility_functions.utility_functions import (
    load_data,
    reshape_images,
    plot_class_distribution_grid,
    _report_metrics,
    _report_metrics_per_class_instance,
    _report_metrics_subset,
    report_all_metrics,
    _filter_to_subset,
    plot_confusion_matrix,
    plot_confusion_matrix_simple,
    load_model_and_predict,
    train_predict_pipe
)
from data.target_mappings import TargetMappings

# Path of the MATLAB data file handed to load_data().
DATA_PATH = './data/project_data.mat'
# N_INIT and RAND_SEED are not referenced by the cells visible in this notebook.
N_INIT = 7
RAND_SEED = 123
# Even label ids 0..28 are listed as defective, odd label ids 1..27 as
# non-defective.
# NOTE(review): DEFECTIVE has 15 entries and NON_DEFECTIVE has 14, so any
# zip(DEFECTIVE, NON_DEFECTIVE) silently drops label 28 - confirm that is intended.
DEFECTIVE = list(range(0, 29, 2))
NON_DEFECTIVE = list(range(1, 29, 2))

In [67]:
def _filter_to_dataset_subset(y_set: np.array, x_set: np.array, target_classes: List):
    combined_array = np.column_stack((y_train, x_train_reshaped))
    mask = np.isin(combined_array[:, 0], target_classes)
    filtered_combined = combined_array[mask]
    filtered_y_set = filtered_combined[:, 0]
    filtered_x_set = filtered_combined[:, 1:]
    return filtered_y_set, filtered_x_set

In [7]:
# Load the dataset and its train/test split from DATA_PATH; load_data logs the
# sample counts and the image dimensions.
data, x_train, x_test, y_train, y_test = load_data(DATA_PATH)
# Label mapping stored on TargetMappings; it is not referenced again in the
# cells shown in this notebook.
target_mappings = TargetMappings.MAPPING_DICT.value

INFO:utility_functions.utility_functions:Sample Size: 5354 | Train size: 4283 (0.800) | Test size: 1071 (0.200)
INFO:utility_functions.utility_functions:Image Dimension: (5354, 128, 128, 3)


In [9]:
# Flatten every image into a single feature row; per the printed shapes,
# (N, 128, 128, 3) becomes (N, 49152).
x_train_reshaped = reshape_images(x_train)
x_test_reshaped = reshape_images(x_test)

Original dimensions: (4283, 128, 128, 3)
Reshaped dimensions: (4283, 49152)
Original dimensions: (1071, 128, 128, 3)
Reshaped dimensions: (1071, 49152)


# One Class SVM

In [121]:
# Label pair used for the single-pair experiment that follows: 6 is treated as
# the defect class and 7 as its non-defective counterpart.
DEFECT_CLASS = 6
NON_DEFECT_CLASS = 7

In [123]:
# Training subset: NON_DEFECT_CLASS samples only.
y_train_one_class, x_train_reshaped_one_class = _filter_to_dataset_subset(
    y_train, x_train_reshaped, [NON_DEFECT_CLASS]
)
# Evaluation subset: both the defect class and its non-defective counterpart.
y_test_one_class, x_test_reshaped_one_class = _filter_to_dataset_subset(
    y_test, x_test_reshaped, [DEFECT_CLASS, NON_DEFECT_CLASS]
)

# Binary targets: 0 is non-defective, 1 is anomaly.
y_train_one_class = (y_train_one_class != NON_DEFECT_CLASS).astype(int)
y_test_one_class = (y_test_one_class != NON_DEFECT_CLASS).astype(int)

In [84]:
from sklearn.svm import (
    SVC,
    LinearSVC,
    OneClassSVM
)
# Candidate estimators keyed by display name. Only "One Class SVM" is used in
# the cells shown in this notebook.
# NOTE(review): scikit-learn documents `gamma` as the coefficient for the
# 'rbf', 'poly' and 'sigmoid' kernels, so gamma=0.1 has no effect together with
# kernel='linear' - confirm whether an RBF kernel was intended.
models = {
    "SVM": SVC(kernel='linear'),
    "Kernel SVM": SVC(kernel='rbf'),
    "One Class SVM": OneClassSVM(kernel='linear', gamma=0.1)
}

In [125]:
# Fit the one-class model on the training subset's features only; no labels
# are passed to fit().
one_cls_svm = models['One Class SVM']
one_cls_svm.fit(x_train_reshaped_one_class)

### Train Split Result

In [127]:
# OneClassSVM.predict returns -1 for outliers and +1 for inliers; recode that
# to the notebook's convention (1 = anomaly, 0 = non-defective).
y_preds_train = np.where(
    one_cls_svm.predict(x_train_reshaped_one_class) == -1, 1, 0
)
# The training labels are passed for both label arguments because this cell
# scores the model on the data it was fitted on.
_report_metrics(y_train_one_class, y_train_one_class, y_preds_train, 'One Class SVM')

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,Model,Train Size,Test Size,Accuracy,F1 Score,Precision,Recall
0,One Class SVM,246,246,0.504065,0.335135,0.5,0.252033


### Test Split Result

In [133]:
# train_predict_pipe logs a "Fitting ..." and a "Predicting ..." step and
# returns the model together with its predictions for X_test.
model, preds = train_predict_pipe(X_train=x_train_reshaped_one_class, 
                    y_train=y_train_one_class, 
                    X_test=x_test_reshaped_one_class, 
                    model=one_cls_svm)
# Recode OneClassSVM's -1 (outlier) / +1 (inlier) output to 1 = anomaly, 0 = non-defective.
preds = np.where(preds == -1, 1, 0)
# Score against the evaluation labels; the training labels are passed as the
# second argument (presumably only for the reported "Train Size" - verify in
# _report_metrics).
_report_metrics(y_test_one_class, y_train_one_class, preds, 'One Class SVM')

INFO:utility_functions.utility_functions: Fitting OneClassSVM with 246 samples
INFO:utility_functions.utility_functions: Predicting OneClassSVM with 317 samples


Unnamed: 0,Model,Train Size,Test Size,Accuracy,F1 Score,Precision,Recall
0,One Class SVM,246,317,0.495268,0.449939,0.489168,0.484427


In [138]:
# One row of metrics per (defective, non-defective) label pair.
results = []
# zip() stops at the shorter list, so only labels that have both a defective
# and a non-defective id are evaluated.
for defect_class, non_defect_class in zip(DEFECTIVE, NON_DEFECTIVE):
    # Train on the non-defective class only; evaluate on the pair.
    y_train_one_class, x_train_reshaped_one_class = _filter_to_dataset_subset(
        y_train, x_train_reshaped, [non_defect_class]
    )
    y_test_one_class, x_test_reshaped_one_class = _filter_to_dataset_subset(
        y_test, x_test_reshaped, [defect_class, non_defect_class]
    )

    y_train_one_class = np.where(y_train_one_class == non_defect_class, 0, 1) # 0 is non-defective, 1 is anomaly
    y_test_one_class = np.where(y_test_one_class == non_defect_class, 0, 1)

    one_cls_svm = models['One Class SVM']

    # train_predict_pipe performs the fit itself (it logs
    # "Fitting OneClassSVM with N samples"), so no separate .fit() call is
    # made here; fitting beforehand would train the same model twice.
    model, preds = train_predict_pipe(X_train=x_train_reshaped_one_class,
                        y_train=y_train_one_class,
                        X_test=x_test_reshaped_one_class,
                        model=one_cls_svm)
    # OneClassSVM marks outliers with -1; recode to 1 = anomaly, 0 = non-defective.
    preds = np.where(preds == -1, 1, 0)
    result = _report_metrics(y_test_one_class, y_train_one_class, preds, 'One Class SVM')
    # Tag the row with the defective label id of the evaluated pair.
    result['Class'] = defect_class
    results.append(result)

INFO:utility_functions.utility_functions: Fitting OneClassSVM with 183 samples
INFO:utility_functions.utility_functions: Predicting OneClassSVM with 233 samples
INFO:utility_functions.utility_functions: Fitting OneClassSVM with 225 samples
INFO:utility_functions.utility_functions: Predicting OneClassSVM with 299 samples
INFO:utility_functions.utility_functions: Fitting OneClassSVM with 194 samples
INFO:utility_functions.utility_functions: Predicting OneClassSVM with 281 samples
INFO:utility_functions.utility_functions: Fitting OneClassSVM with 246 samples
INFO:utility_functions.utility_functions: Predicting OneClassSVM with 317 samples
INFO:utility_functions.utility_functions: Fitting OneClassSVM with 228 samples
INFO:utility_functions.utility_functions: Predicting OneClassSVM with 274 samples
INFO:utility_functions.utility_functions: Fitting OneClassSVM with 345 samples
INFO:utility_functions.utility_functions: Predicting OneClassSVM with 401 samples
INFO:utility_functions.utility_fun

In [139]:
# Stack the per-pair metric rows into one table with a fresh 0..n-1 index.
combined_df = pd.concat(results, axis=0, ignore_index=True)
combined_df

Unnamed: 0,Model,Train Size,Test Size,Accuracy,F1 Score,Precision,Recall,Class
0,One Class SVM,183,233,0.476395,0.419404,0.460347,0.441366,0
1,One Class SVM,225,299,0.461538,0.410473,0.440006,0.42003,2
2,One Class SVM,194,281,0.530249,0.517885,0.541921,0.548851,4
3,One Class SVM,246,317,0.495268,0.449939,0.489168,0.484427,6
4,One Class SVM,228,274,0.481752,0.409232,0.46959,0.445652,8
5,One Class SVM,345,401,0.486284,0.401406,0.474575,0.447153,10
6,One Class SVM,222,296,0.425676,0.358163,0.385994,0.351351,12
7,One Class SVM,194,268,0.5,0.475431,0.503342,0.504179,14
8,One Class SVM,234,347,0.440922,0.408703,0.419193,0.409292,16
9,One Class SVM,289,384,0.395833,0.309181,0.337832,0.29124,18


In [161]:
# Unweighted mean of each metric (and of the test-set size) across all
# evaluated label pairs.
combined_df[['Test Size', 'Accuracy', 'F1 Score', 'Precision',
       'Recall']].mean()

Test Size    283.571429
Accuracy       0.487926
F1 Score       0.437791
Precision      0.480184
Recall         0.469863
dtype: float64

In [149]:
### Result of experiment 1

| Model | Train Size | Test Size | Accuracy | F1 Score | Precision | Recall |
|---|---|---|---|---|---|---|
| Linear SVC | 4283 | 252 | 0.440476 | 0.272804 | 0.466667 | 0.205944 |

There is a slight increase in accuracy and precision over the joint models; however, the individual per-class models show a large increase in recall and F1 score.

# PCA + One Class SVM

In [169]:
import joblib
# Load the PCA model saved under the experiment-1 folder (presumably 13
# components, going by the file name - verify).
# NOTE: joblib.load unpickles the file, which can execute arbitrary code; only
# load artifacts produced by this project.
pca = joblib.load('supervised_learning_stage/ex_1_pca_and_svm/pca_n13_model.pkl')
# Project both splits with the loaded PCA; only transform() is called, so the
# PCA is not refitted here.
x_train_transformed = pca.transform(x_train_reshaped)
x_test_transformed = pca.transform(x_test_reshaped)

In [171]:
# One row of metrics per (defective, non-defective) label pair, this time on
# the PCA-transformed features.
results = []
# zip() stops at the shorter list, so only labels that have both a defective
# and a non-defective id are evaluated.
for defect_class, non_defect_class in zip(DEFECTIVE, NON_DEFECTIVE):
    # Train on the non-defective class only; evaluate on the pair.
    # The *_reshaped_one_class names hold PCA-transformed rows in this cell.
    y_train_one_class, x_train_reshaped_one_class = _filter_to_dataset_subset(
        y_train, x_train_transformed, [non_defect_class]
    )
    y_test_one_class, x_test_reshaped_one_class = _filter_to_dataset_subset(
        y_test, x_test_transformed, [defect_class, non_defect_class]
    )

    y_train_one_class = np.where(y_train_one_class == non_defect_class, 0, 1) # 0 is non-defective, 1 is anomaly
    y_test_one_class = np.where(y_test_one_class == non_defect_class, 0, 1)

    one_cls_svm = models['One Class SVM']

    # train_predict_pipe performs the fit itself (it logs
    # "Fitting OneClassSVM with N samples"), so no separate .fit() call is
    # made here; fitting beforehand would train the same model twice.
    model, preds = train_predict_pipe(X_train=x_train_reshaped_one_class,
                        y_train=y_train_one_class,
                        X_test=x_test_reshaped_one_class,
                        model=one_cls_svm)
    # OneClassSVM marks outliers with -1; recode to 1 = anomaly, 0 = non-defective.
    preds = np.where(preds == -1, 1, 0)
    result = _report_metrics(y_test_one_class, y_train_one_class, preds, 'One Class SVM')
    # Tag the row with the defective label id of the evaluated pair.
    result['Class'] = defect_class
    results.append(result)

INFO:utility_functions.utility_functions: Fitting OneClassSVM with 183 samples
INFO:utility_functions.utility_functions: Predicting OneClassSVM with 233 samples
INFO:utility_functions.utility_functions: Fitting OneClassSVM with 225 samples
INFO:utility_functions.utility_functions: Predicting OneClassSVM with 299 samples
INFO:utility_functions.utility_functions: Fitting OneClassSVM with 194 samples
INFO:utility_functions.utility_functions: Predicting OneClassSVM with 281 samples
INFO:utility_functions.utility_functions: Fitting OneClassSVM with 246 samples
INFO:utility_functions.utility_functions: Predicting OneClassSVM with 317 samples
INFO:utility_functions.utility_functions: Fitting OneClassSVM with 228 samples
INFO:utility_functions.utility_functions: Predicting OneClassSVM with 274 samples
INFO:utility_functions.utility_functions: Fitting OneClassSVM with 345 samples
INFO:utility_functions.utility_functions: Predicting OneClassSVM with 401 samples
INFO:utility_functions.utility_fun

In [181]:
# Stack the per-pair metric rows of the PCA run into one table with a fresh
# 0..n-1 index (this rebinds combined_df from the earlier experiment).
combined_df = pd.concat(results, axis=0, ignore_index=True)
combined_df

Unnamed: 0,Model,Train Size,Test Size,Accuracy,F1 Score,Precision,Recall,Class
0,One Class SVM,183,233,0.476395,0.419404,0.460347,0.441366,0
1,One Class SVM,225,299,0.461538,0.410473,0.440006,0.42003,2
2,One Class SVM,194,281,0.530249,0.517885,0.541921,0.548851,4
3,One Class SVM,246,317,0.495268,0.449939,0.489168,0.484427,6
4,One Class SVM,228,274,0.481752,0.409232,0.46959,0.445652,8
5,One Class SVM,345,401,0.486284,0.401406,0.474575,0.447153,10
6,One Class SVM,222,296,0.425676,0.358163,0.385994,0.351351,12
7,One Class SVM,194,268,0.5,0.475431,0.503342,0.504179,14
8,One Class SVM,234,347,0.440922,0.408703,0.419193,0.409292,16
9,One Class SVM,289,384,0.395833,0.309181,0.337832,0.29124,18


In [183]:
# Unweighted mean of each metric (and of the test-set size) across all
# evaluated label pairs.
combined_df[['Test Size', 'Accuracy', 'F1 Score', 'Precision',
       'Recall']].mean()

Test Size    283.571429
Accuracy       0.487926
F1 Score       0.437791
Precision      0.480184
Recall         0.469863
dtype: float64