In [3]:
# Colab-only step: mount Google Drive at /content/drive so that later cells can
# read the dataset directory stored under MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#  !wget https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420938113-1629952094/mvtec_anomaly_detection.tar.xz

--2024-06-19 14:39:01--  https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420938113-1629952094/mvtec_anomaly_detection.tar.xz
Resolving www.mydrive.ch (www.mydrive.ch)... 91.214.169.64
Connecting to www.mydrive.ch (www.mydrive.ch)|91.214.169.64|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5264982680 (4.9G) [application/x-xz]
Saving to: ‘mvtec_anomaly_detection.tar.xz’


2024-06-19 14:40:12 (71.4 MB/s) - ‘mvtec_anomaly_detection.tar.xz’ saved [5264982680/5264982680]



In [None]:
# cd Mvtec/

/content/drive/MyDrive/Machine_Learning/Mvtec


In [None]:
# !tar -xf mvtec_anomaly_detection.tar.xz

In [None]:
# cd drive/MyDrive/Machine_Learning/

/content/drive/MyDrive/Machine_Learning


In [None]:
# cd ..

/content/drive/MyDrive/Machine_Learning


In [4]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

In [None]:
def _read_resized_images(folder, img_size):
    """Return every decodable image found directly in `folder`, resized to `img_size`.

    Hidden entries (names starting with '.') are skipped. Files that OpenCV
    cannot decode are skipped with a warning, because `cv2.imread` signals
    failure by returning None instead of raising. A missing path, or a path
    that is not a directory, yields an empty list.
    """
    if not os.path.isdir(folder):
        return []

    images = []
    # Sorted so the sample order does not depend on the filesystem's listing order.
    for img_name in sorted(os.listdir(folder)):
        if img_name.startswith('.'):
            continue
        img_path = os.path.join(folder, img_name)
        img = cv2.imread(img_path)
        if img is None:
            import warnings
            warnings.warn(f'Skipping unreadable image: {img_path}')
            continue
        images.append(cv2.resize(img, img_size))
    return images


def load_mvtec_dataset(data_dir, img_size=(128, 128)):
    """Load an MVTec-style directory tree into image and label arrays.

    Expected layout per class directory: `train/good`, `test/good` and
    `test/<anomaly_type>`. Images under a `good` folder are labelled 0; images
    under any other `test` sub-folder are labelled 1.

    Args:
        data_dir: Root directory containing one sub-directory per class.
            Hidden entries and plain files (archives, readme files, ...) at
            this level are ignored.
        img_size: Size passed to `cv2.resize` for every image.

    Returns:
        Tuple `(data, labels)` of NumPy arrays with one entry per loaded
        image. Both are empty when no image is found.
    """
    data = []
    labels = []

    for cls in sorted(os.listdir(data_dir)):
        cls_dir = os.path.join(data_dir, cls)
        # Only real class directories are scanned; stray files next to them
        # used to make the listing of '<file>/test' raise.
        if cls.startswith('.') or not os.path.isdir(cls_dir):
            continue

        # Good images: training split first, then test split (label 0).
        for split in ('train', 'test'):
            good_images = _read_resized_images(os.path.join(cls_dir, split, 'good'), img_size)
            data.extend(good_images)
            labels.extend([0] * len(good_images))

        # Anomalous images: every non-'good' folder under test/ (label 1).
        test_path = os.path.join(cls_dir, 'test')
        if not os.path.isdir(test_path):
            continue
        for anomaly_type in sorted(os.listdir(test_path)):
            if anomaly_type == 'good' or anomaly_type.startswith('.'):
                continue
            anomaly_images = _read_resized_images(os.path.join(test_path, anomaly_type), img_size)
            data.extend(anomaly_images)
            labels.extend([1] * len(anomaly_images))

    data = np.array(data)
    labels = np.array(labels)
    return data, labels

# Root of the extracted dataset on the mounted Drive.
data_dir = '/content/drive/MyDrive/Machine_Learning/Mvtec'

# Read all images with their 0/1 labels, then divide the pixel values by 255.
data, labels = load_mvtec_dataset(data_dir)
data = np.divide(data, 255.0)

# Hold out 20% of the samples, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)


# CNN Model

In [None]:
def cnn_model(input_shape):
    """Build and compile a small CNN with a single sigmoid output.

    The network stacks three Conv2D(3x3, relu) + MaxPooling2D(2x2) stages with
    32, 64 and 128 filters, followed by Flatten, Dense(128, relu) and
    Dense(1, sigmoid). It is compiled with Adam, binary cross-entropy loss and
    the accuracy metric.

    Args:
        input_shape: Shape of one input sample, passed to the first Conv2D layer.

    Returns:
        The compiled Sequential model.
    """
    net = Sequential()

    # First stage carries the input shape; the remaining stages only differ in width.
    net.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    net.add(MaxPooling2D((2, 2)))
    for n_filters in (64, 128):
        net.add(Conv2D(n_filters, (3, 3), activation='relu'))
        net.add(MaxPooling2D((2, 2)))

    # Classifier head.
    net.add(Flatten())
    net.add(Dense(128, activation='relu'))
    net.add(Dense(1, activation='sigmoid'))

    net.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    return net

In [None]:
# Per-sample shape: everything after the leading sample axis of X_train.
input_shape = X_train.shape[1:]
model = cnn_model(input_shape)

# Class weights to handle class imbalance: class 0 keeps weight 1, class 1 is
# weighted by (number of training samples) / (number of class-1 samples).
class_weights = {
    0: 1.0,
    1: float(len(y_train)) / sum(y_train),
}

# The held-out split is passed as validation data during fitting.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_test, y_test),
    class_weight=class_weights,
)


In [None]:
# Score the trained CNN on the held-out split; evaluate() yields the loss
# followed by the compiled metric (accuracy).
test_scores = model.evaluate(X_test, y_test)
loss, accuracy = test_scores
print(f'Test Accuracy: {accuracy * 100:.2f}%')

In [None]:
# Sigmoid outputs of the CNN for the held-out images
y_pred_probs = model.predict(X_test)

# Threshold at 0.5 and flatten to a 1-D vector of 0/1 labels
above_threshold = y_pred_probs > 0.5
y_pred = above_threshold.astype(int).flatten()

# Per-class report followed by the confusion matrix
for summary in (classification_report(y_test, y_pred), confusion_matrix(y_test, y_pred)):
    print(summary)

# SVM

In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import KNeighborsClassifier


In [None]:
# Flatten every image into one feature row for the SVM
X_train_svm = X_train.reshape(len(X_train), -1)
X_test_svm = X_test.reshape(len(X_test), -1)

# Linear-kernel SVM, created and fitted in one step (fit returns the estimator)
svm_model = SVC(kernel='linear', random_state=42).fit(X_train_svm, y_train)

# Labels predicted for the held-out images
y_pred_svm = svm_model.predict(X_test_svm)

# Overall accuracy, printed as a percentage
accuracy_svm = accuracy_score(y_true=y_test, y_pred=y_pred_svm)
print(f'SVM Test Accuracy: {accuracy_svm * 100:.2f}%')

# Per-class precision, recall and F1-score
svm_report = classification_report(y_test, y_pred_svm)
print(svm_report)


# KNN

In [None]:
# One flat feature row per image for the KNN classifier
X_train_knn = X_train.reshape(len(X_train), -1)
X_test_knn = X_test.reshape(len(X_test), -1)

# 5-nearest-neighbours classifier
knn_model = KNeighborsClassifier(n_neighbors=5)

# Fit on the flattened training images
knn_model.fit(X_train_knn, y_train)

# Labels predicted for the held-out images
y_pred_knn = knn_model.predict(X_test_knn)

# Overall accuracy, printed as a percentage
accuracy_knn = accuracy_score(y_true=y_test, y_pred=y_pred_knn)
print(f'KNN Test Accuracy: {accuracy_knn * 100:.2f}%')

# Per-class precision, recall and F1-score
knn_report = classification_report(y_test, y_pred_knn)
print(knn_report)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Metrics for each model, hard-coded as literals.
# NOTE(review): presumably copied from the reports printed by an earlier run;
# they are not recomputed when the models above are retrained.
# Kept under its own name: the original assigned this dict to `data`, which
# overwrote the image array loaded earlier in the notebook.
model_metrics = {
    'Model': ['CNN', 'SVM', 'KNN'],
    'Accuracy': [0.8310, 0.7647, 0.7694],
    'Precision_0': [0.86, 0.76, 0.78],
    'Precision_1': [0.71, 0.00, 0.55],
    'Recall_0': [0.94, 1.00, 0.97],
    'Recall_1': [0.48, 0.00, 0.11],
    'F1_0': [0.89, 0.87, 0.87],
    'F1_1': [0.57, 0.00, 0.18],
    'Macro_F1': [0.73, 0.43, 0.52],
    'Weighted_F1': [0.82, 0.66, 0.70]
}

# One row per model, one column per metric
df = pd.DataFrame(model_metrics)

# 2x2 grid of bar charts; each chart is drawn on its own explicit Axes
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
ax_accuracy, ax_precision, ax_recall, ax_f1 = axes.flatten()

# Accuracy
sns.barplot(x='Model', y='Accuracy', data=df, palette='viridis', ax=ax_accuracy)
ax_accuracy.set_title('Model Accuracy')
ax_accuracy.set_ylim(0, 1)

# Precision for Class 0 and Class 1 (melted to long form so `hue` can split by class)
precision_df = df.melt(id_vars='Model', value_vars=['Precision_0', 'Precision_1'], var_name='Class', value_name='Precision')
sns.barplot(x='Model', y='Precision', hue='Class', data=precision_df, palette='viridis', ax=ax_precision)
ax_precision.set_title('Model Precision')
ax_precision.set_ylim(0, 1)

# Recall for Class 0 and Class 1
recall_df = df.melt(id_vars='Model', value_vars=['Recall_0', 'Recall_1'], var_name='Class', value_name='Recall')
sns.barplot(x='Model', y='Recall', hue='Class', data=recall_df, palette='viridis', ax=ax_recall)
ax_recall.set_title('Model Recall')
ax_recall.set_ylim(0, 1)

# F1-score: per class, macro average and weighted average
f1_df = df.melt(id_vars='Model', value_vars=['F1_0', 'F1_1', 'Macro_F1', 'Weighted_F1'], var_name='Metric', value_name='F1-score')
sns.barplot(x='Model', y='F1-score', hue='Metric', data=f1_df, palette='viridis', ax=ax_f1)
ax_f1.set_title('Model F1-score')
ax_f1.set_ylim(0, 1)

plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Class ids used as tick labels on both heatmap axes (0 = good, 1 = anomaly)
true_labels = np.array([0, 1])

# Confusion matrices, hard-coded as literals (rows = true class, columns = predicted class).
# NOTE(review): the KNN matrix gives accuracy (794 + 28) / 1071 = 0.7675 and
# class-1 precision 28 / 53 = 0.53, while the metrics table in the previous
# cell lists 0.7694 and 0.55 -- confirm which values are correct.
confusion_matrices = {
    'CNN': np.array([[768, 51], [130, 122]]),
    'SVM': np.array([[819, 0], [252, 0]]),
    'KNN': np.array([[794, 25], [224, 28]])
}

# One heatmap per model, side by side
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# The loop variable is `model_name`, not `model`: the original loop rebound the
# trained Keras `model` to the string 'KNN', breaking later evaluate/predict calls.
for ax, (model_name, cm) in zip(axes, confusion_matrices.items()):
    sns.heatmap(cm, annot=True, cmap='Blues', fmt='g', xticklabels=true_labels, yticklabels=true_labels, ax=ax)
    ax.set_title(f'Confusion Matrix - {model_name}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')

plt.tight_layout()
plt.show()
