<a href="https://colab.research.google.com/github/ckirby04/UArk-Projects/blob/Classification/MVTech_30class.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 1) Mount Google Drive so the dataset is available to the Colab instance

In [None]:
import pandas as pd


# Mount Google Drive at /content/drive; the dataset and saved models used below
# are read from and written to paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive







## 2) Load, Label, and Split data

In [None]:
import os
import cv2
import numpy as np
import glob
from sklearn.model_selection import train_test_split

def _load_images(image_paths, target_size):
    """Read, resize, and scale to [0, 1] every decodable image in ``image_paths``."""
    images = []
    for img_path in image_paths:
        # The glob patterns can also match directories; only files are images.
        if not os.path.isfile(img_path):
            continue
        img = cv2.imread(img_path)
        # cv2.imread returns None when the file cannot be decoded; skip it.
        if img is None:
            continue
        img = cv2.resize(img, target_size)
        images.append(img.astype(np.float32) / 255.0)
    return images


def load_mvtec_flat_split(base_path, target_size=(128, 128), test_size=0.5, random_state=42):
    """Load the per-object good/anomaly images and return a stratified split.

    Every sub-directory of ``base_path`` is treated as one object class. Images
    directly inside ``<class>/good`` get the label ``"<class>_good"``; images
    anywhere below ``<class>/anomaly`` (searched recursively) get the label
    ``"<class>_anomaly"``. Both labels are registered for every class directory,
    in sorted class order, even when a folder contributes no images.

    Args:
        base_path: Root directory that contains one folder per object class.
        target_size: Size passed to ``cv2.resize`` for every image.
        test_size: Forwarded to ``train_test_split`` (default keeps the
            original 50/50 split).
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        ``(X_train, y_train, X_test, y_test, label_map)`` where the ``X`` arrays
        hold float32 images scaled to [0, 1], the ``y`` arrays hold integer
        label indices, and ``label_map`` maps label name -> index.

    Raises:
        ValueError: If no readable image is found under ``base_path`` (or if
            ``train_test_split`` rejects the data, e.g. a class too small to
            stratify).
    """
    X, y = [], []
    label_map = {}

    for obj_class in sorted(os.listdir(base_path)):
        class_path = os.path.join(base_path, obj_class)
        if not os.path.isdir(class_path):
            continue

        # (label suffix, glob pattern, recursive?) for the two image sources.
        sources = (
            ('good', os.path.join(class_path, 'good', '*'), False),
            ('anomaly', os.path.join(class_path, 'anomaly', '**', '*'), True),
        )
        for suffix, pattern, recursive in sources:
            label_index = label_map.setdefault(f"{obj_class}_{suffix}", len(label_map))
            # Sort the matches: glob order follows the filesystem listing, and
            # the seeded split is only reproducible if the input order is too.
            image_paths = sorted(glob.glob(pattern, recursive=recursive))
            images = _load_images(image_paths, target_size)
            X.extend(images)
            y.extend([label_index] * len(images))

    if not X:
        raise ValueError(f"No readable images found under {base_path!r}")

    X = np.array(X)
    y = np.array(y)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=random_state
    )

    return X_train, y_train, X_test, y_test, label_map


## 3) Ensure images are properly stored with labels


In [None]:
# Load every class folder from Drive and split the images into train and test sets.
base_path = '/content/drive/MyDrive/30_class_mvtec_anomaly_dataset'
X_train, y_train, X_test, y_test, label_map = load_mvtec_flat_split(base_path)

# Sanity-check the result: array shapes, per-class sample counts, and the label mapping.
dataset_summary = [
    ("X_train shape:", X_train.shape),
    ("y_train shape:", y_train.shape),
    ("X_test shape:", X_test.shape),
    ("y_test shape:", y_test.shape),
    ("Train class counts:", np.bincount(y_train)),
    ("Test class counts:", np.bincount(y_test)),
    ("Number of classes:", len(label_map)),
    ("Label map:", label_map),
]
for caption, value in dataset_summary:
    print(caption, value)

X_train shape: (2447, 128, 128, 3)
y_train shape: (2447,)
X_test shape: (2447, 128, 128, 3)
y_test shape: (2447,)
Train class counts: [105  32 112  46 109  54 140  45 132  28 196  35 122  46 110  46 134  74
 160  60 115  42  30  15 106  20 124  30 120  59]
Test class counts: [104  31 112  46 110  55 140  44 132  29 195  35 123  46 110  47 133  74
 160  59 115  42  30  15 107  20 123  30 120  60]
Number of classes: 30
Label map: {'bottle_good': 0, 'bottle_anomaly': 1, 'cable_good': 2, 'cable_anomaly': 3, 'capsule_good': 4, 'capsule_anomaly': 5, 'carpet_good': 6, 'carpet_anomaly': 7, 'grid_good': 8, 'grid_anomaly': 9, 'hazelnut_good': 10, 'hazelnut_anomaly': 11, 'leather_good': 12, 'leather_anomaly': 13, 'metal_nut_good': 14, 'metal_nut_anomaly': 15, 'pill_good': 16, 'pill_anomaly': 17, 'screw_good': 18, 'screw_anomaly': 19, 'tile_good': 20, 'tile_anomaly': 21, 'toothbrush_good': 22, 'toothbrush_anomaly': 23, 'transistor_good': 24, 'transistor_anomaly': 25, 'wood_good': 26, 'wood_anomaly

In [None]:
import matplotlib.pyplot as plt

# Build the reverse lookup: class index -> label name.
inv_label_map = dict(zip(label_map.values(), label_map.keys()))

# Show 3 random training samples, each titled with its label name.
for _ in range(3):
    sample_idx = np.random.randint(0, len(X_train))
    sample_label = y_train[sample_idx]
    # Reverse the channel axis (BGR to RGB) so matplotlib shows the colours correctly.
    sample_rgb = X_train[sample_idx][..., ::-1]
    plt.imshow(sample_rgb)
    plt.title(f"Label: {inv_label_map[sample_label]}")
    plt.axis('off')
    plt.show()


## 4) Build ResNet-style backbone CNN model

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Conv2D, BatchNormalization, ReLU, Add,
    MaxPooling2D, Flatten, Dense, Dropout
)

def residual_block(x, filters, kernel_size=3):
    """Apply two conv + batch-norm stages and add the block input back in.

    Args:
        x: Input tensor of the block.
        filters: Number of output channels for both convolutions.
        kernel_size: Kernel size used by both convolutions.

    Returns:
        The ReLU-activated sum of the skip path and the convolutional path.
    """
    skip = x

    # Main path: conv -> BN -> ReLU -> conv -> BN.
    main = Conv2D(filters, kernel_size, padding='same')(x)
    main = BatchNormalization()(main)
    main = ReLU()(main)

    main = Conv2D(filters, kernel_size, padding='same')(main)
    main = BatchNormalization()(main)

    # When the channel counts differ, project the skip path with a 1x1 conv
    # (followed by batch norm) so both inputs to Add have matching shapes.
    channels_differ = skip.shape[-1] != filters
    if channels_differ:
        skip = Conv2D(filters, 1, padding='same')(skip)
        skip = BatchNormalization()(skip)

    merged = Add()([skip, main])
    merged = ReLU()(merged)
    return merged

def build_resnet_like_model(input_shape=(128, 128, 3), num_classes=30):
    """Assemble the small residual CNN classifier.

    The network is a conv stem followed by three residual stages (32, 64 and
    128 filters), each stage ending in 2x2 max pooling, and a dense head with
    dropout and a softmax output.

    Args:
        input_shape: Shape of one input image.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras ``Model``.
    """
    inputs = Input(shape=input_shape)

    # Stem: conv -> BN -> ReLU -> pool.
    features = Conv2D(32, 3, padding='same')(inputs)
    features = BatchNormalization()(features)
    features = ReLU()(features)
    features = MaxPooling2D(2)(features)

    # Residual stages, each followed by a 2x2 max pool.
    for stage_filters in (32, 64, 128):
        features = residual_block(features, stage_filters)
        features = MaxPooling2D(2)(features)

    # Classification head.
    features = Flatten()(features)
    features = Dense(128, activation='relu')(features)
    features = Dropout(0.3)(features)
    outputs = Dense(num_classes, activation='softmax')(features)

    return Model(inputs, outputs)

## 5) Compile, train, and save the model

In [None]:
# Size the network from the loaded data rather than repeating literals, so the
# model stays consistent if the image size or the number of label folders changes.
model = build_resnet_like_model(
    input_shape=X_train.shape[1:],
    num_classes=len(label_map),
)

# Labels are integer class indices, hence the sparse categorical loss.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Part of the training data (validation_split=0.2) is held out for validation metrics.
history = model.fit(X_train, y_train, validation_split=0.2, epochs=500)

import os

save_dir = '/content/drive/MyDrive/saved_models'
os.makedirs(save_dir, exist_ok=True)  # creates the folder if it doesn't exist

# Persist the trained model to Drive.
model.save(os.path.join(save_dir, 'mvtec_model.keras'))

In [None]:
# Report the metrics recorded for the last training epoch.
final_epoch_metrics = [
    ("Train Loss:", 'loss'),
    ("Val Loss:", 'val_loss'),
    ("Val Accuracy:", 'val_accuracy'),
]
for caption, metric_key in final_epoch_metrics:
    print(caption, history.history[metric_key][-1])

# Re-check the dataset shapes and per-class counts used for training and testing.
split_summary = [
    ("X_train:", X_train.shape),
    ("y_train:", np.bincount(y_train)),
    ("X_test:", X_test.shape),
    ("y_test:", np.bincount(y_test)),
]
for caption, value in split_summary:
    print(caption, value)

# Peek at the first few ground-truth test labels.
print("Actual labels:", y_test[:5])

## Save, then re-load, the trained model and training history from Drive

In [None]:
# json is imported here because this cell is the first to use it; without the
# import the cell fails with NameError on a fresh kernel.
import json
import os

save_dir = '/content/drive/MyDrive/saved_models'
os.makedirs(save_dir, exist_ok=True)  # creates the folder if it doesn't exist

# Save the trained model.
model.save(os.path.join(save_dir, 'mvtec_model.keras'))

# Save the training history next to the model so it can be read back as JSON later.
with open(os.path.join(save_dir, 'train_history.json'), 'w') as f:
    json.dump(history.history, f)


In [None]:
from tensorflow.keras.models import load_model
import json

# Restore the trained network from the copy saved on Drive.
model = load_model('/content/drive/MyDrive/saved_models/mvtec_model.keras')

# Read back the training history that was stored as JSON alongside the model.
history_path = '/content/drive/MyDrive/saved_models/train_history.json'
with open(history_path) as history_file:
    history_data = json.load(history_file)


## Confusion Matrix


In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# NOTE: this cell needs X_test, y_test and label_map from the data-loading cell;
# run that cell first when starting from a fresh kernel.
y_pred_probs = model.predict(X_test)  # shape: (n_samples, num_classes)
y_pred = np.argmax(y_pred_probs, axis=1)  # shape: (n_samples,)

# Label names ordered by class index, plus the matching indices.
class_names = sorted(label_map, key=label_map.get)
class_indices = [label_map[name] for name in class_names]

# Pass the full label set explicitly: otherwise a class absent from both y_test
# and y_pred is dropped from the matrix, which then no longer lines up with
# display_labels.
cm = confusion_matrix(y_test, y_pred, labels=class_indices)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
fig, ax = plt.subplots(figsize=(12, 12))
disp.plot(ax=ax, cmap='Blues', xticks_rotation=90)
plt.title("Confusion Matrix")
plt.show()



NameError: name 'X_test' is not defined