In [1]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem so the project files are reachable.
drive.mount("/content/drive")

# Folder on the mounted Drive that holds the project's datasets.
DATASET_PATH = "/content/drive/MyDrive/CSS586_Visual_Defect_Detection/datasets"


Mounted at /content/drive


In [2]:

!pip install torchvision matplotlib scikit-learn

# Imports
import os
import torch
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchvision import models, datasets
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.6.0->torchvision)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch==2.6.0->torchvision)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch==2.6.0->torchvision)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86

In [3]:
import os
import shutil

# Build a two-class image folder (good / defective) from the MVTec "bottle" test split.
source_path = '/content/drive/MyDrive/CSS586_Visual_Defect_Detection/datasets/mvtec_anomaly_detection/bottle/test'
target_path = '/content/drive/MyDrive/CSS586_Visual_Defect_Detection/datasets/bottle_binary'

# Create target folders
good_src = os.path.join(source_path, 'good')
good_dst = os.path.join(target_path, 'good')
defective_dst = os.path.join(target_path, 'defective')
os.makedirs(good_dst, exist_ok=True)
os.makedirs(defective_dst, exist_ok=True)

# Copy 'good' images (regular files only; nested folders would make shutil.copy fail)
for fname in os.listdir(good_src):
    src = os.path.join(good_src, fname)
    if os.path.isfile(src):
        shutil.copy(src, os.path.join(good_dst, fname))

# Copy all defect images into 'defective'
for folder in os.listdir(source_path):
    defect_src = os.path.join(source_path, folder)
    # Skip the 'good' class and any stray non-directory entry in the test folder,
    # which os.listdir(defect_src) below could not open.
    if folder == 'good' or not os.path.isdir(defect_src):
        continue
    for fname in os.listdir(defect_src):
        src = os.path.join(defect_src, fname)
        if os.path.isfile(src):
            # Prefix with the defect type so equal file names from different
            # defect folders do not overwrite each other.
            shutil.copy(src, os.path.join(defective_dst, f"{folder}_{fname}"))

print("✅ bottle_binary/ created at:", target_path)


✅ bottle_binary/ created at: /content/drive/MyDrive/CSS586_Visual_Defect_Detection/datasets/bottle_binary


In [4]:
import torch
from torchvision import datasets
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as transforms

DATASET_PATH = '/content/drive/MyDrive/CSS586_Visual_Defect_Detection/datasets/bottle_binary'
SPLIT_SEED = 42  # fixes which images land in train vs. val across re-runs

# Transform: Resize to 224x224, convert to tensor, normalize
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Load dataset (one class per sub-folder of DATASET_PATH)
full_dataset = datasets.ImageFolder(root=DATASET_PATH, transform=transform)

# Split 80% train, 20% val.
# A seeded generator makes the split reproducible: without it, re-running this cell
# reshuffles membership, so images already trained on can end up in the validation set.
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Create DataLoaders (only the training batches are shuffled)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Show class mapping
print("✅ Classes:", full_dataset.classes)
print("✅ Class indices:", full_dataset.class_to_idx)


✅ Classes: ['defective', 'good']
✅ Class indices: {'defective': 0, 'good': 1}


In [5]:
import torch
import torch.nn as nn
from torchvision import models

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Load ResNet-50 with ImageNet weights. The explicit `weights=` argument replaces the
# deprecated `pretrained=True` flag, which emits a deprecation warning in current torchvision.
model = models.resnet50(weights='IMAGENET1K_V1')

# Freeze all layers so no gradients are computed for the pre-trained backbone
for param in model.parameters():
    param.requires_grad = False

# Replace the final layer (fc) with a new classifier; the fresh layer's
# parameters are created with requires_grad=True, so only it is trainable.
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)  # 2 classes

# Move to GPU/CPU
model = model.to(device)



Using device: cuda


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 187MB/s]


In [6]:
import torch.optim as optim

# Loss function, and an optimizer that only sees the parameters of the new `fc` head.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

num_epochs = 5  # Start small — you can increase later


def _train_one_epoch(net, loader, loss_fn, opt):
    """Run one optimisation pass over `loader` and return the mean of the per-batch losses."""
    # NOTE(review): train() also switches the frozen backbone's BatchNorm layers to
    # training mode, so their running statistics keep updating — confirm this is intended.
    net.train()
    running_loss = 0

    for batch_images, batch_labels in loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        opt.zero_grad()
        batch_loss = loss_fn(net(batch_images), batch_labels)
        batch_loss.backward()
        opt.step()

        running_loss += batch_loss.item()

    return running_loss / len(loader)


# Training loop: one line of output per epoch
for epoch in range(num_epochs):
    avg_loss = _train_one_epoch(model, train_loader, criterion, optimizer)
    print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {avg_loss:.4f}")


Epoch [1/5] - Loss: 0.9601
Epoch [2/5] - Loss: 0.4175
Epoch [3/5] - Loss: 0.8589
Epoch [4/5] - Loss: 0.6105
Epoch [5/5] - Loss: 0.5016


In [7]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Evaluate the classifier on the validation split and print per-class metrics.
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        # Index of the largest logit is the predicted class. The max values are bound to a
        # named throwaway so IPython's special `_` (last output) is not overwritten.
        _max_logits, preds = torch.max(outputs, 1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.numpy())

# Convert to arrays
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

# Pin the label set to every class of the dataset. With a small validation split a class
# may be absent from both labels and predictions; without `labels=` the report would then
# raise (target_names length mismatch) and the confusion matrix would change shape.
class_ids = list(range(len(full_dataset.classes)))

# Results
print("✅ Classification Report:")
print(classification_report(
    all_labels,
    all_preds,
    labels=class_ids,
    target_names=full_dataset.classes,
    zero_division=0,  # report 0 instead of warning when a class has no predictions/support
))

print("✅ Confusion Matrix:")
# Rows are true classes, columns are predicted classes, both in class_ids order.
print(confusion_matrix(all_labels, all_preds, labels=class_ids))



✅ Classification Report:
              precision    recall  f1-score   support

   defective       0.62      1.00      0.77        10
        good       1.00      0.14      0.25         7

    accuracy                           0.65        17
   macro avg       0.81      0.57      0.51        17
weighted avg       0.78      0.65      0.56        17

✅ Confusion Matrix:
[[10  0]
 [ 6  1]]


In [9]:
import os
import shutil

# Source: All categories inside mvtec_anomaly_detection
mvtec_path = '/content/drive/MyDrive/CSS586_Visual_Defect_Detection/datasets/mvtec_anomaly_detection'
output_path = '/content/drive/MyDrive/CSS586_Visual_Defect_Detection/datasets/mvtec_binary'

# Create the two class folders of the binary dataset
for class_dir in ('good', 'defective'):
    os.makedirs(os.path.join(output_path, class_dir), exist_ok=True)


def _binary_destination(category, subfolder, fname):
    """Return the target path of one test image inside the good/defective layout."""
    if subfolder == 'good':
        return os.path.join(output_path, 'good', f'{category}_{fname}')
    # Defect images keep their category and defect type in the file name.
    return os.path.join(output_path, 'defective', f'{category}_{subfolder}_{fname}')


# Walk every category (e.g., bottle, pill) and flatten its test split
for category in os.listdir(mvtec_path):
    category_test_path = os.path.join(mvtec_path, category, 'test')
    if os.path.isdir(category_test_path):
        for subfolder in os.listdir(category_test_path):
            full_path = os.path.join(category_test_path, subfolder)
            if os.path.isdir(full_path):
                for fname in os.listdir(full_path):
                    src = os.path.join(full_path, fname)
                    dst = _binary_destination(category, subfolder, fname)
                    shutil.copy(src, dst)

print("✅ Dataset created at:", output_path)


✅ Dataset created at: /content/drive/MyDrive/CSS586_Visual_Defect_Detection/datasets/mvtec_binary


In [10]:
import torch
from torchvision import datasets
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as transforms

DATASET_PATH = '/content/drive/MyDrive/CSS586_Visual_Defect_Detection/datasets/mvtec_binary'
SPLIT_SEED = 42  # fixes which images land in train vs. val across re-runs

# Transform: Resize to 224x224, normalize like ResNet expects
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Load the dataset (one class per sub-folder of DATASET_PATH)
full_dataset = datasets.ImageFolder(root=DATASET_PATH, transform=transform)

# Split: 80% train, 20% val.
# A seeded generator makes the split reproducible: without it, re-running this cell
# reshuffles membership, so images already trained on can end up in the validation set.
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Data loaders (only the training batches are shuffled)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Print classes
print("✅ Classes:", full_dataset.classes)
print("✅ Total Images:", len(full_dataset))
print("✅ Class indices:", full_dataset.class_to_idx)


✅ Classes: ['defective', 'good']
✅ Total Images: 1725
✅ Class indices: {'defective': 0, 'good': 1}


In [11]:
from torchvision import models
import torch.nn as nn
import torch

# Pick the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# ResNet-50 initialised with the ImageNet (V1) weights
model = models.resnet50(weights='IMAGENET1K_V1')

# Turn off gradient tracking for every pre-trained parameter
for frozen_param in model.parameters():
    frozen_param.requires_grad = False

# Swap the ImageNet head for a fresh 2-way linear layer (defective, good);
# its newly created parameters are the only trainable ones.
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)

# Place the whole network on the selected device
model = model.to(device)


Using device: cuda


In [12]:
import torch.optim as optim

# Cross-entropy over the two classes; Adam updates only the replaced `fc` layer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

# Training loop
num_epochs = 8  # Feel free to increase this
for epoch in range(num_epochs):
    model.train()
    batch_losses = []  # scalar loss of every batch seen this epoch

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    # Report the mean per-batch loss for the epoch.
    print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {sum(batch_losses)/len(train_loader):.4f}")


Epoch [1/8] - Loss: 0.5920
Epoch [2/8] - Loss: 0.5214
Epoch [3/8] - Loss: 0.5362
Epoch [4/8] - Loss: 0.4833
Epoch [5/8] - Loss: 0.4768
Epoch [6/8] - Loss: 0.4783
Epoch [7/8] - Loss: 0.4798
Epoch [8/8] - Loss: 0.4546


In [13]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Inference mode: no dropout/BatchNorm updates, and no gradient bookkeeping below.
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for batch_images, batch_targets in val_loader:
        logits = model(batch_images.to(device))
        # The position of the largest logit is the predicted class index.
        batch_preds = torch.max(logits, 1)[1]

        all_preds.extend(batch_preds.cpu().numpy())
        all_labels.extend(batch_targets.numpy())

# Turn the collected per-sample values into arrays for scikit-learn
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

# Per-class precision / recall / F1, labelled with the dataset's class names
report_text = classification_report(all_labels, all_preds, target_names=full_dataset.classes)
print("Classification Report:")
print(report_text)

# Rows are true classes, columns are predicted classes
matrix = confusion_matrix(all_labels, all_preds)
print(" Confusion Matrix:")
print(matrix)


Classification Report:
              precision    recall  f1-score   support

   defective       0.92      0.81      0.86       267
        good       0.53      0.74      0.62        78

    accuracy                           0.79       345
   macro avg       0.72      0.78      0.74       345
weighted avg       0.83      0.79      0.80       345

 Confusion Matrix:
[[216  51]
 [ 20  58]]
