In [43]:
import cv2
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from PIL import Image

In [3]:
base_dir = "Images"

In [44]:
class TestDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        # Root directory contains all subdirectories (e.g., 'good', 'crack', etc.)
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # Iterate through subdirectories
        for subdir in os.listdir(root_dir):
            subdir_path = os.path.join(root_dir, subdir)
            if os.path.isdir(subdir_path):
                # If the subdirectory is 'good', label as 0 (non-anomalous)
                label = 0 if subdir == 'good' else 1
                for image_name in os.listdir(subdir_path):
                    image_path = os.path.join(subdir_path, image_name)
                    self.image_paths.append(image_path)
                    self.labels.append(label)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')  # Open image and convert to RGB
        label = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
        
        return image, label


In [47]:
# Define the autoencoder architecture
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),  # 1 input channel (grayscale image)
            nn.ReLU(),
            nn.BatchNorm2d(16),  # Batch normalization after convolution
            nn.Dropout(0.25),    # Dropout layer to prevent overfitting
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),  # Batch normalization after convolution
            nn.Dropout(0.25),    # Dropout layer
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),  # New deeper layer
            nn.ReLU(),
            nn.BatchNorm2d(64),  # Batch normalization after convolution
            nn.Dropout(0.25),    # Dropout layer
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),  # Batch normalization after deconvolution
            nn.Dropout(0.25),    # Dropout layer
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(16),  # Batch normalization after deconvolution
            nn.Dropout(0.25),    # Dropout layer
            nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid(),  # To normalize output to [0, 1]
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [37]:
# Prepare the data
transform = transforms.Compose([
    transforms.Grayscale(),  # Convert to grayscale
    transforms.Resize((512, 512)),  # Resize to a fixed size
    transforms.ToTensor()
])

In [21]:
# Initialize loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [48]:
def train_autoencoder(autoencoder, dataloader, item, num_epochs, device, save_path="."):
    # Ensure the save directory exists
    os.makedirs(save_path, exist_ok=True)
    
    # Move the model to the specified device
    autoencoder.to(device)
    
    # Training loop
    for epoch in range(num_epochs):
        autoencoder.train()
        running_loss = 0.0
        
        for images, _ in dataloader:
            images = images.to(device)  # Move data to the same device as the model
            
            # Forward pass
            reconstructed = autoencoder(images)
            loss = criterion(reconstructed, images)  # Compare reconstructed and original images
            
            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()

        # Print epoch summary
        avg_loss = running_loss / len(dataloader)
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.6f}")
    
    # Save the trained model
    model_filename = f"{item}_autoencoder.pth"
    model_path = os.path.join(save_path, model_filename)
    torch.save(autoencoder.state_dict(), model_path)
    print(f"Model saved to: {model_path}")
    
    return autoencoder

In [51]:
def test_autoencoder(model, test_loader, device, threshold):
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0
    anomalies = []
    false_positives = 0  # Counter for false positives

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)  # These are the class labels (good, crack, poke, etc.)
            print(labels)
            # Get the model's reconstruction of the images
            reconstructed = model(images)

            # Calculate the reconstruction error (difference between input and output)
            reconstruction_error = torch.mean((reconstructed - images) ** 2, dim=[1, 2, 3])

            # Identify anomalies based on reconstruction error exceeding the threshold
            predicted_anomalies = reconstruction_error > threshold

            # Track false positives: if the image is from the "good" class and predicted as anomalous
            for i, is_anomalous in enumerate(predicted_anomalies):
                image_label = labels[i].item()
                if is_anomalous.item():
                    anomalies.append((image_label, "Anomaly"))
                    if image_label == 0:  # 0 is assumed to be "good"
                        false_positives += 1
                else:
                    anomalies.append((image_label, "Good"))

            # Flatten the tensors for comparison
            predicted_anomalies = predicted_anomalies.view(-1)  # Flatten the anomalies tensor
            labels = (labels != 0).view(-1)  # Flatten labels and treat "good" as non-anomalous

            # Count correct predictions (for anomaly detection, assuming "good" = non-anomalous, others = anomalous)
            correct += (predicted_anomalies == labels).sum().item()
            total += labels.size(0)

    accuracy = correct / total
    false_positive_rate = false_positives / total if total > 0 else 0  # Avoid division by zero
    print(f"Test Accuracy: {accuracy:.4f}")
    print(f"False Positive Rate: {false_positive_rate:.4f}")

In [50]:
# Will use the images in the "train/good" directory to establish a baseline of features for defect-free images.
categories = ["capsule", "screw", "cable", "transistor"]
threshold = 0.05  # Threshold for anomaly detection

# Iterate through the directories of images
for item in categories:
    item_path = os.path.join(base_dir, item)
    train_path = os.path.join(item_path, 'train')
    test_path = os.path.join(item_path, 'test')
    ground_truth_path = os.path.join(item_path, 'ground_truth')

    print(f"Processing: {item}")
    train_dataset = datasets.ImageFolder(root=train_path, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

    # Create your dataset and DataLoader    
    model = Autoencoder()
    trained_autoencoder = train_autoencoder(model, train_loader, item, epochs, device)    
    break

Processing: capsule
Epoch [1/10], Loss: 0.128091
Epoch [2/10], Loss: 0.128075
Epoch [3/10], Loss: 0.128120
Epoch [4/10], Loss: 0.128095
Epoch [5/10], Loss: 0.128174
Epoch [6/10], Loss: 0.128117
Epoch [7/10], Loss: 0.128091
Epoch [8/10], Loss: 0.128197
Epoch [9/10], Loss: 0.128156
Epoch [10/10], Loss: 0.128107
Model saved to: ./capsule_autoencoder.pth


In [52]:
test_dataset = TestDataset(root_dir='Images/capsule/test', transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

test_autoencoder(model, test_loader, device, threshold)

tensor([0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 0, 1, 0, 1, 1, 1])
tensor([1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 0, 1, 1, 1, 1, 0])
tensor([1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1,
        1, 1, 0, 1, 1, 1, 1, 1])
tensor([1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1])
tensor([1, 1, 1, 1])
Test Accuracy: 0.8258
False Positive Rate: 0.1742
