In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, datasets
import time
import os
from torchvision.datasets import ImageFolder
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter

In [2]:
class VGG1Block(nn.Module):
    """Single-block VGG-style binary classifier.

    One 3x3 convolution (3 -> 64 channels) followed by ReLU and a 2x2
    max-pool feeds a two-layer fully connected head ending in a sigmoid, so
    ``forward`` yields one value in [0, 1] per sample.

    The first linear layer expects ``64 * 32 * 32`` features, i.e. the
    flattened feature map produced from a 64x64 input after one pooling step.
    """

    def __init__(self):
        super().__init__()

        # padding=1 keeps the spatial size through the conv; the pool halves it.
        conv_stage = [
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(*conv_stage)

        # Fully connected head: 64*32*32 -> 256 -> 1, squashed by a sigmoid.
        head = [
            nn.Linear(64 * 32 * 32, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the sigmoid output, shape ``(batch, 1)``, for a batch ``x``."""
        flat = torch.flatten(self.features(x), 1)  # keep dim 0, flatten the rest
        return self.classifier(flat)


class VGG3Blocks(nn.Module):
    """Three-block VGG-style binary classifier.

    Each block is a 3x3 convolution, ReLU and a 2x2 max-pool; the channel
    count goes 3 -> 64 -> 128 -> 256. A two-layer fully connected head ending
    in a sigmoid produces one value in [0, 1] per sample.

    The first linear layer expects ``256 * 8 * 8`` features, i.e. the
    flattened feature map produced from a 64x64 input after three pooling
    steps.
    """

    # (in_channels, out_channels) of the convolutional blocks, in order.
    _CHANNELS = ((3, 64), (64, 128), (128, 256))

    def __init__(self):
        super().__init__()

        layers = []
        for in_ch, out_ch in self._CHANNELS:
            # padding=1 keeps the spatial size; the pool then halves it.
            layers += [
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ]
        self.features = nn.Sequential(*layers)

        # Fully connected head: 256*8*8 -> 512 -> 1, squashed by a sigmoid.
        head = [
            nn.Linear(256 * 8 * 8, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 1),
            nn.Sigmoid(),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the sigmoid output, shape ``(batch, 1)``, for a batch ``x``."""
        flat = torch.flatten(self.features(x), 1)  # keep dim 0, flatten the rest
        return self.classifier(flat)


class VGG3BlocksAugmented(nn.Module):
    """Three-block VGG-style binary classifier used for the augmented run.

    The layers are the same as in ``VGG3Blocks``: three conv -> ReLU ->
    max-pool blocks (3 -> 64 -> 128 -> 256 channels) and a 256*8*8 -> 512 -> 1
    head ending in a sigmoid. The class itself applies no augmentation; in
    this notebook it is simply the model trained on the augmented loader.
    """

    def __init__(self):
        super().__init__()

        def conv_block(in_channels, out_channels):
            """Return the layers of one conv -> ReLU -> 2x2 max-pool block."""
            return (
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
            )

        # Three pooling steps take a 64x64 input down to 8x8.
        self.features = nn.Sequential(
            *conv_block(3, 64),
            *conv_block(64, 128),
            *conv_block(128, 256),
        )

        hidden = nn.Linear(256 * 8 * 8, 512)
        output = nn.Linear(512, 1)
        self.classifier = nn.Sequential(
            hidden,
            nn.ReLU(inplace=True),
            output,
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the sigmoid output, shape ``(batch, 1)``, for a batch ``x``."""
        feature_maps = self.features(x)
        flattened = torch.flatten(feature_maps, 1)  # keep dim 0, flatten the rest
        return self.classifier(flattened)


In [3]:
def train_model(model, train_loader, criterion, optimizer, num_epochs=20):
    """Train a binary classifier and log per-iteration metrics to TensorBoard.

    Relies on notebook-level names that must exist before the call:
    ``device`` (where batches are moved), ``writer`` (a TensorBoard writer
    used via ``add_scalar``), ``test_loader`` and ``test_model`` (used to
    evaluate the model after every optimisation step).

    Args:
        model: Module whose output has shape ``(batch, 1)``; outputs above
            0.5 are counted as class 1.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, targets)`` where
            ``targets`` is ``labels`` cast to float with shape ``(batch, 1)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        float: Wall-clock seconds spent in this call. This includes the
        per-iteration evaluation and logging, not just the weight updates.

    Raises:
        ValueError: If an epoch sees no training samples, since the epoch
            averages would otherwise fail with a ``ZeroDivisionError``.
    """
    started = time.perf_counter()
    model.train()
    iteration = 0

    # Reported as-is if num_epochs is 0 and no epoch ever runs.
    epoch_loss = float('inf')
    epoch_acc = 0

    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = total = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            # The model emits (batch, 1), so give the labels the same shape.
            targets = labels.float().unsqueeze(1)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # loss.item() is a batch mean; weight it by batch size so the
            # epoch figure is a true per-sample average.
            running_loss += loss.item() * inputs.size(0)
            predicted = (outputs > 0.5).float()
            correct += (predicted == targets).sum().item()
            total += labels.size(0)

            # Log scalars; the accuracy is the running accuracy of this epoch.
            writer.add_scalar("Training Loss", loss.item(), iteration)
            writer.add_scalar("Training Accuracy", correct / total * 100, iteration)
            test_model(model, test_loader, iteration)
            # test_model switches the model to eval mode; switch back.
            model.train()

            iteration += 1

        if total == 0:
            raise ValueError("train_loader yielded no samples; nothing to train on")

        epoch_loss = running_loss / total
        epoch_acc = correct / total * 100
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%")

    print(f"Training loss: {epoch_loss:.4f}")
    print(f"Training accuracy: {epoch_acc:.2f}%")

    return time.perf_counter() - started


def test_model(model, test_loader, iteration=-1):
    model.eval()
    correct = 0
    total = 0
    
    img_cnt = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            predicted = (outputs > 0.5).float()
            correct += (predicted == labels.unsqueeze(1)).sum().item()
            total += labels.size(0)

            # Log test images and predictions
            for i in range(len(labels)):
                # plt.imshow(inputs[i].cpu().permute(1, 2, 0))
                # plt.title(f"Predicted: {int(predicted[i].item())}, Actual: {int(labels[i].item())}")
                # plt.axis('off')
                # plt.show()

                img_cnt += 1
                
                img_grid = inputs[i].cpu()
                
                if iteration == -1:
                    writer.add_image(f"Test Image {img_cnt} - Prediction: {int(predicted[i].item())}, Actual: {int(labels[i].item())}", img_grid, 0)

    accuracy = correct / total * 100
    print(f"Testing Accuracy: {accuracy:.2f}%")
    
    if iteration != -1:
        writer.add_scalar(f"Test Accuracy", accuracy, iteration)

In [4]:
# Seed torch's global RNG so that weight initialisation, DataLoader shuffling
# and the random augmentations below are repeatable on a fresh
# "Restart Kernel -> Run All".
SEED = 42
torch.manual_seed(SEED)

# The classifier heads defined above are sized for 64x64 inputs
# (64*32*32 after one pool, 256*8*8 after three), so the resize must match.
IMAGE_SIZE = (64, 64)
BATCH_SIZE = 8

# Plain pipeline: resize and convert to a tensor.
transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor()
])

train_dataset = ImageFolder(root='dataset/train', transform=transform)
test_dataset = ImageFolder(root='dataset/test', transform=transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# The test set is never shuffled, so evaluation order is stable.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Augmented pipeline for the training images only: random horizontal flip and
# a random rotation of up to 10 degrees before the tensor conversion.
transform_augmented = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor()
])

train_dataset_augmented = ImageFolder(root='dataset/train', transform=transform_augmented)
train_loader_augmented = DataLoader(train_dataset_augmented, batch_size=BATCH_SIZE, shuffle=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The models end in a sigmoid, so plain BCELoss (not BCEWithLogitsLoss) applies.
criterion = nn.BCELoss()

In [5]:
print('VGG (1 block) - ')

vgg_1block = VGG1Block().to(device)
optimizer_1block = optim.Adam(vgg_1block.parameters(), lr=0.001)

# train_model and test_model log through the notebook-level name `writer`.
# The with-statement binds that name and guarantees the event file is flushed
# and closed, even if training is interrupted.
with SummaryWriter("runs/vgg_1block_model") as writer:
    # Start the clock after model construction and device transfer so that
    # only the training call is timed.
    start_time = time.time()
    train_model(vgg_1block, train_loader, criterion, optimizer_1block)
    # Measure the duration here rather than relying on train_model's return value.
    train_time_1block = time.time() - start_time

    print(f'Training time: {train_time_1block:.2f}s')
    print('Number of parameters:', sum(p.numel() for p in vgg_1block.parameters() if p.requires_grad))
    # Final evaluation (iteration=-1) also logs the test images.
    test_model(vgg_1block, test_loader)

VGG (1 block) - 
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 72.50%
Testing Accuracy: 60.00%
Testing Accuracy: 75.00%
Testing Accuracy: 60.00%
Testing Accuracy: 70.00%
Testing Accuracy: 70.00%
Testing Accuracy: 65.00%
Testing Accuracy: 65.00%
Testing Accuracy: 75.00%
Testing Accuracy: 80.00%
Testing Accuracy: 75.00%
Testing Accuracy: 82.50%
Testing Accuracy: 72.50%
Testing Accuracy: 62.50%
Testing Accuracy: 62.50%
Testing Accuracy: 62.50%
Testing Accuracy: 70.00%
Testing Accuracy: 85.00%
Testing Accuracy: 80.00%
Testing Accuracy: 70.00%
Testing Accuracy: 60.00%
Testing Accuracy: 65.00%
Epoch [1/20], Loss: 0.8772, Accuracy: 66.67%
Testing Accuracy: 72.50%
Testing Accuracy: 85.00%
Testing Accuracy: 75.00%
Testing Accuracy: 62.50%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 65.00%
Testing Accuracy: 82.50%
Testing Accuracy: 85.00%
Testing Accuracy: 87.50%
Testing Accuracy: 87.50%
Testing Accuracy: 92.50%
Testing Accuracy: 85.00%
Testing Accur

In [6]:
print('VGG (3 blocks) - ')

vgg_3blocks = VGG3Blocks().to(device)
optimizer_3blocks = optim.Adam(vgg_3blocks.parameters(), lr=0.001)

# train_model and test_model log through the notebook-level name `writer`.
# The with-statement binds that name and guarantees the event file is flushed
# and closed, even if training is interrupted.
with SummaryWriter("runs/vgg_3blocks_model") as writer:
    # Start the clock after model construction and device transfer so that
    # only the training call is timed.
    start_time = time.time()
    train_model(vgg_3blocks, train_loader, criterion, optimizer_3blocks)
    # Measure the duration here rather than relying on train_model's return value.
    train_time_3blocks = time.time() - start_time

    print(f'Training time: {train_time_3blocks:.2f}s')
    print('Number of parameters:', sum(p.numel() for p in vgg_3blocks.parameters() if p.requires_grad))
    # Final evaluation (iteration=-1) also logs the test images.
    test_model(vgg_3blocks, test_loader)

VGG (3 blocks) - 
Testing Accuracy: 50.00%
Testing Accuracy: 80.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 52.50%
Testing Accuracy: 60.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 60.00%
Testing Accuracy: 65.00%
Testing Accuracy: 55.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 55.00%
Epoch [1/20], Loss: 0.6906, Accuracy: 52.08%
Testing Accuracy: 57.50%
Testing Accuracy: 60.00%
Testing Accuracy: 72.50%
Testing Accuracy: 65.00%
Testing Accuracy: 77.50%
Testing Accuracy: 82.50%
Testing Accuracy: 80.00%
Testing Accuracy: 82.50%
Testing Accuracy: 82.50%
Testing Accuracy: 90.00%
Testing Accuracy: 80.00%
Testing Accuracy: 85.00%
Testing Accuracy: 90.00%
Testing Accu

In [7]:
print('VGG (3 Blocks) with image augmentation - ')

vgg_3blocks_augmented = VGG3BlocksAugmented().to(device)
optimizer_3blocks_augmented = optim.Adam(vgg_3blocks_augmented.parameters(), lr=0.001)

# train_model and test_model log through the notebook-level name `writer`.
# The with-statement binds that name and guarantees the event file is flushed
# and closed, even if training is interrupted.
with SummaryWriter("runs/vgg_3blocks_augmented_model") as writer:
    # Start the clock after model construction and device transfer so that
    # only the training call is timed.
    start_time = time.time()
    # Train on the augmented loader; evaluation still uses the plain test set.
    train_model(vgg_3blocks_augmented, train_loader_augmented, criterion, optimizer_3blocks_augmented)
    # Measure the duration here rather than relying on train_model's return value.
    train_time_3blocks_augmented = time.time() - start_time

    print(f'Training time: {train_time_3blocks_augmented:.2f}s')
    print("Number of parameters:", sum(p.numel() for p in vgg_3blocks_augmented.parameters() if p.requires_grad))
    # Final evaluation (iteration=-1) also logs the test images.
    test_model(vgg_3blocks_augmented, test_loader)

VGG (3 Blocks) with image augmentation - 
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 75.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Testing Accuracy: 50.00%
Epoch [1/20], Loss: 0.8511, Accuracy: 52.60%
Testing Accuracy: 57.50%
Testing Accuracy: 57.50%
Testing Accuracy: 60.00%
Testing Accuracy: 57.50%
Testing Accuracy: 67.50%
Testing Accuracy: 92.50%
Testing Accuracy: 82.50%
Testing Accuracy: 82.50%
Testing Accuracy: 82.50%
Testing Accuracy: 92.50%
Testing Accuracy: 92.50%
Testing Accuracy: 92.50%
Testing Accur

In [8]:
# print("Number of parameters in VGG (1 block):", sum(p.numel() for p in vgg_1block.parameters() if p.requires_grad))
# print("Number of parameters in VGG (3 blocks):", sum(p.numel() for p in vgg_3blocks.parameters() if p.requires_grad))
# print("Number of parameters in VGG (3 blocks, augmented):", sum(p.numel() for p in vgg_3blocks_augmented.parameters() if p.requires_grad))