In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
class SimpleShapeCNN(nn.Module):
    """Small two-stage convolutional classifier for 3-channel 32x32 images.

    Each stage is a 3x3 convolution (padding 1, so spatial size is kept),
    a ReLU and a 2x2 max-pool that halves height and width. Two stages turn
    a 32x32 input into 32 feature maps of 8x8, which is what ``fc1`` expects.

    Args:
        num_classes: Number of output logits. Defaults to 3, the value the
            original model hard-coded.
    """

    def __init__(self, num_classes=3):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(32 * 8 * 8, 64)
        self.fc2 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``.

        Args:
            x: Image batch of shape ``(batch, 3, 32, 32)``.

        Raises:
            RuntimeError: If the spatial size is not 32x32, because the
                flattened feature length no longer matches ``fc1``.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, 32 * 8 * 8)``, this can never spill extra spatial
        # elements into the batch dimension when the input size is wrong;
        # the mismatch surfaces as a shape error in ``fc1`` instead.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [3]:
# Preprocessing applied to every image: resize to 32x32 (the spatial size
# SimpleShapeCNN's first linear layer is dimensioned for), then convert the
# image to a tensor.
transform = transforms.Compose(
    [transforms.Resize(size=(32, 32)), transforms.ToTensor()]
)

In [4]:
# One training set and two test sets, loaded from sibling directories with
# the same preprocessing. Judging by the directory names, one test set keeps
# the training backgrounds and the other swaps them.
train_dataset = datasets.ImageFolder(root='dataset_custom_base/train', transform=transform)
test_dataset_backgrounds_matched = datasets.ImageFolder(root='dataset_custom_base/test_same_backgrounds', transform=transform)
test_dataset_backgrounds_shuffled = datasets.ImageFolder(root='dataset_custom_base/test_swapped_backgrounds', transform=transform)

# ImageFolder derives integer labels from each root's own sub-directory
# names. If the three roots do not contain the same class folders, the same
# integer means different classes in different datasets and the reported
# accuracies are meaningless, so fail fast here.
assert test_dataset_backgrounds_matched.class_to_idx == train_dataset.class_to_idx, (
    "class folders in test_same_backgrounds differ from train: "
    f"{test_dataset_backgrounds_matched.class_to_idx} vs {train_dataset.class_to_idx}"
)
assert test_dataset_backgrounds_shuffled.class_to_idx == train_dataset.class_to_idx, (
    "class folders in test_swapped_backgrounds differ from train: "
    f"{test_dataset_backgrounds_shuffled.class_to_idx} vs {train_dataset.class_to_idx}"
)

# Only the training data is shuffled; evaluation order does not affect accuracy.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader_backgrounds_matched = DataLoader(test_dataset_backgrounds_matched, batch_size=16, shuffle=False)
test_loader_backgrounds_shuffled = DataLoader(test_dataset_backgrounds_shuffled, batch_size=16, shuffle=False)

In [5]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU,
# and report which one was picked.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(
    f"CUDA is available. Using device: {torch.cuda.get_device_name(0)}"
    if device.type == "cuda"
    else "CUDA not available. Using CPU."
)

CUDA is available. Using device: NVIDIA GeForce RTX 4050 Laptop GPU


In [6]:
# Seed PyTorch's global RNG before anything random happens, so that a fresh
# "Restart & Run All" starts from the same initial weights and the same
# training shuffle order. GPU kernels may still introduce small run-to-run
# differences; this does not promise bit-identical results.
SEED = 42
torch.manual_seed(SEED)

# Network on the selected device, cross-entropy loss on its raw logits,
# and Adam with a learning rate of 1e-3.
model = SimpleShapeCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [7]:
# Train for three epochs. After each epoch, print the loss averaged over the
# number of batches in the training loader.
for epoch in range(3):
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        # Move the batch to the device the model was placed on.
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass and loss.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Clear gradients from the previous step, backpropagate, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss = total_loss + loss.item()
    print(f'Epoch {epoch+1}, Loss: {total_loss/len(train_loader)}')

def count_correct(model, loader, device):
    """Count correct top-1 predictions of ``model`` over ``loader``.

    Puts the model in eval mode and runs without gradient tracking.

    Args:
        model: Module mapping an image batch to per-class scores.
        loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(correct, total)``: the number of samples whose highest-scoring
        class equals the label, and the number of samples seen.

    Raises:
        ValueError: If ``loader`` yields no samples, since an accuracy cannot
            be computed from it.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            # Predicted class = index of the largest score along the class axis.
            preds = model(images).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    if total == 0:
        raise ValueError("evaluation loader produced no samples")
    return correct, total


# Evaluate on both test sets with the same routine, so the two accuracy
# figures are directly comparable.
correct, total = count_correct(model, test_loader_backgrounds_matched, device)
print(f'Accuracy background patterns matched: {100 * correct / total}%')

correct, total = count_correct(model, test_loader_backgrounds_shuffled, device)
print(f'Accuracy background patterns mixed: {100 * correct / total}%')

Epoch 1, Loss: 0.4880156647314628
Epoch 2, Loss: 0.04950924684635053
Epoch 3, Loss: 0.016583456796593964
Accuracy background patterns matched: 99.5%
Accuracy background patterns mixed: 48.166666666666664%
