In [None]:
%pip install pandas
%pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121
import pandas as pd
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset 
from torch.utils.data import DataLoader
from PIL import Image
import json
import os

In [10]:
# Preparing the data (Load and manage the images and labels)

# Create subclass of the pytorch base custom dataset.
class CardDataset(torch.utils.data.Dataset):
    """Dataset over either single-card images or whole-table screenshots.

    Both resources are looked up in the *parent* directory of ``root_dir``:

    * ``is_table=False`` (default): every file in ``<parent>/Card Images`` is one
      sample. The file name without its extension is the class name, and the
      sample's label is that class's integer index in ``self.classes``.
    * ``is_table=True``: ``<parent>/Tables.json`` is loaded; each entry supplies
      ``'table_image'`` (a path relative to ``root_dir``) and ``'table_label'``.

    Args:
        root_dir: Base directory of the project data.
        transform: Optional callable applied to the RGB ``PIL.Image`` before it
            is returned.
        is_table: Selects the table-screenshot mode described above.

    Attributes (card mode only):
        card_dir: Directory the card images are listed from and opened in.
        card_info: Sorted list of card image file names.
        classes: Sorted list of unique class names (file-name stems).
        class_to_idx: Mapping ``class name -> integer label``.
    """

    def __init__(self, root_dir, transform=None, is_table=False):
        self.root_dir = root_dir
        self.transform = transform
        self.is_table = is_table

        # Build the sibling paths from path components instead of a literal
        # '..\\name' string: the backslash form is an invalid escape sequence
        # and only resolves on Windows.
        parent_dir = os.path.normpath(os.path.join(root_dir, os.pardir))

        if self.is_table:
            # Table metadata: a sequence of {'table_image': ..., 'table_label': ...}.
            with open(os.path.join(parent_dir, 'Tables.json'), 'r') as f:
                self.table_info = json.load(f)
        else:
            # Remember the directory so __getitem__ opens files from the same
            # place they were listed from.
            self.card_dir = os.path.join(parent_dir, 'Card Images')
            # Sorted so that index -> sample is deterministic across runs.
            self.card_info = sorted(os.listdir(self.card_dir))
            self.classes = sorted({os.path.splitext(name)[0] for name in self.card_info})
            self.class_to_idx = {name: i for i, name in enumerate(self.classes)}

    def __len__(self):
        """Return the number of samples (table entries or card image files)."""
        return len(self.table_info) if self.is_table else len(self.card_info)

    def _locate(self, idx):
        """Return ``(image_path, label)`` for sample ``idx`` without opening the image."""
        if self.is_table:
            entry = self.table_info[idx]
            # NOTE(review): the label is returned exactly as stored in Tables.json;
            # the training/evaluation loops call ``target.to(device)``, so it
            # presumably needs to be numeric - confirm against the JSON schema.
            return os.path.join(self.root_dir, entry['table_image']), entry['table_label']

        img_name = self.card_info[idx]
        # splitext handles any extension length, unlike slicing off four characters.
        class_name = os.path.splitext(img_name)[0]
        # Integer class index: string labels cannot be collated into a tensor
        # or consumed by a classification loss.
        return os.path.join(self.card_dir, img_name), self.class_to_idx[class_name]

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is always converted to 3-channel RGB (the network's first
        convolution takes 3 input channels) and then passed through
        ``self.transform`` when one was given.
        """
        img_path, label = self._locate(idx)

        # convert() loads the pixel data, so the file handle can be closed
        # immediately afterwards instead of leaking one handle per sample.
        with Image.open(img_path) as raw:
            img = raw.convert('RGB')

        if self.transform:
            img = self.transform(img)

        return img, label

# Number of classes in the card dataset (52 cards).
num_classes = 52
# Shape of one network input as (channels, height, width). RGB = 3 channels.
# NOTE(review): read here in PyTorch's (C, H, W) order; if "27x38" was meant as
# width x height, swap the last two values (the CNN's flattened size is 320 either way).
input_shape = (3, 27, 38)

# Training transform: take a random crop and rescale it to exactly the spatial
# size the network is built for. A bare `27` would produce square 27x27 crops,
# which flatten to 8*5*5 = 200 features instead of the 8*5*8 = 320 that fc1 expects.
# NOTE(review): `scale` is a fraction of the source image area, so an upper bound
# of 2.0 exceeds the image itself - confirm this is intended (1.0 is the usual maximum).
cards_transform = transforms.Compose([
    transforms.RandomResizedCrop(input_shape[1:], scale=(0.6, 2.0)),
    transforms.ToTensor(),
])

# Table transform: scale the shorter edge to 505 px with bicubic interpolation,
# then convert to a tensor. Uses torchvision's InterpolationMode enum rather than
# the PIL integer constant.
# NOTE(review): the resulting tensors are far larger than `input_shape`, so they
# cannot be fed to a network sized for single cards as-is - confirm how table
# screenshots are meant to be cut into card-sized inputs before evaluation.
table_transform = transforms.Compose([
    transforms.Resize(505, interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.ToTensor(),
])

# CardDataset instances.
root_dir = os.path.abspath('Card Detection')
cards_dataset = CardDataset(root_dir=root_dir, transform=cards_transform)
table_dataset = CardDataset(root_dir=root_dir, transform=table_transform, is_table=True)

# Use the cards_dataset as the training set and table_dataset as the validation set.
train_dataset = cards_dataset
test_dataset = table_dataset

# Loaders that batch the samples for training (shuffled) and testing (fixed order).
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
# Building the model (creating the custom CNN 'CardCNN' by subclassing nn.Module)
class CardCNN(nn.Module):
    """Small two-stage convolutional classifier for card images.

    Layout: conv(3->4, 3x3) -> ReLU -> maxpool(2) -> conv(4->8, 3x3) -> ReLU ->
    maxpool(2) -> flatten -> dropout -> fc(320->256) -> ReLU -> dropout ->
    fc(256->n_classes) -> log_softmax.

    The flattened size of 320 corresponds to an 8-channel 5x8 (or 8x5) feature
    map, i.e. inputs of 3x27x38 (or 3x38x27).

    Args:
        n_classes: Number of output classes. When omitted, the module-level
            ``num_classes`` is used, so ``CardCNN()`` keeps working as before.
    """

    # Features entering fc1: 8 channels x 5 x 8 spatial positions.
    FLAT_FEATURES = 8 * 5 * 8

    def __init__(self, n_classes=None):
        super().__init__()
        if n_classes is None:
            n_classes = num_classes
        # Convolution: (input channels, output channels, kernel size, stride).
        self.conv1 = nn.Conv2d(3, 4, 3, 1)
        # Max pooling halves each spatial dimension: (kernel size, stride).
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(4, 8, 3, 1)
        # Element-wise dropout. It is only ever applied to flattened (N, features)
        # tensors, for which Dropout2d is deprecated; PyTorch names plain dropout
        # as the equivalent for that case.
        self.dropout = nn.Dropout(0.5)
        # Fully connected classifier head: (input size, output size).
        self.fc1 = nn.Linear(self.FLAT_FEATURES, 256)
        self.fc2 = nn.Linear(256, n_classes)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(N, n_classes)``.

        Raises:
            ValueError: If the convolutional stages do not yield exactly
                ``FLAT_FEATURES`` values per sample (wrong input height/width).
        """
        # Two conv -> ReLU -> pool stages extract and downsample local features.
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        # Check the per-sample feature count before flattening. A bare
        # view(-1, 320) would silently fold a wrongly sized batch into a
        # different batch size and only fail later, at the loss.
        channels, height, width = x.shape[-3:]
        if channels * height * width != self.FLAT_FEATURES:
            raise ValueError(
                'CardCNN expected {} features per sample after the conv stages, got {}x{}x{} = {}; '
                'check the input height/width.'.format(
                    self.FLAT_FEATURES, channels, height, width, channels * height * width))
        x = x.view(-1, self.FLAT_FEATURES)

        # Regularise the flattened features before the first dense layer.
        x = self.dropout(x)
        # Hidden dense layer with ReLU non-linearity.
        x = F.relu(self.fc1(x))
        # Second dropout, regularising the hidden representation.
        x = self.dropout(x)
        # Output layer: one score per class.
        x = self.fc2(x)
        # Log-probabilities. Pair with NLLLoss; CrossEntropyLoss also yields the
        # same value because log_softmax is idempotent.
        return F.log_softmax(x, dim=1)

# Seed PyTorch's global RNG before the weights are created so that weight
# initialisation, and anything drawn from that RNG afterwards, repeats on every
# "Restart & Run All". (GPU kernels may still add some nondeterminism.)
torch.manual_seed(42)
model = CardCNN()
print(model)

In [None]:
# Training the model

# Device selection: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Adam optimizer over all model parameters with a fixed learning rate.
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Loss function. The model's forward() already ends in log_softmax, so its outputs
# are log-probabilities and the matching loss is negative log-likelihood.
# CrossEntropyLoss would apply log_softmax a second time; the value is the same
# (log_softmax is idempotent) but the extra pass is redundant.
criterion = nn.NLLLoss()

# Performs the training loop for a number of epochs.
def train(model, device, train_loader, optimizer, epoch, loss_fn=None, log_interval=100):
    """Run one training epoch over ``train_loader``.

    Args:
        model: Network to optimise; switched to training mode.
        device: Device the batches are moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` tensor batches. It must
            support ``len()`` and expose ``.dataset`` for the progress message.
        optimizer: Optimizer holding ``model``'s parameters.
        epoch: Epoch number, used only in the progress message.
        loss_fn: Callable ``loss_fn(output, target) -> scalar tensor``. Defaults
            to the module-level ``criterion`` so existing calls are unchanged.
        log_interval: Print progress every ``log_interval`` batches; ``0`` or
            ``None`` disables printing.
    """
    # Fall back to the notebook-level loss only when none is passed explicitly,
    # so the function does not depend on hidden kernel state.
    if loss_fn is None:
        loss_fn = criterion

    # Training mode enables dropout.
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        # Clear gradients accumulated from the previous batch.
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        # Backpropagate and apply the parameter update.
        loss.backward()
        optimizer.step()
        # loss.item() is the scalar value of the loss tensor.
        if log_interval and batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, batch_idx * len(data), len(train_loader.dataset), 100. * batch_idx / len(train_loader), loss.item()))

In [None]:
# Evaluating the trained model
def test(model, device, test_loader):
    # Sets model in evaluation mode (disables dropout and batch normalization).
    model.eval()
    # Initialization of cumulative loss and correctly predicted samples.
    test_loss = 0
    correct = 0
    # Ensures no gradient calcs are performed during the evaluation, reducing memory consumption.
    with torch.no_grad():
        # Iterates over (image, label) batches in test dataset.
        for data, target in test_loader:
            # Input data and target labels moved to the GPU/CPU.
            data, target = data.to(device), target.to(device)
            # forward pass is performed, obtaining predicted outputs.
            output = model(data)
            # Accumulated test loss between predicted output and target labels.
            test_loss += criterion(output, target).item()
            # Tensor containing the predicted labels by taking the idx of the max value .
            pred = output.argmax(dim=1, keepdim=True)
            # Accumulated correct predictions by comparing predicted labels with target labels.
            correct += pred.eq(target.view_as(pred)).sum().item()
    # Obtains average loss.
    test_loss /= len(test_loader.dataset)
    # Average loss and accuracy are printed.
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(test_loss, correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset)))
# Number of full passes over the training set.
NUM_EPOCHS = 5

# Epochs are numbered 1..NUM_EPOCHS (range's upper bound is exclusive, hence the +1).
for epoch in range(1, NUM_EPOCHS + 1):
    # Train the model for one epoch on the training dataset.
    train(model, device, train_loader, optimizer, epoch)
    # Evaluate the model's performance on the test dataset after each epoch.
    test(model, device, test_loader)