# Action Recognition with Custom CNN

In [1]:
from torch.utils.data import DataLoader
from main import Stanford40Dataset
from torchvision import transforms
import torch

## Prepare image transformation and constants

In [2]:
# Transforms and constant definition

# Seed PyTorch's global RNG so that runs are repeatable after
# "Restart Kernel -> Run All" as far as PyTorch's own random number
# generation is concerned (some CUDA kernels may remain nondeterministic).
SEED = 42
torch.manual_seed(SEED)

# Training-time augmentation pipeline: random crop resized to 224x224,
# random horizontal flip, colour jitter applied with probability 0.8,
# random grayscale conversion with probability 0.2, then tensor conversion
# and per-channel normalisation.
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomApply([
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)], p=0.8
    ),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
    # NOTE(review): these per-channel mean/std values look like the commonly
    # quoted CIFAR-10 statistics -- confirm they are intended for this dataset.
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Root directory handed to the dataset class as `data_dir`
# (expected layout per the original note: data/train and data/test).
data_dir = 'data'

# Training hyperparameters.
LR = 1e-3           # learning rate passed to the optimizer
BATCH_SIZE = 196    # mini-batch size used by every DataLoader
EPOCHS = 50         # number of passes of the main training loop
TRAIN_SPLIT = 0.75  # fraction of the train split kept for training
VAL_SPLIT = 1 - TRAIN_SPLIT  # remaining fraction, held out for validation

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


## Load dataset

In [3]:
from torch.utils.data import random_split

# Deterministic preprocessing for evaluation: the training `transform` applies
# random crops/flips/colour changes, which would make test metrics noisy and
# non-repeatable. Reuse the training pipeline's Normalize step so that train
# and test inputs share exactly the same statistics.
normalize = next(t for t in transform.transforms if isinstance(t, transforms.Normalize))
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

# Instantiate the datasets: augmentation for training, fixed preprocessing for test.
train_dataset = Stanford40Dataset(data_dir=data_dir, split='train', transform=transform)
test_dataset = Stanford40Dataset(data_dir=data_dir, split='test', transform=eval_transform)

# Split the train split into train/validation subsets. The validation size is
# the remainder, so the two lengths always sum to len(train_dataset) (which
# random_split requires) even when the fractions do not divide evenly.
numTrainSamples = int(len(train_dataset) * TRAIN_SPLIT)
numValSamples = len(train_dataset) - numTrainSamples
(trainData, valData) = random_split(train_dataset,
                                    [numTrainSamples, numValSamples],
                                    generator=torch.Generator().manual_seed(42))
# NOTE(review): valData is a subset of `train_dataset`, so it presumably still
# goes through the augmenting `transform` -- confirm whether validation should
# use a deterministic pipeline as well.

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(trainData, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
val_loader = DataLoader(valData, batch_size=BATCH_SIZE, shuffle=False)


Loaded 4000 images from train split.
Loaded 5532 images from test split.


## Instantiate Custom CNN model

In [None]:
from CustomCNN import CustomCNN
from torch import optim, nn

# Build the 40-class network, then place it on the selected device.
model = CustomCNN(num_classes=40)
model = model.to(device)

# Show the model's printed representation.
print(model)

# Cross-entropy loss, and Adam over all model parameters with learning rate LR.
loss_criteria = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LR)


## Training loop

In [None]:
from TrainTestUtils import train, validate, plot_loss

# Per-epoch history handed to plot_loss once training has finished.
epoch_nums, training_loss, validation_loss = [], [], []

for epoch in range(1, EPOCHS + 1):
    print(f"Epoch {epoch}/{EPOCHS}")

    # One call to train() on the training loader, then one call to
    # validate() on the validation loader.
    train_loss = train(model, device, loss_criteria, train_loader, optimizer, epoch)
    val_loss = validate(model, device, loss_criteria, val_loader)

    # Record this epoch's number and both losses.
    for series, value in (
        (epoch_nums, epoch),
        (training_loss, train_loss),
        (validation_loss, val_loss),
    ):
        series.append(value)

# Plot training vs. validation loss against the epoch number.
plot_loss(epoch_nums, training_loss, validation_loss)

## Testing (performing predictions)

In [None]:
from TrainTestUtils import make_predictions, compute_accuracy

# Run the current `model` over the test loader and collect the ground-truth
# labels alongside the model's predictions.
print("Getting predictions from test set...")
test_outputs = make_predictions(model, test_loader, device)
truelabels, predictions = test_outputs

# Compare predictions with the true labels and report the result.
accuracy = compute_accuracy(truelabels, predictions)
print(f"Test Set Accuracy: {accuracy:.2f}%")

# Exploration of hyperparameters with GridSearch

In [None]:
import itertools
from CustomCNN import CustomCNN
from TrainTestUtils import train, validate, make_predictions, compute_accuracy
import torch.optim as optim
import torch.nn as nn

# Parameter grid for grid search: every combination of the listed values is
# trained from scratch and scored on the validation loader.
param_grid = {
    'num_layers': [2, 3, 4],
    'base_filters': [12, 24, 48],
    'kernel_size': [3, 5],
    'pool_size': [2, 3],
    'lr': [0.001, 0.0001],
    'epochs': [15]
}

# Keep the key order next to the value tuples so each combination can be
# addressed by name instead of by position.
param_names = list(param_grid)
param_combinations = list(itertools.product(*param_grid.values()))

print(f"Testing {len(param_combinations)} parameter combinations...")

# Perform grid search
best_params = None
best_accuracy = 0.0
grid_results = []  # one record per combination, for later inspection

# The loss function is identical for every combination, so build it once.
# Search-specific names are used throughout so that the `model`, `optimizer`,
# `loss_criteria`, `accuracy`, ... globals from earlier cells are not overwritten.
search_criterion = nn.CrossEntropyLoss()

for i, params in enumerate(param_combinations):
    config = dict(zip(param_names, params))

    print(f"Testing combination {i + 1}/{len(param_combinations)}: {params}")

    candidate_model = CustomCNN(
        num_classes=40,
        num_layers=config['num_layers'],
        base_filters=config['base_filters'],
        kernel_size=config['kernel_size'],
        pool_size=config['pool_size'],
    ).to(device)

    candidate_optimizer = optim.Adam(candidate_model.parameters(), lr=config['lr'])

    # Epochs are numbered from 1, matching the main training loop.
    candidate_val_loss = None
    for search_epoch in range(1, config['epochs'] + 1):
        train(candidate_model, device, search_criterion, train_loader,
              candidate_optimizer, search_epoch)
        candidate_val_loss = validate(candidate_model, device, search_criterion, val_loader)

    # Score the candidate on the validation loader.
    val_truelabels, val_predictions = make_predictions(candidate_model, val_loader, device)
    val_accuracy = compute_accuracy(val_truelabels, val_predictions)

    print(f"Validation Accuracy: {val_accuracy:.4f}")

    grid_results.append({
        **config,
        'val_loss': candidate_val_loss,
        'val_accuracy': val_accuracy,
    })

    # Always accept the first candidate so best_params is never left as None
    # when every accuracy happens to be 0.
    if best_params is None or val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        best_params = params

print(f"Best Parameters: {best_params}")
print(f"Best Accuracy: {best_accuracy:.4f}")

Testing 72 parameter combinations...
Testing combination 1/72: (2, 12, 3, 2, 0.001, 15)
Epoch: 0


Epoch 0:  88%|████████▊ | 14/16 [00:16<00:02,  1.09s/it, loss=3.79]