In [1]:
"""Imports."""

from pathlib import Path
from PIL import Image

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader, random_split
from torchvision import models, transforms

In [2]:
"""Specify which device (CPU or GPU) to use"""

if torch.cuda.is_available():  # if we have a GPU and its available to pytorch, use it
    device = torch.device("cuda")
    print(f"Using GPU device: {torch.cuda.get_device_name(device)}\n")
else:  # else, use cpu
    device = torch.device("cpu")
    print("Using CPU.\n")

Using GPU device: NVIDIA GeForce RTX 3090



In [3]:
"""Initialize target outputs: read our labels from a file and convert them to a 2-d tensor."""

labels_path = Path(r"C:\Users\jai\veo_nu\data\labels\Initial_combined_labels.csv")  # path to labels file
labels_df = pd.read_csv(labels_path)  # read the labels file into a pandas dataframe table
display(labels_df.head())  # display the first few rows of the labels dataframe

Unnamed: 0,Frame,Possession,Set piece
0,410,1,0
1,420,1,0
2,430,1,0
3,440,1,0
4,450,1,0


In [4]:
Y = labels_df[["Possession", "Set piece"]].values  # extract the "Possession" and "Set piece" values
Y = torch.from_numpy(Y).float().to(device)  # convert the extracted values to a 2-d tensor
print(Y.shape)  # print the shape of the initialized target outputs
N, n_classes = Y.shape[0], Y.shape[1]  # number of samples, number of classes

torch.Size([680, 2])


In [5]:
"""Initialize inputs: convert the frame images to a list of 3-d tensors (width X height X rgb)."""

frames_path = Path(r"C:\Users\jai\veo_nu\data\initial_combined_frames")  # path to image directory
n_channels, height, width = 3, 224, 224  # initialize the dimensions of the frames
X = torch.empty((N, n_channels, height, width))  # initialize a tensor that will store all frames
print(X.shape)  # print shape of initialized inputs

torch.Size([680, 3, 224, 224])


In [6]:
"""Fill in `X` frame-by-frame."""

for i, file in enumerate(frames_path.glob("*.png")):  # find each image file in `frames_path` directory
    with Image.open(file).convert("RGB") as img:  # open the image file and convert to RGB
        to_tensor = transforms.ToTensor()  # initialize a ToTensor conversion object
        resize = transforms.Resize((height, width))  # initialize a Resize object
        img_tensor = to_tensor(img).float()  # use the ToTensor object to convert the image to a tensor (will be normalized pixel values between 0-1)
        img_tensor = resize(img_tensor)  # use the Resize object to resize the tensor to the specified dimensions
        X[i, :] = img_tensor  # add the tensor of the current frame to our list of tensors



In [20]:
"""Create Datasets and DataLoader."""

# Create training, testing, and validation datasets.
dataset = TensorDataset(X, Y)
train_data, val_data = random_split(dataset, [0.90, 0.10])
# View training and validation data subsets.
print(train_data[:][0].shape, train_data[:][1].shape, val_data[:][0].shape, val_data[:][1].shape)


torch.Size([612, 3, 224, 224]) torch.Size([612, 2]) torch.Size([68, 3, 224, 224]) torch.Size([68, 2])


In [11]:
"""Create function to perform gradient centralization during training."""

def apply_gradient_centralization(optimizer):
    """Applies gradient centralization to the optimizer.
    
    This function should be called before optimizer.step() in the training loop.
    """
    for group in optimizer.param_groups:
        for param in group['params']:
            if param.grad is not None:
                # Compute the mean of the gradient
                grad_mean = param.grad.data.mean(dim=tuple(range(1, len(param.grad.shape))), keepdim=True)
                # Centralize the gradient
                param.grad.data -= grad_mean

In [12]:
"""Create function to train the model."""

def train(
    model: nn.Module,  # model
    train_loader: DataLoader,  # batched dataset for training
    val_loader: DataLoader,  # batched dataset for validation
    optimizer: optim,  # optimizer
    loss_fn: nn.modules.loss,  # loss function
    max_epochs: int = 5,  # max n training epochs
    val_check_interval: int = 10,  # check val loss every `val_check_interval` batches
    print_loss_interval: int = 10,  # print loss every `print_loss` batches
) -> tuple[torch.Tensor, np.ndarray, np.ndarray]:  # -> loss, train_losses, val_losses
    """Trains a model, returns loss."""
    # <s Create Trackers
    best_val_loss, val_loss = np.inf, np.inf
    patience_ct = 0
    train_losses = np.zeros((max_epochs + 1) * len(train_loader))
    val_losses = np.zeros((max_epochs + 1) * len(train_loader) // val_check_interval)
    train_losses_i, val_losses_i = 0, 0
    # /s>

    # <s Go through training and validation loop
    for epoch in range(max_epochs):  # epoch is all frames in our "train" dataset
        for i, (x_train, y_train) in enumerate(train_loader):  # batch of frames and labels
            # <ss Model training.
            model.train()  # set model to training mode (which means it's computing gradients)
            optimizer.zero_grad()  # set all gradients to zero for the current step
            out = model(x_train)  # forward pass through the model
            loss = loss_fn(out, y_train)  # compute loss
            loss.backward()  # backward pass back through the model to compute gradients
            apply_gradient_centralization(optimizer)  # normalizing gradients before performing the update step
            optimizer.step()  # update model parameters based on gradients
            if print_loss_interval and i % print_loss_interval == 0:
                print(f"Epoch {epoch + 1}:  Batch {i + 1}: {loss.item()=}  {val_loss=}")
            train_losses[train_losses_i], train_losses_i = loss.item(), train_losses_i + 1
            # /ss>
            # <ss Model validation (for early stopping).
            if i % val_check_interval == 0:
                model.eval()  # set model to eval mode (doesn't compute gradients)
                with torch.no_grad():
                    x_val, y_val = next(iter(val_loader))
                    val_loss = loss_fn(model(x_val), y_val).item()
                    patience_ct = 0 if val_loss < best_val_loss else patience_ct + 1
                    best_val_loss = min(best_val_loss, val_loss)
                    val_losses[val_losses_i], val_losses_i = val_loss, val_losses_i + 1
                if patience_ct >= patience_thresh:
                    print("Early stopping.")
                    print(f"Epoch {epoch + 1}:  Batch {i + 1}: {loss.item()=}  {val_loss=}")
                    return loss, train_losses[train_losses != 0], val_losses[val_losses != 0]
            # /ss>
    # /s>
    print("Finished training:")
    print(f"Epoch {epoch + 1}:  Batch {i + 1}: {loss.item()=}  {val_loss=}")
    return loss, train_losses[train_losses != 0], val_losses[val_losses != 0]

In [13]:
"""Load in initial pretrained model."""

# Load in model that was used for AlexNet
# Possible pretrained models to try: DenseNet121_Weights, DenseNet169_Weights, ResNet50_Weights, ResNet101_Weights

model = models.densenet121(weights=models.DenseNet121_Weights.DEFAULT)
print(model)


DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

In [14]:
"""Configure model (just change last layer in model)"""

dropout_rate = 0.2  # 20% cell removal to help with generalization

# Modify the classifier to output 2 probabilities
model.classifier = nn.Sequential(
    nn.Dropout(dropout_rate),
    nn.Linear(model.classifier.in_features, n_classes),
    nn.Sigmoid()  # Use sigmoid for binary multiclass, multilabel classification
)

In [15]:
"""Train model."""

shuffle = True if device == "cpu" else False
batch_size = 32
learning_rate = 0.02

# Create data loaders
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=shuffle)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=shuffle)
# Set loss function and optimizer
loss_fn = nn.BCELoss()
# lr = learning rate: scale gradients down before using them to update model parameters
optimizer = optim.SGD(model.parameters(), lr=0.02, weight_decay=1e-7, momentum=0.5, nesterov=True)
# Train
loss, train_losses, val_losses = train(
    model, train_loader, val_loader, optimizer, loss_fn, max_epochs=2,
)

Epoch 1:  Batch 1: loss.item()=0.7061241269111633  val_loss=inf
Epoch 1:  Batch 11: loss.item()=0.5424782037734985  val_loss=0.417776495218277
Finished training:
Epoch 1:  Batch 20: loss.item()=0.3121889531612396  val_loss=0.3161523938179016


In [None]:
# Plot training and validation losses

n_plot_batches = len(train_losses) if len(train_losses) < len(val_losses) else len(val_losses)

fig, ax = plt.subplots()
ax.plot(train_losses[0:n_plot_batches], label="Train")
ax.plot(val_losses[0:n_plot_batches], label="Val")
ax.legend()
ax.set_xlabel("Batch")
ax.set_ylabel("Loss")
ax.set_title("Training and Validation Losses")


In [None]:
# View val images and see how it performed