In [1]:
from pathlib import Path
from PIL import Image

import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset

In [32]:
"""Initialize target outputs: read our labels from a file and convert them to a 2-d tensor."""

# Location of the CSV holding the per-frame labels.
# NOTE(review): absolute, machine-specific path — this cell only runs on a machine with this exact layout.
labels_path = Path(r"C:\Users\jai\veo_nu\data\labels\Initial_combined_labels.csv")

# Parse the CSV into a DataFrame (one row per labeled frame).
labels_df = pd.read_csv(labels_path)

# Preview the first five rows as a quick sanity check of the parsed columns.
display(labels_df.head(5))

Unnamed: 0,Frame,Possession,Set piece
0,410,1,0
1,420,1,0
2,430,1,0
3,440,1,0
4,450,1,0


In [33]:
# The two label columns that make up each target vector.
target_columns = ["Possession", "Set piece"]

# Pull those columns out as a NumPy array and convert to a float 2-d tensor,
# one row per labeled frame and one column per target.
Y = torch.from_numpy(labels_df[target_columns].values).float()

# Number of samples (total number of combined labeled frames).
N = Y.shape[0]

# Show the shape of the target tensor.
print(Y.shape)

torch.Size([680, 2])


In [37]:
"""Initialize inputs: preallocate a single 4-d tensor (frames X rgb channels X height X width) that will hold every frame image."""

# Directory containing the frame images.
# NOTE(review): absolute, machine-specific path — this cell only runs on a machine with this exact layout.
frames_path = Path(r"C:\Users\jai\veo_nu\data\initial_combined_frames")

# Per-frame dimensions used to size the input tensor.
n_channels = 3
height = 360
width = 640

# Preallocate storage for all N frames, laid out as (frame, channel, height, width).
# `torch.empty` does not initialize the memory, so every row must be written before use.
X = torch.empty(N, n_channels, height, width)

# Show the shape of the preallocated input tensor.
print(X.shape)

torch.Size([680, 3, 360, 640])


In [38]:
# Fill `X` with one (channels, height, width) float tensor per PNG frame in `frames_path`.

# The converter is loop-invariant, so build it once instead of once per image.
to_tensor = transforms.ToTensor()

# `Path.glob` yields entries in arbitrary order; sort so the file -> row mapping is deterministic.
# NOTE(review): row i of `X` is later paired with row i of the labels, which assumes the sorted
# file names follow the same order as the label rows — TODO confirm against the file naming scheme.
frame_files = sorted(frames_path.glob("*.png"))

# `X` was preallocated for exactly N frames with uninitialized memory: fewer files would leave
# garbage rows behind, and more files would run past the end of the tensor.
if len(frame_files) != N:
    raise ValueError(
        f"Expected {N} PNG frames in {frames_path} (one per label row), found {len(frame_files)}"
    )

for i, file in enumerate(frame_files):
    # Use the opened file itself as the context manager so its handle is released
    # even if the conversion fails.
    with Image.open(file) as img:
        img_tensor = to_tensor(img.convert("RGB"))  # convert to RGB, then to a float tensor
    X[i] = img_tensor  # store the current frame in its row of the input tensor

In [None]:
"""Create Dataset and DataLoader."""

class CustomDataset(Dataset):
    """Map-style dataset pairing each frame with its label vector.

    Args:
        images: indexable collection of frames (here the (N, channels, height, width) tensor `X`).
        labels: indexable collection of targets aligned with `images` (here the (N, 2) tensor `Y`).
        transform: optional callable applied to a frame each time it is fetched.

    Raises:
        ValueError: if `images` and `labels` do not contain the same number of items.
    """

    def __init__(self, images, labels, transform=None):
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels must have the same length, got {len(images)} and {len(labels)}"
            )
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of (frame, label) pairs."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the (frame, label) pair at position `idx`, applying `transform` to the frame if set."""
        image = self.images[idx]
        if self.transform is not None:
            image = self.transform(image)
        return image, self.labels[idx]


# Feed the tensors built in the earlier cells: `X` holds the frames, `Y` the targets.
# No per-sample transform is applied.
# NOTE(review): the model cell loads pretrained weights; consider passing a normalization
# transform here if those weights expect normalized inputs — TODO confirm.
dataset = CustomDataset(X, Y, transform=None)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [None]:
"""Configure model"""

# Start from a DenseNet-121 with pretrained weights.
model = models.densenet121(pretrained=True)

# Swap the classifier head for a single linear layer with 2 outputs, each passed through
# a sigmoid so the two scores are independent values in (0, 1) (multi-label, not softmax).
n_features = model.classifier.in_features
model.classifier = nn.Sequential(nn.Linear(n_features, 2), nn.Sigmoid())

# Binary cross-entropy on the sigmoid outputs; Adam updates every model parameter.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [None]:
"""Train model"""

num_epochs = 10  # Define the number of epochs

for epoch in range(num_epochs):
    model.train()  # Set model to training mode

    running_loss = 0.0  # sum of per-sample losses seen this epoch
    n_seen = 0  # number of samples seen this epoch

    # The batch targets are named `targets` (not `labels`) so the loop does not overwrite
    # a notebook-level `labels` variable that other cells may read.
    for inputs, targets in dataloader:
        optimizer.zero_grad()  # Zero the parameter gradients

        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs, targets)  # Calculate loss
        loss.backward()  # Backward pass
        optimizer.step()  # Optimize

        # `criterion` averages over the batch (nn.BCELoss default reduction), so weight by
        # batch size to get a correct per-sample mean even when the last batch is smaller.
        n_batch = inputs.size(0)
        running_loss += loss.item() * n_batch
        n_seen += n_batch

    # Without any batch there is no loss to report; fail with a clear message instead of
    # the NameError the print below would otherwise raise.
    if n_seen == 0:
        raise ValueError("dataloader yielded no batches; nothing to train on")

    # Report the mean loss over the whole epoch rather than only the final mini-batch.
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss / n_seen}')