In [8]:
#imports
import os
import time
import copy
import json
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from sklearn.model_selection import train_test_split


from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
cudnn.benchmark = True

Using device: cuda


In [9]:
# train dataset


class TrainLegibilityDataset(Dataset):
    def __init__(self, txt_file, img_dir, transform=None, isBalanced=False):

        self.img_dir = img_dir
        self.transform = transform

        # Read CSV file
        self.img_labels = pd.read_csv(txt_file, header=None, names=["image", "label"])


       # balancing the datatset to ensure thatlegible and illegible images are used in a balanced way to train the legibility classifier
        if isBalanced:
            legible = self.img_labels[self.img_labels["label"] == 1]
            count_legible = len(legible)
            illegible = self.img_labels[self.img_labels["label"] == 0]
            # Sample from the larger class to match smaller class
            if len(illegible) > count_legible:
                illegible = illegible.sample(n=count_legible, random_state=42)
            self.img_labels = pd.concat([legible, illegible])
            print(f"Balanced dataset: {count_legible} legible vs {len(self.img_labels) - count_legible} non-legible")
        else:
            legible_count = (self.img_labels["label"] == 1).sum()
            total = len(self.img_labels)
            print(f"As-is dataset: {legible_count} legible out of {total} total samples.")

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        row = self.img_labels.iloc[idx]
        img_filename = row["image"]
        label = row["label"]

        img_path = os.path.join(self.img_dir, img_filename)
        image = Image.open(img_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        # Return (image, label, filename)
        return image, label, img_filename






In [10]:

#resnet
class LegibilityClassifier34(nn.Module):
    def __init__(self, finetune=False):

        super(LegibilityClassifier34, self).__init__()
        self.model_ft = models.resnet34(pretrained=True)
        if finetune:
            for param in self.model_ft.parameters():
                param.requires_grad = False

        num_ftrs = self.model_ft.fc.in_features
        self.model_ft.fc = nn.Sequential(
            nn.Linear(num_ftrs, 1),
            nn.Dropout(0.5)
        )

        # We apply sigmoid in forward() to get probabilities
    def forward(self, x):
        x = self.model_ft(x)
        x = torch.sigmoid(x)
        return x


In [11]:
# training
def train_model(model, dataloaders, dataset_sizes, criterion, optimizer, scheduler, num_epochs):

    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f"Epoch {epoch}/{num_epochs - 1}")
        print("-" * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            # progress bar
            phase_bar = tqdm(dataloaders[phase], desc=f"{phase.capitalize()} Phase", leave=False,dynamic_ncols=True)

            for inputs, labels, _ in phase_bar:
                # Convert labels to float, shape [batch_size, 1]
                labels = torch.tensor(labels, dtype=torch.float).view(-1, 1)
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)  # shape: [batch_size, 1]
                    loss = criterion(outputs, labels)

                    # Predictions: threshold at 0.5
                    preds = (outputs >= 0.5).float()


                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                phase_bar.set_postfix(loss=loss.item())

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            print(f"{phase} Loss: {epoch_loss:.4f}  Acc: {epoch_acc:.4f}")

            # Save best weights
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        print()

    time_elapsed = time.time() - since
    print(f"Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s")
    print(f"Best val Acc: {best_acc:.4f}")

    # Load best model weights
    model.load_state_dict(best_model_wts)
    return model

#evaluate

def evaluate_model(model, dataloader, dataset_size):

    model.eval()
    running_corrects = 0
    all_preds = []
    all_labels = []
    all_filenames = []

    with torch.no_grad():
        for inputs, labels, filenames in tqdm(dataloader):
            labels = torch.tensor(labels, dtype=torch.float).view(-1, 1)
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            preds = (outputs >= 0.5).float()

            running_corrects += torch.sum(preds == labels.data)
            all_preds.extend(preds.cpu().numpy().flatten().tolist())
            all_labels.extend(labels.cpu().numpy().flatten().tolist())
            all_filenames.extend(filenames)

    accuracy = running_corrects.double() / dataset_size
    return accuracy.item(), all_preds, all_labels, all_filenames

In [12]:

data_root = "/content/SoccerNetLegibility"

train_txt = os.path.join(data_root, "train", "train_gt.txt")
train_img_dir = os.path.join(data_root, "train", "images")

full_df = pd.read_csv(train_txt, header=None, names=["image", "label"])

train_df, val_df = train_test_split(
    full_df,
    test_size=0.1,
    random_state=42,
    stratify=full_df["label"]  # keeps class distribution similar
)

# Write these to temporary CSV files for our Dataset class
train_df.to_csv("train_split.txt", index=False, header=False)
val_df.to_csv("val_split.txt", index=False, header=False)


# Transforms (for 'resnet')
# We'll define them here, but you could also inline them in the dataset classes
train_transform = transforms.Compose([
    transforms.RandomGrayscale(),
    transforms.ColorJitter(brightness=0.5, hue=0.3),
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Create Datasets
train_dataset = TrainLegibilityDataset(
    txt_file=train_txt,
    img_dir=train_img_dir,
    transform=train_transform,
    isBalanced=True  # If you want balanced classes
)

val_dataset = TrainLegibilityDataset(
    txt_file="val_split.txt",
    img_dir=train_img_dir,
    transform=val_transform,
    isBalanced=False
)

# Create DataLoaders
batch_size = 4  # Adjust if you run into memory issues
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader   = DataLoader(val_dataset,   batch_size=batch_size, shuffle=False, num_workers=2)

dataloaders = {
    'train': train_loader,
    'val': val_loader
}
dataset_sizes = {
    'train': len(train_dataset),
    'val': len(val_dataset)
}

print("Number of training samples:", dataset_sizes['train'])
print("Number of validation samples:", dataset_sizes['val'])

# Instantiate the ResNet-34 model
model = LegibilityClassifier34(finetune=False)
model = model.to(device)

# Define loss, optimizer, and learning rate scheduler
criterion = nn.BCELoss()  # since we have a final sigmoid in the model
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# Train the model
num_epochs = 5
model = train_model(model, dataloaders, dataset_sizes, criterion, optimizer, lr_scheduler, num_epochs)

# Save the best model weights
model_save_path = "resnet34_legibility_soccernet_best.pth"
torch.save(model.state_dict(), model_save_path)
print(f"Best model weights saved to: {model_save_path}")


Balanced dataset: 12422 legible vs 12408 non-legible
As-is dataset: 1242 legible out of 2483 total samples.
Number of training samples: 24830
Number of validation samples: 2483




Epoch 0/4
----------


  labels = torch.tensor(labels, dtype=torch.float).view(-1, 1)


train Loss: 0.4396  Acc: 0.7243




val Loss: 0.0587  Acc: 0.9774

Epoch 1/4
----------




train Loss: 0.3684  Acc: 0.7444




val Loss: 0.0615  Acc: 0.9932

Epoch 2/4
----------




train Loss: 0.3647  Acc: 0.7451




val Loss: 0.0867  Acc: 0.9940

Epoch 3/4
----------




train Loss: 0.3625  Acc: 0.7480




val Loss: 0.0252  Acc: 0.9948

Epoch 4/4
----------




train Loss: 0.3558  Acc: 0.7488




val Loss: 0.1198  Acc: 0.9895

Training complete in 24m 5s
Best val Acc: 0.9948
Best model weights saved to: resnet34_legibility_soccernet_best.pth
