#Step 1: Load the Dataset

In [17]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import cv2
import os

In [20]:
# Load the dataset
def load_dataset(file_path):
    return pd.read_parquet(file_path)

dataset_path = 'easy-500/labels.parquet'
data = load_dataset(dataset_path)

In [21]:
data.head()

Unnamed: 0,image_id,x,y,orientation,radius,class
0,0,269,450,0.0,17,0
1,0,533,299,0.663225,45,1
2,0,539,427,0.610865,46,1
3,0,365,148,0.488692,45,1
4,0,472,136,2.426008,40,1


In [205]:
len(data)

11500

In [22]:
print("\nUnique classes:", data['class'].unique())


Unique classes: [0 1 2]


## Function to preprocess data

To complete the preprocess_data function, we need to adapt our dataset to fit the requirements of the YOLO architecture, but with our unique setup of using bounding circles instead of boxes. The steps include scaling coordinates to our defined grid size, converting player orientations, and preparing the labels for YOLO. Let's break down the process:

Scaling Coordinates: We'll scale the x and y coordinates of each label to match the grid size we've defined. This is necessary because our CNN output will be an S x S grid, and each cell in this grid needs to predict objects within its boundaries.

One-hot Encoding for Classes: Convert the class labels into a one-hot encoded format since we're dealing with a multi-class classification problem (ball, team 1 player, team 2 player).

Label Structure per Grid Cell: For each grid cell, we prepare the labels in the following format:

A confidence score (1 if there's an object, 0 otherwise).
The (x,y) center of the bounding circle, relative to the grid cell.
The radius of the bounding circle.
The orientation of the object (0 for the ball, orientation in radians for players).
The one-hot encoded class of the object.

In [23]:
# Assuming an example image shape and grid size
image_shape = (1024, 1024)  # Example image dimensions
grid_size = 8  # S x S grid

In [24]:
import numpy as np

def preprocess_data(data, image_shape, grid_size):
    """
    Preprocess the data to fit the YOLO model requirements.
    
    Parameters:
    - data: The dataset containing player and ball positions and attributes.
    - image_shape: A tuple of (height, width) of the images.
    - grid_size: The size of the S x S grid to split the image into.
    
    Returns:
    - A preprocessed version of the data suitable for training with YOLO.
    """
    # Initialize an empty array for the preprocessed labels
    grid_cell_size = (image_shape[0] / grid_size, image_shape[1] / grid_size)
    labels = np.zeros((len(data), grid_size, grid_size, 8))  # 6 for confidence, x, y, radius, orientation, and 3-class one-hot
    
    for idx, row in data.iterrows():
        # Scale coordinates
        grid_x = int(row['x'] / grid_cell_size[1])
        grid_y = int(row['y'] / grid_cell_size[0])
        
        # Ensure grid coordinates are within bounds
        grid_x = min(grid_size - 1, max(0, grid_x))
        grid_y = min(grid_size - 1, max(0, grid_y))
        
        # Relative positions within a grid cell
        rel_x = (row['x'] % grid_cell_size[1]) / grid_cell_size[1]
        rel_y = (row['y'] % grid_cell_size[0]) / grid_cell_size[0]
        radius = row['radius'] / max(grid_cell_size)  # Normalize radius
        
        # One-hot encode the class
        class_onehot = np.zeros(3)  # Assuming 3 classes: ball, team1, team2
        class_onehot[int(row['class'])] = 1
        
        # Set the label for this object
        labels[idx, grid_y, grid_x] = [1, rel_x, rel_y, radius, row['orientation'], *class_onehot]
        
    return labels

preprocessed_labels = preprocess_data(data, image_shape, grid_size)

In [207]:
preprocessed_labels

array([[[[0.        , 0.        , 0.        , ..., 0.        ,
          0.        , 0.        ],
         [0.        , 0.        , 0.        , ..., 0.        ,
          0.        , 0.        ],
         [0.        , 0.        , 0.        , ..., 0.        ,
          0.        , 0.        ],
         ...,
         [0.        , 0.        , 0.        , ..., 0.        ,
          0.        , 0.        ],
         [0.        , 0.        , 0.        , ..., 0.        ,
          0.        , 0.        ],
         [0.        , 0.        , 0.        , ..., 0.        ,
          0.        , 0.        ]],

        [[0.        , 0.        , 0.        , ..., 0.        ,
          0.        , 0.        ],
         [0.        , 0.        , 0.        , ..., 0.        ,
          0.        , 0.        ],
         [0.        , 0.        , 0.        , ..., 0.        ,
          0.        , 0.        ],
         ...,
         [0.        , 0.        , 0.        , ..., 0.        ,
          0.        , 0. 

In [209]:
def process_labels(row, grid_size, B=1, num_classes=3):
    """
    Convert a single row of the dataset to the label format required for training.
    
    Args:
    - row: A Series or a dict containing 'grid_x', 'grid_y', 'x', 'y', 'radius', 'orientation', 'class'.
    - grid_size: The size of the S x S grid.
    - B: The number of bounding boxes per grid cell. Assuming B=1 for simplicity.
    - num_classes: The number of distinct classes, including the ball.
    
    Returns:
    - A tensor representing the label for a single grid cell.
    """
    
    label_tensor = torch.zeros((grid_size, grid_size, B, 6 + num_classes))  # Including one-hot for classes
    grid_x = int(round(row['grid_x']))
    grid_y = int(round(row['grid_y']))
    object_class = int(row['class'])

    try:
        grid_x = int(float(row['grid_x']))  # Handle potential floats
    except ValueError:
        print(f"Error: Invalid grid_x value in row: {row}")
        grid_x = 0  # Assign a default

    try: 
        grid_y = int(float(row['grid_y']))  # Handle potential floats
    except ValueError:
        print(f"Error: Invalid grid_y value in row: {row}")
        grid_y = 0  # Assign a default

    if row['class'] is not None:  # Handle potential missing values
        object_class = int(row['class'])
    else:
        print(f"Error: Missing class value in row: {row}")
        object_class = 0  # Assign a default

    
    if B == 1:  # Simplified for one object per grid cell
        # Confidence score
        label_tensor[grid_y, grid_x, 0, 0] = 1
        # x, y coordinates within grid cell, normalized to [0, 1]
        label_tensor[grid_y, grid_x, 0, 1:3] = torch.tensor([row['x'], row['y']])
        # Radius, normalized as necessary
        label_tensor[grid_y, grid_x, 0, 3] = row['radius']
        # Orientation
        label_tensor[grid_y, grid_x, 0, 4] = row['orientation']
        # One-hot encoding of class
        label_tensor[grid_y, grid_x, 0, 5:(5 + num_classes)] = torch.eye(num_classes)[object_class]
    
    return label_tensor

In [212]:
data.columns

Index(['image_id', 'x', 'y', 'orientation', 'radius', 'class', 'file_path'], dtype='object')

In [210]:
test = process_labels(data, grid_size, B=1, num_classes=3)

KeyError: 'grid_x'

In [27]:
# Define your transformations for data augmentation
transformations = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),  # Convert the PIL Image back to a tensor
    # If you want to normalize, you can include the following line
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [190]:
# Implementing the PyTorch Dataset


class FootballDataset(Dataset):
    def __init__(self, dataframe, image_shape, grid_size, transform=None, target_dim=(224, 224), num_classes=3):
        self.dataframe = dataframe
        self.transform = transform
        self.grid_size = grid_size
        self.target_dim = target_dim
        self.image_paths = self._get_image_paths()  # Placeholder for actual implementation
        self.preprocessed_labels = preprocess_data(dataframe, image_shape, grid_size)
        
        self.B = 1  # Assuming one bounding box per grid cell for simplicity
        self.C = num_classes
     

    def __len__(self):
        return self.dataframe['image_id'].nunique()



    '''def _get_image_paths(self):
        base_dir = "easy-500/"  # Replace with your image directory
        self.dataframe['file_path'] = self.dataframe['image_id'].astype(str).apply(lambda img_id: base_dir + img_id + '.jpg')  # Assuming .jpg images
    
        image_paths = {}  #create an empty dictionary. This provides a place to store the mapping between image_ids and file paths.

        #Inside the loop, for each row, we assign the constructed file path to the image_paths dictionary using the image_id as the key.
        for i, row in self.dataframe.iterrows():
            image_path = row['file_path']
            print(f"Image Path: {image_path}")  # Print the constructed part
            image_paths[row['image_id']] = image_path
        return image_paths
        '''
    def _get_image_paths(self):
        
        image_paths = {}
        for i, row in self.dataframe.iterrows():
            image_id = row['image_id']
            image_path = os.path.join('easy-500', f"{0}.jpg")
            #print(f"Image ID: {image_id}, Image Path: {image_path}")  # This will print out the image path
            
            # Check if the image file exists
            if not os.path.isfile(image_path):
                raise ValueError(f"Image not found: {image_path}")
            
            image_paths[image_id] = image_path
        return image_paths


    def __getitem__(self, idx):
        # This method should be expanded to:
        image_id = self.dataframe.iloc[idx]['image_id'] # 1. Load the image based on 'image_id'.
        image_path = self.image_paths[image_id]

        loaded_image = cv2.imread(image_path)
        
        # Check if the image is loaded correctly
        if loaded_image is not None:
            # Convert from BGR to RGB
            image = cv2.cvtColor(loaded_image, cv2.COLOR_BGR2RGB) 
            
            # Resize the image if necessary
            if self.target_dim is not None:
                image = cv2.resize(image, self.target_dim, interpolation=cv2.INTER_AREA)
        else:
            raise FileNotFoundError(f"The image file {image_path} was not found.")

        '''# Verify path correctness
        if not os.path.exists(image_path):
            raise ValueError(f"Image not found at: {image_path}") 
        
        #image = cv2.imread(image_path) 
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert from BGR to RGB '''

    
        # Resize the image
        if self.target_dim is not None:
            image = cv2.resize(image, self.target_dim, interpolation=cv2.INTER_AREA)

        if self.transform is not None:
            image = self.transform(image)
        else:
            image = ToTensor()(image)

         
    # Process the label information

        row = self.preprocessed_labels[idx]  # Get the corresponding preprocessed labels
        #label_processed = process_labels(row, self.grid_size, self.B, self.C)
    

        # Returns: A tensor representing the label for a single grid cell.
        #label_tensor = torch.zeros((self.grid_size, self.grid_size, self.B, 6))
    

        # Populate the label tensor based on the preprocessed dataframe
        # This includes setting the correct indices for confidence, coordinates, and class info
        label = process_labels(row, self.grid_size, self.B, self.C)
        return {'image': image, 'label': torch.tensor(label, dtype=torch.float32)}


num_classes = 3
# Assuming 'data' has already been loaded and is available
football_dataset = FootballDataset(data, image_shape, grid_size, transform= transformations, num_classes=num_classes )

#dataloader = DataLoader(football_dataset, batch_size=4, shuffle=True)
from torch.utils.data import DataLoader, RandomSampler


sampler = RandomSampler(football_dataset, replacement=True)
data_loader = DataLoader(football_dataset, batch_size=4, sampler=sampler)

Certainly, handling the label data correctly is crucial for training your object detection model. The placeholder for label processing needs to be replaced with logic that converts your dataset's labels into a format compatible with your model's training process, especially considering the specifics of object detection tasks like YOLO, which requires a particular arrangement of label data.

Given the initial description of your project, labels for each object in an image include the object's center coordinates within its grid cell, its radius (scaled appropriately), its orientation (with the orientation of the ball always set to 0), and its class in a one-hot encoded format.

Let's outline a method to prepare these labels. This method assumes that you've preprocessed your dataset to include grid_x and grid_y positions for each object, as described earlier. The label tensor for a single image will have the shape (grid_size, grid_size, B, 6), where B is the number of bounding boxes (or circles, in your case) per grid cell, and 6 stands for the label components (confidence score, x, y, radius, orientation, and class).

Completing the Label Processing
For simplicity, let's assume each grid cell can contain only one object (i.e., B=1). This means for each grid cell that contains an object, you'll set the confidence score to 1, fill in the x and y coordinates relative to the cell, the radius, orientation, and use a one-hot encoding for the class.

In [181]:
print(data.columns)

Index(['image_id', 'x', 'y', 'orientation', 'radius', 'class', 'file_path'], dtype='object')


In [182]:
##Simplified version of a YOLO architecture in PyTorch. We will use a basic CNN as the backbone for feature extraction.

In [191]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Define the CNN Backbone for YOLO
class YOLOCNN(nn.Module):
    def __init__(self, grid_size, num_bounding_boxes, num_classes):
        super(YOLOCNN, self).__init__()
        
        # Parameters
        self.grid_size = grid_size
        self.B = num_bounding_boxes
        self.C = num_classes

        # backbone layers: Layer 1
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Layer 2
        self.conv2 = nn.Conv2d(16, 32, 3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Layer 3
        self.conv3 = nn.Conv2d(32, 64, 3, stride=1, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2)

        # Fully Connected Layers
        self.fc1 = nn.Linear(64 * (self.grid_size // 4) ** 2, 512)

        
        # Adjust the output layer to (B * 5 + C) where 5 includes x, y, radius, confidence, and orientation
        self.fc2 = nn.Linear(512, self.grid_size * self.grid_size * (self.B * 5 + self.C))

    def forward(self, x):
        # Pass through the layers
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))

        # Flatten for the fully connected layers
        x = torch.flatten(x, start_dim=1)

        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        # Reshape the output to (S, S, B*5+C)
        x = x.view(-1, self.grid_size, self.grid_size, (self.B * 5 + self.C))

        # Apply a sigmoid function to the output for the confidence scores
        # and class probabilities to keep them between 0 and 1
        x[..., :self.B * 5] = torch.sigmoid(x[..., :self.B * 5])
        return x

# Example usage:
grid_size = 7  # S x S grid
num_bounding_boxes = 1  # B bounding boxes
num_classes = 3  # C classes



# Create the YOLO-CNN model
yolo_cnn = YOLOCNN(grid_size, num_bounding_boxes, num_classes)

# Let's assume `images` is your batch of input images
#images = torch.rand((batch_size, 3, 448, 448))  # Example input batch

# Get predictions from the model
#predictions = yolo_cnn(images)


In this simplified architecture:

We've set up three convolutional layers, each followed by a max-pooling layer.
Two fully connected layers predict the output vector.
The output is reshaped to have dimensions (S, S, B*5+C) where:
S is the size of the grid,
B is the number of bounding boxes per grid cell,

self.B * 5 because for each bounding box we now predict: x-center, y-center, radius, confidence score, and orientation angle.
self.C for the class predictions (team 1, team 2, and ball).

# Loss function

In [192]:
import torch

def custom_yolo_loss(predictions, targets, S, B, C):
    """
    Custom loss function for YOLO incorporating localization, confidence, classification, and orientation loss.

    Args:
    - predictions: The output from the model (N, S, S, B*5+C) tensor.
    - targets: The labeled data that the output is compared to (N, S, S, B*5+C) tensor.
    - S: Size of grid (SxS).
    - B: Number of bounding boxes.
    - C: Number of classes.

    Returns:
    - loss: Total loss as a scalar.
    """

    # Split the predictions and targets into their respective parts
    pred_boxes = predictions[..., :B*5].reshape(-1, S, S, B, 5)
    pred_classes = predictions[..., B*5:]

    target_boxes = targets[..., :B*5].reshape(-1, S, S, B, 5)
    target_classes = targets[..., B*5:]

    # Localization Loss (x, y centers and radius)
    loc_loss = ((pred_boxes[..., 0:3] - target_boxes[..., 0:3]) ** 2).sum()

    # Confidence Loss
    obj_mask = target_boxes[..., 3] > 0  # Mask for cells with objects
    no_obj_mask = target_boxes[..., 3] == 0  # Mask for cells without objects
    conf_loss = ((pred_boxes[..., 3] - target_boxes[..., 3]) ** 2).sum()
    no_obj_loss = (pred_boxes[..., 3] ** 2)[no_obj_mask].sum()  # Penalize confidence where there should be no object

    # Classification Loss
    class_loss = torch.nn.functional.cross_entropy(pred_classes, target_classes.argmax(-1), reduction='sum')

    # Orientation Loss
    orientation_loss = ((pred_boxes[..., 4] - target_boxes[..., 4]) ** 2).sum()

    # Combine the components of the loss
    total_loss = loc_loss + conf_loss + no_obj_loss + class_loss + orientation_loss

    return total_loss


#predictions = model(images)  # Predictions from the model (dummy variable here)
#targets = torch.randn(predictions.shape)  # Dummy variable for targets
#loss = custom_yolo_loss(predictions, targets, grid_size, num_bounding_boxes, num_classes)


In [193]:
#Step 2: Model, Optimizer, and Loss Function Setup

#Instantiate the model, optimizer, and loss function. We will use the custom_yolo_loss we defined earlier.

model = YOLOCNN(grid_size, num_bounding_boxes, num_classes)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  

Step 3: Training Loop

Train the model using the data loaders and the custom loss function. Note that this is a simplified example that does not include important training aspects like model validation, checkpointing, learning rate scheduling, early stopping, etc.

In [194]:
num_epochs = 25  # Set the number of epochs

# Send the model to the GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0

    for images, labels in data_loader:
        images, labels = images.to(device), labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        predictions = model(images)

        # Calculate the loss
        loss = custom_yolo_loss(predictions, labels, grid_size, num_bounding_boxes, num_classes)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
    
    # Print loss every epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

    # TODO: Add validation, checkpointing, and early stopping

# Save the final trained model
torch.save(model.state_dict(), 'yolo_football_detection.pth')


IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [150]:
# data mismatch
# error while attempting to index or access elements of the input row.
# tensor size not matching
# data passed to 'process_labels' is not in expected format.
# 