# Trying to load a dataset

In [2]:
from torchvision.io import read_image, ImageReadMode
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from PIL import Image
import os
import torch



# this function is for the DataLoader, it makes sure the tensors within a batch are the same dimension (don't ask how, I don't know)
def collate_fn(batch):
    return tuple(zip(*batch))


# this is a class that loads the data, according to how pytorch wants it
class playersDataset(Dataset):
    def __init__(self, folder_path, img_size=416):
        self.root = folder_path
        self.images_folder = os.path.join(folder_path, "images")
        self.labels_folder = os.path.join(folder_path, "annotations")
        
        self.image_files = os.listdir(self.images_folder)
        self.label_files = os.listdir(self.labels_folder)

        self.convert_tensor = transforms.ToTensor()

    def readLabelsFile(self, file_path, index):
        boxes = []
        labels = []
        areas = []
 
        with open(file_path) as f:
            for row in f:
                annotation = [float(x) for x in row.split()]
                #print(annotation)
                labels.append(int(annotation[0]))
                [x0, y0, x1, y1] = annotation[1:5]
                boxes.append([x0, y0, x1, y1])

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)         
        labels = torch.as_tensor(labels, dtype=torch.int64)

        areas = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((len(labels),), dtype=torch.int64)

        target = {
            "boxes" : boxes,
            "labels" : labels,
            "image_id" : torch.tensor(index),
            "area" : areas,
            "iscrowd" : iscrowd
            }

        return target


    # pytorch needs this, it returns a single (image, output) pair
    def __getitem__(self, index):
        # load and format the image file as a tensor
        
        imgPath = os.path.join(self.images_folder, self.image_files[index])
        img = Image.open(imgPath)

        # TEMPORARY - REMOVE THIS LATER, WE SHOULD DECIDE HOW LARGE THE IMAGES ARE
        img = img.resize((1080, 1920))

        input_img = self.convert_tensor(img)

        # load and format the corresponding labels
        labelPath = os.path.join(self.labels_folder, self.label_files[index])
        target = self.readLabelsFile(labelPath, index)
        
        return input_img, target

    # pytorch also needs the length of the dataset
    def __len__(self):
        return len(self.image_files)


In [3]:
import torchvision
import torch
#from torchinfo import summary
from torchvision.models.detection.ssdlite import SSDLite320_MobileNet_V3_Large_Weights

# set the device (GPU is much faster)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
num_classes = 2


# load a model, where the backbone is already trained, and the output layers aren't (at least, this should be the case...)
# also set the number of output classes to the number we need
model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(weights=None, num_classes=num_classes, weights_backbone=True)



device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
num_workers = 1 if torch.cuda.is_available() else 0


batchSize = 16 # LOWER THIS IF NEEDED!

train_dataset = playersDataset("SOD_Dataset")
train_dataloader = DataLoader(train_dataset,  batch_size=batchSize, shuffle=True, num_workers=0,  collate_fn=collate_fn)



## try to see if a training loop works (this absolutely needs changing, bits and pieces are copied off the internet and they do not work well together, but at least the inputs and outputs are working, I think)

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy


model.to(device)


# these need changing, probably
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)



num_epochs = 1

dataset_size = len(train_dataset.image_files)

for epoch in range(num_epochs):
    print(f'Epoch {epoch}/{num_epochs - 1}')
    print('-' * 10)

    # Each epoch has a training and validation phase
    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()  # Set model to training mode
            print("TRAIN")
        else:
            model.eval()   # Set model to evaluate mode
            print("EVAL")

        running_loss = 0.0
        running_corrects = 0

        
        # Iterate over data.
        for i, (images, targets) in enumerate(train_dataloader):

            # zero the parameter gradients
            optimizer.zero_grad()

            # send both the input images and output targets to the device
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # forward
            # track history if only in train
            with torch.set_grad_enabled(phase == 'train'):
                loss_dict = model(images, targets) 
                print(loss_dict)
                losses = sum(loss for loss in loss_dict.values())  # sum the loss for all images of this epoch

                print(losses)
                running_loss += float(losses)
                print(running_loss)


                # backward + optimize only if in training phase
                if phase == 'train':
                    losses.backward()
                    optimizer.step()


        if phase == 'train':
            scheduler.step()

        epoch_loss = running_loss / dataset_size
        #epoch_acc = running_corrects.double() / dataset_size

Epoch 0/0
----------
TRAIN
{'bbox_regression': tensor(5.5312, device='cuda:0', grad_fn=<DivBackward0>), 'classification': tensor(0.1704, device='cuda:0', grad_fn=<DivBackward0>)}
tensor(5.7016, device='cuda:0', grad_fn=<AddBackward0>)
5.701630592346191
{'bbox_regression': tensor(5.4059, device='cuda:0', grad_fn=<DivBackward0>), 'classification': tensor(0.1515, device='cuda:0', grad_fn=<DivBackward0>)}
tensor(5.5574, device='cuda:0', grad_fn=<AddBackward0>)
11.259039878845215
{'bbox_regression': tensor(4.9948, device='cuda:0', grad_fn=<DivBackward0>), 'classification': tensor(0.2397, device='cuda:0', grad_fn=<DivBackward0>)}
tensor(5.2345, device='cuda:0', grad_fn=<AddBackward0>)
16.493540287017822
{'bbox_regression': tensor(5.7730, device='cuda:0', grad_fn=<DivBackward0>), 'classification': tensor(0.1484, device='cuda:0', grad_fn=<DivBackward0>)}
tensor(5.9214, device='cuda:0', grad_fn=<AddBackward0>)
22.414964199066162
{'bbox_regression': tensor(5.3848, device='cuda:0', grad_fn=<DivBa

AttributeError: 'list' object has no attribute 'values'

In [None]:
labels

{'boxes': tensor([[[ 768.,  741.,  821.,  834.],
          [1069.,  531., 1122.,  632.],
          [ 318.,  451.,  370.,  541.],
          [ 689.,  910.,  714.,  934.],
          [1297.,  507., 1335.,  601.],
          [ 883.,  433.,  921.,  517.],
          [1026.,  384., 1069.,  465.],
          [1085.,  419., 1136.,  506.],
          [ 740.,  398.,  787.,  486.],
          [1343.,  500., 1382.,  596.],
          [ 996.,  438., 1041.,  526.],
          [  38.,  492.,   86.,  588.],
          [1446.,  401., 1487.,  489.],
          [ 534.,  458.,  584.,  546.],
          [1231.,  479., 1283.,  569.],
          [ 971.,  473., 1014.,  573.],
          [ 892.,  352.,  937.,  440.],
          [1240.,  564., 1290.,  658.],
          [1399.,  500., 1472.,  558.],
          [ 670.,  809.,  714.,  893.],
          [1190.,  565., 1232.,  665.]]]),
 'labels': tensor([[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]),
 'image_id': tensor([941]),
 'area': tensor([[4929., 5353., 46