In [3]:
# notebook playground for bigger dataset
import os
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms
import torch
import matplotlib.pyplot as plt
import cv2 as cv
import numpy as np
import albumentations as A
from torchvision.io import read_image

In [4]:
# Google-style docstring template, kept only as a formatting reference.
# The bare string literal below is not attached to any object; running the
# cell simply echoes it back as the cell output.
"""
    A class representing an employee.
 
    Attributes:
        name (str): The name of the employee.
        age (int): The age of the employee.
        department (str): The department the employee works in.
        salary (float): The salary of the employee.
    """

'\n    A class representing an employee.\n \n    Attributes:\n        name (str): The name of the employee.\n        age (int): The age of the employee.\n        department (str): The department the employee works in.\n        salary (float): The salary of the employee.\n    '

In [5]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [6]:
# Human-readable class names.
# NOTE(review): presumably list position == <class_ID> in the label files — confirm.
classes_labels = [
    'bicycle',
    'bus',
    'car',
    'motorbike',
    'person',
]

In [86]:
class VtecDataset(Dataset):
    """
    Traffic Detection Dataset

    Every ``.jpg`` file in ``img_dir`` is paired with a text file of the same
    stem in ``label_dir``. Each non-blank line of a label file has the form
        <class_ID> <x_center> <y_center> <width> <height>
    """

    def __init__(self, img_dir, label_dir, transform=None, target_transform=None):
        """
        Attributes:
            img_dir (string): Path to the image directory
            label_dir (string): Path to the label directory
            transform (callable, optional): Optional transform applied to the image only
            target_transform (callable, optional): Optional transform applied to the
                label dictionary returned by ``get_labels``
        """
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.transform = transform
        self.target_transform = target_transform
        # os.listdir yields entries in arbitrary order; sorting makes index i
        # refer to the same image on every run.
        self.images = sorted(f for f in os.listdir(img_dir) if f.endswith('.jpg'))

    def __len__(self):
        """Returns the number of ``.jpg`` images found in ``img_dir``."""
        return len(self.images)

    def __getitem__(self, i):
        """
        Loads the i-th sample.

        Returns:
            tuple: ``(image, labels)`` where ``image`` is the result of
            ``read_image`` (after ``transform``, if given) and ``labels`` is the
            dictionary built by ``get_labels`` (after ``target_transform``, if given).
        """
        image_name = self.images[i]
        image = read_image(os.path.join(self.img_dir, image_name))

        # Swap only the final extension; str.replace would also rewrite any
        # earlier '.jpg' that happens to occur inside the file name.
        label_name = os.path.splitext(image_name)[0] + '.txt'
        labels = self.get_labels(os.path.join(self.label_dir, label_name))

        # The image transform never sees the boxes, so it must not move pixels
        # around unless the boxes are adjusted separately.
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            labels = self.target_transform(labels)

        return image, labels

    def get_labels(self, label_path):
        """
        Loads labels for the corresponding image and returns a dictionary
        with keys "boxes" and "labels".

        Args:
            label_path (string): Path to the label text file

        Returns:
            dict: "boxes" is a float32 tensor of shape (N, 4) whose rows are
            [x_center, y_center, width, height]; "labels" is an int64 tensor of
            shape (N,) with the class ids. N is 0 for a file with no annotations.

        Raises:
            ValueError: If a non-blank line does not hold exactly five numbers.
        """
        boxes = []
        class_ids = []
        with open(label_path, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                fields = line.split()
                if not fields:
                    # Blank lines (e.g. a trailing newline) carry no annotation.
                    continue
                if len(fields) != 5:
                    raise ValueError(
                        f"{label_path}:{line_no}: expected 5 values, got {len(fields)}"
                    )
                class_id, x_center, y_center, width, height = map(float, fields)
                boxes.append([x_center, y_center, width, height])
                class_ids.append(int(class_id))

        return {
            # reshape keeps the (N, 4) layout even when there are no boxes
            "boxes": torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            "labels": torch.tensor(class_ids, dtype=torch.long),
        }

In [61]:
# Dataset locations (relative paths, resolved against the current working directory).
train_img_dir = "../data/DetectionDataset/train/images/"
train_label_dir = "../data/DetectionDataset/train/labels/"
test_img_dir = "../data/DetectionDataset/test/images/"
# Labels must come from labels/, not images/; assumes the test split mirrors
# the train split's images/ + labels/ layout.
test_label_dir = "../data/DetectionDataset/test/labels/"

In [78]:
def collate_fn(batch):
    """
    Collate (image, target) samples whose targets differ in length.

    Images are stacked along a new leading batch dimension; the per-sample
    "boxes" and "labels" entries are left unstacked and returned as lists.
    """
    image_list, box_list, class_list = [], [], []

    for image, target in batch:
        image_list.append(image)
        box_list.append(target["boxes"])
        class_list.append(target["labels"])

    # torch.stack requires every image in the batch to share one shape.
    return torch.stack(image_list, dim=0), {"boxes": box_list, "labels": class_list}

In [94]:
# Normalize takes (mean, std, inplace); the third positional slot is the
# boolean `inplace` flag, not a statistic, so the stray 0.2 is dropped and
# mean/std are named explicitly.
# NOTE(review): train_transforms is not passed to the dataset below. Normalize
# expects a floating-point tensor, so the images would need a dtype conversion
# before this transform can be wired in — confirm the intended pipeline.
train_transforms = transforms.Compose([transforms.Normalize(mean=0.1, std=0.5)])

train_dataset = VtecDataset(train_img_dir, train_label_dir)
# collate_fn keeps the per-image boxes/labels as lists instead of stacking them.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=collate_fn,
)

In [95]:
# Fetch a single batch from a fresh iterator: the first element is the batched
# images, the second the dict of per-image "boxes" and "labels" lists.
train_features, train_labels = next(iter(train_dataloader))

In [99]:
# Display the batched image tensor fetched above.
train_features

tensor([[[[ 64,  50,  43,  ...,  88, 100, 106],
          [ 64,  51,  44,  ...,  89,  95,  99],
          [ 64,  53,  46,  ...,  80,  69,  65],
          ...,
          [206, 189, 199,  ..., 139, 124, 127],
          [216, 203, 215,  ..., 134, 119, 121],
          [201, 194, 208,  ..., 139, 123, 107]],

         [[111,  97,  92,  ...,  88, 100, 106],
          [111,  98,  93,  ...,  89,  95,  99],
          [111, 100,  95,  ...,  80,  69,  65],
          ...,
          [207, 190, 200,  ..., 140, 125, 128],
          [217, 204, 216,  ..., 135, 120, 122],
          [202, 195, 209,  ..., 140, 124, 108]],

         [[119, 105,  97,  ...,  88, 100, 106],
          [119, 106,  98,  ...,  89,  95,  99],
          [119, 108, 100,  ...,  80,  69,  65],
          ...,
          [191, 174, 186,  ..., 144, 129, 132],
          [201, 188, 202,  ..., 139, 124, 126],
          [186, 179, 195,  ..., 144, 128, 112]]],


        [[[ 37,  43,  49,  ...,  58,  85,  78],
          [ 41,  45,  49,  ...,   0

In [97]:
# Display the target dict ("boxes" and "labels" lists, one entry per image).
train_labels

{'boxes': [tensor([[0.7516, 0.5844, 0.1922, 0.3422],
          [0.4234, 0.2328, 0.1172, 0.2313],
          [0.2875, 0.7305, 0.1016, 0.1250]]),
  tensor([[0.3727, 0.7141, 0.0297, 0.1469],
          [0.1336, 0.4844, 0.0219, 0.0969],
          [0.6586, 0.8211, 0.1281, 0.2438],
          [0.7656, 0.6156, 0.2219, 0.3953],
          [0.5086, 0.7070, 0.0312, 0.1594],
          [0.5641, 0.6672, 0.0281, 0.1484],
          [0.5672, 0.5664, 0.0312, 0.1344],
          [0.6203, 0.3977, 0.0625, 0.1078],
          [0.5227, 0.4078, 0.0609, 0.1172],
          [0.5406, 0.2852, 0.0516, 0.0891],
          [0.5047, 0.2406, 0.0516, 0.0766],
          [0.4617, 0.2070, 0.0125, 0.0531],
          [0.6133, 0.2523, 0.0531, 0.0875],
          [0.5891, 0.1922, 0.0391, 0.0625],
          [0.6445, 0.1492, 0.0344, 0.0625],
          [0.6672, 0.0977, 0.0281, 0.0453],
          [0.6344, 0.0805, 0.0266, 0.0391],
          [0.6492, 0.0445, 0.0266, 0.0359],
          [0.6406, 0.0203, 0.0203, 0.0312],
          [0.6852, 0.