In [1]:
import json
import os
import random
from os import listdir

import torch
from PIL import Image
from torchvision import transforms


class DataLoader():
    """
    This class uses its attributes to load the training data and transforms it into tensors.
    The tensors are then stored in mini-batches inside the train_data list which is the final
    product of this class. Every call of LoadData() re-initializes the train_data list with
    new tensors from the training data, excluding all images that were loaded by previous calls.
    """

    # Side length (in pixels) of the square every image is resized to.
    IMG_SIZE = 448

    def __init__(self, train_files_path, target_files_path, category_list, split_size, batch_size, train_size):
        """
        Initialize all parameters for loading and transforming the data into tensors.

        Parameters:
            train_files_path (string): The path to the train image folder
            target_files_path (string): The path to the json file containing the image labels
            category_list (list): Reference list to all the label categories for object detection
            split_size (int): Amount of grid cells along each image axis
            batch_size (int): Batch size
            train_size (int): Amount of images which are loaded as training data for one epoch
        """

        self.train_files_path = train_files_path
        self.target_files_path = target_files_path
        self.category_list = category_list
        self.num_classes = len(category_list)
        self.cells = split_size
        self.batch_size = batch_size
        self.train_size = train_size

        self.train_files = [] # Will contain the remaining image names from the folder
        self.target_files = [] # Will contain the json elements with the ground-truth labels

        self.train_data = [] # Will contain tuples with mini-batches of image and label tensors
        self.img_tensors = [] # Used to temporary store samples from a single batch
        self.target_tensors = [] # Used to temporary store samples from a single batch

        # (width, height) of the most recently opened image *before* resizing. It replaces the
        # former module-level `global img` that was used to rescale the bounding boxes.
        self._last_img_size = None

        # Define transform which is applied to every single image to resize and convert it into a tensor
        self.transform = transforms.Compose([
            transforms.Resize((self.IMG_SIZE, self.IMG_SIZE), Image.NEAREST),
            transforms.ToTensor(),
            ])


    def LoadFiles(self):
        """
        First function to be executed.
        Loads the image names and the label file using the respective system path.
        """

        # All image names from the directory are loaded into the list train_files. They are sorted
        # because listdir() returns them in arbitrary order, which would make seeded runs differ
        # between machines.
        self.train_files = sorted(listdir(self.train_files_path))

        # The json file containing the labels is loaded into the list target_files. The context
        # manager makes sure the file handle is closed again.
        with open(self.target_files_path, encoding="utf-8") as f:
            self.target_files = json.load(f)


    def LoadData(self, drop_last=True):
        """
        Transforms the training images and labels into tensors and loads them into batches. Once a batch is
        full, it is stored in the train_data list. Fills the train_data list with batches until the desired
        train_size is reached. Every image that is loaded, is being excluded from future calls of this
        function.

        Parameters:
            drop_last (bool): If True (default) only full batches are produced and the images which
            would not fill a batch stay available for later calls. If False the remaining images are
            stored as one final, smaller batch.

        Raises:
            ValueError: If batch_size is smaller than 1.
        """

        if self.batch_size < 1:
            raise ValueError("batch_size must be at least 1, got {}".format(self.batch_size))

        # Reset the cache
        self.train_data = []
        self.img_tensors = []
        self.target_tensors = []

        # Never try to load more images than are left in the folder listing.
        num_images = max(min(self.train_size, len(self.train_files)), 0)
        if drop_last:
            # Do not consume images that could never end up inside a full batch.
            num_images -= num_images % self.batch_size
        total_batches = -(-num_images // self.batch_size) # Ceiling division

        for _ in range(num_images):
            # Extracts a single random image and the corresponding label, and transforms them into
            # tensors. Both are appended to the img_tensors and target_tensors lists
            self.extract_image_and_label()

            # Store the batch as soon as it is full, so the last full batch is not lost when the
            # loop ends.
            if len(self.img_tensors) >= self.batch_size:
                self._store_batch(total_batches)

        # Only reachable with drop_last=False: keep the remaining samples as a smaller batch.
        if self.img_tensors:
            self._store_batch(total_batches)


    def _store_batch(self, total_batches):
        """
        Moves the temporarily stored samples into train_data as one mini-batch and prints the progress.

        Parameters:
            total_batches (int): Amount of batches the current LoadData() call is going to produce
        """

        # The images are stacked into a single tensor, the targets stay a list of tensors.
        self.train_data.append((torch.stack(self.img_tensors), self.target_tensors))
        self.img_tensors = []
        self.target_tensors = []
        print('Loaded batch ', len(self.train_data), 'of ', total_batches)
        print('Percentage Done: ', round(len(self.train_data)/total_batches*100., 2), '%')
        print('')


    def extract_image_and_label(self):
        """
        Chooses a random image which is then being transformed into a tensor and stored.
        Finds the corresponding label inside the json file which is then being transformed into a tensor
        and stored. Stores both tensors inside the img_tensors and target_tensors lists.
        """

        img_tensor, chosen_image = self.extract_image()
        target_tensor = self.extract_json_label(chosen_image, getattr(self, "_last_img_size", None))

        self.img_tensors.append(img_tensor)
        self.target_tensors.append(target_tensor)


    def extract_image(self):
        """
        Removes a random image name from the train_files list, opens the image and applies the
        transform to it. The original image size is remembered so that the bounding boxes of the
        label can be rescaled afterwards.

        Returns:
            img_tensor (tensor): The tensor which contains the image values
            f (string): The string name of the image file

        Raises:
            IndexError: If there are no image names left inside train_files.
        """

        if not self.train_files:
            raise IndexError("No images left to load; call LoadFiles() to refill the image list.")

        # Popping a random index draws and removes the name in a single step.
        f = self.train_files.pop(random.randrange(len(self.train_files)))

        # os.path.join also works if train_files_path has no trailing separator. The context
        # manager closes the image file once the pixel data has been converted.
        with Image.open(os.path.join(self.train_files_path, f)) as image:
            self._last_img_size = image.size
            img_tensor = self.transform(image) # Apply the transform to the image.
        return img_tensor, f


    def extract_json_label(self, chosen_image, img_size=None):
        """
        Uses the name of the image to find the corresponding json element. Then it extracts the data and
        transforms it into a tensor.

        Parameters:
            chosen_image (string): The name of the image for which the label is needed.
            img_size (tuple): Optional (width, height) of the original image. Defaults to the size of
            the image which was opened last by extract_image().

        Returns:
            target_tensor (tensor): The tensor which contains the image labels

        Raises:
            KeyError: If the label file contains no element for the image.
        """

        img_label = self._label_index().get(chosen_image)
        if img_label is None:
            raise KeyError("No label entry found for image '{}'".format(chosen_image))

        return self.transform_label_to_tensor(img_label, img_size)


    def _label_index(self):
        """
        Returns a dictionary which maps every image name to its json element. The dictionary is
        built once per loaded label list so that not every lookup has to scan the whole list.
        """

        cache = getattr(self, "_label_cache", None)
        if cache is None or cache[0] is not self.target_files:
            index = {}
            for entry in self.target_files:
                # setdefault keeps the first element if a name appears more than once.
                index.setdefault(entry['name'], entry)
            cache = (self.target_files, index)
            self._label_cache = cache
        return cache[1]


    def transform_label_to_tensor(self, img_label, img_size=None):
        """
        Extracts the useful information from the json element and transforms them into a tensor.

        Parameters:
            img_label (dict): A specific json element
            img_size (tuple): Optional (width, height) of the original image, used to rescale the
            bounding boxes to the resized image. Defaults to the size of the image which was opened
            last by extract_image().

        Returns:
            target_tensor (tensor): A tensor of size (5+num_classes,cells,cells) which is used as the target of
            the image. Channel 0 marks cells which contain an object, channels 1-2 hold the box centre
            relative to the cell, channels 3-4 the box width and height relative to the image and the
            remaining channels the one-hot encoded category.

        Raises:
            ValueError: If no image size is given and no image has been opened before.
        """

        if img_size is None:
            img_size = getattr(self, "_last_img_size", None)
        if img_size is None:
            raise ValueError("img_size is required because no image has been opened by extract_image() yet")
        img_width, img_height = img_size

        size = self.IMG_SIZE
        # Size of a single cell. Kept as a float so that grids which do not divide the image size
        # evenly still map every pixel to a valid cell.
        cell_dim = size / self.cells
        last_cell = self.cells - 1

        target_tensor = torch.zeros(5+self.num_classes, self.cells, self.cells) # Here are the information stored

        # Elements without a "labels" entry are treated as images without objects.
        for label in img_label.get("labels") or []:

            # Store the category index if its contained within the category_list.
            category = label["category"]
            if category not in self.category_list:
                continue
            ctg_idx = self.category_list.index(category)

            box = label.get("box2d")
            if box is None:
                continue # No bounding box available for this label

            # Store the bounding box information and rescale it by the resize factor.
            x1 = box["x1"] * size / img_width
            y1 = box["y1"] * size / img_height
            x2 = box["x2"] * size / img_width
            y2 = box["y2"] * size / img_height

            # Transforms the corner bounding box information into a mid bounding box information.
            # The centre is the mean of both corners, which is also correct if the corners are swapped.
            # It is clamped to the image so that it always falls into an existing cell.
            x_mid = min(max((x1 + x2) / 2, 0.0), size)
            y_mid = min(max((y1 + y2) / 2, 0.0), size)
            width = abs(x2 - x1)
            height = abs(y2 - y1)

            # Determines the cell position of the bounding box. A centre which lies exactly on the
            # right or bottom image border belongs to the last cell.
            cell_pos_x = min(int(x_mid // cell_dim), last_cell)
            cell_pos_y = min(int(y_mid // cell_dim), last_cell)

            # Stores the information inside the target_tensor
            if target_tensor[0, cell_pos_y, cell_pos_x] == 1: # Check if the cell already contains an object
                continue
            target_tensor[0, cell_pos_y, cell_pos_x] = 1
            target_tensor[1, cell_pos_y, cell_pos_x] = x_mid / cell_dim - cell_pos_x
            target_tensor[2, cell_pos_y, cell_pos_x] = y_mid / cell_dim - cell_pos_y
            target_tensor[3, cell_pos_y, cell_pos_x] = width / size
            target_tensor[4, cell_pos_y, cell_pos_x] = height / size
            target_tensor[ctg_idx+5, cell_pos_y, cell_pos_x] = 1

        return target_tensor

In [2]:
# Used for testing: configure a loader for the bdd100k validation images and fill its
# first set of mini-batches.
train_files_path = "C:/Users/alens/Desktop/Real-time-Object-Detection-for-Autonomous-Driving-using-Deep-Learning/YOLO v1/bdd100k/images/100k/val/"
target_files_path = "C:/Users/alens/Desktop/Real-time-Object-Detection-for-Autonomous-Driving-using-Deep-Learning/YOLO v1/bdd100k_labels_release/bdd100k/labels/det_v2_val_release.json"

# Label categories; the position inside this list is the class index of the target tensor.
category_list = [
    "other vehicle",
    "pedestrian",
    "traffic light",
    "traffic sign",
    "truck",
    "train",
    "other person",
    "bus",
    "car",
    "rider",
    "motorcycle",
    "bicycle",
    "trailer",
]

split_size = 14   # Grid cells per image axis
batch_size = 64   # Images per mini-batch
train_size = 500  # Images requested per LoadData() call

data = DataLoader(
    train_files_path=train_files_path,
    target_files_path=target_files_path,
    category_list=category_list,
    split_size=split_size,
    batch_size=batch_size,
    train_size=train_size,
)
data.LoadFiles()  # Read the image names and the label file
data.LoadData()   # Build the mini-batches inside data.train_data

Loaded batch  1 of  7
Percentage Done:  14.29 %

Loaded batch  2 of  7
Percentage Done:  28.57 %

Loaded batch  3 of  7
Percentage Done:  42.86 %

Loaded batch  4 of  7
Percentage Done:  57.14 %

Loaded batch  5 of  7
Percentage Done:  71.43 %

Loaded batch  6 of  7
Percentage Done:  85.71 %

Loaded batch  7 of  7
Percentage Done:  100.0 %



In [4]:
# Used for testing
import cv2
import numpy

# Draws the ground-truth boxes and category names of one loaded sample to visually verify
# that the target tensors match the resized images.
color = (0, 255, 0)
thickness = 1
cell_dim = int(448/split_size)
batch_idx = 0
sample_idx = 1

# Look the sample up once instead of repeating the nested indexing on every line.
batch_images, batch_targets = data.train_data[batch_idx]
target = batch_targets[sample_idx] # Tensor of size (5+num_classes, split_size, split_size)

# Convert the (channels, height, width) tensor into a (height, width, channels) array.
image = batch_images[sample_idx].numpy().transpose(1, 2, 0)
# The tensor was created from a PIL image (RGB) while cv2.imshow expects BGR, so the channels
# are swapped here. NOTE: despite its name, img_rgb therefore holds BGR data.
img_rgb = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

for h in range(split_size):
    for w in range(split_size):
        cell = target[:, h, w]
        if cell[0] == 0:
            continue # No object is assigned to this cell

        # Undo the normalization: the centre is stored relative to the cell, the size relative
        # to the whole image.
        centre_x = cell[1].item()*cell_dim + cell_dim*w
        centre_y = cell[2].item()*cell_dim + cell_dim*h
        width = cell[3].item() * 448
        height = cell[4].item() * 448

        start_point = (int(centre_x - width/2), int(centre_y - height/2))
        end_point = (int(centre_x + width/2), int(centre_y + height/2))
        cv2.rectangle(img_rgb, start_point, end_point, color, thickness)

        # Find the category of this cell. The search covers every entry of category_list and
        # yields None if no class channel is set, so a stale index from a previous cell is
        # never reused.
        category_idx = next((i for i in range(len(category_list)) if cell[i+5] == 1), None)
        if category_idx is not None:
            start_point = (int(centre_x - width/2),int(centre_y - height/2)-10)
            cv2.putText(img_rgb, str(category_list[category_idx]), start_point, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

cv2.imshow("image", img_rgb)
pressed_key = cv2.waitKey()
cv2.destroyAllWindows() # Close the window, otherwise it stays open (and frozen) next to the notebook
pressed_key

-1

In [60]:
# Used for testing: a further LoadData() call replaces data.train_data with new mini-batches
# which are built from images that have not been drawn by an earlier call.
data.LoadData()

In [5]:
import torch
# Sanity check: print the shape of the stacked image tensor of every mini-batch.
for (train_data, target_data)  in  data.train_data:
    a = train_data # Keeps the most recent image batch available for interactive inspection
    print(train_data.shape)

RuntimeError: CUDA error: unknown error

In [5]:
# Number of mini-batches currently stored inside data.train_data
print(len(data.train_data))

7


In [1]:
import import_ipynb
import dataloader

importing Jupyter notebook from dataloader.ipynb


OSError: [WinError 1455] The paging file is too small for this operation to complete. Error loading "C:\ProgramData\Anaconda3\lib\site-packages\torch\lib\caffe2_detectron_ops_gpu.dll" or one of its dependencies.