In [1]:
# Mount Google Drive at /content/drive so that the dataset folder under
# '/content/drive/My Drive/...' used by the next cell is reachable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# TODO: Replace TODO_Directory_and_Folder_Name with your own Drive folder.
# The cells below use paths relative to this working directory
# ('./GTSRB/Final_Training/Images', './GTSRB/Final_Test/Images',
# 'GT-final_test.csv'), so it must be the folder that CONTAINS the GTSRB
# directory and the test ground-truth CSV.
cd '/content/drive/My Drive/TODO_Directory_and_Folder_Name'

In [None]:
import matplotlib.pyplot as plt
import csv
import random
import torchvision.transforms as transforms
import torch
import numpy as np
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler


class TrainDataset(Dataset):
    '''
    Training Dataset class for pytorch dataloader

    Every image listed in the per-class annotation files is read eagerly in
    ``__init__`` and kept in memory as a PIL image. The transforms are applied
    each time a sample is indexed, not at load time.
    '''
    def __init__(self, rootpath, transforms=None, num_classes=43):
        '''
        Load all training images and their labels into memory

        Parameters
        ----------
        rootpath : str
            The directory of the training dataset. It must contain one
            sub-directory per class, named with the zero-padded class index
            ('00000', '00001', ...), each holding a 'GT-<index>.csv'
            annotation file next to the images it lists.
        transforms : callable, optional
            Transforms that will be applied to the images, e.g. a
            torchvision.transforms.Compose. When None, samples are returned
            as the untouched PIL images.
        num_classes : int, optional
            Number of class sub-directories to read, starting at class 0.
            The default is 43.

        Returns
        -------
        None.

        '''
        self.rootpath = rootpath
        self.images = []  # images
        self.labels = []  # corresponding labels
        # The parameter name hides the torchvision ``transforms`` module, but
        # only inside this method.
        self.transforms = transforms
        for c in range(num_classes):
            class_id = format(c, '05d')
            # subdirectory for class
            prefix = self.rootpath + '/' + class_id + '/'
            # annotations file; ``with`` closes it even if a row fails to load
            with open(prefix + 'GT-' + class_id + '.csv') as gtFile:
                # csv parser for annotations file
                gtReader = csv.reader(gtFile, delimiter=';')
                # skip header
                next(gtReader)
                # loop over all images in current annotations file
                for row in gtReader:
                    # the 1st column is the filename
                    with Image.open(prefix + row[0], 'r') as image_file:
                        # Going through numpy forces the pixel data to be
                        # decoded now, so the file handle can be released
                        # before the next image is opened.
                        image = Image.fromarray(np.array(image_file))
                    self.images.append(image)
                    # the 8th column is the label
                    self.labels.append(int(row[7]))

    def __len__(self):
        '''
        Length of the class or dataset

        Returns
        -------
        int
            Number of loaded images

        '''
        return len(self.images)

    def __getitem__(self, idx):
        '''
        Used for list indexing

        Parameters
        ----------
        idx : int
            Index of the requested sample

        Returns
        -------
        image : object
            The stored PIL image after ``self.transforms`` has been applied,
            or the PIL image itself when no transforms were given
        label : torch.Tensor
            The label of the sample wrapped in a torch.tensor

        '''
        label = torch.tensor(self.labels[idx])
        image = self.img_transform(self.images[idx])
        return image, label

    def img_transform(self, image):
        '''
        Transforms applied to the image

        Parameters
        ----------
        image : PIL Image
            A single PIL image

        Returns
        -------
        image : object
            Result of ``self.transforms(image)``; the input image unchanged
            when ``self.transforms`` is None

        '''
        if self.transforms is None:
            return image
        return self.transforms(image)

In [19]:
class TestDataset(Dataset):
    '''
    Testing Dataset class for pytorch dataloader

    Every image listed in the ground-truth file is read eagerly in
    ``__init__`` and kept in memory as a PIL image. The transforms are applied
    each time a sample is indexed, not at load time.
    '''
    def __init__(self, rootpath, filename, transforms=None):
        '''
        Load all testing images and their labels into memory

        Parameters
        ----------
        rootpath : str
            The directory that holds the testing images
        filename : str
            Path of the ';'-separated CSV file that contains the ground truth
            of the testing dataset. It is opened as given, i.e. it is NOT
            joined with ``rootpath``.
        transforms : callable, optional
            Transforms that will be applied to the images, e.g. a
            torchvision.transforms.Compose. When None, samples are returned
            as the untouched PIL images.

        Returns
        -------
        None.

        '''
        self.rootpath = rootpath
        self.images = []  # images
        self.labels = []  # corresponding labels
        # The parameter name hides the torchvision ``transforms`` module, but
        # only inside this method.
        self.transforms = transforms
        prefix = self.rootpath + '/'
        with open(filename) as f:
            gtReader = csv.reader(f, delimiter=';')
            # skip header
            next(gtReader)
            for row in gtReader:
                # the 1st column is the filename
                with Image.open(prefix + row[0], 'r') as image_file:
                    # Going through numpy forces the pixel data to be decoded
                    # now, so the file handle can be released before the next
                    # image is opened.
                    image = Image.fromarray(np.array(image_file))
                self.images.append(image)
                # the 8th column is the label
                self.labels.append(int(row[7]))

    def __len__(self):
        '''
        Length of the class or dataset

        Returns
        -------
        int
            Number of loaded images

        '''
        return len(self.images)

    def __getitem__(self, idx):
        '''
        Used for list indexing

        Parameters
        ----------
        idx : int
            Index of the requested sample

        Returns
        -------
        image : object
            The stored PIL image after ``self.transforms`` has been applied,
            or the PIL image itself when no transforms were given
        label : torch.Tensor
            The label of the sample wrapped in a torch.tensor

        '''
        label = torch.tensor(self.labels[idx])
        image = self.img_transform(self.images[idx])
        return image, label

    def img_transform(self, image):
        '''
        Transforms applied to the image

        Parameters
        ----------
        image : PIL Image
            A single PIL image

        Returns
        -------
        image : object
            Result of ``self.transforms(image)``; the input image unchanged
            when ``self.transforms`` is None

        '''
        if self.transforms is None:
            return image
        return self.transforms(image)

In [20]:
# Preprocessing pipeline: resize every image to 32x32, convert it to a
# tensor, then normalize each of the three channels.
# NOTE(review): the mean/std values look like per-channel dataset statistics;
# their origin is not shown in this notebook -- confirm.
img_transforms = transforms.Compose(
    [
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.3337, 0.3064, 0.3171),
            std=(0.2672, 0.2564, 0.2629),
        ),
    ]
)

In [4]:
# Read every training image into memory. The path is relative to the working
# directory selected at the top of the notebook.
train_dataset = TrainDataset(
    rootpath='./GTSRB/Final_Training/Images',
    transforms=img_transforms,
)

In [None]:
# Number of samples that go to the training split; everything after that in
# the shuffled order becomes the validation split.
TRAIN_SIZE = 35000
# Seed for the split, so the same images land in the same split after every
# kernel restart.
SPLIT_SEED = 0

num_samples = len(train_dataset)
if num_samples <= TRAIN_SIZE:
    # Slicing would otherwise silently produce an empty validation sampler.
    raise ValueError(
        'train_dataset holds {} samples; more than {} are needed to leave a '
        'non-empty validation split'.format(num_samples, TRAIN_SIZE))

indices = list(range(num_samples))
# Randomly shuffle the dataset before splitting it into training dataset
# and validation dataset. A dedicated seeded generator is used so that the
# split neither depends on nor disturbs the global ``random`` state.
random.Random(SPLIT_SEED).shuffle(indices)
# Membership of each split is now fixed; SubsetRandomSampler still draws the
# indices of its subset in random order when it is iterated.
train_sample = SubsetRandomSampler(indices[:TRAIN_SIZE])
val_sample = SubsetRandomSampler(indices[TRAIN_SIZE:])

In [None]:
# No batch_size is passed, so both loaders use DataLoader's default of one
# sample per batch; it can be increased based on the GPU.
# Both loaders wrap the same dataset object and differ only in the sampler
# that picks the indices.
train_loader = DataLoader(train_dataset, sampler=train_sample)
val_loader = DataLoader(train_dataset, sampler=val_sample)

In [None]:
# Sanity check: draw one batch from the training loader, print its label and
# display its image.
# The leading batch axis (size 1) is dropped and the first remaining axis is
# moved last, giving the layout plt.imshow expects.
# NOTE: the tensor has already been normalized by img_transforms, so the
# colours will not match the raw image.
x, y = next(iter(train_loader))
print(y)
plt.imshow(np.transpose(np.squeeze(x.numpy(), axis=0), (1, 2, 0)))

In [None]:
# Sanity check: draw one batch from the validation loader, print its label
# and display its image.
# The leading batch axis (size 1) is dropped and the first remaining axis is
# moved last, giving the layout plt.imshow expects.
# NOTE: the tensor has already been normalized by img_transforms, so the
# colours will not match the raw image.
x, y = next(iter(val_loader))
print(y)
plt.imshow(np.transpose(np.squeeze(x.numpy(), axis=0), (1, 2, 0)))

In [None]:
# Save the training DataLoader object itself (not just the images) so it can
# be reloaded later without re-reading the dataset.
torch.save(
    obj=train_loader,
    f='./GTSRB/train_dataloader_32_random_colab.pth',
)

In [None]:
# Save the validation DataLoader object itself (not just the images) so it
# can be reloaded later without re-reading the dataset.
torch.save(
    obj=val_loader,
    f='./GTSRB/val_dataloader_32_random_colab.pth',
)

In [None]:
# Read every test image into memory. Images are looked up under the
# Final_Test/Images folder, while the ground-truth CSV is opened relative to
# the current working directory.
test_dataset = TestDataset(
    rootpath='./GTSRB/Final_Test/Images',
    filename='GT-final_test.csv',
    transforms=img_transforms,
)

In [None]:
# Default DataLoader settings: one sample per batch, no sampler passed.
test_loader = DataLoader(test_dataset)

In [None]:
# Sanity check: take the first batch from the test loader, print its label
# and display its image.
# The leading batch axis (size 1) is dropped and the first remaining axis is
# moved last, giving the layout plt.imshow expects.
x2, y2 = next(iter(test_loader))
print(y2)
plt.imshow(np.transpose(np.squeeze(x2.numpy(), axis=0), (1, 2, 0)))

In [None]:
# Save the test DataLoader object itself (not just the images) so it can be
# reloaded later without re-reading the dataset.
torch.save(
    obj=test_loader,
    f='./GTSRB/test_dataloader_32_colab.pth',
)