In [1]:
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
import os
import io
from PIL import Image

# Seed PyTorch's global random number generator so that random operations
# in the cells below are repeatable from one run to the next.
torch.manual_seed(123)

<torch._C.Generator at 0x1d5ea7b6c30>

## Class Dataset
Inherits from `torch.utils.data.Dataset`. Instances of this class are later passed to a `DataLoader`.

In [2]:
# inherit the torch.utils.data.Dataset class
class Dataset(torch.utils.data.Dataset):
    """Image dataset backed by an image directory and a CSV file of labels.

    Column 0 of the CSV holds the image file name (relative to ``image_dir``);
    all remaining columns hold numeric label values for that image.

    The base class is spelled out as ``torch.utils.data.Dataset`` because this
    class is itself named ``Dataset``: referring to the bare name would make
    the cell subclass its own previous definition when it is re-run.
    """

    def __init__(self, image_dir, label_frame, transform=None):
        """
        Args:
            image_dir (string): Directory with all the images.
            label_frame (string): Path to the csv file (or any input accepted
                by ``pandas.read_csv``) with class or regression labels.
            transform (callable, optional): Optional transform to be applied
                on the image of a sample.
        """
        self.image_dir = image_dir
        self.label_frame = pd.read_csv(label_frame)
        self.transform = transform

    def __len__(self):
        """Return the number of samples, i.e. the number of rows in the CSV."""
        return len(self.label_frame)

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx (int or scalar tensor): Row position in the label frame.

        Returns:
            dict: ``{'image': image, 'label': labels}`` where ``image`` is the
            RGB image (after ``transform`` if one was given) and ``labels`` is
            a float array of shape ``(-1, 2)`` built from the label columns.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.image_dir,
                                self.label_frame.iloc[idx, 0])

        # The stdlib ``io`` module has no ``imread``; read the file with PIL
        # instead. ``convert`` returns a fully loaded copy, so the file handle
        # can be closed right away. Forcing RGB keeps the channel count fixed
        # at three for channel-wise transforms.
        with Image.open(img_name) as img:
            image = img.convert("RGB")

        # Label columns -> float array, grouped in pairs (the column count
        # must therefore be even).
        labels = np.asarray(self.label_frame.iloc[idx, 1:], dtype='float')
        labels = labels.reshape(-1, 2)

        # Apply the transform to the image itself; previously the result was
        # stored in a variable that was immediately overwritten.
        if self.transform is not None:
            image = self.transform(image)

        return {'image': image, 'label': labels}

In [5]:
from torchvision import transforms

# Locations of the training images and of the CSV with the regression labels.
root = "C:/Phanh/BuAnhNet/EAAI23/"
train_dir = f"{root}data/train_set/New folder/images"
train_frame = f"{root}data/train_reg.csv"

# Per-channel mean / std computed on the ImageNet dataset.
# TODO: decide whether to recompute these for our own dataset; otherwise
#       leave them unchanged.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# NOTE(review): with a single int, Resize scales the shorter edge to 128 and
# keeps the aspect ratio -- confirm all images share one aspect ratio,
# otherwise samples of different sizes cannot be stacked into a batch.
transform_fnc = transforms.Compose(
    [
        transforms.Resize(128),
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
)

# The rows were shuffled before the CSV was saved, so the loader keeps the
# file order (shuffle=False) and yields batches of up to 1024 samples.
dataSet = Dataset(train_dir, train_frame, transform=transform_fnc)
dataLoader = DataLoader(dataSet, batch_size=1024, shuffle=False)