### Custom Dataset Class (3)

When all images are stored together in a single folder and the class of each image is listed in a separate label file (one line per image: image path, then class index).<br>
The directory structure looks like the following:
```
Root
├── train.txt
├── val.txt
├── test.txt
└── Images
    ├── image01.jpg
    ├── image02.jpg
    ├── image03.jpg
    ├── image04.jpg
    └── ...
```

In [2]:
import glob
import numpy
from pathlib import Path

import torch
from torch.utils.data import Dataset, DataLoader
import torchvision

In [3]:
# Adjust these locations to match your own machine.
ROOT_DIR = Path("/root/jupyter_projects")  # Working directory of the project.
DATASET_DIR = ROOT_DIR.joinpath("Data")  # Parent folder that holds the datasets.

DATA_DIR = DATASET_DIR.joinpath("Flowers102")  # Folder of the Flowers102 dataset.

In [45]:
# Image paths listed in the label files are joined onto this directory.
image_dir = DATA_DIR / "oxford-102-flowers"

# One label file per split, all located directly inside the dataset folder.
train_label_path = image_dir / "train.txt"
val_label_path = image_dir / "valid.txt"
test_label_path = image_dir / "test.txt"

In [65]:
# Parallel accumulators: entry i of each list describes the same image.
# path_list holds image paths, class_list holds the matching class labels.
path_list, class_list = [], []

In [66]:
# Load every line of the training label file (line endings are kept).
with open(train_label_path) as label_file:
    lines = list(label_file)

In [67]:
# Each label line has the form "<relative image path> <class index>".
for line in lines:
    line = line.strip()
    if not line:  # Ignore blank lines, e.g. an empty line at the end of the file.
        continue
    # Split once from the right on any whitespace run: tabs or repeated
    # spaces between the fields are accepted, and a path that itself
    # contains spaces stays in one piece.
    image_path, image_class = line.rsplit(maxsplit=1)
    image_path = str(image_dir / image_path)  # Convert to string type.
    path_list.append(image_path)  # Store the path of images in list.
    class_list.append(int(image_class))  # Store the class label as an int.

In [68]:
# Number of collected image paths, then number of labels (one per line);
# both equal the total number of images in the training set.
print(len(path_list), len(class_list), sep="\n")

1020
1020


In [69]:
# Count the distinct labels; duplicates collapse inside the set.
print(len({*class_list}))  # 102 unique class.

102


In [70]:
class Flowers102Dataset(Dataset):
    """Map-style dataset pairing image files with their class labels.

    Only the file paths are held in memory; an image is decoded from disk
    each time its index is requested.

    Args:
        image_paths: Sequence of image file paths (``str`` or path-like).
        image_class: Sequence of labels aligned index-by-index with
            ``image_paths``.
        transform: Optional callable invoked as ``transform(image=image)``
            that returns a mapping whose ``"image"`` entry is the
            transformed image. ``None`` (the default) applies no transform.

    Raises:
        ValueError: If ``image_paths`` and ``image_class`` differ in length.
    """

    def __init__(self, image_paths, image_class, transform=None):
        super().__init__()
        # Fail early instead of surfacing a mismatch later as an IndexError
        # (too few labels) or as silently ignored labels (too many).
        if len(image_paths) != len(image_class):
            raise ValueError(
                "image_paths and image_class must have the same length, "
                f"got {len(image_paths)} and {len(image_class)}"
            )
        self.image_paths = image_paths
        self.image_class = image_class
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        # str() lets callers supply pathlib.Path entries as well as strings.
        image_filepath = str(self.image_paths[idx])

        # Decode the file straight into a torch tensor. To work with NumPy
        # arrays instead, read with cv2.imread and convert BGR -> RGB.
        image = torchvision.io.read_image(image_filepath)

        label = self.image_class[idx]

        if self.transform is not None:
            # NOTE(review): this keyword-in / dict-out convention looks like
            # Albumentations, which presumably expects NumPy arrays rather
            # than the tensor produced above -- confirm against the transform
            # actually passed in.
            image = self.transform(image=image)["image"]

        return image, label

In [71]:
# Wrap the collected training paths and labels in a dataset (no transform).
Flowers102 = Flowers102Dataset(image_paths=path_list, image_class=class_list)

In [72]:
# Walk through every sample and show the type and content of each element.
for X, y in Flowers102:
    print(type(X))
    print(X.shape)  # (channel, height, width)

    print(type(y))
    print(y)
    

<class 'torch.Tensor'>
torch.Size([3, 500, 667])
<class 'int'>
16
<class 'torch.Tensor'>
torch.Size([3, 638, 500])
<class 'int'>
13
<class 'torch.Tensor'>
torch.Size([3, 667, 500])
<class 'int'>
42
<class 'torch.Tensor'>
torch.Size([3, 500, 656])
<class 'int'>
55
<class 'torch.Tensor'>
torch.Size([3, 601, 500])
<class 'int'>
96
<class 'torch.Tensor'>
torch.Size([3, 542, 500])
<class 'int'>
5
<class 'torch.Tensor'>
torch.Size([3, 500, 667])
<class 'int'>
91
<class 'torch.Tensor'>
torch.Size([3, 587, 500])
<class 'int'>
96
<class 'torch.Tensor'>
torch.Size([3, 500, 597])
<class 'int'>
25
<class 'torch.Tensor'>
torch.Size([3, 500, 621])
<class 'int'>
86
<class 'torch.Tensor'>
torch.Size([3, 750, 500])
<class 'int'>
60
<class 'torch.Tensor'>
torch.Size([3, 500, 667])
<class 'int'>
6
<class 'torch.Tensor'>
torch.Size([3, 500, 752])
<class 'int'>
61
<class 'torch.Tensor'>
torch.Size([3, 500, 667])
<class 'int'>
56
<class 'torch.Tensor'>
torch.Size([3, 500, 750])
<class 'int'>
41
<class 'torc