In [1]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset

In [28]:
class LazyLoadDataset(Dataset):
    """Index of (surface-volume layer paths, ink-label path) pairs, one per crop.

    The paths are built for the following layout under ``root_dir``::

        <root_dir>/<sub_dir>/inklabel_crops/crop_<i>.png
        <root_dir>/<sub_dir>/surface_volume/<jj>_crops/crop_<i>.png

    Only file *paths* are built and returned. This class never opens an image
    and never checks that the files exist.

    Args:
        root_dir: Directory whose immediate sub-directories are indexed.
            Entries that are not directories are skipped.
        transform: Stored on ``self.transform``; it is not applied anywhere
            in this class.
        num_crops: Number of crop indices ``i`` generated per sub-directory.
            Defaults to 65, the value previously hard-coded.
        num_layers: Number of ``<jj>_crops`` layer folders referenced per
            crop. Defaults to 65, the value previously hard-coded.
    """

    def __init__(self, root_dir, transform=None, num_crops=65, num_layers=65):
        self.root_dir = root_dir
        self.transform = transform
        self.num_crops = num_crops
        self.num_layers = num_layers
        self.samples = []  # tuples of (X_paths, Y_path)

        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # index -> sample mapping identical across runs and machines.
        for class_name in sorted(os.listdir(root_dir)):
            class_dir = os.path.join(root_dir, class_name)
            if not os.path.isdir(class_dir):
                continue

            ink_label_dir = os.path.join(class_dir, "inklabel_crops")
            surface_volume_dir = os.path.join(class_dir, "surface_volume")

            for i in range(num_crops):
                crop_file = f"crop_{i}.png"
                Y_path = os.path.join(ink_label_dir, crop_file)
                # One path per layer, all pointing at the same crop index.
                X_paths = [
                    os.path.join(surface_volume_dir, f"{j:02d}_crops", crop_file)
                    for j in range(num_layers)
                ]
                self.samples.append((X_paths, Y_path))

    def __len__(self):
        """Return the number of indexed (X_paths, Y_path) samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(X_paths, Y_path)`` for sample ``idx``.

        ``X_paths`` is the list of layer image paths and ``Y_path`` is the
        ink-label image path. Both are path strings, not loaded images.
        """
        X_paths, Y_path = self.samples[idx]

        return X_paths, Y_path

In [43]:
# Example usage
root_dir = "../../Datasets/vesuvius-challenge-ink-detection/cropped_train_1024"
transform = None  # You can add transformations if needed

# Pass the configured transform through so that editing the line above actually
# reaches the dataset (it was previously defined but never handed over).
dataset = LazyLoadDataset(root_dir=root_dir, transform=transform)

# shuffle=True means the sample printed here differs from run to run.
data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True)

# The default collate function regroups the per-sample path lists: X_paths
# comes back as one tuple per layer, each holding one path per sample in the
# batch (a 1-tuple here because batch_size=1).
for batch in data_loader:
    X_paths, Y_path = batch
    print("X paths:", X_paths)
    print("Y path:", Y_path)
    break  # Print the first batch for demonstration

X paths: [('../../Datasets/vesuvius-challenge-ink-detection/cropped_train_1024\\3\\surface_volume\\00_crops\\crop_32.png',), ('../../Datasets/vesuvius-challenge-ink-detection/cropped_train_1024\\3\\surface_volume\\01_crops\\crop_32.png',), ('../../Datasets/vesuvius-challenge-ink-detection/cropped_train_1024\\3\\surface_volume\\02_crops\\crop_32.png',), ('../../Datasets/vesuvius-challenge-ink-detection/cropped_train_1024\\3\\surface_volume\\03_crops\\crop_32.png',), ('../../Datasets/vesuvius-challenge-ink-detection/cropped_train_1024\\3\\surface_volume\\04_crops\\crop_32.png',), ('../../Datasets/vesuvius-challenge-ink-detection/cropped_train_1024\\3\\surface_volume\\05_crops\\crop_32.png',), ('../../Datasets/vesuvius-challenge-ink-detection/cropped_train_1024\\3\\surface_volume\\06_crops\\crop_32.png',), ('../../Datasets/vesuvius-challenge-ink-detection/cropped_train_1024\\3\\surface_volume\\07_crops\\crop_32.png',), ('../../Datasets/vesuvius-challenge-ink-detection/cropped_train_1024\\

In [44]:
# Paths at layer index 64 for the batch printed above; a 1-tuple because batch_size=1.
X_paths[64]

('../../Datasets/vesuvius-challenge-ink-detection/cropped_train_1024\\3\\surface_volume\\64_crops\\crop_32.png',)

In [45]:
# Ink-label path of the first (and, with batch_size=1, only) sample in the batch.
Y_path[0]

'../../Datasets/vesuvius-challenge-ink-detection/cropped_train_1024\\3\\inklabel_crops\\crop_32.png'