In [None]:
import os

In [None]:
class TaskDataset(Dataset):
    """Paired image dataset for a single task.

    Every regular file found in ``input_dir`` is one sample; the matching
    target image is looked up under the same filename in ``target_dir``.

    Args:
        input_dir: Directory containing the input images.
        target_dir: Directory containing the target images, stored under the
            same filenames as the inputs.
        transform: Optional callable invoked as
            ``transform(input_img, target_img)``; its return value is returned
            unchanged from ``__getitem__``. When omitted, both images are
            converted with ``TF.to_tensor``.
    """

    def __init__(self, input_dir, target_dir, transform=None):
        self.input_dir = input_dir
        self.target_dir = target_dir
        # os.listdir() yields entries in arbitrary order; sorting makes the
        # index -> file mapping reproducible across runs and machines.
        self.filenames = sorted(
            f for f in os.listdir(self.input_dir)
            if os.path.isfile(os.path.join(self.input_dir, f))
        )
        self.transform = transform

    def __len__(self):
        """Return the number of files found in ``input_dir``."""
        return len(self.filenames)

    def __getitem__(self, idx):
        """Load the input/target pair at position ``idx``.

        Returns:
            ``transform(input_img, target_img)`` when a transform is set,
            otherwise the tuple
            ``(TF.to_tensor(input_img), TF.to_tensor(target_img))``.
        """
        filename = self.filenames[idx]

        # convert() returns a new, fully loaded image, so the underlying file
        # can be closed as soon as the conversion is done.
        with Image.open(os.path.join(self.input_dir, filename)) as img:
            input_img = img.convert('RGB')
        with Image.open(os.path.join(self.target_dir, filename)) as img:
            target_img = img.convert('RGB')

        if self.transform:
            return self.transform(input_img, target_img)
        return TF.to_tensor(input_img), TF.to_tensor(target_img)

In [None]:

class TaskDatasetNoise(Dataset):
    """Dataset pairing each clean image with a randomly chosen noisy version.

    For each index `idx` we load:
      - the clean image from `target_dir` at filenames[idx]
      - a noisy image with the same filename from one of the noise folders,
        chosen at random
    If the filename doesn't exist in the randomly chosen noise folder, the
    other folders are tried (in random order).

    Args:
        transform: Optional callable invoked as
            ``transform(input_img, target_img)``; its return value is returned
            unchanged from ``__getitem__``. When omitted, both images are
            converted with ``TF.to_tensor``.
        input_dirs: Optional iterable of directories holding noisy images.
            Defaults to the three Kaggle ``sigma_15`` / ``sigma_25`` /
            ``sigma_50`` folders.
        target_dir: Optional directory holding the clean images. Defaults to
            the Kaggle ``clean`` folder.

    Raises:
        RuntimeError: If no file in ``target_dir`` has a same-named file in
            at least one of the noisy folders.
    """

    def __init__(self, transform=None, input_dirs=None, target_dir=None):
        # Default Kaggle locations. The three per-sigma attributes are always
        # set so existing code that reads them keeps working.
        self.input_dir1 = '/kaggle/input/noise-train/denoising_dataset_train/noisy/sigma_15'
        self.input_dir2 = '/kaggle/input/noise-train/denoising_dataset_train/noisy/sigma_25'
        self.input_dir3 = '/kaggle/input/noise-train/denoising_dataset_train/noisy/sigma_50'
        default_target_dir = '/kaggle/input/noise-train/denoising_dataset_train/clean'

        self.target_dir = default_target_dir if target_dir is None else target_dir

        # pack input dirs into a list for easy random choice
        if input_dirs is None:
            self.input_dirs = [self.input_dir1, self.input_dir2, self.input_dir3]
        else:
            self.input_dirs = list(input_dirs)
        self.transform = transform

        # gather filenames that exist in the clean folder and at least one noisy folder
        all_clean_files = [
            f for f in os.listdir(self.target_dir)
            if os.path.isfile(os.path.join(self.target_dir, f))
        ]
        # Sorted so the index -> file mapping does not depend on listdir order.
        self.filenames = [
            f for f in sorted(all_clean_files)
            if any(os.path.isfile(os.path.join(d, f)) for d in self.input_dirs)
        ]

        if not self.filenames:
            raise RuntimeError("No matching files found between target_dir and input dirs.")

    def __len__(self):
        """Return the number of clean images that have a noisy counterpart."""
        return len(self.filenames)

    def __getitem__(self, idx):
        """Load a (noisy, clean) pair for position ``idx``.

        Returns:
            ``transform(input_img, target_img)`` when a transform is set,
            otherwise the tuple
            ``(TF.to_tensor(input_img), TF.to_tensor(target_img))``.

        Raises:
            FileNotFoundError: If no noisy folder contains the filename.
        """
        filename = self.filenames[idx]
        target_path = os.path.join(self.target_dir, filename)

        # Shuffle a copy so the dataset's own folder order is left untouched.
        random_dirs = self.input_dirs.copy()
        random.shuffle(random_dirs)

        # pick the first noisy file that exists (from the shuffled order)
        input_path = None
        for d in random_dirs:
            candidate = os.path.join(d, filename)
            if os.path.isfile(candidate):
                input_path = candidate
                break

        if input_path is None:
            # Fallback (shouldn't happen because we filtered filenames), but safe-guard
            raise FileNotFoundError(f"No noisy version found for {filename} in any input dir.")

        # convert() returns a new, fully loaded image, so the underlying file
        # can be closed as soon as the conversion is done.
        with Image.open(input_path) as img:
            input_img = img.convert('RGB')
        with Image.open(target_path) as img:
            target_img = img.convert('RGB')

        if self.transform:
            # allow the transform to handle a (input_img, target_img) pair
            return self.transform(input_img, target_img)

        # default: return tensor pair
        return TF.to_tensor(input_img), TF.to_tensor(target_img)