In [7]:
import os
import torch
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset
from collections import defaultdict

class BalancedImageFolder(Dataset):
    def __init__(self, root_dir, transform=None, n_images_per_class=None):
        self.root_dir = root_dir
        self.transform = transform

        # Load initial dataset
        self.full_dataset = ImageFolder(root=self.root_dir, transform=self.transform)

        # Create a dict to store the indices for each class
        self.class_indices = defaultdict(list)
        for idx, (_, class_label) in enumerate(self.full_dataset.imgs):
            self.class_indices[class_label].append(idx)

        # Balance the dataset
        self.filtered_indices = []
        if n_images_per_class is not None:
            for class_label, indices in self.class_indices.items():
                self.filtered_indices.extend(indices[:n_images_per_class])
        else:
            self.filtered_indices = list(range(len(self.full_dataset)))

    def __len__(self):
        return len(self.filtered_indices)

    def __getitem__(self, index):
        original_idx = self.filtered_indices[index]
        img, label = self.full_dataset[original_idx]
        return img, label




In [9]:
import os
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
from tqdm import tqdm

def calculate_mean_and_std(folder_path, batch_size=32):
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])

    dataset = BalancedImageFolder(folder_path, transform=transform, n_images_per_class=1000)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=4)

    mean = 0.0
    std = 0.0
    nb_samples = 0

    for data, _ in tqdm(loader):
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        nb_samples += batch_samples

    mean /= nb_samples
    std /= nb_samples

    return mean.numpy(), std.numpy()

folder_path = "/home/flix/Documents/Datasets/OCT_Dataset_Masterthesis/CellData/OCT_resized/val"  # replace with your folder path
mean, std = calculate_mean_and_std(folder_path)
print(f"Calculated mean: {mean}")
print(f"Calculated std: {std}")


100%|██████████| 125/125 [00:04<00:00, 26.73it/s]

Calculated mean: [0.19196127 0.19196127 0.19196127]
Calculated std: [0.19903535 0.19903535 0.19903535]





In [10]:
folder_path = '/home/flix/Downloads/slow_dataset_v2 (2)/slow_dataset_v2'  # replace with your folder path
mean, std = calculate_mean_and_std(folder_path)
print(f"Calculated mean: {mean}")
print(f"Calculated std: {std}")

100%|██████████| 125/125 [00:04<00:00, 27.14it/s]

Calculated mean: [0.16797477 0.16927803 0.16864358]
Calculated std: [0.16557804 0.1659938  0.16580833]



