In [4]:
import os
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image 

### Batch normalization

In [5]:
transform_train = transforms.Compose([
    transforms.Resize(size=[60, 60]),
    transforms.ToTensor(),
])

transform_test = transforms.Compose([
    transforms.Resize(size=[60, 60]),
    transforms.ToTensor(),
])

class CustomDataset(Dataset):
    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        self.image_paths = []

        classes = os.listdir(self.root)
        for class_name in classes:
            class_path = os.path.join(self.root, class_name)
            if os.path.isdir(class_path):
                images = os.listdir(class_path)
                for image_name in images:
                    image_path = os.path.join(class_path, image_name)
                    self.image_paths.append((image_path, int(class_name.split('_')[0])))

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path, label = self.image_paths[idx]
        image = Image.open(image_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label

# Create custom datasets
dataset_folder = r'./dataset/'

train_folder = os.path.join(dataset_folder, 'swatdcnn/data/Augmented/stage_2/train')
test_folder = os.path.join(dataset_folder, 'swatdcnn/data/Augmented/stage_2/validation')

train_dataset = CustomDataset(root=train_folder, transform=transform_train)
test_dataset = CustomDataset(root=test_folder, transform=transform_test)

train_dataset_size = len(train_dataset)
test_dataset_size = len(test_dataset)

print(f"Number of images in train dataset: {train_dataset_size}")
print(f"Number of images in test/validation dataset: {test_dataset_size}")

# Create data loaders
trainloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
testloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

Number of images in train dataset: 6000
Number of images in test/validation dataset: 625


In [6]:
# Initialize variables to accumulate sums and counts for mean and std
mean_sum = 0.0
std_sum = 0.0
total_samples = 0

# Iterate through the train dataset to calculate mean and std
for batch_images, _ in trainloader:
    batch_images = batch_images.view(batch_images.size(0), batch_images.size(1), -1)
    mean_sum += batch_images.mean(2).sum(0)
    std_sum += batch_images.std(2).sum(0)
    total_samples += batch_images.size(0)

# Calculate mean and std
dataset_mean = mean_sum / total_samples
dataset_std = std_sum / total_samples

print("Calculated mean:", dataset_mean)
print("Calculated std:", dataset_std)

Calculated mean: tensor([0.6410, 0.6595, 0.5589])
Calculated std: tensor([0.2477, 0.2294, 0.3135])
