In [2]:
import sys
import torch

sys.path.append("../src")

from data_utils import UnlearningDataLoader


# Use the CUDA device when torch reports one is available, otherwise the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
def compute_mean_std(loader):
    """Compute the per-channel mean and standard deviation over a whole loader.

    Running sums of ``x`` and ``x**2`` are accumulated across batches and
    divided by the total number of values that went into each channel, so
    batches of different sizes (e.g. a short final batch) are weighted
    correctly. The standard deviation is the population one, obtained from
    ``var[X] = E[X**2] - E[X]**2``.

    Args:
        loader: Iterable yielding ``(data, target)`` pairs. ``data`` is
            reduced over dims 0, 2 and 3, so it must have at least four
            dims; dim 1 is kept as the channel axis. The target is ignored.

    Returns:
        Tuple ``(mean, std)`` of 1-D tensors with one entry per index of
        dim 1 of ``data``.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches.
    """
    channels_sum, channels_sqrd_sum, num_values = 0, 0, 0

    for data, _ in loader:
        channels_sum += torch.sum(data, dim=[0, 2, 3])
        channels_sqrd_sum += torch.sum(data**2, dim=[0, 2, 3])
        # Count of values folded into each channel entry by this batch;
        # weighting by it keeps unequal batch sizes from biasing the result.
        num_values += data.numel() // data.size(1)

    mean = channels_sum / num_values
    # Floating-point rounding can push the difference marginally below zero
    # for (near-)constant channels; clamp so the square root never gives NaN.
    variance = torch.clamp(channels_sqrd_sum / num_values - mean**2, min=0)
    std = variance**0.5

    return mean, std

In [3]:
# Build the project data loader helper for the "pneumoniamnist" dataset.
# NOTE(review): the positional arguments 128, 224 and 3407 are presumably
# batch size, image size and random seed -- confirm against the
# UnlearningDataLoader signature in data_utils.
UDL = UnlearningDataLoader("pneumoniamnist", 128, 224, 3407)
# load_data() is unpacked as a pair; only the first element is kept. Other
# cells index it by split name ("train", "val", "test").
dl, _ = UDL.load_data()

In [9]:
# Per-channel statistics for each split, evaluated in train -> val -> test order.
(train_mean, train_std), (val_mean, val_std), (test_mean, test_std) = (
    compute_mean_std(dl[split_name]) for split_name in ("train", "val", "test")
)

In [10]:
# Report the per-channel statistics of every split, one split per line.
print(
    f"train mean & std: {train_mean}, {train_std}",
    f"val mean & std: {val_mean}, {val_std}",
    f"test mean & std: {test_mean}, {test_std}",
    sep="\n",
)

train mean & std: tensor([0.5717, 0.5717, 0.5717]), tensor([0.1770, 0.1770, 0.1770])
val mean & std: tensor([0.5698, 0.5698, 0.5698]), tensor([0.1781, 0.1781, 0.1781])
test mean & std: tensor([0.5640, 0.5640, 0.5640]), tensor([0.1783, 0.1783, 0.1783])


In [6]:
import numpy as np

# Class balance of the training split.
labels = dl["train"].dataset.targets
# Convert at the comparison so it is elementwise even when `targets` is a
# plain Python list; `list == 0` would be a single False and both counts
# would silently come out as 0.
zeros = np.count_nonzero(np.asarray(labels) == 0)
ones = np.count_nonzero(np.asarray(labels) == 1)

print(f"Number of zeros: {zeros}")
print(f"Number of ones: {ones}")


Number of zeros: 1214
Number of ones: 3494
