In [1]:
import numpy as np
import torch
import os

In [2]:
def _gaussian_kernel_matrix(x, y, sigmas):
    """
    Computes the Gaussian Kernel matrix
    """
    dist = ((x[:, None, :] - y[None, :, :]) ** 2).sum(axis=-1)[:, :, None]
    beta = 1.0 / (2.0 * torch.Tensor(sigmas)[None, :])#.cuda()
    s = torch.matmul(dist, beta)
    k = torch.exp(-s).sum(axis=-1)
    return k

def _mmd(embedding, factor_z=2):
    """
    Computes the Maximum-Mean-Discrepancy (MMD): MMD(embedding,z) where
    z follows a standard normal distribution
    Computation is performed for multiple scales
    """
    # z = torch.randn(embedding.shape)
    z = torch.randn(embedding.shape[0] * factor_z, embedding.shape[1])#.cuda()
    sigmas = [
        1e-6,
        # 1e-5,
        # 1e-4,
        1e-3,
        # 1e-2,
        1e-1,
        1,
        5,
        # 10,
        15,
        # 20,
        25,
        30,
        # 35,
        100,
        # 1e3,
        1e4,
        # 1e5,
        1e6,
    ]

    loss = torch.mean(_gaussian_kernel_matrix(embedding, embedding, sigmas))
    loss += torch.mean(_gaussian_kernel_matrix(z, z, sigmas))
    loss -= 2 * torch.mean(_gaussian_kernel_matrix(embedding, z, sigmas))
    return loss

In [19]:
# Location of the stored feature files -- adjust this to your own setup.
# dataset_path = "/home/DATA/ITWM/lorenzp/"
dataset_path = "/home/lorenzp/workspace/competence_estimation/features/cifar10"
model = "resnet18"
mode = "benign"

# Read the train and test feature files (in that order) and keep only the
# first 3000 rows of each.
features_id_train, features_id_test = (
    torch.load(f"{dataset_path}/{mode}/features_{mode}_{model}_{split}.pt")[:3000]
    for split in ("train", "test")
)

In [20]:
# _mmd(torch.from_numpy(features_id_train), factor_z=2)

In [21]:
# _mmd(torch.from_numpy(features_id_test), factor_z=2)

In [22]:
# Shape check on the loaded training features; the first axis is capped at
# 3000 by the slice applied when the file was loaded.
features_id_train.shape

(3000, 512)

In [23]:
import numpy as np
import torchvision.datasets as datasets

# Fetch the CIFAR-10 training split (downloaded on demand into `root`).
cifar10_trainset = datasets.CIFAR10(
    root="/home/DATA/ITWM/lorenzp/cifar10",
    train=True,
    download=True,
)

# Image data held by the dataset object, converted to a NumPy array.
images = cifar10_trainset.data
dataset = np.array(images)

# No axis is specified, so each statistic is a single number computed over
# every element of the array.
mean, std_dev = np.mean(dataset), np.std(dataset)

print("Mean:", mean)
print("Standard Deviation:", std_dev)

Files already downloaded and verified
Mean: 120.70756512369792
Standard Deviation: 64.1500758911213


In [24]:
# Scalar mean, standard deviation and variance of the training features.
# No axis is passed, so each statistic is taken over all elements at once.
mean, std, variance = (
    np.mean(features_id_train),
    np.std(features_id_train),
    np.var(features_id_train),
)

for stat_label, stat_value in (
    ("Mean:", mean),
    ("Standard Deviation:", std),
    ("Variance:", variance),
):
    print(stat_label, stat_value)

Mean: 0.1837909
Standard Deviation: 0.28158644
Variance: 0.07929093


In [25]:
# Scalar mean, standard deviation and variance of the test features.
# No axis is passed, so each statistic is taken over all elements at once.
mean, std, variance = (
    np.mean(features_id_test),
    np.std(features_id_test),
    np.var(features_id_test),
)

for stat_label, stat_value in (
    ("Mean:", mean),
    ("Standard Deviation:", std),
    ("Variance:", variance),
):
    print(stat_label, stat_value)

Mean: 0.18315665
Standard Deviation: 0.27723405
Variance: 0.07685872
