In [1]:
import numpy as np
import yaml
import torch
import torch.nn as nn
import torch.nn.functional as F

from competence_estimation.utils import load_data, get_network_weights, mix_open
from competence_estimation.scores  import create_score_function
from competence_estimation.metrics  import compute_metric

# Short Introduction

The following files are required:
* {x}_id_train.npy
* {x}_id_val.npy
* {x}_id_test.npy
where x = features, logits, labels

and the same for ood samples:
* {x}_ood
where x = features, logits, labels

Additionally we require the weights w and biases b of the last layer

# Data and Config Loading

In [27]:
def calculate_fourier_spectrum(im, typ='MFS'):
    # im = im.float()
    # im = im.cpu()
    # im = im.data.numpy() # transform to numpy
    fft = np.fft.fft2(im)
    if typ == 'MFS':
        fourier_spectrum = np.abs(fft)
    elif typ == 'PFS':
        fourier_spectrum = np.abs(np.angle(fft))
    # if  (args.net == 'cif100' or args.net == 'cif100vgg') and (args.attack=='cw' or args.attack=='df'):
    #     fourier_spectrum *= 1/np.max(fourier_spectrum)
    return fourier_spectrum

In [28]:
with open("config.yml", "r") as file:
    config = yaml.safe_load(file)

In [29]:
# Here you have to include your own path
# dataset_path = "/home/DATA/ITWM/lorenzp/"
dataset_path = "/home/lorenzp/workspace/competence_estimation/features/cifar10"
# algorithm = "ERM"
model = "resnet18"

w = np.load(f"{dataset_path}/{model}_W.npy")
b = np.load(f"{dataset_path}/{model}_b.npy")

# Features/logits/labels Trainings data
features_id_train =  np.load(f"{dataset_path}/features_{model}_train.npy")[:5000]
logits_id_train = np.load(f"{dataset_path}/logits_{model}_train.npy")[:5000]
labels_id_train =  np.load(f"{dataset_path}/labels_{model}_train.npy")[:5000]

# Features/logits/labels Validation data
features_id_val = np.load(f"{dataset_path}/features_{model}_test.npy")[:5000]
logits_id_val = np.load(f"{dataset_path}/logits_{model}_test.npy")[:5000]
labels_id_val = np.load(f"{dataset_path}/labels_{model}_test.npy")[:5000]

# Features/logits/labels Test data
features_id_test = np.load(f"{dataset_path}/features_{model}_test.npy")[5000:]
logits_id_test = np.load(f"{dataset_path}/logits_{model}_test.npy")[5000:]
labels_id_test = np.load(f"{dataset_path}/labels_{model}_test.npy")[5000:]

In [30]:
w.shape

(10, 512)

In [31]:
b.shape

(10,)

In [32]:
features_id_train.shape

(5000, 512)

In [33]:
np.squeeze(features_id_train).shape

(5000, 512)

In [34]:
features_id_val.shape

(5000, 512)

In [35]:
features_id_test.shape

(5000, 512)

In [36]:
features_id_train = np.squeeze(features_id_train)
features_id_val = np.squeeze(features_id_val)
features_id_test = np.squeeze(features_id_test)

In [37]:
logits_id_train.shape

(5000, 10)

In [38]:
labels_id_train.shape

(5000,)

In [55]:
# typ = 'MFS'
typ = 'PFS'
features_id_train = calculate_fourier_spectrum(features_id_train, typ=typ)
features_id_val   = calculate_fourier_spectrum(features_id_val, typ=typ)
features_id_test  = calculate_fourier_spectrum(features_id_test, typ=typ)

# Score function creation

### "max_softmax", "max_logit", "vim", "mahalanobis", "knn", "energy", "energy_react", "GMM", "HBOS", "PCA"

In [56]:
score_function_name = 'GMM' # 'vim'

scores_id_val, score_function = create_score_function(
    features_id_train,
    logits_id_train,
    labels_id_train,
    features_id_val,
    logits_id_val,
    labels_id_val,
    w,
    b,
    score_function =  score_function_name,
    **config
)

In [57]:
scores_id_val

array([-847.7535549 , -802.24501866, -429.0641285 , ..., -847.7535549 ,
       -429.0641285 , -802.24501866])

In [58]:
score_function

<function competence_estimation.scores.create_score_function.<locals>.score_function(feature, logit)>

## Compute various scores and metrics

In [59]:
scores_id_train = score_function(features_id_train, logits_id_train)
scores_id_test  = score_function(features_id_test, logits_id_test)

scores_id_train

array([-847.7535549 , -731.31143017,   73.5924203 , ..., -809.37492389,
         73.5924203 , -731.31143017])

In [60]:
scores_id_test

array([-847.7535549 , -773.62578166, -847.7535549 , ..., -847.7535549 ,
       -847.7535549 , -773.62578166])

In [61]:
# Here you would compute the scores for OOD data
# Features/logits/labels Validation data
attack = 'linfpgd'
# attack = 'fgsm'
# attack = 'l2df'
# attack = 'linfdf'

features_ood = np.squeeze(np.load(f"{dataset_path}/features_{attack}_{model}_test.npy"))[5000:8000]
logits_ood = np.load(f"{dataset_path}/logits_{attack}_{model}_test.npy")[5000:8000]
labels_ood = np.load(f"{dataset_path}/labels_{attack}_{model}_test.npy")[5000:8000]

In [62]:
features_ood.shape

(3000, 512)

In [63]:
logits_ood.shape

(3000, 10)

In [64]:
labels_ood.shape

(3000,)

In [65]:
scores_ood_test = score_function(features_ood, logits_ood)

In [66]:
metrics = ['quantiles']

outs = compute_metric(
    scores_id_val,
    scores_id_test,
    scores_ood_test,
    features_id_train,
    logits_id_train,
    labels_id_train,
    features_id_test,
    logits_id_test,
    labels_id_test,
    features_ood,
    logits_ood,
    labels_ood,
    metrics=metrics
)

In [67]:
#n_95 is the accuracy at threshold 95%percentile of validation set
#n_95_frac is the fraction of included samples at this particular threshold
print(outs)

{'n_95_frac': 0.0, 'n_95': nan}


In [68]:
# attack = 'linfpgd'
# attack = 'fgsm'
# attack = 'l2df'
# attack = 'linfdf'

for attack in ['linfpgd', 'fgsm', 'l2df', 'linfdf']:

    print("attack: ", attack)

    score_fct = ["max_softmax", "max_logit", "vim", "mahalanobis", "knn", "energy", "energy_react", "GMM",  "PCA"]

    for score_function_name in score_fct:

        scores_id_val, score_function = create_score_function(
            features_id_train,
            logits_id_train,
            labels_id_train,
            features_id_val,
            logits_id_val,
            labels_id_val,
            w,
            b,
            score_function =  score_function_name,
            **config
        )
        scores_id_train = score_function(features_id_train, logits_id_train)
        scores_id_test  = score_function(features_id_test, logits_id_test)

        features_ood = np.squeeze(np.load(f"{dataset_path}/features_{attack}_{model}_test.npy"))[5000:8000]
        logits_ood = np.load(f"{dataset_path}/logits_{attack}_{model}_test.npy")[5000:8000]
        labels_ood = np.load(f"{dataset_path}/labels_{attack}_{model}_test.npy")[5000:8000]

        features_ood = calculate_fourier_spectrum(features_ood, typ='MFS')

        
        scores_ood_test = score_function(features_ood, logits_ood)

        metrics = ['quantiles']

        outs = compute_metric(
            scores_id_val,
            scores_id_test,
            scores_ood_test,
            features_id_train,
            logits_id_train,
            labels_id_train,
            features_id_test,
            logits_id_test,
            labels_id_test,
            features_ood,
            logits_ood,
            labels_ood,
            metrics=metrics
        )

        print(score_function_name, outs)

attack:  linfpgd
max_softmax {'n_95_frac': 1.0, 'n_95': 0.0026666666381061077}
max_logit {'n_95_frac': 1.0, 'n_95': 0.0026666666381061077}
vim {'n_95_frac': 0.0, 'n_95': nan}
mahalanobis {'n_95_frac': 0.0, 'n_95': nan}
knn {'n_95_frac': 0.929, 'n_95': 0.0028704700525850058}
energy {'n_95_frac': 0.9996666666666667, 'n_95': 0.002667555818334222}
energy_react {'n_95_frac': 0.0, 'n_95': nan}
GMM {'n_95_frac': 0.0, 'n_95': nan}
PCA {'n_95_frac': 0.0, 'n_95': nan}
attack:  fgsm
max_softmax {'n_95_frac': 0.708, 'n_95': 0.27212804555892944}
max_logit {'n_95_frac': 0.623, 'n_95': 0.24023541808128357}
vim {'n_95_frac': 0.0, 'n_95': nan}
mahalanobis {'n_95_frac': 0.0, 'n_95': nan}
knn {'n_95_frac': 0.9036666666666666, 'n_95': 0.25230541825294495}
energy {'n_95_frac': 0.617, 'n_95': 0.23230686783790588}
energy_react {'n_95_frac': 0.0, 'n_95': nan}
GMM {'n_95_frac': 0.0, 'n_95': nan}
PCA {'n_95_frac': 0.0, 'n_95': nan}
attack:  l2df
max_softmax {'n_95_frac': 0.151, 'n_95': 0.0}
max_logit {'n_95_fra