In [2]:
import numpy as np
import yaml

from competence_estimation.utils import load_data, get_network_weights, mix_open
from competence_estimation.scores  import create_score_function
from competence_estimation.metrics  import compute_metric

import torch

# Short Introduction

The following files are required for the in-distribution (ID) data:
* {x}_id_train.npy
* {x}_id_val.npy
* {x}_id_test.npy

where x is one of: features, logits, labels.

The same is required for the out-of-distribution (OOD) samples:
* {x}_ood

where x is one of: features, logits, labels.

Additionally, we require the weights `w` and biases `b` of the network's last layer.

# Data and Config Loading

In [6]:
# Load the keyword arguments that are later forwarded to
# `create_score_function` via `**config`.
# * The encoding is given explicitly so that parsing does not depend on the
#   platform's default locale encoding.
# * `yaml.safe_load` returns None for an empty document; fall back to an empty
#   dict so that `**config` keeps working in that case.
with open("config.yml", "r", encoding="utf-8") as file:
    config = yaml.safe_load(file) or {}

In [4]:
# Example loader for features stored as .npy files.
# Here you have to include your own path
# dataset_path = "/home/DATA/ITWM/lorenzp/"
# NOTE(review): `dataset_path`, `w`, `b` and all feature/logit/label arrays
# defined in this cell are reassigned by the torch-based loading cells further
# down, so this cell looks like a leftover template -- confirm before relying
# on it.
dataset_path = "/home/lorenzp/workspace/competence_estimation/features/example_features"
algorithm = "ERM"

# Weights and biases of the last layer
w = np.load(f"{dataset_path}/{algorithm}_W.npy")
b = np.load(f"{dataset_path}/{algorithm}_b.npy")

# Features/logits/labels training data
features_id_train =  np.load(f"{dataset_path}/{algorithm}_features_iid_train.npy")
logits_id_train   =  np.load(f"{dataset_path}/{algorithm}_logits_iid_train.npy")
labels_id_train   =  np.load(f"{dataset_path}/{algorithm}_labels_iid_train.npy")
    
# Features/logits/labels validation data
features_id_val = np.load(f"{dataset_path}/{algorithm}_features_iid_val.npy")
logits_id_val   = np.load(f"{dataset_path}/{algorithm}_logits_iid_val.npy")
labels_id_val   = np.load(f"{dataset_path}/{algorithm}_labels_iid_val.npy")

# Features/logits/labels test data
features_id_test = np.load(f"{dataset_path}/{algorithm}_features_iid_test.npy")
logits_id_test   = np.load(f"{dataset_path}/{algorithm}_logits_iid_test.npy")
labels_id_test   = np.load(f"{dataset_path}/{algorithm}_labels_iid_test.npy")

In [3]:
# Here you have to include your own path
# dataset_path = "/home/DATA/ITWM/lorenzp/"
dataset_path = "/home/lorenzp/workspace/competence_estimation/features/cifar10"
# algorithm = "ERM"
model = "resnet18"

# The original cell also executed `load_data = model` here. That rebound the
# `load_data` function imported from `competence_estimation.utils` to the
# string "resnet18", so any later `load_data(...)` call would have raised
# "TypeError: 'str' object is not callable". The assignment was not used
# anywhere in the notebook and has been removed.
mode = "spatial"

# Weights and biases of the last layer, stored as torch tensors and converted
# to NumPy arrays.
# NOTE(review): the directory name "spatial" is hard-coded while the file names
# in the following cells use `mode` -- confirm whether they should be coupled.
w = torch.load(f"{dataset_path}/spatial/{model}_W.pt").numpy()
b = torch.load(f"{dataset_path}/spatial/{model}_b.pt").numpy()

print(w.shape, b.shape)

(10, 512) (10,)


In [4]:
# Split boundaries (row indices into the stored arrays):
#   rows [0, N_FIT)            of the train files -> fitting ("train") split
#   rows [N_FIT, N_TRAIN_END)  of the train files -> validation split
#   rows [0, N_ID_TEST)        of the test files  -> in-distribution test split
N_FIT = 30000
N_TRAIN_END = 50000
N_ID_TEST = 5000

# Deserialize every train file exactly once and slice it for both splits
# (the original cell loaded each of these files twice).
# NOTE(review): the displayed outputs of this notebook show that these .pt
# files hold NumPy arrays; recent PyTorch releases restrict what `torch.load`
# unpickles by default, so loading may need adjusting there -- confirm against
# the installed version.
features_train_all = torch.load(f"{dataset_path}/spatial/features_{mode}_{model}_train.pt")
logits_train_all = torch.load(f"{dataset_path}/spatial/logits_{mode}_{model}_train.pt")
labels_train_all = torch.load(f"{dataset_path}/spatial/labels_{mode}_{model}_train.pt")

# Features/logits/labels training data
features_id_train = features_train_all[:N_FIT]
logits_id_train = logits_train_all[:N_FIT]
labels_id_train = labels_train_all[:N_FIT]

# Features/logits/labels validation data
features_id_val = features_train_all[N_FIT:N_TRAIN_END]
logits_id_val = logits_train_all[N_FIT:N_TRAIN_END]
labels_id_val = labels_train_all[N_FIT:N_TRAIN_END]

# Features/logits/labels test data
features_id_test = torch.load(f"{dataset_path}/spatial/features_{mode}_{model}_test.pt")[:N_ID_TEST]
logits_id_test = torch.load(f"{dataset_path}/spatial/logits_{mode}_{model}_test.pt")[:N_ID_TEST]
labels_id_test = torch.load(f"{dataset_path}/spatial/labels_{mode}_{model}_test.pt")[:N_ID_TEST]

# Sanity check: number of samples and feature dimension per split.
print(
    features_id_train.shape,
    features_id_val.shape,
    features_id_test.shape
)

# Experiment bookkeeping: requested metrics, result containers, and the lists
# of attack names and score-function names.
metrics = ['quantiles']
results = {}
results_frac = {}
results_id_test = {}
results_id_test_frac = {}
attacks = ['nor', 'fgsm', 'linfpgd', 'aa', 'linfdf', 'l2df']
# attacks = ['nor', 'fgsm']
score_fct = ["max_softmax", "max_logit", "vim", "mahalanobis", "knn", "energy", "energy_react", "GMM",  "PCA"]
# score_fct = [ "vim" ]

(30000, 512) (20000, 512) (5000, 512)


# Score function creation

### Available score functions: "max_softmax", "max_logit", "vim", "mahalanobis", "knn", "energy", "energy_react", "GMM", "HBOS", "PCA"

In [7]:
# Name of the score function to build (see the list of names in the heading above).
score_function_name = "vim"

# Build the score function. The call returns the scores of the validation split
# together with a callable that is applied to (features, logits) pairs in the
# following cells. The positional arguments are grouped per split only for
# readability; they are unpacked in exactly the original order.
scores_id_val, score_function = create_score_function(
    *(features_id_train, logits_id_train, labels_id_train),  # training split
    *(features_id_val, logits_id_val, labels_id_val),        # validation split
    w,
    b,
    score_function=score_function_name,
    **config,
)

lam1 1


## Compute various scores and metrics

In [8]:
# Apply the fitted score function to the ID training split first and to the ID
# test split second (same call order as before).
scores_id_train, scores_id_test = (
    score_function(features_id_train, logits_id_train),
    score_function(features_id_test, logits_id_test),
)

# Show the training scores as the cell output.
scores_id_train

lam2:  1
lam2:  1


array([ 0.97001934, -0.80391407,  2.3348494 , ..., -1.1966772 ,
        0.94078255, -0.71409893], dtype=float32)

In [None]:
scores_id_test

In [10]:
# Template: this is how you would compute the scores for your own OOD data (.npy layout)
# Features/logits/labels OOD test data
# features_ood = np.load(f"{dataset_path}/{algorithm}_features_ood_test.npy")
# logits_ood = np.load(f"{dataset_path}/{algorithm}_logits_ood_test.npy")
# labels_ood = np.load(f"{dataset_path}/{algorithm}_labels_ood_test.npy")
# scores_ood_test = score_function(features_ood, logits_ood)

In [14]:
# Rows of the stored test arrays that are treated as the "OOD" set.
# NOTE(review): these rows come from the same test files whose first 5000 rows
# are used as the ID test split, so this looks like a placeholder rather than
# genuinely out-of-distribution data -- confirm the intended OOD source.
ood_rows = slice(5000, 10000)

features_ood = torch.load(f"{dataset_path}/spatial/features_{mode}_{model}_test.pt")[ood_rows]
logits_ood = torch.load(f"{dataset_path}/spatial/logits_{mode}_{model}_test.pt")[ood_rows]
labels_ood = torch.load(f"{dataset_path}/spatial/labels_{mode}_{model}_test.pt")[ood_rows]

# Score the OOD samples with the same score function as the ID splits.
scores_ood_test = score_function(features_ood, logits_ood)

lam2:  1


In [15]:
features_ood

array([[7.706e-03, 2.689e-02, 6.866e-05, ..., 2.480e-01, 3.289e-01,
        1.419e-02],
       [4.439e-04, 0.000e+00, 1.600e-01, ..., 9.094e-03, 4.536e-01,
        0.000e+00],
       [1.102e-02, 1.278e-02, 2.344e-02, ..., 2.659e-01, 1.852e-02,
        5.264e-03],
       ...,
       [2.852e-01, 5.508e-01, 0.000e+00, ..., 5.723e-01, 6.752e-03,
        0.000e+00],
       [1.682e-01, 2.593e-01, 8.716e-02, ..., 2.571e-01, 9.167e-02,
        6.683e-02],
       [3.721e-03, 3.204e-03, 1.598e-02, ..., 2.406e-01, 1.361e-01,
        4.990e-03]], dtype=float16)

In [16]:
features_ood.shape

(5000, 512)

In [17]:
logits_ood.shape

(5000, 10)

In [18]:
logits_ood

array([[-1.623  , -0.4878 , -2.004  , ..., 10.45   , -1.494  ,  0.02615],
       [-2.559  , -1.204  ,  0.0411 , ..., -1.394  , -1.477  , -2.055  ],
       [ 0.1929 , -0.2888 , -1.989  , ..., -1.814  , 11.97   ,  0.5713 ],
       ...,
       [-3.229  , -2.19   ,  0.412  , ...,  0.4185 , -1.361  , -1.855  ],
       [-1.631  ,  9.016  ,  0.732  , ..., -1.598  , -0.2625 , -2.277  ],
       [-1.473  , -1.414  , -1.63   , ..., 10.38   , -1.66   , -0.2057 ]],
      dtype=float16)

In [19]:
labels_ood

array([7, 6, 8, ..., 5, 1, 7])

In [20]:
labels_ood.shape

(5000,)

In [21]:
# Only the quantile-based metric is requested.
metrics = ['quantiles']

# All data arguments are passed positionally, so their order has to match the
# signature of `compute_metric` (imported from competence_estimation.metrics,
# not shown in this notebook).
outs = compute_metric(
    # scores: ID validation, ID test, OOD test
    scores_id_val,
    scores_id_test,
    scores_ood_test,
    # ID training split
    features_id_train,
    logits_id_train,
    labels_id_train,
    # ID test split
    features_id_test,
    logits_id_test,
    labels_id_test,
    # OOD split
    features_ood,
    logits_ood,
    labels_ood,
    metrics=metrics
)

In [22]:
# n_95: accuracy at the threshold given by the 95% percentile of the validation set
# n_95_frac: fraction of included samples at this particular threshold
# NOTE(review): the printed dict also contains `*_id_test` variants of both
# keys; presumably the same quantities evaluated on the ID test split --
# confirm in `compute_metric`.
print(outs)

{'n_95_frac': 0.8538, 'n_95': 0.9845396876335144, 'n_95_frac_id_test': 0.8534, 'n_95_id_test': 0.982188880443573}
