In [2]:
import os

import numpy as np
import yaml

from competence_estimation.utils import load_data, get_network_weights, mix_open
from competence_estimation.scores  import create_score_function
from competence_estimation.metrics  import compute_metric

# Short Introduction

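The following files are required (the loading cells below prefix every file name with the algorithm name, e.g. `ERM_features_iid_train.npy`):
* {algorithm}_{x}_iid_train.npy
* {algorithm}_{x}_iid_val.npy
* {algorithm}_{x}_iid_test.npy

where x = features, logits, labels

and the same for the OOD samples:
* {algorithm}_{x}_ood_test.npy

where x = features, logits, labels

Additionally, we require the weights `w` and biases `b` of the last layer, stored as `{algorithm}_W.npy` and `{algorithm}_b.npy`.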

# Data and Config Loading

In [3]:
# Read the keyword arguments that are later forwarded to create_score_function
# via `**config`.
with open("config.yml", "r", encoding="utf-8") as file:
    # yaml.safe_load returns None for an empty document; fall back to an empty
    # dict so that the `**config` unpacking further down still works.
    config = yaml.safe_load(file) or {}

In [4]:
# Here you have to include your own path: either set the COMPETENCE_DATA_DIR
# environment variable or edit the fallback below (the original author's
# location).
dataset_path = os.environ.get(
    "COMPETENCE_DATA_DIR",
    "/home/lorenzp/workspace/competence_estimation/features/example_features",
)
algorithm = "ERM"


def _load_array(name):
    """Load ``{dataset_path}/{algorithm}_{name}.npy`` with ``np.load``."""
    return np.load(f"{dataset_path}/{algorithm}_{name}.npy")


def _load_split(split):
    """Return the (features, logits, labels) arrays stored for ``split``."""
    return tuple(
        _load_array(f"{kind}_{split}") for kind in ("features", "logits", "labels")
    )


# Weights and biases of the last layer
w = _load_array("W")
b = _load_array("b")

# Features/logits/labels training data
features_id_train, logits_id_train, labels_id_train = _load_split("iid_train")

# Features/logits/labels validation data
features_id_val, logits_id_val, labels_id_val = _load_split("iid_val")

# Features/logits/labels test data
features_id_test, logits_id_test, labels_id_test = _load_split("iid_test")

# Score function creation

### Score function names (value for `score_function_name`): "max_softmax", "max_logit", "vim", "mahalanobis", "knn", "energy", "energy_react", "GMM", "HBOS", "PCA"

In [5]:
# Name of the score to build; it is handed over as the `score_function` keyword.
score_function_name = 'vim'

# Bundle the positional inputs per split so the argument order is easy to audit.
train_arrays = (features_id_train, logits_id_train, labels_id_train)
val_arrays = (features_id_val, logits_id_val, labels_id_val)

# The call yields `scores_id_val` plus a callable that later cells apply to
# (features, logits) pairs. Everything in `config` is passed through as
# additional keyword arguments.
scores_id_val, score_function = create_score_function(
    *train_arrays,
    *val_arrays,
    w,
    b,
    score_function=score_function_name,
    **config,
)

lam1 1


## Compute various scores and metrics

In [6]:
# Apply the score function to the in-distribution training and test splits,
# in that order.
scores_id_train, scores_id_test = (
    score_function(split_features, split_logits)
    for split_features, split_logits in (
        (features_id_train, logits_id_train),
        (features_id_test, logits_id_test),
    )
)

# Show the training-split scores as the cell output.
scores_id_train

lam2:  1
lam2:  1


array([-23.557867 ,   1.4121437,   7.2673664, ...,   8.016261 ,
        -0.3362856, -10.060137 ], dtype=float32)

In [7]:
# Display the scores computed for the in-distribution test split.
scores_id_test

array([ -3.142868  ,  -0.6790447 ,  10.343153  ,  -2.6293259 ,
         3.479065  ,  20.858238  ,  18.590065  ,  12.579017  ,
         4.4617577 ,  10.010528  ,   6.915554  ,  15.009462  ,
         8.807881  ,  24.767492  ,   7.508813  ,  22.967121  ,
       -13.371651  ,  16.789845  ,  12.012699  ,  11.731744  ,
        10.769051  ,  17.338736  ,  18.458155  ,   5.06112   ,
         2.376915  ,   3.2745705 ,   0.5778198 ,   7.6969833 ,
       -14.27841   ,  19.122799  ,   6.721428  ,  12.247072  ,
        17.018078  ,  19.326298  ,   2.7081242 ,  -7.4028034 ,
       -11.729143  ,  28.055672  ,  -9.977844  ,  -5.8121815 ,
        23.394997  ,  10.133744  ,   1.5531769 ,  -5.7636623 ,
        -3.071949  ,  -2.4384212 ,  26.043638  , -13.257196  ,
         7.642082  ,  -7.8019485 ,  -7.5677814 ,   8.805329  ,
         2.413825  ,  11.680776  ,  22.579617  ,   1.8142452 ,
       -12.32645   ,  19.503572  ,   4.52555   ,   8.163733  ,
         1.541008  ,   7.6087446 ,  15.15527   ,  -2.09

In [8]:
# Features/logits/labels of the OOD test data, read from the same directory
# and with the same algorithm prefix as the in-distribution files.
features_ood, logits_ood, labels_ood = (
    np.load(f"{dataset_path}/{algorithm}_features_ood_test.npy"),
    np.load(f"{dataset_path}/{algorithm}_logits_ood_test.npy"),
    np.load(f"{dataset_path}/{algorithm}_labels_ood_test.npy"),
)

# Compute the scores for the OOD data.
scores_ood_test = score_function(features_ood, logits_ood)

lam2:  1


In [9]:
# Inspect the OOD feature array loaded above.
features_ood

array([[1.2910001e+00, 3.0857370e+00, 2.6997972e-01, ..., 3.8805985e-01,
        8.1281179e-01, 7.1679465e-02],
       [6.9043016e-01, 5.4645791e+00, 4.2763495e-01, ..., 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [2.6983038e-01, 9.4914727e-02, 7.0710227e-02, ..., 3.6953717e-02,
        5.7352042e-01, 4.4760182e-01],
       ...,
       [2.1955989e-01, 0.0000000e+00, 1.0073609e-01, ..., 7.5290763e-01,
        8.8036251e-01, 1.4433448e-02],
       [8.1418520e-01, 3.0095163e-01, 2.4730463e-01, ..., 7.3645985e-01,
        4.7921285e-01, 1.7307192e-02],
       [6.2401450e-01, 7.3395565e-02, 1.7222862e-03, ..., 1.8998228e-02,
        3.7397391e-01, 5.5813424e-02]], dtype=float32)

In [10]:
# Shape of the OOD feature array.
features_ood.shape

(2048, 2048)

In [11]:
# Shape of the OOD logits array.
logits_ood.shape

(2048, 7)

In [12]:
# Inspect the OOD logits.
logits_ood

array([[ 2.1850822 , -1.7292246 , -5.7999616 , ...,  8.395324  ,
        -3.92716   ,  8.283982  ],
       [ 1.7644033 , -5.623045  , -6.709762  , ...,  1.7478005 ,
        -2.2425988 , 14.654401  ],
       [-3.3703334 , -0.29221353,  0.22391015, ..., -2.8689823 ,
        -4.1863256 , -2.9076629 ],
       ...,
       [ 0.14755909,  1.2648841 ,  3.0822575 , ...,  0.59514374,
        -0.9065864 , -0.27907914],
       [ 1.050271  ,  2.1275423 ,  1.7535232 , ...,  5.4503126 ,
         0.19324636, -3.2838979 ],
       [ 2.1367285 ,  2.1052532 ,  2.0281599 , ...,  0.9372236 ,
        -1.830505  , -2.8454196 ]], dtype=float32)

In [13]:
# Inspect the OOD labels.
labels_ood

array([6, 6, 3, ..., 0, 1, 1])

In [14]:
# Shape of the OOD label array.
labels_ood.shape

(2048,)

In [15]:
# Metrics to evaluate; handed to compute_metric through its `metrics` keyword.
metrics = ['quantiles']

# Bundle the positional arguments into groups so the call order is easy to audit.
score_arrays = (scores_id_val, scores_id_test, scores_ood_test)
id_train_arrays = (features_id_train, logits_id_train, labels_id_train)
id_test_arrays = (features_id_test, logits_id_test, labels_id_test)
ood_arrays = (features_ood, logits_ood, labels_ood)

outs = compute_metric(
    *score_arrays,
    *id_train_arrays,
    *id_test_arrays,
    *ood_arrays,
    metrics=metrics,
)

In [16]:
# Keys of `outs` (per the original author's notes):
#   n_95      - accuracy at the threshold given by the 95% percentile of the validation set
#   n_95_frac - fraction of included samples at this particular threshold
# The recorded output also contains `n_95_id_test` / `n_95_frac_id_test`; presumably these
# are the same quantities on the ID test split, with the unsuffixed keys referring to the
# OOD set - TODO confirm against compute_metric.
print(outs)

{'n_95_frac': 0.56640625, 'n_95': 0.9672414064407349, 'n_95_frac_id_test': 0.929471032745592, 'n_95_id_test': 0.9756097793579102}
