In [21]:
import numpy as np
import yaml

from competence_estimation.utils import load_data, get_network_weights, mix_open
from competence_estimation.scores  import create_score_function
from competence_estimation.metrics  import compute_metric

# Short Introduction

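The following files are required for the in-distribution (ID) data:
* {algorithm}_{x}_iid_train.npy
* {algorithm}_{x}_iid_val.npy
* {algorithm}_{x}_iid_test.npy

where x = features, logits, labels and {algorithm} is the name of the training algorithm (e.g. ERM)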

and the same for the out-of-distribution (OOD) samples:
* {algorithm}_{x}_ood_test.npy

where x = features, logits, labels

Additionally, we require the weights `w` and biases `b` of the last layer, stored as `{algorithm}_W.npy` and `{algorithm}_b.npy`.

# Data and Config Loading

In [53]:
# Parse the YAML configuration file from the current working directory.
# The resulting `config` entries are forwarded as keyword arguments when the
# score function is created.
with open("config.yml", "r") as config_stream:
    config = yaml.safe_load(config_stream)

In [54]:
# Directory that holds the exported .npy files -- replace with your own path.
# NOTE: this is a machine-specific absolute path.
# dataset_path = "/home/DATA/ITWM/lorenzp/"
dataset_path = "/home/lorenzp/workspace/competence_estimation/features/example_features"
algorithm = "ERM"


def _load_array(name):
    """Load ``{dataset_path}/{algorithm}_{name}.npy`` with ``np.load``.

    Parameters
    ----------
    name : str
        File-name part that follows the algorithm prefix, e.g. ``"W"`` or
        ``"features_iid_train"``.

    Returns
    -------
    The object returned by ``np.load`` for that file.
    """
    return np.load(f"{dataset_path}/{algorithm}_{name}.npy")


def _load_split(split):
    """Return the ``(features, logits, labels)`` arrays of one data split.

    ``split`` is the file-name suffix of the split, e.g. ``"iid_train"``.
    The three files are loaded in the order features, logits, labels.
    """
    return tuple(
        _load_array(f"{kind}_{split}") for kind in ("features", "logits", "labels")
    )


# Weights and biases of the last layer
w = _load_array("W")
b = _load_array("b")

# Features/logits/labels training data
features_id_train, logits_id_train, labels_id_train = _load_split("iid_train")

# Features/logits/labels validation data
features_id_val, logits_id_val, labels_id_val = _load_split("iid_val")

# Features/logits/labels test data
features_id_test, logits_id_test, labels_id_test = _load_split("iid_test")

In [55]:
# Shape of the last-layer weight matrix loaded from `{algorithm}_W.npy`.
w.shape

(7, 2048)

In [56]:
# Shape of the last-layer bias vector loaded from `{algorithm}_b.npy`.
b.shape

(7,)

In [57]:
# Shape of the training features array.
features_id_train.shape

(6369, 2048)

In [58]:
# Shape of the training logits array.
logits_id_train.shape

(6369, 7)

In [59]:
# Shape of the training labels array.
labels_id_train.shape

(6369,)

# Score function creation

### Available score functions: "max_softmax", "max_logit", "vim", "mahalanobis", "knn", "energy", "energy_react", "GMM", "HBOS", "PCA"

In [60]:
# Name of the score to build; it is passed to create_score_function through
# its `score_function` keyword argument.
score_function_name = 'vim' # score selected for this run

# create_score_function receives, in this order: the training
# features/logits/labels, the validation features/logits/labels and the
# last-layer parameters w and b. All entries of `config` are forwarded as
# additional keyword arguments.
# It returns two values: `scores_id_val` (presumably the scores of the
# validation data -- confirm in competence_estimation.scores) and
# `score_function`, a callable that is applied to (features, logits) below.
scores_id_val, score_function = create_score_function(
    features_id_train,
    logits_id_train,
    labels_id_train,
    features_id_val,
    logits_id_val,
    labels_id_val,
    w,
    b,
    score_function =  score_function_name,
    **config
)

## Compute various scores and metrics

In [61]:
# Apply the score function to the training split first and the test split
# second; each call receives the split's features and logits.
scores_id_train, scores_id_test = (
    score_function(split_features, split_logits)
    for split_features, split_logits in (
        (features_id_train, logits_id_train),
        (features_id_test, logits_id_test),
    )
)

# Display the training scores as the cell output.
scores_id_train

array([-23.556686  ,   1.4115973 ,   7.269142  , ...,   8.015827  ,
        -0.33691406, -10.059322  ], dtype=float32)

In [62]:
# Display the scores computed for the test split.
scores_id_test

array([ -3.1421204 ,  -0.6797142 ,  10.343679  ,  -2.6297283 ,
         3.4795742 ,  20.857525  ,  18.590517  ,  12.578637  ,
         4.461241  ,  10.011418  ,   6.9155636 ,  15.008051  ,
         8.807006  ,  24.769766  ,   7.5085917 ,  22.968039  ,
       -13.371336  ,  16.79132   ,  12.012598  ,  11.731669  ,
        10.7676735 ,  17.33752   ,  18.459084  ,   5.0608797 ,
         2.3771515 ,   3.273281  ,   0.57849884,   7.6973534 ,
       -14.278738  ,  19.1225    ,   6.722851  ,  12.2487335 ,
        17.017563  ,  19.328318  ,   2.708479  ,  -7.4020233 ,
       -11.728733  ,  28.053986  ,  -9.978165  ,  -5.8113804 ,
        23.396343  ,  10.134261  ,   1.552084  ,  -5.7634697 ,
        -3.0722904 ,  -2.4378529 ,  26.043074  , -13.257246  ,
         7.641264  ,  -7.8015594 ,  -7.566679  ,   8.807872  ,
         2.4137774 ,  11.679185  ,  22.581081  ,   1.8140488 ,
       -12.327469  ,  19.503408  ,   4.524312  ,   8.16357   ,
         1.5408039 ,   7.609622  ,  15.156078  ,  -2.09

In [44]:
# Load the features/logits/labels of the OOD test data (in that order) from
# the same directory and algorithm prefix as the other arrays ...
features_ood, logits_ood, labels_ood = (
    np.load(f"{dataset_path}/{algorithm}_{kind}_ood_test.npy")
    for kind in ("features", "logits", "labels")
)

# ... and score the OOD samples with the score function created earlier.
scores_ood_test = score_function(features_ood, logits_ood)

In [45]:
# Display the OOD feature array.
features_ood

array([[1.2910001e+00, 3.0857370e+00, 2.6997972e-01, ..., 3.8805985e-01,
        8.1281179e-01, 7.1679465e-02],
       [6.9043016e-01, 5.4645791e+00, 4.2763495e-01, ..., 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [2.6983038e-01, 9.4914727e-02, 7.0710227e-02, ..., 3.6953717e-02,
        5.7352042e-01, 4.4760182e-01],
       ...,
       [2.1955989e-01, 0.0000000e+00, 1.0073609e-01, ..., 7.5290763e-01,
        8.8036251e-01, 1.4433448e-02],
       [8.1418520e-01, 3.0095163e-01, 2.4730463e-01, ..., 7.3645985e-01,
        4.7921285e-01, 1.7307192e-02],
       [6.2401450e-01, 7.3395565e-02, 1.7222862e-03, ..., 1.8998228e-02,
        3.7397391e-01, 5.5813424e-02]], dtype=float32)

In [46]:
# Shape of the OOD feature array.
features_ood.shape

(2048, 2048)

In [47]:
# Shape of the OOD logits array.
logits_ood.shape

(2048, 7)

In [48]:
# Display the OOD logits array.
logits_ood

array([[ 2.1850822 , -1.7292246 , -5.7999616 , ...,  8.395324  ,
        -3.92716   ,  8.283982  ],
       [ 1.7644033 , -5.623045  , -6.709762  , ...,  1.7478005 ,
        -2.2425988 , 14.654401  ],
       [-3.3703334 , -0.29221353,  0.22391015, ..., -2.8689823 ,
        -4.1863256 , -2.9076629 ],
       ...,
       [ 0.14755909,  1.2648841 ,  3.0822575 , ...,  0.59514374,
        -0.9065864 , -0.27907914],
       [ 1.050271  ,  2.1275423 ,  1.7535232 , ...,  5.4503126 ,
         0.19324636, -3.2838979 ],
       [ 2.1367285 ,  2.1052532 ,  2.0281599 , ...,  0.9372236 ,
        -1.830505  , -2.8454196 ]], dtype=float32)

In [49]:
# Display the OOD labels array.
labels_ood

array([6, 6, 3, ..., 0, 1, 1])

In [50]:
# Shape of the OOD labels array.
labels_ood.shape

(2048,)

In [51]:
# Metrics to compute. With 'quantiles' the returned dictionary contains the
# keys 'n_95' and 'n_95_frac' (see the printed result of the next cell).
metrics = ['quantiles']

# compute_metric takes its inputs positionally, so the order matters.
outs = compute_metric(
    # Scores: validation, test and OOD test
    scores_id_val,
    scores_id_test,
    scores_ood_test,
    # Training features/logits/labels
    features_id_train,
    logits_id_train,
    labels_id_train,
    # Test features/logits/labels
    features_id_test,
    logits_id_test,
    labels_id_test,
    # OOD features/logits/labels
    features_ood,
    logits_ood,
    labels_ood,
    metrics=metrics
)

The dictionary `outs` returned by `compute_metric` holds the requested metrics; its entries are explained and printed in the next cell.


In [52]:
# n_95:      accuracy at the threshold given by the 95th percentile of the
#            validation set
# n_95_frac: fraction of samples included at this particular threshold
print(outs)

{'n_95_frac': 0.56640625, 'n_95': 0.9672414064407349}
