In [1]:
import os

import numpy as np
import yaml

from competence_estimation.utils import load_data, get_network_weights, mix_open
from competence_estimation.scores  import create_score_function
from competence_estimation.metrics  import compute_metric

# Short Introduction

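The following in-distribution (ID) files are required in `dataset_path`:
* `{algorithm}_{x}_iid_train.npy`
* `{algorithm}_{x}_iid_val.npy`
* `{algorithm}_{x}_iid_test.npy`

where `x` is one of `features`, `logits`, `labels`.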

The same is required for the out-of-distribution (OOD) samples:
* `{algorithm}_{x}_ood_test.npy`

where `x` is one of `features`, `logits`, `labels`.

Additionally, we require the weights `w` and biases `b` of the last layer, stored as `{algorithm}_W.npy` and `{algorithm}_b.npy`.

# Data and Config Loading

In [23]:
# Read the settings that are later forwarded to `create_score_function`
# as keyword arguments (`**config`).
with open("config.yml", "r", encoding="utf-8") as file:
    config = yaml.safe_load(file)

# `yaml.safe_load` yields None for an empty document; use an empty dict
# instead so that unpacking with `**config` does not raise a TypeError.
if config is None:
    config = {}

In [28]:
# Directory that holds the exported .npy files. Point the COMPETENCE_DATA_DIR
# environment variable at your own copy; if it is unset, the original example
# location is used.
dataset_path = os.environ.get(
    "COMPETENCE_DATA_DIR",
    "/home/lorenzp/workspace/competence_estimation/example_features",
)
algorithm = "ERM"


def _load_npy(name):
    """Load ``{dataset_path}/{algorithm}_{name}.npy`` and return the array."""
    return np.load(f"{dataset_path}/{algorithm}_{name}.npy")


# Weights and biases of the last layer
w = _load_npy("W")
b = _load_npy("b")

# Features/logits/labels of the in-distribution training data
features_id_train = _load_npy("features_iid_train")
logits_id_train = _load_npy("logits_iid_train")
labels_id_train = _load_npy("labels_iid_train")

# Features/logits/labels of the in-distribution validation data
features_id_val = _load_npy("features_iid_val")
logits_id_val = _load_npy("logits_iid_val")
labels_id_val = _load_npy("labels_iid_val")

# Features/logits/labels of the in-distribution test data
features_id_test = _load_npy("features_iid_test")
logits_id_test = _load_npy("logits_iid_test")
labels_id_test = _load_npy("labels_iid_test")

# Score function creation

### Available values for `score_function_name`: "max_softmax", "max_logit", "vim", "mahalanobis", "knn", "energy", "energy_react", "GMM", "HBOS", "PCA"

In [29]:
# Which score to build ('vim' was the alternative noted here).
score_function_name = 'knn'

# `create_score_function` receives the ID train and validation
# features/logits/labels, the last-layer weights/biases and the config
# entries. It returns the scores of the validation set together with a
# callable that is applied to (features, logits) of other splits.
scores_id_val, score_function = create_score_function(
    features_id_train, logits_id_train, labels_id_train,
    features_id_val, logits_id_val, labels_id_val,
    w, b,
    score_function=score_function_name,
    **config
)

## Compute various scores and metrics

In [30]:
# Scores of the in-distribution train and test splits.
scores_id_train = score_function(features_id_train, logits_id_train)
scores_id_test = score_function(features_id_test, logits_id_test)

# Out-of-distribution (OOD) test data: features/logits/labels and their scores.
features_ood = np.load(f"{dataset_path}/{algorithm}_features_ood_test.npy")
logits_ood = np.load(f"{dataset_path}/{algorithm}_logits_ood_test.npy")
labels_ood = np.load(f"{dataset_path}/{algorithm}_labels_ood_test.npy")
scores_ood_test = score_function(features_ood, logits_ood)

# Metrics requested from `compute_metric`.
metrics = ['quantiles']

# Positional arguments, one group per line:
# scores, then ID train, ID test and OOD features/logits/labels.
outs = compute_metric(
    scores_id_val, scores_id_test, scores_ood_test,
    features_id_train, logits_id_train, labels_id_train,
    features_id_test, logits_id_test, labels_id_test,
    features_ood, logits_ood, labels_ood,
    metrics=metrics,
)

In [31]:
# n_95 is the accuracy at the threshold given by the 95th percentile of the
# validation-set scores.
# n_95_frac is the fraction of samples included at this particular threshold.
# NOTE(review): the recorded output below shows n_95_frac = 0.0 and n_95 = nan,
# i.e. no sample was included at the threshold -- verify the inputs/score
# orientation before relying on this result.
print(outs)

{'n_95_frac': 0.0, 'n_95': nan}
