In [1]:
import os
import seaborn as sns
import pandas as pd
import numpy as np
import scipy
from tqdm import tqdm

In [2]:
def softmax(x, T = 1):
    """Compute softmax values for each sets of scores in x."""
    return np.exp(x / T) / np.sum(np.exp(x / T), axis=0)

# Class-Specific Temperature-Scaling (CS TS)

### Temperature-Scaling

In [3]:
# prepare:
# -> preacts. N x C
# -> labels. N
kcls = 10
cifar_resultsdir = './data/cifar10results/'
cnn_pred = pd.read_csv(cifar_resultsdir + 'predictions_val.csv')
targets_all = np.array(cnn_pred[['target_0', 'target_1', 'target_2', 'target_3', 'target_4', 'target_5', 'target_6', 'target_7', 'target_8', 'target_9']])
logit_all = np.array(cnn_pred[['logit_0', 'logit_1', 'logit_2', 'logit_3', 'logit_4', 'logit_5', 'logit_6', 'logit_7', 'logit_8', 'logit_9']])
preds_all_argmax = np.argmax(logit_all, axis = 1)
preacts = logit_all
labels = np.argmax(targets_all, axis = 1)
def eval_func(x):
   
    ts_logits = preacts/x
    exp_ts_logits = np.exp(ts_logits)
    sum_exp = np.sum(exp_ts_logits, axis=1, keepdims=True)
    AC = np.mean(np.max(exp_ts_logits/sum_exp, axis=1))
    preds = np.argmax(preacts, axis = 1)
    acc = np.sum(labels == preds) / len(labels)
    MC = np.abs(AC-acc)

    return MC

In [4]:
optimization_result = scipy.optimize.minimize(
                          fun=eval_func,
                          x0=np.array([1.0]),
                          method='Nelder-Mead',
                          tol=1e-07)

In [5]:
LearedTemp = optimization_result.x[0]
print(LearedTemp)

3.0181127548217805


### Class-Specific Temperature-Scaling

In [6]:
LearnedTempsCS = []
for kcls in range(10):
    label = kcls
    # -> preacts. N x C
    # -> labels. N
    def eval_func(x):

        targets_y1 = np.where(preds_all_argmax==label)[0]
        pred_class = np.argmax(logit_all, axis = 1)[targets_y1]
        target_class = np.argmax(targets_all, axis = 1)[targets_y1]

        acc = np.sum(pred_class == target_class) / len(target_class)
        prob_Topt = softmax(logit_all.transpose(), T = x).transpose()[targets_y1]
        AC = np.mean(np.max(prob_Topt, axis = 1))

        MC = np.abs(AC-acc)

        return MC
    optimization_result = scipy.optimize.minimize(
                          fun=eval_func,
                          x0=np.array([1.0]),
                          method='Nelder-Mead',
                          bounds=[(0,None)],
                          tol=1e-07)
    LearnedTempsCS.append(optimization_result.x[0])

  return np.exp(x / T) / np.sum(np.exp(x / T), axis=0)
  return np.exp(x / T) / np.sum(np.exp(x / T), axis=0)


In [7]:
print(LearnedTempsCS)

[5.052276134490974, 5.051852416992195, 3.1226871490478563, 3.831060886383063, 2.1635234832763697, 1.9345229148864767, 1.0908403396606448, 0.5756251335144038, 0.3874131202697748, 0.18110818862914965]


### Test performance

In [8]:
# bring a test condition here.
acc_results = []
AC_results = []
TS_results = []
CSTS_results = []
cifar_resultsdir = './data/cifar10results/'
corruptions = ['motion_blur']
cifar_resultsdir = './data/cifar10results/'
cnn_pred = pd.read_csv(cifar_resultsdir + 'predictions_val.csv')
for cname in tqdm(corruptions):
    csvfilename = cifar_resultsdir + 'predictions_val_' + cname + '.csv'
    cnn_pred_all = pd.read_csv(csvfilename)
    for severity in range(5):
        cnn_pred = cnn_pred_all.iloc[severity * 10000:(severity + 1) * 10000, :]
        
        kcls = 10
        targets_all = np.array(cnn_pred[['target_0', 'target_1', 'target_2', 'target_3', 'target_4', 'target_5', 'target_6', 'target_7', 'target_8', 'target_9']])
        logit_all = np.array(cnn_pred[['logit_0', 'logit_1', 'logit_2', 'logit_3', 'logit_4', 'logit_5', 'logit_6', 'logit_7', 'logit_8', 'logit_9']])
        preds_all_argmax = np.argmax(logit_all, axis = 1)
        # acc
        target_class = np.argmax(targets_all, axis = 1)
        pred_class = np.argmax(logit_all, axis = 1)
        acc = np.sum(pred_class == target_class) / len(target_class)
        prob = softmax(logit_all.transpose(), T = 1).transpose()
        probmax = np.max(prob, axis = 1)
        prob_Topt = softmax(logit_all.transpose(), T = LearedTemp).transpose()
        prob_Toptmax = np.max(prob_Topt, axis = 1)
        acc_results.append(acc)
        AC_results.append(np.mean(probmax))
        TS_results.append(np.mean(prob_Toptmax))
        targets_all = []
        preds_class_all = []
        for label in range(kcls):
            preds_all = softmax(logit_all.transpose(), T = LearnedTempsCS[label]).transpose()
            preds_all_max = np.max(preds_all, axis = 1)
            targets_y1 = np.where(preds_all_argmax==label)[0]
            preds_class = preds_all_max[targets_y1]

            preds_class_all = np.concatenate((preds_class_all, preds_class), axis=0)
        CSTS_results.append(np.mean(preds_class_all))

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.23it/s]


In [9]:
print('AC_results:')
print(np.mean(np.abs(np.array(acc_results)-np.array(AC_results))))
print('TS_results:')
print(np.mean(np.abs(np.array(acc_results)-np.array(TS_results))))
print('CSTS_results:')
print(np.mean(np.abs(np.array(acc_results)-np.array(CSTS_results))))

AC_results:
0.40090712038075144
TS_results:
0.13801236936781258
CSTS_results:
0.07333104279565443


## Class-Specific Difference of Confidences (CS DoC)

### Difference of Confidences

In [10]:
kcls = 10
cifar_resultsdir = './data/cifar10results/'
cnn_pred = pd.read_csv(cifar_resultsdir + 'predictions_val.csv')
targets_all = np.array(cnn_pred[['target_0', 'target_1', 'target_2', 'target_3', 'target_4', 'target_5', 'target_6', 'target_7', 'target_8', 'target_9']])
logit_all = np.array(cnn_pred[['logit_0', 'logit_1', 'logit_2', 'logit_3', 'logit_4', 'logit_5', 'logit_6', 'logit_7', 'logit_8', 'logit_9']])
# acc
target_class = np.argmax(targets_all, axis = 1)
pred_class = np.argmax(logit_all, axis = 1)
acc = np.sum(pred_class == target_class) / len(target_class)
prob = softmax(logit_all.transpose(), T = 1).transpose()
probmax = np.max(prob, axis = 1)
AC = np.mean(probmax)
DoC = AC-acc

In [11]:
print(DoC)

0.2052864551906859


### Class-Specific Difference of Confidences

In [12]:
targets_all = []
preds_class_all = []
CS_DoC = []
preds_all = np.array(cnn_pred[['class_0', 'class_1', 'class_2', 'class_3', 'class_4', 'class_5', 'class_6', 'class_7', 'class_8', 'class_9']])
preds_all_argmax = np.argmax(preds_all, axis = 1)
preds_all_max = np.max(preds_all, axis = 1)
class_all = np.array(cnn_pred[['target_0', 'target_1', 'target_2', 'target_3', 'target_4', 'target_5', 'target_6', 'target_7', 'target_8', 'target_9']])
class_all_argmax = np.argmax(class_all, axis = 1)
for label in range(kcls):
    targets_y1 = np.where(preds_all_argmax==label)[0]
    preds_class = preds_all_max[targets_y1]
    
    preds_class_all = np.concatenate((preds_class_all, preds_class), axis=0)
    
    preds_realclass = class_all_argmax[targets_y1]
    targets_all = np.concatenate((targets_all, np.ones(len(preds_class)) * label), axis=0)
    CS_DoC.append(np.mean(preds_class)-np.sum(preds_realclass == label) / len(targets_y1))

In [13]:
print(CS_DoC)

[0.40958592283590023, 0.2789425803337875, 0.22524099353658522, 0.32926383236198475, 0.11855897708978325, 0.10672835044499385, 0.008215537769131087, -0.041816004999999934, -0.060100967121211935, -0.0783179302022472]


### Test performance

In [14]:
# bring a test condition here.
acc_results = []
AC_results = []
DoC_results = []
CSDoC_results = []
cifar_resultsdir = './data/cifar10results/'
corruptions = ['motion_blur']

for cname in tqdm(corruptions):
    csvfilename = cifar_resultsdir + 'predictions_val_' + cname + '.csv'
    cnn_pred_all = pd.read_csv(csvfilename)
    for severity in range(5):
        cnn_pred = cnn_pred_all.iloc[severity * 10000:(severity + 1) * 10000, :]
        
        kcls = 10
        targets_all = np.array(cnn_pred[['target_0', 'target_1', 'target_2', 'target_3', 'target_4', 'target_5', 'target_6', 'target_7', 'target_8', 'target_9']])
        logit_all = np.array(cnn_pred[['logit_0', 'logit_1', 'logit_2', 'logit_3', 'logit_4', 'logit_5', 'logit_6', 'logit_7', 'logit_8', 'logit_9']])
        preds_all_argmax = np.argmax(logit_all, axis = 1)
        # acc
        target_class = np.argmax(targets_all, axis = 1)
        pred_class = np.argmax(logit_all, axis = 1)
        acc_t = np.sum(pred_class == target_class) / len(target_class)
        prob = softmax(logit_all.transpose(), T = 1).transpose()
        probmax = np.max(prob, axis = 1)
        acc_results.append(acc_t)
        AC_results.append(np.mean(probmax))
        DoC_results.append(np.mean(probmax)-DoC)
        targets_all = []
        preds_class_all = []
        for label in range(kcls):
            preds_all = softmax(logit_all.transpose(), T = 1).transpose()
            preds_all_max = np.max(preds_all, axis = 1)
            targets_y1 = np.where(preds_all_argmax==label)[0]
            preds_class = preds_all_max[targets_y1]

            preds_class_all = np.concatenate((preds_class_all, preds_class - CS_DoC[label]), axis=0)
        CSDoC_results.append(np.mean(preds_class_all))

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.17it/s]


In [15]:
print('AC_results:')
print(np.mean(np.abs(np.array(acc_results)-np.array(AC_results))))
print('DoC_results:')
print(np.mean(np.abs(np.array(acc_results)-np.array(DoC_results))))
print('CSDoC_results:')
print(np.mean(np.abs(np.array(acc_results)-np.array(CSDoC_results))))

AC_results:
0.40090712038075144
DoC_results:
0.19562066519006555
CSDoC_results:
0.12715689026860238


## Class-Specific Average Thresholded Confidence (CS ATC)

### Average Thresholded Confidence

In [16]:
# prepare:
# -> preacts. N x C
# -> labels. N
kcls = 10
cifar_resultsdir = './data/cifar10results/'
cnn_pred = pd.read_csv(cifar_resultsdir + 'predictions_val.csv')
targets_all = np.array(cnn_pred[['target_0', 'target_1', 'target_2', 'target_3', 'target_4', 'target_5', 'target_6', 'target_7', 'target_8', 'target_9']])
logit_all = np.array(cnn_pred[['logit_0', 'logit_1', 'logit_2', 'logit_3', 'logit_4', 'logit_5', 'logit_6', 'logit_7', 'logit_8', 'logit_9']])
preds_all_argmax = np.argmax(logit_all, axis = 1)
preacts = logit_all
labels = np.argmax(targets_all, axis = 1)
def eval_func(x):
    
    prob = softmax(logit_all.transpose(), T = 1).transpose()
    probmax = np.max(prob, axis = 1)
    acc_appr = np.sum(probmax > x) / len(labels)
    
    preds = np.argmax(preacts, axis = 1)
    acc = np.sum(labels == preds) / len(labels)
    
    MC = np.abs(acc_appr-acc)

    return MC

In [17]:
optimization_result = scipy.optimize.minimize(
                          fun=eval_func,
                          x0=np.array([1.0]),
                          method='Nelder-Mead',
                          tol=1e-07)

In [18]:
LearedThreshold = optimization_result.x[0]
print(LearedThreshold)

0.9285644531249999


### Class-Specific Average Thresholded Confidence

In [19]:
LearnedThresholdCS = []
for kcls in range(10):
    label = kcls
    # -> preacts. N x C
    # -> labels. N
    def eval_func(x):

        targets_y1 = np.where(preds_all_argmax==label)[0]
        target_class = np.argmax(targets_all, axis = 1)[targets_y1]
        pred_class = np.argmax(logit_all, axis = 1)[targets_y1]
        acc = np.sum(pred_class == target_class) / len(target_class)
        prob = softmax(logit_all.transpose(), T = 1).transpose()[targets_y1]
        probmax = np.max(prob, axis = 1)

        acc_appr = np.sum(probmax > x) / len(target_class)

        MC = np.abs(acc_appr-acc)

        return MC
    optimization_result = scipy.optimize.minimize(
                          fun=eval_func,
                          x0=np.array([1.0]),
                          method='Nelder-Mead',
                          tol=1e-07)
    LearnedThresholdCS.append(optimization_result.x[0])

In [20]:
print(LearnedThresholdCS)

[0.9974121093750001, 0.9984619140625002, 0.9273437499999999, 0.9602539062499998, 0.840234375, 0.7640624999999998, 0.6718749999999997, 0.5499999999999996, 0.5468749999999996, 0.39999999999999947]


### Test performance

In [21]:
# bring a test condition here.
acc_results = []
AC_results = []
ATC_results = []
CSATC_results = []
cifar_resultsdir = './data/cifar10results/'
corruptions = ['motion_blur']

for cname in tqdm(corruptions):
    csvfilename = cifar_resultsdir + 'predictions_val_' + cname + '.csv'
    cnn_pred_all = pd.read_csv(csvfilename)
    for severity in range(5):
        cnn_pred = cnn_pred_all.iloc[severity * 10000:(severity + 1) * 10000, :]
        
        kcls = 10
        targets_all = np.array(cnn_pred[['target_0', 'target_1', 'target_2', 'target_3', 'target_4', 'target_5', 'target_6', 'target_7', 'target_8', 'target_9']])
        logit_all = np.array(cnn_pred[['logit_0', 'logit_1', 'logit_2', 'logit_3', 'logit_4', 'logit_5', 'logit_6', 'logit_7', 'logit_8', 'logit_9']])
        preds_all_argmax = np.argmax(logit_all, axis = 1)
        # acc
        target_class = np.argmax(targets_all, axis = 1)
        pred_class = np.argmax(logit_all, axis = 1)
        acc = np.sum(pred_class == target_class) / len(target_class)
        prob = softmax(logit_all.transpose(), T = 1).transpose()
        probmax = np.max(prob, axis = 1)
        acc_results.append(acc)
        AC_results.append(np.mean(probmax))
        ATC_results.append(np.sum(probmax > LearedThreshold) / len(target_class))
        hit_cnt = 0
        for label in range(kcls):
            preds_all = softmax(logit_all.transpose(), T = 1).transpose()
            preds_all_max = np.max(preds_all, axis = 1)
            targets_y1 = np.where(preds_all_argmax==label)[0]
            preds_class = preds_all_max[targets_y1]

        #     print(np.sum(preds_class > learnTs[label]) / len(targets_y1))
            hit_cnt += np.sum(preds_class > LearnedThresholdCS[label])
        CSATC_results.append(hit_cnt / len(target_class))

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.52it/s]


In [22]:
print('AC_results:')
print(np.mean(np.abs(np.array(acc_results)-np.array(AC_results))))
print('ATC_results:')
print(np.mean(np.abs(np.array(acc_results)-np.array(ATC_results))))
print('CSATC_results:')
print(np.mean(np.abs(np.array(acc_results)-np.array(CSATC_results))))

AC_results:
0.40090712038075144
ATC_results:
0.12510000000000004
CSATC_results:
0.04414


## Class-Specific Temperature-Scaling Average Thresholded Confidence (CS TS-ATC)

### Temperature-Scaling Average Thresholded Confidence

In [23]:
# prepare:
# -> preacts. N x C
# -> labels. N
kcls = 10
cifar_resultsdir = './data/cifar10results/'
cnn_pred = pd.read_csv(cifar_resultsdir + 'predictions_val.csv')
targets_all = np.array(cnn_pred[['target_0', 'target_1', 'target_2', 'target_3', 'target_4', 'target_5', 'target_6', 'target_7', 'target_8', 'target_9']])
logit_all = np.array(cnn_pred[['logit_0', 'logit_1', 'logit_2', 'logit_3', 'logit_4', 'logit_5', 'logit_6', 'logit_7', 'logit_8', 'logit_9']])
preds_all_argmax = np.argmax(logit_all, axis = 1)
preacts = logit_all
labels = np.argmax(targets_all, axis = 1)
def eval_func(x):
    
    prob = softmax(logit_all.transpose(), T = LearedTemp).transpose()
    probmax = np.max(prob, axis = 1)
    acc_appr = np.sum(probmax > x) / len(labels)
    
    preds = np.argmax(preacts, axis = 1)
    acc = np.sum(labels == preds) / len(labels)
    
    MC = np.abs(acc_appr-acc)

    return MC

In [24]:
optimization_result = scipy.optimize.minimize(
                          fun=eval_func,
                          x0=np.array([1.0]),
                          method='Nelder-Mead',
                          tol=1e-07)

In [25]:
LearedThreshold = optimization_result.x[0]
print(LearedThreshold)

0.5443847656249996


### Class-Specific Temperature-Scaling Average Thresholded Confidence

In [26]:
LearnedThresholdCS = []
for kcls in range(10):
    label = kcls
    # -> preacts. N x C
    # -> labels. N
    def eval_func(x):

        targets_y1 = np.where(preds_all_argmax==label)[0]
        target_class = np.argmax(targets_all, axis = 1)[targets_y1]
        pred_class = np.argmax(logit_all, axis = 1)[targets_y1]
        acc = np.sum(pred_class == target_class) / len(target_class)
        prob = softmax(logit_all.transpose(), T = LearnedTempsCS[label]).transpose()[targets_y1]
        probmax = np.max(prob, axis = 1)

        acc_appr = np.sum(probmax > x) / len(target_class)

        MC = np.abs(acc_appr-acc)

        return MC
    optimization_result = scipy.optimize.minimize(
                          fun=eval_func,
                          x0=np.array([1.0]),
                          method='Nelder-Mead',
                          tol=1e-07)
    LearnedThresholdCS.append(optimization_result.x[0])

In [27]:
print(LearnedThresholdCS)

[0.48828124999999956, 0.5343749999999996, 0.5203124999999996, 0.48749999999999954, 0.5683593749999997, 0.5812499999999996, 0.6437499999999997, 0.6624999999999996, 0.6437499999999997, 0.6499999999999997]


### Test performance

In [28]:
# bring a test condition here.
acc_results = []
AC_results = []
ATC_results = []
CSATC_results = []
cifar_resultsdir = './data/cifar10results/'
corruptions = ['motion_blur']

for cname in tqdm(corruptions):
    csvfilename = cifar_resultsdir + 'predictions_val_' + cname + '.csv'
    cnn_pred_all = pd.read_csv(csvfilename)
    for severity in range(5):
        cnn_pred = cnn_pred_all.iloc[severity * 10000:(severity + 1) * 10000, :]
        
        kcls = 10
        targets_all = np.array(cnn_pred[['target_0', 'target_1', 'target_2', 'target_3', 'target_4', 'target_5', 'target_6', 'target_7', 'target_8', 'target_9']])
        logit_all = np.array(cnn_pred[['logit_0', 'logit_1', 'logit_2', 'logit_3', 'logit_4', 'logit_5', 'logit_6', 'logit_7', 'logit_8', 'logit_9']])
        preds_all_argmax = np.argmax(logit_all, axis = 1)
        # acc
        target_class = np.argmax(targets_all, axis = 1)
        pred_class = np.argmax(logit_all, axis = 1)
        acc = np.sum(pred_class == target_class) / len(target_class)
        prob = softmax(logit_all.transpose(), T = LearedTemp).transpose()
        probmax = np.max(prob, axis = 1)
        acc_results.append(acc)
        AC_results.append(np.mean(probmax))
        ATC_results.append(np.sum(probmax > LearedThreshold) / len(target_class))
        hit_cnt = 0
        for label in range(kcls):
            preds_all = softmax(logit_all.transpose(), T = LearnedTempsCS[label]).transpose()
            preds_all_max = np.max(preds_all, axis = 1)
            targets_y1 = np.where(preds_all_argmax==label)[0]
            preds_class = preds_all_max[targets_y1]

        #     print(np.sum(preds_class > learnTs[label]) / len(targets_y1))
            hit_cnt += np.sum(preds_class > LearnedThresholdCS[label])
        CSATC_results.append(hit_cnt / len(target_class))

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.33it/s]


In [29]:
print('AC_results:')
print(np.mean(np.abs(np.array(acc_results)-np.array(AC_results))))
print('TS_ATC_results:')
print(np.mean(np.abs(np.array(acc_results)-np.array(ATC_results))))
print('CSTS_ATC_results:')
print(np.mean(np.abs(np.array(acc_results)-np.array(CSATC_results))))

AC_results:
0.13801236936781258
TS_ATC_results:
0.09695999999999999
CSTS_ATC_results:
0.022860000000000002
