In [1]:
import os, os.path
import numpy as np
import math
import torch
import torchvision
import torchvision.models as models
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import dirichlet
from tqdm import tqdm
from time import time
from torchvision import datasets, transforms
from torch import nn, optim
from torch.nn.utils import clip_grad_norm_
from torch.optim.lr_scheduler import LambdaLR
from sklearn import metrics
from torch.distributions import Dirichlet
from torch.distributions.kl import _kl_dirichlet_dirichlet
from scipy.special import digamma
from scipy.stats import entropy

In [2]:
torch.cuda.is_available()

True

In [3]:
normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])

In [4]:
trainset = datasets.CIFAR10('cifar10', download=True, train=True, transform=transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, 4, padding_mode='reflect'),
            transforms.ToTensor(),
            normalize,
        ]))

trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=4)

testset = datasets.CIFAR10('cifar10', download=True, train=False, transform=transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, 4, padding_mode='reflect'),
            transforms.ToTensor(),
            normalize,
        ]))

testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=4)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
oodset1 = datasets.CIFAR100('cifar100', download=True, train=False, transform=transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, 4, padding_mode='reflect'),
            transforms.ToTensor(),
            normalize,
        ]))
oodloader1 = torch.utils.data.DataLoader(oodset1, batch_size=128, shuffle=False, num_workers=4)

Files already downloaded and verified


In [6]:
oodset2 = datasets.ImageFolder(root='images/', transform=transforms.Compose([
            transforms.Resize(32),
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, 4, padding_mode='reflect'),
            transforms.ToTensor(),
            normalize,
        ]))

oodloader2 = torch.utils.data.DataLoader(oodset2, batch_size=128, shuffle=False, num_workers=4)

In [7]:
class VGGNet(nn.Module):
    def __init__(self, num_classes):
        super(VGGNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1, padding_mode='reflect'),    # Conv1
            nn.ReLU(True),                     # ReLU1
            nn.BatchNorm2d(64),                 # BatchNorm1
            nn.Dropout(0.3),                     # Dropout1
            nn.Conv2d(64, 64, 3, padding=1, padding_mode='reflect'),   # Conv2
            nn.ReLU(True),                     # ReLU2
            nn.BatchNorm2d(64),                 # BatchNorm2
            nn.MaxPool2d(2),                  # MaxPool2
            nn.Conv2d(64, 128, 3, padding=1, padding_mode='reflect'),  # Conv3
            nn.ReLU(True),                     # ReLU3
            nn.BatchNorm2d(128),                # BatchNorm3
            nn.Dropout(0.4),                     # Dropout3
            nn.Conv2d(128, 128, 3, padding=1, padding_mode='reflect'), # Conv4
            nn.ReLU(True),                     # ReLU4
            nn.BatchNorm2d(128),                # BatchNorm4
            nn.MaxPool2d(2),                  # MaxPool4
            nn.Conv2d(128, 256, 3, padding=1, padding_mode='reflect'), # Conv5
            nn.ReLU(True),                     # ReLU5
            nn.BatchNorm2d(256),                # BatchNorm5
            nn.Dropout(0.4),                     # Dropout5
            nn.Conv2d(256, 256, 3, padding=1, padding_mode='reflect'), # Conv6
            nn.ReLU(True),                     # ReLU6
            nn.BatchNorm2d(256),                # BatchNorm6
            nn.Dropout(0.4),                     # Dropout6
            nn.Conv2d(256, 256, 3, padding=1, padding_mode='reflect'), # Conv7
            nn.ReLU(True),                     # ReLU7
            nn.BatchNorm2d(256),                # BatchNorm7
            nn.MaxPool2d(2),                  # MaxPool7
            nn.Conv2d(256, 512, 3, padding=1, padding_mode='reflect'), # Conv8
            nn.ReLU(True),                     # ReLU8
            nn.BatchNorm2d(512),                # BatchNorm8
            nn.Dropout(0.4),                     # Dropout8
            nn.Conv2d(512, 512, 3, padding=1, padding_mode='reflect'), # Conv9
            nn.ReLU(True),                     # ReLU9
            nn.BatchNorm2d(512),                # BatchNorm9
            nn.Dropout(0.4),                     # Dropout9
            nn.Conv2d(512, 512, 3, padding=1, padding_mode='reflect'), # Conv10
            nn.ReLU(True),                     # ReLU10
            nn.BatchNorm2d(512),                # BatchNorm10
            nn.MaxPool2d(2),                  # MaxPool10
            nn.Conv2d(512, 512, 3, padding=1, padding_mode='reflect'), # Conv11
            nn.ReLU(True),                     # ReLU11
            nn.BatchNorm2d(512),                # BatchNorm11
            nn.Dropout(0.4),                     # Dropout11
            nn.Conv2d(512, 512, 3, padding=1, padding_mode='reflect'), # Conv12
            nn.ReLU(True),                     # ReLU12
            nn.BatchNorm2d(512),                # BatchNorm12
            nn.Dropout(0.4),                     # Dropout12
            nn.Conv2d(512, 512, 3, padding=1, padding_mode='reflect'), # Conv13
            nn.ReLU(True),                     # ReLU13
            nn.BatchNorm2d(512),                # BatchNorm13
            nn.MaxPool2d(2),                  # MaxPool13
            nn.Dropout(p=0.5),                                         # EndDropout
        )
        self.classifier = nn.Sequential(
            nn.Linear(512, 512),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(512, num_classes),
        )
            
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [8]:
beta = 100

In [9]:
def kl_div_dirichlet(a_concentrations, b_concentrations):
    a_dirichlet = Dirichlet(a_concentrations)
    b_dirichlet = Dirichlet(b_concentrations)
    return _kl_dirichlet_dirichlet(p=a_dirichlet, q=b_dirichlet)
def RKL_loss(outputs, labels):
    alphas = torch.exp(outputs)
    alpha_0 = torch.sum(alphas, dim=1)
    y_onehot_target = torch.eye(10)[labels].to(device)
    alpha_y = (alphas * y_onehot_target).sum(dim=1)
    rce_loss = torch.digamma(alpha_0) - torch.digamma(alpha_y)
    uniform_concentrations = torch.ones_like(y_onehot_target).to(device)
    kl_uniform_reg = kl_div_dirichlet(alphas, uniform_concentrations)
    kl_uniform_reg = torch.clamp(kl_uniform_reg, 0, 1000)  
    loss = rce_loss + 1/beta * kl_uniform_reg
    return torch.mean(loss)

In [10]:
device = 'cuda'

In [11]:
m_number = len([name for name in os.listdir('Models'+str(beta))])

In [12]:
models = [VGGNet(10) for _i in range(m_number)]
models = [torch.nn.DataParallel(model.to(device)) for model in models]
cudnn.benchmark = True

In [13]:
for i in range(m_number):
    models[i].load_state_dict(torch.load('Models'+str(beta)+'/'+str(i)))
    models[i].eval()

In [14]:
outputs_test = [[] for _i in range(m_number)]
outputs_ood1 = [[] for _i in range(m_number)]
outputs_ood2 = [[] for _i in range(m_number)]
labs = np.array([])

In [15]:
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        labs = np.concatenate((labs, labels.cpu().numpy()))
        for i in range(m_number):
            output = models[i](images)
            outputs_test[i].append(output.cpu().numpy())

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [16]:
with torch.no_grad():
    for data_ood1 in oodloader1:
        images_ood1, labels_ood1 = data_ood1
        for i in range(m_number):
            output_ood1 = models[i](images_ood1)
            outputs_ood1[i].append(output_ood1.cpu().numpy())

In [17]:
with torch.no_grad():
    for data_ood2 in oodloader2:
        images_ood2, labels_ood2 = data_ood2
        for i in range(m_number):
            output_ood2 = models[i](images_ood2)
            outputs_ood2[i].append(output_ood2.cpu().numpy())

In [18]:
for i in range(m_number):
    outputs_test[i] = np.exp(np.concatenate(outputs_test[i]))

for i in range(m_number):
    outputs_ood1[i] = np.exp(np.concatenate(outputs_ood1[i]))

for i in range(m_number):
    outputs_ood2[i] = np.exp(np.concatenate(outputs_ood2[i]))

In [19]:
def ensemble_output(outputs):
    mean_output = np.zeros(outputs[0].shape)
    for i in tqdm(range(outputs[0].shape[0])):
        samples = np.concatenate([np.random.dirichlet(output[i], 100) for output in outputs])
        mean_output[i] = dirichlet.mle(samples)
    return(mean_output)

In [20]:
ensemble_outputs_test = ensemble_output(outputs_test)
ensemble_outputs_ood1 = ensemble_output(outputs_ood1)
ensemble_outputs_ood2 = ensemble_output(outputs_ood2)

100%|██████████| 10000/10000 [02:46<00:00, 60.10it/s]
100%|██████████| 10000/10000 [05:47<00:00, 28.76it/s]
100%|██████████| 10000/10000 [05:34<00:00, 29.89it/s]


In [21]:
eot_classic = np.mean(outputs_test, axis = 0)
eo1_classic = np.mean(outputs_ood1, axis = 0)
eo2_classic = np.mean(outputs_ood2, axis = 0)

In [22]:
def model_accuracy(outputs, labels):
    acc = np.round(100*np.mean(np.argmax(outputs, axis = 1) == labels),1)
    return(acc)

print('For the classic ensemble, the accuracy is: ', model_accuracy(eot_classic, labs),'%.', sep ='')
print('For my ensemble, the accuracy is: ', model_accuracy(ensemble_outputs_test, labs),'%.', sep ='')

For the classic ensemble, the accuracy is: 95.0%.
For my ensemble, the accuracy is: 95.0%.


In [23]:
def model_accuracy_e(outputs, labels):
    accs = []
    for i in range(len(outputs)):
        acc = model_accuracy(outputs[i], labels)
        accs.append(acc)
        print('For the model ',i,', the accuracy is: ', acc, '%.', sep = '')
    print('The average accuracy is: ', np.round(np.mean(accs),1),'%.', sep ='')
    return(accs)

model_accuracy_e(outputs_test, labs)

For the model 0, the accuracy is: 93.7%.
For the model 1, the accuracy is: 93.7%.
For the model 2, the accuracy is: 93.6%.
For the model 3, the accuracy is: 93.3%.
For the model 4, the accuracy is: 93.7%.
For the model 5, the accuracy is: 93.7%.
For the model 6, the accuracy is: 93.5%.
For the model 7, the accuracy is: 93.5%.
For the model 8, the accuracy is: 93.4%.
For the model 9, the accuracy is: 93.5%.
The average accuracy is: 93.6%.


[93.7, 93.7, 93.6, 93.3, 93.7, 93.7, 93.5, 93.5, 93.4, 93.5]

In [24]:
big_outputs_ood1 = [np.concatenate((outputs_test[i], outputs_ood1[i]), axis = 0) for i in range(m_number)]
big_outputs_ood2 = [np.concatenate((outputs_test[i], outputs_ood2[i]), axis = 0) for i in range(m_number)]
big_eo1_classic = np.mean(big_outputs_ood1, axis = 0)
big_eo2_classic = np.mean(big_outputs_ood2, axis = 0)
big_eo1 = np.concatenate((ensemble_outputs_test, ensemble_outputs_ood1), axis = 0)
big_eo2 = np.concatenate((ensemble_outputs_test, ensemble_outputs_ood2), axis = 0)

In [25]:
labs_ood = np.concatenate((np.zeros(10000), np.ones(10000)))

In [26]:
def MCP(outputs, labels, ood = False):
    probas = (outputs/np.sum(outputs, axis = 1).reshape(-1, 1))
    miss = (1-(np.argmax(probas, axis = 1) == labels))
    if ood:
        miss = labels
    fpr, tpr, thresholds = metrics.roc_curve(miss, -np.max(probas, axis = 1), pos_label=1)
    return(np.round(100*metrics.auc(fpr, tpr), 1))

print('For the classic ensemble (mis.), the MCP is: ', MCP(eot_classic, labs),'%.', sep ='')
print('For my ensemble (mis.), the MCP is: ', MCP(ensemble_outputs_test, labs),'%.', sep ='')

print('For the classic ensemble (OOD1.), the MCP is: ', MCP(big_eo1_classic, labs_ood, True),'%.', sep ='')
print('For my ensemble (OOD1.), the MCP is: ', MCP(big_eo1, labs_ood, True),'%.', sep ='')

print('For the classic ensemble (OOD2.), the MCP is: ', MCP(big_eo2_classic, labs_ood, True),'%.', sep ='')
print('For my ensemble (OOD2.), the MCP is: ', MCP(big_eo2, labs_ood, True),'%.', sep ='')

For the classic ensemble (mis.), the MCP is: 92.4%.
For my ensemble (mis.), the MCP is: 92.4%.
For the classic ensemble (OOD1.), the MCP is: 88.1%.
For my ensemble (OOD1.), the MCP is: 88.3%.
For the classic ensemble (OOD2.), the MCP is: 87.6%.
For my ensemble (OOD2.), the MCP is: 87.9%.


In [27]:
def MCP_Ensemble(outputs, labels, ood = False):
    MCPs = []
    for i in range(len(outputs)):
        MCP_model = MCP(outputs[i], labels, ood)
        MCPs.append(MCP_model)
    print('Average:',  np.round(np.mean(MCPs),1), '\n')
    return(MCPs)

MCP_Ensemble(outputs_test, labs)
MCP_Ensemble(big_outputs_ood1 , labs_ood, True)
MCP_Ensemble(big_outputs_ood2, labs_ood, True)

Average: 87.3 

Average: 81.5 

Average: 81.4 



[81.4, 81.5, 81.1, 81.3, 82.1, 81.4, 80.6, 81.3, 81.6, 82.1]

In [28]:
def Entropy(outputs, labels, ood = False):
    probas = (outputs/np.sum(outputs, axis = 1).reshape(-1, 1))
    entropies = entropy(probas, axis = 1)
    miss = (1-(np.argmax(probas, axis = 1) == labels))
    if ood:
        miss = labels
    fpr, tpr, thresholds = metrics.roc_curve(miss, entropies, pos_label=1)
    return(np.round(100*metrics.auc(fpr, tpr), 1))

print('For the classic ensemble (mis.), the Entropy is: ', Entropy(eot_classic, labs),'%.', sep ='')
print('For my ensemble (mis.), the Entropy is: ', Entropy(ensemble_outputs_test, labs),'%.', sep ='')

print('For the classic ensemble (OOD1.), the Entropy is: ', Entropy(big_eo1_classic, labs_ood, True),'%.', sep ='')
print('For my ensemble (OOD1.), the Entropy is: ', Entropy(big_eo1, labs_ood, True),'%.', sep ='')

print('For the classic ensemble (OOD2.), the Entropy is: ', Entropy(big_eo2_classic, labs_ood, True),'%.', sep ='')
print('For my ensemble (OOD2.), the Entropy is: ', Entropy(big_eo2, labs_ood, True),'%.', sep ='')

For the classic ensemble (mis.), the Entropy is: 92.2%.
For my ensemble (mis.), the Entropy is: 92.2%.
For the classic ensemble (OOD1.), the Entropy is: 88.7%.
For my ensemble (OOD1.), the Entropy is: 88.4%.
For the classic ensemble (OOD2.), the Entropy is: 88.3%.
For my ensemble (OOD2.), the Entropy is: 88.0%.


In [29]:
def Entropy_Ensemble(outputs, labels, ood = False):
    Entropys = []
    for i in range(len(outputs)):
        Entropy_model = Entropy(outputs[i], labels, ood)
        Entropys.append(Entropy_model)
    print('Average:',  np.round(np.mean(Entropys),1), '\n')
    return(Entropys)

Entropy_Ensemble(outputs_test, labs)
Entropy_Ensemble(big_outputs_ood1 , labs_ood, True)
Entropy_Ensemble(big_outputs_ood2, labs_ood, True)

Average: 86.9 

Average: 81.5 

Average: 81.6 



[81.5, 81.6, 81.2, 81.4, 82.3, 81.5, 80.7, 81.4, 81.7, 82.2]

In [30]:
def Alpha(outputs, labels, ood = False):
    probas = (outputs/np.sum(outputs, axis = 1).reshape(-1, 1))
    miss = (1-(np.argmax(probas, axis = 1) == labels))
    Alphas = np.sum(outputs, axis = 1)
    if ood:
        miss = labels
    fpr, tpr, thresholds = metrics.roc_curve(miss, -Alphas, pos_label=1)
    return(np.round(100*metrics.auc(fpr, tpr), 1))

print('For the classic ensemble (mis.), the Alpha is: ', Alpha(eot_classic, labs),'%.', sep ='')
print('For my ensemble (mis.), the Alpha is: ', Alpha(ensemble_outputs_test, labs),'%.', sep ='')

print('For the classic ensemble (OOD1.), the Alpha is: ', Alpha(big_eo1_classic, labs_ood, True),'%.', sep ='')
print('For my ensemble (OOD1.), the Alpha is: ', Alpha(big_eo1, labs_ood, True),'%.', sep ='')

print('For the classic ensemble (OOD2.), the Alpha is: ', Alpha(big_eo2_classic, labs_ood, True),'%.', sep ='')
print('For my ensemble (OOD2.), the Alpha is: ', Alpha(big_eo2, labs_ood, True),'%.', sep ='')

For the classic ensemble (mis.), the Alpha is: 79.2%.
For my ensemble (mis.), the Alpha is: 91.4%.
For the classic ensemble (OOD1.), the Alpha is: 81.7%.
For my ensemble (OOD1.), the Alpha is: 87.3%.
For the classic ensemble (OOD2.), the Alpha is: 83.3%.
For my ensemble (OOD2.), the Alpha is: 86.8%.


In [31]:
def Alpha_Ensemble(outputs, labels, ood = False):
    Alphas = []
    for i in range(len(outputs)):
        Alpha_model = Alpha(outputs[i], labels, ood)
        Alphas.append(Alpha_model)
    print('Average:',  np.round(np.mean(Alphas),1), '\n')
    return(Alphas)

Alpha_Ensemble(outputs_test, labs)
Alpha_Ensemble(big_outputs_ood1 , labs_ood, True)
Alpha_Ensemble(big_outputs_ood2, labs_ood, True)

Average: 72.3 

Average: 70.8 

Average: 72.6 



[72.8, 72.2, 72.2, 71.4, 75.2, 70.5, 72.9, 71.9, 74.0, 73.3]

In [32]:
def MI(outputs, labels, ood = False):
    probas = (outputs/np.sum(outputs, axis = 1).reshape(-1, 1))
    miss = (1-(np.argmax(probas, axis = 1) == labels))
    Alphas = np.sum(outputs, axis = 1)
    MutInfo = np.sum(outputs/Alphas.reshape(-1,1)*(np.log(outputs/Alphas.reshape(-1,1))\
                                                         -digamma(outputs+1)\
                                                         +digamma(Alphas.reshape(-1,1))+1), axis = 1)
    if ood:
        miss = labels
    fpr, tpr, thresholds = metrics.roc_curve(miss, -MutInfo, pos_label=1)
    return(np.round(100*metrics.auc(fpr, tpr), 1))

print('For the classic ensemble (mis.), the MutInfo is: ', MI(eot_classic, labs),'%.', sep ='')
print('For my ensemble (mis.), the MutInfo is: ', MI(ensemble_outputs_test, labs),'%.', sep ='')

print('For the classic ensemble (OOD1.), the MutInfo is: ', MI(big_eo1_classic, labs_ood, True),'%.', sep ='')
print('For my ensemble (OOD1.), the MutInfo is: ', MI(big_eo1, labs_ood, True),'%.', sep ='')

print('For the classic ensemble (OOD2.), the MutInfo is: ', MI(big_eo2_classic, labs_ood, True),'%.', sep ='')
print('For my ensemble (OOD2.), the MutInfo is: ', MI(big_eo2, labs_ood, True),'%.', sep ='')

For the classic ensemble (mis.), the MutInfo is: 81.8%.
For my ensemble (mis.), the MutInfo is: 91.5%.
For the classic ensemble (OOD1.), the MutInfo is: 83.4%.
For my ensemble (OOD1.), the MutInfo is: 87.4%.
For the classic ensemble (OOD2.), the MutInfo is: 84.6%.
For my ensemble (OOD2.), the MutInfo is: 87.0%.


In [33]:
def MI_Ensemble(outputs, labels, ood = False):
    MIs = []
    for i in range(len(outputs)):
        MI_model = MI(outputs[i], labels, ood)
        MIs.append(MI_model)
    print('Average:',  np.round(np.mean(MIs),1), '\n')
    return(MIs)

MI_Ensemble(outputs_test, labs)
MI_Ensemble(big_outputs_ood1 , labs_ood, True)
MI_Ensemble(big_outputs_ood2, labs_ood, True)

Average: 73.8 

Average: 71.9 

Average: 73.6 



[73.7, 73.3, 73.1, 72.4, 76.1, 71.7, 73.8, 72.9, 74.9, 74.3]

In [34]:
def KLoS(outputs, labels, ood = False):
    probas = (outputs/np.sum(outputs, axis = 1).reshape(-1, 1))
    miss = (1-(np.argmax(probas, axis = 1) == labels))
    predicted_labs = np.argmax(probas, axis = 1)
    pred_dists = beta*np.eye(10)[predicted_labs.astype('int')]+1
    KLoSs = kl_div_dirichlet(torch.tensor(outputs), torch.tensor(pred_dists))
    if ood:
        miss = labels
    fpr, tpr, thresholds = metrics.roc_curve(miss, KLoSs, pos_label=1)
    return(np.round(100*metrics.auc(fpr, tpr), 1))

print('For the classic ensemble (mis.), the KLoS is: ', KLoS(eot_classic, labs),'%.', sep ='')
print('For my ensemble (mis.), the KLoS is: ', KLoS(ensemble_outputs_test, labs),'%.', sep ='')

print('For the classic ensemble (OOD1.), the KLoS is: ', KLoS(big_eo1_classic, labs_ood, True),'%.', sep ='')
print('For my ensemble (OOD1.), the KLoS is: ', KLoS(big_eo1, labs_ood, True),'%.', sep ='')

print('For the classic ensemble (OOD2.), the KLoS is: ', KLoS(big_eo2_classic, labs_ood, True),'%.', sep ='')
print('For my ensemble (OOD2.), the KLoS is: ', KLoS(big_eo2, labs_ood, True),'%.', sep ='')

For the classic ensemble (mis.), the KLoS is: 93.8%.
For my ensemble (mis.), the KLoS is: 93.6%.
For the classic ensemble (OOD1.), the KLoS is: 89.3%.
For my ensemble (OOD1.), the KLoS is: 89.4%.
For the classic ensemble (OOD2.), the KLoS is: 88.7%.
For my ensemble (OOD2.), the KLoS is: 88.9%.


In [35]:
def KLoS_Ensemble(outputs, labels, ood = False):
    KLoSs = []
    for i in range(len(outputs)):
        KLoS_model = KLoS(outputs[i], labels, ood)
        KLoSs.append(KLoS_model)
    print('Average:',  np.round(np.mean(KLoSs),1), '\n')
    return(KLoSs)

KLoS_Ensemble(outputs_test, labs)
KLoS_Ensemble(big_outputs_ood1 , labs_ood, True)
KLoS_Ensemble(big_outputs_ood2, labs_ood, True)

Average: 92.2 

Average: 86.4 

Average: 86.0 



[86.2, 85.4, 86.0, 85.6, 86.3, 85.9, 85.5, 86.2, 86.4, 86.1]

In [36]:
def MCP_double(outputs, oods, labels):
    probas = (outputs/np.sum(outputs, axis = 1).reshape(-1, 1))
    miss = (1-(np.argmax(probas, axis = 1) == labels))
    miss_index = [index for index in range(outputs.shape[0]) if miss[index] == 1]
    miss_need = 10000-len(miss_index)
    miss_choice = miss_need//len(miss_index)
    if miss_choice > 0:
        new_outputs = outputs.copy()
        for i in miss_index:
            new_outputs = np.concatenate((outputs, np.repeat(outputs[miss_index], miss_choice, axis = 0)), axis = 0)
        new_outputs = np.concatenate((new_outputs, oods), axis = 0)
        new_miss = np.concatenate((miss, np.ones(oods.shape[0]+len(miss_index)*miss_choice)))
        new_probas = (new_outputs/np.sum(new_outputs, axis = 1).reshape(-1, 1))
        fpr, tpr, thresholds = metrics.roc_curve(new_miss, -np.max(new_probas, axis = 1), pos_label=1)
        return(np.round(100*metrics.auc(fpr, tpr), 1))
    else:
        print('Unexpected distribution...')
        return()

In [37]:
print(MCP_double(eot_classic, eo1_classic, labs))
print(MCP_double(eot_classic, eo2_classic, labs))
print(MCP_double(ensemble_outputs_test, ensemble_outputs_ood1, labs))
print(MCP_double(ensemble_outputs_test, ensemble_outputs_ood2, labs))

91.3
91.0
91.4
91.1


In [38]:
def MCP_double_ensemble(outputs, oods, labels):
    MCPs = []
    for i in range(len(outputs)):
        MCP_model = MCP_double(outputs[i], oods[i], labels)
        MCPs.append(MCP_model)
    print('Average:',  np.round(np.mean(MCPs),1), '\n')
    return(MCPs)

In [39]:
print(MCP_double_ensemble(outputs_test, outputs_ood1, labs))
print(MCP_double_ensemble(outputs_test, outputs_ood2, labs))

Average: 85.6 

[85.1, 85.0, 85.6, 86.1, 85.7, 85.7, 85.9, 85.6, 85.7, 85.5]
Average: 85.5 

[85.0, 85.0, 85.6, 86.1, 85.6, 85.6, 85.6, 85.3, 85.8, 85.8]


In [40]:
def Entropy_double(outputs, oods, labels):
    probas = (outputs/np.sum(outputs, axis = 1).reshape(-1, 1))
    miss = (1-(np.argmax(probas, axis = 1) == labels))
    miss_index = [index for index in range(outputs.shape[0]) if miss[index] == 1]
    miss_need = 10000-len(miss_index)
    miss_choice = miss_need//len(miss_index)
    if miss_choice > 0:
        new_outputs = outputs.copy()
        for i in miss_index:
            new_outputs = np.concatenate((outputs, np.repeat(outputs[miss_index], miss_choice, axis = 0)), axis = 0)
        new_outputs = np.concatenate((new_outputs, oods), axis = 0)
        new_miss = np.concatenate((miss, np.ones(oods.shape[0]+len(miss_index)*miss_choice)))
        new_probas = (new_outputs/np.sum(new_outputs, axis = 1).reshape(-1, 1))
        entropies = entropy(new_probas, axis = 1)
        fpr, tpr, thresholds = metrics.roc_curve(new_miss, entropies, pos_label=1)
        return(np.round(100*metrics.auc(fpr, tpr), 1))
    else:
        print('Unexpected distribution...')
        return()

In [41]:
print(Entropy_double(eot_classic, eo1_classic, labs))
print(Entropy_double(eot_classic, eo2_classic, labs))
print(Entropy_double(ensemble_outputs_test, ensemble_outputs_ood1, labs))
print(Entropy_double(ensemble_outputs_test, ensemble_outputs_ood2, labs))

91.4
91.2
91.3
91.1


In [42]:
def Entropy_double_ensemble(outputs, oods, labels):
    MCPs = []
    for i in range(len(outputs)):
        MCP_model = Entropy_double(outputs[i], oods[i], labels)
        MCPs.append(MCP_model)
    print('Average:',  np.round(np.mean(MCPs),1), '\n')
    return(MCPs)

In [43]:
print(Entropy_double_ensemble(outputs_test, outputs_ood1, labs))
print(Entropy_double_ensemble(outputs_test, outputs_ood2, labs))

Average: 85.4 

[84.9, 84.8, 85.4, 85.9, 85.5, 85.5, 85.8, 85.4, 85.4, 85.2]
Average: 85.4 

[84.8, 84.8, 85.4, 85.9, 85.5, 85.4, 85.5, 85.1, 85.6, 85.5]


In [44]:
def Alpha_double(outputs, oods, labels):
    probas = (outputs/np.sum(outputs, axis = 1).reshape(-1, 1))
    miss = (1-(np.argmax(probas, axis = 1) == labels))
    miss_index = [index for index in range(outputs.shape[0]) if miss[index] == 1]
    miss_need = 10000-len(miss_index)
    miss_choice = miss_need//len(miss_index)
    if miss_choice > 0:
        new_outputs = outputs.copy()
        for i in miss_index:
            new_outputs = np.concatenate((outputs, np.repeat(outputs[miss_index], miss_choice, axis = 0)), axis = 0)
        new_outputs = np.concatenate((new_outputs, oods), axis = 0)
        new_miss = np.concatenate((miss, np.ones(oods.shape[0]+len(miss_index)*miss_choice)))
        new_probas = (new_outputs/np.sum(new_outputs, axis = 1).reshape(-1, 1))
        Alphas = np.sum(new_outputs, axis = 1)
        fpr, tpr, thresholds = metrics.roc_curve(new_miss, -Alphas, pos_label=1)
        return(np.round(100*metrics.auc(fpr, tpr), 1))
    else:
        print('Unexpected distribution...')
        return()

In [45]:
print(Alpha_double(eot_classic, eo1_classic, labs))
print(Alpha_double(eot_classic, eo2_classic, labs))
print(Alpha_double(ensemble_outputs_test, ensemble_outputs_ood1, labs))
print(Alpha_double(ensemble_outputs_test, ensemble_outputs_ood2, labs))

81.1
81.9
90.4
90.1


In [46]:
def Alpha_double_ensemble(outputs, oods, labels):
    MCPs = []
    for i in range(len(outputs)):
        MCP_model = Alpha_double(outputs[i], oods[i], labels)
        MCPs.append(MCP_model)
    print('Average:',  np.round(np.mean(MCPs),1), '\n')
    return(MCPs)

In [47]:
print(Alpha_double_ensemble(outputs_test, outputs_ood1, labs))
print(Alpha_double_ensemble(outputs_test, outputs_ood2, labs))

Average: 72.2 

[72.2, 71.0, 72.7, 71.8, 73.7, 69.7, 74.6, 71.8, 73.5, 71.0]
Average: 73.2 

[72.9, 72.1, 73.5, 73.0, 74.5, 70.6, 75.2, 72.3, 74.9, 72.5]


In [48]:
def MI_double(outputs, oods, labels):
    probas = (outputs/np.sum(outputs, axis = 1).reshape(-1, 1))
    miss = (1-(np.argmax(probas, axis = 1) == labels))
    miss_index = [index for index in range(outputs.shape[0]) if miss[index] == 1]
    miss_need = 10000-len(miss_index)
    miss_choice = miss_need//len(miss_index)
    if miss_choice > 0:
        new_outputs = outputs.copy()
        for i in miss_index:
            new_outputs = np.concatenate((outputs, np.repeat(outputs[miss_index], miss_choice, axis = 0)), axis = 0)
        new_outputs = np.concatenate((new_outputs, oods), axis = 0)
        new_miss = np.concatenate((miss, np.ones(oods.shape[0]+len(miss_index)*miss_choice)))
        new_probas = (new_outputs/np.sum(new_outputs, axis = 1).reshape(-1, 1))
        Alphas = np.sum(new_outputs, axis = 1)
        MutInfo = np.sum(new_outputs/Alphas.reshape(-1,1)*(np.log(new_outputs/Alphas.reshape(-1,1))\
                                                         -digamma(new_outputs+1)\
                                                         +digamma(Alphas.reshape(-1,1))+1), axis = 1)
        fpr, tpr, thresholds = metrics.roc_curve(new_miss, -MutInfo, pos_label=1)
        return(np.round(100*metrics.auc(fpr, tpr), 1))
    else:
        print('Unexpected distribution...')
        return()

In [49]:
print(MI_double(eot_classic, eo1_classic, labs))
print(MI_double(eot_classic, eo2_classic, labs))
print(MI_double(ensemble_outputs_test, ensemble_outputs_ood1, labs))
print(MI_double(ensemble_outputs_test, ensemble_outputs_ood2, labs))

83.3
83.9
90.5
90.2


In [50]:
def MI_double_ensemble(outputs, oods, labels):
    MCPs = []
    for i in range(len(outputs)):
        MCP_model = MI_double(outputs[i], oods[i], labels)
        MCPs.append(MCP_model)
    print('Average:',  np.round(np.mean(MCPs),1), '\n')
    return(MCPs)

In [51]:
print(MI_double_ensemble(outputs_test, outputs_ood1, labs))
print(MI_double_ensemble(outputs_test, outputs_ood2, labs))

Average: 73.5 

[73.5, 72.4, 74.0, 73.0, 74.9, 71.4, 75.9, 73.2, 74.8, 72.3]
Average: 74.4 

[74.2, 73.3, 74.7, 74.2, 75.7, 72.1, 76.5, 73.6, 76.0, 73.8]


In [52]:
def KLoS_double(outputs, oods, labels):
    probas = (outputs/np.sum(outputs, axis = 1).reshape(-1, 1))
    miss = (1-(np.argmax(probas, axis = 1) == labels))
    miss_index = [index for index in range(outputs.shape[0]) if miss[index] == 1]
    miss_need = 10000-len(miss_index)
    miss_choice = miss_need//len(miss_index)
    if miss_choice > 0:
        new_outputs = outputs.copy()
        for i in miss_index:
            new_outputs = np.concatenate((outputs, np.repeat(outputs[miss_index], miss_choice, axis = 0)), axis = 0)
        new_outputs = np.concatenate((new_outputs, oods), axis = 0)
        new_miss = np.concatenate((miss, np.ones(oods.shape[0]+len(miss_index)*miss_choice)))
        new_probas = (new_outputs/np.sum(new_outputs, axis = 1).reshape(-1, 1))
        predicted_labs = np.argmax(new_probas, axis = 1)
        pred_dists = beta*np.eye(10)[predicted_labs.astype('int')]+1
        KLoSs = kl_div_dirichlet(torch.tensor(new_outputs), torch.tensor(pred_dists))
        fpr, tpr, thresholds = metrics.roc_curve(new_miss, KLoSs, pos_label=1)
        return(np.round(100*metrics.auc(fpr, tpr), 1))
    else:
        print('Unexpected distribution...')
        return()

In [53]:
print(KLoS_double(eot_classic, eo1_classic, labs))
print(KLoS_double(eot_classic, eo2_classic, labs))
print(KLoS_double(ensemble_outputs_test, ensemble_outputs_ood1, labs))
print(KLoS_double(ensemble_outputs_test, ensemble_outputs_ood2, labs))

92.6
92.3
92.6
92.3


In [54]:
def KLoS_double_ensemble(outputs, oods, labels):
    MCPs = []
    for i in range(len(outputs)):
        MCP_model = KLoS_double(outputs[i], oods[i], labels)
        MCPs.append(MCP_model)
    print('Average:',  np.round(np.mean(MCPs),1), '\n')
    return(MCPs)

In [55]:
print(KLoS_double_ensemble(outputs_test, outputs_ood1, labs))
print(KLoS_double_ensemble(outputs_test, outputs_ood2, labs))

Average: 90.7 

[90.6, 89.9, 90.7, 91.0, 90.9, 90.6, 90.8, 90.8, 90.9, 90.7]
Average: 90.4 

[90.3, 89.7, 90.5, 90.7, 90.6, 90.4, 90.5, 90.6, 90.7, 90.5]


## Information Mutuelle d'un ensemble

In [56]:
probas_test = [(output/np.sum(output, axis = 1).reshape(-1, 1)) for output in outputs_test]
probas_test_mean = np.mean(probas_test, axis = 0)
entropies_test = []
for proba in probas_test:
    entropies_test.append(entropy(proba, axis = 1))
miss_mi_test = 1-(np.argmax(probas_test_mean, axis = 1) == labs)

In [57]:
mutinf_ens_test = entropy(probas_test_mean, axis = 1) - np.mean(entropies_test, axis = 0)
fpr, tpr, thresholds = metrics.roc_curve(miss_mi_test, mutinf_ens_test , pos_label=1)
mutinfN_ens_test = metrics.auc(fpr, tpr)
print(np.round(100*mutinfN_ens_test,1))

93.0


In [58]:
outputs_mi_ood1 = np.concatenate((outputs_test, outputs_ood1), axis = 1)

In [59]:
probas_ood1 = [(output/np.sum(output, axis = 1).reshape(-1, 1)) for output in outputs_mi_ood1]
probas_ood1_mean = np.mean(probas_ood1, axis = 0)
entropies_ood1 = []
for proba in probas_ood1:
    entropies_ood1.append(entropy(proba, axis = 1))
miss_mi_ood1 = np.concatenate((np.zeros(10000), np.ones(10000)))
mutinf_ens_ood1 = entropy(probas_ood1_mean, axis = 1) - np.mean(entropies_ood1, axis = 0)
fpr, tpr, thresholds = metrics.roc_curve(miss_mi_ood1, mutinf_ens_ood1 , pos_label=1)
mutinfN_ens_ood1 = metrics.auc(fpr, tpr)
print(np.round(100*mutinfN_ens_ood1,1))

89.5


In [60]:
outputs_mi_ood2 = np.concatenate((outputs_test, outputs_ood2), axis = 1)

In [61]:
probas_ood2 = [(output/np.sum(output, axis = 1).reshape(-1, 1)) for output in outputs_mi_ood2]
probas_ood2_mean = np.mean(probas_ood2, axis = 0)
entropies_ood2 = []
for proba in probas_ood2:
    entropies_ood2.append(entropy(proba, axis = 1))
miss_mi_ood2 = np.concatenate((np.zeros(10000), np.ones(10000)))
mutinf_ens_ood2 = entropy(probas_ood2_mean, axis = 1) - np.mean(entropies_ood2, axis = 0)
fpr, tpr, thresholds = metrics.roc_curve(miss_mi_ood2, mutinf_ens_ood2 , pos_label=1)
mutinfN_ens_ood2 = metrics.auc(fpr, tpr)
print(np.round(100*mutinfN_ens_ood2,1))

89.0


In [62]:
probas_MI = np.mean([(output/np.sum(output, axis = 1).reshape(-1, 1)) for output in outputs_test], axis = 0)

In [63]:
miss1 = (1-(np.argmax(probas_MI, axis = 1) == labs))
miss_index1 = [index for index in range(10000) if miss1[index] == 1]
miss_need1 = 10000-len(miss_index1)
miss_choice1 = miss_need1//len(miss_index1)
if miss_choice1 > 0:
    new_miss1 = np.concatenate((miss1, np.ones(outputs_ood1[0].shape[0]+len(miss_index1)*miss_choice1)))
    new_outputs1 = []
    for m in range(10):
        outputs = outputs_test[m].copy()
        oods = outputs_ood1[m].copy()
        new_outputs = outputs.copy()
        for i in miss_index1:
            new_outputs = np.concatenate((outputs, np.repeat(outputs[miss_index1], miss_choice1, axis = 0)), axis = 0)
            new_outputs = np.concatenate((new_outputs, oods), axis = 0)
            new_probas1 = (new_outputs/np.sum(new_outputs, axis = 1).reshape(-1, 1))
        new_outputs1.append(new_outputs)

In [64]:
new_probas_MI1 = [(output/np.sum(output, axis = 1).reshape(-1, 1)) for output in new_outputs1]

In [65]:
mean_probas_MI1 = np.mean(new_probas_MI1, axis = 0)

In [66]:
entropies_ood1_EN = []
for proba in new_probas_MI1:
    entropies_ood1_EN.append(entropy(proba, axis = 1))

In [67]:
mutinf_ens_ood1_EN = entropy(mean_probas_MI1, axis = 1) - np.mean(entropies_ood1_EN, axis = 0)
fpr, tpr, thresholds = metrics.roc_curve(new_miss1, mutinf_ens_ood1_EN , pos_label=1)
mutinfN_ens_ood1_EN = metrics.auc(fpr, tpr)
print(np.round(100*mutinfN_ens_ood1_EN,1))

92.3


In [68]:
miss2 = (1-(np.argmax(probas_MI, axis = 1) == labs))
miss_index2 = [index for index in range(10000) if miss2[index] == 1]
miss_need2 = 10000-len(miss_index2)
miss_choice2 = miss_need2//len(miss_index2)
if miss_choice2 > 0:
    new_miss2 = np.concatenate((miss2, np.ones(outputs_ood2[0].shape[0]+len(miss_index2)*miss_choice2)))
    new_outputs2 = []
    for m in range(10):
        outputs = outputs_test[m].copy()
        oods = outputs_ood2[m].copy()
        new_outputs = outputs.copy()
        for i in miss_index2:
            new_outputs = np.concatenate((outputs, np.repeat(outputs[miss_index2], miss_choice2, axis = 0)), axis = 0)
            new_outputs = np.concatenate((new_outputs, oods), axis = 0)
            new_probas2 = (new_outputs/np.sum(new_outputs, axis = 1).reshape(-1, 1))
        new_outputs2.append(new_outputs)

In [69]:
new_probas_MI2 = [(output/np.sum(output, axis = 1).reshape(-1, 1)) for output in new_outputs2]

In [70]:
mean_probas_MI2 = np.mean(new_probas_MI2, axis = 0)

In [71]:
entropies_ood2_EN = []
for proba in new_probas_MI2:
    entropies_ood2_EN.append(entropy(proba, axis = 1))

In [72]:
mutinf_ens_ood2_EN = entropy(mean_probas_MI2, axis = 1) - np.mean(entropies_ood2_EN, axis = 0)
fpr, tpr, thresholds = metrics.roc_curve(new_miss2, mutinf_ens_ood2_EN , pos_label=1)
mutinfN_ens_ood2_EN = metrics.auc(fpr, tpr)
print(np.round(100*mutinfN_ens_ood2_EN,1))

92.0
