In [1]:
import os
import os.path
import pickle as pkl
import sys

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from sklearn.metrics import roc_auc_score, roc_curve
from torch.autograd import Variable


from torch.utils.data import DataLoader, Subset

from torch.utils.data import Dataset
from matplotlib import image
import fnmatch
from pathlib import Path
import glob
import time
from tqdm import tqdm 



sys.path.append('../src/')
import tforms
import feature_extraction.feature_extraction_utils as futils
from feature_extraction.Network_Latents_Wrapper import NetworkLatents
# import classifier as clf

import novelty_ODD.ODIN_utils as odutils
import novelty_dfm_CL.novelty_detector as novel
import novelty_dfm_CL.novelty_eval as novelval 
import novelty_dfm_CL.classifier as clf
import novelty_dfm_CL.novelty_utils as novelu


import memory as mem
import utils
import datasets as dset
import novelty_dfm_CL.datasets_holdout as dseth
import datasets_utils as dsetutils

import novelty_dfm_CL.scoring_multi_threshold as ThScores


from novelty_dfm_CL.novelty_eval import acuracy_report, scores_metrics_results


from sklearn.metrics import roc_auc_score, roc_curve
from tqdm import tqdm 

utils.seed_torch(0)

device = 2

In [15]:
scores = np.array([-2,-2,-3,-0.1, -0.2])
trues = np.array([0,1,1,1,1])
# trues = np.logical_not(trues).astype(int)

roc_auc_score(trues, scores), np.percentile(scores, 80), scores>-0.18

(0.625, -0.18, array([False, False, False,  True, False]))

In [2]:

h_dict = {
    'cosine':   odutils.CosineDeconf,
    'inner':    odutils.InnerDeconf,
    'baseline': odutils.InnerDeconf,
    'euclid':   odutils.EuclideanDeconf
}

# for fake out of distribution 
generating_loaders_dict = {
    'Gaussian': odutils.GaussianLoader,
    'Uniform': odutils.UniformLoader
}
# for cifar10
r_mean = 125.3/255
g_mean = 123.0/255
b_mean = 113.9/255
r_std = 63.0/255
g_std = 62.1/255
b_std = 66.7/255

train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding = 4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((r_mean, g_mean, b_mean), (r_std, g_std, b_std)),
])

test_transform = transforms.Compose([
    transforms.CenterCrop((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((r_mean, g_mean, b_mean), (r_std, g_std, b_std)),
])


def get_datasets(data_dir, data_name, batch_size):

    train_set_in = torchvision.datasets.CIFAR10(root=f'{data_dir}/cifar10', train=True, download=True, transform=train_transform)
    test_set_in  = torchvision.datasets.CIFAR10(root=f'{data_dir}/cifar10', train=False, download=True, transform=test_transform)
    
    if data_name == 'Gaussian' or data_name == 'Uniform':
        normalizer = odutils.Normalizer(r_mean, g_mean, b_mean, r_std, g_std, b_std)
        outlier_loader = generating_loaders_dict[data_name](batch_size = batch_size, num_batches = int(10000 / batch_size), transformers = [normalizer])
    elif data_name=='SVHN':
        outlier_set = torchvision.datasets.SVHN(root=f'{data_dir}/svhn', split='train', transform=train_transform, download=True)
        outlier_loader = DataLoader(outlier_set, batch_size=batch_size, shuffle=False, num_workers=4)
    else:
        outlier_set  = getattr(torchvision.datasets, data_name.upper())(f'{data_dir}/{data_name}', transform=test_transform)
        outlier_loader       =  DataLoader(outlier_set,       batch_size=batch_size, shuffle=False, num_workers=4)
    
    test_indices      = list(range(len(test_set_in)))
    validation_set_in = Subset(test_set_in, test_indices[:1000])
    test_set_in       = Subset(test_set_in, test_indices[1000:])

    train_loader_in      =  DataLoader(train_set_in,      batch_size=batch_size, shuffle=True,  num_workers=4)
    validation_loader_in =  DataLoader(validation_set_in, batch_size=batch_size, shuffle=False, num_workers=4)
    test_loader_in       =  DataLoader(test_set_in,       batch_size=batch_size, shuffle=False, num_workers=4)

    return train_loader_in, validation_loader_in, test_loader_in, outlier_loader



def calc_tnr(id_test_results, ood_test_results, tpr_threshold=0.95):
    scores = np.concatenate((id_test_results, ood_test_results))
    trues = np.array(([1] * len(id_test_results)) + ([0] * len(ood_test_results)))
    fpr, tpr, thresholds = roc_curve(trues, scores)
    return 1 - fpr[np.argmax(tpr>=tpr_threshold)]

def calc_auroc(id_test_results, ood_test_results):
    #calculate the AUROC
    scores = np.concatenate((id_test_results, ood_test_results))
    print(scores)
    trues = np.array(([1] * len(id_test_results)) + ([0] * len(ood_test_results)))
    result = roc_auc_score(trues, scores)

    return result



data_dir = '../../data/'
data_name_out = 'SVHN'
batch_size = 100
#get outlier data
train_data, val_data, test_data, open_data = get_datasets(data_dir, data_name_out, batch_size)


# train_datasets, train_holdout_datasets, test_datasets, seq_tasks, dset_prep = dset.call_dataset_holdout('cifar10', data_dir, './experiments', experiment_filepath=None, 
#                                         experiment_name='tryout2', holdout_percent=0.25,  max_holdout=0.75, scenario='nc', 
#                                         scenario_classif='class', exp_type='class', num_per_task=1, num_classes_first=2, 
#                                         shuffle=False, preload=False)

# train_data      =  DataLoader(train_datasets[0],      batch_size=batch_size, shuffle=True,  num_workers=4)
# val_data =  DataLoader(train_holdout_datasets[0], batch_size=batch_size, shuffle=True, num_workers=4)
# test_data = DataLoader(test_datasets[0], batch_size=batch_size, shuffle=True, num_workers=4)
# open_data       =  DataLoader(train_datasets[1],       batch_size=batch_size, shuffle=False, num_workers=4)



Files already downloaded and verified
Files already downloaded and verified
Downloading http://ufldl.stanford.edu/housenumbers/train_32x32.mat to ../../data//svhn/train_32x32.mat


0it [00:00, ?it/s]

# ODIN

In [3]:
freeze=False
resnet_arch='resnet18'
fc_sizes = [4096]
device = 0

similarity = 'cosine'
epochs = 10
epoch_start = 0
weight_decay = 0.0001
# Freeze or not backbone 
num_classes = 10




network = clf.Resnet(num_classes, resnet_arch=resnet_arch, FC_layers=fc_sizes,  
            resnet_base=-1, multihead_type='single', base_freeze=freeze)

if freeze:
    network.base.train(False)
    print('freeze backbone')
else:
    network.base.train(True)
    print('dont freeze backbone')

network = network.to(device)
network_inner = NetworkLatents(network, ['base.8'], pool_factors={'base.8':-1})



baseline = (similarity == 'baseline')

h = h_dict[similarity](network.classifier_penultimate, num_classes)

h = h.to(device)
deconf_net = odutils.DeconfNet(network, network.classifier_penultimate, num_classes, h, baseline)
deconf_net = deconf_net.to(device)
parameters = []
h_parameters = []
for name, parameter in deconf_net.named_parameters():
    if name == 'h.h.weight' or name == 'h.h.bias':
        h_parameters.append(parameter)
    else:
        parameters.append(parameter)


optimizer = optim.SGD(parameters, lr = 0.1, momentum = 0.9, weight_decay=weight_decay)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones = [int(epochs * 0.5), int(epochs * 0.75)], gamma = 0.1)

h_optimizer = optim.SGD(h_parameters, lr = 0.1, momentum = 0.9) # No weight decay
h_scheduler = optim.lr_scheduler.MultiStepLR(h_optimizer, milestones = [int(epochs * 0.5), int(epochs * 0.75)], gamma = 0.1)
    
criterion = nn.CrossEntropyLoss()

dont freeze backbone
Will fetch activations from:
base.8, average pooled by -1


In [4]:
network.classifier_penultimate

2048

In [5]:
deconf_net.train()

num_batches = len(train_data)
epoch_bar = tqdm(total = num_batches * epochs, initial = num_batches * epoch_start)
epoch_loss = None
for epoch in range(epoch_start, epochs):

    total_loss = 0.0
    num_inputs = 0
    for batch_idx, batch in enumerate(train_data):
        if len(batch)>2:
            inputs,targets,_=batch
        else:
            inputs,targets=batch
        inputs = inputs.to(device)
        targets = targets.to(device)
        h_optimizer.zero_grad()
        optimizer.zero_grad()
        
        logits, _, _ = deconf_net(inputs)
        loss = criterion(logits, targets)
        loss.backward()
        
        optimizer.step()
        h_optimizer.step()
        total_loss += loss.item()
        num_inputs += inputs.shape[0]
        
        epoch_bar.set_description(f'Training | Epoch {epoch + 1}/{epochs} | loss = {loss/inputs.shape[0]:0.5f} | Batch {batch_idx + 1}/{num_batches}')
        epoch_bar.update()

    epoch_loss = total_loss/num_inputs
    h_scheduler.step()
    scheduler.step()

    epoch_bar.set_description(f'Training | Epoch {epochs}/{epochs} | Epoch loss = {epoch_loss:0.5f} | Batch {num_batches}/{num_batches}')
    epoch_bar.update()

epoch_bar.close()



Training | Epoch 10/10 | Epoch loss = 0.00839 | Batch 500/500: : 5010it [04:14, 19.72it/s]


In [6]:

deconf_net.eval()
best_val_score = None
best_auc = None


def testData(model, CUDA_DEVICE, data_loader, noise_magnitude, criterion, score_func = 'h', title = 'Testing'):
    model.eval()
    num_batches = len(data_loader)
    results = []
    data_iter = tqdm(data_loader)
    for j, batch in enumerate(data_iter):
        images = batch[0]
        data_iter.set_description(f'{title} | Processing image batch {j + 1}/{num_batches}')
        images = Variable(images.to(CUDA_DEVICE), requires_grad = True)
        
        
        logits, h, g = model(images)

        if score_func == 'h':
            scores = h
        elif score_func == 'g':
            scores = g
        elif score_func == 'logit':
            scores = logits

        # Calculating the perturbation we need to add, that is,
        # the sign of gradient of the numerator w.r.t. input

        max_scores, _ = torch.max(scores, dim = 1)
        max_scores.backward(torch.ones(len(max_scores)).to(CUDA_DEVICE))
        
        # Normalizing the gradient to binary in {-1, 1}
        if images.grad is not None:
            gradient = torch.ge(images.grad.data, 0)
            gradient = (gradient.float() - 0.5) * 2
            # Normalizing the gradient to the same space of image
            gradient[::, 0] = (gradient[::, 0] )/r_std
            gradient[::, 1] = (gradient[::, 1] )/g_std
            gradient[::, 2] = (gradient[::, 2] )/b_std
            # Adding small perturbations to images
            tempInputs = torch.add(images.data, gradient, alpha=noise_magnitude)
        
            # Now calculate score
            logits, h, g = model(tempInputs)

            if score_func == 'h':
                scores = h
            elif score_func == 'g':
                scores = g
            elif score_func == 'logit':
                scores = logits

        results.extend(torch.max(scores, dim=1)[0].data.cpu().numpy())
        
    data_iter.set_description(f'{title} | Processing image batch {num_batches}/{num_batches}')
    data_iter.close()

    return np.array(results)



noise_magnitudes = [0, 0.0025, 0.005, 0.01, 0.02, 0.04, 0.08]
# noise_magnitudes = [0.02000]
# score_func='h', ['h', 'g', 'logit']
for score_func in ['h', 'g', 'logit']:
    print(f'Score function: {score_func}')
    for noise_magnitude in noise_magnitudes:
        print(f'Noise magnitude {noise_magnitude:.5f}         ')
        validation_results =  np.average(testData(deconf_net, device, val_data, noise_magnitude, criterion, score_func, title = 'Validating'))
        print('ID Validation Score:',validation_results)
        
        id_test_results = testData(deconf_net, device, test_data, noise_magnitude, criterion, score_func, title = 'Testing ID') 
        
        ood_test_results = testData(deconf_net, device, open_data, noise_magnitude, criterion, score_func, title = 'Testing OOD')
        auroc = calc_auroc(id_test_results, ood_test_results)*100
        tnrATtpr = calc_tnr(id_test_results, ood_test_results)
        print('AUROC:', auroc, 'TNR@TPR:', tnrATtpr)
        
        if best_auc is None:
            best_auc = auroc
        else:
            best_auc = max(best_auc, auroc)
            
        if best_val_score is None or validation_results > best_val_score:
            best_val_score = validation_results
            best_val_auc = auroc
            best_tnr = tnrATtpr


print('supposedly best auc: ', best_val_auc, ' and tnr@tpr ', best_tnr)
print('true best auc:'      , best_auc)

  0%|          | 0/10 [00:00<?, ?it/s]

Score function: h
Noise magnitude 0.00000         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.00it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 0.55577916


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:04<00:00, 18.69it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:05<00:00, 18.03it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[0.6922097  0.6296872  0.32492274 ... 0.46386057 0.46702    0.5168823 ]
AUROC: 72.50381611111112 TNR@TPR: 0.0605
Noise magnitude 0.00250         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.24it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 0.6004403


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:04<00:00, 18.48it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 15.64it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[0.7116208  0.6692249  0.4133578  ... 0.4952236  0.5913173  0.45497993]
AUROC: 78.69279388888889 TNR@TPR: 0.10760000000000003
Noise magnitude 0.00500         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.91it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 0.631015


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:04<00:00, 18.31it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 15.51it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[0.7242788  0.6836441  0.481248   ... 0.45871106 0.59394366 0.50947964]
AUROC: 82.58283388888889 TNR@TPR: 0.1482
Noise magnitude 0.01000         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.14it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 0.66503614


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:04<00:00, 18.16it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 15.97it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[0.73539215 0.68781525 0.57725734 ... 0.47386894 0.48297986 0.6698753 ]
AUROC: 84.89083388888888 TNR@TPR: 0.15259999999999996
Noise magnitude 0.02000         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.48it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 0.68074


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:04<00:00, 18.24it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 15.94it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[0.7362242  0.6578405  0.6462285  ... 0.6450399  0.5802274  0.64801353]
AUROC: 80.13252888888888 TNR@TPR: 0.04159999999999997
Noise magnitude 0.04000         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.82it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 0.66266835


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:04<00:00, 18.27it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 15.91it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[0.72351986 0.5447138  0.6187099  ... 0.68599725 0.6693749  0.65842927]
AUROC: 60.37273722222223 TNR@TPR: 0.00029999999999996696
Noise magnitude 0.08000         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.03it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 0.5952252


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:04<00:00, 18.27it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 16.26it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[0.6635672  0.32763836 0.38003463 ... 0.63146365 0.6736658  0.6302813 ]
AUROC: 42.24797777777778 TNR@TPR: 0.0
Score function: g
Noise magnitude 0.00000         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.46it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 0.09476013


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:04<00:00, 18.20it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 15.69it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[0.08057105 0.08950289 0.1026643  ... 0.09202924 0.09369212 0.08918282]
AUROC: 44.94906666666667 TNR@TPR: 0.0
Noise magnitude 0.00250         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.18it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 0.098071106


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:04<00:00, 18.13it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 16.17it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[0.08145729 0.09493756 0.10411864 ... 0.09900812 0.09836993 0.08736531]
AUROC: 50.72075111111112 TNR@TPR: 0.0
Noise magnitude 0.00500         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.14it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 0.100861244


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:05<00:00, 17.95it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 15.33it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[0.0823217  0.09975537 0.10506327 ... 0.09454659 0.09289794 0.10414293]
AUROC: 55.64913 TNR@TPR: 0.00019999999999997797
Noise magnitude 0.01000         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.81it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 0.10495981


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:04<00:00, 18.32it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 16.55it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[0.08442674 0.10498694 0.10627051 ... 0.09789249 0.09857785 0.1014412 ]
AUROC: 61.029562222222225 TNR@TPR: 0.00029999999999996696
Noise magnitude 0.02000         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 10.79it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 0.10868364


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:04<00:00, 18.31it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 15.97it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[0.08948018 0.10426303 0.10603075 ... 0.1026388  0.10224894 0.1041573 ]
AUROC: 58.62560611111112 TNR@TPR: 0.0006000000000000449
Noise magnitude 0.04000         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.03it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 0.10887649


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:04<00:00, 18.25it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 15.86it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[0.10027935 0.10110487 0.10289487 ... 0.10468382 0.10670757 0.10429157]
AUROC: 37.424998333333335 TNR@TPR: 0.0
Noise magnitude 0.08000         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.52it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 0.10557077


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:05<00:00, 17.90it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 16.40it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[0.10417359 0.09437548 0.09722435 ... 0.10280968 0.10208116 0.10264105]
AUROC: 31.72538777777778 TNR@TPR: 0.0
Score function: logit
Noise magnitude 0.00000         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.38it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 6.4443846


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:05<00:00, 17.94it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 15.79it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[8.591296  7.0353837 3.1649048 ... 4.108378  4.2370605 6.129511 ]
AUROC: 69.13797055555555 TNR@TPR: 0.04179999999999995
Noise magnitude 0.00250         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.41it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 7.1110196


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:05<00:00, 17.82it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 15.76it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[8.802993  7.6889935 4.064237  ... 5.417504  6.373163  6.577016 ]
AUROC: 74.1822 TNR@TPR: 0.07869999999999999
Noise magnitude 0.00500         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.31it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 7.6270566


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:05<00:00, 17.67it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 16.22it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[8.944961  7.987342  4.7921033 ... 5.8799047 6.2938204 5.18178  ]
AUROC: 76.96291944444445 TNR@TPR: 0.09719999999999995
Noise magnitude 0.01000         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 10.83it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 8.327636


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:05<00:00, 17.95it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 15.33it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[9.075539  8.138698  5.9452734 ... 7.7069736 6.983734  5.060582 ]
AUROC: 77.55752833333332 TNR@TPR: 0.08879999999999999
Noise magnitude 0.02000         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.50it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 8.935108


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:05<00:00, 17.95it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 15.78it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[9.090137 7.666513 7.099595 ... 8.215551 9.180006 8.383198]
AUROC: 66.70986666666667 TNR@TPR: 0.01870000000000005
Noise magnitude 0.04000         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.15it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 9.042468


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:05<00:00, 17.67it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 15.78it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[9.03903   5.9901166 6.856672  ... 9.641879  8.874363  8.985945 ]
AUROC: 41.23845055555556 TNR@TPR: 0.0011999999999999789
Noise magnitude 0.08000         


Validating | Processing image batch 10/10: 100%|██████████| 10/10 [00:00<00:00, 11.73it/s]
  0%|          | 0/90 [00:00<?, ?it/s]

ID Validation Score: 8.161018


Testing ID | Processing image batch 90/90: 100%|██████████| 90/90 [00:05<00:00, 17.70it/s]
Testing OOD | Processing image batch 100/100: 100%|██████████| 100/100 [00:06<00:00, 14.61it/s]

[ 8.70383    3.8612165  4.001045  ... 10.263513  10.026801   9.879645 ]
AUROC: 27.481771111111115 TNR@TPR: 0.0
supposedly best auc:  41.23845055555556  and tnr@tpr  0.0011999999999999789
true best auc: 84.89083388888888





# DFM comparisom

In [7]:
# DFM baseline 
detector_params = {'pca_level': 0.995, 'score_type': 'pca', 'n_components': None, 'n_percent_comp':0.2, 'device':device, 'target_ind': 1, 'dfm_layers_input': 'base.8'}

# fit on first task 

current_features = futils.extract_features(network_inner, train_data, \
    target_ind=detector_params['target_ind'], homog_ind=1, 
    device=detector_params['device'])


novelty_detector = novel.NoveltyDetector().create_detector(type='dfm', params=detector_params)



dfm_x = current_features[0][detector_params['dfm_layers_input']]
dfm_y = current_features[1]

novelty_detector.fit_total(dfm_x.T, dfm_y)


n_comp var 0.995
end fit 9.744817018508911


In [8]:
epochs

10

In [9]:
# ------
id_test_results, ood_test_results, np.average(id_test_results), np.average(ood_test_results)


(array([8.70383  , 3.8612165, 4.001045 , ..., 7.180317 , 6.088069 ,
        9.403352 ], dtype=float32),
 array([ 9.999721,  9.991745,  9.857517, ..., 10.263513, 10.026801,
         9.879645], dtype=float32),
 8.184829,
 9.942373)

In [10]:
r_mean, r_std

(0.4913725490196078, 0.24705882352941178)

In [11]:
# all_dsets = dset.call_dataset_holdout('cifar10',  '../../data', './experiments', experiment_filepath=None, 
#                                             experiment_name='tryout', holdout_percent=0.25,  max_holdout=0.75, scenario='nc', 
#                                             scenario_classif='class', exp_type='class', num_per_task=1, num_classes_first=2, 
#                                             shuffle=False, preload=False)

# train_datasets, train_holdout_datasets, test_datasets, seq_tasks, dset_prep = all_dsets

In [12]:
# 1) Select subset of old based on mixing degree 

# Get number_images_old
# current_task=2
# percent_old = 0.5
# num_new = train_datasets[current_task].__len__()
# num_old_total = sum([train_holdout_datasets[i].__len__() for i in range(current_task)])

# num_old = min(int(percent_old*num_new), num_old_total)
# real_percent_old = num_old/num_new

# # uniform sampling accross old tasks, can be changed after
# num_old_per_task = int(num_old/len(train_holdout_datasets[:current_task]))

# print(num_new, num_old, num_old_total, num_old_per_task, real_percent_old)



# print(train_holdout_datasets[0].__len__())

# # redo the loaders every time 
# # # 2) Evaluate DFM on old + new of current task and see predictions based on threshold level.
# old_gt = []
# old_scores = []
# num_samples_old = 0
# for t_old in range(current_task):
#     train_holdout_datasets[t_old].select_random_subset(num_old_per_task)
#     num_samples_old += train_holdout_datasets[t_old].__len__()
#     t_loader_old = torch.utils.data.DataLoader(train_holdout_datasets[t_old], batch_size=100,
#                                             shuffle=True, num_workers=4)
#     break
#     gt, scores = novelty_detector.score(network_inner, args.input_layer_name, t_loader_old)
#     old_gt.append(gt)
#     old_scores.append(scores)

# # filtered indices accross all old 
# th = 0.5
# inds_above_th = [np.arange(l.shape[0])[l>th] for l in old_scores]
# num_fake_new = sum([l.shape[0] for l in inds_above_th])

# print('error mistaking old data for new: ', num_fake_new/num_samples_old)


# new_gt, new_scores = novelty_detector.score(network_inner, args.input_layer_name, train_loaders[current_task])
# inds_below_th_new = np.arange(new_scores.shape[0])[new_scores<=th]
# num_fake_old = inds_below_th_new.shape[0]


# print('error mistaking new data for old: ', num_fake_old/new_scores.shape[0])



In [13]:
def pl_decision(self, scores, thresh):
    scores_max, _ = scores.max(dim=1)
    pl_idx_mask = scores_max > (thresh)
    pl_idxs = pl_idx_mask.nonzero().view(-1)
    no_pl_idx_mask = scores_max <= (thresh)
    no_pl_idxs = no_pl_idx_mask.nonzero().view(-1)
    return pl_idxs, no_pl_idxs