In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from scipy.special import softmax
import collections

from nats_bench import create
from nats_bench.api_utils import time_string
from xautodl.models import get_cell_based_tiny_net

import data.cifar10 as cifar10
import data.cifar100 as cifar100
import data.tiny_imagenet as imagenet
import calibration as cal
import calibration.metric as metric
import calibration.ece_kde as ece_kde
import calibration.tace as tace
from torch.utils.data import DataLoader
from calibration.temp_scale import TemperatureScaling, NLL, BS, accuracy,logistic_func

from data.datatest import get_logits_labels, get_valid_test_loader

from xautodl.datasets import get_dataset_with_transform
from xautodl.datasets.get_dataset_with_transform import get_datasets
from calibration.temperature_scaling import ModelWithTemperature
from torch.utils.data.sampler import SubsetRandomSampler

In [2]:
def get_preds_and_targets(model, dataloader, device):
    preds, pred_classes, targets = [], [], []

    model.eval()  # Set model to evaluation mode
    model.to(device)  # Move model to the selected device (CPU or GPU)

    with torch.no_grad():
        for data, target in dataloader:
            data, target = data.to(device), target.to(device)
            output_tuple = model(data)

            output = output_tuple[1]

            prob = F.softmax(output, dim=1)  # Compute probabilities
            _, pred = torch.max(prob, 1)  # Get predicted class

            preds.extend(prob.cpu().numpy())  # Move probabilities to CPU and convert to numpy array
            pred_classes.extend(pred.cpu().numpy())  # Move predictions to CPU and convert to numpy array
            targets.extend(target.cpu().numpy())  # Move targets to CPU and convert to numpy array

    return np.array(preds), np.array(pred_classes), np.array(targets)



In [3]:
def get_preds_and_targets2(model, dataloader, device):
    preds, pred_classes, targets = [], [], []

    model.eval()  # Set model to evaluation mode
    model.to(device)  # Move model to the selected device (CPU or GPU)

    with torch.no_grad():
        for data, target in dataloader:
            data, target = data.to(device), target.to(device)
            output_tuple = model(data)

            output = output_tuple

            prob = F.softmax(output, dim=1)  # Compute probabilities
            _, pred = torch.max(prob, 1)  # Get predicted class

            preds.extend(prob.cpu().numpy())  # Move probabilities to CPU and convert to numpy array
            targets.extend(target.cpu().numpy())  # Move targets to CPU and convert to numpy array

    return np.array(preds), np.array(pred_classes), np.array(targets)



In [4]:
sss_dir = "/hdd/datasets/NATSBench/sss-full/"
tss_dir = "/hdd/datasets/NATSBench/NATS-tss-v1_0-3ffb9-full-extracted/NATS-tss-v1_0-3ffb9-full/"
root = '/home/younan/project_calibration/datasets/ImagenNet16'
api_type = 'tss'
image_dataset = 'ImageNet16-120'
archi_num = 8
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
post_temp = 'True'


if api_type =='tss':
    api = create(tss_dir, api_type, fast_mode=True, verbose=False)
elif api_type =='sss':
    api = create(sss_dir, api_type, fast_mode=True, verbose=False)
else:
    raise ValueError('api_type must be either tss or sss')

In [5]:
config = api.get_net_config(archi_num, image_dataset)
archi_info = api.get_more_info(archi_num, image_dataset,hp ='200',is_random=False)
# get the info of architecture of the 6111-th model on CIFAR-10
net = get_cell_based_tiny_net(config)
arch = api.arch(archi_num) 

# Load the pre-trained weights: params is a dict, where the key is the seed and value is the weights.
params = api.get_net_param(archi_num, image_dataset,None, hp ='200')


In [6]:
net.load_state_dict(next(iter(params.values())))
# net.load_state_dict(params[777])
if image_dataset == 'cifar10':
    if post_temp == 'True':
        test_loader, val_loader = cifar10.get_test_valid_loader(batch_size = 256,
                            random_seed = 42,
                            valid_size=0.2,
                            shuffle=True,
                            num_workers=4, pin_memory=False)
    else:
        test_loader = cifar10.get_test_loader(batch_size=256, shuffle=False, num_workers=4, pin_memory=False)
elif image_dataset == 'cifar100':
    if post_temp == 'True':
        test_loader, val_loader = cifar100.get_test_valid_loader(batch_size = 256,
                            random_seed = 42,
                            valid_size=0.2,
                            shuffle=True,
                            num_workers=4, pin_memory=False)
    else:
        test_loader = cifar100.get_test_loader(batch_size=256, shuffle=False, num_workers=4, pin_memory=False)
elif image_dataset == 'ImageNet16-120':
    

    root = './datasets/ImagenNet16'
    train_data, test_data, xshape, class_num = get_datasets(image_dataset, root, 0)

    if post_temp == 'True':
        def imagenet_get_test_valid_loader(batch_size = 256, random_seed= 42, valid_size = 0.2, shuffle = True,
                                    num_workers=4, pin_memory=False,
                                test_dataset=test_data):
            num_test = len(test_dataset)
            indices = list(range(num_test))
            split = int(np.floor(valid_size * num_test))

            if shuffle:
                np.random.seed(random_seed)
                np.random.shuffle(indices)

            test_idx, valid_idx = indices[split:], indices[:split]
            

            test_sampler = SubsetRandomSampler(test_idx)
            valid_sampler = SubsetRandomSampler(valid_idx)

            test_loader = torch.utils.data.DataLoader(
                test_dataset, batch_size=batch_size, sampler=test_sampler,
                num_workers=num_workers, pin_memory=pin_memory,
            )
            valid_loader = torch.utils.data.DataLoader(
                test_dataset, batch_size=batch_size, sampler=valid_sampler,
                num_workers=num_workers, pin_memory=pin_memory,
            )
            return test_loader, valid_loader
        test_loader, val_loader = imagenet_get_test_valid_loader(batch_size = 256, random_seed= 42, valid_size = 0.2, shuffle = True,
                                num_workers=4, pin_memory=False)
    else:
        test_loader = DataLoader(test_data, batch_size=256, shuffle=False)

In [7]:
val_probs, val_pred_classes, val_targets = get_preds_and_targets(net, val_loader, device)
test_probs, test_pred_classes, test_targets = get_preds_and_targets(net, test_loader, device)

# val_logits = logistic_func(val_probs)
# test_logits = logistic_func(test_probs)


In [8]:
scaled_model = ModelWithTemperature(net)
scaled_model.set_temperature(val_loader,device=device)

Before temperature - NLL: 2.247, ECE: 0.048
Optimal temperature: 1.100
After temperature - NLL: 2.231, ECE: 0.032


ModelWithTemperature(
  (model): TinyNetwork(
    TinyNetwork(C=16, N=5, L=17)
    (stem): Sequential(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (cells): ModuleList(
      (0-4): 5 x InferCell(
        info :: nodes=4, inC=16, outC=16, [1<-(I0-L0) | 2<-(I0-L1,I1-L2) | 3<-(I0-L3,I1-L4,I2-L5)], |nor_conv_1x1~0|+|avg_pool_3x3~0|skip_connect~1|+|skip_connect~0|none~1|nor_conv_3x3~2|
        (layers): ModuleList(
          (0): ReLUConvBN(
            (op): Sequential(
              (0): ReLU()
              (1): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            )
          )
          (1): POOLING(
            (op): AvgPool2d(kernel_size=3, stride=1, padding=1)
          )
          (2-3): 2 x Identity()
          (4): Zero(

In [9]:
preds, pred_classes,targets = get_preds_and_targets(net, test_loader, device)
preds_logit = logistic_func(preds)

In [10]:
preds_post, pred_classes_post,targets_post = get_preds_and_targets2(scaled_model, test_loader, device)
preds_logit_post = logistic_func(preds_post)

In [11]:
accuracy(preds_logit,targets)

0.40708333333333335

In [12]:
accuracy(preds_logit_post,targets_post)

0.40708333333333335

In [13]:
print('ece:', metric.get_ece(test_probs, test_targets))
print('posttemp_ece:', metric.get_ece(preds_post, targets_post))

ece: 0.05860389422625304
posttemp_ece: 0.02285442205378785


In [16]:
print('nll:', metric.get_nll(test_probs, test_targets))
print('posttemp_nll:', metric.get_nll(preds_post, targets_post))

nll: 0.475400347499238
posttemp_nll: 0.45047528305782364


In [19]:
print('val_nll:', metric.get_nll(val_probs, val_targets))
print('val_optimized_nll:', metric.get_nll(val_temp_preds, val_targets))

val_nll: 0.3032133589261297
val_optimized_nll: 0.3026488789708914


In [20]:
print('val_ece:', metric.get_ece(val_probs, val_targets))
print('val_optimized_ece:', metric.get_ece(val_temp_preds, val_targets))

val_ece: 0.008778117203712478
val_optimized_ece: 0.005874584877490989


In [22]:
print('MCE:', metric.get_mce(preds, targets))
print('cwECE:', metric.get_classwise_ece(preds, targets))

print('sce:', metric.get_sce(preds, targets))
print('tace:', metric.get_tace(preds, targets))
print('ace:', metric.get_ace(preds, targets))

MCE: 0.24607335031032562
cwECE: 0.010834139782901622
sce: 0.010834139782901623
tace: 0.009887347967537355
ace: 0.009079010745882231


In [23]:
import inspect
def get_param_dict(func, *args, **kwargs):
    result = func(*args, **kwargs)
    
    # Get the function's signature and parameters
    signature = inspect.signature(func)
    params = signature.parameters

    # Create a dictionary with default parameter values
    default_params = {k: v.default for k, v in params.items() if v.default != inspect.Parameter.empty}

    # Update the default parameter values with the provided kwargs
    all_params = {**default_params, **kwargs}
    
    all_params['result'] = result
    return all_params

In [24]:
ole_loss = tace.OELoss()
ole = ole_loss.loss(preds, targets, n_bins=15,logits = False)
ole_str = str(get_param_dict(ole_loss.loss, preds, targets, n_bins=15,logits = False)) + ', '
print('ole:', ole)

ole: 0.037136059674976026


In [25]:
sce_loss = tace.SCELoss()
tace_loss = tace.TACELoss()
ace_loss = tace.ACELoss()
sce = sce_loss.loss(preds, targets, n_bins=15,logits = False)
tace = tace_loss.loss(preds, targets, n_bins=15,logits = False)
ace = ace_loss.loss(preds, targets, n_bins=15,logits = False)
print('sce:', sce)
print('tace:', tace)
print('ace:', ace)

sce: 0.8778040399349889
tace: 0.0864724029721599
ace: 0.0864724029721599


In [26]:
n_bins = 10

In [27]:
ece_data = get_param_dict(metric.get_ece, preds, targets,n_bins)

In [28]:
ece_data

{'n_bins': 15, 'result': 0.046477467873692516}

In [29]:
n_bin = 10
ece_data = get_param_dict(metric.get_ece, preds, targets,n_bins=n_bin)

In [30]:
ece_data

{'n_bins': 10, 'result': 0.046477467873692516}

In [31]:
print('TCE_debias:', cal.get_top_calibration_error(preds, targets,p=1))
print('Marginal_CE_debias:', cal.get_calibration_error(preds, targets,p=1))
print('TCE:', cal.get_top_calibration_error(preds, targets,debias=False,p=1))
print('Marginal_CE:', cal.get_calibration_error(preds, targets,debias=False,p=1))
print('ECE_em:', cal.get_ece_em(preds, targets))

TCE_debias: 0.04650807754683169
Marginal_CE_debias: 0.009360044983501115
TCE: 0.046481831607222544
Marginal_CE: 0.009520283665955068
ECE_em: 0.046481831607222544


In [32]:
print('KSCE:', metric.get_KSCE(preds, targets))
print('KDECE:', metric.get_KDECE(preds, targets))
print('MMCE:', metric.get_MMCE(preds, targets))
print('NLL:', metric.get_nll(preds, targets))
print('brier:', metric.get_brierscore(preds, targets))

KSCE: 0.04648401403427127
KDECE: 0.04269384100094596
MMCE: 0.037466022106703564
NLL: 0.9962960859691724
brier: 0.022643672204411006


In [33]:
bin_sizes = [5,10]

In [34]:
ece_str = ''

In [35]:
for n_bins in bin_sizes:
    ece_str += str(get_param_dict(metric.get_ece, preds, targets, n_bins)) + ', '
    print(n_bins)

5
10


In [36]:
device

device(type='cuda', index=0)

In [37]:
targets.shape

(10000,)

In [38]:
tensor_preds = torch.tensor(preds).to(device)
tensor_targets = torch.tensor(targets).to(device)
    

print('ECE_KDE:', ece_kde.get_ece_kde(tensor_preds, tensor_targets, bandwidth = 0.001, p = 1, mc_type = 'marginal', device = device).item())

OutOfMemoryError: CUDA out of memory. Tried to allocate 3.73 GiB (GPU 0; 10.76 GiB total capacity; 7.45 GiB already allocated; 676.25 MiB free; 7.46 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
tensor_preds = torch.tensor(preds).to(device)
tensor_targets = torch.tensor(targets).to(device)


print('ECE_KDE:', ece_kde.get_ece_kde(tensor_preds, tensor_targets, bandwidth = 0.001, p = 1, mc_type = 'canonical', device = device).item())

ECE_KDE: 0.24982665479183197


In [None]:
bandiwitdh = ece_kde.get_bandwidth(tensor_preds, device)

In [None]:
bandiwitdh

tensor(0.0010)

In [None]:
log_kern = ece_kde.get_kernel(f =tensor_preds, bandwidth=0.001, device = device)

In [None]:
from torch import nn

In [None]:
log_kern = ece_kde.get_kernel(f =tensor_preds, bandwidth=0.001, device = device)
kern = torch.exp(log_kern)

y_onehot = nn.functional.one_hot(tensor_targets, num_classes=tensor_preds.shape[1]).to(torch.float32)
kern_y = torch.matmul(kern, y_onehot)
den = torch.sum(kern, dim=1)
# to avoid division by 0
den = torch.clamp(den, min=1e-10)

ratio = kern_y / den.unsqueeze(-1)
ratio = torch.sum(torch.abs(ratio - tensor_preds)**1, dim=1)

In [None]:
ratio.mean()

tensor(0.2498, device='cuda:0')

In [None]:
tensor_preds.shape[1]

10

In [None]:
f = torch.rand((50, 3))
f = f / torch.sum(f, dim=1).unsqueeze(-1)
y = torch.randint(0, 3, (50,))

ece_kde.get_ece_kde(f, y, bandwidth=0.001, p=1, mc_type='canonical', device='cpu')

tensor(1.1602)

In [None]:
tensor_preds = torch.tensor(preds).to(device)
tensor_targets = torch.tensor(targets).to(device)
    

print('ECE_KDE:', ece_kde.get_ece_kde(tensor_preds, tensor_targets, bandwidth = 0.001, p = 2, mc_type = 'top_label', device = device))

ECE_KDE: tensor(nan, device='cuda:0')


In [None]:
tensor_targets

tensor([3, 8, 8,  ..., 5, 1, 7], device='cuda:0')

In [None]:
tensor_preds.shape[1]

10