In [None]:
# bzq configuration
# Width/height of the masking patch and the step of the sliding window.
len_x_target = len_y_target = 3
stride_x_target = stride_y_target = 1

bins_size = 30  # number of histogram bins (statistical samples)
poly_degree = bins_size - 1
window_size = 1

# Target image preprocessing
angle = pixels = 0

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks
import os
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns
from sklearn.preprocessing import PolynomialFeatures
import pandas as pd
from sklearn.metrics import r2_score
from scipy.interpolate import interp1d
import tensorflow_datasets as tfds
from collections import Counter
import scipy.ndimage
from tensorflow.keras.callbacks import ModelCheckpoint
import keras
import gc
import random

In [None]:
# Define the ResNet-20 V1 architecture
def resnet_block(inputs, filters, kernel_size=3, stride=1, activation='relu'):
    """Build one residual block: two Conv-BN stages plus a shortcut connection.

    Args:
        inputs: Keras tensor fed into the block.
        filters: number of output channels of both main-path convolutions.
        kernel_size: kernel size of the two main-path convolutions.
        stride: stride of the first convolution and of the projection shortcut.
        activation: activation applied after the first BatchNormalization and
            again after the residual addition.

    Returns:
        The Keras tensor produced by the block.
    """
    # Main path: Conv -> BN -> activation -> Conv -> BN
    x = layers.Conv2D(filters, kernel_size, strides=stride, padding='same')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation(activation)(x)
    x = layers.Conv2D(filters, kernel_size, strides=1, padding='same')(x)
    x = layers.BatchNormalization()(x)
    
    # Use a 1x1 convolution on the shortcut whenever the stride or the channel
    # count would make the shapes differ; otherwise pass the input through.
    if stride != 1 or inputs.shape[-1] != filters:
        shortcut = layers.Conv2D(filters, kernel_size=1, strides=stride, padding='same')(inputs)
    else:
        shortcut = inputs
    
    # Residual addition followed by the final activation
    x = layers.add([x, shortcut])
    x = layers.Activation(activation)(x)
    return x

def resnet_v1(input_shape, num_classes):
    """Assemble the classifier: stem convolution, nine residual blocks, softmax head.

    Args:
        input_shape: shape of a single input image, passed to `layers.Input`.
        num_classes: number of units in the final softmax layer.

    Returns:
        A `models.Model` mapping images to class probabilities (softmax output).
    """
    inputs = layers.Input(shape=input_shape)
    # Stem: Conv -> BN -> ReLU with 16 filters
    x = layers.Conv2D(16, kernel_size=3, strides=1, padding='same')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    
    # Three stages of three residual blocks each (16, 32 and 64 filters).
    # NOTE(review): stride=2 is passed to every block of the 32- and 64-filter
    # stages, so each of those six blocks downsamples. A reference ResNet-20 v1
    # presumably downsamples only in the first block of a stage -- confirm this
    # is intended (changing it would invalidate previously saved weights).
    for _ in range(3):
        x = resnet_block(x, 16)
    for _ in range(3):
        x = resnet_block(x, 32, stride=2)
    for _ in range(3):
        x = resnet_block(x, 64, stride=2)
    
    # Classification head: global average pooling followed by a softmax layer
    x = layers.GlobalAveragePooling2D()(x)
    #x = layers.Dense(num_classes, activation='softmax')(x)
    x = layers.Dense(num_classes, activation='softmax', kernel_initializer='he_normal')(x)

    model = models.Model(inputs, x)
    return model

'''def custom_preprocessing(image, label):
    image = tf.image.random_flip_left_right(image)
    #image = tf.pad(image, [[4, 4], [4, 4], [0, 0]])
    #image = tf.image.random_crop(image, (32, 32, 3))
    image = tf.cast(image, tf.float32)
    return image, label'''

# Load CIFAR-10 data and one-hot encode the labels
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
y_train, y_test = tf.keras.utils.to_categorical(y_train), tf.keras.utils.to_categorical(y_test)

# Augmentation pipeline for the training images: random horizontal flip and a
# cast to float32. Padding, random crop and RandomRotation are commented out.
data_augmentation = tf.keras.Sequential([
    layers.Lambda(lambda x: tf.image.random_flip_left_right(x)),
    #layers.Lambda(lambda x: tf.pad(x, [[4, 4], [4, 4], [0, 0]])), 
    #layers.Lambda(lambda x: tf.image.random_crop(x, (32, 32, 3))), 
    layers.Lambda(lambda x: tf.cast(x, tf.float32)),
    #layers.RandomRotation(1)
    ]) # random rotation of the images (currently disabled above)
                                         
# Create tf.data.Dataset: augment each training sample, then shuffle, batch (size 7) and prefetch
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.map(lambda x, y: (data_augmentation(x, training=True), y), num_parallel_calls=tf.data.experimental.AUTOTUNE)
#train_dataset = train_dataset.map(custom_preprocessing, num_parallel_calls=tf.data.experimental.AUTOTUNE)
train_dataset = train_dataset.shuffle(buffer_size=50000).batch(7).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# Test images are only cast to float32 (no augmentation, no rescaling)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_dataset = test_dataset.map(lambda x, y: (tf.cast(x, tf.float32), y)).batch(7)

# Define model
model = resnet_v1(input_shape=(32, 32, 3), num_classes=10)

# Compile model; initial_lr is also read by lr_schedule
initial_lr = 0.000717
model.compile(optimizer=optimizers.Adam(learning_rate=initial_lr),
              loss='categorical_crossentropy', 
              metrics=['categorical_accuracy'])

# Learning rate schedule
def lr_schedule(epoch):
    """Return the learning rate for `epoch` as a step decay of `initial_lr`.

    The rate stays at `initial_lr` up to and including epoch 80 and is then
    multiplied by 1e-1, 1e-2, 1e-3 and 0.5e-3 once the epoch exceeds 80, 120,
    160 and 180 respectively.
    """
    # (epoch that must be exceeded, multiplier), checked from the latest stage down
    decay_table = (
        (180, 0.5e-3),
        (160, 1e-3),
        (120, 1e-2),
        (80, 1e-1),
    )
    for boundary, factor in decay_table:
        if epoch > boundary:
            return initial_lr * factor
    return initial_lr

lr_scheduler = callbacks.LearningRateScheduler(lr_schedule)

# Train the model, or reuse previously saved weights when they exist
epochs = 200
if os.path.exists('cifarc.weights.h5'):
    model.load_weights("cifarc.weights.h5")
    # Re-compile for evaluation only. The network already ends in a softmax
    # layer, so the loss must treat its outputs as probabilities
    # (from_logits=False); from_logits=True would apply softmax a second time
    # and report a wrong loss value.
    model.compile(loss=keras.losses.CategoricalCrossentropy(from_logits=False), metrics=[keras.metrics.CategoricalAccuracy()])
else:
    model.fit(train_dataset,
              validation_data=test_dataset,
              epochs=epochs,
              callbacks=[lr_scheduler])
    model.save_weights("cifarc.weights.h5")


In [None]:
# Verified bzq helper functions, reused here to evaluate the CIFAR predictions

def single_data_bzq_mask_preprocessing_cifar(original_data, start_x, start_y, len_x, len_y, magnification):
    """Return a copy of an image with one rectangular region scaled by `magnification`.

    The region spans rows `start_y:start_y + len_y` and columns
    `start_x:start_x + len_x` across all channels. When either length is not
    positive the input object itself is returned, unmodified and uncopied.
    """
    if min(len_x, len_y) <= 0:
        return original_data

    region = (
        slice(start_y, start_y + len_y),
        slice(start_x, start_x + len_x),
        slice(None),
    )
    masked = np.array(original_data, copy=True)
    masked[region] *= magnification
    return masked


#print(random_num_for_bzq_mask_cifar)

def single_data_bzq_mask_preprocessing_cifar_random_global(original_data, start_x, start_y, len_x, len_y, random_num_for_bzq_mask_cifar):
    """Return a copy of an image with one rectangular region overwritten by a fixed patch.

    The top-left `len_y` x `len_x` corner of `random_num_for_bzq_mask_cifar`
    is written to rows `start_y:start_y + len_y` and columns
    `start_x:start_x + len_x` of the copy. When either length is not positive
    the input object itself is returned, unmodified and uncopied.
    """
    if min(len_x, len_y) <= 0:
        return original_data

    patch = random_num_for_bzq_mask_cifar[:len_y, :len_x, :]
    rows = slice(start_y, start_y + len_y)
    cols = slice(start_x, start_x + len_x)

    patched = np.array(original_data, copy=True)
    patched[rows, cols, :] = patch
    return patched

# Module-level accumulators filled by later cells:
# per-image bzq scores, and the indices of correctly / incorrectly classified images.
bzq = list()
correct_predictions_cifar = list()
incorrect_predictions_cifar = list()
bzq_cifar = list()

# Download and prepare the CIFAR-10-C dataset
def load_cifar10_c():
    """Fetch the CIFAR-10-C archive with `get_file(untar=True)` and return the path it reports."""
    return tf.keras.utils.get_file(
        'CIFAR-10-C.tar',
        'https://zenodo.org/record/2535967/files/CIFAR-10-C.tar',
        untar=True,
    )

# Load the CIFAR-10-C dataset
def load_cifar10_c_data(data_dir, corruption_types=None, sample_slice=slice(20000, 30000)):
    """Load one slice of every selected CIFAR-10-C corruption file.

    Args:
        data_dir: path reported by the download step. Every ".tar" substring is
            removed to obtain the directory that holds the `.npy` files.
        corruption_types: names of the `<name>.npy` files to read. Defaults to
            the 16 corruptions used by this notebook (the full list minus
            'jpeg_compression', 'motion_blur' and 'snow').
        sample_slice: rows kept from each corruption file and from
            `labels.npy`. Defaults to rows 20000-29999.

    Returns:
        `(images, labels)`: the selected rows of every corruption concatenated
        along axis 0 in the order of `corruption_types`, with the matching
        label rows repeated once per corruption.
    """
    if corruption_types is None:
        corruption_types = ['brightness', 'contrast', 'defocus_blur', 'elastic_transform', 'fog', 'frost', 'gaussian_blur', 'gaussian_noise', 'glass_blur', 'impulse_noise', 'pixelate', 'saturate', 'shot_noise', 'spatter', 'speckle_noise', 'zoom_blur']

    npy_dir = data_dir.replace(".tar", "")

    # The same labels.npy file applies to every corruption, so read it once
    # instead of once per loop iteration.
    selected_labels = np.load(os.path.join(npy_dir, 'labels.npy'))[sample_slice]

    images = []
    labels = []
    for corruption in corruption_types:
        print(corruption)
        file_path = os.path.join(npy_dir, f'{corruption}.npy')
        with open(file_path, 'rb') as f:
            images.append(np.load(f)[sample_slice])
        labels.append(selected_labels)

    images = np.concatenate(images, axis=0)
    labels = np.concatenate(labels, axis=0)
    return images, labels

# Download the dataset
cifar10_c_path = load_cifar10_c()

# Load the CIFAR-10-C images and labels
test_images, test_labels = load_cifar10_c_data(cifar10_c_path)

# Cast the images to float32; preprocessed_data is a second name for the same array
test_images = test_images.astype(np.float32)
preprocessed_data = test_images

# One-hot encode the labels over the 10 classes
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10)

In [None]:
print(preprocessed_data.shape, test_labels.shape)

def data_generator(preprocessed_data, test_labels):
    """Yield `(image, label)` pairs, stopping at the end of the shorter input."""
    yield from zip(preprocessed_data, test_labels)

batch_size = 32  # batch size; adjust as needed

# Stream (image, label) pairs from the in-memory arrays and batch them
dataset = tf.data.Dataset.from_generator(
    lambda: data_generator(preprocessed_data, test_labels),
    output_signature=(
        tf.TensorSpec(shape=(32, 32, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(10,), dtype=tf.int64),
    ),
).batch(batch_size)

# scores[0] is the loss, scores[1] the compiled accuracy metric
scores = model.evaluate(dataset)
print(f"Test Loss: {scores[0]}")
print(f"Test Accuracy: {scores[1]}")



In [None]:
# bzq configuration used by the following cells
len_x = len_x_target
len_y = len_y_target
stride_x = stride_x_target
stride_y = stride_y_target
batch_size = 10000  # number of images sent to model.predict at a time

# Predict the unmodified images chunk by chunk
original_predictions_cifar = []
for start in range(0, len(preprocessed_data), batch_size):
    end = min(start + batch_size, len(preprocessed_data))
    original_predictions_cifar.append(model.predict(preprocessed_data[start:end], verbose=0))

original_predictions_cifar = np.vstack(original_predictions_cifar)

# Rebuild the index lists from scratch instead of appending to the lists
# created in an earlier cell: appending made this cell non-idempotent, so
# running it twice duplicated every index and skewed all later statistics.
is_correct_cifar = np.argmax(original_predictions_cifar, axis=1) == np.argmax(test_labels, axis=1)
correct_predictions_cifar = np.flatnonzero(is_correct_cifar).tolist()
incorrect_predictions_cifar = np.flatnonzero(~is_correct_cifar).tolist()

print(f"{len(correct_predictions_cifar)}, {len(incorrect_predictions_cifar)}")

# Overall accuracy on the unmodified images
print(len(correct_predictions_cifar) / (len(correct_predictions_cifar) + len(incorrect_predictions_cifar)))

In [None]:

if os.path.exists('bzq_cifar.npy'):
    bzq_cifar = np.load('bzq_cifar.npy')
    # The patch that produced the cached scores is not stored; define the name
    # so later cells that print it do not fail with a NameError.
    random_num_for_bzq_mask_cifar = None
else:
    # One random patch, generated once and reused for every image and position
    random_num_for_bzq_mask_cifar = np.random.randint(0, 256, (len_y_target, len_x_target, 3)).astype(np.float32)

    image_height, image_width = preprocessed_data.shape[1:3]

    # Local accumulator: re-running the cell must not extend results left over
    # from a previous run (or fail because `bzq` is already an ndarray).
    bzq = []
    for image in preprocessed_data:
        # Slide the patch over every position where it fits entirely inside
        # the image. The `+ 1` includes the last valid offset, which the
        # previous range() silently skipped, and start_x / start_y are now
        # passed in the order the helper expects (they were swapped, which
        # only worked because the patch is square).
        targets = [
            single_data_bzq_mask_preprocessing_cifar_random_global(
                image, start_x, start_y, len_x, len_y, random_num_for_bzq_mask_cifar)
            for start_y in range(0, image_height - len_y + 1, stride_y)
            for start_x in range(0, image_width - len_x + 1, stride_x)
        ]

        # Predict all patched variants of this image in one call
        predictions = model.predict(np.stack(targets), verbose=0)
        predicted_classes = np.argmax(predictions, axis=1).tolist()

        # bzq = fraction of patched variants whose predicted class differs
        # from the most frequently predicted class
        most_common_count = Counter(predicted_classes).most_common(1)[0][1]
        bzq.append((len(predicted_classes) - most_common_count) / len(predicted_classes))

    bzq = np.array(bzq)
    bzq_cifar = bzq
    #np.save('bzq_cifar.npy', bzq_cifar)

In [None]:
print(len(preprocessed_data))

# Confidence proxy: the reciprocal of bzq (bzq == 0 yields inf)
result_bzq_cifar = 1 / bzq_cifar


def plot_bzq_summary(bzq_values, result_values, hist_title):
    """Show a histogram, a box plot and a bzq-vs-1/bzq scatter plot for one subset.

    Args:
        bzq_values: bzq scores of the subset.
        result_values: matching 1/bzq values.
        hist_title: title of the histogram.

    Returns:
        The `(counts, bins, patches)` tuple returned by `plt.hist`.
    """
    hist_output = plt.hist(bzq_values, bins=bins_size)
    plt.title(hist_title)
    plt.xlabel('bzq')
    plt.ylabel('Count')
    # No plt.legend() here: nothing is labelled, so it only produced a warning
    plt.show()

    plt.boxplot(bzq_values)
    plt.show()

    plt.scatter(bzq_values, result_values)
    plt.title('Scatter Plot of x vs f')
    plt.xlabel('x')
    plt.ylabel('f')
    plt.show()
    return hist_output


# Split the scores by whether the unmodified image was classified correctly
correct_indices = np.asarray(correct_predictions_cifar, dtype=int)
incorrect_indices = np.asarray(incorrect_predictions_cifar, dtype=int)

bzq_correct_cifar = bzq_cifar[correct_indices]
bzq_incorrect_cifar = bzq_cifar[incorrect_indices]

result_bzq_correct_cifar = result_bzq_cifar[correct_indices]
result_bzq_incorrect_cifar = result_bzq_cifar[incorrect_indices]

# All samples, then correct predictions only, then incorrect predictions only
counts, bins, patches = plot_bzq_summary(bzq_cifar, result_bzq_cifar, 'Histogram of bzq (All Predictions)')
counts, bins, patches = plot_bzq_summary(bzq_correct_cifar, result_bzq_correct_cifar, 'Histogram of bzq (Correct Predictions)')
counts, bins, patches = plot_bzq_summary(bzq_incorrect_cifar, result_bzq_incorrect_cifar, 'Histogram of bzq (Incorrect Predictions)')

In [None]:
# Confidence-accuracy plot: confidence comes from bzq (1/bzq); accuracy is the
# fraction of correct predictions among samples at or above each confidence.

# 1.0 for correctly classified samples, 0.0 for misclassified ones
result_pred_cifar = np.ones(len(preprocessed_data))
result_pred_cifar[np.asarray(incorrect_predictions_cifar, dtype=int)] = 0

print(result_pred_cifar.sum())

# Group the correctness flags by confidence value
result_cifar_dict = {}
for val, outcome in zip(result_bzq_cifar, result_pred_cifar):
    result_cifar_dict.setdefault(val, []).append(outcome)

# Confidence thresholds with the accuracy / sample count at or above each one
confidence_values = []
accuracies = []
element_counts = []

# Walk the distinct confidences from high to low while keeping running totals.
# The set of samples with confidence >= threshold only grows along the way, so
# re-scanning every key for every threshold (quadratic) is unnecessary.
running_correct = 0.0
running_count = 0
for confidence in sorted(result_cifar_dict, reverse=True):
    outcomes = result_cifar_dict[confidence]
    running_correct += sum(outcomes)
    running_count += len(outcomes)
    confidence_values.append(confidence)
    accuracies.append(running_correct / running_count)
    element_counts.append(running_count)

# Accuracy as a function of the confidence threshold
plt.figure(figsize=(10, 6))
plt.plot(confidence_values, accuracies, marker='o', linestyle='-', color='b')
plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Accuracy (p(y|x) >= τ)')
# The former "(Rotated 60°)" suffix was left over from another experiment;
# no rotation is applied to the images in this notebook.
plt.title('Confidence vs Accuracy')
plt.grid(True)
plt.show()

# Number of retained samples as a function of the confidence threshold
plt.figure(figsize=(10, 6))
plt.plot(confidence_values, element_counts, marker='o', linestyle='-', color='b')
plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Number of Elements (p(y|x) >= τ)')
plt.title('Confidence Threshold vs Number of Elements')
plt.grid(True)
plt.show()

In [None]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Squash the unbounded 1/bzq thresholds into a bounded range (inf maps to 1)
confidence_values_scaled = 2 / np.pi * np.arctan(np.array(confidence_values))

# Accuracy against the arctan-scaled threshold
plt.figure(figsize=(10, 6))
plt.plot(confidence_values_scaled, accuracies, marker='o', linestyle='-', color='b')
plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Accuracy (p(y|x) >= τ)')
# The former "(Rotated 60°)" suffix was left over from another experiment
plt.title('Confidence vs Accuracy')
plt.grid(True)
plt.show()

# Stretch the scaled thresholds to span exactly [0, 1]; `scaler` is reused by a later cell
scaler = MinMaxScaler()
confidence_values_scaled = scaler.fit_transform(np.array(confidence_values_scaled).reshape(-1, 1)).flatten()

# Accuracy against the min-max scaled threshold
plt.figure(figsize=(10, 6))
plt.plot(confidence_values_scaled, accuracies, marker='o', linestyle='-', color='b')
plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Accuracy (p(y|x) >= τ)')
plt.title('Confidence vs Accuracy ')
plt.grid(True)
plt.show()

# Number of retained samples against the min-max scaled threshold.
# This plot shows element_counts, so the axis label and title now say so
# (they were copied from the accuracy plot).
plt.figure(figsize=(10, 6))
plt.plot(confidence_values_scaled, element_counts, marker='o', linestyle='-', color='b')
plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Number of Elements (p(y|x) >= τ)')
plt.title('Confidence Threshold vs Number of Elements')
plt.grid(True)
plt.show()

In [None]:
from collections import defaultdict
# Vanilla baseline: confidence is the maximum softmax probability of the
# prediction on the unmodified image (one row of original_predictions_cifar).

# Map each confidence value to the correctness flags of the samples that have it
confidence_map_vanilla = defaultdict(list)
for i, val in enumerate(original_predictions_cifar):
    conf = np.max(val)
    confidence_map_vanilla[conf].append(result_pred_cifar[i])

print("finish")
print(len(confidence_map_vanilla))

confidence_values_vanilla = []
accuracies_vanilla = []
element_counts_vanilla = []

# Accuracy and sample count at or above each confidence, from high to low
sorted_confidences = sorted(confidence_map_vanilla.keys(), reverse=True)
combined_results_vanilla = []

# Keep running totals instead of calling np.mean on the ever-growing list for
# every distinct confidence: nearly every sample has its own confidence value,
# which made the previous loop quadratic in the number of samples.
running_correct_vanilla = 0.0
running_count_vanilla = 0
for confidence in sorted_confidences:
    outcomes = confidence_map_vanilla[confidence]
    combined_results_vanilla.extend(outcomes)
    running_correct_vanilla += sum(outcomes)
    running_count_vanilla += len(outcomes)
    confidence_values_vanilla.append(confidence)
    accuracies_vanilla.append(running_correct_vanilla / running_count_vanilla)
    element_counts_vanilla.append(running_count_vanilla)
 


In [None]:
# Plot accuracy against the vanilla (max-softmax) confidence threshold
plt.figure(figsize=(10, 6))
plt.plot(confidence_values_vanilla, accuracies_vanilla, marker='o', linestyle='-', color='b')
plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Accuracy (p(y|x) >= τ)')
plt.title('Confidence vs Accuracy')
plt.grid(True)
plt.show()

# Plot the number of retained samples against the vanilla confidence threshold
plt.figure(figsize=(10, 6))
plt.plot(confidence_values_vanilla, element_counts_vanilla, marker='o', linestyle='-', color='b')
plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Number of Elements (p(y|x) >= τ)')
plt.title('Confidence Threshold vs Number of Elements')
plt.grid(True)
plt.show()

# Comparison figure; assumes confidence_values_vanilla, accuracies_vanilla,
# confidence_values_scaled and accuracies are already defined by earlier cells.
# This figure stays open and is completed by the statements further below.
plt.figure(figsize=(10, 6))
plt.plot(confidence_values_vanilla, accuracies_vanilla, marker='.', linestyle='-', color='b', label='Vanilla', markersize=4)
plt.plot(confidence_values_scaled, accuracies, marker='.', linestyle='-', color='r', label='Scaled', markersize=4)

# Add vertical reference lines
# (presumably the 1-, 2- and 3-sigma coverage of a normal distribution -- confirm)
plt.axvline(x=0.6827, color='g', linestyle='--', label='x=0.6827')
plt.axvline(x=0.9545, color='m', linestyle='--', label='x=0.9545')
plt.axvline(x=0.9973, color='c', linestyle='--', label='x=0.9973')

# Find the entry closest to a given value
def find_nearest(array, value):
    """Return the index of the element of `array` closest to `value` (first one on ties)."""
    distances = np.abs(np.asarray(array) - value)
    return distances.argmin()

# Index of the threshold closest to each reference value, for the vanilla curve ...
idx_6827_vanilla = find_nearest(confidence_values_vanilla, 0.6827)
idx_9545_vanilla = find_nearest(confidence_values_vanilla, 0.9545)
idx_9973_vanilla = find_nearest(confidence_values_vanilla, 0.9973)

# ... and for the scaled bzq curve
idx_6827_scaled = find_nearest(confidence_values_scaled, 0.6827)
idx_9545_scaled = find_nearest(confidence_values_scaled, 0.9545)
idx_9973_scaled = find_nearest(confidence_values_scaled, 0.9973)

# Mark the points of both curves nearest to the reference lines
# (drawn on the comparison figure that is still the current pyplot figure)
plt.scatter([confidence_values_vanilla[idx_6827_vanilla], confidence_values_vanilla[idx_9545_vanilla], confidence_values_vanilla[idx_9973_vanilla]], 
            [accuracies_vanilla[idx_6827_vanilla], accuracies_vanilla[idx_9545_vanilla], accuracies_vanilla[idx_9973_vanilla]], 
            color='black', zorder=5)
plt.scatter([confidence_values_scaled[idx_6827_scaled], confidence_values_scaled[idx_9545_scaled], confidence_values_scaled[idx_9973_scaled]], 
            [accuracies[idx_6827_scaled], accuracies[idx_9545_scaled], accuracies[idx_9973_scaled]], 
            color='black', zorder=5)

# Axis labels, legend and grid, then render the comparison figure
plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Accuracy (p(y|x) >= τ)')
plt.title('Confidence vs Accuracy')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
# Save to .npy files (the np.save calls below are disabled inside a string literal)
# NOTE(review): random_num_for_bzq_mask_cifar holds the random patch only when the
# bzq scores were computed in this session rather than loaded from 'bzq_cifar.npy'.
print(random_num_for_bzq_mask_cifar)
'''np.save('confidence_values_vanilla.npy', confidence_values_vanilla) 
np.save('accuracies_vanilla.npy', accuracies_vanilla) 
np.save('element_counts_vanilla.npy', element_counts_vanilla)'''

In [None]:

print(np.sum([item for sublist in confidence_map_vanilla.values() for item in sublist]))


In [None]:
#ECE calc

def calculate_ece(confidences, labels, num_bins=15):
    """Compute the Expected Calibration Error over equal-width confidence bins.

    Args:
        confidences: confidence of each sample, expected to lie in [0, 1].
        labels: 1 for a correct prediction and 0 for an incorrect one, aligned
            with `confidences`.
        num_bins: number of equal-width bins that partition [0, 1].

    Returns:
        The sum over non-empty bins of |mean confidence - accuracy| weighted by
        the fraction of samples in the bin; 0.0 for empty input.
    """
    confidences = np.asarray(confidences, dtype=float)
    labels = np.asarray(labels, dtype=float)
    if confidences.size == 0:
        return 0.0

    bin_boundaries = np.linspace(0, 1, num_bins + 1)
    bin_lowers = bin_boundaries[:-1]
    bin_uppers = bin_boundaries[1:]

    ece = 0.0
    for bin_index, (bin_lower, bin_upper) in enumerate(zip(bin_lowers, bin_uppers)):
        # Bins are (lower, upper], except the first one, which also includes its
        # lower edge. Otherwise a confidence of exactly 0.0 (which min-max
        # scaling always produces) would fall into no bin and be ignored.
        if bin_index == 0:
            above_lower = confidences >= bin_lower
        else:
            above_lower = confidences > bin_lower
        in_bin = np.logical_and(above_lower, confidences <= bin_upper)

        prop_in_bin = np.mean(in_bin)
        if prop_in_bin > 0:
            accuracy_in_bin = np.mean(labels[in_bin])
            avg_confidence_in_bin = np.mean(confidences[in_bin])
            ece += np.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin

    return ece

# Per-sample confidence in [0, 1]: arctan-squashed 1/bzq, then min-max scaled
result_bzq_cifar_modified = scaler.fit_transform(np.array(2 / np.pi * np.arctan(result_bzq_cifar)).reshape(-1, 1)).flatten()

# The data is a concatenation of 10000-sample chunks, one per corruption type.
# Derive the number of chunks from the data instead of hard-coding 16.
group_size = 10000
num_groups = len(result_pred_cifar) // group_size
group_slices = [slice(group_size * i, group_size * (i + 1)) for i in range(num_groups)]

# Expected Calibration Error per corruption type
ece = [calculate_ece(result_bzq_cifar_modified[group], result_pred_cifar[group])
       for group in group_slices]

print("Expected Calibration Error (ECE):", ece)
fig, ax = plt.subplots() 
ax.boxplot(ece) 
ax.set_title('ECE Boxplot') 
ax.set_ybound(0, 0.7)
ax.set_ylabel('ECE') 
plt.show()


# Brier score per corruption type: mean squared gap between confidence and correctness
brier_score = [np.mean((result_bzq_cifar_modified[group] - result_pred_cifar[group]) ** 2)
               for group in group_slices]
print("Brier Score:", brier_score)
fig, ax = plt.subplots() 
ax.boxplot(brier_score) 
ax.set_title('Brier Score Boxplot') 
ax.set_ybound(0, 1.4)
ax.set_ylabel('Brier Score') 
plt.show()

epsilon = 1e-15
# Guard against log(0). The clip must be applied to the scaled confidences that
# feed the logarithms (min-max scaling yields exact 0.0 and 1.0), and into a
# new variable so that result_bzq_cifar (the raw 1/bzq values) is not clobbered.
result_bzq_cifar_clipped = np.clip(result_bzq_cifar_modified, epsilon, 1 - epsilon)
# Negative log-likelihood (binary cross-entropy) per corruption type
nll = [-np.mean(result_pred_cifar[group] * np.log(result_bzq_cifar_clipped[group])
                + (1 - result_pred_cifar[group]) * np.log(1 - result_bzq_cifar_clipped[group]))
       for group in group_slices]
print("NLL:", nll)
fig, ax = plt.subplots() 
ax.boxplot(nll) 
ax.set_title('NLL Boxplot') 
ax.set_ybound(0, 12)
ax.set_ylabel('NLL') 
plt.show()