In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks
import os
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns
from sklearn.preprocessing import PolynomialFeatures
import pandas as pd
from sklearn.metrics import r2_score
from scipy.interpolate import interp1d
import tensorflow_datasets as tfds
from collections import Counter
import scipy.ndimage
from tensorflow.keras.callbacks import ModelCheckpoint
import keras
import gc
import random
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import SGD
import tarfile
import urllib.request
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from PIL import Image
from torch.utils.data import Dataset, DataLoader, random_split, Subset
import torch.nn.functional as F
from scipy.stats import gaussian_kde
from numba import jit

os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

# Maximum number of images requested from each dataset (passed as `limit=`
# to the dataset constructors below).
limit = 10000

# bzq mask geometry: size of the masked window and the step between
# consecutive window positions, in pixels.
len_x_target = 20
len_y_target = 20
stride_x_target = 10
stride_y_target = 10

# Weight of the mean term versus the std-based term in the bzq score
# (see process_batch: alpha * mean + (1 - alpha) * f(std)).
alpha = 0.5

bins_size = 30  # number of histogram bins used by the plots
poly_degree = bins_size - 1
window_size = 1

# Target image preprocessing.
# NOTE(review): `angle` and `pixels` are not read anywhere in the visible cells.
angle = 0
pixels = 0

In [None]:
class CustomImageDataset(Dataset):
    """Image-folder dataset whose labels are read from a separate text file.

    The images are the entries of ``img_dir`` in sorted filename order. The
    label of the i-th image is the second whitespace-separated field of the
    i-th non-empty line of ``labels_file`` (images and label lines are paired
    purely by position).

    Args:
        img_dir: Directory containing the image files.
        labels_file: Text file with one ``<name> <int label>`` entry per line.
        transform: Optional callable applied to each loaded PIL image.
        limit: Optional number of images to keep. A random subset (drawn with
            ``np.random.choice`` without replacement) of at most ``limit``
            images is selected; if the directory holds fewer images, all of
            them are kept instead of raising.
    """

    def __init__(self, img_dir, labels_file, transform=None, limit=None):
        self.img_dir = img_dir
        self.transform = transform
        with open(labels_file, 'r') as f:
            # Skip blank lines (e.g. a trailing newline) instead of crashing on them.
            self.labels = [int(line.split()[1]) for line in f if line.strip()]
        self.img_names = sorted(os.listdir(img_dir))

        if limit:
            # Clamp so that asking for more images than exist is not an error.
            sample_size = min(limit, len(self.img_names))
            indices = np.random.choice(len(self.img_names), sample_size, replace=False)
            # Apply the same index selection to names and labels to keep them aligned.
            self.img_names = [self.img_names[i] for i in indices]
            self.labels = [self.labels[i] for i in indices]

    def __len__(self):
        """Return the number of (selected) images."""
        return len(self.img_names)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB, apply the transform, and return ``(image, label)``."""
        img_path = os.path.join(self.img_dir, self.img_names[idx])
        image = Image.open(img_path).convert("RGB")
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label


class CustomImageDatasetC(Dataset):  # for corruptions
    """Dataset that concatenates images from several directories.

    Labels are looked up by filename: every non-empty line of ``labels_file``
    maps its first whitespace-separated field (the image filename) to its last
    field (the integer label).

    Args:
        img_dirs: Iterable of directories to read; directories that do not
            exist are skipped.
        labels_file: Text file providing the filename -> label mapping.
        transform: Optional callable applied to each loaded PIL image.
        limit: Optional maximum number of images taken from EACH directory
            (the first ``limit`` labelled files in sorted filename order).

    After construction ``img_names`` is a list of ``(directory, filename)``
    pairs and ``labels`` is the parallel list of integer labels.
    """

    def __init__(self, img_dirs, labels_file, transform=None, limit=None):
        self.img_dirs = img_dirs
        self.transform = transform

        # Load the filename -> label mapping, ignoring blank lines.
        label_by_name = {}
        with open(labels_file, 'r') as f:
            for line in f:
                fields = line.split()
                if fields:
                    label_by_name[fields[0]] = int(fields[-1])

        total_images = []
        total_labels = []

        for img_dir in img_dirs:
            if not os.path.exists(img_dir):
                continue

            # Keep only files that have a label, THEN apply the per-directory
            # limit, so unlabelled files do not consume slots of the limit.
            labelled_names = [
                img_name for img_name in sorted(os.listdir(img_dir))
                if img_name in label_by_name
            ]
            if limit:
                labelled_names = labelled_names[:limit]

            # Images and labels are appended together, so the two lists
            # always have the same length.
            total_images.extend((img_dir, img_name) for img_name in labelled_names)
            total_labels.extend(label_by_name[img_name] for img_name in labelled_names)

        self.img_names = total_images
        self.labels = total_labels

    def __len__(self):
        """Return the total number of images across all directories."""
        return len(self.img_names)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for entry ``idx``.

        If the image cannot be opened, the error is printed and
        ``(None, None)`` is returned instead of raising.
        """
        img_dir, img_name = self.img_names[idx]
        img_path = os.path.join(img_dir, img_name)

        try:
            image = Image.open(img_path).convert("RGB")
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            return None, None

        # Labels are stored in a list parallel to img_names.
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)

        return image, label



# Select the compute device: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Image preprocessing pipeline: resize, center-crop to 224, convert to a
# tensor, then normalize each channel with the given mean / std.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load the pretrained ResNet50 model and switch it to evaluation mode.
# `models` refers to torchvision.models here: the `import torchvision.models as models`
# line rebinds the name that was first imported from tensorflow.keras.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
model = model.to(device)
model.eval()


In [None]:
# The bzq masking function, used to produce the masked-image predictions on ImageNet.

def single_data_bzq_mask_preprocessing_imagenet(original_data, start_x, start_y, len_x, len_y, magnification):
    """Return a copy of ``original_data`` with one rectangular window rescaled.

    The data is indexed as ``[:, y, x]``. Every value inside the window
    ``[start_y, start_y + len_y) x [start_x, start_x + len_x)`` is multiplied
    in place (on the copy) by ``magnification``; the input is left untouched.

    If either window dimension is not positive, the input object itself is
    returned without copying.
    """
    window_has_area = len_x > 0 and len_y > 0
    if not window_has_area:
        return original_data

    row_span = slice(start_y, start_y + len_y)
    col_span = slice(start_x, start_x + len_x)

    masked = np.copy(original_data)
    window = masked[:, row_span, col_span]  # basic slicing: a view into `masked`
    window *= magnification
    return masked


#print(random_num_for_bzq_mask_imagenet)

def single_data_bzq_mask_preprocessing_imagenet_random_global(original_data, start_x, start_y, len_x, len_y, random_num_for_bzq_mask_imagenet):
    """Return a copy of ``original_data`` with one window overwritten by a fixed patch.

    The data is indexed as ``[:, y, x]``. The window
    ``[start_y, start_y + len_y) x [start_x, start_x + len_x)`` is replaced by
    the top-left ``len_y x len_x`` corner of ``random_num_for_bzq_mask_imagenet``.

    If either window dimension is not positive, the input object itself is
    returned without copying.
    """
    if not (len_x > 0 and len_y > 0):
        return original_data

    patch = random_num_for_bzq_mask_imagenet[:, :len_y, :len_x]
    y_end = start_y + len_y
    x_end = start_x + len_x

    result = np.copy(original_data)
    result[:, start_y:y_end, start_x:x_end] = patch
    return result

# Accumulators filled in by later cells.
bzq = []
correct_predictions_imagenet = []
incorrect_predictions_imagenet = []
bzq_imagenet = []

# When bzq == 0: mean / std of the softmax values obtained under the masks,
# kept separately for correctly and incorrectly classified samples.
correct_predictions_bzq_zero_softmax_mean = []
correct_predictions_bzq_zero_softmax_std = []
incorrect_predictions_bzq_zero_softmax_mean = []
incorrect_predictions_bzq_zero_softmax_std = []


corruption_levels = ['1', '2', '3', '4', '5']
corruption_types = [
    'brightness', 'contrast', 'defocus_blur', 'elastic_transform', 'fog', 'frost',
    'gaussian_blur', 'gaussian_noise', 'glass_blur', 'impulse_noise', 'pixelate',
    'saturate', 'shot_noise', 'spatter', 'speckle_noise', 'zoom_blur'
]

#corruption_levels = ['3']
#corruption_types = ['gaussian_blur']

# Build every image directory: one per (level, corruption) pair, with the
# level as the outer loop and the corruption type as the inner loop.
img_dirs = []
for level in corruption_levels:
    for corruption in corruption_types:
        img_dirs.append(f"/home/a/bzq_on_confidence/Confidence2022/notebook/confident/grocery/imagenetc/{corruption}/{level}")

# Build the corrupted dataset from all corruption directories.
# NOTE(review): test_dataset_c is constructed here, but the DataLoader below
# iterates test_dataset; confirm which dataset is meant to be evaluated.
test_dataset_c = CustomImageDatasetC(
    img_dirs,
    "/home/a/bzq_on_confidence/Confidence2022/notebook/confident/grocery/ILSVRC2012_validation_ground_truth.txt",
    transform=transform,
    limit=limit
)

# Clean validation images with the same label file and preprocessing.
test_dataset = CustomImageDataset("/home/a/bzq_on_confidence/Confidence2022/notebook/confident/grocery/ILSVRC2012_img_val", 
                                 "/home/a/bzq_on_confidence/Confidence2022/notebook/confident/grocery/ILSVRC2012_validation_ground_truth.txt", 
                                 transform=transform, 
                                 limit=limit)

# One image per batch, in dataset order (shuffle disabled).
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Predict and print the accuracy (disabled: the code below is a string literal, not executed).
'''
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f'Accuracy: {accuracy * 100:.2f}%')'''


In [None]:
# bzq modifying: copy the mask geometry from the configuration cell.
len_x = len_x_target
len_y = len_y_target
stride_x = stride_x_target
stride_y = stride_y_target
batch_size = 1  # batch size used by the sliding-mask loop

# Raw model outputs (logits) for the unmasked images: one row per sample,
# in loader order (the loader does not shuffle, so row i is dataset item i).
original_predictions_imagenet = []
with torch.no_grad():
    for batch_data, batch_labels in test_loader:
        batch_data = batch_data.to(device)
        batch_predictions_imagenet = model(batch_data).cpu().numpy()
        original_predictions_imagenet.append(batch_predictions_imagenet)

original_predictions_imagenet = np.vstack(original_predictions_imagenet)

# Read the labels straight from the dataset's label list. This is the value
# test_dataset[i][1] returns, without decoding every image a second time.
test_labels = list(test_dataset.labels)

predicted_labels = np.argmax(original_predictions_imagenet, axis=1)
is_correct = predicted_labels == np.array(test_labels)
correct_indices = np.where(is_correct)[0]
incorrect_indices = np.where(~is_correct)[0]

# Assign instead of extending so that re-running this cell does not
# accumulate duplicate indices.
correct_predictions_imagenet = correct_indices.tolist()
incorrect_predictions_imagenet = incorrect_indices.tolist()

# Per-sample 0/1 correctness flags, in dataset order.
acc_imagenet = is_correct.astype(int).tolist()

print(f"{len(correct_predictions_imagenet)}, {len(incorrect_predictions_imagenet)}")


In [None]:
import torch
import numpy as np
from collections import Counter
import torch.nn.functional as F

# Random patch of shape (3, len_y_target, len_x_target) with values in [0, 1]
# (integers 0..255 divided by 255). No random seed is set in the visible
# cells, so the patch differs between runs.
random_num_for_bzq_mask_imagenet = np.random.randint(0, 256, (3, len_y_target, len_x_target)).astype(np.float32) / 255.0
# Per-image results collected by the sliding-mask loop below.
bzq_list = []
brier_scores = []
uncertainties = []
resolutions = []
reliabilities = []
nll_losses = []

def brier_score_decomposition(predictions, labels, num_classes=1000):
    """Compute the Brier score and the three derived terms used in this notebook.

    Args:
        predictions: Tensor of shape (batch_size, num_classes) holding softmax
            probabilities.
        labels: Class indices. Either one label per row of ``predictions`` or a
            single label (0-dim or 1-element), which is then applied to every
            row, matching how ``compute_nll`` treats its labels.
        num_classes: Number of classes used for the one-hot encoding.

    Returns:
        Dict with float entries "Brier Score", "Uncertainty", "Resolution"
        and "Reliability".

    NOTE(review): "Uncertainty" is computed from the mean predicted
    probabilities and "Resolution" from the label frequencies, and
    "Reliability" is defined as ``brier - uncertainty + resolution``. Confirm
    that these definitions are the intended ones before comparing them with
    other published decompositions.
    """
    # Normalize labels to a 1-D int64 tensor with one entry per prediction row.
    # Without this, a 0-dim label yields a 1-D one-hot vector and the
    # mean over dim=0 below collapses over classes instead of over samples.
    labels = torch.as_tensor(labels, device=predictions.device).long().reshape(-1)
    if labels.numel() == 1:
        labels = labels.expand(predictions.shape[0])

    # One-hot encode the labels: (batch_size, num_classes).
    labels_one_hot = torch.nn.functional.one_hot(labels, num_classes).float()

    # Brier score: mean over samples of the squared error summed over classes.
    brier_score = torch.mean(torch.sum((predictions - labels_one_hot) ** 2, dim=1))

    # "Uncertainty": p * (1 - p) summed over classes, p = mean predicted probability.
    marginal_probs = torch.mean(predictions, dim=0)
    uncertainty = torch.sum(marginal_probs * (1 - marginal_probs))

    # "Resolution": same expression evaluated on the per-class label frequency.
    mean_class_probs = torch.mean(labels_one_hot, dim=0)
    resolution = torch.sum(mean_class_probs * (1 - mean_class_probs))

    # "Reliability": remainder term as defined by this notebook.
    reliability = brier_score - uncertainty + resolution

    return {
        "Brier Score": brier_score.item(),
        "Uncertainty": uncertainty.item(),
        "Resolution": resolution.item(),
        "Reliability": reliability.item()
    }

def compute_nll(predictions, labels):
    """Compute the mean negative log-likelihood (NLL) of the true labels.

    Args:
        predictions: Tensor of shape (batch_size, num_classes) holding softmax
            probabilities.
        labels: Integer class-index tensor. Either one label per row, or a
            single label that is expanded to every row of ``predictions``.

    Returns:
        The mean NLL as a Python float. Probabilities are clamped to the
        smallest positive normal value of their dtype before the logarithm,
        so a probability that underflowed to 0 yields a large finite loss
        instead of ``inf``.
    """
    smallest_positive = torch.finfo(predictions.dtype).tiny
    log_probs = torch.log(predictions.clamp_min(smallest_positive))

    # Broadcast a single label to one target per prediction row.
    labels = labels.expand(predictions.shape[0])
    nll_loss = F.nll_loss(log_probs, labels)
    return nll_loss.item()

def process_batch(batch_data, batch_label, len_y, stride_y, len_x, stride_x, device, model, alpha):
    """Compute the bzq score and calibration metrics for one image.

    A zeroed window of size ``len_y x len_x`` is slid over the image with the
    given strides; every masked copy is classified by ``model`` in a single
    forward pass.

    Args:
        batch_data: One image indexed as [channel, y, x].
            # assumes a CPU tensor / array that np.copy can convert - TODO confirm
        batch_label: True class index of the image (int or tensor).
        len_y, len_x: Height and width of the masked window.
        stride_y, stride_x: Step between consecutive window positions.
        device: Device on which the model is evaluated.
        model: Classifier returning logits of shape (N, num_classes).
        alpha: Weight of the mean term; ``1 - alpha`` weights the spread term.

    Returns:
        Tuple ``(bzq, temp, brier_components, nll_loss)``:
        ``bzq`` is a one-element list with the score, ``temp`` the softmax
        probabilities of the most frequently predicted class across all masked
        copies, ``brier_components`` the dict from
        ``brier_score_decomposition`` and ``nll_loss`` a float. For empty
        input the result is ``([], np.array([]), {}, 0.0)``.
    """
    bzq = []
    if len(batch_data) == 0:
        return bzq, np.array([]), {}, 0.0

    # Derive the spatial size from the data instead of hard-coding 224.
    channels, height, width = batch_data.shape[-3], batch_data.shape[-2], batch_data.shape[-1]

    # Window origins: y runs over rows, x over columns. The masking helper
    # takes (start_x, start_y), so x must be passed first. The original call
    # passed the y-loop variable as start_x, which is only harmless while
    # the window and the strides are square.
    targets = [
        single_data_bzq_mask_preprocessing_imagenet(batch_data, x0, y0, len_x, len_y, 0)
        for y0 in range(0, height - len_y, stride_y)
        for x0 in range(0, width - len_x, stride_x)
    ]

    targets_tensor = torch.from_numpy(
        np.vstack(targets).reshape(-1, channels, height, width)
    ).float().to(device)
    predictions = model(targets_tensor)

    # Class probabilities for every masked copy.
    softmax_predictions = F.softmax(predictions, dim=1)

    labels = batch_label
    if not isinstance(labels, torch.Tensor):
        labels = torch.tensor(labels, device=predictions.device)

    # Calibration metrics over the masked copies.
    brier_components = brier_score_decomposition(softmax_predictions, labels)
    nll_loss = compute_nll(softmax_predictions, labels)

    # bzq: find the class predicted most often across the masked copies ...
    max_bzq_indices = torch.argmax(predictions, dim=1).cpu().numpy()
    most_common_num, _ = Counter(max_bzq_indices).most_common(1)[0]

    # ... and combine the mean of its probability with a bounded function of
    # its standard deviation. arctan2(1, std) equals arctan(1 / std) for
    # std > 0 and evaluates to pi / 2 at std == 0 without dividing by zero.
    temp = softmax_predictions[:, most_common_num].cpu().numpy()
    spread_term = 2.0 / np.pi * np.arctan2(1.0, np.std(temp))
    bzq.append(alpha * np.mean(temp) + (1 - alpha) * spread_term)

    return bzq, temp, brier_components, nll_loss


# Set of correctly classified dataset indices for O(1) membership tests.
correct_index_set = set(correct_predictions_imagenet)

with torch.no_grad():
    for start in range(0, len(test_dataset), batch_size):
        end = min(start + batch_size, len(test_dataset))
        batch_data_labels = [test_dataset[i] for i in range(start, end)]
        batch_data, batch_labels = zip(*batch_data_labels)

        for k in range(len(batch_data)):
            if len(batch_data[k]) == 0:
                continue

            bzq, temp, brier_components, nll_loss = process_batch(
                batch_data[k], batch_labels[k], len_y, stride_y, len_x, stride_x, device, model, alpha
            )

            # Store the score and the calibration metrics of this image.
            bzq_list.append(bzq[-1])
            brier_scores.append(brier_components["Brier Score"])
            uncertainties.append(brier_components["Uncertainty"])
            resolutions.append(brier_components["Resolution"])
            reliabilities.append(brier_components["Reliability"])
            nll_losses.append(nll_loss)

            # The extra forward pass on the unmasked image was removed: its
            # result was never used, and the baseline logits are already in
            # original_predictions_imagenet.

            # For a score of exactly zero, record the softmax statistics,
            # split by whether the baseline prediction was correct.
            if bzq[-1] == 0.0:
                dataset_index = start + k
                if dataset_index in correct_index_set:
                    correct_predictions_bzq_zero_softmax_mean.append(np.mean(temp))
                    correct_predictions_bzq_zero_softmax_std.append(np.std(temp))
                else:
                    incorrect_predictions_bzq_zero_softmax_mean.append(np.mean(temp))
                    incorrect_predictions_bzq_zero_softmax_std.append(np.std(temp))

bzq_imagenet = np.array(bzq_list)


In [None]:
#result_bzq_imagenet = 1 / bzq_imagenet
# The confidence used downstream is the bzq score itself.
result_bzq_imagenet = bzq_imagenet

# Histogram of the bzq score over ALL samples. The histogram is neither
# cumulative nor restricted to correct predictions, so the title says so,
# and the bars carry a label so the legend has something to show.
counts, bins, patches = plt.hist(bzq_imagenet, bins=bins_size, label='All predictions')
plt.title('Histogram of bzq (All Predictions)')
plt.xlabel('bzq')
plt.ylabel('Count')
plt.legend(loc='upper right')
plt.show()

# Box plot of the same values.
plt.boxplot(bzq_imagenet)
plt.show()

# Split the scores by whether the baseline prediction was correct.
bzq_correct_imagenet = np.array([bzq_imagenet[i] for i in correct_predictions_imagenet])
bzq_incorrect_imagenet = np.array([bzq_imagenet[i] for i in incorrect_predictions_imagenet])

result_bzq_correct_imagenet = np.array([result_bzq_imagenet[i] for i in correct_predictions_imagenet])
result_bzq_incorrect_imagenet = np.array([result_bzq_imagenet[i] for i in incorrect_predictions_imagenet])


In [None]:

# Separate correct and incorrect predictions
bzq_correct_imagenet = np.array([bzq_imagenet[i] for i in correct_predictions_imagenet])
bzq_incorrect_imagenet = np.array([bzq_imagenet[i] for i in incorrect_predictions_imagenet])

result_bzq_correct_imagenet = np.array([result_bzq_imagenet[i] for i in correct_predictions_imagenet])
result_bzq_incorrect_imagenet = np.array([result_bzq_imagenet[i] for i in incorrect_predictions_imagenet])

# 2 x 2 figure: one row per group (correct / incorrect), histogram on the
# left and box plot on the right.
fig, axs = plt.subplots(2, 2, figsize=(12, 10))

panels = [
    (bzq_correct_imagenet, 'Correct Predictions', 'bzq_correct_imagenet'),
    (bzq_incorrect_imagenet, 'Incorrect Predictions', 'bzq_incorrect_imagenet'),
]
for row, (values, group_name, variable_name) in enumerate(panels):
    # Plain (non-cumulative) histogram of the group's bzq scores.
    axs[row, 0].hist(values, bins=bins_size)
    axs[row, 0].set_title(f'Histogram of {group_name}')
    axs[row, 0].set_xlabel('bzq')
    axs[row, 0].set_ylabel('Count')
    axs[row, 0].legend([group_name], loc='upper right')

    # Box plot of the same group, titled with the variable actually shown.
    axs[row, 1].boxplot(values)
    axs[row, 1].set_title(f'Boxplot of {variable_name}')

# Adjust layout
plt.tight_layout()
plt.show()


In [None]:
# Confidence-accuracy curve: confidence is the bzq score, accuracy is the
# fraction of correct predictions among samples at or above each threshold.

# 1.0 for a correct baseline prediction, 0.0 for an incorrect one.
result_pred_imagenet = np.ones(len(test_dataset))
for i in incorrect_predictions_imagenet:
    result_pred_imagenet[i] = 0

print(sum(result_pred_imagenet))

# Group the correctness flags by their (exact) confidence value.
result_imagenet_dict = {}
for i, val in enumerate(result_bzq_imagenet):
    result_imagenet_dict.setdefault(val, []).append(result_pred_imagenet[i])

# Thresholds, accuracy at each threshold, and number of samples at or above it.
confidence_values = []
accuracies = []
element_counts = []

# Walk the thresholds from highest to lowest and keep running totals. The
# set of samples with confidence >= threshold only grows, so one cumulative
# pass replaces re-scanning every key for every threshold.
correct_so_far = 0.0
element_count = 0
for confidence in sorted(result_imagenet_dict.keys(), reverse=True):
    outcomes = result_imagenet_dict[confidence]
    correct_so_far = correct_so_far + np.sum(outcomes)
    element_count = element_count + len(outcomes)
    accuracy = correct_so_far / element_count
    confidence_values.append(confidence)
    accuracies.append(accuracy)
    element_counts.append(element_count)

# Accuracy versus confidence threshold.
# NOTE(review): the title mentions a 60 degree rotation, but no rotation is
# applied in the visible cells - confirm the title is still accurate.
plt.figure(figsize=(10, 6))
plt.plot(confidence_values, accuracies, marker='o', linestyle='-', color='b')
plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Accuracy (p(y|x) >= τ)')
plt.title('Confidence vs Accuracy (Rotated 60°)')
plt.grid(True)
plt.show()

# Number of retained samples versus confidence threshold.
plt.figure(figsize=(10, 6))
plt.plot(confidence_values, element_counts, marker='o', linestyle='-', color='b')
plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Number of Elements (p(y|x) >= τ)')
plt.title('Confidence Threshold vs Number of Elements')
plt.grid(True)
plt.show()

In [None]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

confidence_values_scaled = np.array(confidence_values)
#confidence_values_scaled = 2 / np.pi * np.arctan(confidence_values_scaled)
#confidence_values_scaled = confidence_values_scaled * confidence_values_scaled / (1 - confidence_values_scaled * confidence_values_scaled)

#print(confidence_values_scaled)

# Accuracy versus the unscaled confidence thresholds.
# NOTE(review): the title mentions a 60 degree rotation, but no rotation is
# applied in the visible cells - confirm the title is still accurate.
plt.figure(figsize=(10, 6))
plt.plot(confidence_values_scaled, accuracies, marker='o', linestyle='-', color='b')
plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Accuracy (p(y|x) >= τ)')
plt.title('Confidence vs Accuracy (Rotated 60°)')
plt.grid(True)
plt.show()

# Rescale the thresholds linearly to [0, 1].
scaler = MinMaxScaler()
confidence_values_scaled = scaler.fit_transform(np.array(confidence_values_scaled).reshape(-1, 1)).flatten()
#print(confidence_values_scaled)

# Accuracy versus the rescaled thresholds.
plt.figure(figsize=(10, 6))
plt.plot(confidence_values_scaled, accuracies, marker='o', linestyle='-', color='b')
plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Accuracy (p(y|x) >= τ)')
plt.title('Confidence vs Accuracy ')
plt.grid(True)
plt.show()

# Number of retained samples versus the rescaled thresholds. The y data is
# element_counts, so the axis label and title describe counts, not accuracy.
plt.figure(figsize=(10, 6))
plt.plot(confidence_values_scaled, element_counts, marker='o', linestyle='-', color='b')
plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Number of Elements (p(y|x) >= τ)')
plt.title('Confidence Threshold vs Number of Elements')
plt.grid(True)
plt.show()

In [None]:
from collections import defaultdict
# Vanilla baseline: confidence taken from the softmax of the unmasked prediction.
# original_predictions_imagenet (800000, 10)
# NOTE(review): the shape quoted above does not obviously match this notebook's
# model and loader - confirm or update it.
# Initialize the confidence / accuracy containers.

# confidence_map_vanilla maps a confidence value to the list of correctness
# flags of the samples that have exactly that confidence.
confidence_map_vanilla = defaultdict(list)

def softmax(x):
    """Return the softmax of ``x``, normalized along axis 0.

    The global maximum of ``x`` is subtracted before exponentiating so the
    exponentials cannot overflow.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    normalizer = exponentials.sum(axis=0)
    return exponentials / normalizer

# Record, for every sample, its correctness flag under its vanilla confidence
# (the largest softmax probability of the unmasked prediction).
for i, val in enumerate(original_predictions_imagenet):
    probabilities = softmax(val)
    conf = np.max(probabilities)
    confidence_map_vanilla[conf].append(result_pred_imagenet[i])

print("finish")
print(len(confidence_map_vanilla))

# Thresholds, accuracy at each threshold, and number of samples at or above it.
confidence_values_vanilla, accuracies_vanilla, element_counts_vanilla = [], [], []

# Visit the thresholds from highest to lowest, accumulating the flags of all
# samples whose confidence is at least the current threshold.
sorted_confidences = sorted(confidence_map_vanilla, reverse=True)
combined_results_vanilla = []

for confidence in sorted_confidences:
    combined_results_vanilla += confidence_map_vanilla[confidence]
    element_count_vanilla = len(combined_results_vanilla)
    accuracy_vanilla = np.mean(combined_results_vanilla)
    confidence_values_vanilla.append(confidence)
    accuracies_vanilla.append(accuracy_vanilla)
    element_counts_vanilla.append(element_count_vanilla)
 


In [None]:
'''
# 繪製圖形
plt.figure(figsize=(10, 6))
plt.plot(confidence_values_vanilla, accuracies_vanilla, marker='o', linestyle='-', color='b')
plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Accuracy (p(y|x) >= τ)')
plt.title('Confidence vs Accuracy')
plt.grid(True)
plt.show()'''

# Number of retained samples versus threshold, for the scaled bzq
# confidence (red) and the vanilla softmax confidence (blue).
plt.figure(figsize=(10, 6))
plt.plot(confidence_values_scaled, element_counts, marker='.', linestyle='-', color='r', label='Scaled', markersize=4)
plt.plot(confidence_values_vanilla, element_counts_vanilla, marker='.', linestyle='-', color='b', label='Vanilla', markersize=4)
plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Number of Elements (p(y|x) >= τ)')
plt.title('Confidence Threshold vs Number of Elements')
plt.legend()
plt.grid(True)
plt.show()

'''
# 繪製圖形
plt.figure(figsize=(10, 6))
plt.plot(confidence_values_vanilla, accuracies_vanilla, marker='o', linestyle='-', color='b', label='Vanilla')
plt.plot(confidence_values_scaled, accuracies, marker='o', linestyle='-', color='r', label='Scaled')
plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Accuracy (p(y|x) >= τ)')
plt.title('Confidence vs Accuracy')
plt.legend()
plt.grid(True)
plt.show()'''

# Accuracy versus threshold for both confidences. Relies on
# confidence_values_vanilla, accuracies_vanilla, confidence_values_scaled and
# accuracies having been defined by earlier cells. The figure started here is
# completed by the statements that follow find_nearest.
plt.figure(figsize=(10, 6))
plt.plot(confidence_values_vanilla, accuracies_vanilla, marker='.', linestyle='-', color='b', label='Vanilla', markersize=4)
plt.plot(confidence_values_scaled, accuracies, marker='.', linestyle='-', color='r', label='Scaled', markersize=4)

# Add vertical reference lines at three fixed thresholds.
plt.axvline(x=0.6827, color='g', linestyle='--', label='x=0.6827')
plt.axvline(x=0.9545, color='m', linestyle='--', label='x=0.9545')
plt.axvline(x=0.9973, color='c', linestyle='--', label='x=0.9973')
# 找到最接近的值
def find_nearest(array, value):
    """Return the index of the element of ``array`` closest to ``value``.

    When several elements are equally close, the first one wins.
    """
    distances = np.abs(np.asarray(array) - value)
    return distances.argmin()

# Thresholds at which both curves are marked and annotated (same values as
# the vertical reference lines).
marker_levels = (0.6827, 0.9545, 0.9973)

# Index of the closest available threshold on each curve.
idx_6827_vanilla, idx_9545_vanilla, idx_9973_vanilla = [
    find_nearest(confidence_values_vanilla, level) for level in marker_levels
]
idx_6827_scaled, idx_9545_scaled, idx_9973_scaled = [
    find_nearest(confidence_values_scaled, level) for level in marker_levels
]

vanilla_marker_indices = [idx_6827_vanilla, idx_9545_vanilla, idx_9973_vanilla]
scaled_marker_indices = [idx_6827_scaled, idx_9545_scaled, idx_9973_scaled]

# Mark the nearest points on both curves.
plt.scatter([confidence_values_vanilla[k] for k in vanilla_marker_indices],
            [accuracies_vanilla[k] for k in vanilla_marker_indices],
            color='black', zorder=5)
plt.scatter([confidence_values_scaled[k] for k in scaled_marker_indices],
            [accuracies[k] for k in scaled_marker_indices],
            color='black', zorder=5)

# Annotate every marked point with its (threshold, accuracy) pair:
# vanilla labels in blue to the right of the point, scaled labels in red to the left.
for k in vanilla_marker_indices:
    plt.text(confidence_values_vanilla[k], accuracies_vanilla[k],
             f'({confidence_values_vanilla[k]:.3f}, {accuracies_vanilla[k]:.3f})',
             fontsize=14, ha='left', color='blue')
for k in scaled_marker_indices:
    plt.text(confidence_values_scaled[k], accuracies[k],
             f'({confidence_values_scaled[k]:.3f}, {accuracies[k]:.3f})',
             fontsize=14, ha='right', color='red')

plt.xlabel('Confidence Threshold (τ)')
plt.ylabel('Accuracy (p(y|x) >= τ)')
plt.title('Confidence vs Accuracy')
plt.legend()
plt.grid(True)
plt.show()

print(accuracies[idx_6827_scaled], accuracies[idx_9545_scaled], accuracies[idx_9973_scaled])


In [None]:
# Save to .npy files (the np.save calls are disabled: they sit inside a
# string literal). Only the random mask patch is printed.
print(random_num_for_bzq_mask_imagenet)
'''np.save('confidence_values_vanilla.npy', confidence_values_vanilla) 
np.save('accuracies_vanilla.npy', accuracies_vanilla) 
np.save('element_counts_vanilla.npy', element_counts_vanilla)'''

In [None]:
# Min-max rescale the per-sample bzq scores. fit_transform re-fits `scaler`
# on these scores, replacing whatever it was fitted on in an earlier cell.
result_bzq_imagenet_modified = scaler.fit_transform(np.array(result_bzq_imagenet).reshape(-1, 1)).flatten()
# Sum of all correctness flags stored in the vanilla confidence map.
print(np.sum([item for sublist in confidence_map_vanilla.values() for item in sublist]))
# ECE calculation

def calculate_ece(confidences, labels, num_bins=15):
    """Compute the Expected Calibration Error (ECE) with equal-width bins.

    Args:
        confidences: Sequence of confidence values in [0, 1].
        labels: Sequence of the same length holding 1 for a correct
            prediction and 0 for an incorrect one.
        num_bins: Number of equal-width bins on [0, 1].

    Returns:
        The sum over non-empty bins of
        ``|mean confidence - accuracy| * fraction of samples in the bin``.
        Empty input yields 0.0.

    Bins are half-open ``(lower, upper]``, except the first bin which also
    includes its lower edge, so a confidence of exactly 0 (which min-max
    scaling always produces) is counted instead of being silently dropped.
    """
    confidences = np.asarray(confidences, dtype=float)
    labels = np.asarray(labels, dtype=float)
    if confidences.size == 0:
        # Nothing to bin; avoids mean-of-empty-slice warnings.
        return 0.0

    bin_boundaries = np.linspace(0, 1, num_bins + 1)
    bin_lowers = bin_boundaries[:-1]
    bin_uppers = bin_boundaries[1:]

    ece = 0.0
    for bin_index, (bin_lower, bin_upper) in enumerate(zip(bin_lowers, bin_uppers)):
        if bin_index == 0:
            above_lower = confidences >= bin_lower
        else:
            above_lower = confidences > bin_lower
        in_bin = np.logical_and(above_lower, confidences <= bin_upper)
        prop_in_bin = np.mean(in_bin)
        if prop_in_bin > 0:
            accuracy_in_bin = np.mean(labels[in_bin])
            avg_confidence_in_bin = np.mean(confidences[in_bin])
            ece += np.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin

    return ece


print(result_bzq_imagenet)

# Compute one ECE value per consecutive group of `limit` samples. The number
# of groups is derived from the data instead of being hard-coded to 80, so no
# ECE is computed on empty slices when fewer samples were evaluated. With
# 80 * limit samples this yields the same 80 groups as before.
num_groups = max(1, -(-len(result_bzq_imagenet) // limit))
group_slices = [slice(limit * i, limit * (i + 1)) for i in range(num_groups)]

ece = [calculate_ece(result_bzq_imagenet[group], result_pred_imagenet[group])
       for group in group_slices]
ece_modified = [calculate_ece(result_bzq_imagenet_modified[group], result_pred_imagenet[group])
                for group in group_slices]

# ECE of the raw bzq scores.
print("Expected Calibration Error (ECE):", ece)
fig, ax = plt.subplots()
ax.boxplot(ece)
ax.set_title('ECE Boxplot')
ax.set_ybound(0, 0.7)
ax.set_ylabel('ECE')
plt.show()

# ECE of the min-max rescaled bzq scores.
print("Expected Calibration Error (ece_modified):", ece_modified)
fig, ax = plt.subplots()
ax.boxplot(ece_modified)
ax.set_title('ECE Boxplot')
ax.set_ybound(0, 0.7)
ax.set_ylabel('ECE')
plt.show()

print(ece_modified, np.mean(brier_scores), np.mean(nll_losses))

# Quartiles (25th / 50th / 75th percentile) of each metric.
print(np.percentile(ece, 25), np.percentile(ece, 50), np.percentile(ece, 75))
print(np.percentile(ece_modified, 25), np.percentile(ece_modified, 50), np.percentile(ece_modified, 75))
print(np.percentile(brier_scores, 25), np.percentile(brier_scores, 50), np.percentile(brier_scores, 75))
print(np.percentile(nll_losses, 25), np.percentile(nll_losses, 50), np.percentile(nll_losses, 75))


In [None]:
# Overall accuracy (last value of the cumulative curve), mean Brier score,
# ECE of the first group and mean NLL.
print(accuracy, np.mean(brier_scores), ece_modified[0], np.mean(nll_losses))

# Grouping layout: img_dirs is built with the corruption level as the outer
# loop and the corruption type as the inner loop, so group number
# `level_index * n_types + type_index` belongs to that (level, type) pair.
n_levels = len(corruption_levels)
n_types = len(corruption_types)


def _mean_per_group(values):
    """Average ``values`` over consecutive, complete groups of ``limit`` samples."""
    return [np.mean(values[limit * i: limit * (i + 1)]) for i in range(len(values) // limit)]


def _split_by_level(per_group_values, metric_name):
    """Arrange per-group values as one list of ``n_types`` values per corruption level."""
    required = n_levels * n_types
    if len(per_group_values) < required:
        raise ValueError(
            f"{metric_name}: need {required} groups ({n_levels} levels x {n_types} types) "
            f"but only {len(per_group_values)} are available"
        )
    return [[per_group_values[idx + i * n_types] for idx in range(n_types)] for i in range(n_levels)]


def _show_boxplot(groups, title, y_max):
    """Draw one box per group with the y axis limited to [0, y_max]."""
    plt.figure(figsize=(10, 5))
    plt.boxplot(groups)
    plt.ylim(0, y_max)
    plt.title(title)
    # No legend: none of the artists carries a label.
    plt.show()


# Accuracy per corruption level, and its five best values per level.
acc = _mean_per_group(acc_imagenet)
val_imagenet = _split_by_level(acc, "Accuracy")
_show_boxplot(val_imagenet, "Accuracy", 1)

top5_val_imagenet = [sorted(values, reverse=True)[:5] for values in val_imagenet]
_show_boxplot(top5_val_imagenet, "Accuracy Top 5", 1)

# ECE per corruption level, and its five lowest values per level.
val_imagenet = _split_by_level(ece_modified, "ECE")
_show_boxplot(val_imagenet, "ECE", 0.35)

top5_val_imagenet = [sorted(values, reverse=False)[:5] for values in val_imagenet]
_show_boxplot(top5_val_imagenet, "ECE Top 5", 0.12)

# NLL per corruption level.
nll = _mean_per_group(nll_losses)
val_imagenet = _split_by_level(nll, "NLL")
_show_boxplot(val_imagenet, "NLL", 12)

# Brier score per corruption level.
bs = _mean_per_group(brier_scores)
val_imagenet = _split_by_level(bs, "Brier Score")
_show_boxplot(val_imagenet, "Brier Score", 1.2)

In [None]:
# NOTE(review): this cell recomputes the same grouped box plots (accuracy,
# ECE, NLL, Brier score) as the preceding cell; consider keeping only one.
#
# Every metric is averaged over consecutive groups of `limit` samples, then
# arranged as 5 lists of 16 values (index = i * 16 + idx).
# presumably 5 corruption levels x 16 corruption types - verify against the
# dataset that was actually evaluated.
# The plt.legend() calls below have no labelled artist to display.

# Mean accuracy per group.
acc = [np.mean(acc_imagenet[limit * i : limit * (i + 1)]) for i in range(len(acc_imagenet) // limit)]
#print(acc)
val_imagenet = [[acc[idx + i * 16] for idx in range(16)] for i in range(5)]
plt.figure(figsize=(10, 5))
plt.boxplot(val_imagenet)
plt.ylim(0, 1)
plt.title("Accuracy")
plt.legend()
plt.show()

# Five highest accuracies within each list of 16.
top5_val_imagenet = [sorted(values, reverse=True)[:5] for values in val_imagenet]
plt.figure(figsize=(10, 5))
plt.boxplot(top5_val_imagenet)
plt.ylim(0, 1)
plt.title("Accuracy Top 5")
plt.legend()
plt.show()

#print(ece_modified)

# ECE of the rescaled scores, arranged the same way.
val_imagenet = [[ece_modified[idx + i * 16] for idx in range(16)] for i in range(5)]
plt.figure(figsize=(10, 5))
plt.boxplot(val_imagenet)
plt.ylim(0, 0.35)
plt.title("ECE")
plt.legend()
plt.show()

# Five lowest ECE values within each list of 16.
top5_val_imagenet = [sorted(values, reverse=False)[:5] for values in val_imagenet]
plt.figure(figsize=(10, 5))
plt.boxplot(top5_val_imagenet)
plt.ylim(0, 0.12)
plt.title("ECE Top 5")
plt.legend()
plt.show()

# Mean NLL per group.
nll = [np.mean(nll_losses[limit * i : limit * (i + 1)]) for i in range(len(nll_losses) // limit)]
val_imagenet = [[nll[idx + i * 16] for idx in range(16)] for i in range(5)]
plt.figure(figsize=(10, 5))
plt.boxplot(val_imagenet)
plt.ylim(0, 12)
plt.title("NLL")
plt.legend()
plt.show()

# Mean Brier score per group.
bs = [np.mean(brier_scores[limit * i : limit * (i + 1)]) for i in range(len(brier_scores) // limit)]
val_imagenet = [[bs[idx + i * 16] for idx in range(16)] for i in range(5)]
plt.figure(figsize=(10, 5))
plt.boxplot(val_imagenet)
plt.ylim(0, 1.2)
plt.title("Brier Score")
plt.legend()
plt.show()