In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.optim.lr_scheduler import StepLR
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import random

# Early stopping parameters
# NOTE(review): these three values are only assigned here; nothing in the
# visible code of this cell reads or updates them.
early_stopping_patience = 150  # Number of epochs with no improvement to stop training
best_test_accuracy = 0.0  # Best test accuracy seen so far
epochs_no_improvement = 0  # Counter for how many epochs with no improvement

# Data Augmentation for Training Set
# Geometric and colour augmentations run on the image before ToTensor;
# Normalize runs last on the resulting tensor.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),             # Pad by 4 pixels per side, then take a random 32x32 crop
    transforms.RandomHorizontalFlip(),                # Random horizontal flip
    transforms.RandomRotation(15),                    # Rotate by a random angle within +/-15 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Random jitter in brightness, contrast, etc.
    transforms.ToTensor(),                            # Convert images to tensors
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # (x - 0.5) / 0.5 per channel
])

# Data Transformations for the Test Set (no augmentation)
# Uses the same normalisation constants as the training pipeline.
transform_test = transforms.Compose([
    transforms.ToTensor(),                            # Convert images to tensors
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # (x - 0.5) / 0.5 per channel
])

# Load the CIFAR100 dataset (downloaded into ./data when not already present)
train_dataset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform_train)
test_dataset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform_test)

# Dataloaders: training batches are shuffled, test batches keep dataset order
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=2)

# Set paths and other parameters
PENALTY_WEIGHT = 1  # Multiplier applied to the wrong-prediction count in custom_loss_function
# NOTE(review): the directory is created here, but the save_model call at the
# end of this cell writes "model.pth" to the working directory instead.
SAVE_PATH = './saved_models/'  # Directory to save model checkpoints
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)

# Temperature Scaling class
class TemperatureScaling(nn.Module):
    """Divide logits by a single learnable temperature.

    The temperature is a one-element ``nn.Parameter`` initialised to
    ``init_temp``, so it appears in ``parameters()`` and in the state dict
    under the key ``temperature``.
    """

    def __init__(self, init_temp=1.0):
        super().__init__()
        initial_value = torch.ones(1) * init_temp
        self.temperature = nn.Parameter(initial_value)

    def forward(self, logits):
        """Return ``logits`` scaled by ``1 / temperature`` (broadcast over all entries)."""
        scaled_logits = torch.div(logits, self.temperature)
        return scaled_logits

# Focal Loss for handling imbalanced datasets
class FocalLoss(nn.Module):
    """Focal loss built on per-sample multi-class cross-entropy.

    Each sample's cross-entropy ``ce`` is weighted by
    ``alpha * (1 - exp(-ce)) ** gamma`` and the weighted values are averaged
    over the batch.
    """

    def __init__(self, alpha=1, gamma=2):
        super().__init__()
        self.alpha, self.gamma = alpha, gamma

    def forward(self, outputs, targets):
        """Return the mean focal loss for logits ``outputs`` and class indices ``targets``."""
        per_sample_ce = F.cross_entropy(outputs, targets, reduction='none')
        # exp(-CE) recovers the probability the model assigned to the target class.
        target_prob = torch.exp(-per_sample_ce)
        modulating_factor = (1 - target_prob) ** self.gamma
        weighted = self.alpha * modulating_factor * per_sample_ce
        return weighted.mean()

# Custom Loss Function with Focal Loss and Penalty for Wrong Predictions after 50% Accuracy
def custom_loss_function(outputs, targets, current_accuracy):
    """Focal loss plus a count-based penalty once accuracy exceeds 50%.

    Args:
        outputs: Logits, one row per sample.
        targets: Ground-truth class indices.
        current_accuracy: Accuracy as a fraction; the penalty is added only
            when it is strictly greater than 0.5.

    Returns:
        ``focal_loss + PENALTY_WEIGHT * (number of misclassified samples)``
        when the accuracy gate is open, otherwise ``focal_loss + 0``.

    NOTE(review): the penalty comes from comparing argmax indices with the
    targets, so it has no gradient path back to ``outputs``. It changes the
    reported loss value but not the gradients.
    """
    base_loss = FocalLoss()(outputs, targets)

    class_probs = F.softmax(outputs, dim=1)
    predicted_classes = torch.max(class_probs, dim=1)[1]
    num_wrong = (predicted_classes != targets).float().sum()

    penalty = PENALTY_WEIGHT * num_wrong if current_accuracy > 0.5 else 0
    return base_loss + penalty

# WideResNeXt Block
class WideResNeXtBlock(nn.Module):
    """Bottleneck residual block with a grouped 3x3 convolution.

    Layout: 1x1 conv -> grouped 3x3 conv (``cardinality`` groups, carries the
    stride) -> 1x1 conv, each followed by batch norm. The bottleneck width is
    ``cardinality * widen_factor`` and the block emits
    ``planes * expansion`` channels. A 1x1 projection shortcut is used when
    the stride or the channel count changes; otherwise the shortcut is the
    identity.
    """

    expansion = 2  # Output channels = planes * expansion

    def __init__(self, in_planes, planes, stride=1, cardinality=32, widen_factor=2):
        super().__init__()
        group_width = cardinality * widen_factor
        out_planes = planes * WideResNeXtBlock.expansion

        # Creation order is kept so state-dict keys and weight initialisation
        # order stay the same.
        self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(group_width)
        self.conv2 = nn.Conv2d(
            group_width, group_width, kernel_size=3, stride=stride,
            padding=1, groups=cardinality, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(group_width)
        self.conv3 = nn.Conv2d(group_width, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        needs_projection = stride != 1 or in_planes != out_planes
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()  # empty Sequential returns its input

    def forward(self, x):
        """Return ``relu(residual_branch(x) + shortcut(x))``."""
        features = self.bn1(self.conv1(x)).relu()
        features = self.bn2(self.conv2(features)).relu()
        features = self.bn3(self.conv3(features))
        identity = self.shortcut(x)
        return (features + identity).relu()

# WideResNeXt Model with Temperature Scaling
class WideResNeXt(nn.Module):
    """ResNeXt-style classifier with dropout and a temperature-scaled output.

    A 3x3 stem is followed by four stages (64/128/256/512 planes, the last
    three starting with a stride-2 block). The result is globally
    average-pooled, passed through dropout and a linear classifier, and the
    logits are divided by the temperature from ``TemperatureScaling``.

    Args:
        block: Block class; must accept
            ``(in_planes, planes, stride, cardinality, widen_factor)`` and
            expose an ``expansion`` class attribute.
        num_blocks: Four integers, the number of blocks in each stage.
        cardinality: Group count forwarded to every block.
        widen_factor: Width multiplier forwarded to every block.
        num_classes: Size of the output layer.
    """

    def __init__(self, block, num_blocks, cardinality=32, widen_factor=2, num_classes=100):
        super(WideResNeXt, self).__init__()
        self.in_planes = 64  # running channel count, updated by _make_layer

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1, cardinality=cardinality, widen_factor=widen_factor)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2, cardinality=cardinality, widen_factor=widen_factor)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2, cardinality=cardinality, widen_factor=widen_factor)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2, cardinality=cardinality, widen_factor=widen_factor)

        self.dropout = nn.Dropout(p=0.5)  # Dropout before the classifier
        # Use the expansion of the block class actually passed in, matching
        # what _make_layer does, so the classifier width always equals the
        # channel count produced by layer4.
        self.linear = nn.Linear(512 * block.expansion, num_classes)
        self.temperature_scaling = TemperatureScaling()  # Temperature scaling layer

    def _make_layer(self, block, planes, num_blocks, stride, cardinality, widen_factor):
        """Build one stage; only its first block uses ``stride``, the rest use 1."""
        layers = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            layers.append(block(self.in_planes, planes, block_stride, cardinality, widen_factor))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return temperature-scaled logits for a batch of images."""
        out = torch.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling: equal to a 4x4 average pool for 32x32 inputs
        # (layer4 then emits a 4x4 map) and still valid for other input sizes.
        out = F.adaptive_avg_pool2d(out, 1)
        out = torch.flatten(out, 1)
        out = self.dropout(out)  # Apply Dropout before the final linear layer
        out = self.linear(out)
        out = self.temperature_scaling(out)  # Apply temperature scaling before softmax
        return out

def train_with_penalty(epoch):
    """Train the global ``model`` for one epoch using ``custom_loss_function``.

    Reads the module-level ``model``, ``optimizer``, ``trainloader`` and
    ``device``. Prints the running loss and accuracy every 100 batches and a
    summary line at the end of the epoch.

    Args:
        epoch: Epoch index; used only in the printed messages.
    """
    model.train()  # Set the model to training mode
    train_loss = 0
    correct = 0
    total = 0

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()  # Zero the gradients for the optimizer

        outputs = model(inputs)  # Forward pass

        # Softmax is monotonic, so the argmax of the logits is the predicted class.
        predicted_classes = outputs.argmax(dim=1)

        # Update both counters before computing the accuracy. The previous
        # code divided this batch's correct count by the cumulative sample
        # count, which shrinks towards zero as the epoch progresses and keeps
        # the >50% penalty gate closed.
        correct += predicted_classes.eq(targets).sum().item()
        total += targets.size(0)
        current_accuracy = correct / total  # running accuracy for this epoch

        loss = custom_loss_function(outputs, targets, current_accuracy)
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        train_loss += loss.item()

        if batch_idx % 100 == 0:  # Print every 100 batches
            print(f'Epoch {epoch}, Batch {batch_idx}, Loss: {train_loss / (batch_idx + 1):.3f}, Acc: {100.*correct/total:.3f}%')

    print(f'Epoch {epoch} Training Loss: {train_loss / len(trainloader):.3f}, Accuracy: {100.*correct/total:.3f}%')

# Save model
def save_model(model, optimizer, filename):
    """Write a checkpoint with the model and optimizer state dicts to ``filename``.

    The saved object is a dict with the keys ``'model_state_dict'`` and
    ``'optimizer_state_dict'``. A confirmation line is printed afterwards.
    """
    checkpoint = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }
    torch.save(checkpoint, filename)
    print(f"Model saved to {filename}")

# Instantiate model, optimizer, and scheduler
# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Four stages with 3, 4, 6 and 3 blocks respectively.
model = WideResNeXt(WideResNeXtBlock, [3, 4, 6, 3], cardinality=32, widen_factor=2).to(device)

optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=5e-4)
# Multiply the learning rate by 0.1 every 30 scheduler steps.
# NOTE(review): with only 5 epochs below, the decay point is never reached.
scheduler = StepLR(optimizer, step_size=30, gamma=0.1)

# Training loop: one scheduler step per epoch, taken after the epoch's optimizer updates
for epoch in range(5):
    train_with_penalty(epoch)
    scheduler.step()

# Save the trained model after training
# Written to the working directory; the Grad-CAM cell loads "model.pth" from there.
save_model(model, optimizer, "model.pth")




In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import numpy as np
import cv2
import matplotlib.pyplot as plt
import random

# Load CIFAR-100 dataset
# Test-time pipeline only: tensor conversion plus (x - 0.5) / 0.5 normalisation,
# the same constants the training cells use.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
test_dataset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform_test)
# NOTE(review): the visualisation code later in this cell indexes test_dataset
# directly and does not iterate this loader.
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=2)

# Modified WideResNeXt Model for Grad-CAM
class WideResNeXtForGradCAM(WideResNeXt):
    """WideResNeXt that records layer4 activations and their gradients.

    After a forward pass ``self.activations`` holds the output of ``layer4``.
    After a backward pass through that output ``self.gradients`` holds the
    gradient with respect to it. Both are read by ``generate_gradcam``.
    """

    def __init__(self, block, num_blocks, cardinality=32, widen_factor=2, num_classes=100):
        super(WideResNeXtForGradCAM, self).__init__(block, num_blocks, cardinality, widen_factor, num_classes)
        self.gradients = None
        self.activations = None

    def save_gradient(self, grad):
        """Tensor hook: keep the gradient flowing into the layer4 output."""
        self.gradients = grad

    def forward(self, x):
        """Return temperature-scaled logits and capture layer4 activations."""
        # Drop the gradient captured by an earlier pass so it cannot be
        # mistaken for the gradient of this input.
        self.gradients = None

        out = torch.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)

        # Hook for gradients and activations at the last conv layer
        out = self.layer4(out)
        self.activations = out  # Save activations
        # register_hook raises on tensors that do not require grad (for
        # example under torch.no_grad()), so only attach it when a backward
        # pass is possible. Plain inference then still works.
        if out.requires_grad:
            out.register_hook(self.save_gradient)  # Save gradients with hook

        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.dropout(out)
        out = self.linear(out)
        out = self.temperature_scaling(out)
        return out

# Function to generate Grad-CAM
def generate_gradcam(model, input_image, target_class):
    """Compute a Grad-CAM heatmap for one image and one class.

    Args:
        model: Network exposing ``activations`` and ``gradients`` attributes
            that are filled during the forward and backward pass (see
            ``WideResNeXtForGradCAM``).
        input_image: Image batch; only the first sample is used. The last two
            dimensions are taken as height and width of the returned map.
        target_class: Class index whose score is back-propagated.

    Returns:
        2-D float32 array with the input's spatial size. Values are scaled to
        [0, 1]; the map is all zeros when no location has positive evidence
        for the class.
    """
    model.eval()

    # Forward pass
    output = model(input_image)

    # Zero the gradients and back-propagate only the target-class score
    model.zero_grad()
    one_hot_output = torch.zeros_like(output)
    one_hot_output[0, target_class] = 1
    output.backward(gradient=one_hot_output)

    # Gradients and activations of the last convolutional stage, first sample
    gradients = model.gradients.detach().cpu().numpy()[0]  # [C, H, W]
    activations = model.activations.detach().cpu().numpy()[0]  # [C, H, W]

    # Channel weights = spatial mean of the gradients; CAM = weighted channel sum
    weights = gradients.mean(axis=(1, 2))
    cam = (weights[:, None, None] * activations).sum(axis=0).astype(np.float32)

    cam = np.maximum(cam, 0)  # ReLU on the CAM
    height, width = input_image.shape[-2:]
    cam = cv2.resize(cam, (int(width), int(height)))  # cv2 expects (width, height)
    cam = cam - np.min(cam)
    peak = np.max(cam)
    if peak > 0:
        # Skip the division for an all-zero map; dividing by 0 would fill the
        # heatmap with NaN.
        cam = cam / peak  # Normalize CAM to [0, 1]
    return cam

# Helper function to visualize the CAM overlay on the original image
def show_cam_and_image(img, cam, ax_original, ax_cam):
    """Draw the original image and its Grad-CAM overlay on two matplotlib axes.

    Args:
        img: Image array in height x width x channel layout, scaled to [0, 1]
            (the caller denormalises it before passing it in).
        cam: 2-D heatmap in [0, 1] with the same height and width as ``img``.
        ax_original: Axes that receives the plain image.
        ax_cam: Axes that receives the heatmap blended with the image.
    """
    # Display the original image
    ax_original.imshow(np.uint8(255 * img))  # Original image
    ax_original.axis('off')
    ax_original.set_title("Original Image")

    # Display the Grad-CAM heatmap overlaid on the original image
    cam = np.nan_to_num(cam)  # tolerate NaN entries from a degenerate map
    heatmap_bgr = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
    # OpenCV returns BGR while matplotlib expects RGB. Without the conversion
    # the colours are swapped and strong regions show up blue instead of red.
    heatmap = np.float32(cv2.cvtColor(heatmap_bgr, cv2.COLOR_BGR2RGB)) / 255
    cam_img = heatmap + np.float32(img)
    peak = np.max(cam_img)
    if peak > 0:
        cam_img = cam_img / peak  # Normalize to [0, 1]
    ax_cam.imshow(np.uint8(255 * cam_img))
    ax_cam.axis('off')
    ax_cam.set_title("Grad-CAM")

# Instantiate the modified model
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = WideResNeXtForGradCAM(WideResNeXtBlock, [3, 4, 6, 3], cardinality=32, widen_factor=2).to(device)

# Load the pretrained model's weights
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU can still be loaded on a CPU-only machine.
checkpoint = torch.load("model.pth", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

# Visualize Grad-CAM on 10 random images from the test set
random_indices = random.sample(range(len(test_dataset)), 10)
fig, axs = plt.subplots(10, 2, figsize=(10, 30))  # 10 rows, 2 columns for each image pair

for i, idx in enumerate(random_indices):
    img, label = test_dataset[idx]

    # Add batch dimension and send image to device
    input_img = img.unsqueeze(0).to(device)

    # Generate Grad-CAM for the true label
    cam = generate_gradcam(model, input_img, target_class=label)

    # Convert image to numpy (channels last) for visualization
    img_np = img.permute(1, 2, 0).cpu().numpy()
    img_np = (img_np * 0.5 + 0.5)  # Undo Normalize(0.5, 0.5) to get back to [0, 1]

    # Plot original image and Grad-CAM side by side
    show_cam_and_image(img_np, cam, axs[i, 0], axs[i, 1])

plt.tight_layout()
plt.show()


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.optim.lr_scheduler import StepLR
import os
import random
from sklearn.model_selection import train_test_split

# Early stopping parameters
# NOTE(review): these values are only assigned here; the training loop at the
# end of this cell does not read them.
early_stopping_patience = 150  # Number of epochs with no improvement to stop training
best_test_accuracy = 0.0  # Best test accuracy seen so far
epochs_no_improvement = 0  # Counter for how many epochs with no improvement

# Data Augmentation for Training Set
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),             # Pad by 4 pixels per side, then take a random 32x32 crop
    transforms.RandomHorizontalFlip(),                # Random horizontal flip
    transforms.RandomRotation(15),                    # Rotate by a random angle within +/-15 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Random jitter in brightness, contrast, etc.
    transforms.ToTensor(),                            # Convert images to tensors
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # (x - 0.5) / 0.5 per channel
])

# Data Transformations for the Test Set (no augmentation)
transform_test = transforms.Compose([
    transforms.ToTensor(),                            # Convert images to tensors
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # (x - 0.5) / 0.5 per channel
])

# Load the CIFAR100 dataset
dataset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform_train)
# Second view of the same training images WITHOUT augmentation. The held-out
# examples that join the test set must use the test transform; taking them
# from `dataset` would evaluate on randomly cropped, flipped, rotated and
# colour-jittered images. The files were already fetched by the line above.
dataset_eval = datasets.CIFAR100(root='./data', train=True, download=False, transform=transform_test)
test_dataset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform_test)

# Randomly select 10,000 examples (20% of 50,000) to move from train to test
train_indices, extra_test_indices = train_test_split(list(range(len(dataset))), test_size=0.2, random_state=42)

# Subset the train dataset to have 40,000 examples (augmented view)
train_subset = torch.utils.data.Subset(dataset, train_indices)
# Held-out examples come from the non-augmented view
extra_test_subset = torch.utils.data.Subset(dataset_eval, extra_test_indices)

# Combine the additional 10,000 examples with the original test dataset
test_dataset = torch.utils.data.ConcatDataset([test_dataset, extra_test_subset])

# Create dataloaders
trainloader = torch.utils.data.DataLoader(train_subset, batch_size=128, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=2)

# Set paths and other parameters
PENALTY_WEIGHT = 1  # Multiplier applied to the high-confidence-error count in custom_loss_function
SAVE_PATH = './saved_models/'  # Directory to save model checkpoints
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)

# Temperature Scaling class
class TemperatureScaling(nn.Module):
    """Divide logits by a single learnable temperature.

    The temperature is a one-element ``nn.Parameter`` initialised to
    ``init_temp``, so it appears in ``parameters()`` and in the state dict
    under the key ``temperature``.
    """

    def __init__(self, init_temp=1.0):
        super().__init__()
        initial_value = torch.ones(1) * init_temp
        self.temperature = nn.Parameter(initial_value)

    def forward(self, logits):
        """Return ``logits`` scaled by ``1 / temperature`` (broadcast over all entries)."""
        scaled_logits = torch.div(logits, self.temperature)
        return scaled_logits

# Focal Loss for handling imbalanced datasets
class FocalLoss(nn.Module):
    """Focal loss built on per-sample multi-class cross-entropy.

    Each sample's cross-entropy ``ce`` is weighted by
    ``alpha * (1 - exp(-ce)) ** gamma`` and the weighted values are averaged
    over the batch.
    """

    def __init__(self, alpha=1, gamma=2):
        super().__init__()
        self.alpha, self.gamma = alpha, gamma

    def forward(self, outputs, targets):
        """Return the mean focal loss for logits ``outputs`` and class indices ``targets``."""
        per_sample_ce = F.cross_entropy(outputs, targets, reduction='none')
        # exp(-CE) recovers the probability the model assigned to the target class.
        target_prob = torch.exp(-per_sample_ce)
        modulating_factor = (1 - target_prob) ** self.gamma
        weighted = self.alpha * modulating_factor * per_sample_ce
        return weighted.mean()

# Custom Loss Function with Focal Loss and Penalty for Wrong Predictions after 50% Accuracy
def custom_loss_function(outputs, targets, current_accuracy, confidences):
    """Focal loss plus a count of confident misclassifications.

    Args:
        outputs: Logits, one row per sample.
        targets: Ground-truth class indices.
        current_accuracy: Accepted for interface compatibility; not read.
        confidences: Accepted for interface compatibility; the confidences
            used below are recomputed from ``outputs``.

    Returns:
        ``focal_loss + PENALTY_WEIGHT * k``, where ``k`` is the number of
        samples whose top softmax probability exceeds 0.8 while the predicted
        class is wrong.

    NOTE(review): ``k`` comes from boolean comparisons, so the penalty term
    has no gradient path back to ``outputs``. It changes the reported loss
    value but not the gradients.
    """
    class_probs = F.softmax(outputs, dim=1)
    top_prob, predicted_classes = class_probs.max(dim=1)

    # Penalize wrong high-confidence predictions (confidence > 80%)
    is_confident = top_prob > 0.8
    is_wrong = predicted_classes != targets
    penalty = PENALTY_WEIGHT * (is_confident & is_wrong).sum()

    return FocalLoss()(outputs, targets) + penalty

# WideResNeXt Block
class WideResNeXtBlock(nn.Module):
    """Bottleneck residual block with a grouped 3x3 convolution.

    Layout: 1x1 conv -> grouped 3x3 conv (``cardinality`` groups, carries the
    stride) -> 1x1 conv, each followed by batch norm. The bottleneck width is
    ``cardinality * widen_factor`` and the block emits
    ``planes * expansion`` channels. A 1x1 projection shortcut is used when
    the stride or the channel count changes; otherwise the shortcut is the
    identity.
    """

    expansion = 2  # Output channels = planes * expansion

    def __init__(self, in_planes, planes, stride=1, cardinality=32, widen_factor=2):
        super().__init__()
        group_width = cardinality * widen_factor
        out_planes = planes * WideResNeXtBlock.expansion

        # Creation order is kept so state-dict keys and weight initialisation
        # order stay the same.
        self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(group_width)
        self.conv2 = nn.Conv2d(
            group_width, group_width, kernel_size=3, stride=stride,
            padding=1, groups=cardinality, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(group_width)
        self.conv3 = nn.Conv2d(group_width, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        needs_projection = stride != 1 or in_planes != out_planes
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()  # empty Sequential returns its input

    def forward(self, x):
        """Return ``relu(residual_branch(x) + shortcut(x))``."""
        features = self.bn1(self.conv1(x)).relu()
        features = self.bn2(self.conv2(features)).relu()
        features = self.bn3(self.conv3(features))
        identity = self.shortcut(x)
        return (features + identity).relu()

# WideResNeXt Model with Temperature Scaling
class WideResNeXt(nn.Module):
    """ResNeXt-style classifier with dropout and a temperature-scaled output.

    A 3x3 stem is followed by four stages (64/128/256/512 planes, the last
    three starting with a stride-2 block). The result is globally
    average-pooled, passed through dropout and a linear classifier, and the
    logits are divided by the temperature from ``TemperatureScaling``.

    Args:
        block: Block class; must accept
            ``(in_planes, planes, stride, cardinality, widen_factor)`` and
            expose an ``expansion`` class attribute.
        num_blocks: Four integers, the number of blocks in each stage.
        cardinality: Group count forwarded to every block.
        widen_factor: Width multiplier forwarded to every block.
        num_classes: Size of the output layer.
    """

    def __init__(self, block, num_blocks, cardinality=32, widen_factor=2, num_classes=100):
        super(WideResNeXt, self).__init__()
        self.in_planes = 64  # running channel count, updated by _make_layer

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1, cardinality=cardinality, widen_factor=widen_factor)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2, cardinality=cardinality, widen_factor=widen_factor)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2, cardinality=cardinality, widen_factor=widen_factor)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2, cardinality=cardinality, widen_factor=widen_factor)

        self.dropout = nn.Dropout(p=0.5)  # Dropout before the classifier
        # Use the expansion of the block class actually passed in, matching
        # what _make_layer does, so the classifier width always equals the
        # channel count produced by layer4.
        self.linear = nn.Linear(512 * block.expansion, num_classes)
        self.temperature_scaling = TemperatureScaling()  # Temperature scaling layer

    def _make_layer(self, block, planes, num_blocks, stride, cardinality, widen_factor):
        """Build one stage; only its first block uses ``stride``, the rest use 1."""
        layers = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            layers.append(block(self.in_planes, planes, block_stride, cardinality, widen_factor))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return temperature-scaled logits for a batch of images."""
        out = torch.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling: equal to a 4x4 average pool for 32x32 inputs
        # (layer4 then emits a 4x4 map) and still valid for other input sizes.
        out = F.adaptive_avg_pool2d(out, 1)
        out = torch.flatten(out, 1)
        out = self.dropout(out)  # Apply Dropout before the final linear layer
        out = self.linear(out)
        out = self.temperature_scaling(out)  # Apply temperature scaling before softmax
        return out

# Training function
def train_with_penalty(epoch):
    """Train the global ``model`` for one epoch using ``custom_loss_function``.

    Reads the module-level ``model``, ``optimizer``, ``trainloader`` and
    ``device``. Prints the running loss and accuracy every 100 batches and a
    summary line at the end of the epoch.

    Args:
        epoch: Epoch index; used only in the printed messages.
    """
    model.train()  # Set the model to training mode
    train_loss = 0
    correct = 0
    total = 0

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()  # Zero the gradients for the optimizer

        outputs = model(inputs)  # Forward pass

        # Class probabilities and hard predictions for this batch
        probabilities = F.softmax(outputs, dim=1)
        predicted_classes = probabilities.argmax(dim=1)

        # Update both counters before computing the accuracy. The previous
        # code divided this batch's correct count by the cumulative sample
        # count, which shrinks towards zero as the epoch progresses instead
        # of tracking the overall training accuracy.
        correct += predicted_classes.eq(targets).sum().item()
        total += targets.size(0)
        current_accuracy = correct / total  # running accuracy for this epoch

        # Custom loss: focal loss plus the high-confidence-error penalty
        loss = custom_loss_function(outputs, targets, current_accuracy, probabilities)
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        train_loss += loss.item()

        if batch_idx % 100 == 0:  # Print every 100 batches
            print(f'Epoch {epoch}, Batch {batch_idx}, Loss: {train_loss / (batch_idx + 1):.3f}, Acc: {100.*correct/total:.3f}%')

    # At the end of the epoch, print the final training accuracy
    print(f'Epoch {epoch} Training Loss: {train_loss / len(trainloader):.3f}, Accuracy: {100.*correct/total:.3f}%')

# Function to evaluate the model on the test set and calculate high confidence metrics
def evaluate_with_high_confidence():
    """Evaluate the global ``model`` on ``testloader`` and print four metrics.

    Reports overall accuracy, the number of predictions whose top softmax
    probability exceeds 0.80, the accuracy on that subset (0.0 when the
    subset is empty), and how many of those confident predictions are wrong.
    Reads the module-level ``model``, ``testloader`` and ``device``; returns
    nothing.
    """
    confidence_threshold = 0.80  # Define the high confidence threshold
    correct = total = 0
    high_confidence_total = 0
    high_confidence_correct = 0
    high_confidence_wrong = 0

    model.eval()  # Set model to evaluation mode
    with torch.no_grad():  # No gradients are needed for inference
        for inputs, targets in testloader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            # Max softmax probability is the confidence; its index is the prediction
            confidences, predicted_classes = F.softmax(model(inputs), dim=1).max(dim=1)

            is_correct = predicted_classes == targets
            is_confident = confidences > confidence_threshold

            correct += int(is_correct.sum())
            total += targets.size(0)

            high_confidence_total += int(is_confident.sum())
            high_confidence_correct += int((is_confident & is_correct).sum())
            high_confidence_wrong += int((is_confident & ~is_correct).sum())

    # Overall accuracy, and accuracy restricted to the confident predictions
    test_accuracy = 100. * correct / total
    high_confidence_accuracy = (
        100. * high_confidence_correct / high_confidence_total
        if high_confidence_total > 0
        else 0.0
    )

    # Print results
    print(f"Test Accuracy: {test_accuracy:.2f}%")
    print(f"High Confidence Predictions: {high_confidence_total}")
    print(f"High Confidence Accuracy: {high_confidence_accuracy:.2f}%")
    print(f"Wrong High Confidence Predictions: {high_confidence_wrong}")

# Model, loss, optimizer, and scheduler
# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Four stages with 3, 4, 6 and 3 blocks respectively.
model = WideResNeXt(WideResNeXtBlock, [3, 4, 6, 3], cardinality=32, widen_factor=2).to(device)

# Example optimizer
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=5e-4)
# Multiply the learning rate by 0.1 every 30 epochs (one scheduler step per epoch below).
scheduler = StepLR(optimizer, step_size=30, gamma=0.1)

# Training loop with periodic evaluation
# NOTE(review): the early-stopping variables defined at the top of this cell
# are not consulted here; the loop always runs all 250 epochs.
for epoch in range(0, 250):
    train_with_penalty(epoch)  # Perform training for this epoch
    scheduler.step()

    if epoch % 10 == 0:  # Evaluate at epochs 0, 10, 20, ...
        print(f"Evaluating at Epoch {epoch}...")
        evaluate_with_high_confidence()


Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:05<00:00, 29999812.65it/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified
Epoch 0, Batch 0, Loss: 4.832, Acc: 0.000%
Epoch 0, Batch 100, Loss: 5.461, Acc: 1.926%
Epoch 0, Batch 200, Loss: 5.018, Acc: 3.051%
Epoch 0, Batch 300, Loss: 4.811, Acc: 4.135%
Epoch 0 Training Loss: 4.797, Accuracy: 4.232%
Evaluating at Epoch 0...
Test Accuracy: 7.79%
High Confidence Predictions: 0
High Confidence Accuracy: 0.00%
Wrong High Confidence Predictions: 0
Epoch 1, Batch 0, Loss: 3.900, Acc: 5.469%
Epoch 1, Batch 100, Loss: 3.968, Acc: 8.253%
Epoch 1, Batch 200, Loss: 3.875, Acc: 9.103%
Epoch 1, Batch 300, Loss: 3.796, Acc: 9.941%
Epoch 1 Training Loss: 3.782, Accuracy: 10.085%
Epoch 2, Batch 0, Loss: 3.455, Acc: 14.844%
Epoch 2, Batch 100, Loss: 3.443, Acc: 13.482%
Epoch 2, Batch 200, Loss: 3.389, Acc: 14.603%
Epoch 2, Batch 300, Loss: 3.364, Acc: 15.259%
Epoch 2 Training Loss: 3.362, Accuracy: 15.325%
Epoch 3, Batch 0, Loss: 3.333, Acc: 12.500%
Epoch 3, Batch 100, Loss: 3.156, Acc: 1