In [1]:
!cp -r /kaggle/input/col774a3/* /kaggle/working/

In [2]:
def get_scheduler(optimizer, scheduler_type='step', **kwargs):
    """
    Build and return a PyTorch learning rate scheduler for ``optimizer``.

    Args:
        optimizer: The optimizer whose learning rate will be scheduled.
        scheduler_type (str): One of 'step', 'cosine', 'plateau',
            'exponential' or 'cyclic'.
        **kwargs: Optional overrides, read per scheduler type (any other key
            is ignored):
            - 'step':        step_size (30), gamma (0.1)
            - 'cosine':      T_max (100), eta_min (0.0)
            - 'plateau':     patience (10), factor (0.1), mode ('min')
            - 'exponential': gamma (0.9)
            - 'cyclic':      base_lr (0.001), max_lr (0.1), step_size_up (2000),
                             step_size_down (= step_size_up), mode ('triangular'),
                             cycle_momentum (True)

    Returns:
        The constructed learning rate scheduler.

    Raises:
        ValueError: If ``scheduler_type`` is not one of the supported names.
    """
    schedulers = torch.optim.lr_scheduler

    if scheduler_type == 'step':
        return schedulers.StepLR(
            optimizer,
            step_size=kwargs.get('step_size', 30),
            gamma=kwargs.get('gamma', 0.1),
        )

    if scheduler_type == 'cosine':
        return schedulers.CosineAnnealingLR(
            optimizer,
            T_max=kwargs.get('T_max', 100),
            eta_min=kwargs.get('eta_min', 0.0),
        )

    if scheduler_type == 'plateau':
        # `mode` used to be hard-coded to 'min'. That is only correct when the
        # monitored quantity is a loss; pass mode='max' when stepping on a
        # metric that should increase (e.g. accuracy).
        return schedulers.ReduceLROnPlateau(
            optimizer,
            mode=kwargs.get('mode', 'min'),
            patience=kwargs.get('patience', 10),
            factor=kwargs.get('factor', 0.1),
        )

    if scheduler_type == 'exponential':
        return schedulers.ExponentialLR(optimizer, gamma=kwargs.get('gamma', 0.9))

    if scheduler_type == 'cyclic':
        step_size_up = kwargs.get('step_size_up', 2000)
        return schedulers.CyclicLR(
            optimizer,
            base_lr=kwargs.get('base_lr', 0.001),
            max_lr=kwargs.get('max_lr', 0.1),
            step_size_up=step_size_up,
            # Symmetric cycle unless the caller asks otherwise.
            step_size_down=kwargs.get('step_size_down', step_size_up),
            mode=kwargs.get('mode', 'triangular'),
            # Exposed so the momentum cycling can be switched off; the default
            # matches PyTorch's own default.
            cycle_momentum=kwargs.get('cycle_momentum', True),
        )

    raise ValueError(f"Unsupported scheduler type: {scheduler_type}")


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import pickle
import pandas as pd
import os
import torchvision.transforms as transforms

# Early stopping state (module-level; the evaluation function updates the last two via `global`)
early_stopping_patience = 30  # Number of consecutive epochs without improvement before training stops
best_test_accuracy = 0.0  # Best test accuracy seen so far, stored as a percentage (0-100)
epochs_no_improvement = 0  # Consecutive epochs without a new best test accuracy

# Training-time augmentation pipeline. The first seven steps operate on the
# PIL image; the last two convert to a tensor and rescale each channel with
# mean 0.5 / std 0.5.
transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),          # pad by 4 px, then take a random 32x32 crop
        transforms.RandomHorizontalFlip(p=0.5),        # mirror left/right half of the time
        transforms.RandomRotation(degrees=15),         # rotate by a random angle in [-15, 15] degrees
        transforms.RandomGrayscale(p=0.1),             # occasionally drop colour information
        transforms.RandomVerticalFlip(p=0.1),          # occasionally flip upside down
        transforms.GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 2.0)),  # blur with a random sigma
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # photometric jitter
        transforms.ToTensor(),                         # PIL image -> float tensor
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),  # per-channel normalisation
    ]
)

# Evaluation pipeline: no augmentation, only the tensor conversion and the
# same normalisation that the training pipeline ends with.
transform_test = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

from PIL import Image

class CIFAR100Dataset(torch.utils.data.Dataset):
    """Dataset backed by a pickled, indexable collection of (image, label) pairs.

    Args:
        file_path: Path of the pickle file to load.
        transform: Optional callable applied to each image before it is returned.
    """

    def __init__(self, file_path, transform=None):
        # NOTE(review): pickle.load can execute arbitrary code; only point this
        # at files from a trusted source.
        with open(file_path, 'rb') as handle:
            samples = pickle.load(handle)
        self.data = samples
        self.transform = transform

    def __len__(self):
        """Number of (image, label) pairs in the loaded collection."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, with the transform applied if one is set."""
        image, label = self.data[idx]

        # Tensor images are turned into PIL images so that PIL-based transforms
        # can consume them; anything else is passed through untouched.
        # Assumes channel-first tensors with values in [0, 1] - TODO confirm.
        if isinstance(image, torch.Tensor):
            channels_last = image.permute(1, 2, 0).numpy()
            pixels = (channels_last * 255).astype('uint8')
            image = Image.fromarray(pixels)

        if not self.transform:
            return image, label
        return self.transform(image), label



# Build the datasets: the training split uses the augmenting pipeline, the
# test split uses the plain one.
train_dataset = CIFAR100Dataset(file_path='train.pkl', transform=transform_train)
test_dataset = CIFAR100Dataset(file_path='test.pkl', transform=transform_test)

# Training batches are reshuffled every epoch; test batches keep file order.
trainloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=100,
    shuffle=False,
    num_workers=2,
)

# Set paths and other parameters
PENALTY_WEIGHT = 1  # Weight of the extra wrong-prediction penalty added by the custom loss
SAVE_PATH = './saved_models/'  # Directory to save model checkpoints
# exist_ok=True keeps this cell idempotent on re-runs and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(SAVE_PATH, exist_ok=True)

# Temperature Scaling class
class TemperatureScaling(nn.Module):
    """Divides logits by a single learnable scalar temperature.

    Args:
        init_temp: Initial value of the temperature parameter.
    """

    def __init__(self, init_temp=1.0):
        super().__init__()
        initial_value = init_temp * torch.ones(1)
        self.temperature = nn.Parameter(initial_value)

    def forward(self, logits):
        """Return ``logits`` scaled by ``1 / temperature``."""
        scaled_logits = logits / self.temperature
        return scaled_logits

# Focal Loss for handling imbalanced datasets
class FocalLoss(nn.Module):
    """Focal loss built on top of per-sample cross-entropy.

    Each sample's cross-entropy is scaled by ``alpha * (1 - p) ** gamma``,
    where ``p = exp(-cross_entropy)``, and the scaled values are averaged.

    Args:
        alpha: Constant multiplier applied to every sample's loss.
        gamma: Exponent of the ``(1 - p)`` modulating factor.
    """

    def __init__(self, alpha=1, gamma=2):
        super().__init__()
        self.alpha, self.gamma = alpha, gamma

    def forward(self, outputs, targets):
        """Return the mean focal loss of logits ``outputs`` against class indices ``targets``."""
        per_sample_ce = F.cross_entropy(outputs, targets, reduction='none')
        # exp(-CE) recovers the probability assigned to the target class.
        target_prob = torch.exp(-per_sample_ce)
        modulating_factor = (1 - target_prob) ** self.gamma
        weighted = self.alpha * modulating_factor * per_sample_ce
        return weighted.mean()

# Custom Loss Function with Focal Loss and Penalty for Wrong Predictions after 50% Accuracy
def custom_loss_function(outputs, targets, current_accuracy):
    """Focal loss plus an extra penalty on misclassified samples.

    The penalty is only added once ``current_accuracy`` exceeds 0.5.

    The previous penalty was ``PENALTY_WEIGHT * (number of wrong argmax
    predictions)``. A count of argmax mismatches has no gradient with respect
    to the logits, so it changed the reported loss value without influencing
    the weight update. The penalty is now a differentiable surrogate: every
    misclassified sample contributes ``1 - p(target)``. Each contribution is
    at most 1, so the new penalty never exceeds the old count.

    Args:
        outputs: Logits of shape (batch, num_classes).
        targets: Integer class indices of shape (batch,).
        current_accuracy: Training accuracy as a fraction in [0, 1].

    Returns:
        Scalar loss tensor.
    """
    # Base term: focal loss on the raw logits.
    focal_loss = FocalLoss()(outputs, targets)

    if current_accuracy > 0.5:
        probabilities = F.softmax(outputs, dim=1)
        predicted_classes = probabilities.argmax(dim=1)

        # Probability the model assigns to the correct class of each sample.
        target_probabilities = probabilities.gather(1, targets.unsqueeze(1)).squeeze(1)

        # The mask selects the misclassified samples; the gradient flows
        # through target_probabilities.
        wrong_predictions = (predicted_classes != targets).float()
        wrong_prediction_penalty = PENALTY_WEIGHT * (wrong_predictions * (1.0 - target_probabilities)).sum()
        return focal_loss + wrong_prediction_penalty

    return focal_loss

# WideResNeXt Block
class WideResNeXtBlock(nn.Module):
    """Bottleneck residual block with a grouped 3x3 convolution.

    Layout: 1x1 conv -> grouped 3x3 conv (carries the stride) -> 1x1 conv,
    each followed by batch norm, plus a shortcut branch that is added before
    the final ReLU. The inner width is ``cardinality * widen_factor`` and the
    output has ``planes * expansion`` channels.

    Args:
        in_planes: Number of input channels.
        planes: Base channel count; multiplied by ``expansion`` for the output.
        stride: Stride of the 3x3 convolution and of the projection shortcut.
        cardinality: Number of groups in the 3x3 convolution.
        widen_factor: Channels per group in the inner convolutions.
    """

    expansion = 2  # Output channels = planes * expansion

    def __init__(self, in_planes, planes, stride=1, cardinality=32, widen_factor=2):
        super().__init__()
        inner_width = cardinality * widen_factor
        out_planes = planes * WideResNeXtBlock.expansion

        # 1x1 reduce
        self.conv1 = nn.Conv2d(in_planes, inner_width, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(inner_width)
        # grouped 3x3, carries the spatial stride
        self.conv2 = nn.Conv2d(inner_width, inner_width, kernel_size=3, stride=stride,
                               padding=1, groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(inner_width)
        # 1x1 expand
        self.conv3 = nn.Conv2d(inner_width, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Identity shortcut when shapes already agree; otherwise a strided
        # 1x1 projection brings the input to the output shape.
        shapes_match = stride == 1 and in_planes == out_planes
        if shapes_match:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Run the residual branch, add the shortcut and apply the final ReLU."""
        residual = self.bn1(self.conv1(x))
        residual = torch.relu(residual)
        residual = self.bn2(self.conv2(residual))
        residual = torch.relu(residual)
        residual = self.bn3(self.conv3(residual))
        residual += self.shortcut(x)
        return torch.relu(residual)

# WideResNeXt Model with Temperature Scaling
class WideResNeXt(nn.Module):
    """WideResNeXt classifier: conv stem, four residual stages, dropout, linear head.

    The logits are passed through a learnable temperature-scaling layer before
    being returned.

    Args:
        block: Residual block class. It must expose an ``expansion`` attribute
            and accept ``(in_planes, planes, stride, cardinality, widen_factor)``.
        num_blocks: Sequence of four ints, the number of blocks per stage.
        cardinality: Number of groups, forwarded to every block.
        widen_factor: Per-group width, forwarded to every block.
        num_classes: Size of the output layer.
    """

    def __init__(self, block, num_blocks, cardinality=32, widen_factor=2, num_classes=100):
        super(WideResNeXt, self).__init__()
        self.in_planes = 64  # Running input-channel count, advanced by _make_layer

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1, cardinality=cardinality, widen_factor=widen_factor)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2, cardinality=cardinality, widen_factor=widen_factor)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2, cardinality=cardinality, widen_factor=widen_factor)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2, cardinality=cardinality, widen_factor=widen_factor)

        self.dropout = nn.Dropout(p=0.5)
        # Use the expansion of the block class actually passed in, as
        # _make_layer does, instead of hard-coding WideResNeXtBlock.
        self.linear = nn.Linear(512 * block.expansion, num_classes)
        self.temperature_scaling = TemperatureScaling()

    def _make_layer(self, block, planes, num_blocks, stride, cardinality, widen_factor):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride, cardinality, widen_factor))
            # The next block consumes this block's output channels.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return temperature-scaled logits for a batch of images."""
        out = torch.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling. For 32x32 inputs the feature map here is 4x4,
        # so this equals the former avg_pool2d(out, 4); unlike the fixed window
        # it also yields one value per channel for other input resolutions.
        out = torch.nn.functional.adaptive_avg_pool2d(out, 1)
        out = torch.flatten(out, 1)
        out = self.dropout(out)
        out = self.linear(out)
        out = self.temperature_scaling(out)
        return out

def train_with_penalty(epoch):
    """Train the global ``model`` for one epoch over ``trainloader``.

    For every batch the running training accuracy of the epoch so far is
    passed to ``custom_loss_function``, which uses it to decide whether the
    wrong-prediction penalty is active.

    Args:
        epoch: Epoch index, used only in the progress messages.
    """
    model.train()
    train_loss = 0
    correct = 0
    total = 0

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)

        # The argmax of the logits is the predicted class; no softmax is
        # needed just to pick the largest entry.
        predicted_classes = outputs.argmax(dim=1)

        # Update both running counters BEFORE computing the accuracy.
        # Previously the current batch's correct count was divided by the
        # cumulative sample count, so the value shrank as the epoch went on
        # and the 50% penalty threshold was effectively unreachable.
        correct += predicted_classes.eq(targets).sum().item()
        total += targets.size(0)
        current_accuracy = correct / total

        loss = custom_loss_function(outputs, targets, current_accuracy)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

        if batch_idx % 100 == 0:  # Progress report every 100 batches
            print(f'Epoch {epoch}, Batch {batch_idx}, Loss: {train_loss / (batch_idx + 1):.3f}, Acc: {100.*correct/total:.3f}%')

    # Epoch summary
    print(f'Epoch {epoch} Training Loss: {train_loss / len(trainloader):.3f}, Accuracy: {100.*correct/total:.3f}%')

# Function to calculate accuracy based on True_label data in test_info.csv
def test_accuracy(epoch, test_info_path):
    """Evaluate the global ``model`` on ``testloader`` and update early-stopping state.

    Predictions are compared against the ``True_label`` column of the CSV at
    ``test_info_path``; the labels yielded by ``testloader`` are not used.
    When accuracy improves on ``best_test_accuracy`` the model is
    checkpointed, otherwise the no-improvement counter is incremented.

    Args:
        epoch: Epoch index, used only in the progress messages.
        test_info_path: Path of a CSV file with a ``True_label`` column, in
            the same order as the samples produced by ``testloader``.

    Returns:
        True when training should stop (no improvement for
        ``early_stopping_patience`` consecutive epochs), False otherwise.

    Raises:
        ValueError: If the number of predictions differs from the number of
            labels in the CSV.
    """
    global best_test_accuracy, epochs_no_improvement
    model.eval()

    # Reference labels come from the CSV, not from the loader.
    true_labels = pd.read_csv(test_info_path)['True_label'].values

    predictions = []
    with torch.no_grad():
        for inputs, _ in testloader:
            outputs = model(inputs.to(device))
            predictions.extend(outputs.argmax(dim=1).tolist())

    # Compare array to array explicitly. The previous list == ndarray
    # comparison relied on NumPy's reflected operator and failed with an
    # unrelated error when the lengths differed.
    predicted_labels = torch.tensor(predictions, dtype=torch.long).numpy()
    if len(predicted_labels) != len(true_labels):
        raise ValueError(
            f"Got {len(predicted_labels)} predictions but {len(true_labels)} labels in {test_info_path}"
        )

    correct_all = (predicted_labels == true_labels).sum()
    total_all = len(true_labels)

    accuracy = 100. * correct_all / total_all
    print(f"Epoch {epoch}, Test Accuracy: {accuracy:.2f}%")

    # Track the best result; checkpoint on every improvement.
    if accuracy > best_test_accuracy:
        best_test_accuracy = accuracy
        epochs_no_improvement = 0
        save_model_checkpoint()
    else:
        epochs_no_improvement += 1
        print(f'No improvement for {epochs_no_improvement} epoch(s)')

    # Stop once the patience budget (early_stopping_patience epochs) is used up.
    if epochs_no_improvement >= early_stopping_patience:
        print(f"Early stopping at epoch {epoch} due to no improvement for {early_stopping_patience} epochs.")
        return True  # Signal to stop training
    return False

def save_model_checkpoint():
    """Write the global model's state dict to ``best_model.pth`` under ``SAVE_PATH``.

    Also prints the current value of the global ``best_test_accuracy``.
    """
    checkpoint_file = os.path.join(SAVE_PATH, 'best_model.pth')
    weights = model.state_dict()
    torch.save(weights, checkpoint_file)
    print(f"New best model saved with accuracy: {best_test_accuracy:.2f}%")

# Model, optimizer and learning rate schedulers
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = WideResNeXt(WideResNeXtBlock, [3, 4, 6, 3], cardinality=32, widen_factor=2).to(device)

# SGD with momentum and weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

# CyclicLR scheduler, stepped while `epoch < 50` in the loop below.
# It is stepped once per epoch, so step_size_up=20 means 20 epochs of ramp-up.
# NOTE(review): this scheduler and the StepLR below are attached to the same
# optimizer at construction time - confirm the resulting starting LR and the
# hand-off at epoch 50 are what is intended.
cyclic_scheduler = torch.optim.lr_scheduler.CyclicLR(
    optimizer,
    base_lr=0.001,
    max_lr=0.1,
    step_size_up=20
)

# StepLR scheduler, stepped from epoch 50 onwards in the loop below
step_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=30,  # Decay every 30 calls to step_scheduler.step()
    gamma=0.1  # Multiply the learning rate by 0.1 at each decay
)

# Training loop with scheduler switching (at most 150 epochs)
# NOTE(review): early stopping and checkpoint selection are driven by the
# accuracy measured on the test loader, so the reported best test accuracy is
# optimistically biased - consider a held-out validation split.
for epoch in range(0, 150):
    train_with_penalty(epoch)  # Perform training for this epoch
    stop_training = test_accuracy(epoch, 'test_info.csv')  # Evaluate test accuracy

    # Use CyclicLR for the first 50 epochs, StepLR afterwards
    if epoch < 50:
        cyclic_scheduler.step()
    else:
        step_scheduler.step()

    if stop_training:
        break  # Stop the training loop if early stopping is triggered



  return torch.load(io.BytesIO(b))


Epoch 0, Batch 0, Loss: 5.073, Acc: 0.781%
Epoch 0, Batch 100, Loss: 4.684, Acc: 1.160%
Epoch 0, Batch 200, Loss: 4.626, Acc: 1.290%
Epoch 0, Batch 300, Loss: 4.587, Acc: 1.412%
Epoch 0 Training Loss: 4.583, Accuracy: 1.417%
Epoch 0, Test Accuracy: 3.04%
New best model saved with accuracy: 3.04%
Epoch 1, Batch 0, Loss: 4.500, Acc: 2.344%
Epoch 1, Batch 100, Loss: 4.435, Acc: 2.398%
Epoch 1, Batch 200, Loss: 4.353, Acc: 2.771%
Epoch 1, Batch 300, Loss: 4.290, Acc: 3.226%
Epoch 1 Training Loss: 4.281, Accuracy: 3.268%
Epoch 1, Test Accuracy: 5.80%
New best model saved with accuracy: 5.80%
Epoch 2, Batch 0, Loss: 4.249, Acc: 3.906%
Epoch 2, Batch 100, Loss: 4.062, Acc: 5.121%
Epoch 2, Batch 200, Loss: 4.019, Acc: 5.578%
Epoch 2, Batch 300, Loss: 3.971, Acc: 6.170%
Epoch 2 Training Loss: 3.966, Accuracy: 6.200%
Epoch 2, Test Accuracy: 10.52%
New best model saved with accuracy: 10.52%
Epoch 3, Batch 0, Loss: 3.644, Acc: 12.500%
Epoch 3, Batch 100, Loss: 3.802, Acc: 8.161%
Epoch 3, Batch 200