In [1]:
# Recursively copy everything under the Kaggle input directory into the working directory.
# Presumably this is where the train.pkl / test.pkl files opened in the next cell come from — TODO confirm.
!cp -r /kaggle/input/col-774-a-3/* /kaggle/working/

In [2]:
import pickle
from torch.utils.data import Dataset, DataLoader

class CIFAR100Dataset(Dataset):
    """Map-style dataset backed by a pickled sequence of ``(image, label)`` pairs.

    The whole pickle file is loaded eagerly into memory by the constructor.

    Args:
        file_path: Path of the pickle file to read.
        transform: Optional callable applied to the image each time a sample is
            fetched. Defaults to ``None``, in which case the stored image is
            returned unchanged (the behaviour of the one-argument constructor).
    """

    def __init__(self, file_path, transform=None):
        # SECURITY: pickle.load can execute arbitrary code while unpickling;
        # only open files that come from a trusted source.
        with open(file_path, 'rb') as f:
            self.data = pickle.load(f)
        self.transform = transform

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, with ``transform`` applied to the image if set."""
        image, label = self.data[idx]
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# Eagerly load both pickled splits; the relative paths are resolved against
# the current working directory.
trainset = CIFAR100Dataset(file_path='train.pkl')
testset = CIFAR100Dataset(file_path='test.pkl')

  return torch.load(io.BytesIO(b))


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.optim.lr_scheduler import CyclicLR
from torch.cuda.amp import GradScaler, autocast
import torch.nn.functional as F
import torchvision.datasets as datasets
import time
startTime = time.time()

# Define the Basic Block used in WideResNet
class BasicBlock(nn.Module):
    """Residual unit: BN -> ReLU -> 3x3 conv -> BN -> ReLU -> (dropout) -> 3x3 conv, plus a shortcut.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Stride of the first 3x3 convolution (and of the 1x1 shortcut
            convolution when one is created).
        drop_rate: Dropout probability applied before the second convolution;
            dropout is skipped when this is not greater than 0.
    """

    def __init__(self, in_planes, out_planes, stride, drop_rate=0.3):
        super().__init__()
        # Sub-modules are registered in this exact order so that parameter
        # initialisation order and state_dict key order stay stable.
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.drop_rate = drop_rate
        self.equalInOut = (in_planes == out_planes)
        if self.equalInOut:
            # Channel counts match: the shortcut is the identity.
            self.convShortcut = None
        else:
            # Channel counts differ: project the shortcut with a 1x1 convolution.
            self.convShortcut = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False)

    def forward(self, x):
        activated = self.relu(self.bn1(x))
        # With matching channels the shortcut starts from the raw input;
        # otherwise it starts from the pre-activated tensor.
        shortcut_input = x if self.equalInOut else activated

        branch = self.relu(self.bn2(self.conv1(activated)))
        if self.drop_rate > 0:
            branch = F.dropout(branch, p=self.drop_rate, training=self.training)
        branch = self.conv2(branch)

        if self.convShortcut is None:
            shortcut = shortcut_input
        else:
            shortcut = self.convShortcut(shortcut_input)
        return torch.add(shortcut, branch)

# Define Network Block
class NetworkBlock(nn.Module):
    """A stage made of ``nb_layers`` blocks applied one after another.

    Args:
        nb_layers: Number of blocks in the stage.
        in_planes: Input channels of the first block.
        out_planes: Output channels of every block (and input channels of all
            blocks after the first).
        block: Callable invoked as ``block(in_planes, out_planes, stride, drop_rate)``
            to build each unit.
        stride: Stride handed to the first block; later blocks get stride 1.
        drop_rate: Dropout probability forwarded to every block.
    """

    def __init__(self, nb_layers, in_planes, out_planes, block, stride, drop_rate=0.3):
        super().__init__()
        self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, drop_rate)

    def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, drop_rate):
        """Build the ``nn.Sequential`` holding the stage's blocks."""

        def build(position):
            if position == 0:
                # Only the first block changes width / resolution. A falsy
                # in_planes or stride falls back to out_planes / 1.
                return block(in_planes or out_planes, out_planes, stride or 1, drop_rate)
            return block(out_planes, out_planes, 1, drop_rate)

        return nn.Sequential(*[build(position) for position in range(nb_layers)])

    def forward(self, x):
        return self.layer(x)

# Define the WideResNet Model
class WideResNet(nn.Module):
    """Wide residual network classifier built from three ``NetworkBlock`` stages.

    Args:
        depth: Total depth; must satisfy ``depth = 6n + 4`` so that each of the
            three stages holds ``n`` ``BasicBlock`` units.
        num_classes: Size of the final linear layer's output.
        widen_factor: Multiplier ``k`` on the stage widths (16k, 32k, 64k).
        drop_rate: Dropout probability forwarded to every block.

    Raises:
        AssertionError: If ``depth`` is not of the form ``6n + 4``.
    """

    def __init__(self, depth, num_classes, widen_factor=1, drop_rate=0.3):
        super(WideResNet, self).__init__()
        self.in_planes = 16  # not read inside this class; kept so the attribute stays available
        assert (depth - 4) % 6 == 0, 'Depth should be 6n+4'
        n = (depth - 4) // 6
        k = widen_factor
        nStages = [16, 16 * k, 32 * k, 64 * k]

        self.conv1 = nn.Conv2d(3, nStages[0], kernel_size=3, stride=1, padding=1, bias=False)
        # Stage strides are 1, 2, 2: the last two stages each halve the resolution.
        self.block1 = NetworkBlock(n, nStages[0], nStages[1], BasicBlock, 1, drop_rate)
        self.block2 = NetworkBlock(n, nStages[1], nStages[2], BasicBlock, 2, drop_rate)
        self.block3 = NetworkBlock(n, nStages[2], nStages[3], BasicBlock, 2, drop_rate)
        self.bn1 = nn.BatchNorm2d(nStages[3])
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(nStages[3], num_classes)
        self.drop_rate = drop_rate

        # Only convolution weights are re-initialised; BatchNorm and Linear
        # layers keep PyTorch's defaults.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for an image batch ``x``."""
        out = self.conv1(x)
        out = self.block1(out)
        out = self.block2(out)
        out = self.block3(out)
        out = self.relu(self.bn1(out))
        # Global average pooling. A fixed 8x8 window followed by view(-1, C)
        # is only correct for 32x32 inputs; for other sizes it silently folds
        # leftover spatial positions into the batch dimension. Adaptive pooling
        # gives the same result at 32x32 and stays correct elsewhere.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.flatten(1)
        return self.fc(out)

# Choose the compute device (GPU if available), then build WideResNet-28-10 on it.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = WideResNet(depth=28, num_classes=100, widen_factor=10, drop_rate=0.3).to(device)

# Hyperparameters
batch_size, learning_rate = 128, 0.01
num_epochs, early_stop_patience = 150, 200

# Data augmentation pipeline.
# NOTE(review): `transform` is not referenced by the datasets or loaders in the
# visible notebook, so this pipeline appears to be unused — confirm.
# Previously tried and disabled extras:
#   transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2)
#   transforms.Normalize((0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2762))
transform = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
)

# Data loaders over the `trainset` / `testset` objects built in the earlier
# dataset cell; only the training split is shuffled.
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Model, Loss, Optimizer
# `device` is the one selected at the top of this cell when the model was moved
# onto it; it is not recomputed here.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Multiply the learning rate by 0.9 after every 3 calls to scheduler.step().
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.9)
# scheduler = CyclicLR(optimizer, base_lr=0.001, max_lr=0.01, step_size_up=20)

# Loss scaler for mixed precision. `torch.cuda.amp.GradScaler()` is deprecated
# and emits a FutureWarning; `torch.amp.GradScaler('cuda', ...)` is its
# replacement. Scaling is switched off explicitly when not running on CUDA.
scaler = torch.amp.GradScaler('cuda', enabled=(device.type == 'cuda'))

# Early stopping and checkpointing
best_val_acc = 0.0
early_stop_counter = 0



# def check_accuracy(model,testloader):
#     # Validation loop for early stopping
#     model.eval()
#     correct = 0
#     total = 1
#     with torch.no_grad():
#         for inputs, labels in testloader:
#             inputs, labels = inputs.to(device), labels.to(device)
#             outputs = model(inputs)
#             _, predicted = torch.max(outputs.data, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()

#     val_acc = 100 * correct / total
#     return val_acc


# Training loop.
# Validation is currently disabled: `val_acc` is a constant placeholder, so the
# early-stopping counter below grows by one every epoch and would stop training
# after `early_stop_patience` epochs if that value were lower than `num_epochs`.
CHECKPOINT_EPOCHS = (100, 120, 150)  # 1-based epochs after which weights are saved

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        # Mixed-precision forward pass. `torch.autocast` replaces the deprecated
        # `torch.cuda.amp.autocast()`; it is disabled when not running on CUDA.
        with torch.autocast(device_type=device.type, enabled=(device.type == 'cuda')):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        # Scale the loss before backward, then let the scaler step the
        # optimizer and refresh its scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        running_loss += loss.item()

    scheduler.step()

    # Placeholder while validation is disabled (keeps the log format stable).
    val_acc = 0.0
    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss/len(trainloader)}, accuracy: {val_acc:.2f}%')

    # Save a checkpoint and report the wall-clock time since `startTime`.
    if epoch + 1 in CHECKPOINT_EPOCHS:
        checkpoint_path = f'effnet-28-10-cel-{epoch + 1}.pth'
        torch.save(model.state_dict(), checkpoint_path)  # Save checkpoint
        endTime = time.time()
        diff = endTime - startTime
        hours, rem = divmod(diff, 3600)
        minutes, seconds = divmod(rem, 60)
        print(f"{checkpoint_path} took time: {int(hours)} hours, {int(minutes)} minutes, {int(seconds)} seconds")

    # Early stopping check
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        early_stop_counter = 0
    else:
        early_stop_counter += 1
        if early_stop_counter >= early_stop_patience:
            print("Early stopping triggered.")
            break


  scaler = GradScaler()  # Mixed precision
  with autocast():  # Mixed precision


Epoch 1/150, Loss: 4.374072411570686, accuracy: 0.00%
Epoch 2/150, Loss: 3.9657016227039668, accuracy: 0.00%
Epoch 3/150, Loss: 3.6502480933460566, accuracy: 0.00%
Epoch 4/150, Loss: 3.2595244131910914, accuracy: 0.00%
Epoch 5/150, Loss: 2.872187847527452, accuracy: 0.00%
Epoch 6/150, Loss: 2.4844055430957686, accuracy: 0.00%
Epoch 7/150, Loss: 2.117907867264062, accuracy: 0.00%
Epoch 8/150, Loss: 1.8141315866963932, accuracy: 0.00%
Epoch 9/150, Loss: 1.5425914548837338, accuracy: 0.00%
Epoch 10/150, Loss: 1.2369928691333856, accuracy: 0.00%
Epoch 11/150, Loss: 0.9939478614840644, accuracy: 0.00%
Epoch 12/150, Loss: 0.7492879238753273, accuracy: 0.00%
Epoch 13/150, Loss: 0.4946881271779728, accuracy: 0.00%
Epoch 14/150, Loss: 0.33269714390317473, accuracy: 0.00%
Epoch 15/150, Loss: 0.23373800204299128, accuracy: 0.00%
Epoch 16/150, Loss: 0.13558593518532122, accuracy: 0.00%
Epoch 17/150, Loss: 0.10194075991884588, accuracy: 0.00%
Epoch 18/150, Loss: 0.10198680656596114, accuracy: 0.00%

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.optim.lr_scheduler import CyclicLR
from torch.cuda.amp import GradScaler, autocast
import torch.nn.functional as F
import torchvision.datasets as datasets
import time
startTime = time.time()

# Define the Basic Block used in WideResNet
class BasicBlock(nn.Module):
    """Residual unit: BN -> ReLU -> 3x3 conv -> BN -> ReLU -> (dropout) -> 3x3 conv, plus a shortcut.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Stride of the first 3x3 convolution (and of the 1x1 shortcut
            convolution when one is created).
        drop_rate: Dropout probability applied before the second convolution;
            dropout is skipped when this is not greater than 0.
    """

    def __init__(self, in_planes, out_planes, stride, drop_rate=0.3):
        super().__init__()
        # Sub-modules are registered in this exact order so that parameter
        # initialisation order and state_dict key order stay stable.
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.drop_rate = drop_rate
        self.equalInOut = (in_planes == out_planes)
        if self.equalInOut:
            # Channel counts match: the shortcut is the identity.
            self.convShortcut = None
        else:
            # Channel counts differ: project the shortcut with a 1x1 convolution.
            self.convShortcut = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False)

    def forward(self, x):
        activated = self.relu(self.bn1(x))
        # With matching channels the shortcut starts from the raw input;
        # otherwise it starts from the pre-activated tensor.
        shortcut_input = x if self.equalInOut else activated

        branch = self.relu(self.bn2(self.conv1(activated)))
        if self.drop_rate > 0:
            branch = F.dropout(branch, p=self.drop_rate, training=self.training)
        branch = self.conv2(branch)

        if self.convShortcut is None:
            shortcut = shortcut_input
        else:
            shortcut = self.convShortcut(shortcut_input)
        return torch.add(shortcut, branch)

# Define Network Block
class NetworkBlock(nn.Module):
    """A stage made of ``nb_layers`` blocks applied one after another.

    Args:
        nb_layers: Number of blocks in the stage.
        in_planes: Input channels of the first block.
        out_planes: Output channels of every block (and input channels of all
            blocks after the first).
        block: Callable invoked as ``block(in_planes, out_planes, stride, drop_rate)``
            to build each unit.
        stride: Stride handed to the first block; later blocks get stride 1.
        drop_rate: Dropout probability forwarded to every block.
    """

    def __init__(self, nb_layers, in_planes, out_planes, block, stride, drop_rate=0.3):
        super().__init__()
        self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, drop_rate)

    def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, drop_rate):
        """Build the ``nn.Sequential`` holding the stage's blocks."""

        def build(position):
            if position == 0:
                # Only the first block changes width / resolution. A falsy
                # in_planes or stride falls back to out_planes / 1.
                return block(in_planes or out_planes, out_planes, stride or 1, drop_rate)
            return block(out_planes, out_planes, 1, drop_rate)

        return nn.Sequential(*[build(position) for position in range(nb_layers)])

    def forward(self, x):
        return self.layer(x)

# Define the WideResNet Model
class WideResNet(nn.Module):
    """Wide residual network classifier built from three ``NetworkBlock`` stages.

    Args:
        depth: Total depth; must satisfy ``depth = 6n + 4`` so that each of the
            three stages holds ``n`` ``BasicBlock`` units.
        num_classes: Size of the final linear layer's output.
        widen_factor: Multiplier ``k`` on the stage widths (16k, 32k, 64k).
        drop_rate: Dropout probability forwarded to every block.

    Raises:
        AssertionError: If ``depth`` is not of the form ``6n + 4``.
    """

    def __init__(self, depth, num_classes, widen_factor=1, drop_rate=0.3):
        super(WideResNet, self).__init__()
        self.in_planes = 16  # not read inside this class; kept so the attribute stays available
        assert (depth - 4) % 6 == 0, 'Depth should be 6n+4'
        n = (depth - 4) // 6
        k = widen_factor
        nStages = [16, 16 * k, 32 * k, 64 * k]

        self.conv1 = nn.Conv2d(3, nStages[0], kernel_size=3, stride=1, padding=1, bias=False)
        # Stage strides are 1, 2, 2: the last two stages each halve the resolution.
        self.block1 = NetworkBlock(n, nStages[0], nStages[1], BasicBlock, 1, drop_rate)
        self.block2 = NetworkBlock(n, nStages[1], nStages[2], BasicBlock, 2, drop_rate)
        self.block3 = NetworkBlock(n, nStages[2], nStages[3], BasicBlock, 2, drop_rate)
        self.bn1 = nn.BatchNorm2d(nStages[3])
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(nStages[3], num_classes)
        self.drop_rate = drop_rate

        # Only convolution weights are re-initialised; BatchNorm and Linear
        # layers keep PyTorch's defaults.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for an image batch ``x``."""
        out = self.conv1(x)
        out = self.block1(out)
        out = self.block2(out)
        out = self.block3(out)
        out = self.relu(self.bn1(out))
        # Global average pooling. A fixed 8x8 window followed by view(-1, C)
        # is only correct for 32x32 inputs; for other sizes it silently folds
        # leftover spatial positions into the batch dimension. Adaptive pooling
        # gives the same result at 32x32 and stays correct elsewhere.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.flatten(1)
        return self.fc(out)

# Choose the compute device (GPU if available), then build WideResNet-28-10 on it.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = WideResNet(depth=28, num_classes=100, widen_factor=10, drop_rate=0.3).to(device)

# Hyperparameters
batch_size, learning_rate = 128, 0.01
num_epochs, early_stop_patience = 150, 200

# Data augmentation pipeline.
# NOTE(review): `transform` is not referenced by the datasets or loaders in the
# visible notebook, so this pipeline appears to be unused — confirm.
# Previously tried and disabled extras:
#   transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2)
#   transforms.Normalize((0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2762))
transform = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
)

# Load CIFAR-100 dataset
# trainset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)

class FocalLoss(nn.Module):
    """Focal loss for multi-class classification: ``-(1 - p_t) ** gamma * log(p_t)``."""

    def __init__(self, gamma=2.0, alpha=None, reduction='mean'):
        """
        Args:
            gamma (float): focusing parameter, default is 2.0. With ``gamma=0``
                the loss equals plain cross-entropy.
            alpha (Tensor, optional): weight of each class, indexed by class id.
                Default is None (no class weighting).
            reduction (str): 'mean' or 'sum'; any other value returns the
                unreduced per-sample loss. Default is 'mean'.
        """
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = reduction

    def forward(self, inputs, targets):
        """Compute the focal loss.

        Args:
            inputs: Raw logits; softmax is taken over dimension 1.
            targets: Integer class index per sample.

        Returns:
            A scalar for 'mean' / 'sum' reduction, otherwise one loss value per sample.
        """
        # log_softmax is numerically stable. The previous log(softmax(x) + 1e-10)
        # capped the loss at about 23 and erased the gradient for confidently
        # wrong predictions.
        log_probs = F.log_softmax(inputs, dim=1)

        # Pick the target-class log-probability directly instead of building a
        # C x C identity matrix and moving it to the device on every call.
        target_index = targets.long().unsqueeze(1)
        log_pt = log_probs.gather(1, target_index).squeeze(1)
        pt = log_pt.exp()

        # Down-weight well-classified samples (pt close to 1).
        loss = -((1 - pt) ** self.gamma) * log_pt

        # Optionally apply weighting per class
        if self.alpha is not None:
            # Move alpha to the input's device before indexing: indexing a CPU
            # tensor with CUDA indices raises an error.
            alpha_weight = self.alpha.to(inputs.device)[targets]
            loss = loss * alpha_weight

        # Apply reduction ('mean', 'sum', or 'none')
        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        else:
            return loss


# Data loaders over the `trainset` / `testset` objects built in the earlier
# dataset cell; only the training split is shuffled.
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Model, Loss, Optimizer
# `device` is the one selected at the top of this cell when the model was moved
# onto it; it is not recomputed here.
# criterion = nn.CrossEntropyLoss()
criterion = FocalLoss(gamma=2.0)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Multiply the learning rate by 0.9 after every 3 calls to scheduler.step().
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.9)
# scheduler = CyclicLR(optimizer, base_lr=0.001, max_lr=0.01, step_size_up=20)

# Loss scaler for mixed precision. `torch.cuda.amp.GradScaler()` is deprecated
# and emits a FutureWarning; `torch.amp.GradScaler('cuda', ...)` is its
# replacement. Scaling is switched off explicitly when not running on CUDA.
scaler = torch.amp.GradScaler('cuda', enabled=(device.type == 'cuda'))

# Early stopping and checkpointing
best_val_acc = 0.0
early_stop_counter = 0



# def check_accuracy(model,testloader):
#     # Validation loop for early stopping
#     model.eval()
#     correct = 0
#     total = 1
#     with torch.no_grad():
#         for inputs, labels in testloader:
#             inputs, labels = inputs.to(device), labels.to(device)
#             outputs = model(inputs)
#             _, predicted = torch.max(outputs.data, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()

#     val_acc = 100 * correct / total
#     return val_acc


# Training loop.
# Validation is currently disabled: `val_acc` is a constant placeholder, so the
# early-stopping counter below grows by one every epoch and would stop training
# after `early_stop_patience` epochs if that value were lower than `num_epochs`.
CHECKPOINT_EPOCHS = (100, 120, 150)  # 1-based epochs after which weights are saved

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        # Mixed-precision forward pass. `torch.autocast` replaces the deprecated
        # `torch.cuda.amp.autocast()`; it is disabled when not running on CUDA.
        with torch.autocast(device_type=device.type, enabled=(device.type == 'cuda')):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        # Scale the loss before backward, then let the scaler step the
        # optimizer and refresh its scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        running_loss += loss.item()

    scheduler.step()

    # Placeholder while validation is disabled (keeps the log format stable).
    val_acc = 0.0
    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss/len(trainloader)}, accuracy: {val_acc:.2f}%')

    # Save a checkpoint and report the wall-clock time since `startTime`.
    if epoch + 1 in CHECKPOINT_EPOCHS:
        checkpoint_path = f'effnet-28-10-fl-{epoch + 1}.pth'
        torch.save(model.state_dict(), checkpoint_path)  # Save checkpoint
        endTime = time.time()
        diff = endTime - startTime
        hours, rem = divmod(diff, 3600)
        minutes, seconds = divmod(rem, 60)
        print(f"{checkpoint_path} took time: {int(hours)} hours, {int(minutes)} minutes, {int(seconds)} seconds")

    # Early stopping check
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        early_stop_counter = 0
    else:
        early_stop_counter += 1
        if early_stop_counter >= early_stop_patience:
            print("Early stopping triggered.")
            break


  scaler = GradScaler()  # Mixed precision
  with autocast():  # Mixed precision


Epoch 1/150, Loss: 4.423204236137219, accuracy: 0.00%
Epoch 2/150, Loss: 3.7557099040704793, accuracy: 0.00%
Epoch 3/150, Loss: 3.336494390956891, accuracy: 0.00%
Epoch 4/150, Loss: 2.8895268988685485, accuracy: 0.00%
Epoch 5/150, Loss: 2.494589251070358, accuracy: 0.00%
Epoch 6/150, Loss: 2.081440505890039, accuracy: 0.00%
Epoch 7/150, Loss: 1.6877026085655529, accuracy: 0.00%
Epoch 8/150, Loss: 1.3910857686600364, accuracy: 0.00%
Epoch 9/150, Loss: 1.1345461460348136, accuracy: 0.00%
Epoch 10/150, Loss: 0.8917794341858203, accuracy: 0.00%
Epoch 11/150, Loss: 0.683162379855165, accuracy: 0.00%
Epoch 12/150, Loss: 0.502106307032771, accuracy: 0.00%
Epoch 13/150, Loss: 0.3133092863491168, accuracy: 0.00%
Epoch 14/150, Loss: 0.20445464594295612, accuracy: 0.00%
Epoch 15/150, Loss: 0.1384005480871414, accuracy: 0.00%
Epoch 16/150, Loss: 0.0892670484848868, accuracy: 0.00%
Epoch 17/150, Loss: 0.06481840200841237, accuracy: 0.00%
Epoch 18/150, Loss: 0.0605084146209323, accuracy: 0.00%
Epoch