In [1]:
import torch
from torch import nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from torch.utils.data import random_split
import torch.optim as optim

import os
import pandas as pd

In [2]:
# Side length (pixels) that images are resized / cropped to by the transform pipelines.
image_size = 224

# Number of target classes; matches the 50-way classifier head built later in the notebook.
num_classes = 50
# Number of input image channels (same value as the model constructors' default).
channels = 3

In [3]:
# Mini-batch size shared by the train, validation and test DataLoaders.
batch_size = 128
# NOTE(review): this value is not referenced when the optimizer is built
# (it is constructed with a hard-coded lr=0.01) -- confirm which one is intended.
learning_rate = 0.001
# Number of passes over the training set performed by the training loop.
num_epochs = 30

# Running training metrics are printed every `print_interval` batches.
print_interval = 10
# When True, the model and optimizer state dicts are written to disk after training.
save_model = True

# Fraction of the labelled training images held out for validation.
validation_ratio = 0.15

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_root = '/kaggle/input/iith-dl-contest-2024/train/train/'
test_root = '/kaggle/input/iith-dl-contest-2024/test/'

# Fixed seed so the train/validation partition is identical across kernel
# restarts and resumed training runs.
split_seed = 42

# Augmenting pipeline: only ever applied to training samples.
transforms_train = transforms.Compose(
    [
        transforms.Resize((image_size, image_size)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=10),
        transforms.RandomResizedCrop(image_size),
        transforms.ToTensor(),
        # Mean/std match the commonly used ImageNet channel statistics.
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]
)

# Deterministic pipeline: used for validation and test images.
transforms_test = transforms.Compose(
    [
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ]
)

# Two views over the same labelled folder. A Subset shares the transform of
# the dataset it wraps, so splitting a single ImageFolder would leave the
# validation images on the random training augmentations. Separate views let
# validation use the deterministic pipeline instead.
train_view = datasets.ImageFolder(train_root, transform=transforms_train)
validation_view = datasets.ImageFolder(train_root, transform=transforms_test)
test_dataset = datasets.ImageFolder(test_root, transform=transforms_test)

# Same sizes as before: int(ratio * N) validation samples, the rest for training.
num_validation = int(validation_ratio * len(train_view))
split_generator = torch.Generator().manual_seed(split_seed)
shuffled_indices = torch.randperm(len(train_view), generator=split_generator).tolist()

# Both subsets index into the same file ordering, so they are disjoint.
train_dataset = torch.utils.data.Subset(train_view, shuffled_indices[num_validation:])
validation_dataset = torch.utils.data.Subset(validation_view, shuffled_indices[:num_validation])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size)
# No shuffling: prediction order must follow the dataset's sample order.
test_loader = DataLoader(test_dataset, batch_size=batch_size)


In [5]:
import torch
import torch.nn as  nn
import torch.nn.functional as F


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 conv -> 3x3 conv -> 1x1 conv plus a skip connection.

    The block outputs ``out_channels * expansion`` channels. Any spatial
    down-sampling is done by the 3x3 convolution via ``stride``.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Width of the two inner convolutions.
        i_downsample: Optional module applied to the skip path so that it
            matches the main path's shape; ``None`` leaves the input untouched.
        stride: Stride of the 3x3 convolution.
    """

    # Channel multiplier applied by the final 1x1 convolution.
    expansion = 4

    def __init__(self, in_channels, out_channels, i_downsample=None, stride=1):
        super(Bottleneck, self).__init__()

        # NOTE: the convolutions keep their default bias terms; dropping them
        # would change the state_dict keys and break loading existing checkpoints.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.batch_norm2 = nn.BatchNorm2d(out_channels)

        self.conv3 = nn.Conv2d(out_channels, out_channels*self.expansion, kernel_size=1, stride=1, padding=0)
        self.batch_norm3 = nn.BatchNorm2d(out_channels*self.expansion)

        self.i_downsample = i_downsample
        self.stride = stride
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply the three conv/BN stages and add the (optionally projected) input.

        Args:
            x: Input feature map with ``in_channels`` channels.

        Returns:
            Feature map with ``out_channels * expansion`` channels after the
            residual addition and a final ReLU.
        """
        # No copy is needed: every op below produces a new tensor, so the
        # caller's input is never modified in place.
        identity = x

        x = self.relu(self.batch_norm1(self.conv1(x)))
        x = self.relu(self.batch_norm2(self.conv2(x)))
        x = self.batch_norm3(self.conv3(x))

        # Project the skip path when its shape differs from the main path.
        if self.i_downsample is not None:
            identity = self.i_downsample(identity)

        # In-place add targets the freshly created BN output, not the input.
        x += identity
        return self.relu(x)

class ResNet(nn.Module):
    """ResNet backbone assembled from a configurable residual block type.

    Args:
        ResBlock: Block class; must expose an ``expansion`` attribute and accept
            ``(in_channels, out_channels, i_downsample=..., stride=...)``.
        layer_list: Number of blocks in each of the four stages.
        num_classes: Size of the final linear layer's output.
        num_channels: Number of channels of the input images.
    """

    def __init__(self, ResBlock, layer_list, num_classes, num_channels=3):
        super().__init__()
        # Channel count entering the next stage; advanced by _make_layer.
        self.in_channels = 64

        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max-pool.
        self.conv1 = nn.Conv2d(num_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # (planes, stride) for layer1..layer4, registered in that order.
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for stage_idx, (stage_planes, stage_stride) in enumerate(stage_specs):
            stage = self._make_layer(
                ResBlock, layer_list[stage_idx], planes=stage_planes, stride=stage_stride
            )
            setattr(self, f"layer{stage_idx + 1}", stage)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * ResBlock.expansion, num_classes)

    def forward(self, x):
        """Run the stem, the four residual stages, global pooling and the classifier."""
        features = self.max_pool(self.relu(self.batch_norm1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)

        pooled = self.avgpool(features)
        # Collapse everything but the batch dimension before the linear layer.
        flat = pooled.reshape(pooled.shape[0], -1)
        return self.fc(flat)

    def _make_layer(self, ResBlock, blocks, planes, stride=1):
        """Build one stage of ``blocks`` residual blocks and advance ``self.in_channels``.

        Only the first block receives ``stride`` and, when the skip path would
        not match the block output, a 1x1 conv + batch-norm projection.
        """
        expanded_channels = planes * ResBlock.expansion

        shortcut = None
        shape_is_preserved = stride == 1 and self.in_channels == expanded_channels
        if not shape_is_preserved:
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channels, expanded_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(expanded_channels),
            )

        stage_blocks = [ResBlock(self.in_channels, planes, i_downsample=shortcut, stride=stride)]
        self.in_channels = expanded_channels

        # Remaining blocks keep the resolution and channel count.
        stage_blocks.extend(ResBlock(self.in_channels, planes) for _ in range(blocks - 1))

        return nn.Sequential(*stage_blocks)



def ResNet50(num_classes, channels=3):
    """Build a ResNet of Bottleneck blocks with stage depths 3-4-6-3."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths, num_classes, num_channels=channels)

def ResNet101(num_classes, channels=3):
    """Build a ResNet of Bottleneck blocks with stage depths 3-4-23-3."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths, num_classes, num_channels=channels)

def ResNet152(num_classes, channels=3):
    """Build a ResNet of Bottleneck blocks with stage depths 3-8-36-3."""
    stage_depths = [3, 8, 36, 3]
    return ResNet(Bottleneck, stage_depths, num_classes, num_channels=channels)

In [6]:
# Re-derived here so this cell does not depend on the data-pipeline cell.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# NOTE: despite its name, `vit` holds a ResNet-50; the name is kept because
# later cells refer to it.
# Built from the notebook-level constants instead of a hard-coded class count.
vit = ResNet50(num_classes, channels)

# Resume from a previously trained checkpoint. `map_location` remaps the stored
# tensors to the active device, so a checkpoint written on a GPU also loads on
# a CPU-only kernel.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source (or pass weights_only=True where the installed torch supports it).
checkpoint_path = '/kaggle/input/resnet-30-epochs/vit.pth'
vit.load_state_dict(torch.load(checkpoint_path, map_location=device))
vit.to(device)

loss_func = nn.CrossEntropyLoss()
# NOTE: `adam` is actually SGD with momentum; the name is kept because later
# cells refer to it. The learning rate is set here, not from `learning_rate`.
adam = optim.SGD(vit.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0001)
# Multiply the LR by 0.1 once the monitored metric has stopped improving for 3 epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(adam, factor = 0.1, patience=3)

In [7]:
# Train for `num_epochs` epochs; after each one, evaluate on the validation
# set and hand the validation loss to the LR scheduler.
#
# Losses are accumulated per sample (batch mean * batch size) and divided by
# the number of samples seen. The loss function returns a batch mean, so a
# plain average of batch means would over-weight the smaller final batch.
for epoch in range(num_epochs):
    vit.train()
    epoch_loss = 0.0  # running sum of per-sample training losses
    num_correct_predictions = 0
    total_samples = 0

    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)
        adam.zero_grad()
        outputs = vit(images)
        loss = loss_func(outputs, labels)
        loss.backward()
        adam.step()

        batch_samples = labels.size(0)
        epoch_loss += loss.item() * batch_samples

        predicted = outputs.argmax(dim=1)
        num_correct_predictions += (predicted == labels).sum().item()
        total_samples += batch_samples

        if i % print_interval == 0:
            average_loss = epoch_loss / total_samples
            accuracy = num_correct_predictions / total_samples
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {average_loss}, Accuracy: {accuracy}')

    average_loss = epoch_loss / total_samples
    accuracy = num_correct_predictions / total_samples
    print(f'Training Loss - Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss}, Accuracy: {accuracy}')

    vit.eval()
    validation_loss = 0.0  # running sum of per-sample validation losses
    validation_correct_predictions = 0
    validation_total_samples = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, labels in validation_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = vit(images)
            loss = loss_func(outputs, labels)

            batch_samples = labels.size(0)
            validation_loss += loss.item() * batch_samples

            predicted = outputs.argmax(dim=1)
            validation_correct_predictions += (predicted == labels).sum().item()
            validation_total_samples += batch_samples

    validation_average_loss = validation_loss / validation_total_samples
    validation_accuracy = validation_correct_predictions / validation_total_samples
    print(f'Validation Loss - Epoch [{epoch+1}/{num_epochs}], Loss: {validation_average_loss}, Accuracy: {validation_accuracy}')

    # ReduceLROnPlateau monitors the validation loss.
    scheduler.step(validation_average_loss)


Epoch [1/30], Step [1/432], Loss: 0.9949268102645874, Accuracy: 0.6796875
Epoch [1/30], Step [11/432], Loss: 1.0786901062185115, Accuracy: 0.6917613636363636
Epoch [1/30], Step [21/432], Loss: 1.0604775377682276, Accuracy: 0.6994047619047619
Epoch [1/30], Step [31/432], Loss: 1.0356915266283098, Accuracy: 0.7086693548387096
Epoch [1/30], Step [41/432], Loss: 1.0374557041540378, Accuracy: 0.7124618902439024
Epoch [1/30], Step [51/432], Loss: 1.0310498031915403, Accuracy: 0.7121629901960784
Epoch [1/30], Step [61/432], Loss: 1.0189785888937652, Accuracy: 0.7164446721311475
Epoch [1/30], Step [71/432], Loss: 1.0089991033916743, Accuracy: 0.7191901408450704
Epoch [1/30], Step [81/432], Loss: 0.998645407182199, Accuracy: 0.7217399691358025
Epoch [1/30], Step [91/432], Loss: 0.9922151585201641, Accuracy: 0.7224416208791209
Epoch [1/30], Step [101/432], Loss: 0.9777702382295439, Accuracy: 0.7263304455445545
Epoch [1/30], Step [111/432], Loss: 0.9663702330073795, Accuracy: 0.729518581081081
Ep

In [8]:
# Persist the model weights and the optimizer state when saving is enabled.
if save_model:
    checkpoint_targets = (
        (vit, '/kaggle/working/vit-v2.pth'),
        (adam, '/kaggle/working/sgd-v2.pth'),
    )
    for stateful_obj, destination in checkpoint_targets:
        torch.save(stateful_obj.state_dict(), destination)

In [9]:
# Map predicted class indices back to class-folder names.
# Only sub-directories are considered, sorted by name. This mirrors how
# ImageFolder assigns class indices, so a stray non-directory entry in the
# training root cannot shift the index -> name mapping.
class_root_dir = '/kaggle/input/iith-dl-contest-2024/train/train'
class_folder_names = sorted(entry.name for entry in os.scandir(class_root_dir) if entry.is_dir())
id_to_class = dict(enumerate(class_folder_names))

In [10]:
# Predict a class for every test image and write the submission file.
vit.eval()
vit_predictions = []

with torch.no_grad():
    # The test folder's labels are placeholders and are ignored.
    for images, _ in test_loader:
        images = images.to(device)
        outputs = vit(images)
        vit_predictions.extend(outputs.argmax(dim=1).tolist())

# Take the IDs from the dataset itself: the (unshuffled) loader yields images
# in `test_dataset.samples` order, so each prediction lines up with its file
# even if the directory holds entries that ImageFolder skipped.
test_ids = [os.path.basename(sample_path) for sample_path, _ in test_dataset.samples]
assert len(test_ids) == len(vit_predictions), "number of predictions does not match number of test images"

output_df = pd.DataFrame({'ID': test_ids,
                          'Category': [id_to_class[class_id] for class_id in vit_predictions]})

output_df.to_csv('/kaggle/working/submission.csv', index=False)