In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder
import os
import pandas as pd

In [2]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        identity = x
        
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        
        out = self.conv2(out)
        out = self.bn2(out)
        
        if self.downsample is not None:
            identity = self.downsample(x)
        
        out += identity
        out = self.relu(out)
        
        return out

class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes):
        super(ResNet, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self.make_layer(block, 64, layers[0])
        self.layer2 = self.make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self.make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self.make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        
        return x

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_path = '/kaggle/input/iith-dl-contest-2024/train/train/'
test_path = '/kaggle/input/iith-dl-contest-2024/test/'
image_size = 224
batch_size = 256
validation_ratio = 0.15
num_classes = 50
num_epochs = 1
learning_rate = 3e-2
print_interval = 25
save_model = False

In [4]:
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(image_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform_test = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [5]:
train_dataset = ImageFolder(root=train_path, transform=transform_train)
test_dataset = ImageFolder(root=test_path, transform=transform_test)

train_size = int((1 - validation_ratio) * len(train_dataset))
validation_size = len(train_dataset) - train_size
train_dataset, validation_dataset = random_split(train_dataset, [train_size, validation_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

model = ResNet(ResidualBlock, [2, 2, 2, 2], num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [6]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        
        if i % print_interval == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {running_loss / (i + 1)}, Accuracy: {100 * correct / total}')
    
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    
    with torch.no_grad():
        for inputs, labels in validation_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            val_loss += loss.item()
            _, predicted = outputs.max(1)
            val_total += labels.size(0)
            val_correct += predicted.eq(labels).sum().item()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {running_loss / len(train_loader)}, Training Accuracy: {100 * correct / total}, Validation Loss: {val_loss / len(validation_loader)}, Validation Accuracy: {100 * val_correct / val_total}')

model.eval()
test_correct = 0
test_total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = outputs.max(1)
        test_total += labels.size(0)
        test_correct += predicted.eq(labels).sum().item()

print(f'Test Accuracy: {test_correct / test_total}')

Epoch [1/1], Step [1/216], Loss: 4.011035442352295, Accuracy: 1.953125
Epoch [1/1], Step [26/216], Loss: 5.51279799754803, Accuracy: 2.509014423076923
Epoch [1/1], Step [51/216], Loss: 4.733001021777882, Accuracy: 2.6501225490196076
Epoch [1/1], Step [76/216], Loss: 4.4450432281745105, Accuracy: 2.909128289473684
Epoch [1/1], Step [101/216], Loss: 4.2833395995716055, Accuracy: 3.190748762376238
Epoch [1/1], Step [126/216], Loss: 4.173197748169066, Accuracy: 3.779141865079365
Epoch [1/1], Step [151/216], Loss: 4.095979125294464, Accuracy: 4.133899006622516
Epoch [1/1], Step [176/216], Loss: 4.033942291682417, Accuracy: 4.487748579545454
Epoch [1/1], Step [201/216], Loss: 3.979960701358852, Accuracy: 4.912935323383085
Epoch [1/1], Training Loss: 3.9472913344701133, Training Accuracy: 5.218099547511312, Validation Loss: 3.581664256560497, Validation Accuracy: 8.246153846153845
Test Accuracy: 0.024109888964187042


In [7]:
if save_model:
    torch.save(model.state_dict(), '/kaggle/working/vit.pth')
    
class_folder_names = sorted(os.listdir('/kaggle/input/iith-dl-contest-2024/train/train'))
id_to_class = {id : class_name for id, class_name in enumerate(class_folder_names)}

In [8]:
model.eval()
resnet_predictions = []

with torch.no_grad():
    for images, _ in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted_classes = torch.max(outputs, 1)
        resnet_predictions.extend(predicted_classes.cpu().tolist())

output_df_resnet = pd.DataFrame({'ID': sorted(os.listdir('/kaggle/input/iith-dl-contest-2024/test/test')),
                                 'Category': [id_to_class[id] for id in resnet_predictions] })

output_df_resnet.to_csv('/kaggle/working/submission.csv', index=False)