# Importing Required Libraries

## PyTorch and Torchvision Libraries

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchvision.models import vit_b_16
from torch.utils.data import DataLoader, Dataset
from torch.cuda.amp import autocast, GradScaler

## Other Libraries

In [None]:
from PIL import Image
import os

# Transformation Function for Images

In [None]:
# Preprocessing pipeline: resize every image to 224x224, flip it horizontally
# with probability 0.5, convert it to a tensor, then normalise each channel.
# NOTE(review): the mean/std values are presumably the ImageNet statistics
# expected by the pretrained backbone — confirm.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)


# Loading the Dataset

In [None]:
# ImageFolder derives one class per sub-directory of `root`.
train_dataset = datasets.ImageFolder(
    root="/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/train",
    transform=transform,
)

# Shuffled mini-batches of 32 images, loaded by 4 worker processes.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4,
)

In [None]:
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader

class TestDataset(Dataset):
    """Unlabelled image dataset yielding ``(image, filename)`` pairs from a flat directory.

    Args:
        root: Directory whose regular files are treated as images.
        transform: Optional callable applied to each PIL image after it has
            been converted to RGB.
    """

    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        # os.listdir returns entries in arbitrary order, so sort them to make
        # the sample order reproducible. Keep regular files only so that a
        # sub-directory is never handed to Image.open.
        candidates = (os.path.join(root, name) for name in sorted(os.listdir(root)))
        self.image_paths = [path for path in candidates if os.path.isfile(path)]

    def __len__(self):
        """Return the number of image files discovered under ``root``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load the ``idx``-th image and return ``(image, basename)``.

        The basename (not the full path) is returned so callers can build
        identifiers from the file name alone.
        """
        img_path = self.image_paths[idx]
        # Image.open keeps the file open lazily; convert() materialises the
        # pixels, after which the context manager releases the handle.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, os.path.basename(img_path)

# Inference must be deterministic, so the test pipeline applies the same
# resize / tensor conversion / normalisation as training but WITHOUT the
# random horizontal flip.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

test_dataset = TestDataset(
    root="/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test",
    transform=test_transform,
)
# shuffle=False keeps predictions aligned with the dataset's file order.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)


# Loading the Model (Swin-T; ViT-L/16 and ViT-B/16 are imported but not used)

In [None]:
from torchvision.models import vit_l_16 

In [None]:
from torchvision.models import swin_t 

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# `pretrained=True` is deprecated in torchvision; "IMAGENET1K_V1" names the
# same checkpoint that the legacy flag resolved to for swin_t.
model = swin_t(weights="IMAGENET1K_V1")
num_classes = len(train_dataset.classes)  # one output per ImageFolder class

# Freeze every pretrained parameter first ...
for param in model.parameters():
    param.requires_grad = False

# ... then swap in a fresh classification head. A newly constructed nn.Linear
# has requires_grad=True, so the head is the only trainable part of the model.
model.head = nn.Linear(model.head.in_features, num_classes)

# Assigning the result avoids dumping the full module repr as cell output.
model = model.to(device)


# Setting up the Loss Function, Optimizer, and Scheduler

In [None]:
criterion = nn.CrossEntropyLoss()

# Only hand the optimizer the parameters that are actually trainable; frozen
# parameters never receive gradients, so tracking them is wasted bookkeeping.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.AdamW(trainable_params, lr=3e-4, weight_decay=1e-4)

# NOTE(review): T_max=10 while training runs for 8 epochs, so the cosine
# schedule never reaches its minimum — confirm this is intended.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

# `torch.cuda.amp.GradScaler()` is deprecated in favour of `torch.amp.GradScaler`.
# Loss scaling is only meaningful on CUDA, so it is disabled explicitly on CPU.
scaler = torch.amp.GradScaler("cuda", enabled=torch.cuda.is_available())


  scaler = GradScaler()  # Mixed Precision Training


# Training Function

In [None]:
def train(model, train_loader, optimizer, criterion, scaler, scheduler, num_epochs=8):
    """Train ``model`` for ``num_epochs`` epochs and print per-epoch statistics.

    Args:
        model: Module to optimise; batches are moved to the device of its
            first parameter.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer stepped once per batch (through ``scaler``).
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        scaler: Gradient scaler providing ``scale``, ``step`` and ``update``.
        scheduler: Learning-rate scheduler stepped once per epoch.
        num_epochs: Number of passes over ``train_loader``.

    Prints one line per epoch with the loss summed over all batches (not the
    mean) and the training accuracy in percent.
    """
    # Take the device from the model instead of relying on a notebook-global
    # `device`, so the function works regardless of cell execution order.
    device = next(model.parameters()).device
    # Mixed precision is only enabled on CUDA; elsewhere autocast is a no-op.
    use_amp = device.type == "cuda"

    model.train()
    for epoch in range(num_epochs):
        total_loss, correct, total = 0.0, 0, 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()

            # torch.autocast replaces the deprecated torch.cuda.amp.autocast.
            with torch.autocast(device_type=device.type, enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, targets)

            # Scale the loss before backward; scaler.step un-scales the
            # gradients and performs the optimizer step.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

        scheduler.step()
        # Guard against an empty loader, which would otherwise divide by zero.
        acc = 100. * correct / total if total else 0.0
        print(f"Epoch {epoch+1}: Loss = {total_loss:.4f}, Accuracy = {acc:.2f}%")



# Prediction Function

In [None]:
def predict(model, test_loader):
    """Run inference and return a list of ``(filename, predicted_class_index)`` tuples.

    Args:
        model: Module to evaluate; batches are moved to the device of its
            first parameter.
        test_loader: Iterable yielding ``(inputs, filenames)`` batches.

    Returns:
        One ``(filename, int)`` tuple per sample, in loader order.
    """
    # Take the device from the model instead of relying on a notebook-global
    # `device`, so the function works regardless of cell execution order.
    device = next(model.parameters()).device

    model.eval()
    predictions = []
    with torch.no_grad():
        for inputs, filenames in test_loader:
            outputs = model(inputs.to(device))
            _, predicted = outputs.max(1)
            # A single tolist() transfers the whole batch at once instead of
            # calling .item() for every element.
            predictions.extend(zip(filenames, predicted.tolist()))

    return predictions


# Training the Model

In [None]:
# Fine-tune for 8 epochs using the objects configured in the cells above.
train(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    criterion=criterion,
    scaler=scaler,
    scheduler=scheduler,
    num_epochs=8,
)


  with autocast():  # Mixed Precision Training


Epoch 1: Loss = 387.7096, Accuracy = 64.69%
Epoch 2: Loss = 258.5520, Accuracy = 74.02%
Epoch 3: Loss = 236.6052, Accuracy = 75.70%
Epoch 4: Loss = 223.9331, Accuracy = 77.03%
Epoch 5: Loss = 216.3614, Accuracy = 77.87%
Epoch 6: Loss = 212.9473, Accuracy = 77.97%
Epoch 7: Loss = 210.6648, Accuracy = 78.25%
Epoch 8: Loss = 207.5805, Accuracy = 78.74%


# Finding the Predictions

In [None]:
# Collect (filename, predicted class index) pairs for every test image.
test_predictions = predict(model=model, test_loader=test_loader)

In [None]:
# Print the first five predictions as a quick sanity check.
for entry in test_predictions[:5]:
    filename, label = entry
    print(f"{filename} -> Predicted Class: {label}")

Image_0612.jpg -> Predicted Class: 5
Image_0946.jpg -> Predicted Class: 4
Image_0755.jpg -> Predicted Class: 8
Image_0178.jpg -> Predicted Class: 2
Image_1593.jpg -> Predicted Class: 5


# Understanding the Submission File

In [None]:
import pandas as pd

# Load the sample submission provided with the dataset.
sample = pd.read_csv(
    filepath_or_buffer="/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/sample_submission.csv"
)

In [None]:
# Display the sample submission frame loaded in the previous cell.
sample

Unnamed: 0,Image_ID,Label
0,Image_0001,5
1,Image_0002,0
2,Image_0003,0
3,Image_0004,4
4,Image_0005,7
...,...,...
1995,Image_1996,3
1996,Image_1997,0
1997,Image_1998,6
1998,Image_1999,7


# Prepare the Submission File

In [None]:
# Build the submission frame from the (filename, label) pairs.
# - os.path.splitext strips only the final extension, so a file name that
#   itself contains dots is not truncated the way `split('.')[0]` would.
# - Sorting by Image_ID makes the row order deterministic instead of
#   inheriting the directory-listing order of the test images.
df_submission = (
    pd.DataFrame(test_predictions, columns=['Image_ID', 'Label'])
    .assign(Image_ID=lambda frame: frame['Image_ID'].map(lambda name: os.path.splitext(name)[0]))
    .sort_values('Image_ID')
    .reset_index(drop=True)
)

In [None]:
# Display the submission frame built in the previous cell.
df_submission

Unnamed: 0,Image_ID,Label
0,Image_0612,5
1,Image_0946,4
2,Image_0755,8
3,Image_0178,2
4,Image_1593,5
...,...,...
1995,Image_0955,5
1996,Image_0576,4
1997,Image_0356,2
1998,Image_1287,0


In [None]:
# Write the submission without the DataFrame index column.
df_submission.to_csv(path_or_buf='submission.csv', index=False)