In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and echo the full path of every file found.
for folder, _subfolders, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing Required Libraries

In [None]:
#import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.transforms import InterpolationMode
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import f1_score
import pandas as pd
import numpy as np
import os
from tqdm import tqdm

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# Later cells move the model, the loss and every batch to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data Augmentation & Preprocessing

In [None]:
#transformation for train dataset
# Pipeline order matters: resize -> random augmentation -> tensor conversion -> normalization.
transform_train = transforms.Compose([
    transforms.Resize([224], interpolation=InterpolationMode.BICUBIC),  # one-element size: per the torchvision docs this scales the shorter edge to 224 and keeps the aspect ratio
    transforms.RandomHorizontalFlip(p=0.5), #randomly flip image horizontally
    transforms.RandomVerticalFlip(p=0.5), #randomly flip vertically
    transforms.RandomRotation(degrees=30),  # random rotation angle drawn from [-30, +30] degrees
    transforms.RandomCrop([224], padding=4),  # pad 4 px on each side, then take a random 224x224 crop
    transforms.ToTensor(),  #convert to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # same per-channel mean/std as transform_test
])


# Transformation for the test dataset (deterministic: no random augmentation).
# Mirrors the geometry of transform_train -- bicubic resize of the shorter edge
# to 224 followed by a 224x224 crop -- so evaluation images are neither squashed
# to a square nor resampled with a different filter than the training images.
transform_test = transforms.Compose([
    transforms.Resize([224], interpolation=InterpolationMode.BICUBIC),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Loading and Splitting the dataset

In [None]:
# Labelled training images. ImageFolder takes each sample's label from the
# sub-directory it sits in, and applies transform_train to every sample it returns.
dataset = datasets.ImageFolder(root="/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/train", transform=transform_train)


In [None]:
# 80% train / 20% validation. A fixed seed keeps the split identical across
# kernel restarts, so the reported validation numbers are reproducible.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_split = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

# `dataset` applies transform_train (random flips / rotation / crop) to every
# sample, so using the raw split for validation would score the model on
# augmented images. Instead, read the held-out indices through a second
# ImageFolder over the same root that uses the deterministic transform_test.
# ImageFolder enumerates classes and files in sorted order, so indices refer
# to the same samples in both views.
eval_dataset = datasets.ImageFolder(root=dataset.root, transform=transform_test)
val_dataset = torch.utils.data.Subset(eval_dataset, val_split.indices)


In [None]:
# Batches of 64 for both splits; only the training loader reshuffles each epoch.
# num_workers=4 loads batches in background worker processes and pin_memory=True
# asks for page-locked host memory for the batches.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True,num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False,num_workers=4, pin_memory=True)


# Visualizing the image

In [None]:
# Unnormalize an image
import matplotlib.pyplot as plt

def unnormalize(img):
    """Reverse the channel normalization so an image tensor can be displayed.

    Args:
        img: CPU tensor laid out as (channels, height, width) that was
            normalized with the mean/std used by the notebook's transforms.

    Returns:
        NumPy array laid out as (height, width, channels) with every value
        clipped into [0, 1].
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])
    # Channels-last layout lets the per-channel statistics broadcast over the last axis.
    hwc = np.transpose(img.numpy(), (1, 2, 0))
    return np.clip(hwc * channel_std + channel_mean, 0, 1)

# getting batch of training data
# (next(iter(...)) pulls only the first batch the loader yields)
image, label = next(iter(train_loader))

# Plotting the first image
# The tensor is un-normalized first so imshow receives values in [0, 1].
plt.imshow(unnormalize(image[0]))
plt.title(f"Label: {label[0].item()}")  # title shows the integer class index of that image
plt.show()

In [None]:
# Per-class sample counts over the whole ImageFolder (samples are (path, class_index) pairs).
class_counts = np.bincount([class_idx for _path, class_idx in dataset.samples])

# Inverse-frequency weights: the most frequent class gets weight 1, rarer classes get more.
class_weights = torch.tensor(
    class_counts.max() / class_counts,
    dtype=torch.float32,
    device=device,
)

# Weighted cross-entropy with label smoothing of 0.1.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1, weight=class_weights)

# Defining and Modifying the Model

In [None]:
# Pre-trained Vision Transformer (ViT-H/14) loaded with the
# IMAGENET1K_SWAG_LINEAR_V1 weights and placed on the selected device.
model = models.vit_h_14(weights=models.ViT_H_14_Weights.IMAGENET1K_SWAG_LINEAR_V1).to(device)



In [None]:
# Freeze the entire network first; only the replacement head below is trained.
model.requires_grad_(False)

# Input width of the original classifier layer.
in_features = model.heads.head.in_features

# Replace the classification head with a small MLP that outputs 10 classes.
model.heads = nn.Sequential(
    nn.Linear(in_features, 128, bias=True),
    nn.LayerNorm(128),
    nn.GELU(),
    nn.Dropout(0.25),
    nn.Linear(128, 10, bias=True),
)

# Make sure every parameter of the classification head is trainable.
model.heads.requires_grad_(True)

# print(model)

In [None]:
# AdamW over the model's parameters, with a step schedule that halves the
# learning rate every 3 scheduler steps.
# NOTE(review): train_model() builds its own optimizer and scheduler with these
# same settings, so these module-level objects do not appear to be used by the
# training run -- confirm before relying on them.
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)

# Training the Model

In [None]:
from torch.cuda.amp import GradScaler, autocast #for mixed precision training

def train_model(model, train_loader, val_loader, epochs=10):
    """Train the trainable parameters of `model` and checkpoint the best epoch.

    After every epoch the model is scored on `val_loader` with `evaluate`, and
    the weights with the highest macro-F1 so far are written to
    "best_model_f1.pth".

    Relies on the notebook-level `device`, `criterion` and `evaluate`.

    Args:
        model: network to train; parameters with requires_grad=False are left
            untouched and are not handed to the optimizer.
        train_loader: iterable of (images, labels) training batches.
        val_loader: iterable of (images, labels) validation batches.
        epochs: number of passes over `train_loader`.

    Returns:
        The best validation macro-F1 observed (-1.0 if `epochs` is 0).
    """
    # Mixed precision only makes sense on CUDA; on CPU both autocast and the
    # gradient scaler are switched off explicitly instead of being requested
    # for a device that is not in use.
    use_amp = device.type == "cuda"

    model.to(device)
    criterion.to(device)

    # Only optimize what can actually receive gradients (the frozen backbone is skipped).
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.AdamW(trainable_params, lr=3e-4, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)
    scaler = GradScaler(enabled=use_amp)

    # Start below any reachable F1 so the first epoch always writes a checkpoint;
    # otherwise a run whose F1 stays at 0 would leave nothing to load afterwards.
    best_f1 = -1.0

    for epoch in range(epochs):
        model.train()
        train_loss, correct, total = 0.0, 0, 0  # running training metrics for this epoch

        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            with torch.amp.autocast(device_type=device.type, enabled=use_amp):
                outputs = model(images)
                loss = criterion(outputs, labels)

            # The scaler is a pass-through when disabled, so the same three
            # calls work for both the AMP and the full-precision path.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()  # adjust the loss scale for the next iteration

            train_loss += loss.item()  # accumulate loss
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        # Guard the denominators so an empty loader reports 0 instead of raising.
        avg_loss = train_loss / max(len(train_loader), 1)
        train_acc = correct / max(total, 1)
        val_acc, f1 = evaluate(model, val_loader)

        print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, F1 Score: {f1:.4f}")

        # Save best model based on F1 score
        if f1 > best_f1:
            best_f1 = f1
            torch.save(model.state_dict(), "best_model_f1.pth")

        scheduler.step()

    return best_f1


# Evaluating the Model

In [None]:
def evaluate(model, loader):
    """Score `model` on every batch of `loader` without tracking gradients.

    Uses the notebook-level `device` for the forward passes.

    Args:
        model: network to evaluate; it is switched to eval mode.
        loader: iterable yielding (images, labels) batches.

    Returns:
        Tuple (accuracy, macro_f1) computed over all samples seen.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    pred_log, label_log = [], []

    with torch.no_grad():
        for batch_images, batch_labels in loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            # Index of the highest-scoring class for every sample in the batch.
            batch_preds = model(batch_images).max(dim=1).indices

            n_correct += (batch_preds == batch_labels).sum().item()
            n_seen += batch_labels.size(0)
            pred_log.extend(batch_preds.cpu().numpy())
            label_log.extend(batch_labels.cpu().numpy())

    macro_f1 = f1_score(label_log, pred_log, average="macro")
    return n_correct / n_seen, macro_f1

In [None]:
# Train for 10 epochs; train_model() writes the best-F1 weights to "best_model_f1.pth".
train_model(model, train_loader, val_loader, epochs=10)
# Reload that checkpoint so the cells below use the best-scoring weights
# rather than whatever the final epoch left in memory.
model.load_state_dict(torch.load("best_model_f1.pth"))  


# Loading and Processing the Test Dataset 

In [None]:
# Directory holding the unlabeled test images.
test_dir = "/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test"
# Names in test_dir ending in .png/.jpg/.jpeg (case-sensitive suffix match).
# NOTE(review): neither name appears to be read by the later cells shown here --
# TestDataset lists the folder itself; confirm before removing.
test_images = [f for f in os.listdir(test_dir) if f.endswith(('.png', '.jpg', '.jpeg'))]


In [None]:

class TestDataset(torch.utils.data.Dataset):
    """Unlabeled image dataset over the image files directly inside one folder.

    Items are returned in sorted file-name order as (image, file_name) pairs.

    Args:
        folder_path: directory containing the images.
        transform: optional callable applied to each loaded image.
    """

    # Suffixes accepted as images, matched case-insensitively. Anything else in
    # the folder (sub-directories, metadata files, ...) is skipped so that it
    # cannot crash the image loader in the middle of inference.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp')

    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.image_names = sorted(
            name
            for name in os.listdir(folder_path)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
            and os.path.isfile(os.path.join(folder_path, name))
        )
        self.transform = transform

    def __len__(self):
        """Return the number of image files found in the folder."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Load the idx-th image and return it with its file name."""
        file_name = self.image_names[idx]
        image = datasets.folder.default_loader(os.path.join(self.folder_path, file_name))
        if self.transform is not None:
            image = self.transform(image)
        return image, file_name

In [None]:
# Test images go through the deterministic transform_test pipeline.
test_dataset = TestDataset("/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test", transform=transform_test)
# shuffle=False keeps batches in the dataset's sorted file-name order.
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


# Predicting on Test Dataset 

In [None]:
def predict_test(model, test_loader):
    """Predict a class index for every image served by `test_loader`.

    Uses the notebook-level `device` for the forward passes.

    Args:
        model: trained network; it is switched to eval mode.
        test_loader: iterable yielding (images, filenames) batches.

    Returns:
        List of (filename, predicted_class_index) pairs in loader order.
    """
    model.eval()
    results = []

    with torch.no_grad():
        for batch, names in tqdm(test_loader):  # iterate over the test data
            logits = model(batch.to(device))
            # Highest-scoring class per image, moved to host memory.
            class_ids = logits.max(dim=1).indices.cpu().numpy()
            for pair in zip(names, class_ids):
                results.append(pair)

    return results

# Saving the Submission

In [None]:
def save_submission(predictions, image_names, output_path="21F3000728.csv"):
    """Write the submission CSV with columns "Image_ID" and "Label".

    Args:
        predictions: predicted labels, either bare values or
            (filename, label) tuples as produced by predict_test; for tuples
            only the label is written. May be empty.
        image_names: image file names, in the same order as `predictions`.
        output_path: destination CSV path; defaults to the original
            hard-coded submission file name.

    Image_ID is the file name with only its final extension removed, so a
    name such as "img.v2.jpg" becomes "img.v2" instead of being cut at the
    first dot.
    """
    # Checking each element (rather than predictions[0]) also makes an empty
    # prediction list produce a header-only CSV instead of an IndexError.
    labels = [pred[1] if isinstance(pred, tuple) else pred for pred in predictions]
    image_ids = [os.path.splitext(img)[0] for img in image_names]

    df = pd.DataFrame({"Image_ID": image_ids, "Label": labels})
    df.to_csv(output_path, index=False)


# Run inference on the test set; each entry is a (filename, predicted class index) pair.
predictions = predict_test(model, test_loader)
# Write the submission CSV, pairing the predictions with the dataset's file-name list.
save_submission(predictions, test_dataset.image_names)
