In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Importing libraries

In [None]:
import torch
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from torchvision.transforms import InterpolationMode
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torchvision.models import vit_h_14
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
from torch.cuda.amp import GradScaler, autocast

In [None]:
import warnings
# Silence only FutureWarning messages; all other warning categories are still shown.
warnings.filterwarnings("ignore", category=FutureWarning)
# (A blanket warnings.filterwarnings("ignore") would hide every category instead.)


Defining Transformation
- train_transform
- val_transform

In [None]:
# Shared preprocessing constants used by both pipelines.
IMAGE_SIZE = 224
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Final steps common to training and validation: tensor conversion + normalisation.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=NORM_MEAN, std=NORM_STD)

# Training pipeline: resize (a single size value matches the shorter edge),
# then random flips, rotation, padded random crop and colour jitter.
train_augmentations = [
    transforms.Resize(IMAGE_SIZE, interpolation=InterpolationMode.BICUBIC),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=30),
    transforms.RandomCrop(IMAGE_SIZE, padding=4),
    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.1),
]
train_transform = transforms.Compose(train_augmentations + [to_tensor, normalize])

# Validation pipeline: deterministic resize + centre crop, no augmentation.
val_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE, interpolation=InterpolationMode.BICUBIC),
    transforms.CenterCrop(IMAGE_SIZE),
    to_tensor,
    normalize,
])

Dataset path

In [None]:
# Root of the competition data; the labelled images live in its `train` sub-folder.
dataset_path = "/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/"
train_root = os.path.join(dataset_path, 'train')
entire_dataset = datasets.ImageFolder(root=train_root, transform=train_transform)


Splitting the data into training and validation sets.

In [None]:
# Fraction of the labelled images used for training; the remainder is held out.
train_size_fraction = 0.99
train_size = int(train_size_fraction * len(entire_dataset))
val_size = len(entire_dataset) - train_size

# A seeded generator keeps the split identical across kernel restarts.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_split = random_split(
    entire_dataset, [train_size, val_size], generator=split_generator
)

# `entire_dataset` applies the random training augmentations to every sample it
# serves, so the held-out indices are re-wrapped around a second ImageFolder
# over the same directory that uses the deterministic `val_transform` instead.
val_dataset = torch.utils.data.Subset(
    datasets.ImageFolder(dataset_path + 'train', transform=val_transform),
    val_split.indices,
)


In [None]:
import os
# os.cpu_count() may return None when the count cannot be determined; fall back
# to 0 so the value is always a valid `num_workers` (0 = load in the main process).
num_cpus = os.cpu_count() or 0
num_cpus

Data loader with batch size = 16

In [None]:
# Settings shared by both loaders; only the training loader shuffles its samples.
loader_kwargs = {"batch_size": 16, "num_workers": num_cpus}
train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_dataloader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

Model Initialization

In [None]:
# Build ViT-H/14 initialised from the IMAGENET1K_SWAG_LINEAR_V1 pretrained weights.
model = vit_h_14(weights=models.ViT_H_14_Weights.IMAGENET1K_SWAG_LINEAR_V1)

In [None]:
# Freeze every pretrained parameter first.
model.requires_grad_(False)

# Replace the classification head: 1280 -> 128 -> 10 with batch norm, GELU and dropout.
model.heads = nn.Sequential(
    nn.Linear(1280, 128, bias=True),
    nn.BatchNorm1d(128),
    nn.GELU(),
    nn.Dropout(0.25),
    nn.Linear(128, 10, bias=True),
)

# Train the new head together with encoder block `encoder_layer_31`.
model.heads.requires_grad_(True)
model.encoder.layers.encoder_layer_31.requires_grad_(True)

In [None]:
# Wrap the model in DataParallel only when more than one GPU is visible.
multi_gpu = torch.cuda.device_count() > 1
if multi_gpu:
    print(f"Using(GPU's) {torch.cuda.device_count()} GPUs!")
    model = nn.DataParallel(model)

# Prefer the first CUDA device and fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
model = model.to(device)

Initializing the loss function and optimizer

In [None]:
# Cross-entropy loss over the class logits, optimised with NAdam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.NAdam(params=model.parameters(), lr=1e-2)

Evaluation and Model Training

In [None]:
def evaluate_model(model, dataloader, device):
    """Run `model` over `dataloader` and compute classification metrics.

    Args:
        model: Module returning one row of class logits per input sample.
        dataloader: Iterable with a length, yielding `(inputs, labels)` batches.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        dict with keys "accuracy", "f1_score", "precision", "recall" and
        "auc_roc". F1, precision and recall are macro-averaged. "auc_roc" is
        the macro one-vs-rest ROC AUC, or NaN when scikit-learn cannot compute
        it for the given labels (e.g. a class is missing from the evaluated
        samples).
    """
    model.eval()
    pred_batches = []
    label_batches = []
    prob_batches = []

    with torch.no_grad():  # No gradients are needed for evaluation
        for inputs, labels in tqdm(dataloader, desc="Validating Model", total=len(dataloader)):
            outputs = model(inputs.to(device))

            # Hard predictions for the threshold metrics, full softmax rows for ROC AUC
            pred_batches.append(torch.argmax(outputs, dim=1).cpu())
            prob_batches.append(torch.softmax(outputs, dim=1).cpu())
            label_batches.append(labels.cpu())

    # Merge the per-batch tensors and convert to numpy for scikit-learn
    all_preds_np = torch.cat(pred_batches).numpy()
    all_labels_np = torch.cat(label_batches).numpy()
    all_probs_np = torch.cat(prob_batches).numpy()

    accuracy = accuracy_score(all_labels_np, all_preds_np)
    # zero_division=0 yields the same 0 score as the default but without a
    # warning for classes that have no predicted or no true samples.
    f1 = f1_score(all_labels_np, all_preds_np, average="macro", zero_division=0)
    precision = precision_score(all_labels_np, all_preds_np, average="macro", zero_division=0)
    recall = recall_score(all_labels_np, all_preds_np, average="macro", zero_division=0)

    # roc_auc_score raises ValueError when the labels do not cover every
    # probability column; report NaN instead of aborting the caller.
    try:
        auc_roc = roc_auc_score(all_labels_np, all_probs_np, multi_class='ovr', average='macro')
    except ValueError:
        auc_roc = float("nan")

    return {
        "accuracy": accuracy,
        "f1_score": f1,
        "precision": precision,
        "recall": recall,
        "auc_roc": auc_roc,
    }


In [None]:
num_epochs = 5
scaler = GradScaler()  # Scales the loss for mixed-precision training
torch.cuda.empty_cache()

# Directory to save checkpoints
checkpoint_dir = "./checkpoints"
os.makedirs(checkpoint_dir, exist_ok=True)
best_checkpoint_path = os.path.join(checkpoint_dir, "best_checkpoint.pth")

# Start below any real accuracy so the first epoch always writes a checkpoint,
# even if validation accuracy is 0.
best_accuracy = -1.0

for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    train_loss = 0.0
    with tqdm(train_dataloader, desc=f"Epoch [{epoch+1}/{num_epochs}]", unit="batch") as pbar:
        for batches_seen, (images, labels) in enumerate(pbar, start=1):
            images, labels = images.to(device), labels.to(device)

            # Forward pass with mixed precision
            with autocast():
                outputs = model(images)
                loss = criterion(outputs, labels)

            # Backward pass with scaled gradients
            optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_loss += loss.item()
            # Running mean over the batches processed so far in this epoch
            pbar.set_postfix({"loss": f"{train_loss / batches_seen:.4f}"})

    # Mean training loss over the whole epoch
    epoch_loss = train_loss / len(train_dataloader)

    # Evaluate the model after each epoch
    metrics = evaluate_model(model, val_dataloader, device)
    accuracy = metrics["accuracy"]
    f1 = metrics["f1_score"]
    auc_roc = metrics["auc_roc"]
    print(f"Epoch {epoch+1} - Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f} , AUC_ROC: {auc_roc:.4f}")

    # Save best model based on accuracy
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scaler_state_dict': scaler.state_dict(),
            # Built-in floats keep the checkpoint free of non-tensor library objects
            'loss': float(epoch_loss),
            'accuracy': float(accuracy),
            'f1_score': float(f1),
        }, best_checkpoint_path)
        print(f"Best model saved with accuracy: {best_accuracy:.4f} at {best_checkpoint_path}")

print("Training completed.")
print(f"Best accuracy achieved: {best_accuracy:.4f}")


Predicting on the test dataset and saving submission.csv

In [None]:
from torchvision.io import read_image
import pandas as pd
from PIL import Image
from tqdm import tqdm
import os
import torch

def classify_images_to_csv(image_folder, model, transform, output_csv, device=None):
    """Classify every image in a folder and write the predictions to a CSV file.

    Args:
        image_folder: Directory scanned (non-recursively) for .png/.jpg/.jpeg files.
        model: Module returning one row of class logits per input sample.
        transform: Callable turning an RGB PIL image into an input tensor.
        output_csv: Destination path of the CSV file.
        device: Device the input tensors are moved to. Defaults to the device
            of the model's first parameter (CPU if the model has none).

    The CSV has the columns "Image_ID" (file name without its extension) and
    "Label" (index of the highest-scoring class), with rows in sorted
    file-name order.
    """
    # Ensure the model is in evaluation mode
    model.eval()

    # Inputs must live on the same device as the model weights
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # Sorted so the row order does not depend on os.listdir's arbitrary ordering
    image_files = sorted(
        f for f in os.listdir(image_folder)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    results = []
    for image_name in tqdm(image_files, desc="Processing Images", total=len(image_files)):
        image_path = os.path.join(image_folder, image_name)
        # Context manager closes the file handle once the pixels are decoded
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        batch = transform(image).unsqueeze(0).to(device)  # Add batch dimension

        # Perform inference
        with torch.no_grad():
            outputs = model(batch)
            probabilities = torch.softmax(outputs, dim=1)
            label = torch.argmax(probabilities, dim=1).item()

        results.append({
            # splitext strips only the extension, so dots inside the name are kept
            "Image_ID": os.path.splitext(image_name)[0],
            "Label": label
        })

    # Explicit columns keep the header even when no images were found
    df = pd.DataFrame(results, columns=["Image_ID", "Label"])
    df.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")

In [None]:
# Read the checkpoint file written during training, mapped onto the active device.
best_checkpoint_path = "./checkpoints/best_checkpoint.pth"
checkpoint = torch.load(best_checkpoint_path, map_location=device)

# Restore the saved weights and put the model into evaluation mode.
saved_weights = checkpoint["model_state_dict"]
model.load_state_dict(saved_weights)
model.to(device)
model.eval()

# Predict on the test folder using the deterministic validation preprocessing.
classify_images_to_csv(
    '/kaggle/input/deep-learning-practice-week-9-image-c-lassifica/test',
    model,
    val_transform,
    "/kaggle/working/submission.csv",
)