In [1]:
!pip install timm transformers --quiet


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import timm
import os
from PIL import Image
import numpy as np

# Config
# Module-level hyper-parameters shared by every later step of the script.
BATCH_SIZE = 16  # samples per batch for both the train and test DataLoader
IMG_SIZE = 224  # images are resized to IMG_SIZE x IMG_SIZE before being fed to the model
NUM_CLASSES = 200  # sizes the classifier head and bounds the per-class accuracy loop
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
NUM_EPOCHS = 20  # upper bound on training epochs; also used as the cosine schedule's T_max
LEARNING_RATE = 1e-4  # base AdamW learning rate and the target value of the warmup ramp
WARMUP_EPOCHS = 3  # number of initial epochs whose learning rate is set by adjust_learning_rate
PATIENCE = 4  # consecutive epochs without a new best Top-1 accuracy before training stops

# Paths (update to your dataset location in Kaggle)
# Each *_LABELS_PATH file is read line by line as "<image_name> <integer label>";
# image names are resolved relative to the matching *_IMAGES_PATH directory.
TRAIN_IMAGES_PATH = "/kaggle/input/dung166/Train/Train"
TRAIN_LABELS_PATH = "/kaggle/input/dung166/train.txt"
TEST_IMAGES_PATH = "/kaggle/input/dung166/Test/Test"
TEST_LABELS_PATH = "/kaggle/input/dung166/test.txt"

# Dataset
class CUB200Dataset(Dataset):
    """Image-classification dataset backed by an image folder and a label file.

    The label file is plain text with one sample per line: an image file name
    (relative to ``images_path``) followed by whitespace and an integer class
    label. Blank lines are skipped, and the label is taken from the last
    whitespace-separated token so file names may themselves contain spaces.

    Args:
        images_path: Directory that contains the image files.
        labels_path: Path of the text file listing ``<image_name> <label>``.
        transform: Optional callable applied to the loaded RGB PIL image.

    Raises:
        ValueError: If a non-blank line lacks a label or the label is not an
            integer.
    """

    def __init__(self, images_path, labels_path, transform=None):
        self.images_path = images_path
        self.transform = transform
        # List of (image_name, label) tuples in file order.
        self.image_labels = []
        with open(labels_path, 'r') as file:
            # Iterate the file lazily instead of materialising every line.
            for line_number, raw_line in enumerate(file, start=1):
                entry = raw_line.strip()
                if not entry:
                    # Tolerate blank lines (a trailing newline at the end of
                    # the file is the common case).
                    continue
                fields = entry.rsplit(maxsplit=1)
                if len(fields) != 2:
                    raise ValueError(
                        f"{labels_path}:{line_number}: expected "
                        f"'<image_name> <label>', got {entry!r}"
                    )
                image_name, label = fields
                self.image_labels.append((image_name, int(label)))

    def __len__(self):
        """Return the number of labelled samples."""
        return len(self.image_labels)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is converted to RGB and, when a transform was supplied,
        passed through it before being returned.
        """
        img_name, label = self.image_labels[idx]
        img_path = os.path.join(self.images_path, img_name)
        # Image.open is lazy and keeps the file open; convert() forces the
        # pixel data to load and returns an independent image, so the file
        # can be closed deterministically as soon as the block exits.
        with Image.open(img_path) as source_image:
            image = source_image.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# Transforms
# Training pipeline: fixed-size resize followed by random augmentation
# (horizontal flip, mild colour jitter, rotation of up to 15 degrees), then
# tensor conversion and per-channel normalisation with mean 0.5 / std 0.5.
# NOTE(review): confirm mean/std 0.5 matches what the pretrained checkpoint expects.
transform_train = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# Evaluation pipeline: same resize and normalisation, but no random augmentation.
transform_test = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# Datasets and Loaders
train_dataset = CUB200Dataset(TRAIN_IMAGES_PATH, TRAIN_LABELS_PATH, transform=transform_train)
test_dataset = CUB200Dataset(TEST_IMAGES_PATH, TEST_LABELS_PATH, transform=transform_test)

# Only the training loader shuffles; the test loader keeps file order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# Model
# timm ViT created with pretrained weights and a NUM_CLASSES-way classifier,
# then moved to the selected device.
model = timm.create_model('vit_base_patch8_224', pretrained=True, num_classes=NUM_CLASSES)
model = model.to(DEVICE)

# Loss, Optimizer, Scheduler
# Cross-entropy with label smoothing 0.1, AdamW over all model parameters, and
# a cosine-annealing schedule with T_max=NUM_EPOCHS that the training loop
# steps once per epoch.
# NOTE(review): adjust_learning_rate also writes param_group['lr'] directly
# during the first WARMUP_EPOCHS epochs while this scheduler is being stepped;
# confirm the combined schedule is the intended one.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS)

# Early Stopping Variables
best_top1 = 0.0  # best Top-1 accuracy seen so far by the training loop
epochs_without_improvement = 0  # consecutive epochs that did not beat best_top1

# Training Function
def train(model, loader, epoch):
    """Run one optimisation pass over ``loader`` and report loss and accuracy.

    Relies on the module-level ``optimizer``, ``criterion`` and ``DEVICE``.

    Args:
        model: Network to train; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        epoch: Index of the current epoch (accepted but not used here).

    Returns:
        A ``(mean_loss, accuracy)`` tuple: the per-batch losses averaged over
        the number of batches, and the fraction of samples whose arg-max
        prediction matched the label. Predictions are taken from the same
        forward pass that produced the loss, i.e. before that batch's
        parameter update.
    """
    model.train()
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    for batch_images, batch_labels in loader:
        batch_images = batch_images.to(DEVICE)
        batch_labels = batch_labels.to(DEVICE)

        # Standard step: clear stale gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        batch_preds = logits.max(1).indices
        num_correct += (batch_preds == batch_labels).sum().item()
        num_seen += batch_labels.size(0)

    mean_loss = loss_sum / len(loader)
    accuracy = num_correct / num_seen
    return mean_loss, accuracy

# Evaluation Function
def evaluate(model, loader):
    """Measure Top-1 and mean per-class accuracy of ``model`` on ``loader``.

    Relies on the module-level ``DEVICE`` and ``NUM_CLASSES``.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        A ``(top1_acc, avg_class_acc)`` tuple. ``top1_acc`` is the fraction of
        all samples predicted correctly. ``avg_class_acc`` is the unweighted
        mean of the per-class accuracies, taken over the class ids in
        ``range(NUM_CLASSES)`` that have at least one sample in ``loader``.
    """
    model.eval()
    num_correct = 0
    num_samples = 0
    pred_chunks = []
    label_chunks = []

    # Gradients are not needed for inference.
    with torch.no_grad():
        for batch_images, batch_labels in loader:
            batch_images = batch_images.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)
            batch_preds = model(batch_images).max(1).indices

            pred_chunks.append(batch_preds.cpu())
            label_chunks.append(batch_labels.cpu())

            num_correct += (batch_preds == batch_labels).sum().item()
            num_samples += batch_labels.size(0)

    top1_acc = num_correct / num_samples

    # Average Class Accuracy
    preds = torch.cat(pred_chunks).numpy()
    targets = torch.cat(label_chunks).numpy()
    hits = preds == targets
    per_class_acc = []
    for class_id in range(NUM_CLASSES):
        in_class = targets == class_id
        class_size = np.count_nonzero(in_class)
        # Classes absent from the loader are left out of the average.
        if class_size > 0:
            per_class_acc.append(hits[in_class].sum() / class_size)

    avg_class_acc = np.mean(per_class_acc)

    return top1_acc, avg_class_acc

# Warmup LR Adjuster
def adjust_learning_rate(optimizer, epoch, base_lr=None, warmup_epochs=None):
    """Apply a linear learning-rate warmup to every parameter group.

    While ``epoch < warmup_epochs`` the learning rate of each group in
    ``optimizer.param_groups`` is overwritten with
    ``base_lr * (epoch + 1) / warmup_epochs``, so it reaches ``base_lr`` on
    the last warmup epoch. From ``warmup_epochs`` onwards the optimizer is
    left untouched.

    Args:
        optimizer: Object exposing ``param_groups``, a list of dicts with an
            ``'lr'`` entry.
        epoch: Zero-based index of the epoch about to run.
        base_lr: Learning rate reached at the end of warmup. Defaults to the
            module-level ``LEARNING_RATE``.
        warmup_epochs: Number of warmup epochs. Defaults to the module-level
            ``WARMUP_EPOCHS``. A value of 0 disables warmup.
    """
    if base_lr is None:
        base_lr = LEARNING_RATE
    if warmup_epochs is None:
        warmup_epochs = WARMUP_EPOCHS

    # Past the warmup window (or warmup disabled): leave the lr alone. This
    # guard also keeps the division below away from warmup_epochs == 0.
    if epoch >= warmup_epochs:
        return

    warmup_lr = base_lr * (epoch + 1) / warmup_epochs
    for param_group in optimizer.param_groups:
        param_group['lr'] = warmup_lr

# Training Loop with Early Stopping
# Each epoch: apply warmup (if still active), train, evaluate, step the cosine
# scheduler, then update the checkpoint and the early-stopping counter.
# NOTE(review): checkpoint selection and early stopping are both driven by
# accuracy on test_loader, and the final evaluation below reports on the same
# loader, so the reported number is selected on the test data. Consider
# holding out a separate validation split.
for epoch in range(NUM_EPOCHS):
    # Overrides the optimizer's lr during the first WARMUP_EPOCHS epochs only.
    adjust_learning_rate(optimizer, epoch)

    train_loss, train_acc = train(model, train_loader, epoch)
    top1_acc, avg_class_acc = evaluate(model, test_loader)

    # The scheduler is stepped once per epoch, after that epoch's training.
    scheduler.step()

    # Save best model & apply early stopping logic
    # Only a strict improvement in Top-1 accuracy resets the counter and
    # overwrites the checkpoint; ties count as "no improvement".
    if top1_acc > best_top1:
        best_top1 = top1_acc
        torch.save(model.state_dict(), 'best_model.pth')
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1

    print(f"Epoch {epoch+1}/{NUM_EPOCHS} - "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
          f"Top-1 Acc: {top1_acc:.4f}, Avg Class Acc: {avg_class_acc:.4f}")

    # Stop once PATIENCE consecutive epochs have failed to beat best_top1.
    if epochs_without_improvement >= PATIENCE:
        print(f"Early stopping triggered after {epoch+1} epochs!")
        break

# Final Evaluation
# Restore the best checkpoint written by the training loop and report its
# metrics on the test loader.
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state_dict holds; it avoids executing arbitrary pickled code
# and the warning torch emits when the argument is left unset.
# map_location=DEVICE lets the checkpoint load even when the current session
# has a different device than the one that saved it.
best_state_dict = torch.load('best_model.pth', map_location=DEVICE, weights_only=True)
model.load_state_dict(best_state_dict)
top1_acc, avg_class_acc = evaluate(model, test_loader)
print(f"\nFinal Top-1 Accuracy: {top1_acc:.4f}")
print(f"Final Average Per-Class Accuracy: {avg_class_acc:.4f}")


Epoch 1/20 - Train Loss: 3.4103, Train Acc: 0.3835, Top-1 Acc: 0.7550, Avg Class Acc: 0.7480
Epoch 2/20 - Train Loss: 1.5637, Train Acc: 0.8142, Top-1 Acc: 0.7674, Avg Class Acc: 0.7634
Epoch 3/20 - Train Loss: 1.3308, Train Acc: 0.8969, Top-1 Acc: 0.7774, Avg Class Acc: 0.7735
Epoch 4/20 - Train Loss: 1.1380, Train Acc: 0.9586, Top-1 Acc: 0.7998, Avg Class Acc: 0.7960
Epoch 5/20 - Train Loss: 1.0213, Train Acc: 0.9872, Top-1 Acc: 0.8339, Avg Class Acc: 0.8322
Epoch 6/20 - Train Loss: 0.9604, Train Acc: 0.9957, Top-1 Acc: 0.8713, Avg Class Acc: 0.8691
Epoch 7/20 - Train Loss: 0.9288, Train Acc: 0.9977, Top-1 Acc: 0.8729, Avg Class Acc: 0.8726
Epoch 8/20 - Train Loss: 0.9135, Train Acc: 0.9986, Top-1 Acc: 0.8904, Avg Class Acc: 0.8893
Epoch 9/20 - Train Loss: 0.9022, Train Acc: 0.9990, Top-1 Acc: 0.8987, Avg Class Acc: 0.8960
Epoch 10/20 - Train Loss: 0.8938, Train Acc: 0.9988, Top-1 Acc: 0.8929, Avg Class Acc: 0.8905
Epoch 11/20 - Train Loss: 0.8872, Train Acc: 0.9992, Top-1 Acc: 0.893

  model.load_state_dict(torch.load('best_model.pth'))



Final Top-1 Accuracy: 0.8987
Final Average Per-Class Accuracy: 0.8960
