In [6]:
!pip install timm




In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import timm
from tqdm import tqdm



In [8]:
# Baseline preprocessing: upscale to 224 px, convert to a tensor and
# normalise every channel with mean 0.5 / std 0.5. No augmentation here.
transform_train = transforms.Compose(
    [
        transforms.Resize(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
    ]
)

# The evaluation pipeline is built separately but applies the same steps.
transform_test = transforms.Compose(
    [
        transforms.Resize(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
    ]
)

# CIFAR-10 training split (fetched into ./data when not already present),
# served in shuffled mini-batches of 64.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_train,
)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=64,
    shuffle=True,
)

# CIFAR-10 test split, served in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=64,
    shuffle=False,
)

print("Dataset loaded successfully")


100%|██████████| 170M/170M [00:04<00:00, 42.5MB/s]


Dataset loaded successfully


In [9]:
# Pick the accelerator when one is visible to PyTorch, otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Pretrained ViT-Base (16x16 patches, 224 px input) with a 10-way
# classification head, placed directly on the selected device.
model = timm.create_model(
    'vit_base_patch16_224',
    pretrained=True,
    num_classes=10,
).to(device)

print("ViT model loaded")


Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

ViT model loaded


In [10]:
# Classification objective used by the training loop.
criterion = nn.CrossEntropyLoss()

# AdamW over every model parameter; the weight decay term acts as
# regularization.
optimizer = optim.AdamW(params=model.parameters(), lr=3e-4, weight_decay=0.01)

print("Loss and optimizer defined")



Loss and optimizer defined


In [11]:
def train_one_epoch():
    """Run one optimisation pass over ``trainloader`` and report its metrics.

    Reads the notebook-level globals ``model``, ``trainloader``,
    ``optimizer``, ``criterion`` and ``device`` at call time, so it always
    trains whatever those names are currently bound to.

    Returns:
        tuple: ``(avg_loss, train_acc)`` where ``avg_loss`` is the mean loss
        per training sample seen during the epoch and ``train_acc`` is the
        running training accuracy in percent.

    Raises:
        ZeroDivisionError: if ``trainloader`` yields no samples.
    """
    model.train()
    total_loss = 0.0
    correct = 0
    total = 0

    for images, labels in tqdm(trainloader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Assumes `criterion` returns the batch *mean* (the default for
        # nn.CrossEntropyLoss()). Weighting by batch size keeps a smaller
        # final batch from being over-represented in the epoch average.
        total_loss += loss.item() * batch_size
        _, predicted = outputs.max(1)
        total += batch_size
        correct += predicted.eq(labels).sum().item()

    train_acc = 100 * correct / total
    # Divide by samples seen, not by number of batches, to get a true
    # per-sample mean.
    avg_loss = total_loss / total

    return avg_loss, train_acc


In [12]:
def evaluate():
    """Score the current global ``model`` on ``testloader``.

    Switches the model to eval mode, runs every batch with gradient
    tracking disabled, and counts how often the highest-scoring class
    matches the label. Reads the notebook-level globals ``model``,
    ``testloader`` and ``device``.

    Returns:
        float: accuracy over all samples yielded by ``testloader``, in percent.
    """
    model.eval()
    n_correct, n_seen = 0, 0

    with torch.no_grad():
        for batch_images, batch_labels in testloader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            # Predicted class = index of the largest score along dim 1.
            predictions = logits.max(dim=1).indices

            n_seen += batch_labels.size(0)
            n_correct += (predictions == batch_labels).sum().item()

    return 100 * n_correct / n_seen


In [15]:
# Data augmentation for training: after upscaling to 224 px, randomly mirror
# the image and take a random 224 px crop from a copy padded by 16 px.
transform_train_aug = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(224, padding=16),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Evaluation stays deterministic: resize + normalise only.
transform_test_aug = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# download=False: expects the CIFAR-10 files to already be under ./data.
trainset_aug = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=False, transform=transform_train_aug
)

testset_aug = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=False, transform=transform_test_aug
)

# NOTE: these assignments rebind the global `trainloader` / `testloader`
# names that train_one_epoch() and evaluate() read, so every later
# training/evaluation call uses the augmented pipeline.
#
# Worker processes run the per-image resize/augmentation in parallel with
# the training step instead of in the main process; pinned host memory is
# only requested when a CUDA device is present.
trainloader = torch.utils.data.DataLoader(
    trainset_aug,
    batch_size=64,
    shuffle=True,
    num_workers=2,
    pin_memory=torch.cuda.is_available(),
)

testloader = torch.utils.data.DataLoader(
    testset_aug,
    batch_size=64,
    shuffle=False,
    num_workers=2,
    pin_memory=torch.cuda.is_available(),
)

print("Augmented dataset loaded")


Augmented dataset loaded


In [16]:
# Swap in the much smaller ViT-Tiny (pretrained, 10-way head) so that
# experiments iterate quickly; it replaces the model bound to `model`.
model = timm.create_model(
    'vit_tiny_patch16_224',
    pretrained=True,
    num_classes=10,
).to(device)

# The optimizer must be rebuilt: it has to track the parameters of the new
# model rather than those of the previous one.
optimizer = optim.AdamW(params=model.parameters(), lr=3e-4, weight_decay=0.01)

print("Fast ViT-Tiny model loaded")


model.safetensors:   0%|          | 0.00/22.9M [00:00<?, ?B/s]

Fast ViT-Tiny model loaded


In [17]:
# Number of full passes over the training data.
epochs = 2

for epoch in range(epochs):
    # Train for one pass, then measure accuracy on the test loader.
    train_loss, train_acc = train_one_epoch()
    test_acc = evaluate()

    # One report per epoch, one metric per line, closed by a divider.
    print(
        f"Epoch {epoch+1}/{epochs}",
        f"Train Loss: {train_loss:.4f}",
        f"Train Accuracy: {train_acc:.2f}%",
        f"Test Accuracy: {test_acc:.2f}%",
        "-" * 30,
        sep="\n",
    )


100%|██████████| 782/782 [04:24<00:00,  2.96it/s]


Epoch 1/2
Train Loss: 0.3223
Train Accuracy: 88.94%
Test Accuracy: 90.86%
------------------------------


100%|██████████| 782/782 [04:25<00:00,  2.94it/s]


Epoch 2/2
Train Loss: 0.2078
Train Accuracy: 92.96%
Test Accuracy: 92.48%
------------------------------
