In [None]:
import torch
import torchvision
import torchvision.transforms as transforms



# Per-channel normalization (these are the commonly used ImageNet mean/std values).
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# CIFAR10 hands each sample to the transform as a PIL image, while Normalize
# only operates on tensors. ToTensor() must therefore run first; passing
# `normalize` alone as the transform fails as soon as a sample is read.
cifar_transform = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])

# Training split (downloaded into ./data on first use).
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=cifar_transform)

# Held-out test split, preprocessed the same way as the training split.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=cifar_transform)

100%|██████████| 170M/170M [00:02<00:00, 75.7MB/s]


In [None]:
!pip install tqdm
!pip install timm


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->timm)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->timm)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->timm)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch->tim

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from tqdm import tqdm


In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Data transforms and CIFAR-10 loading.
# Both backbones that can be selected further down (torchvision ViT-B/16 and
# timm DeiT-tiny) are loaded with ImageNet-pretrained weights and kept frozen,
# so the inputs should be normalized with the ImageNet channel statistics those
# weights expect rather than with a generic mean/std of 0.5.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize(224),  # ViT requires 224x224
    transforms.ToTensor(),   # PIL image -> float tensor (needed before Normalize)
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Only the training loader is shuffled; the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

# Load a pretrained vision transformer backbone.

# Switch between torchvision's ViT-B/16 (86M parameters) and the tiny DeiT
# model from timm (~6M parameters).
UsingVIT_b = False

if UsingVIT_b:
    from torchvision.models import vit_b_16, ViT_B_16_Weights
    weights = ViT_B_16_Weights.DEFAULT
    model = vit_b_16(weights=weights)
else:
    import timm
    model = timm.create_model('deit_tiny_patch16_224', pretrained=True)

# Freeze every pretrained parameter first ...
model.requires_grad_(False)

# ... then attach a fresh 10-way classification head. A newly constructed
# nn.Linear has trainable parameters, so the head is the only part of the
# network that receives gradients.
if UsingVIT_b:
    model.heads.head = nn.Linear(model.heads.head.in_features, 10)
else:
    model.head = nn.Linear(model.head.in_features, 10)

model = model.to(device)

# Loss and optimizer: cross-entropy over the class logits, with AdamW updating
# only the parameters of the classification head.
criterion = nn.CrossEntropyLoss()
trainable_head = model.heads.head if UsingVIT_b else model.head
optimizer = optim.AdamW(trainable_head.parameters(), lr=3e-4, weight_decay=0.01)
# Train for 10 passes over the training set, reporting running loss/accuracy.
for epoch in range(10):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}", leave=True)

    for inputs, labels in progress_bar:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # One optimization step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean, so scale it by the batch size
        # to accumulate a per-sample loss sum.
        running_loss += loss.item() * inputs.size(0)
        predicted = outputs.max(1)[1]  # index of the highest logit per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Passed as a dict so the bar shows loss before accuracy.
        running_stats = {
            "loss": f"{running_loss/total:.4f}",
            "acc": f"{100.*correct/total:.2f}%"
        }
        progress_bar.set_postfix(running_stats)

    print(f"Epoch [{epoch+1}], Loss: {running_loss/len(train_loader.dataset):.4f}, Accuracy: {100.*correct/total:.2f}%")

# Evaluation: top-1 accuracy over the whole test set, with the model in eval
# mode and gradient tracking disabled.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        predicted = outputs.max(1)[1]  # index of the highest logit per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100.*correct/total:.2f}%")


Using device: cuda


Epoch 1: 100%|██████████| 782/782 [01:32<00:00,  8.43it/s, loss=0.8635, acc=74.38%]


Epoch [1], Loss: 0.8635, Accuracy: 74.38%


Epoch 2: 100%|██████████| 782/782 [01:33<00:00,  8.40it/s, loss=0.4636, acc=85.03%]


Epoch [2], Loss: 0.4636, Accuracy: 85.03%


Epoch 3: 100%|██████████| 782/782 [01:33<00:00,  8.34it/s, loss=0.4141, acc=86.27%]


Epoch [3], Loss: 0.4141, Accuracy: 86.27%


Epoch 4: 100%|██████████| 782/782 [01:33<00:00,  8.33it/s, loss=0.3913, acc=86.98%]


Epoch [4], Loss: 0.3913, Accuracy: 86.98%


Epoch 5: 100%|██████████| 782/782 [01:33<00:00,  8.36it/s, loss=0.3781, acc=87.35%]


Epoch [5], Loss: 0.3781, Accuracy: 87.35%


Epoch 6: 100%|██████████| 782/782 [01:33<00:00,  8.35it/s, loss=0.3691, acc=87.55%]


Epoch [6], Loss: 0.3691, Accuracy: 87.55%


Epoch 7: 100%|██████████| 782/782 [01:34<00:00,  8.26it/s, loss=0.3628, acc=87.75%]


Epoch [7], Loss: 0.3628, Accuracy: 87.75%


Epoch 8: 100%|██████████| 782/782 [01:34<00:00,  8.28it/s, loss=0.3580, acc=87.95%]


Epoch [8], Loss: 0.3580, Accuracy: 87.95%


Epoch 9: 100%|██████████| 782/782 [01:33<00:00,  8.33it/s, loss=0.3540, acc=87.91%]


Epoch [9], Loss: 0.3540, Accuracy: 87.91%


Epoch 10: 100%|██████████| 782/782 [01:35<00:00,  8.21it/s, loss=0.3512, acc=88.01%]

Epoch [10], Loss: 0.3512, Accuracy: 88.01%





Test Accuracy: 87.17%


In [None]:
# Mount Google Drive at /content/drive so that paths under it can be read and
# written from this runtime. The google.colab module is only available when
# running inside Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# ResNet-18 Pretraining on CIFAR-100 (No Pretrained Weights)

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from tqdm import tqdm
import os

# Checkpoints are written to a folder under the mounted Drive path; the
# directory is created up front if it does not exist yet.
checkpoint_dir = '/content/drive/MyDrive/resnet18_checkpoints'
checkpoint_path = os.path.join(checkpoint_dir, 'resnet18_latest.pth')
os.makedirs(checkpoint_dir, exist_ok=True)

# Device configuration: prefer CUDA when it is available.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")

# Data transforms and CIFAR-100 loading
preprocessing_steps = [
    transforms.Resize(224),  # ResNet expects 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
]
transform = transforms.Compose(preprocessing_steps)

# Both splits share the same storage location and preprocessing.
dataset_options = dict(root='./data', download=True, transform=transform)
train_dataset = datasets.CIFAR100(train=True, **dataset_options)
test_dataset = datasets.CIFAR100(train=False, **dataset_options)

# Only the training loader is shuffled.
loader_options = dict(batch_size=64, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Build ResNet-18 with randomly initialized weights (no pretrained checkpoint)
# and swap its final fully connected layer for a 100-way classifier, one output
# per CIFAR-100 class.
model = models.resnet18(weights=None)
fc_in_features = model.fc.in_features
model.fc = nn.Linear(fc_in_features, 100)

# Move model to device
model = model.to(device)

# Loss and optimizer: every parameter of the network is trained.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(
    model.parameters(),
    lr=3e-4,
    weight_decay=0.01,
)

# Training loop: every epoch trains on the full training set, evaluates on the
# test set, and on every fifth epoch overwrites the checkpoint file.
num_epochs = 10
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}", leave=True)

    for inputs, labels in progress_bar:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # One optimization step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean, so scale it by the batch size
        # to accumulate a per-sample loss sum.
        running_loss += loss.item() * inputs.size(0)
        predicted = outputs.max(1)[1]  # index of the highest logit per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Passed as a dict so the bar shows loss before accuracy.
        running_stats = {
            "loss": f"{running_loss/total:.4f}",
            "acc": f"{100.*correct/total:.2f}%"
        }
        progress_bar.set_postfix(running_stats)

    print(f"Epoch [{epoch+1}], Loss: {running_loss/len(train_loader.dataset):.4f}, Accuracy: {100.*correct/total:.2f}%")

    # ---- evaluation pass ----
    # The counters are reset here, so from this point on they describe the
    # test set rather than the training set.
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            predicted = outputs.max(1)[1]
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Test Accuracy: {100.*correct/total:.2f}%")

    # Save the model every 5 epochs (only the weights, always to the same path,
    # so each save replaces the previous one).
    is_checkpoint_epoch = (epoch + 1) % 5 == 0
    if is_checkpoint_epoch:
        torch.save(model.state_dict(), checkpoint_path)
        print(f"Checkpoint saved at {checkpoint_path}")


Using device: cuda
Files already downloaded and verified
Files already downloaded and verified


Epoch 1: 100%|██████████| 782/782 [02:44<00:00,  4.76it/s, loss=3.4913, acc=16.55%]

Epoch [1], Loss: 3.4913, Accuracy: 16.55%





Test Accuracy: 24.34%


Epoch 2: 100%|██████████| 782/782 [02:43<00:00,  4.79it/s, loss=2.5692, acc=33.54%]

Epoch [2], Loss: 2.5692, Accuracy: 33.54%





Test Accuracy: 37.74%


Epoch 3: 100%|██████████| 782/782 [02:43<00:00,  4.78it/s, loss=2.0197, acc=45.28%]

Epoch [3], Loss: 2.0197, Accuracy: 45.28%





Test Accuracy: 45.41%


Epoch 4: 100%|██████████| 782/782 [02:43<00:00,  4.78it/s, loss=1.6620, acc=53.61%]

Epoch [4], Loss: 1.6620, Accuracy: 53.61%





Test Accuracy: 51.65%


Epoch 5: 100%|██████████| 782/782 [02:43<00:00,  4.77it/s, loss=1.3922, acc=60.33%]

Epoch [5], Loss: 1.3922, Accuracy: 60.33%





Test Accuracy: 54.42%
Checkpoint saved at /content/drive/MyDrive/resnet18_checkpoints/resnet18_latest.pth


Epoch 6: 100%|██████████| 782/782 [02:43<00:00,  4.77it/s, loss=1.1527, acc=66.42%]

Epoch [6], Loss: 1.1527, Accuracy: 66.42%





Test Accuracy: 52.32%


Epoch 7: 100%|██████████| 782/782 [02:45<00:00,  4.74it/s, loss=0.9269, acc=72.61%]

Epoch [7], Loss: 0.9269, Accuracy: 72.61%





Test Accuracy: 56.79%


Epoch 8: 100%|██████████| 782/782 [02:44<00:00,  4.74it/s, loss=0.7061, acc=78.98%]

Epoch [8], Loss: 0.7061, Accuracy: 78.98%





Test Accuracy: 53.04%


Epoch 9: 100%|██████████| 782/782 [02:47<00:00,  4.68it/s, loss=0.5064, acc=84.80%]

Epoch [9], Loss: 0.5064, Accuracy: 84.80%





Test Accuracy: 54.01%


Epoch 10: 100%|██████████| 782/782 [02:44<00:00,  4.74it/s, loss=0.3317, acc=90.48%]

Epoch [10], Loss: 0.3317, Accuracy: 90.48%





Test Accuracy: 56.84%
Checkpoint saved at /content/drive/MyDrive/resnet18_checkpoints/resnet18_latest.pth
