#### Transfer Learning

It is a concept of training a model, instead of train the model from scratch,
- We take already pre trained model (eg. Resnet, VGG),
- Reuse its convolutional layers (they already learned to detect edges, textures, shapes, objects),
- Replace the last Classification layer with one that matches your dataset classes

#### Transfer Learning on CIFAR10 with ResNet18

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader


# Device 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
print("Using :", device)

# Transforms 
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Cifar dataset
train_dataset = datasets.CIFAR10('./cifar10', train=True, transform=transform, download=True)
test_dataset = datasets.CIFAR10('./cifar10', train=False, transform=transform, download=True)

# Dataloader
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=12)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=12)

Using : cuda
Files already downloaded and verified
Files already downloaded and verified


In [2]:
# Load Pretrained ResNet1
model = models.resnet18(pretrained=True)

# Freeze all layers (feature extraction)
for param in model.parameters():
    param.require_grad = False

# Replace the final FC layer (ImageNet = 1000 classes -> CIFAR10 = 10 classes)
model.fc = nn.Linear(model.fc.in_features, 10)

# move to device
model = model.to(device)



In [3]:
# Loss and Optimizer 
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)  # Only train final layer

In [4]:
# Training loop
epochs = 5
for epoch in range(epochs):
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for batch_idx, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        
        # Print progress every 100 batches
        if (batch_idx + 1) % 100 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Batch [{batch_idx+1}/{len(train_loader)}], "
                  f"Loss: {loss.item():.4f}")


    acc = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}] Finished → "
          f"Avg Loss: {running_loss/len(train_loader):.4f}, Train Acc: {acc:.2f}%")

Epoch [1/5], Batch [100/782], Loss: 1.0102
Epoch [1/5], Batch [200/782], Loss: 0.8349
Epoch [1/5], Batch [300/782], Loss: 0.7863
Epoch [1/5], Batch [400/782], Loss: 0.8267
Epoch [1/5], Batch [500/782], Loss: 0.6222
Epoch [1/5], Batch [600/782], Loss: 0.5850
Epoch [1/5], Batch [700/782], Loss: 0.4025
Epoch [1/5] Finished → Avg Loss: 0.8170, Train Acc: 73.63%
Epoch [2/5], Batch [100/782], Loss: 0.4149
Epoch [2/5], Batch [200/782], Loss: 0.6785
Epoch [2/5], Batch [300/782], Loss: 1.0982
Epoch [2/5], Batch [400/782], Loss: 0.4547
Epoch [2/5], Batch [500/782], Loss: 0.5846
Epoch [2/5], Batch [600/782], Loss: 0.6912
Epoch [2/5], Batch [700/782], Loss: 0.7310
Epoch [2/5] Finished → Avg Loss: 0.6164, Train Acc: 78.86%
Epoch [3/5], Batch [100/782], Loss: 0.6712
Epoch [3/5], Batch [200/782], Loss: 0.6266
Epoch [3/5], Batch [300/782], Loss: 0.5086
Epoch [3/5], Batch [400/782], Loss: 0.4583
Epoch [3/5], Batch [500/782], Loss: 0.5126
Epoch [3/5], Batch [600/782], Loss: 0.4507
Epoch [3/5], Batch [70

In [6]:
# Testing 
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
print(f"Test Accuracy: {100 * correct / total:.2f}%")  

Test Accuracy: 80.52%


In [7]:
# Save the trained model 

torch.save(model.state_dict(), "resnet_cifar10.pth")

# Load (make sure model architecture is same!)
model = models.resnet18(weights=None)   # initialize same model
model.fc = nn.Linear(model.fc.in_features, 10)  # replace last layer again
model.load_state_dict(torch.load("resnet_cifar10.pth"))
model.to(device)

  model.load_state_dict(torch.load("resnet_cifar10.pth"))


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  