In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms

# Set up data transformations (resize, normalize)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load CIFAR-10 dataset. The CIFAR-10 dataset consists of 60000 32x32 colour
# images in 10 classes, with 6000 images per class. There are 50000 training
# images and 10000 test images.
train_data = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)

test_data = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)



Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:03<00:00, 46.6MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [6]:
print("len(train_data)=",len(train_data))
print("len(train_loader)=",len(train_loader))
print("len(test_data)=",len(test_data))
print("len(test_loader)=",len(test_loader))


len(train_data)= 50000
len(train_loader)= 1563
len(test_data)= 10000
len(test_loader)= 313


In [2]:
# We’ll use ResNet-18, a pre-trained model in PyTorch, and modify the last
# layer for the CIFAR-10 dataset (10 classes instead of 1,000 classes in the
# original model).

# Load the pre-trained ResNet-18 model
model = models.resnet18(pretrained=True)

# Modify the final fully connected layer to output 10 classes
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 10)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 99.9MB/s]


In [3]:
# Define a loss function and an optimizer. For fine-tuning, often a lower
# learning rate is used since we’re making smaller adjustments to the model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [4]:
# We’ll freeze some layers in the model to retain learned features and
# fine-tune only a few layers.

# Freeze all layers except the last layer
for param in model.parameters():
    param.requires_grad = False

# Unfreeze the last layer
for param in model.fc.parameters():
    param.requires_grad = True

# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")


Epoch [1/5], Loss: 0.7993
Epoch [2/5], Loss: 0.6409
Epoch [3/5], Loss: 0.6212
Epoch [4/5], Loss: 0.6060
Epoch [5/5], Loss: 0.6029


In [5]:
# Set the model to evaluation mode
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the model on the test images: {100 * correct / total:.2f}%')


Accuracy of the model on the test images: 79.96%


Before fine-tuning, the model's accuracy on the CIFAR-10 dataset would likely be quite low, often near random guessing, which would yield around 10% accuracy (since CIFAR-10 has 10 classes and a randomly chosen label would match the true label 10% of the time on average).

This low accuracy is due to the following reasons:

- Class Mismatch: The ResNet-18 model is pre-trained on the ImageNet dataset, which has 1,000 classes (e.g., different dog breeds, vehicles, etc.) that do not directly relate to CIFAR-10’s 10 classes. Thus, its learned features are not initially tuned for the CIFAR-10 class types.

- Output Layer: The model's output layer originally has 1,000 neurons for ImageNet's 1,000 classes, while CIFAR-10 only has 10 classes. Without modifying this layer, the model's output will be mismatched and cannot classify CIFAR-10 classes correctly.

- Image Resolution Difference: CIFAR-10 images are very small (32x32 pixels) compared to the ImageNet images (224x224 pixels). Although we resize CIFAR-10 images to 224x224 to match the ResNet input, the small size and low resolution might reduce the effectiveness of the model's pre-learned features.

**Measuring Initial Accuracy (Optional)**

If you wanted to see the model’s initial performance on CIFAR-10 (before fine-tuning), you could load the model as pre-trained, skip the .train() and optimization steps, and run the evaluation code only on the CIFAR-10 test set. This would give you a direct measure of the pre-trained model’s accuracy on the CIFAR-10 dataset without any additional training, which should indeed be around 10-20% accuracy. The code to do this is shown below:

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms

# Set up data transformations (resize, normalize to match ImageNet)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load CIFAR-10 test dataset
test_data = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

# Load the pre-trained ResNet-18 model
model = models.resnet18(pretrained=True)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Set the model to evaluation mode
model.eval()

# Initialize variables to track accuracy
correct = 0
total = 0

# Disable gradient calculations for faster evaluation
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass: Get model predictions
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)  # Get the predicted class

        # Update total and correct counts
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Calculate and print accuracy
initial_accuracy = 100 * correct / total
print(f'Initial accuracy of the pre-trained model on CIFAR-10 (before fine-tuning): {initial_accuracy:.2f}%')


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:13<00:00, 13.1MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 91.0MB/s]


Initial accuracy of the pre-trained model on CIFAR-10 (before fine-tuning): 0.00%
