In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchvision import models

In [2]:
# Step 1: Choose a Pre-trained Model
pretrained_model = models.resnet18(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 145MB/s]


In [2]:
# Step 2: Prepare Your Dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to fit the input size of ResNet
    transforms.ToTensor(),           # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize images
])

In [3]:
trainset = torchvision.datasets.CIFAR10(root='/content', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='/content', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:01<00:00, 101571210.52it/s]


Extracting /content/cifar-10-python.tar.gz to /content
Files already downloaded and verified


In [5]:
num_classes = 10

In [6]:
pretrained_model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [7]:
pretrained_model.fc.in_features

512

In [8]:
num_ftrs = pretrained_model.fc.in_features

In [9]:
pretrained_model.fc = nn.Linear(num_ftrs, num_classes, bias = False)

In [10]:
pretrained_model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [11]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(pretrained_model.parameters(), lr=0.001, momentum=0.9)

In [12]:
# Step 6: Training Loop
num_epochs = 5

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
pretrained_model = pretrained_model.to(device)

In [13]:
print(device)

cuda:0


In [14]:
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = pretrained_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print("Finished Training")

[1,   100] loss: 1.418
[1,   200] loss: 0.634
[1,   300] loss: 0.458
[1,   400] loss: 0.394
[1,   500] loss: 0.329
[1,   600] loss: 0.323
[1,   700] loss: 0.303
[1,   800] loss: 0.286
[1,   900] loss: 0.273
[1,  1000] loss: 0.258
[1,  1100] loss: 0.265
[1,  1200] loss: 0.276
[1,  1300] loss: 0.258
[1,  1400] loss: 0.239
[1,  1500] loss: 0.231
[2,   100] loss: 0.160
[2,   200] loss: 0.162
[2,   300] loss: 0.151
[2,   400] loss: 0.147
[2,   500] loss: 0.152
[2,   600] loss: 0.156
[2,   700] loss: 0.148
[2,   800] loss: 0.153
[2,   900] loss: 0.139
[2,  1000] loss: 0.160
[2,  1100] loss: 0.136
[2,  1200] loss: 0.146
[2,  1300] loss: 0.153
[2,  1400] loss: 0.152
[2,  1500] loss: 0.131
[3,   100] loss: 0.086
[3,   200] loss: 0.089
[3,   300] loss: 0.078
[3,   400] loss: 0.087
[3,   500] loss: 0.080
[3,   600] loss: 0.076
[3,   700] loss: 0.087
[3,   800] loss: 0.088
[3,   900] loss: 0.082
[3,  1000] loss: 0.075
[3,  1100] loss: 0.077
[3,  1200] loss: 0.085
[3,  1300] loss: 0.084
[3,  1400] 

In [15]:
# Step 7: Evaluation
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = pretrained_model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

In [16]:
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

Accuracy of the network on the 10000 test images: 94 %


## Now with model parameters freezed

In [17]:
# Step 1: Choose a Pre-trained Model
pretrained_model = models.resnet18(pretrained=True)



In [18]:
pretrained_model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [19]:
num_ftrs = pretrained_model.fc.in_features

In [20]:
# Freeze parameters of pre-trained layers
for param in pretrained_model.parameters():
    param.requires_grad = False

In [21]:
# Replace the fully connected layer with a new one
pretrained_model.fc = nn.Linear(num_ftrs, num_classes)

In [22]:
# Parameters of the newly added fully connected layer will be updated during training
parameters_to_update = pretrained_model.fc.parameters()
optimizer = optim.SGD(parameters_to_update, lr=0.001, momentum=0.9)

In [23]:
# Step 6: Training Loop
num_epochs = 5

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
pretrained_model = pretrained_model.to(device)

In [24]:
print(device)

cuda:0


In [25]:
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = pretrained_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print("Finished Training")

[1,   100] loss: 1.765
[1,   200] loss: 1.180
[1,   300] loss: 1.008
[1,   400] loss: 0.922
[1,   500] loss: 0.842
[1,   600] loss: 0.796
[1,   700] loss: 0.772
[1,   800] loss: 0.779
[1,   900] loss: 0.732
[1,  1000] loss: 0.739
[1,  1100] loss: 0.732
[1,  1200] loss: 0.700
[1,  1300] loss: 0.713
[1,  1400] loss: 0.672
[1,  1500] loss: 0.705
[2,   100] loss: 0.659
[2,   200] loss: 0.640
[2,   300] loss: 0.657
[2,   400] loss: 0.672
[2,   500] loss: 0.671
[2,   600] loss: 0.677
[2,   700] loss: 0.673
[2,   800] loss: 0.656
[2,   900] loss: 0.640
[2,  1000] loss: 0.633
[2,  1100] loss: 0.639
[2,  1200] loss: 0.654
[2,  1300] loss: 0.650
[2,  1400] loss: 0.653
[2,  1500] loss: 0.663
[3,   100] loss: 0.631
[3,   200] loss: 0.635
[3,   300] loss: 0.625
[3,   400] loss: 0.648
[3,   500] loss: 0.606
[3,   600] loss: 0.612
[3,   700] loss: 0.650
[3,   800] loss: 0.630
[3,   900] loss: 0.623
[3,  1000] loss: 0.647
[3,  1100] loss: 0.612
[3,  1200] loss: 0.628
[3,  1300] loss: 0.616
[3,  1400] 

In [26]:
# Step 7: Evaluation
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = pretrained_model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

In [27]:
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

Accuracy of the network on the 10000 test images: 78 %


## Freeze model parameters and add more fully connected layers

In [4]:
# Step 1: Choose a Pre-trained Model
pretrained_model = models.resnet18(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 140MB/s]


In [5]:
# Step 4: Freeze Pre-trained Model and Add More Fully Connected Layers
num_classes = 10  # Number of classes in CIFAR-10
num_ftrs = pretrained_model.fc.in_features

pretrained_model.fc = nn.Linear(num_ftrs, num_classes)

In [6]:
pretrained_model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [7]:
trainable_layer_names = ["layer4", "avgpool", "fc"]

In [8]:
# Iterate through all the modules of the model
for name, module in pretrained_model.named_children():
    # Check if the module name is in the list of trainable_layer_names
    if name in trainable_layer_names:
        # If the module name is in the list, set requires_grad to True for all its parameters
        for param in module.parameters():
            param.requires_grad = True
    else:
        # If the module name is not in the list, freeze its parameters
        for param in module.parameters():
            param.requires_grad = False

In [9]:
# Verify the status of requires_grad for each parameter
for name, param in pretrained_model.named_parameters():
    print(f'{name}: requires_grad={param.requires_grad}')

conv1.weight: requires_grad=False
bn1.weight: requires_grad=False
bn1.bias: requires_grad=False
layer1.0.conv1.weight: requires_grad=False
layer1.0.bn1.weight: requires_grad=False
layer1.0.bn1.bias: requires_grad=False
layer1.0.conv2.weight: requires_grad=False
layer1.0.bn2.weight: requires_grad=False
layer1.0.bn2.bias: requires_grad=False
layer1.1.conv1.weight: requires_grad=False
layer1.1.bn1.weight: requires_grad=False
layer1.1.bn1.bias: requires_grad=False
layer1.1.conv2.weight: requires_grad=False
layer1.1.bn2.weight: requires_grad=False
layer1.1.bn2.bias: requires_grad=False
layer2.0.conv1.weight: requires_grad=False
layer2.0.bn1.weight: requires_grad=False
layer2.0.bn1.bias: requires_grad=False
layer2.0.conv2.weight: requires_grad=False
layer2.0.bn2.weight: requires_grad=False
layer2.0.bn2.bias: requires_grad=False
layer2.0.downsample.0.weight: requires_grad=False
layer2.0.downsample.1.weight: requires_grad=False
layer2.0.downsample.1.bias: requires_grad=False
layer2.1.conv1.wei

In [10]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = pretrained_model.to(device)
print(device)

# Step 5: Define Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(pretrained_model.parameters(), lr=0.001, momentum=0.9)

# Step 6: Training Loop
num_epochs = 5


for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = pretrained_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print("Finished Training")

cuda:0
[1,   100] loss: 1.497
[1,   200] loss: 0.800
[1,   300] loss: 0.641
[1,   400] loss: 0.553
[1,   500] loss: 0.535
[1,   600] loss: 0.516
[1,   700] loss: 0.480
[1,   800] loss: 0.430
[1,   900] loss: 0.447
[1,  1000] loss: 0.449
[1,  1100] loss: 0.402
[1,  1200] loss: 0.425
[1,  1300] loss: 0.394
[1,  1400] loss: 0.401
[1,  1500] loss: 0.378
[2,   100] loss: 0.314
[2,   200] loss: 0.326
[2,   300] loss: 0.284
[2,   400] loss: 0.279
[2,   500] loss: 0.294
[2,   600] loss: 0.270
[2,   700] loss: 0.270
[2,   800] loss: 0.275
[2,   900] loss: 0.286
[2,  1000] loss: 0.285
[2,  1100] loss: 0.268
[2,  1200] loss: 0.259
[2,  1300] loss: 0.277
[2,  1400] loss: 0.259
[2,  1500] loss: 0.271
[3,   100] loss: 0.193
[3,   200] loss: 0.197
[3,   300] loss: 0.195
[3,   400] loss: 0.203
[3,   500] loss: 0.187
[3,   600] loss: 0.182
[3,   700] loss: 0.188
[3,   800] loss: 0.191
[3,   900] loss: 0.176
[3,  1000] loss: 0.182
[3,  1100] loss: 0.183
[3,  1200] loss: 0.186
[3,  1300] loss: 0.194
[3, 

In [11]:
# Step 7: Evaluation
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = pretrained_model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

In [12]:
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

Accuracy of the network on the 10000 test images: 89 %


## Now training the last two blocks of network

In [13]:
# Step 1: Choose a Pre-trained Model
pretrained_model = models.resnet18(pretrained=True)

# Step 4: Freeze Pre-trained Model and Add More Fully Connected Layers
num_classes = 10  # Number of classes in CIFAR-10
num_ftrs = pretrained_model.fc.in_features

pretrained_model.fc = nn.Linear(num_ftrs, num_classes)

trainable_layer_names = ["layer3", "layer4", "avgpool", "fc"]

# Iterate through all the modules of the model
for name, module in pretrained_model.named_children():
    # Check if the module name is in the list of trainable_layer_names
    if name in trainable_layer_names:
        # If the module name is in the list, set requires_grad to True for all its parameters
        for param in module.parameters():
            param.requires_grad = True
    else:
        # If the module name is not in the list, freeze its parameters
        for param in module.parameters():
            param.requires_grad = False

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = pretrained_model.to(device)
print(device)

# Step 5: Define Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(pretrained_model.parameters(), lr=0.001, momentum=0.9)

# Step 6: Training Loop
num_epochs = 5


for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = pretrained_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print("Finished Training")



# Step 7: Evaluation
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = pretrained_model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

cuda:0
[1,   100] loss: 1.470
[1,   200] loss: 0.706
[1,   300] loss: 0.548
[1,   400] loss: 0.485
[1,   500] loss: 0.423
[1,   600] loss: 0.390
[1,   700] loss: 0.354
[1,   800] loss: 0.357
[1,   900] loss: 0.334
[1,  1000] loss: 0.332
[1,  1100] loss: 0.310
[1,  1200] loss: 0.317
[1,  1300] loss: 0.277
[1,  1400] loss: 0.312
[1,  1500] loss: 0.289
[2,   100] loss: 0.193
[2,   200] loss: 0.184
[2,   300] loss: 0.181
[2,   400] loss: 0.186
[2,   500] loss: 0.195
[2,   600] loss: 0.167
[2,   700] loss: 0.197
[2,   800] loss: 0.186
[2,   900] loss: 0.199
[2,  1000] loss: 0.191
[2,  1100] loss: 0.168
[2,  1200] loss: 0.195
[2,  1300] loss: 0.175
[2,  1400] loss: 0.184
[2,  1500] loss: 0.179
[3,   100] loss: 0.106
[3,   200] loss: 0.097
[3,   300] loss: 0.100
[3,   400] loss: 0.091
[3,   500] loss: 0.094
[3,   600] loss: 0.095
[3,   700] loss: 0.081
[3,   800] loss: 0.093
[3,   900] loss: 0.099
[3,  1000] loss: 0.096
[3,  1100] loss: 0.107
[3,  1200] loss: 0.094
[3,  1300] loss: 0.104
[3, 