In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import wandb
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
# import splitfolders

**Part A**

Question 1

In [3]:
class SmallCNN(nn.Module):
    def __init__(self, num_filters, filter_size, activation, num_classes, num_neurons_dense):
        super(SmallCNN, self).__init__()
        self.num_filters=num_filters
        self.filter_size=filter_size
        self.conv_blocks = nn.Sequential(
            # Convolutional blocks
            nn.Conv2d(3, num_filters, filter_size, padding=1),
            activation(),
            nn.MaxPool2d(2),

            nn.Conv2d(num_filters, 2*num_filters, filter_size, padding=1),
            activation(),
            nn.MaxPool2d(2),

            nn.Conv2d(2*num_filters, 4*num_filters, filter_size, padding=1),
            activation(),
            nn.MaxPool2d(2),

            nn.Conv2d(4*num_filters, 8*num_filters, filter_size, padding=1),
            activation(),
            nn.MaxPool2d(2),

            nn.Conv2d(8*num_filters, 16*num_filters, filter_size, padding=1),
            activation(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Sequential(
            # Fully connected layers
            # nn.Linear(num_filters * 4 * 4*8*8, num_neurons_dense),
            nn.Linear(num_filters * 4 * 4*(11-filter_size)*(11-filter_size), num_neurons_dense),
            activation(),
            nn.Linear(num_neurons_dense, num_classes),
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        x = self.conv_blocks(x)
        # x = x.view(-1, 48*8*8)
        x = x.view(-1, self.num_filters*4*4*(11-self.filter_size)*(11-self.filter_size))
        x = self.fc(x)
        # print(x.shape)
        return x

What is the total number of parameters in your network? (assume mmm filters in each layer of size k×kk\times kk×k and nnn neurons in the dense layer) ?


To calculate the total number of parameters in this network, we need to count the number of learnable parameters in each layer and sum them up.

The convolutional layers have learnable parameters in the form of filters, biases, and the number of parameters in each layer can be calculated as:

First Convolutional Layer: (num_filters * 3 * filter_size * filter_size) + num_filters


Second Convolutional Layer: (2*num_filters * num_filters * filter_size * filter_size) + (2*num_filters)


Third Convolutional Layer: (4*num_filters * 2*num_filters * filter_size * filter_size) + (4*num_filters)


Fourth Convolutional Layer: (8*num_filters * 4*num_filters * filter_size * filter_size) + (8*num_filters)


Fifth Convolutional Layer: (16*num_filters * 8*num_filters * filter_size * filter_size) + (16*num_filters)


The fully connected layers have learnable parameters in the form of weights and biases, and the number of parameters in each layer can be calculated as:

First Fully Connected Layer: (num_filters * 4 * (11-filter_size) * (11-filter_size) * num_neurons_dense) + num_neurons_dense


Second Fully Connected Layer: (num_neurons_dense * num_classes) + num_classes



Thus, the total number of parameters in the network can be calculated by summing up the number of parameters in each layer:

total_parameters = (num_filters * 3 * filter_size * filter_size) + num_filters + (2*num_filters * num_filters * filter_size * filter_size) + (2*num_filters) + (4*num_filters * 2*num_filters * filter_size * filter_size) + (4*num_filters) + (8*num_filters * 4*num_filters * filter_size * filter_size) + (8*num_filters) + (16*num_filters * 8*num_filters * filter_size * filter_size) + (16*num_filters) + (num_filters * 4 * (11-filter_size) * (11-filter_size) * num_neurons_dense) + num_neurons_dense + (num_neurons_dense * num_classes) + num_classes


Substituting the given values :
total_params = (3mkk+1m+2mmkk+2m+4mmkk+4m+8mmkk+8m+16mmkk+16m) + (m * 4 * (11 - k) * (11 - k) * n + n + n * num_classes)

Substituting the given values:

num_filters = 32


filter_size = 3


num_classes = 10


num_neurons_dense = 128


total_params = (33233+132+2323233+232+4323233+432+8323233+832+16323233+1632) + (32 * 4 * (11 - 3) * (11 - 3) * 128 + 128 + 128 * 10)


Simplifying
total_params = 24915328

Question 2

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
cd /content/drive/MyDrive/inaturalist_12K

/content/drive/MyDrive/inaturalist_12K


In [6]:
!wandb login

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [7]:
config_defaults = {
        'epochs': 5,
        'num_filters':32,
        'filter_size':3,
        'lr': 0.001,
        'activation':'relu',
        'num_classes': 10,      #available: sigmoid,tanh,relu
        'num_neurons_dense':64
        }
config=config_defaults

In [8]:
sweep_config = {
  'name': 'Assignment2_ReLoaded',
  'method': 'grid',
  'parameters': {
        'epochs': {
            'values': [5]
        },
        'num_filters':{
            'values':[16,32]
        },
        'filter_size':{
            'values':[3,5]
        },
        'lr': {
            'values': [0.001]
        },
        'activation':{
            'values':['relu']
        },
        'num_classes': {
            'values': [10]
        },
        'num_neurons_dense':{
            'values':[128]
        }
    }
}

In [9]:
# Define the model
class SmallCNN(nn.Module):
    def __init__(self, num_filters, filter_size, activation, num_classes, num_neurons_dense):
        super(SmallCNN, self).__init__()
        self.num_filters=num_filters
        self.filter_size=filter_size
        self.conv_blocks = nn.Sequential(
            # Convolutional blocks
            nn.Conv2d(3, num_filters, filter_size, padding=1),
            activation(),
            nn.MaxPool2d(2),

            nn.Conv2d(num_filters, 2*num_filters, filter_size, padding=1),
            activation(),
            nn.MaxPool2d(2),

            nn.Conv2d(2*num_filters, 4*num_filters, filter_size, padding=1),
            activation(),
            nn.MaxPool2d(2),

            nn.Conv2d(4*num_filters, 8*num_filters, filter_size, padding=1),
            activation(),
            nn.MaxPool2d(2),

            nn.Conv2d(8*num_filters, 16*num_filters, filter_size, padding=1),
            activation(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Sequential(
            # Fully connected layers
            # nn.Linear(num_filters * 4 * 4*8*8, num_neurons_dense),
            nn.Linear(num_filters * 4 * 4*(11-filter_size)*(11-filter_size), num_neurons_dense),
            activation(),
            nn.Linear(num_neurons_dense, num_classes),
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        x = self.conv_blocks(x)
        # x = x.view(-1, 48*8*8)
        x = x.view(-1, self.num_filters*4*4*(11-self.filter_size)*(11-self.filter_size))
        x = self.fc(x)
        # print(x.shape)
        return x

In [10]:
# Define the training function

def Train():
    # Initialize WandB
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    best_accuracy=0
    wandb.init(config=config_defaults)
    config = wandb.config
    activation_map={'relu':nn.ReLU}

    epochs=config['epochs']
    num_filters=config['num_filters']
    filter_size=config['filter_size']
    lr=config['lr']
    # activation=config['activation']
    activation = activation_map[config.activation]
    print('activation is........',activation)
    num_classes=config['num_classes']
    num_neurons_dense=config['num_neurons_dense']


    transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    ])

    transform_test = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])

    # Load the data
    train_data = ImageFolder(root='train', transform=transform_train)
    val_data = ImageFolder(root='val', transform=transform_test)

    train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=64, shuffle=False)



    # Define the model
    # Initialize the model and optimizer
    model = SmallCNN(num_filters=num_filters, filter_size=filter_size, activation=activation,
                    num_classes=num_classes, num_neurons_dense=num_neurons_dense)
    if torch.cuda.is_available():
      model.cuda()


    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)



    # Train the model

    for epoch in range(epochs):
        print('epoch is.......',epoch)

        running_loss = 0.0
        running_corrects = 0
        for i, data in enumerate(train_loader):


            inputs, labels = data

            inputs = inputs.to(device)
            labels = labels.to(device)
            # print('inputs are ..........',inputs)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Compute statistics
            _, preds = torch.max(outputs, 1)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

            # Log training loss and accuracy to W&B
            wandb.log({'train_loss': loss.item()})
            wandb.log({'train_accuracy': running_corrects.item() / len(train_data)})

            if i % 100 == 99:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / ((i+1) * 128)))


    print('training is done......,hurrah !')

    # Evaluate the model on the validation set
    running_loss = 0.0
    running_corrects = 0
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            # Get the inputs and labels
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Forward
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Compute statistics
            _, preds = torch.max(outputs, 1)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

            # Log test loss and accuracy to W&B
            wandb.log({'val_loss': loss.item()})
            wandb.log({'val_accuracy': running_corrects.item() / len(val_data)})

        val_loss = running_loss / len(val_data)
        val_acc = running_corrects.item() / len(val_data)
        if val_acc>best_accuracy:
          torch.save(model.state_dict(),'best_checkpoint.model')
          best_accuracy=val_acc

    # print('Epoch: {}, Test Loss: {:.3f}, Test Acc: {:.3f}'.format(epoch+1, val_loss, val_acc))

    params={'learning rate':lr,'filter size':filter_size,'activation':activation,'num neurons dense':num_neurons_dense,'validation loss':val_loss,'validation accuracy':val_acc}
    wandb.log(params)

    wandb.log({'final_test_accuracy': val_acc})
    wandb.finish()

In [15]:
model = SmallCNN(num_filters=32,filter_size=5,activation=nn.ReLU,num_classes=10,num_neurons_dense=128)
model.load_state_dict(torch.load('best_checkpoint.model',map_location=torch.device('cpu')))


<All keys matched successfully>

In [17]:
transform_test = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])

test_data = ImageFolder(root='test', transform=transform_test)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)
criterion = nn.CrossEntropyLoss()

running_loss = 0.0
running_corrects = 0
wandb.init()
with torch.no_grad():
    for i, data in enumerate(test_loader):
        # Get the inputs and labels
        inputs, labels = data
        # inputs = inputs.to(device)
        # labels = labels.to(device)
        # Forward
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Compute statistics
        _, preds = torch.max(outputs, 1)
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

        # Log test loss and accuracy to W&B
        wandb.log({'test_loss': loss.item()})
        wandb.log({'test_accuracy': running_corrects.item() / len(test_data)})

    test_loss = running_loss / len(test_data)
    test_acc = running_corrects.item() / len(test_data)

[34m[1mwandb[0m: Currently logged in as: [33mcs23z036[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
sweep_id = wandb.sweep(sweep_config,project='Assignment2_ReLoaded_18thJuly')


Create sweep with ID: 2bj6kli5
Sweep URL: https://wandb.ai/cs23z036/Assignment2_ReLoaded_18thJuly/sweeps/2bj6kli5


In [None]:
wandb.agent(sweep_id, Train)

[34m[1mwandb[0m: Agent Starting Run: fi18g57q with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_classes: 10
[34m[1mwandb[0m: 	num_filters: 16
[34m[1mwandb[0m: 	num_neurons_dense: 128
[34m[1mwandb[0m: Currently logged in as: [33mcs23z036[0m. Use [1m`wandb login --relogin`[0m to force relogin


activation is........ <class 'torch.nn.modules.activation.ReLU'>
epoch is....... 0
[1,   100] loss: 1.151
epoch is....... 1
[2,   100] loss: 1.146
epoch is....... 2
[3,   100] loss: 1.135
epoch is....... 3
[4,   100] loss: 1.131
epoch is....... 4
[5,   100] loss: 1.123
training is done......,hurrah !


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.115739…

0,1
filter size,▁
final_test_accuracy,▁
learning rate,▁
num neurons dense,▁
train_accuracy,▁▁▂▂▃▃▄▅▁▂▂▃▃▄▅▅▁▂▃▄▅▅▆▇▁▂▃▄▅▆▇▇▁▂▃▄▅▆▇█
train_loss,▇▇▇▇▇▇▇▆▇▄▇▆█▅▇▇▆▆▃▄▃▇▄▃▅▆▆▃▄▅▆▂▆▄▁▅▃▁▄▁
val_accuracy,▁▁▁▁▁▁▁▂▂▂▂▃▃▃▄▅▅▅▅▆▆▆▆▆▆▆▇▇████
val_loss,▇▇▇▇▆▇▆▄▂▃▅▄▄▂▁▁▆▄▅▄▅▅▇██▅▂▂▄▆▆▆
validation accuracy,▁
validation loss,▁

0,1
activation,torch.nn.modules.act...
filter size,3
final_test_accuracy,0.20336
learning rate,0.001
num neurons dense,128
train_accuracy,0.19561
train_loss,2.28248
val_accuracy,0.20336
val_loss,2.28621
validation accuracy,0.20336


[34m[1mwandb[0m: Agent Starting Run: d2devm9h with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_classes: 10
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	num_neurons_dense: 128


activation is........ <class 'torch.nn.modules.activation.ReLU'>
epoch is....... 0
[1,   100] loss: 1.151
epoch is....... 1
[2,   100] loss: 1.151
epoch is....... 2
[3,   100] loss: 1.150
epoch is....... 3
[4,   100] loss: 1.138
epoch is....... 4
[5,   100] loss: 1.128
training is done......,hurrah !


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.1125, …

0,1
filter size,▁
final_test_accuracy,▁
learning rate,▁
num neurons dense,▁
train_accuracy,▁▂▂▂▃▃▄▄▁▂▂▃▃▄▄▅▁▂▂▃▃▄▅▅▁▂▃▃▄▅▆▇▁▂▃▄▅▆▇█
train_loss,███████████████████▇█▇▅▆▅▆▇▆▅▃▇▃▂▁▅▆▄▁▄▅
val_accuracy,▁▁▁▂▃▄▅▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇█████
val_loss,███▃▁▂▄█▇▇█▇▇██▇▇▆▇██████▆▄▅▇███
validation accuracy,▁
validation loss,▁

0,1
activation,torch.nn.modules.act...
filter size,3
final_test_accuracy,0.16881
learning rate,0.001
num neurons dense,128
train_accuracy,0.18513
train_loss,2.22167
val_accuracy,0.16881
val_loss,2.3997
validation accuracy,0.16881


[34m[1mwandb[0m: Agent Starting Run: 6tj4gxqx with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_classes: 10
[34m[1mwandb[0m: 	num_filters: 16
[34m[1mwandb[0m: 	num_neurons_dense: 128


activation is........ <class 'torch.nn.modules.activation.ReLU'>
epoch is....... 0
[1,   100] loss: 1.151
epoch is....... 1
[2,   100] loss: 1.141
epoch is....... 2
[3,   100] loss: 1.126
epoch is....... 3
[4,   100] loss: 1.129
epoch is....... 4
[5,   100] loss: 1.122
training is done......,hurrah !


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
filter size,▁
final_test_accuracy,▁
learning rate,▁
num neurons dense,▁
train_accuracy,▁▁▂▂▃▃▄▄▁▂▂▃▄▄▅▆▁▂▃▄▅▆▇▇▁▂▃▄▅▆▇█▂▂▃▄▅▆▇█
train_loss,███████▇█▆▇▆▇▆▅▅▃▇▂▅▄▆▂▆▆█▅▅▄▇▃▅▅▄▄▆▄▂▃▁
val_accuracy,▁▁▁▂▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇█████
val_loss,▇▇▇▂▂▁▃███▇▇▇▃▃▂▄▄▅▇▇▇▆▅▅▅▁▁▅▅▆▆
validation accuracy,▁
validation loss,▁

0,1
activation,torch.nn.modules.act...
filter size,5
final_test_accuracy,0.19102
learning rate,0.001
num neurons dense,128
train_accuracy,0.19771
train_loss,2.25463
val_accuracy,0.19102
val_loss,2.27377
validation accuracy,0.19102


[34m[1mwandb[0m: Agent Starting Run: kntk3com with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_classes: 10
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	num_neurons_dense: 128


activation is........ <class 'torch.nn.modules.activation.ReLU'>
epoch is....... 0
[1,   100] loss: 1.151
epoch is....... 1
[2,   100] loss: 1.151
epoch is....... 2
[3,   100] loss: 1.151
epoch is....... 3
[4,   100] loss: 1.151
epoch is....... 4
[5,   100] loss: 1.146
training is done......,hurrah !


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.115781…

0,1
filter size,▁
final_test_accuracy,▁
learning rate,▁
num neurons dense,▁
train_accuracy,▁▂▂▃▄▅▅▆▁▂▃▃▄▅▆▇▁▂▃▃▄▅▆▆▁▂▃▄▅▅▆▇▁▂▃▄▅▆▇█
train_loss,▆▅▅▅▅▅▅▅▅▅▆▆▅▅▅▅▅▄▅▅▅▅▅▅▅▅▅▅▅▅▄▆▄▅▂▅▄▅▁█
val_accuracy,▁▁▁▁▁▁▁▂▃▄▄▄▄▅▆▇▇▇▇▇████████████
val_loss,██████▇▅▄▅█▇▇▂▁▁▇██▇▆▇██████████
validation accuracy,▁
validation loss,▁

0,1
activation,torch.nn.modules.act...
filter size,5
final_test_accuracy,0.15597
learning rate,0.001
num neurons dense,128
train_accuracy,0.13222
train_loss,2.27272
val_accuracy,0.15597
val_loss,2.41986
validation accuracy,0.15597


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


Question 3 (15 Marks)
Based on the above plots write down some insightful observations

Insighful Observations:


Increasing the filter size and and adding number of filters improved the accuracy on validation set.


Adding number of dense neurons could have improved the accuracy as well




Part B

Question1 :

a)The dimensions of the images in your data may not be the same as that in the ImageNet data. How will you address this?

Ans:

To address the issue of different image dimensions, we can use image resizing techniques to match the dimensions of our input images with those of the pre-trained model. There are several ways to perform image resizing, and one common approach is to either scale the images proportionally or pad them with zeros to fit the desired size.

We can use libraries such as Pillow or OpenCV to resize the images. These libraries provide functions that allow us to specify the target size of the image, as well as the resizing method to be used, such as nearest neighbor interpolation or bilinear interpolation. It's important to note that resizing can introduce distortions or loss of information, so we need to choose the resizing method carefully, depending on the nature of our data and the requirements of our task.

b)ImageNet has 1000 classes and hence the last layer of the pre-trained model would have 1000 nodes. However, the naturalist dataset has only 10 classes. How will you address this?

Ans:

To address the difference in the number of classes between the pre-trained model and our target dataset, we need to modify the last layer of the pre-trained model to have the same number of output nodes as the number of classes in our dataset. Specifically, we need to replace the last fully connected layer of the pre-trained model with a new fully connected layer that has the desired number of output nodes.

In this case, since the naturalist dataset has only 10 classes, we can replace the last layer of the pre-trained model with a new fully connected layer that has 10 output nodes. We can do this by creating a new instance of the fully connected layer in PyTorch with 10 output nodes and replacing the last layer of the pre-trained model with this new layer.

Once we have replaced the last layer, we can then fine-tune the entire model on the naturalist dataset. During fine-tuning, the weights of the pre-trained model will be updated to adapt to the new dataset, while the weights of the new output layer will be initialized randomly and trained from scratch to classify the new classes.

Question2 :

You will notice that GoogLeNet, InceptionV3, ResNet50, VGG, EfficientNetV2, VisionTransformer are very huge models as compared to the simple model that you implemented in Part A. Even fine-tuning on a small training data may be very expensive. What is a common trick used to keep the training tractable (you will have to read up a bit on this)? Try different variants of this trick and fine-tune the model using the iNaturalist dataset. For example, '___'ing all layers except the last layer, '___'ing upto k layers and '___'ing the rest. Read up on pre-training and fine-tuning to understand what exactly these terms mean.

Write down the at least 3 different strategies that you tried (simple bullet points would be fine).


Ans:

1.Fine-tuning the Classifier:

Fine-tune the pre-trained model on the new dataset, but only update the weights of the last few layers (i.e., the classifier) while keeping the weights of the remaining layers fixed.

2.Fine-tuning the Full Model:

Fine-tune the entire pre-trained model on the new dataset with a lower learning rate.
This strategy can be useful when the pre-trained model is relatively similar to the new dataset, and we have a relatively large amount of training data


Question 3:


Now fine-tune the model using ANY ONE of the listed strategies that you discussed above. Based on these experiments write down some insightful inferences comparing training from scratch and fine-tuning a large pre-trained model


Ans:


Fine-tuning a pre-trained model is usually faster than training a model from scratch, as the pre-trained model already has learned a set of general features that can be useful for the new task.
Fine-tuning a pre-trained model can lead to better performance than training from scratch, especially when the target dataset is relatively small or similar to the pre-training dataset.

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [None]:
#code

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set hyperparameters
num_epochs = 5
batch_size = 64
learning_rate = 0.001

# Load data
"""
train_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])

val_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.ToTensor(),
])
"""
transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

transform_test = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])


train_data = datasets.ImageFolder("train", transform=transform_train)
val_data = datasets.ImageFolder("val", transform=transform_test)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=False)

# Load pre-trained ResNet50 model
model = models.resnet50(pretrained=True)
# model = model()




# if torch.cuda.is_available():
#     model.cuda()

# Replace the last fully connected layer with a new one that has 10 output nodes
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)

# Freeze all the pre-trained layers except the last few layers
for param in model.parameters():
    param.requires_grad = False
for param in model.layer4.parameters():
    param.requires_grad = True
for param in model.fc.parameters():
    param.requires_grad = True

if torch.cuda.is_available():
    model.cuda()
# Define the optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Fine-tune the model
num_epochs = 5

for epoch in range(num_epochs):
    print('epoch is ...........',epoch)
    # Set to training mode
    model.train()

    train_loss = 0.0
    train_acc = 0.0

    # Iterate over the training data
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and update
        loss.backward()
        optimizer.step()

        # Compute the accuracy
        _, preds = torch.max(outputs, 1)
        train_acc += torch.sum(preds == labels.data)

        # Compute the loss
        train_loss += loss.item() * images.size(0)

    # Compute the average accuracy and loss for this epoch
    train_acc = train_acc.double() / len(train_loader.dataset)
    train_loss = train_loss / len(train_loader.dataset)

    # Print the progress
    print('Epoch [{}/{}], Train Loss: {:.4f}, Train Acc: {:.4f}'.format(epoch+1, num_epochs, train_loss, train_acc))

# Set to evaluation mode
model.eval()

val_loss = 0.0
val_acc = 0.0

# Disable gradient computations
with torch.no_grad():
    # Iterate over the validation data
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Compute the accuracy
        _, preds = torch.max(outputs, 1)
        val_acc += torch.sum(preds == labels.data)

        # Compute the loss
        val_loss += loss.item() * images.size(0)

# Compute the average accuracy and loss for this epoch
val_acc = val_acc.double() / len(val_loader.dataset)
val_loss = val_loss / len(val_loader.dataset)

# Print the progress
print('Epoch [{}/{}], Val Loss: {:.4f}, Val Acc: {:.4f}'.format(epoch+1, num_epochs, val_loss, val_acc))


epoch is ........... 0
Epoch [1/5], Train Loss: 1.0439, Train Acc: 0.6569
epoch is ........... 1
Epoch [2/5], Train Loss: 0.6702, Train Acc: 0.7840
epoch is ........... 2
Epoch [3/5], Train Loss: 0.4981, Train Acc: 0.8415
epoch is ........... 3
Epoch [4/5], Train Loss: 0.3877, Train Acc: 0.8756
epoch is ........... 4
Epoch [5/5], Train Loss: 0.2872, Train Acc: 0.9021
Epoch [5/5], Val Loss: 0.9882, Val Acc: 0.7443
