# **Train CNN/VGG model using Transfer Learning Approach**
# Image Classsification using Transfer-Learning VGG11



- Define the CNN image folder data paths
- Set intial parameters of a pre-trained model
- Initialize and download a pre-trained model
- Apply data augmentation techniques
- Create Image Datasets [train & valid]
- Creates Dataloaders [train & valid]
- Check the availablity of GPU
- Send the model to the device and select which parameters to update
- Define the optimizer function
- Define the loss function
- Define model training function
- Train the model
- Save the model
- Evaluate the model using accuracy metrics

### Mount Google Drive

In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


## Add absolute path to the project folder

In [2]:
import sys

sys.path.append("/content/drive/MyDrive/UNECA_MachineLearning_Project/")

# See the full list of paths in sys.path
sys.path

['/content',
 '/env/python',
 '/usr/lib/python310.zip',
 '/usr/lib/python3.10',
 '/usr/lib/python3.10/lib-dynload',
 '',
 '/usr/local/lib/python3.10/dist-packages',
 '/usr/lib/python3/dist-packages',
 '/usr/local/lib/python3.10/dist-packages/IPython/extensions',
 '/root/.ipython',
 '/content/drive/MyDrive/UNECA_MachineLearning_Project/']

## Add Base Directory

In [3]:
# Sets the base directory variable
BASE_DIR = '/content/drive/MyDrive/UNECA_MachineLearning_Project/'

In [4]:
import os
# Change the current working directory
os.chdir(BASE_DIR)

# Print the current working directory to verify the change
print("Current Working Directory:", os.getcwd())

Current Working Directory: /content/drive/MyDrive/UNECA_MachineLearning_Project


## Importing necessary python libraries and modules

In [5]:
# The main PyTorch library
import torch

# Contains classes for defining and training neural networks.
import torch.nn as nn

#  Provides various optimization algorithms for training neural networks.
import torch.optim as optim

#  Provides a progress bar for iterating over loops in Jupyter notebooks.
from tqdm.notebook import tqdm

# A library for numerical computations in Python.
import numpy as np

# Contains standard datasets, pre-trained models, image transformations for data augmentation and preprocessing.
from torchvision import datasets, models, transforms

# A plotting library for creating visualizations
import matplotlib.pyplot as plt

# A module for measuring and manipulating time-related functions
import time

# A module for interacting with the operating system, such as file operations.
import os

# A module for creating copies of objects.
import copy

## Define the CNN image folder data paths

In [6]:
CNN_IMAGE_DIR = os.path.join(BASE_DIR, 'cnn_images')
print(CNN_IMAGE_DIR)

/content/drive/MyDrive/UNECA_MachineLearning_Project/cnn_images


## Define CNN model path

In [21]:
CNN_SAVE_DIR = os.path.join(BASE_DIR, 'models')

# Print the variables
print(CNN_SAVE_DIR)

/content/drive/MyDrive/UNECA_MachineLearning_Project/models


## Create CNN model directory

In [22]:
# os.makedirs(os.path.join(CNN_SAVE_DIR), exist_ok=False)

FileExistsError: ignored

## Set parameters of a pre-trained CNN:VGG11 model



In [7]:
# Data directory
data_dir = CNN_IMAGE_DIR

# Name of the pre-trained model
model_name = "vgg"
# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception, GoogLeNet, ConvNeXt, EfficientNet]
# https://pytorch.org/vision/stable/models.html

# Types of vgg models
# vgg11, vgg11_bn, vgg13, vgg13_bn, vgg16, vgg16_bn, vgg19, vgg19_bn

# Number of classes in the dataset
num_classes = 3

# Batch size for training (change depending on how much memory you have)
batch_size = 8

# Number of epochs
num_epochs = 30

# Flag for feature extracting.
# When False, we finetune the whole model,
feature_extract = True

# A boolean value indicating whether to use pre-trained weights or not.
# By default, it is set to True.
use_pretrained=True

# the input images should have a height and width of 224 pixels.
input_size = 224

## Initializes a pre-trained CNN:VGG11 model

In [8]:
# Define a function name and model parameters (model_name, num_classes, feature_extract, and use_pretrained.)

def initialize_model(model_name, num_classes, feature_extract, use_pretrained):
    # initializes a VGG-11 model with batch normalization, using the pre-trained weights if use_pretrained is True.
    model_ft = models.vgg11_bn(pretrained=use_pretrained)

    # set the gradients of the parameters based on the value of feature_extract.
    set_parameter_requires_grad(model_ft, feature_extract)

    # retrieves the number of input features to the last fully connected layer of the model.
    # number 6 is used to index the last fully connected layer within the classifier of the model.
    num_ftrs = model_ft.classifier[6].in_features

    # replaces the last fully connected layer with a new linear layer that matches the number of input features and the specified num_classes.
    model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)

    # returns the modified model and input_size,
    return model_ft, input_size

# Freezing the pre-trained layers: (Preventing Overfitting, Focusing Training on New Data, Utilizing Pre-trained Features)
# To prevent overfitting, focus training on the new data, and leverage valuable pre-trained features for improved model performance.
# Defines a function named set_parameter_requires_grad that takes two parameters: model and feature_extracting
def set_parameter_requires_grad(model, feature_extracting):
    #  checks if the feature_extracting flag is True.
    if feature_extracting:
        # iterates over all the parameters of the PyTorch model.
        for param in model.parameters():
            # sets the requires_grad attribute of each parameter to False if feature_extracting is True
            param.requires_grad = False

## Initialize and Download the model

In [9]:
# initialize the model and obtain the input size for the new dataset.
model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)

Downloading: "https://download.pytorch.org/models/vgg11_bn-6002323d.pth" to /root/.cache/torch/hub/checkpoints/vgg11_bn-6002323d.pth
100%|██████████| 507M/507M [00:08<00:00, 59.5MB/s]


## Print the model architecture (VGG-11 with batch normalization)

In [10]:
model_ft

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(ke

## Model architecture
---

VGG-11 model has 11 layers,

- Feature Extraction:
  - Convolutional layers: 8

- Classifier:

  - Fully connected layers: 3

---

**Feature Extraction**: Sequential model is used for the feature extraction layers.

```
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
```

- **Input channesl**: number of channels in the input data, such as the red, green, and blue channels in an RGB image.

- **Output channels**: The number of channels produced by a convolutional layer.

- **Kernel size**: The size of the filter or kernel used in a convolutional layer.

- **Stride**: The number of pixels by which the filter is moved during the convolution operation.

- **Batch normalization layer**: A layer that normalizes the input of each layer to have a mean close to 0 and a standard deviation close to 1.

- **Activation layer**: A layer that applies a non-linear activation function to introduce non-linearity into the network.

- **MaxPool2d**: A layer that performs 2D max pooling, which downsamples the input by taking the maximum value over the window defined by the kernel size.

- **Dilation**: The spacing between the kernel elements in a convolutional layer.

- **Ceil_mode**: A parameter in max pooling that determines whether to use the ceil or floor function to compute the output size.

- **eps**: A small value added to the variance to avoid dividing by zero in batch normalization.

- **Momentum**: A parameter in batch normalization that is used for the running mean and standard deviation computation.

- **Affine**: A parameter in batch normalization that determines whether to apply an affine transformation after normalization.

- **Track_running_stats**: A parameter in batch normalization that determines whether to track the running mean and variance.

---

**Classification**: Sequential model is used for classification.

```
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False))
```

- **Linear**: refers to the fully connected layer. It applies a linear transformation to the incoming data, i.e., (y = xA^T + b).

- **in_features**: refers to the size of the input to a linear layer.

- **out_features**: refers to the size of the output of a linear layer.

- **bias**: is a parameter in linear layers that determines whether to include a bias or not.

- **Dropout**: is a regularization technique where during training, randomly selected neurons are ignored to prevent overfitting.

---

## **Data Transformation/Augmentation Technique**:


- Involves creating new images by applying various transformations to the existing images, such as:
  - rotating
  - flipping,
  - changing the brightness.

- Date augmentation:
  - help to increase the size of the dataset.
  - improve the robustness of machine learning models.
  - Increased data diversity
  - enhance the model's ability to generalize



- **Tensor transform** converts a **PIL image** or **numpy ndarray** into a PyTorch tensor.
  - Rescales the values to be between 0 and 1 (divides by 255 if image is uint8)
  - Rearranges dimensions for CNN models
  - Rearranges dimensions for CNN models: Dimensions change from HxWxC to CxHxW (channels first)
  - The resulting tensor has a dtype of torch.float32

In [11]:
data_transforms = {
    'train': transforms.Compose([
        # randomly resizes the input image to the specified input size, maintaining the aspect ratio.
        transforms.RandomResizedCrop(input_size),
        # randomly flips the image horizontally with a 50% probability.
        transforms.RandomHorizontalFlip(),
        # converts the image from PIL (Python Imaging Library) format to a PyTorch tensor.
        transforms.ToTensor(),
        # normalizes the input image by subtracting the mean value and dividing by the standard deviation
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'valid': transforms.Compose([
        # resizes the input image to the specified input size without cropping.
        transforms.Resize(input_size),
        # crops the image to the specified input size, ensuring that the cropped area is centered.
        transforms.CenterCrop(input_size),
        # converts the image from PIL (Python Imaging Library) format to a PyTorch tensor.
        transforms.ToTensor(),
        # normalizes the input image by subtracting the mean value and dividing by the standard deviation
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

## Create Image Datasets [train & valid]

- It uses the **datasets.ImageFolder** class

- It takes as input the image data directory [data_dir] and the data transformations [data_transforms] functions.

In [12]:
# Create training and validation datasets
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'valid']}

## Creates Dataloaders [train & valid]

It uses the **torch.utils.data.DataLoader** class to load the data

It specifies:

  - batch size (batch_size),
  - shuffling of the data (shuffle=True), and
  - the number of workers for data loading (num_workers=4)

In [13]:
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True, num_workers=4) for x in ['train', 'valid']}

##  Checks if a GPU is available

- It checks if a GPU is available using **torch.cuda.is_available**

- Assigns the device as "**cuda:0**" if a GPU is available, otherwise it assigns "cpu"

In [14]:
# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('device:', device)

device: cuda:0


##  Send the model to the device (GPU or CPU)  and select which parameters to update



In [15]:
# sends the model to the GPU if available
model_ft = model_ft.to(device)

# assigns a list of all the model parameters
params_to_update = model_ft.parameters()

print("Params to learn:")
if feature_extract:
    params_to_update = []
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            print("\t",name)
else:
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            print("\t",name)

Params to learn:
	 classifier.6.weight
	 classifier.6.bias


## Optimizer: Stochastic Gradient Descent (SGD)

- **learning rate**: 1e-4: is a hyperparameter that determines how much the model weights are updated in response to the estimated error.

- A higher learning rate can lead to faster convergence, but may also cause the optimizer to overshoot the optimal solution.

- **momentum**: 0.9: is a technique used in machine learning optimization algorithms to accelerate the convergence of the optimization. (improve convergence speed, reduce oscillations, avoid becoming trapped in local minima).


In [16]:
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(params_to_update, lr=1e-4, momentum=0.9)

## Defines the loss function

- It combines the softmax activation function and the negative log-likelihood loss.

- it serves as a measure of how well the model is performing during training.

Loss function:

  - Mean Squared Error (MSE):  It is often used for regression problems.
   - average squared difference between the predicted and true values.


  - Binary Cross Entropy (BCE): Computes the average binary cross-entropy loss between the predicted and true binary labels.
   - It is commonly used for binary classification problems.

  
  - Categorical Cross Entropy (CCE): Calculates the average cross-entropy loss between the predicted and true class labels.
   - It is commonly used for multi-class classification problems.

In [17]:
# Setup the loss function
criterion = nn.CrossEntropyLoss()

## Function to train the model

-  It specifies the:
 - data loaders (training and validation sets)
 - loss criterion
 - optimizer
 - number of epochs.

- After each epoch, it calculates and prints the average loss and accuracy for both training and validation phases.

- keep track of the best model weights, best accuracy, and validation accuracy history.



In [18]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25):
    # records the current time, which is used to calculate the total training time at the end of the function
    since = time.time()

    # initializes an empty list to store the validation accuracy history.
    val_acc_history = []

    # creates a deep copy of the model's state, which will be used to store the best model weights.
    best_model_wts = copy.deepcopy(model.state_dict())

    # initializes the best validation accuracy to 0.
    best_acc = 0.0

    # checks if the current epoch is greater than 10. If it is, the following code block is executed:
    # iterates over each parameter in the model
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        if epoch > 10:
            # fine tune whole model
            for param in model_ft.parameters():
                param.requires_grad = True
            optimizer = optim.SGD(model_ft.parameters(), lr=1e-4, momentum=0.9)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'valid':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

##  Train the model
It takes the following arguments:
- specified model: PyTorch model to be trained
- data loaders: dictionary containing the training and validation data loaders
- criterion: loss function used during training
- optimizer: optimizer used to update the model parameters (learning rate of 1e-4 and momentum of 0.9)
- number of epochs: number of epochs to train the model (25)

In [19]:
# Train and evaluate
model_ft, hist = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs)

Epoch 0/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.9208 Acc: 0.5623


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.8467 Acc: 0.6034

Epoch 1/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.8967 Acc: 0.5781


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.8329 Acc: 0.6096

Epoch 2/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.8896 Acc: 0.5795


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.8122 Acc: 0.6186

Epoch 3/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.8811 Acc: 0.5861


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.8135 Acc: 0.6190

Epoch 4/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.8771 Acc: 0.5917


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.8072 Acc: 0.6167

Epoch 5/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.8755 Acc: 0.5860


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.7926 Acc: 0.6208

Epoch 6/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.8764 Acc: 0.5859


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.7911 Acc: 0.6298

Epoch 7/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.8757 Acc: 0.5868


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.7950 Acc: 0.6273

Epoch 8/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.8724 Acc: 0.5900


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.7971 Acc: 0.6233

Epoch 9/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.8715 Acc: 0.5909


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.7955 Acc: 0.6240

Epoch 10/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.8741 Acc: 0.5885


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.7937 Acc: 0.6271

Epoch 11/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.8369 Acc: 0.6052


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.7210 Acc: 0.6640

Epoch 12/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.7970 Acc: 0.6280


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.6955 Acc: 0.6709

Epoch 13/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.7685 Acc: 0.6440


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.6748 Acc: 0.6916

Epoch 14/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.7494 Acc: 0.6536


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.6380 Acc: 0.7114

Epoch 15/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.7312 Acc: 0.6601


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.6357 Acc: 0.7115

Epoch 16/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.7196 Acc: 0.6704


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.6255 Acc: 0.7168

Epoch 17/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.7028 Acc: 0.6808


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.6144 Acc: 0.7196

Epoch 18/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.6929 Acc: 0.6846


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.6067 Acc: 0.7277

Epoch 19/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.6799 Acc: 0.6883


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.6131 Acc: 0.7245

Epoch 20/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.6696 Acc: 0.6923


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.5885 Acc: 0.7354

Epoch 21/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.6618 Acc: 0.7016


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.5842 Acc: 0.7375

Epoch 22/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.6526 Acc: 0.7038


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.5730 Acc: 0.7456

Epoch 23/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.6488 Acc: 0.7066


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.5648 Acc: 0.7466

Epoch 24/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.6378 Acc: 0.7119


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.5704 Acc: 0.7515

Epoch 25/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.6307 Acc: 0.7141


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.5615 Acc: 0.7513

Epoch 26/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.6244 Acc: 0.7200


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.5563 Acc: 0.7583

Epoch 27/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.6165 Acc: 0.7177


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.5408 Acc: 0.7597

Epoch 28/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.6095 Acc: 0.7253


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.5454 Acc: 0.7578

Epoch 29/29
----------


  0%|          | 0/3390 [00:00<?, ?it/s]

train Loss: 0.6049 Acc: 0.7290


  0%|          | 0/848 [00:00<?, ?it/s]

valid Loss: 0.5499 Acc: 0.7590

Training complete in 211m 17s
Best val Acc: 0.759735


## Saved the model

In [23]:
# creates a file path for the saved model

path = os.path.join(CNN_SAVE_DIR, 'trained_model_VGG11_145.pt')

# checks if a file already exists at the specified path. If a file exists, an error message is printed.
assert not os.path.isfile(path), print('A model is already saved at this location')
print(f'Saving model to {path}')

# prints a message indicating the file path where the model will be saved.
torch.save(model_ft, path)

Saving model to /content/drive/MyDrive/UNECA_MachineLearning_Project/models/trained_model_VGG11_145.pt


## Calculate the accuracy of the model

- evaluate the trained model on the training data.
- check how well the model is performing on the data it was trained on.
- The accuracy of the model is calculated by dividing the number of correct predictions by the total number of predictions.

In [24]:
# Set model to evaluate mode
model_ft.eval()

# initializes the cross-entropy loss function.
criterion = nn.CrossEntropyLoss()

# initializes the running loss to 0.
running_loss = 0.0

# initializes the number of correct predictions to 0.
running_corrects = 0

# initializes the total number of predictions to 0.
total = 0

# loop iterates over the training data using the tqdm function to display a progress bar.
for inputs, labels in tqdm(dataloaders_dict['train']):

    # sends the input data to the GPU if available.
    inputs = inputs.to(device)

    # sends the label data to the GPU if available.
    labels = labels.to(device)

    # sets the gradient calculation to disabled.
    with torch.set_grad_enabled(False):
        # the input data through the model to get the output.
        outputs = model_ft(inputs)

        # the loss between the predicted output and the actual labels.
        loss = criterion(outputs, labels)

        # extracts the predicted class labels from the output probabilities.
        _, preds = torch.max(outputs, 1)

    # updates the number of correct predictions by summing the number of correct predictions in the batch.
    running_loss += loss.item() * inputs.size(0)
    running_corrects += torch.sum(preds == labels.data)

    # updates the total number of predictions by adding the batch size.
    total += len(preds)

# prints the accuracy of the model on the training data.
print(running_corrects.double()/total)

  0%|          | 0/3390 [00:00<?, ?it/s]

tensor(0.7580, device='cuda:0', dtype=torch.float64)
