In [None]:
import torch
import torchvision.models as models


# Record once whether a CUDA device is usable; later cells branch on this flag.
use_cuda = torch.cuda.is_available()

# Instantiate VGG16 with its pretrained weights and place it on the GPU when
# one is present (otherwise it stays where it was created).
VGG16 = models.vgg16(pretrained=True)
VGG16 = VGG16.cuda() if use_cuda else VGG16

In [8]:
# Making Predictions with a Pre-trained Model
from PIL import Image
import torchvision.transforms as transforms

def VGG16_predict(img_path):
    """Return the class index that the pretrained VGG16 predicts for an image.

    Args:
        img_path: path to an image file that PIL can open.

    Returns:
        int: index of the highest-scoring output class.
    """
    my_image = Image.open(img_path).convert('RGB')

    # Preprocessing the ImageNet-pretrained torchvision classifiers expect:
    # a 224x224 crop, pixel values scaled to [0, 1] by ToTensor, then
    # per-channel normalization with the ImageNet mean / std.
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])])

    # Add the batch dimension and send the input to whichever device the
    # model already lives on, instead of dragging the model back to the CPU
    # on every call.
    device = next(VGG16.parameters()).device
    batch = transform(my_image).unsqueeze(0).to(device)

    # eval() switches off the dropout layers in VGG16's classifier so the
    # prediction is deterministic; no_grad() skips building an autograd graph.
    VGG16.eval()
    with torch.no_grad():
        result = VGG16(batch)

    return torch.max(result, 1)[1].item()  # predicted class index

In [9]:
# Sanity check: class index predicted for one image.
# NOTE(review): `dog_files` is not defined in the visible cells; presumably built in an earlier cell.
VGG16_predict(dog_files[1])

162

In [10]:
def dog_detector(img_path):
    """Report whether VGG16 places the image at ``img_path`` in the dog range.

    Returns True when the index returned by ``VGG16_predict`` lies between
    151 and 268 inclusive (presumably the dog-breed categories of the
    ImageNet-1k label set), False otherwise.
    """
    dog_class_indices = range(151, 269)  # upper bound is exclusive
    predicted_index = VGG16_predict(img_path)
    return predicted_index in dog_class_indices


In [11]:
# Spot-check the detector on a single image.
# NOTE(review): `dog_files` is not defined in the visible cells; presumably built in an earlier cell.
dog_detector(dog_files[800])

True

In [12]:
import os
from torchvision import datasets

### data loaders for training, validation, and test sets

# Loader settings shared by every split.
num_workers = 0   # 0 -> batches are loaded in the main process (no worker subprocesses)
batch_size = 20   # samples per batch

# Image folders, one per split (train / valid / test directory layout).
train_path, valid_path, test_path = (
    'dogImages/train',
    'dogImages/valid',
    'dogImages/test',
)

# Per-channel normalization using the ImageNet mean / std.
standard_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

In [13]:
# Preprocessing pipelines, keyed by split name.
#   train: random resized crop to 224 plus random horizontal flip (augmentation
#          that adds variability to the training images)
#   valid: resize to 256, then center-crop to 224
#   test : resize directly to 224x224
# Every pipeline ends by converting to a tensor and applying the ImageNet
# normalization defined in `standard_normalize`.
data_transforms_dict = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        standard_normalize,
    ]),
    valid=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        standard_normalize,
    ]),
    test=transforms.Compose([
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        standard_normalize,
    ]),
)

In [14]:
# One ImageFolder dataset per split: each pairs a split's image directory with
# the preprocessing pipeline registered for it in data_transforms_dict, so the
# transforms run as images are loaded.
train_data = datasets.ImageFolder(root=train_path, transform=data_transforms_dict['train'])
valid_data = datasets.ImageFolder(root=valid_path, transform=data_transforms_dict['valid'])
test_data = datasets.ImageFolder(root=test_path, transform=data_transforms_dict['test'])

In [15]:
# Batch iterators for the three splits, collected in `loaders_scratch` under
# the keys 'train', 'valid' and 'test'.
#
# Only the training loader shuffles. Validation and test are read in a fixed
# order: the loss reported for them is a mean of per-batch means, and the last
# batch is smaller than the rest, so reshuffling would change which samples
# land in that short batch and make the reported loss vary between runs.

train_loader = torch.utils.data.DataLoader(train_data,
                                           batch_size=batch_size,
                                           num_workers=num_workers,
                                           shuffle=True)

valid_loader = torch.utils.data.DataLoader(valid_data,
                                           batch_size=batch_size,
                                           num_workers=num_workers,
                                           shuffle=False)

test_loader = torch.utils.data.DataLoader(test_data,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          shuffle=False)

loaders_scratch = {'train': train_loader,
                   'valid': valid_loader,
                   'test': test_loader}

In [16]:
# Number of samples in the training, validation, and test datasets, shown as a
# tuple in that order.

len(train_data), len(valid_data), len(test_data)

(6680, 835, 836)

In [17]:
# Shallow copy of the loader dictionary: `loaders_transfer` is a new dict, but
# its 'train' / 'valid' / 'test' entries are the very same DataLoader objects
# held by `loaders_scratch`.
loaders_transfer = dict(loaders_scratch)

In [18]:
import torchvision.models as models
import torch.nn as nn

# Instantiate ResNet-50 from torchvision.models with its pre-trained weights
# (pretrained=True). This network is the base architecture for the transfer-
# learning steps in the following cells.

model_transfer = models.resnet50(pretrained=True)
# Bare expression: the notebook displays the module's layer structure.
model_transfer

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 87.7MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [19]:
# model_transfer.fc is the final fully connected layer of the ResNet-50 model.
# It takes the features produced by the preceding layers and maps them to the
# model's final output.
model_transfer.fc

Linear(in_features=2048, out_features=1000, bias=True)

in_features: The number of input features to the fully connected layer, which is 2048 in this case. This typically corresponds to the number of output features from the preceding layers, often the output of the last convolutional layer in the neural network architecture.

out_features: The number of output features or classes produced by the fully connected layer, which is 1000 in this case. This indicates that the output of this layer is a vector of size 1000, where each element represents the model's confidence score for a specific class.

bias: A boolean indicating whether the layer includes a bias term in the linear transformation. When bias=True, as in this case, the layer includes a bias term.




In [20]:
# Inspect the size of the current fully connected layer (fc) of model_transfer:
#   model_transfer.fc.in_features  -> number of input features the layer receives
#   model_transfer.fc.out_features -> number of output features (classes) it produces
# Useful for understanding the dimensions of the data flowing through this layer.
(model_transfer.fc.in_features, model_transfer.fc.out_features)

(2048, 1000)

The output (2048, 1000) represents the dimensions of the fully connected layer (fc) in the model_transfer. Specifically:

2048 corresponds to the number of input features to the fully connected layer. This indicates that the fully connected layer receives 2048-dimensional input.

1000 corresponds to the number of output features or classes produced by the fully connected layer. This indicates that the fully connected layer produces a 1000-dimensional output, with each dimension representing the confidence score for one of the 1000 ImageNet classes.

In [21]:
# Prepare the pre-trained ResNet-50 for transfer learning on dog-breed
# classification: freeze the pre-trained weights so their learned features are
# preserved, then swap in a new final classification layer.
total_dog_classes = 133  # number of dog breeds the model will be trained to classify

# Width of the existing head's input, read before the head is replaced.
num_in_features = model_transfer.fc.in_features

# Freeze every parameter currently in the network.
for param in model_transfer.parameters():
    param.requires_grad = False

# The replacement layer is created after the freeze, so its parameters keep the
# default requires_grad=True and are the only trainable ones.
model_transfer.fc = nn.Linear(in_features=num_in_features,
                              out_features=total_dog_classes)

model_transfer = model_transfer.cuda() if use_cuda else model_transfer

print(model_transfer.fc)

Linear(in_features=2048, out_features=133, bias=True)


output:
represents the structure of the modified final fully connected layer (fc) after the code execution.

in_features=2048: Indicates that the modified fully connected layer has 2048 input features. This corresponds to the number of output features from the preceding layers, typically the output of the last convolutional layer in the ResNet-50 architecture.

out_features=133: Indicates that the modified fully connected layer produces 133 output features or classes. This is aligned with the specific task of classifying dog breeds, where there are 133 distinct classes representing different dog breeds.

bias=True: Indicates that the modified fully connected layer includes a bias term in the linear transformation. When bias=True, a bias term is added to each output feature.

output Linear(in_features=2048, out_features=133, bias=True) signifies that the final fully connected layer (fc) of the ResNet-50 model has been successfully modified to suit the new task of classifying dog breeds. It has 2048 input features and produces 133 output features, each representing the confidence score for one of the 133 dog breeds.

In [22]:
#Specify Loss Function and Optimizer

import torch.optim as optim

# Cross-entropy loss for the multi-class breed classification task.
criterion_transfer = nn.CrossEntropyLoss()
# Adam receives only the parameters of the replacement `fc` layer, so only that layer is updated.
optimizer_transfer = optim.Adam(model_transfer.fc.parameters(), lr=0.005)

Specifies a loss function and an optimizer for training the modified ResNet-50 model (model_transfer) on the task of classifying dog breeds. Here's what each part of the code does:

criterion_transfer = nn.CrossEntropyLoss(): This line defines the loss function used for the classification task. nn.CrossEntropyLoss() is a common choice for multi-class classification problems. It combines the log-softmax function and the negative log-likelihood loss, so it expects the model's raw, unnormalized scores (logits) rather than probabilities; the softmax is applied inside the loss. In this case, the model outputs one score for each of the 133 dog breed classes, and CrossEntropyLoss measures the difference between the distribution implied by those scores and the true labels.

optimizer_transfer = optim.Adam(model_transfer.fc.parameters(), lr=0.005): This line defines the optimizer used to update the weights of the model during training. optim.Adam is a popular optimization algorithm known for its adaptive learning rate method. It adjusts the learning rates for each parameter individually, which can lead to faster convergence and better performance. model_transfer.fc.parameters() specifies that only the parameters of the final fully connected layer (fc) should be optimized, as the rest of the model's parameters are frozen. The learning rate (lr) is set to 0.005, which determines the step size taken during optimization.

In [23]:
#Train and Validate the Model
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    """Train ``model`` and checkpoint it whenever the validation loss improves.

    Args:
        n_epochs: number of passes over ``loaders['train']``.
        loaders: mapping with 'train' and 'valid' entries, each an iterable
            yielding ``(data, target)`` batches.
        model: the network to optimize.
        optimizer: optimizer already bound to the parameters to update.
        criterion: loss callable invoked as ``criterion(output, target)``.
        use_cuda: when true, every batch is moved to the GPU before use.
        save_path: file that receives ``model.state_dict()`` each time the
            epoch's validation loss is lower than any seen so far.

    Returns:
        The same ``model`` object, carrying the weights of the final epoch.
        The lowest-validation-loss weights are the ones stored at ``save_path``.
    """
    # Built-in infinity: no NumPy dependency (np.Inf was removed in NumPy 2.0).
    valid_loss_min = float('inf')

    for epoch in range(1, n_epochs + 1):
        # running averages of the per-batch losses for this epoch
        train_loss = 0.0
        valid_loss = 0.0

        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            if use_cuda:
                data, target = data.cuda(), target.cuda()

            # clear gradients left over from the previous iteration
            optimizer.zero_grad()

            output = model(data)
            loss = criterion(output, target)

            # backpropagate and update the weights
            loss.backward()
            optimizer.step()

            # Incremental mean of the batch losses. .item() yields a plain
            # Python float so the accumulator never holds on to a tensor.
            train_loss += (loss.item() - train_loss) / (batch_idx + 1)

            if batch_idx % 100 == 0:
                print('Epoch %d, Batch %d loss: %.6f' % (epoch, batch_idx + 1, train_loss))

        ######################
        # validate the model #
        ######################
        model.eval()
        # No gradients are needed for validation; skipping autograd saves
        # memory and time.
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(loaders['valid']):
                if use_cuda:
                    data, target = data.cuda(), target.cuda()

                output = model(data)
                loss = criterion(output, target)

                valid_loss += (loss.item() - valid_loss) / (batch_idx + 1)

        # print training/validation statistics
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch,
            train_loss,
            valid_loss
            ))

        # save the model if validation loss has decreased
        if valid_loss < valid_loss_min:
            torch.save(model.state_dict(), save_path)

            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min, valid_loss))

            valid_loss_min = valid_loss

    # return trained model
    return model

This training function is designed to train a neural network model using a specified number of epochs and provided data loaders for both training and validation sets.

The function orchestrates the training and validation process of a neural network model, monitoring the loss values and saving the model's state whenever a lower validation loss is achieved, thereby checkpointing the best-performing model.

1.Initialization:
   - The function initializes a variable `valid_loss_min` to positive infinity. This variable will be used to track the minimum validation loss observed during training.

2. Training Loop (for each epoch):
   - The function iterates over each epoch from 1 to `n_epochs`.
   - Within each epoch, it initializes variables `train_loss` and `valid_loss` to track the training and validation losses, respectively.

3. Training Phase:
   - The model is set to training mode (`model.train()`).
   - For each batch in the training data loader (`loaders['train']`), the function performs the following steps:
     - Moves the data and target labels to the GPU if CUDA is available (`use_cuda`).
     - Clears the gradients of the optimizer (`optimizer.zero_grad()`).
     - Forward passes the input data through the model to obtain the output predictions.
     - Calculates the loss between the model predictions and the target labels.
     - Performs backpropagation to compute gradients.
     - Updates the model parameters using the optimizer (`optimizer.step()`).
     - Computes and updates the average training loss (`train_loss`).

4. Validation Phase:
   - The model is set to evaluation mode (`model.eval()`).
   - For each batch in the validation data loader (`loaders['valid']`), the function performs the following steps:
     - Moves the data and target labels to the GPU if CUDA is available (`use_cuda`).
     - Forward passes the input data through the model to obtain the output predictions.
     - Calculates the validation loss between the model predictions and the target labels.
     - Computes and updates the average validation loss (`valid_loss`).

5. Logging and Saving:
   - After processing all batches in an epoch, the function prints the training and validation losses for that epoch.
   - If the current validation loss is lower than the minimum validation loss observed so far (`valid_loss_min`), the function saves the model's state dictionary to the specified `save_path`.
   - It then updates `valid_loss_min` to the current validation loss.

6. Return:
   - After completing all epochs, the function returns the trained model.



In [24]:
# train the model

# Number of full passes over the training loader.
n_epochs = 5

# train() writes 'model_transfer.pt' whenever the validation loss improves and
# returns the model it was given.
model_transfer =  train(n_epochs, loaders_transfer, model_transfer, optimizer_transfer,
                        criterion_transfer, use_cuda, 'model_transfer.pt')



Epoch 1, Batch 1 loss: 4.804659
Epoch 1, Batch 101 loss: 5.512602
Epoch 1, Batch 201 loss: 4.366426
Epoch 1, Batch 301 loss: 3.948845
Epoch: 1 	Training Loss: 3.838014 	Validation Loss: 1.887553
Validation loss decreased (inf --> 1.887553).  Saving model ...
Epoch 2, Batch 1 loss: 3.387529
Epoch 2, Batch 101 loss: 2.713449
Epoch 2, Batch 201 loss: 2.841927
Epoch 2, Batch 301 loss: 2.757945
Epoch: 2 	Training Loss: 2.735197 	Validation Loss: 2.141465
Epoch 3, Batch 1 loss: 5.353657
Epoch 3, Batch 101 loss: 2.690471
Epoch 3, Batch 201 loss: 2.721024
Epoch 3, Batch 301 loss: 2.796488
Epoch: 3 	Training Loss: 2.850372 	Validation Loss: 1.908857
Epoch 4, Batch 1 loss: 2.021873
Epoch 4, Batch 101 loss: 2.593591
Epoch 4, Batch 201 loss: 2.646577
Epoch 4, Batch 301 loss: 2.721172
Epoch: 4 	Training Loss: 2.727392 	Validation Loss: 1.759367
Validation loss decreased (1.887553 --> 1.759367).  Saving model ...
Epoch 5, Batch 1 loss: 4.434908
Epoch 5, Batch 101 loss: 2.493634
Epoch 5, Batch 201 lo

This cell trains the model_transfer neural network model using the train function defined earlier. Here's a breakdown of the parameters and what each part of the code does:

n_epochs = 5: Specifies the number of epochs for training. In this case, the model will be trained for five epochs.

loaders_transfer: Provides the data loaders for training and validation sets. These loaders contain the training and validation data in batches.

model_transfer: Represents the neural network model (pre-trained ResNet-50) that will be trained on the task of classifying dog breeds. This model has already been modified for the new task.

optimizer_transfer: Specifies the optimizer (Adam) used for updating the model's parameters during training.

criterion_transfer: Defines the loss function (CrossEntropyLoss) used to compute the training loss during training.

use_cuda: Indicates whether CUDA (GPU) should be used for training. If True, the model and data will be moved to the GPU.

'model_transfer.pt': Specifies the file path where the model's state dictionary is saved each time the validation loss improves during training. This allows for model checkpointing and keeps the best-performing model on disk.

In [25]:
# Reload the checkpoint written by train(): the weights from the epoch with the
# lowest validation loss (the checkpoint is selected on loss, not accuracy).
model_transfer.load_state_dict(torch.load('model_transfer.pt'))

<All keys matched successfully>

In [26]:
#test the model

def test(loaders, model, criterion, use_cuda):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.

    model.eval()
    for batch_idx, (data, target) in enumerate(loaders['test']):
        # move to GPU
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        # convert output probabilities to predicted class
        pred = output.data.max(1, keepdim=True)[1]
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)

    print('Test Loss: {:.6f}\n'.format(test_loss))

    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (100. * correct / total, correct, total))

The test function evaluates the performance of a trained neural network model on a test dataset. Here's an explanation of each part of the code:

Initialization:

Three variables test_loss, correct, and total are initialized to monitor the test loss, the number of correct predictions, and the total number of predictions, respectively.
Evaluation Loop:

The model is switched to evaluation mode using model.eval(). This disables certain operations like dropout during evaluation.
The function iterates over each batch in the test data loader (loaders['test']) using enumerate to get both the batch index and data/target pairs.
For each batch, the following steps are performed:
If use_cuda is True, the data and target tensors are moved to the GPU using .cuda().
The input data is passed through the model to obtain the output predictions (output).
The test loss for the batch is computed using the provided loss function (criterion).
The average test loss is updated using an online averaging formula.
The output probabilities are converted to predicted class labels (pred) using torch.max.
The number of correct predictions (correct) is updated by comparing predicted labels to true labels.
The total number of predictions (total) is updated by adding the batch size (data.size(0)).
Print Results:
After evaluating all batches, the function prints the average test loss and the test accuracy.
Test accuracy is computed as the percentage of correct predictions out of the total number of predictions.

In [27]:
# Evaluate the reloaded transfer-learning model on the 'test' loader; prints the loss and accuracy.
test(loaders_transfer, model_transfer, criterion_transfer, use_cuda)

Test Loss: 1.793391


Test Accuracy: 73% (616/836)


In [56]:
# Predict Dog Breed with the Model
# Show the first and last class names of the dataset behind the training loader.


print(loaders_transfer['train'].dataset.classes[0])
print(loaders_transfer['train'].dataset.classes[-1])

001.Affenpinscher
133.Yorkshire_terrier


In [59]:

"""
Takes an image file, processes it, and predicts
the breed of the dog in the image using a pre-trained neural network model.
"""
from PIL import Image
import torchvision.transforms as transforms

def predict_breed_transfer(img_path, model, class_names):
    """Predict the breed shown in an image and return its class name.

    Args:
        img_path: path to an image file that PIL can open.
        model: classifier producing one score per entry of ``class_names``.
        class_names: sequence of labels, indexed by the model's output position.

    Returns:
        The element of ``class_names`` at the position of the highest score.
    """
    my_image = Image.open(img_path).convert('RGB')

    # Deterministic preprocessing: resize to 224x224, convert to a tensor,
    # then apply the notebook-level ImageNet normalization.
    transform = transforms.Compose([
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        standard_normalize])

    # Send the input to the model's device rather than calling model.cpu():
    # Module.cpu() moves the caller's model in place, which would leave it on
    # the CPU for any later GPU evaluation.
    device = next(model.parameters()).device
    batch = transform(my_image).unsqueeze(0).to(device)

    model.eval()
    with torch.no_grad():
        # .item() turns the 0-dim index tensor into a plain int for indexing
        breed_index = torch.argmax(model(batch), dim=1).item()

    return class_names[breed_index]

In [69]:
import matplotlib.pyplot as plt
import torchvision.transforms as transforms

def display_image(img_path):
    """Open the image stored at ``img_path`` and render it with matplotlib."""
    picture = Image.open(img_path)
    plt.imshow(picture)
    plt.show()

def run_app(img_path):
    """Detect a dog in the image and, if one is found, show it and print its breed.

    When ``dog_detector`` rejects the image only an error message is printed.
    Otherwise the image is displayed and the breed predicted by
    ``model_transfer`` is printed.

    NOTE(review): ``class_names`` is read from notebook scope and is not
    defined in the visible cells; confirm it is created before this runs.
    """
    # Guard clause: nothing to classify when no dog is detected.
    if not dog_detector(img_path):
        print('Error! nothing detected!.... check your image once!\n')
        return

    print('Hey dog is detected!')
    display_image(img_path)
    output = predict_breed_transfer(img_path, model_transfer, class_names)
    print('It belongs to category...  ')
    print(f'{output} \n')