In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [38]:
import random
import os
import glob
import numpy as np
import pandas as pd
import sys
import matplotlib.pyplot as plt
import tqdm.notebook as tq
%matplotlib inline
from mpl_toolkits.axes_grid1 import ImageGrid
import warnings
warnings.filterwarnings('ignore')

import torch
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.nn as nn
from torchvision import datasets, transforms, models
from torchvision.utils import make_grid

from keras.utils import load_img, img_to_array
from keras.applications.vgg16 import preprocess_input

from PIL import Image
from IPython.display import display
import cv2
from PIL import ImageFile

from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, accuracy_score, classification_report, precision_recall_fscore_support, confusion_matrix, precision_score, recall_score, f1_score

ImageFile.LOAD_TRUNCATED_IMAGES = True

from torchvision.transforms.functional import InterpolationMode

In [2]:
def path_given_id(id, test=False):
    """
    Returns the full path to the image given the id of the image.
    Parameters:
        - id: The id of the image.
        - test: If True returns the relative path from the test folder. Otherwise, returns the relative path to the image from the training folder.
    Returns:
        - The full relative path to the image with the give in id.
    """
    return IMAGES_PATH + ('train/' if not test else 'test/') + str(id) + '.jpg'

def get_img_array(id, test=False):
    """
    Loads the image from the given id, convert the image to a numpy array and return the numpy array.
    Parameters:
        - id: The id of the image.
        - test: If True, loads the image from the test folder. If False,loads the image from the train folder.
    Returns:
        - The image with the give id as a numpy array.
    """
    img = load_img(path_given_id(id, test), target_size=(224, 224))
    return img_to_array(img)

# preprocess_input(np.expand_dims(get_image_array(id, test), axis=0)) will convert the image into 1,224,224,3 to give to predict.
def process_image(id, test=False):
    return preprocess_input(np.expand_dims(get_img_array(id, test), axis=0))

# 1. Load the dataset

In [4]:
IMAGES_PATH = '/content/drive/MyDrive/dog_breed_identification_files/'

labels = pd.read_csv(IMAGES_PATH +'labels.csv')
labelnames = pd.read_csv(IMAGES_PATH  + 'sample_submission.csv').keys()[1:]

In [5]:
codes = range(len(labelnames))
breed_to_code = dict(zip(labelnames, codes))
code_to_breed = dict(zip(codes, labelnames))

labels['target'] =  [breed_to_code[x] for x in labels.breed]
labels['rank'] = labels.groupby('breed').rank()['id']
labels_pivot = labels.pivot('id', 'breed', 'target').reset_index().fillna(0)

training_data = labels_pivot.sample(frac=0.85)
validation_data = labels_pivot[~labels_pivot['id'].isin(training_data['id'])]
testing_data = training_data.sample(frac=0.25)
training_data = training_data[~training_data['id'].isin(testing_data['id'])]

In [6]:
training_data.shape, validation_data.shape, testing_data.shape

((6517, 121), (1533, 121), (2172, 121))

# 2a. Define a baseline model and your model
## Baseline model: most_frequent

## We are using most_frequent as our baseline model. And, we are using Resnet50 as our model.
## We will use the all the same weights from the pretrained Resnet model and will change the final softmax layer.

In [7]:
class BaselineModel:
    """
    A baseline model. Here, we're using most_frequent as our baseline model.
    The baseline model predicts the most frequent label in the dataset regardless of the input image.
    """
    def __init__(self):
        """
        Constructor to initialize the model.
        Parameters: None
        Returns: None
        """
        self.images = None
    
    def fit(self, images, labels):
        """
        Fits the Baseline model to the data.
        Parameters:
            - images: The training X values
            - labels: The training y values
        Returns:
            - None
        """
        self.images = images
        self.labels = labels
    
    def predict(self, X_test):
        """
        Returns the most frequent label for all of the images.
        Parameters: 
            - X_test: The test images
        Returns: The most frequent label for each of the input images
        """
        return [self.most_frequent()] * len(X_test)
    
    def most_frequent(self):
        """
        Finds most frequent label in the dataset.
        Parameters: None
        Returns: Returns the most frequent label
        """
        return self.labels.value_counts().idxmax()

## 3a. Run a training loop on a training set with both models

In [8]:
temp = pd.read_csv(IMAGES_PATH + 'labels.csv')

X_train, X_test, y_train, y_test = train_test_split(temp['id'], temp['breed'], train_size=0.15, random_state=42)

In [9]:
temp = pd.read_csv(IMAGES_PATH + 'labels.csv')

baseline_model = BaselineModel()
baseline_model.fit(X_train, y_train)

## 4a. Evaluate both models on a withheld test set

In [10]:
baseline_predictions = baseline_model.predict(X_test)
baseline_predictions[:5]

['maltese_dog', 'maltese_dog', 'maltese_dog', 'maltese_dog', 'maltese_dog']

In [11]:
acc_score = accuracy_score(y_test, baseline_predictions) * 100

## 5a. Display results on the test set for both models

In [12]:
f"Accuracy for the baseline model: {acc_score : .2f}%"

'Accuracy for the baseline model:  1.09%'

In [13]:
precision_recall_fscore_support(y_test, baseline_predictions, average='macro')

(9.111136686231634e-05, 0.008333333333333333, 0.0001802519732847602, None)

In [14]:
confusion_matrix(y_test, baseline_predictions)

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

# 2b. Define a baseline model and your model
## Proposed Model: InceptionV3 model

# Dataset Augmentation
## This will increase the size of the dataset. 
## In general these operations will not change what the image represents. For a deep learning model, the more the data the better the model will be able to find the optimal weights and thereby lowering the loss.
## For example: A dog image horizontally flipped is still a dog; A dog rotated by an angle is still a dog; A dog image cropped with a reasonable crop rate is still a dog. 

In [15]:
img_transform = {
    'valid':transforms.Compose([
        transforms.Resize(size = 299, interpolation=InterpolationMode.BILINEAR),
        transforms.CenterCrop(size = 299),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
    'train':transforms.Compose([
        transforms.RandomResizedCrop(size = 299),
        transforms.RandomRotation(degrees = 30),
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(size=299),  
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])  
    ]),
    'test':transforms.Compose([
        transforms.Resize(size = 299, interpolation=InterpolationMode.BILINEAR),
        transforms.CenterCrop(size = 299),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
}

In [16]:
class DogDataset(torch.utils.data.Dataset):
    """
    Create a dataset for pytorch batch loading. This is to load few images into memory at a time instead of all the images at once.
    Extends from torch.utils.data.Dataset
    """
    def __init__(self, images_directory, labels, transform):
        """
        Constructor initialization.
        Params:
            - images_directory: The directory where the images are stored.
            - labels: The image labels
            - transform: The transformations to perform on the data.
        """
        self.images_directory = images_directory
        self.labels = labels
        self.transform = transform


    def __len__(self):
        """
        Returns the total number of samples.
        """
        return len(self.labels)

    
    def __getitem__(self, index):
        if self.labels is not None:
            image_name = f'{self.labels["id"].iloc[index]}.jpg'
            full_image_name = self.images_directory + image_name
            
            final_image = Image.open(full_image_name)
            label = self.labels.iloc[index, 1:].astype('float').to_numpy()
            label = np.argmax(label)
            
            if self.transform:
                final_image = self.transform(final_image)
            
            return [final_image, label]
            

In [17]:
num_workers = 4
batch_size = 70
use_cuda = torch.cuda.is_available()

train_img = DogDataset(IMAGES_PATH + 'train/', training_data, transform = img_transform['train'])
valid_img = DogDataset(IMAGES_PATH + 'train/', validation_data, transform = img_transform['valid'])
test_img = DogDataset(IMAGES_PATH + 'train/', testing_data, transform = img_transform['test'])


dataloaders={
    'train':torch.utils.data.DataLoader(train_img, batch_size, num_workers = num_workers, shuffle=True),
    'valid':torch.utils.data.DataLoader(valid_img, batch_size, num_workers = num_workers, shuffle=False),
    'test':torch.utils.data.DataLoader(test_img, batch_size, num_workers = num_workers, shuffle=True)
}

# 3.b. Run a training loop on a training set with both models

In [22]:
inceptionv3_model =  torch.hub.load('pytorch/vision:v0.10.0', 'inception_v3', pretrained=True)

# donot calcualte any of te weights. Use pretrained weights.
for param in inceptionv3_model.parameters():
    param.requires_grad = False
    
# replace the last fully connected layer to suit for our dog breed identification.
# Here we have a linear model with 2048 in_features and 120(Our dog breed # of classes) out_features.
inceptionv3_model.fc = nn.Sequential(nn.Linear(inceptionv3_model.fc.in_features, 120))
# check if gpu is available
if use_cuda:
    inceptionv3_model = inceptionv3_model.cuda()
    
inceptionv3_model.aux_logits = False

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


In [23]:
loss_function = nn.CrossEntropyLoss()

# filter for weights that need to be computed.
# We don't compute already computed weights because we don't have the neccessary computational power.
# If we have computational power then we will build the model from the architecture, randomly initialize the weights and train our model.
grad_weights = filter(lambda w: w.requires_grad, inceptionv3_model.parameters())

# use Stochastic Gradient Descent to minimize the loss.
optimizer = torch.optim.SGD(grad_weights, lr=0.01, momentum=0.8)

In [27]:
training_losses = []
validation_losses = []

def train(n_epochs, img_transforms, model, optimizer, criterion, use_cuda):
    """returns trained model"""
    for epoch in range(1, n_epochs+1):
        loss_during_train = 0.0
        loss_during_validation = 0.0
        
        model.train()
        
        for index_batch, (image, label) in enumerate(img_transforms['train']):
            if use_cuda:
                image, label = image.cuda(), label.cuda()
            
            optimizer.zero_grad()
            output = model(image)
            
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            
            loss_during_train = loss_during_train + ((1 / (index_batch + 1)) * (loss.data - loss_during_train))
            training_losses.append(loss_during_train)

            if index_batch % 10 == 0:
                print(f'Epoch: {epoch} \tBatch: {index_batch + 1} \tTraining Loss: {loss_during_train:.2f}')
        
        
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in img_transforms['valid']:
                if use_cuda:
                    images = images.cuda()
                    labels = labels.cuda()

                outputs = model(images)

                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    
        print(f'Accuracy of the network on the {total} validation images: {100 * correct / total} %') 
        
        
        model.eval()
        for index_batch, (image, label) in enumerate(img_transforms['valid']):
            if use_cuda:
                image, label = image.cuda(), label.cuda()

            output = model(image)
            
            loss = criterion(output, label)
            loss_during_validation = loss_during_validation + ((1 / (index_batch + 1)) * (loss.data - loss_during_validation))
            validation_losses.append(loss_during_validation)

        print(f'Epoch: {epoch} \tTraining Loss: {loss_during_train:.2f} \tValidation Loss: {loss_during_validation:.2f}')
        
    return model

In [28]:
n_epochs = 20

output_model =  train(n_epochs, dataloaders, inceptionv3_model, optimizer, loss_function, use_cuda)

Epoch: 1 	Batch: 1 	Training Loss: 4.69
Epoch: 1 	Batch: 11 	Training Loss: 4.73
Epoch: 1 	Batch: 21 	Training Loss: 4.67


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

Epoch: 1 	Batch: 31 	Training Loss: 4.63
Epoch: 1 	Batch: 41 	Training Loss: 4.58
Epoch: 1 	Batch: 51 	Training Loss: 4.52
Epoch: 1 	Batch: 61 	Training Loss: 4.47
Epoch: 1 	Batch: 71 	Training Loss: 4.41
Epoch: 1 	Batch: 81 	Training Loss: 4.37
Epoch: 1 	Batch: 91 	Training Loss: 4.31
Accuracy of the network on the 1533 validation images: 49.967384213959555 %
Epoch: 1 	Training Loss: 4.29 	Validation Loss: 3.38
Epoch: 2 	Batch: 1 	Training Loss: 3.85
Epoch: 2 	Batch: 11 	Training Loss: 3.68


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>    assert self._parent_pid == os.getpid(), 'can only test a child process'

Traceback (most recent call last):
AssertionError:   File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    can only test a child processself._shutdown_workers()

  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

Epoch: 2 	Batch: 21 	Training Loss: 3.65
Epoch: 2 	Batch: 31 	Training Loss: 3.59
Epoch: 2 	Batch: 41 	Training Loss: 3.56
Epoch: 2 	Batch: 51 	Training Loss: 3.52
Epoch: 2 	Batch: 61 	Training Loss: 3.48
Epoch: 2 	Batch: 71 	Training Loss: 3.44
Epoch: 2 	Batch: 81 	Training Loss: 3.39
Epoch: 2 	Batch: 91 	Training Loss: 3.36


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process


Accuracy of the network on the 1533 validation images: 65.88388780169602 %


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process


Epoch: 2 	Training Loss: 3.36 	Validation Loss: 2.32
Epoch: 3 	Batch: 1 	Training Loss: 3.12
Epoch: 3 	Batch: 11 	Training Loss: 3.01


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():Exception ignored in: 
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionErrorTraceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    : self._shutdown_workers()can only test a child process
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers

    if w.is_alive():
  File "/usr/lib/

Epoch: 3 	Batch: 21 	Training Loss: 2.95
Epoch: 3 	Batch: 31 	Training Loss: 2.92
Epoch: 3 	Batch: 41 	Training Loss: 2.88
Epoch: 3 	Batch: 51 	Training Loss: 2.85
Epoch: 3 	Batch: 61 	Training Loss: 2.83
Epoch: 3 	Batch: 71 	Training Loss: 2.80
Epoch: 3 	Batch: 81 	Training Loss: 2.77
Epoch: 3 	Batch: 91 	Training Loss: 2.76


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

Accuracy of the network on the 1533 validation images: 70.38486627527723 %


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
<function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
  File "/usr/local/lib/python3.10/dist-packages/torch/u

Epoch: 3 	Training Loss: 2.75 	Validation Loss: 1.67
Epoch: 4 	Batch: 1 	Training Loss: 2.33


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

Epoch: 4 	Batch: 11 	Training Loss: 2.45


    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python

Epoch: 4 	Batch: 21 	Training Loss: 2.42
Epoch: 4 	Batch: 31 	Training Loss: 2.40
Epoch: 4 	Batch: 41 	Training Loss: 2.40
Epoch: 4 	Batch: 51 	Training Loss: 2.39
Epoch: 4 	Batch: 61 	Training Loss: 2.38
Epoch: 4 	Batch: 71 	Training Loss: 2.36
Epoch: 4 	Batch: 81 	Training Loss: 2.34
Epoch: 4 	Batch: 91 	Training Loss: 2.32


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

Accuracy of the network on the 1533 validation images: 74.95107632093934 %


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
Exception ignored

Epoch: 4 	Training Loss: 2.31 	Validation Loss: 1.29
Epoch: 5 	Batch: 1 	Training Loss: 2.17


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'Exception ignored in: 
<function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>AssertionError
: Traceback (most recent call last):
can only test a child process  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__

    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

Epoch: 5 	Batch: 11 	Training Loss: 2.16
Epoch: 5 	Batch: 21 	Training Loss: 2.11
Epoch: 5 	Batch: 31 	Training Loss: 2.10
Epoch: 5 	Batch: 41 	Training Loss: 2.09
Epoch: 5 	Batch: 51 	Training Loss: 2.11
Epoch: 5 	Batch: 61 	Training Loss: 2.10
Epoch: 5 	Batch: 71 	Training Loss: 2.09
Epoch: 5 	Batch: 81 	Training Loss: 2.10
Epoch: 5 	Batch: 91 	Training Loss: 2.08


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>AssertionError
: Traceback (most recent call last):
can only test a child process
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

Accuracy of the network on the 1533 validation images: 76.38617090671885 %


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>Exception ignored in: 
<function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>Traceback (most recent call last):

  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
Traceback (most recent call last):
      File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
self._shutdown_workers()    
self._shutdown_workers()  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers

      File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
if w.is_alive():    
if w.is_alive():  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive

      File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
assert self._parent_pid == os.getpid(), 'can only test a

Epoch: 5 	Training Loss: 2.08 	Validation Loss: 1.06


Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000><function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):

  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    assert self._parent_pid == os.getpid(), 'can only test a child process'Traceback (most recent call last):
if w.is_alive():  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__

  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
        
AssertionError: can only test a child process
self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

Epoch: 6 	Batch: 1 	Training Loss: 2.03
Epoch: 6 	Batch: 11 	Training Loss: 1.98
Epoch: 6 	Batch: 21 	Training Loss: 1.94
Epoch: 6 	Batch: 31 	Training Loss: 1.93
Epoch: 6 	Batch: 41 	Training Loss: 1.95
Epoch: 6 	Batch: 51 	Training Loss: 1.93
Epoch: 6 	Batch: 61 	Training Loss: 1.94
Epoch: 6 	Batch: 71 	Training Loss: 1.93
Epoch: 6 	Batch: 81 	Training Loss: 1.92
Epoch: 6 	Batch: 91 	Training Loss: 1.92


Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>

Traceback (most recent call last):

Traceback (most recent call last):
<function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__

Traceback (most recent call last):
Traceback (most recent call last):
      File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
self._shutdown_workers()          File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
self._shutdown_workers()
self._shutdown_workers()
  

Accuracy of the network on the 1533 validation images: 77.82126549249837 %


Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Traceback (most recent call last):
<function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Exception ignored in:   File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
Traceback (most recent call last):
<function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
Exception ignored in:     
<function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>self._shutdown_workers()    

Traceback (most recent call last):
Traceback (most recent call last):
self._shutdown_workers()  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1462, in _shutdown_workers
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    
if w.is_alive():

Epoch: 6 	Training Loss: 1.93 	Validation Loss: 0.92


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>
Exception ignored in: Traceback (most recent call last):
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>Exception ignored in:   File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
<function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000><function _MultiProcessingDataLoaderIter.__del__ at 0x7f14e81bd000>    


Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
self._shutdown_workers()  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1479, in __del__

            self._shutdown_workers()  File "/usr/local/lib/

Epoch: 7 	Batch: 1 	Training Loss: 1.55
Epoch: 7 	Batch: 11 	Training Loss: 1.94
Epoch: 7 	Batch: 21 	Training Loss: 1.87
Epoch: 7 	Batch: 31 	Training Loss: 1.85
Epoch: 7 	Batch: 41 	Training Loss: 1.85
Epoch: 7 	Batch: 51 	Training Loss: 1.84
Epoch: 7 	Batch: 61 	Training Loss: 1.82
Epoch: 7 	Batch: 71 	Training Loss: 1.81
Epoch: 7 	Batch: 81 	Training Loss: 1.81
Epoch: 7 	Batch: 91 	Training Loss: 1.81
Accuracy of the network on the 1533 validation images: 78.86497064579257 %
Epoch: 7 	Training Loss: 1.81 	Validation Loss: 0.81
Epoch: 8 	Batch: 1 	Training Loss: 1.73
Epoch: 8 	Batch: 11 	Training Loss: 1.67
Epoch: 8 	Batch: 21 	Training Loss: 1.71
Epoch: 8 	Batch: 31 	Training Loss: 1.68
Epoch: 8 	Batch: 41 	Training Loss: 1.69
Epoch: 8 	Batch: 51 	Training Loss: 1.69
Epoch: 8 	Batch: 61 	Training Loss: 1.70
Epoch: 8 	Batch: 71 	Training Loss: 1.69
Epoch: 8 	Batch: 81 	Training Loss: 1.69
Epoch: 8 	Batch: 91 	Training Loss: 1.69
Accuracy of the network on the 1533 validation images:

In [29]:
def redirect_error():
    sys.stderr = open('/dev/null', 'w')

In [30]:
redirect_error()
correct = 0
total = 0
for images, labels in dataloaders['test']:
    if use_cuda:
        images = images.cuda()
        labels = labels.cuda()

    outputs = inceptionv3_model(images)

    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the {total} testing images: {100 * correct / total: .2f} %')

Accuracy of the network on the 2172 testing images:  84.53 %


# lets run for 30 more epochs

In [31]:
n_epochs = 30

output_model =  train(n_epochs, dataloaders, inceptionv3_model, optimizer, loss_function, use_cuda)

Epoch: 1 	Batch: 1 	Training Loss: 1.29
Epoch: 1 	Batch: 11 	Training Loss: 1.37
Epoch: 1 	Batch: 21 	Training Loss: 1.37
Epoch: 1 	Batch: 31 	Training Loss: 1.38
Epoch: 1 	Batch: 41 	Training Loss: 1.39
Epoch: 1 	Batch: 51 	Training Loss: 1.39
Epoch: 1 	Batch: 61 	Training Loss: 1.38
Epoch: 1 	Batch: 71 	Training Loss: 1.39
Epoch: 1 	Batch: 81 	Training Loss: 1.39
Epoch: 1 	Batch: 91 	Training Loss: 1.38
Accuracy of the network on the 1533 validation images: 80.82191780821918 %
Epoch: 1 	Training Loss: 1.39 	Validation Loss: 0.52
Epoch: 2 	Batch: 1 	Training Loss: 1.18
Epoch: 2 	Batch: 11 	Training Loss: 1.34
Epoch: 2 	Batch: 21 	Training Loss: 1.32
Epoch: 2 	Batch: 31 	Training Loss: 1.33
Epoch: 2 	Batch: 41 	Training Loss: 1.36
Epoch: 2 	Batch: 51 	Training Loss: 1.37
Epoch: 2 	Batch: 61 	Training Loss: 1.35
Epoch: 2 	Batch: 71 	Training Loss: 1.37
Epoch: 2 	Batch: 81 	Training Loss: 1.36
Epoch: 2 	Batch: 91 	Training Loss: 1.35
Accuracy of the network on the 1533 validation images:

# The loss value seems to be not improving now. So, let's stop here!

In [36]:
redirect_error()

correct = 0
total = 0

final_predictions = []
final_labels = []

f = []
for images, labels in dataloaders['test']:
    if use_cuda:
        images = images.cuda()
        labels = labels.cuda()

    outputs = inceptionv3_model(images)

    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    
    final_predictions.extend(predicted.tolist())
    final_labels.extend(labels.tolist())
    
    correct += (predicted == labels).sum().item()


print(f'Accuracy of the network on the {total} testing images: {100 * correct / total: .2f} %')

Accuracy of the network on the 2172 testing images:  85.59 %


In [39]:
acc_score = accuracy_score(final_labels, final_predictions)
precision = precision_score(final_labels, final_predictions, average='macro')
recall = recall_score(final_labels, final_predictions, average='macro')
f1 = f1_score(final_labels, final_predictions, average='macro')

In [40]:
cm = confusion_matrix(final_labels, final_predictions)
cm

array([[14,  0,  0, ...,  0,  0,  0],
       [ 0, 20,  0, ...,  0,  0,  0],
       [ 0,  0, 16, ...,  0,  0,  0],
       ...,
       [ 0,  0,  0, ..., 12,  0,  0],
       [ 0,  0,  0, ...,  0, 11,  0],
       [ 1,  1,  0, ...,  0,  0, 12]])

In [41]:
import pprint
pprint.pprint(classification_report(final_labels, final_predictions))

('              precision    recall  f1-score   support\n'
 '\n'
 '           0       0.93      0.88      0.90        16\n'
 '           1       0.80      1.00      0.89        20\n'
 '           2       0.94      1.00      0.97        16\n'
 '           3       0.91      0.91      0.91        22\n'
 '           4       0.54      0.41      0.47        17\n'
 '           5       0.89      0.73      0.80        22\n'
 '           6       0.68      0.68      0.68        19\n'
 '           7       0.81      0.95      0.88        22\n'
 '           8       0.77      0.91      0.83        11\n'
 '           9       0.83      0.90      0.86        21\n'
 '          10       0.94      1.00      0.97        17\n'
 '          11       0.93      0.96      0.95        28\n'
 '          12       0.80      0.80      0.80        20\n'
 '          13       0.84      1.00      0.91        16\n'
 '          14       0.82      0.74      0.78        19\n'
 '          15       0.91      0.95      0.93     

In [42]:
print(f'Accuracy: {acc_score:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F1-Score: {f1:.2f}')

Accuracy: 0.86, Precision: 0.86, Recall: 0.85, F1-Score: 0.85


In [43]:
torch.save(inceptionv3_model.state_dict(), 'inceptionv3_model.pth')
torch.save(inceptionv3_model, 'inceptionv3_model_full.pth')

# when we try to train all the weights from scratch, we are getting CUDA out of memory errror. So, we need more memory to calculate this. I think we can't do it on a machine 15GB GPU Memory. The free tier provides only 15GB of GPU RAM.