In [1]:
import torch
from PIL import Image
import torchvision.transforms as transforms
import numpy as np
import json
import requests
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

import random
import os
import glob
import numpy as np
import pandas as pd
import sys
import matplotlib.pyplot as plt
import tqdm.notebook as tq
%matplotlib inline
from mpl_toolkits.axes_grid1 import ImageGrid
import warnings
warnings.filterwarnings('ignore')

import torch
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.nn as nn
from torchvision import datasets, transforms, models
from torchvision.utils import make_grid

from keras.utils import load_img, img_to_array
from keras.applications.vgg16 import preprocess_input

from PIL import Image
from IPython.display import display
import cv2
from PIL import ImageFile

from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, accuracy_score, classification_report, precision_recall_fscore_support, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.utils.multiclass import unique_labels
import sys

ImageFile.LOAD_TRUNCATED_IMAGES = True

from torchvision.transforms.functional import InterpolationMode
import pprint

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
def path_given_id(id, test=False):
    """
    Returns the full path to the image given the id of the image.
    Parameters:
        - id: The id of the image.
        - test: If True returns the relative path from the test folder. Otherwise, returns the relative path to the image from the training folder.
    Returns:
        - The full relative path to the image with the give in id.
    """
    return IMAGES_PATH + ('train/' if not test else 'test/') + str(id) + '.jpg'

def get_img_array(id, test=False):
    """
    Loads the image from the given id, convert the image to a numpy array and return the numpy array.
    Parameters:
        - id: The id of the image.
        - test: If True, loads the image from the test folder. If False,loads the image from the train folder.
    Returns:
        - The image with the give id as a numpy array.
    """
    img = load_img(path_given_id(id, test), target_size=(224, 224))
    return img_to_array(img)

# preprocess_input(np.expand_dims(get_image_array(id, test), axis=0)) will convert the image into 1,224,224,3 to give to predict.
def process_image(id, test=False):
    return preprocess_input(np.expand_dims(get_img_array(id, test), axis=0))

# 1b. Load the dataset

In [3]:
IMAGES_PATH = '/kaggle/input/dog-breed-identification/'

labels = pd.read_csv(IMAGES_PATH +'labels.csv')
labelnames = pd.read_csv(IMAGES_PATH  + 'sample_submission.csv').keys()[1:]

In [4]:
codes = range(len(labelnames))
breed_to_code = dict(zip(labelnames, codes))
code_to_breed = dict(zip(codes, labelnames))

labels['target'] =  [breed_to_code[x] for x in labels.breed]
labels['rank'] = labels.groupby('breed').rank()['id']
labels_pivot = labels.pivot('id', 'breed', 'target').reset_index().fillna(0)

training_data = labels_pivot.sample(frac=0.85)
validation_data = labels_pivot[~labels_pivot['id'].isin(training_data['id'])]
testing_data = training_data.sample(frac=0.25)
training_data = training_data[~training_data['id'].isin(testing_data['id'])]

In [5]:
img_transform = {
    'valid':transforms.Compose([
        transforms.Resize(size = 224, interpolation=InterpolationMode.BILINEAR),
        transforms.CenterCrop(size = 224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
    'train':transforms.Compose([
        transforms.RandomResizedCrop(size = 224),
        transforms.RandomRotation(degrees = 30),
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(size=224),  
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])  
    ]),
    'test':transforms.Compose([
        transforms.Resize(size = 224, interpolation=InterpolationMode.BILINEAR),
        transforms.CenterCrop(size = 224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
}

In [6]:
class DogDataset(torch.utils.data.Dataset):
    """
    Create a dataset for pytorch batch loading. This is to load few images into memory at a time instead of all the images at once.
    Extends from torch.utils.data.Dataset
    """
    def __init__(self, images_directory, labels, transform):
        """
        Constructor initialization.
        Params:
            - images_directory: The directory where the images are stored.
            - labels: The image labels
            - transform: The transformations to perform on the data.
        """
        self.images_directory = images_directory
        self.labels = labels
        self.transform = transform


    def __len__(self):
        """
        Returns the total number of samples.
        """
        return len(self.labels)

    
    def __getitem__(self, index):
        if self.labels is not None:
            image_name = f'{self.labels["id"].iloc[index]}.jpg'
            full_image_name = self.images_directory + image_name
            
            final_image = Image.open(full_image_name)
            label = self.labels.iloc[index, 1:].astype('float').to_numpy()
            label = np.argmax(label)
            
            if self.transform:
                final_image = self.transform(final_image)
            
            return [final_image, label]
            

In [7]:
num_workers = 4
batch_size = 100
use_cuda = torch.cuda.is_available()

train_img = DogDataset(IMAGES_PATH + 'train/', training_data, transform = img_transform['train'])
valid_img = DogDataset(IMAGES_PATH + 'train/', validation_data, transform = img_transform['valid'])
test_img = DogDataset(IMAGES_PATH + 'train/', testing_data, transform = img_transform['test'])


dataloaders={
    'train':torch.utils.data.DataLoader(train_img, batch_size, num_workers = num_workers, shuffle=True),
    'valid':torch.utils.data.DataLoader(valid_img, batch_size, num_workers = num_workers, shuffle=False),
    'test':torch.utils.data.DataLoader(test_img, batch_size, num_workers = num_workers, shuffle=True)
}

## 2b. Define a baseline model and your model

In [8]:
efficientnet = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_efficientnet_b0', pretrained=True)

# donot calcualte any of te weights. Use pretrained weights.
for param in efficientnet.parameters():
    param.requires_grad = False
    
# replace the last fully connected layer for to suit for our dog breed identification.
# Here we have a linear model with 2048 in_features and 120(Our dog breed # of classes) out_features.
num_features = efficientnet.classifier.fc.in_features
efficientnet.classifier.fc = torch.nn.Linear(num_features, 120, bias=True)

# check if gpu is available
if use_cuda:
    efficientnet = efficientnet.cuda()

Downloading: "https://github.com/NVIDIA/DeepLearningExamples/zipball/torchhub" to /root/.cache/torch/hub/torchhub.zip
Downloading: "https://api.ngc.nvidia.com/v2/models/nvidia/efficientnet_b0_pyt_amp/versions/20.12.0/files/nvidia_efficientnet-b0_210412.pth" to /root/.cache/torch/hub/checkpoints/nvidia_efficientnet-b0_210412.pth


  0%|          | 0.00/20.5M [00:00<?, ?B/s]

In [9]:
efficientnet.classifier.fc 

Linear(in_features=1280, out_features=120, bias=True)

In [10]:
loss_function = nn.CrossEntropyLoss()

# filter for weights that need to be computed.
# We don't compute already computed weights because we don't have the neccessary computational power.
# If we have computational power then we will build the model from the architecture, randomly initialize the weights and train our model.
grad_weights = filter(lambda w: w.requires_grad, efficientnet.parameters())

# use Stochastic Gradient Descent to minimize the loss.
optimizer = torch.optim.SGD(grad_weights, lr=0.01, momentum=0.75)

In [11]:
def train(n_epochs, img_transforms, model, optimizer, criterion, use_cuda):
    """returns trained model"""
    for epoch in range(1, n_epochs+1):
        loss_during_train = 0.0
        loss_during_validation = 0.0
        
        model.train()
        
        for index_batch, (image, label) in enumerate(img_transforms['train']):
            if use_cuda:
                image, label = image.cuda(), label.cuda()
            
            optimizer.zero_grad()
            output = model(image)
            
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            
            loss_during_train = loss_during_train + ((1 / (index_batch + 1)) * (loss.data - loss_during_train))
            
            if index_batch % 10 == 0:
                print(f'Epoch: {epoch} \tBatch: {index_batch + 1} \tTraining Loss: {loss_during_train:.2f}')
        
        
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in img_transforms['valid']:
                if use_cuda:
                    images = images.cuda()
                    labels = labels.cuda()

                outputs = model(images)

                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    
        print(f'Accuracy of the network on the {total} validation images: {100 * correct / total} %') 
        
        
        model.eval()
        for index_batch, (image, label) in enumerate(img_transforms['valid']):
            if use_cuda:
                image, label = image.cuda(), label.cuda()

            output = model(image)
            
            loss = criterion(output, label)
            loss_during_validation = loss_during_validation + ((1 / (index_batch + 1)) * (loss.data - loss_during_validation))
            
        print(f'Epoch: {epoch} \tTraining Loss: {loss_during_train:.2f} \tValidation Loss: {loss_during_validation:.2f}')
        
    return model

## 3b. Run a training loop on a training set with both models


In [12]:
n_epochs = 20

output_model =  train(n_epochs, dataloaders, efficientnet, optimizer, loss_function, use_cuda)

Epoch: 1 	Batch: 1 	Training Loss: 4.82
Epoch: 1 	Batch: 11 	Training Loss: 4.79
Epoch: 1 	Batch: 21 	Training Loss: 4.78
Epoch: 1 	Batch: 31 	Training Loss: 4.76
Epoch: 1 	Batch: 41 	Training Loss: 4.74
Epoch: 1 	Batch: 51 	Training Loss: 4.72
Epoch: 1 	Batch: 61 	Training Loss: 4.71
Accuracy of the network on the 1533 validation images: 12.785388127853881 %
Epoch: 1 	Training Loss: 4.70 	Validation Loss: 4.54
Epoch: 2 	Batch: 1 	Training Loss: 4.57
Epoch: 2 	Batch: 11 	Training Loss: 4.54
Epoch: 2 	Batch: 21 	Training Loss: 4.53
Epoch: 2 	Batch: 31 	Training Loss: 4.51
Epoch: 2 	Batch: 41 	Training Loss: 4.50
Epoch: 2 	Batch: 51 	Training Loss: 4.48
Epoch: 2 	Batch: 61 	Training Loss: 4.47
Accuracy of the network on the 1533 validation images: 25.244618395303327 %
Epoch: 2 	Training Loss: 4.46 	Validation Loss: 4.27
Epoch: 3 	Batch: 1 	Training Loss: 4.30
Epoch: 3 	Batch: 11 	Training Loss: 4.32
Epoch: 3 	Batch: 21 	Training Loss: 4.30
Epoch: 3 	Batch: 31 	Training Loss: 4.29
Epoch: 

# Lets do 20 more epochs

In [13]:
output_model =  train(n_epochs, dataloaders, efficientnet, optimizer, loss_function, use_cuda)

Epoch: 1 	Batch: 1 	Training Loss: 2.36
Epoch: 1 	Batch: 11 	Training Loss: 2.39
Epoch: 1 	Batch: 21 	Training Loss: 2.37
Epoch: 1 	Batch: 31 	Training Loss: 2.37
Epoch: 1 	Batch: 41 	Training Loss: 2.35
Epoch: 1 	Batch: 51 	Training Loss: 2.34
Epoch: 1 	Batch: 61 	Training Loss: 2.34
Accuracy of the network on the 1533 validation images: 59.42596216568819 %
Epoch: 1 	Training Loss: 2.34 	Validation Loss: 1.80
Epoch: 2 	Batch: 1 	Training Loss: 2.29
Epoch: 2 	Batch: 11 	Training Loss: 2.30
Epoch: 2 	Batch: 21 	Training Loss: 2.29
Epoch: 2 	Batch: 31 	Training Loss: 2.31
Epoch: 2 	Batch: 41 	Training Loss: 2.31
Epoch: 2 	Batch: 51 	Training Loss: 2.31
Epoch: 2 	Batch: 61 	Training Loss: 2.31
Accuracy of the network on the 1533 validation images: 59.68688845401174 %
Epoch: 2 	Training Loss: 2.31 	Validation Loss: 1.75
Epoch: 3 	Batch: 1 	Training Loss: 2.24
Epoch: 3 	Batch: 11 	Training Loss: 2.32
Epoch: 3 	Batch: 21 	Training Loss: 2.31
Epoch: 3 	Batch: 31 	Training Loss: 2.29
Epoch: 3 

In [14]:
def redirect_error():
    sys.stderr = open('/dev/null', 'w')

In [15]:
redirect_error()
correct = 0
total = 0
for images, labels in dataloaders['test']:
    if use_cuda:
        images = images.cuda()
        labels = labels.cuda()

    outputs = efficientnet(images)

    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the {total} testing images: {100 * correct / total: .2f} %')

Accuracy of the network on the 2172 testing images:  72.88 %


# We are getting pretty good accuracy with this model. Let's try 10 more epochs now!

In [16]:
n_epochs = 10

output_model =  train(n_epochs, dataloaders, efficientnet, optimizer, loss_function, use_cuda)

Epoch: 1 	Batch: 1 	Training Loss: 1.81
Epoch: 1 	Batch: 11 	Training Loss: 1.85
Epoch: 1 	Batch: 21 	Training Loss: 1.84
Epoch: 1 	Batch: 31 	Training Loss: 1.85
Epoch: 1 	Batch: 41 	Training Loss: 1.86
Epoch: 1 	Batch: 51 	Training Loss: 1.87
Epoch: 1 	Batch: 61 	Training Loss: 1.87
Accuracy of the network on the 1533 validation images: 63.27462491846053 %
Epoch: 1 	Training Loss: 1.86 	Validation Loss: 1.30
Epoch: 2 	Batch: 1 	Training Loss: 1.79
Epoch: 2 	Batch: 11 	Training Loss: 1.86
Epoch: 2 	Batch: 21 	Training Loss: 1.85
Epoch: 2 	Batch: 31 	Training Loss: 1.84
Epoch: 2 	Batch: 41 	Training Loss: 1.86
Epoch: 2 	Batch: 51 	Training Loss: 1.84
Epoch: 2 	Batch: 61 	Training Loss: 1.83
Accuracy of the network on the 1533 validation images: 63.926940639269404 %
Epoch: 2 	Training Loss: 1.84 	Validation Loss: 1.28
Epoch: 3 	Batch: 1 	Training Loss: 1.44
Epoch: 3 	Batch: 11 	Training Loss: 1.88
Epoch: 3 	Batch: 21 	Training Loss: 1.86
Epoch: 3 	Batch: 31 	Training Loss: 1.85
Epoch: 3

# Let's again see what's the accuracy

In [17]:
redirect_error()
correct = 0
total = 0
for images, labels in dataloaders['test']:
    if use_cuda:
        images = images.cuda()
        labels = labels.cuda()

    outputs = efficientnet(images)

    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the {total} testing images: {100 * correct / total: .2f} %')

Accuracy of the network on the 2172 testing images:  73.90 %


# Ok! Let's try 20 more epochs

In [18]:
output_model =  train(20, dataloaders, efficientnet, optimizer, loss_function, use_cuda)

Epoch: 1 	Batch: 1 	Training Loss: 1.74
Epoch: 1 	Batch: 11 	Training Loss: 1.77
Epoch: 1 	Batch: 21 	Training Loss: 1.78
Epoch: 1 	Batch: 31 	Training Loss: 1.78
Epoch: 1 	Batch: 41 	Training Loss: 1.78
Epoch: 1 	Batch: 51 	Training Loss: 1.77
Epoch: 1 	Batch: 61 	Training Loss: 1.77
Accuracy of the network on the 1533 validation images: 64.18786692759295 %
Epoch: 1 	Training Loss: 1.78 	Validation Loss: 1.19
Epoch: 2 	Batch: 1 	Training Loss: 1.64
Epoch: 2 	Batch: 11 	Training Loss: 1.76
Epoch: 2 	Batch: 21 	Training Loss: 1.78
Epoch: 2 	Batch: 31 	Training Loss: 1.78
Epoch: 2 	Batch: 41 	Training Loss: 1.78
Epoch: 2 	Batch: 51 	Training Loss: 1.77
Epoch: 2 	Batch: 61 	Training Loss: 1.77
Accuracy of the network on the 1533 validation images: 64.44879321591651 %
Epoch: 2 	Training Loss: 1.77 	Validation Loss: 1.18
Epoch: 3 	Batch: 1 	Training Loss: 1.80
Epoch: 3 	Batch: 11 	Training Loss: 1.74
Epoch: 3 	Batch: 21 	Training Loss: 1.74
Epoch: 3 	Batch: 31 	Training Loss: 1.74
Epoch: 3 

# 4b. Evaluate both models on a withheld test set

In [19]:
redirect_error()

correct = 0
total = 0

final_predictions = []
final_labels = []

f = []
for images, labels in dataloaders['test']:
    if use_cuda:
        images = images.cuda()
        labels = labels.cuda()

    outputs = efficientnet(images)

    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    
    final_predictions.extend(predicted.tolist())
    final_labels.extend(labels.tolist())
    
    correct += (predicted == labels).sum().item()


print(f'Accuracy of the network on the {total} testing images: {100 * correct / total: .2f} %')

Accuracy of the network on the 2172 testing images:  74.54 %


## These are the metrics that we are getting for 70 Epochs. If we run for more epochs then we might get better results. We can experiment different values of hyperparameters as well.

# Metrics 

In [20]:
acc_score = accuracy_score(final_labels, final_predictions)
precision = precision_score(final_labels, final_predictions, average='macro')
recall = recall_score(final_labels, final_predictions, average='macro')
f1 = f1_score(final_labels, final_predictions, average='macro')

In [21]:
cm = confusion_matrix(final_labels, final_predictions)

In [22]:
cm

array([[12,  0,  0, ...,  0,  0,  0],
       [ 0, 25,  0, ...,  0,  0,  0],
       [ 0,  0, 12, ...,  0,  0,  0],
       ...,
       [ 0,  0,  0, ..., 17,  0,  0],
       [ 0,  0,  0, ...,  0,  9,  0],
       [ 0,  0,  0, ...,  0,  0, 11]])

## Visualizing confusion matrix is difficult because we have 120 classes

# 5B. Display results on the test set for both models

In [23]:
pprint.pprint(classification_report(final_labels, final_predictions))

('              precision    recall  f1-score   support\n'
 '\n'
 '           0       0.80      0.67      0.73        18\n'
 '           1       0.78      0.86      0.82        29\n'
 '           2       0.71      0.92      0.80        13\n'
 '           3       0.71      0.68      0.70        22\n'
 '           4       0.62      0.29      0.40        17\n'
 '           5       0.71      0.60      0.65        20\n'
 '           6       0.62      0.71      0.67        21\n'
 '           7       0.68      0.90      0.78        21\n'
 '           8       0.62      0.72      0.67        18\n'
 '           9       0.74      0.74      0.74        19\n'
 '          10       0.86      0.86      0.86        22\n'
 '          11       0.81      0.86      0.83        29\n'
 '          12       0.68      0.68      0.68        19\n'
 '          13       0.90      0.95      0.92        19\n'
 '          14       0.67      0.82      0.74        17\n'
 '          15       0.75      0.68      0.71     

In [24]:
print(f'Accuracy: {acc_score:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F1-Score: {f1:.2f}')

Accuracy: 0.75, Precision: 0.75, Recall: 0.74, F1-Score: 0.74


# Let's save the model and use it in the android app since this is the best performing model so far.

In [25]:
torch.save(efficientnet.state_dict(), 'efficientnet_model.pth')

In [26]:
# let's also save the full model incase we require it later.
torch.save(efficientnet, 'efficientnet_model_full.pth')