# Note: baseline model metrics are not shown here because every baseline model reaches the same accuracy, 1.09%. The baseline model is included in the EfficientNet .ipynb file.

In [None]:
# Mount Google Drive inside the Colab runtime so the dataset under
# /content/drive is reachable by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
from PIL import Image
import torchvision.transforms as transforms
import numpy as np
import json
import requests
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

import random
import os
import glob
import numpy as np
import pandas as pd
import sys
import matplotlib.pyplot as plt
import tqdm.notebook as tq
%matplotlib inline
from mpl_toolkits.axes_grid1 import ImageGrid
import warnings
warnings.filterwarnings('ignore')

import torch
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.nn as nn
from torchvision import datasets, transforms, models
from torchvision.utils import make_grid

from keras.utils import load_img, img_to_array
from keras.applications.vgg16 import preprocess_input

from PIL import Image
from IPython.display import display
import cv2
from PIL import ImageFile

from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, accuracy_score, classification_report, precision_recall_fscore_support, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.utils.multiclass import unique_labels
import sys

ImageFile.LOAD_TRUNCATED_IMAGES = True

from torchvision.transforms.functional import InterpolationMode
from torchvision import datasets, transforms, models
import pprint

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def path_given_id(id, test=False):
    """
    Build the path of the JPEG file that belongs to an image id.
    Parameters:
        - id: The id of the image (converted with str()).
        - test: If True the path points into the 'test/' folder, otherwise into the 'train/' folder.
    Returns:
        - IMAGES_PATH followed by the chosen folder, the id and the '.jpg' extension.
    """
    subfolder = 'test/' if test else 'train/'
    return ''.join((IMAGES_PATH, subfolder, str(id), '.jpg'))

def get_img_array(id, test=False):
    """
    Load one image, resized to 224x224, and hand it back as an array.
    Parameters:
        - id: The id of the image.
        - test: If True the image is read from the test folder, otherwise from the train folder.
    Returns:
        - The result of img_to_array for the loaded image.
    """
    image_path = path_given_id(id, test)
    pil_image = load_img(image_path, target_size=(224, 224))
    return img_to_array(pil_image)

# Turns a single image into a one-element batch for predict: the array from
# get_img_array(id, test) gets a leading axis (1, 224, 224, 3) and is then passed
# through preprocess_input.
def process_image(id, test=False):
    """
    Load the image with the given id and prepare it as a batch of one.
    Parameters:
        - id: The id of the image.
        - test: If True the image is read from the test folder, otherwise from the train folder.
    Returns:
        - The output of preprocess_input for the batched image array.
    """
    single_image_batch = np.expand_dims(get_img_array(id, test), axis=0)
    return preprocess_input(single_image_batch)

In [None]:
# Root folder of the dataset on the mounted drive (keeps its trailing slash
# because paths are built by plain string concatenation).
IMAGES_PATH = '/content/drive/MyDrive/dog_breed_identification_files/'

# One row per labelled image.
labels = pd.read_csv(f'{IMAGES_PATH}labels.csv')
# Class names: every column header of the sample submission except the first.
labelnames = pd.read_csv(f'{IMAGES_PATH}sample_submission.csv').columns[1:]

In [None]:
# Integer code for every breed, in the column order of the sample submission.
codes = range(len(labelnames))
breed_to_code = dict(zip(labelnames, codes))
code_to_breed = dict(zip(codes, labelnames))

labels['target'] =  [breed_to_code[x] for x in labels.breed]
labels['rank'] = labels.groupby('breed').rank()['id']
# One row per image id and one column per breed; the cell for the image's own
# breed holds its code and every other cell is filled with 0.
# pivot() is called with keywords because pandas 2.x no longer accepts these
# arguments positionally.
labels_pivot = (
    labels.pivot(index='id', columns='breed', values='target')
    .reset_index()
    .fillna(0)
)

# Split: 15% of all rows become the validation set; from the remaining 85%,
# a quarter is held out as the test set and the rest is used for training.
training_data = labels_pivot.sample(frac=0.85)
validation_data = labels_pivot[~labels_pivot['id'].isin(training_data['id'])]
testing_data = training_data.sample(frac=0.25)
training_data = training_data[~training_data['id'].isin(testing_data['id'])]

In [None]:
def _eval_pipeline():
    """Deterministic preprocessing shared by the 'valid' and 'test' splits."""
    return transforms.Compose([
        transforms.Resize(size=224, interpolation=InterpolationMode.BILINEAR),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])


# Per-split preprocessing. Only the 'train' pipeline is randomised
# (crop, rotation, colour jitter, horizontal flip); every pipeline ends with
# the same tensor conversion and per-channel normalisation.
img_transform = {
    'valid': _eval_pipeline(),
    'train': transforms.Compose([
        transforms.RandomResizedCrop(size=224),
        transforms.RandomRotation(degrees=30),
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
    'test': _eval_pipeline(),
}

In [None]:
class DogDataset(torch.utils.data.Dataset):
    """
    Dataset for pytorch batch loading, so that only a few images are held in
    memory at a time instead of all the images at once.
    Extends from torch.utils.data.Dataset

    The labels frame is read as follows: the 'id' column gives the file name
    (without '.jpg'), and every column after the first one is treated as a
    per-class score whose arg-max is the class index of the sample.
    """
    def __init__(self, images_directory, labels, transform):
        """
        Constructor initialization.
        Params:
            - images_directory: The directory where the images are stored (used as a string prefix, so it should end with a path separator).
            - labels: The image labels (DataFrame described in the class docstring).
            - transform: The transformations to perform on the data, or a falsy value to return the PIL image unchanged.
        """
        self.images_directory = images_directory
        self.labels = labels
        self.transform = transform


    def __len__(self):
        """
        Returns the total number of samples.
        """
        return len(self.labels)


    def __getitem__(self, index):
        """
        Load the sample at the given position.
        Params:
            - index: Positional row index into the labels frame.
        Returns:
            - [image, label]: the (optionally transformed) RGB image and its class index.
        Raises:
            - ValueError: if the dataset was created without labels.
        """
        if self.labels is None:
            # Previously this fell through and returned None, which only
            # surfaced later as an obscure batching error.
            raise ValueError('DogDataset was created without labels; cannot load a sample.')

        image_name = f'{self.labels["id"].iloc[index]}.jpg'
        full_image_name = self.images_directory + image_name

        # convert('RGB') guarantees three channels (grayscale / RGBA / CMYK files
        # would otherwise break the 3-channel Normalize step) and forces the
        # pixel data to be read, so the file can be closed right away.
        with Image.open(full_image_name) as raw_image:
            final_image = raw_image.convert('RGB')

        label = self.labels.iloc[index, 1:].astype('float').to_numpy()
        label = np.argmax(label)

        if self.transform:
            final_image = self.transform(final_image)

        return [final_image, label]
            

In [None]:
num_workers = 4
batch_size = 100
use_cuda = torch.cuda.is_available()

# All three splits come out of the labelled data, so every dataset reads its
# files from the 'train/' folder; they differ only in rows and transforms.
train_img = DogDataset(
    IMAGES_PATH + 'train/', training_data, transform=img_transform['train'])
valid_img = DogDataset(
    IMAGES_PATH + 'train/', validation_data, transform=img_transform['valid'])
test_img = DogDataset(
    IMAGES_PATH + 'train/', testing_data, transform=img_transform['test'])

# Batch loaders keyed by split name.
dataloaders = {
    'train': torch.utils.data.DataLoader(
        train_img, batch_size=batch_size, shuffle=True, num_workers=num_workers),
    'valid': torch.utils.data.DataLoader(
        valid_img, batch_size=batch_size, shuffle=False, num_workers=num_workers),
    'test': torch.utils.data.DataLoader(
        test_img, batch_size=batch_size, shuffle=True, num_workers=num_workers),
}

In [None]:
resnet50 = models.resnet50(pretrained=True)

# Freeze every pretrained weight; only the replacement head created below
# keeps requires_grad=True and is therefore trained.
resnet50.requires_grad_(False)

# Swap the final fully connected layer for a fresh 120-output classifier.
num_features = resnet50.fc.in_features
resnet50.fc = torch.nn.Linear(num_features, 120)

# Move the model to the GPU when one is available.
if use_cuda:
    resnet50 = resnet50.cuda()

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 56.3MB/s]


In [None]:
loss_function = nn.CrossEntropyLoss()
# Hand the optimizer only the parameters that are still trainable.
grad_weights = [w for w in resnet50.parameters() if w.requires_grad]
optimizer = torch.optim.Adam(grad_weights, lr=0.01, weight_decay=0.01)

In [None]:
def _evaluate(model, loader, criterion, use_cuda):
    """Run one gradient-free pass in eval mode; return (mean batch loss, correct, total)."""
    mean_loss = 0.0
    correct = 0
    total = 0

    model.eval()
    with torch.no_grad():
        for batch_index, (images, labels) in enumerate(loader):
            if use_cuda:
                images, labels = images.cuda(), labels.cuda()

            outputs = model(images)
            loss = criterion(outputs, labels)
            # Incremental mean over the batches seen so far.
            mean_loss += (loss.item() - mean_loss) / (batch_index + 1)

            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return mean_loss, correct, total


def train(n_epochs, img_transforms, model, optimizer, criterion, use_cuda):
    """
    Train the model and report validation accuracy and loss after every epoch.
    Parameters:
        - n_epochs: Number of passes over the training data.
        - img_transforms: Mapping with a 'train' and a 'valid' entry, each an iterable of (images, labels) batches (despite its name this is the dict of data loaders, not of transforms).
        - model: The network to optimise; it is updated in place.
        - optimizer: Optimizer holding the trainable parameters of the model.
        - criterion: Loss called as criterion(output, label).
        - use_cuda: If True every batch is moved to the GPU before the forward pass.
    Returns:
        - The same model object, left in eval mode after the last validation pass.
    """
    for epoch in range(1, n_epochs + 1):
        loss_during_train = 0.0

        model.train()

        for index_batch, (image, label) in enumerate(img_transforms['train']):
            if use_cuda:
                image, label = image.cuda(), label.cuda()

            optimizer.zero_grad()
            output = model(image)

            loss = criterion(output, label)
            loss.backward()
            optimizer.step()

            # Incremental mean of the batch losses; .item() keeps it a plain
            # float instead of a tensor.
            loss_during_train += (loss.item() - loss_during_train) / (index_batch + 1)

            if index_batch % 10 == 0:
                print(f'Epoch: {epoch} \tBatch: {index_batch + 1} \tTraining Loss: {loss_during_train:.2f}')

        # One validation pass, in eval mode and without gradients. Measuring
        # accuracy while still in train mode would make BatchNorm use batch
        # statistics and update its running statistics from validation data.
        loss_during_validation, correct, total = _evaluate(
            model, img_transforms['valid'], criterion, use_cuda)

        # An empty validation loader reports 0.0 instead of dividing by zero.
        accuracy = 100 * correct / total if total else 0.0
        print(f'Accuracy of the network on the {total} validation images: {accuracy} %')

        print(f'Epoch: {epoch} \tTraining Loss: {loss_during_train:.2f} \tValidation Loss: {loss_during_validation:.2f}')

    return model

In [None]:
# First training run: 20 epochs on the frozen-backbone ResNet-50.
n_epochs = 20

output_model = train(
    n_epochs=n_epochs,
    img_transforms=dataloaders,
    model=resnet50,
    optimizer=optimizer,
    criterion=loss_function,
    use_cuda=use_cuda,
)

Epoch: 1 	Batch: 1 	Training Loss: 4.83
Epoch: 1 	Batch: 11 	Training Loss: 13.02
Epoch: 1 	Batch: 21 	Training Loss: 13.17
Epoch: 1 	Batch: 31 	Training Loss: 11.89
Epoch: 1 	Batch: 41 	Training Loss: 10.41
Epoch: 1 	Batch: 51 	Training Loss: 9.16
Epoch: 1 	Batch: 61 	Training Loss: 8.18
Accuracy of the network on the 1533 validation images: 54.403131115459885 %
Epoch: 1 	Training Loss: 7.78 	Validation Loss: 1.81
Epoch: 2 	Batch: 1 	Training Loss: 2.46
Epoch: 2 	Batch: 11 	Training Loss: 2.79
Epoch: 2 	Batch: 21 	Training Loss: 2.79
Epoch: 2 	Batch: 31 	Training Loss: 2.80
Epoch: 2 	Batch: 41 	Training Loss: 2.73
Epoch: 2 	Batch: 51 	Training Loss: 2.65
Epoch: 2 	Batch: 61 	Training Loss: 2.60
Accuracy of the network on the 1533 validation images: 57.142857142857146 %
Epoch: 2 	Training Loss: 2.59 	Validation Loss: 1.68
Epoch: 3 	Batch: 1 	Training Loss: 2.80
Epoch: 3 	Batch: 11 	Training Loss: 2.85
Epoch: 3 	Batch: 21 	Training Loss: 2.84
Epoch: 3 	Batch: 31 	Training Loss: 2.72
Epo

In [None]:
def redirect_error():
    """
    Silence everything written to sys.stderr by pointing it at the null device.

    The redirection is not undone by this function. Calling it again while
    stderr already points at an open null-device stream is a no-op, so repeated
    calls no longer open (and leak) a new file handle each time.
    """
    current = sys.stderr
    already_silenced = (
        getattr(current, 'name', None) == os.devnull
        and not getattr(current, 'closed', True)
    )
    if already_silenced:
        return

    # os.devnull is '/dev/null' on POSIX and the matching device elsewhere.
    sys.stderr = open(os.devnull, 'w')

In [None]:
redirect_error()

# Inference only: switch to eval mode explicitly (rather than relying on the
# mode the last training call left behind) and skip gradient tracking.
resnet50.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in dataloaders['test']:
        if use_cuda:
            images = images.cuda()
            labels = labels.cuda()

        outputs = resnet50(images)

        # Predicted class = index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the {total} testing images: {100 * correct / total: .2f} %')

Accuracy of the network on the 2172 testing images:  52.90 %


# The accuracy is better than a random guess. But let's run more epochs and see the results.

In [None]:
# Second training run: 20 more epochs, continuing from the current weights.
n_epochs = 20

output_model = train(
    n_epochs=n_epochs,
    img_transforms=dataloaders,
    model=resnet50,
    optimizer=optimizer,
    criterion=loss_function,
    use_cuda=use_cuda,
)

Epoch: 1 	Batch: 1 	Training Loss: 3.36
Epoch: 1 	Batch: 11 	Training Loss: 3.62
Epoch: 1 	Batch: 21 	Training Loss: 3.39
Epoch: 1 	Batch: 31 	Training Loss: 3.29
Epoch: 1 	Batch: 41 	Training Loss: 3.23
Epoch: 1 	Batch: 51 	Training Loss: 3.30
Epoch: 1 	Batch: 61 	Training Loss: 3.32
Accuracy of the network on the 1533 validation images: 52.707110241356816 %
Epoch: 1 	Training Loss: 3.31 	Validation Loss: 2.19
Epoch: 2 	Batch: 1 	Training Loss: 3.73
Epoch: 2 	Batch: 11 	Training Loss: 3.74
Epoch: 2 	Batch: 21 	Training Loss: 3.53
Epoch: 2 	Batch: 31 	Training Loss: 3.33
Epoch: 2 	Batch: 41 	Training Loss: 3.21
Epoch: 2 	Batch: 51 	Training Loss: 3.13
Epoch: 2 	Batch: 61 	Training Loss: 3.09
Accuracy of the network on the 1533 validation images: 58.96934116112198 %
Epoch: 2 	Training Loss: 3.07 	Validation Loss: 1.61
Epoch: 3 	Batch: 1 	Training Loss: 2.28
Epoch: 3 	Batch: 11 	Training Loss: 3.06
Epoch: 3 	Batch: 21 	Training Loss: 2.93
Epoch: 3 	Batch: 31 	Training Loss: 2.97
Epoch: 3

## Let's see the accuracy now. We see only small changes in the training and validation loss.

In [None]:
redirect_error()

# Inference only: switch to eval mode explicitly (rather than relying on the
# mode the last training call left behind) and skip gradient tracking.
resnet50.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in dataloaders['test']:
        if use_cuda:
            images = images.cuda()
            labels = labels.cuda()

        outputs = resnet50(images)

        # Predicted class = index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the {total} testing images: {100 * correct / total: .2f} %')

Accuracy of the network on the 2172 testing images:  54.83 %


# Great! The accuracy improved, albeit only slightly. But let's run another 20 epochs.

In [None]:
# Third training run, reusing the n_epochs value set earlier.
output_model = train(
    n_epochs=n_epochs,
    img_transforms=dataloaders,
    model=resnet50,
    optimizer=optimizer,
    criterion=loss_function,
    use_cuda=use_cuda,
)

Epoch: 1 	Batch: 1 	Training Loss: 4.16
Epoch: 1 	Batch: 11 	Training Loss: 3.39
Epoch: 1 	Batch: 21 	Training Loss: 3.33
Epoch: 1 	Batch: 31 	Training Loss: 3.29
Epoch: 1 	Batch: 41 	Training Loss: 3.23
Epoch: 1 	Batch: 51 	Training Loss: 3.10
Epoch: 1 	Batch: 61 	Training Loss: 3.06
Accuracy of the network on the 1533 validation images: 58.31702544031311 %
Epoch: 1 	Training Loss: 3.07 	Validation Loss: 1.72
Epoch: 2 	Batch: 1 	Training Loss: 2.78
Epoch: 2 	Batch: 11 	Training Loss: 3.03
Epoch: 2 	Batch: 21 	Training Loss: 3.02
Epoch: 2 	Batch: 31 	Training Loss: 3.00
Epoch: 2 	Batch: 41 	Training Loss: 2.91
Epoch: 2 	Batch: 51 	Training Loss: 2.91
Epoch: 2 	Batch: 61 	Training Loss: 2.89
Accuracy of the network on the 1533 validation images: 52.38095238095238 %
Epoch: 2 	Training Loss: 2.90 	Validation Loss: 1.98
Epoch: 3 	Batch: 1 	Training Loss: 3.64
Epoch: 3 	Batch: 11 	Training Loss: 3.72
Epoch: 3 	Batch: 21 	Training Loss: 3.75
Epoch: 3 	Batch: 31 	Training Loss: 3.55
Epoch: 3 

## The loss seems to be oscillating now, so let's stop here. We could also try a different optimizer and see whether it decreases the loss.

## Final Accuracy

In [None]:
redirect_error()

correct = 0
total = 0

# Per-sample predictions and ground truth, collected for the sklearn metrics
# computed in the following cells.
final_predictions = []
final_labels = []

# Inference only: switch to eval mode explicitly (rather than relying on the
# mode the last training call left behind) and skip gradient tracking.
resnet50.eval()
with torch.no_grad():
    for images, labels in dataloaders['test']:
        if use_cuda:
            images = images.cuda()
            labels = labels.cuda()

        outputs = resnet50(images)

        # Predicted class = index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)

        final_predictions.extend(predicted.tolist())
        final_labels.extend(labels.tolist())

        correct += (predicted == labels).sum().item()


print(f'Accuracy of the network on the {total} testing images: {100 * correct / total: .2f} %')

Accuracy of the network on the 2172 testing images:  58.84 %


In [None]:
# Overall accuracy plus macro-averaged precision / recall / F1 on the test split.
acc_score = accuracy_score(final_labels, final_predictions)
precision, recall, f1 = (
    metric(final_labels, final_predictions, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

In [None]:
# Confusion matrix of true labels (first argument) against predictions; the bare
# `cm` expression makes the notebook display it.
cm = confusion_matrix(final_labels, final_predictions)
cm

array([[11,  0,  0, ...,  0,  0,  0],
       [ 0, 24,  0, ...,  0,  0,  0],
       [ 0,  0, 20, ...,  0,  0,  0],
       ...,
       [ 0,  0,  0, ..., 14,  0,  0],
       [ 0,  0,  0, ...,  0, 10,  0],
       [ 0,  0,  0, ...,  0,  0, 11]])

In [None]:
# classification_report returns an already formatted multi-line string, so it
# is printed directly; pprint would show the string's repr (quotes and literal
# '\n' sequences) instead of the table.
print(classification_report(final_labels, final_predictions))

('              precision    recall  f1-score   support\n'
 '\n'
 '           0       1.00      0.92      0.96        12\n'
 '           1       0.77      0.96      0.86        25\n'
 '           2       1.00      0.91      0.95        22\n'
 '           3       0.93      0.78      0.85        18\n'
 '           4       0.19      0.95      0.32        20\n'
 '           5       0.00      0.00      0.00        15\n'
 '           6       1.00      0.48      0.65        23\n'
 '           7       1.00      0.59      0.74        17\n'
 '           8       1.00      0.37      0.54        19\n'
 '           9       0.67      0.62      0.65        16\n'
 '          10       1.00      0.65      0.79        17\n'
 '          11       0.91      0.72      0.81        29\n'
 '          12       0.67      0.67      0.67        15\n'
 '          13       1.00      0.09      0.17        22\n'
 '          14       0.00      0.00      0.00        12\n'
 '          15       1.00      0.62      0.77     

In [None]:
# One-line summary of the metrics computed above, rounded to two decimals.
print(f'Accuracy: {acc_score:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F1-Score: {f1:.2f}')

Accuracy: 0.59, Precision: 0.70, Recall: 0.58, F1-Score: 0.55


In [None]:
# Save only the weights (state_dict) to the current working directory.
torch.save(resnet50.state_dict(), 'resnet50_model.pth')

In [None]:
# Save the whole model object. The file is named after the model it actually
# contains; the previous 'efficientnet_full_model.pth' name was misleading and
# could overwrite a checkpoint written by the EfficientNet notebook.
torch.save(resnet50, 'resnet50_full_model.pth')