# CV-703, Assignment 1

## Libraries

In [1]:
import numpy as np
import math

from tqdm import tqdm

import PIL
from PIL import Image

import torch
import torch.optim as optim
from torchvision import models
from torchvision import transforms
import torchvision.transforms as T

import pandas as pd

from models_to_finetune import deit_small_patch16_224, deit_base_patch16_224, resnet50

from datasets import CUBDataset, DOGDataset, FOODDataset

import sys

import os

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


## Setup a dataset
Uncomment that one dataset you need

### CUB-200-2011 (Birds): Dataset

In [3]:
classes_number = 200


In [4]:
data_root = "/apps/local/shared/CV703/datasets/CUB/CUB_200_2011/"

mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)


# Write data transform here as per the requirement
data_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])

train_dataset = CUBDataset(image_root_path=f"{data_root}", transform=data_transform, split="train")
test_dataset = CUBDataset(image_root_path=f"{data_root}", transform=data_transform, split="test")
print('Number of train samples:', len(train_dataset))
print('Number of test samples:', len(test_dataset))


# Load in into the torch dataloader to get variable batch size, shuffle 
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, drop_last=True, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, drop_last=False, shuffle=True)



Number of train samples: 5994
Number of test samples: 5794


In [5]:
len(train_loader), len(test_loader)

(187, 182)

In [6]:
for i, (inputs, labels) in enumerate(train_loader):
    print(inputs.shape)
    print(labels)
    print('='*50)
    break

torch.Size([32, 3, 224, 224])
tensor([149, 190, 181, 127,  38,  61, 145, 174,  13, 108, 144,  86,  88, 156,
        182,  54, 122, 153,  49, 172,  10,  33, 197, 120,  22, 139,  77,  15,
         67, 102,  25, 175])


### Stanford Dogs: Dataset

In [7]:
classes_number = 120

In [8]:
data_root = "/apps/local/shared/CV703/datasets/dog/"


mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

data_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])


train_dataset = DOGDataset(image_root_path=f"{data_root}", transform=data_transform, split="train")
test_dataset = DOGDataset(image_root_path=f"{data_root}", transform=data_transform, split="test")
print('Number of train samples:', len(train_dataset))
print('Number of test samples:', len(test_dataset))

# Load in into the torch dataloader to get variable batch size, shuffle 
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, drop_last=True, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, drop_last=False, shuffle=True)

Number of train samples: 12000
Number of test samples: 8580


In [9]:
len(train_loader), len(test_loader)

(375, 269)

In [10]:
for i, (inputs, labels) in enumerate(test_loader):
    print(inputs.shape)
    print(labels)
    print('='*50)
    break

torch.Size([32, 3, 224, 224])
tensor([106,  96,  79,  57,  92,  59, 104,  19, 103,  40,   0, 118, 102,  86,
         98,  57,  27,  57,  85,  61,  17,  65, 116,  38,  95, 104,  51,   2,
         33,  24,  52,   4])


### CUB-200-2011 + Stanford Dog: concatenated Dataset

In [11]:
classes_number = 320

In [12]:
# CUB:
data_root_bird = "/apps/local/shared/CV703/datasets/CUB/CUB_200_2011/"

mean_bird = (0.485, 0.456, 0.406)
std_bird = (0.229, 0.224, 0.225)


# write data transform here as per the requirement
data_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_bird, std=std_bird)
    ])

train_dataset_cub = CUBDataset(image_root_path=f"{data_root_bird}", transform=data_transform, split="train")
test_dataset_cub = CUBDataset(image_root_path=f"{data_root_bird}", transform=data_transform, split="test")
print('Number of train samples:', len(train_dataset_cub))
print('Number of test samples:', len(test_dataset_cub))

Number of train samples: 5994
Number of test samples: 5794


In [13]:
# Dog:
mean_dog = (0.485, 0.456, 0.406)
std_dog = (0.229, 0.224, 0.225)

data_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_dog, std=std_dog)
    ])


data_root_dog = "/apps/local/shared/CV703/datasets/dog/"

train_dataset_dog = DOGDataset(image_root_path=f"{data_root_dog}", transform=data_transform, split="train")
test_dataset_dog = DOGDataset(image_root_path=f"{data_root_dog}", transform=data_transform, split="test")
print('Number of train samples:', len(train_dataset_dog))
print('Number of test samples:', len(test_dataset_dog))

Number of train samples: 12000
Number of test samples: 8580


In [14]:
# concatenated dataloader for CUB and DOG

train_loader = torch.utils.data.DataLoader(
             torch.utils.data.ConcatDataset([train_dataset_cub, train_dataset_dog]),
             batch_size=32, shuffle=True,
             num_workers=1, pin_memory=True)

test_loader = torch.utils.data.DataLoader(
             torch.utils.data.ConcatDataset([test_dataset_cub, test_dataset_dog]),
             batch_size=32, shuffle=True,
             num_workers=1, pin_memory=True)

In [15]:
len(train_dataset_cub), len(train_dataset_dog), len(train_loader)

(5994, 12000, 563)

In [16]:
len(test_dataset_cub), len(test_dataset_dog), len(test_loader)

(5794, 8580, 450)

In [17]:
for i, (inputs, targets) in enumerate(train_loader):

    print('image :: ', inputs.shape)
    print(targets)
    break

image ::  torch.Size([32, 3, 224, 224])
tensor([104,  74,   8,  87,  60, 115,  82, 123,  29, 171,  20, 149, 107,  55,
         97,  33,  41,  80,  26,  97,  73,   4,  94,   5, 106,  70,  95,  12,
         90,   4, 109, 102])


### FoodX-251 Dataset

In [18]:
classes_number = 251

In [19]:
ds_type = "local" # comment out if using ds from the shared folder
#ds_type = "shared" # comment out if using ds from the local folder


if (ds_type == "local"):
    data_dir = "/home/dmitry.demidov/Documents/Datasets/FoodX-251"

    split = 'train'
    train_df = pd.read_csv(f'{data_dir}/annot/{split}_info.csv', names= ['image_name','label'])
    train_df['path'] = train_df['image_name'].map(lambda x: os.path.join(f'{data_dir}/{split}/{split}_set/', x))

    split = 'val'
    test_df = pd.read_csv(f'{data_dir}/annot/{split}_info.csv', names= ['image_name','label'])
    test_df['path'] = test_df['image_name'].map(lambda x: os.path.join(f'{data_dir}/{split}/{split}_set/', x))

elif (ds_type == "shared"):
    data_dir = "/apps/local/shared/CV703/datasets/FoodX/food_dataset"

    split = 'train'
    train_df = pd.read_csv(f'{data_dir}/annot/{split}_info.csv', names= ['image_name','label'])
    train_df['path'] = train_df['image_name'].map(lambda x: os.path.join(f'{data_dir}/{split}_set/', x))

    split = 'val'
    test_df = pd.read_csv(f'{data_dir}/annot/{split}_info.csv', names= ['image_name','label'])
    test_df['path'] = test_df['image_name'].map(lambda x: os.path.join(f'{data_dir}/{split}_set/', x))

else:
    print("ERROR: Choose dataset type (local/shared)!")

In [20]:
train_dataset = FOODDataset(train_df)
test_dataset = FOODDataset(test_df)
print('Number of train samples:', len(train_dataset))
print('Number of test samples:', len(test_dataset))

# load in into the torch dataloader to get variable batch size, shuffle 
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1024, drop_last=True, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1024, drop_last=False, shuffle=True) # Not enough memory for more than 32

Number of train samples: 118475
Number of test samples: 11994


In [21]:
# Print some statistics about the dataset:

print(len(train_dataset), len(test_dataset))

print(len(train_loader), len(test_loader))

for i, (inputs, labels) in enumerate(train_loader):
    print(inputs.shape)
    print(labels)
    print('='*50)

    break

118475 11994
115 12
torch.Size([1024, 3, 224, 224])
tensor([ 40, 130, 161,  ...,   9,  84,   3])


In [None]:
# # we will use only the last class token (produced by the last block) for transfer learning
# model = deit_base_patch16_224(pretrained=True, use_top_n_heads=8,use_patch_outputs=False).cuda()

# # freeze backbone and add linear classifier on top that
# # for param in model.parameters():
# #     param.requires_grad = True # False
# model.head = torch.nn.Linear(in_features=model.head.in_features, out_features=classes_number)

# model.head.apply(model._init_weights)
# # for param in model.head.parameters():
# #     param.requires_grad = True

In [None]:
# criterion = torch.nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=0.003, betas=(0.5, 0.999))

# #model.train()

# model = model.to(device)

# print(model)

## Training + Evaluation (in parallel)

### Define functions

### Train, test funcs:

In [22]:
def train(device, model, criterion, optimizer, train_dataset, train_loader, epoch, resnet=False):
    print('Training....')
    
    model.train()

    epoch_loss = 0.0
    acc=0.0

    with tqdm(train_loader) as p_bar:
        for samples, targets in p_bar:
            samples = samples.to(device)
            targets = targets.to(device)
            
            if resnet:
                outputs = model(samples)
            else:
                outputs = model(samples, fine_tune=True)

            loss = criterion(outputs, targets)
            loss_value = loss.item()
            if not math.isfinite(loss_value):
                print("Loss is {}, stopping training".format(loss_value))
                sys.exit(1)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += outputs.shape[0] * loss.item()

            acc+=torch.sum(outputs.argmax(dim=-1) == targets).item()

    loss_print = epoch_loss / len(train_dataset)
    acc_print = acc/len(train_dataset)
    epoch_print = epoch+1

    print("Epoch:", epoch_print, "|", "Loss:", loss_print)
    print("Train Accuracy:{0:.3%}".format(acc_print))

    f = open("./models/statistics.txt", "a")
    text_train = "Epoch: " + str(epoch_print) + ", " + "Train Loss: " + str(loss_print) + ", " + "Train Accuracy: " + str(acc_print) + "\n"
    f.write(text_train)
    f.close()
    
    torch.save({'state_dict': model.state_dict()}, './models/model_{0}ep_{1:.2}loss.pt'.format(epoch_print, loss_print))

    sent_results(text=text_train)

    #del samples
    #del targets



def test(device, model, criterion, test_dataset, test_loader, model_path = './folder/path.pt', test_only = False, resnet=False):
    print('Testing....')

    if test_only:
        print('Test only!')
        state_dict = torch.load(model_path)['state_dict']
        model.load_state_dict(state_dict)
        model = model.to(device)

    model.eval()

    epoch_loss = 0.0
    acc=0.0

    with tqdm(test_loader) as p_bar:
        for samples, targets in p_bar:
            samples = samples.to(device)
            targets = targets.to(device)
            
            if resnet:
                outputs = model(samples)
            else:
                outputs = model(samples, fine_tune=False)

            loss = criterion(outputs, targets)
            epoch_loss += outputs.shape[0] * loss.item()

            acc+=torch.sum(outputs.argmax(dim=-1) == targets).item()

    acc_print = acc/len(test_dataset)
    loss_print = epoch_loss / len(test_dataset)

    print("Test Loss:", loss_print)
    print('Test Accuracy:{0:.3%}'.format(acc_print))

    f = open("./models/statistics.txt", "a")
    text_test = "Test Loss: "+ str(loss_print) + ", " + "Test Accuracy: " + str(acc_print) + "\n" + "\n"
    f.write(text_test)
    f.close()

    sent_results(text=text_test)

    #del samples
    #del targets

### Send results to an email (optional):

In [23]:
from utils import sent_results

# Test:
sent_results(text='Start')

'Email sent successfully!'

### Prepare a model

#### DeiT

In [24]:
model = deit_base_patch16_224(pretrained=True, use_top_n_heads=12,use_patch_outputs=False) #.cuda()
#model = deit_small_patch16_224(pretrained=True, use_top_n_heads=8,use_patch_outputs=False).cuda()

# # Freeze backbone:
# for param in model.parameters():
#     param.requires_grad = True # original: False

# Add linear classifier on top:
model.head = torch.nn.Linear(in_features=model.head.in_features, out_features=classes_number)
model.head.apply(model._init_weights)
# for param in model.head.parameters():
#     param.requires_grad = True

#print(model)

_IncompatibleKeys(missing_keys=['head.weight', 'head.bias'], unexpected_keys=[])


Linear(in_features=9216, out_features=251, bias=True)

#### Resnet

In [25]:
# def init_weights(m):
#     if isinstance(m, torch.nn.Linear):
#         torch.nn.init.xavier_uniform_(m.weight)
#         m.bias.data.fill_(0.01)

model = models.resnet50(pretrained=True) #.cuda()
model.fc = torch.nn.Linear(in_features=model.fc.in_features, out_features=classes_number, bias=True)
#model.fc.apply(init_weights)

for param in model.fc.parameters():
    param.requires_grad = True # original: False

#print(model)

#### Resnet v2

In [26]:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context


import timm

model = timm.create_model('inception_resnet_v2', pretrained=True, num_classes=classes_number) #.cuda()

for param in model.parameters():
    param.requires_grad = True # original: False

#print(model)

### Start Training + validation

#### DeiT

In [None]:
epochs = 30
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999))

model = model.to(device)

for epoch in range(epochs):
    train (device=device, model=model, criterion=criterion, optimizer=optimizer, 
        train_dataset=train_dataset, train_loader=train_loader, epoch=epoch)

    test (device=device, model=model, criterion=criterion, 
        test_dataset=test_dataset, test_loader=test_loader, test_only = False) #, model_path=model_path)

#### Resnet

In [None]:
epochs = 30
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999))

model = model.to(device)

for epoch in range(epochs):
    train (device=device, model=model, criterion=criterion, optimizer=optimizer, 
        train_dataset=train_dataset, train_loader=train_loader, epoch=epoch, resnet=True)

    test (device=device, model=model, criterion=criterion, 
        test_dataset=test_dataset, test_loader=test_loader, test_only = False, resnet=True) #, model_path=model_path)

## Hybrid model (Concatenated version)

### Prepare a model

In [27]:
import timm

import ssl
ssl._create_default_https_context = ssl._create_unverified_context


class MyModel(torch.nn.Module):

    def __init__(self, cnn, deit):

        super(MyModel,self).__init__()
        
        self.cnn = cnn
        for param in self.cnn.parameters():
            param.requires_grad = False # original: False
        
        self.cnn.fc = torch.nn.Linear(in_features=self.cnn.fc.in_features, out_features=1024, bias=True)
        #model.fc.apply(init_weights)
        for param in self.cnn.fc.parameters():
            param.requires_grad = True # original: False

        #print(self.cnn)

        
        self.deit = deit
        # Freeze backbone:
        for param in self.deit.parameters():
            param.requires_grad = False # original: False

        self.deit.head = torch.nn.Linear(in_features=self.deit.head.in_features, out_features=1024)
        self.deit.head.apply(self.deit._init_weights)
        # Unfreeze linear classifier on top:
        for param in self.deit.head.parameters():
            param.requires_grad = True

        #print(self.deit)


        self.fc1 = torch.nn.Linear(in_features=2048, out_features=251)
        #self.fc2 = torch.nn.Linear(4096, 251)

        self.relu = torch.nn.ReLU()

        self.drop1 = torch.nn.Dropout(p=0.1)
        #self.drop5 = torch.nn.Dropout(p=0.5)

        self.bn1 = torch.nn.BatchNorm1d(num_features=1024)
        self.bn2 = torch.nn.BatchNorm1d(num_features=2048)


    def forward(self,x):

        cnn_out = self.cnn(x)
        #print(cnn_out.shape)
        cnn_out = self.bn1(cnn_out)
        cnn_out = self.relu(cnn_out)
        #cnn_out = self.drop1(cnn_out)

        deit_out = self.deit(x, fine_tune=False)
        #print(deit_out.shape)
        deit_out = self.bn1(deit_out)
        deit_out = self.relu(deit_out)
        #deit_out = self.drop1(deit_out)

        concat = torch.cat((cnn_out, deit_out), dim=1)
        #concat = self.bn2(concat)
        out = self.drop1(concat)
        #out = self.relu(out)

        out = self.fc1(out)
        #out = self.drop(out)
        #out = self.relu(out)

        #out = self.fc2(out)

        return out

In [28]:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context


deit = deit_base_patch16_224(pretrained=True, use_top_n_heads=12,use_patch_outputs=False)
deit.head = torch.nn.Linear(in_features=deit.head.in_features, out_features=classes_number)
deit_model_path = './models/7_model_0.0001lr_40ep_256bs_12hs_62.748acc_0.9beta(11hrs)/model_24ep_1.2loss.pt'
deit_state_dict = torch.load(deit_model_path)['state_dict']
deit.load_state_dict(deit_state_dict)

#cnn = timm.create_model('inception_resnet_v2', pretrained=True, num_classes=251)
cnn = models.resnet50(pretrained=True) #.cuda()
cnn.fc = torch.nn.Linear(in_features=cnn.fc.in_features, out_features=classes_number, bias=True)
cnn_model_path = './models/8_Resnet_model_0.0001lr_10ep_256bs_12hs_60.81acc_0.9beta/model_3ep_1.3loss.pt'
cnn_state_dict = torch.load(cnn_model_path)['state_dict']
cnn.load_state_dict(cnn_state_dict)


model = MyModel(deit=deit, cnn=cnn)

_IncompatibleKeys(missing_keys=['head.weight', 'head.bias'], unexpected_keys=[])


### Start Trainig + Validation (in parallel)

In [12]:
lr = 0.0003
# criterion = FocalLoss().to(device)
epochs = 30
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.5, 0.999))

model = model.to(device)

for epoch in range(epochs):
    train (device=device, model=model, criterion=criterion, optimizer=optimizer, 
        train_dataset=train_dataset, train_loader=train_loader, epoch=epoch, resnet=True)

    test (device=device, model=model, criterion=criterion, 
        test_dataset=test_dataset, test_loader=test_loader, test_only = False, resnet=True) #, model_path=model_path)

    if ((epoch+1)%3==0):
        lr = lr / epochs

Training....


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
100%|██████████| 115/115 [19:34<00:00, 10.21s/it]


Epoch: 1 | Loss: 1.6328958998593783
Train Accuracy:68.364%
Testing....


100%|██████████| 12/12 [01:53<00:00,  9.43s/it]


Test Loss: 1.826009660556711
Test Accuracy:60.747%
Training....


100%|██████████| 115/115 [18:59<00:00,  9.91s/it]


Epoch: 2 | Loss: 0.852160435886065
Train Accuracy:78.904%
Testing....


100%|██████████| 12/12 [01:47<00:00,  8.97s/it]


Test Loss: 1.6772599867309792
Test Accuracy:60.722%
Training....


100%|██████████| 115/115 [17:50<00:00,  9.31s/it]


Epoch: 3 | Loss: 0.687131504268932
Train Accuracy:82.240%
Testing....


100%|██████████| 12/12 [01:47<00:00,  8.98s/it]


Test Loss: 1.5930457393865058
Test Accuracy:61.889%
Training....


100%|██████████| 115/115 [17:39<00:00,  9.21s/it]


Epoch: 4 | Loss: 0.5700565047222845
Train Accuracy:85.121%
Testing....


100%|██████████| 12/12 [01:49<00:00,  9.15s/it]


Test Loss: 1.5832301511433753
Test Accuracy:61.406%
Training....


100%|██████████| 115/115 [17:59<00:00,  9.39s/it]


Epoch: 5 | Loss: 0.4729790869521853
Train Accuracy:87.551%
Testing....


100%|██████████| 12/12 [01:47<00:00,  8.97s/it]


Test Loss: 1.5794579389593453
Test Accuracy:61.064%
Training....


100%|██████████| 115/115 [17:53<00:00,  9.33s/it]


Epoch: 6 | Loss: 0.38528377239149214
Train Accuracy:89.942%
Testing....


100%|██████████| 12/12 [01:48<00:00,  9.06s/it]


Test Loss: 1.6117299350476453
Test Accuracy:60.205%
Training....


100%|██████████| 115/115 [17:47<00:00,  9.28s/it]


Epoch: 7 | Loss: 0.30871968758964013
Train Accuracy:92.231%
Testing....


100%|██████████| 12/12 [01:47<00:00,  8.94s/it]


Test Loss: 1.6603730330770168
Test Accuracy:58.971%
Training....


100%|██████████| 115/115 [17:48<00:00,  9.29s/it]


Epoch: 8 | Loss: 0.24400462053777497
Train Accuracy:94.106%
Testing....


100%|██████████| 12/12 [01:49<00:00,  9.13s/it]


Test Loss: 1.6738727401649434
Test Accuracy:58.921%
Training....


100%|██████████| 115/115 [17:48<00:00,  9.29s/it]


Epoch: 9 | Loss: 0.18901350869385847
Train Accuracy:95.674%
Testing....


100%|██████████| 12/12 [01:48<00:00,  9.05s/it]


Test Loss: 1.7756556890518522
Test Accuracy:57.254%
Training....


100%|██████████| 115/115 [17:55<00:00,  9.35s/it]


Epoch: 10 | Loss: 0.14236585904838012
Train Accuracy:97.021%
Testing....


100%|██████████| 12/12 [01:47<00:00,  8.98s/it]


Test Loss: 1.7388536969641755
Test Accuracy:58.171%
Training....


100%|██████████| 115/115 [17:46<00:00,  9.27s/it]


Epoch: 11 | Loss: 0.10770389071494926
Train Accuracy:97.865%
Testing....


100%|██████████| 12/12 [01:48<00:00,  9.08s/it]


Test Loss: 1.8449679178576956
Test Accuracy:56.587%
Training....


100%|██████████| 115/115 [17:41<00:00,  9.23s/it]


Epoch: 12 | Loss: 0.08061955339472412
Train Accuracy:98.476%
Testing....


100%|██████████| 12/12 [01:47<00:00,  8.97s/it]


Test Loss: 1.8806450983991778
Test Accuracy:56.236%
Training....


100%|██████████| 115/115 [17:47<00:00,  9.28s/it]


Epoch: 13 | Loss: 0.059886080952580155
Train Accuracy:98.872%
Testing....


100%|██████████| 12/12 [01:48<00:00,  9.02s/it]


Test Loss: 1.9308918062321083
Test Accuracy:56.186%
Training....


100%|██████████| 115/115 [17:49<00:00,  9.30s/it]


Epoch: 14 | Loss: 0.046388139758942175
Train Accuracy:99.055%
Testing....


100%|██████████| 12/12 [01:49<00:00,  9.09s/it]


Test Loss: 1.9773734502639695
Test Accuracy:55.461%
Training....


100%|██████████| 115/115 [17:48<00:00,  9.29s/it]


Epoch: 15 | Loss: 0.0363893093955142
Train Accuracy:99.196%
Testing....


100%|██████████| 12/12 [01:47<00:00,  8.96s/it]


Test Loss: 1.9268262550913136
Test Accuracy:56.978%
Training....


100%|██████████| 115/115 [17:40<00:00,  9.22s/it]


Epoch: 16 | Loss: 0.030117911753158425
Train Accuracy:99.233%
Testing....


100%|██████████| 12/12 [01:48<00:00,  9.06s/it]


Test Loss: 1.9671115471518832
Test Accuracy:56.345%
Training....


100%|██████████| 115/115 [17:46<00:00,  9.27s/it]


Epoch: 17 | Loss: 0.025163265195268195
Train Accuracy:99.263%
Testing....


100%|██████████| 12/12 [01:48<00:00,  9.03s/it]


Test Loss: 2.003206626124952
Test Accuracy:56.161%
Training....


100%|██████████| 115/115 [17:48<00:00,  9.29s/it]


Epoch: 18 | Loss: 0.021921610448852977
Train Accuracy:99.288%
Testing....


100%|██████████| 12/12 [01:46<00:00,  8.83s/it]


Test Loss: 2.0329765194114775
Test Accuracy:56.070%
Training....


100%|██████████| 115/115 [17:44<00:00,  9.25s/it]


Epoch: 19 | Loss: 0.018074160614303458
Train Accuracy:99.330%
Testing....


100%|██████████| 12/12 [01:48<00:00,  9.01s/it]


Test Loss: 2.091519903576094
Test Accuracy:54.894%
Training....


100%|██████████| 115/115 [17:45<00:00,  9.26s/it]


Epoch: 20 | Loss: 0.01631536721629819
Train Accuracy:99.304%
Testing....


100%|██████████| 12/12 [01:48<00:00,  9.03s/it]


Test Loss: 2.0543602220372437
Test Accuracy:55.920%
Training....


100%|██████████| 115/115 [18:21<00:00,  9.58s/it]


Epoch: 21 | Loss: 0.01703284069414656
Train Accuracy:99.261%
Testing....


100%|██████████| 12/12 [01:44<00:00,  8.73s/it]


Test Loss: 2.189057146169075
Test Accuracy:54.202%
Training....


100%|██████████| 115/115 [17:40<00:00,  9.22s/it]


Epoch: 22 | Loss: 0.017596712347877054
Train Accuracy:99.221%
Testing....


100%|██████████| 12/12 [01:49<00:00,  9.09s/it]


Test Loss: 2.133411465952232
Test Accuracy:55.436%
Training....


100%|██████████| 115/115 [17:39<00:00,  9.21s/it]


Epoch: 23 | Loss: 0.023583022304968785
Train Accuracy:99.099%
Testing....


100%|██████████| 12/12 [01:45<00:00,  8.81s/it]


Test Loss: 2.2593166538729594
Test Accuracy:53.885%
Training....


100%|██████████| 115/115 [17:48<00:00,  9.29s/it]


Epoch: 24 | Loss: 0.024664284005198305
Train Accuracy:99.092%
Testing....


100%|██████████| 12/12 [01:47<00:00,  8.92s/it]


Test Loss: 2.208838388723832
Test Accuracy:54.594%
Training....


100%|██████████| 115/115 [17:46<00:00,  9.27s/it]


Epoch: 25 | Loss: 0.01588483785068619
Train Accuracy:99.276%
Testing....


100%|██████████| 12/12 [01:45<00:00,  8.80s/it]


Test Loss: 2.277633578892209
Test Accuracy:54.361%
Training....


100%|██████████| 115/115 [17:38<00:00,  9.21s/it]


Epoch: 26 | Loss: 0.012285871173692016
Train Accuracy:99.309%
Testing....


100%|██████████| 12/12 [01:45<00:00,  8.80s/it]


Test Loss: 2.2342845567449126
Test Accuracy:55.253%
Training....


100%|██████████| 115/115 [17:35<00:00,  9.18s/it]


Epoch: 27 | Loss: 0.011149513697820416
Train Accuracy:99.299%
Testing....


100%|██████████| 12/12 [01:46<00:00,  8.88s/it]


Test Loss: 2.9351679451051744
Test Accuracy:46.473%
Training....


100%|██████████| 115/115 [17:40<00:00,  9.22s/it]


Epoch: 28 | Loss: 0.012207813886782116
Train Accuracy:99.259%
Testing....


100%|██████████| 12/12 [01:43<00:00,  8.63s/it]


Test Loss: 2.4493520761581786
Test Accuracy:53.027%
Training....


100%|██████████| 115/115 [17:33<00:00,  9.16s/it]


Epoch: 29 | Loss: 0.03308883852132591
Train Accuracy:98.793%
Testing....


100%|██████████| 12/12 [01:47<00:00,  8.93s/it]


Test Loss: 2.3130396968506965
Test Accuracy:54.769%
Training....


100%|██████████| 115/115 [17:44<00:00,  9.26s/it]


Epoch: 30 | Loss: 0.039471025243543026
Train Accuracy:98.633%
Testing....


100%|██████████| 12/12 [01:46<00:00,  8.90s/it]


Test Loss: 2.197996882213957
Test Accuracy:57.020%


## Test only (optional):
To check a specific model

In [15]:
# It is assumed that a model is defined already
model_path = './models/model_3ep_1.4loss.pt'
criterion = torch.nn.CrossEntropyLoss()

test (device=device, model=model, criterion=criterion, 
    test_dataset=test_dataset, test_loader=test_loader, test_only = True, model_path=model_path)

Testing....
Test only!


100%|██████████| 375/375 [01:26<00:00,  4.31it/s]


Test Loss: 1.7027152970530142
Test Accuracy:58.463%


## Additional

### Focal loss

In [None]:
class FocalLoss(torch.nn.Module):
    def __init__(self, alpha=1, gamma=2, reduce=True):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduce = reduce

    def forward(self, inputs, targets):
        BCE_loss = torch.nn.CrossEntropyLoss()(inputs, targets)

        pt = torch.exp(-BCE_loss)
        F_loss = self.alpha * (1-pt)**self.gamma * BCE_loss

        if self.reduce:
            return torch.mean(F_loss)
        else:
            return F_loss

### Augmentations

In [None]:
#CenterCrop
data_transform = transforms.Compose([
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])

#RandomHorizontal flip
data_transform = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])

#RandomRotation
data_transform = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.RandomRotation(30),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])

### Loss functions

In [None]:
#Label smoothing definition
class LabelSmoothingLoss(torch.nn.Module): 
    def __init__(self, classes=5, smoothing=0.0, dim=-1): 
        super(LabelSmoothingLoss, self).__init__() 
        self.confidence = 1.0 - smoothing 
        self.smoothing = smoothing 
        self.cls = classes 
        self.dim = dim 
    def forward(self, pred, target): 
        pred = pred.log_softmax(dim=self.dim) 
        with torch.no_grad():
            true_dist = torch.zeros_like(pred) 
            true_dist.fill_(self.smoothing / (self.cls - 1)) 
            true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence) 
        return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))
#To use: criterion = LabelSmoothingLoss()


#Focal loss definition
class FocalLoss(torch.nn.Module):
    def __init__(self, alpha=1, gamma=2, reduce=True):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduce = reduce

    def forward(self, inputs, targets):
        BCE_loss = torch.nn.CrossEntropyLoss()(inputs, targets)

        pt = torch.exp(-BCE_loss)
        F_loss = self.alpha * (1-pt)**self.gamma * BCE_loss

        if self.reduce:
            return torch.mean(F_loss)
        else:
            return F_loss
#To use: criterion = FocalLoss()

### DeiT + ResNet-50 (weighted)

In [None]:
class MyEnsembleModel(torch.nn.Module):

    def __init__(self):

        super(MyModel,self).__init__()

        resnet = models.resnet50(pretrained=True)
        resnet_features = resnet.fc.in_features

        self.backbone1 = torch.nn.Sequential(*(list(resnet.children())[:-1]))

        transformer = deit_small_patch16_224(pretrained=True, use_top_n_heads=6,use_patch_outputs=False)
        transformer_features = transformer.head.in_features

        self.backbone2 = transformer

        for name,param in resnet.named_parameters():
            param.requires_grad = False

        for name,param in transformer.named_parameters():
            param.requires_grad = False

        self.resnet_mlp = torch.nn.Linear(in_features=resnet_features, out_features=1024)
        self.transformer_mlp = torch.nn.Linear(in_features=1000, out_features=1024)
        
        #change no.of classes here
        self.common = torch.nn.Linear(in_features=1024, out_features=320)


    def forward(self,x):

        resnet_out1 = self.backbone1(x)

        transformer_out1 = self.backbone2(x,fine_tune=True)

        resnet_out2 = self.resnet_mlp(resnet_out1.reshape(resnet_out1.shape[0],-1))

        transformer_out2 = self.transformer_mlp(transformer_out1)

        outt = self.common(0.5*resnet_out2+0.5*transformer_out2)

        return out
    
#To use: model = MyEnsembleModel()