# CV-703, Assignment 1

## Libraries

In [1]:
import numpy as np
import math

from tqdm import tqdm

import PIL
from PIL import Image

import torch
import torch.optim as optim
from torchvision import models
from torchvision import transforms
import torchvision.transforms as T

import pandas as pd

from models_to_finetune import deit_small_patch16_224, deit_base_patch16_224, resnet50

from datasets import CUBDataset, DOGDataset, FOODDataset

import sys

import os


# from __future__ import print_function, division

# import numpy as np

# import torch
# import torch.nn as nn
# import torch.nn.functional as F
# import torch.optim as optim
# from torch.optim import lr_scheduler

# import torchvision
# from torchvision import datasets, models, transforms
# import torchvision.transforms as T

# from PIL import Image

# import matplotlib.pyplot as plt

# import pandas as pd

# import scipy.io #for dogs dateset

# import time
# import copy

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


## Setup a dataset
Uncomment that one dataset you need

### CUB-200-2011 (Birds): Dataset (old)

In [None]:
classes_number = 200


In [None]:
data_root = "/apps/local/shared/CV703/datasets/CUB/CUB_200_2011/"

mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)


# Write data transform here as per the requirement
data_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])

train_dataset = CUBDataset(image_root_path=f"{data_root}", transform=data_transform, split="train")
test_dataset = CUBDataset(image_root_path=f"{data_root}", transform=data_transform, split="test")
print('Number of train samples:', len(train_dataset))
print('Number of test samples:', len(test_dataset))


# Load in into the torch dataloader to get variable batch size, shuffle 
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, drop_last=True, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, drop_last=False, shuffle=True)



In [None]:
len(train_loader), len(test_loader)

In [None]:
for i, (inputs, labels) in enumerate(train_loader):
    print(inputs.shape)
    print(labels)
    print('='*50)
    break

### Stanford Dogs: Dataset (old)

In [None]:
classes_number = 120

In [None]:
data_root = "/apps/local/shared/CV703/datasets/dog/"


mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

data_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])


train_dataset = DOGDataset(image_root_path=f"{data_root}", transform=data_transform, split="train")
test_dataset = DOGDataset(image_root_path=f"{data_root}", transform=data_transform, split="test")
print('Number of train samples:', len(train_dataset))
print('Number of test samples:', len(test_dataset))

# Load in into the torch dataloader to get variable batch size, shuffle 
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, drop_last=True, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, drop_last=False, shuffle=True)

In [None]:
len(train_loader), len(test_loader)

In [None]:
for i, (inputs, labels) in enumerate(test_loader):
    print(inputs.shape)
    print(labels)
    print('='*50)
    break

### CUB-200-2011 + Stanford Dog: concatenated Dataset (old)

In [None]:
classes_number = 320

In [None]:
# CUB:
data_root_bird = "/apps/local/shared/CV703/datasets/CUB/CUB_200_2011/"

mean_bird = (0.485, 0.456, 0.406)
std_bird = (0.229, 0.224, 0.225)


# write data transform here as per the requirement
data_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_bird, std=std_bird)
    ])

train_dataset_cub = CUBDataset(image_root_path=f"{data_root_bird}", transform=data_transform, split="train")
test_dataset_cub = CUBDataset(image_root_path=f"{data_root_bird}", transform=data_transform, split="test")
print('Number of train samples:', len(train_dataset_cub))
print('Number of test samples:', len(test_dataset_cub))

In [None]:
# Dog:
mean_dog = (0.485, 0.456, 0.406)
std_dog = (0.229, 0.224, 0.225)

data_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_dog, std=std_dog)
    ])


data_root_dog = "/apps/local/shared/CV703/datasets/dog/"

# TODO: Start labels counting from 200 (in case of concatenation only)!
train_dataset_dog = DOGDataset(image_root_path=f"{data_root_dog}", transform=data_transform, split="train")
test_dataset_dog = DOGDataset(image_root_path=f"{data_root_dog}", transform=data_transform, split="test")
print('Number of train samples:', len(train_dataset_dog))
print('Number of test samples:', len(test_dataset_dog))

In [None]:
# concatenated dataloader for CUB and DOG

train_loader = torch.utils.data.DataLoader(
             torch.utils.data.ConcatDataset([train_dataset_cub, train_dataset_dog]),
             batch_size=32, shuffle=True,
             num_workers=1, pin_memory=True)

test_loader = torch.utils.data.DataLoader(
             torch.utils.data.ConcatDataset([test_dataset_cub, test_dataset_dog]),
             batch_size=32, shuffle=True,
             num_workers=1, pin_memory=True)

In [None]:
len(train_dataset_cub), len(train_dataset_dog), len(train_loader)

In [None]:
len(test_dataset_cub), len(test_dataset_dog), len(test_loader)

In [None]:
for i, (inputs, targets) in enumerate(train_loader):

    print('image :: ', inputs.shape)
    print(targets)
    break

### FoodX-251 Dataset

In [3]:
classes_number = 251

In [4]:
ds_type = "local" # comment out if using ds from the shared folder
#ds_type = "shared" # comment out if using ds from the local folder


if (ds_type == "local"):
    data_dir = "/home/dmitry.demidov/Documents/Datasets/FoodX-251"

    split = 'train'
    train_df = pd.read_csv(f'{data_dir}/annot/{split}_info.csv', names= ['image_name','label'])
    train_df['path'] = train_df['image_name'].map(lambda x: os.path.join(f'{data_dir}/{split}/{split}_set/', x))

    split = 'val'
    test_df = pd.read_csv(f'{data_dir}/annot/{split}_info.csv', names= ['image_name','label'])
    test_df['path'] = test_df['image_name'].map(lambda x: os.path.join(f'{data_dir}/{split}/{split}_set/', x))

elif (ds_type == "shared"):
    data_dir = "/apps/local/shared/CV703/datasets/FoodX/food_dataset"

    split = 'train'
    train_df = pd.read_csv(f'{data_dir}/annot/{split}_info.csv', names= ['image_name','label'])
    train_df['path'] = train_df['image_name'].map(lambda x: os.path.join(f'{data_dir}/{split}_set/', x))

    split = 'val'
    test_df = pd.read_csv(f'{data_dir}/annot/{split}_info.csv', names= ['image_name','label'])
    test_df['path'] = test_df['image_name'].map(lambda x: os.path.join(f'{data_dir}/{split}_set/', x))

else:
    print("ERROR: Choose dataset type (local/shared)!")

In [5]:
train_dataset = FOODDataset(train_df)
test_dataset = FOODDataset(test_df)
print('Number of train samples:', len(train_dataset))
print('Number of test samples:', len(test_dataset))

# load in into the torch dataloader to get variable batch size, shuffle 
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, drop_last=True, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, drop_last=False, shuffle=True) # Not enough memory for more than 32

Number of train samples: 118475
Number of test samples: 11994


In [6]:
# Print some statistics about the dataset:

print(len(train_dataset), len(test_dataset))

print(len(train_loader), len(test_loader))

for i, (inputs, labels) in enumerate(train_loader):
    print(inputs.shape)
    print(labels)
    print('='*50)

    break

118475 11994
925 375
torch.Size([128, 3, 224, 224])
tensor([161, 187, 211,  78, 229, 221,  93, 221, 123, 182, 157,  52, 177,  41,
        130, 110, 235,  48,  40,  77,  74,  60,   0, 177, 219, 222, 152, 216,
        104,  16, 173,  40,  41, 241,  41, 135, 226, 185, 168, 207,  48, 230,
         50, 219, 171,  71,  58,  53, 179, 193, 236, 232, 214, 172,  14,  35,
         50, 215,   2,  16,  88,  63, 145,  98, 232, 112, 127, 227,  22, 112,
        148, 203, 152,  20, 136,  91, 101, 169, 145, 232, 125, 206, 138,  27,
        141, 119, 111, 239,  64, 175, 222,  20, 123,  73, 211, 137,  45, 172,
        202, 235, 246,  60,  98, 177, 229, 195, 167, 138,  75, 197, 237,  86,
        146, 113,  44, 208, 141, 231,  89,  21,  47, 176,  53,  94,   8,  10,
        134, 231])


# Old

## Prepare ViT for transfer learning (old)

In [None]:
# # we will use only the last class token (produced by the last block) for transfer learning
# model = deit_base_patch16_224(pretrained=True, use_top_n_heads=8,use_patch_outputs=False).cuda()

# # freeze backbone and add linear classifier on top that
# # for param in model.parameters():
# #     param.requires_grad = True # False
# model.head = torch.nn.Linear(in_features=model.head.in_features, out_features=classes_number)

# model.head.apply(model._init_weights)
# # for param in model.head.parameters():
# #     param.requires_grad = True

In [None]:
# criterion = torch.nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=0.003, betas=(0.5, 0.999))

# #model.train()

# model = model.to(device)

# print(model)

## Training

In [None]:
# epochs = 100
# print('Training....')

# for epoch in range(epochs):
#     epoch_loss = 0.0
#     epoch_acc = 0.0
#     #running_loss = 0.0

#     with tqdm(train_loader) as p_bar:
#         for samples, targets in p_bar:
#             samples = samples.to(device)
#             targets = targets.to(device)
            
#             outputs = model(samples) #, fine_tune=False)
#             loss = criterion(outputs, targets)

#             loss_value = loss.item()
#             if not math.isfinite(loss_value):
#                 print("Loss is {}, stopping training".format(loss_value))
#                 sys.exit(1)

#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()

#             epoch_loss += outputs.shape[0] * loss.item()

#             epoch_acc += torch.sum(outputs.argmax(dim=-1) == targets).item()

#             # print statistics
#             #running_loss += loss.item()

#     # if i % 100 == 99:    # print every 10 mini-batches
#     #     print('[%d, %5d] loss: %.3f' %
#     # (epoch + 1, i + 1, running_loss / 100))
#     # running_loss = 0.0


#     loss = epoch_loss / len(train_dataset)
#     acc = epoch_acc / len(train_dataset)


#     # print statistics
#     print("Epoch:", epoch+1, "|", "Loss:", loss,
#         'Instant Accuracy:{0:.3%}'.format(acc))

#     f = open("./models/statistics.txt", "a")
#     f.write("Epoch: " + str(epoch) + ", " +"Loss: "+ str(loss) + ", "+"Accuracy: "+ str(acc) + "\n")
#     f.close()
    
#     torch.save({'state_dict': model.state_dict()}, './models/model_{0}ep_{1:.1}loss_{2:.3}acc.pt'.format(epoch, loss, acc,))

## Testing

# Training + Evaluation (in parallel)

## Define functions

### Train, test funcs:

In [7]:
def train(device, model, criterion, optimizer, train_dataset, train_loader, epoch, resnet=False):
    print('Training....')
    
    model.train()

    epoch_loss = 0.0
    acc=0.0

    with tqdm(train_loader) as p_bar:
        for samples, targets in p_bar:
            samples = samples.to(device)
            targets = targets.to(device)
            
            if resnet:
                outputs = model(samples)
            else:
                outputs = model(samples, fine_tune=True)

            loss = criterion(outputs, targets)
            loss_value = loss.item()
            if not math.isfinite(loss_value):
                print("Loss is {}, stopping training".format(loss_value))
                sys.exit(1)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += outputs.shape[0] * loss.item()

            acc+=torch.sum(outputs.argmax(dim=-1) == targets).item()

    loss_print = epoch_loss / len(train_dataset)
    acc_print = acc/len(train_dataset)
    epoch_print = epoch+1

    print("Epoch:", epoch_print, "|", "Loss:", loss_print)
    print("Train Accuracy:{0:.3%}".format(acc_print))

    f = open("./models/statistics.txt", "a")
    text_train = "Epoch: " + str(epoch_print) + ", " + "Train Loss: " + str(loss_print) + ", " + "Train Accuracy: " + str(acc_print) + "\n"
    f.write(text_train)
    f.close()
    
    torch.save({'state_dict': model.state_dict()}, './models/model_{0}ep_{1:.2}loss.pt'.format(epoch_print, loss_print))

    sent_results(text=text_train)

    #del samples
    #del targets



def test(device, model, criterion, test_dataset, test_loader, model_path = './folder/path.pt', test_only = False, resnet=False):
    print('Testing....')

    if test_only:
        print('Test only!')
        state_dict = torch.load(model_path)['state_dict']
        model.load_state_dict(state_dict)
        model = model.to(device)

    model.eval()

    epoch_loss = 0.0
    acc=0.0

    with tqdm(test_loader) as p_bar:
        for samples, targets in p_bar:
            samples = samples.to(device)
            targets = targets.to(device)
            
            if resnet:
                outputs = model(samples)
            else:
                outputs = model(samples, fine_tune=False)

            loss = criterion(outputs, targets)
            epoch_loss += outputs.shape[0] * loss.item()

            acc+=torch.sum(outputs.argmax(dim=-1) == targets).item()

    acc_print = acc/len(test_dataset)
    loss_print = epoch_loss / len(test_dataset)

    print("Test Loss:", loss_print)
    print('Test Accuracy:{0:.3%}'.format(acc_print))

    f = open("./models/statistics.txt", "a")
    text_test = "Test Loss: "+ str(loss_print) + ", " + "Test Accuracy: " + str(acc_print) + "\n" + "\n"
    f.write(text_test)
    f.close()

    sent_results(text=text_test)

    #del samples
    #del targets

### Send results to an email (optional):

In [8]:
from utils import sent_results

# Test:
sent_results(text='Start')

'Email sent successfully!'

## Prepare a model

#### DeiT

In [10]:
model = deit_base_patch16_224(pretrained=True, use_top_n_heads=12,use_patch_outputs=False) #.cuda()
#model = deit_small_patch16_224(pretrained=True, use_top_n_heads=8,use_patch_outputs=False).cuda()

# # Freeze backbone:
# for param in model.parameters():
#     param.requires_grad = True # original: False

# Add linear classifier on top:
model.head = torch.nn.Linear(in_features=model.head.in_features, out_features=classes_number)
model.head.apply(model._init_weights)
# for param in model.head.parameters():
#     param.requires_grad = True

print(model)

_IncompatibleKeys(missing_keys=['head.weight', 'head.bias'], unexpected_keys=[])


Linear(in_features=9216, out_features=251, bias=True)

#### Resnet

In [9]:
# def init_weights(m):
#     if isinstance(m, torch.nn.Linear):
#         torch.nn.init.xavier_uniform_(m.weight)
#         m.bias.data.fill_(0.01)

model = models.resnet50(pretrained=True) #.cuda()
model.fc = torch.nn.Linear(in_features=model.fc.in_features, out_features=classes_number, bias=True)
#model.fc.apply(init_weights)

for param in model.fc.parameters():
    param.requires_grad = True # original: False

print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

#### Resnet v2

In [9]:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context


import timm

model = timm.create_model('inception_resnet_v2', pretrained=True, num_classes=classes_number) #.cuda()

for param in model.parameters():
    param.requires_grad = True # original: False

print(model)

InceptionResnetV2(
  (conv2d_1a): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2d_2a): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2d_2b): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (maxpool_3a): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2d_3b): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2d_4a): 

## Start training + validation in parallel

#### DeiT

In [10]:
epochs = 30
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999))

model = model.to(device)

for epoch in range(epochs):
    train (device=device, model=model, criterion=criterion, optimizer=optimizer, 
        train_dataset=train_dataset, train_loader=train_loader, epoch=epoch)

    test (device=device, model=model, criterion=criterion, 
        test_dataset=test_dataset, test_loader=test_loader, test_only = False) #, model_path=model_path)

Training....


  0%|          | 0/462 [00:00<?, ?it/s]


TypeError: forward() got an unexpected keyword argument 'fine_tune'

#### Resnet

In [10]:
epochs = 30
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999))

model = model.to(device)

for epoch in range(epochs):
    train (device=device, model=model, criterion=criterion, optimizer=optimizer, 
        train_dataset=train_dataset, train_loader=train_loader, epoch=epoch, resnet=True)

    test (device=device, model=model, criterion=criterion, 
        test_dataset=test_dataset, test_loader=test_loader, test_only = False, resnet=True) #, model_path=model_path)

Training....


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
100%|██████████| 925/925 [22:44<00:00,  1.48s/it]


Epoch: 1 | Loss: 3.072274742665787
Train Accuracy:35.316%
Testing....


100%|██████████| 375/375 [01:10<00:00,  5.35it/s]


Test Loss: 1.8167234339753011
Test Accuracy:55.561%
Training....


100%|██████████| 925/925 [22:35<00:00,  1.47s/it]


Epoch: 2 | Loss: 1.7861448842982597
Train Accuracy:57.653%
Testing....


100%|██████████| 375/375 [01:09<00:00,  5.43it/s]


Test Loss: 1.5666793703297406
Test Accuracy:60.839%
Training....


100%|██████████| 925/925 [22:30<00:00,  1.46s/it]


Epoch: 3 | Loss: 1.2214946277213514
Train Accuracy:69.502%
Testing....


100%|██████████| 375/375 [01:11<00:00,  5.27it/s]


Test Loss: 1.5586477812675907
Test Accuracy:61.364%
Training....


100%|██████████| 925/925 [22:21<00:00,  1.45s/it]


Epoch: 4 | Loss: 0.8223244413692605
Train Accuracy:78.790%
Testing....


100%|██████████| 375/375 [01:09<00:00,  5.40it/s]


Test Loss: 1.6454278050173476
Test Accuracy:60.022%
Training....


  5%|▍         | 43/925 [01:02<21:28,  1.46s/it]


KeyboardInterrupt: 

## Concat

In [9]:
import timm

import ssl
ssl._create_default_https_context = ssl._create_unverified_context


class MyModel(torch.nn.Module):

    def __init__(self, cnn, deit):

        super(MyModel,self).__init__()

        #self.cnn = timm.create_model('inception_resnet_v2', pretrained=True, num_classes=classes_number).cuda()
        
        self.cnn = cnn
        for param in self.cnn.parameters():
            param.requires_grad = False # original: False
        
        self.cnn.fc = torch.nn.Linear(in_features=self.cnn.fc.in_features, out_features=1024, bias=True)
        #model.fc.apply(init_weights)
        for param in self.cnn.fc.parameters():
            param.requires_grad = True # original: False

        print(self.cnn)


        #resnet = models.resnet50(pretrained=True)
        #resnet_features = resnet.fc.in_features

        #torch.nn.Sequential(*(list(cnn.children())[:-1]))
        # self.cnn.fc = torch.nn.Linear(
        #     self.cnn.fc.in_features, 20)

        #transformer = deit_base_patch16_224(pretrained=True, use_top_n_heads=10,use_patch_outputs=False)
        #transformer_features = transformer.head.in_features


        #self.deit = deit_base_patch16_224(pretrained=True, use_top_n_heads=10,use_patch_outputs=False).cuda()
        #model = deit_small_patch16_224(pretrained=True, use_top_n_heads=8,use_patch_outputs=False).cuda()
        
        self.deit = deit
        # Freeze backbone:
        for param in self.deit.parameters():
            param.requires_grad = False # original: False

        self.deit.head = torch.nn.Linear(in_features=self.deit.head.in_features, out_features=1024)
        self.deit.head.apply(self.deit._init_weights)
        # Unfreeze linear classifier on top:
        for param in self.deit.head.parameters():
            param.requires_grad = True

        print(self.deit)

        #self.deit = deit
        #torch.nn.Sequential(*(list(deit.children())[:-1]))


        # self.deit.head = torch.nn.Linear(in_features=self.deit.head.in_features, out_features=classes_number)
        # self.deit.head.apply(model._init_weights)
        
        # for param in model.head.parameters():
        #     param.requires_grad = True

        # for name,param in cnn.named_parameters():
        #     param.requires_grad = False

        # for name,param in deit.named_parameters():
        #     param.requires_grad = False


        # for param in self.cnn.parameters():
        #     param.requires_grad = False # original: False

        # for param in self.deit.parameters():
        #     param.requires_grad = False # original: False


        # self.resnet_mlp = torch.nn.Linear(in_features=resnet_features, out_features=1024)
        # self.transformer_mlp = torch.nn.Linear(in_features=1000, out_features=1024)

        self.fc1 = torch.nn.Linear(in_features=2048, out_features=251)

        #self.fc2 = torch.nn.Linear(4096, 251)

        self.drop = torch.nn.Dropout(p=0.3)


    def forward(self,x):

        cnn_out = self.cnn(x)
        #print(cnn_out.shape)

        deit_out = self.deit(x, fine_tune=False)
        #print(deit_out.shape)

        concat = torch.cat((cnn_out, deit_out), dim=1)

        out = self.fc1(concat)

        #out = self.drop(out)
        #out = torch.nn.F.relu(out)

        #out = self.fc2(out)

        return out

In [10]:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

#import timm


deit = deit_base_patch16_224(pretrained=True, use_top_n_heads=12,use_patch_outputs=False)
deit.head = torch.nn.Linear(in_features=deit.head.in_features, out_features=classes_number)
deit_model_path = './models/7_model_0.0001lr_40ep_256bs_12hs_62.748acc_0.9beta(11hrs)/model_24ep_1.2loss.pt'
deit_state_dict = torch.load(deit_model_path)['state_dict']
deit.load_state_dict(deit_state_dict)

#cnn = timm.create_model('inception_resnet_v2', pretrained=True, num_classes=251)
cnn = models.resnet50(pretrained=True) #.cuda()
cnn.fc = torch.nn.Linear(in_features=cnn.fc.in_features, out_features=classes_number, bias=True)
cnn_model_path = './models/8_Resnet_model_0.0001lr_10ep_256bs_12hs_60.81acc_0.9beta/model_3ep_1.3loss.pt'
cnn_state_dict = torch.load(cnn_model_path)['state_dict']
cnn.load_state_dict(cnn_state_dict)


model = MyModel(deit=deit, cnn=cnn)

_IncompatibleKeys(missing_keys=['head.weight', 'head.bias'], unexpected_keys=[])
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (down

In [11]:
epochs = 30
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999))

model = model.to(device)

for epoch in range(epochs):
    train (device=device, model=model, criterion=criterion, optimizer=optimizer, 
        train_dataset=train_dataset, train_loader=train_loader, epoch=epoch, resnet=True)

    test (device=device, model=model, criterion=criterion, 
        test_dataset=test_dataset, test_loader=test_loader, test_only = False, resnet=True) #, model_path=model_path)

Training....


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
100%|██████████| 925/925 [19:52<00:00,  1.29s/it]


Epoch: 1 | Loss: 1.4749220126800633
Train Accuracy:66.947%
Testing....


100%|██████████| 375/375 [01:46<00:00,  3.53it/s]


Test Loss: 1.4542030530988086
Test Accuracy:63.515%
Training....


100%|██████████| 925/925 [19:43<00:00,  1.28s/it]


Epoch: 2 | Loss: 0.859748171057523
Train Accuracy:77.689%
Testing....


100%|██████████| 375/375 [01:46<00:00,  3.53it/s]


Test Loss: 1.4907699736511983
Test Accuracy:63.732%
Training....


100%|██████████| 925/925 [19:01<00:00,  1.23s/it]


Epoch: 3 | Loss: 0.7506481002418017
Train Accuracy:80.023%
Testing....


100%|██████████| 375/375 [01:41<00:00,  3.71it/s]


Test Loss: 1.5195567269454067
Test Accuracy:63.515%
Training....


  5%|▌         | 49/925 [01:01<18:38,  1.28s/it]

# Test only (optional):
To check a specific model

In [15]:
# It is assumed that a model is defined already
model_path = './models/model_3ep_1.4loss.pt'
criterion = torch.nn.CrossEntropyLoss()

test (device=device, model=model, criterion=criterion, 
    test_dataset=test_dataset, test_loader=test_loader, test_only = True, model_path=model_path)

Testing....
Test only!


100%|██████████| 375/375 [01:26<00:00,  4.31it/s]


Test Loss: 1.7027152970530142
Test Accuracy:58.463%


# TODO

In [None]:
# class FocalLoss(torch.nn.Module):
#     def __init__(self, alpha=1, gamma=2, reduce=True):
#         super(FocalLoss, self).__init__()
#         self.alpha = alpha
#         self.gamma = gamma
#         self.reduce = reduce

#     def forward(self, inputs, targets):
#         BCE_loss = torch.nn.CrossEntropyLoss()(inputs, targets)

#         pt = torch.exp(-BCE_loss)
#         F_loss = self.alpha * (1-pt)**self.gamma * BCE_loss

#         if self.reduce:
#             return torch.mean(F_loss)
#         else:
#             return F_loss

# criterion = FocalLoss().to(device)

In [None]:
# transforms.CenterCrop(224) # instead of Resize


## List
* Change number of training epochs
* Change number of class tokens e.g, use_top_n_heads=4, etc
* New:
* - Try focal loss (from Rushali)
* - Try some transformations (Flipping, Rotation, CenterCrop ??)
* - Combine resnet and DeiT