In [1]:
import torch
dev = "cuda" if torch.cuda.is_available() else "cpu"
test_image_features = torch.load("features/test_image_features_vitL.pt", map_location = torch.device(dev))
test_text_feature = torch.load("features/test_text_feature_vitL.pt", map_location = torch.device(dev))
all_image_features = torch.load("features/all_image_features_vitL.pt", map_location = torch.device(dev))
all_text_feature = torch.load("features/all_text_feature_vitL.pt", map_location = torch.device(dev))
label_onehot_tensor = torch.load("features/label_onehot_tensor.pt", map_location = torch.device(dev))

In [2]:
test_image_features.shape

torch.Size([10000, 768])

In [3]:
def add_weight_decay(model, weight_decay=1e-4, skip_list=()):
    decay = []
    no_decay = []
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue  # frozen weights
        if len(param.shape) == 1 or name.endswith(".bias") or name in skip_list:
            no_decay.append(param)
        else:
            decay.append(param)
    return [
        {'params': no_decay, 'weight_decay': 0.},
        {'params': decay, 'weight_decay': weight_decay}]

In [4]:
import torch
import torch.nn as nn


class AsymmetricLoss(nn.Module):
    def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disable_torch_grad_focal_loss=True):
        super(AsymmetricLoss, self).__init__()

        self.gamma_neg = gamma_neg
        self.gamma_pos = gamma_pos
        self.clip = clip
        self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss
        self.eps = eps

    def forward(self, x, y):
        """"
        Parameters
        ----------
        x: input logits
        y: targets (multi-label binarized vector)
        """

        # Calculating Probabilities
        
        xs_pos = x
        xs_neg = 1 - x

        # Asymmetric Clipping
        if self.clip is not None and self.clip > 0:
            xs_neg = (xs_neg + self.clip).clamp(max=1)

        # Basic CE calculation
        los_pos = y * torch.log(xs_pos.clamp(min=self.eps))
        los_neg = (1 - y) * torch.log(xs_neg.clamp(min=self.eps))
        loss = los_pos + los_neg

        # Asymmetric Focusing
        if self.gamma_neg > 0 or self.gamma_pos > 0:
            if self.disable_torch_grad_focal_loss:
                torch.set_grad_enabled(False)
            pt0 = xs_pos * y
            pt1 = xs_neg * (1 - y)  # pt = p if t > 0 else 1-p
            pt = pt0 + pt1
            one_sided_gamma = self.gamma_pos * y + self.gamma_neg * (1 - y)
            one_sided_w = torch.pow(1 - pt, one_sided_gamma)
            if self.disable_torch_grad_focal_loss:
                torch.set_grad_enabled(True)
            loss *= one_sided_w

        return -loss.sum()


class AsymmetricLossOptimized(nn.Module):
    ''' Notice - optimized version, minimizes memory allocation and gpu uploading,
    favors inplace operations'''

    def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disable_torch_grad_focal_loss=False):
        super(AsymmetricLossOptimized, self).__init__()

        self.gamma_neg = gamma_neg
        self.gamma_pos = gamma_pos
        self.clip = clip
        self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss
        self.eps = eps

        # prevent memory allocation and gpu uploading every iteration, and encourages inplace operations
        self.targets = self.anti_targets = self.xs_pos = self.xs_neg = self.asymmetric_w = self.loss = None

    def forward(self, x, y):
        """"
        Parameters
        ----------
        x: input logits
        y: targets (multi-label binarized vector)
        """

        self.targets = y
        self.anti_targets = 1 - y

        # Calculating Probabilities
        self.xs_pos = torch.sigmoid(x)
        self.xs_neg = 1.0 - self.xs_pos

        # Asymmetric Clipping
        if self.clip is not None and self.clip > 0:
            self.xs_neg.add_(self.clip).clamp_(max=1)

        # Basic CE calculation
        self.loss = self.targets * torch.log(self.xs_pos.clamp(min=self.eps))
        self.loss.add_(self.anti_targets * torch.log(self.xs_neg.clamp(min=self.eps)))

        # Asymmetric Focusing
        if self.gamma_neg > 0 or self.gamma_pos > 0:
            if self.disable_torch_grad_focal_loss:
                torch.set_grad_enabled(False)
            self.xs_pos = self.xs_pos * self.targets
            self.xs_neg = self.xs_neg * self.anti_targets
            self.asymmetric_w = torch.pow(1 - self.xs_pos - self.xs_neg,
                                          self.gamma_pos * self.targets + self.gamma_neg * self.anti_targets)
            if self.disable_torch_grad_focal_loss:
                torch.set_grad_enabled(True)
            self.loss *= self.asymmetric_w

        return -self.loss.sum()

In [5]:
import numpy as np
from tqdm import tqdm
import torch.nn.functional as F
from torchmetrics import F1Score
from torch import optim
from torch.cuda.amp import GradScaler, autocast

from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
def Trainer(model, Data, epochs, epoch_step_1, epoch_step_2, lr = 1e-3):
    torch.manual_seed(5329)
    #train_data = DataLoader(TensorDataset(Data[:25000], label_onehot_tensor[:25000]), batch_size=25000, shuffle = True)
    #val_data = DataLoader(TensorDataset(Data[25000:], label_onehot_tensor[25000:].to(torch.int32)), batch_size=5000, shuffle = False)
    
    # Change here to switch to the best setting
    train_data = DataLoader(TensorDataset(Data, label_onehot_tensor), batch_size=30000, shuffle = True)
    
    model = model.to(dev)
 
    weight_decay = 2e-4
    criterion = AsymmetricLoss(gamma_neg=0, gamma_pos=0, clip=0, disable_torch_grad_focal_loss=True)
    parameters = add_weight_decay(model, weight_decay)
    opti = optim.Adam(params=parameters, lr=lr, weight_decay=0)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(opti, milestones=[epoch_step_1,epoch_step_2], gamma = 0.1)
    f1 = F1Score(task="multilabel", num_labels = 18).to(dev)

    epoch = epochs
    loss_list = []
    f1_list = []
    scaler = GradScaler()

    for epoch in tqdm(range(epoch), colour = 'GREEN'):
        for data, label in train_data:   
            data, label = data.to(dev), label.to(dev)

            with autocast():  # mixed precision
                output = model(data).float() 

            loss = criterion(output, label)
            model.zero_grad()
            
            scaler.scale(loss).backward()
            scaler.step(opti)
            scaler.update()
            
        loss_list.append(loss)
        
        if epoch % 10 == 0:
            print(loss)
        
        # Comment the code below if you want to switch to the best settings (i.e., no validation data)
#         if epoch % 10 == 0:
#             with torch.autograd.no_grad():
#                 for data_val, label_val in val_data:
#                     data_val, label_val = data_val.to(dev), label_val.to(dev)
#                     predict = model(data_val)
#                     f1_score = f1(predict, label_val)
#                 print('Validation F1 in epoch{} : {:.4f}'.format(epoch, f1_score))
#             f1_list.append(f1_score)
    
    return model, loss_list, f1_list

In [6]:
import torch.nn as nn
import torch.nn.functional as F
class FEATURE_EXTRACTOR(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(768, 2048)
        self.fc2 = nn.Linear(2048, 512)
        self.fc3 = nn.Linear(512, 18)
        self.dropout = nn.Dropout(p = 0.6)

    def forward(self, inputs):
        tensor = F.gelu(self.fc1(inputs))
        tensor = self.dropout(tensor)
        tensor = F.gelu(self.fc2(tensor))
        tensor = self.dropout(tensor)
        tensor = torch.sigmoid(self.fc3(tensor))
        return tensor

class DECISION_MODEL(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(18, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 18)

    def forward(self, inputs):
        tensor = F.gelu(self.fc1(inputs))
        tensor = F.gelu(self.fc2(tensor))
        tensor = torch.sigmoid(self.fc3(tensor))
        return tensor

In [7]:
import os
Net, loss_list, f1_list = Trainer(FEATURE_EXTRACTOR(), all_image_features, 200, 100, 150)
model_dir = './model/'
if not os.path.exists(model_dir):
    os.makedirs(model_dir)
torch.save(Net.state_dict(), os.path.join(model_dir, 'image_model.pth'))
Net.eval()
with torch.autograd.no_grad():
    img_train = Net(all_image_features.to(dev))
    img_test = Net(test_image_features.to(dev))

  0%|[32m▍                                                                                 [0m| 1/200 [00:01<05:58,  1.80s/it][0m

tensor(370904., device='cuda:0', grad_fn=<NegBackward0>)


  6%|[32m████▍                                                                            [0m| 11/200 [00:04<00:56,  3.37it/s][0m

tensor(370884.0625, device='cuda:0', grad_fn=<NegBackward0>)


 10%|[32m████████▌                                                                        [0m| 21/200 [00:07<00:57,  3.14it/s][0m

tensor(138320.4844, device='cuda:0', grad_fn=<NegBackward0>)


 16%|[32m████████████▌                                                                    [0m| 31/200 [00:10<00:50,  3.33it/s][0m

tensor(97185.9297, device='cuda:0', grad_fn=<NegBackward0>)


 20%|[32m████████████████▌                                                                [0m| 41/200 [00:13<00:49,  3.22it/s][0m

tensor(73727.1562, device='cuda:0', grad_fn=<NegBackward0>)


 26%|[32m████████████████████▋                                                            [0m| 51/200 [00:17<00:47,  3.16it/s][0m

tensor(58135.8242, device='cuda:0', grad_fn=<NegBackward0>)


 30%|[32m████████████████████████▋                                                        [0m| 61/200 [00:20<00:43,  3.23it/s][0m

tensor(49511.8750, device='cuda:0', grad_fn=<NegBackward0>)


 36%|[32m████████████████████████████▊                                                    [0m| 71/200 [00:23<00:42,  3.05it/s][0m

tensor(45413.1953, device='cuda:0', grad_fn=<NegBackward0>)


 40%|[32m████████████████████████████████▊                                                [0m| 81/200 [00:26<00:36,  3.29it/s][0m

tensor(42692.8984, device='cuda:0', grad_fn=<NegBackward0>)


 46%|[32m████████████████████████████████████▊                                            [0m| 91/200 [00:29<00:38,  2.85it/s][0m

tensor(41065.3516, device='cuda:0', grad_fn=<NegBackward0>)


 50%|[32m████████████████████████████████████████▍                                       [0m| 101/200 [00:32<00:30,  3.29it/s][0m

tensor(39542.1562, device='cuda:0', grad_fn=<NegBackward0>)


 56%|[32m████████████████████████████████████████████▍                                   [0m| 111/200 [00:35<00:27,  3.26it/s][0m

tensor(38158.9766, device='cuda:0', grad_fn=<NegBackward0>)


 60%|[32m████████████████████████████████████████████████▍                               [0m| 121/200 [00:39<00:24,  3.18it/s][0m

tensor(37197.4922, device='cuda:0', grad_fn=<NegBackward0>)


 66%|[32m████████████████████████████████████████████████████▍                           [0m| 131/200 [00:42<00:22,  3.07it/s][0m

tensor(36424.8047, device='cuda:0', grad_fn=<NegBackward0>)


 70%|[32m████████████████████████████████████████████████████████▍                       [0m| 141/200 [00:45<00:17,  3.40it/s][0m

tensor(35378.3359, device='cuda:0', grad_fn=<NegBackward0>)


 76%|[32m████████████████████████████████████████████████████████████▍                   [0m| 151/200 [00:48<00:14,  3.30it/s][0m

tensor(34470.1211, device='cuda:0', grad_fn=<NegBackward0>)


 80%|[32m████████████████████████████████████████████████████████████████▍               [0m| 161/200 [00:51<00:12,  3.24it/s][0m

tensor(33722.2031, device='cuda:0', grad_fn=<NegBackward0>)


 86%|[32m████████████████████████████████████████████████████████████████████▍           [0m| 171/200 [00:54<00:08,  3.29it/s][0m

tensor(32993.4844, device='cuda:0', grad_fn=<NegBackward0>)


 90%|[32m████████████████████████████████████████████████████████████████████████▍       [0m| 181/200 [00:57<00:06,  2.97it/s][0m

tensor(32071.3555, device='cuda:0', grad_fn=<NegBackward0>)


 96%|[32m████████████████████████████████████████████████████████████████████████████▍   [0m| 191/200 [01:01<00:02,  3.11it/s][0m

tensor(31289.9688, device='cuda:0', grad_fn=<NegBackward0>)


100%|[32m████████████████████████████████████████████████████████████████████████████████[0m| 200/200 [01:03<00:00,  3.13it/s][0m


In [8]:
Net, loss_list, f1_list = Trainer(FEATURE_EXTRACTOR(), all_text_feature, 200, 100, 150)
model_dir = './model/'
if not os.path.exists(model_dir):
    os.makedirs(model_dir)
torch.save(Net.state_dict(), os.path.join(model_dir, 'text_model.pth'))
Net.eval()
with torch.autograd.no_grad():
    txt_train = Net(all_text_feature.to(dev))
    txt_test = Net(test_text_feature.to(dev))

  0%|[32m▍                                                                                 [0m| 1/200 [00:00<01:09,  2.87it/s][0m

tensor(375025.0625, device='cuda:0', grad_fn=<NegBackward0>)


  6%|[32m████▍                                                                            [0m| 11/200 [00:03<00:57,  3.27it/s][0m

tensor(374993.8750, device='cuda:0', grad_fn=<NegBackward0>)


 10%|[32m████████▌                                                                        [0m| 21/200 [00:06<00:54,  3.31it/s][0m

tensor(128140.7031, device='cuda:0', grad_fn=<NegBackward0>)


 16%|[32m████████████▌                                                                    [0m| 31/200 [00:09<00:53,  3.17it/s][0m

tensor(129554.5469, device='cuda:0', grad_fn=<NegBackward0>)


 20%|[32m████████████████▌                                                                [0m| 41/200 [00:12<00:47,  3.33it/s][0m

tensor(80061.6484, device='cuda:0', grad_fn=<NegBackward0>)


 26%|[32m████████████████████▋                                                            [0m| 51/200 [00:15<00:47,  3.12it/s][0m

tensor(63956.4766, device='cuda:0', grad_fn=<NegBackward0>)


 30%|[32m████████████████████████▋                                                        [0m| 61/200 [00:18<00:43,  3.16it/s][0m

tensor(56460.4414, device='cuda:0', grad_fn=<NegBackward0>)


 36%|[32m████████████████████████████▊                                                    [0m| 71/200 [00:22<00:41,  3.13it/s][0m

tensor(52356.1328, device='cuda:0', grad_fn=<NegBackward0>)


 40%|[32m████████████████████████████████▊                                                [0m| 81/200 [00:25<00:36,  3.28it/s][0m

tensor(50358.3516, device='cuda:0', grad_fn=<NegBackward0>)


 46%|[32m████████████████████████████████████▊                                            [0m| 91/200 [00:28<00:33,  3.22it/s][0m

tensor(48745.4727, device='cuda:0', grad_fn=<NegBackward0>)


 50%|[32m████████████████████████████████████████▍                                       [0m| 101/200 [00:31<00:31,  3.18it/s][0m

tensor(47701.9609, device='cuda:0', grad_fn=<NegBackward0>)


 56%|[32m████████████████████████████████████████████▍                                   [0m| 111/200 [00:34<00:32,  2.78it/s][0m

tensor(46852.1562, device='cuda:0', grad_fn=<NegBackward0>)


 60%|[32m████████████████████████████████████████████████▍                               [0m| 121/200 [00:38<00:26,  2.98it/s][0m

tensor(46304.8516, device='cuda:0', grad_fn=<NegBackward0>)


 66%|[32m████████████████████████████████████████████████████▍                           [0m| 131/200 [00:41<00:21,  3.21it/s][0m

tensor(45559.8672, device='cuda:0', grad_fn=<NegBackward0>)


 70%|[32m████████████████████████████████████████████████████████▍                       [0m| 141/200 [00:44<00:19,  3.07it/s][0m

tensor(45167.1992, device='cuda:0', grad_fn=<NegBackward0>)


 76%|[32m████████████████████████████████████████████████████████████▍                   [0m| 151/200 [00:47<00:14,  3.36it/s][0m

tensor(44573.1406, device='cuda:0', grad_fn=<NegBackward0>)


 80%|[32m████████████████████████████████████████████████████████████████▍               [0m| 161/200 [00:50<00:13,  2.97it/s][0m

tensor(44093.0391, device='cuda:0', grad_fn=<NegBackward0>)


 86%|[32m████████████████████████████████████████████████████████████████████▍           [0m| 171/200 [00:53<00:09,  3.12it/s][0m

tensor(43830.2266, device='cuda:0', grad_fn=<NegBackward0>)


 90%|[32m████████████████████████████████████████████████████████████████████████▍       [0m| 181/200 [00:56<00:06,  3.08it/s][0m

tensor(43248.3281, device='cuda:0', grad_fn=<NegBackward0>)


 96%|[32m████████████████████████████████████████████████████████████████████████████▍   [0m| 191/200 [00:59<00:02,  3.21it/s][0m

tensor(42867.4648, device='cuda:0', grad_fn=<NegBackward0>)


100%|[32m████████████████████████████████████████████████████████████████████████████████[0m| 200/200 [01:02<00:00,  3.19it/s][0m


In [9]:
sum_train = img_train+txt_train
sum_test = img_test+txt_test
Net, loss_list, f1_list  = Trainer(DECISION_MODEL(), sum_train, 200, 100, 150)
model_dir = './model/'
if not os.path.exists(model_dir):
    os.makedirs(model_dir)
torch.save(Net.state_dict(), os.path.join(model_dir, 'final_model.pth'))
Net.eval()
with torch.autograd.no_grad():
    final = Net(sum_test)

  0%|[32m▍                                                                                 [0m| 1/200 [00:00<01:05,  3.02it/s][0m

tensor(376424.4688, device='cuda:0', grad_fn=<NegBackward0>)


  6%|[32m████▍                                                                            [0m| 11/200 [00:03<00:58,  3.21it/s][0m

tensor(376424.4688, device='cuda:0', grad_fn=<NegBackward0>)


 10%|[32m████████▌                                                                        [0m| 21/200 [00:06<00:53,  3.36it/s][0m

tensor(152808.3750, device='cuda:0', grad_fn=<NegBackward0>)


 16%|[32m████████████▌                                                                    [0m| 31/200 [00:09<00:49,  3.44it/s][0m

tensor(101004.3438, device='cuda:0', grad_fn=<NegBackward0>)


 20%|[32m████████████████▌                                                                [0m| 41/200 [00:12<00:50,  3.14it/s][0m

tensor(70482.3516, device='cuda:0', grad_fn=<NegBackward0>)


 26%|[32m████████████████████▋                                                            [0m| 51/200 [00:15<00:46,  3.23it/s][0m

tensor(52904.3828, device='cuda:0', grad_fn=<NegBackward0>)


 30%|[32m████████████████████████▋                                                        [0m| 61/200 [00:18<00:44,  3.15it/s][0m

tensor(42713.1719, device='cuda:0', grad_fn=<NegBackward0>)


 36%|[32m████████████████████████████▊                                                    [0m| 71/200 [00:22<00:41,  3.08it/s][0m

tensor(37262.4375, device='cuda:0', grad_fn=<NegBackward0>)


 40%|[32m████████████████████████████████▊                                                [0m| 81/200 [00:25<00:39,  3.05it/s][0m

tensor(34212.4727, device='cuda:0', grad_fn=<NegBackward0>)


 46%|[32m████████████████████████████████████▊                                            [0m| 91/200 [00:28<00:34,  3.15it/s][0m

tensor(32416.6328, device='cuda:0', grad_fn=<NegBackward0>)


 50%|[32m████████████████████████████████████████▍                                       [0m| 101/200 [00:31<00:30,  3.27it/s][0m

tensor(31232.7734, device='cuda:0', grad_fn=<NegBackward0>)


 56%|[32m████████████████████████████████████████████▍                                   [0m| 111/200 [00:34<00:27,  3.24it/s][0m

tensor(30431.2520, device='cuda:0', grad_fn=<NegBackward0>)


 60%|[32m████████████████████████████████████████████████▍                               [0m| 121/200 [00:37<00:24,  3.19it/s][0m

tensor(29863.6348, device='cuda:0', grad_fn=<NegBackward0>)


 66%|[32m████████████████████████████████████████████████████▍                           [0m| 131/200 [00:40<00:22,  3.11it/s][0m

tensor(29445.5039, device='cuda:0', grad_fn=<NegBackward0>)


 70%|[32m████████████████████████████████████████████████████████▍                       [0m| 141/200 [00:43<00:18,  3.13it/s][0m

tensor(29129.8105, device='cuda:0', grad_fn=<NegBackward0>)


 76%|[32m████████████████████████████████████████████████████████████▍                   [0m| 151/200 [00:47<00:15,  3.08it/s][0m

tensor(28885.4219, device='cuda:0', grad_fn=<NegBackward0>)


 80%|[32m████████████████████████████████████████████████████████████████▍               [0m| 161/200 [00:50<00:11,  3.26it/s][0m

tensor(28700.4727, device='cuda:0', grad_fn=<NegBackward0>)


 86%|[32m████████████████████████████████████████████████████████████████████▍           [0m| 171/200 [00:53<00:09,  3.13it/s][0m

tensor(28553.6875, device='cuda:0', grad_fn=<NegBackward0>)


 90%|[32m████████████████████████████████████████████████████████████████████████▍       [0m| 181/200 [00:56<00:05,  3.31it/s][0m

tensor(28444.6758, device='cuda:0', grad_fn=<NegBackward0>)


 96%|[32m████████████████████████████████████████████████████████████████████████████▍   [0m| 191/200 [00:59<00:02,  3.18it/s][0m

tensor(28334.2070, device='cuda:0', grad_fn=<NegBackward0>)


100%|[32m████████████████████████████████████████████████████████████████████████████████[0m| 200/200 [01:02<00:00,  3.20it/s][0m


In [10]:
import pandas as pd
y_proba = final.cpu().numpy()

resl = []
for i in y_proba:
    a = [x+1 for x in range(len(i)) if i[x] > 0.5]
    for j in range(len(a)):
        if a[j] >=12:
            a[j] = a[j]+1
    resl.append(a)
test_pred = []
for lis in resl:
    a = [str(i) for i in lis]
    test_pred.append(" ".join(a))

# make a csv file
df = pd.DataFrame(columns=["ImageID", "Labels"])

# Creating the Second Dataframe using dictionary
for index, value in enumerate(test_pred):
    df_temp = pd.DataFrame({"ImageID":"{}.jpg".format(30000+index), "Labels":" ".join([str(i) for i in [value]])}, index=[0])
    
    # for appending df_temp at the end of df
    df = pd.concat([df, df_temp], ignore_index=True)

df.to_csv("Predicted_labels.csv", index = False)

In [11]:
df

Unnamed: 0,ImageID,Labels
0,30000.jpg,1
1,30001.jpg,1
2,30002.jpg,1
3,30003.jpg,1
4,30004.jpg,1
...,...,...
9995,39995.jpg,1
9996,39996.jpg,3 4
9997,39997.jpg,1
9998,39998.jpg,1
