In [1]:
import torch
# Use the GPU when CUDA is visible to PyTorch, otherwise stay on the CPU.
dev = "cuda" if torch.cuda.is_available() else "cpu"

# Every saved tensor is mapped straight onto the selected device while loading;
# the files are read in the order listed below.
# NOTE(review): file names suggest precomputed ViT-L image/text embeddings plus
# one-hot labels — confirm against whatever produced these files.
# NOTE(review): torch.load unpickles its input; only load files you trust.
(
    test_image_features,
    test_text_feature,
    all_image_features,
    all_text_feature,
    label_onehot_tensor,
) = (
    torch.load(tensor_path, map_location=torch.device(dev))
    for tensor_path in (
        "test_image_features_vitL.pt",
        "test_text_feature_vitL.pt",
        "all_image_features_vitL.pt",
        "all_text_feature_vitL.pt",
        "label_onehot_tensor.pt",
    )
)

In [2]:
# Sanity check: display the dimensions of the loaded test image features.
test_image_features.shape

torch.Size([10000, 768])

In [3]:
def add_weight_decay(model, weight_decay=1e-4, skip_list=()):
    """Build optimizer parameter groups that exempt some tensors from weight decay.

    Parameters
    ----------
    model: object exposing ``named_parameters()`` (e.g. an ``nn.Module``)
    weight_decay: decay coefficient assigned to the regularized group
    skip_list: parameter names that must never be decayed

    Returns
    -------
    A two-element list of dicts: first the group with ``weight_decay`` 0
    (1-D tensors, names ending in ``.bias`` and names in ``skip_list``),
    then the group holding every other trainable parameter.
    Parameters with ``requires_grad`` set to False appear in neither group.
    """
    exempt, regularized = [], []
    trainable = (
        (param_name, tensor)
        for param_name, tensor in model.named_parameters()
        if tensor.requires_grad  # frozen weights are left out entirely
    )
    for param_name, tensor in trainable:
        is_exempt = (
            len(tensor.shape) == 1
            or param_name.endswith(".bias")
            or param_name in skip_list
        )
        (exempt if is_exempt else regularized).append(tensor)
    return [
        dict(params=exempt, weight_decay=0.),
        dict(params=regularized, weight_decay=weight_decay),
    ]

In [4]:
import torch
import torch.nn as nn


class AsymmetricLoss(nn.Module):
    """Asymmetric multi-label loss evaluated directly on probabilities.

    This variant does NOT apply a sigmoid: ``forward`` treats its input as
    the positive-class probability. With ``gamma_neg == gamma_pos == 0`` and
    ``clip`` of 0/None it reduces to a summed binary cross-entropy.

    Parameters
    ----------
    gamma_neg: focusing exponent applied to negative targets
    gamma_pos: focusing exponent applied to positive targets
    clip: margin added to the negative probability (then capped at 1);
          ``None`` or 0 disables it
    eps: lower bound applied before taking logarithms
    disable_torch_grad_focal_loss: when True the focusing weights are
          computed without gradient tracking
    """

    def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disable_torch_grad_focal_loss=True):
        super(AsymmetricLoss, self).__init__()

        self.gamma_neg = gamma_neg
        self.gamma_pos = gamma_pos
        self.clip = clip
        self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss
        self.eps = eps

    def forward(self, x, y):
        """Return the summed asymmetric loss as a scalar tensor.

        Parameters
        ----------
        x: predicted probabilities (used as-is; no sigmoid is applied here)
        y: targets (multi-label binarized vector)
        """

        # Probabilities of the positive / negative outcome
        xs_pos = x
        xs_neg = 1 - x

        # Asymmetric Clipping
        if self.clip is not None and self.clip > 0:
            xs_neg = (xs_neg + self.clip).clamp(max=1)

        # Basic CE calculation
        los_pos = y * torch.log(xs_pos.clamp(min=self.eps))
        los_neg = (1 - y) * torch.log(xs_neg.clamp(min=self.eps))
        loss = los_pos + los_neg

        # Asymmetric Focusing
        if self.gamma_neg > 0 or self.gamma_pos > 0:
            # Use the context-manager form so the caller's grad mode is
            # restored on exit (also on error). Forcing it back to True
            # would silently re-enable autograd inside a no_grad() block.
            track_grad = torch.is_grad_enabled() and not self.disable_torch_grad_focal_loss
            with torch.set_grad_enabled(track_grad):
                pt0 = xs_pos * y
                pt1 = xs_neg * (1 - y)  # pt = p if t > 0 else 1-p
                pt = pt0 + pt1
                one_sided_gamma = self.gamma_pos * y + self.gamma_neg * (1 - y)
                one_sided_w = torch.pow(1 - pt, one_sided_gamma)
            loss = loss * one_sided_w

        return -loss.sum()


class AsymmetricLossOptimized(nn.Module):
    ''' Notice - optimized version, minimizes memory allocation and gpu uploading,
    favors inplace operations.

    Takes raw logits: ``forward`` applies the sigmoid itself. Intermediate
    tensors of the most recent call are kept as attributes on the module.

    Parameters
    ----------
    gamma_neg: focusing exponent applied to negative targets
    gamma_pos: focusing exponent applied to positive targets
    clip: margin added to the negative probability (then capped at 1);
          ``None`` or 0 disables it
    eps: lower bound applied before taking logarithms
    disable_torch_grad_focal_loss: when True the focusing weights are
          computed without gradient tracking
    '''

    def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disable_torch_grad_focal_loss=False):
        super(AsymmetricLossOptimized, self).__init__()

        self.gamma_neg = gamma_neg
        self.gamma_pos = gamma_pos
        self.clip = clip
        self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss
        self.eps = eps

        # prevent memory allocation and gpu uploading every iteration, and encourages inplace operations
        self.targets = self.anti_targets = self.xs_pos = self.xs_neg = self.asymmetric_w = self.loss = None

    def forward(self, x, y):
        """Return the summed asymmetric loss as a scalar tensor.

        Parameters
        ----------
        x: input logits
        y: targets (multi-label binarized vector)
        """

        self.targets = y
        self.anti_targets = 1 - y

        # Calculating Probabilities
        self.xs_pos = torch.sigmoid(x)
        self.xs_neg = 1.0 - self.xs_pos

        # Asymmetric Clipping
        if self.clip is not None and self.clip > 0:
            self.xs_neg.add_(self.clip).clamp_(max=1)

        # Basic CE calculation
        self.loss = self.targets * torch.log(self.xs_pos.clamp(min=self.eps))
        self.loss.add_(self.anti_targets * torch.log(self.xs_neg.clamp(min=self.eps)))

        # Asymmetric Focusing
        if self.gamma_neg > 0 or self.gamma_pos > 0:
            # Use the context-manager form so the caller's grad mode is
            # restored on exit (also on error). Forcing it back to True
            # would silently re-enable autograd inside a no_grad() block.
            track_grad = torch.is_grad_enabled() and not self.disable_torch_grad_focal_loss
            with torch.set_grad_enabled(track_grad):
                self.xs_pos = self.xs_pos * self.targets
                self.xs_neg = self.xs_neg * self.anti_targets
                self.asymmetric_w = torch.pow(1 - self.xs_pos - self.xs_neg,
                                              self.gamma_pos * self.targets + self.gamma_neg * self.anti_targets)
            self.loss *= self.asymmetric_w

        return -self.loss.sum()

In [5]:
import numpy as np
from tqdm import tqdm
import torch.nn.functional as F
from torchmetrics import F1Score
from torch import optim
from torch.cuda.amp import GradScaler, autocast

from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
def Trainer(model, Data, epochs, epoch_step_1, epoch_step_2, lr = 1e-3, labels=None, batch_size=30000):
    """Train ``model`` on ``Data`` and return it together with the loss history.

    The loss is ``AsymmetricLoss`` with both gammas and the clip set to 0,
    evaluated on the model output as-is, so the model is expected to emit
    probabilities. Optimization uses Adam on the parameter groups built by
    ``add_weight_decay`` and a ``MultiStepLR`` schedule that multiplies the
    learning rate by 0.1 at each milestone.

    Parameters
    ----------
    model: network to train; it is moved to the global ``dev`` device
    Data: input feature tensor, first dimension indexes samples
    epochs: number of passes over the data
    epoch_step_1, epoch_step_2: epochs at which the learning rate is decayed
    lr: initial learning rate
    labels: target tensor aligned with ``Data``; defaults to the notebook's
        global ``label_onehot_tensor``
    batch_size: mini-batch size (the default matches the original hard-coded
        value)

    Returns
    -------
    (model, loss_list), where ``loss_list`` holds one detached loss tensor per
    epoch (the loss of the last batch of that epoch).

    Side effects: reseeds the global torch RNG with 5329 and prints the loss
    every 10 epochs.
    """
    torch.manual_seed(5329)
    if labels is None:
        labels = label_onehot_tensor
    train_data = DataLoader(TensorDataset(Data, labels), batch_size=batch_size, shuffle = True)
    model = model.to(dev)
    # Make sure dropout is active even if the caller passed a model in eval mode.
    model.train()

    weight_decay = 1e-4
    criterion = AsymmetricLoss(gamma_neg=0, gamma_pos=0, clip=0, disable_torch_grad_focal_loss=True)
    parameters = add_weight_decay(model, weight_decay)
    # Decay is configured per parameter group, hence weight_decay=0 here.
    opti = optim.Adam(params=parameters, lr=lr, weight_decay=0)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(opti, milestones=[epoch_step_1,epoch_step_2], gamma = 0.1)

    loss_list = []
    scaler = GradScaler()

    for epoch in tqdm(range(epochs), colour = 'GREEN'):
        for data, label in train_data:
            data, label = data.to(dev), label.to(dev)

            with autocast():  # mixed precision
                output = model(data).float()

            # The loss itself is evaluated in float32, outside autocast.
            loss = criterion(output, label)
            model.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(opti)
            scaler.update()

        # Milestones are expressed in epochs, so the schedule advances once
        # per epoch regardless of how many batches an epoch contains.
        scheduler.step()
        # Detach so the history does not keep autograd graph references alive.
        loss_list.append(loss.detach())

        if epoch % 10 == 0:
            print('Train Epoch: {}\tLoss: {:.6f}'.format(
                epoch, loss.item()))

    return model, loss_list

In [6]:
import torch.nn as nn
import torch.nn.functional as F
class FEATURE_EXTRACTOR(nn.Module):
    """Three-layer MLP mapping 768-d inputs to 18 sigmoid outputs.

    Two GELU hidden layers (2048 and 512 units), each followed by dropout
    with p=0.6, feed a final linear layer whose output is squashed with a
    sigmoid.
    """

    def __init__(self):
        super().__init__()
        # Creation order (fc1, fc2, fc3) is kept so seeded initialization
        # and state_dict key order stay the same.
        self.fc1 = nn.Linear(in_features=768, out_features=2048)
        self.fc2 = nn.Linear(in_features=2048, out_features=512)
        self.fc3 = nn.Linear(in_features=512, out_features=18)
        self.dropout = nn.Dropout(p=0.6)

    def forward(self, inputs):
        """Return per-class probabilities for ``inputs``."""
        hidden = inputs
        for layer in (self.fc1, self.fc2):
            hidden = self.dropout(F.gelu(layer(hidden)))
        return torch.sigmoid(self.fc3(hidden))

class DECISION_MODEL(nn.Module):
    """MLP without dropout mapping 18 inputs to 18 sigmoid outputs.

    Hidden layers have 1024 and 512 units with GELU activations.
    """

    def __init__(self):
        super().__init__()
        # Creation order (fc1, fc2, fc3) is kept so seeded initialization
        # and state_dict key order stay the same.
        self.fc1 = nn.Linear(in_features=18, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=512)
        self.fc3 = nn.Linear(in_features=512, out_features=18)

    def forward(self, inputs):
        """Return per-class probabilities for ``inputs``."""
        first_hidden = F.gelu(self.fc1(inputs))
        second_hidden = F.gelu(self.fc2(first_hidden))
        return torch.sigmoid(self.fc3(second_hidden))

In [7]:
# Place the text features next to the image features along dim 1, giving one
# combined feature row per training sample; the shape is displayed as a check.
all_feature = torch.cat([all_image_features, all_text_feature], dim=1)
all_feature.shape

torch.Size([30000, 1536])

In [8]:
# Same side-by-side concatenation (image first, text second, along dim 1) for
# the test split; the shape is displayed as a check.
all_test_feature = torch.cat([test_image_features, test_text_feature], dim=1)
all_test_feature.shape

torch.Size([10000, 1536])

In [9]:
class End_to_End(nn.Module):
    """Four-layer MLP mapping 1536-d inputs to 18 sigmoid outputs.

    Three GELU hidden layers (4096, 1024 and 512 units), each followed by
    dropout with p=0.6, feed a final linear layer whose output is squashed
    with a sigmoid.
    """

    def __init__(self):
        super().__init__()
        # Creation order (fc1, fc2, fc3, dropout, fc4) is kept so seeded
        # initialization and state_dict key order stay the same.
        self.fc1 = nn.Linear(in_features=1536, out_features=4096)
        self.fc2 = nn.Linear(in_features=4096, out_features=1024)
        self.fc3 = nn.Linear(in_features=1024, out_features=512)
        self.dropout = nn.Dropout(p=0.6)
        self.fc4 = nn.Linear(in_features=512, out_features=18)

    def forward(self, inputs):
        """Return per-class probabilities for ``inputs``."""
        hidden = inputs
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.dropout(F.gelu(layer(hidden)))
        return torch.sigmoid(self.fc4(hidden))

In [15]:
import os
# Train the end-to-end classifier on the concatenated image+text features:
# 200 epochs, learning rate decayed at epochs 130 and 190.
Net, loss_list = Trainer(End_to_End(), all_feature, 200, 130, 190)

# Persist the trained weights. exist_ok=True replaces the separate
# exists()/makedirs() pair, which can fail if the directory appears in between.
model_dir = './model/'
os.makedirs(model_dir, exist_ok=True)
torch.save(Net.state_dict(), os.path.join(model_dir, 'end_to_end_model.pth'))


  0%|[32m▍                                                                                 [0m| 1/200 [00:00<02:58,  1.11it/s][0m

Train Epoch: 0	Loss: 374096.062500


  6%|[32m████▍                                                                            [0m| 11/200 [00:09<02:38,  1.19it/s][0m

Train Epoch: 10	Loss: 374070.437500


 10%|[32m████████▌                                                                        [0m| 21/200 [00:17<02:27,  1.21it/s][0m

Train Epoch: 20	Loss: 176984.203125


 16%|[32m████████████▌                                                                    [0m| 31/200 [00:26<02:33,  1.10it/s][0m

Train Epoch: 30	Loss: 130596.804688


 20%|[32m████████████████▌                                                                [0m| 41/200 [00:34<02:13,  1.19it/s][0m

Train Epoch: 40	Loss: 87745.914062


 26%|[32m████████████████████▋                                                            [0m| 51/200 [00:42<02:06,  1.17it/s][0m

Train Epoch: 50	Loss: 66143.265625


 30%|[32m████████████████████████▋                                                        [0m| 61/200 [00:50<01:54,  1.22it/s][0m

Train Epoch: 60	Loss: 53446.144531


 36%|[32m████████████████████████████▊                                                    [0m| 71/200 [00:59<01:48,  1.19it/s][0m

Train Epoch: 70	Loss: 47138.695312


 40%|[32m████████████████████████████████▊                                                [0m| 81/200 [01:07<01:35,  1.24it/s][0m

Train Epoch: 80	Loss: 43280.574219


 46%|[32m████████████████████████████████████▊                                            [0m| 91/200 [01:15<01:32,  1.18it/s][0m

Train Epoch: 90	Loss: 40270.054688


 50%|[32m████████████████████████████████████████▍                                       [0m| 101/200 [01:24<01:25,  1.15it/s][0m

Train Epoch: 100	Loss: 37724.324219


 56%|[32m████████████████████████████████████████████▍                                   [0m| 111/200 [01:32<01:13,  1.22it/s][0m

Train Epoch: 110	Loss: 35414.703125


 60%|[32m████████████████████████████████████████████████▍                               [0m| 121/200 [01:40<01:04,  1.23it/s][0m

Train Epoch: 120	Loss: 33192.273438


 66%|[32m████████████████████████████████████████████████████▍                           [0m| 131/200 [01:48<00:56,  1.23it/s][0m

Train Epoch: 130	Loss: 31133.871094


 70%|[32m████████████████████████████████████████████████████████▍                       [0m| 141/200 [01:57<00:51,  1.14it/s][0m

Train Epoch: 140	Loss: 30862.488281


 76%|[32m████████████████████████████████████████████████████████████▍                   [0m| 151/200 [02:05<00:39,  1.23it/s][0m

Train Epoch: 150	Loss: 30586.003906


 80%|[32m████████████████████████████████████████████████████████████████▍               [0m| 161/200 [02:13<00:31,  1.23it/s][0m

Train Epoch: 160	Loss: 30384.498047


 86%|[32m████████████████████████████████████████████████████████████████████▍           [0m| 171/200 [02:21<00:23,  1.23it/s][0m

Train Epoch: 170	Loss: 30093.332031


 90%|[32m████████████████████████████████████████████████████████████████████████▍       [0m| 181/200 [02:29<00:15,  1.24it/s][0m

Train Epoch: 180	Loss: 29457.406250


 96%|[32m████████████████████████████████████████████████████████████████████████████▍   [0m| 191/200 [02:38<00:07,  1.14it/s][0m

Train Epoch: 190	Loss: 29498.855469


100%|[32m████████████████████████████████████████████████████████████████████████████████[0m| 200/200 [02:45<00:00,  1.21it/s][0m


In [11]:
# Put the model in eval mode (dropout off) and score the concatenated test
# features without tracking gradients; `final` feeds the CSV export cell.
# NOTE(review): `Net` is whichever model was trained last in this kernel —
# confirm it is the end-to-end model before running this cell.
Net.eval()
with torch.no_grad():
    final = Net(all_test_feature)

In [12]:
import os
# Train the image-only classifier: 300 epochs, learning rate decayed at
# epochs 200 and 250. This rebinds `Net`, replacing any previously trained model.
Net, loss_list = Trainer(FEATURE_EXTRACTOR(), all_image_features, 300, 200, 250)

# Persist the trained weights. exist_ok=True replaces the separate
# exists()/makedirs() pair, which can fail if the directory appears in between.
model_dir = './model/'
os.makedirs(model_dir, exist_ok=True)
torch.save(Net.state_dict(), os.path.join(model_dir, 'image_model.pth'))

# Eval mode (dropout off), then compute image-branch probabilities for the
# train and test splits without tracking gradients.
Net.eval()
with torch.autograd.no_grad():
    img_train = Net(all_image_features.to(dev))
    img_test = Net(test_image_features.to(dev))

  0%|[32m▎                                                                                 [0m| 1/300 [00:00<02:08,  2.33it/s][0m

Train Epoch: 0	Loss: 376579.875000


  4%|[32m██▉                                                                              [0m| 11/300 [00:05<02:10,  2.22it/s][0m

Train Epoch: 10	Loss: 376563.250000


  6%|[32m████▌                                                                            [0m| 17/300 [00:08<02:15,  2.08it/s][0m


KeyboardInterrupt: 

In [None]:
import os
# Train the image-only classifier: 300 epochs, learning rate decayed at
# epochs 200 and 250. This rebinds `Net`, replacing any previously trained model.
# NOTE(review): this cell repeats the preceding image-model cell — keep only one.
Net, loss_list = Trainer(FEATURE_EXTRACTOR(), all_image_features, 300, 200, 250)

# Persist the trained weights. exist_ok=True replaces the separate
# exists()/makedirs() pair, which can fail if the directory appears in between.
model_dir = './model/'
os.makedirs(model_dir, exist_ok=True)
torch.save(Net.state_dict(), os.path.join(model_dir, 'image_model.pth'))

# Eval mode (dropout off), then compute image-branch probabilities for the
# train and test splits without tracking gradients.
Net.eval()
with torch.autograd.no_grad():
    img_train = Net(all_image_features.to(dev))
    img_test = Net(test_image_features.to(dev))

In [None]:
# Train the text-only classifier: 300 epochs, learning rate decayed at
# epochs 200 and 250. This rebinds `Net`, replacing any previously trained model.
Net, loss_list = Trainer(FEATURE_EXTRACTOR(), all_text_feature, 300, 200, 250)

# Persist the trained weights. exist_ok=True replaces the separate
# exists()/makedirs() pair, which can fail if the directory appears in between.
model_dir = './model/'
os.makedirs(model_dir, exist_ok=True)
torch.save(Net.state_dict(), os.path.join(model_dir, 'text_model.pth'))

# Eval mode (dropout off), then compute text-branch probabilities for the
# train and test splits without tracking gradients.
Net.eval()
with torch.autograd.no_grad():
    txt_train = Net(all_text_feature.to(dev))
    txt_test = Net(test_text_feature.to(dev))

In [None]:
# Late fusion: add the image-branch and text-branch probabilities element-wise
# and train a second-stage model on the sums (300 epochs, learning rate decayed
# at epochs 200 and 250). Requires img_*/txt_* from the two branch cells.
sum_train = img_train+txt_train
sum_test = img_test+txt_test
Net, loss_list = Trainer(DECISION_MODEL(), sum_train, 300, 200, 250)

# Persist the trained weights. exist_ok=True replaces the separate
# exists()/makedirs() pair, which can fail if the directory appears in between.
model_dir = './model/'
os.makedirs(model_dir, exist_ok=True)
torch.save(Net.state_dict(), os.path.join(model_dir, 'final_model.pth'))

# Eval mode, then score the fused test inputs without tracking gradients.
# Note: this overwrites `final`, including any end-to-end predictions stored
# under that name earlier in the session.
Net.eval()
with torch.autograd.no_grad():
    final = Net(sum_test)

In [16]:
import pandas as pd
# Move the predicted probabilities to host memory as a NumPy array
# (one row per test sample, one column per model output).
y_proba = final.cpu().numpy()

# Keep every output whose probability exceeds 0.5 and translate its column
# index into a label id: column k -> k+1, except that ids from 12 upward are
# shifted by one more, so label 12 is never emitted.
# NOTE(review): presumably the dataset has no class 12 — confirm.
resl = [
    [col + 1 if col + 1 < 12 else col + 2 for col, prob in enumerate(row) if prob > 0.5]
    for row in y_proba
]

# One space-separated label string per sample (empty when nothing passes 0.5).
test_pred = [" ".join(str(label) for label in labels) for labels in resl]

# Build the submission table in one shot. The previous row-by-row
# DataFrame.append was quadratic and no longer exists in pandas >= 2.0.
# NOTE(review): assumes test images are numbered consecutively from 30000.jpg.
df = pd.DataFrame(
    {
        "ImageID": ["{}.jpg".format(30000 + index) for index in range(len(test_pred))],
        "Labels": test_pred,
    },
    columns=["ImageID", "Labels"],
)

df.to_csv("Predicted_labels.csv", index = False)

In [17]:
# Display the submission table (ImageID / Labels) as a visual check.
df

Unnamed: 0,ImageID,Labels
0,30000.jpg,1
1,30001.jpg,1
2,30002.jpg,1
3,30003.jpg,1
4,30004.jpg,1
...,...,...
9995,39995.jpg,1
9996,39996.jpg,3 4 8
9997,39997.jpg,1
9998,39998.jpg,1
