In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
import nltk
from utils.utils import *
from utils.label_decoding import *
from utils.HierarchicalLoss import *

import torchvision.models as models

In [6]:
from torch.utils.data import Dataset
import pickle

class DataSet(Dataset):
    """Labelled examples backed by precomputed text and image feature caches.

    Every item carries the cached features of one example together with five
    multi-hot target tensors, one per hierarchy level ('Level 1' .. 'Level 5').
    """

    def __init__(self, df, labels_at_level, text_features_file, image_features_file, max_len=128):
        """Keep the frame and label maps, then load both feature caches.

        Args:
            df: frame read positionally via ``.iloc``; each row must expose 'id',
                'cleaned_text', 'image' and 'Level 1' .. 'Level 5'.
            labels_at_level: mapping ``'Level N' -> {label: index}``.
            text_features_file: pickle whose loaded object is indexed by example id.
            image_features_file: pickle whose loaded object is indexed by image name.
            max_len: stored on the instance; not used by this class.
        """
        super(DataSet, self).__init__()
        self.data_df = df
        self.labels_at_level = labels_at_level
        self.max_len = max_len

        # NOTE: pickle.load can execute arbitrary code; only point these paths at
        # feature caches that were produced locally.
        with open(image_features_file, 'rb') as f:
            self.image_features = pickle.load(f)

        with open(text_features_file, 'rb') as f:
            self.text_features = pickle.load(f)

    def __len__(self):
        """Return the number of rows in the underlying frame."""
        return len(self.data_df)

    def __getitem__(self, idx):
        """Build the sample dict for the row at position ``idx``.

        Returns:
            dict with 'id', 'text', 'image_features', 'text_features' and
            'level_1_target' .. 'level_5_target'.
        """
        # Materialise the row once instead of once per field.
        row = self.data_df.iloc[idx]
        example_id = row['id']

        # Targets are encoded before the feature lookups, as in the original order.
        targets = {
            f'level_{level}_target': self.encode(row[f'Level {level}'], level)
            for level in range(1, 6)
        }

        return {
            'id': example_id,
            'text': row['cleaned_text'],
            'image_features': self.image_features[row['image']],
            'text_features': self.text_features[example_id],
            **targets,
        }

    def encode(self, labels, level):
        """Return a multi-hot float tensor for ``labels`` at hierarchy ``level``.

        The tensor has one slot per known label of the level plus one trailing
        slot, which is set only when ``labels`` is empty.

        Raises:
            KeyError: if a label is missing from the level's label map.
        """
        label_to_index = self.labels_at_level[f'Level {level}']
        target = torch.zeros(len(label_to_index) + 1)
        for label in labels:
            target[label_to_index[label]] = 1
        if len(labels) == 0:
            # Trailing slot marks "no label at this level".
            target[-1] = 1
        return target

In [7]:
class TestDataSet(Dataset):
    """Unlabelled examples backed by precomputed text and image feature caches."""

    def __init__(self, df, text_features_file, image_features_file, max_len=128):
        """Keep the frame and load both feature caches.

        Args:
            df: frame read positionally via ``.iloc``; each row must expose 'id',
                'cleaned_text' and 'image'.
            text_features_file: pickle whose loaded object is indexed by example id.
            image_features_file: pickle whose loaded object is indexed by image name.
            max_len: stored on the instance; not used by this class.
        """
        super(TestDataSet, self).__init__()
        self.data_df = df
        self.max_len = max_len

        # NOTE: pickle.load can execute arbitrary code; only point these paths at
        # feature caches that were produced locally.
        with open(image_features_file, 'rb') as f:
            self.image_features = pickle.load(f)

        with open(text_features_file, 'rb') as f:
            self.text_features = pickle.load(f)

    def __len__(self):
        """Return the number of rows in the underlying frame."""
        return len(self.data_df)

    def __getitem__(self, idx):
        """Return 'id', 'text', 'text_features' and 'image_features' for row ``idx``."""
        # Materialise the row once instead of once per field.
        row = self.data_df.iloc[idx]
        example_id = row['id']

        return {'id': example_id,
                'text': row['cleaned_text'],
                'text_features': self.text_features[example_id],
                'image_features': self.image_features[row['image']]}

In [8]:
def evaluate_model(model, dataloader, pred_file_path, gold_file_path,
                   evaluator_script_path, id2leaf_label, format=None, validation=False, threshold=0.3):
    """Run inference, write predictions as JSON and optionally score them.

    Args:
        model: module called as ``model(text_features, image_features)`` and
            returning five per-level prediction tensors.
        dataloader: yields batches with 'id', 'text_features', 'image_features'
            and, when ``validation`` is true, 'level_1_target' .. 'level_5_target'.
        pred_file_path: where the JSON list of predictions is written.
        gold_file_path: gold labels passed to the scorer; ``None`` skips scoring.
        evaluator_script_path: scorer script, run with ``python3``.
        id2leaf_label: forwarded to ``get_labels``.
        format: forwarded unchanged to ``get_labels``.
        validation: when true, also accumulate the hierarchical loss.
        threshold: predictions of levels 3-5 above this value count as positive.

    Returns:
        Mean per-batch loss (dloss + lloss) when ``validation`` is true,
        otherwise ``None``.

    Relies on the notebook globals ``device``, ``HierarchicalLoss``, ``get_labels``,
    ``id2label_subtask_2a``, ``hierarchy_subtask_2a`` and ``persuasion_techniques_2a``.
    """
    # Imported locally so the function does not depend on a later cell having run.
    import json
    import subprocess
    from tqdm import tqdm

    model.eval()
    predictions = []
    total_loss = 0

    # The loss object is only needed when labelled batches are scored.
    hierarchical_loss = None
    if validation:
        hierarchical_loss = HierarchicalLoss(id2label=id2label_subtask_2a,
                                             hierarchical_labels=hierarchy_subtask_2a,
                                             persuasion_techniques=persuasion_techniques_2a,
                                             device=device)

    with torch.no_grad():
        for batch in tqdm(dataloader):
            # Non-list ids arrive as a tensor from collation; convert to plain Python values.
            ids = batch['id'] if isinstance(batch['id'], list) else batch['id'].tolist()

            text_features = batch['text_features'].to(device)
            image_features = batch['image_features'].to(device)

            pred_1, pred_2, pred_3, pred_4, pred_5 = model(text_features, image_features)

            if validation:
                level_preds = [pred_1, pred_2, pred_3, pred_4, pred_5]
                level_targets = [batch[f'level_{level}_target'].to(device) for level in range(1, 6)]

                dloss = hierarchical_loss.calculate_dloss(level_preds, level_targets)
                lloss = hierarchical_loss.calculate_lloss(level_preds, level_targets)
                total_loss += (dloss + lloss).detach().cpu().item()

            # Only levels 3-5 are turned into output labels.
            pred_3 = (pred_3.cpu().detach().numpy() > threshold).astype(int)
            pred_4 = (pred_4.cpu().detach().numpy() > threshold).astype(int)
            pred_5 = (pred_5.cpu().detach().numpy() > threshold).astype(int)

            predictions += get_labels(id2leaf_label, ids, pred_3, pred_4, pred_5, format)

    # Writing JSON data
    with open(pred_file_path, 'w') as f:
        json.dump(predictions, f, indent=4)

    if gold_file_path is not None:
        command = [
            "python3", evaluator_script_path,
            "--gold_file_path", gold_file_path,
            "--pred_file_path", pred_file_path
        ]

        result = subprocess.run(command, capture_output=True, text=True)

        # A failing scorer is reported but does not abort the evaluation.
        if result.returncode == 0:
            print("Output:\n", result.stdout)
        else:
            print("Error:\n", result.stderr)

    if validation:
        # Previously dropped when no gold file was given; the loss does not depend on the scorer.
        return total_loss / len(dataloader)
    

In [9]:
# Load the subtask 2a training split. The result is later passed to DataSet as `df`.
train_data = process_json('./semeval2024_dev_release/subtask2a/train.json', 
                          techniques_to_level_2a, hierarchy_subtask_2a)
# NOTE: despite its name, test_data is read from validation.json; it is the set the
# training loop validates against. No separate val_data is created.
test_data = process_json('./semeval2024_dev_release/subtask2a/validation.json',
                         techniques_to_level_2a, hierarchy_subtask_2a)

In [14]:
# from transformers import BertTokenizer, BertModel
# 
# tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

In [16]:
from torch.utils.data import DataLoader

# Wrap both splits with their precomputed feature caches
# (text-embedding-3-large text features, CLIP-ViT image features, per the paths).
training_dataset = DataSet(train_data, indexed_persuasion_techniques_2a, 
                           text_features_file='./TextFeatures/subtask2a/text-embedding-3-large/train_text_features.pkl',
                           image_features_file='./ImageFeatures/CLIP-ViT/train_images_features.pkl')
# test_dataset holds the validation split (test_data is read from validation.json).
test_dataset = DataSet(test_data, indexed_persuasion_techniques_2a, 
                       text_features_file='./TextFeatures/subtask2a/text-embedding-3-large/validation_text_features.pkl', 
                       image_features_file='./ImageFeatures/CLIP-ViT/validation_images_features.pkl')

In [17]:
from tqdm import tqdm
import json
import subprocess

num_epochs = 100
batch_size = 128

# get_device() reports the detected accelerator, but the result is immediately
# overridden so everything below runs on CPU.
# NOTE(review): the reason for pinning to CPU is not recorded — confirm before removing.
device = get_device()
device = torch.device('cpu')

# Module-level loss object used by the training loop.
HL = HierarchicalLoss(id2label=id2label_subtask_2a, hierarchical_labels=hierarchy_subtask_2a,
                        persuasion_techniques=persuasion_techniques_2a, device=device)

train_dataloader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True)
# Evaluation data is not shuffled, so the prediction file order is stable across runs.
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


Using MPS


In [ ]:
# Hyperparameter values kept for reference.
# NOTE(review): presumably the outcome of a hyperparameter sweep — confirm.
# This cell has no execution count. None of these names are read by the code visible
# in this notebook: the training cell hardcodes lr=0.0001 and uses evaluate_model's
# default threshold.
alpha = 0.7764469620072395
# NOTE: if executed, this rebinds the batch_size = 128 that the DataLoaders were built with.
batch_size = 256
beta = 0.9992511127909072
beta1 = 0.9094170903394552
learning_rate = 3.906930058023181e-05
threshold = 0.8256232754296409

In [11]:
from modules.nn.MultiModal import MultiModalBaseline

model = MultiModalBaseline(img_feature_size=512, text_feature_size=3072)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

min_val_loss = float('inf')
best_epoch = None

train_loss_history = []
val_loss_history = []

# Evaluation inputs do not change between epochs.
val_pred_file_path = './Predictions/val_predictions_subtask2a.json'
val_gold_file_path = './semeval2024_dev_release/subtask2a/validation.json'
evaluator_script = './scorer-baseline/subtask_1_2a.py'

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # loss over the current 20-batch logging window
    epoch_loss = 0.0    # loss over the whole epoch, used for train_loss_history
    for batch_idx, batch in enumerate(train_dataloader):

        y_1, y_2, y_3 = batch['level_1_target'], batch['level_2_target'], batch['level_3_target']
        y_4, y_5 = batch['level_4_target'], batch['level_5_target']
        y_1, y_2, y_3, y_4, y_5 = y_1.to(device), y_2.to(device), y_3.to(device), y_4.to(device), y_5.to(device)

        images_features = batch['image_features'].to(device)
        text_features = batch['text_features'].to(device)

        optimizer.zero_grad()
        pred_1, pred_2, pred_3, pred_4, pred_5 = model(text_features, images_features)

        dloss = HL.calculate_dloss([pred_1, pred_2, pred_3, pred_4, pred_5], [y_1, y_2, y_3, y_4, y_5])
        lloss = HL.calculate_lloss([pred_1, pred_2, pred_3, pred_4, pred_5], [y_1, y_2, y_3, y_4, y_5])

        total_loss = lloss + dloss

        total_loss.backward()
        optimizer.step()

        batch_loss = total_loss.detach().item()
        running_loss += batch_loss
        epoch_loss += batch_loss

        if batch_idx % 20 == 19:
            print(f"[{epoch + 1}, {batch_idx + 1}] loss: {running_loss / 20:.3f}")
            running_loss = 0.0

    # Average over every batch of the epoch. The logging accumulator is reset every
    # 20 batches, so it cannot be used for the per-epoch mean.
    epoch_loss /= len(train_dataloader)

    validation_loss = evaluate_model(model, test_dataloader, val_pred_file_path, val_gold_file_path, evaluator_script,
                   id2leaf_label_subtask_2a, validation=True)

    train_loss_history.append(epoch_loss)
    val_loss_history.append(validation_loss)

    if validation_loss < min_val_loss:
        min_val_loss = validation_loss
        best_epoch = epoch
        # torch.save(model.state_dict(), './models/subtask2a/best_subtask2a_baseline.pt')
    

[1, 20] loss: 1641.765
[1, 40] loss: 954.579


100%|██████████| 4/4 [00:00<00:00, 14.91it/s]


Output:
 f1_h=0.49921	prec_h=0.59915	rec_h=0.42785
[2, 20] loss: 892.742
[2, 40] loss: 892.053


100%|██████████| 4/4 [00:00<00:00, 14.52it/s]


Output:
 f1_h=0.49221	prec_h=0.62598	rec_h=0.40554
[3, 20] loss: 865.038
[3, 40] loss: 860.364


100%|██████████| 4/4 [00:00<00:00, 12.47it/s]


Output:
 f1_h=0.55529	prec_h=0.66137	rec_h=0.47854
[4, 20] loss: 820.578
[4, 40] loss: 853.731


100%|██████████| 4/4 [00:00<00:00, 12.94it/s]


Output:
 f1_h=0.57222	prec_h=0.68488	rec_h=0.49138
[5, 20] loss: 815.729
[5, 40] loss: 846.259


100%|██████████| 4/4 [00:00<00:00, 12.62it/s]


Output:
 f1_h=0.60509	prec_h=0.69692	rec_h=0.53464
[6, 20] loss: 834.457
[6, 40] loss: 801.933


100%|██████████| 4/4 [00:00<00:00, 13.49it/s]


Output:
 f1_h=0.62243	prec_h=0.68924	rec_h=0.56742
[7, 20] loss: 793.913
[7, 40] loss: 769.774


100%|██████████| 4/4 [00:00<00:00, 12.19it/s]


Output:
 f1_h=0.65233	prec_h=0.64841	rec_h=0.65630
[8, 20] loss: 727.584
[8, 40] loss: 758.582


100%|██████████| 4/4 [00:00<00:00, 13.95it/s]


Output:
 f1_h=0.63976	prec_h=0.67175	rec_h=0.61068
[9, 20] loss: 724.997
[9, 40] loss: 701.412


100%|██████████| 4/4 [00:00<00:00, 12.50it/s]


Output:
 f1_h=0.65489	prec_h=0.67152	rec_h=0.63907
[10, 20] loss: 697.628
[10, 40] loss: 675.338


100%|██████████| 4/4 [00:00<00:00, 13.72it/s]


Output:
 f1_h=0.66385	prec_h=0.66329	rec_h=0.66441
[11, 20] loss: 656.854
[11, 40] loss: 645.354


100%|██████████| 4/4 [00:00<00:00, 14.16it/s]


Output:
 f1_h=0.66101	prec_h=0.67767	rec_h=0.64515
[12, 20] loss: 624.867
[12, 40] loss: 642.564


100%|██████████| 4/4 [00:00<00:00, 13.86it/s]


Output:
 f1_h=0.67166	prec_h=0.69303	rec_h=0.65157
[13, 20] loss: 586.986
[13, 40] loss: 603.564


100%|██████████| 4/4 [00:00<00:00, 13.78it/s]


Output:
 f1_h=0.66450	prec_h=0.67220	rec_h=0.65698
[14, 20] loss: 559.670
[14, 40] loss: 564.239


100%|██████████| 4/4 [00:00<00:00, 14.56it/s]


Output:
 f1_h=0.66585	prec_h=0.68602	rec_h=0.64684
[15, 20] loss: 539.025
[15, 40] loss: 537.700


100%|██████████| 4/4 [00:00<00:00, 13.23it/s]


Output:
 f1_h=0.66846	prec_h=0.66678	rec_h=0.67016
[16, 20] loss: 506.308
[16, 40] loss: 508.175


100%|██████████| 4/4 [00:00<00:00, 13.04it/s]


Output:
 f1_h=0.66837	prec_h=0.67482	rec_h=0.66205
[17, 20] loss: 494.976
[17, 40] loss: 500.067


100%|██████████| 4/4 [00:00<00:00, 14.50it/s]


Output:
 f1_h=0.66982	prec_h=0.69646	rec_h=0.64515
[18, 20] loss: 477.046
[18, 40] loss: 476.080


100%|██████████| 4/4 [00:00<00:00, 14.42it/s]


Output:
 f1_h=0.66988	prec_h=0.68330	rec_h=0.65698
[19, 20] loss: 443.888
[19, 40] loss: 450.497


100%|██████████| 4/4 [00:00<00:00, 15.98it/s]


Output:
 f1_h=0.67185	prec_h=0.68779	rec_h=0.65664
[20, 20] loss: 431.879
[20, 40] loss: 427.667


100%|██████████| 4/4 [00:00<00:00, 15.08it/s]


Output:
 f1_h=0.66435	prec_h=0.68209	rec_h=0.64752
[21, 20] loss: 422.963
[21, 40] loss: 405.989


100%|██████████| 4/4 [00:00<00:00, 14.40it/s]


Output:
 f1_h=0.67034	prec_h=0.68427	rec_h=0.65698
[22, 20] loss: 392.854
[22, 40] loss: 407.337


100%|██████████| 4/4 [00:00<00:00, 14.99it/s]


Output:
 f1_h=0.66620	prec_h=0.69653	rec_h=0.63839
[23, 20] loss: 383.114
[23, 40] loss: 379.535


100%|██████████| 4/4 [00:00<00:00, 12.78it/s]


Output:
 f1_h=0.66667	prec_h=0.69756	rec_h=0.63839
[24, 20] loss: 364.112
[24, 40] loss: 349.014


100%|██████████| 4/4 [00:00<00:00, 14.09it/s]


Output:
 f1_h=0.67190	prec_h=0.69430	rec_h=0.65090
[25, 20] loss: 340.486
[25, 40] loss: 332.704


100%|██████████| 4/4 [00:00<00:00, 13.84it/s]


Output:
 f1_h=0.66326	prec_h=0.69373	rec_h=0.63535
[26, 20] loss: 315.596
[26, 40] loss: 313.966


100%|██████████| 4/4 [00:00<00:00, 14.88it/s]


Output:
 f1_h=0.66434	prec_h=0.68510	rec_h=0.64481
[27, 20] loss: 292.769
[27, 40] loss: 308.671


100%|██████████| 4/4 [00:00<00:00, 12.45it/s]


Output:
 f1_h=0.66725	prec_h=0.68632	rec_h=0.64921
[28, 20] loss: 262.421
[28, 40] loss: 291.295


100%|██████████| 4/4 [00:00<00:00, 14.12it/s]


Output:
 f1_h=0.65957	prec_h=0.69417	rec_h=0.62825
[29, 20] loss: 253.465
[29, 40] loss: 257.045


100%|██████████| 4/4 [00:00<00:00, 13.37it/s]


Output:
 f1_h=0.66538	prec_h=0.69276	rec_h=0.64008
[30, 20] loss: 235.219
[30, 40] loss: 239.395


100%|██████████| 4/4 [00:00<00:00, 14.57it/s]


Output:
 f1_h=0.66481	prec_h=0.68457	rec_h=0.64616
[31, 20] loss: 234.883
[31, 40] loss: 222.877


100%|██████████| 4/4 [00:00<00:00, 14.00it/s]


Output:
 f1_h=0.66063	prec_h=0.69611	rec_h=0.62859
[32, 20] loss: 186.174
[32, 40] loss: 201.887


100%|██████████| 4/4 [00:00<00:00, 13.86it/s]


Output:
 f1_h=0.66183	prec_h=0.67679	rec_h=0.64752
[33, 20] loss: 189.324
[33, 40] loss: 183.221


100%|██████████| 4/4 [00:00<00:00, 14.69it/s]


Output:
 f1_h=0.66045	prec_h=0.68920	rec_h=0.63400
[34, 20] loss: 166.176
[34, 40] loss: 166.323


100%|██████████| 4/4 [00:00<00:00, 13.94it/s]


Output:
 f1_h=0.66461	prec_h=0.67420	rec_h=0.65529
[35, 20] loss: 155.810
[35, 40] loss: 157.448


100%|██████████| 4/4 [00:00<00:00, 14.19it/s]


Output:
 f1_h=0.66077	prec_h=0.69354	rec_h=0.63096
[36, 20] loss: 139.777
[36, 40] loss: 153.382


100%|██████████| 4/4 [00:00<00:00, 13.02it/s]


Output:
 f1_h=0.66000	prec_h=0.68663	rec_h=0.63535
[37, 20] loss: 138.288
[37, 40] loss: 130.601


100%|██████████| 4/4 [00:00<00:00, 14.99it/s]


Output:
 f1_h=0.66025	prec_h=0.69819	rec_h=0.62623
[38, 20] loss: 117.766
[38, 40] loss: 123.672


100%|██████████| 4/4 [00:00<00:00, 12.25it/s]


Output:
 f1_h=0.66177	prec_h=0.68614	rec_h=0.63907
[39, 20] loss: 114.931
[39, 40] loss: 120.536


100%|██████████| 4/4 [00:00<00:00, 12.88it/s]


Output:
 f1_h=0.65934	prec_h=0.68170	rec_h=0.63839
[40, 20] loss: 103.980
[40, 40] loss: 117.067


100%|██████████| 4/4 [00:00<00:00, 13.47it/s]


Output:
 f1_h=0.65963	prec_h=0.67589	rec_h=0.64414
[41, 20] loss: 102.103
[41, 40] loss: 103.594


100%|██████████| 4/4 [00:00<00:00, 13.05it/s]


Output:
 f1_h=0.65811	prec_h=0.70762	rec_h=0.61507
[42, 20] loss: 98.776
[42, 40] loss: 95.356


100%|██████████| 4/4 [00:00<00:00, 14.72it/s]


Output:
 f1_h=0.65968	prec_h=0.70901	rec_h=0.61676
[43, 20] loss: 86.486
[43, 40] loss: 99.713


100%|██████████| 4/4 [00:00<00:00, 14.11it/s]


Output:
 f1_h=0.65491	prec_h=0.70027	rec_h=0.61507
[44, 20] loss: 82.998
[44, 40] loss: 86.484


100%|██████████| 4/4 [00:00<00:00, 15.16it/s]


Output:
 f1_h=0.65913	prec_h=0.69034	rec_h=0.63062
[45, 20] loss: 79.975
[45, 40] loss: 81.302


100%|██████████| 4/4 [00:00<00:00, 13.87it/s]


Output:
 f1_h=0.65777	prec_h=0.69433	rec_h=0.62487
[46, 20] loss: 74.923
[46, 40] loss: 74.243


100%|██████████| 4/4 [00:00<00:00, 13.17it/s]


Output:
 f1_h=0.66068	prec_h=0.69010	rec_h=0.63366
[47, 20] loss: 71.639
[47, 40] loss: 68.933


100%|██████████| 4/4 [00:00<00:00, 14.39it/s]


Output:
 f1_h=0.65740	prec_h=0.69518	rec_h=0.62352
[48, 20] loss: 66.226
[48, 40] loss: 60.743


100%|██████████| 4/4 [00:00<00:00, 13.30it/s]


Output:
 f1_h=0.65746	prec_h=0.70000	rec_h=0.61980
[49, 20] loss: 51.285
[49, 40] loss: 63.672


100%|██████████| 4/4 [00:00<00:00, 14.53it/s]


Output:
 f1_h=0.66162	prec_h=0.69016	rec_h=0.63535
[50, 20] loss: 56.189
[50, 40] loss: 59.255


100%|██████████| 4/4 [00:00<00:00, 14.30it/s]


Output:
 f1_h=0.65383	prec_h=0.69912	rec_h=0.61406
[51, 20] loss: 53.580
[51, 40] loss: 55.988


100%|██████████| 4/4 [00:00<00:00, 13.07it/s]


Output:
 f1_h=0.65777	prec_h=0.70550	rec_h=0.61609
[52, 20] loss: 47.822
[52, 40] loss: 54.133


100%|██████████| 4/4 [00:00<00:00, 14.84it/s]


Output:
 f1_h=0.65479	prec_h=0.70000	rec_h=0.61507
[53, 20] loss: 44.962
[53, 40] loss: 45.223


100%|██████████| 4/4 [00:00<00:00, 13.65it/s]


Output:
 f1_h=0.66024	prec_h=0.70027	rec_h=0.62454
[54, 20] loss: 39.641
[54, 40] loss: 44.902


100%|██████████| 4/4 [00:00<00:00, 14.75it/s]


Output:
 f1_h=0.65885	prec_h=0.69464	rec_h=0.62656
[55, 20] loss: 37.874
[55, 40] loss: 38.525


100%|██████████| 4/4 [00:00<00:00, 14.24it/s]


Output:
 f1_h=0.65649	prec_h=0.69147	rec_h=0.62487
[56, 20] loss: 39.497
[56, 40] loss: 40.447


100%|██████████| 4/4 [00:00<00:00, 13.93it/s]


Output:
 f1_h=0.65785	prec_h=0.68794	rec_h=0.63028
[57, 20] loss: 31.977
[57, 40] loss: 36.567


100%|██████████| 4/4 [00:00<00:00, 14.95it/s]


Output:
 f1_h=0.65956	prec_h=0.70874	rec_h=0.61676
[58, 20] loss: 32.609
[58, 40] loss: 32.104


100%|██████████| 4/4 [00:00<00:00, 14.96it/s]


Output:
 f1_h=0.65761	prec_h=0.69776	rec_h=0.62183
[59, 20] loss: 30.387
[59, 40] loss: 27.538


100%|██████████| 4/4 [00:00<00:00, 14.01it/s]


Output:
 f1_h=0.65517	prec_h=0.69912	rec_h=0.61642
[60, 20] loss: 22.367
[60, 40] loss: 25.765


100%|██████████| 4/4 [00:00<00:00, 13.16it/s]


Output:
 f1_h=0.64902	prec_h=0.68401	rec_h=0.61744
[61, 20] loss: 22.885
[61, 40] loss: 23.351


100%|██████████| 4/4 [00:00<00:00, 14.29it/s]


Output:
 f1_h=0.66074	prec_h=0.68201	rec_h=0.64076
[62, 20] loss: 17.998
[62, 40] loss: 24.555


100%|██████████| 4/4 [00:00<00:00, 14.45it/s]


Output:
 f1_h=0.65776	prec_h=0.70636	rec_h=0.61541
[63, 20] loss: 18.863
[63, 40] loss: 18.495


100%|██████████| 4/4 [00:00<00:00, 12.58it/s]


Output:
 f1_h=0.65782	prec_h=0.70080	rec_h=0.61980
[64, 20] loss: 15.044
[64, 40] loss: 19.962


100%|██████████| 4/4 [00:00<00:00, 13.22it/s]


Output:
 f1_h=0.65674	prec_h=0.69079	rec_h=0.62589
[65, 20] loss: 17.300
[65, 40] loss: 18.443


100%|██████████| 4/4 [00:00<00:00, 13.50it/s]


Output:
 f1_h=0.65080	prec_h=0.69178	rec_h=0.61440
[66, 20] loss: 14.292
[66, 40] loss: 17.737


100%|██████████| 4/4 [00:00<00:00, 13.66it/s]


Output:
 f1_h=0.65720	prec_h=0.68294	rec_h=0.63332
[67, 20] loss: 13.310
[67, 40] loss: 14.305


100%|██████████| 4/4 [00:00<00:00, 13.77it/s]


Output:
 f1_h=0.65786	prec_h=0.70838	rec_h=0.61406
[68, 20] loss: 8.419
[68, 40] loss: 13.230


100%|██████████| 4/4 [00:00<00:00, 11.41it/s]


Output:
 f1_h=0.65996	prec_h=0.70437	rec_h=0.62082
[69, 20] loss: 9.139
[69, 40] loss: 11.341


100%|██████████| 4/4 [00:00<00:00, 12.38it/s]


Output:
 f1_h=0.64889	prec_h=0.68455	rec_h=0.61676
[70, 20] loss: 8.012
[70, 40] loss: 13.074


100%|██████████| 4/4 [00:00<00:00, 14.02it/s]


Output:
 f1_h=0.65173	prec_h=0.71388	rec_h=0.59953
[71, 20] loss: 8.805
[71, 40] loss: 12.132


100%|██████████| 4/4 [00:00<00:00, 12.98it/s]


Output:
 f1_h=0.65462	prec_h=0.70357	rec_h=0.61203
[72, 20] loss: 10.635
[72, 40] loss: 10.020


100%|██████████| 4/4 [00:00<00:00, 14.49it/s]


Output:
 f1_h=0.65626	prec_h=0.69985	rec_h=0.61778
[73, 20] loss: 10.164
[73, 40] loss: 11.556


100%|██████████| 4/4 [00:00<00:00, 12.99it/s]


Output:
 f1_h=0.65556	prec_h=0.69783	rec_h=0.61811
[74, 20] loss: 6.649
[74, 40] loss: 9.078


100%|██████████| 4/4 [00:00<00:00, 12.99it/s]


Output:
 f1_h=0.65361	prec_h=0.68390	rec_h=0.62589
[75, 20] loss: 6.713
[75, 40] loss: 9.022


100%|██████████| 4/4 [00:00<00:00, 13.46it/s]


Output:
 f1_h=0.65301	prec_h=0.69040	rec_h=0.61947
[76, 20] loss: 8.314
[76, 40] loss: 6.490


100%|██████████| 4/4 [00:00<00:00, 13.81it/s]


Output:
 f1_h=0.65412	prec_h=0.70378	rec_h=0.61102
[77, 20] loss: 11.551
[77, 40] loss: 7.498


100%|██████████| 4/4 [00:00<00:00, 13.84it/s]


Output:
 f1_h=0.65354	prec_h=0.69456	rec_h=0.61710
[78, 20] loss: 7.192
[78, 40] loss: 8.645


100%|██████████| 4/4 [00:00<00:00, 13.80it/s]


Output:
 f1_h=0.65632	prec_h=0.70438	rec_h=0.61440
[79, 20] loss: 6.006
[79, 40] loss: 11.322


100%|██████████| 4/4 [00:00<00:00, 14.41it/s]


Output:
 f1_h=0.65770	prec_h=0.70053	rec_h=0.61980
[80, 20] loss: 5.344
[80, 40] loss: 6.540


100%|██████████| 4/4 [00:00<00:00, 11.72it/s]


Output:
 f1_h=0.65497	prec_h=0.68089	rec_h=0.63096
[81, 20] loss: 5.570
[81, 40] loss: 8.752


100%|██████████| 4/4 [00:00<00:00, 13.98it/s]


Output:
 f1_h=0.65612	prec_h=0.71024	rec_h=0.60967
[82, 20] loss: 5.254
[82, 40] loss: 7.006


100%|██████████| 4/4 [00:00<00:00, 13.88it/s]


Output:
 f1_h=0.65881	prec_h=0.69833	rec_h=0.62352
[83, 20] loss: 6.894
[83, 40] loss: 6.137


100%|██████████| 4/4 [00:00<00:00, 12.18it/s]


Output:
 f1_h=0.65542	prec_h=0.70724	rec_h=0.61068
[84, 20] loss: 4.959
[84, 40] loss: 8.624


100%|██████████| 4/4 [00:00<00:00, 13.84it/s]


Output:
 f1_h=0.65546	prec_h=0.70462	rec_h=0.61271
[85, 20] loss: 5.530
[85, 40] loss: 5.317


100%|██████████| 4/4 [00:00<00:00, 13.34it/s]


Output:
 f1_h=0.65711	prec_h=0.69920	rec_h=0.61980
[86, 20] loss: 6.807
[86, 40] loss: 6.125


100%|██████████| 4/4 [00:00<00:00, 14.36it/s]


Output:
 f1_h=0.65348	prec_h=0.70544	rec_h=0.60865
[87, 20] loss: 3.804
[87, 40] loss: 5.794


100%|██████████| 4/4 [00:00<00:00, 13.35it/s]


Output:
 f1_h=0.65273	prec_h=0.70641	rec_h=0.60662
[88, 20] loss: 5.444
[88, 40] loss: 6.100


100%|██████████| 4/4 [00:00<00:00, 12.02it/s]


Output:
 f1_h=0.65150	prec_h=0.70584	rec_h=0.60493
[89, 20] loss: 6.621
[89, 40] loss: 5.014


100%|██████████| 4/4 [00:00<00:00, 12.66it/s]


Output:
 f1_h=0.65307	prec_h=0.70089	rec_h=0.61136
[90, 20] loss: 4.459
[90, 40] loss: 6.822


100%|██████████| 4/4 [00:00<00:00, 13.27it/s]


Output:
 f1_h=0.65566	prec_h=0.70779	rec_h=0.61068
[91, 20] loss: 5.030
[91, 40] loss: 5.455


100%|██████████| 4/4 [00:00<00:00, 12.60it/s]


Output:
 f1_h=0.65464	prec_h=0.70186	rec_h=0.61338
[92, 20] loss: 3.872
[92, 40] loss: 7.176


100%|██████████| 4/4 [00:00<00:00, 13.24it/s]


Output:
 f1_h=0.65265	prec_h=0.71852	rec_h=0.59784
[93, 20] loss: 4.300
[93, 40] loss: 8.260


100%|██████████| 4/4 [00:00<00:00, 12.57it/s]


Output:
 f1_h=0.65995	prec_h=0.69174	rec_h=0.63096
[94, 20] loss: 5.730
[94, 40] loss: 4.850


100%|██████████| 4/4 [00:00<00:00, 14.07it/s]


Output:
 f1_h=0.66334	prec_h=0.70090	rec_h=0.62960
[95, 20] loss: 3.948
[95, 40] loss: 6.144


100%|██████████| 4/4 [00:00<00:00, 13.70it/s]


Output:
 f1_h=0.65486	prec_h=0.70369	rec_h=0.61237
[96, 20] loss: 8.007
[96, 40] loss: 8.058


100%|██████████| 4/4 [00:00<00:00, 12.68it/s]


Output:
 f1_h=0.65530	prec_h=0.69049	rec_h=0.62352
[97, 20] loss: 5.532
[97, 40] loss: 7.102


100%|██████████| 4/4 [00:00<00:00, 13.29it/s]


Output:
 f1_h=0.65490	prec_h=0.70922	rec_h=0.60831
[98, 20] loss: 6.127
[98, 40] loss: 4.391


100%|██████████| 4/4 [00:00<00:00, 13.82it/s]


Output:
 f1_h=0.65757	prec_h=0.70111	rec_h=0.61913
[99, 20] loss: 7.675
[99, 40] loss: 4.862


100%|██████████| 4/4 [00:00<00:00, 12.48it/s]


Output:
 f1_h=0.65478	prec_h=0.69268	rec_h=0.62082
[100, 20] loss: 6.311
[100, 40] loss: 5.290


100%|██████████| 4/4 [00:00<00:00, 14.54it/s]


Output:
 f1_h=0.65503	prec_h=0.70054	rec_h=0.61507


In [12]:
# Persist the model weights as they stand after the training loop finished.
# NOTE: this is the last-epoch state, not the lowest-validation-loss epoch; the
# best-checkpoint save inside the training loop is commented out.
torch.save(model.state_dict(), './models/subtask2a/ViT-OpenAI-Large/vit_openai_large.pt')

In [33]:
# Restore weights from the "best baseline" checkpoint path.
# NOTE(review): the only visible save to this path is commented out in the training
# loop, so the file presumably comes from an earlier run — confirm it is the intended one.
model.load_state_dict(torch.load('./models/subtask2a/best_subtask2a_baseline.pt'))

<All keys matched successfully>

In [34]:
# Load the multilingual baseline checkpoint into the same model object; this replaces
# whatever weights an earlier load_state_dict call put in place.
model.load_state_dict(torch.load('./models/subtask2a/subtask_2a_multilingual_baseline.pt'))

<All keys matched successfully>

### Evaluation

In [18]:
from tqdm import tqdm
import json
import subprocess
from torch.utils.data import DataLoader

### OpenAI Small + CLIP

In [19]:
from modules.nn.MultiModal import MultiModalBaseline

# Rebuild the network for 1536-wide text features and load the sweep checkpoint.
model = MultiModalBaseline(512, 1536)
# evaluate_model moves every batch to `device`, so the model must live there too.
model.to(device)
# map_location keeps the load working when the checkpoint was saved from another device.
model.load_state_dict(torch.load('models/subtask2a/MultiModal-OpenAI-Small/splendid-sweep-4.pth',
                                 map_location=device))

<All keys matched successfully>

In [20]:
ar_pred_file_path = './Predictions/subtask2a/ar_predictions_subtask2a.txt'
ar_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask2a_ar.json'
evaluator_script = './scorer-baseline/subtask_1_2a.py'

ar_test_data = process_json(ar_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
# This split goes through TestDataSet (features only, no targets), unlike bg/md.
ar_test_dataset = TestDataSet(df=ar_test_data,
                          text_features_file='./TextFeatures/subtask2a/text-embedding-3-small/ar_test_text_features.pkl',
                          image_features_file='./ImageFeatures/CLIP-ViT/ar_test_images_features.pkl')

# Evaluation data is not shuffled, so the prediction file order is stable across runs.
ar_test_dataloader = DataLoader(ar_test_dataset, batch_size=64, shuffle=False)

# format=5 is forwarded to get_labels. NOTE(review): its meaning is not visible here — confirm.
evaluate_model(model, ar_test_dataloader, ar_pred_file_path, ar_gold_file_path, evaluator_script,
               id2leaf_label_subtask_2a, format=5, validation=False, threshold=0.3)

100%|██████████| 2/2 [00:00<00:00, 50.73it/s]


Output:
 f1_h=0.52780	prec_h=0.47657	rec_h=0.59136


In [21]:
bg_pred_file_path = './Predictions/subtask2a/bg_predictions_subtask2a.txt'
bg_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask2a_bg.json'

bg_test_data = process_json(bg_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
bg_test_dataset = DataSet(bg_test_data, indexed_persuasion_techniques_2a,
                           text_features_file='./TextFeatures/subtask2a/text-embedding-3-small/bg_test_text_features.pkl',
                          image_features_file='./ImageFeatures/CLIP-ViT/bulgarian_test_images_features.pkl')

# Evaluation data is not shuffled, so the prediction file order is stable across runs.
bg_test_dataloader = DataLoader(bg_test_dataset, batch_size=64, shuffle=False)

# Uses the evaluator_script defined by an earlier cell.
evaluate_model(model, bg_test_dataloader, bg_pred_file_path, bg_gold_file_path, evaluator_script,
               id2leaf_label_subtask_2a, threshold=0.3)

100%|██████████| 7/7 [00:00<00:00, 43.03it/s]


Output:
 f1_h=0.64443	prec_h=0.64928	rec_h=0.63966


In [22]:

md_pred_file_path = './Predictions/subtask2a/md_predictions_subtask2a.txt'
md_gold_file_path = 'test_labels_ar_bg_md_version2/test_subtask2a_md.json'

md_test_data = process_json(md_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
# The image cache for this split is stored under the 'nm' prefix.
md_test_dataset = DataSet(md_test_data, indexed_persuasion_techniques_2a,
                          text_features_file='./TextFeatures/subtask2a/text-embedding-3-small/md_test_text_features.pkl',
                          image_features_file='./ImageFeatures/CLIP-ViT/nm_test_images_features.pkl')

# Evaluation data is not shuffled, so the prediction file order is stable across runs.
md_test_dataloader = DataLoader(md_test_dataset, batch_size=64, shuffle=False)

# Uses the evaluator_script defined by an earlier cell.
evaluate_model(model, md_test_dataloader, md_pred_file_path, md_gold_file_path, evaluator_script,
               id2leaf_label_subtask_2a, threshold=0.3)

100%|██████████| 5/5 [00:00<00:00, 44.21it/s]


Output:
 f1_h=0.67714	prec_h=0.71721	rec_h=0.64130


In [23]:
en_pred_file_path = './Predictions/subtask2a/en_predictions_subtask2a.txt'

# The English test file is unlabeled, so it is read with process_test_json.
eng_test_data = process_test_json('./test_data/english/en_subtask2a_test_unlabeled.json')

eng_test_dataset = TestDataSet(eng_test_data,
                               './TextFeatures/subtask2a/text-embedding-3-small/en_test_text_features.pkl',
                               './ImageFeatures/CLIP-ViT/english_test_images_features.pkl')
# Evaluation data is not shuffled, so the prediction file order is stable across runs.
eng_test_dataloader = DataLoader(eng_test_dataset, batch_size=64, shuffle=False)

# gold_file_path=None: predictions are written but no scorer is run.
# This split uses threshold 0.4 where the other test splits use 0.3.
evaluate_model(model, eng_test_dataloader, en_pred_file_path, None, evaluator_script,
               id2leaf_label_subtask_2a, threshold=0.4)

100%|██████████| 24/24 [00:00<00:00, 59.39it/s]


0.64640	0.76090	0.56184

### OpenAI Large + CLIP

In [24]:
from modules.nn.MultiModal import MultiModalBaseline

# Rebuild the network for 3072-wide text features and load the sweep checkpoint.
model = MultiModalBaseline(512, 3072)
# evaluate_model moves every batch to `device`, so the model must live there too.
model.to(device)
# map_location keeps the load working when the checkpoint was saved from another device.
model.load_state_dict(torch.load('models/subtask2a/MultiModal-OpenAI-Large/fresh-sweep-3.pth',
                                 map_location=device))

<All keys matched successfully>

In [25]:
# NOTE: same prediction path as the text-embedding-3-small run, so this overwrites that file.
ar_pred_file_path = './Predictions/subtask2a/ar_predictions_subtask2a.txt'
ar_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask2a_ar.json'
evaluator_script = './scorer-baseline/subtask_1_2a.py'

ar_test_data = process_json(ar_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
# This split goes through TestDataSet (features only, no targets), unlike bg/md.
ar_test_dataset = TestDataSet(df=ar_test_data,
                          text_features_file='./TextFeatures/subtask2a/text-embedding-3-large/ar_test_text_features.pkl',
                          image_features_file='./ImageFeatures/CLIP-ViT/ar_test_images_features.pkl')

# Evaluation data is not shuffled, so the prediction file order is stable across runs.
ar_test_dataloader = DataLoader(ar_test_dataset, batch_size=64, shuffle=False)

# format=5 is forwarded to get_labels. NOTE(review): its meaning is not visible here — confirm.
evaluate_model(model, ar_test_dataloader, ar_pred_file_path, ar_gold_file_path, evaluator_script,
               id2leaf_label_subtask_2a, format=5, validation=False, threshold=0.3)

100%|██████████| 2/2 [00:00<00:00, 41.44it/s]


Output:
 f1_h=0.52197	prec_h=0.44014	rec_h=0.64120


In [26]:
# NOTE: same prediction path as the text-embedding-3-small run, so this overwrites that file.
bg_pred_file_path = './Predictions/subtask2a/bg_predictions_subtask2a.txt'
bg_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask2a_bg.json'

bg_test_data = process_json(bg_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
bg_test_dataset = DataSet(bg_test_data, indexed_persuasion_techniques_2a,
                           text_features_file='./TextFeatures/subtask2a/text-embedding-3-large/bg_test_text_features.pkl',
                          image_features_file='./ImageFeatures/CLIP-ViT/bulgarian_test_images_features.pkl')

# Evaluation data is not shuffled, so the prediction file order is stable across runs.
bg_test_dataloader = DataLoader(bg_test_dataset, batch_size=64, shuffle=False)

# Uses the evaluator_script defined by an earlier cell.
evaluate_model(model, bg_test_dataloader, bg_pred_file_path, bg_gold_file_path, evaluator_script,
               id2leaf_label_subtask_2a, threshold=0.3)

100%|██████████| 7/7 [00:00<00:00, 37.81it/s]


Output:
 f1_h=0.65849	prec_h=0.64199	rec_h=0.67586


In [27]:

# NOTE: same prediction path as the text-embedding-3-small run, so this overwrites that file.
md_pred_file_path = './Predictions/subtask2a/md_predictions_subtask2a.txt'
md_gold_file_path = 'test_labels_ar_bg_md_version2/test_subtask2a_md.json'

md_test_data = process_json(md_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
# The image cache for this split is stored under the 'nm' prefix.
md_test_dataset = DataSet(md_test_data, indexed_persuasion_techniques_2a,
                          text_features_file='./TextFeatures/subtask2a/text-embedding-3-large/md_test_text_features.pkl',
                          image_features_file='./ImageFeatures/CLIP-ViT/nm_test_images_features.pkl')

# Evaluation data is not shuffled, so the prediction file order is stable across runs.
md_test_dataloader = DataLoader(md_test_dataset, batch_size=64, shuffle=False)

# Uses the evaluator_script defined by an earlier cell.
evaluate_model(model, md_test_dataloader, md_pred_file_path, md_gold_file_path, evaluator_script,
               id2leaf_label_subtask_2a, threshold=0.3)

100%|██████████| 5/5 [00:00<00:00, 35.83it/s]


Output:
 f1_h=0.68926	prec_h=0.73154	rec_h=0.65160


In [28]:
# NOTE: same prediction path as the text-embedding-3-small run, so this overwrites that file.
en_pred_file_path = './Predictions/subtask2a/en_predictions_subtask2a.txt'

# The English test file is unlabeled, so it is read with process_test_json.
eng_test_data = process_test_json('./test_data/english/en_subtask2a_test_unlabeled.json')

eng_test_dataset = TestDataSet(eng_test_data,
                               './TextFeatures/subtask2a/text-embedding-3-large/en_test_text_features.pkl',
                               './ImageFeatures/CLIP-ViT/english_test_images_features.pkl')
# Evaluation data is not shuffled, so the prediction file order is stable across runs.
eng_test_dataloader = DataLoader(eng_test_dataset, batch_size=64, shuffle=False)

# gold_file_path=None: predictions are written but no scorer is run.
# This split uses threshold 0.4 where the other test splits use 0.3.
evaluate_model(model, eng_test_dataloader, en_pred_file_path, None, evaluator_script,
               id2leaf_label_subtask_2a, threshold=0.4)

100%|██████████| 24/24 [00:00<00:00, 46.77it/s]


0.65305	0.76524	0.56955