In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
import nltk
from utils.utils import *
from utils.label_decoding import *
from utils.HierarchicalLoss import *

import torchvision.models as models

In [2]:
from torch.utils.data import Dataset
import pickle

class DataSet(Dataset):
    """Labelled dataset pairing precomputed text/image features with multi-hot targets.

    Each item is looked up from a DataFrame row (columns ``id``, ``cleaned_text``,
    ``image`` and ``Level 1`` .. ``Level 5``) and from two pickled mappings:
    image features keyed by the row's ``image`` value and text features keyed by
    the row's ``id`` value.

    Args:
        df: DataFrame holding one sample per row.
        labels_at_level: mapping ``'Level <n>'`` -> ``{label: index}``.
        text_features_file: path to the pickled text-feature mapping.
        image_features_file: path to the pickled image-feature mapping.
        max_len: stored on the instance but not used by this class; kept so
            existing callers that pass it keep working.
    """

    def __init__(self, df, labels_at_level, text_features_file, image_features_file, max_len=128):
        super(DataSet, self).__init__()
        self.data_df = df
        self.labels_at_level = labels_at_level
        self.max_len = max_len
        # Image features are loaded first, then text features (same order as before).
        self.image_features = self._load_pickle(image_features_file)
        self.text_features = self._load_pickle(text_features_file)

    @staticmethod
    def _load_pickle(path):
        """Read and return the object pickled at ``path``."""
        # SECURITY: pickle.load can execute arbitrary code; only point this at
        # feature files you produced yourself or otherwise trust.
        with open(path, 'rb') as f:
            return pickle.load(f)

    def __len__(self):
        """Number of rows in the underlying DataFrame."""
        return len(self.data_df)

    def __getitem__(self, idx):
        """Return the sample at positional index ``idx`` as a dict.

        Keys: ``id``, ``text``, ``image_features``, ``text_features`` and
        ``level_1_target`` .. ``level_5_target``.
        """
        # Materialise the row once instead of re-running .iloc for every column.
        row = self.data_df.iloc[idx]
        sample_id = row['id']

        # Targets are encoded before the feature lookups, as in the original order.
        targets = {
            f'level_{level}_target': self.encode(row[f'Level {level}'], level)
            for level in range(1, 6)
        }

        image_features = self.image_features[row['image']]
        text_features = self.text_features[sample_id]

        return {
            'id': sample_id,
            'text': row['cleaned_text'],
            'image_features': image_features,
            'text_features': text_features,
            **targets,
        }

    def encode(self, labels, level):
        """Build the multi-hot target vector for ``labels`` at hierarchy ``level``.

        The vector has one slot per label known at that level plus one extra
        trailing slot, which is set to 1 only when ``labels`` is empty.

        Raises:
            KeyError: if a label is not present in ``labels_at_level`` for the level.
        """
        label_to_idx = self.labels_at_level[f'Level {level}']
        target = torch.zeros(len(label_to_idx) + 1)
        for label in labels:
            target[label_to_idx[label]] = 1
        if len(labels) == 0:
            target[-1] = 1
        return target

In [3]:
class TestDataSet(Dataset):
    """Unlabelled dataset serving precomputed text/image features for inference.

    Each item is looked up from a DataFrame row (columns ``id``, ``cleaned_text``
    and ``image``) and from two pickled mappings: image features keyed by the
    row's ``image`` value and text features keyed by the row's ``id`` value.

    Args:
        df: DataFrame holding one sample per row.
        text_features_file: path to the pickled text-feature mapping.
        image_features_file: path to the pickled image-feature mapping.
        max_len: stored on the instance but not used by this class; kept so
            existing callers that pass it keep working.
    """

    def __init__(self, df, text_features_file, image_features_file, max_len=128):
        super(TestDataSet, self).__init__()
        self.data_df = df
        self.max_len = max_len
        # Image features are loaded first, then text features (same order as before).
        self.image_features = self._load_pickle(image_features_file)
        self.text_features = self._load_pickle(text_features_file)

    @staticmethod
    def _load_pickle(path):
        """Read and return the object pickled at ``path``."""
        # SECURITY: pickle.load can execute arbitrary code; only point this at
        # feature files you produced yourself or otherwise trust.
        with open(path, 'rb') as f:
            return pickle.load(f)

    def __len__(self):
        """Number of rows in the underlying DataFrame."""
        return len(self.data_df)

    def __getitem__(self, idx):
        """Return ``id``, ``text``, ``text_features`` and ``image_features`` for row ``idx``."""
        # Materialise the row once instead of re-running .iloc for every column.
        row = self.data_df.iloc[idx]
        sample_id = row['id']
        # Text features are resolved before image features, as in the original order.
        text_features = self.text_features[sample_id]

        return {
            'id': sample_id,
            'text': row['cleaned_text'],
            'text_features': text_features,
            'image_features': self.image_features[row['image']],
        }

In [4]:
def evaluate_model(model, dataloader, pred_file_path, gold_file_path, 
                   evaluator_script_path, id2leaf_label, format=None, validation=False, threshold=0.3):
    """Run ``model`` over ``dataloader``, write predictions to JSON and optionally score them.

    Args:
        model: module called as ``model(text_features, image_features)`` and
            returning five per-level prediction tensors.
        dataloader: yields batches with ``id``, ``text_features`` and
            ``image_features`` (plus ``level_<n>_target`` when ``validation``).
        pred_file_path: where the JSON prediction list is written.
        gold_file_path: gold labels passed to the scorer; ``None`` skips scoring.
        evaluator_script_path: scorer script, run with ``python3``.
        id2leaf_label: forwarded to ``get_labels``.
        format: forwarded to ``get_labels`` (name shadows the builtin; kept for
            interface compatibility).
        validation: when True, also accumulate the hierarchical loss.
        threshold: predictions at levels 3-5 strictly above this value count as positive.

    Returns:
        The mean per-batch loss (``dloss + lloss``) when ``validation`` is True,
        otherwise ``None``. The loss is now returned even when
        ``gold_file_path`` is ``None``.
    """
    # Imported locally so the function does not depend on a later notebook cell
    # having been executed first.
    import json
    import subprocess
    from tqdm import tqdm

    model.eval()
    predictions = []

    # Run on the device the model actually lives on; only fall back to the
    # notebook-level `device` for a parameter-less model.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else device

    # The loss helper is only needed when a validation loss is requested.
    hierarchical_loss = None
    if validation:
        hierarchical_loss = HierarchicalLoss(id2label=id2label_subtask_2a,
                                             hierarchical_labels=hierarchy_subtask_2a,
                                             persuasion_techniques=persuasion_techniques_2a,
                                             device=run_device)

    total_loss = 0.0

    with torch.no_grad():
        for batch in tqdm(dataloader):
            # Default collation turns numeric ids into a tensor and string ids into a list.
            if not isinstance(batch['id'], list):
                ids = batch['id'].detach().cpu().numpy().tolist()
            else:
                ids = batch['id']

            text_features = batch['text_features'].to(run_device)
            image_features = batch['image_features'].to(run_device)

            pred_1, pred_2, pred_3, pred_4, pred_5 = model(text_features, image_features)

            if validation:
                level_preds = [pred_1, pred_2, pred_3, pred_4, pred_5]
                level_targets = [batch[f'level_{level}_target'].to(run_device) for level in range(1, 6)]

                dloss = hierarchical_loss.calculate_dloss(level_preds, level_targets)
                lloss = hierarchical_loss.calculate_lloss(level_preds, level_targets)

                total_loss += (dloss + lloss).detach().cpu().item()

            # Only levels 3-5 are thresholded and passed on to get_labels.
            pred_3 = (pred_3.cpu().detach().numpy() > threshold).astype(int)
            pred_4 = (pred_4.cpu().detach().numpy() > threshold).astype(int)
            pred_5 = (pred_5.cpu().detach().numpy() > threshold).astype(int)

            predictions += get_labels(id2leaf_label, ids, pred_3, pred_4, pred_5, format)

    # Writing JSON data
    with open(pred_file_path, 'w') as f:
        json.dump(predictions, f, indent=4)

    # Scoring needs gold labels; without them just skip the external scorer.
    if gold_file_path is not None:
        command = [
            "python3", evaluator_script_path,
            "--gold_file_path", gold_file_path,
            "--pred_file_path", pred_file_path
        ]

        result = subprocess.run(command, capture_output=True, text=True)

        if result.returncode == 0:
            print("Output:\n", result.stdout)
        else:
            print("Error:\n", result.stderr)

    if validation:
        # max(..., 1) avoids ZeroDivisionError on an empty dataloader.
        return total_loss / max(len(dataloader), 1)
    

In [5]:
# Load the subtask 2a training split via process_json.
train_data = process_json('./semeval2024_dev_release/subtask2a/train.json', 
                          techniques_to_level_2a, hierarchy_subtask_2a)
# NOTE: no separate validation frame is built; `test_data` below is read from
# validation.json and is what the training cell evaluates on after every epoch.
test_data = process_json('./semeval2024_dev_release/subtask2a/validation.json',
                         techniques_to_level_2a, hierarchy_subtask_2a)

In [6]:
# from transformers import BertTokenizer, BertModel
# 
# tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

In [7]:
from torch.utils.data import DataLoader

# Wrap both frames in DataSet, pointing each at the matching precomputed
# mBERT text features and ResNet image features.
training_dataset = DataSet(train_data, indexed_persuasion_techniques_2a, 
                           text_features_file='./TextFeatures/subtask2a/mBERT/train_text_features.pkl',
                           image_features_file='./ImageFeatures/ResNet/train_images_features.pkl')
# `test_dataset` is built from the validation.json frame and the validation feature files.
test_dataset = DataSet(test_data, indexed_persuasion_techniques_2a, 
                       text_features_file='./TextFeatures/subtask2a/mBERT/validation_text_features.pkl', 
                       image_features_file='./ImageFeatures/ResNet/validation_images_features.pkl')

In [8]:
from tqdm import tqdm
import json
import subprocess

num_epochs = 100
batch_size = 128
device = get_device()
# The detected device is deliberately overridden: everything below runs on CPU.
device = torch.device('cpu')
HL = HierarchicalLoss(id2label=id2label_subtask_2a, hierarchical_labels=hierarchy_subtask_2a,
                        persuasion_techniques=persuasion_techniques_2a, device=device)

train_dataloader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True)
# The held-out loader is not shuffled: evaluate_model averages the loss per
# batch, so a fixed batch composition keeps the validation loss comparable
# across epochs and the prediction file order stable.
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


Using MPS


In [9]:
# NOTE(review): presumably values taken from a hyperparameter sweep — TODO confirm.
# None of these names is read by the cells visible in this notebook: the
# optimizer is created with lr=0.0001 and every evaluate_model call either
# passes `threshold` explicitly or relies on its 0.3 default.
alpha = 0.7764469620072395
# Reassigned after the DataLoaders were already built with batch_size=128,
# so this value does not affect them.
batch_size = 256
beta = 0.9992511127909072
beta1 = 0.9094170903394552
learning_rate = 3.906930058023181e-05
threshold = 0.8256232754296409

In [15]:
from modules.nn.MultiModal import MultiModalBaseline

# Loop-invariant evaluation settings (previously re-assigned on every epoch).
val_pred_file_path = './Predictions/val_predictions_subtask2a.json'
val_gold_file_path = './semeval2024_dev_release/subtask2a/validation.json'
evaluator_script = './scorer-baseline/subtask_1_2a.py'

model = MultiModalBaseline(img_feature_size=2048, text_feature_size=768)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

min_val_loss = float('inf')
best_epoch = None

train_loss_history = []
val_loss_history = []

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # windowed sum, only used for the 20-batch progress print
    epoch_loss = 0.0    # sum over the whole epoch, used for the loss history
    for batch_idx, batch in enumerate(train_dataloader):

        y_1, y_2, y_3 = batch['level_1_target'], batch['level_2_target'], batch['level_3_target']
        y_4, y_5 = batch['level_4_target'], batch['level_5_target']

        images_features = batch['image_features']
        text_features = batch['text_features']

        y_1, y_2, y_3, y_4, y_5 = y_1.to(device), y_2.to(device), y_3.to(device), y_4.to(device), y_5.to(device)

        images_features = images_features.to(device)
        text_features = text_features.to(device)

        optimizer.zero_grad()
        pred_1, pred_2, pred_3, pred_4, pred_5 = model(text_features, images_features)

        dloss = HL.calculate_dloss([pred_1, pred_2, pred_3, pred_4, pred_5], [y_1, y_2, y_3, y_4, y_5])
        lloss = HL.calculate_lloss([pred_1, pred_2, pred_3, pred_4, pred_5], [y_1, y_2, y_3, y_4, y_5])

        total_loss = lloss + dloss

        total_loss.backward()
        optimizer.step()

        batch_loss = total_loss.detach().item()
        running_loss += batch_loss
        epoch_loss += batch_loss

        if batch_idx % 20 == 19:
            print(f"[{epoch + 1}, {batch_idx + 1}] loss: {running_loss / 20:.3f}")
            running_loss = 0.0

    # Mean training loss over ALL batches of the epoch. The windowed
    # `running_loss` is reset every 20 batches, so dividing it by the batch
    # count (as before) only reflected the trailing batches.
    epoch_loss /= len(train_dataloader)

    validation_loss = evaluate_model(model, test_dataloader, val_pred_file_path, val_gold_file_path, evaluator_script,
                   id2leaf_label_subtask_2a, validation=True)

    train_loss_history.append(epoch_loss)
    val_loss_history.append(validation_loss)

    # Track the epoch with the lowest validation loss (checkpointing is disabled).
    if validation_loss < min_val_loss:
        min_val_loss = validation_loss
        best_epoch = epoch
        # torch.save(model.state_dict(), './models/subtask2a/best_subtask2a_baseline.pt')
    

[1, 20] loss: 946.711
[1, 40] loss: 1004.895


100%|██████████| 4/4 [00:00<00:00, 16.10it/s]


Output:
 f1_h=0.47786	prec_h=0.66768	rec_h=0.37209
[2, 20] loss: 946.138
[2, 40] loss: 952.240


100%|██████████| 4/4 [00:00<00:00, 16.86it/s]


Output:
 f1_h=0.51831	prec_h=0.65648	rec_h=0.42819
[3, 20] loss: 878.148
[3, 40] loss: 915.148


100%|██████████| 4/4 [00:00<00:00, 16.92it/s]


Output:
 f1_h=0.57523	prec_h=0.65683	rec_h=0.51166
[4, 20] loss: 834.381
[4, 40] loss: 847.335


100%|██████████| 4/4 [00:00<00:00, 16.83it/s]


Output:
 f1_h=0.57714	prec_h=0.66994	rec_h=0.50693
[5, 20] loss: 807.421
[5, 40] loss: 825.863


100%|██████████| 4/4 [00:00<00:00, 16.57it/s]


Output:
 f1_h=0.58519	prec_h=0.64483	rec_h=0.53565
[6, 20] loss: 811.669
[6, 40] loss: 818.802


100%|██████████| 4/4 [00:00<00:00, 16.93it/s]


Output:
 f1_h=0.58638	prec_h=0.65894	rec_h=0.52822
[7, 20] loss: 796.398
[7, 40] loss: 781.129


100%|██████████| 4/4 [00:00<00:00, 16.91it/s]


Output:
 f1_h=0.59609	prec_h=0.64802	rec_h=0.55188
[8, 20] loss: 758.820
[8, 40] loss: 727.340


100%|██████████| 4/4 [00:00<00:00, 17.14it/s]


Output:
 f1_h=0.59986	prec_h=0.62514	rec_h=0.57655
[9, 20] loss: 716.874
[9, 40] loss: 698.674


100%|██████████| 4/4 [00:00<00:00, 17.38it/s]


Output:
 f1_h=0.58982	prec_h=0.61131	rec_h=0.56979
[10, 20] loss: 690.256
[10, 40] loss: 658.959


100%|██████████| 4/4 [00:00<00:00, 17.64it/s]


Output:
 f1_h=0.57424	prec_h=0.66861	rec_h=0.50321
[11, 20] loss: 626.058
[11, 40] loss: 638.701


100%|██████████| 4/4 [00:00<00:00, 17.64it/s]


Output:
 f1_h=0.59240	prec_h=0.59718	rec_h=0.58770
[12, 20] loss: 598.865
[12, 40] loss: 583.095


100%|██████████| 4/4 [00:00<00:00, 17.59it/s]


Output:
 f1_h=0.58634	prec_h=0.60658	rec_h=0.56742
[13, 20] loss: 560.030
[13, 40] loss: 551.730


100%|██████████| 4/4 [00:00<00:00, 17.30it/s]


Output:
 f1_h=0.58305	prec_h=0.62665	rec_h=0.54512
[14, 20] loss: 513.972
[14, 40] loss: 522.857


100%|██████████| 4/4 [00:00<00:00, 17.39it/s]


Output:
 f1_h=0.58463	prec_h=0.60367	rec_h=0.56675
[15, 20] loss: 484.218
[15, 40] loss: 449.436


100%|██████████| 4/4 [00:00<00:00, 17.31it/s]


Output:
 f1_h=0.59004	prec_h=0.61572	rec_h=0.56641
[16, 20] loss: 434.117
[16, 40] loss: 450.776


100%|██████████| 4/4 [00:00<00:00, 17.28it/s]


Output:
 f1_h=0.56350	prec_h=0.61765	rec_h=0.51808
[17, 20] loss: 406.323
[17, 40] loss: 395.193


100%|██████████| 4/4 [00:00<00:00, 17.34it/s]


Output:
 f1_h=0.56167	prec_h=0.62708	rec_h=0.50862
[18, 20] loss: 377.650
[18, 40] loss: 364.426


100%|██████████| 4/4 [00:00<00:00, 17.41it/s]


Output:
 f1_h=0.57514	prec_h=0.62644	rec_h=0.53160
[19, 20] loss: 334.164
[19, 40] loss: 323.041


100%|██████████| 4/4 [00:00<00:00, 10.09it/s]


Output:
 f1_h=0.57684	prec_h=0.64852	rec_h=0.51943
[20, 20] loss: 298.808
[20, 40] loss: 289.529


100%|██████████| 4/4 [00:00<00:00, 17.09it/s]


Output:
 f1_h=0.57137	prec_h=0.64103	rec_h=0.51538
[21, 20] loss: 278.977
[21, 40] loss: 263.496


100%|██████████| 4/4 [00:00<00:00, 17.07it/s]


Output:
 f1_h=0.55568	prec_h=0.62933	rec_h=0.49747
[22, 20] loss: 238.309
[22, 40] loss: 234.173


100%|██████████| 4/4 [00:00<00:00, 17.17it/s]


Output:
 f1_h=0.57987	prec_h=0.61295	rec_h=0.55019
[23, 20] loss: 213.432
[23, 40] loss: 209.544


100%|██████████| 4/4 [00:00<00:00, 16.84it/s]


Output:
 f1_h=0.56149	prec_h=0.62819	rec_h=0.50760
[24, 20] loss: 188.417
[24, 40] loss: 189.968


100%|██████████| 4/4 [00:00<00:00, 16.99it/s]


Output:
 f1_h=0.55903	prec_h=0.62615	rec_h=0.50490
[25, 20] loss: 162.940
[25, 40] loss: 155.783


100%|██████████| 4/4 [00:00<00:00, 16.47it/s]


Output:
 f1_h=0.56246	prec_h=0.62397	rec_h=0.51200
[26, 20] loss: 142.988
[26, 40] loss: 148.752


100%|██████████| 4/4 [00:00<00:00, 17.11it/s]


Output:
 f1_h=0.56880	prec_h=0.62319	rec_h=0.52315
[27, 20] loss: 134.844
[27, 40] loss: 134.573


100%|██████████| 4/4 [00:00<00:00, 17.50it/s]


Output:
 f1_h=0.55776	prec_h=0.63305	rec_h=0.49848
[28, 20] loss: 111.926
[28, 40] loss: 120.150


100%|██████████| 4/4 [00:00<00:00, 17.43it/s]


Output:
 f1_h=0.56269	prec_h=0.62807	rec_h=0.50963
[29, 20] loss: 103.260
[29, 40] loss: 100.892


100%|██████████| 4/4 [00:00<00:00, 17.47it/s]


Output:
 f1_h=0.56628	prec_h=0.63709	rec_h=0.50963
[30, 20] loss: 88.862
[30, 40] loss: 88.938


100%|██████████| 4/4 [00:00<00:00, 16.78it/s]


Output:
 f1_h=0.55572	prec_h=0.63549	rec_h=0.49375
[31, 20] loss: 83.029
[31, 40] loss: 72.335


100%|██████████| 4/4 [00:00<00:00, 17.65it/s]


Output:
 f1_h=0.56356	prec_h=0.63764	rec_h=0.50490
[32, 20] loss: 65.417
[32, 40] loss: 68.282


100%|██████████| 4/4 [00:00<00:00, 17.38it/s]


Output:
 f1_h=0.56032	prec_h=0.63098	rec_h=0.50389
[33, 20] loss: 64.233
[33, 40] loss: 59.451


100%|██████████| 4/4 [00:00<00:00, 17.75it/s]


Output:
 f1_h=0.55183	prec_h=0.63259	rec_h=0.48935
[34, 20] loss: 55.552
[34, 40] loss: 51.898


100%|██████████| 4/4 [00:00<00:00, 14.68it/s]


Output:
 f1_h=0.57765	prec_h=0.63681	rec_h=0.52856
[35, 20] loss: 46.914
[35, 40] loss: 46.952


100%|██████████| 4/4 [00:00<00:00, 17.39it/s]


Output:
 f1_h=0.55697	prec_h=0.63428	rec_h=0.49645
[36, 20] loss: 45.029
[36, 40] loss: 43.145


100%|██████████| 4/4 [00:00<00:00, 17.46it/s]


Output:
 f1_h=0.54744	prec_h=0.63418	rec_h=0.48158
[37, 20] loss: 33.128
[37, 40] loss: 40.966


100%|██████████| 4/4 [00:00<00:00, 17.52it/s]


Output:
 f1_h=0.55611	prec_h=0.64163	rec_h=0.49071
[38, 20] loss: 29.265
[38, 40] loss: 37.676


100%|██████████| 4/4 [00:00<00:00, 17.42it/s]


Output:
 f1_h=0.54748	prec_h=0.65084	rec_h=0.47246
[39, 20] loss: 29.171
[39, 40] loss: 26.163


100%|██████████| 4/4 [00:00<00:00, 17.31it/s]


Output:
 f1_h=0.56110	prec_h=0.63350	rec_h=0.50355
[40, 20] loss: 25.125
[40, 40] loss: 23.142


100%|██████████| 4/4 [00:00<00:00, 17.53it/s]


Output:
 f1_h=0.54803	prec_h=0.64357	rec_h=0.47719
[41, 20] loss: 20.947
[41, 40] loss: 25.947


100%|██████████| 4/4 [00:00<00:00, 17.10it/s]


Output:
 f1_h=0.55494	prec_h=0.63290	rec_h=0.49409
[42, 20] loss: 19.243
[42, 40] loss: 22.395


100%|██████████| 4/4 [00:00<00:00, 17.58it/s]


Output:
 f1_h=0.56078	prec_h=0.62536	rec_h=0.50828
[43, 20] loss: 17.389
[43, 40] loss: 18.050


100%|██████████| 4/4 [00:00<00:00, 17.43it/s]


Output:
 f1_h=0.56102	prec_h=0.62751	rec_h=0.50727
[44, 20] loss: 13.324
[44, 40] loss: 18.915


100%|██████████| 4/4 [00:00<00:00, 17.42it/s]


Output:
 f1_h=0.56037	prec_h=0.62693	rec_h=0.50659
[45, 20] loss: 12.923
[45, 40] loss: 13.929


100%|██████████| 4/4 [00:00<00:00, 17.49it/s]


Output:
 f1_h=0.54918	prec_h=0.62511	rec_h=0.48969
[46, 20] loss: 12.527
[46, 40] loss: 10.542


100%|██████████| 4/4 [00:00<00:00, 17.52it/s]


Output:
 f1_h=0.56024	prec_h=0.62505	rec_h=0.50760
[47, 20] loss: 8.914
[47, 40] loss: 6.891


100%|██████████| 4/4 [00:00<00:00, 17.37it/s]


Output:
 f1_h=0.56639	prec_h=0.64440	rec_h=0.50524
[48, 20] loss: 7.839
[48, 40] loss: 6.892


100%|██████████| 4/4 [00:00<00:00, 17.52it/s]


Output:
 f1_h=0.56821	prec_h=0.62709	rec_h=0.51943
[49, 20] loss: 6.350
[49, 40] loss: 9.403


100%|██████████| 4/4 [00:00<00:00, 17.35it/s]


Output:
 f1_h=0.56619	prec_h=0.61884	rec_h=0.52180
[50, 20] loss: 9.705
[50, 40] loss: 6.294


100%|██████████| 4/4 [00:00<00:00, 17.42it/s]


Output:
 f1_h=0.56727	prec_h=0.63333	rec_h=0.51369
[51, 20] loss: 3.789
[51, 40] loss: 6.716


100%|██████████| 4/4 [00:00<00:00, 17.55it/s]


Output:
 f1_h=0.54306	prec_h=0.63723	rec_h=0.47313
[52, 20] loss: 6.720
[52, 40] loss: 5.170


100%|██████████| 4/4 [00:00<00:00, 17.55it/s]


Output:
 f1_h=0.56504	prec_h=0.62983	rec_h=0.51234
[53, 20] loss: 5.002
[53, 40] loss: 4.513


100%|██████████| 4/4 [00:00<00:00, 17.46it/s]


Output:
 f1_h=0.55111	prec_h=0.63875	rec_h=0.48462
[54, 20] loss: 5.525
[54, 40] loss: 3.958


100%|██████████| 4/4 [00:00<00:00, 17.60it/s]


Output:
 f1_h=0.56273	prec_h=0.62870	rec_h=0.50929
[55, 20] loss: 5.645
[55, 40] loss: 3.962


100%|██████████| 4/4 [00:00<00:00, 17.44it/s]


Output:
 f1_h=0.55917	prec_h=0.62087	rec_h=0.50862
[56, 20] loss: 7.155
[56, 40] loss: 6.423


100%|██████████| 4/4 [00:00<00:00, 17.42it/s]


Output:
 f1_h=0.55683	prec_h=0.62958	rec_h=0.49916
[57, 20] loss: 3.237
[57, 40] loss: 5.310


100%|██████████| 4/4 [00:00<00:00, 17.38it/s]


Output:
 f1_h=0.55184	prec_h=0.63895	rec_h=0.48564
[58, 20] loss: 5.622
[58, 40] loss: 5.071


100%|██████████| 4/4 [00:00<00:00, 17.43it/s]


Output:
 f1_h=0.55549	prec_h=0.64464	rec_h=0.48800
[59, 20] loss: 4.014
[59, 40] loss: 2.862


100%|██████████| 4/4 [00:00<00:00, 17.43it/s]


Output:
 f1_h=0.55951	prec_h=0.61626	rec_h=0.51234
[60, 20] loss: 3.170
[60, 40] loss: 3.745


100%|██████████| 4/4 [00:00<00:00, 17.61it/s]


Output:
 f1_h=0.55876	prec_h=0.63617	rec_h=0.49814
[61, 20] loss: 2.835
[61, 40] loss: 3.631


100%|██████████| 4/4 [00:00<00:00, 17.50it/s]


Output:
 f1_h=0.56396	prec_h=0.65332	rec_h=0.49611
[62, 20] loss: 5.560
[62, 40] loss: 2.800


100%|██████████| 4/4 [00:00<00:00, 17.27it/s]


Output:
 f1_h=0.55228	prec_h=0.64190	rec_h=0.48462
[63, 20] loss: 2.248
[63, 40] loss: 2.616


100%|██████████| 4/4 [00:00<00:00, 17.52it/s]


Output:
 f1_h=0.55341	prec_h=0.64735	rec_h=0.48327
[64, 20] loss: 2.716
[64, 40] loss: 4.753


100%|██████████| 4/4 [00:00<00:00, 16.59it/s]


Output:
 f1_h=0.56680	prec_h=0.62222	rec_h=0.52045
[65, 20] loss: 3.576
[65, 40] loss: 5.328


100%|██████████| 4/4 [00:00<00:00, 16.83it/s]


Output:
 f1_h=0.56662	prec_h=0.60925	rec_h=0.52957
[66, 20] loss: 1.752
[66, 40] loss: 4.508


100%|██████████| 4/4 [00:00<00:00, 16.43it/s]


Output:
 f1_h=0.54863	prec_h=0.64462	rec_h=0.47753
[67, 20] loss: 3.678
[67, 40] loss: 3.464


100%|██████████| 4/4 [00:00<00:00, 16.65it/s]


Output:
 f1_h=0.55340	prec_h=0.62511	rec_h=0.49645
[68, 20] loss: 2.558
[68, 40] loss: 3.010


100%|██████████| 4/4 [00:00<00:00, 17.18it/s]


Output:
 f1_h=0.56609	prec_h=0.62490	rec_h=0.51740
[69, 20] loss: 2.738
[69, 40] loss: 3.357


100%|██████████| 4/4 [00:00<00:00, 16.78it/s]


Output:
 f1_h=0.55873	prec_h=0.63068	rec_h=0.50152
[70, 20] loss: 2.084
[70, 40] loss: 3.334


100%|██████████| 4/4 [00:00<00:00, 17.01it/s]


Output:
 f1_h=0.55928	prec_h=0.62731	rec_h=0.50456
[71, 20] loss: 2.761
[71, 40] loss: 1.526


100%|██████████| 4/4 [00:00<00:00, 16.96it/s]


Output:
 f1_h=0.55564	prec_h=0.64862	rec_h=0.48597
[72, 20] loss: 4.409
[72, 40] loss: 2.670


100%|██████████| 4/4 [00:00<00:00, 17.27it/s]


Output:
 f1_h=0.55252	prec_h=0.63668	rec_h=0.48800
[73, 20] loss: 3.044
[73, 40] loss: 3.439


100%|██████████| 4/4 [00:00<00:00, 17.46it/s]


Output:
 f1_h=0.55549	prec_h=0.63322	rec_h=0.49476
[74, 20] loss: 3.936
[74, 40] loss: 1.848


100%|██████████| 4/4 [00:00<00:00, 16.92it/s]


Output:
 f1_h=0.56796	prec_h=0.63556	rec_h=0.51335
[75, 20] loss: 3.016
[75, 40] loss: 2.848


100%|██████████| 4/4 [00:00<00:00, 17.29it/s]


Output:
 f1_h=0.54482	prec_h=0.63599	rec_h=0.47651
[76, 20] loss: 3.731
[76, 40] loss: 3.414


100%|██████████| 4/4 [00:00<00:00, 17.41it/s]


Output:
 f1_h=0.57235	prec_h=0.61313	rec_h=0.53667
[77, 20] loss: 3.793
[77, 40] loss: 9.319


100%|██████████| 4/4 [00:00<00:00, 16.18it/s]


Output:
 f1_h=0.52792	prec_h=0.61643	rec_h=0.46164
[78, 20] loss: 13.183
[78, 40] loss: 20.134


100%|██████████| 4/4 [00:00<00:00, 17.09it/s]


Output:
 f1_h=0.55636	prec_h=0.61748	rec_h=0.50625
[79, 20] loss: 35.061
[79, 40] loss: 18.344


100%|██████████| 4/4 [00:00<00:00,  9.66it/s]


Output:
 f1_h=0.56261	prec_h=0.61600	rec_h=0.51774
[80, 20] loss: 10.608
[80, 40] loss: 8.655


100%|██████████| 4/4 [00:00<00:00, 17.22it/s]


Output:
 f1_h=0.55761	prec_h=0.60880	rec_h=0.51436
[81, 20] loss: 4.333
[81, 40] loss: 2.090


100%|██████████| 4/4 [00:00<00:00, 17.57it/s]


Output:
 f1_h=0.56119	prec_h=0.64364	rec_h=0.49747
[82, 20] loss: 3.153
[82, 40] loss: 2.644


100%|██████████| 4/4 [00:00<00:00, 17.58it/s]


Output:
 f1_h=0.55798	prec_h=0.61747	rec_h=0.50896
[83, 20] loss: 2.147
[83, 40] loss: 2.812


100%|██████████| 4/4 [00:00<00:00, 17.37it/s]


Output:
 f1_h=0.56625	prec_h=0.62480	rec_h=0.51774
[84, 20] loss: 2.716
[84, 40] loss: 2.132


100%|██████████| 4/4 [00:00<00:00, 17.60it/s]


Output:
 f1_h=0.55682	prec_h=0.63613	rec_h=0.49510
[85, 20] loss: 3.146
[85, 40] loss: 1.916


100%|██████████| 4/4 [00:00<00:00, 17.49it/s]


Output:
 f1_h=0.56055	prec_h=0.63805	rec_h=0.49983
[86, 20] loss: 2.626
[86, 40] loss: 1.937


100%|██████████| 4/4 [00:00<00:00, 17.27it/s]


Output:
 f1_h=0.56458	prec_h=0.62170	rec_h=0.51707
[87, 20] loss: 3.171
[87, 40] loss: 2.168


100%|██████████| 4/4 [00:00<00:00, 17.19it/s]


Output:
 f1_h=0.56424	prec_h=0.62937	rec_h=0.51132
[88, 20] loss: 2.261
[88, 40] loss: 3.645


100%|██████████| 4/4 [00:00<00:00, 17.42it/s]


Output:
 f1_h=0.56090	prec_h=0.64062	rec_h=0.49882
[89, 20] loss: 2.146
[89, 40] loss: 2.257


100%|██████████| 4/4 [00:00<00:00, 16.88it/s]


Output:
 f1_h=0.56366	prec_h=0.63311	rec_h=0.50794
[90, 20] loss: 2.171
[90, 40] loss: 2.973


100%|██████████| 4/4 [00:00<00:00, 16.74it/s]


Output:
 f1_h=0.56416	prec_h=0.63176	rec_h=0.50963
[91, 20] loss: 1.665
[91, 40] loss: 2.262


100%|██████████| 4/4 [00:00<00:00, 16.76it/s]


Output:
 f1_h=0.56736	prec_h=0.62602	rec_h=0.51876
[92, 20] loss: 2.392
[92, 40] loss: 2.529


100%|██████████| 4/4 [00:00<00:00, 16.86it/s]


Output:
 f1_h=0.55987	prec_h=0.61421	rec_h=0.51436
[93, 20] loss: 2.303
[93, 40] loss: 2.928


100%|██████████| 4/4 [00:00<00:00, 16.98it/s]


Output:
 f1_h=0.57057	prec_h=0.63746	rec_h=0.51639
[94, 20] loss: 1.733
[94, 40] loss: 1.766


100%|██████████| 4/4 [00:00<00:00, 16.90it/s]


Output:
 f1_h=0.56523	prec_h=0.63339	rec_h=0.51031
[95, 20] loss: 1.319
[95, 40] loss: 1.848


100%|██████████| 4/4 [00:00<00:00, 17.54it/s]


Output:
 f1_h=0.56158	prec_h=0.63907	rec_h=0.50084
[96, 20] loss: 1.512
[96, 40] loss: 1.775


100%|██████████| 4/4 [00:00<00:00, 16.64it/s]


Output:
 f1_h=0.56276	prec_h=0.63613	rec_h=0.50456
[97, 20] loss: 3.441
[97, 40] loss: 1.468


100%|██████████| 4/4 [00:00<00:00, 16.75it/s]


Output:
 f1_h=0.56000	prec_h=0.63392	rec_h=0.50152
[98, 20] loss: 2.445
[98, 40] loss: 1.364


100%|██████████| 4/4 [00:00<00:00, 17.67it/s]


Output:
 f1_h=0.56802	prec_h=0.63315	rec_h=0.51504
[99, 20] loss: 1.633
[99, 40] loss: 2.116


100%|██████████| 4/4 [00:00<00:00, 17.05it/s]


Output:
 f1_h=0.56327	prec_h=0.63371	rec_h=0.50693
[100, 20] loss: 1.452
[100, 40] loss: 2.507


100%|██████████| 4/4 [00:00<00:00, 16.43it/s]


Output:
 f1_h=0.56649	prec_h=0.62986	rec_h=0.51470


### Evaluation

In [16]:
from tqdm import tqdm
import json
import subprocess
from torch.utils.data import DataLoader

### Baseline Evaluation (ResNet 50 + mBERT)

In [17]:
# Arabic test set, ResNet image features + mBERT text features.
ar_pred_file_path = './Predictions/subtask2a/ar_predictions_subtask2a.txt'
ar_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask2a_ar.json'
evaluator_script = './scorer-baseline/subtask_1_2a.py'

ar_test_data = process_json(ar_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
ar_test_dataset = TestDataSet(df=ar_test_data, 
                          text_features_file='./TextFeatures/subtask2a/mBERT/ar_test_text_features.pkl',
                          image_features_file='./ImageFeatures/ResNet/ar_test_images_features.pkl')

# No shuffling at inference time: predictions are written in loader order, so
# a fixed order keeps the prediction file identical across runs.
ar_test_dataloader = DataLoader(ar_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, ar_test_dataloader, ar_pred_file_path, ar_gold_file_path, evaluator_script, 
               id2leaf_label_subtask_2a, format=5, validation=False, threshold=0.3)

100%|██████████| 2/2 [00:00<00:00, 39.93it/s]


Output:
 f1_h=0.44345	prec_h=0.47793	rec_h=0.41362


In [18]:
# Bulgarian test set, ResNet image features + mBERT text features.
# Relies on `evaluator_script` having been defined by an earlier cell.
bg_pred_file_path = './Predictions/subtask2a/bg_predictions_subtask2a.txt'
bg_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask2a_bg.json'

bg_test_data = process_json(bg_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
bg_test_dataset = DataSet(bg_test_data, indexed_persuasion_techniques_2a, 
                           text_features_file='./TextFeatures/subtask2a/mBERT/bg_test_text_features.pkl',
                          image_features_file='./ImageFeatures/ResNet/bulgarian_test_images_features.pkl')

# No shuffling at inference time: predictions are written in loader order, so
# a fixed order keeps the prediction file identical across runs.
bg_test_dataloader = DataLoader(bg_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, bg_test_dataloader, bg_pred_file_path, bg_gold_file_path, evaluator_script, 
               id2leaf_label_subtask_2a, threshold=0.3)

100%|██████████| 7/7 [00:00<00:00, 32.69it/s]


Output:
 f1_h=0.55941	prec_h=0.65857	rec_h=0.48621


In [19]:

# "md" test set, ResNet image features + mBERT text features.
# Relies on `evaluator_script` having been defined by an earlier cell.
md_pred_file_path = './Predictions/subtask2a/md_predictions_subtask2a.txt'
md_gold_file_path = 'test_labels_ar_bg_md_version2/test_subtask2a_md.json'

md_test_data = process_json(md_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
md_test_dataset = DataSet(md_test_data, indexed_persuasion_techniques_2a, 
                          text_features_file='./TextFeatures/subtask2a/mBERT/md_test_text_features.pkl',
                          image_features_file='./ImageFeatures/ResNet/nm_test_images_features.pkl')

# No shuffling at inference time: predictions are written in loader order, so
# a fixed order keeps the prediction file identical across runs.
md_test_dataloader = DataLoader(md_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, md_test_dataloader, md_pred_file_path, md_gold_file_path, evaluator_script, 
               id2leaf_label_subtask_2a, threshold=0.3)

100%|██████████| 5/5 [00:00<00:00, 32.74it/s]


Output:
 f1_h=0.57655	prec_h=0.68725	rec_h=0.49657


In [21]:
# English unlabeled test set, ResNet image features + mBERT text features.
# No gold file is passed, so evaluate_model only writes the prediction file.
en_pred_file_path = './Predictions/subtask2a/en_predictions_subtask2a.txt'

eng_test_data = process_test_json('./test_data/english/en_subtask2a_test_unlabeled.json')

eng_test_dataset = TestDataSet(eng_test_data, 
                               './TextFeatures/subtask2a/mBERT/en_test_text_features.pkl',
                               './ImageFeatures/ResNet/english_test_images_features.pkl')
# No shuffling at inference time: predictions are written in loader order, so
# a fixed order keeps the prediction file identical across runs.
eng_test_dataloader = DataLoader(eng_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, eng_test_dataloader, en_pred_file_path, None, evaluator_script, 
               id2leaf_label_subtask_2a, threshold=0.4)

100%|██████████| 24/24 [00:00<00:00, 42.90it/s]


### OpenAI Small + CLIP

In [19]:
from modules.nn.MultiModal import MultiModalBaseline

# Replaces the notebook-level `model` with the OpenAI-Small + CLIP checkpoint.
model = MultiModalBaseline(512, 1536)
# Keep the model on the same device as the features fed to it during evaluation.
model.to(device)
# map_location lets the checkpoint load even if it was saved from another device.
model.load_state_dict(torch.load('models/subtask2a/MultiModal-OpenAI-Small/splendid-sweep-4.pth',
                                 map_location=device))

<All keys matched successfully>

In [20]:
# Arabic test set, CLIP-ViT image features + text-embedding-3-small text features.
ar_pred_file_path = './Predictions/subtask2a/ar_predictions_subtask2a.txt'
ar_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask2a_ar.json'
evaluator_script = './scorer-baseline/subtask_1_2a.py'

ar_test_data = process_json(ar_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
ar_test_dataset = TestDataSet(df=ar_test_data, 
                          text_features_file='./TextFeatures/subtask2a/text-embedding-3-small/ar_test_text_features.pkl',
                          image_features_file='./ImageFeatures/CLIP-ViT/ar_test_images_features.pkl')

# No shuffling at inference time: predictions are written in loader order, so
# a fixed order keeps the prediction file identical across runs.
ar_test_dataloader = DataLoader(ar_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, ar_test_dataloader, ar_pred_file_path, ar_gold_file_path, evaluator_script, 
               id2leaf_label_subtask_2a, format=5, validation=False, threshold=0.3)

100%|██████████| 2/2 [00:00<00:00, 50.73it/s]


Output:
 f1_h=0.52780	prec_h=0.47657	rec_h=0.59136


In [21]:
# Bulgarian test set, CLIP-ViT image features + text-embedding-3-small text features.
# Relies on `evaluator_script` having been defined by an earlier cell.
bg_pred_file_path = './Predictions/subtask2a/bg_predictions_subtask2a.txt'
bg_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask2a_bg.json'

bg_test_data = process_json(bg_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
bg_test_dataset = DataSet(bg_test_data, indexed_persuasion_techniques_2a, 
                           text_features_file='./TextFeatures/subtask2a/text-embedding-3-small/bg_test_text_features.pkl',
                          image_features_file='./ImageFeatures/CLIP-ViT/bulgarian_test_images_features.pkl')

# No shuffling at inference time: predictions are written in loader order, so
# a fixed order keeps the prediction file identical across runs.
bg_test_dataloader = DataLoader(bg_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, bg_test_dataloader, bg_pred_file_path, bg_gold_file_path, evaluator_script, 
               id2leaf_label_subtask_2a, threshold=0.3)

100%|██████████| 7/7 [00:00<00:00, 43.03it/s]


Output:
 f1_h=0.64443	prec_h=0.64928	rec_h=0.63966


In [22]:

# "md" test set, CLIP-ViT image features + text-embedding-3-small text features.
# Relies on `evaluator_script` having been defined by an earlier cell.
md_pred_file_path = './Predictions/subtask2a/md_predictions_subtask2a.txt'
md_gold_file_path = 'test_labels_ar_bg_md_version2/test_subtask2a_md.json'

md_test_data = process_json(md_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
md_test_dataset = DataSet(md_test_data, indexed_persuasion_techniques_2a, 
                          text_features_file='./TextFeatures/subtask2a/text-embedding-3-small/md_test_text_features.pkl',
                          image_features_file='./ImageFeatures/CLIP-ViT/nm_test_images_features.pkl')

# No shuffling at inference time: predictions are written in loader order, so
# a fixed order keeps the prediction file identical across runs.
md_test_dataloader = DataLoader(md_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, md_test_dataloader, md_pred_file_path, md_gold_file_path, evaluator_script, 
               id2leaf_label_subtask_2a, threshold=0.3)

100%|██████████| 5/5 [00:00<00:00, 44.21it/s]


Output:
 f1_h=0.67714	prec_h=0.71721	rec_h=0.64130


In [23]:
# English unlabeled test set, CLIP-ViT image features + text-embedding-3-small text features.
# No gold file is passed, so evaluate_model only writes the prediction file.
en_pred_file_path = './Predictions/subtask2a/en_predictions_subtask2a.txt'

eng_test_data = process_test_json('./test_data/english/en_subtask2a_test_unlabeled.json')

eng_test_dataset = TestDataSet(eng_test_data, 
                               './TextFeatures/subtask2a/text-embedding-3-small/en_test_text_features.pkl',
                               './ImageFeatures/CLIP-ViT/english_test_images_features.pkl')
# No shuffling at inference time: predictions are written in loader order, so
# a fixed order keeps the prediction file identical across runs.
eng_test_dataloader = DataLoader(eng_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, eng_test_dataloader, en_pred_file_path, None, evaluator_script, 
               id2leaf_label_subtask_2a, threshold=0.4)

100%|██████████| 24/24 [00:00<00:00, 59.39it/s]


Scores recorded for the English predictions above (f1_h / prec_h / rec_h): 0.64640	0.76090	0.56184

### OpenAI Large + CLIP

In [24]:
from modules.nn.MultiModal import MultiModalBaseline

# Rebuild the multimodal baseline and restore the "OpenAI Large + CLIP" checkpoint.
# NOTE(review): (512, 3072) are presumably the image- and text-feature widths
# (CLIP-ViT / text-embedding-3-large) -- confirm against MultiModalBaseline's signature.
model = MultiModalBaseline(512, 3072)

# map_location='cpu' lets a checkpoint saved from a GPU load on a CPU-only machine;
# load_state_dict copies the values into the model's existing parameters either way.
# SECURITY: torch.load unpickles the file -- only load checkpoints from a trusted source.
model.load_state_dict(
    torch.load('models/subtask2a/MultiModal-OpenAI-Large/fresh-sweep-3.pth', map_location='cpu')
)

<All keys matched successfully>

In [25]:
# Evaluate the currently loaded `model` on the labelled "ar" test split of subtask 2a
# (text-embedding-3-large text features + CLIP-ViT image features).
ar_pred_file_path = './Predictions/subtask2a/ar_predictions_subtask2a.txt'
ar_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask2a_ar.json'
evaluator_script = './scorer-baseline/subtask_1_2a.py'

# The gold JSON doubles as the input data: it is parsed into the frame the dataset indexes.
ar_test_data = process_json(ar_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
# NOTE(review): this split goes through TestDataSet with format=5 / validation=False,
# whereas the bg and md cells use DataSet with default arguments -- confirm this is intentional.
ar_test_dataset = TestDataSet(
    df=ar_test_data,
    text_features_file='./TextFeatures/subtask2a/text-embedding-3-large/ar_test_text_features.pkl',
    image_features_file='./ImageFeatures/CLIP-ViT/ar_test_images_features.pkl',
)

# Evaluation gains nothing from shuffling; a fixed order keeps batches (and presumably
# the order of the written predictions) identical across runs.
ar_test_dataloader = DataLoader(ar_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, ar_test_dataloader, ar_pred_file_path, ar_gold_file_path, evaluator_script,
               id2leaf_label_subtask_2a, format=5, validation=False, threshold=0.3)

100%|██████████| 2/2 [00:00<00:00, 41.44it/s]


Output:
 f1_h=0.52197	prec_h=0.44014	rec_h=0.64120


In [26]:
# Evaluate the currently loaded `model` on the labelled "bg" test split of subtask 2a
# (text-embedding-3-large text features + CLIP-ViT image features).
bg_pred_file_path = './Predictions/subtask2a/bg_predictions_subtask2a.txt'
bg_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask2a_bg.json'

# The gold JSON doubles as the input data: it is parsed into the frame the dataset indexes.
bg_test_data = process_json(bg_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
bg_test_dataset = DataSet(
    bg_test_data,
    indexed_persuasion_techniques_2a,
    text_features_file='./TextFeatures/subtask2a/text-embedding-3-large/bg_test_text_features.pkl',
    image_features_file='./ImageFeatures/CLIP-ViT/bulgarian_test_images_features.pkl',
)

# Evaluation gains nothing from shuffling; a fixed order keeps batches (and presumably
# the order of the written predictions) identical across runs.
bg_test_dataloader = DataLoader(bg_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, bg_test_dataloader, bg_pred_file_path, bg_gold_file_path, evaluator_script,
               id2leaf_label_subtask_2a, threshold=0.3)

100%|██████████| 7/7 [00:00<00:00, 37.81it/s]


Output:
 f1_h=0.65849	prec_h=0.64199	rec_h=0.67586


In [27]:

# Evaluate the currently loaded `model` on the labelled "md" test split of subtask 2a
# (text-embedding-3-large text features + CLIP-ViT image features).
md_pred_file_path = './Predictions/subtask2a/md_predictions_subtask2a.txt'
md_gold_file_path = 'test_labels_ar_bg_md_version2/test_subtask2a_md.json'

# The gold JSON doubles as the input data: it is parsed into the frame the dataset indexes.
md_test_data = process_json(md_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
md_test_dataset = DataSet(
    md_test_data,
    indexed_persuasion_techniques_2a,
    text_features_file='./TextFeatures/subtask2a/text-embedding-3-large/md_test_text_features.pkl',
    # NOTE(review): image features use an "nm" prefix while every other path says "md";
    # presumably the same split under a different name -- confirm.
    image_features_file='./ImageFeatures/CLIP-ViT/nm_test_images_features.pkl',
)

# Evaluation gains nothing from shuffling; a fixed order keeps batches (and presumably
# the order of the written predictions) identical across runs.
md_test_dataloader = DataLoader(md_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, md_test_dataloader, md_pred_file_path, md_gold_file_path, evaluator_script,
               id2leaf_label_subtask_2a, threshold=0.3)

100%|██████████| 5/5 [00:00<00:00, 35.83it/s]


Output:
 f1_h=0.68926	prec_h=0.73154	rec_h=0.65160


In [28]:
# Produce predictions for the unlabelled English test set of subtask 2a
# (text-embedding-3-large text features + CLIP-ViT image features).
en_pred_file_path = './Predictions/subtask2a/en_predictions_subtask2a.txt'

eng_test_data = process_test_json('./test_data/english/en_subtask2a_test_unlabeled.json')

eng_test_dataset = TestDataSet(
    df=eng_test_data,
    text_features_file='./TextFeatures/subtask2a/text-embedding-3-large/en_test_text_features.pkl',
    image_features_file='./ImageFeatures/CLIP-ViT/english_test_images_features.pkl',
)

# Inference gains nothing from shuffling; a fixed order keeps batches (and presumably
# the order of the written predictions) identical across runs.
eng_test_dataloader = DataLoader(eng_test_dataset, batch_size=64, shuffle=False)

# No gold file is passed (None) because this split is unlabelled.
evaluate_model(model, eng_test_dataloader, en_pred_file_path, None, evaluator_script,
               id2leaf_label_subtask_2a, threshold=0.4)

100%|██████████| 24/24 [00:00<00:00, 46.77it/s]


0.65305	0.76524	0.56955