In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
import nltk
from utils.utils import *
from utils.label_decoding import *
from utils.HierarchicalLoss import *

import torchvision.models as models

from tqdm import tqdm
import json
import subprocess
from subtask_1_2a import *

In [2]:
from torch.utils.data import Dataset
import pickle

class DataSet(Dataset):
    """Labelled samples backed by precomputed (pickled) feature lookups.

    Every item combines one row of ``df`` with the image, text and NER
    features stored for that row and with one multi-hot target per label
    level ('Level 1' ... 'Level 5').
    """

    def __init__(self, df, labels_at_level, text_features_file, image_features_file,
                 ner_features_file, max_len=128):
        """Keep the frame and label maps and load the three feature pickles.

        Args:
            df: Frame providing the columns 'id', 'cleaned_text', 'image'
                and 'Level 1' ... 'Level 5'.
            labels_at_level: Mapping 'Level N' -> {label: index}.
            text_features_file: Pickle indexed by sample id.
            image_features_file: Pickle indexed by image name.
            ner_features_file: Pickle indexed by sample id.
            max_len: Only stored on the instance; not used by this class.
        """
        super().__init__()
        self.data_df = df
        self.labels_at_level = labels_at_level
        self.max_len = max_len
        self.image_features = None
        self.text_features = None

        # NOTE(review): pickle.load can execute arbitrary code; these files
        # must come from a trusted source.
        self.image_features = self._unpickle(image_features_file)
        self.text_features = self._unpickle(text_features_file)
        self.ner_features = self._unpickle(ner_features_file)

    @staticmethod
    def _unpickle(path):
        """Return the object stored in the pickle file at ``path``."""
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def __len__(self):
        """Number of rows in the underlying frame."""
        return len(self.data_df)

    def __getitem__(self, idx):
        """Assemble the sample dict for the row at position ``idx``."""
        row = self.data_df.iloc[idx]
        sample_id = row['id']
        text = row['cleaned_text']
        image_name = row['image']

        # Targets are built before the feature lookups, one per level 1..5.
        targets = {
            f'level_{level}_target': self.encode(row[f'Level {level}'], level)
            for level in range(1, 6)
        }

        return {
            'id': sample_id,
            'text': text,
            'image_features': self.image_features[image_name],
            'text_features': self.text_features[sample_id],
            'ner_features': self.ner_features[sample_id],
            **targets,
        }

    def encode(self, labels, level):
        """Multi-hot encode ``labels`` for the given level.

        The vector has one slot per known label of that level plus a final
        slot that is set only when ``labels`` is empty.
        """
        label_to_index = self.labels_at_level[f'Level {level}']
        target = torch.zeros(len(label_to_index) + 1)
        if len(labels) == 0:
            target[-1] = 1
            return target
        for label in labels:
            target[label_to_index[label]] = 1
        return target

In [3]:
class TestDataSet(Dataset):
    """Unlabelled samples backed by precomputed (pickled) feature lookups.

    Items carry the sample id, its text and the text, image and NER
    features stored for it; no targets are produced.
    """

    def __init__(self, df, text_features_file, image_features_file,
                 ner_features_file, max_len=128):
        """Keep the frame and load the three feature pickles.

        Args:
            df: Frame providing the columns 'id', 'cleaned_text' and 'image'.
            text_features_file: Pickle indexed by sample id.
            image_features_file: Pickle indexed by image name.
            ner_features_file: Pickle indexed by sample id.
            max_len: Only stored on the instance; not used by this class.
        """
        super().__init__()
        self.data_df = df

        self.image_features = None
        self.text_features = None
        self.max_len = max_len

        # NOTE(review): pickle.load can execute arbitrary code; these files
        # must come from a trusted source.
        self.image_features = self._unpickle(image_features_file)
        self.text_features = self._unpickle(text_features_file)
        self.ner_features = self._unpickle(ner_features_file)

    @staticmethod
    def _unpickle(path):
        """Return the object stored in the pickle file at ``path``."""
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def __len__(self):
        """Number of rows in the underlying frame."""
        return len(self.data_df)

    def __getitem__(self, idx):
        """Assemble the sample dict for the row at position ``idx``."""
        row = self.data_df.iloc[idx]
        sample_id = row['id']
        text = row['cleaned_text']
        image_name = row['image']

        # Id-keyed lookups come first, the image lookup last.
        text_features = self.text_features[sample_id]
        ner_features = self.ner_features[sample_id]
        image_features = self.image_features[image_name]

        return {
            'id': sample_id,
            'text': text,
            'text_features': text_features,
            'image_features': image_features,
            'ner_features': ner_features,
        }

In [44]:
from modules.nn.MultiModal import MultiModalNER

In [36]:
def evaluate_model(model, dataloader, pred_file_path, gold_file_path,
                   evaluator_script_path, id2leaf_label, format=None, validation=False, threshold=0.3):
    """Run inference over ``dataloader``, dump the predictions as JSON and optionally score them.

    Args:
        model: Called as ``model(text_features, image_features, ner_features)``;
            must return five outputs, one per label level.
        dataloader: Yields batch dicts with the keys 'id', 'text_features',
            'image_features' and 'ner_features'; when ``validation`` is true
            also 'level_1_target' ... 'level_5_target'.
        pred_file_path: File the predictions are written to (JSON).
        gold_file_path: Gold file handed to ``evaluate_h``. When ``None`` the
            function returns right after writing the predictions.
        evaluator_script_path: Unused; kept so existing call sites keep working.
        id2leaf_label: Forwarded unchanged to ``get_labels``.
        format: Forwarded unchanged to ``get_labels``.
        validation: When true, additionally accumulate the hierarchical loss
            over all batches and return the metrics.
        threshold: Outputs of levels 3, 4 and 5 strictly greater than this
            value are binarised to 1, everything else to 0.

    Returns:
        ``(prec_h, rec_h, f1_h, mean_batch_loss)`` when ``validation`` is true
        and a gold file was given; otherwise ``None`` (the scores are only
        printed when a gold file is available).
    """
    model.eval()
    predictions = []

    # The loss object is only needed for labelled validation batches, so it is
    # not built for plain inference runs.
    HL = None
    if validation:
        HL = HierarchicalLoss(id2label=id2label_subtask_2a, hierarchical_labels=hierarchy_subtask_2a,
                              persuasion_techniques=persuasion_techniques_2a, device=device)

    total_loss = 0

    with torch.no_grad():
        for batch in dataloader:
            # Ids arrive either as a plain list or as a tensor after collation.
            ids = batch['id']
            if not isinstance(ids, list):
                ids = ids.detach().numpy().tolist()

            text_features = batch['text_features'].to(device)
            image_features = batch['image_features'].to(device)
            ner_features = batch['ner_features'].to(device)

            pred_1, pred_2, pred_3, pred_4, pred_5 = model(text_features, image_features, ner_features)

            if validation:
                level_preds = [pred_1, pred_2, pred_3, pred_4, pred_5]
                level_targets = [batch[f'level_{level}_target'].to(device) for level in range(1, 6)]

                dloss = HL.calculate_dloss(level_preds, level_targets)
                lloss = HL.calculate_lloss(level_preds, level_targets)

                total_loss += (dloss + lloss).detach().cpu().item()

            # Only levels 3-5 are binarised and passed on to get_labels.
            pred_3 = (pred_3.cpu().detach().numpy() > threshold).astype(int)
            pred_4 = (pred_4.cpu().detach().numpy() > threshold).astype(int)
            pred_5 = (pred_5.cpu().detach().numpy() > threshold).astype(int)

            predictions += get_labels(id2leaf_label, ids, pred_3, pred_4, pred_5, format)

    # Writing JSON data
    with open(pred_file_path, 'w') as f:
        json.dump(predictions, f, indent=4)

    if gold_file_path is None:
        return

    prec_h, rec_h, f1_h = evaluate_h(pred_file_path, gold_file_path)
    print("f1_h={:.5f}\tprec_h={:.5f}\trec_h={:.5f}".format(f1_h, prec_h, rec_h))
    if validation:
        # The loss is averaged over batches, not over samples.
        return prec_h, rec_h, f1_h, total_loss / (len(dataloader))
    

In [41]:
from torch.utils.data import DataLoader

# Training split of subtask 2a.
train_data = process_json(
    './semeval2024_dev_release/subtask2a/train.json',
    techniques_to_level_2a,
    hierarchy_subtask_2a,
)
# NOTE: the `test_*` names below hold the *validation* split (validation.json).
test_data = process_json(
    './semeval2024_dev_release/subtask2a/validation.json',
    techniques_to_level_2a,
    hierarchy_subtask_2a,
)

# Both datasets read precomputed features: text-embedding-3-small text
# features, CLIP-ViT image features and multilingual-ner features.
training_dataset = DataSet(
    df=train_data,
    labels_at_level=indexed_persuasion_techniques_2a,
    text_features_file='./TextFeatures/subtask2a/text-embedding-3-small/train_text_features.pkl',
    image_features_file='./ImageFeatures/CLIP-ViT/train_images_features.pkl',
    ner_features_file='./TextFeatures/subtask2a/multilingual-ner/train_text_features.pkl',
)
test_dataset = DataSet(
    df=test_data,
    labels_at_level=indexed_persuasion_techniques_2a,
    text_features_file='./TextFeatures/subtask2a/text-embedding-3-small/validation_text_features.pkl',
    image_features_file='./ImageFeatures/CLIP-ViT/validation_images_features.pkl',
    ner_features_file='./TextFeatures/subtask2a/multilingual-ner/validation_text_features.pkl',
)

In [28]:
# Ask the project helper for a device. Its result is discarded by the
# reassignment below (the recorded cell output shows "Using MPS").
device = get_device()

# Every later cell therefore runs on the CPU.
# NOTE(review): presumably a deliberate override of the detected accelerator — confirm.
device = torch.device('cpu')

Using MPS


In [42]:
import wandb

# Authenticate against Weights & Biases before registering the sweep.
wandb.login()

# Bayesian search that minimises the `val_loss` value logged by train().
sweep_config = dict(
    method='bayes',
    metric=dict(name='val_loss', goal='minimize'),
    parameters=dict(
        learning_rate=dict(min=1e-5, max=1e-4),
        batch_size=dict(values=[128, 256]),
        # 'sgd' is not offered: train() would read config.momentum for it,
        # and no momentum range is swept here.
        optimizer=dict(values=['adam']),
        # First Adam beta coefficient.
        beta1=dict(min=0.8, max=0.95),
        # alpha, beta and threshold are handed to HierarchicalLoss by train().
        alpha=dict(min=0.65, max=1.0),
        beta=dict(min=0.5, max=1.0),
        threshold=dict(min=0.65, max=0.9),
    ),
)

sweep_id = wandb.sweep(sweep_config, project="FineTuning-MultiModal-OpenAI-Small-NER")
# To attach to an already registered sweep instead: sweep_id = '44uz6ydx'

Create sweep with ID: nhwhr7rv
Sweep URL: https://wandb.ai/phoenix_nlp/FineTuning-MultiModal-mBERT-ResNet/sweeps/nhwhr7rv


In [43]:
# Path prefix under which train() saves each run's weights as "<run name>.pth".
models_dir = './models/subtask2a/MultiModal-OpenAI-Small-NER/'
# Number of epochs train() runs for every sweep configuration.
num_epochs = 100

In [32]:
def train():
    """Train and validate one ``MultiModalNER`` model for a single W&B sweep run.

    Hyper-parameters are read from ``wandb.config``: ``batch_size``,
    ``learning_rate``, ``optimizer``, ``beta1`` (or ``momentum`` for SGD) and
    the ``alpha`` / ``beta`` / ``threshold`` values handed to
    ``HierarchicalLoss``. After every epoch the model is scored on
    ``test_dataset`` via ``evaluate_model`` and all metrics of that epoch are
    logged in one W&B step. The final weights are saved to
    ``f"{models_dir}{wandb.run.name}.pth"``.

    Raises:
        ValueError: If ``config.optimizer`` is neither 'adam' nor 'sgd'.
    """
    import os  # local import so this cell does not depend on an edit to the import cell

    wandb.init()

    # Use WandB configurations
    config = wandb.config
    batch_size = config.batch_size
    learning_rate = config.learning_rate

    train_dataloader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True)
    # Evaluation does not benefit from shuffling; a fixed order keeps the
    # per-batch averaged validation loss comparable across epochs and runs.
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = MultiModalNER(512, 1536, 768)
    model.to(device)

    if config.optimizer == 'adam':
        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=learning_rate,
            betas=(config.beta1, 0.999)
        )
    elif config.optimizer == 'sgd':
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=learning_rate,
            momentum=config.momentum
        )
    else:
        # Fail early instead of crashing later on `None.zero_grad()`.
        raise ValueError(f"Unsupported optimizer: {config.optimizer!r}")

    HL = HierarchicalLoss(id2label=id2label_subtask_2a, hierarchical_labels=hierarchy_subtask_2a,
                          persuasion_techniques=persuasion_techniques_2a, device=device,
                          alpha=config.alpha, beta=config.beta, threshold=config.threshold)

    # Loop-invariant evaluation settings.
    val_pred_file_path = './Predictions/val_predictions_subtask2a.json'
    val_gold_file_path = './semeval2024_dev_release/subtask2a/validation.json'
    evaluator_script = './scorer-baseline/subtask_1_2a.py'

    # Create the output locations up front so a missing directory cannot
    # discard a finished training run.
    os.makedirs(os.path.dirname(val_pred_file_path), exist_ok=True)
    os.makedirs(models_dir, exist_ok=True)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for batch in train_dataloader:
            level_targets = [batch[f'level_{level}_target'].to(device) for level in range(1, 6)]

            images_features = batch['image_features'].to(device)
            text_features = batch['text_features'].to(device)
            ner_features = batch['ner_features'].to(device)

            optimizer.zero_grad()
            level_preds = list(model(text_features, images_features, ner_features))

            dloss = HL.calculate_dloss(level_preds, level_targets)
            lloss = HL.calculate_lloss(level_preds, level_targets)

            total_loss = lloss + dloss

            total_loss.backward()
            optimizer.step()

            running_loss += total_loss.detach().item()

        # Mean loss per batch for this epoch.
        running_loss /= len(train_dataloader)

        # NOTE(review): evaluate_model runs with its default decision threshold;
        # config.threshold is only passed to HierarchicalLoss above — confirm
        # that this is intended.
        prec_h, rec_h, f1_h, validation_loss = evaluate_model(
            model, test_dataloader, val_pred_file_path, val_gold_file_path, evaluator_script,
            id2leaf_label_subtask_2a, validation=True)

        if epoch % 50 == 49:
            print(f'[{epoch+1}/{num_epochs}]')
            print("f1_h={:.5f}\tprec_h={:.5f}\trec_h={:.5f}".format(f1_h, prec_h, rec_h))

        # One log call per epoch: every wandb.log call advances the W&B step,
        # so separate calls would spread one epoch's metrics over several steps.
        wandb.log({
            "epoch": epoch,
            "train_loss": running_loss,
            "val_loss": validation_loss,
            "h_precision": prec_h,
            "h_recall": rec_h,
            "h_f1-score": f1_h,
        })

    torch.save(model.state_dict(), f"{models_dir}{wandb.run.name}.pth")
    # wandb.join() is deprecated in favour of wandb.finish().
    wandb.finish()
    
    

In [33]:
def main():
    """Start a W&B agent that calls `train` for up to five runs of sweep `sweep_id`."""
    wandb.agent(sweep_id, function=train, count=5)


if __name__ == "__main__":
    main()

[34m[1mwandb[0m: Agent Starting Run: h8gfcbnt with config:
[34m[1mwandb[0m: 	alpha: 0.8148407006372308
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta: 0.9304946887751062
[34m[1mwandb[0m: 	beta1: 0.8548119477478157
[34m[1mwandb[0m: 	learning_rate: 3.783507948650642e-05
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	threshold: 0.8803527984779607


[50/100]
f1_h=0.64432	prec_h=0.65570	rec_h=0.63332
[100/100]
f1_h=0.65329	prec_h=0.66307	rec_h=0.64380


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
h_f1-score,▁▁▃▄▅▆▇▇▇▇▇▇▇▇▇▇█▇▇▇██████████████▇▇▇███
h_precision,▁▄▅▅█▇▆▅▆▆▃▅▄▄▄▅▂▅▆▄▄▅▆▅▆▆▅▅▄▅▅▅▄▄▆▅▄▅▄▄
h_recall,▁▁▃▄▄▅▆▆▆▆▇▆▇▇▇▇█▇▇▇█▇▇▇▇▇███▇▇███▆▇███▇
train_loss,██▇▇▆▆▅▅▅▅▅▅▅▄▄▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁
val_loss,▆█▇▄▅▂▃▅▃▄▃▃▃▄▄▂▂▂▁▂▃▂▂▃▂▃▂▃▄▂▃▄▄▂▄▄▅▂▃▅

0,1
epoch,99.0
h_f1-score,0.65329
h_precision,0.66307
h_recall,0.6438
train_loss,520.90003
val_loss,870.31638


[34m[1mwandb[0m: Agent Starting Run: lf622zry with config:
[34m[1mwandb[0m: 	alpha: 0.6773769641445323
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta: 0.8801147810553063
[34m[1mwandb[0m: 	beta1: 0.8063826091240119
[34m[1mwandb[0m: 	learning_rate: 9.71121854299423e-05
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	threshold: 0.728081155498142


[50/100]
f1_h=0.66207	prec_h=0.64359	rec_h=0.68165
[100/100]
f1_h=0.65206	prec_h=0.65562	rec_h=0.64853


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
h_f1-score,▁▄▆▇▇▇▇▇▇██▇██████████▇██▇████▇█████████
h_precision,▄▁█▁▅▅▅▄▃▃▃▆▄▃▂▂▄▄▂▂▃▃▁▄▃▂▃▄▃▄▂▃▃▃▄▃▃▄▃▃
h_recall,▁▄▄▇▆▆▆▆▇▇▇▆▇███▇▇█▇▇▇▇▇█▇▇▇▇▇▇█▇▇▇▇▇▇▇▇
train_loss,██▆▆▆▆▅▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▂▁▁▁
val_loss,█▄▂▂▂▂▂▂▃▃▁▁▂▁▂▁▂▁▂▃▂▄▄▂▂▃▃▃▃▄▅▄▄▅▅▅▄▃▅▄

0,1
epoch,99.0
h_f1-score,0.65206
h_precision,0.65562
h_recall,0.64853
train_loss,102.73471
val_loss,919.15292


[34m[1mwandb[0m: Agent Starting Run: brkaqdpf with config:
[34m[1mwandb[0m: 	alpha: 0.8747963915862391
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta: 0.813983245207873
[34m[1mwandb[0m: 	beta1: 0.8163857352344261
[34m[1mwandb[0m: 	learning_rate: 1.3429958239991307e-05
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	threshold: 0.6858302547729106


[50/100]
f1_h=0.63326	prec_h=0.66518	rec_h=0.60426
[100/100]
f1_h=0.65234	prec_h=0.65344	rec_h=0.65123


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
h_f1-score,▁▃▃▄▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇█████████████
h_precision,▁██▇▇▇███████████████████▇█▇███▇▇█▇█▇▇▇▇
h_recall,█▁▁▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
train_loss,█▆▆▆▅▅▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_loss,▅▅▅██▆▅▄▃▂▃▃▃▃▁▃▃▂▂▂▂▃▃▂▃▃▃▃▅▃▄▃▃▂▃▁▃▄▃▂

0,1
epoch,99.0
h_f1-score,0.65234
h_precision,0.65344
h_recall,0.65123
train_loss,562.80384
val_loss,830.03854


[34m[1mwandb[0m: Agent Starting Run: yadyyyre with config:
[34m[1mwandb[0m: 	alpha: 0.8613357510926161
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta: 0.7292018368808355
[34m[1mwandb[0m: 	beta1: 0.8675434001226184
[34m[1mwandb[0m: 	learning_rate: 3.241287170425779e-05
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	threshold: 0.701974944206003


[50/100]
f1_h=0.63622	prec_h=0.64655	rec_h=0.62623
[100/100]
f1_h=0.65119	prec_h=0.65318	rec_h=0.64921


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
h_f1-score,▁▁▃▄▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇█████▇████▇█████▇████
h_precision,▁█▇▇▇▇█████▇██▇████▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇
h_recall,█▁▂▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅▅▆▅▆▅▆▅▅▆▆▆▅▆▅▆▅▆▆
train_loss,█▇▇▇▆▅▅▄▄▄▄▄▄▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁
val_loss,▇▅█▇▇▆▄▃▂▂▁▂▂▂▂▄▂▂▃▂▃▃▁▄▂▃▁▂▂▂▂▂▂▁▂▄▃▁▂▃

0,1
epoch,99.0
h_f1-score,0.65119
h_precision,0.65318
h_recall,0.64921
train_loss,845.89249
val_loss,1701.55682


[34m[1mwandb[0m: Agent Starting Run: ge3y1c7j with config:
[34m[1mwandb[0m: 	alpha: 0.8104726890924978
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta: 0.9962148653392854
[34m[1mwandb[0m: 	beta1: 0.9464458722543008
[34m[1mwandb[0m: 	learning_rate: 1.6605121085307106e-05
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	threshold: 0.7736966038651765


[50/100]
f1_h=0.62527	prec_h=0.68253	rec_h=0.57688
[100/100]
f1_h=0.63892	prec_h=0.65325	rec_h=0.62521


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
h_f1-score,▁▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇████████████████████
h_precision,▁██████▇▇▇▇▇███████████████████████████▇
h_recall,█▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▃▄▃▄▄▄▄▄▄▄▄▄▄▄▄
train_loss,█▆▇▇▆▆▆▆▆▅▅▅▅▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_loss,█▂▂▂▃▃▃▃▃▃▃▂▂▁▁▂▁▂▁▁▁▁▁▂▁▂▁▁▁▁▂▁▁▁▁▁▁▂▁▁

0,1
epoch,99.0
h_f1-score,0.63892
h_precision,0.65325
h_recall,0.62521
train_loss,1565.28675
val_loss,1680.8139


### Evaluation

#### OpenAI Large + CLIP + NER

In [18]:
# Architecture sized for the 3072-dimensional text embeddings of this section.
model = MultiModalNER(512, 3072, 768)
# evaluate_model() moves every batch to `device`, so the model must live there too.
model.to(device)
# map_location makes the checkpoint loadable whatever device it was saved from.
model.load_state_dict(torch.load("./models/subtask2a/MultiModal-OpenAI-Large-NER/royal-sweep-3.pth",
                                 map_location=device))

<All keys matched successfully>

In [22]:
# Arabic test set, scored against its gold labels.
ar_pred_file_path = './Predictions/subtask2a/ar_predictions_subtask2a.txt'
ar_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask2a_ar.json'
evaluator_script = './scorer-baseline/subtask_1_2a.py'

ar_test_data = process_json(ar_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
ar_test_dataset = TestDataSet(
    df=ar_test_data,
    text_features_file='./TextFeatures/subtask2a/text-embedding-3-large/ar_test_text_features.pkl',
    image_features_file='./ImageFeatures/CLIP-ViT/ar_test_images_features.pkl',
    ner_features_file='./TextFeatures/subtask2a/multilingual-ner/ar_test_text_features.pkl')

# No shuffling for inference: predictions are written in dataset order, so the
# output file is identical from run to run.
ar_test_dataloader = DataLoader(ar_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, ar_test_dataloader, ar_pred_file_path, ar_gold_file_path, evaluator_script,
               id2leaf_label_subtask_2a, format=5, validation=False, threshold=0.3)

f1_h=0.54513	prec_h=0.58333	rec_h=0.51163


In [23]:
# Bulgarian test set, scored against its gold labels.
# `evaluator_script` is defined in the Arabic evaluation cell.
bg_pred_file_path = './Predictions/subtask2a/bg_predictions_subtask2a.txt'
bg_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask2a_bg.json'

bg_test_data = process_json(bg_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
bg_test_dataset = DataSet(
    bg_test_data, indexed_persuasion_techniques_2a,
    text_features_file='./TextFeatures/subtask2a/text-embedding-3-large/bg_test_text_features.pkl',
    image_features_file='./ImageFeatures/CLIP-ViT/bulgarian_test_images_features.pkl',
    ner_features_file='./TextFeatures/subtask2a/multilingual-ner/bg_test_text_features.pkl')

# No shuffling for inference: predictions are written in dataset order, so the
# output file is identical from run to run.
bg_test_dataloader = DataLoader(bg_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, bg_test_dataloader, bg_pred_file_path, bg_gold_file_path, evaluator_script,
               id2leaf_label_subtask_2a, threshold=0.3)

f1_h=0.63279	prec_h=0.66017	rec_h=0.60759


In [24]:

# "md" test set, scored against its gold labels.
# `evaluator_script` is defined in the Arabic evaluation cell.
md_pred_file_path = './Predictions/subtask2a/md_predictions_subtask2a.txt'
md_gold_file_path = 'test_labels_ar_bg_md_version2/test_subtask2a_md.json'

md_test_data = process_json(md_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
md_test_dataset = DataSet(
    md_test_data, indexed_persuasion_techniques_2a,
    text_features_file='./TextFeatures/subtask2a/text-embedding-3-large/md_test_text_features.pkl',
    image_features_file='./ImageFeatures/CLIP-ViT/nm_test_images_features.pkl',
    ner_features_file='./TextFeatures/subtask2a/multilingual-ner/md_test_text_features.pkl')

# No shuffling for inference: predictions are written in dataset order, so the
# output file is identical from run to run.
md_test_dataloader = DataLoader(md_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, md_test_dataloader, md_pred_file_path, md_gold_file_path, evaluator_script,
               id2leaf_label_subtask_2a, threshold=0.3)

f1_h=0.67973	prec_h=0.75972	rec_h=0.61499


In [25]:
# English test set: unlabelled, so predictions are only written to disk
# (gold file is None and evaluate_model returns without scoring).
# `evaluator_script` is defined in the Arabic evaluation cell.
en_pred_file_path = './Predictions/subtask2a/en_predictions_subtask2a.txt'

eng_test_data = process_test_json('./test_data/english/en_subtask2a_test_unlabeled.json')

eng_test_dataset = TestDataSet(
    eng_test_data,
    './TextFeatures/subtask2a/text-embedding-3-large/en_test_text_features.pkl',
    './ImageFeatures/CLIP-ViT/english_test_images_features.pkl',
    './TextFeatures/subtask2a/multilingual-ner/en_test_text_features.pkl')

# No shuffling for inference: predictions are written in dataset order, so the
# submission file is identical from run to run.
eng_test_dataloader = DataLoader(eng_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, eng_test_dataloader, en_pred_file_path, None, evaluator_script,
               id2leaf_label_subtask_2a, threshold=0.3)

0.68315	0.70546	0.66221

#### OpenAI Small + CLIP + NER

In [34]:
# Architecture sized for the 1536-dimensional text embeddings of this section.
model = MultiModalNER(512, 1536, 768)
# evaluate_model() moves every batch to `device`, so the model must live there too.
model.to(device)
# map_location makes the checkpoint loadable whatever device it was saved from.
model.load_state_dict(torch.load("./models/subtask2a/MultiModal-OpenAI-Small-NER/balmy-sweep-3.pth",
                                 map_location=device))

<All keys matched successfully>

In [37]:
# Arabic test set, scored against its gold labels.
ar_pred_file_path = './Predictions/subtask2a/ar_predictions_subtask2a.txt'
ar_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask2a_ar.json'
evaluator_script = './scorer-baseline/subtask_1_2a.py'

ar_test_data = process_json(ar_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
ar_test_dataset = TestDataSet(
    df=ar_test_data,
    text_features_file='./TextFeatures/subtask2a/text-embedding-3-small/ar_test_text_features.pkl',
    image_features_file='./ImageFeatures/CLIP-ViT/ar_test_images_features.pkl',
    ner_features_file='./TextFeatures/subtask2a/multilingual-ner/ar_test_text_features.pkl')

# No shuffling for inference: predictions are written in dataset order, so the
# output file is identical from run to run.
ar_test_dataloader = DataLoader(ar_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, ar_test_dataloader, ar_pred_file_path, ar_gold_file_path, evaluator_script,
               id2leaf_label_subtask_2a, format=5, validation=False, threshold=0.3)

f1_h=0.52085	prec_h=0.53403	rec_h=0.50831


In [38]:
# Bulgarian test set, scored against its gold labels.
# `evaluator_script` is defined in the Arabic evaluation cell.
bg_pred_file_path = './Predictions/subtask2a/bg_predictions_subtask2a.txt'
bg_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask2a_bg.json'

bg_test_data = process_json(bg_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
bg_test_dataset = DataSet(
    bg_test_data, indexed_persuasion_techniques_2a,
    text_features_file='./TextFeatures/subtask2a/text-embedding-3-small/bg_test_text_features.pkl',
    image_features_file='./ImageFeatures/CLIP-ViT/bulgarian_test_images_features.pkl',
    ner_features_file='./TextFeatures/subtask2a/multilingual-ner/bg_test_text_features.pkl')

# No shuffling for inference: predictions are written in dataset order, so the
# output file is identical from run to run.
bg_test_dataloader = DataLoader(bg_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, bg_test_dataloader, bg_pred_file_path, bg_gold_file_path, evaluator_script,
               id2leaf_label_subtask_2a, threshold=0.3)

f1_h=0.62882	prec_h=0.67538	rec_h=0.58828


In [39]:

# "md" test set, scored against its gold labels.
# `evaluator_script` is defined in the Arabic evaluation cell.
md_pred_file_path = './Predictions/subtask2a/md_predictions_subtask2a.txt'
md_gold_file_path = 'test_labels_ar_bg_md_version2/test_subtask2a_md.json'

md_test_data = process_json(md_gold_file_path, techniques_to_level_2a, hierarchy_subtask_2a)
md_test_dataset = DataSet(
    md_test_data, indexed_persuasion_techniques_2a,
    text_features_file='./TextFeatures/subtask2a/text-embedding-3-small/md_test_text_features.pkl',
    image_features_file='./ImageFeatures/CLIP-ViT/nm_test_images_features.pkl',
    ner_features_file='./TextFeatures/subtask2a/multilingual-ner/md_test_text_features.pkl')

# No shuffling for inference: predictions are written in dataset order, so the
# output file is identical from run to run.
md_test_dataloader = DataLoader(md_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, md_test_dataloader, md_pred_file_path, md_gold_file_path, evaluator_script,
               id2leaf_label_subtask_2a, threshold=0.3)

f1_h=0.66341	prec_h=0.76865	rec_h=0.58352


In [40]:
# English test set: unlabelled, so predictions are only written to disk
# (gold file is None and evaluate_model returns without scoring).
# `evaluator_script` is defined in the Arabic evaluation cell.
en_pred_file_path = './Predictions/subtask2a/en_predictions_subtask2a.txt'

eng_test_data = process_test_json('./test_data/english/en_subtask2a_test_unlabeled.json')

eng_test_dataset = TestDataSet(
    eng_test_data,
    './TextFeatures/subtask2a/text-embedding-3-small/en_test_text_features.pkl',
    './ImageFeatures/CLIP-ViT/english_test_images_features.pkl',
    './TextFeatures/subtask2a/multilingual-ner/en_test_text_features.pkl')

# No shuffling for inference: predictions are written in dataset order, so the
# submission file is identical from run to run.
eng_test_dataloader = DataLoader(eng_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, eng_test_dataloader, en_pred_file_path, None, evaluator_script,
               id2leaf_label_subtask_2a, threshold=0.3)

0.67304	0.70024	0.64787