In [1]:
from collections import deque

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
import nltk
from torch.utils.data import Dataset
import pickle

from utils.utils import *
from utils.label_decoding import *
from utils.HierarchicalLoss import *

# SubTask 1

In [2]:
class DataSet(Dataset):
    """Labelled dataset pairing precomputed text features with per-level targets.

    Every item is a dict holding the sample id, its cleaned text, the feature
    entry stored for that id in the pickle file, and one multi-hot target
    tensor for each of the hierarchy levels 'Level 1' .. 'Level 5'.

    Args:
        df: DataFrame with the columns 'id', 'cleaned_text' and
            'Level 1' .. 'Level 5'; each 'Level N' cell must be a sized
            iterable of label names.
        labels_at_level: mapping 'Level N' -> {label name: target index}.
        features_file: path of a pickle file containing a mapping from
            sample id to that sample's features.
    """

    def __init__(self, df, labels_at_level, features_file):
        super(DataSet, self).__init__()
        self.data_df = df
        self.labels_at_level = labels_at_level
        self.features_file = features_file
        self.features_dict = None
        # NOTE: pickle.load can execute arbitrary code; only point this at
        # feature files produced by a trusted source.
        with open(features_file, 'rb') as f:
            self.features_dict = pickle.load(f)

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return len(self.data_df)

    def __getitem__(self, idx):
        """Return the item dict for the row at position `idx`."""
        # Materialise the row once instead of once per accessed column.
        row = self.data_df.iloc[idx]
        sample_id = row['id']

        item = {'id': sample_id,
                'text': row['cleaned_text'],
                'text_features': self.features_dict[sample_id]}
        for level in range(1, 6):
            item[f'level_{level}_target'] = self.encode(row[f'Level {level}'], level)
        return item

    def encode(self, labels, level):
        """Multi-hot encode `labels` for hierarchy level `level`.

        The returned float tensor has one slot per label known at that level
        plus one trailing slot that is set only when `labels` is empty.

        Raises:
            KeyError: if a label is not present in the mapping for the level.
        """
        label_to_index = self.labels_at_level[f'Level {level}']
        target = torch.zeros(len(label_to_index) + 1)

        if len(labels) == 0:
            # The extra last slot marks "no label at this level".
            target[-1] = 1
            return target

        for label in labels:
            target[label_to_index[label]] = 1
        return target

In [3]:
class TestDataSet(Dataset):
    """Unlabelled dataset: sample id, cleaned text and precomputed features.

    Args:
        df: DataFrame with at least the columns 'id' and 'cleaned_text'.
        features_file: path of a pickle file containing a mapping from
            sample id to that sample's features.
    """

    def __init__(self, df, features_file):
        super().__init__()
        self.data_df = df
        self.features_file = features_file
        self.features_dict = None
        # The whole id -> features mapping is read eagerly at construction.
        with open(features_file, 'rb') as handle:
            self.features_dict = pickle.load(handle)

    def __len__(self):
        """Number of rows in the wrapped DataFrame."""
        return len(self.data_df)

    def __getitem__(self, idx):
        """Build the item dict for the row at position `idx`."""
        row = self.data_df.iloc[idx]
        sample_id = row['id']
        item = {}
        item['id'] = sample_id
        item['text'] = row['cleaned_text']
        item['text_features'] = self.features_dict[sample_id]
        return item

In [4]:
def evaluate_model(model, dataloader, pred_file_path, gold_file_path,
                   evaluator_script_path, id2leaf_label, format=None, validation=False, threshold=0.3,
                   loss_fn=None):
    """Run `model` over `dataloader`, write its predictions as JSON and score them.

    The level 3, 4 and 5 predictions are binarised with `threshold` and passed,
    together with the batch ids, to `get_labels`; the collected results are
    dumped to `pred_file_path`. When `gold_file_path` is given, the scorer
    script is run as a subprocess and its stdout is printed (or its stderr on
    a non-zero exit code).

    Args:
        model: module returning five prediction tensors for a batch of features.
        dataloader: yields dict batches with 'id' and 'text_features'; when
            `validation` is true also 'level_1_target' .. 'level_5_target'.
        pred_file_path: file the predictions are written to (JSON content).
        gold_file_path: gold-label file handed to the scorer, or None to skip
            scoring.
        evaluator_script_path: path of the scorer script run with `python3`.
        id2leaf_label: forwarded unchanged to `get_labels`.
        format: forwarded unchanged to `get_labels`.
        validation: when true, also accumulate the hierarchical loss
            (dloss + lloss) over all batches.
        threshold: a score strictly above this value counts as a positive.
        loss_fn: optional object exposing `calculate_dloss` / `calculate_lloss`,
            e.g. the loss instance used for training. When omitted and
            `validation` is true, a `HierarchicalLoss` with default
            hyperparameters is built from the notebook globals, as before.

    Returns:
        The mean per-batch loss when `validation` is true and `gold_file_path`
        is not None; otherwise None.
    """
    # Imported locally so the function does not depend on a later cell having
    # been executed first.
    import json
    import subprocess
    from tqdm import tqdm

    model.eval()
    predictions = []
    total_loss = 0

    # The loss is only needed for validation, so it is not built for plain
    # test-set inference.
    if validation and loss_fn is None:
        loss_fn = HierarchicalLoss(id2label=id2label_1, hierarchical_labels=hierarchy_1,
                                   persuasion_techniques=persuasion_techniques_1, device=device)

    with torch.no_grad():
        for batch in tqdm(dataloader):
            # Ids arrive either as a tensor or as a plain list.
            if not isinstance(batch['id'], list):
                ids = batch['id'].detach().numpy().tolist()
            else:
                ids = batch['id']

            embeddings = batch['text_features'].to(device)
            pred_1, pred_2, pred_3, pred_4, pred_5 = model(embeddings)

            if validation:
                preds = [pred_1, pred_2, pred_3, pred_4, pred_5]
                targets = [batch[f'level_{level}_target'].to(device) for level in range(1, 6)]

                dloss = loss_fn.calculate_dloss(preds, targets)
                lloss = loss_fn.calculate_lloss(preds, targets)

                total_loss += (dloss + lloss).detach().cpu().item()

            # Only levels 3-5 are turned into output labels.
            pred_3 = (pred_3.cpu().detach().numpy() > threshold).astype(int)
            pred_4 = (pred_4.cpu().detach().numpy() > threshold).astype(int)
            pred_5 = (pred_5.cpu().detach().numpy() > threshold).astype(int)

            predictions += get_labels(id2leaf_label, ids, pred_3, pred_4, pred_5, format)

    # Writing JSON data
    with open(pred_file_path, 'w') as f:
        json.dump(predictions, f, indent=4)

    # Without gold labels there is nothing to score; note that this also
    # skips returning the validation loss.
    if gold_file_path is None:
        return

    command = [
        "python3", evaluator_script_path,
        "--gold_file_path", gold_file_path,
        "--pred_file_path", pred_file_path
    ]

    result = subprocess.run(command, capture_output=True, text=True)

    if result.returncode == 0:
        print("Output:\n", result.stdout)
    else:
        print("Error:\n", result.stderr)

    if validation:
        return total_loss / len(dataloader)

In [5]:
from torch.utils.data import DataLoader

# Parse the released train/validation splits; the results are handed to
# DataSet, which indexes them with .iloc.
train_data = process_json('./semeval2024_dev_release/subtask1/train.json', techniques_to_level_1, hierarchy_1)
validation_data = process_json('./semeval2024_dev_release/subtask1/validation.json', techniques_to_level_1, hierarchy_1)

# Pair every split with its precomputed text-embedding-3-large features.
training_dataset = DataSet(train_data, indexed_persuasion_techniques_1, 
                           './TextFeatures/subtask1a/text-embedding-3-large/train_text_features.pkl')
validation_dataset = DataSet(validation_data, indexed_persuasion_techniques_1, 
                             './TextFeatures/subtask1a/text-embedding-3-large/validation_text_features.pkl')

batch_size = 256

train_dataloader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True)
# Evaluation data is not shuffled: batches, and therefore the validation loss
# and the order of the prediction file, are the same on every run.
validation_dataloader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)


In [6]:
# Hyperparameters for this run. alpha, beta and threshold are passed to the
# HierarchicalLoss built in the next cell.
alpha = 0.7764469620072395
# NOTE(review): batch_size is already set to 256 in the data-loading cell,
# where the DataLoaders are built; this reassignment does not affect them.
batch_size = 256
beta = 0.95
# NOTE(review): beta1 and learning_rate are not read by the optimizer created
# in the training cell, which hard-codes its own lr/betas — confirm which
# values are intended.
beta1 = 0.9094170903394552
learning_rate = 3.906930058023181e-05
threshold = 0.8256232754296409

In [7]:
num_epochs = 100
# NOTE(review): the device returned by get_device() is overwritten on the next
# line, so everything below runs on the CPU regardless of what get_device()
# reports — confirm the override is intentional.
device = get_device()
device = torch.device("cpu")

# Loss object used by the training loop, configured with the alpha / beta /
# threshold values from the hyperparameter cell.
HL = HierarchicalLoss(id2label=id2label_1, hierarchical_labels=hierarchy_1,
                      persuasion_techniques=persuasion_techniques_1,
                      device=device, alpha=alpha, beta=beta, threshold=threshold)



Using MPS


### Model

In [8]:
from modules.nn.OpenAiLarge import OpenAiLarge

# Build a fresh model and move it to `device`; the trailing expression makes
# the notebook display the module structure.
model = OpenAiLarge()
model.to(device)

OpenAiLarge(
  (linear_level1): Sequential(
    (0): Linear(in_features=3072, out_features=1024, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.15, inplace=False)
    (3): Linear(in_features=1024, out_features=512, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.15, inplace=False)
    (6): Linear(in_features=512, out_features=128, bias=True)
    (7): ReLU()
  )
  (linear_level2): Sequential(
    (0): Linear(in_features=3072, out_features=1024, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.15, inplace=False)
    (3): Linear(in_features=1024, out_features=512, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.15, inplace=False)
    (6): Linear(in_features=512, out_features=128, bias=True)
    (7): ReLU()
  )
  (linear_level3): Sequential(
    (0): Linear(in_features=3072, out_features=1024, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.15, inplace=False)
    (3): Linear(in_features=1024, out_features=512, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.15, inplace=False)
    (6): Line

In [9]:
from tqdm import tqdm
import json
import subprocess

# NOTE(review): lr and the first beta are hard-coded here and differ from
# `learning_rate` / `beta1` in the hyperparameter cell — confirm which values
# are intended.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0004306099142228309, betas=(0.8923286832300139, 0.999))
min_val_loss = float('inf')
best_epoch = None

train_loss_history = []
val_loss_history = []

# Evaluation inputs do not change between epochs, so they are set once.
val_pred_file_path = './Predictions/val_predictions_subtask1.json'
val_gold_file_path = './semeval2024_dev_release/subtask1/validation.json'
evaluator_script = './scorer-baseline/subtask_1_2a.py'

log_every = 20  # number of batches between progress messages

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # loss summed over the current logging window
    epoch_loss = 0.0    # loss summed over the whole epoch
    for batch_idx, batch in enumerate(train_dataloader):
        y_1, y_2, y_3 = batch['level_1_target'], batch['level_2_target'], batch['level_3_target']
        y_4, y_5 = batch['level_4_target'], batch['level_5_target']

        embeddings = batch['text_features']
        embeddings = embeddings.to(device)
        y_1, y_2, y_3, y_4, y_5 = y_1.to(device), y_2.to(device), y_3.to(device), y_4.to(device), y_5.to(device)

        optimizer.zero_grad()
        pred_1, pred_2, pred_3, pred_4, pred_5 = model(embeddings)

        dloss = HL.calculate_dloss([pred_1, pred_2, pred_3, pred_4, pred_5], [y_1, y_2, y_3, y_4, y_5])
        lloss = HL.calculate_lloss([pred_1, pred_2, pred_3, pred_4, pred_5], [y_1, y_2, y_3, y_4, y_5])

        total_loss = lloss + dloss

        total_loss.backward()
        optimizer.step()

        batch_loss = total_loss.detach().item()
        running_loss += batch_loss
        epoch_loss += batch_loss

        if batch_idx % log_every == log_every - 1:
            print(f"[{epoch + 1}, {batch_idx + 1}] loss: {running_loss / log_every:.3f}")
            running_loss = 0.0

    # Mean loss over every batch of the epoch. The windowed accumulator above
    # is reset while logging and must not be used for this.
    epoch_loss /= len(train_dataloader)

    validation_loss = evaluate_model(model, validation_dataloader, val_pred_file_path, 
                                     val_gold_file_path, evaluator_script,id2leaf_label,
                                     validation=True)

    train_loss_history.append(epoch_loss)
    val_loss_history.append(validation_loss)

    # Keep the weights of the epoch with the lowest validation loss.
    if validation_loss < min_val_loss:
        min_val_loss = validation_loss
        best_epoch = epoch
        torch.save(model.state_dict(), './models/best_subtask1_baseline.pth')

# best_epoch is zero-based, unlike the one-based epoch shown in the progress log.
print(f'best validation loss occurred in epoch {best_epoch} ')
        

[1, 20] loss: 2515.505


KeyboardInterrupt: 

In [107]:
# NOTE(review): this path sits under text-embedding-3-small, while the model
# built in this notebook is OpenAiLarge fed with text-embedding-3-large
# features; the out-of-sequence execution count suggests a stale cell —
# confirm before re-running.
torch.save(model.state_dict(), './models/subtask1a/text-embedding-3-small/openai_small-3.pth')

In [12]:
# Restore the weights saved by the training loop; map_location keeps the load
# working when the checkpoint was written from a different device, as in the
# other checkpoint-loading cell.
model.load_state_dict(torch.load('./models/best_subtask1_baseline.pth', map_location=device))

<All keys matched successfully>

In [120]:
# Replace `model` with a newly constructed OpenAiLarge instance (the previous
# weights are discarded); the next cell loads a checkpoint into it.
model = OpenAiLarge()
model.to(device)

OpenAiLarge(
  (linear_level1): Sequential(
    (0): Linear(in_features=3072, out_features=1024, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.15, inplace=False)
    (3): Linear(in_features=1024, out_features=512, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.15, inplace=False)
    (6): Linear(in_features=512, out_features=128, bias=True)
    (7): ReLU()
  )
  (linear_level2): Sequential(
    (0): Linear(in_features=3072, out_features=1024, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.15, inplace=False)
    (3): Linear(in_features=1024, out_features=512, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.15, inplace=False)
    (6): Linear(in_features=512, out_features=128, bias=True)
    (7): ReLU()
  )
  (linear_level3): Sequential(
    (0): Linear(in_features=3072, out_features=1024, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.15, inplace=False)
    (3): Linear(in_features=1024, out_features=512, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.15, inplace=False)
    (6): Line

In [10]:
# Load the 'giddy-sweep-11' checkpoint, remapping its tensors onto `device`.
model.load_state_dict(torch.load('./models/subtask1a/openAI-Large/giddy-sweep-11.pth', map_location=device))

<All keys matched successfully>

### Evaluation

#### Bulgarian

In [11]:
from tqdm import tqdm
import json
import subprocess

bulgarian_pred_file_path = './Predictions/bulgarian_predictions_subtask1.txt'
bulgarian_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask1_bg.json'
evaluator_script = './scorer-baseline/subtask_1_2a.py'

# The gold file is both the input that is parsed and the reference the scorer
# compares the predictions against.
bg_test_data = process_test_json(bulgarian_gold_file_path)

bg_test_dataset = TestDataSet(bg_test_data, './TextFeatures/subtask1a/text-embedding-3-large/bg_test_text_features.pkl')
# No shuffling for evaluation: the prediction file keeps the dataset order and
# is identical across runs.
bg_test_dataloader = DataLoader(bg_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, bg_test_dataloader, bulgarian_pred_file_path, bulgarian_gold_file_path,
               evaluator_script, id2leaf_label, validation=False, threshold=0.3)

100%|██████████| 7/7 [00:00<00:00, 115.33it/s]


Output:
 f1_h=0.41630	prec_h=0.46651	rec_h=0.37584


In [12]:
# Score the model on the training split; validation=False, so no loss is
# computed or returned.
# NOTE(review): the threshold here is 0.9, whereas the other evaluation cells
# use 0.3 — confirm this is intentional.
evaluate_model(model, train_dataloader, 'train_pred.json', 'semeval2024_dev_release/subtask1/train.json',
               evaluator_script, id2leaf_label, validation=False, threshold=0.9)

100%|██████████| 28/28 [00:01<00:00, 16.80it/s]


Output:
 f1_h=0.93627	prec_h=0.99888	rec_h=0.88105


In [13]:
# Score the model on the validation split at threshold 0.3; validation=False,
# so only the scorer output is produced and no loss is returned.
evaluate_model(model, validation_dataloader, 'valid_pred.json', 'semeval2024_dev_release/subtask1/validation.json',
               evaluator_script, id2leaf_label, validation=False, threshold=0.3)

100%|██████████| 2/2 [00:00<00:00, 15.87it/s]


Output:
 f1_h=0.57012	prec_h=0.62277	rec_h=0.52569


#### North Macedonian

In [14]:
macedonian_pred_file_path = './Predictions/macedonian_predictions_subtask1.txt'
macedonian_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask1_md.json'

# The gold file is both the input that is parsed and the reference the scorer
# compares the predictions against.
md_test_data = process_test_json(macedonian_gold_file_path)

md_test_dataset = TestDataSet(md_test_data, './TextFeatures/subtask1a/text-embedding-3-large/md_test_text_features.pkl')
# No shuffling for evaluation: the prediction file keeps the dataset order and
# is identical across runs.
md_test_dataloader = DataLoader(md_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, md_test_dataloader, macedonian_pred_file_path, macedonian_gold_file_path,
               evaluator_script, id2leaf_label, validation=False, threshold=0.3)

100%|██████████| 5/5 [00:00<00:00, 116.65it/s]


Output:
 f1_h=0.36864	prec_h=0.57254	rec_h=0.27183


#### Arabic

In [15]:
arabian_pred_file_path = './Predictions/arabian_predictions_subtask1.txt'
arabian_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask1_ar.json'

# The gold file is both the input that is parsed and the reference the scorer
# compares the predictions against.
ar_test_data = process_test_json(arabian_gold_file_path)

ar_test_dataset = TestDataSet(ar_test_data, './TextFeatures/subtask1a/text-embedding-3-large/ar_test_text_features.pkl')
# No shuffling for evaluation: the prediction file keeps the dataset order and
# is identical across runs.
ar_test_dataloader = DataLoader(ar_test_dataset, batch_size=128, shuffle=False)

# format=5 is forwarded to get_labels; the other language cells leave it at None.
evaluate_model(model, ar_test_dataloader, arabian_pred_file_path, arabian_gold_file_path, evaluator_script,
               id2leaf_label, format=5, validation=False, threshold=0.3)

100%|██████████| 1/1 [00:00<00:00, 59.19it/s]


Output:
 f1_h=0.35110	prec_h=0.31728	rec_h=0.39298


#### English

In [16]:
en_pred_file_path = './Predictions/en_predictions_subtask1.txt'

en_test_data = process_test_json('./test_data/english/en_subtask1_test_unlabeled.json')

en_test_dataset = TestDataSet(en_test_data, './TextFeatures/subtask1a/text-embedding-3-large/en_test_text_features.pkl')
# No shuffling for inference: the prediction file keeps the dataset order and
# is identical across runs.
en_test_dataloader = DataLoader(en_test_dataset, batch_size=16, shuffle=False)

# No gold file is passed (None), so predictions are only written to disk and
# the scorer is not run.
evaluate_model(model, en_test_dataloader, en_pred_file_path, None, evaluator_script, id2leaf_label, validation=False)

100%|██████████| 94/94 [00:01<00:00, 83.32it/s] 


In [17]:
# Spot-check: decode the labels predicted for one batch of the English test set.
data_iterator = iter(en_test_dataloader)

model.eval()
with torch.no_grad():
    batch = next(data_iterator)
    # Move the features to the model's device, as evaluate_model does, so the
    # cell keeps working when `device` is not the CPU.
    pred_1, pred_2, pred_3, pred_4, pred_5 = model(batch['text_features'].to(device))

# Stricter decision threshold than the 0.3 default of evaluate_model.
t = 0.7

pred_3 = (pred_3.cpu().detach().numpy() > t).astype(int)
pred_4 = (pred_4.cpu().detach().numpy() > t).astype(int)
pred_5 = (pred_5.cpu().detach().numpy() > t).astype(int)

# Mirror evaluate_model, which accepts the batch ids either as a plain list or
# as a tensor.
ids = batch['id'] if isinstance(batch['id'], list) else batch['id'].tolist()

get_labels(id2leaf_label, ids, pred_3, pred_4, pred_5, format=None)

[{'id': '74963',
  'labels': ['Black-and-white Fallacy/Dictatorship', 'Appeal to authority']},
 {'id': '64968', 'labels': ['Loaded Language']},
 {'id': '68517', 'labels': ['Name calling/Labeling']},
 {'id': '68199', 'labels': []},
 {'id': '77926', 'labels': []},
 {'id': '77339', 'labels': ['Causal Oversimplification']},
 {'id': '67938', 'labels': ['Black-and-white Fallacy/Dictatorship']},
 {'id': '75036', 'labels': ['Smears', 'Thought-terminating cliché']},
 {'id': '65794', 'labels': []},
 {'id': '76075',
  'labels': ['Smears', 'Appeal to authority', 'Name calling/Labeling']},
 {'id': '65649', 'labels': ['Smears']},
 {'id': '76963', 'labels': ['Causal Oversimplification']},
 {'id': '68530', 'labels': ['Doubt']},
 {'id': '65374',
  'labels': ['Name calling/Labeling', 'Smears', 'Loaded Language']},
 {'id': '64551', 'labels': []},
 {'id': '79789', 'labels': ['Smears']}]