In [1]:
import json
import pickle
import subprocess
from collections import deque

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from tqdm import tqdm

from utils.utils import *
from utils.label_decoding import *
from utils.HierarchicalLoss import *

In [2]:
class DataSet(Dataset):
    """Labelled dataset pairing each dataframe row with cached features and targets.

    Features are looked up by the row's 'id' in two pickled dictionaries
    (text features and NER features). Targets for 'Level 1' .. 'Level 5' are
    multi-hot vectors produced by :meth:`encode`.
    """

    def __init__(self, df, labels_at_level, features_file, ner_features_file):
        super().__init__()
        self.data_df = df
        self.labels_at_level = labels_at_level
        self.features_file = features_file
        self.features_dict = self._read_pickle(features_file)
        self.ner_features_dict = self._read_pickle(ner_features_file)

    @staticmethod
    def _read_pickle(path):
        """Return the object stored in the pickle file at `path`."""
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this at feature files you produced yourself.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def __len__(self):
        return len(self.data_df)

    def __getitem__(self, idx):
        """Return a dict with the id, text, both feature entries and five level targets."""
        row = self.data_df.iloc[idx]
        sample_id = row['id']
        text = row['cleaned_text']

        # Encode the targets before touching the feature dictionaries.
        targets = {
            f'level_{level}_target': self.encode(row[f'Level {level}'], level)
            for level in range(1, 6)
        }

        return {
            'id': sample_id,
            'text': text,
            'text_features': self.features_dict[sample_id],
            'ner_features': self.ner_features_dict[sample_id],
            **targets,
        }

    def encode(self, labels, level):
        """Build the multi-hot target for `labels` at the given hierarchy `level`.

        The vector has one slot per label known at that level plus one trailing
        slot, which is set when `labels` is empty.
        """
        label_to_index = self.labels_at_level[f'Level {level}']
        target = torch.zeros(len(label_to_index) + 1)

        for name in labels:
            target[label_to_index[name]] = 1

        if not len(labels):
            target[-1] = 1

        return target

In [3]:
class TestDataSet(Dataset):
    """Unlabelled dataset pairing each dataframe row with its cached features.

    Features are looked up by the row's 'id' in two pickled dictionaries
    (text features and NER features); no targets are produced.
    """

    def __init__(self, df, features_file, ner_features_file):
        super().__init__()
        self.data_df = df
        self.features_file = features_file
        self.features_dict = self._read_pickle(features_file)
        self.ner_features_dict = self._read_pickle(ner_features_file)

    @staticmethod
    def _read_pickle(path):
        """Return the object stored in the pickle file at `path`."""
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this at feature files you produced yourself.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def __len__(self):
        return len(self.data_df)

    def __getitem__(self, idx):
        """Return a dict with the row's id, text and both feature entries."""
        row = self.data_df.iloc[idx]
        sample_id = row['id']

        sample = {'id': sample_id, 'text': row['cleaned_text']}
        sample['text_features'] = self.features_dict[sample_id]
        sample['ner_features'] = self.ner_features_dict[sample_id]
        return sample

In [4]:
# Everything in this notebook runs on the CPU. `evaluate_model` reads this
# module-level `device` when moving the input features (and, in validation
# mode, the targets) and when constructing the hierarchical loss.
device = torch.device('cpu')

In [11]:
def evaluate_model(model, dataloader, pred_file_path, gold_file_path,
                   evaluator_script_path, id2leaf_label, format=None, validation=False, threshold=0.3):
    """Run `model` over `dataloader`, dump decoded predictions as JSON and optionally score them.

    Args:
        model: callable invoked as ``model(text_features, ner_features)`` that
            returns five prediction tensors, one per hierarchy level.
        dataloader: iterable of batch dicts with keys 'id', 'text_features' and
            'ner_features'; when `validation` is True each batch must also hold
            'level_1_target' .. 'level_5_target'.
        pred_file_path: path the list of predictions is written to as JSON.
        gold_file_path: gold file handed to the scorer; ``None`` skips scoring.
        evaluator_script_path: scorer script, executed with ``python3``.
        id2leaf_label: forwarded unchanged to ``get_labels``.
        format: forwarded unchanged to ``get_labels``.
        validation: when True, also accumulate the hierarchical loss
            (``calculate_dloss`` + ``calculate_lloss``) per batch.
        threshold: level 3-5 scores strictly greater than this become 1, the
            rest 0, before decoding.

    Returns:
        The mean per-batch loss when `validation` is True (NaN if the
        dataloader yielded no batches), otherwise ``None``. The loss is
        returned whether or not a gold file was supplied.
    """
    model.eval()
    predictions = []

    # The loss object (and the module-level label metadata it needs) is only
    # required in validation mode, so plain inference does not depend on it.
    loss_fn = None
    if validation:
        loss_fn = HierarchicalLoss(id2label=id2label_1, hierarchical_labels=hierarchy_1,
                                   persuasion_techniques=persuasion_techniques_1, device=device)

    total_loss = 0.0
    num_batches = 0

    with torch.no_grad():
        for batch in tqdm(dataloader):
            num_batches += 1

            # Default collation yields a tensor for numeric ids and a list for
            # string ids; normalise both to a plain Python list.
            ids = batch['id']
            if not isinstance(ids, list):
                ids = ids.detach().cpu().numpy().tolist()

            embeddings = batch['text_features'].to(device)
            ner_embeddings = batch['ner_features'].to(device)

            pred_1, pred_2, pred_3, pred_4, pred_5 = model(embeddings, ner_embeddings)

            if validation:
                level_preds = [pred_1, pred_2, pred_3, pred_4, pred_5]
                level_targets = [batch[f'level_{level}_target'].to(device) for level in range(1, 6)]

                dloss = loss_fn.calculate_dloss(level_preds, level_targets)
                lloss = loss_fn.calculate_lloss(level_preds, level_targets)

                total_loss += (dloss + lloss).detach().cpu().item()

            # Only levels 3-5 are binarised and decoded into labels.
            pred_3, pred_4, pred_5 = (
                (scores.cpu().detach().numpy() > threshold).astype(int)
                for scores in (pred_3, pred_4, pred_5)
            )

            predictions += get_labels(id2leaf_label, ids, pred_3, pred_4, pred_5, format)

    # Writing JSON data
    with open(pred_file_path, 'w') as f:
        json.dump(predictions, f, indent=4)

    mean_loss = None
    if validation:
        # Avoid ZeroDivisionError on an empty loader; NaN cannot be mistaken
        # for a real (or best-so-far) loss value.
        mean_loss = total_loss / num_batches if num_batches else float('nan')

    if gold_file_path is not None:
        command = [
            "python3", evaluator_script_path,
            "--gold_file_path", gold_file_path,
            "--pred_file_path", pred_file_path
        ]

        result = subprocess.run(command, capture_output=True, text=True)

        if result.returncode == 0:
            print("Output:\n", result.stdout)
        else:
            print("Error:\n", result.stderr)

    return mean_loss

In [22]:
from modules.nn.OpenAiLarge import OpenAiLargeNER

model = OpenAiLargeNER()
# map_location remaps tensors that were saved on another device (e.g. a GPU)
# onto `device`, so the checkpoint also loads on a CPU-only machine.
state_dict = torch.load("./models/subtask1a/openAI-Large-NER/misunderstood-sweep-2.pth",
                        map_location=device)
# Keep the weights on the same device evaluate_model moves its inputs to.
model.to(device)
model.load_state_dict(state_dict)

<All keys matched successfully>

In [23]:
# Bulgarian test set: decode predictions at threshold 0.1 and score them.
bulgarian_pred_file_path = './Predictions/bulgarian_predictions_subtask1.txt'
bulgarian_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask1_bg.json'
evaluator_script = './scorer-baseline/subtask_1_2a.py'

# NOTE(review): process_test_json and id2leaf_label are not defined in any
# visible cell; presumably they come from the wildcard `utils` imports. Confirm.
bg_test_data = process_test_json(bulgarian_gold_file_path)


bg_test_dataset = TestDataSet(bg_test_data, './TextFeatures/subtask1a/text-embedding-3-large/bg_test_text_features.pkl',
                              './TextFeatures/subtask1a/BERT-NER/bg_test_text_features.pkl')
# Evaluation gains nothing from shuffling; a fixed order keeps the written
# predictions file identical from run to run.
bg_test_dataloader = DataLoader(bg_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, bg_test_dataloader, bulgarian_pred_file_path, bulgarian_gold_file_path,
               evaluator_script, id2leaf_label, validation=False, threshold=0.1)

100%|██████████| 7/7 [00:00<00:00, 95.67it/s]


Output:
 f1_h=0.33730	prec_h=0.42790	rec_h=0.27836


In [24]:
# Macedonian test set: decode predictions at threshold 0.1 and score them.
macedonian_pred_file_path = './Predictions/macedonian_predictions_subtask1.txt'
macedonian_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask1_md.json'

md_test_data = process_test_json(macedonian_gold_file_path)

md_test_dataset = TestDataSet(md_test_data, './TextFeatures/subtask1a/text-embedding-3-large/md_test_text_features.pkl',
                              './TextFeatures/subtask1a/BERT-NER/md_test_text_features.pkl')
# Evaluation gains nothing from shuffling; a fixed order keeps the written
# predictions file identical from run to run.
md_test_dataloader = DataLoader(md_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, md_test_dataloader, macedonian_pred_file_path, macedonian_gold_file_path,
               evaluator_script, id2leaf_label, validation=False, threshold=0.1)

100%|██████████| 5/5 [00:00<00:00, 91.50it/s]


Output:
 f1_h=0.29545	prec_h=0.51057	rec_h=0.20787


In [25]:
# Macedonian test set again, this time at threshold 0.3 for comparison.
# NOTE(review): this writes to the same predictions file as the threshold 0.1
# run, so the file on disk ends up holding the 0.3 predictions. Confirm that
# is the intended final artefact.
macedonian_pred_file_path = './Predictions/macedonian_predictions_subtask1.txt'
macedonian_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask1_md.json'

md_test_data = process_test_json(macedonian_gold_file_path)

md_test_dataset = TestDataSet(md_test_data, './TextFeatures/subtask1a/text-embedding-3-large/md_test_text_features.pkl',
                              './TextFeatures/subtask1a/BERT-NER/md_test_text_features.pkl')
# Evaluation gains nothing from shuffling; a fixed order keeps the written
# predictions file identical from run to run.
md_test_dataloader = DataLoader(md_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, md_test_dataloader, macedonian_pred_file_path, macedonian_gold_file_path,
               evaluator_script, id2leaf_label, validation=False, threshold=0.3)

100%|██████████| 5/5 [00:00<00:00, 93.33it/s]


Output:
 f1_h=0.24719	prec_h=0.51765	rec_h=0.16236


In [26]:
# Arabic test set: decode predictions at threshold 0.3 and score them.
# `format=5` is forwarded to get_labels by evaluate_model.
arabian_pred_file_path = './Predictions/arabian_predictions_subtask1.txt'
arabian_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask1_ar.json'

ar_test_data = process_test_json(arabian_gold_file_path)

ar_test_dataset = TestDataSet(ar_test_data, './TextFeatures/subtask1a/text-embedding-3-large/ar_test_text_features.pkl',
                              './TextFeatures/subtask1a/BERT-NER/ar_test_text_features.pkl')
# Evaluation gains nothing from shuffling; a fixed order keeps the written
# predictions file identical from run to run.
ar_test_dataloader = DataLoader(ar_test_dataset, batch_size=128, shuffle=False)

evaluate_model(model, ar_test_dataloader, arabian_pred_file_path, arabian_gold_file_path, evaluator_script,
               id2leaf_label, format=5, validation=False, threshold=0.3)

100%|██████████| 1/1 [00:00<00:00, 48.80it/s]


Output:
 f1_h=0.31763	prec_h=0.31597	rec_h=0.31930
