In [1]:
import json
import itertools
import os
from tokenizers import Encoding
from typing import List
from transformers import AutoTokenizer, AutoModelForTokenClassification, DataCollatorForTokenClassification, TrainingArguments, Trainer, EarlyStoppingCallback
import torch
from collections import defaultdict
from datasets import Dataset
import pandas as pd
import numpy as np
import evaluate
from sklearn.metrics import f1_score
from collections import Counter
from seqeval.metrics import classification_report
import re
from datetime import datetime
import unicodedata

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def align_tokens_and_annotations_bio(tokenized: Encoding, annotations):
    tokens = tokenized.tokens
    aligned_labels = ["O"] * len(
        tokens
    )  # Make a list to store our labels the same length as our tokens
    for anno in annotations:
        annotation_token_ix_set = (
            set()
        )  # A set that stores the token indices of the annotation
        for char_ix in range(anno["start"], anno["end"]):
            print('char_ix = ', char_ix)
            token_ix = tokenized.char_to_token(char_ix)
            if token_ix is not None:
                annotation_token_ix_set.add(token_ix)
        if len(annotation_token_ix_set) == 1:
            # If there is only one token
            token_ix = annotation_token_ix_set.pop()
            prefix = (
                "B"  # This annotation spans one token so is prefixed with U for unique
            )
            aligned_labels[token_ix] = f"{prefix}-{anno['tag']}"

        else:

            last_token_in_anno_ix = len(annotation_token_ix_set) - 1
            for num, token_ix in enumerate(sorted(annotation_token_ix_set)):
                if num == 0:
                    prefix = "B"
                elif num == last_token_in_anno_ix:
                    prefix = "I"  # Its the last token
                else:
                    prefix = "I"  # We're inside of a multi token annotation
                aligned_labels[token_ix] = f"{prefix}-{anno['tag']}"
    return aligned_labels

class LabelSet:
    def __init__(self, labels: List[str]):
        self.labels_to_id = {}
        self.ids_to_label = {}
        self.labels_to_id["O"] = 0
        self.ids_to_label[0] = "O"
        num = 0  # in case there are no labels
        # Writing BILU will give us incremental ids for the labels
        for _num, (label, s) in enumerate(itertools.product(labels, "BI")):
            num = _num + 1  # skip 0
            l = f"{s}-{label}"
            self.labels_to_id[l] = num
            self.ids_to_label[num] = l


    def get_aligned_label_ids_from_annotations(self, tokenized_text, annotations):
        raw_labels = align_tokens_and_annotations_bio(tokenized_text, annotations)
        return list(map(self.labels_to_id.get, raw_labels))


def tokenize_token_classification(examples, tokenizer):
    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True, padding='longest', return_tensors='pt')

    labels = []
    for i, label in enumerate(examples[f"ner_tags"]):
        word_ids = [tokenized_inputs.token_to_word(i, j) for j in range(len(tokenized_inputs['input_ids'][i]))]  # Map tokens to their respective word.
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:  # Set the special tokens to -100.
            if word_idx is None:
                label_ids.append(-100)
            elif word_idx != previous_word_idx:  # Only label the first token of a given word.
                label_ids.append(label[word_idx])
            else:
                label_ids.append(-100)
            previous_word_idx = word_idx
        labels.append(label_ids)

    tokenized_inputs["labels"] = torch.tensor(labels)
    return tokenized_inputs

def dict_of_lists(lst_of_dicts):
    result = defaultdict(list)
    for d in lst_of_dicts:
        for key, value in d.items():
            result[key].append(value)
    return dict(result)

def list_of_dicts(dict_of_lists):
    # First, we need to check if all lists are of the same length to ensure correct transformation
    if not all(len(lst) == len(next(iter(dict_of_lists.values()))) for lst in dict_of_lists.values()):
        raise ValueError("All lists in the dictionary must have the same length")

    # Get the length of the items in any of the lists
    length = len(next(iter(dict_of_lists.values())))
    
    # Create a list of dictionaries, one for each index in the lists
    result = []
    for i in range(length):
        # Create a dictionary for the current index 'i' across all lists
        new_dict = {key: dict_of_lists[key][i] for key in dict_of_lists}
        result.append(new_dict)
    
    return result

def sub_shift_spans(text, ents = [], mappings = []):
    for mapping in mappings:
        adjustment = 0
        pattern = re.compile(mapping['pattern'])
        for match in re.finditer(pattern, text):
            match_index = match.start() + adjustment
            match_contents = match.group()
            if all(mapping['check'](char) for char in match_contents):
                subbed_text = mapping['target'].replace('placeholder', match_contents)
            else:
                subbed_text = mapping['target']
            len_diff = len(subbed_text) - len(match_contents)
            text = text[:match_index] + subbed_text + text[match_index + len(match_contents):]
            if ents:
                if isinstance(ents, list):
                    for ent in ents:
                        if ent['start'] <= match_index and ent['end'] > match_index:
                            ent['end'] += len_diff
                        if ent['start'] > match_index:
                            ent['start'] += len_diff
                            ent['end'] += len_diff
                elif isinstance(ents, dict):
                    if ents['value']['start'] <= match_index and ents['value']['end'] > match_index:
                        ents['value']['end'] += len_diff
                    if ents['value']['start'] > match_index:
                        ents['value']['start'] += len_diff
                        ents['value']['end'] += len_diff

            adjustment += len_diff

    return text, ents

def span_to_words_annotation(samples, target_tag = '', mappings = {}, labels_model = []):
    samples_new = []
    # if not any([l for l in samples['annotations']]):
        
    for i in range(len(samples['data'])):
        text, annotation_list = samples['data'][i]['text'], samples['annotations'][i][0]['result']
        labels_text = []
        tokens = []
        if not annotation_list:
            annotation_list = [[]]
        for j, annotation in enumerate(annotation_list):
            if isinstance(annotation, dict):
                if annotation['value']['labels'][0] != target_tag:
                    continue
            text_subshifted, ents = sub_shift_spans(text, annotation, mappings=mappings)
            text_subshifted_matches = re.finditer(r'[^\s]+', text_subshifted)
            labels_words = []
            first = True
            for regex_match in text_subshifted_matches:
                if j == 0:
                    tokens.append(regex_match.group())
                if isinstance(annotation, dict):
                    if regex_match.start() < ents['value']['start']:
                        labels_words.append(labels_model.labels_to_id['O'])
                    elif regex_match.start() >= ents['value']['start'] and regex_match.end() <= ents['value']['end']:
                        if first:
                            labels_words.append(labels_model.labels_to_id['B-' + ents['value']['labels'][0]])
                            first = False
                        elif not first:
                            labels_words.append(labels_model.labels_to_id['I-' + ents['value']['labels'][0]])
                    else:
                        labels_words.append(labels_model.labels_to_id['O'])
                    labels_text.append({'labels': labels_words, 'tag': annotation['value']['labels'][0]})
        allowed_labels = [labels_model.labels_to_id['O'],
                          labels_model.labels_to_id['B-' + target_tag],
                          labels_model.labels_to_id['I-' + target_tag],
                          ]
        # if the training sample has no tags that we need, we just produce a 0s list
        if target_tag not in [labels['tag'] for labels in labels_text]:
            labels = [0] * len(tokens)
            tag = 'no_tag'
        # if the training sample has tags we need, we first exclude the label lists whose tags don't match
        # and then we merge the label lists that have tags that match the target tag
        else:
            labels = [max(values) for values in zip(*[labels['labels'] for labels in labels_text if labels['tag'] == target_tag])]
            labels = [(label if label in allowed_labels else 0) for label in labels]
            tag = target_tag
        samples_new.append({
            'id': i,
            'ner_tags': labels,
            'tokens': tokens,
            'tag': tag,
        })
    return samples_new

regex_tokenizer_mappings = [
    {'pattern': r'(?<!\s)([^\w\s])|([^\w\s])(?!\s)',
    'target': ' placeholder ',
    'check': lambda x: unicodedata.category(x).startswith('P'),
    },
    {'pattern': r'\s+',
     'target': ' ',
     'check': lambda x: False if re.match('\s+', x) is None else True,
     },
    ]

def compute_metrics_wrapper(label_list, pt, model_name_simple, date_time):
    def compute_metrics(eval_preds):
        nonlocal label_list
        nonlocal pt
        logits, labels = eval_preds
        predictions = np.argmax(logits, axis=2)

        # Extract the true predictions and labels from the sequences
        true_predictions = [
            [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
            for prediction, label in zip(predictions, labels)
        ]
        true_labels = [
            [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
            for prediction, label in zip(predictions, labels)
        ]

        # Compute sequence-level evaluation metrics
        results = classification_report(true_predictions, true_labels, output_dict=True)

        # Flatten the lists to calculate micro F1-score and supports
        flat_true_predictions = [item for sublist in true_predictions for item in sublist]
        flat_true_labels = [item for sublist in true_labels for item in sublist]

        # Calculate micro F1-score using sklearn
        micro_f1 = f1_score(flat_true_labels, flat_true_predictions, average='micro')

        # Prepare the results dictionary
        flat_results = {'micro_f1': float(micro_f1)}

        # Add detailed metrics for each label to the results dictionary
        for label, metrics in results.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    flat_results[f'{label}_{metric}'] = float(value)

        # Compute support for each label using Counter
        label_support = Counter(flat_true_labels)
        for label, count in label_support.items():
            flat_results[f'{label}_support'] = count
        
        models_dir = '/home/lgiordano/LUCA/checkthat_GITHUB/models/M2'
        model_save_name = f'{model_name_simple}_{tt[0]}_target={tt[1]}_SUBSAMPLED_{date_time}'
        model_save_dir = os.path.join(models_dir, date_time, model_save_name)
        if not os.path.exists(model_save_dir):
            os.makedirs(model_save_dir)

        with open(os.path.join(model_save_dir, 'results.json'), 'w', encoding='utf8') as f:
            json.dump(flat_results, f, ensure_ascii = False)

        return flat_results
    return compute_metrics

In [3]:
date_time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

train_data_path = '/home/lgiordano/LUCA/checkthat_GITHUB/data/formatted/train_sentences.json'
with open(train_data_path, 'r', encoding='utf8') as f:
    dataset_raw = json.load(f)

df_raw = pd.DataFrame(dataset_raw)  

df_pos = df_raw[df_raw['annotations'].apply(lambda x: len(x[0]['result']) > 0)]
df_neg = df_raw[df_raw['annotations'].apply(lambda x: x[0]['result'] == [])].sample(len(df_pos))
df = pd.concat([df_pos,df_neg])

target_tags = ["Appeal_to_Authority", "Appeal_to_Popularity","Appeal_to_Values","Appeal_to_Fear-Prejudice","Flag_Waving","Causal_Oversimplification",
               "False_Dilemma-No_Choice","Consequential_Oversimplification","Straw_Man","Red_Herring","Whataboutism","Slogans","Appeal_to_Time",
               "Conversation_Killer","Loaded_Language","Repetition","Exaggeration-Minimisation","Obfuscation-Vagueness-Confusion","Name_Calling-Labeling",
               "Doubt","Guilt_by_Association","Appeal_to_Hypocrisy","Questioning_the_Reputation"]
target_tags = [(i, el.strip()) for i, el in enumerate(target_tags)]


In [10]:
shift = 0
for i, tt in enumerate(target_tags[12:]):
    if i < shift:
        continue
    print(f'Training model no. {i} of {len(target_tags)} for {tt[1]} persuasion technique...')
    labels_model = LabelSet(labels=[tt[1]])
    
    df_list = df.to_dict(orient='records')
    df_list_binary = span_to_words_annotation(dict_of_lists(df_list), target_tag=tt[1], mappings=regex_tokenizer_mappings, labels_model=labels_model)
    df_binary = pd.DataFrame(df_list_binary)
    df_binary_pos = df_binary[df_binary['tag'] == tt[1]]
    df_binary_neg = df_binary[df_binary['tag'] != tt[1]].sample(len(df_binary_pos))
    df_binary_subsampled = pd.concat([df_binary_pos, df_binary_neg])#.sample(1000)

    binary_dataset = Dataset.from_pandas(df_binary_subsampled[['id', 'ner_tags', 'tokens']])

    split_ratio = 0.2
    split_seed = 42
    datadict = binary_dataset.train_test_split(split_ratio, seed=split_seed)

    #model_name = 'bert-base-multilingual-cased'
    #model_name = 'xlm-roberta-base'
    model_name = 'microsoft/mdeberta-v3-base'
    model_name_simple = model_name.split('/')[-1]
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    batch_size = 16
    datadict = datadict.map(lambda x: tokenize_token_classification(x, tokenizer), batched=True, batch_size=None)

    columns = [
                'input_ids',
                'token_type_ids',
                'attention_mask',
                'labels'
                ]

    datadict.set_format('torch', columns = columns)

    train_data = datadict['train']
    val_data = datadict['test']

    data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer, padding='longest')

    model = AutoModelForTokenClassification.from_pretrained(model_name,
                                                                num_labels=len(labels_model.ids_to_label.values()),
                                                                label2id=labels_model.labels_to_id,
                                                                id2label=labels_model.ids_to_label,
                                                                )
    
    training_args = TrainingArguments(output_dir='/home/lgiordano/LUCA/checkthat_GITHUB/models/M2/mdeberta-v3-base-NEW',
                                  save_total_limit=2,
                                  save_strategy='epoch',
                                  load_best_model_at_end=True,
                                  save_only_model=True,
                                  metric_for_best_model='eval_macro avg_f1-score',
                                  logging_strategy='epoch',
                                  evaluation_strategy='epoch',
                                  learning_rate=5e-5,
                                  optim='adamw_torch',
                                  num_train_epochs=10)
    
    early_stopping = EarlyStoppingCallback(early_stopping_patience=2)

    trainer = Trainer(model,
                      training_args,
                      train_dataset=train_data,
                      eval_dataset=val_data,
                      data_collator=data_collator,
                      tokenizer=tokenizer,
                      compute_metrics=compute_metrics_wrapper(
                          label_list=[i for i in labels_model.ids_to_label.values()],
                          pt=tt[1],
                          model_name_simple=model_name_simple,
                          date_time=date_time),
                      callbacks=[early_stopping])
    
    trainer.train()

    trainer.evaluate()

Training model no. 0 of 23 for Appeal_to_Time persuasion technique...


Map:   0%|          | 0/259 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 259/259 [00:00<00:00, 4554.48 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 4203.94 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Appeal To Time Precision,Appeal To Time Recall,Appeal To Time F1-score,Appeal To Time Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-appeal To Time Support,I-appeal To Time Support
1,0.7294,0.602196,0.729904,0.037037,0.022727,0.028169,44.0,0.037037,0.022727,0.028169,44.0,0.037037,0.022727,0.028169,44.0,0.037037,0.022727,0.028169,44.0,633,27,273
2,0.4372,0.655504,0.752412,0.148148,0.086957,0.109589,46.0,0.148148,0.086957,0.109589,46.0,0.148148,0.086957,0.109589,46.0,0.148148,0.086957,0.109589,46.0,633,27,273
3,0.2563,0.743638,0.743837,0.222222,0.136364,0.169014,44.0,0.222222,0.136364,0.169014,44.0,0.222222,0.136364,0.169014,44.0,0.222222,0.136364,0.169014,44.0,633,27,273
4,0.1447,0.92197,0.744909,0.296296,0.190476,0.231884,42.0,0.296296,0.190476,0.231884,42.0,0.296296,0.190476,0.231884,42.0,0.296296,0.190476,0.231884,42.0,633,27,273
5,0.0726,1.180071,0.744909,0.259259,0.166667,0.202899,42.0,0.259259,0.166667,0.202899,42.0,0.259259,0.166667,0.202899,42.0,0.259259,0.166667,0.202899,42.0,633,27,273
6,0.0496,1.071147,0.748124,0.185185,0.116279,0.142857,43.0,0.185185,0.116279,0.142857,43.0,0.185185,0.116279,0.142857,43.0,0.185185,0.116279,0.142857,43.0,633,27,273


Training model no. 1 of 23 for Conversation_Killer persuasion technique...


Map:   0%|          | 0/1553 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 1553/1553 [00:00<00:00, 4971.02 examples/s]
Map: 100%|██████████| 389/389 [00:00<00:00, 4250.02 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Conversation Killer Precision,Conversation Killer Recall,Conversation Killer F1-score,Conversation Killer Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,B-conversation Killer Support,I-conversation Killer Support,O Support
1,0.5354,0.480179,0.804437,0.340136,0.21645,0.26455,231.0,0.340136,0.21645,0.26455,231.0,0.340136,0.21645,0.26455,231.0,0.340136,0.21645,0.26455,231.0,147,1038,3458
2,0.3204,0.468791,0.832651,0.421769,0.266094,0.326316,233.0,0.421769,0.266094,0.326316,233.0,0.421769,0.266094,0.326316,233.0,0.421769,0.266094,0.326316,233.0,147,1038,3458
3,0.1739,0.643523,0.828128,0.414966,0.291866,0.342697,209.0,0.414966,0.291866,0.342697,209.0,0.414966,0.291866,0.342697,209.0,0.414966,0.291866,0.342697,209.0,147,1038,3458
4,0.1224,0.826185,0.839543,0.346939,0.289773,0.315789,176.0,0.346939,0.289773,0.315789,176.0,0.346939,0.289773,0.315789,176.0,0.346939,0.289773,0.315789,176.0,147,1038,3458
5,0.0673,0.915337,0.827698,0.462585,0.350515,0.398827,194.0,0.462585,0.350515,0.398827,194.0,0.462585,0.350515,0.398827,194.0,0.462585,0.350515,0.398827,194.0,147,1038,3458
6,0.0399,1.031569,0.846435,0.44898,0.366667,0.40367,180.0,0.44898,0.366667,0.40367,180.0,0.44898,0.366667,0.40367,180.0,0.44898,0.366667,0.40367,180.0,147,1038,3458
7,0.0269,1.057294,0.844282,0.462585,0.395349,0.426332,172.0,0.462585,0.395349,0.426332,172.0,0.462585,0.395349,0.426332,172.0,0.462585,0.395349,0.426332,172.0,147,1038,3458
8,0.0106,1.15594,0.843851,0.44898,0.358696,0.398792,184.0,0.44898,0.358696,0.398792,184.0,0.44898,0.358696,0.398792,184.0,0.44898,0.358696,0.398792,184.0,147,1038,3458
9,0.0065,1.172457,0.837174,0.47619,0.351759,0.404624,199.0,0.47619,0.351759,0.404624,199.0,0.47619,0.351759,0.404624,199.0,0.47619,0.351759,0.404624,199.0,147,1038,3458


Training model no. 2 of 23 for Loaded_Language persuasion technique...


Map:   0%|          | 0/12305 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 12305/12305 [00:03<00:00, 3396.58 examples/s]
Map: 100%|██████████| 3077/3077 [00:00<00:00, 4801.96 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Loaded Language Precision,Loaded Language Recall,Loaded Language F1-score,Loaded Language Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-loaded Language Support,I-loaded Language Support
1,0.3205,0.273741,0.901205,0.199304,0.212628,0.20575,1077.0,0.199304,0.212628,0.20575,1077.0,0.199304,0.212628,0.20575,1077.0,0.199304,0.212628,0.20575,1077.0,43185,1149,4474
2,0.2337,0.302916,0.904073,0.260226,0.2467,0.253283,1212.0,0.260226,0.2467,0.253283,1212.0,0.260226,0.2467,0.253283,1212.0,0.260226,0.2467,0.253283,1212.0,43185,1149,4474
3,0.1511,0.415176,0.905958,0.239339,0.261159,0.249773,1053.0,0.239339,0.261159,0.249773,1053.0,0.239339,0.261159,0.249773,1053.0,0.239339,0.261159,0.249773,1053.0,43185,1149,4474
4,0.0984,0.450801,0.903868,0.258486,0.270246,0.264235,1099.0,0.258486,0.270246,0.264235,1099.0,0.258486,0.270246,0.264235,1099.0,0.258486,0.270246,0.264235,1099.0,43185,1149,4474
5,0.0655,0.497915,0.903643,0.292428,0.272949,0.282353,1231.0,0.292428,0.272949,0.282353,1231.0,0.292428,0.272949,0.282353,1231.0,0.292428,0.272949,0.282353,1231.0,43185,1149,4474
6,0.0445,0.565076,0.907761,0.27154,0.29771,0.284024,1048.0,0.27154,0.29771,0.284024,1048.0,0.27154,0.29771,0.284024,1048.0,0.27154,0.29771,0.284024,1048.0,43185,1149,4474
7,0.0287,0.608851,0.903315,0.274151,0.269001,0.271552,1171.0,0.274151,0.269001,0.271552,1171.0,0.274151,0.269001,0.271552,1171.0,0.274151,0.269001,0.271552,1171.0,43185,1149,4474
8,0.0197,0.650004,0.904565,0.303742,0.277866,0.290229,1256.0,0.303742,0.277866,0.290229,1256.0,0.303742,0.277866,0.290229,1256.0,0.303742,0.277866,0.290229,1256.0,43185,1149,4474
9,0.0139,0.701813,0.905425,0.282855,0.283843,0.283348,1145.0,0.282855,0.283843,0.283348,1145.0,0.282855,0.283843,0.283348,1145.0,0.282855,0.283843,0.283348,1145.0,43185,1149,4474
10,0.0086,0.727075,0.905692,0.292428,0.286689,0.28953,1172.0,0.292428,0.286689,0.28953,1172.0,0.292428,0.286689,0.28953,1172.0,0.292428,0.286689,0.28953,1172.0,43185,1149,4474


Training model no. 3 of 23 for Repetition persuasion technique...


Map:   0%|          | 0/1777 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 1777/1777 [00:00<00:00, 4938.28 examples/s]
Map: 100%|██████████| 445/445 [00:00<00:00, 5921.45 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Repetition Precision,Repetition Recall,Repetition F1-score,Repetition Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-repetition Support,I-repetition Support
1,0.4315,0.360592,0.891079,0.058442,0.166667,0.086538,54.0,0.058442,0.166667,0.086538,54.0,0.058442,0.166667,0.086538,54.0,0.058442,0.166667,0.086538,54.0,5801,154,591
2,0.3166,0.395089,0.898564,0.188312,0.216418,0.201389,134.0,0.188312,0.216418,0.201389,134.0,0.188312,0.216418,0.201389,134.0,0.188312,0.216418,0.201389,134.0,5801,154,591
3,0.2059,0.373295,0.89612,0.337662,0.26943,0.299712,193.0,0.337662,0.26943,0.299712,193.0,0.337662,0.26943,0.299712,193.0,0.337662,0.26943,0.299712,193.0,5801,154,591
4,0.1019,0.424962,0.889245,0.441558,0.282158,0.344304,241.0,0.441558,0.282158,0.344304,241.0,0.441558,0.282158,0.344304,241.0,0.441558,0.282158,0.344304,241.0,5801,154,591
5,0.0632,0.590395,0.899328,0.350649,0.3375,0.343949,160.0,0.350649,0.3375,0.343949,160.0,0.350649,0.3375,0.343949,160.0,0.350649,0.3375,0.343949,160.0,5801,154,591
6,0.0317,0.592288,0.897342,0.376623,0.333333,0.353659,174.0,0.376623,0.333333,0.353659,174.0,0.376623,0.333333,0.353659,174.0,0.376623,0.333333,0.353659,174.0,5801,154,591
7,0.0156,0.700392,0.892759,0.376623,0.291457,0.328612,199.0,0.376623,0.291457,0.328612,199.0,0.376623,0.291457,0.328612,199.0,0.376623,0.291457,0.328612,199.0,5801,154,591
8,0.0093,0.724647,0.904216,0.422078,0.361111,0.389222,180.0,0.422078,0.361111,0.389222,180.0,0.422078,0.361111,0.389222,180.0,0.422078,0.361111,0.389222,180.0,5801,154,591
9,0.0041,0.773399,0.902536,0.396104,0.335165,0.363095,182.0,0.396104,0.335165,0.363095,182.0,0.396104,0.335165,0.363095,182.0,0.396104,0.335165,0.363095,182.0,5801,154,591
10,0.0024,0.77571,0.902536,0.402597,0.329787,0.362573,188.0,0.402597,0.329787,0.362573,188.0,0.402597,0.329787,0.362573,188.0,0.402597,0.329787,0.362573,188.0,5801,154,591


Training model no. 4 of 23 for Exaggeration-Minimisation persuasion technique...


Map:   0%|          | 0/2737 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 2737/2737 [00:00<00:00, 5498.02 examples/s]
Map: 100%|██████████| 685/685 [00:00<00:00, 2078.21 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Exaggeration-minimisation Precision,Exaggeration-minimisation Recall,Exaggeration-minimisation F1-score,Exaggeration-minimisation Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-exaggeration-minimisation Support,I-exaggeration-minimisation Support
1,0.5453,0.530045,0.808092,0.063107,0.048689,0.054968,267.0,0.063107,0.048689,0.054968,267.0,0.063107,0.048689,0.054968,267.0,0.063107,0.048689,0.054968,267.0,7379,206,1857
2,0.3789,0.515384,0.815505,0.043689,0.047619,0.04557,189.0,0.043689,0.047619,0.04557,189.0,0.043689,0.047619,0.04557,189.0,0.043689,0.047619,0.04557,189.0,7379,206,1857
3,0.2407,0.778248,0.822919,0.18932,0.164557,0.176072,237.0,0.18932,0.164557,0.176072,237.0,0.18932,0.164557,0.176072,237.0,0.18932,0.164557,0.176072,237.0,7379,206,1857
4,0.1445,0.887731,0.804914,0.228155,0.148734,0.180077,316.0,0.228155,0.148734,0.180077,316.0,0.228155,0.148734,0.180077,316.0,0.228155,0.148734,0.180077,316.0,7379,206,1857
5,0.0814,0.980013,0.820059,0.174757,0.151261,0.162162,238.0,0.174757,0.151261,0.162162,238.0,0.174757,0.151261,0.162162,238.0,0.174757,0.151261,0.162162,238.0,7379,206,1857
6,0.0544,1.153074,0.814976,0.184466,0.145594,0.162741,261.0,0.184466,0.145594,0.162741,261.0,0.184466,0.145594,0.162741,261.0,0.184466,0.145594,0.162741,261.0,7379,206,1857


Training model no. 5 of 23 for Obfuscation-Vagueness-Confusion persuasion technique...


Map:   0%|          | 0/560 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 560/560 [00:00<00:00, 4917.31 examples/s]
Map: 100%|██████████| 140/140 [00:00<00:00, 3873.50 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Obfuscation-vagueness-confusion Precision,Obfuscation-vagueness-confusion Recall,Obfuscation-vagueness-confusion F1-score,Obfuscation-vagueness-confusion Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-obfuscation-vagueness-confusion Support,I-obfuscation-vagueness-confusion Support
1,0.7733,0.72575,0.605795,0.0,0.0,0.0,10.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,10.0,1374,47,857
2,0.5906,0.687676,0.693152,0.106383,0.054945,0.072464,91.0,0.106383,0.054945,0.072464,91.0,0.106383,0.054945,0.072464,91.0,0.106383,0.054945,0.072464,91.0,1374,47,857
3,0.4564,0.749248,0.649254,0.085106,0.037736,0.052288,106.0,0.085106,0.037736,0.052288,106.0,0.085106,0.037736,0.052288,106.0,0.085106,0.037736,0.052288,106.0,1374,47,857
4,0.2676,0.895641,0.658472,0.06383,0.027523,0.038462,109.0,0.06383,0.027523,0.038462,109.0,0.06383,0.027523,0.038462,109.0,0.06383,0.027523,0.038462,109.0,1374,47,857


Training model no. 6 of 23 for Name_Calling-Labeling persuasion technique...


Map:   0%|          | 0/8819 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 8819/8819 [00:02<00:00, 4046.60 examples/s]
Map: 100%|██████████| 2205/2205 [00:00<00:00, 5231.97 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Name Calling-labeling Precision,Name Calling-labeling Recall,Name Calling-labeling F1-score,Name Calling-labeling Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-name Calling-labeling Support,I-name Calling-labeling Support
1,0.2283,,0.9341,0.267157,0.269136,0.268143,810.0,0.267157,0.269136,0.268143,810.0,0.267157,0.269136,0.268143,810.0,0.267157,0.269136,0.268143,810.0,29653,816,2217
2,0.139,,0.937466,0.351716,0.347879,0.349787,825.0,0.351716,0.347879,0.349787,825.0,0.351716,0.347879,0.349787,825.0,0.351716,0.347879,0.349787,825.0,29653,816,2217
3,0.0884,,0.937924,0.359069,0.397019,0.377091,738.0,0.359069,0.397019,0.377091,738.0,0.359069,0.397019,0.377091,738.0,0.359069,0.397019,0.377091,738.0,29653,816,2217
4,0.0588,,0.937802,0.367647,0.405405,0.385604,740.0,0.367647,0.405405,0.385604,740.0,0.367647,0.405405,0.385604,740.0,0.367647,0.405405,0.385604,740.0,29653,816,2217
5,0.0382,,0.939393,0.382353,0.418231,0.399488,746.0,0.382353,0.418231,0.399488,746.0,0.382353,0.418231,0.399488,746.0,0.382353,0.418231,0.399488,746.0,29653,816,2217
6,0.0238,,0.938353,0.395833,0.377336,0.386364,856.0,0.395833,0.377336,0.386364,856.0,0.395833,0.377336,0.386364,856.0,0.395833,0.377336,0.386364,856.0,29653,816,2217
7,0.0154,,0.939546,0.42402,0.411905,0.417874,840.0,0.42402,0.411905,0.417874,840.0,0.42402,0.411905,0.417874,840.0,0.42402,0.411905,0.417874,840.0,29653,816,2217
8,0.0077,,0.940464,0.433824,0.4,0.416226,885.0,0.433824,0.4,0.416226,885.0,0.433824,0.4,0.416226,885.0,0.433824,0.4,0.416226,885.0,29653,816,2217
9,0.0046,,0.93924,0.447304,0.423926,0.435301,861.0,0.447304,0.423926,0.435301,861.0,0.447304,0.423926,0.435301,861.0,0.447304,0.423926,0.435301,861.0,29653,816,2217
10,0.0026,,0.939424,0.439951,0.422353,0.430972,850.0,0.439951,0.422353,0.430972,850.0,0.439951,0.422353,0.430972,850.0,0.439951,0.422353,0.430972,850.0,29653,816,2217


Training model no. 7 of 23 for Doubt persuasion technique...


Map:   0%|          | 0/6923 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 6923/6923 [00:01<00:00, 4553.09 examples/s]
Map: 100%|██████████| 1731/1731 [00:00<00:00, 4757.60 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Doubt Precision,Doubt Recall,Doubt F1-score,Doubt Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-doubt Support,I-doubt Support
1,0.6446,,0.716801,0.238162,0.132455,0.170234,1291.0,0.238162,0.132455,0.170234,1291.0,0.238162,0.132455,0.170234,1291.0,0.238162,0.132455,0.170234,1291.0,15917,718,11709
2,0.489,,0.722375,0.334262,0.215247,0.261866,1115.0,0.334262,0.215247,0.261866,1115.0,0.334262,0.215247,0.261866,1115.0,0.334262,0.215247,0.261866,1115.0,15917,718,11709
3,0.3396,,0.739098,0.317549,0.236269,0.270945,965.0,0.317549,0.236269,0.270945,965.0,0.317549,0.236269,0.270945,965.0,0.317549,0.236269,0.270945,965.0,15917,718,11709
4,0.2348,,0.73991,0.349582,0.235902,0.281706,1064.0,0.349582,0.235902,0.281706,1064.0,0.349582,0.235902,0.281706,1064.0,0.349582,0.235902,0.281706,1064.0,15917,718,11709
5,0.1514,,0.735147,0.267409,0.232727,0.248866,825.0,0.267409,0.232727,0.248866,825.0,0.267409,0.232727,0.248866,825.0,0.267409,0.232727,0.248866,825.0,15917,718,11709
6,0.1088,,0.751129,0.33844,0.271205,0.301115,896.0,0.33844,0.271205,0.301115,896.0,0.33844,0.271205,0.301115,896.0,0.33844,0.271205,0.301115,896.0,15917,718,11709
7,0.073,,0.745872,0.309192,0.231491,0.264758,959.0,0.309192,0.231491,0.264758,959.0,0.309192,0.231491,0.264758,959.0,0.309192,0.231491,0.264758,959.0,15917,718,11709
8,0.0504,,0.740827,0.317549,0.242295,0.274864,941.0,0.317549,0.242295,0.274864,941.0,0.317549,0.242295,0.274864,941.0,0.317549,0.242295,0.274864,941.0,15917,718,11709


Training model no. 8 of 23 for Guilt_by_Association persuasion technique...


Map:   0%|          | 0/1030 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 1030/1030 [00:00<00:00, 2300.87 examples/s]
Map: 100%|██████████| 258/258 [00:00<00:00, 5705.21 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Guilt By Association Precision,Guilt By Association Recall,Guilt By Association F1-score,Guilt By Association Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-guilt By Association Support,I-guilt By Association Support
1,0.6548,,0.760696,0.094118,0.04908,0.064516,163.0,0.094118,0.04908,0.064516,163.0,0.094118,0.04908,0.064516,163.0,0.094118,0.04908,0.064516,163.0,2151,85,1270
2,0.4754,,0.785796,0.176471,0.113636,0.138249,132.0,0.176471,0.113636,0.138249,132.0,0.176471,0.113636,0.138249,132.0,0.176471,0.113636,0.138249,132.0,2151,85,1270
3,0.2826,,0.754706,0.235294,0.138889,0.174672,144.0,0.235294,0.138889,0.174672,144.0,0.235294,0.138889,0.174672,144.0,0.235294,0.138889,0.174672,144.0,2151,85,1270
4,0.1823,,0.748716,0.105882,0.083333,0.093264,108.0,0.105882,0.083333,0.093264,108.0,0.105882,0.083333,0.093264,108.0,0.105882,0.083333,0.093264,108.0,2151,85,1270
5,0.1044,,0.776098,0.223529,0.158333,0.185366,120.0,0.223529,0.158333,0.185366,120.0,0.223529,0.158333,0.185366,120.0,0.223529,0.158333,0.185366,120.0,2151,85,1270
6,0.0602,,0.769538,0.258824,0.183333,0.214634,120.0,0.258824,0.183333,0.214634,120.0,0.258824,0.183333,0.214634,120.0,0.258824,0.183333,0.214634,120.0,2151,85,1270
7,0.0378,,0.774102,0.270588,0.149351,0.192469,154.0,0.270588,0.149351,0.192469,154.0,0.270588,0.149351,0.192469,154.0,0.270588,0.149351,0.192469,154.0,2151,85,1270
8,0.0248,,0.763833,0.2,0.139344,0.164251,122.0,0.2,0.139344,0.164251,122.0,0.2,0.139344,0.164251,122.0,0.2,0.139344,0.164251,122.0,2151,85,1270


Training model no. 9 of 23 for Appeal_to_Hypocrisy persuasion technique...


Map:   0%|          | 0/1358 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 1358/1358 [00:00<00:00, 4462.99 examples/s]
Map: 100%|██████████| 340/340 [00:00<00:00, 5570.56 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Appeal To Hypocrisy Precision,Appeal To Hypocrisy Recall,Appeal To Hypocrisy F1-score,Appeal To Hypocrisy Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-appeal To Hypocrisy Support,I-appeal To Hypocrisy Support
1,0.6756,0.600889,0.709728,0.073529,0.032362,0.044944,309.0,0.073529,0.032362,0.044944,309.0,0.073529,0.032362,0.044944,309.0,0.073529,0.032362,0.044944,309.0,2721,136,2879
2,0.4613,0.586086,0.748431,0.323529,0.166667,0.22,264.0,0.323529,0.166667,0.22,264.0,0.323529,0.166667,0.22,264.0,0.323529,0.166667,0.22,264.0,2721,136,2879
3,0.2941,0.762761,0.754184,0.330882,0.194805,0.245232,231.0,0.330882,0.194805,0.245232,231.0,0.330882,0.194805,0.245232,231.0,0.330882,0.194805,0.245232,231.0,2721,136,2879
4,0.1669,0.975027,0.75122,0.227941,0.158163,0.186747,196.0,0.227941,0.158163,0.186747,196.0,0.227941,0.158163,0.186747,196.0,0.227941,0.158163,0.186747,196.0,2721,136,2879
5,0.1084,0.932131,0.768131,0.367647,0.26455,0.307692,189.0,0.367647,0.26455,0.307692,189.0,0.367647,0.26455,0.307692,189.0,0.367647,0.26455,0.307692,189.0,2721,136,2879
6,0.0568,1.181026,0.752266,0.272059,0.185,0.220238,200.0,0.272059,0.185,0.220238,200.0,0.272059,0.185,0.220238,200.0,0.272059,0.185,0.220238,200.0,2721,136,2879
7,0.0357,1.306478,0.776848,0.411765,0.321839,0.36129,174.0,0.411765,0.321839,0.36129,174.0,0.411765,0.321839,0.36129,174.0,0.411765,0.321839,0.36129,174.0,2721,136,2879
8,0.0302,1.370492,0.767434,0.345588,0.258242,0.295597,182.0,0.345588,0.258242,0.295597,182.0,0.345588,0.258242,0.295597,182.0,0.345588,0.258242,0.295597,182.0,2721,136,2879
9,0.0128,1.442565,0.771792,0.338235,0.234694,0.277108,196.0,0.338235,0.234694,0.277108,196.0,0.338235,0.234694,0.277108,196.0,0.338235,0.234694,0.277108,196.0,2721,136,2879


Training model no. 10 of 23 for Questioning_the_Reputation persuasion technique...


Map:   0%|          | 0/3494 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 3494/3494 [00:00<00:00, 5117.86 examples/s]
Map: 100%|██████████| 874/874 [00:00<00:00, 1835.45 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Questioning The Reputation Precision,Questioning The Reputation Recall,Questioning The Reputation F1-score,Questioning The Reputation Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-questioning The Reputation Support,I-questioning The Reputation Support
1,0.6108,0.537043,0.757313,0.195652,0.124506,0.152174,506.0,0.195652,0.124506,0.152174,506.0,0.195652,0.124506,0.152174,506.0,0.195652,0.124506,0.152174,506.0,7618,322,5221
2,0.4568,0.530318,0.767267,0.31677,0.190299,0.237762,536.0,0.31677,0.190299,0.237762,536.0,0.31677,0.190299,0.237762,536.0,0.31677,0.190299,0.237762,536.0,7618,322,5221
3,0.2959,0.726654,0.767495,0.220497,0.159193,0.184896,446.0,0.220497,0.159193,0.184896,446.0,0.220497,0.159193,0.184896,446.0,0.220497,0.159193,0.184896,446.0,7618,322,5221
4,0.183,0.890534,0.764608,0.208075,0.146608,0.172015,457.0,0.208075,0.146608,0.172015,457.0,0.208075,0.146608,0.172015,457.0,0.208075,0.146608,0.172015,457.0,7618,322,5221


In [None]:
### MACRO F1 (across PTs) = 0.263