In [13]:
import json
import itertools
import os
from tokenizers import Encoding
from typing import List
from transformers import AutoTokenizer, AutoModelForTokenClassification, DataCollatorForTokenClassification, TrainingArguments, Trainer, EarlyStoppingCallback
import torch
from collections import defaultdict
from datasets import Dataset
import pandas as pd
import numpy as np
import evaluate
from sklearn.metrics import f1_score
from collections import Counter
from seqeval.metrics import classification_report
import re
from datetime import datetime
import unicodedata

In [14]:
def align_tokens_and_annotations_bio(tokenized: Encoding, annotations):
    tokens = tokenized.tokens
    aligned_labels = ["O"] * len(
        tokens
    )  # Make a list to store our labels the same length as our tokens
    for anno in annotations:
        annotation_token_ix_set = (
            set()
        )  # A set that stores the token indices of the annotation
        for char_ix in range(anno["start"], anno["end"]):
            print('char_ix = ', char_ix)
            token_ix = tokenized.char_to_token(char_ix)
            if token_ix is not None:
                annotation_token_ix_set.add(token_ix)
        if len(annotation_token_ix_set) == 1:
            # If there is only one token
            token_ix = annotation_token_ix_set.pop()
            prefix = (
                "B"  # This annotation spans one token so is prefixed with U for unique
            )
            aligned_labels[token_ix] = f"{prefix}-{anno['tag']}"

        else:

            last_token_in_anno_ix = len(annotation_token_ix_set) - 1
            for num, token_ix in enumerate(sorted(annotation_token_ix_set)):
                if num == 0:
                    prefix = "B"
                elif num == last_token_in_anno_ix:
                    prefix = "I"  # Its the last token
                else:
                    prefix = "I"  # We're inside of a multi token annotation
                aligned_labels[token_ix] = f"{prefix}-{anno['tag']}"
    return aligned_labels

class LabelSet:
    def __init__(self, labels: List[str]):
        self.labels_to_id = {}
        self.ids_to_label = {}
        self.labels_to_id["O"] = 0
        self.ids_to_label[0] = "O"
        num = 0  # in case there are no labels
        # Writing BILU will give us incremental ids for the labels
        for _num, (label, s) in enumerate(itertools.product(labels, "BI")):
            num = _num + 1  # skip 0
            l = f"{s}-{label}"
            self.labels_to_id[l] = num
            self.ids_to_label[num] = l


    def get_aligned_label_ids_from_annotations(self, tokenized_text, annotations):
        raw_labels = align_tokens_and_annotations_bio(tokenized_text, annotations)
        return list(map(self.labels_to_id.get, raw_labels))


def tokenize_token_classification(examples, tokenizer):
    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True, padding='longest', return_tensors='pt')

    labels = []
    for i, label in enumerate(examples[f"ner_tags"]):
        word_ids = [tokenized_inputs.token_to_word(i, j) for j in range(len(tokenized_inputs['input_ids'][i]))]  # Map tokens to their respective word.
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:  # Set the special tokens to -100.
            if word_idx is None:
                label_ids.append(-100)
            elif word_idx != previous_word_idx:  # Only label the first token of a given word.
                label_ids.append(label[word_idx])
            else:
                label_ids.append(-100)
            previous_word_idx = word_idx
        labels.append(label_ids)

    tokenized_inputs["labels"] = torch.tensor(labels)
    return tokenized_inputs

def dict_of_lists(lst_of_dicts):
    result = defaultdict(list)
    for d in lst_of_dicts:
        for key, value in d.items():
            result[key].append(value)
    return dict(result)

def list_of_dicts(dict_of_lists):
    # First, we need to check if all lists are of the same length to ensure correct transformation
    if not all(len(lst) == len(next(iter(dict_of_lists.values()))) for lst in dict_of_lists.values()):
        raise ValueError("All lists in the dictionary must have the same length")

    # Get the length of the items in any of the lists
    length = len(next(iter(dict_of_lists.values())))
    
    # Create a list of dictionaries, one for each index in the lists
    result = []
    for i in range(length):
        # Create a dictionary for the current index 'i' across all lists
        new_dict = {key: dict_of_lists[key][i] for key in dict_of_lists}
        result.append(new_dict)
    
    return result

def sub_shift_spans(text, ents = [], mappings = []):
    for mapping in mappings:
        adjustment = 0
        pattern = re.compile(mapping['pattern'])
        for match in re.finditer(pattern, text):
            match_index = match.start() + adjustment
            match_contents = match.group()
            if all(mapping['check'](char) for char in match_contents):
                subbed_text = mapping['target'].replace('placeholder', match_contents)
            else:
                subbed_text = mapping['target']
            len_diff = len(subbed_text) - len(match_contents)
            text = text[:match_index] + subbed_text + text[match_index + len(match_contents):]
            if ents:
                if isinstance(ents, list):
                    for ent in ents:
                        if ent['start'] <= match_index and ent['end'] > match_index:
                            ent['end'] += len_diff
                        if ent['start'] > match_index:
                            ent['start'] += len_diff
                            ent['end'] += len_diff
                elif isinstance(ents, dict):
                    if ents['value']['start'] <= match_index and ents['value']['end'] > match_index:
                        ents['value']['end'] += len_diff
                    if ents['value']['start'] > match_index:
                        ents['value']['start'] += len_diff
                        ents['value']['end'] += len_diff

            adjustment += len_diff

    return text, ents

def span_to_words_annotation(samples, target_tag = '', mappings = {}, labels_model = []):
    samples_new = []
    # if not any([l for l in samples['annotations']]):
        
    for i in range(len(samples['data'])):
        text, annotation_list = samples['data'][i]['text'], samples['annotations'][i][0]['result']
        labels_text = []
        tokens = []
        if not annotation_list:
            annotation_list = [[]]
        for j, annotation in enumerate(annotation_list):
            if isinstance(annotation, dict):
                if annotation['value']['labels'][0] != target_tag:
                    continue
            text_subshifted, ents = sub_shift_spans(text, annotation, mappings=mappings)
            text_subshifted_matches = re.finditer(r'[^\s]+', text_subshifted)
            labels_words = []
            first = True
            for regex_match in text_subshifted_matches:
                if j == 0:
                    tokens.append(regex_match.group())
                if isinstance(annotation, dict):
                    if regex_match.start() < ents['value']['start']:
                        labels_words.append(labels_model.labels_to_id['O'])
                    elif regex_match.start() >= ents['value']['start'] and regex_match.end() <= ents['value']['end']:
                        if first:
                            labels_words.append(labels_model.labels_to_id['B-' + ents['value']['labels'][0]])
                            first = False
                        elif not first:
                            labels_words.append(labels_model.labels_to_id['I-' + ents['value']['labels'][0]])
                    else:
                        labels_words.append(labels_model.labels_to_id['O'])
                    labels_text.append({'labels': labels_words, 'tag': annotation['value']['labels'][0]})
        allowed_labels = [labels_model.labels_to_id['O'],
                          labels_model.labels_to_id['B-' + target_tag],
                          labels_model.labels_to_id['I-' + target_tag],
                          ]
        # if the training sample has no tags that we need, we just produce a 0s list
        if target_tag not in [labels['tag'] for labels in labels_text]:
            labels = [0] * len(tokens)
            tag = 'no_tag'
        # if the training sample has tags we need, we first exclude the label lists whose tags don't match
        # and then we merge the label lists that have tags that match the target tag
        else:
            labels = [max(values) for values in zip(*[labels['labels'] for labels in labels_text if labels['tag'] == target_tag])]
            labels = [(label if label in allowed_labels else 0) for label in labels]
            tag = target_tag
        samples_new.append({
            'id': i,
            'ner_tags': labels,
            'tokens': tokens,
            'tag': tag,
        })
    return samples_new

regex_tokenizer_mappings = [
    {'pattern': r'(?<!\s)([^\w\s])|([^\w\s])(?!\s)',
    'target': ' placeholder ',
    'check': lambda x: unicodedata.category(x).startswith('P'),
    },
    {'pattern': r'\s+',
     'target': ' ',
     'check': lambda x: False if re.match('\s+', x) is None else True,
     },
    ]

def compute_metrics_wrapper(label_list, pt, model_name_simple, date_time):
    def compute_metrics(eval_preds):
        nonlocal label_list
        nonlocal pt
        logits, labels = eval_preds
        predictions = np.argmax(logits, axis=2)

        # Extract the true predictions and labels from the sequences
        true_predictions = [
            [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
            for prediction, label in zip(predictions, labels)
        ]
        true_labels = [
            [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
            for prediction, label in zip(predictions, labels)
        ]

        # Compute sequence-level evaluation metrics
        results = classification_report(true_predictions, true_labels, output_dict=True)

        # Flatten the lists to calculate micro F1-score and supports
        flat_true_predictions = [item for sublist in true_predictions for item in sublist]
        flat_true_labels = [item for sublist in true_labels for item in sublist]

        # Calculate micro F1-score using sklearn
        micro_f1 = f1_score(flat_true_labels, flat_true_predictions, average='micro')

        # Prepare the results dictionary
        flat_results = {'micro_f1': float(micro_f1)}

        # Add detailed metrics for each label to the results dictionary
        for label, metrics in results.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    flat_results[f'{label}_{metric}'] = float(value)

        # Compute support for each label using Counter
        label_support = Counter(flat_true_labels)
        for label, count in label_support.items():
            flat_results[f'{label}_support'] = count
        
        models_dir = '/home/lgiordano/LUCA/checkthat_GITHUB/models/M2/RUN_OTTOBRE/weights_and_results'
        model_save_name = f'{model_name_simple}_{tt[0]}_target={tt[1]}_SUBSAMPLED_{date_time}'
        model_save_dir = os.path.join(models_dir, date_time+'_no_aug_no_cw_ts0', model_save_name)
        if not os.path.exists(model_save_dir):
            os.makedirs(model_save_dir)

        with open(os.path.join(model_save_dir, 'results.json'), 'w', encoding='utf8') as f:
            json.dump(flat_results, f, ensure_ascii = False)

        return flat_results
    return compute_metrics

In [15]:
date_time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

train_data_path = '/home/lgiordano/LUCA/checkthat_GITHUB/data/formatted/train_sentences.json'
with open(train_data_path, 'r', encoding='utf8') as f:
    dataset_raw = json.load(f)

df_raw = pd.DataFrame(dataset_raw)  

df_pos = df_raw[df_raw['annotations'].apply(lambda x: len(x[0]['result']) > 0)]
df_neg = df_raw[df_raw['annotations'].apply(lambda x: x[0]['result'] == [])].sample(len(df_pos))
df = pd.concat([df_pos,df_neg])

target_tags = ["Appeal_to_Authority", "Appeal_to_Popularity","Appeal_to_Values","Appeal_to_Fear-Prejudice","Flag_Waving","Causal_Oversimplification",
               "False_Dilemma-No_Choice","Consequential_Oversimplification","Straw_Man","Red_Herring","Whataboutism","Slogans","Appeal_to_Time",
               "Conversation_Killer","Loaded_Language","Repetition","Exaggeration-Minimisation","Obfuscation-Vagueness-Confusion","Name_Calling-Labeling",
               "Doubt","Guilt_by_Association","Appeal_to_Hypocrisy","Questioning_the_Reputation"]
target_tags = [(i, el.strip()) for i, el in enumerate(target_tags)]


In [16]:
shift = 0
for i, tt in enumerate(target_tags):
    if i < shift:
        continue
    print(f'Training model no. {i} of {len(target_tags)} for {tt[1]} persuasion technique...')
    labels_model = LabelSet(labels=[tt[1]])
    
    df_list = df.to_dict(orient='records')
    df_list_binary = span_to_words_annotation(dict_of_lists(df_list), target_tag=tt[1], mappings=regex_tokenizer_mappings, labels_model=labels_model)
    df_binary = pd.DataFrame(df_list_binary)
    df_binary_pos = df_binary[df_binary['tag'] == tt[1]]
    df_binary_neg = df_binary[df_binary['tag'] != tt[1]].sample(len(df_binary_pos))
    df_binary_subsampled = pd.concat([df_binary_pos, df_binary_neg])#.sample(1000)

    binary_dataset = Dataset.from_pandas(df_binary_subsampled[['id', 'ner_tags', 'tokens']])

    split_ratio = 0.2
    split_seed = 42
    datadict = binary_dataset.train_test_split(split_ratio, seed=split_seed)

    #model_name = 'bert-base-multilingual-cased'
    #model_name = 'xlm-roberta-base'
    model_name = 'microsoft/mdeberta-v3-base'
    model_name_simple = model_name.split('/')[-1]
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    batch_size = 16
    datadict = datadict.map(lambda x: tokenize_token_classification(x, tokenizer), batched=True, batch_size=None)

    columns = [
                'input_ids',
                'token_type_ids',
                'attention_mask',
                'labels'
                ]

    datadict.set_format('torch', columns = columns)

    train_data = datadict['train']
    val_data = datadict['test']

    data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer, padding='longest')

    model = AutoModelForTokenClassification.from_pretrained(model_name,
                                                                num_labels=len(labels_model.ids_to_label.values()),
                                                                label2id=labels_model.labels_to_id,
                                                                id2label=labels_model.ids_to_label,
                                                                )
    
    training_args = TrainingArguments(output_dir=f'/home/lgiordano/LUCA/checkthat_GITHUB/models/M2/RUN_OTTOBRE/weights_and_results/{date_time}_no_aug_no_cw_ts0/weights/mdeberta-v3-base-NEW_aug_{i}_{tt[1]}',
                                  save_total_limit=2,
                                  save_strategy='epoch',
                                  load_best_model_at_end=True,
                                  save_only_model=True,
                                  metric_for_best_model='eval_macro avg_f1-score',
                                  logging_strategy='epoch',
                                  evaluation_strategy='epoch',
                                  learning_rate=5e-5,
                                  optim='adamw_torch',
                                  num_train_epochs=10)
    
    early_stopping = EarlyStoppingCallback(early_stopping_patience=2)

    trainer = Trainer(model,
                      training_args,
                      train_dataset=train_data,
                      eval_dataset=val_data,
                      data_collator=data_collator,
                      tokenizer=tokenizer,
                      compute_metrics=compute_metrics_wrapper(
                          label_list=[i for i in labels_model.ids_to_label.values()],
                          pt=tt[1],
                          model_name_simple=model_name_simple,
                          date_time=date_time),
                      callbacks=[early_stopping])
    
    trainer.train()

    trainer.evaluate()

Training model no. 0 of 23 for Appeal_to_Authority persuasion technique...


Map:   0%|          | 0/1092 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 1092/1092 [00:00<00:00, 3991.04 examples/s]
Map: 100%|██████████| 274/274 [00:00<00:00, 4837.84 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Appeal To Authority Precision,Appeal To Authority Recall,Appeal To Authority F1-score,Appeal To Authority Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-appeal To Authority Support,I-appeal To Authority Support
1,0.7081,0.596136,0.702357,0.102564,0.047059,0.064516,255.0,0.102564,0.047059,0.064516,255.0,0.102564,0.047059,0.064516,255.0,0.102564,0.047059,0.064516,255.0,2877,117,1928
2,0.5359,0.584248,0.695043,0.102564,0.044444,0.062016,270.0,0.102564,0.044444,0.062016,270.0,0.102564,0.044444,0.062016,270.0,0.102564,0.044444,0.062016,270.0,2877,117,1928
3,0.3478,0.680259,0.712718,0.196581,0.097046,0.129944,237.0,0.196581,0.097046,0.129944,237.0,0.196581,0.097046,0.129944,237.0,0.196581,0.097046,0.129944,237.0,2877,117,1928
4,0.1905,1.020438,0.725518,0.230769,0.142857,0.176471,189.0,0.230769,0.142857,0.176471,189.0,0.230769,0.142857,0.176471,189.0,0.230769,0.142857,0.176471,189.0,2877,117,1928
5,0.118,1.019248,0.724705,0.213675,0.112108,0.147059,223.0,0.213675,0.112108,0.147059,223.0,0.213675,0.112108,0.147059,223.0,0.213675,0.112108,0.147059,223.0,2877,117,1928
6,0.077,1.216146,0.738318,0.230769,0.137755,0.172524,196.0,0.230769,0.137755,0.172524,196.0,0.230769,0.137755,0.172524,196.0,0.230769,0.137755,0.172524,196.0,2877,117,1928


Training model no. 1 of 23 for Appeal_to_Popularity persuasion technique...


Map:   0%|          | 0/540 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 540/540 [00:00<00:00, 5496.88 examples/s]
Map: 100%|██████████| 136/136 [00:00<00:00, 2985.49 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Appeal To Popularity Precision,Appeal To Popularity Recall,Appeal To Popularity F1-score,Appeal To Popularity Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-appeal To Popularity Support,I-appeal To Popularity Support
1,0.7822,0.507714,0.769728,0.042553,0.012987,0.0199,154.0,0.042553,0.012987,0.0199,154.0,0.042553,0.012987,0.0199,154.0,0.042553,0.012987,0.0199,154.0,1499,47,621
2,0.4618,0.505493,0.754038,0.042553,0.019608,0.026846,102.0,0.042553,0.019608,0.026846,102.0,0.042553,0.019608,0.026846,102.0,0.042553,0.019608,0.026846,102.0,1499,47,621
3,0.2362,0.639077,0.785879,0.148936,0.067308,0.092715,104.0,0.148936,0.067308,0.092715,104.0,0.148936,0.067308,0.092715,104.0,0.148936,0.067308,0.092715,104.0,1499,47,621
4,0.1232,1.254878,0.690817,0.234043,0.129412,0.166667,85.0,0.234043,0.129412,0.166667,85.0,0.234043,0.129412,0.166667,85.0,0.234043,0.129412,0.166667,85.0,1499,47,621
5,0.064,0.933606,0.770189,0.276596,0.1625,0.204724,80.0,0.276596,0.1625,0.204724,80.0,0.276596,0.1625,0.204724,80.0,0.276596,0.1625,0.204724,80.0,1499,47,621
6,0.0414,1.372795,0.720812,0.340426,0.150943,0.20915,106.0,0.340426,0.150943,0.20915,106.0,0.340426,0.150943,0.20915,106.0,0.340426,0.150943,0.20915,106.0,1499,47,621
7,0.0176,1.293351,0.735118,0.382979,0.185567,0.25,97.0,0.382979,0.185567,0.25,97.0,0.382979,0.185567,0.25,97.0,0.382979,0.185567,0.25,97.0,1499,47,621
8,0.0099,1.268428,0.757268,0.297872,0.150538,0.2,93.0,0.297872,0.150538,0.2,93.0,0.297872,0.150538,0.2,93.0,0.297872,0.150538,0.2,93.0,1499,47,621
9,0.0087,1.358307,0.750808,0.340426,0.170213,0.22695,94.0,0.340426,0.170213,0.22695,94.0,0.340426,0.170213,0.22695,94.0,0.340426,0.170213,0.22695,94.0,1499,47,621


Training model no. 2 of 23 for Appeal_to_Values persuasion technique...


Map:   0%|          | 0/1107 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 1107/1107 [00:00<00:00, 4888.13 examples/s]
Map: 100%|██████████| 277/277 [00:00<00:00, 3405.06 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Appeal To Values Precision,Appeal To Values Recall,Appeal To Values F1-score,Appeal To Values Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-appeal To Values Support,I-appeal To Values Support
1,0.633,0.848962,0.624444,0.343434,0.179894,0.236111,189.0,0.343434,0.179894,0.236111,189.0,0.343434,0.179894,0.236111,189.0,0.343434,0.179894,0.236111,189.0,2464,99,1708
2,0.4232,0.459499,0.810349,0.333333,0.20122,0.250951,164.0,0.333333,0.20122,0.250951,164.0,0.333333,0.20122,0.250951,164.0,0.333333,0.20122,0.250951,164.0,2464,99,1708
3,0.2462,0.491673,0.778974,0.242424,0.138728,0.176471,173.0,0.242424,0.138728,0.176471,173.0,0.242424,0.138728,0.176471,173.0,0.242424,0.138728,0.176471,173.0,2464,99,1708
4,0.1346,0.655965,0.789979,0.30303,0.179641,0.225564,167.0,0.30303,0.179641,0.225564,167.0,0.30303,0.179641,0.225564,167.0,0.30303,0.179641,0.225564,167.0,2464,99,1708


Training model no. 3 of 23 for Appeal_to_Fear-Prejudice persuasion technique...


Map:   0%|          | 0/2497 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 2497/2497 [00:00<00:00, 5413.71 examples/s]
Map: 100%|██████████| 625/625 [00:00<00:00, 6885.86 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Appeal To Fear-prejudice Precision,Appeal To Fear-prejudice Recall,Appeal To Fear-prejudice F1-score,Appeal To Fear-prejudice Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,B-appeal To Fear-prejudice Support,I-appeal To Fear-prejudice Support,O Support
1,0.6505,,0.734638,0.153005,0.071429,0.097391,392.0,0.153005,0.071429,0.097391,392.0,0.153005,0.071429,0.097391,392.0,0.153005,0.071429,0.097391,392.0,183,2741,5506
2,0.4443,,0.76987,0.153005,0.078652,0.103896,356.0,0.153005,0.078652,0.103896,356.0,0.153005,0.078652,0.103896,356.0,0.153005,0.078652,0.103896,356.0,183,2741,5506
3,0.2894,,0.772479,0.153005,0.1,0.12095,280.0,0.153005,0.1,0.12095,280.0,0.153005,0.1,0.12095,280.0,0.153005,0.1,0.12095,280.0,183,2741,5506
4,0.1723,,0.777936,0.202186,0.135036,0.161926,274.0,0.202186,0.135036,0.161926,274.0,0.202186,0.135036,0.161926,274.0,0.202186,0.135036,0.161926,274.0,183,2741,5506
5,0.1106,,0.775682,0.234973,0.164751,0.193694,261.0,0.234973,0.164751,0.193694,261.0,0.234973,0.164751,0.193694,261.0,0.234973,0.164751,0.193694,261.0,183,2741,5506
6,0.072,,0.783037,0.229508,0.184211,0.20438,228.0,0.229508,0.184211,0.20438,228.0,0.229508,0.184211,0.20438,228.0,0.229508,0.184211,0.20438,228.0,183,2741,5506
7,0.0477,,0.786714,0.262295,0.184615,0.216704,260.0,0.262295,0.184615,0.216704,260.0,0.262295,0.184615,0.216704,260.0,0.262295,0.184615,0.216704,260.0,183,2741,5506
8,0.0282,,0.788612,0.245902,0.188285,0.21327,239.0,0.245902,0.188285,0.21327,239.0,0.245902,0.188285,0.21327,239.0,0.245902,0.188285,0.21327,239.0,183,2741,5506
9,0.0232,,0.785053,0.262295,0.1875,0.218679,256.0,0.262295,0.1875,0.218679,256.0,0.262295,0.1875,0.218679,256.0,0.262295,0.1875,0.218679,256.0,183,2741,5506
10,0.0123,,0.784104,0.245902,0.177165,0.20595,254.0,0.245902,0.177165,0.20595,254.0,0.245902,0.177165,0.20595,254.0,0.245902,0.177165,0.20595,254.0,183,2741,5506


Training model no. 4 of 23 for Flag_Waving persuasion technique...


Map:   0%|          | 0/1139 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 1139/1139 [00:00<00:00, 3816.17 examples/s]
Map: 100%|██████████| 285/285 [00:00<00:00, 5425.61 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Flag Waving Precision,Flag Waving Recall,Flag Waving F1-score,Flag Waving Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-flag Waving Support,I-flag Waving Support
1,0.6365,0.442158,0.795509,0.054945,0.02551,0.034843,196.0,0.054945,0.02551,0.034843,196.0,0.054945,0.02551,0.034843,196.0,0.054945,0.02551,0.034843,196.0,3002,91,1049
2,0.4477,0.45675,0.801062,0.10989,0.056818,0.074906,176.0,0.10989,0.056818,0.074906,176.0,0.10989,0.056818,0.074906,176.0,0.10989,0.056818,0.074906,176.0,3002,91,1049
3,0.2921,0.562242,0.804201,0.186813,0.113333,0.141079,150.0,0.186813,0.113333,0.141079,150.0,0.186813,0.113333,0.141079,150.0,0.186813,0.113333,0.141079,150.0,3002,91,1049
4,0.1472,0.712092,0.807098,0.153846,0.10219,0.122807,137.0,0.153846,0.10219,0.122807,137.0,0.153846,0.10219,0.122807,137.0,0.153846,0.10219,0.122807,137.0,3002,91,1049
5,0.0797,0.829991,0.821342,0.252747,0.201754,0.22439,114.0,0.252747,0.201754,0.22439,114.0,0.252747,0.201754,0.22439,114.0,0.252747,0.201754,0.22439,114.0,3002,91,1049
6,0.0572,0.778117,0.827137,0.21978,0.15873,0.184332,126.0,0.21978,0.15873,0.184332,126.0,0.21978,0.15873,0.184332,126.0,0.21978,0.15873,0.184332,126.0,3002,91,1049
7,0.0374,0.916027,0.830517,0.241758,0.166667,0.197309,132.0,0.241758,0.166667,0.197309,132.0,0.241758,0.166667,0.197309,132.0,0.241758,0.166667,0.197309,132.0,3002,91,1049


Training model no. 5 of 23 for Causal_Oversimplification persuasion technique...


Map:   0%|          | 0/843 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 843/843 [00:00<00:00, 5287.39 examples/s]
Map: 100%|██████████| 211/211 [00:00<00:00, 5411.08 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Causal Oversimplification Precision,Causal Oversimplification Recall,Causal Oversimplification F1-score,Causal Oversimplification Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,B-causal Oversimplification Support,I-causal Oversimplification Support,O Support
1,0.7469,0.695282,0.69517,0.375,0.20122,0.261905,164.0,0.375,0.20122,0.261905,164.0,0.375,0.20122,0.261905,164.0,0.375,0.20122,0.261905,164.0,88,1886,1359
2,0.5766,0.568365,0.714671,0.295455,0.109705,0.16,237.0,0.295455,0.109705,0.16,237.0,0.295455,0.109705,0.16,237.0,0.295455,0.109705,0.16,237.0,88,1886,1359
3,0.303,0.757592,0.744974,0.375,0.211538,0.270492,156.0,0.375,0.211538,0.270492,156.0,0.375,0.211538,0.270492,156.0,0.375,0.211538,0.270492,156.0,88,1886,1359
4,0.1595,1.025115,0.730873,0.363636,0.230216,0.281938,139.0,0.363636,0.230216,0.281938,139.0,0.363636,0.230216,0.281938,139.0,0.363636,0.230216,0.281938,139.0,88,1886,1359
5,0.0812,1.159447,0.750375,0.465909,0.280822,0.350427,146.0,0.465909,0.280822,0.350427,146.0,0.465909,0.280822,0.350427,146.0,0.465909,0.280822,0.350427,146.0,88,1886,1359
6,0.0585,1.448799,0.738974,0.443182,0.288889,0.349776,135.0,0.443182,0.288889,0.349776,135.0,0.443182,0.288889,0.349776,135.0,0.443182,0.288889,0.349776,135.0,88,1886,1359
7,0.0385,1.332985,0.731473,0.318182,0.190476,0.238298,147.0,0.318182,0.190476,0.238298,147.0,0.318182,0.190476,0.238298,147.0,0.318182,0.190476,0.238298,147.0,88,1886,1359


Training model no. 6 of 23 for False_Dilemma-No_Choice persuasion technique...


Map:   0%|          | 0/736 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 736/736 [00:00<00:00, 2871.82 examples/s]
Map: 100%|██████████| 184/184 [00:00<00:00, 3409.46 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,False Dilemma-no Choice Precision,False Dilemma-no Choice Recall,False Dilemma-no Choice F1-score,False Dilemma-no Choice Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,B-false Dilemma-no Choice Support,I-false Dilemma-no Choice Support,O Support
1,0.6608,0.841671,0.587473,0.0,0.0,0.0,63.0,0.0,0.0,0.0,63.0,0.0,0.0,0.0,63.0,0.0,0.0,0.0,63.0,84,1344,1813
2,0.4483,0.543067,0.763653,0.27381,0.160839,0.202643,143.0,0.27381,0.160839,0.202643,143.0,0.27381,0.160839,0.202643,143.0,0.27381,0.160839,0.202643,143.0,84,1344,1813
3,0.2421,0.844074,0.772293,0.238095,0.196078,0.215054,102.0,0.238095,0.196078,0.215054,102.0,0.238095,0.196078,0.215054,102.0,0.238095,0.196078,0.215054,102.0,84,1344,1813
4,0.1517,0.798662,0.775687,0.238095,0.175439,0.20202,114.0,0.238095,0.175439,0.20202,114.0,0.238095,0.175439,0.20202,114.0,0.238095,0.175439,0.20202,114.0,84,1344,1813
5,0.0957,0.931255,0.792348,0.369048,0.284404,0.321244,109.0,0.369048,0.284404,0.321244,109.0,0.369048,0.284404,0.321244,109.0,0.369048,0.284404,0.321244,109.0,84,1344,1813
6,0.0591,0.96805,0.760568,0.357143,0.223881,0.275229,134.0,0.357143,0.223881,0.275229,134.0,0.357143,0.223881,0.275229,134.0,0.357143,0.223881,0.275229,134.0,84,1344,1813
7,0.0387,0.981401,0.79821,0.333333,0.220472,0.265403,127.0,0.333333,0.220472,0.265403,127.0,0.333333,0.220472,0.265403,127.0,0.333333,0.220472,0.265403,127.0,84,1344,1813


Training model no. 7 of 23 for Consequential_Oversimplification persuasion technique...


Map:   0%|          | 0/561 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 561/561 [00:00<00:00, 4688.24 examples/s]
Map: 100%|██████████| 141/141 [00:00<00:00, 3843.26 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Consequential Oversimplification Precision,Consequential Oversimplification Recall,Consequential Oversimplification F1-score,Consequential Oversimplification Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-consequential Oversimplification Support,I-consequential Oversimplification Support
1,0.6665,0.777546,0.506173,0.0,0.0,0.0,77.0,0.0,0.0,0.0,77.0,0.0,0.0,0.0,77.0,0.0,0.0,0.0,77.0,1241,51,1300
2,0.3898,0.759674,0.765046,0.352941,0.191489,0.248276,94.0,0.352941,0.191489,0.248276,94.0,0.352941,0.191489,0.248276,94.0,0.352941,0.191489,0.248276,94.0,1241,51,1300
3,0.2225,0.507041,0.847608,0.411765,0.203883,0.272727,103.0,0.411765,0.203883,0.272727,103.0,0.411765,0.203883,0.272727,103.0,0.411765,0.203883,0.272727,103.0,1241,51,1300
4,0.0869,0.64587,0.837963,0.313725,0.188235,0.235294,85.0,0.313725,0.188235,0.235294,85.0,0.313725,0.188235,0.235294,85.0,0.313725,0.188235,0.235294,85.0,1241,51,1300
5,0.0533,0.678294,0.867284,0.372549,0.256757,0.304,74.0,0.372549,0.256757,0.304,74.0,0.372549,0.256757,0.304,74.0,0.372549,0.256757,0.304,74.0,1241,51,1300
6,0.0298,0.818362,0.848765,0.470588,0.289157,0.358209,83.0,0.470588,0.289157,0.358209,83.0,0.470588,0.289157,0.358209,83.0,0.470588,0.289157,0.358209,83.0,1241,51,1300
7,0.0158,0.750618,0.866127,0.45098,0.294872,0.356589,78.0,0.45098,0.294872,0.356589,78.0,0.45098,0.294872,0.356589,78.0,0.45098,0.294872,0.356589,78.0,1241,51,1300
8,0.0067,0.789515,0.878086,0.431373,0.282051,0.341085,78.0,0.431373,0.282051,0.341085,78.0,0.431373,0.282051,0.341085,78.0,0.431373,0.282051,0.341085,78.0,1241,51,1300


Training model no. 8 of 23 for Straw_Man persuasion technique...


Map:   0%|          | 0/512 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 512/512 [00:00<00:00, 3424.53 examples/s]
Map: 100%|██████████| 128/128 [00:00<00:00, 5124.72 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Straw Man Precision,Straw Man Recall,Straw Man F1-score,Straw Man Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-straw Man Support,I-straw Man Support
1,0.7265,0.669282,0.677138,0.044444,0.023256,0.030534,86.0,0.044444,0.023256,0.030534,86.0,0.044444,0.023256,0.030534,86.0,0.044444,0.023256,0.030534,86.0,946,45,728
2,0.5639,0.630401,0.715532,0.111111,0.050505,0.069444,99.0,0.111111,0.050505,0.069444,99.0,0.111111,0.050505,0.069444,99.0,0.111111,0.050505,0.069444,99.0,946,45,728
3,0.3214,0.750704,0.738802,0.155556,0.097222,0.119658,72.0,0.155556,0.097222,0.119658,72.0,0.155556,0.097222,0.119658,72.0,0.155556,0.097222,0.119658,72.0,946,45,728
4,0.217,1.001218,0.763816,0.222222,0.15873,0.185185,63.0,0.222222,0.15873,0.185185,63.0,0.222222,0.15873,0.185185,63.0,0.222222,0.15873,0.185185,63.0,946,45,728
5,0.0944,1.152282,0.753345,0.288889,0.19697,0.234234,66.0,0.288889,0.19697,0.234234,66.0,0.288889,0.19697,0.234234,66.0,0.288889,0.19697,0.234234,66.0,946,45,728
6,0.0706,1.319996,0.744619,0.266667,0.190476,0.222222,63.0,0.266667,0.190476,0.222222,63.0,0.266667,0.190476,0.222222,63.0,0.266667,0.190476,0.222222,63.0,946,45,728
7,0.0461,1.319879,0.753927,0.177778,0.131148,0.150943,61.0,0.177778,0.131148,0.150943,61.0,0.177778,0.131148,0.150943,61.0,0.177778,0.131148,0.150943,61.0,946,45,728


Training model no. 9 of 23 for Red_Herring persuasion technique...


Map:   0%|          | 0/302 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 302/302 [00:00<00:00, 4532.66 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 3867.78 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Red Herring Precision,Red Herring Recall,Red Herring F1-score,Red Herring Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-red Herring Support,I-red Herring Support
1,0.8285,0.712003,0.63978,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,695,23,373
2,0.6878,0.796287,0.690192,0.043478,0.025641,0.032258,39.0,0.043478,0.025641,0.032258,39.0,0.043478,0.025641,0.032258,39.0,0.043478,0.025641,0.032258,39.0,695,23,373
3,0.5397,0.70028,0.705775,0.0,0.0,0.0,47.0,0.0,0.0,0.0,47.0,0.0,0.0,0.0,47.0,0.0,0.0,0.0,47.0,695,23,373
4,0.339,0.920769,0.700275,0.086957,0.057143,0.068966,35.0,0.086957,0.057143,0.068966,35.0,0.086957,0.057143,0.068966,35.0,0.086957,0.057143,0.068966,35.0,695,23,373
5,0.1761,1.199473,0.64528,0.217391,0.09434,0.131579,53.0,0.217391,0.09434,0.131579,53.0,0.217391,0.09434,0.131579,53.0,0.217391,0.09434,0.131579,53.0,695,23,373
6,0.1163,1.124695,0.728689,0.304348,0.159091,0.208955,44.0,0.304348,0.159091,0.208955,44.0,0.304348,0.159091,0.208955,44.0,0.304348,0.159091,0.208955,44.0,695,23,373
7,0.0665,1.280623,0.72319,0.173913,0.093023,0.121212,43.0,0.173913,0.093023,0.121212,43.0,0.173913,0.093023,0.121212,43.0,0.173913,0.093023,0.121212,43.0,695,23,373
8,0.0356,1.342625,0.732356,0.26087,0.139535,0.181818,43.0,0.26087,0.139535,0.181818,43.0,0.26087,0.139535,0.181818,43.0,0.26087,0.139535,0.181818,43.0,695,23,373


Training model no. 10 of 23 for Whataboutism persuasion technique...


Map:   0%|          | 0/217 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 217/217 [00:00<00:00, 4118.76 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 2944.95 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Whataboutism Precision,Whataboutism Recall,Whataboutism F1-score,Whataboutism Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,B-whataboutism Support,I-whataboutism Support,O Support
1,0.9691,0.710754,0.630252,0.0,0.0,0.0,143.0,0.0,0.0,0.0,143.0,0.0,0.0,0.0,143.0,0.0,0.0,0.0,143.0,23,593,455
2,0.7112,0.604726,0.722689,0.0,0.0,0.0,112.0,0.0,0.0,0.0,112.0,0.0,0.0,0.0,112.0,0.0,0.0,0.0,112.0,23,593,455
3,0.4615,0.751381,0.661998,0.347826,0.135593,0.195122,59.0,0.347826,0.135593,0.195122,59.0,0.347826,0.135593,0.195122,59.0,0.347826,0.135593,0.195122,59.0,23,593,455
4,0.2279,0.650959,0.760971,0.304348,0.179487,0.225806,39.0,0.304348,0.179487,0.225806,39.0,0.304348,0.179487,0.225806,39.0,0.304348,0.179487,0.225806,39.0,23,593,455
5,0.1156,0.79109,0.75817,0.26087,0.12766,0.171429,47.0,0.26087,0.12766,0.171429,47.0,0.26087,0.12766,0.171429,47.0,0.26087,0.12766,0.171429,47.0,23,593,455
6,0.0763,0.81196,0.741363,0.173913,0.08,0.109589,50.0,0.173913,0.08,0.109589,50.0,0.173913,0.08,0.109589,50.0,0.173913,0.08,0.109589,50.0,23,593,455


Training model no. 11 of 23 for Slogans persuasion technique...


Map:   0%|          | 0/1081 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 1081/1081 [00:00<00:00, 4852.02 examples/s]
Map: 100%|██████████| 271/271 [00:00<00:00, 5915.40 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Slogans Precision,Slogans Recall,Slogans F1-score,Slogans Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-slogans Support,I-slogans Support
1,0.3932,0.266209,0.914263,0.329897,0.278261,0.301887,115.0,0.329897,0.278261,0.301887,115.0,0.329897,0.278261,0.301887,115.0,0.329897,0.278261,0.301887,115.0,3123,97,559
2,0.2297,0.222958,0.920614,0.360825,0.301724,0.328638,116.0,0.360825,0.301724,0.328638,116.0,0.360825,0.301724,0.328638,116.0,0.360825,0.301724,0.328638,116.0,3123,97,559
3,0.1172,0.392618,0.917438,0.381443,0.45122,0.413408,82.0,0.381443,0.45122,0.413408,82.0,0.381443,0.45122,0.413408,82.0,0.381443,0.45122,0.413408,82.0,3123,97,559
4,0.0685,0.295403,0.925377,0.371134,0.305085,0.334884,118.0,0.371134,0.305085,0.334884,118.0,0.371134,0.305085,0.334884,118.0,0.371134,0.305085,0.334884,118.0,3123,97,559
5,0.0359,0.366124,0.926171,0.43299,0.365217,0.396226,115.0,0.43299,0.365217,0.396226,115.0,0.43299,0.365217,0.396226,115.0,0.43299,0.365217,0.396226,115.0,3123,97,559


Training model no. 12 of 23 for Appeal_to_Time persuasion technique...


Map:   0%|          | 0/259 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 259/259 [00:00<00:00, 3358.52 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 2842.88 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Appeal To Time Precision,Appeal To Time Recall,Appeal To Time F1-score,Appeal To Time Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-appeal To Time Support,I-appeal To Time Support
1,0.8033,0.55892,0.759048,0.0,0.0,0.0,76.0,0.0,0.0,0.0,76.0,0.0,0.0,0.0,76.0,0.0,0.0,0.0,76.0,750,27,273
2,0.4827,0.595881,0.721905,0.185185,0.068493,0.1,73.0,0.185185,0.068493,0.1,73.0,0.185185,0.068493,0.1,73.0,0.185185,0.068493,0.1,73.0,750,27,273
3,0.2674,0.580167,0.779048,0.185185,0.079365,0.111111,63.0,0.185185,0.079365,0.111111,63.0,0.185185,0.079365,0.111111,63.0,0.185185,0.079365,0.111111,63.0,750,27,273
4,0.1554,0.534903,0.835238,0.259259,0.145833,0.186667,48.0,0.259259,0.145833,0.186667,48.0,0.259259,0.145833,0.186667,48.0,0.259259,0.145833,0.186667,48.0,750,27,273
5,0.0953,0.788998,0.795238,0.296296,0.181818,0.225352,44.0,0.296296,0.181818,0.225352,44.0,0.296296,0.181818,0.225352,44.0,0.296296,0.181818,0.225352,44.0,750,27,273
6,0.0584,0.679575,0.834286,0.259259,0.162791,0.2,43.0,0.259259,0.162791,0.2,43.0,0.259259,0.162791,0.2,43.0,0.259259,0.162791,0.2,43.0,750,27,273
7,0.0264,1.00305,0.787619,0.259259,0.152174,0.191781,46.0,0.259259,0.152174,0.191781,46.0,0.259259,0.152174,0.191781,46.0,0.259259,0.152174,0.191781,46.0,750,27,273


Training model no. 13 of 23 for Conversation_Killer persuasion technique...


Map:   0%|          | 0/1553 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 1553/1553 [00:00<00:00, 5330.30 examples/s]
Map: 100%|██████████| 389/389 [00:00<00:00, 5620.40 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Conversation Killer Precision,Conversation Killer Recall,Conversation Killer F1-score,Conversation Killer Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,B-conversation Killer Support,I-conversation Killer Support,O Support
1,0.5264,0.531473,0.767592,0.326531,0.177778,0.230216,270.0,0.326531,0.177778,0.230216,270.0,0.326531,0.177778,0.230216,270.0,0.326531,0.177778,0.230216,270.0,147,1038,3462
2,0.3126,0.579982,0.797504,0.442177,0.258964,0.326633,251.0,0.442177,0.258964,0.326633,251.0,0.442177,0.258964,0.326633,251.0,0.442177,0.258964,0.326633,251.0,147,1038,3462
3,0.1963,0.770673,0.838821,0.387755,0.358491,0.372549,159.0,0.387755,0.358491,0.372549,159.0,0.387755,0.358491,0.372549,159.0,0.387755,0.358491,0.372549,159.0,147,1038,3462
4,0.1302,0.851489,0.83839,0.272109,0.325203,0.296296,123.0,0.272109,0.325203,0.296296,123.0,0.272109,0.325203,0.296296,123.0,0.272109,0.325203,0.296296,123.0,147,1038,3462
5,0.0675,0.888478,0.843985,0.401361,0.388158,0.394649,152.0,0.401361,0.388158,0.394649,152.0,0.401361,0.388158,0.394649,152.0,0.401361,0.388158,0.394649,152.0,147,1038,3462
6,0.0444,1.106362,0.836023,0.394558,0.423358,0.408451,137.0,0.394558,0.423358,0.408451,137.0,0.394558,0.423358,0.408451,137.0,0.394558,0.423358,0.408451,137.0,147,1038,3462
7,0.0279,1.014257,0.831289,0.462585,0.341709,0.393064,199.0,0.462585,0.341709,0.393064,199.0,0.462585,0.341709,0.393064,199.0,0.462585,0.341709,0.393064,199.0,147,1038,3462
8,0.016,1.138676,0.840542,0.435374,0.351648,0.389058,182.0,0.435374,0.351648,0.389058,182.0,0.435374,0.351648,0.389058,182.0,0.435374,0.351648,0.389058,182.0,147,1038,3462


Training model no. 14 of 23 for Loaded_Language persuasion technique...


Map:   0%|          | 0/12305 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 12305/12305 [00:03<00:00, 3324.65 examples/s]
Map: 100%|██████████| 3077/3077 [00:00<00:00, 4761.65 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Loaded Language Precision,Loaded Language Recall,Loaded Language F1-score,Loaded Language Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-loaded Language Support,I-loaded Language Support
1,0.32,0.279836,0.902974,0.160139,0.211982,0.182449,868.0,0.160139,0.211982,0.182449,868.0,0.160139,0.211982,0.182449,868.0,0.160139,0.211982,0.182449,868.0,43137,1149,4474
2,0.2311,0.298171,0.901743,0.295039,0.248899,0.270012,1362.0,0.295039,0.248899,0.270012,1362.0,0.295039,0.248899,0.270012,1362.0,0.295039,0.248899,0.270012,1362.0,43137,1149,4474
3,0.1518,0.407383,0.907075,0.263708,0.271993,0.267786,1114.0,0.263708,0.271993,0.267786,1114.0,0.263708,0.271993,0.267786,1114.0,0.263708,0.271993,0.267786,1114.0,43137,1149,4474
4,0.0977,0.380102,0.905947,0.306353,0.288998,0.297423,1218.0,0.306353,0.288998,0.297423,1218.0,0.306353,0.288998,0.297423,1218.0,0.306353,0.288998,0.297423,1218.0,43137,1149,4474
5,0.0646,0.523077,0.90687,0.268059,0.282569,0.275123,1090.0,0.268059,0.282569,0.275123,1090.0,0.268059,0.282569,0.275123,1090.0,0.268059,0.282569,0.275123,1090.0,43137,1149,4474
6,0.043,0.573146,0.906399,0.268059,0.285449,0.276481,1079.0,0.268059,0.285449,0.276481,1079.0,0.268059,0.285449,0.276481,1079.0,0.268059,0.285449,0.276481,1079.0,43137,1149,4474


Training model no. 15 of 23 for Repetition persuasion technique...


Map:   0%|          | 0/1777 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 1777/1777 [00:00<00:00, 4692.28 examples/s]
Map: 100%|██████████| 445/445 [00:00<00:00, 1232.29 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Repetition Precision,Repetition Recall,Repetition F1-score,Repetition Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-repetition Support,I-repetition Support
1,0.4211,0.382523,0.892672,0.032468,0.151515,0.053476,33.0,0.032468,0.151515,0.053476,33.0,0.032468,0.151515,0.053476,33.0,0.032468,0.151515,0.053476,33.0,6010,154,591
2,0.3131,0.365278,0.899186,0.201299,0.21831,0.209459,142.0,0.201299,0.21831,0.209459,142.0,0.201299,0.21831,0.209459,142.0,0.201299,0.21831,0.209459,142.0,6010,154,591
3,0.1946,0.418313,0.900518,0.233766,0.248276,0.240803,145.0,0.233766,0.248276,0.240803,145.0,0.233766,0.248276,0.240803,145.0,0.233766,0.248276,0.240803,145.0,6010,154,591
4,0.0958,0.509967,0.904959,0.337662,0.344371,0.340984,151.0,0.337662,0.344371,0.340984,151.0,0.337662,0.344371,0.340984,151.0,0.337662,0.344371,0.340984,151.0,6010,154,591
5,0.0524,0.629804,0.904219,0.37013,0.360759,0.365385,158.0,0.37013,0.360759,0.365385,158.0,0.37013,0.360759,0.365385,158.0,0.37013,0.360759,0.365385,158.0,6010,154,591
6,0.0324,0.635546,0.900222,0.396104,0.311224,0.348571,196.0,0.396104,0.311224,0.348571,196.0,0.396104,0.311224,0.348571,196.0,0.396104,0.311224,0.348571,196.0,6010,154,591
7,0.0121,0.6951,0.905107,0.37013,0.35625,0.363057,160.0,0.37013,0.35625,0.363057,160.0,0.37013,0.35625,0.363057,160.0,0.37013,0.35625,0.363057,160.0,6010,154,591


Training model no. 16 of 23 for Exaggeration-Minimisation persuasion technique...


Map:   0%|          | 0/2737 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 2737/2737 [00:00<00:00, 5328.79 examples/s]
Map: 100%|██████████| 685/685 [00:00<00:00, 5771.06 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Exaggeration-minimisation Precision,Exaggeration-minimisation Recall,Exaggeration-minimisation F1-score,Exaggeration-minimisation Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-exaggeration-minimisation Support,I-exaggeration-minimisation Support
1,0.5388,0.560959,0.810053,0.063107,0.066667,0.064838,195.0,0.063107,0.066667,0.064838,195.0,0.063107,0.066667,0.064838,195.0,0.063107,0.066667,0.064838,195.0,7108,206,1857
2,0.3816,0.568403,0.811471,0.063107,0.067708,0.065327,192.0,0.063107,0.067708,0.065327,192.0,0.063107,0.067708,0.065327,192.0,0.063107,0.067708,0.065327,192.0,7108,206,1857
3,0.2375,0.778985,0.831861,0.150485,0.135965,0.142857,228.0,0.150485,0.135965,0.142857,228.0,0.150485,0.135965,0.142857,228.0,0.150485,0.135965,0.142857,228.0,7108,206,1857
4,0.1504,0.799205,0.832188,0.174757,0.15,0.161435,240.0,0.174757,0.15,0.161435,240.0,0.174757,0.15,0.161435,240.0,0.174757,0.15,0.161435,240.0,7108,206,1857
5,0.0923,0.983236,0.824447,0.140777,0.125,0.13242,232.0,0.140777,0.125,0.13242,232.0,0.140777,0.125,0.13242,232.0,0.140777,0.125,0.13242,232.0,7108,206,1857
6,0.0627,1.221787,0.830771,0.145631,0.15625,0.150754,192.0,0.145631,0.15625,0.150754,192.0,0.145631,0.15625,0.150754,192.0,0.145631,0.15625,0.150754,192.0,7108,206,1857


Training model no. 17 of 23 for Obfuscation-Vagueness-Confusion persuasion technique...


Map:   0%|          | 0/560 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 560/560 [00:00<00:00, 3169.17 examples/s]
Map: 100%|██████████| 140/140 [00:00<00:00, 3171.60 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Obfuscation-vagueness-confusion Precision,Obfuscation-vagueness-confusion Recall,Obfuscation-vagueness-confusion F1-score,Obfuscation-vagueness-confusion Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-obfuscation-vagueness-confusion Support,I-obfuscation-vagueness-confusion Support
1,0.6991,0.613324,0.675562,0.042553,0.016529,0.02381,121.0,0.042553,0.016529,0.02381,121.0,0.042553,0.016529,0.02381,121.0,0.042553,0.016529,0.02381,121.0,1232,47,857
2,0.5464,0.669329,0.673689,0.021277,0.009615,0.013245,104.0,0.021277,0.009615,0.013245,104.0,0.021277,0.009615,0.013245,104.0,0.021277,0.009615,0.013245,104.0,1232,47,857
3,0.4395,0.677524,0.708333,0.06383,0.02439,0.035294,123.0,0.06383,0.02439,0.035294,123.0,0.06383,0.02439,0.035294,123.0,0.06383,0.02439,0.035294,123.0,1232,47,857
4,0.2347,0.769938,0.677434,0.06383,0.024194,0.035088,124.0,0.06383,0.024194,0.035088,124.0,0.06383,0.024194,0.035088,124.0,0.06383,0.024194,0.035088,124.0,1232,47,857
5,0.1447,1.083193,0.665262,0.106383,0.053191,0.070922,94.0,0.106383,0.053191,0.070922,94.0,0.106383,0.053191,0.070922,94.0,0.106383,0.053191,0.070922,94.0,1232,47,857
6,0.1004,1.13272,0.702247,0.042553,0.030303,0.035398,66.0,0.042553,0.030303,0.035398,66.0,0.042553,0.030303,0.035398,66.0,0.042553,0.030303,0.035398,66.0,1232,47,857
7,0.0556,1.176569,0.714419,0.12766,0.076923,0.096,78.0,0.12766,0.076923,0.096,78.0,0.12766,0.076923,0.096,78.0,0.12766,0.076923,0.096,78.0,1232,47,857
8,0.0293,1.278691,0.69382,0.085106,0.044944,0.058824,89.0,0.085106,0.044944,0.058824,89.0,0.085106,0.044944,0.058824,89.0,0.085106,0.044944,0.058824,89.0,1232,47,857
9,0.0153,1.356662,0.715356,0.148936,0.083333,0.10687,84.0,0.148936,0.083333,0.10687,84.0,0.148936,0.083333,0.10687,84.0,0.148936,0.083333,0.10687,84.0,1232,47,857
10,0.0076,1.366375,0.717697,0.106383,0.061728,0.078125,81.0,0.106383,0.061728,0.078125,81.0,0.106383,0.061728,0.078125,81.0,0.106383,0.061728,0.078125,81.0,1232,47,857


Training model no. 18 of 23 for Name_Calling-Labeling persuasion technique...


Map:   0%|          | 0/8819 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 8819/8819 [00:02<00:00, 3919.59 examples/s]
Map: 100%|██████████| 2205/2205 [00:00<00:00, 2904.65 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Name Calling-labeling Precision,Name Calling-labeling Recall,Name Calling-labeling F1-score,Name Calling-labeling Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-name Calling-labeling Support,I-name Calling-labeling Support
1,0.2288,,0.936503,0.333333,0.311927,0.322275,872.0,0.333333,0.311927,0.322275,872.0,0.333333,0.311927,0.322275,872.0,0.333333,0.311927,0.322275,872.0,29441,816,2217
2,0.1388,,0.933762,0.409314,0.343621,0.373602,972.0,0.409314,0.343621,0.373602,972.0,0.409314,0.343621,0.373602,972.0,0.409314,0.343621,0.373602,972.0,29441,816,2217
3,0.0894,,0.938936,0.404412,0.383721,0.393795,860.0,0.404412,0.383721,0.393795,860.0,0.404412,0.383721,0.393795,860.0,0.404412,0.383721,0.393795,860.0,29441,816,2217
4,0.0565,,0.937458,0.422794,0.386338,0.403745,893.0,0.422794,0.386338,0.403745,893.0,0.422794,0.386338,0.403745,893.0,0.422794,0.386338,0.403745,893.0,29441,816,2217
5,0.0359,,0.938228,0.379902,0.411141,0.394904,754.0,0.379902,0.411141,0.394904,754.0,0.379902,0.411141,0.394904,754.0,0.379902,0.411141,0.394904,754.0,29441,816,2217
6,0.0232,,0.938505,0.370098,0.387179,0.378446,780.0,0.370098,0.387179,0.378446,780.0,0.370098,0.387179,0.378446,780.0,0.370098,0.387179,0.378446,780.0,29441,816,2217


Training model no. 19 of 23 for Doubt persuasion technique...


Map:   0%|          | 0/6923 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 6923/6923 [00:01<00:00, 3711.85 examples/s]
Map: 100%|██████████| 1731/1731 [00:00<00:00, 4814.55 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Doubt Precision,Doubt Recall,Doubt F1-score,Doubt Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-doubt Support,I-doubt Support
1,0.6476,,0.718598,0.210306,0.12895,0.159873,1171.0,0.210306,0.12895,0.159873,1171.0,0.210306,0.12895,0.159873,1171.0,0.210306,0.12895,0.159873,1171.0,15931,718,11709
2,0.4867,,0.732386,0.305014,0.224385,0.25856,976.0,0.305014,0.224385,0.25856,976.0,0.305014,0.224385,0.25856,976.0,0.305014,0.224385,0.25856,976.0,15931,718,11709
3,0.3386,,0.738169,0.320334,0.240084,0.274463,958.0,0.320334,0.240084,0.274463,958.0,0.320334,0.240084,0.274463,958.0,0.320334,0.240084,0.274463,958.0,15931,718,11709
4,0.2396,,0.734149,0.35376,0.260246,0.299882,976.0,0.35376,0.260246,0.299882,976.0,0.35376,0.260246,0.299882,976.0,0.35376,0.260246,0.299882,976.0,15931,718,11709
5,0.1587,,0.736441,0.29805,0.284574,0.291156,752.0,0.29805,0.284574,0.291156,752.0,0.29805,0.284574,0.291156,752.0,0.29805,0.284574,0.291156,752.0,15931,718,11709
6,0.1187,,0.742189,0.288301,0.259398,0.273087,798.0,0.288301,0.259398,0.273087,798.0,0.288301,0.259398,0.273087,798.0,0.288301,0.259398,0.273087,798.0,15931,718,11709


Training model no. 20 of 23 for Guilt_by_Association persuasion technique...


Map:   0%|          | 0/1030 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 1030/1030 [00:00<00:00, 4624.14 examples/s]
Map: 100%|██████████| 258/258 [00:00<00:00, 5824.45 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Guilt By Association Precision,Guilt By Association Recall,Guilt By Association F1-score,Guilt By Association Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-guilt By Association Support,I-guilt By Association Support
1,0.6804,,0.709779,0.047059,0.028571,0.035556,140.0,0.047059,0.028571,0.035556,140.0,0.047059,0.028571,0.035556,140.0,0.047059,0.028571,0.035556,140.0,2132,85,1270
2,0.5226,,0.769716,0.152941,0.085526,0.109705,152.0,0.152941,0.085526,0.109705,152.0,0.152941,0.085526,0.109705,152.0,0.152941,0.085526,0.109705,152.0,2132,85,1270
3,0.3002,,0.750215,0.188235,0.108844,0.137931,147.0,0.188235,0.108844,0.137931,147.0,0.188235,0.108844,0.137931,147.0,0.188235,0.108844,0.137931,147.0,2132,85,1270
4,0.1802,,0.735016,0.141176,0.129032,0.134831,93.0,0.141176,0.129032,0.134831,93.0,0.141176,0.129032,0.134831,93.0,0.141176,0.129032,0.134831,93.0,2132,85,1270
5,0.1342,,0.773731,0.211765,0.133333,0.163636,135.0,0.211765,0.133333,0.163636,135.0,0.211765,0.133333,0.163636,135.0,0.211765,0.133333,0.163636,135.0,2132,85,1270
6,0.0821,,0.753083,0.247059,0.164062,0.197183,128.0,0.247059,0.164062,0.197183,128.0,0.247059,0.164062,0.197183,128.0,0.247059,0.164062,0.197183,128.0,2132,85,1270
7,0.0512,,0.76398,0.188235,0.140351,0.160804,114.0,0.188235,0.140351,0.160804,114.0,0.188235,0.140351,0.160804,114.0,0.188235,0.140351,0.160804,114.0,2132,85,1270
8,0.0284,,0.756811,0.211765,0.135338,0.165138,133.0,0.211765,0.135338,0.165138,133.0,0.211765,0.135338,0.165138,133.0,0.211765,0.135338,0.165138,133.0,2132,85,1270


Training model no. 21 of 23 for Appeal_to_Hypocrisy persuasion technique...


Map:   0%|          | 0/1358 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 1358/1358 [00:00<00:00, 4264.01 examples/s]
Map: 100%|██████████| 340/340 [00:00<00:00, 4188.80 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Appeal To Hypocrisy Precision,Appeal To Hypocrisy Recall,Appeal To Hypocrisy F1-score,Appeal To Hypocrisy Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-appeal To Hypocrisy Support,I-appeal To Hypocrisy Support
1,0.6804,0.655826,0.696277,0.022059,0.006696,0.010274,448.0,0.022059,0.006696,0.010274,448.0,0.022059,0.006696,0.010274,448.0,0.022059,0.006696,0.010274,448.0,2760,136,2879
2,0.4837,0.639905,0.739394,0.272059,0.158798,0.200542,233.0,0.272059,0.158798,0.200542,233.0,0.272059,0.158798,0.200542,233.0,0.272059,0.158798,0.200542,233.0,2760,136,2879
3,0.2738,0.896254,0.759481,0.308824,0.228261,0.2625,184.0,0.308824,0.228261,0.2625,184.0,0.308824,0.228261,0.2625,184.0,0.308824,0.228261,0.2625,184.0,2760,136,2879
4,0.1532,0.917281,0.762944,0.323529,0.231579,0.269939,190.0,0.323529,0.231579,0.269939,190.0,0.323529,0.231579,0.269939,190.0,0.323529,0.231579,0.269939,190.0,2760,136,2879
5,0.0924,1.224465,0.753939,0.294118,0.213904,0.247678,187.0,0.294118,0.213904,0.247678,187.0,0.294118,0.213904,0.247678,187.0,0.294118,0.213904,0.247678,187.0,2760,136,2879
6,0.0607,1.270145,0.75671,0.330882,0.219512,0.26393,205.0,0.330882,0.219512,0.26393,205.0,0.330882,0.219512,0.26393,205.0,0.330882,0.219512,0.26393,205.0,2760,136,2879


Training model no. 22 of 23 for Questioning_the_Reputation persuasion technique...


Map:   0%|          | 0/3494 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 3494/3494 [00:00<00:00, 5005.81 examples/s]
Map: 100%|██████████| 874/874 [00:00<00:00, 6374.33 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Questioning The Reputation Precision,Questioning The Reputation Recall,Questioning The Reputation F1-score,Questioning The Reputation Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-questioning The Reputation Support,I-questioning The Reputation Support
1,0.6039,0.52532,0.725221,0.322981,0.185383,0.235561,561.0,0.322981,0.185383,0.235561,561.0,0.322981,0.185383,0.235561,561.0,0.322981,0.185383,0.235561,561.0,7573,322,5221
2,0.4412,0.48172,0.785377,0.279503,0.195652,0.230179,460.0,0.279503,0.195652,0.230179,460.0,0.279503,0.195652,0.230179,460.0,0.279503,0.195652,0.230179,460.0,7573,322,5221
3,0.291,0.731673,0.767536,0.251553,0.199017,0.222222,407.0,0.251553,0.199017,0.222222,407.0,0.251553,0.199017,0.222222,407.0,0.251553,0.199017,0.222222,407.0,7573,322,5221


In [17]:
### MACRO F1 (across PTs), best models = 0.266
### last models = 0.232