In [1]:
import json
import itertools
import os
from tokenizers import Encoding
from typing import List
from transformers import AutoTokenizer, AutoModelForTokenClassification, DataCollatorForTokenClassification, TrainingArguments, Trainer, EarlyStoppingCallback
import torch
from collections import defaultdict
from datasets import Dataset, concatenate_datasets
import pandas as pd
import numpy as np
import evaluate
from sklearn.metrics import f1_score
from collections import Counter
from seqeval.metrics import classification_report, precision_score
import re
from datetime import datetime
import unicodedata
from torch.nn import CrossEntropyLoss

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def align_tokens_and_annotations_bio(tokenized: Encoding, annotations):
    tokens = tokenized.tokens
    aligned_labels = ["O"] * len(
        tokens
    )  # Make a list to store our labels the same length as our tokens
    for anno in annotations:
        annotation_token_ix_set = (
            set()
        )  # A set that stores the token indices of the annotation
        for char_ix in range(anno["start"], anno["end"]):
            print('char_ix = ', char_ix)
            token_ix = tokenized.char_to_token(char_ix)
            if token_ix is not None:
                annotation_token_ix_set.add(token_ix)
        if len(annotation_token_ix_set) == 1:
            # If there is only one token
            token_ix = annotation_token_ix_set.pop()
            prefix = (
                "B"  # This annotation spans one token so is prefixed with U for unique
            )
            aligned_labels[token_ix] = f"{prefix}-{anno['tag']}"

        else:

            last_token_in_anno_ix = len(annotation_token_ix_set) - 1
            for num, token_ix in enumerate(sorted(annotation_token_ix_set)):
                if num == 0:
                    prefix = "B"
                elif num == last_token_in_anno_ix:
                    prefix = "I"  # Its the last token
                else:
                    prefix = "I"  # We're inside of a multi token annotation
                aligned_labels[token_ix] = f"{prefix}-{anno['tag']}"
    return aligned_labels

class LabelSet:
    def __init__(self, labels: List[str]):
        self.labels_to_id = {}
        self.ids_to_label = {}
        self.labels_to_id["O"] = 0
        self.ids_to_label[0] = "O"
        num = 0  # in case there are no labels
        # Writing BILU will give us incremental ids for the labels
        for _num, (label, s) in enumerate(itertools.product(labels, "BI")):
            num = _num + 1  # skip 0
            l = f"{s}-{label}"
            self.labels_to_id[l] = num
            self.ids_to_label[num] = l


    def get_aligned_label_ids_from_annotations(self, tokenized_text, annotations):
        raw_labels = align_tokens_and_annotations_bio(tokenized_text, annotations)
        return list(map(self.labels_to_id.get, raw_labels))
    
class WeightedLoss(CrossEntropyLoss):
    def __init__(self, weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean'):
        super(WeightedLoss, self).__init__(weight, size_average, ignore_index, reduce, reduction)

    def forward(self, input, target):
        # Ensure weight tensor is on the same device as input
        weight = torch.tensor([0.5 if x == 0 else 2.0 for x in target.view(-1)], device=input.device, dtype=input.dtype)
        # Ensure target is on the same device as input
        target = target.to(input.device)
        loss = super(WeightedLoss, self).forward(input.view(-1, input.size(-1)), target.view(-1))
        return (loss * weight).mean()

class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fn = WeightedLoss()
        loss = loss_fn(logits, labels)

        return (loss, outputs) if return_outputs else loss

def tokenize_token_classification(examples, tokenizer):
    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True, padding='longest', return_tensors='pt')

    labels = []
    for i, label in enumerate(examples[f"ner_tags"]):
        word_ids = [tokenized_inputs.token_to_word(i, j) for j in range(len(tokenized_inputs['input_ids'][i]))]  # Map tokens to their respective word.
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:  # Set the special tokens to -100.
            if word_idx is None:
                label_ids.append(-100)
            elif word_idx != previous_word_idx:  # Only label the first token of a given word.
                label_ids.append(label[word_idx])
            else:
                label_ids.append(-100)
            previous_word_idx = word_idx
        labels.append(label_ids)

    tokenized_inputs["labels"] = torch.tensor(labels)
    return tokenized_inputs

def dict_of_lists(lst_of_dicts):
    result = defaultdict(list)
    for d in lst_of_dicts:
        for key, value in d.items():
            result[key].append(value)
    return dict(result)

def list_of_dicts(dict_of_lists):
    # First, we need to check if all lists are of the same length to ensure correct transformation
    if not all(len(lst) == len(next(iter(dict_of_lists.values()))) for lst in dict_of_lists.values()):
        raise ValueError("All lists in the dictionary must have the same length")

    # Get the length of the items in any of the lists
    length = len(next(iter(dict_of_lists.values())))
    
    # Create a list of dictionaries, one for each index in the lists
    result = []
    for i in range(length):
        # Create a dictionary for the current index 'i' across all lists
        new_dict = {key: dict_of_lists[key][i] for key in dict_of_lists}
        result.append(new_dict)
    
    return result

def sub_shift_spans(text, ents = [], mappings = []):
    for mapping in mappings:
        adjustment = 0
        pattern = re.compile(mapping['pattern'])
        for match in re.finditer(pattern, text):
            match_index = match.start() + adjustment
            match_contents = match.group()
            if all(mapping['check'](char) for char in match_contents):
                subbed_text = mapping['target'].replace('placeholder', match_contents)
            else:
                subbed_text = mapping['target']
            len_diff = len(subbed_text) - len(match_contents)
            text = text[:match_index] + subbed_text + text[match_index + len(match_contents):]
            if ents:
                if isinstance(ents, list):
                    for ent in ents:
                        if ent['start'] <= match_index and ent['end'] > match_index:
                            ent['end'] += len_diff
                        if ent['start'] > match_index:
                            ent['start'] += len_diff
                            ent['end'] += len_diff
                elif isinstance(ents, dict):
                    if ents['value']['start'] <= match_index and ents['value']['end'] > match_index:
                        ents['value']['end'] += len_diff
                    if ents['value']['start'] > match_index:
                        ents['value']['start'] += len_diff
                        ents['value']['end'] += len_diff

            adjustment += len_diff

    return text, ents

def get_entities_from_sample(sample, field = 'annotations', langs = ['en'], sort = False):
    entities = []
    for lang in langs:
        entities += [ent for ent in sample[field][0]['result'] if ent['type'] == 'labels' and ent[f'from_name'] == f'label_{lang}']
    if sort:
        entities = sorted(entities, key = lambda ent: ent['value']['start'])
    return entities
'''
def span_to_words_annotation(samples, target_tag = '', mappings = {}, labels_model = []):
    samples_new = []
    # if not any([l for l in samples['annotations']]):
        
    for i in range(len(samples['data'])):
        text, annotation_list = samples['data'][i]['text'], samples['annotations'][i][0]['result']
        labels_text = []
        tokens = []
        if not annotation_list:
            annotation_list = [[]]
        for j, annotation in enumerate(annotation_list):
            if isinstance(annotation, dict):
                if annotation['value']['labels'][0] != target_tag:
                    continue
            text_subshifted, ents = sub_shift_spans(text, annotation, mappings=mappings)
            text_subshifted_matches = re.finditer(r'[^\s]+', text_subshifted)
            labels_words = []
            first = True
            for regex_match in text_subshifted_matches:
                if j == 0:
                    tokens.append(regex_match.group())
                if isinstance(annotation, dict):
                    if regex_match.start() < ents['value']['start']:
                        labels_words.append(labels_model.labels_to_id['O'])
                    elif regex_match.start() >= ents['value']['start'] and regex_match.end() <= ents['value']['end']:
                        if first:
                            labels_words.append(labels_model.labels_to_id['B-' + ents['value']['labels'][0]])
                            first = False
                        elif not first:
                            labels_words.append(labels_model.labels_to_id['I-' + ents['value']['labels'][0]])
                    else:
                        labels_words.append(labels_model.labels_to_id['O'])
                    labels_text.append({'labels': labels_words, 'tag': annotation['value']['labels'][0]})
        allowed_labels = [labels_model.labels_to_id['O'],
                          labels_model.labels_to_id['B-' + target_tag],
                          labels_model.labels_to_id['I-' + target_tag],
                          ]
        # if the training sample has no tags that we need, we just produce a 0s list
        if target_tag not in [labels['tag'] for labels in labels_text]:
            labels = [0] * len(tokens)
            tag = 'no_tag'
        # if the training sample has tags we need, we first exclude the label lists whose tags don't match
        # and then we merge the label lists that have tags that match the target tag
        else:
            labels = [max(values) for values in zip(*[labels['labels'] for labels in labels_text if labels['tag'] == target_tag])]
            labels = [(label if label in allowed_labels else 0) for label in labels]
            tag = target_tag
        samples_new.append({
            'id': i,
            'ner_tags': labels,
            'tokens': tokens,
            'tag': tag,
        })
    return samples_new
'''

def span_to_words_annotation(samples, target_tag = '', mappings = {}, labels_model = []):
    samples_new = []
    
    for i in range(len(samples['data'])):
        text, annotation_list = samples['data'][i]['text'], samples['annotations'][i][0]['result']
        labels_text = []
        tokens = []
        if not annotation_list:
            annotation_list = [[]]
        for j, annotation in enumerate(annotation_list):
            if isinstance(annotation, dict):
                # Check if 'labels' is non-empty
                if annotation['value']['labels'] and annotation['value']['labels'][0] != target_tag:
                    continue
            text_subshifted, ents = sub_shift_spans(text, annotation, mappings=mappings)
            text_subshifted_matches = re.finditer(r'[^\s]+', text_subshifted)
            labels_words = []
            first = True
            for regex_match in text_subshifted_matches:
                if j == 0:
                    tokens.append(regex_match.group())
                if isinstance(annotation, dict):
                    if regex_match.start() < ents['value']['start']:
                        labels_words.append(labels_model.labels_to_id['O'])
                    elif regex_match.start() >= ents['value']['start'] and regex_match.end() <= ents['value']['end']:
                        # Ensure the 'labels' list is not empty
                        if isinstance(ents['value']['labels'], list) and ents['value']['labels']:
                            if first:
                                labels_words.append(labels_model.labels_to_id['B-' + ents['value']['labels'][0]])
                                first = False
                            else:
                                labels_words.append(labels_model.labels_to_id['I-' + ents['value']['labels'][0]])
                        else:
                            # If 'labels' is empty, append 'O' or any fallback label
                            labels_words.append(labels_model.labels_to_id['O'])
                    else:
                        labels_words.append(labels_model.labels_to_id['O'])
                    labels_text.append({'labels': labels_words, 'tag': annotation['value']['labels'][0] if 'labels' in annotation['value'] and annotation['value']['labels'] else 'no_tag'})
        
        allowed_labels = [labels_model.labels_to_id['O'],
                          labels_model.labels_to_id['B-' + target_tag],
                          labels_model.labels_to_id['I-' + target_tag],
                          ]
        
        # If the training sample has no tags we need, produce a list of 0s
        if target_tag not in [labels['tag'] for labels in labels_text]:
            labels = [0] * len(tokens)
            tag = 'no_tag'
        # Otherwise, merge label lists that match the target tag
        else:
            labels = [max(values) for values in zip(*[labels['labels'] for labels in labels_text if labels['tag'] == target_tag])]
            labels = [(label if label in allowed_labels else 0) for label in labels]
            tag = target_tag
            
        samples_new.append({
            'id': i,
            'ner_tags': labels,
            'tokens': tokens,
            'tag': tag,
        })
        
    return samples_new

def make_balanced_df(df):
    # get rows with annotations
    df_pos = df[df['annotations'].apply(lambda x: len(x[0]['result']) > 0)]
    # get the same number of rows without any annotations
    df_neg = df[df['annotations'].apply(lambda x: x[0]['result'] == [])].sample(len(df_pos))
    balanced_df = pd.concat([df_pos, df_neg])
    return balanced_df

def make_binary_balanced_df(df, target_tag='', labels_model=[]):
    df_list = df.to_dict(orient='records')
    df_list_binary = span_to_words_annotation(dict_of_lists(df_list), target_tag=target_tag, mappings=regex_tokenizer_mappings, labels_model=labels_model)
    df_binary = pd.DataFrame(df_list_binary)
    df_binary_pos = df_binary[df_binary['tag'] == target_tag]
    df_binary_neg = df_binary[df_binary['tag'] != target_tag].sample(len(df_binary_pos), replace=True)  # Over-sampling
    df_binary_subsampled = pd.concat([df_binary_pos, df_binary_neg])
    return df_binary_subsampled

regex_tokenizer_mappings = [
    {'pattern': r'(?<!\s)([^\w\s])|([^\w\s])(?!\s)',
    'target': ' placeholder ',
    'check': lambda x: unicodedata.category(x).startswith('P'),
    },
    {'pattern': r'\s+',
     'target': ' ',
     'check': lambda x: False if re.match('\s+', x) is None else True,
     },
    ]

def compute_metrics_wrapper(label_list, pt, model_name_simple, date_time, threshold=0):
    def compute_metrics(eval_preds):
        nonlocal label_list
        nonlocal pt
        nonlocal threshold
        logits, labels = eval_preds
        predictions = torch.softmax(torch.tensor(logits), dim=-1).numpy()
        predictions = np.where(predictions >= threshold, predictions, 0)
        predictions = np.argmax(predictions, axis=2)


        # Extract the true predictions and labels from the sequences
        true_predictions = [
            [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
            for prediction, label in zip(predictions, labels)
        ]
        true_labels = [
            [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
            for prediction, label in zip(predictions, labels)
        ]

        # Compute sequence-level evaluation metrics
        results = classification_report(true_predictions, true_labels, output_dict=True)

        # Flatten the lists to calculate micro F1-score and supports
        flat_true_predictions = [item for sublist in true_predictions for item in sublist]
        flat_true_labels = [item for sublist in true_labels for item in sublist]

        # Calculate micro F1-score using sklearn
        micro_f1 = f1_score(flat_true_labels, flat_true_predictions, average='micro')

        # Prepare the results dictionary
        flat_results = {'micro_f1': float(micro_f1)}
        
        # Add detailed metrics for each label to the results dictionary
        for label, metrics in results.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    flat_results[f'{label}_{metric}'] = float(value)

        # Compute support for each label using Counter
        label_support = Counter(flat_true_labels)
        for label, count in label_support.items():
            flat_results[f'{label}_support'] = count
        
        models_dir = '/home/lgiordano/LUCA/checkthat_GITHUB/models/M2/RUN_OTTOBRE/weights_and_results'
        model_save_name = f'{model_name_simple}_{tt[0]}_target={tt[1]}_aug_SUBSAMPLED_{date_time}'
        model_save_dir = os.path.join(models_dir, date_time+'_aug_cw_ts0.9', model_save_name)
        if not os.path.exists(model_save_dir):
            os.makedirs(model_save_dir)

        with open(os.path.join(model_save_dir, 'results.json'), 'w', encoding='utf8') as f:
            json.dump(flat_results, f, ensure_ascii = False)

        return flat_results
    return compute_metrics

In [3]:
date_time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

data_gold = '/home/lgiordano/LUCA/checkthat_GITHUB/data/formatted/train_sentences.json'
with open(data_gold, 'r', encoding='utf8') as f:
    dataset_gold = json.load(f)

data_path_dict = {
'sl': '/home/lgiordano/LUCA/checkthat_GITHUB/data/train_sent_mt/sl/train_gold_sentences_translated_nllb-200-3.3B_eng_Latn-slv_Latn_tok_regex_en-sl/train_gold_sentences_translated_nllb-200-3.3B_eng_Latn-slv_Latn_tok_regex_en-sl_mdeberta-v3-base_mdeberta_xlwa_en-sl_ME3_2024-05-04-12-12-14_ls.json',
'ru': '/home/lgiordano/LUCA/checkthat_GITHUB/data/train_sent_mt/ru/train_gold_sentences_translated_nllb-200-3.3B_eng_Latn-rus_Cyrl_tok_regex_en-ru/train_gold_sentences_translated_nllb-200-3.3B_eng_Latn-rus_Cyrl_tok_regex_en-ru_mdeberta-v3-base_mdeberta_xlwa_en-ru_ME3_2024-05-04-12-09-20_ls.json',
'pt': '/home/lgiordano/LUCA/checkthat_GITHUB/data/train_sent_mt/pt/train_gold_sentences_translated_nllb-200-3.3B_eng_Latn-por_Latn_tok_regex_en-pt/train_gold_sentences_translated_nllb-200-3.3B_eng_Latn-por_Latn_tok_regex_en-pt_mdeberta-v3-base_mdeberta_xlwa_en-pt_ME3_2024-05-04-12-07-45_ls.json',
'it': '/home/lgiordano/LUCA/checkthat_GITHUB/data/train_sent_mt/it/train_gold_sentences_translated_nllb-200-3.3B_eng_Latn-ita_Latn_tok_regex_en-it/train_gold_sentences_translated_nllb-200-3.3B_eng_Latn-ita_Latn_tok_regex_en-it_mdeberta-v3-base_mdeberta_xlwa_en-it_ME3_2024-05-04-12-05-00_ls.json',
'es': '/home/lgiordano/LUCA/checkthat_GITHUB/data/train_sent_mt/es/train_gold_sentences_translated_nllb-200-3.3B_eng_Latn-spa_Latn_tok_regex_en-es/train_gold_sentences_translated_nllb-200-3.3B_eng_Latn-spa_Latn_tok_regex_en-es_mdeberta-v3-base_mdeberta_xlwa_en-es_ME3_2024-05-04-12-01-43_ls.json',
'bg': '/home/lgiordano/LUCA/checkthat_GITHUB/data/train_sent_mt/bg/train_gold_sentences_translated_nllb-200-3.3B_eng_Latn-bul_Cyrl_tok_regex_en-bg/train_gold_sentences_translated_nllb-200-3.3B_eng_Latn-bul_Cyrl_tok_regex_en-bg_mdeberta-v3-base_mdeberta_xlwa_en-bg_ME3_2024-05-04-11-58-52_ls.json',
#'ar': '/home/lgiordano/LUCA/checkthat_GITHUB/data/aug_NEW/araieval24_all_bin_formatted.json'
}

dataset_aug = []
for key in data_path_dict:
    with open(data_path_dict[key], 'r', encoding='utf8') as f:
        dataset_aug_buffer = json.load(f)

        for sample in dataset_aug_buffer:
            sample['annotations'][0]['result'] = get_entities_from_sample(sample, langs=[key], sort = True)
            if 'text_en' in sample['data']:
                del sample['data']['text_en']
            if f'text_{key}' in sample['data']:
                sample['data']['text'] = sample['data'][f'text_{key}']
                del sample['data'][f'text_{key}']
            sample['data']['lang'] = key
            if 'labels' in sample['data']:
                sample['data']['label'] = sample['data'].pop('labels')
        dataset_aug += dataset_aug_buffer

#dataset_aug += [sample for sample in json.load(open('/home/lgiordano/LUCA/checkthat_GITHUB/data/aug_NEW/araieval24_all_bin_formatted.json')) if sample['data'].get('type') != 'tweet'] #filter out tweets from ar
#semeval_24 = json.load(open('/home/lgiordano/LUCA/checkthat_GITHUB/data/aug_NEW/semeval24_all_bin_formatted.json', encoding='utf-8'))
#dataset_aug += semeval_24

df_gold = pd.DataFrame(dataset_gold)
balanced_df_gold = make_balanced_df(df_gold)

df_aug = pd.DataFrame(dataset_aug)
balanced_df_aug = make_balanced_df(df_aug)

target_tags = ["Appeal_to_Authority", "Appeal_to_Popularity","Appeal_to_Values","Appeal_to_Fear-Prejudice","Flag_Waving","Causal_Oversimplification",
               "False_Dilemma-No_Choice","Consequential_Oversimplification","Straw_Man","Red_Herring","Whataboutism","Slogans","Appeal_to_Time",
               "Conversation_Killer","Loaded_Language","Repetition","Exaggeration-Minimisation","Obfuscation-Vagueness-Confusion","Name_Calling-Labeling",
               "Doubt","Guilt_by_Association","Appeal_to_Hypocrisy","Questioning_the_Reputation"]
target_tags = [(i, el.strip()) for i, el in enumerate(target_tags)]

In [None]:
shift = 0
for i, tt in enumerate(target_tags):
    if i < shift:
        continue
    print(f'Training model no. {i} of {len(target_tags)} for {tt} persuasion technique...')
    labels_model = LabelSet(labels=[tt[1]])
    
    token_columns = ['id', 'ner_tags', 'tokens', 
                     #'lang'
                     ]

    df_binary_subsampled_gold = make_binary_balanced_df(balanced_df_gold, target_tag=tt[1], labels_model=labels_model)
    binary_dataset_gold = Dataset.from_pandas(df_binary_subsampled_gold[token_columns])

    df_binary_subsampled_aug = make_binary_balanced_df(balanced_df_aug, target_tag=tt[1], labels_model=labels_model)
    binary_dataset_aug = Dataset.from_pandas(df_binary_subsampled_aug[token_columns])
    
    split_ratio = 0.2
    split_seed = 42
    datadict = binary_dataset_gold.train_test_split(split_ratio, seed=split_seed)

    #model_name = 'bert-base-multilingual-cased'
    #model_name = 'xlm-roberta-base'
    model_name = 'microsoft/mdeberta-v3-base'
    #model_name = 'FacebookAI/xlm-roberta-large'
    model_name_simple = model_name.split('/')[-1]
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    batch_size = 16
    datadict['train'] = concatenate_datasets([datadict['train'], binary_dataset_aug]) # this is where we merge english gold data with aug data
    datadict = datadict.map(lambda x: tokenize_token_classification(x, tokenizer), batched=True, batch_size=None)

    columns = [
                'input_ids',
                'token_type_ids',
                'attention_mask',
                'labels'
                ]

    datadict.set_format('torch', columns = columns)

    train_data = datadict['train']
    val_data = datadict['test']

    data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer, padding='longest')
    
    model = AutoModelForTokenClassification.from_pretrained(model_name,
                                                                num_labels=len(labels_model.ids_to_label.values()),
                                                                label2id=labels_model.labels_to_id,
                                                                id2label=labels_model.ids_to_label,
                                                                )
    
    training_args = TrainingArguments(output_dir=f'/home/lgiordano/LUCA/checkthat_GITHUB/models/M2/RUN_OTTOBRE/weights_and_results/{date_time}_aug_cw_ts0.9/mdeberta-v3-base-NEW_aug_{i}_{tt[1]}',
                                  save_total_limit=2,
                                  save_strategy='epoch',
                                  load_best_model_at_end=True,
                                  save_only_model=True,
                                  metric_for_best_model='eval_macro avg_f1-score',
                                  logging_strategy='epoch',
                                  evaluation_strategy='epoch',
                                  learning_rate=5e-5,
                                  optim='adamw_torch',
                                  num_train_epochs=10)
    
    early_stopping = EarlyStoppingCallback(early_stopping_patience=2)

    ###CustomTrainer per class weighting, threshold=0.9 per threshold
    trainer = CustomTrainer(model,
                      training_args,
                      train_dataset=train_data,
                      eval_dataset=val_data,
                      data_collator=data_collator,
                      tokenizer=tokenizer,
                      callbacks=[early_stopping],
                      compute_metrics=compute_metrics_wrapper(
                          label_list=[i for i in labels_model.ids_to_label.values()],
                          pt=tt[1],
                          model_name_simple=model_name_simple,
                          date_time=date_time,
                          threshold=0.9
                          ),
                      )
    
    trainer.train()

Training model no. 0 of 23 for (0, 'Appeal_to_Authority') persuasion technique...


Map:   0%|          | 0/2412 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 2412/2412 [00:00<00:00, 3971.51 examples/s]
Map: 100%|██████████| 274/274 [00:00<00:00, 4891.88 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Appeal To Authority Precision,Appeal To Authority Recall,Appeal To Authority F1-score,Appeal To Authority Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-appeal To Authority Support,I-appeal To Authority Support
1,1.3286,1.321609,0.696254,0.0,0.0,0.0,142.0,0.0,0.0,0.0,142.0,0.0,0.0,0.0,142.0,0.0,0.0,0.0,142.0,2653,117,1928
2,0.7616,1.12342,0.66986,0.0,0.0,0.0,136.0,0.0,0.0,0.0,136.0,0.0,0.0,0.0,136.0,0.0,0.0,0.0,136.0,2653,117,1928
3,0.4293,1.58793,0.746914,0.119658,0.107692,0.11336,130.0,0.119658,0.107692,0.11336,130.0,0.119658,0.107692,0.11336,130.0,0.119658,0.107692,0.11336,130.0,2653,117,1928
4,0.2786,1.380706,0.754151,0.205128,0.145455,0.170213,165.0,0.205128,0.145455,0.170213,165.0,0.205128,0.145455,0.170213,165.0,0.205128,0.145455,0.170213,165.0,2653,117,1928
5,0.1564,2.154739,0.770328,0.247863,0.2,0.221374,145.0,0.247863,0.2,0.221374,145.0,0.247863,0.2,0.221374,145.0,0.247863,0.2,0.221374,145.0,2653,117,1928
6,0.1014,2.331102,0.773521,0.264957,0.216783,0.238462,143.0,0.264957,0.216783,0.238462,143.0,0.264957,0.216783,0.238462,143.0,0.264957,0.216783,0.238462,143.0,2653,117,1928
7,0.0677,2.476562,0.769689,0.307692,0.238411,0.268657,151.0,0.307692,0.238411,0.268657,151.0,0.307692,0.238411,0.268657,151.0,0.307692,0.238411,0.268657,151.0,2653,117,1928
8,0.0524,2.781228,0.764155,0.282051,0.222973,0.249057,148.0,0.282051,0.222973,0.249057,148.0,0.282051,0.222973,0.249057,148.0,0.282051,0.222973,0.249057,148.0,2653,117,1928
9,0.0318,2.749578,0.784802,0.299145,0.239726,0.26616,146.0,0.299145,0.239726,0.26616,146.0,0.299145,0.239726,0.26616,146.0,0.299145,0.239726,0.26616,146.0,2653,117,1928


Training model no. 1 of 23 for (1, 'Appeal_to_Popularity') persuasion technique...


Map:   0%|          | 0/888 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 888/888 [00:00<00:00, 5844.25 examples/s]
Map: 100%|██████████| 136/136 [00:00<00:00, 3994.85 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Appeal To Popularity Precision,Appeal To Popularity Recall,Appeal To Popularity F1-score,Appeal To Popularity Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-appeal To Popularity Support,I-appeal To Popularity Support
1,1.242,1.516741,0.758786,0.021277,0.01,0.013605,100.0,0.021277,0.01,0.013605,100.0,0.021277,0.01,0.013605,100.0,0.021277,0.01,0.013605,100.0,1210,47,621
2,0.6799,1.297172,0.731629,0.021277,0.02,0.020619,50.0,0.021277,0.02,0.020619,50.0,0.021277,0.02,0.020619,50.0,0.021277,0.02,0.020619,50.0,1210,47,621
3,0.3915,1.350639,0.78541,0.106383,0.075758,0.088496,66.0,0.106383,0.075758,0.088496,66.0,0.106383,0.075758,0.088496,66.0,0.106383,0.075758,0.088496,66.0,1210,47,621
4,0.217,2.042474,0.761448,0.212766,0.119048,0.152672,84.0,0.212766,0.119048,0.152672,84.0,0.212766,0.119048,0.152672,84.0,0.212766,0.119048,0.152672,84.0,1210,47,621
5,0.1427,2.543234,0.705005,0.297872,0.135922,0.186667,103.0,0.297872,0.135922,0.186667,103.0,0.297872,0.135922,0.186667,103.0,0.297872,0.135922,0.186667,103.0,1210,47,621
6,0.0965,2.216602,0.756124,0.319149,0.192308,0.24,78.0,0.319149,0.192308,0.24,78.0,0.319149,0.192308,0.24,78.0,0.319149,0.192308,0.24,78.0,1210,47,621
7,0.0901,3.011124,0.699148,0.276596,0.132653,0.17931,98.0,0.276596,0.132653,0.17931,98.0,0.276596,0.132653,0.17931,98.0,0.276596,0.132653,0.17931,98.0,1210,47,621
8,0.0339,2.572029,0.754526,0.276596,0.156627,0.2,83.0,0.276596,0.156627,0.2,83.0,0.276596,0.156627,0.2,83.0,0.276596,0.156627,0.2,83.0,1210,47,621


Training model no. 2 of 23 for (2, 'Appeal_to_Values') persuasion technique...


Map:   0%|          | 0/1107 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 1107/1107 [00:00<00:00, 5057.20 examples/s]
Map: 100%|██████████| 277/277 [00:00<00:00, 3643.17 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Appeal To Values Precision,Appeal To Values Recall,Appeal To Values F1-score,Appeal To Values Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-appeal To Values Support,I-appeal To Values Support
1,1.1532,1.378629,0.665452,0.0,0.0,0.0,216.0,0.0,0.0,0.0,216.0,0.0,0.0,0.0,216.0,0.0,0.0,0.0,216.0,2584,99,1708
2,0.8067,0.889606,0.712594,0.010101,0.007937,0.008889,126.0,0.010101,0.007937,0.008889,126.0,0.010101,0.007937,0.008889,126.0,0.010101,0.007937,0.008889,126.0,2584,99,1708
3,0.4618,1.357672,0.690731,0.070707,0.054264,0.061404,129.0,0.070707,0.054264,0.061404,129.0,0.070707,0.054264,0.061404,129.0,0.070707,0.054264,0.061404,129.0,2584,99,1708
4,0.2901,1.503483,0.786609,0.232323,0.141104,0.175573,163.0,0.232323,0.141104,0.175573,163.0,0.232323,0.141104,0.175573,163.0,0.232323,0.141104,0.175573,163.0,2584,99,1708
5,0.1923,1.419327,0.78752,0.161616,0.108108,0.129555,148.0,0.161616,0.108108,0.129555,148.0,0.161616,0.108108,0.129555,148.0,0.161616,0.108108,0.129555,148.0,2584,99,1708
6,0.1413,2.004062,0.805739,0.242424,0.154839,0.188976,155.0,0.242424,0.154839,0.188976,155.0,0.242424,0.154839,0.188976,155.0,0.242424,0.154839,0.188976,155.0,2584,99,1708
7,0.0654,2.105884,0.809383,0.272727,0.159763,0.201493,169.0,0.272727,0.159763,0.201493,169.0,0.272727,0.159763,0.201493,169.0,0.272727,0.159763,0.201493,169.0,2584,99,1708
8,0.0421,2.236309,0.801412,0.272727,0.162651,0.203774,166.0,0.272727,0.162651,0.203774,166.0,0.272727,0.162651,0.203774,166.0,0.272727,0.162651,0.203774,166.0,2584,99,1708
9,0.0206,2.447025,0.801867,0.252525,0.15528,0.192308,161.0,0.252525,0.15528,0.192308,161.0,0.252525,0.15528,0.192308,161.0,0.252525,0.15528,0.192308,161.0,2584,99,1708
10,0.0164,2.518376,0.803462,0.242424,0.145455,0.181818,165.0,0.242424,0.145455,0.181818,165.0,0.242424,0.145455,0.181818,165.0,0.242424,0.145455,0.181818,165.0,2584,99,1708


Training model no. 3 of 23 for (3, 'Appeal_to_Fear-Prejudice') persuasion technique...


Map:   0%|          | 0/5893 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 5893/5893 [00:01<00:00, 3125.83 examples/s]
Map: 100%|██████████| 625/625 [00:00<00:00, 5955.56 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Appeal To Fear-prejudice Precision,Appeal To Fear-prejudice Recall,Appeal To Fear-prejudice F1-score,Appeal To Fear-prejudice Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-appeal To Fear-prejudice Support,I-appeal To Fear-prejudice Support
1,1.0616,,0.780582,0.038251,0.042424,0.04023,165.0,0.038251,0.042424,0.04023,165.0,0.038251,0.042424,0.04023,165.0,0.038251,0.042424,0.04023,165.0,5872,183,2741
2,0.5411,,0.802069,0.120219,0.117647,0.118919,187.0,0.120219,0.117647,0.118919,187.0,0.120219,0.117647,0.118919,187.0,0.120219,0.117647,0.118919,187.0,5872,183,2741
3,0.308,,0.839245,0.229508,0.177966,0.200477,236.0,0.229508,0.177966,0.200477,236.0,0.229508,0.177966,0.200477,236.0,0.229508,0.177966,0.200477,236.0,5872,183,2741
4,0.2003,,0.845839,0.289617,0.236607,0.260442,224.0,0.289617,0.236607,0.260442,224.0,0.289617,0.236607,0.260442,224.0,0.289617,0.236607,0.260442,224.0,5872,183,2741
5,0.1369,,0.823215,0.295082,0.267327,0.280519,202.0,0.295082,0.267327,0.280519,202.0,0.295082,0.267327,0.280519,202.0,0.295082,0.267327,0.280519,202.0,5872,183,2741
6,0.101,,0.835039,0.327869,0.269058,0.295567,223.0,0.327869,0.269058,0.295567,223.0,0.327869,0.269058,0.295567,223.0,0.327869,0.269058,0.295567,223.0,5872,183,2741
7,0.0596,,0.84095,0.344262,0.266949,0.300716,236.0,0.344262,0.266949,0.300716,236.0,0.344262,0.266949,0.300716,236.0,0.344262,0.266949,0.300716,236.0,5872,183,2741
8,0.0452,,0.834584,0.333333,0.256303,0.289786,238.0,0.333333,0.256303,0.289786,238.0,0.333333,0.256303,0.289786,238.0,0.333333,0.256303,0.289786,238.0,5872,183,2741
9,0.0229,,0.842997,0.338798,0.262712,0.295943,236.0,0.338798,0.262712,0.295943,236.0,0.338798,0.262712,0.295943,236.0,0.338798,0.262712,0.295943,236.0,5872,183,2741


Training model no. 4 of 23 for (4, 'Flag_Waving') persuasion technique...


Map:   0%|          | 0/3947 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 3947/3947 [00:01<00:00, 3474.31 examples/s]
Map: 100%|██████████| 285/285 [00:00<00:00, 5262.94 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Flag Waving Precision,Flag Waving Recall,Flag Waving F1-score,Flag Waving Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,B-flag Waving Support,I-flag Waving Support,O Support
1,0.8886,1.011743,0.778069,0.087912,0.085106,0.086486,94.0,0.087912,0.085106,0.086486,94.0,0.087912,0.085106,0.086486,94.0,0.087912,0.085106,0.086486,94.0,91,1049,2681
2,0.3941,1.010114,0.851871,0.307692,0.301075,0.304348,93.0,0.307692,0.301075,0.304348,93.0,0.307692,0.301075,0.304348,93.0,0.307692,0.301075,0.304348,93.0,91,1049,2681
3,0.2719,0.961017,0.837739,0.274725,0.235849,0.253807,106.0,0.274725,0.235849,0.253807,106.0,0.274725,0.235849,0.253807,106.0,0.274725,0.235849,0.253807,106.0,91,1049,2681
4,0.1991,1.108881,0.838786,0.362637,0.277311,0.314286,119.0,0.362637,0.277311,0.314286,119.0,0.362637,0.277311,0.314286,119.0,0.362637,0.277311,0.314286,119.0,91,1049,2681
5,0.1433,1.253254,0.856844,0.384615,0.3125,0.344828,112.0,0.384615,0.3125,0.344828,112.0,0.384615,0.3125,0.344828,112.0,0.384615,0.3125,0.344828,112.0,91,1049,2681
6,0.0842,1.360889,0.857891,0.384615,0.315315,0.346535,111.0,0.384615,0.315315,0.346535,111.0,0.384615,0.315315,0.346535,111.0,0.384615,0.315315,0.346535,111.0,91,1049,2681
7,0.0796,1.499899,0.855797,0.461538,0.368421,0.409756,114.0,0.461538,0.368421,0.409756,114.0,0.461538,0.368421,0.409756,114.0,0.461538,0.368421,0.409756,114.0,91,1049,2681
8,0.0478,1.610232,0.855535,0.43956,0.384615,0.410256,104.0,0.43956,0.384615,0.410256,104.0,0.43956,0.384615,0.410256,104.0,0.43956,0.384615,0.410256,104.0,91,1049,2681
9,0.041,1.586969,0.859461,0.43956,0.330579,0.377358,121.0,0.43956,0.330579,0.377358,121.0,0.43956,0.330579,0.377358,121.0,0.43956,0.330579,0.377358,121.0,91,1049,2681
10,0.0286,1.684152,0.855012,0.450549,0.372727,0.40796,110.0,0.450549,0.372727,0.40796,110.0,0.450549,0.372727,0.40796,110.0,0.450549,0.372727,0.40796,110.0,91,1049,2681


Training model no. 5 of 23 for (5, 'Causal_Oversimplification') persuasion technique...


Map:   0%|          | 0/2823 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 2823/2823 [00:00<00:00, 3120.97 examples/s]
Map: 100%|██████████| 211/211 [00:00<00:00, 3470.24 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Causal Oversimplification Precision,Causal Oversimplification Recall,Causal Oversimplification F1-score,Causal Oversimplification Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,B-causal Oversimplification Support,I-causal Oversimplification Support,O Support
1,1.2157,0.893156,0.784495,0.170455,0.111111,0.134529,135.0,0.170455,0.111111,0.134529,135.0,0.170455,0.111111,0.134529,135.0,0.170455,0.111111,0.134529,135.0,88,1886,1483
2,0.5946,1.408405,0.749205,0.181818,0.142857,0.16,112.0,0.181818,0.142857,0.16,112.0,0.181818,0.142857,0.16,112.0,0.181818,0.142857,0.16,112.0,88,1886,1483
3,0.2888,1.532689,0.792016,0.295455,0.240741,0.265306,108.0,0.295455,0.240741,0.265306,108.0,0.295455,0.240741,0.265306,108.0,0.295455,0.240741,0.265306,108.0,88,1886,1483
4,0.1836,1.598698,0.774082,0.420455,0.355769,0.385417,104.0,0.420455,0.355769,0.385417,104.0,0.420455,0.355769,0.385417,104.0,0.420455,0.355769,0.385417,104.0,88,1886,1483
5,0.1341,1.397265,0.84582,0.511364,0.39823,0.447761,113.0,0.511364,0.39823,0.447761,113.0,0.511364,0.39823,0.447761,113.0,0.511364,0.39823,0.447761,113.0,88,1886,1483
6,0.0903,1.655524,0.833382,0.488636,0.417476,0.450262,103.0,0.488636,0.417476,0.450262,103.0,0.488636,0.417476,0.450262,103.0,0.488636,0.417476,0.450262,103.0,88,1886,1483
7,0.0564,1.773511,0.823836,0.5,0.473118,0.486188,93.0,0.5,0.473118,0.486188,93.0,0.5,0.473118,0.486188,93.0,0.5,0.473118,0.486188,93.0,88,1886,1483
8,0.0316,1.978438,0.81834,0.511364,0.420561,0.461538,107.0,0.511364,0.420561,0.461538,107.0,0.511364,0.420561,0.461538,107.0,0.511364,0.420561,0.461538,107.0,88,1886,1483
9,0.0251,1.989614,0.840903,0.545455,0.457143,0.497409,105.0,0.545455,0.457143,0.497409,105.0,0.545455,0.457143,0.497409,105.0,0.545455,0.457143,0.497409,105.0,88,1886,1483
10,0.0128,2.035455,0.839456,0.545455,0.457143,0.497409,105.0,0.545455,0.457143,0.497409,105.0,0.545455,0.457143,0.497409,105.0,0.545455,0.457143,0.497409,105.0,88,1886,1483


Training model no. 6 of 23 for (6, 'False_Dilemma-No_Choice') persuasion technique...


Map:   0%|          | 0/2476 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 2476/2476 [00:00<00:00, 4237.03 examples/s]
Map: 100%|██████████| 184/184 [00:00<00:00, 4867.78 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,False Dilemma-no Choice Precision,False Dilemma-no Choice Recall,False Dilemma-no Choice F1-score,False Dilemma-no Choice Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,B-false Dilemma-no Choice Support,I-false Dilemma-no Choice Support,O Support
1,1.0704,0.801214,0.757229,0.02381,0.020408,0.021978,98.0,0.02381,0.020408,0.021978,98.0,0.02381,0.020408,0.021978,98.0,0.02381,0.020408,0.021978,98.0,84,1344,1719
2,0.446,1.261066,0.829679,0.285714,0.252632,0.268156,95.0,0.285714,0.252632,0.268156,95.0,0.285714,0.252632,0.268156,95.0,0.285714,0.252632,0.268156,95.0,84,1344,1719
3,0.2476,1.378751,0.827455,0.27381,0.267442,0.270588,86.0,0.27381,0.267442,0.270588,86.0,0.27381,0.267442,0.270588,86.0,0.27381,0.267442,0.270588,86.0,84,1344,1719
4,0.1631,1.596518,0.84652,0.369048,0.344444,0.356322,90.0,0.369048,0.344444,0.356322,90.0,0.369048,0.344444,0.356322,90.0,0.369048,0.344444,0.356322,90.0,84,1344,1719
5,0.1158,1.55169,0.849063,0.404762,0.32381,0.359788,105.0,0.404762,0.32381,0.359788,105.0,0.404762,0.32381,0.359788,105.0,0.404762,0.32381,0.359788,105.0,84,1344,1719
6,0.0839,1.820954,0.834128,0.511905,0.421569,0.462366,102.0,0.511905,0.421569,0.462366,102.0,0.511905,0.421569,0.462366,102.0,0.511905,0.421569,0.462366,102.0,84,1344,1719
7,0.0376,1.986371,0.840801,0.452381,0.395833,0.422222,96.0,0.452381,0.395833,0.422222,96.0,0.452381,0.395833,0.422222,96.0,0.452381,0.395833,0.422222,96.0,84,1344,1719
8,0.033,1.922694,0.84938,0.511905,0.40566,0.452632,106.0,0.511905,0.40566,0.452632,106.0,0.511905,0.40566,0.452632,106.0,0.511905,0.40566,0.452632,106.0,84,1344,1719


Training model no. 7 of 23 for (7, 'Consequential_Oversimplification') persuasion technique...


Map:   0%|          | 0/561 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 561/561 [00:00<00:00, 5561.46 examples/s]
Map: 100%|██████████| 141/141 [00:00<00:00, 3623.24 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Consequential Oversimplification Precision,Consequential Oversimplification Recall,Consequential Oversimplification F1-score,Consequential Oversimplification Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-consequential Oversimplification Support,I-consequential Oversimplification Support
1,1.1929,0.703762,0.425559,0.0,0.0,0.0,12.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,12.0,973,51,1300
2,0.6809,0.798038,0.824871,0.215686,0.123596,0.157143,89.0,0.215686,0.123596,0.157143,89.0,0.215686,0.123596,0.157143,89.0,0.215686,0.123596,0.157143,89.0,973,51,1300
3,0.3061,0.913189,0.864458,0.313725,0.179775,0.228571,89.0,0.313725,0.179775,0.228571,89.0,0.313725,0.179775,0.228571,89.0,0.313725,0.179775,0.228571,89.0,973,51,1300
4,0.1785,1.381363,0.818417,0.156863,0.102564,0.124031,78.0,0.156863,0.102564,0.124031,78.0,0.156863,0.102564,0.124031,78.0,0.156863,0.102564,0.124031,78.0,973,51,1300
5,0.1326,1.188033,0.845955,0.333333,0.2125,0.259542,80.0,0.333333,0.2125,0.259542,80.0,0.333333,0.2125,0.259542,80.0,0.333333,0.2125,0.259542,80.0,973,51,1300
6,0.0771,1.212882,0.874355,0.333333,0.207317,0.255639,82.0,0.333333,0.207317,0.255639,82.0,0.333333,0.207317,0.255639,82.0,0.333333,0.207317,0.255639,82.0,973,51,1300
7,0.0325,1.752921,0.850688,0.392157,0.229885,0.289855,87.0,0.392157,0.229885,0.289855,87.0,0.392157,0.229885,0.289855,87.0,0.392157,0.229885,0.289855,87.0,973,51,1300
8,0.0221,1.405001,0.870482,0.411765,0.269231,0.325581,78.0,0.411765,0.269231,0.325581,78.0,0.411765,0.269231,0.325581,78.0,0.411765,0.269231,0.325581,78.0,973,51,1300
9,0.0137,1.487077,0.8679,0.431373,0.278481,0.338462,79.0,0.431373,0.278481,0.338462,79.0,0.431373,0.278481,0.338462,79.0,0.431373,0.278481,0.338462,79.0,973,51,1300
10,0.006,1.550221,0.863597,0.411765,0.269231,0.325581,78.0,0.411765,0.269231,0.325581,78.0,0.411765,0.269231,0.325581,78.0,0.411765,0.269231,0.325581,78.0,973,51,1300


Training model no. 8 of 23 for (8, 'Straw_Man') persuasion technique...


Map:   0%|          | 0/728 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 728/728 [00:00<00:00, 3720.62 examples/s]
Map: 100%|██████████| 128/128 [00:00<00:00, 5326.95 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Straw Man Precision,Straw Man Recall,Straw Man F1-score,Straw Man Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,B-straw Man Support,I-straw Man Support,O Support
1,1.3491,1.037411,0.573438,0.0,0.0,0.0,14.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,14.0,45,728,1004
2,0.7992,1.121676,0.729882,0.044444,0.022472,0.029851,89.0,0.044444,0.022472,0.029851,89.0,0.044444,0.022472,0.029851,89.0,0.044444,0.022472,0.029851,89.0,45,728,1004
3,0.4623,1.541505,0.755205,0.111111,0.064935,0.081967,77.0,0.111111,0.064935,0.081967,77.0,0.111111,0.064935,0.081967,77.0,0.111111,0.064935,0.081967,77.0,45,728,1004
4,0.2603,2.095742,0.786156,0.244444,0.148649,0.184874,74.0,0.244444,0.148649,0.184874,74.0,0.244444,0.148649,0.184874,74.0,0.244444,0.148649,0.184874,74.0,45,728,1004
5,0.1689,2.213611,0.767023,0.155556,0.106061,0.126126,66.0,0.155556,0.106061,0.126126,66.0,0.155556,0.106061,0.126126,66.0,0.155556,0.106061,0.126126,66.0,45,728,1004
6,0.1123,3.067693,0.756894,0.266667,0.166667,0.205128,72.0,0.266667,0.166667,0.205128,72.0,0.266667,0.166667,0.205128,72.0,0.266667,0.166667,0.205128,72.0,45,728,1004
7,0.0515,2.935874,0.780529,0.266667,0.190476,0.222222,63.0,0.266667,0.190476,0.222222,63.0,0.266667,0.190476,0.222222,63.0,0.266667,0.190476,0.222222,63.0,45,728,1004
8,0.0266,2.93082,0.782217,0.222222,0.149254,0.178571,67.0,0.222222,0.149254,0.178571,67.0,0.222222,0.149254,0.178571,67.0,0.222222,0.149254,0.178571,67.0,45,728,1004
9,0.0176,3.209661,0.767023,0.244444,0.171875,0.201835,64.0,0.244444,0.171875,0.201835,64.0,0.244444,0.171875,0.201835,64.0,0.244444,0.171875,0.201835,64.0,45,728,1004


Training model no. 9 of 23 for (9, 'Red_Herring') persuasion technique...


Map:   0%|          | 0/842 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 842/842 [00:00<00:00, 5045.60 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 3545.44 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Red Herring Precision,Red Herring Recall,Red Herring F1-score,Red Herring Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-red Herring Support,I-red Herring Support
1,1.2012,1.214582,0.700244,0.0,0.0,0.0,11.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,11.0,835,23,373
2,0.5851,1.31664,0.826158,0.347826,0.145455,0.205128,55.0,0.347826,0.145455,0.205128,55.0,0.347826,0.145455,0.205128,55.0,0.347826,0.145455,0.205128,55.0,835,23,373
3,0.1974,0.829771,0.874086,0.521739,0.363636,0.428571,33.0,0.521739,0.363636,0.428571,33.0,0.521739,0.363636,0.428571,33.0,0.521739,0.363636,0.428571,33.0,835,23,373
4,0.1501,1.750185,0.798538,0.565217,0.302326,0.393939,43.0,0.565217,0.302326,0.393939,43.0,0.565217,0.302326,0.393939,43.0,0.565217,0.302326,0.393939,43.0,835,23,373
5,0.0867,1.272262,0.846466,0.521739,0.3,0.380952,40.0,0.521739,0.3,0.380952,40.0,0.521739,0.3,0.380952,40.0,0.521739,0.3,0.380952,40.0,835,23,373


Training model no. 10 of 23 for (10, 'Whataboutism') persuasion technique...


Map:   0%|          | 0/373 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 373/373 [00:00<00:00, 4453.86 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 2900.30 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Whataboutism Precision,Whataboutism Recall,Whataboutism F1-score,Whataboutism Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,B-whataboutism Support,I-whataboutism Support,O Support
1,1.451,1.121069,0.396677,0.0,0.0,0.0,13.0,0.0,0.0,0.0,13.0,0.0,0.0,0.0,13.0,0.0,0.0,0.0,13.0,23,593,347
2,0.8702,1.195066,0.617861,0.0,0.0,0.0,39.0,0.0,0.0,0.0,39.0,0.0,0.0,0.0,39.0,0.0,0.0,0.0,39.0,23,593,347
3,0.6067,1.021496,0.753894,0.217391,0.16129,0.185185,31.0,0.217391,0.16129,0.185185,31.0,0.217391,0.16129,0.185185,31.0,0.217391,0.16129,0.185185,31.0,23,593,347
4,0.3271,1.083045,0.798546,0.304348,0.225806,0.259259,31.0,0.304348,0.225806,0.259259,31.0,0.304348,0.225806,0.259259,31.0,0.304348,0.225806,0.259259,31.0,23,593,347
5,0.1755,1.267629,0.796469,0.304348,0.194444,0.237288,36.0,0.304348,0.194444,0.237288,36.0,0.304348,0.194444,0.237288,36.0,0.304348,0.194444,0.237288,36.0,23,593,347
6,0.0736,1.779776,0.800623,0.26087,0.15,0.190476,40.0,0.26087,0.15,0.190476,40.0,0.26087,0.15,0.190476,40.0,0.26087,0.15,0.190476,40.0,23,593,347


Training model no. 11 of 23 for (11, 'Slogans') persuasion technique...


Map:   0%|          | 0/2629 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 2629/2629 [00:00<00:00, 4780.93 examples/s]
Map: 100%|██████████| 271/271 [00:00<00:00, 4623.41 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Slogans Precision,Slogans Recall,Slogans F1-score,Slogans Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-slogans Support,I-slogans Support
1,0.6897,0.482358,0.883454,0.123711,0.126316,0.125,95.0,0.123711,0.126316,0.125,95.0,0.123711,0.126316,0.125,95.0,0.123711,0.126316,0.125,95.0,2922,97,559
2,0.3953,0.53062,0.906372,0.319588,0.292453,0.305419,106.0,0.319588,0.292453,0.305419,106.0,0.319588,0.292453,0.305419,106.0,0.319588,0.292453,0.305419,106.0,2922,97,559
3,0.2993,0.694472,0.906093,0.391753,0.319328,0.351852,119.0,0.391753,0.319328,0.351852,119.0,0.391753,0.319328,0.351852,119.0,0.391753,0.319328,0.351852,119.0,2922,97,559
4,0.2032,0.680419,0.904975,0.453608,0.444444,0.44898,99.0,0.453608,0.444444,0.44898,99.0,0.453608,0.444444,0.44898,99.0,0.453608,0.444444,0.44898,99.0,2922,97,559
5,0.1886,0.817338,0.904416,0.42268,0.383178,0.401961,107.0,0.42268,0.383178,0.401961,107.0,0.42268,0.383178,0.401961,107.0,0.42268,0.383178,0.401961,107.0,2922,97,559
6,0.1579,0.783813,0.908888,0.453608,0.407407,0.429268,108.0,0.453608,0.407407,0.429268,108.0,0.453608,0.407407,0.429268,108.0,0.453608,0.407407,0.429268,108.0,2922,97,559


Training model no. 12 of 23 for (12, 'Appeal_to_Time') persuasion technique...


Map:   0%|          | 0/259 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 259/259 [00:00<00:00, 4615.04 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 4206.86 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Appeal To Time Precision,Appeal To Time Recall,Appeal To Time F1-score,Appeal To Time Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-appeal To Time Support,I-appeal To Time Support
1,1.3406,1.189686,0.63145,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,514,27,273
2,0.7733,1.202635,0.652334,0.0,0.0,0.0,18.0,0.0,0.0,0.0,18.0,0.0,0.0,0.0,18.0,0.0,0.0,0.0,18.0,514,27,273
3,0.5084,1.840097,0.743243,0.037037,0.017241,0.023529,58.0,0.037037,0.017241,0.023529,58.0,0.037037,0.017241,0.023529,58.0,0.037037,0.017241,0.023529,58.0,514,27,273
4,0.2749,1.945092,0.756757,0.148148,0.088889,0.111111,45.0,0.148148,0.088889,0.111111,45.0,0.148148,0.088889,0.111111,45.0,0.148148,0.088889,0.111111,45.0,514,27,273
5,0.1451,2.075658,0.788698,0.222222,0.12766,0.162162,47.0,0.222222,0.12766,0.162162,47.0,0.222222,0.12766,0.162162,47.0,0.222222,0.12766,0.162162,47.0,514,27,273
6,0.0673,2.384689,0.757985,0.111111,0.076923,0.090909,39.0,0.111111,0.076923,0.090909,39.0,0.111111,0.076923,0.090909,39.0,0.111111,0.076923,0.090909,39.0,514,27,273
7,0.0489,2.832501,0.756757,0.259259,0.166667,0.202899,42.0,0.259259,0.166667,0.202899,42.0,0.259259,0.166667,0.202899,42.0,0.259259,0.166667,0.202899,42.0,514,27,273
8,0.0238,3.082626,0.734644,0.259259,0.162791,0.2,43.0,0.259259,0.162791,0.2,43.0,0.259259,0.162791,0.2,43.0,0.259259,0.162791,0.2,43.0,514,27,273
9,0.0231,2.974648,0.761671,0.222222,0.15,0.179104,40.0,0.222222,0.15,0.179104,40.0,0.222222,0.15,0.179104,40.0,0.222222,0.15,0.179104,40.0,514,27,273


  _warn_prf(average, modifier, msg_start, len(result))


Training model no. 13 of 23 for (13, 'Conversation_Killer') persuasion technique...


Map:   0%|          | 0/2633 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 2633/2633 [00:00<00:00, 5426.90 examples/s]
Map: 100%|██████████| 389/389 [00:00<00:00, 4825.62 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Conversation Killer Precision,Conversation Killer Recall,Conversation Killer F1-score,Conversation Killer Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,B-conversation Killer Support,I-conversation Killer Support,O Support
1,0.8758,1.376837,0.791142,0.07483,0.122222,0.092827,90.0,0.07483,0.122222,0.092827,90.0,0.07483,0.122222,0.092827,90.0,0.07483,0.122222,0.092827,90.0,147,1038,3579
2,0.5333,1.128862,0.847397,0.238095,0.207101,0.221519,169.0,0.238095,0.207101,0.221519,169.0,0.238095,0.207101,0.221519,169.0,0.238095,0.207101,0.221519,169.0,147,1038,3579
3,0.3762,1.465284,0.837112,0.217687,0.262295,0.237918,122.0,0.217687,0.262295,0.237918,122.0,0.217687,0.262295,0.237918,122.0,0.217687,0.262295,0.237918,122.0,147,1038,3579
4,0.2329,1.474508,0.850756,0.346939,0.284916,0.312883,179.0,0.346939,0.284916,0.312883,179.0,0.346939,0.284916,0.312883,179.0,0.346939,0.284916,0.312883,179.0,147,1038,3579
5,0.1795,1.700255,0.848447,0.360544,0.368056,0.364261,144.0,0.360544,0.368056,0.364261,144.0,0.360544,0.368056,0.364261,144.0,0.360544,0.368056,0.364261,144.0,147,1038,3579
6,0.1167,1.61344,0.859992,0.319728,0.338129,0.328671,139.0,0.319728,0.338129,0.328671,139.0,0.319728,0.338129,0.328671,139.0,0.319728,0.338129,0.328671,139.0,147,1038,3579
7,0.0764,1.942927,0.855584,0.442177,0.338542,0.383481,192.0,0.442177,0.338542,0.383481,192.0,0.442177,0.338542,0.383481,192.0,0.442177,0.338542,0.383481,192.0,147,1038,3579
8,0.0546,2.087618,0.856003,0.414966,0.321053,0.362018,190.0,0.414966,0.321053,0.362018,190.0,0.414966,0.321053,0.362018,190.0,0.414966,0.321053,0.362018,190.0,147,1038,3579
9,0.0301,2.192946,0.850966,0.401361,0.322404,0.357576,183.0,0.401361,0.322404,0.357576,183.0,0.401361,0.322404,0.357576,183.0,0.401361,0.322404,0.357576,183.0,147,1038,3579


Training model no. 14 of 23 for (14, 'Loaded_Language') persuasion technique...


Map:   0%|          | 0/37063 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 37063/37063 [00:11<00:00, 3259.31 examples/s]
Map: 100%|██████████| 3077/3077 [00:01<00:00, 3021.26 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Loaded Language Precision,Loaded Language Recall,Loaded Language F1-score,Loaded Language Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-loaded Language Support,I-loaded Language Support
1,0.5662,0.591902,0.88596,0.005222,0.214286,0.010195,28.0,0.005222,0.214286,0.010195,28.0,0.005222,0.214286,0.010195,28.0,0.005222,0.214286,0.010195,28.0,43307,1149,4474
2,0.3577,0.645528,0.913141,0.155788,0.288245,0.20226,621.0,0.155788,0.288245,0.20226,621.0,0.155788,0.288245,0.20226,621.0,0.155788,0.288245,0.20226,621.0,43307,1149,4474
3,0.2501,0.589573,0.922236,0.260226,0.375157,0.307297,797.0,0.260226,0.375157,0.307297,797.0,0.260226,0.375157,0.307297,797.0,0.260226,0.375157,0.307297,797.0,43307,1149,4474
4,0.1771,0.733265,0.9243,0.278503,0.424403,0.336311,754.0,0.278503,0.424403,0.336311,754.0,0.278503,0.424403,0.336311,754.0,0.278503,0.424403,0.336311,754.0,43307,1149,4474
5,0.1297,0.820128,0.924525,0.328111,0.46088,0.383325,818.0,0.328111,0.46088,0.383325,818.0,0.328111,0.46088,0.383325,818.0,0.328111,0.46088,0.383325,818.0,43307,1149,4474
6,0.0949,0.827661,0.926834,0.355091,0.442037,0.393822,923.0,0.355091,0.442037,0.393822,923.0,0.355091,0.442037,0.393822,923.0,0.355091,0.442037,0.393822,923.0,43307,1149,4474
7,0.0696,0.956238,0.926957,0.389904,0.444444,0.415392,1008.0,0.389904,0.444444,0.415392,1008.0,0.389904,0.444444,0.415392,1008.0,0.389904,0.444444,0.415392,1008.0,43307,1149,4474
8,0.0503,1.045658,0.926262,0.383812,0.452308,0.415254,975.0,0.383812,0.452308,0.415254,975.0,0.383812,0.452308,0.415254,975.0,0.383812,0.452308,0.415254,975.0,43307,1149,4474
9,0.0349,1.036601,0.927039,0.419495,0.443831,0.43132,1086.0,0.419495,0.443831,0.43132,1086.0,0.419495,0.443831,0.43132,1086.0,0.419495,0.443831,0.43132,1086.0,43307,1149,4474
10,0.0235,1.151109,0.926569,0.412533,0.44717,0.429153,1060.0,0.412533,0.44717,0.429153,1060.0,0.412533,0.44717,0.429153,1060.0,0.412533,0.44717,0.429153,1060.0,43307,1149,4474


Training model no. 15 of 23 for (15, 'Repetition') persuasion technique...


Map:   0%|          | 0/9247 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 9247/9247 [00:02<00:00, 3142.86 examples/s]
Map: 100%|██████████| 445/445 [00:00<00:00, 5524.88 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Repetition Precision,Repetition Recall,Repetition F1-score,Repetition Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-repetition Support,I-repetition Support
1,0.5358,0.540047,0.920983,0.272727,0.407767,0.326848,103.0,0.272727,0.407767,0.326848,103.0,0.272727,0.407767,0.326848,103.0,0.272727,0.407767,0.326848,103.0,5646,154,591
2,0.2696,0.615932,0.930684,0.435065,0.567797,0.492647,118.0,0.435065,0.567797,0.492647,118.0,0.435065,0.567797,0.492647,118.0,0.435065,0.567797,0.492647,118.0,5646,154,591
3,0.1758,0.636303,0.931936,0.532468,0.621212,0.573427,132.0,0.532468,0.621212,0.573427,132.0,0.532468,0.621212,0.573427,132.0,0.532468,0.621212,0.573427,132.0,5646,154,591
4,0.1165,0.68289,0.932405,0.623377,0.64,0.631579,150.0,0.623377,0.64,0.631579,150.0,0.623377,0.64,0.631579,150.0,0.623377,0.64,0.631579,150.0,5646,154,591
5,0.083,0.787172,0.935378,0.577922,0.684615,0.626761,130.0,0.577922,0.684615,0.626761,130.0,0.577922,0.684615,0.626761,130.0,0.577922,0.684615,0.626761,130.0,5646,154,591
6,0.0692,0.76079,0.937725,0.623377,0.727273,0.671329,132.0,0.623377,0.727273,0.671329,132.0,0.623377,0.727273,0.671329,132.0,0.623377,0.727273,0.671329,132.0,5646,154,591
7,0.0431,0.941836,0.939133,0.649351,0.757576,0.699301,132.0,0.649351,0.757576,0.699301,132.0,0.649351,0.757576,0.699301,132.0,0.649351,0.757576,0.699301,132.0,5646,154,591
8,0.0318,0.975757,0.934283,0.636364,0.640523,0.638436,153.0,0.636364,0.640523,0.638436,153.0,0.636364,0.640523,0.638436,153.0,0.636364,0.640523,0.638436,153.0,5646,154,591
9,0.0196,1.025168,0.936317,0.649351,0.689655,0.668896,145.0,0.649351,0.689655,0.668896,145.0,0.649351,0.689655,0.668896,145.0,0.649351,0.689655,0.668896,145.0,5646,154,591


Training model no. 16 of 23 for (16, 'Exaggeration-Minimisation') persuasion technique...


Map:   0%|          | 0/7837 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 7837/7837 [00:02<00:00, 3395.85 examples/s]
Map: 100%|██████████| 685/685 [00:00<00:00, 4760.09 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Exaggeration-minimisation Precision,Exaggeration-minimisation Recall,Exaggeration-minimisation F1-score,Exaggeration-minimisation Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-exaggeration-minimisation Support,I-exaggeration-minimisation Support
1,0.8162,0.933708,0.826865,0.024272,0.051546,0.033003,97.0,0.024272,0.051546,0.033003,97.0,0.024272,0.051546,0.033003,97.0,0.024272,0.051546,0.033003,97.0,7496,206,1857
2,0.4321,0.933678,0.874673,0.213592,0.295302,0.247887,149.0,0.213592,0.295302,0.247887,149.0,0.213592,0.295302,0.247887,149.0,0.213592,0.295302,0.247887,149.0,7496,206,1857
3,0.2837,1.18977,0.871639,0.276699,0.345455,0.307278,165.0,0.276699,0.345455,0.307278,165.0,0.276699,0.345455,0.307278,165.0,0.276699,0.345455,0.307278,165.0,7496,206,1857
4,0.1861,1.488383,0.872999,0.378641,0.329114,0.352144,237.0,0.378641,0.329114,0.352144,237.0,0.378641,0.329114,0.352144,237.0,0.378641,0.329114,0.352144,237.0,7496,206,1857
5,0.1305,1.429829,0.881891,0.368932,0.447059,0.404255,170.0,0.368932,0.447059,0.404255,170.0,0.368932,0.447059,0.404255,170.0,0.368932,0.447059,0.404255,170.0,7496,206,1857
6,0.0903,1.739597,0.876347,0.407767,0.383562,0.395294,219.0,0.407767,0.383562,0.395294,219.0,0.407767,0.383562,0.395294,219.0,0.407767,0.383562,0.395294,219.0,7496,206,1857
7,0.0625,1.814231,0.875196,0.407767,0.388889,0.398104,216.0,0.407767,0.388889,0.398104,216.0,0.407767,0.388889,0.398104,216.0,0.407767,0.388889,0.398104,216.0,7496,206,1857


Training model no. 17 of 23 for (17, 'Obfuscation-Vagueness-Confusion') persuasion technique...


Map:   0%|          | 0/788 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 788/788 [00:00<00:00, 4250.68 examples/s]
Map: 100%|██████████| 140/140 [00:00<00:00, 4268.11 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Obfuscation-vagueness-confusion Precision,Obfuscation-vagueness-confusion Recall,Obfuscation-vagueness-confusion F1-score,Obfuscation-vagueness-confusion Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-obfuscation-vagueness-confusion Support,I-obfuscation-vagueness-confusion Support
1,1.3907,1.706384,0.639079,0.0,0.0,0.0,147.0,0.0,0.0,0.0,147.0,0.0,0.0,0.0,147.0,0.0,0.0,0.0,147.0,1138,47,857
2,1.0359,1.205768,0.577865,0.0,0.0,0.0,21.0,0.0,0.0,0.0,21.0,0.0,0.0,0.0,21.0,0.0,0.0,0.0,21.0,1138,47,857
3,0.5982,1.994315,0.608717,0.021277,0.027778,0.024096,36.0,0.021277,0.027778,0.024096,36.0,0.021277,0.027778,0.024096,36.0,0.021277,0.027778,0.024096,36.0,1138,47,857
4,0.3042,2.719798,0.612635,0.0,0.0,0.0,37.0,0.0,0.0,0.0,37.0,0.0,0.0,0.0,37.0,0.0,0.0,0.0,37.0,1138,47,857
5,0.2127,2.340461,0.700294,0.042553,0.029412,0.034783,68.0,0.042553,0.029412,0.034783,68.0,0.042553,0.029412,0.034783,68.0,0.042553,0.029412,0.034783,68.0,1138,47,857
6,0.18,2.855759,0.663565,0.085106,0.051948,0.064516,77.0,0.085106,0.051948,0.064516,77.0,0.085106,0.051948,0.064516,77.0,0.085106,0.051948,0.064516,77.0,1138,47,857
7,0.0637,3.410347,0.659647,0.170213,0.096386,0.123077,83.0,0.170213,0.096386,0.123077,83.0,0.170213,0.096386,0.123077,83.0,0.170213,0.096386,0.123077,83.0,1138,47,857
8,0.0621,3.489111,0.670421,0.148936,0.093333,0.114754,75.0,0.148936,0.093333,0.114754,75.0,0.148936,0.093333,0.114754,75.0,0.148936,0.093333,0.114754,75.0,1138,47,857
9,0.027,3.755701,0.659158,0.191489,0.125,0.151261,72.0,0.191489,0.125,0.151261,72.0,0.191489,0.125,0.151261,72.0,0.191489,0.125,0.151261,72.0,1138,47,857
10,0.0132,3.745851,0.675318,0.148936,0.102941,0.121739,68.0,0.148936,0.102941,0.121739,68.0,0.148936,0.102941,0.121739,68.0,0.148936,0.102941,0.121739,68.0,1138,47,857


Training model no. 18 of 23 for (18, 'Name_Calling-Labeling') persuasion technique...


Map:   0%|          | 0/20141 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 20141/20141 [00:06<00:00, 3054.48 examples/s]
Map: 100%|██████████| 2205/2205 [00:00<00:00, 4209.96 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Name Calling-labeling Precision,Name Calling-labeling Recall,Name Calling-labeling F1-score,Name Calling-labeling Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-name Calling-labeling Support,I-name Calling-labeling Support
1,0.4914,0.339445,0.924685,0.023284,0.052925,0.03234,359.0,0.023284,0.052925,0.03234,359.0,0.023284,0.052925,0.03234,359.0,0.023284,0.052925,0.03234,359.0,29630,816,2217
2,0.3017,0.401619,0.929431,0.106618,0.279743,0.154392,311.0,0.106618,0.279743,0.154392,311.0,0.106618,0.279743,0.154392,311.0,0.106618,0.279743,0.154392,311.0,29630,816,2217
3,0.2199,0.364319,0.947035,0.324755,0.419304,0.366022,632.0,0.324755,0.419304,0.366022,632.0,0.324755,0.419304,0.366022,632.0,0.324755,0.419304,0.366022,632.0,29630,816,2217
4,0.1553,0.401439,0.945351,0.318627,0.450607,0.373295,577.0,0.318627,0.450607,0.373295,577.0,0.318627,0.450607,0.373295,577.0,0.318627,0.450607,0.373295,577.0,29630,816,2217
5,0.1109,0.417791,0.950586,0.394608,0.477037,0.431925,675.0,0.394608,0.477037,0.431925,675.0,0.394608,0.477037,0.431925,675.0,0.394608,0.477037,0.431925,675.0,29630,816,2217
6,0.0763,0.531878,0.950341,0.403186,0.534091,0.459497,616.0,0.403186,0.534091,0.459497,616.0,0.403186,0.534091,0.459497,616.0,0.403186,0.534091,0.459497,616.0,29630,816,2217
7,0.0587,0.575605,0.949668,0.442402,0.481976,0.461342,749.0,0.442402,0.481976,0.461342,749.0,0.442402,0.481976,0.461342,749.0,0.442402,0.481976,0.461342,749.0,29630,816,2217
8,0.0399,0.652662,0.951566,0.463235,0.519945,0.489955,727.0,0.463235,0.519945,0.489955,727.0,0.463235,0.519945,0.489955,727.0,0.463235,0.519945,0.489955,727.0,29630,816,2217
9,0.0254,0.675231,0.951535,0.448529,0.512605,0.478431,714.0,0.448529,0.512605,0.478431,714.0,0.448529,0.512605,0.478431,714.0,0.448529,0.512605,0.478431,714.0,29630,816,2217
10,0.0147,0.733243,0.952209,0.473039,0.523745,0.497102,737.0,0.473039,0.523745,0.497102,737.0,0.473039,0.523745,0.497102,737.0,0.473039,0.523745,0.497102,737.0,29630,816,2217


Training model no. 19 of 23 for (19, 'Doubt') persuasion technique...


Map:   0%|          | 0/13259 [00:00<?, ? examples/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 13259/13259 [00:03<00:00, 3317.35 examples/s]
Map: 100%|██████████| 1731/1731 [00:00<00:00, 4857.68 examples/s]
Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Micro F1,Doubt Precision,Doubt Recall,Doubt F1-score,Doubt Support,Micro avg Precision,Micro avg Recall,Micro avg F1-score,Micro avg Support,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support,O Support,B-doubt Support,I-doubt Support
1,1.1612,0.976914,0.639543,0.001393,0.004082,0.002077,245.0,0.001393,0.004082,0.002077,245.0,0.001393,0.004082,0.002077,245.0,0.001393,0.004082,0.002077,245.0,17252,718,11709
2,0.7401,1.176096,0.722396,0.0961,0.113487,0.104072,608.0,0.0961,0.113487,0.104072,608.0,0.0961,0.113487,0.104072,608.0,0.0961,0.113487,0.104072,608.0,17252,718,11709
3,0.4946,1.276056,0.757573,0.164345,0.171014,0.167614,690.0,0.164345,0.171014,0.167614,690.0,0.164345,0.171014,0.167614,690.0,0.164345,0.171014,0.167614,690.0,17252,718,11709
