In [1]:
import sys
!{sys.executable} -m pip install nervaluate transformers[torch] datasets evaluate seqeval torch jupyter ipywidgets



In [2]:
import ast
import configparser
config = configparser.ConfigParser() #init
config.read('../configs.ini') # init config with values from configs.ini
from datetime import date
import datetime
import datasets
from datasets import Dataset, DatasetDict
from functools import partial
import inspect
import os
import pandas as pd
print(pd.__version__)
import torch
torch.cuda.empty_cache() # for memory management and especially to avoid OutOfMemoryError
import torch.nn as nn
from transformers import AutoConfig, AutoModel
from transformers import TrainerCallback
from transformers.modeling_outputs import TokenClassifierOutput
from transformers import AutoTokenizer, DataCollatorForTokenClassification, AutoModelForTokenClassification, TrainingArguments, Trainer
import transformers
from typing import List, Dict
import uuid
import logging
from myLogging import get_logger, my_function_inputs_logger, pre_defined_function_inputs_logger
from myModels import ExtraLastLayerModel, ModelWrapper
from myUtilities import get_ner_map, read_convert_config_training_values, get_short_uuid, read_config_values_as_dict, extract_config_training_values


2.2.2


In [3]:
# VARIABLES
# Only thing to change in this file (in ideal case)
#experiment = 'loneliness'
#experiment = 'incontinence_v5'
#experiment = 'loneliness.03_train_with_comparison_parameters.original'
#experiment = 'loneliness.C.original'
#experiment = 'loneliness.03_train_with_comparison_parameters.extralayer'
#experiment = 'mobility_v5.C.original'
#experiment = 'mobility_v5.C.extralayer'

#experiment = 'Falling_NER_v3_20231114_orig_par' # Done
#experiment = 'Mobility_2404_20240619_orig_par' # Done
#experiment = 'Loneliness_beta0_20231123_orig_par' #Done
#experiment = 'Incontinence_NER_v5_20231208_orig_par' # Done

#experiment = 'Falling_NER_v3_20231114_extra_par'
#experiment = 'Mobility_2404_20240619_extra_par'
#experiment = 'Loneliness_beta0_20231123_extra_par'
#experiment = 'Incontinence_NER_v5_20231208_extra_par'

#experiment = 'Falling_NER_v3_20231114_orig_str' # Done
#experiment = 'Mobility_2404_20240619_orig_str' # Done
#experiment = 'Loneliness_beta0_20231123_orig_str' #Done
#experiment = 'Incontinence_NER_v5_20231208_orig_str' #Done

#experiment = 'Falling_NER_v3_20231114_orig_par_opt' # Done
#experiment = 'Mobility_2404_20240619_orig_par_opt' #Done
#experiment = 'Loneliness_beta0_20231123_orig_par_opt' #Done??
experiment = 'Incontinence_NER_v5_20231208_orig_par_opt'

print(experiment)

Incontinence_NER_v5_20231208_orig_par_opt


In [4]:
experiment_name = config[experiment]['experiment_name'] # dataset
experiment_identifier = config[experiment]['experiment_file'] # training file
experiment_model = config[experiment]['model'] # original or extralayer

print('experiment_name', experiment_name)
print('experiment_identifier', experiment_identifier)
print('experiment_model', experiment_model)

run_id = get_short_uuid() # get random uuid for each run or select it manually (for logger, model saving)
dataset_identifier = experiment_name # for logger
today = date.today()
today = today.strftime("%Y_%m_%d")

# CONSTANTS from config
# to read data from
data_folder = config[experiment]['data_folder'] # must exist
data_subfolder = config[experiment]['data_subfolder'] # must exist
# path to DATA save folder
data_save_folder_path = os.path.join(data_folder, data_subfolder)
print("data_save_folder_path", data_save_folder_path)

# to read/write models
model_folder = config[experiment]['model_folder'] # must exist
model_subfolder = config[experiment]['model_subfolder'] # must exist
# path to MODEL save folder
model_save_folder_path = os.path.join(model_folder, model_subfolder)
model_save_folder_path = os.path.join(model_save_folder_path, experiment_identifier)
model_save_folder_path = os.path.join(model_save_folder_path, f"{today}_{run_id}")
print("model_save_folder_path", model_save_folder_path)

logger = get_logger('train.log', run_id=run_id, experiment_identifier=experiment_identifier, dataset_identifier=dataset_identifier)
logger.info(f"\n--------Starting training--------\n")
logger.info(f"\nModel: {experiment_model}\n")

experiment_name Incontinence_NER_v5_20231208_orig_par_opt
experiment_identifier C
experiment_model original
data_save_folder_path ../_data/data_Incontinence_NER_v5_20231208
model_save_folder_path ../_trained_models/Incontinence_NER_v5_20231208_orig_par_opt/C/2024_07_18_cbe7f211


In [5]:
logger.info("\nTRAIN DATA, TEST DATA, and VAL DATA\n")
train_data = pre_defined_function_inputs_logger(logger, pd.read_parquet, os.path.join(data_save_folder_path, "train_data.parquet"))
test_data = pre_defined_function_inputs_logger(logger, pd.read_parquet, os.path.join(data_save_folder_path, "test_data.parquet"))
val_data = pre_defined_function_inputs_logger(logger, pd.read_parquet, os.path.join(data_save_folder_path, "val_data.parquet"))

test_data = test_data[test_data['words'].apply(len) <= 512]
test_data.reset_index(drop=False, inplace=True)

train_data = train_data[train_data['words'].apply(len) <= 512]
train_data.reset_index(drop=False, inplace=True)

val_data = val_data[val_data['words'].apply(len) <= 512]
val_data.reset_index(drop=False, inplace=True)

In [6]:
train_data.shape

(2048, 19)

In [7]:
#ner_map = get_ner_map(config, experiment) # from myUtilities
ner_map = {'O': 0, 'B-Ongelmia': 1, 'I-Ongelmia': 2, 'B-Ei ongelmia': 3, 'I-Ei ongelmia': 4} # Only for incontinence!
print("ner_map", ner_map)
logger.info(f"NER MAP: {ner_map}")

label_list = list(ner_map.keys())
label2id = ner_map
id2label = {v: k for k,v in label2id.items()}
logger.info(f"id2label: {id2label}")

# Function to convert a dataframe row to the desired format
def row_to_dict(row, idx):
    return {
        'id': str(idx),
        'ner_tags': [ner_map[tag] for tag in row['bi_tags']],
        'tokens': list(row['words'])
    }

# Transforming the DataFrame
transformed_data_train = [row_to_dict(row, idx) for idx, row in train_data.iterrows()]
transformed_data_test = [row_to_dict(row, idx) for idx, row in test_data.iterrows()]
transformed_data_val = [row_to_dict(row, idx) for idx, row in val_data.iterrows()]

transformed_data = {"train": transformed_data_train, "test": transformed_data_test, "val": transformed_data_val}
# Printing out the first entry as a sample
#print(transformed_data)

ner_map {'O': 0, 'B-Ongelmia': 1, 'I-Ongelmia': 2, 'B-Ei ongelmia': 3, 'I-Ei ongelmia': 4}


In [8]:
# Convert each split separately
def split_to_dataset(data_split):
    # Convert the list of dictionaries to separate lists for each column
    ids = [entry["id"] for entry in data_split]
    ner_tags = [entry["ner_tags"] for entry in data_split]
    tokens = [entry["tokens"] for entry in data_split]
    
    # Construct the dictionary format that Dataset.from_dict() expects
    formatted_data = {
        "id": ids,
        "ner_tags": ner_tags,
        "tokens": tokens
    }
    
    # Convert to Dataset
    return Dataset.from_dict(formatted_data)

# Convert each dataset split
train_dataset = split_to_dataset(transformed_data["train"])
test_dataset = split_to_dataset(transformed_data["test"])
validation_dataset = split_to_dataset(transformed_data["val"])

# Combine into DatasetDict
dataset_dict = DatasetDict({
    "train": train_dataset,
    "test": test_dataset,
    "val": validation_dataset
})


## Initializing tools

In [9]:
# get tokenizer directory path from config
tokenizer_type = config[experiment]['tokenizer_path']
print(tokenizer_type)
logger.info(f"\nTOKENIZER: {tokenizer_type}\n")

../_bert_bases/bert-base-finnish-cased-transformers-v1


In [10]:
# get tokenizer
# Juho removed add_special_tokens below
tokenizer = pre_defined_function_inputs_logger(
    logger, 
    transformers.AutoTokenizer.from_pretrained, 
    tokenizer_type, 
    is_split_into_words=True, truncation=True, padding=True, max_length=1024)

# the same but without logging
#tokenizer = transformers.AutoTokenizer.from_pretrained("bert-base-finnish-cased-transformers-v1", is_split_into_words=True, truncation=True, padding=True, max_length=1024, add_special_tokens=True)

In [11]:
def tokenize_and_align_labels(examples):
    #tokenized_inputs = tokenizer(examples["tokens"], truncation = True, is_split_into_words=True)
    # Juho removed add_special_tokens=True below
    tokenized_inputs = tokenizer(examples["tokens"], is_split_into_words=True, truncation=True, max_length=512)
    labels = []
    for i, label in enumerate(examples[f"ner_tags"]):
        #print(label)
        word_ids = tokenized_inputs.word_ids(batch_index=i)  # Map tokens to their respective word.
        previous_word_idx = None
        label_ids = []
        #print("word_ids", word_ids)
        for word_idx in word_ids:  # Set the special tokens to -100.
            # print("word_idx: ", word_idx)

            if word_idx is None:
                label_ids.append(-100)
            elif word_idx != previous_word_idx:  # Only label the first token of a given word.
                label_ids.append(label[word_idx])
            else:
                label_ids.append(label[word_idx])
                """
                label_text = id2label[label[word_idx]]
                if label_text.startswith('B-'):
                    label_ids.append(label2id[label_text.replace('B-', 'I-')])
                else:
                    label_ids.append(label[word_idx])
                """

            previous_word_idx = word_idx
        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs



tokenized_data = dataset_dict.map(tokenize_and_align_labels, batched=True)

Map:   0%|          | 0/2048 [00:00<?, ? examples/s]

Map:   0%|          | 0/256 [00:00<?, ? examples/s]

Map:   0%|          | 0/257 [00:00<?, ? examples/s]

In [12]:
# data collator creates batch. You can also do modifications to data, such as padding text, with this.
data_collator = transformers.DataCollatorForTokenClassification(tokenizer=tokenizer)

In [13]:
label2id

{'O': 0,
 'B-Ongelmia': 1,
 'I-Ongelmia': 2,
 'B-Ei ongelmia': 3,
 'I-Ei ongelmia': 4}

### Select model

In [14]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
logger.info(f"\nDEVICE: {device}\n") # just in case it somehow changed

logger.info("\nMODEL LOAD\n")
# model wrapper
mw = None 
if experiment_model == 'original':
    # original model instantiation
    # model = transformers.AutoModelForTokenClassification.from_pretrained(
    #     "bert-base-finnish-cased-transformers-v1", num_labels=len(id2label), id2label=id2label, label2id=label2id
    # )
    model = pre_defined_function_inputs_logger(
        logger, 
        transformers.AutoModelForTokenClassification.from_pretrained,
        tokenizer_type, 
        num_labels=len(id2label), 
        id2label=id2label, 
        label2id=label2id)

    from sklearn.metrics import classification_report
    import numpy as np
    import seqeval
    from seqeval.metrics import classification_report, accuracy_score, f1_score
    from seqeval.scheme import IOB2

    #!{sys.executable} -m pip install nervaluate

    from nervaluate import Evaluator 
    #from sklearn.metrics import classification_report

    def compute_metrics(p, strictness='partial'):
        predictions, labels = p
        predictions = np.argmax(predictions, axis=-1)

        true_predictions = [
            [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
            for prediction, label in zip(predictions, labels)
        ]
        true_labels = [
            [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
            for prediction, label in zip(predictions, labels)
        ]

        evaluator = Evaluator(true_labels, true_predictions, stripped_label_list, loader='list')
        
        # JUHO added result_indices and result_indices_by_tag below, (this returns 4 things sometimes)
        evaluation_results = evaluator.evaluate()
        if len(evaluation_results) == 4:
            results, results_by_tag, result_indices, result_indices_by_tag = evaluation_results
        else:
            # there are only two values to unpack
            results, results_by_tag = evaluation_results
        #strictness = 'partial' # see https://pypi.org/project/nervaluate/ for available settings
        print("\t\t\t\t Precision \t Recall \t F1 score")
        for label in results_by_tag:
            if label != "O":
                precision_label = results_by_tag[label][strictness]['precision']
                recall_label = results_by_tag[label][strictness]['recall']
                fscore_label = results_by_tag[label][strictness]['f1']
                
                print(" {:<25} \t {:.2f} \t\t {:.2f}\t\t {:.2f}\t".format(label, precision_label, recall_label, fscore_label))
        results_compute = {}
        
        logger.info("\nCHECKPOIT SCORES:")
        logger.info("\nCheckpointU F1-Score: {:.2f}, Recall: {:.2f}, Precision: {:.2f}\n".format(
            results[strictness]['f1'],
            results[strictness]['recall'],
            results[strictness]['precision']))
        print(f"Strictness: {strictness}")
        print("CheckpointU F1-Score: {:.2f} \n".format(results[strictness]['f1']))
        #print("CheckpointU Recall: {:.2f} \n".format(results[strictness]['recall']))
        #print("CheckpointU Precision: {:.2f} \n".format(results[strictness]['precision']))
        results_compute["f1_score"] = results[strictness]['f1']
        results_compute["recall_score"] = results[strictness]['recall']
        results_compute["precision"] = results[strictness]['precision']
        return results_compute

    mw = ModelWrapper(model=model, compute_metrics=compute_metrics)
elif experiment_model == 'extralayer':
    # extralayer model instantiation
    model = AutoModel.from_pretrained(tokenizer_type,config=AutoConfig.from_pretrained(tokenizer_type, output_attentions=True, output_hidden_states=True))
    # the new model getting instantiated
    model = ExtraLastLayerModel(model=model, num_labels=len(id2label), id2label=id2label, label2id=label2id)

    from sklearn.metrics import classification_report
    import numpy as np
    import seqeval
    from seqeval.metrics import classification_report, accuracy_score, f1_score
    from seqeval.scheme import IOB2

    #!{sys.executable} -m pip install nervaluate

    from nervaluate import Evaluator 
    #from sklearn.metrics import classification_report


    def compute_metrics(p, strictness='partial'):
        predictions, labels = p

        #----------------
        # My change to predictions & labels
        if len(predictions) == 2:
            # predictions probably carries labels in its second batch item
            predictions, labels = predictions
        else: pass
        #----------------
        predictions = np.argmax(predictions, axis=-1)

        true_predictions = [
            [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
            for prediction, label in zip(predictions, labels)
        ]
        true_labels = [
            [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
            for prediction, label in zip(predictions, labels)
        ]

        evaluator = Evaluator(true_labels, true_predictions, stripped_label_list, loader='list')
        # Juho added result_indices and result_indices_by_tag below (but we don't use them)
        evaluation_results = evaluator.evaluate()
        if len(evaluation_results) == 4:
            results, results_by_tag, result_indices, result_indices_by_tag = evaluation_results
        else:
            # there are only two values to unpack
            results, results_by_tag = evaluation_results
            
        #strictness = 'partial' # see https://pypi.org/project/nervaluate/ for available settings
        print("\t\t\t\t Precision \t Recall \t F1 score")
        for label in results_by_tag:
            if label != "O":
                precision_label = results_by_tag[label][strictness]['precision']
                recall_label = results_by_tag[label][strictness]['recall']
                fscore_label = results_by_tag[label][strictness]['f1']
                
                print(" {:<25} \t {:.2f} \t\t {:.2f}\t\t {:.2f}\t".format(label, precision_label, recall_label, fscore_label))
        results_compute = {}
        
        logger.info("\nCHECKPOIT SCORES:")
        logger.info("\nCheckpointU F1-Score: {:.2f}, Recall: {:.2f}, Precision: {:.2f}\n".format(
            results[strictness]['f1'],
            results[strictness]['recall'],
            results[strictness]['precision']))
        print("CheckpointU F1-Score: {:.2f} \n".format(results[strictness]['f1']))
        results_compute["f1_score"] = results[strictness]['f1']
        results_compute["recall_score"] = results[strictness]['recall']
        results_compute["precision"] = results[strictness]['precision']
        return results_compute

    mw = ModelWrapper(model=model, compute_metrics=compute_metrics)
else:
    # inform that model is not baked into the pipeline 
    raise NotImplementedError('Given model not found. It is probably not implemented in pipeline.')


device = "cuda:0" if torch.cuda.is_available() else "cpu"
if mw.model is not None:
    logger.info(f"\nExperiment model: {experiment_model}\n")
    # send model to device
    mw.model.to(device)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at ../_bert_bases/bert-base-finnish-cased-transformers-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
# get BIO label list from our configuration file
label_list = config[experiment]['bio_label_list']
label_list = label_list[1:-1].split(',') # str to list
label_list = list(filter(lambda x: len(x) > 0, label_list)) # include non-empty values
label_list = [x.replace(' ', '') for x in label_list] # remove white spaces if any
#print(label_list)

stripped_label_list = list(set([x[2:] if x.startswith('B-') or x.startswith('I-') else x for x in label_list]))
print(stripped_label_list)

['', 'Ongelmia', 'O', 'Eiongelmia']


### Do training 
Do training with the specific parameters, model, and save them and results to file.  

In [16]:
# demonstrate the function
print(read_convert_config_training_values(config, experiment, verbose=False))

{'evaluation_strategy': 'steps', 'learning_rate': 9e-05, 'load_best_model_at_end': True, 'logging_steps': 100, 'num_train_epochs': 10, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'save_strategy': 'steps', 'seed': 42, 'weight_decay': 0.19998}


In [17]:
def get_training_arguments(output_dir=model_save_folder_path, **kwargs):
    # The arguments to train the transformer model.
    # Changing these could inprove the results with our metrics but do so in the configuration file
    training_args = pre_defined_function_inputs_logger(logger, transformers.TrainingArguments,
        output_dir=output_dir, # path is also from configuration
        **kwargs
        )
    return training_args

In [18]:
# new way reads even strictness from configuration file
print(get_training_arguments(output_dir=model_save_folder_path, **extract_config_training_values(read_config_values_as_dict(config, experiment))))
print(type(get_training_arguments(output_dir=model_save_folder_path, **extract_config_training_values(read_config_values_as_dict(config, experiment)))))

# Old way
# Fails to change strictness even if it's given in configs.ini
#print(get_training_arguments(**read_convert_config_training_values(config, experiment, verbose=False)))
#print(type(get_training_arguments(**read_convert_config_training_values(config, experiment, verbose=False))))

TrainingArguments(
_n_gpu=2,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_steps=100,
eval_strategy=steps,
evaluation_strategy=steps,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp



In [19]:
class SavingTrainingResultsCallback(TrainerCallback):
    "A callback that saves the training metrics history at the end of training."

    # def on_epoch_end(self, args, state, control, **kwargs):
    #     print("Training callback called!!")
    #     #print('args', args)
    #     #print('state', state)
    #     #print('control', control)
    #     #print('kwargs', kwargs)
    #     print("state['log_history']", state.log_history)

    def on_train_end(self, args, state, control, **kwargs):
        print("Training callback called!!")
        print("state['log_history']", state.log_history)
        logger.info(f"\nSaving results to: {os.path.join(model_save_folder_path, 'training_results.txt')}")
        with open(os.path.join(model_save_folder_path, 'training_results.txt'), 'a') as f:
            for elem in state.log_history:
                f.write(str(elem)+"\n")


In [20]:
"""Given a default learning rate and weight decay, 
    run the model and capture the F1, 
    then keep the weight decay constant, 
    iterate on training model while increasing the LR (with default delta .00001), 
    keep capturing the F1 as it improves, 
    once it stops improving, then, the code stops. 
    
    You only want to run the decreasing of LR if the F1 never improves from increasing the LR, 
    then check by decreasing the LR (same delta), 
    if the F1 does not improve, then you can say that the default LR is in effect optimal. 
 
    Once you have the optimal LR, keep it constant, 
    then do the same for weight decay as described above for LR, say default 0.20 
    and increment it."""


# # given starting parameters
# lr = float("5e-5")
# delta = float("0.00001")
# iteration = 3
# # init positive and negative search directions
# iteration_pos, iteration_neg = iteration, iteration

# print(lr, delta, iteration)
# current_parameter_settings = read_convert_config_training_values(config, experiment, verbose=False)
# print(current_parameter_settings['learning_rate'])

'Given a default learning rate and weight decay, \n    run the model and capture the F1, \n    then keep the weight decay constant, \n    iterate on training model while increasing the LR (with default delta .00001), \n    keep capturing the F1 as it improves, \n    once it stops improving, then, the code stops. \n    \n    You only want to run the decreasing of LR if the F1 never improves from increasing the LR, \n    then check by decreasing the LR (same delta), \n    if the F1 does not improve, then you can say that the default LR is in effect optimal.\xa0\n \n    Once you have the optimal LR, keep it constant, \n    then do the same for weight decay as described above for LR, say default 0.20 \n    and increment it.'

In [21]:
def get_trainer(current_parameters, mw, tokenized_data, data_collator, tokenizer, strictness):
    logger.info("\ni:{i} TRAINER INIT\n")
    trainer = transformers.Trainer(
        model=mw.model,
        args=get_training_arguments(**current_parameters),
        # extract training related arguments from config values
        #args=get_training_arguments(output_dir=model_save_folder_path, **extract_config_training_values(config_values)),
        train_dataset=tokenized_data["train"],
        eval_dataset=tokenized_data["val"],
        data_collator=data_collator,
        tokenizer=tokenizer,
        #compute_metrics = compute_metrics
        compute_metrics = partial(mw.compute_metrics, strictness=strictness)
    )

    for cb in trainer.callback_handler.callbacks:
        if isinstance(cb, transformers.integrations.MLflowCallback):
            trainer.callback_handler.remove_callback(cb)

    trainer.add_callback(SavingTrainingResultsCallback)
    return trainer

def train_save_model(trainer, i, current_parameters, model_save_folder_path, run_id, experiment):
    """Training and saving the model in one function"""
    # train the model
    logger.info("\nTRAINING...")
    trainer.train()

    # evaluate the model & save it
    logger.info("\nEVALUATING")
    # results contain keys
    # 'eval_loss', 'eval_f1_score', 'eval_recall_score', 
    # 'eval_precision','eval_runtime', 'eval_samples_per_second', 
    # 'eval_steps_per_second', and 'epoch'
    results = trainer.evaluate()

    # add iteration's info to results
    results['experiment'] = experiment
    results['run_id'] = run_id
    model_uuid = str(uuid.uuid4())
    results['uuid'] = model_uuid
    results['today'] = today
    time_stamp = datetime.datetime.now().timestamp()
    results['timestamp'] = time_stamp
    results['learning_rate'] = current_parameters['learning_rate'] # the real results
    results['weight_decay'] = current_parameters['weight_decay']

    # save each result to file
    logger.info(f"\nRESULTS: {results}")
    logger.info(f"\nSaving results to: {os.path.join(model_save_folder_path, 'eval_results.txt')}")
    with open(os.path.join(model_save_folder_path, 'eval_results.txt'), 'a') as f:
        f.write(str(results)+"\n")

    logger.info(f"\nMODEL AND TOKENIZER SAVE INFORMATION:")
    # saving the model and its tokenizer 
    logger.info(f"\nsave model to {model_save_folder_path}/model_training_file_{experiment_identifier}_{today}_RUN_{run_id}_timestamp_{time_stamp}.pt")
    logger.info(f"\nsave tokenizer to f{model_save_folder_path}/tokenizer_{today}_RUN_{run_id}_timestamp_{time_stamp}/")
    pre_defined_function_inputs_logger(
        logger, 
        trainer.save_model, 
        f"{model_save_folder_path}/model_training_file_{experiment_identifier}_{today}_RUN_{run_id}_timestamp_{time_stamp}.pt")
    pre_defined_function_inputs_logger(
        logger, 
        tokenizer.save_pretrained, 
        f"{model_save_folder_path}/tokenizer_{today}_RUN_{run_id}_timestamp_{time_stamp}/")

    logger.info(f"\ni:{i}, done")
    print(f"\ni:{i}, done")
    
    return results

def monotonously_growing_function(trainer, i, current_parameters, model_save_folder_path, run_id, experiment):
    """Returns the next element of the increasing values list. The values represent 'F1 scores'.
    Other values do not change but are set here to avoid KeyError.
    """
    positive_values_list = [0.5, 0.6, 0.7, 0.75, 0.8, 0.85, 0.875, 0.9, 0.925, 0.95] # 10 values in the list
    print('i', i, positive_values_list[i])
    return {
        'eval_loss':0.9999, 
        'eval_f1_score':positive_values_list[i], 
        'eval_recall_score':0.9999,
        'eval_precision':0.9999,
        'eval_runtime': 100.0, 
        'eval_samples_per_second':12.0, 
        'eval_steps_per_second':10.0,
        'epoch': 1.0,
        'learning_rate': 0.00001,
        'weight_decay':0.2
    }
    

def decreasing_function(trainer, i, current_parameters, model_save_folder_path, run_id, experiment):
    """Returns values that are in decreasing order."""
    positive_values_list = [0.5, 0.6, 0.7, 0.75, 0.8, 0.85, 0.875, 0.9, 0.925, 0.95] # 10 values in the list
    reversed_values_list = [positive_values_list[x] for x in range(len(positive_values_list) -1, 0, -1)]
    print('i', i, reversed_values_list[i])
    return {
        'eval_loss':0.9999, 
        'eval_f1_score':reversed_values_list[i], 
        'eval_recall_score':0.9999,
        'eval_precision':0.9999,
        'eval_runtime': 100.0, 
        'eval_samples_per_second':12.0, 
        'eval_steps_per_second':10.0,
        'epoch': 1.0,
        'learning_rate': 0.00001,
        'weight_decay':0.2
    }

def altering_function(trainer, i, current_parameters, model_save_folder_path, run_id, experiment):
    """Returns values that are in preselected order.
    Idea is that the 4th F1 score is lower than 3rd value and 7th value is lower than 6th value.
    """
    #               0    1    2     3     4    5     6     7    8    9      10  11      12
    values_list = [0.5, 0.6, 0.61, 0.59, 0.7, 0.7, 0.68, 0.8, 0.85, 0.875, 0.9, 0.925, 0.95] # 10 values in the list
    print('i', i, values_list[i])
    return {
        'eval_loss':0.9999, 
        'eval_f1_score':values_list[i], 
        'eval_recall_score':0.9999,
        'eval_precision':0.9999,
        'eval_runtime': 100.0, 
        'eval_samples_per_second':12.0, 
        'eval_steps_per_second':10.0,
        'epoch': 1.0,
        'learning_rate': 0.00001,
        'weight_decay':0.2
    }

def make_history(
    k:int, 
    results, 
    target_parameter:str, 
    current_parameters:transformers.training_args.TrainingArguments,
    strictness:str):
    """Makes a dictionary record of the values with current parameters"""
    if isinstance(current_parameters, transformers.training_args.TrainingArguments):
        # print("Current parameters before transforming them to dict")
        # print(f"\n{type(current_parameters)}")
        # print(f"\n{current_parameters}")
        current_parameters = current_parameters.to_dict()
        # print("\nCurrent parameters AFTER transforming them to dict")
        # print(f"\n{type(current_parameters)}")
        # print(f"\n{current_parameters}")
    else: 
        pass
        #print("current_parameters was not of type 'transformers.training_args.TrainingArguments'\n")
    
    #print(f"learning_rate from current parameters: {current_parameters['learning_rate']}")
    #print(f"weight_decay from current parameters: {current_parameters['weight_decay']}")

    return {
        'f1_score': results['eval_f1_score'],
        'eval_f1_score': results['eval_f1_score'],
        'target': target_parameter,
        'parameters': current_parameters,
        #'eval_results': results['eval_results'],
        'eval_results': results,
        'strictness': strictness,
        'k': k
        }

def save_history(model_save_folder_path, history, experiment, run_id):
    """Save the history to file together in the folder"""
    # save each result to file
    history['experiment'] = experiment
    history['run_id'] = run_id

    logger.info(f"\nSaving history to: {os.path.join(model_save_folder_path, 'history.txt')}")
    with open(os.path.join(model_save_folder_path, 'history.txt'), 'w') as f:
        
        f.writelines(f"EXPERIMENT: {history['experiment']}\n")
        f.writelines(f"RUN_ID: {history['run_id']}\n")
        for k in history.keys():
            if str(k).isnumeric():
                
                f.writelines('\nHISTORY\n')
                for key, v in history[k]['history'].items():
                    f.writelines(f"{key}: {v}")

                f.writelines('\nBEST\n')
                for key, v in history[k]['best'].items():
                    f.writelines(f"{key}: {v}")
            else:
                pass


def update_search_direction_parameters(positive_training, i, current_parameters, start_parameters, target_parameter, delta):
    """Update the hyperparameters to match the direction of the search.
    TODO:   Change this function to take optional starting config settings 
            as a parameter instead of automatically reading from configs.ini file.
    """
    if positive_training is True:
        # positive direction
        current_parameters[target_parameter] = current_parameters[target_parameter] + delta
    else:
        # negative direction
        if i <= 0:
            # update settings back to current 'origo'
            current_parameters = start_parameters
            print("negative direction", target_parameter, start_parameters)
        else: pass

        candidate_value = current_parameters[target_parameter] - delta

        # update the hyperparameter
        if candidate_value < 0.0:
            raise ValueError("Negative value")
            #current_parameters[target_parameter] = 0.0
        else:
            current_parameters[target_parameter] = candidate_value

    return current_parameters

def get_hyperparameter_settings_from_history(history):
    """Get best settings based on F1 score from the history dict"""
    f1_prev, f1_ind = 0.0, 0
    for k, v in history.items():
        if v['f1_score'] > f1_prev:
            f1_ind = k
            f1_prev = v['f1_score']
        else: pass
    return history[f1_ind]

# LOGIC
def search_single_parameter(current_parameters, start_parameters, target_parameter, delta, mw, run_id, experiment, strictness, max_buffer):
    """The logic for 'optimizing' one hyperparameter."""
    # we are done when continue_search is False
    continue_search = True 
    # search the positive direction first
    positive_training = True
    # init f1 scores for each direction
    prev_f1_score = 0.0
    # init buffer
    buffer = max_buffer
    # iterations
    i = -1 # iterations to one direction
    k = -1 # all iterations
    # all training results with parameter settings
    history = {} 
    while continue_search:
        i = i + 1
        k = k + 1
        print('k', k)

        # check and set stopping condition(s)
        if i >= iteration: # iteration ok?
            if positive_training is True:
                # F1 scores have been increasing 
                # -> no need to check negative range 
                # -> stop iteration
                continue_search = False
            elif positive_training is False:
                continue_search = False
            else:
                raise Exception("Found unhandled condition. Stopping!")

        # do we stop altogether?
        if continue_search is False: 
            return history
            print("DONE")
        # or we continue
        else: 
            # parameters for training
            # form trainer object to train the model
            trainer = get_trainer(current_parameters, mw, tokenized_data, data_collator, tokenizer, strictness)

            # train and save the models -> get results
            results = train_save_model(trainer, i, current_parameters, model_save_folder_path, run_id, experiment)

            # TEST: only increasing scores, only decreasing scores, first increases then lowers and then grows again.
            #results = monotonously_growing_function(trainer, i, current_parameters, model_save_folder_path, run_id, experiment) # 
            # this function is meant to test the buffer/shield
            #results = decreasing_function(trainer, i, current_parameters, model_save_folder_path, run_id, experiment) # 
            #results = altering_function(trainer, i, current_parameters, model_save_folder_path, run_id, experiment)
            
            # get f1_scores
            if k < 1: 
                prev_f1_score = 0.0
            else:
                prev_f1_score = history[k-1]['f1_score']
            
            #current_f1_score = results['eval_results']['eval_f1_score']
            current_f1_score = results['eval_f1_score']
            # add results & parameters to history
            history[k] = make_history(k, results, target_parameter, current_parameters, strictness)

            # check if there is a need to stop or change calculation
            if positive_training is True and prev_f1_score > current_f1_score and continue_search is True:
                if buffer <= 0:
                    # previous result is 'better' than current one 
                    # -> Skip the rest of this cycle. Try negative value range next starting with origo.
                    positive_training = False
                    i = -1
                    # update buffer for the next direction
                    buffer = max_buffer
                    #current_parameters = start_parameters
                    print("len(history):", len(history))
                    print("positive_training is True and prev_f1_score > current_f1_score")
                    try:
                        current_parameters = update_search_direction_parameters(positive_training, i, current_parameters, start_parameters, target_parameter, delta)
                    except ValueError as e:
                        # catching intentional ValueError stops the hyperparameter search
                        print(f"Error was: {e}")
                        print("stopping search of current hyperparameter")
                        continue_search = False
                else:
                    # when buffer is still bigger than zero 
                    # we continue calculations to the same direction
                    buffer = buffer - 1
                    print("len(history):", len(history))
                    print("positive_training is True and prev_f1_score > current_f1_score")
                    try:
                        current_parameters = update_search_direction_parameters(positive_training, i, current_parameters, start_parameters, target_parameter, delta)
                    except ValueError as e:
                        # catching intentional ValueError stops the hyperparameter search
                        print(f"Error was: {e}")
                        print("stopping search of current hyperparameter")
                        continue_search = False

            elif positive_training is False and prev_f1_score > current_f1_score and continue_search is True:
                if buffer <= 0:
                    # we stop searching better values for our hyperparameter
                    continue_search = False
                    print("len(history):", len(history))
                    print("positive_training is False and prev_f1_score > current_f1_score")
                else:
                    # remember to update
                    buffer = buffer - 1
                    print("len(history):", len(history))
                    print("positive_training is True and prev_f1_score > current_f1_score")
                    try:
                        current_parameters = update_search_direction_parameters(positive_training, i, current_parameters, start_parameters, target_parameter, delta)
                    except ValueError as e:
                        # catching intentional ValueError stops the hyperparameter search
                        print(f"Error was: {e}")
                        print("stopping search of current hyperparameter")
                        continue_search = False
            else:
                # cases where previous F1 score is smaller than the current F1 score
                # -> training is progressing as intended
                print("training is progressing as intended")
                try:
                    current_parameters = update_search_direction_parameters(positive_training, i, current_parameters, start_parameters, target_parameter, delta)
                except ValueError as e:
                    # catching intentional ValueError stops the hyperparameter search
                    print(f"Error was: {e}")
                    print("stopping search of current hyperparameter")
                    continue_search = False

            #print("learning_rate: ", current_parameters['learning_rate'])
            #print("weight_decay: ", current_parameters['weight_decay'])
            #print("delta: ", delta)
            #print("prev_f1_score: ", prev_f1_score)
            #print("current_f1_score: ", current_f1_score)

    return history

# MAIN SEARCH LOOP for 'optimal' hyperparameters
search_parameters:list = ['learning_rate', 'weight_decay'] # list of parameters to search the 'best' values
# old way
#current_parameters = read_convert_config_training_values(config, experiment, verbose=False)
# new way
config_values = read_config_values_as_dict(config, experiment)
current_parameters = extract_config_training_values(config_values)
strictness = config_values['strictness']
iteration = 10
delta = float("0.00001")
# can survive one low F1 score, 2nd causes to switch mode
max_buffer:int = 2 

all_history = {x:{} for x in range(len(search_parameters))} # dict to hold all tried hyperparameter settings and the respective results
for i, target_parameter in enumerate(search_parameters):
    start_parameters = current_parameters.copy() # origo
    history = search_single_parameter(current_parameters, start_parameters, target_parameter, delta, mw, run_id, experiment, strictness, max_buffer) # train models
    historical_settings = get_hyperparameter_settings_from_history(history) # find best settings from history
    all_history[i]['history'] = history
    all_history[i]['best'] = historical_settings
    print('historical_settings', historical_settings)
    # update current parameters
    current_parameters = historical_settings['parameters']
    # update current parameters with the best hyperparameter value
    current_parameters[search_parameters[i]] = historical_settings['eval_results'][search_parameters[i]]

save_history(model_save_folder_path, history=all_history, experiment=experiment, run_id=run_id)

# save settings and results to .csv file
df = pd.DataFrame(all_history)
df.to_csv(os.path.join(model_save_folder_path, 'history.csv'), sep=';', encoding='utf-8', index=False)


# search_parameters
# current_parameters = get_lr_wd(tiedostonimi)
# optimize(search_parameters, current_parameters)




k 0


Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.2699,0.149037,0.572485,0.661162,0.504783
200,0.1258,0.120871,0.65598,0.734055,0.592916
300,0.1001,0.112813,0.69027,0.77164,0.624424
400,0.0798,0.107857,0.689949,0.691913,0.687995
500,0.0608,0.108071,0.695581,0.726082,0.667539
600,0.048,0.113315,0.718114,0.763098,0.678138


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.49 		 0.68		 0.57	
 Eiongelmia                	 0.75 		 0.47		 0.58	
Strictness: partial
CheckpointU F1-Score: 0.57 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.59 		 0.76		 0.67	
 Eiongelmia                	 0.63 		 0.48		 0.54	
Strictness: partial
CheckpointU F1-Score: 0.66 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.62 		 0.80		 0.70	
 Eiongelmia                	 0.66 		 0.50		 0.57	
Strictness: partial
CheckpointU F1-Score: 0.69 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.71 		 0.72		 0.72	
 Eiongelmia                	 0.42 		 0.39		 0.41	
Strictness: partial
CheckpointU F1-Score: 0.69 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.70 		 0.76		 0.73	
 Eiongelmia                	 0.39 		 0.42		 0.41	
Strictness: partial
CheckpointU F1-Score: 0.70 


i:0, done
training is progressing as intended
k 1




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0754,0.11277,0.71308,0.769932,0.664047
200,0.0515,0.121736,0.715645,0.771071,0.667653
300,0.0381,0.144512,0.722904,0.771071,0.680402
400,0.0275,0.148539,0.703991,0.723235,0.685745
500,0.019,0.152233,0.729412,0.776765,0.6875
600,0.0144,0.160235,0.741057,0.790433,0.697487


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.69 		 0.80		 0.74	
 Eiongelmia                	 0.43 		 0.52		 0.47	
Strictness: partial
CheckpointU F1-Score: 0.71 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.68 		 0.80		 0.74	
 Eiongelmia                	 0.49 		 0.46		 0.48	
Strictness: partial
CheckpointU F1-Score: 0.72 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.70 		 0.80		 0.75	
 Eiongelmia                	 0.46 		 0.47		 0.47	
Strictness: partial
CheckpointU F1-Score: 0.72 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.73 		 0.76		 0.74	
 Eiongelmia                	 0.34 		 0.42		 0.38	
Strictness: partial
CheckpointU F1-Score: 0.70 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.73 		 0.81		 0.77	
 Eiongelmia                	 0.36 		 0.44		 0.39	
Strictness: partial
CheckpointU F1-Score: 0.73 


i:1, done
training is progressing as intended
k 2




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0375,0.140252,0.692901,0.767084,0.631801
200,0.0215,0.157164,0.743776,0.816629,0.682857
300,0.0163,0.158444,0.752864,0.785877,0.722513
400,0.0101,0.164168,0.768489,0.816629,0.725709
500,0.005,0.177246,0.763458,0.799544,0.730489
600,0.0046,0.173056,0.758861,0.80467,0.717988


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.71 		 0.79		 0.75	
 Eiongelmia                	 0.26 		 0.54		 0.35	
Strictness: partial
CheckpointU F1-Score: 0.69 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.70 		 0.85		 0.77	
 Eiongelmia                	 0.49 		 0.52		 0.51	
Strictness: partial
CheckpointU F1-Score: 0.74 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.74 		 0.82		 0.78	
 Eiongelmia                	 0.49 		 0.45		 0.47	
Strictness: partial
CheckpointU F1-Score: 0.75 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.85		 0.79	
 Eiongelmia                	 0.51 		 0.48		 0.49	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.84		 0.79	
 Eiongelmia                	 0.48 		 0.45		 0.47	
Strictness: partial
CheckpointU F1-Score: 0.76 


i:2, done
training is progressing as intended
k 3




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0185,0.166553,0.730102,0.81492,0.661275
200,0.0118,0.169488,0.758602,0.816059,0.708704
300,0.0094,0.178557,0.74934,0.808656,0.698132
400,0.0051,0.185918,0.747831,0.785308,0.713768
500,0.0024,0.19418,0.757867,0.809226,0.712638
600,0.0021,0.190766,0.763102,0.812642,0.719254


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.69 		 0.85		 0.76	
 Eiongelmia                	 0.41 		 0.53		 0.46	
Strictness: partial
CheckpointU F1-Score: 0.73 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.73 		 0.85		 0.78	
 Eiongelmia                	 0.48 		 0.54		 0.51	
Strictness: partial
CheckpointU F1-Score: 0.76 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.72 		 0.85		 0.78	
 Eiongelmia                	 0.44 		 0.46		 0.45	
Strictness: partial
CheckpointU F1-Score: 0.75 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.74 		 0.82		 0.78	
 Eiongelmia                	 0.46 		 0.43		 0.45	
Strictness: partial
CheckpointU F1-Score: 0.75 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.84		 0.80	
 Eiongelmia                	 0.38 		 0.48		 0.42	
Strictness: partial
CheckpointU F1-Score: 0.76 


i:3, done
len(history): 4
positive_training is True and prev_f1_score > current_f1_score
k 4




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0146,0.167622,0.727952,0.832005,0.647033
200,0.0096,0.177846,0.738083,0.802392,0.683317
300,0.0068,0.169088,0.770632,0.818907,0.727733
400,0.0029,0.186034,0.768325,0.817768,0.724521
500,0.0016,0.204267,0.7708,0.817768,0.728934
600,0.0011,0.195715,0.772679,0.824601,0.726908


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.66 		 0.87		 0.75	
 Eiongelmia                	 0.47 		 0.43		 0.45	
Strictness: partial
CheckpointU F1-Score: 0.73 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.73 		 0.84		 0.78	
 Eiongelmia                	 0.32 		 0.48		 0.38	
Strictness: partial
CheckpointU F1-Score: 0.74 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.86		 0.80	
 Eiongelmia                	 0.49 		 0.47		 0.48	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.85		 0.80	
 Eiongelmia                	 0.43 		 0.51		 0.46	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.85		 0.80	
 Eiongelmia                	 0.51 		 0.47		 0.49	
Strictness: partial
CheckpointU F1-Score: 0.77 


i:4, done
training is progressing as intended
k 5




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0102,0.174234,0.750777,0.825171,0.688688
200,0.0085,0.18956,0.75847,0.790433,0.728992
300,0.005,0.196989,0.761304,0.824601,0.707031
400,0.0019,0.184922,0.759595,0.811503,0.713928
500,0.0008,0.209674,0.760824,0.820615,0.709154
600,0.0008,0.198751,0.768697,0.819476,0.723843


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.71 		 0.86		 0.78	
 Eiongelmia                	 0.45 		 0.51		 0.47	
Strictness: partial
CheckpointU F1-Score: 0.75 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.83		 0.79	
 Eiongelmia                	 0.49 		 0.46		 0.48	
Strictness: partial
CheckpointU F1-Score: 0.76 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.72 		 0.86		 0.78	
 Eiongelmia                	 0.53 		 0.49		 0.51	
Strictness: partial
CheckpointU F1-Score: 0.76 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.74 		 0.84		 0.79	
 Eiongelmia                	 0.45 		 0.50		 0.47	
Strictness: partial
CheckpointU F1-Score: 0.76 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.72 		 0.86		 0.79	
 Eiongelmia                	 0.51 		 0.45		 0.48	
Strictness: partial
CheckpointU F1-Score: 0.76 


i:5, done
len(history): 6
positive_training is True and prev_f1_score > current_f1_score
k 6




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0087,0.174666,0.723754,0.777335,0.677083
200,0.0067,0.182486,0.751678,0.829157,0.687441
300,0.0054,0.165765,0.76943,0.845672,0.705798
400,0.0025,0.187756,0.773256,0.833144,0.7214
500,0.0008,0.204853,0.76877,0.80467,0.735938
600,0.0007,0.195622,0.759554,0.81492,0.711233


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.80		 0.78	
 Eiongelmia                	 0.26 		 0.52		 0.34	
Strictness: partial
CheckpointU F1-Score: 0.72 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.70 		 0.87		 0.78	
 Eiongelmia                	 0.51 		 0.45		 0.47	
Strictness: partial
CheckpointU F1-Score: 0.75 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.72 		 0.88		 0.80	
 Eiongelmia                	 0.49 		 0.49		 0.49	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.74 		 0.88		 0.80	
 Eiongelmia                	 0.46 		 0.43		 0.45	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.84		 0.80	
 Eiongelmia                	 0.49 		 0.45		 0.47	
Strictness: partial
CheckpointU F1-Score: 0.77 


i:6, done
training is progressing as intended
k 7




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.01,0.191516,0.745046,0.792141,0.703236
200,0.0076,0.153187,0.758692,0.832574,0.696854
300,0.0041,0.175343,0.7773,0.822893,0.736493
400,0.0012,0.199196,0.784501,0.841686,0.734592
500,0.0006,0.193269,0.787017,0.835421,0.743915
600,0.0004,0.198018,0.781535,0.829157,0.739086


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.72 		 0.82		 0.77	
 Eiongelmia                	 0.50 		 0.50		 0.50	
Strictness: partial
CheckpointU F1-Score: 0.75 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.72 		 0.87		 0.79	
 Eiongelmia                	 0.45 		 0.49		 0.47	
Strictness: partial
CheckpointU F1-Score: 0.76 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.86		 0.80	
 Eiongelmia                	 0.56 		 0.47		 0.51	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.88		 0.81	
 Eiongelmia                	 0.54 		 0.43		 0.48	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.88		 0.81	
 Eiongelmia                	 0.53 		 0.46		 0.49	
Strictness: partial
CheckpointU F1-Score: 0.79 


i:7, done
training is progressing as intended
k 8




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0088,0.16782,0.759577,0.8582,0.681284
200,0.0074,0.180117,0.750391,0.820046,0.691643
300,0.0027,0.187974,0.777959,0.816059,0.743257
400,0.0013,0.191764,0.781553,0.825171,0.742316
500,0.0006,0.204188,0.791101,0.830296,0.75544
600,0.0006,0.198705,0.789587,0.837699,0.746701


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.70 		 0.89		 0.79	
 Eiongelmia                	 0.47 		 0.52		 0.49	
Strictness: partial
CheckpointU F1-Score: 0.76 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.73 		 0.86		 0.79	
 Eiongelmia                	 0.36 		 0.48		 0.41	
Strictness: partial
CheckpointU F1-Score: 0.75 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.78 		 0.85		 0.82	
 Eiongelmia                	 0.39 		 0.46		 0.42	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.77 		 0.86		 0.81	
 Eiongelmia                	 0.45 		 0.46		 0.46	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.77 		 0.87		 0.82	
 Eiongelmia                	 0.53 		 0.45		 0.49	
Strictness: partial
CheckpointU F1-Score: 0.79 


i:8, done
training is progressing as intended
k 9




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0073,0.225971,0.766316,0.829157,0.712329
200,0.0092,0.187173,0.756111,0.810364,0.708665
300,0.0031,0.184896,0.798061,0.843964,0.756895
400,0.0018,0.170831,0.778256,0.823462,0.737755
500,0.0008,0.198575,0.79225,0.838269,0.75102
600,0.0006,0.176549,0.80076,0.839977,0.765041


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.72 		 0.87		 0.78	
 Eiongelmia                	 0.63 		 0.48		 0.55	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.73 		 0.84		 0.79	
 Eiongelmia                	 0.45 		 0.49		 0.47	
Strictness: partial
CheckpointU F1-Score: 0.76 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.77 		 0.88		 0.82	
 Eiongelmia                	 0.55 		 0.49		 0.52	
Strictness: partial
CheckpointU F1-Score: 0.80 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.86		 0.80	
 Eiongelmia                	 0.51 		 0.52		 0.51	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.78 		 0.87		 0.82	
 Eiongelmia                	 0.50 		 0.52		 0.51	
Strictness: partial
CheckpointU F1-Score: 0.79 


i:9, done
training is progressing as intended
k 10
historical_settings {'f1_score': 0.7922497308934339, 'eval_f1_score': 0.7922497308934339, 'target': 'learning_rate', 'parameters': {'evaluation_strategy': 'steps', 'learning_rate': 0.00019, 'load_best_model_at_end': True, 'logging_steps': 100, 'num_train_epochs': 10, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'save_strategy': 'steps', 'seed': 42, 'weight_decay': 0.19998}, 'eval_results': {'eval_loss': 0.1985751986503601, 'eval_f1_score': 0.7922497308934339, 'eval_recall_score': 0.8382687927107062, 'eval_precision': 0.7510204081632653, 'eval_runtime': 1.4279, 'eval_samples_per_second': 179.991, 'eval_steps_per_second': 6.303, 'epoch': 10.0, 'experiment': 'Incontinence_NER_v5_20231



Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0076,0.173277,0.765231,0.829727,0.710039
200,0.0076,0.160953,0.73197,0.774487,0.693878
300,0.0036,0.212465,0.774384,0.822893,0.731275
400,0.0017,0.203232,0.77967,0.834282,0.731768
500,0.0003,0.219091,0.778075,0.828588,0.733367
600,0.0003,0.212347,0.775661,0.834852,0.724308


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.86		 0.81	
 Eiongelmia                	 0.34 		 0.52		 0.41	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.77 		 0.81		 0.79	
 Eiongelmia                	 0.27 		 0.48		 0.35	
Strictness: partial
CheckpointU F1-Score: 0.73 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.86		 0.80	
 Eiongelmia                	 0.54 		 0.45		 0.49	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.87		 0.81	
 Eiongelmia                	 0.50 		 0.48		 0.49	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.87		 0.81	
 Eiongelmia                	 0.47 		 0.43		 0.45	
Strictness: partial
CheckpointU F1-Score: 0.78 


i:0, done
training is progressing as intended
k 1




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0067,0.208669,0.745269,0.829727,0.676416
200,0.0059,0.214595,0.766794,0.799544,0.736621
300,0.0031,0.190571,0.778794,0.824032,0.738265
400,0.0016,0.212492,0.782888,0.833713,0.737903
500,0.0006,0.219248,0.775107,0.82631,0.729879
600,0.0004,0.215464,0.778424,0.838269,0.726555


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.68 		 0.87		 0.76	
 Eiongelmia                	 0.63 		 0.49		 0.55	
Strictness: partial
CheckpointU F1-Score: 0.75 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.83		 0.79	
 Eiongelmia                	 0.55 		 0.49		 0.52	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.86		 0.81	
 Eiongelmia                	 0.48 		 0.51		 0.49	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.87		 0.81	
 Eiongelmia                	 0.58 		 0.45		 0.51	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.74 		 0.86		 0.80	
 Eiongelmia                	 0.55 		 0.47		 0.51	
Strictness: partial
CheckpointU F1-Score: 0.78 


i:1, done
len(history): 2
positive_training is True and prev_f1_score > current_f1_score
k 2




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0059,0.177976,0.777137,0.843964,0.720117
200,0.004,0.173371,0.775323,0.819476,0.735685
300,0.0034,0.184026,0.782656,0.812073,0.755297
400,0.002,0.192427,0.785564,0.849089,0.730882
500,0.0007,0.208347,0.789809,0.84738,0.739563
600,0.0003,0.197575,0.795109,0.833144,0.760395


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.88		 0.81	
 Eiongelmia                	 0.46 		 0.51		 0.48	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.86		 0.81	
 Eiongelmia                	 0.48 		 0.42		 0.45	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.79 		 0.85		 0.82	
 Eiongelmia                	 0.40 		 0.43		 0.41	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.74 		 0.89		 0.81	
 Eiongelmia                	 0.62 		 0.48		 0.54	
Strictness: partial
CheckpointU F1-Score: 0.79 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.89		 0.82	
 Eiongelmia                	 0.53 		 0.46		 0.50	
Strictness: partial
CheckpointU F1-Score: 0.79 


i:2, done
training is progressing as intended
k 3




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0055,0.177142,0.780065,0.820046,0.743802
200,0.0057,0.177256,0.784239,0.821754,0.75
300,0.0038,0.175459,0.796909,0.822323,0.773019
400,0.0017,0.177062,0.770509,0.845103,0.708015
500,0.0009,0.192461,0.79021,0.83656,0.748726
600,0.0006,0.17762,0.792453,0.83713,0.752303


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.86		 0.81	
 Eiongelmia                	 0.51 		 0.45		 0.48	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.86		 0.81	
 Eiongelmia                	 0.56 		 0.43		 0.49	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.78 		 0.86		 0.82	
 Eiongelmia                	 0.62 		 0.49		 0.55	
Strictness: partial
CheckpointU F1-Score: 0.80 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.72 		 0.88		 0.79	
 Eiongelmia                	 0.54 		 0.50		 0.52	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.88		 0.81	
 Eiongelmia                	 0.59 		 0.47		 0.52	
Strictness: partial
CheckpointU F1-Score: 0.79 


i:3, done
training is progressing as intended
k 4




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0048,0.204123,0.757873,0.835991,0.693107
200,0.0062,0.213199,0.75924,0.818907,0.707677
300,0.0035,0.190287,0.78242,0.821185,0.74715
400,0.0009,0.190266,0.778671,0.827449,0.735324
500,0.0005,0.202494,0.788357,0.825171,0.754687
600,0.0002,0.204249,0.789163,0.837699,0.745943


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.70 		 0.88		 0.78	
 Eiongelmia                	 0.53 		 0.43		 0.48	
Strictness: partial
CheckpointU F1-Score: 0.76 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.72 		 0.86		 0.78	
 Eiongelmia                	 0.52 		 0.45		 0.48	
Strictness: partial
CheckpointU F1-Score: 0.76 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.86		 0.81	
 Eiongelmia                	 0.57 		 0.45		 0.50	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.86		 0.81	
 Eiongelmia                	 0.48 		 0.48		 0.48	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.77 		 0.86		 0.81	
 Eiongelmia                	 0.60 		 0.48		 0.53	
Strictness: partial
CheckpointU F1-Score: 0.79 


i:4, done
len(history): 5
positive_training is True and prev_f1_score > current_f1_score
k 5




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0055,0.192294,0.745614,0.822893,0.681604
200,0.0064,0.184319,0.765306,0.811503,0.724085
300,0.0024,0.173507,0.794831,0.8582,0.740177
400,0.0015,0.198884,0.785146,0.842825,0.734856
500,0.0007,0.205557,0.805145,0.837699,0.775026
600,0.0006,0.186487,0.78993,0.830866,0.752838


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.71 		 0.86		 0.78	
 Eiongelmia                	 0.41 		 0.46		 0.44	
Strictness: partial
CheckpointU F1-Score: 0.75 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.85		 0.80	
 Eiongelmia                	 0.40 		 0.45		 0.42	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.89		 0.82	
 Eiongelmia                	 0.58 		 0.54		 0.56	
Strictness: partial
CheckpointU F1-Score: 0.79 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.88		 0.81	
 Eiongelmia                	 0.54 		 0.45		 0.49	
Strictness: partial
CheckpointU F1-Score: 0.79 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.78 		 0.88		 0.83	
 Eiongelmia                	 0.66 		 0.46		 0.54	
Strictness: partial
CheckpointU F1-Score: 0.81 


i:5, done
training is progressing as intended
k 6




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0042,0.203713,0.772319,0.832574,0.720197
200,0.006,0.196171,0.759915,0.818337,0.709279
300,0.0035,0.195644,0.775412,0.829727,0.727772
400,0.003,0.208351,0.782422,0.831435,0.738866
500,0.001,0.208706,0.778361,0.843964,0.722222
600,0.0006,0.210983,0.78746,0.843964,0.738048


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.72 		 0.87		 0.79	
 Eiongelmia                	 0.72 		 0.50		 0.59	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.74 		 0.85		 0.80	
 Eiongelmia                	 0.39 		 0.48		 0.43	
Strictness: partial
CheckpointU F1-Score: 0.76 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.74 		 0.86		 0.80	
 Eiongelmia                	 0.62 		 0.50		 0.55	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.87		 0.81	
 Eiongelmia                	 0.56 		 0.49		 0.53	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.74 		 0.88		 0.81	
 Eiongelmia                	 0.50 		 0.51		 0.50	
Strictness: partial
CheckpointU F1-Score: 0.78 


i:6, done
len(history): 7
positive_training is True and prev_f1_score > current_f1_score
negative direction weight_decay {'evaluation_strategy': 'steps', 'learning_rate': 0.00018, 'load_best_model_at_end': True, 'logging_steps': 100, 'num_train_epochs': 10, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'save_strategy': 'steps', 'seed': 42, 'weight_decay': 0.19998}
k 7




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0066,0.162482,0.77902,0.833144,0.7315
200,0.0026,0.184518,0.75721,0.822323,0.701652
300,0.0021,0.178634,0.78137,0.812073,0.752904
400,0.0011,0.185336,0.776786,0.842255,0.72076
500,0.0005,0.199008,0.784293,0.830296,0.743119
600,0.0003,0.189356,0.794595,0.83713,0.756173


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.87		 0.81	
 Eiongelmia                	 0.47 		 0.51		 0.49	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.73 		 0.86		 0.79	
 Eiongelmia                	 0.42 		 0.47		 0.44	
Strictness: partial
CheckpointU F1-Score: 0.76 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.78 		 0.85		 0.81	
 Eiongelmia                	 0.47 		 0.42		 0.44	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.73 		 0.88		 0.80	
 Eiongelmia                	 0.54 		 0.48		 0.51	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.87		 0.81	
 Eiongelmia                	 0.55 		 0.45		 0.49	
Strictness: partial
CheckpointU F1-Score: 0.78 


i:0, done
training is progressing as intended
negative direction weight_decay {'evaluation_strategy': 'steps', 'learning_rate': 0.00018, 'load_best_model_at_end': True, 'logging_steps': 100, 'num_train_epochs': 10, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'save_strategy': 'steps', 'seed': 42, 'weight_decay': 0.19996999999999998}
k 8




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0046,0.191489,0.763021,0.834282,0.702975
200,0.0079,0.164552,0.778127,0.846811,0.719748
300,0.0032,0.185169,0.775607,0.83656,0.722933
400,0.0029,0.192298,0.789198,0.857062,0.731293
500,0.0012,0.188994,0.79648,0.824601,0.770213
600,0.0007,0.187889,0.801851,0.838838,0.767987


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.72 		 0.87		 0.79	
 Eiongelmia                	 0.48 		 0.54		 0.51	
Strictness: partial
CheckpointU F1-Score: 0.76 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.72 		 0.88		 0.79	
 Eiongelmia                	 0.65 		 0.53		 0.59	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.74 		 0.87		 0.80	
 Eiongelmia                	 0.54 		 0.49		 0.52	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.89		 0.82	
 Eiongelmia                	 0.51 		 0.52		 0.52	
Strictness: partial
CheckpointU F1-Score: 0.79 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.78 		 0.86		 0.82	
 Eiongelmia                	 0.58 		 0.45		 0.50	
Strictness: partial
CheckpointU F1-Score: 0.80 


i:1, done
training is progressing as intended
k 9




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0042,0.21049,0.773828,0.855353,0.706491
200,0.0042,0.193626,0.79472,0.839977,0.75409
300,0.0026,0.174347,0.753751,0.829727,0.690521
400,0.0019,0.179903,0.784737,0.849089,0.729452
500,0.0007,0.192549,0.78403,0.833144,0.740385
600,0.0004,0.200231,0.784048,0.822893,0.748705


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.71 		 0.89		 0.79	
 Eiongelmia                	 0.65 		 0.48		 0.55	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.88		 0.82	
 Eiongelmia                	 0.68 		 0.43		 0.53	
Strictness: partial
CheckpointU F1-Score: 0.79 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.86		 0.81	
 Eiongelmia                	 0.29 		 0.51		 0.37	
Strictness: partial
CheckpointU F1-Score: 0.75 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.74 		 0.88		 0.81	
 Eiongelmia                	 0.58 		 0.54		 0.56	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.86		 0.81	
 Eiongelmia                	 0.53 		 0.54		 0.54	
Strictness: partial
CheckpointU F1-Score: 0.78 


i:2, done
len(history): 10
positive_training is True and prev_f1_score > current_f1_score
k 10




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0058,0.189509,0.77131,0.845103,0.709369
200,0.007,0.195879,0.784699,0.817768,0.754202
300,0.003,0.19616,0.794338,0.846811,0.747988
400,0.0027,0.184662,0.787037,0.822893,0.754175
500,0.0012,0.194591,0.787121,0.842255,0.738761
600,0.0011,0.180584,0.790262,0.841116,0.745207


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.74 		 0.89		 0.80	
 Eiongelmia                	 0.42 		 0.46		 0.44	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.86		 0.81	
 Eiongelmia                	 0.61 		 0.44		 0.51	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.77 		 0.88		 0.82	
 Eiongelmia                	 0.53 		 0.49		 0.51	
Strictness: partial
CheckpointU F1-Score: 0.79 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.78 		 0.86		 0.82	
 Eiongelmia                	 0.47 		 0.43		 0.45	
Strictness: partial
CheckpointU F1-Score: 0.79 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.77 		 0.88		 0.82	
 Eiongelmia                	 0.46 		 0.48		 0.47	
Strictness: partial
CheckpointU F1-Score: 0.79 


i:3, done
training is progressing as intended
k 11




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0051,0.244541,0.750763,0.840547,0.678309
200,0.0042,0.167055,0.762227,0.834282,0.701628
300,0.0034,0.191452,0.771812,0.851367,0.705855
400,0.0022,0.192643,0.777261,0.856492,0.711447
500,0.0007,0.200471,0.784681,0.834282,0.740647
600,0.0007,0.183873,0.787895,0.837699,0.74368


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.69 		 0.88		 0.77	
 Eiongelmia                	 0.55 		 0.48		 0.52	
Strictness: partial
CheckpointU F1-Score: 0.75 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.73 		 0.87		 0.79	
 Eiongelmia                	 0.42 		 0.50		 0.46	
Strictness: partial
CheckpointU F1-Score: 0.76 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.72 		 0.89		 0.79	
 Eiongelmia                	 0.58 		 0.52		 0.55	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.72 		 0.89		 0.79	
 Eiongelmia                	 0.64 		 0.53		 0.58	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.87		 0.81	
 Eiongelmia                	 0.51 		 0.46		 0.48	
Strictness: partial
CheckpointU F1-Score: 0.78 


i:4, done
len(history): 12
positive_training is True and prev_f1_score > current_f1_score
k 12




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0034,0.191115,0.742455,0.840547,0.664865
200,0.0073,0.187119,0.761731,0.832005,0.702404
300,0.0092,0.152363,0.762848,0.811503,0.719697
400,0.0041,0.172337,0.774904,0.801822,0.749734
500,0.0024,0.16333,0.788058,0.834282,0.746687
600,0.0014,0.167095,0.795095,0.830866,0.762278


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.70 		 0.88		 0.78	
 Eiongelmia                	 0.35 		 0.50		 0.41	
Strictness: partial
CheckpointU F1-Score: 0.74 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.71 		 0.87		 0.78	
 Eiongelmia                	 0.59 		 0.49		 0.54	
Strictness: partial
CheckpointU F1-Score: 0.76 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.85		 0.80	
 Eiongelmia                	 0.43 		 0.48		 0.45	
Strictness: partial
CheckpointU F1-Score: 0.76 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.83		 0.79	
 Eiongelmia                	 0.63 		 0.49		 0.55	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.76 		 0.87		 0.81	
 Eiongelmia                	 0.55 		 0.47		 0.51	
Strictness: partial
CheckpointU F1-Score: 0.79 


i:5, done
training is progressing as intended
k 13




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0062,0.209861,0.770151,0.843394,0.708612
200,0.0059,0.189391,0.771745,0.79328,0.751348
300,0.0056,0.152396,0.754386,0.832574,0.689623
400,0.0034,0.145428,0.777492,0.861617,0.708333
500,0.0016,0.159448,0.801539,0.830296,0.774708
600,0.0018,0.159331,0.793496,0.833713,0.75698


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.72 		 0.88		 0.79	
 Eiongelmia                	 0.53 		 0.48		 0.50	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.80 		 0.83		 0.81	
 Eiongelmia                	 0.38 		 0.47		 0.42	
Strictness: partial
CheckpointU F1-Score: 0.77 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.71 		 0.87		 0.78	
 Eiongelmia                	 0.47 		 0.52		 0.50	
Strictness: partial
CheckpointU F1-Score: 0.75 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.73 		 0.90		 0.81	
 Eiongelmia                	 0.47 		 0.54		 0.51	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.81 		 0.87		 0.84	
 Eiongelmia                	 0.47 		 0.48		 0.48	
Strictness: partial
CheckpointU F1-Score: 0.80 


i:6, done
training is progressing as intended
k 14




Step,Training Loss,Validation Loss,F1 Score,Recall Score,Precision
100,0.0064,0.181635,0.733436,0.813212,0.667914
200,0.0089,0.153468,0.77918,0.843964,0.723633
300,0.0079,0.149866,0.782086,0.81549,0.751312
400,0.004,0.16925,0.776033,0.844533,0.717812
500,0.0025,0.168213,0.789417,0.832574,0.750513
600,0.0015,0.167581,0.793287,0.84795,0.745245


				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.71 		 0.85		 0.77	
 Eiongelmia                	 0.34 		 0.50		 0.40	
Strictness: partial
CheckpointU F1-Score: 0.73 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.74 		 0.88		 0.80	
 Eiongelmia                	 0.51 		 0.52		 0.51	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.78 		 0.85		 0.82	
 Eiongelmia                	 0.45 		 0.49		 0.47	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.75 		 0.88		 0.81	
 Eiongelmia                	 0.44 		 0.48		 0.46	
Strictness: partial
CheckpointU F1-Score: 0.78 

				 Precision 	 Recall 	 F1 score
                           	 



				 Precision 	 Recall 	 F1 score
                           	 0.00 		 0.00		 0.00	
 Ongelmia                  	 0.77 		 0.87		 0.82	
 Eiongelmia                	 0.49 		 0.45		 0.47	
Strictness: partial
CheckpointU F1-Score: 0.79 


i:7, done
len(history): 15
positive_training is False and prev_f1_score > current_f1_score
historical_settings {'f1_score': 0.8051450465243567, 'eval_f1_score': 0.8051450465243567, 'target': 'weight_decay', 'parameters': {'evaluation_strategy': 'steps', 'learning_rate': 0.00018, 'load_best_model_at_end': True, 'logging_steps': 100, 'num_train_epochs': 10, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'save_strategy': 'steps', 'seed': 42, 'weight_decay': 0.20004000000000005}, 'eval_results': {'eval_loss': 0.20555704832077026, 'eval_f1_score': 0.8051450465243567, 'eval_recall_score': 0.8376993166287016, 'eval_precision': 0.7750263435194942, 'eval_runtime': 1.4259, 'eval_samples_per_second': 180.234, 'eval_steps_per_second': 6.312, 'ep

In [22]:
historical_settings['eval_results']

{'eval_loss': 0.20555704832077026,
 'eval_f1_score': 0.8051450465243567,
 'eval_recall_score': 0.8376993166287016,
 'eval_precision': 0.7750263435194942,
 'eval_runtime': 1.4259,
 'eval_samples_per_second': 180.234,
 'eval_steps_per_second': 6.312,
 'epoch': 10.0,
 'experiment': 'Incontinence_NER_v5_20231208_orig_par_opt',
 'run_id': 'cbe7f211',
 'uuid': '2ebfb94c-0889-47f7-b909-8ab704b9ce71',
 'today': '2024_07_18',
 'timestamp': 1721317609.528402,
 'learning_rate': 0.00018,
 'weight_decay': 0.20003000000000004}

In [23]:
from pprint import pprint

In [24]:
pprint(historical_settings['eval_results'])

{'epoch': 10.0,
 'eval_f1_score': 0.8051450465243567,
 'eval_loss': 0.20555704832077026,
 'eval_precision': 0.7750263435194942,
 'eval_recall_score': 0.8376993166287016,
 'eval_runtime': 1.4259,
 'eval_samples_per_second': 180.234,
 'eval_steps_per_second': 6.312,
 'experiment': 'Incontinence_NER_v5_20231208_orig_par_opt',
 'learning_rate': 0.00018,
 'run_id': 'cbe7f211',
 'timestamp': 1721317609.528402,
 'today': '2024_07_18',
 'uuid': '2ebfb94c-0889-47f7-b909-8ab704b9ce71',
 'weight_decay': 0.20003000000000004}


In [25]:
historical_settings

{'f1_score': 0.8051450465243567,
 'eval_f1_score': 0.8051450465243567,
 'target': 'weight_decay',
 'parameters': {'evaluation_strategy': 'steps',
  'learning_rate': 0.00018,
  'load_best_model_at_end': True,
  'logging_steps': 100,
  'num_train_epochs': 10,
  'per_device_train_batch_size': 16,
  'per_device_eval_batch_size': 16,
  'save_strategy': 'steps',
  'seed': 42,
  'weight_decay': 0.20003000000000004},
 'eval_results': {'eval_loss': 0.20555704832077026,
  'eval_f1_score': 0.8051450465243567,
  'eval_recall_score': 0.8376993166287016,
  'eval_precision': 0.7750263435194942,
  'eval_runtime': 1.4259,
  'eval_samples_per_second': 180.234,
  'eval_steps_per_second': 6.312,
  'epoch': 10.0,
  'experiment': 'Incontinence_NER_v5_20231208_orig_par_opt',
  'run_id': 'cbe7f211',
  'uuid': '2ebfb94c-0889-47f7-b909-8ab704b9ce71',
  'today': '2024_07_18',
  'timestamp': 1721317609.528402,
  'learning_rate': 0.00018,
  'weight_decay': 0.20003000000000004},
 'strictness': 'partial',
 'k': 5}

In [26]:
current_parameters

{'evaluation_strategy': 'steps',
 'learning_rate': 0.00018,
 'load_best_model_at_end': True,
 'logging_steps': 100,
 'num_train_epochs': 10,
 'per_device_train_batch_size': 16,
 'per_device_eval_batch_size': 16,
 'save_strategy': 'steps',
 'seed': 42,
 'weight_decay': 0.20003000000000004}

## Read .csv file

In [27]:
df = pd.read_csv(os.path.join(model_save_folder_path, 'history.csv'), sep=';', encoding='utf-8')
#df = pd.read_csv("../_trained_models/loneliness/C/2024_07_15_218b682e/history.csv", sep=';', encoding='utf-8')

In [28]:
def show_hyperparameter_table(df):
    """Shows hyperparameter training history as pd.DataFrame"""
    
    import ast
    # both hyperparameters
    # LR
    lr_res = ast.literal_eval(df.loc[0, '0'])
    # WD
    wd_res = ast.literal_eval(df.loc[0, '1'])

    def _show_hyperparameter_table(table):
        df = pd.DataFrame(table)
        # wide to long
        df = df.transpose()
        # contains columns: f1_score, target, parameters, eval_results, k
        # parameters is a dict that contains parameter values
        df['learning_rate'] = list(map(lambda x: x['learning_rate'], df['eval_results']))
        df['weight_decay'] = list(map(lambda x: x['weight_decay'], df['eval_results']))
        df['precision'] = list(map(lambda x: x['eval_precision'], df['eval_results']))
        df['recall'] = list(map(lambda x: x['eval_recall_score'], df['eval_results']))
        return df

    df_lr = _show_hyperparameter_table(lr_res)
    df_wd = _show_hyperparameter_table(wd_res)

    # concatanate
    df = pd.concat([df_lr, df_wd])
    
    # manage indeces
    df['original_index'] = df.index
    df.index = range(0, df.shape[0])
    return df

df = show_hyperparameter_table(df)

In [29]:
print(experiment)
sub_df = df[['learning_rate', 'weight_decay','f1_score', 'precision', 'recall', 'k', 'target']]
sub_df

Incontinence_NER_v5_20231208_orig_par_opt


Unnamed: 0,learning_rate,weight_decay,f1_score,precision,recall,k,target
0,9e-05,0.19998,0.695581,0.667539,0.726082,0,learning_rate
1,0.0001,0.19998,0.729412,0.6875,0.776765,1,learning_rate
2,0.00011,0.19998,0.763458,0.730489,0.799544,2,learning_rate
3,0.00012,0.19998,0.757867,0.712638,0.809226,3,learning_rate
4,0.00013,0.19998,0.7708,0.728934,0.817768,4,learning_rate
5,0.00014,0.19998,0.760824,0.709154,0.820615,5,learning_rate
6,0.00015,0.19998,0.76877,0.735938,0.80467,6,learning_rate
7,0.00016,0.19998,0.787017,0.743915,0.835421,7,learning_rate
8,0.00017,0.19998,0.791101,0.75544,0.830296,8,learning_rate
9,0.00018,0.19998,0.79225,0.75102,0.838269,9,learning_rate


In [30]:
sorted_df = df.sort_values(by=['f1_score'])
print(experiment)
sorted_df[['learning_rate', 'weight_decay','f1_score', 'precision', 'recall', 'k', 'target']]

Incontinence_NER_v5_20231208_orig_par_opt


Unnamed: 0,learning_rate,weight_decay,f1_score,precision,recall,k,target
0,9e-05,0.19998,0.695581,0.667539,0.726082,0,learning_rate
1,0.0001,0.19998,0.729412,0.6875,0.776765,1,learning_rate
3,0.00012,0.19998,0.757867,0.712638,0.809226,3,learning_rate
5,0.00014,0.19998,0.760824,0.709154,0.820615,5,learning_rate
2,0.00011,0.19998,0.763458,0.730489,0.799544,2,learning_rate
6,0.00015,0.19998,0.76877,0.735938,0.80467,6,learning_rate
4,0.00013,0.19998,0.7708,0.728934,0.817768,4,learning_rate
11,0.00018,0.19999,0.775107,0.729879,0.82631,1,weight_decay
10,0.00018,0.19998,0.778075,0.733367,0.828588,0,weight_decay
16,0.00018,0.20004,0.778361,0.722222,0.843964,6,weight_decay


In [31]:
def show_evaluation_results(df):
    """Function extracts the evaluation results from dataframe."""
    # manage indeces
    df['original_index'] = df.index
    df.index = range(0, df.shape[0])

    # get evaluation results for the model
    objs = {}
    for i in range(0, df.shape[0]):
        obj_str = df.loc[i, 'eval_results']
        tuples = list(obj_str.items())
        # transform the list of tuples to a dictionary
        obj = {}
        for t in tuples:
            _key = t[0]
            _value = t[1]
            if _key == 'epoch':
                # make epoch an int
                _value = int(_value)
            else: pass
            
            obj[_key] = _value
        # make main dict object with integer key
        objs[i] = obj

    return pd.DataFrame(objs).transpose()

df_evals = show_evaluation_results(df)
df_evals

Unnamed: 0,eval_loss,eval_f1_score,eval_recall_score,eval_precision,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,experiment,run_id,uuid,today,timestamp,learning_rate,weight_decay
0,0.108071,0.695581,0.726082,0.667539,1.4318,179.494,6.286,10,Incontinence_NER_v5_20231208_orig_par_opt,cbe7f211,00dd5b50-42e0-42e4-980f-86d70dc61df7,2024_07_18,1721313853.740526,9e-05,0.19998
1,0.152233,0.729412,0.776765,0.6875,1.4582,176.24,6.172,10,Incontinence_NER_v5_20231208_orig_par_opt,cbe7f211,206ea6e6-8518-47cf-b913-7861893bb690,2024_07_18,1721314104.607059,0.0001,0.19998
2,0.177246,0.763458,0.799544,0.730489,1.2491,205.753,7.205,10,Incontinence_NER_v5_20231208_orig_par_opt,cbe7f211,393a12fb-ca50-4edd-96bb-683a4a7c4a10,2024_07_18,1721314354.825245,0.00011,0.19998
3,0.19418,0.757867,0.809226,0.712638,1.4185,181.178,6.345,10,Incontinence_NER_v5_20231208_orig_par_opt,cbe7f211,27a0e827-cadc-4602-8160-321eebeda90d,2024_07_18,1721314604.809916,0.00012,0.19998
4,0.204267,0.7708,0.817768,0.728934,1.4165,181.427,6.353,10,Incontinence_NER_v5_20231208_orig_par_opt,cbe7f211,394e9a02-78b6-413c-a403-616d31a94cd4,2024_07_18,1721314853.795578,0.00013,0.19998
5,0.209674,0.760824,0.820615,0.709154,1.283,200.313,7.015,10,Incontinence_NER_v5_20231208_orig_par_opt,cbe7f211,ba145f1c-68b7-4774-b57e-1cc8e6f65262,2024_07_18,1721315104.81541,0.00014,0.19998
6,0.204853,0.76877,0.80467,0.735938,1.2744,201.671,7.062,10,Incontinence_NER_v5_20231208_orig_par_opt,cbe7f211,b3e8cbff-5b95-4838-bcbd-7fcb776c1327,2024_07_18,1721315355.264024,0.00015,0.19998
7,0.193269,0.787017,0.835421,0.743915,1.2675,202.768,7.101,10,Incontinence_NER_v5_20231208_orig_par_opt,cbe7f211,862fd055-c358-4452-ab64-a8afa96e9896,2024_07_18,1721315605.088203,0.00016,0.19998
8,0.204188,0.791101,0.830296,0.75544,1.2846,200.065,7.006,10,Incontinence_NER_v5_20231208_orig_par_opt,cbe7f211,01140405-d62e-4861-867d-f04effcd9e89,2024_07_18,1721315856.143144,0.00017,0.19998
9,0.198575,0.79225,0.838269,0.75102,1.4279,179.991,6.303,10,Incontinence_NER_v5_20231208_orig_par_opt,cbe7f211,59820cdf-8e40-4560-bb84-38e116d5268a,2024_07_18,1721316106.43603,0.00018,0.19998


In [32]:
def show_parameters(df):
    """Shows the parameters columns of data frame. Remember that learning rate and 
    weight decay should not be checked using this function. Instead LR and WD are 
    correctly showed via show_evaluation_results function.
    """
    # manage indeces
    df['original_index'] = df.index
    df.index = range(0, df.shape[0])

    # get evaluation results for the model
    objs = {}
    for i in range(0, df.shape[0]):
        obj_str = df.loc[i, 'parameters']
        tuples = list(obj_str.items())
        # transform the list of tuples to a dictionary
        obj = {}
        for t in tuples:
            _key = t[0]
            _value = t[1]
            if _key == 'epoch':
                # make epoch an int
                _value = int(_value)
            else: pass
            
            obj[_key] = _value
        # make main dict object with integer key
        objs[i] = obj

    return pd.DataFrame(objs).transpose()

df_param = show_parameters(df)
df_param

Unnamed: 0,evaluation_strategy,learning_rate,load_best_model_at_end,logging_steps,num_train_epochs,per_device_train_batch_size,per_device_eval_batch_size,save_strategy,seed,weight_decay
0,steps,0.00018,True,100,10,16,16,steps,42,0.20003
1,steps,0.00018,True,100,10,16,16,steps,42,0.20003
2,steps,0.00018,True,100,10,16,16,steps,42,0.20003
3,steps,0.00018,True,100,10,16,16,steps,42,0.20003
4,steps,0.00018,True,100,10,16,16,steps,42,0.20003
5,steps,0.00018,True,100,10,16,16,steps,42,0.20003
6,steps,0.00018,True,100,10,16,16,steps,42,0.20003
7,steps,0.00018,True,100,10,16,16,steps,42,0.20003
8,steps,0.00018,True,100,10,16,16,steps,42,0.20003
9,steps,0.00018,True,100,10,16,16,steps,42,0.20003


And we're done!