In [5]:
from huggingface_hub import login
from datasets import load_dataset
from dotenv import dotenv_values
import os
import torch
import pandas as pd
from peft import PeftConfig
from transformers import AutoTokenizer
from utils import Evaluator, generate_adapters_list

HF_TOKEN = dotenv_values(".env.base")['HF_TOKEN']
HF_TOKEN_WRITE = dotenv_values(".env.base")['HF_TOKEN_WRITE']
login(token=HF_TOKEN_WRITE)


appendix = '3EpochsLast'# '5EpochsBestF1Train' # '5EpochsBestF1Train' # 5EpochsBestF1Train
log_name_training ='llama_3EpochsLast_cl'
training_type = ''#'NoLora' # 'unmasked'

def extract_params_from_file_name(df: pd.DataFrame, training_type:str=''):
    if training_type == 'NoLora':
        df['model_type'] = df['dataset'].apply(lambda x: str(x.split('/')[1].split('_')[1]))
        df['training_config'] = df['dataset'].apply(lambda x: str(x.split('adapters_')[1]))
        df['layer'] = df['dataset'].apply(lambda x: str(x.split('/')[1].split('_')[3]))
        df['quantization'] = df['dataset'].apply(lambda x: str(x.split('/')[1].split('_')[4]))
        df['gradient_accumulation_steps'] = df['dataset'].apply(lambda x: str(x.split('/')[1].split('_')[5]))
        df['learning_rate'] = df['dataset'].apply(lambda x: str(x.split('/')[1].split('_')[6]))
        df['run_type'] = df['dataset'].apply(lambda x: str(x.split('/')[1].split('_')[7]))
    else:        
        df['model_type'] = df['dataset'].apply(lambda x: str(x.split('/')[1].split('_')[1]))
        df['training_config'] = df['dataset'].apply(lambda x: str(x.split('adapters_')[1]))
        df['layer'] = df['dataset'].apply(lambda x: str(x.split('/')[1].split('_')[3]))
        df['quantization'] = df['dataset'].apply(lambda x: str(x.split('/')[1].split('_')[4]))
        df['r'] = df['dataset'].apply(lambda x: str(x.split('/')[1].split('_')[5]))
        df['lora_alpha'] = df['dataset'].apply(lambda x: str(x.split('/')[1].split('_')[6]))
        df['lora_dropout'] = df['dataset'].apply(lambda x: str(x.split('/')[1].split('_')[7]))
        df['gradient_accumulation_steps'] = df['dataset'].apply(lambda x: str(x.split('/')[1].split('_')[8]))
        df['learning_rate'] = df['dataset'].apply(lambda x: str(x.split('/')[1].split('_')[9]))
        df['run_type'] = df['dataset'].apply(lambda x: str(x.split('/')[1].split('_')[10]))
    return df

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /Users/pietroferrazzi/.cache/huggingface/token
Login successful


In [6]:

datasets_list = generate_adapters_list(log_name_training, appendix=appendix, training_type=training_type)
peft_config = PeftConfig.from_pretrained(datasets_list[0], token = HF_TOKEN_WRITE)
BASE_MODEL_CHECKPOINT = peft_config.base_model_name_or_path
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_CHECKPOINT,token =HF_TOKEN_WRITE)

evaluation_table = pd.DataFrame(columns=['dataset', 'TP', 'FP', 'FN', 'precision', 'recall', 'f1'])

for i, dataset_checkpoint in enumerate(datasets_list):
    print(f"evaluating {dataset_checkpoint}, {i}/{len(datasets_list)}...")
    test_data = load_dataset(dataset_checkpoint, token=HF_TOKEN, split='test')
    eval = Evaluator(test_data, tokenizer)
    eval.extract_FP_FN_TP_TN_token_by_token()
    eval.create_evaluation_table()
    tmp = eval.evaluation_table.copy()
    tmp['dataset'] = dataset_checkpoint
    evaluation_table = pd.concat([evaluation_table, pd.DataFrame([tmp])])#  evaluation_table.con(tmp)
    print(eval.evaluation_table)

print(evaluation_table)
evaluation_table#.to_csv(f'data/evaluation_table{appendix}.csv', index=False)
evaluation_table = extract_params_from_file_name(evaluation_table, training_type=training_type)
evaluation_table.to_csv(f'data/evaluation_table{training_type}_{appendix}.csv', index=False)
print(f'SAVED TO data/evaluation_table{training_type}_{appendix}.csv')



evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_64_32_0.01_4_0.0002_3EpochsLast, 0/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 3.64MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 567kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 74230.65 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 9569.04 examples/s]
  evaluation_table = pd.concat([evaluation_table, pd.DataFrame([tmp])])#  evaluation_table.con(tmp)


{'TP': 2559, 'FP': 2328, 'FN': 8307, 'precision': 0.523634131368938, 'recall': 0.23550524572059636, 'f1': 0.3248904970481813}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_16_64_0.01_4_0.0002_3EpochsLast, 1/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 12.4MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 774kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 89971.37 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11265.54 examples/s]


{'TP': 2594, 'FP': 2364, 'FN': 8272, 'precision': 0.5231948366276724, 'recall': 0.2387263022271305, 'f1': 0.32785642062689585}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_16_64_0.01_8_0.0002_3EpochsLast, 2/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 2.85MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 836kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 76673.58 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10500.49 examples/s]


{'TP': 1652, 'FP': 1571, 'FN': 9214, 'precision': 0.5125659323611542, 'recall': 0.15203386710841155, 'f1': 0.23450919156788982}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_32_32_0.05_4_0.0002_3EpochsLast, 3/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 4.64MB/s]
Downloading data: 100%|██████████| 321k/321k [00:00<00:00, 602kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 83088.14 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10756.29 examples/s]


{'TP': 2879, 'FP': 1823, 'FN': 7987, 'precision': 0.612292641429179, 'recall': 0.2649549052089085, 'f1': 0.3698612538540596}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_32_64_0.05_8_0.0002_3EpochsLast, 4/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 4.91MB/s]
Downloading data: 100%|██████████| 321k/321k [00:00<00:00, 679kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 83782.74 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10722.77 examples/s]


{'TP': 3216, 'FP': 2140, 'FN': 7650, 'precision': 0.6004480955937267, 'recall': 0.2959690778575373, 'f1': 0.3964985821723585}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_32_64_0.05_2_0.0002_3EpochsLast, 5/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 8.76MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 720kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 85349.94 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10853.15 examples/s]


{'TP': 242, 'FP': 274, 'FN': 10624, 'precision': 0.4689922480620155, 'recall': 0.022271304988036075, 'f1': 0.0425232823756809}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_64_32_0.05_4_0.0002_3EpochsLast, 6/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 8.70MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 659kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 100787.62 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11920.96 examples/s]


{'TP': 2364, 'FP': 2143, 'FN': 8502, 'precision': 0.5245174173507877, 'recall': 0.21755935946990612, 'f1': 0.30755220191244387}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_16_32_0.05_8_0.0002_3EpochsLast, 7/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 5.71MB/s]
Downloading data: 100%|██████████| 320k/320k [00:00<00:00, 495kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 163949.09 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11517.05 examples/s]


{'TP': 10593, 'FP': 8934, 'FN': 273, 'precision': 0.5424796435704409, 'recall': 0.9748757592490337, 'f1': 0.6970684039087948}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_32_32_0.05_2_0.0002_3EpochsLast, 8/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 13.2MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 630kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 132102.54 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11832.95 examples/s]


{'TP': 282, 'FP': 288, 'FN': 10584, 'precision': 0.49473684210526314, 'recall': 0.025952512424075095, 'f1': 0.04931794333683105}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_16_32_0.01_8_0.0002_3EpochsLast, 9/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 11.3MB/s]
Downloading data: 100%|██████████| 319k/319k [00:00<00:00, 687kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 107566.51 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10984.41 examples/s]


{'TP': 346, 'FP': 419, 'FN': 10520, 'precision': 0.4522875816993464, 'recall': 0.03184244432173753, 'f1': 0.05949617401771129}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_32_32_0.01_4_0.0002_3EpochsLast, 10/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 6.08MB/s]
Downloading data: 100%|██████████| 317k/317k [00:00<00:00, 678kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 152215.35 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 12212.87 examples/s]


{'TP': 316, 'FP': 346, 'FN': 10550, 'precision': 0.4773413897280967, 'recall': 0.029081538744708264, 'f1': 0.05482303955586398}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_64_32_0.05_2_0.0002_3EpochsLast, 11/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 12.3MB/s]
Downloading data: 100%|██████████| 320k/320k [00:00<00:00, 649kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 112617.63 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11362.70 examples/s]


{'TP': 2612, 'FP': 1799, 'FN': 8254, 'precision': 0.5921559737021084, 'recall': 0.24038284557334805, 'f1': 0.341951953917654}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_32_64_0.01_4_0.0002_3EpochsLast, 12/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 6.80MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 654kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 86828.82 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10709.58 examples/s]


{'TP': 353, 'FP': 355, 'FN': 10513, 'precision': 0.4985875706214689, 'recall': 0.032486655623044355, 'f1': 0.0609987903922585}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_64_64_0.01_2_0.0002_3EpochsLast, 13/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 7.48MB/s]
Downloading data: 100%|██████████| 322k/322k [00:00<00:00, 768kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 82355.07 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10716.50 examples/s]


{'TP': 7894, 'FP': 1972, 'FN': 2972, 'precision': 0.800121629839854, 'recall': 0.7264862875023007, 'f1': 0.7615280725448582}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_64_32_0.01_2_0.0002_3EpochsLast, 14/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 3.37MB/s]
Downloading data: 100%|██████████| 317k/317k [00:00<00:00, 782kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 89112.44 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10858.76 examples/s]


{'TP': 235, 'FP': 273, 'FN': 10631, 'precision': 0.4625984251968504, 'recall': 0.021627093686729246, 'f1': 0.04132231404958677}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_32_32_0.01_8_0.0002_3EpochsLast, 15/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 3.97MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 594kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 85852.75 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10640.44 examples/s]


{'TP': 368, 'FP': 379, 'FN': 10498, 'precision': 0.4926372155287818, 'recall': 0.03386710841155899, 'f1': 0.06337724963403081}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_16_64_0.01_2_0.0002_3EpochsLast, 16/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 6.44MB/s]
Downloading data: 100%|██████████| 317k/317k [00:00<00:00, 753kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 84145.56 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10615.89 examples/s]


{'TP': 190, 'FP': 84, 'FN': 10676, 'precision': 0.6934306569343066, 'recall': 0.017485735321185348, 'f1': 0.0341113105924596}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_16_32_0.05_2_0.0002_3EpochsLast, 17/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 3.51MB/s]
Downloading data: 100%|██████████| 320k/320k [00:00<00:00, 460kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 84564.08 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11003.58 examples/s]


{'TP': 4784, 'FP': 4179, 'FN': 6082, 'precision': 0.5337498605377664, 'recall': 0.4402724093502669, 'f1': 0.4825255938272228}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_16_32_0.01_4_0.0002_3EpochsLast, 18/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 4.01MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 786kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 87496.43 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10662.61 examples/s]


{'TP': 2176, 'FP': 1960, 'FN': 8690, 'precision': 0.5261121856866537, 'recall': 0.20025768452052273, 'f1': 0.29009465404612716}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_32_64_0.05_4_0.0002_3EpochsLast, 19/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 3.38MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 617kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 81704.88 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10743.70 examples/s]


{'TP': 2500, 'FP': 2263, 'FN': 8366, 'precision': 0.5248792777661138, 'recall': 0.2300754647524388, 'f1': 0.3199181009661527}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_64_64_0.05_8_0.0002_3EpochsLast, 20/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 6.09MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 715kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 100532.21 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 5097.04 examples/s]


{'TP': 2382, 'FP': 2157, 'FN': 8484, 'precision': 0.5247851949768672, 'recall': 0.2192159028161237, 'f1': 0.3092502434274586}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_16_64_0.05_4_0.0002_3EpochsLast, 21/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 4.56MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 667kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 86531.58 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10648.82 examples/s]


{'TP': 735, 'FP': 814, 'FN': 10131, 'precision': 0.4744996772111039, 'recall': 0.06764218663721701, 'f1': 0.1184051550543697}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_32_32_0.05_8_0.0002_3EpochsLast, 22/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 5.78MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 445kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 93060.99 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10691.58 examples/s]


{'TP': 2297, 'FP': 2087, 'FN': 8569, 'precision': 0.5239507299270073, 'recall': 0.21139333701454077, 'f1': 0.3012459016393443}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_16_64_0.05_8_0.0002_3EpochsLast, 23/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 3.09MB/s]
Downloading data: 100%|██████████| 322k/322k [00:00<00:00, 697kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 87432.15 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10554.38 examples/s]


{'TP': 8803, 'FP': 6910, 'FN': 2063, 'precision': 0.5602367466429071, 'recall': 0.8101417264862875, 'f1': 0.6624026487076262}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_64_64_0.01_4_0.0002_3EpochsLast, 24/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 5.81MB/s]
Downloading data: 100%|██████████| 321k/321k [00:00<00:00, 724kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 81597.52 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10459.73 examples/s]


{'TP': 9830, 'FP': 7206, 'FN': 1036, 'precision': 0.5770133834233389, 'recall': 0.9046567274065893, 'f1': 0.7046089886029676}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_64_64_0.05_2_0.0002_3EpochsLast, 25/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 13.4MB/s]
Downloading data: 100%|██████████| 320k/320k [00:00<00:00, 477kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 116594.05 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11941.89 examples/s]


{'TP': 5199, 'FP': 4137, 'FN': 5667, 'precision': 0.5568766066838047, 'recall': 0.4784649364991717, 'f1': 0.5147015147015147}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_64_64_0.05_4_0.0002_3EpochsLast, 26/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 3.33MB/s]
Downloading data: 100%|██████████| 323k/323k [00:00<00:00, 650kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 177345.15 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 12337.63 examples/s]


{'TP': 9311, 'FP': 3401, 'FN': 1555, 'precision': 0.7324575204531152, 'recall': 0.8568930609239831, 'f1': 0.7898040546271949}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_16_32_0.01_2_0.0002_3EpochsLast, 27/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 9.16MB/s]
Downloading data: 100%|██████████| 320k/320k [00:00<00:00, 662kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 91840.17 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 5176.14 examples/s]


{'TP': 7539, 'FP': 6144, 'FN': 3327, 'precision': 0.5509756632317474, 'recall': 0.6938155715074544, 'f1': 0.6142001710863987}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_32_64_0.01_2_0.0002_3EpochsLast, 28/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 3.23MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 659kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 83435.21 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10975.93 examples/s]


{'TP': 7722, 'FP': 6984, 'FN': 3144, 'precision': 0.5250917992656059, 'recall': 0.7106570955273329, 'f1': 0.6039418113561708}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_64_32_0.05_8_0.0002_3EpochsLast, 29/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 8.17MB/s]
Downloading data: 100%|██████████| 319k/319k [00:00<00:00, 773kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 54107.24 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11167.41 examples/s]


{'TP': 347, 'FP': 420, 'FN': 10519, 'precision': 0.45241199478487615, 'recall': 0.031934474507638505, 'f1': 0.05965786985300438}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_16_64_0.05_2_0.0002_3EpochsLast, 30/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 5.90MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 658kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 50776.33 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 7482.94 examples/s]


{'TP': 1237, 'FP': 1454, 'FN': 9629, 'precision': 0.45968041620215533, 'recall': 0.11384133995950672, 'f1': 0.18248875119864277}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_32_64_0.01_8_0.0002_3EpochsLast, 31/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 3.05MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 752kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 153557.39 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 9569.04 examples/s]


{'TP': 1593, 'FP': 1585, 'FN': 9273, 'precision': 0.5012586532410321, 'recall': 0.146604086140254, 'f1': 0.22685844488749646}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_64_32_0.01_8_0.0002_3EpochsLast, 32/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 2.87MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 691kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 63096.62 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 8391.96 examples/s]


{'TP': 2425, 'FP': 2226, 'FN': 8441, 'precision': 0.5213932487637067, 'recall': 0.22317320080986564, 'f1': 0.31256041760649605}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_16_32_0.05_4_0.0002_3EpochsLast, 33/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 2.81MB/s]
Downloading data: 100%|██████████| 319k/319k [00:00<00:00, 794kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 58883.50 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11006.59 examples/s]


{'TP': 3170, 'FP': 2644, 'FN': 7696, 'precision': 0.5452356381148951, 'recall': 0.2917356893060924, 'f1': 0.3800959232613909}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_64_64_0.01_8_0.0002_3EpochsLast, 34/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 4.27MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 715kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 92347.92 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11327.73 examples/s]


{'TP': 2539, 'FP': 2312, 'FN': 8327, 'precision': 0.523397237682952, 'recall': 0.23366464200257683, 'f1': 0.3230896481516829}
evaluating ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_32_32_0.01_2_0.0002_3EpochsLast, 35/36...


Downloading readme: 100%|██████████| 913/913 [00:00<00:00, 3.82MB/s]
Downloading data: 100%|██████████| 318k/318k [00:00<00:00, 712kB/s]
Generating test split: 100%|██████████| 681/681 [00:00<00:00, 49897.30 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 11109.51 examples/s]


{'TP': 273, 'FP': 301, 'FN': 10593, 'precision': 0.47560975609756095, 'recall': 0.025124240750966316, 'f1': 0.04772727272727273}
                                             dataset     TP    FP     FN  \
0  ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.la...   2559  2328   8307   
0  ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.la...   2594  2364   8272   
0  ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.la...   1652  1571   9214   
0  ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.la...   2879  1823   7987   
0  ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.la...   3216  2140   7650   
0  ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.la...    242   274  10624   
0  ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.la...   2364  2143   8502   
0  ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.la...  10593  8934    273   
0  ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.la...    282   288  10584   
0  ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.la...    346   419  10520   
0  ferrazzipietro/LS_Llama-2-7b-hf_

In [7]:
best_data_checkpoint = "ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_64_64_0.01_2_0.0002_3EpochsLast"
                       "ferrazzipietro/LS_Llama-2-7b-hf_adapters_en.layer1_NoQuant_16_64_0.05_4_0.0002_3EpochsLast_clent"
                        " "
best_data = load_dataset(best_data_checkpoint, token=HF_TOKEN, split='test')

In [9]:
best_data[0]

{'sentence': 'The results of the PCR were positive for RNA specific to SARS-CoV-2.',
 'entities': [{'id': '9470',
   'offsets': [19, 22],
   'role': '',
   'semantic_type_id': '',
   'text': 'PCR',
   'type': 'EVENT'},
  {'id': '9485',
   'offsets': [57, 67],
   'role': '',
   'semantic_type_id': '',
   'text': 'SARS-CoV-2',
   'type': 'EVENT'},
  {'id': '10107',
   'offsets': [57, 67],
   'role': '',
   'semantic_type_id': 'C5400365',
   'text': 'SARS-CoV-2',
   'type': 'CLINENTITY'},
  {'id': '10347',
   'offsets': [28, 36],
   'role': '',
   'semantic_type_id': '',
   'text': 'positive',
   'type': 'RML'}],
 'original_text': 'A 43-year-old non-diabetic Indian male reported to our outpatient department with chief complaints of cough with expectoration, chest pain, reduced appetite, fever with chills, and night sweats for two weeks. He also complained of breathlessness on exertion and had two episodes of blood in his sputum. The patient explained that the cough was continuous and was 