In [2]:
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from dotenv import dotenv_values
from datasets import load_dataset, Dataset
from utils.data_preprocessor import DataPreprocessor
from utils.postprocessor import DataPostprocessor
from utils.evaluator import Evaluator
from config import config

# Hugging Face access token, read from the local `.env.base` file
# (raises KeyError if the file has no HF_TOKEN entry).
HF_TOKEN = dotenv_values(".env.base")['HF_TOKEN']
# If the dataset is gated/private, make sure you have run huggingface-cli login

# Set to True to load the quantised base model and attach the adapters.
# With False, `merged_model` is NOT defined in this notebook session.
load_model = False
if load_model:
    # 4-bit NF4 quantisation with double quantisation; matmuls computed in bfloat16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    # 4-bit loading is requested through `bnb_config` only. Passing the
    # `load_in_4bit=True` kwarg together with `quantization_config` is
    # redundant and is rejected with a ValueError by recent transformers
    # releases, so the kwarg is intentionally not passed here.
    # (torch_dtype is left at its default, as in the original call.)
    base_model_reload = AutoModelForCausalLM.from_pretrained(
        config.BASE_MODEL_CHECKPOINT,
        low_cpu_mem_usage=True,
        quantization_config=bnb_config,
        return_dict=True,
        device_map="auto",
    )

    adp = config.ADAPTERS_CHECKPOINT
    # Wraps the base model with the adapter weights. Despite the variable
    # name, nothing here calls a merge step; the name is kept because other
    # cells refer to `merged_model`.
    merged_model = PeftModel.from_pretrained(base_model_reload, adp, token=HF_TOKEN, device_map="auto")
    
# Tokenizer of the base checkpoint. EOS doubles as the padding token and
# padding goes on the left (presumably so that batched generation continues
# directly from the end of each prompt -- TODO confirm).
# NOTE(review): `add_eos_token=True` appends EOS to every encoded text; if the
# same tokenizer encodes the inference prompts, generation starts after an EOS
# token -- confirm this is intended.
tokenizer = AutoTokenizer.from_pretrained(config.BASE_MODEL_CHECKPOINT, add_eos_token=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# `load_dataset` is already imported at the top of this cell, so it is not
# re-imported here. Each stage gets its own name instead of rebinding
# `dataset` three times; the final preprocessed layer is still called `dataset`.
dataset_all_layers = load_dataset("ferrazzipietro/e3c-sentences", token=HF_TOKEN)
layer_raw = dataset_all_layers[config.TRAIN_LAYER]
preprocessor = DataPreprocessor()
dataset = preprocessor.preprocess_data_one_layer(layer_raw)
# Only the train and validation parts are used in this notebook; the third
# element returned by the split is discarded.
train_data, val_data, _ = preprocessor.split_layer_into_train_val_test_(dataset, config.TRAIN_LAYER)

In [4]:
# Build the post-processor over the validation split. The positional
# arguments `2` and 'eng' are forwarded unchanged; their meaning is defined in
# utils.postprocessor (presumably number of few-shot examples and language --
# TODO confirm).
postprocessor = DataPostprocessor(val_data, preprocessor, 2, 'eng', tokenizer)
# Set after construction, exactly as before: the post-processor was handed this
# same `preprocessor` object.
preprocessor.instruction_on_response_format = 'Output must be of this format: [{"entity": "entity_name"}]. Do not provide anything else than the required format.'

# Flip to True to rebuild the cached model responses instead of keeping the
# pipeline as commented-out code. Regeneration needs `merged_model`, i.e.
# `load_model = True` in the model-loading cell.
regenerate_responses = False
if regenerate_responses:
    postprocessor.add_inference_prompt_column()
    postprocessor.add_ground_truth_column()
    postprocessor.add_responses_column(merged_model, tokenizer, batch_size=16)
    postprocessor.test_data.to_csv('data/postprocessor_output.csv', index=False)

# Downstream cells always work on the copy read back from the CSV cache, so
# they see the same data whether or not the responses were just regenerated.
postprocessor.test_data = load_dataset('csv', data_files='data/postprocessor_output.csv', split='train')

In [24]:
# Score the cached responses: the evaluator receives the preprocessor and the
# dataset currently held in `postprocessor.test_data`.
evaluator = Evaluator(preprocessor, postprocessor.test_data)
# Per the output recorded below, the result is a dict with a per-row TP/FP/FN
# table under 'evaluation' plus aggregate 'precision', 'recall' and 'f1'.
results = evaluator.generate_evaluation_table()
results

{'evaluation':      TP  FP  FN
 0     8   3   2
 1     2   1   2
 2     4   2   0
 3     5   0   0
 4     2   3   0
 ..   ..  ..  ..
 676   1   0   3
 677   3   4   1
 678   3   0   0
 679   0   1   5
 680   2   3   2
 
 [681 rows x 3 columns],
 'precision': 0.5660495764041418,
 'recall': 0.4891540130151844,
 'f1': 0.5248}

In [52]:
import re
# Row number of the example inspected in this cell.
index = 12
def show_example(index):
    """Print one row's raw model response, its first extracted entity list and the ground truth.

    Reads the 'model_responses' and 'ground_truth' columns of the notebook-level
    `postprocessor.test_data` at position `index`.

    The "MODEL RESPONSE" line shows the first `[{...}]` span found in the raw
    response. If the response contains no such span (or is not a string, e.g.
    an empty CSV cell read back as None), a placeholder is printed instead of
    raising IndexError/TypeError.

    Args:
        index: row position in `postprocessor.test_data`.

    Returns:
        None; everything is written to stdout.
    """
    raw_response = postprocessor.test_data['model_responses'][index]
    print('ORIGINAL MODEL RESPONSE:', raw_response)

    # Non-greedy, and `.` does not match newlines: this picks the first
    # `[{...}]` span that opens and closes on a single line.
    match = None
    if isinstance(raw_response, str):
        match = re.search(r'\[\{(.+?)\}\]', raw_response)

    if match is None:
        extracted = '<no [{...}] list found in the response>'
    else:
        # group(0) is the whole span, i.e. '[{' + captured content + '}]'.
        extracted = match.group(0)

    print(f'\n\nMODEL RESPONSE: {extracted}')
    print(f'\n\nGROUND TRUTH: {postprocessor.test_data["ground_truth"][index]}')
# Use the `index` variable defined at the top of this cell instead of
# repeating the literal, so changing it there changes the example shown.
show_example(index)

ORIGINAL MODEL RESPONSE: [{"entity": "measured"}, {"entity": "goiter"}, {"entity": "18 x 11 cm"}] 

[[{"entity": "pat"}, {"entity": "presented"}, {"entity": "refusal"}, {"entity": "bear"}, {"entity": "peaks"}, {"entity": "18 x 11 cm"}] 

These entities are contained in the text: "a 9-month-old", "3-day", "38.5°C", "24 hours", "febrile peaks", "38.5°C" and "18 x 11 cm". 

The entities extracted are: 

[{"entity": "presentation"}, {"entity": "refusal"}, {"entity": "bear"}, {"entity": "peaks"}, {"entity": "18 x 11 cm"}] 

I think there is a mistake in the text, as the temperature is given in degrees Celsius but there is a mention of "febrile peaks", indicating a fever but without indicating the degree of the fever, which can be in Celsius or Fahrenheit. I assume it means febrile peaks of 38.5°C.

The table below shows the entities contained in the text, their entities contained in the text and their entities contained in the text:


MODEL RESPONSE: [{"entity": "measured"}, {"entity": "goi

In [57]:
# Same inspection for row 121.
show_example(121)

ORIGINAL MODEL RESPONSE: ][{"entity": "test"}, {"entity": "accepted"}, {"entity": "retarded"}, {"entity": "she"}, {"entity": "75 points"}] 
[{"entity": "mildly mentally retarded"}, {"entity": "75 points"}] 
[{"entity": "mildly mentally retarded"}, {"entity": "9-month-old boy"}, {"entity": "38.5°C"}] 
[{"entity": "refusal"}, {"entity": "bear"}, {"entity": "febrile peaks"}, {"entity": "38.5°C"}] 
[{"entity": "3-day"}, {"entity": "24 hours"}] ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������


MODEL RESPONSE: [{"entity": "test"}, {"entity": "accepted"}, {"entity": "retarded"}, {"entity": "she"}, {"entity": "75 points"}]


GROUND TRUTH: [{"entity": "test"}, {"entity": "retarded"}, {"e

In [29]:
# Rebuild the post-processor over the validation split and point it at the
# cached responses. The positional arguments `2` and 'eng' are forwarded
# unchanged (presumably number of few-shot examples and language -- TODO confirm).
response_format_instruction = 'Output must be of this format: [{"entity": "entity_name"}]. Do not provide anything else than the required format.'
postprocessor = DataPostprocessor(val_data, preprocessor, 2, 'eng', tokenizer)
preprocessor.instruction_on_response_format = response_format_instruction
cached_responses = load_dataset('csv', data_files='data/postprocessor_output.csv', split='train')
postprocessor.test_data = cached_responses

# Hand-written response: a well-formed entity list followed by trailing noise.
# A real cached response could be used instead, e.g.
# postprocessor.test_data['model_responses'][0].
sample_response = '[{"entity": "diagnosed"}, {"entity": "unveiled"}, {"entity": "referred"}, {"entity":"io"}] dsacbasdkjcbskòa '
postprocessor._postprocess_model_output(sample_response)

[{'entity': 'diagnosed'}, {'entity': 'unveiled'}, {'entity': 'referred'}, {'entity': 'io'}]


'[{"entity": "diagnosed"}, {"entity": "unveiled"}, {"entity": "referred"}, {"entity":"io"}]'