In [None]:
from dotenv import dotenv_values
from datasets import load_dataset, Dataset
from utils.data_preprocessor import DataPreprocessor
from utils.evaluator import Evaluator
from config.finetuning import config
from config import postprocessing_params_mistral as postprocessing
from utils.test_data_processor import TestDataProcessor
import pandas as pd
from log import mistral_8bits as models_params
from utils.generate_ft_adapters_list import generate_ft_adapters_list
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import gc
from peft import PeftModel
from tqdm import tqdm

HF_TOKEN = dotenv_values(".env.base")['HF_TOKEN']

max_new_tokens_factor_list = []#postprocessing.max_new_tokens_factor_list
n_shots_inference_list = [2]#postprocessing.n_shots_inference_list
layer = models_params.TRAIN_LAYER
language = layer.split('.')[0]


dataset = load_dataset("ferrazzipietro/e3c-sentences", token=HF_TOKEN)
dataset = dataset[layer]
preprocessor = DataPreprocessor(model_checkpoint=models_params.BASE_MODEL_CHECKPOINT, 
                                tokenizer = models_params.BASE_MODEL_CHECKPOINT)
dataset = preprocessor.preprocess_data_one_layer(dataset,
                                                 simplest_prompt=models_params.simplest_prompt)
_, val_data, _ = preprocessor.split_layer_into_train_val_test_(dataset, layer)

load_in_8bit = not models_params.load_in_4bit
bnb_config = BitsAndBytesConfig(
            load_in_4bit = models_params.load_in_4bit[0],
            load_in_8bit = load_in_8bit,
            bnb_4bit_use_double_quant = models_params.bnb_4bit_use_double_quant,
            bnb_4bit_quant_type = models_params.bnb_4bit_quant_type[0],
            bnb_4bit_compute_dtype = models_params.bnb_4bit_compute_dtype[0],
            llm_int8_threshold = models_params.llm_int8_threshold[0],
            llm_int8_has_fp16_weight = models_params.llm_int8_has_fp16_weight,
            llm_int8_skip_modules = models_params.llm_int8_skip_modules
            )


adapters_list = generate_ft_adapters_list("mistral_8bits", simplest_prompt=models_params.simplest_prompt)
adapters_list = [adapters_list[0]]
for max_new_tokens_factor in max_new_tokens_factor_list:
    for n_shots_inference in n_shots_inference_list:
        for adapters in tqdm(adapters_list, desc="adapters_list"):
            print("PROCESSING:", adapters)
            base_model = AutoModelForCausalLM.from_pretrained(
                models_params.BASE_MODEL_CHECKPOINT, low_cpu_mem_usage=True,
                quantization_config = bnb_config,
                return_dict=True,  
                #torch_dtype=torch.float16,
                device_map= "auto")
            merged_model = PeftModel.from_pretrained(base_model, adapters, token=HF_TOKEN, device_map='auto')
            tokenizer = AutoTokenizer.from_pretrained(models_params.BASE_MODEL_CHECKPOINT, add_eos_token=True)
            tokenizer.pad_token = tokenizer.eos_token
            tokenizer.padding_side = "left"

            # merged_model, tokenizer = load_mergedModel_tokenizer(adapters, base_model)
            postprocessor = TestDataProcessor(test_data=val_data, 
                                              preprocessor=preprocessor, 
                                              n_shots_inference=n_shots_inference, 
                                              language=language, 
                                              tokenizer=tokenizer)
            postprocessor.add_inference_prompt_column(simplest_prompt=models_params.simplest_prompt)
            postprocessor.add_ground_truth_column()
            try:
                postprocessor.add_responses_column(model=merged_model, 
                                                tokenizer=tokenizer, 
                                                batch_size=36, 
                                                max_new_tokens_factor=max_new_tokens_factor)
                postprocessor.test_data.to_csv(f"{postprocessing.save_directory}maxNewTokensFactor{max_new_tokens_factor}_nShotsInference{n_shots_inference}_{adapters.split('/')[1]}.csv", index=False)
            except Exception as e:
                print("ERROR IN PROCESSING: ", Exception, adapters)
            del merged_model
            del base_model
            del tokenizer
            gc.collect()
            torch.cuda.empty_cache()



In [2]:
from dotenv import dotenv_values
from datasets import load_dataset, Dataset
from utils.data_preprocessor import DataPreprocessor
from utils.evaluator import Evaluator
from config.finetuning import config
from utils.load_merged_model_tokenizer import load_mergedModel_tokenizer
from config import postprocessing
from utils.test_data_processor import TestDataProcessor
import pandas as pd
from log import enlayer1_3epochs_4bits__ft_params as models_params
from utils.generate_ft_adapters_list import generate_ft_adapters_list
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import gc
from peft import PeftModel
from tqdm import tqdm

HF_TOKEN = dotenv_values(".env.base")['HF_TOKEN']

max_new_tokens_factor_list = postprocessing.max_new_tokens_factor_list
n_shots_inference_list = postprocessing.n_shots_inference_list
layer = models_params.TRAIN_LAYER
language = layer.split('.')[0]


dataset = load_dataset("ferrazzipietro/e3c-sentences", token=HF_TOKEN)
dataset = dataset[layer]
preprocessor = DataPreprocessor(config.BASE_MODEL_CHECKPOINT, config.BASE_MODEL_CHECKPOINT)
dataset = preprocessor.preprocess_data_one_layer(dataset, instruction_on_response_format='Return the result in a json format: [{"entity":"entity_name"}].')
_, val_data, _ = preprocessor.split_layer_into_train_val_test_(dataset, layer)

# bnb_config = BitsAndBytesConfig(
#             load_in_4bit=True,
#             bnb_4bit_use_double_quant=True,
#             bnb_4bit_quant_type="nf4",
#             bnb_4bit_compute_dtype=torch.bfloat16)

bnb_config = BitsAndBytesConfig(
            load_in_4bit=False,
            load_in_8bit=True,
            #bnb_4bit_use_double_quant=True,
            #bnb_4bit_quant_type="nf4",
            #bnb_4bit_compute_dtype=torch.bfloat16,
            llm_int8_threshold= 6.0,
            llm_int8_skip_modules= ["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"],
            )


adapters_list = generate_ft_adapters_list("enlayer1_3epochs_4bits__ft_params")



Map: 100%|██████████| 1520/1520 [00:00<00:00, 3633.82 examples/s]
Map: 100%|██████████| 170/170 [00:00<00:00, 6718.22 examples/s]


### LLAMA

In [1]:
from dotenv import dotenv_values
from datasets import load_dataset, Dataset
from utils.data_preprocessor import DataPreprocessor
from utils.evaluator import Evaluator
from config.finetuning_llama2 import config
from utils.load_merged_model_tokenizer import load_mergedModel_tokenizer
from config import postprocessing
from utils.test_data_processor import TestDataProcessor
import pandas as pd
from log import enlayer1_3epochs_8bits__ft_params_llama as models_params
from utils.generate_ft_adapters_list import generate_ft_adapters_list
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import gc
from peft import PeftModel
from tqdm import tqdm

HF_TOKEN = dotenv_values(".env.base")['HF_TOKEN']
LLAMA_TOKEN = dotenv_values(".env.base")['LLAMA_TOKEN']

max_new_tokens_factor_list = postprocessing.max_new_tokens_factor_list
n_shots_inference_list = postprocessing.n_shots_inference_list
layer = models_params.TRAIN_LAYER
language = layer.split('.')[0]


dataset = load_dataset("ferrazzipietro/e3c-sentences", token=HF_TOKEN)
dataset = dataset[layer]
preprocessor = DataPreprocessor()
dataset = preprocessor.preprocess_data_one_layer(dataset)
_, val_data, _ = preprocessor.split_layer_into_train_val_test_(dataset, layer)

bnb_config = BitsAndBytesConfig(
            load_in_4bit=False,
            load_in_8bit=True,
            #bnb_4bit_use_double_quant=True,
            #bnb_4bit_quant_type="nf4",
            #bnb_4bit_compute_dtype=torch.bfloat16,
            llm_int8_threshold= 6.0,
            load_in_8bit_fp32_cpu_offload=True,
            llm_int8_skip_modules= ["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"],
            )


adapters_list = generate_ft_adapters_list("enlayer1_3epochs_8bits__ft_params_llama")


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
adapters = "ferrazzipietro/Llama-2-7b-chat-hf_adapters_en.layer1_8_torch.bfloat16_16_32_0.05_4_0.0002"
merged_model, tokenizer = load_mergedModel_tokenizer(adapters, "meta-llama/Llama-2-7b-chat-hf", llama_key=LLAMA_TOKEN)

postprocessor = TestDataProcessor(test_data=val_data.select(range(48)), preprocessor=preprocessor, n_shots_inference=2, language='en', tokenizer=tokenizer)
postprocessor.add_inference_prompt_column()
postprocessor.add_ground_truth_column()
postprocessor.add_responses_column(model=merged_model, tokenizer=tokenizer, batch_size=12, max_new_tokens_factor=5)
postprocessor.test_data.to_csv(f"data/test_data_processed/en_nShots{2}_maxNewTokensFactor{5}.csv", index=False)

In [3]:
postprocessor.test_data['model_responses']
postprocessor.test_data.to_csv(f"data/test_data_processed/en_nShots{2}_maxNewTokensFactor{5}.csv", index=False)

Creating CSV from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 67.60ba/s]


183295

In [None]:

for max_new_tokens_factor in max_new_tokens_factor_list:
    for n_shots_inference in n_shots_inference_list:
        for adapters in tqdm(adapters_list, desc="adapters_list"):
            print("PROCESSING:", adapters)
            base_model = AutoModelForCausalLM.from_pretrained(
                models_params.BASE_MODEL_CHECKPOINT, low_cpu_mem_usage=True,
                quantization_config = bnb_config,
                return_dict=True,  load_in_4bit=True, 
                token = LLAMA_TOKEN,
                #torch_dtype=torch.float16,
                device_map= "auto")
            merged_model = PeftModel.from_pretrained(base_model, adapters, token=HF_TOKEN, device_map='auto')
            tokenizer = AutoTokenizer.from_pretrained(models_params.BASE_MODEL_CHECKPOINT, add_eos_token=True, token=LLAMA_TOKEN)
            tokenizer.pad_token = tokenizer.eos_token
            tokenizer.padding_side = "left"

            # merged_model, tokenizer = load_mergedModel_tokenizer(adapters, base_model)
            postprocessor = TestDataProcessor(test_data=val_data, 
                                              preprocessor=preprocessor, 
                                              n_shots_inference=n_shots_inference, 
                                              language=language, 
                                              tokenizer=tokenizer)
            postprocessor.add_inference_prompt_column()
            postprocessor.add_ground_truth_column()
            # try:
            postprocessor.add_responses_column(model=merged_model, 
                                            tokenizer=tokenizer, 
                                            batch_size=12, 
                                            max_new_tokens_factor=max_new_tokens_factor)
            postprocessor.test_data.to_csv(f"data/test_data_processed/maxNewTokensFactor{max_new_tokens_factor}_nShotsInference{n_shots_inference}_{adapters.split('/')[1]}.csv", index=False)
            # except Exception as e:
            #     print("ERROR IN PROCESSING: ", Exception, adapters)

            del merged_model
            del base_model
            del tokenizer
            gc.collect()
            torch.cuda.empty_cache()



### ONE RUN MISTRAL

In [3]:
from dotenv import dotenv_values
from datasets import load_dataset, Dataset
from utils.data_preprocessor import DataPreprocessor
from utils.evaluator import Evaluator
from config.finetuning import config
from utils.load_merged_model_tokenizer import load_mergedModel_tokenizer
from config import postprocessing
from utils.test_data_processor import TestDataProcessor
import pandas as pd
from log import enlayer1_3epochs_4bits__ft_params as models_params
from utils.generate_ft_adapters_list import generate_ft_adapters_list
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from utils.output_cleaner import OutputCleaner

HF_TOKEN = dotenv_values(".env.base")['HF_TOKEN']

max_new_tokens_factor_list = postprocessing.max_new_tokens_factor_list
n_shots_inference_list = postprocessing.n_shots_inference_list
layer = models_params.TRAIN_LAYER
language = layer.split('.')[0]


dataset = load_dataset("ferrazzipietro/e3c-sentences", token=HF_TOKEN)
dataset = dataset[layer]
preprocessor = DataPreprocessor(config.BASE_MODEL_CHECKPOINT, config.BASE_MODEL_CHECKPOINT)
dataset = preprocessor.preprocess_data_one_layer(dataset, instruction_on_response_format='Return the result in a json format: [{"entity":"entity_name"}].')
_, val_data, _ = preprocessor.split_layer_into_train_val_test_(dataset, layer)



bnb_config = BitsAndBytesConfig(
            load_in_4bit=False,
            #load_in_8bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            # llm_int8_threshold= 6.0,
            # llm_int8_skip_modules= ["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"],
            )
base_model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2", low_cpu_mem_usage=True,
    quantization_config = bnb_config,
    return_dict=True,
    device_map= 'auto')




adapters = "ferrazzipietro/Mistral-7B-Instruct-v0.2_adapters_en.layer1_8_torch.bfloat16_32_32_0.05_2_0.0002"
merged_model = PeftModel.from_pretrained(base_model, adapters, token=HF_TOKEN, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", add_eos_token=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

postprocessor = TestDataProcessor(test_data=val_data.select(range(6)), preprocessor=preprocessor, n_shots_inference=2, language='en', tokenizer=tokenizer)
postprocessor.add_inference_prompt_column()
postprocessor.add_ground_truth_column()
postprocessor.add_responses_column(model=merged_model, tokenizer=tokenizer, batch_size=3, max_new_tokens_factor=5)

output_cleaner = OutputCleaner()
similar_is_equal = False
similar_is_equal_threshold = 100
cleaned_data = output_cleaner.apply_cleaning(postprocessor.test_data, wrong_keys_to_entity=False)

Loading checkpoint shards: 100%|██████████| 3/3 [00:04<00:00,  1.42s/it]
You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.
Map: 100%|██████████| 6/6 [00:00<00:00, 902.03 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 984.96 examples/s]
generating responses:   0%|          | 0/6 [00:00<?, ?it/s]

['<s>[INST] Extract the entities contained in the text. Extract only entities contained in the text.\nReturn the result in a json format: [{"entity":"entity_name"}]. Text: <<<We present a case of a 32-year-old woman with a history of gradual enlargement of the anterior neck.>>> [/INST] [{"entity": "present"}, {"entity": "history"}, {"entity": "enlargement"}] \n[INST] Extract the entities contained in the text. Extract only entities contained in the text.\nReturn the result in a json format: [{"entity":"entity_name"}]. Text: <<<Patient information: a 9-month-old boy presented to the emergency room with a 3-day history of refusal to bear weight on the right lower extremity and febrile peaks of up to 38.5°C for 24 hours.>>> [/INST] [{"entity": "presented"}, {"entity": "refusal"}, {"entity": "bear"}, {"entity": "peaks"}] \n[INST] Extract the entities contained in the text. Extract only entities contained in the text.\nReturn the result in a json format: [{"entity":"entity_name"}]. Text: <<

generating responses:  50%|█████     | 3/6 [00:27<00:27,  9.04s/it]

['<s>[INST] Extract the entities contained in the text. Extract only entities contained in the text.\nReturn the result in a json format: [{"entity":"entity_name"}]. Text: <<<We present a case of a 32-year-old woman with a history of gradual enlargement of the anterior neck.>>> [/INST] [{"entity": "present"}, {"entity": "history"}, {"entity": "enlargement"}] \n[INST] Extract the entities contained in the text. Extract only entities contained in the text.\nReturn the result in a json format: [{"entity":"entity_name"}]. Text: <<<Patient information: a 9-month-old boy presented to the emergency room with a 3-day history of refusal to bear weight on the right lower extremity and febrile peaks of up to 38.5°C for 24 hours.>>> [/INST] [{"entity": "presented"}, {"entity": "refusal"}, {"entity": "bear"}, {"entity": "peaks"}] \n[INST] Extract the entities contained in the text. Extract only entities contained in the text.\nReturn the result in a json format: [{"entity":"entity_name"}]. Text: <<

generating responses: 100%|██████████| 6/6 [00:53<00:00,  8.95s/it]
Map: 100%|██████████| 6/6 [00:00<00:00, 912.27 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 819.39 examples/s]

ORIGINAL MODEL OUTPUT:  [{"entity": "present"}, {"entity": "history"}, {"entity": "enlargement"}] 
[INST] Extract the entities contained in the text. Extract only entities contained in the text.
Return the result in a json format: [{"entity":"entity_name"}]. Text: <<<Patient information: a 9-month-old boy presented to the emergency room with a 3-day history of refusal to bear weight on the right lower extremity and febrile peaks of up to 38.5°C for 24 hours.>>> [/INST] [{"entity": "presented"}, {"entity": "refusal"}, {"entity": "bear"}, {"entity": "peaks"}] 
[INST] Extract the entities contained in the text. Extract only entities contained in the text.
Return the result in a json format: [{"entity":"entity_name"}]. Text: <<A 46-year-old man with hypertension and dyslipidemia diagnosed 4-months before, as well as new-onset diabetes mellitus unveiled 1-month earlier, was referred to emergency department for hypokalemia.>>> [/INST] 1-month-onset diabetes mellitus, hypertension, dyslipidem




### ONE RUN MISTAL NO QUANT

In [None]:
from dotenv import dotenv_values
from datasets import load_dataset, Dataset
from utils.data_preprocessor import DataPreprocessor
from utils.evaluator import Evaluator
from config.finetuning import config
from utils.load_merged_model_tokenizer import load_mergedModel_tokenizer
from config import postprocessing
from utils.test_data_processor import TestDataProcessor
import pandas as pd
from log import enlayer1_3epochs_4bits__ft_params as models_params
from utils.generate_ft_adapters_list import generate_ft_adapters_list
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from utils.output_cleaner import OutputCleaner

HF_TOKEN = dotenv_values(".env.base")['HF_TOKEN']

max_new_tokens_factor_list = postprocessing.max_new_tokens_factor_list
n_shots_inference_list = postprocessing.n_shots_inference_list
layer = models_params.TRAIN_LAYER
language = layer.split('.')[0]


dataset = load_dataset("ferrazzipietro/e3c-sentences", token=HF_TOKEN)
dataset = dataset[layer]
preprocessor = DataPreprocessor(config.BASE_MODEL_CHECKPOINT, config.BASE_MODEL_CHECKPOINT)
dataset = preprocessor.preprocess_data_one_layer(dataset, instruction_on_response_format='Return the result in a json format: [{"entity":"entity_name"}].')
_, val_data, _ = preprocessor.split_layer_into_train_val_test_(dataset, layer)



bnb_config = BitsAndBytesConfig(
            load_in_4bit=False,
            #load_in_8bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            # llm_int8_threshold= 6.0,
            # llm_int8_skip_modules= ["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"],
            )
base_model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2", low_cpu_mem_usage=True,
    quantization_config = bnb_config,
    return_dict=True,
    device_map= 'auto')


adapters = 'ferrazzipietro/Mistral-7B-Instruct-v0.2__adapters_en.layer1_4_torch.bfloat16_16_32_0.05_2_0.0002'
merged_model = PeftModel.from_pretrained(base_model, adapters, token=HF_TOKEN, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", add_eos_token=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

postprocessor = TestDataProcessor(test_data=val_data.select(range(6)), preprocessor=preprocessor, n_shots_inference=2, language='en', tokenizer=tokenizer)
postprocessor.add_inference_prompt_column()
postprocessor.add_ground_truth_column()
postprocessor.add_responses_column(model=merged_model, tokenizer=tokenizer, batch_size=3, max_new_tokens_factor=5)

output_cleaner = OutputCleaner()
similar_is_equal = False
similar_is_equal_threshold = 100
cleaned_data = output_cleaner.apply_cleaning(postprocessor.test_data, wrong_keys_to_entity=False)

### ONE RUN QWEN

In [2]:
from dotenv import dotenv_values
from datasets import load_dataset, Dataset
from utils.data_preprocessor import DataPreprocessor
from utils.evaluator import Evaluator
from utils.test_data_processor import TestDataProcessor
import pandas as pd
from utils.generate_ft_adapters_list import generate_ft_adapters_list
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from utils.output_cleaner import OutputCleaner
from peft import PeftModel

HF_TOKEN = dotenv_values(".env.base")['HF_TOKEN']

max_new_tokens_factor_list = [6]
n_shots_inference_list = [0]
layer = 'en.layer1'
language = layer.split('.')[0]

BASE_MODEL_CHECKPOINT = 'Qwen/Qwen1.5-7B-Chat'
dataset = load_dataset("ferrazzipietro/e3c-sentences", token=HF_TOKEN)
dataset = dataset[layer]
preprocessor = DataPreprocessor(BASE_MODEL_CHECKPOINT, BASE_MODEL_CHECKPOINT)
dataset = preprocessor.preprocess_data_one_layer(dataset, instruction_on_response_format='Return the result in a json format: [{"entity":"entity_name"}].')
_, val_data, _ = preprocessor.split_layer_into_train_val_test_(dataset, layer)



bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            #load_in_8bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            # llm_int8_threshold= 6.0,
            # llm_int8_skip_modules= ["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"],
            )
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_CHECKPOINT, low_cpu_mem_usage=True,
    quantization_config = bnb_config,
    return_dict=True,
    device_map= 'auto',
    cache_dir='/data/disk1/share/pferrazzi/.cache')


adapters = 'ferrazzipietro/qwen1.5-7b-chat__adapters_en.layer1_8_torch.bfloat16_16_32_0.01_2_0.0002'
merged_model = PeftModel.from_pretrained(base_model, adapters, token=HF_TOKEN, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_CHECKPOINT, add_eos_token=True)
tokenizer.pad_token = tokenizer.eos_token

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.22s/it]
You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


AttributeError: 'Qwen2TokenizerFast' object has no attribute 'padding_token'

In [3]:
print('padding_token: ', tokenizer.pad_token)
tokenizer.padding_side = "left"

postprocessor = TestDataProcessor(test_data=val_data.select(range(6)), preprocessor=preprocessor, n_shots_inference=2, language='en', tokenizer=tokenizer)
postprocessor.add_inference_prompt_column(simplest_prompt=False)
postprocessor.add_ground_truth_column()
postprocessor.add_responses_column(model=merged_model, tokenizer=tokenizer, batch_size=3, max_new_tokens_factor=5)

output_cleaner = OutputCleaner()
similar_is_equal = False
similar_is_equal_threshold = 100
cleaned_data = output_cleaner.apply_cleaning(postprocessor.test_data, wrong_keys_to_entity=False)

padding_token:  <|im_end|>


Map: 100%|██████████| 6/6 [00:00<00:00, 970.79 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 1093.31 examples/s]
generating responses: 100%|██████████| 6/6 [00:42<00:00,  7.15s/it]
Map: 100%|██████████| 6/6 [00:00<00:00, 824.60 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 948.44 examples/s]


In [9]:
cleaned_data[0]

{'sentence': 'A 46-year-old man with hypertension and dyslipidemia diagnosed 4-months before, as well as new-onset diabetes mellitus unveiled 1-month earlier, was referred to emergency department for hypokalemia.',
 'entities': [{'id': '1614',
   'offsets': [23, 35],
   'role': '',
   'semantic_type_id': '',
   'text': 'hypertension',
   'type': 'EVENT'},
  {'id': '1629',
   'offsets': [40, 52],
   'role': '',
   'semantic_type_id': '',
   'text': 'dyslipidemia',
   'type': 'EVENT'},
  {'id': '1644',
   'offsets': [53, 62],
   'role': '',
   'semantic_type_id': '',
   'text': 'diagnosed',
   'type': 'EVENT'},
  {'id': '1659',
   'offsets': [110, 118],
   'role': '',
   'semantic_type_id': '',
   'text': 'mellitus',
   'type': 'EVENT'},
  {'id': '1674',
   'offsets': [149, 157],
   'role': '',
   'semantic_type_id': '',
   'text': 'referred',
   'type': 'EVENT'},
  {'id': '1689',
   'offsets': [186, 197],
   'role': '',
   'semantic_type_id': '',
   'text': 'hypokalemia',
   'type': 'EV

### LLAMAM one run

In [3]:
from dotenv import dotenv_values
from datasets import load_dataset, Dataset
from utils.data_preprocessor import DataPreprocessor
from utils.evaluator import Evaluator
from config.finetuning import config
from utils.test_data_processor import TestDataProcessor
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from utils.output_cleaner import OutputCleaner
import gc
from peft import PeftModel

HF_TOKEN = dotenv_values(".env.base")['HF_TOKEN']
LLAMA_TOKEN = dotenv_values(".env.base")['LLAMA_TOKEN']

max_new_tokens_factor_list = [6]
n_shots_inference_list = [0]
layer = 'en.layer1'
language = layer.split('.')[0]


dataset = load_dataset("ferrazzipietro/e3c-sentences", token=HF_TOKEN)
dataset = dataset[layer]
preprocessor = DataPreprocessor(config.BASE_MODEL_CHECKPOINT, config.BASE_MODEL_CHECKPOINT)
dataset = preprocessor.preprocess_data_one_layer(dataset, instruction_on_response_format='Return the result in a json format: [{"entity":"entity_name"}].')
_, val_data, _ = preprocessor.split_layer_into_train_val_test_(dataset, layer)



bnb_config = BitsAndBytesConfig(
            load_in_4bit=False,
            #load_in_8bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            # llm_int8_threshold= 6.0,
            # llm_int8_skip_modules= ["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"],
            )
base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf", low_cpu_mem_usage=True,
    quantization_config = bnb_config,
    return_dict=True,
    device_map= 'auto',
    token = LLAMA_TOKEN,
    cache_dir='/data/disk1/share/pferrazzi/.cache')


adapters = 'ferrazzipietro/Llama-2-7b-chat-hf_adapters_en.layer1_8_torch.bfloat16_32_32_0.01_2_0.0002'
merged_model = PeftModel.from_pretrained(base_model, adapters, token=HF_TOKEN, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", add_eos_token=True, 
    token = LLAMA_TOKEN,
    cache_dir='/data/disk1/share/pferrazzi/.cache')
print('padding_token: ', tokenizer.pad_token)
tokenizer.pad_token = tokenizer.eos_token
print('padding_token: ', tokenizer.pad_token)

tokenizer.padding_side = "left"

Loading checkpoint shards: 100%|██████████| 2/2 [00:57<00:00, 28.92s/it]
You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.


padding_token:  None
padding_token:  </s>


In [None]:

postprocessor = TestDataProcessor(test_data=val_data.select(range(6)), preprocessor=preprocessor, n_shots_inference=2, language='en', tokenizer=tokenizer)
postprocessor.add_inference_prompt_column()
postprocessor.add_ground_truth_column()
postprocessor.add_responses_column(model=merged_model, tokenizer=tokenizer, batch_size=3, max_new_tokens_factor=5)

output_cleaner = OutputCleaner()
similar_is_equal = False
similar_is_equal_threshold = 100
cleaned_data = output_cleaner.apply_cleaning(postprocessor.test_data, wrong_keys_to_entity=False)

## LLAMA 7B 8bit

In [3]:
from dotenv import dotenv_values
from datasets import load_dataset, Dataset
from utils.data_preprocessor import DataPreprocessor
from utils.evaluator import Evaluator
from config import postprocessing
from utils.test_data_processor import TestDataProcessor
import pandas as pd
from log import enlayer1_3epochs_8bits__ft_params_llama as models_params
from utils.generate_ft_adapters_list import generate_ft_adapters_list
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import gc
from peft import PeftModel
from tqdm import tqdm

HF_TOKEN = dotenv_values(".env.base")['HF_TOKEN']

max_new_tokens_factor_list = postprocessing.max_new_tokens_factor_list
n_shots_inference_list = postprocessing.n_shots_inference_list
layer = models_params.TRAIN_LAYER
language = layer.split('.')[0]


dataset = load_dataset("ferrazzipietro/e3c-sentences", token=HF_TOKEN)
dataset = dataset[layer]
preprocessor = DataPreprocessor(model_checkpoint=models_params.BASE_MODEL_CHECKPOINT, 
                                tokenizer = models_params.BASE_MODEL_CHECKPOINT)
dataset = preprocessor.preprocess_data_one_layer(dataset,models_params.instruction_on_response_format)
_, val_data, _ = preprocessor.split_layer_into_train_val_test_(dataset, layer)

bnb_config = BitsAndBytesConfig(
            load_in_4bit=False,
            load_in_8bit=True,
            #bnb_4bit_use_double_quant=True,
            #bnb_4bit_quant_type="nf4",
            #bnb_4bit_compute_dtype=torch.bfloat16,
            llm_int8_threshold= 6.0,
            llm_int8_has_fp16_weight = False,
            llm_int8_skip_modules= ["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"],
            )


adapters_list = generate_ft_adapters_list("enlayer1_3epochs_8bits__ft_params_llama")


max_new_tokens_factor_list = [2]
n_shots_inference_list = [0]
val_data = val_data.select(range(12))


  from .autonotebook import tqdm as notebook_tqdm


In [4]:

for max_new_tokens_factor in max_new_tokens_factor_list:
    for n_shots_inference in n_shots_inference_list:
        for adapters in tqdm(adapters_list, desc="adapters_list"):
            print("PROCESSING:", adapters)
            base_model = AutoModelForCausalLM.from_pretrained(
                models_params.BASE_MODEL_CHECKPOINT, low_cpu_mem_usage=True,
                quantization_config = bnb_config,
                return_dict=True, 
                #torch_dtype=torch.float16,
                device_map= "auto")
            merged_model = PeftModel.from_pretrained(base_model, adapters, token=HF_TOKEN, device_map='auto')
            tokenizer = AutoTokenizer.from_pretrained(models_params.BASE_MODEL_CHECKPOINT, add_eos_token=False)
            tokenizer.pad_token = tokenizer.unk_token
            tokenizer.padding_side = "left"

            # merged_model, tokenizer = load_mergedModel_tokenizer(adapters, base_model)
            postprocessor = TestDataProcessor(test_data=val_data, 
                                              preprocessor=preprocessor, 
                                              n_shots_inference=n_shots_inference, 
                                              language=language, 
                                              tokenizer=tokenizer)
            postprocessor.add_inference_prompt_column()
            postprocessor.add_ground_truth_column()
            #try:
            postprocessor.add_responses_column(model=merged_model, 
                                            tokenizer=tokenizer, 
                                            batch_size=6, 
                                            max_new_tokens_factor=max_new_tokens_factor)
            postprocessor.test_data.to_csv(f"data/test_data_processed/maxNewTokensFactor{max_new_tokens_factor}_nShotsInference{n_shots_inference}_{adapters.split('/')[1]}.csv", index=False)
            # except Exception as e:
            #     print("ERROR IN PROCESSING: ", Exception, adapters)
            del merged_model
            del base_model
            del tokenizer
            gc.collect()
            torch.cuda.empty_cache()



adapters_list:   0%|          | 0/36 [00:00<?, ?it/s]

PROCESSING: ferrazzipietro/Llama-2-7b-chat-hf_adapters_en.layer1_8_torch.bfloat16_16_32_0.05_2_0.0002


Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.13s/it]
Map: 100%|██████████| 12/12 [00:00<00:00, 1604.91 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 1081.22 examples/s]
generating responses: 100%|██████████| 12/12 [01:10<00:00,  5.89s/it]
Creating CSV from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 97.74ba/s]
adapters_list:   3%|▎         | 1/36 [01:16<44:25, 76.17s/it]

PROCESSING: ferrazzipietro/Llama-2-7b-chat-hf_adapters_en.layer1_8_torch.bfloat16_16_32_0.05_2_0.0008


Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.14s/it]
generating responses:   0%|          | 0/12 [00:19<?, ?it/s]
adapters_list:   3%|▎         | 1/36 [01:39<58:18, 99.97s/it]


KeyboardInterrupt: 

In [None]:
adapters = "ferrazzipietro/Llama-2-7b-chat-hf_adapters_en.layer1_8_torch.bfloat16_16_32_0.05_4_0.0002"

postprocessor = TestDataProcessor(test_data=val_data.select(range(48)), preprocessor=preprocessor, n_shots_inference=2, language='en', tokenizer=tokenizer)
postprocessor.add_inference_prompt_column()
postprocessor.add_ground_truth_column()
postprocessor.add_responses_column(model=merged_model, tokenizer=tokenizer, batch_size=12, max_new_tokens_factor=5)
postprocessor.test_data.to_csv(f"data/test_data_processed/en_nShots{2}_maxNewTokensFactor{5}.csv", index=False)

### MISTRAL BASE simplets_prompt one run

In [1]:
from dotenv import dotenv_values
from datasets import load_dataset, Dataset
from utils.data_preprocessor import DataPreprocessor
from utils.evaluator import Evaluator
from config import postprocessing
from utils.test_data_processor import TestDataProcessor
import pandas as pd
from utils.generate_ft_adapters_list import generate_ft_adapters_list
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import gc
from peft import PeftModel
from tqdm import tqdm

HF_TOKEN = dotenv_values(".env.base")['HF_TOKEN']
adapters = "ferrazzipietro/Mistral-7B-v0.1_simplest_prompt_adapters_en.layer1_8_torch.bfloat16_32_32_0.01_4_0.0002"
model_checkpoint = "mistralai/Mistral-7B-v0.1"

layer = 'en.layer1'
language = layer.split('.')[0]


dataset = load_dataset("ferrazzipietro/e3c-sentences", token=HF_TOKEN)
dataset = dataset[layer]
preprocessor = DataPreprocessor(model_checkpoint=model_checkpoint, 
                                tokenizer = model_checkpoint)
dataset = preprocessor.preprocess_data_one_layer(dataset,
                                                 simplest_prompt=True)
_, val_data, _ = preprocessor.split_layer_into_train_val_test_(dataset, layer)

bnb_config = BitsAndBytesConfig(
            #load_in_4bit=True,
            load_in_8bit=True,
            # bnb_4bit_use_double_quant=True,
            # bnb_4bit_quant_type="nf4",
            # bnb_4bit_compute_dtype=torch.bfloat16,
            llm_int8_threshold= 6.0,
            llm_int8_has_fp16_weight = False,
            llm_int8_skip_modules= ["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"],
            )


  from .autonotebook import tqdm as notebook_tqdm
Map: 100%|██████████| 1520/1520 [00:00<00:00, 5247.43 examples/s]
Map: 100%|██████████| 170/170 [00:00<00:00, 6902.40 examples/s]


In [2]:
base_model = AutoModelForCausalLM.from_pretrained(
    model_checkpoint, low_cpu_mem_usage=True,
    quantization_config = bnb_config,
    return_dict=True, 
    #torch_dtype=torch.float16,
    device_map= "auto")
merged_model = PeftModel.from_pretrained(base_model, adapters, token=HF_TOKEN, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_eos_token=False)
tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = "left"
# merged_model, tokenizer = load_mergedModel_tokenizer(adapters, base_model)


Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.98s/it]


In [5]:
postprocessor = TestDataProcessor(test_data=val_data,
                                  preprocessor=preprocessor, 
                                  n_shots_inference=2, 
                                  language=language, 
                                  tokenizer=tokenizer)
postprocessor.add_inference_prompt_column(simplest_prompt=True)
postprocessor.add_ground_truth_column()

Map:   0%|          | 0/681 [00:00<?, ? examples/s]

Map: 100%|██████████| 681/681 [00:00<00:00, 7469.87 examples/s]
Map: 100%|██████████| 681/681 [00:00<00:00, 10676.44 examples/s]


In [6]:
print(postprocessor.test_data['inference_prompt'][0], '\n')
print(postprocessor.test_data['prompt'][0], '\n')

<s>[INST] <<<We present a case of a 32-year-old woman with a history of gradual enlargement of the anterior neck.>>> [/INST] [{"entity": "present"}, {"entity": "history"}, {"entity": "enlargement"}] 
[INST] <<<Patient information: a 9-month-old boy presented to the emergency room with a 3-day history of refusal to bear weight on the right lower extremity and febrile peaks of up to 38.5°C for 24 hours.>>> [/INST] [{"entity": "presented"}, {"entity": "refusal"}, {"entity": "bear"}, {"entity": "peaks"}] 
[INST]A 46-year-old man with hypertension and dyslipidemia diagnosed 4-months before, as well as new-onset diabetes mellitus unveiled 1-month earlier, was referred to emergency department for hypokalemia.[/INST] 

<s>[INST]A 46-year-old man with hypertension and dyslipidemia diagnosed 4-months before, as well as new-onset diabetes mellitus unveiled 1-month earlier, was referred to emergency department for hypokalemia.[/INST][{"entity": "hypertension"}, {"entity": "dyslipidemia"}, {"entity

In [7]:
postprocessor.add_responses_column(model=merged_model, 
                                tokenizer=tokenizer, 
                                batch_size=8, 
                                max_new_tokens_factor=4)
postprocessor.test_data.to_csv(f"data/test_data_processed/maxNewTokensFactor{2}_nShotsInference{0}_{adapters.split('/')[1]}.csv", index=False)
# except Exception as e:
#     print("ERROR IN PROCESSING: ", Exception, adapters)
del merged_model
del base_model
del tokenizer
gc.collect()
torch.cuda.empty_cache()

generating responses: 688it [11:38,  1.01s/it]                         
Creating CSV from Arrow format: 100%|██████████| 1/1 [00:00<00:00,  4.34ba/s]


In [7]:
postprocessor.test_data['model_responses']  

['2 Extract the entities contained in the text. Extract only entities\n    contained in the text.\n    2 Extract the entities contained in the text. Extract only entities contained in the text.</s></s></s></s></s></s></s></s></s></s></s></s></s>',
 '3 Extract the entities contained in the text. Extract only entities contained in the text. Text: <<Hormonal study and dynamic biochemical tests performed indicated ECS.>>>\n\n 1 The text contains two entities: <<study>> and <<',
 '1 Extracted entities: right parotid malignancy with liver metastases 2 The entities contained in the text: primary right parotid malignancy with liver metastases Primary right parotid malignancy with liver metastases Malign',
 '3 Extract entities from ACC text.\n```\n\n{"entity": "parotidectomy"}, {"entity": "examination"}, {"entity": "confirmed"}, {"entity": "ACC"}] 2\n\n6 {"entity": "par',
 '1. Entities of the text in the DBpedia. 2. The entities only contained in the text. 3. Exactly those only contained in the

### MISTRAL INSTRUCT simplest_prompt

In [1]:
from dotenv import dotenv_values
from datasets import load_dataset, Dataset
from utils.data_preprocessor import DataPreprocessor
from utils.evaluator import Evaluator
from config import postprocessing
from utils.test_data_processor import TestDataProcessor
import pandas as pd
from utils.generate_ft_adapters_list import generate_ft_adapters_list
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import gc
from peft import PeftModel
from tqdm import tqdm

HF_TOKEN = dotenv_values(".env.base")['HF_TOKEN']
adapters = 'ferrazzipietro/Mistral-7B-v0.1_simplest_prompt_adapters_en.layer1_4_torch.bfloat16_32_32_0.01_4_0.0002'
model_checkpoint = "mistralai/Mistral-7B-Instruct-v0.2"

layer = 'en.layer1'
language = layer.split('.')[0]


dataset = load_dataset("ferrazzipietro/e3c-sentences", token=HF_TOKEN)
dataset = dataset[layer]
preprocessor = DataPreprocessor(model_checkpoint=model_checkpoint, 
                                tokenizer = model_checkpoint)
dataset = preprocessor.preprocess_data_one_layer(dataset,
                                                 instruction_on_response_format='Return the result in a json format: [{"entity":"entity_name"}].',
                                                 simplest_prompt=True)
_, val_data, _ = preprocessor.split_layer_into_train_val_test_(dataset, layer)

bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            #load_in_8bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            # llm_int8_threshold= 6.0,
            # llm_int8_has_fp16_weight = False,
            # llm_int8_skip_modules= ["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"],
            )



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
base_model = AutoModelForCausalLM.from_pretrained(
    model_checkpoint, low_cpu_mem_usage=True,
    quantization_config = bnb_config,
    return_dict=True, 
    #torch_dtype=torch.float16,
    device_map= "auto")
merged_model = PeftModel.from_pretrained(base_model, adapters, token=HF_TOKEN, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_eos_token=False)
tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = "left"


Loading checkpoint shards: 100%|██████████| 3/3 [00:04<00:00,  1.40s/it]
You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.


In [6]:
postprocessor = TestDataProcessor(test_data=val_data,
                                  preprocessor=preprocessor, 
                                  n_shots_inference=0, 
                                  language=language, 
                                  tokenizer=tokenizer)
postprocessor.add_inference_prompt_column()
postprocessor.add_ground_truth_column()
#try:
postprocessor.add_responses_column(model=merged_model, 
                                tokenizer=tokenizer, 
                                batch_size=32, 
                                max_new_tokens_factor=2)
postprocessor.test_data.to_csv(f"data/test_data_processed/maxNewTokensFactor{6}_nShotsInference{0}_{adapters.split('/')[1]}.csv", index=False)
# except Exception as e:
#     print("ERROR IN PROCESSING: ", Exception, adapters)
del merged_model
del base_model
del tokenizer
gc.collect()
torch.cuda.empty_cache()

generating responses:   0%|          | 0/681 [00:00<?, ?it/s]

generating responses: 704it [06:34,  1.78it/s]                         
Creating CSV from Arrow format: 100%|██████████| 1/1 [00:00<00:00,  4.74ba/s]


In [7]:
postprocessor.test_data.to_csv(f"data/test_data_processed/maxNewTokensFactor{6}_nShotsInference{0}_{adapters.split('/')[1]}.csv", index=False)


Creating CSV from Arrow format: 100%|██████████| 1/1 [00:00<00:00,  5.10ba/s]


2928976

In [34]:
postprocessor.test_data['model_responses']

['[{"entity": "46-year-old man"}, {"entity": "hypertension"}, {"entity": "dysl',
 '[{"entity": "study"}, {"entity": "tests"}, {"entity": "indicated"}, {"entity": "ECS"',
 '[{"entity": "findings"}, {"entity": "malignancy"}, {"entity": "right parotid malignancy"},',
 '[{"entity": "parotidectomy"}, {"entity": "examination"}, {"entity": "confirmed"}, {"entity":',
 '[{"entity": "hypercortisolism"}, {"entity": " managed"}, {"entity": "metyrapone"}, {"',
 '[{"entity": "50-years-old woman"}, {"entity": "hypertensive"}, {"entity": "hospital',
 '[{"entity": "MNG"}, {"entity": "her mother"}, {"entity": "sisters"}, {"entity": "c',
 '[{"entity": "signs of cervical compression"}, {"entity": "respiratory signs"}, {"entity": "digest',
 '[{"entity": "thyroid dysfunction"}]</s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s>',
 '[{"entity": "mass"}, {"entity": "took"}, {"entity": "the front and the two sides of the neck"',
 '[{"entity": "surface"}, {"entity": "embossed"}, {"entity": "cov

In [28]:
postprocessor.test_data['model_responses']  

['[{"entity": "diagnosed"}, {"entity": "unveiled"}, {"entity": "referred"}, {"entity": "',
 '[{"entity": "study"}, {"entity": "tests"}, {"entity": "indicated"}, {"entity": "ECS"',
 '[{"entity": "findings"}, {"entity": "cavity"}, {"entity": "malignancy"}, {"entity": "met',
 '[{"entity": "parotidectomy"}, {"entity": "examination"}, {"entity": "confirmed"}, {"entity":',
 '][{"entity": "hypercortisolism"}, {"entity": "managed"}] ---------- ------------ The entities contained in the',
 '[{"entity": "hypertensive"}, {"entity": "hospitalized"}, {"entity": "mass"}, {"entity": "appe',
 '][{"entity": "history"}, {"entity": "surgery"}, {"entity": "MNG"}, {"entity": "her mother"},',
 '][{"entity": "compression"}, {"entity": "signs"}, {"entity": "cervical compression"}, {"entity": "',
 '[{"entity": "dysfunction"}, {"entity": "thyroid dysfunction"}] \n[{"entity": "she"',
 '[{"entity": "The mass"}, {"entity": "the neck"}] ][]][//][{"entity": "The',
 '[{"entity": "covered"}, {"entity": "surface"}, {"e

In [17]:
postprocessor.test_data['model_responses']  

['1. diagnosed: diagnose\n 2. mellitus: diabetes mellitus\n 3. hypertension',
 '{"Text": "study", "study": "Hormonal study"}][{"Entity": "study"}, {"Entity":',
 '1. findings\n 2. pointed\n 3. malignancy\n 4. metastases\n 5. liver',
 'Entities: "parotidectomy", "examination", "confirmed", "ACC", "The patient"\n Types:',
 'Entities: hypercortisolism, managed, metyrapone, ketoconazole, lanreotide,',
 '1. hypertensive \n2. hospitalized\n3. cervical mass\n4. appeared\n5. a',
 '{"family history", "surgery", "MNG", "mother", "sisters", "cousins"} ]]]>',
 '1. signs of cervical compression\n 2. signs of respiratory compression\n 3. signs of digestive compression',
 "{'She': 'PERSON'} [{'thyroid dysfunction': 'disorder'}]</s></s></s></s></s></s></s></s></s>",
 '{"The mass"} ]</s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s>',
 '{"surface": "embossed and covered by a thin normal skin"}][{"surface": "covered by a thin normal skin"}, {"',
 'Entities: {"Som