In [5]:
from dotenv import dotenv_values
from datasets import load_dataset, Dataset
from utils.data_preprocessor import DataPreprocessorTag
from utils.test_data_processor import TestDataProcessor
from utils.generate_ft_adapters_list import generate_ft_adapters_list
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import gc
from peft import PeftModel
from tqdm import tqdm

from config import postprocessing_params_llama as postprocessing
from log import tag_llama7B_NoQuant as models_params
# ---------------------------------------------------------------------------
# Setup: adapter list, credentials, dataset, tokenizer and train/val/test split.
# ---------------------------------------------------------------------------

# Names of the fine-tuned adapters to evaluate for this model configuration.
adapters_list = generate_ft_adapters_list("tag_llama7B_NoQuant", simplest_prompt=models_params.simplest_prompt)
print(adapters_list)

# Parse the env file once and read both tokens from the same mapping
# (previously the file was parsed twice, once per token).
env_base = dotenv_values(".env.base")
HF_TOKEN = env_base['HF_TOKEN']
LLAMA_TOKEN = env_base['LLAMA_TOKEN']

# Post-processing grids and the layer/language this run works on.
max_new_tokens_factor_list = postprocessing.max_new_tokens_factor_list
n_shots_inference_list = postprocessing.n_shots_inference_list
layer = models_params.TRAIN_LAYER
language = layer.split('.')[0]  # text before the first '.' of the layer name

# Load every split of the corpus, then keep only the layer under study.
raw_splits = load_dataset("ferrazzipietro/e3c-sentences", token=HF_TOKEN)
layer_data = raw_splits[layer]

tokenizer = AutoTokenizer.from_pretrained(
    models_params.BASE_MODEL_CHECKPOINT,
    add_eos_token=False,
    token=LLAMA_TOKEN,
)

# NOTE(review): `token_llama` receives HF_TOKEN while the tokenizer is loaded
# with LLAMA_TOKEN -- confirm which token DataPreprocessorTag actually expects.
preprocessor = DataPreprocessorTag(
    models_params.BASE_MODEL_CHECKPOINT,
    tokenizer,
    token_llama=HF_TOKEN,
    data=layer_data,
    tagging_string=models_params.tagging_string,
)
preprocessor.apply(instruction_on_response_format=models_params.instruction_on_response_format)

# Tokenize the text field of the preprocessed data in batches, then split.
dataset = preprocessor.data.map(
    lambda samples: tokenizer(samples[models_params.dataset_text_field]),
    batched=True,
)
train_data, val_data, test_data = preprocessor.split_layer_into_train_val_test_(dataset, models_params.TRAIN_LAYER)

['ferrazzipietro/llama-2-7b-chat-hf_adapters_en.layer1_NoQuant_TAG_3EPOCHS_16_32_0.01_2_0.0002', 'ferrazzipietro/llama-2-7b-chat-hf_adapters_en.layer1_NoQuant_TAG_3EPOCHS_16_64_0.01_2_0.0002', 'ferrazzipietro/llama-2-7b-chat-hf_adapters_en.layer1_NoQuant_TAG_3EPOCHS_32_32_0.01_2_0.0002', 'ferrazzipietro/llama-2-7b-chat-hf_adapters_en.layer1_NoQuant_TAG_3EPOCHS_32_64_0.01_2_0.0002']
MODEL TYPE: llama


Map:   0%|          | 0/1520 [00:00<?, ? examples/s]

Map:   0%|          | 0/1520 [00:00<?, ? examples/s]

Map:   0%|          | 0/1520 [00:00<?, ? examples/s]

Map:   0%|          | 0/170 [00:00<?, ? examples/s]

In [8]:
# ---------------------------------------------------------------------------
# Load the base model with the first adapter and run a small inference check
# on a handful of validation examples.
# ---------------------------------------------------------------------------

# Tunable settings for this run, gathered in one place instead of being
# scattered through the calls below as literals.
HF_CACHE_DIR = '/data/disk1/share/pferrazzi/.cache'  # machine-specific path; adjust per host
N_EVAL_SAMPLES = 8          # number of validation rows to run through the model
EVAL_BATCH_SIZE = 4         # generation batch size
MAX_NEW_TOKENS_FACTOR = 6   # forwarded to add_responses_column as max_new_tokens_factor

# NOTE(review): the base model is loaded in 4-bit although the adapter names
# contain "NoQuant" -- confirm this mismatch is intentional for inference.
bnb_config = BitsAndBytesConfig(load_in_4bit=True)
base_model = AutoModelForCausalLM.from_pretrained(
    models_params.BASE_MODEL_CHECKPOINT,
    low_cpu_mem_usage=True,
    quantization_config=bnb_config,
    return_dict=True,
    device_map="auto",
    token=LLAMA_TOKEN,
    cache_dir=HF_CACHE_DIR,
)

# Attach the first adapter from the list on top of the base model.
# NOTE(review): despite its name, `merged_model` is the PeftModel wrapper
# returned by from_pretrained; no explicit merge call happens in this cell.
adapters = adapters_list[0]
merged_model = PeftModel.from_pretrained(
    base_model,
    adapters,
    token=HF_TOKEN,
    device_map='auto',
    is_trainable=False,
)

# Fresh tokenizer configured for batched generation: EOS is reused as the
# padding token and padding is applied on the left.
tokenizer = AutoTokenizer.from_pretrained(
    models_params.BASE_MODEL_CHECKPOINT,
    add_eos_token=False,
    token=LLAMA_TOKEN,
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# Never ask for more rows than the validation split holds, so that `select`
# cannot be handed out-of-range indices.
n_selected = min(N_EVAL_SAMPLES, len(val_data))
postprocessor = TestDataProcessor(
    test_data=val_data.select(range(n_selected)),
    preprocessor=preprocessor,
    n_shots_inference=0,
    language=language,
    tokenizer=tokenizer,
)
postprocessor.add_inference_prompt_column(simplest_prompt=False)
postprocessor.add_ground_truth_column()
postprocessor.add_responses_column(
    model=merged_model,
    tokenizer=tokenizer,
    batch_size=EVAL_BATCH_SIZE,
    max_new_tokens_factor=MAX_NEW_TOKENS_FACTOR,
)
# This cell does not itself write the results to disk; they remain available
# in `postprocessor.test_data`.





Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_config.json:   0%|          | 0.00/705 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/49.1M [00:00<?, ?B/s]

Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Map:   0%|          | 0/8 [00:00<?, ? examples/s]

generating responses: 100%|██████████| 8/8 [02:40<00:00, 20.09s/it]


In [12]:
# Display the record at index 2 of the processor's test data as the cell output,
# to inspect one processed example by eye.
postprocessor.test_data[2]

{'sentence': 'Imaging and cytological findings pointed toward a likely primary right parotid malignancy with liver metastases.',
 'entities': [{'id': '1764',
   'offsets': [24, 32],
   'role': '',
   'semantic_type_id': '',
   'text': 'findings',
   'type': 'EVENT'},
  {'id': '1779',
   'offsets': [33, 40],
   'role': '',
   'semantic_type_id': '',
   'text': 'pointed',
   'type': 'EVENT'},
  {'id': '2010',
   'offsets': [57, 89],
   'role': '',
   'semantic_type_id': 'C1306459',
   'text': 'primary right parotid malignancy',
   'type': 'CLINENTITY'},
  {'id': '2017',
   'offsets': [95, 111],
   'role': '',
   'semantic_type_id': 'C0494165',
   'text': 'liver metastases',
   'type': 'CLINENTITY'},
  {'id': '2052',
   'offsets': [57, 89],
   'role': '',
   'semantic_type_id': '',
   'text': 'primary right parotid malignancy',
   'type': 'BODYPART'},
  {'id': '2058',
   'offsets': [95, 111],
   'role': '',
   'semantic_type_id': '',
   'text': 'liver metastases',
   'type': 'BODYPART'}],

In [13]:

# Second inference run: first four validation rows, smaller batches and a
# larger max_new_tokens_factor, reusing the model and tokenizer already loaded.
val_subset = val_data.select(range(4))

postprocessor2 = TestDataProcessor(
    test_data=val_subset,
    preprocessor=preprocessor,
    n_shots_inference=0,
    language=language,
    tokenizer=tokenizer,
)

# Build the prompt and ground-truth columns before generating responses.
postprocessor2.add_inference_prompt_column(simplest_prompt=False)
postprocessor2.add_ground_truth_column()

postprocessor2.add_responses_column(
    model=merged_model,
    tokenizer=tokenizer,
    batch_size=2,
    max_new_tokens_factor=10,
)

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

generating responses: 100%|██████████| 4/4 [02:41<00:00, 40.31s/it]
