In [1]:
from datasets import Dataset
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer,pipeline
from peft import PeftModel
import torch
import pandas as pd
import time
import re
from trl import setup_chat_format

  from .autonotebook import tqdm as notebook_tqdm
2024-11-04 18:15:11.413980: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-04 18:15:11.428422: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-04 18:15:11.432643: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-04 18:15:11.445139: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## 1. Load model and tokenizer

In [2]:
# Model-family switch read by the cells below: "Llama 3.1" runs the plain-text
# text-generation pipeline, "Llama 3.2" runs the chat-template generate() loop.
version = "Llama 3.1"

In [3]:

# Alternative 70B checkpoints (disabled):
# base_model = "meta-llama/Llama-3.1-70B-Instruct"
# fine_tuned_model = "andrealopez/Llama-3.1-70B-Instruct-Pima-Diabetes-Clasification"

# Hub identifiers of the base checkpoint and of the fine-tuned PEFT adapter.
base_model = "meta-llama/Llama-3.1-8B-Instruct"
fine_tuned_model = "andrealopez/Llama-3.1-8B-Instruct-Pima-Diabetes-Clasification"

# Reload tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Base weights are loaded in float16 and placed automatically across devices.
# NOTE(review): trust_remote_code=True permits running code shipped in the
# model repository -- confirm it is actually required for this checkpoint.
base_model_reload = AutoModelForCausalLM.from_pretrained(
        base_model,
        return_dict=True,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
)

# Chat-format setup is applied only on the "Llama 3.2" path; it replaces both
# the model and the tokenizer objects.
# TODO: review whether this step is needed or not.
if version == "Llama 3.2":
    base_model_reload, tokenizer = setup_chat_format(base_model_reload, tokenizer)

# Attach the fine-tuned adapter to the base model, then merge it into the base
# weights so `model` is a plain (non-PEFT) model for inference.
model = PeftModel.from_pretrained(base_model_reload, fine_tuned_model)
model = model.merge_and_unload()

Loading checkpoint shards: 100%|██████████| 4/4 [00:07<00:00,  1.85s/it]


## 2. Load and serialize data

In [None]:
def serialize_data(row, version):
    """Render one record as the text representation fed to the model.

    Each column listed in the module-level ``feature_columns`` becomes a
    sentence "The <col> is <value>."; the sentences are joined with single
    spaces and prefixed with "Health values: ".

    Args:
        row: Mapping-like record (e.g. a DataFrame row) indexable by column name.
        version: Model family string. For "Llama 3.1" the label stored under
            the module-level ``target_column`` is appended on a new line as
            "Outcome: <int>"; for any other value only the features are emitted.

    Returns:
        The serialized record, stripped of leading/trailing whitespace.
    """
    sentences = []
    for col in feature_columns:
        sentences.append("The {} is {}.".format(col, str(row[col])))
    body = "Health values: " + " ".join(sentences)

    if version != "Llama 3.1":
        return body.strip()

    # The 3.1 format carries the label inline, cast to int so it prints without decimals.
    label = int(row[target_column])
    return f"{body}.\nOutcome: {label}".strip()

# Remove the Outcome value from a serialized row (Llama 3.1 format only)
def delete_label_value(row):
    """Blank out the ground-truth label in a serialized record.

    Replaces every "Outcome: <number>" (integer or decimal, with an optional
    trailing period) by "Outcome:", so the text can be used as a prompt
    without revealing the answer.

    The number pattern is explicit on purpose: a pattern that demands an
    extra character after the digit does not match a label sitting at the
    very end of the string, which would leave the label in the prompt.

    Args:
        row: Serialized record text.

    Returns:
        The text with the label value removed; unchanged if no label is found.
    """
    return re.sub(r'Outcome: \d+(?:\.\d+)?\.?', 'Outcome:', row)

# Build the few-shot prompt text (Llama 3.1) or chat messages (other versions)
def few_shot_prompt(df_shots, version):
    """Build the few-shot examples in the format expected by the model family.

    Args:
        df_shots: DataFrame of example records with a 'serialized_row' column
            and, for the chat format, the column named by the module-level
            ``target_column``.
        version: Model family string.

    Returns:
        For "Llama 3.1": a single string with the serialized rows joined by
        newlines. Otherwise: a list of chat messages -- one system message
        (the module-level ``instruction`` plus " Here are some examples.\\n")
        followed by a user/assistant pair per example.
    """
    if version == "Llama 3.1":
        return "\n".join([row['serialized_row'] for _, row in df_shots.iterrows()])

    # Build the system prompt under a local name: an augmented assignment to
    # `instruction` here would make that name function-local and raise
    # UnboundLocalError, and would otherwise mutate the shared instruction.
    system_prompt = instruction + " Here are some examples.\n"
    messages = [{"role": "system", "content": system_prompt}]
    for _, row in df_shots.iterrows():
        messages.append({"role": "user", "content": row['serialized_row']})
        # Cast to int so the label prints as "0"/"1", matching serialize_data.
        messages.append({"role": "assistant", "content": f"Outcome: {int(row[target_column])}"})
    return messages

# Load and prepare the dataset
test_dataset = pd.read_csv('./PIMA_dataset/test_data.csv')
print("Test dataset shape:", test_dataset.shape)

# Define the target column and the feature columns.
# feature_columns is computed before 'serialized_row' is added below, so the
# serialized text is never treated as a feature.
target_column = "Outcome"
feature_columns = [col for col in test_dataset.columns if col != target_column]

# Serialize the data: one text representation per row
test_dataset['serialized_row'] = test_dataset.apply(lambda row: serialize_data(row, version), axis=1)

# Shared instruction
instruction = """You are a doctor specialised in classifying patients as diabetic or non-diabetic based on their health values. Instruction: Respond only with '0' for non-diabetic or '1' for diabetic. Use the following output format: 'Outcome: 0'."""

# Few-shot configuration
few_shot = True
k_shots = 6
if few_shot:
    # The examples are sampled from the test set itself (fixed seed) and then
    # removed from it, so they are not evaluated afterwards.
    df_shots = test_dataset.sample(n=k_shots, random_state=42)
    test_dataset = test_dataset.drop(df_shots.index)
    
    if version == "Llama 3.1":
        # Plain-text prompt: instruction + serialized examples + closing request.
        instruction = instruction + " Here are some examples.\n" + few_shot_prompt(df_shots, version) + f"\nPredict the {target_column} of the next patient.\n"
    else:
        # Chat format: the examples become user/assistant message pairs.
        base_messages = few_shot_prompt(df_shots, version)
else:
    # Zero-shot: only the closing request is appended to the instruction.
    instruction += f" Predict the {target_column} of the next patient."
    if version == "Llama 3.2":
        base_messages = [{"role": "system", "content": instruction}]

# Convert to a HuggingFace Dataset object (serialized text and label only)
serialized_test_data = Dataset.from_pandas(test_dataset[["serialized_row", "Outcome"]])

Test dataset shape: (154, 9)


## 3. Inference

In [None]:
# Accumulators shared by the inference cells:
#   real_values / output_values -- ground truth and prediction, appended together
#   inference_times             -- wall-clock seconds per sample
#   iterations_to_fix           -- DataFrame index of samples whose answer could not be used
real_values = []
output_values = []
inference_times = []
iterations_to_fix = []

if version == "Llama 3.1": 
    # Inference pipeline
    # NOTE(review): `model` is already loaded; torch_dtype/device_map here are
    # presumably redundant -- confirm against the pipeline documentation.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
        device_map="auto",
        max_new_tokens=10
    )

    for i, row in test_dataset.iterrows():
        # Serialized row passed through delete_label_value (intended to blank the label).
        serialized_instance = delete_label_value(row.serialized_row)
        # Create prompt
        prompt = instruction + serialized_instance

        # Classify the sample, timing only the pipeline call
        start_time = time.time()
        result = pipe(prompt)
        end_time = time.time()
        inference_time = end_time - start_time
        inference_times.append(inference_time)
        print(inference_time)

        # Answer
        # NOTE(review): the pattern below expects generated_text to contain the
        # prompt followed by the continuation -- confirm return_full_text default.
        answer = result[0]['generated_text'].strip()

        # Postprocessing: anchor on this sample's own text so the digit that is
        # captured is the one following this sample, optionally quoted.
        pattern = rf"{re.escape(serialized_instance)}\s*['\"]?(\d)['\"]?"
        # Search for the predicted Outcome
        match = re.search(pattern, answer, re.DOTALL)
        if match:
            predicted_outcome = match.group(1).strip()  # the captured digit
            # (\d) accepts any digit, so values other than 0/1 are rejected here.
            if int(predicted_outcome) not in [0,1]: 
                print("Outcome not in [0,1]: ", predicted_outcome)
                iterations_to_fix.append(i)
            # Only valid predictions are stored. TODO: change this? keep all results
            else: 
                output_values.append(int(predicted_outcome))
                real_values.append(row.Outcome)
                
        else:
            print("Not sample match founded.")
            print(answer)
            iterations_to_fix.append(i)
    

In [None]:
if version == "Llama 3.2": 
    # Chat-template inference path. For every test sample: append it as the
    # final user message after `base_messages`, generate a short continuation,
    # and read the predicted label from that continuation.

    # Applied to the generated continuation only. Matching against the whole
    # decoded sequence would also see the few-shot "assistant ... Outcome: N"
    # turns in the prompt and could record an example's label as the
    # prediction whenever the model's own answer is malformed.
    pattern = re.compile(r"Outcome:\s*([01])")

    for instance in serialized_test_data:
        serialized_row = instance["serialized_row"]

        print("Creating prompt...")
        # Concatenation builds a new list, so base_messages is never mutated.
        messages = base_messages + [{"role": "user", "content": serialized_row}]
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

        print("Inferring...")
        start_time = time.time()
        outputs = model.generate(**inputs, max_new_tokens=10, num_return_sequences=1)
        end_time = time.time()
        inference_time = end_time - start_time
        inference_times.append(inference_time)
        print(inference_time)

        print("Decoding...")
        # generate() returns the prompt tokens followed by the new tokens for a
        # causal LM; slice the prompt off and decode just the model's answer.
        prompt_length = inputs["input_ids"].shape[1]
        text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

        # Postprocessing: the first "Outcome: 0|1" in the continuation is the answer.
        match = pattern.search(text)
        if match:
            predicted_outcome = match.group(1)
            # Only valid predictions are stored, keeping both lists aligned.
            # TODO: change this? keep all results
            output_values.append(int(predicted_outcome))
            real_values.append(instance["Outcome"])
        else:
            print("Not sample match founded.")
            print(text)