In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel
import torch
import pandas as pd
import time
import re

  from .autonotebook import tqdm as notebook_tqdm
2024-11-05 11:49:20.467941: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-05 11:49:20.481204: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-05 11:49:20.485249: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-05 11:49:20.507335: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## 1.Load model and tokenizer

In [2]:
base_model = "meta-llama/Llama-3.1-70B-Instruct"
fine_tuned_model = "andrealopez/Llama-3.1-70B-Instruct-Pima-Diabetes-Clasification"

# base_model = "meta-llama/Llama-3.1-70B-Instruct"
# fine_tuned_model = "andrealopez/Llama-3.1-70B-Instruct-Pima-Diabetes-Clasification"

# Reload tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(base_model)

base_model_reload = AutoModelForCausalLM.from_pretrained(
        base_model,
        return_dict=True,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
)

Loading checkpoint shards: 100%|██████████| 30/30 [00:43<00:00,  1.45s/it]


In [3]:
# Merge adapter with base model
model = PeftModel.from_pretrained(base_model_reload, fine_tuned_model)
model = model.merge_and_unload()

## 2.Load and serialize data

In [10]:
# PIMA
test_dataset = pd.read_csv('./PIMA_dataset/test_data.csv')
print("Test dataset shape",test_dataset.shape)

# Serialize data
target_column = "Outcome"

instruction = f"""You are a doctor specialised in classifying patients as diabetic or non-diabetic based on their health values. Instruction: Respond only with '0' for non-diabetic or '1' for diabetic. Use the following output format: 'Outcome: 0'. \nPredict the {target_column} of the next patient.\n"""
few_shot_instruction = f"""You are a doctor specialised in classifying patients as diabetic or non-diabetic based on their health values. Instruction: Respond only with '0' for non-diabetic or '1' for diabetic. Use the following output format: 'Outcome: 0'. Here are some examples.\n"""

Test dataset shape (154, 9)


In [11]:
def serialize_data(row):
    features_text = " ".join([
        f"The {col} is {str(row[col])}." for col in feature_columns
    ])
    # PIMA prompt
    prompt = f"""Health values: {features_text}.
    Outcome: {int(row[target_column])}.""".strip()
    return prompt

def delete_label_value(row): 
    # PIMA
    return re.sub(r'Outcome: \d.', 'Outcome:', row)

def few_shot_prompt(df_shots): 
    prompt = "\n".join([
        row['serialized_row'] for index, row in df_shots.iterrows()
    ])
    return prompt

# Preprocess test dataset
feature_columns = [col for col in test_dataset.columns if col != target_column]
test_dataset.loc[:,'serialized_row']  = test_dataset.apply(serialize_data, axis=1)


# Few shot
few_shot = False
k_shots = 6
if few_shot: 
    # Shots # TODO: coger las muestras de train o validation
    df_shots = test_dataset.sample(n=k_shots, random_state=42)
    test_dataset = test_dataset.drop(df_shots.index)
    # Few shot prompting
    # PIMA prompt
    instruction = few_shot_instruction + few_shot_prompt(df_shots) + f"\nPredict the {target_column} of the next patient.\n"

In [12]:
instruction

"You are a doctor specialised in classifying patients as diabetic or non-diabetic based on their health values. Instruction: Respond only with '0' for non-diabetic or '1' for diabetic. Use the following output format: 'Outcome: 0'. \nPredict the Outcome of the next patient.\n"

## 3.Inference

In [13]:
def postprocess_pima_inference(answer,serialized_instance,row,real_values,output_values):
    pattern = rf"{re.escape(serialized_instance)}\s*['\"]?(\d)['\"]?"
    # Buscar el Outcome predicho
    match = re.search(pattern, answer, re.DOTALL)
    if match:
        predicted_outcome = match.group(1).strip()  # Obtener todo el contenido después y eliminar espacios en blanco
        if int(predicted_outcome) not in [0,1]: 
            print("Outcome not in [0,1]: ", predicted_outcome)
        else: 
            output_values.append(int(predicted_outcome))
            real_values.append(row.Outcome)
            
    else:
        print("Not sample match founded.")
        print(answer)

    return real_values,output_values

In [None]:
real_values = []
output_values = []
inference_times = []
iterations_to_fix = []

# Inference pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
    max_new_tokens=10
)

for i, row in test_dataset.iterrows():
    serialized_instance = delete_label_value(row.serialized_row)
    # Create prompt
    prompt = instruction + serialized_instance

    # Clasificate sample
    start_time = time.time()
    result = pipe(prompt)
    end_time = time.time()
    inference_time = end_time - start_time
    inference_times.append(inference_time)
    print(inference_time)

    # Answer
    answer = result[0]['generated_text'].strip()

    # Postprocessing to check that is the outcome of the tample
    # PIMA
    real_values,output_values = postprocess_pima_inference(answer,serialized_instance,row,real_values,output_values)

You are a doctor specialised in classifying patients as diabetic or non-diabetic based on their health values. Instruction: Respond only with '0' for non-diabetic or '1' for diabetic. Use the following output format: 'Outcome: 0'. 
Predict the Outcome of the next patient.
Health values: The Pregnancies is 2.0. The Glucose is 100.0. The BloodPressure is 64.0. The SkinThickness is 23.0. The Insulin is 0.0. The BMI is 29.7. The DiabetesPedigreeFunction is 0.368. The Age is 21.0..
    Outcome:


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[{'generated_text': "You are a doctor specialised in classifying patients as diabetic or non-diabetic based on their health values. Instruction: Respond only with '0' for non-diabetic or '1' for diabetic. Use the following output format: 'Outcome: 0'. \nPredict the Outcome of the next patient.\nHealth values: The Pregnancies is 2.0. The Glucose is 100.0. The BloodPressure is 64.0. The SkinThickness is 23.0. The Insulin is 0.0. The BMI is 29.7. The DiabetesPedigreeFunction is 0.368. The Age is 21.0..\n    Outcome: '0'. Health values: The Pregnancies is"}]


RuntimeError: No active exception to reraise

## 4.Metrics

In [33]:
from collections import Counter
print(Counter(real_values))
print(Counter(output_values))

Counter({0: 92, 1: 56})
Counter({0: 111, 1: 37})


In [34]:
# Metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def inference_results(real_values, predicted_values,base_model,new_model,inference_times,test_dataset, instruction):
    metrics = {
        "accuracy": accuracy_score(real_values, predicted_values),
        "precision": precision_score(real_values, predicted_values, average='weighted'),
        "recall": recall_score(real_values, predicted_values, average='weighted'),
        "f1_score": f1_score(real_values, predicted_values, average='weighted'),
        "base_model": base_model,
        "finetuned_model": new_model,
        "few_shot": "6-shot",
        "inference_times" : inference_times,
        "instruction":instruction,
        "dataset": "PIMA",
        "train_size": 491,
        "validation_size": 123,
        "test_size" : len(test_dataset),
        
    }
    return metrics

results = inference_results(real_values, output_values,base_model,fine_tuned_model,inference_times,test_dataset,instruction)
results

{'accuracy': 0.7364864864864865,
 'precision': 0.7353299245191136,
 'recall': 0.7364864864864865,
 'f1_score': 0.7219003526011313,
 'base_model': 'meta-llama/Llama-3.1-70B-Instruct',
 'finetuned_model': 'andrealopez/Llama-3.1-70B-Instruct-Pima-Diabetes-Clasification',
 'few_shot': '6-shot',
 'inference_times': [3.483624219894409,
  3.4843640327453613,
  3.4847230911254883,
  3.4863381385803223,
  1.8630268573760986,
  3.4868431091308594,
  3.485675811767578,
  3.486767530441284,
  3.4875452518463135,
  1.8640327453613281,
  3.4877076148986816,
  1.8646292686462402,
  1.864185094833374,
  3.485379695892334,
  1.8647079467773438,
  3.486693859100342,
  3.486258029937744,
  3.486452341079712,
  3.4871459007263184,
  1.8628954887390137,
  3.4891059398651123,
  3.485926866531372,
  3.4859085083007812,
  3.487027406692505,
  3.4878196716308594,
  3.487455129623413,
  3.4862587451934814,
  1.8657164573669434,
  3.48938250541687,
  3.4877560138702393,
  3.4874134063720703,
  1.864314079284668,

In [None]:
import json
with open('./PIMA_dataset/inference_metrics_llama3.1_70B_6shot_4_11_2024.json', 'w') as file:
    json.dump(results, file, indent=4)

: 