In [1]:
# https://www.datacamp.com/tutorial/fine-tuning-llama-3-2?dc_referrer=https%3A%2F%2Fwww.google.com%2F

In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline, 
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
import kagglehub
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm
2024-10-31 13:03:05.868102: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-31 13:03:05.881598: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-31 13:03:05.885740: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-31 13:03:05.897319: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### 1.Load model and tokenizer

In [3]:
## Set torch dtype and attention implementation
# TODO: ERORR CON FLASH ATENTION 2 -> Eager

base_model = "unsloth/Llama-3.2-3B-bnb-4bit"
new_model = "Llama-3.2-3B-bnb-4bit-Pima-Diabetes-Clasification"


try:
    import flash_attn
    flash_attn_installed = True
except ImportError:
    flash_attn_installed = False
    print("Warning: flash_attn is not installed. Falling back to 'eager' attention implementation.")

# Definir el dtype y la implementación de atención según la compatibilidad de la GPU y la disponibilidad de flash_attn
if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 and flash_attn_installed:
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"  # Alternativa


device_map = torch.device("cuda:0")

## QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)
## Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map=device_map,
    attn_implementation=attn_implementation
)

## Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)



Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


### 2. Load and process dataset

In [None]:
def serialize_data(row, feature_columns, target_column):
    features_text = " ".join([
        f"The {col} is {str(row[col])}." for col in feature_columns
    ])
    serialized_text = f"Health values: {features_text}"
    return serialized_text

def format_chat_template(row,feature_columns,target_column,instruction):
    serialized_row = serialize_data(row,feature_columns,target_column)
    row_json = [{"role": "system", "content": instruction },
            {"role": "user", "content": {serialized_row}},
            {"role": "assistant", "content": f"Outcome: {row[target_column]}"}]
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

In [None]:
## PIMA 
train_dataset = (load_dataset('csv', data_files='./PIMA_dataset/train_data.csv'))
validation_dataset = (load_dataset('csv', data_files='./PIMA_dataset/validation_data.csv'))
test_dataset = (load_dataset('csv', data_files='./PIMA_dataset/test_data.csv'))['train']
print("Train dataset shape",train_dataset.shape)
print("Validation dataset shape",validation_dataset.shape)
print("Test dataset shape",test_dataset.shape)

target_column = "Outcome"
feature_columns = [col for col in train_dataset.column_names if col != target_column]

instruction = f"""You are a doctor specialised in classifying patients as diabetic or non-diabetic based on their health values. Instruction: Respond only with "0" for non-diabetic or "1" for diabetic. Use the following output format: "Outcome: 0". Predict the {target_column} of the next patient."""

Train dataset shape (491, 9)
Validation dataset shape (123, 9)
Test dataset shape (154, 9)


In [None]:
train_dataset = train_dataset.map(
    lambda row: format_chat_template(row, feature_columns, target_column,instruction),
    num_proc=4
)

validation_dataset = validation_dataset.map(
    lambda row: format_chat_template(row, feature_columns, target_column,instruction),
    num_proc=4
)

Map (num_proc=4):   0%|          | 0/491 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/491 [00:00<?, ? examples/s]


ValueError: Cannot use chat template functions because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating

### 3. Setting up the model

In [None]:
# Extract the linear model name from the model.
import bitsandbytes as bnb

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)

# Only fine-tune the LoRA adopter and leave the rest of the model to save memory and for faster training time. 
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules
)
model, tokenizer = setup_chat_format(model, tokenizer)
model = get_peft_model(model, peft_config)

# TODO: review hyperparameters
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    eval_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    peft_config=peft_config,
    max_seq_length= 512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)

### 4. Model training

In [9]:
# Model training
trainer.train()

NameError: name 'trainer' is not defined

In [None]:
wandb.finish()

0,1
eval/loss,█▂▂▁
eval/runtime,█▂▁▂
eval/samples_per_second,▁▇█▇
eval/steps_per_second,▁▇█▇
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train/grad_norm,███▆▄▃▄▃▃▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃
train/learning_rate,▂▂▃▄▅▆▇▇█▇▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▇▇▆▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/loss,0.33214
eval/runtime,19.0214
eval/samples_per_second,6.466
eval/steps_per_second,1.63
total_flos,1356890783465472.0
train/epoch,0.99187
train/global_step,61.0
train/grad_norm,0.41918
train/learning_rate,0.0
train/loss,0.0809


In [None]:
# Save the fine-tuned model
# trainer.model.save_pretrained(new_model)
trainer.model.push_to_hub(new_model, use_temp_dir=False)

adapter_model.safetensors: 100%|██████████| 1.67G/1.67G [00:49<00:00, 33.9MB/s]


CommitInfo(commit_url='https://huggingface.co/andrealopez/Llama-3.2-3B-Instruct-Pima-Diabetes-Clasification/commit/ab2d520c5ed52dc0feb109ac145aeadcac2ddd54', commit_message='Upload model', commit_description='', oid='ab2d520c5ed52dc0feb109ac145aeadcac2ddd54', pr_url=None, repo_url=RepoUrl('https://huggingface.co/andrealopez/Llama-3.2-3B-Instruct-Pima-Diabetes-Clasification', endpoint='https://huggingface.co', repo_type='model', repo_id='andrealopez/Llama-3.2-3B-Instruct-Pima-Diabetes-Clasification'), pr_revision=None, pr_num=None)