In [2]:
import torch
import pandas as pd
from datasets import load_dataset, Dataset
from transformers import TrainingArguments, AutoTokenizer, AutoModelForCausalLM
from trl import SFTTrainer, SFTConfig
from transformers import BitsAndBytesConfig

In [3]:
def load_model_and_tokenizer(model_name, use_gpu=False):
    """Load a causal language model and its tokenizer, filling in tokenizer defaults.

    Args:
        model_name: Identifier handed to both ``from_pretrained`` calls.
        use_gpu: When truthy, the model is moved to the ``"cuda"`` device.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    tok = AutoTokenizer.from_pretrained(model_name)
    lm = AutoModelForCausalLM.from_pretrained(model_name)

    if use_gpu:
        lm.to("cuda")

    # Fallback chat template; installed only when the tokenizer ships without one.
    fallback_template = """{% for message in messages %}
                {% if message['role'] == 'system' %}System: {{ message['content'] }}\n
                {% elif message['role'] == 'user' %}User: {{ message['content'] }}\n
                {% elif message['role'] == 'assistant' %}Assistant: {{ message['content'] }} <|endoftext|>
                {% endif %}
                {% endfor %}"""
    if not tok.chat_template:
        tok.chat_template = fallback_template

    # Reuse the end-of-sequence token for padding when no pad token is set.
    if not tok.pad_token:
        tok.pad_token = tok.eos_token

    return lm, tok

def preprocess_function(df, context: bool, separator: str = "\n"):
    """Convert a question-answering DataFrame into prompt/completion chat records.

    Args:
        df: DataFrame with ``question`` and ``answer`` columns, plus a ``context``
            column when ``context`` is true.
        context: Whether to append each row's ``context`` to its question in the
            user message.
        separator: Text placed between the question and the context so the two
            do not run together. Only used when ``context`` is true.

    Returns:
        A list with one dict per row. Each dict holds a single-message ``prompt``
        (role ``user``) and a single-message ``completion`` (role ``assistant``).
    """
    if context:
        # Element-wise string concatenation across the two columns.
        user_texts = df["question"] + separator + df["context"]
    else:
        user_texts = df["question"]

    return [
        {
            "prompt": [{"role": "user", "content": user_text}],
            "completion": [{"role": "assistant", "content": answer}],
        }
        for user_text, answer in zip(user_texts, df["answer"])
    ]

In [4]:
# --- Experiment configuration -------------------------------------------
DATASET_CHOICE = "arc"       # options: "arc", "boolq", "squad"
FINETUNING = "SFT"
HP_PROFILE = "fast"          # options: "fast", "balanced", "high_quality"
INFER_MODE = "chat"          # options: "chat", "generate"

MODEL_NAME = "Qwen/Qwen1.5-1.8B"
OUTPUT_DIR = "./sft_model"

# Quantization method, defined exactly once. Only "BitsAndBytes" is implemented
# by the quantization cell; the other listed values raise NotImplementedError.
QUANT_METHOD = "BitsAndBytes"  # options: "BitsAndBytes", "adaround", "brecq", "gptq", "quarot", "awq"

USE_GPU = False

# NOTE(review): the model actually loaded is `model_name` below, not `MODEL_NAME`
# above -- confirm which checkpoint is intended and keep a single name.
model_name = "Qwen/Qwen3-0.6B"
model, tokenizer = load_model_and_tokenizer(model_name, USE_GPU)

In [5]:
# ============================================================
# Dataset selection
# ============================================================

if DATASET_CHOICE == "arc":
    df = pd.read_parquet("../Datasets/test-ai2_arc.parquet")
elif DATASET_CHOICE == "boolq":
    df = pd.read_parquet("../Datasets/test-boolq.parquet")
elif DATASET_CHOICE == "squad":
    df = pd.read_parquet("../Datasets/test-squad_v2.parquet")
else:
    raise ValueError("Invalid DATASET_CHOICE")

context = DATASET_CHOICE != "arc"

print(f"Loaded dataset: {DATASET_CHOICE}")

Loaded dataset: arc


In [16]:
# --------------------------------------------
# MÉTODOS DE CUANTIZACIÓN
# --------------------------------------------

match QUANT_METHOD:
    case "BitsAndBytes":
        load_in_4bit = True,
        bnb_4bit_quant_type="nf4"
        bnb_4bit_use_double_quant=True
        bnb_4bit_compute_dtype=torch.bfloat16


        config = BitsAndBytesConfig(
            load_in_4bit=load_in_4bit,
            bnb_4bit_quant_type=bnb_4bit_quant_type,
            bnb_4bit_use_double_quant=bnb_4bit_use_double_quant,
            bnb_4bit_compute_dtype=bnb_4bit_compute_dtype,
        )

    case "adaround":
        raise NotImplementedError("Implementar AdaRound aquí")

    case "brecq":
        raise NotImplementedError("Implementar BRECQ aquí")

    case "gptq":
        raise NotImplementedError("Implementar GPTQ aquí")

    case "quarot":
        raise NotImplementedError("Implementar QuaRot aquí")

    case "awq":
        raise NotImplementedError("Implementar AWQ aquí")

    case _:
        raise ValueError(f"Método desconocido: {QUANT_METHOD}")

TypeError: load_in_4bit must be a boolean

In [None]:
# --- Training hyperparameters -------------------------------------------
learning_rate = 8e-5  # Optimizer learning rate.

num_train_epochs = 1  # Number of passes over the training set.

per_device_train_batch_size = 1  # Examples per device per forward/backward pass.

# Micro-batches whose gradients are accumulated before each optimizer update.
gradient_accumulation_steps = 8

# Gradient checkpointing trades extra compute for lower memory use; off here.
gradient_checkpointing = False

logging_steps = 2  # Log training progress every 2 steps.

# preprocess_function requires the `context` flag set by the dataset-selection
# cell. Its list of records is wrapped in a `datasets.Dataset` because the
# trainer expects a dataset object rather than a plain Python list.
train_dataset = Dataset.from_list(preprocess_function(df, context))

# SFTTrainer config; checkpoints are written to the configured OUTPUT_DIR.
sft_config = SFTConfig(
    output_dir=OUTPUT_DIR,
    learning_rate=learning_rate,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=gradient_checkpointing,
    logging_steps=logging_steps,
)

sft_trainer = SFTTrainer(
    model=model,
    args=sft_config,
    train_dataset=train_dataset,
    processing_class=tokenizer,
)

sft_trainer.train()

In [None]:
# --------------------------------------------
# EJEMPLO DE INFERENCIA (común para todos)
# --------------------------------------------