!pip install trl

In [2]:
!pip install trl

Collecting trl
  Downloading trl-0.11.4-py3-none-any.whl.metadata (12 kB)
Collecting tyro>=0.5.11 (from trl)
  Downloading tyro-0.9.14-py3-none-any.whl.metadata (9.4 kB)
Collecting docstring-parser>=0.15 (from tyro>=0.5.11->trl)
  Downloading docstring_parser-0.16-py3-none-any.whl.metadata (3.0 kB)
Collecting eval-type-backport>=0.1.3 (from tyro>=0.5.11->trl)
  Downloading eval_type_backport-0.2.2-py3-none-any.whl.metadata (2.2 kB)
Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl)
  Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typeguard>=4.0.0 (from tyro>=0.5.11->trl)
  Downloading typeguard-4.4.0-py3-none-any.whl.metadata (3.8 kB)
Downloading trl-0.11.4-py3-none-any.whl (316 kB)
   ---------------------------------------- 0.0/316.6 kB ? eta -:--:--
   ------------------- -------------------- 153.6/316.6 kB 3.1 MB/s eta 0:00:01
   ---------------------------------------- 316.6/316.6 kB 4.9 MB/s eta 0:00:00
Downloading tyro-0.9.14-py3-none-any.whl (116 kB)
   ---

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, TrainerCallback, AutoConfig
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from trl import SFTTrainer
from datasets import load_dataset
import wandb
import numpy as np
import matplotlib.pyplot as plt
import evaluate

# ---------------------------- Load Dataset ----------------------------
# Define dataset paths
train_path = r"C:\Users\BNC\Documents\ITC-Internship\LLM\LLM-Model\visal\split_data\train.jsonl"  # Update with your actual path
eval_path = r"C:\Users\BNC\Documents\ITC-Internship\LLM\LLM-Model\visal\split_data\valid.jsonl"  # Update with your actual path

# Load datasets
train_dataset = load_dataset("json", data_files=train_path, split="train")
eval_dataset = load_dataset("json", data_files=eval_path, split="train")

# ---------------------------- Model Configuration ----------------------------
model_id = "SeaLLMs/SeaLLM-7B-v2.5"  # SEA-LLM-7B-v1 Model

wandb_config = {"model": model_id}
wandb.init(project="fine-tune-sea-llm", config=wandb_config)

# Enable 4-bit Quantization for Efficient Training
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load model with quantization
config = AutoConfig.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, quantization_config=bnb_config, device_map="auto", use_cache=False
)

# ---------------------------- Tokenizer Setup ----------------------------
tokenizer = AutoTokenizer.from_pretrained(
    model_id, add_bos_token=True, add_eos_token=True
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# ---------------------------- LoRA Fine-Tuning Configuration ----------------------------
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

# Prepare model for training with LoRA
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

# ---------------------------- Dataset Preprocessing ----------------------------
def create_prompt_universal(examples):
    """Formats dataset for fine-tuning SEA-LLM with Khmer input and English output."""
    output_text = []
    for i in range(len(examples["khmer_input"])):
        input_text = examples["khmer_input"][i]  # Khmer text
        response = examples["robotic_command"][i]  # English translation/action

        chat_template = [
            {"role": "user", "content": input_text},
            {"role": "assistant", "content": response},
        ]
        prompt = tokenizer.apply_chat_template(chat_template, tokenize=False)
        output_text.append(prompt)
    return output_text

# ---------------------------- Evaluation Metric ----------------------------
def preprocess_logits_for_metrics(logits, labels):
    """Extracts predicted tokens from model output."""
    if isinstance(logits, tuple):
        logits = logits[0]  # Extract logits
    return logits.argmax(dim=-1)

metric = evaluate.load("accuracy")

def compute_metrics(eval_preds):
    """Computes accuracy metric for evaluation."""
    preds, labels = eval_preds
    labels = labels[:, 1:].reshape(-1)
    preds = preds[:, :-1].reshape(-1)
    return metric.compute(predictions=preds, references=labels)

# ---------------------------- Training Arguments ----------------------------
training_args = TrainingArguments(
    output_dir="sea_llm_finetuned",
    max_steps=2000,  # Increase for better learning
    per_device_train_batch_size=4,  # Adjust based on available GPU memory
    gradient_accumulation_steps=4,  # Helps with lower batch sizes
    warmup_steps=100,
    logging_steps=20,
    save_strategy="epoch",
    evaluation_strategy="steps",
    eval_steps=50,
    learning_rate=2e-5,  # Adjust learning rate for stability
    bf16=True,  # Use BF16 if on compatible GPUs
    lr_scheduler_type="cosine",
    report_to="wandb",
)

# ---------------------------- Callback for Loss Tracking ----------------------------
class LossTrackerCallback(TrainerCallback):
    """Tracks loss values during training and plots learning curves."""
    def __init__(self):
        self.training_loss_values = []
        self.eval_loss_values = []

    def on_log(self, args, state, control, logs=None, **kwargs):
        if "loss" in logs:
            self.training_loss_values.append(logs["loss"])
        if "eval_loss" in logs:
            self.eval_loss_values.append(logs["eval_loss"])

    def plot_learning_curve(self):
        """Plots training and evaluation loss curves."""
        plt.plot(self.training_loss_values, label="Training Loss")
        plt.plot(self.eval_loss_values, label="Evaluation Loss")
        plt.xlabel("Steps")
        plt.ylabel("Loss")
        plt.title("Learning Curve")
        plt.legend()
        plt.show()

loss_tracker = LossTrackerCallback()

# ---------------------------- Fine-Tuning ----------------------------
trainer = SFTTrainer(
    model=model,
    peft_config=peft_config,
    max_seq_length=512,  # Increase if dataset has longer sequences
    tokenizer=tokenizer,
    formatting_func=create_prompt_universal,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    preprocess_logits_for_metrics=preprocess_logits_for_metrics,
    callbacks=[loss_tracker],
)

trainer.train()

# ---------------------------- Save Model & Tokenizer ----------------------------
trainer.model.save_pretrained("fine_tuned_sea_llm")
trainer.tokenizer.save_pretrained("fine_tuned_sea_llm")
trainer.model.config.save_pretrained("fine_tuned_sea_llm")

# ---------------------------- Plot Loss Curve ----------------------------
loss_tracker.plot_learning_curve()


  from .autonotebook import tqdm as notebook_tqdm
[34m[1mwandb[0m: Currently logged in as: [33mvannvisal1012[0m ([33mvannvisal1012-institute-of-tecnology-of-cambodia[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.
CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform enabled version of bitsandbytes, which is currently a work in progress. Please check currently supported platforms and installation instructions at https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend


RuntimeError: CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform enabled version of bitsandbytes, which is currently a work in progress. Please check currently supported platforms and installation instructions at https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend