In [1]:
!nvidia-smi
import ipykernel
print(ipykernel.__version__) 

Wed Oct 22 03:39:27 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 580.95.05              Driver Version: 580.95.05      CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   36C    P8             11W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

6.30.1


In [2]:
from huggingface_hub import login
from dotenv import load_dotenv
import os

load_dotenv()

login(token=os.getenv("HF_TOKEN"))

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [3]:
import unsloth
from datasets import load_dataset
from transformers import AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, PeftModel, prepare_model_for_kbit_training
from trl import SFTConfig, SFTTrainer
import torch

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


Skipping import of cpp extensions due to incompatible torch version 2.8.0+cu128 for torchao version 0.14.0         Please see GitHub issue #2919 for more info


🦥 Unsloth Zoo will now patch everything to make training faster!


In [4]:
dataset_path = "dataset/uu_dataset_chatbot_v3.jsonl"
full_dataset = load_dataset('json', data_files=dataset_path)

dataset = full_dataset['train'].train_test_split(
    test_size=0.05,  
    seed=42  
)

print(f"📊 Train samples: {len(dataset['train'])}")
print(f"📊 Validation samples: {len(dataset['test'])}") 

Generating train split: 0 examples [00:00, ? examples/s]

📊 Train samples: 475
📊 Validation samples: 25


In [5]:
model_id = "SeaLLMs/SeaLLMs-v3-1.5B-Chat"
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [6]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

print(f"✅ Tokenizer loaded. Vocab size: {len(tokenizer)}")

✅ Tokenizer loaded. Vocab size: 151646


In [7]:
def format_chat_template(example):
    """
    Format dengan chat template yang benar untuk SeaLLM
    SeaLLM menggunakan format ChatML-style
    """
    messages = example['messages']
    
    conversation = ""
    
    for msg in messages:
        role = msg['role']
        content = msg['content']
        
        if role == 'system':
            conversation += f"<|im_start|>system\n{content}<|im_end|>\n"
        elif role == 'user':
            conversation += f"<|im_start|>user\n{content}<|im_end|>\n"
        elif role == 'assistant':
            conversation += f"<|im_start|>assistant\n{content}<|im_end|>\n"
    
    return {"text": conversation}

dataset = dataset.map(
    format_chat_template,
    remove_columns=dataset["train"].column_names
)
print("✅ Chat template applied")
print(f"Sample formatted text:\n{dataset['train'][0]['text'][:300]}...")

Map:   0%|          | 0/475 [00:00<?, ? examples/s]

Map:   0%|          | 0/25 [00:00<?, ? examples/s]

✅ Chat template applied
Sample formatted text:
<|im_start|>system
Jawab pertanyaan berdasarkan konteks berikut:
{context}

Kamu adalah asisten ahli pajak Indonesia.
Jawaban harus faktual, to the point, dan menggunakan bahasa formal.
Jika informasi tidak ada atau pertanyaan tidak berkaitan dengan pajak,
jawab: "Maaf, saya tidak memiliki pemahaman...


In [8]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_storage=torch.float16
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
)

`torch_dtype` is deprecated! Use `dtype` instead!
INFO:accelerate.utils.modeling: We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


In [9]:
model = prepare_model_for_kbit_training(model)

print("✅ Model loaded and prepared for training")

✅ Model loaded and prepared for training


In [10]:
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "q_proj", 
        "v_proj", 
        "k_proj", 
        "o_proj",
        # "gate_proj",  
        # "up_proj",
        # "down_proj"
    ],
    task_type="CAUSAL_LM"
)

In [11]:
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 2,179,072 || all params: 1,545,893,376 || trainable%: 0.1410


In [12]:
import os
os.environ["WANDB_DISABLED"] = "true"

In [13]:
output_model_name = f"taxbot-SeaLLMs-v3-1.5B-Chat-v9"

In [14]:
sft_args = SFTConfig(
    output_dir=output_model_name,
    max_length=512,
    packing=True,
    num_train_epochs=5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    do_eval=True,  
    gradient_checkpointing=True,
    learning_rate=1e-4,
    fp16=True,
    bf16=False,
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [15]:
trainer = SFTTrainer(
    model=model,
    args=sft_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    processing_class=tokenizer,
)





Unsloth: Tokenizing ["text"] (num_proc=20):   0%|          | 0/475 [00:00<?, ? examples/s]

Unsloth: Packing train dataset (num_proc=20):   0%|          | 0/475 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=20):   0%|          | 0/25 [00:00<?, ? examples/s]

Unsloth: Packing eval dataset (num_proc=20):   0%|          | 0/25 [00:00<?, ? examples/s]

In [16]:
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 151645, 'bos_token_id': None, 'pad_token_id': 151643}.
The model is already on multiple devices. Skipping the move to device specified in `args`.


Step,Training Loss
1,1.5267
2,1.5488
3,1.534
4,1.5998
5,1.5073
6,1.577
7,1.4635
8,1.5179
9,1.545
10,1.5344


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


TrainOutput(global_step=300, training_loss=0.5141788852214814, metrics={'train_runtime': 1516.5466, 'train_samples_per_second': 1.566, 'train_steps_per_second': 0.198, 'total_flos': 9499951817856000.0, 'train_loss': 0.5141788852214814, 'epoch': 5.0})

In [17]:
trainer.evaluate()

{'eval_loss': 0.3569597601890564,
 'eval_runtime': 5.022,
 'eval_samples_per_second': 4.978,
 'eval_steps_per_second': 2.589,
 'epoch': 5.0}

In [18]:
trainer.save_model()

In [None]:
base = AutoModelForCausalLM.from_pretrained(model_id, dtype="float16")
lora = PeftModel.from_pretrained(base, output_model_name)

merged = lora.merge_and_unload()

merged.save_pretrained(f"./merged-{output_model_name}")
tokenizer.save_pretrained(f"./merged-{output_model_name}")

('./merged-taxbot-SeaLLMs-v3-1.5B-Chat-v9/tokenizer_config.json',
 './merged-taxbot-SeaLLMs-v3-1.5B-Chat-v9/special_tokens_map.json',
 './merged-taxbot-SeaLLMs-v3-1.5B-Chat-v9/chat_template.jinja',
 './merged-taxbot-SeaLLMs-v3-1.5B-Chat-v9/vocab.json',
 './merged-taxbot-SeaLLMs-v3-1.5B-Chat-v9/merges.txt',
 './merged-taxbot-SeaLLMs-v3-1.5B-Chat-v9/added_tokens.json',
 './merged-taxbot-SeaLLMs-v3-1.5B-Chat-v9/tokenizer.json')

: 