In [1]:
# Kill all processess on GPU
# !fuser -v /dev/nvidia* -k

# Libraries

In [2]:
%%capture
import os
if 'COLAB_' not in ''.join(os.environ.keys()):
    %pip install unsloth==2025.3.4
else:
    # Do this only in Colab notebooks and Kaggle notebooks!
    %pip install transformers==4.48.3
    %pip install --no-deps bitsandbytes accelerate xformers==0.0.29 peft trl triton
    %pip install --no-deps cut_cross_entropy unsloth_zoo
    %pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    %pip install --no-deps unsloth==2025.3.4

In [13]:
import math
import torch
from datetime import datetime
from datasets import load_dataset
from unsloth import FastLanguageModel, UnslothTrainer, UnslothTrainingArguments, is_bf16_supported
from trl import SFTTrainer
from transformers import AutoTokenizer, Trainer, TrainingArguments
from peft import AutoPeftModelForCausalLM

# Config

In [5]:
# Project configs
seed = 69
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
lang = 'id' # 'en' | 'id'
task = 'wikipedia' # 'wikipedia' | 'gsm8k'

# Data Configs
max_data_length = 2500
max_seq_length = 1024
test_size = 0.2 # 2500 * 0.2 = 500 test data
hf_data_id = 'wikimedia/wikipedia' # 'wikimedia/wikipedia' | 'openai/gsm8k'
hf_data_dir = '20231101.id' # 'wikipedia': '20231101.en' | '20231101.id' || 'gsm8k': 'main'
hf_data_split = f'train[:{max_data_length}]'

# Model configs
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# LoRA config
hf_lora_id = 'alxxtexxr/L3.1-8B-wikipedia-id-LoRA-v20250403044132'
lora_dir = hf_lora_id.split('/')[-1]

# Download the trained LoRA adapter to the local directory
from huggingface_hub import snapshot_download
snapshot_download(
    repo_id=hf_lora_id, 
    local_dir=lora_dir, 
    # ignore_patterns='checkpoint-*/*',
)

print("HF LoRA ID:", hf_lora_id)

Fetching 176 files:   0%|          | 0/176 [00:00<?, ?it/s]

HF LoRA ID: alxxtexxr/L3.1-8B-wikipedia-id-LoRA-v20250403044132


# Model

In [7]:
# Load the LoRA-adapted model and tokenizer
model = AutoPeftModelForCausalLM.from_pretrained(lora_dir)
tokenizer = AutoTokenizer.from_pretrained('unsloth/Meta-Llama-3.1-8B')

model = model.to(device)
model.eval()

config.json:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now default to True since model is quantized.


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/235 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096, padding_idx=128004)
        (layers): ModuleList(
          (0): LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear4b

# Data

In [25]:
dataset = load_dataset(hf_data_id, data_dir=hf_data_dir, split=hf_data_split)
eos_token = tokenizer.eos_token

# def format_gsm8k_prompts(examples):
#     gsm8k_prompt = """### Instruction:
# Solve the following math problem step by step.

# ### Question: 
# {question}

# ### Answer: 
# {answer}""" + eos_token
    
#     return {'text': [gsm8k_prompt.format(question=question, answer=answer) for question, answer in zip(examples['question'], examples['answer'])]}

# def format_prompts(examples):
#     return {'text': [example + eos_token for example in examples['text']]}

# if task == 'gsm8k':
#     dataset = dataset.map(format_gsm8k_prompts, batched=True)
# else:
#     dataset = dataset.map(format_prompts, batched=True)

# Tokenize dataset
def tokenize(examples):
    return tokenizer(
        examples['text'], 
        truncation=True, 
        padding='max_length', 
        max_length=max_seq_length,
        add_special_tokens=True,
    )

dataset = dataset.map(tokenize, batched=True)

Map:   0%|          | 0/2500 [00:00<?, ? examples/s]

In [26]:
dataset_split = dataset.train_test_split(test_size=test_size)
print(dataset_split)

DatasetDict({
    train: Dataset({
        features: ['id', 'url', 'title', 'text', 'input_ids', 'attention_mask'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['id', 'url', 'title', 'text', 'input_ids', 'attention_mask'],
        num_rows: 500
    })
})


In [27]:
# Sanity check
for row in dataset_split['test'][:3]["text"]:
    print("================================================================")
    print(row)

Proton adalah partikel subatomik, simbol  atau , dengan muatan listrik positif +1e muatan elementer dan massa sedikit lebih kecil dari neutron. Proton dan neutron, masing-masing dengan massa sekitar satu satuan massa atom, secara kolektif disebut sebagai "nukleon".

Suatu atom biasanya terdiri dari sejumlah proton dan neutron yang berada di bagian inti (tengah) atom, dan sejumlah elektron yang mengelilingi inti tersebut. Dalam atom bermuatan netral, banyaknya proton akan sama dengan jumlah elektronnya. Banyaknya proton di bagian inti biasanya akan menentukan sifat kimia suatu atom. Inti atom sering dikenal juga dengan istilah nukleus atau nukleon (nucleon), dan reaksi yang terjadi atau berkaitan dengan inti atom ini disebut reaksi nuklir.

Kata proton adalah bahasa Yunani untuk "pertama", dan nama ini diberikan kepada inti hidrogen oleh Ernest Rutherford pada tahun 1920. Pada tahun-tahun sebelumnya, Rutherford telah menemukan bahwa inti hidrogen (dikenal sebagai inti paling ringan) dap

# Evaluation

### References
- https://huggingface.co/docs/transformers/en/training

In [28]:
# Set up Trainer
trainer = Trainer(
    model=model,
    eval_dataset=dataset_split['test'],
)

In [29]:
# Evaluate
eval_results = trainer.evaluate()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33malimtegar[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


KeyError: 'eval_loss'

In [31]:
eval_results
# loss = eval_results['eval_loss']
# print("Loss:", loss)
# print("Perplexity:", math.exp(loss))

{'eval_model_preparation_time': 0.0079,
 'eval_runtime': 3389.5764,
 'eval_samples_per_second': 0.148,
 'eval_steps_per_second': 0.019}