# Trillim LoRA Finetuner

LoRA fine-tuning for BitNet / Llama / Qwen2 models.

**Quick start:**
1. Set hyperparameters in the **Hyperparameters & Configuration** cell below
2. Upload your dataset (or modify the dataset loading cell)
3. Click **Runtime > Run all**

Checkpoints are saved every `CHECKPOINT_EVERY_N_EPOCHS` epochs to `CHECKPOINT_DIR` (default `checkpoints/`).  
The final adapter is saved to `OUTPUT_DIR` (default `finetuned_model/`).

In [None]:
# ============================================================
# INSTALL DEPENDENCIES
# ============================================================
# Installs the third-party packages this notebook relies on (IPython shell
# magic, so it only works inside a notebook). No versions are pinned, so the
# library APIs available at runtime depend on what pip resolves at install time.
!pip install -q transformers peft bitsandbytes datasets accelerate trl

In [None]:
# ============================================================
# HYPERPARAMETERS & CONFIGURATION
# Edit this cell to configure the finetune, then Run All.
# ============================================================

# --- Model ---
# Hugging Face model id; passed to AutoTokenizer.from_pretrained and
# AutoModelForCausalLM.from_pretrained in later cells.
MODEL_ID = "microsoft/bitnet-b1.58-2B-4T-bf16"

# --- Dataset ---
# Point this at a directory with your training files, or a single file.
# Supported formats: "text" (.txt), "json" (.json/.jsonl), "csv" (.csv)
# Whatever the format, the loaded dataset must expose a "text" column.
DATASET_DIR = "dataset"        # directory containing your data files
DATASET_FORMAT = "text"         # "text", "json", or "csv"

# --- LoRA ---
# Forwarded to peft.LoraConfig as r / lora_alpha / lora_dropout / target_modules
# (only used for a fresh run, i.e. when RESUME_FROM is empty).
LORA_R = 16
LORA_ALPHA = 16
LORA_DROPOUT = 0.05
TARGET_MODULES = ["k_proj", "q_proj", "v_proj", "o_proj"]

# --- Training ---
# All of these are forwarded to trl.SFTConfig in the trainer-setup cell.
MAX_SEQ_LENGTH = 256
BATCH_SIZE = 4                   # per_device_train_batch_size
GRADIENT_ACCUMULATION_STEPS = 4
LEARNING_RATE = 1e-4
NUM_EPOCHS = 5
WARMUP_STEPS = 100
LOGGING_STEPS = 10
OPTIMIZER = "paged_adamw_8bit"   # passed as `optim`
BF16 = True                      # passed as `bf16`

# --- Checkpointing ---
CHECKPOINT_EVERY_N_EPOCHS = 1    # save a checkpoint every N completed epochs
CHECKPOINT_DIR = "checkpoints"   # also used as the trainer's output_dir
OUTPUT_DIR = "finetuned_model"   # destination of the final adapter + tokenizer

# --- Chat template ---
# Written into the saved tokenizer so model inference matches training.
# Set to "" to keep the model's default chat template.
# The template renders each message as "<Role>: <content>" followed by a
# newline, and appends "Assistant: " when add_generation_prompt is set.
# It is assigned to the tokenizer in the final-save cell, i.e. after training;
# the per-epoch checkpoints save the tokenizer before that assignment.
CHAT_TEMPLATE = (
    "{% for message in messages %}"
    "{{ message['role'] | capitalize }}: {{ message['content'] | trim }}\n"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ 'Assistant: ' }}{% endif %}"
)

# --- Resume from checkpoint (optional) ---
# Set to a checkpoint path to resume training, e.g. "checkpoints/checkpoint-epoch-3"
# When non-empty, the LoRA cell loads this adapter with
# PeftModel.from_pretrained(..., is_trainable=True) instead of building a new
# LoraConfig. NOTE(review): trainer.train() is still called without a checkpoint
# argument, so presumably optimizer/scheduler state and the epoch counter start
# fresh — verify if exact resumption matters.
RESUME_FROM = ""

In [None]:
# ============================================================
# DATASET LOADING
# Modify this cell to parse your dataset as needed.
# The only requirement: the resulting `dataset` must have a
# "text" column with fully-formatted training strings.
# ============================================================
import os
from pathlib import Path
from datasets import load_dataset
from transformers import AutoTokenizer

# Glob patterns searched for each supported DATASET_FORMAT. The format name is
# also the builder name handed to datasets.load_dataset.
_DATASET_GLOBS = {
    "text": ("*.txt",),
    "json": ("*.json", "*.jsonl"),
    "csv": ("*.csv",),
}

# Fail fast on a bad format, before downloading the tokenizer.
if DATASET_FORMAT not in _DATASET_GLOBS:
    raise ValueError(f"Unknown DATASET_FORMAT: {DATASET_FORMAT!r}")

# DATASET_DIR may name either a directory of data files or one data file.
# Only create it when it is not an existing file (makedirs would fail on a file).
dataset_path = Path(DATASET_DIR)
if not dataset_path.is_file():
    os.makedirs(DATASET_DIR, exist_ok=True)
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

# The tokenizer is reused by the trainer, the checkpoint callback and the
# final-save cell.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

_patterns = _DATASET_GLOBS[DATASET_FORMAT]
if dataset_path.is_file():
    files = [dataset_path]
else:
    # Each pattern is sorted on its own so the file order is deterministic
    # (for "json": all .json files first, then all .jsonl files).
    files = [f for pattern in _patterns for f in sorted(dataset_path.glob(pattern))]

if not files:
    _extensions = "/".join(pattern.lstrip("*") for pattern in _patterns)
    raise FileNotFoundError(f"No {_extensions} files found in {DATASET_DIR}/")

dataset = load_dataset(
    DATASET_FORMAT,
    data_files={"train": [str(f) for f in files]},
    split="train",
)

if len(dataset) == 0:
    raise ValueError(f"Dataset loaded from {DATASET_DIR} contains no examples")
if "text" not in dataset.column_names:
    raise ValueError(
        f'Dataset must have a "text" column; found columns: {dataset.column_names}'
    )


def format_text(example):
    """Turn literal backslash-n sequences in the "text" field into real newlines."""
    example["text"] = example["text"].replace("\\n", "\n")
    return example


# Only plain-text input gets the escape-sequence expansion; json/csv rows are
# used as loaded.
if DATASET_FORMAT == "text":
    dataset = dataset.map(format_text)

print(f"Loaded {len(dataset)} training examples")
print(f"First example: {repr(dataset[0]['text'][:200])}")

In [None]:
# ============================================================
# LOAD MODEL
# ============================================================
import torch
from transformers import AutoModelForCausalLM

# Load the base model. If this transformers build exports BitNetQuantConfig,
# load with that quantization config; otherwise fall back to a plain bfloat16
# load. Note that the `except ImportError` covers the whole try body, so an
# ImportError raised while loading the model also triggers the fallback.
try:
    from transformers import BitNetQuantConfig
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        quantization_config=BitNetQuantConfig(),
    )
except ImportError:
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
    )

# Use the GPU when one is visible to torch, otherwise stay on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
print(f"Model loaded on {device}")

In [None]:
# ============================================================
# CONFIGURE LoRA
# ============================================================
from peft import LoraConfig, prepare_model_for_kbit_training, PeftModel

# Run PEFT's k-bit training preparation helper on the base model before any
# adapter is attached.
model = prepare_model_for_kbit_training(model)

if not RESUME_FROM:
    # Fresh run: describe the adapter here and let the trainer attach it.
    peft_config = LoraConfig(
        target_modules=TARGET_MODULES,
        task_type="CAUSAL_LM",
        bias="none",
        r=LORA_R,
        lora_alpha=LORA_ALPHA,
        lora_dropout=LORA_DROPOUT,
    )
else:
    # Continue from a saved adapter: wrap the model now, keep the adapter
    # trainable, and hand the trainer no config of its own.
    print(f"Resuming from checkpoint: {RESUME_FROM}")
    model = PeftModel.from_pretrained(model, RESUME_FROM, is_trainable=True)
    peft_config = None

print("LoRA configured")

In [None]:
# ============================================================
# SETUP TRAINER WITH CHECKPOINT CALLBACK
# ============================================================
from pathlib import Path
from transformers import TrainerCallback
from trl import SFTTrainer, SFTConfig


class CheckpointEveryNEpochs(TrainerCallback):
    """Save LoRA adapter + tokenizer every N completed epochs.

    Args:
        n: Save interval in epochs; values below 1 are clamped to 1.
        checkpoint_dir: Directory under which ``checkpoint-epoch-<k>``
            sub-directories are created.
        tokenizer: Object whose ``save_pretrained`` is called next to the
            model's, so each checkpoint directory is self-contained.
    """

    def __init__(self, n, checkpoint_dir, tokenizer):
        self.n = max(n, 1)
        self.checkpoint_dir = Path(checkpoint_dir)
        self.tokenizer = tokenizer

    def on_epoch_end(self, args, state, control, model=None, **kwargs):
        """Write a checkpoint when the just-finished epoch is a multiple of ``n``.

        Nothing is written when no model was passed, when the trainer has not
        recorded an epoch yet, or on any process other than the main one (so
        multi-process runs do not write the same directory concurrently).
        """
        if state.epoch is None:
            return
        # Missing attribute is treated as "single process" for duck-typed states.
        if not getattr(state, "is_world_process_zero", True):
            return

        # state.epoch is a float; round to the nearest whole epoch count.
        epoch = int(round(state.epoch))
        if epoch % self.n != 0:
            return

        if model is None:
            # Nothing to save; report it rather than fail with AttributeError.
            print(f"\n>> Checkpoint skipped at epoch {epoch}: no model was provided\n")
            return

        save_path = self.checkpoint_dir / f"checkpoint-epoch-{epoch}"
        save_path.mkdir(parents=True, exist_ok=True)
        model.save_pretrained(str(save_path))
        self.tokenizer.save_pretrained(str(save_path))
        print(f"\n>> Checkpoint saved: {save_path}\n")


import dataclasses

# The install cell does not pin TRL. Older releases call the sequence-length
# option `max_seq_length`; newer ones call it `max_length`. Pick whichever
# field the installed SFTConfig declares, so the same notebook runs on both.
_sft_field_names = {field.name for field in dataclasses.fields(SFTConfig)}
_seq_length_arg = "max_length" if "max_length" in _sft_field_names else "max_seq_length"

training_args = SFTConfig(
    output_dir=CHECKPOINT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    warmup_steps=WARMUP_STEPS,
    num_train_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    bf16=BF16,
    logging_steps=LOGGING_STEPS,
    # The trainer's own checkpointing is off; CheckpointEveryNEpochs below
    # writes the adapter + tokenizer instead.
    save_strategy="no",
    report_to="none",
    optim=OPTIMIZER,
    **{_seq_length_arg: MAX_SEQ_LENGTH},
)

checkpoint_callback = CheckpointEveryNEpochs(
    n=CHECKPOINT_EVERY_N_EPOCHS,
    checkpoint_dir=CHECKPOINT_DIR,
    tokenizer=tokenizer,
)

# peft_config is None when resuming (the model is already a PeftModel then).
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    peft_config=peft_config,
    processing_class=tokenizer,
    callbacks=[checkpoint_callback],
)

print("Trainer ready")

In [None]:
# ============================================================
# TRAIN
# ============================================================
# Run the full training loop configured in the previous cell. train() is called
# with no arguments, so no trainer checkpoint is passed in here even when
# RESUME_FROM was used to load adapter weights.
print("Starting finetuning...")
trainer.train()
print("Training complete!")

In [None]:
# ============================================================
# SAVE FINAL MODEL
# ============================================================
# Write the trained adapter from the trainer's model into OUTPUT_DIR.
trainer.model.save_pretrained(OUTPUT_DIR)

# An empty CHAT_TEMPLATE leaves the tokenizer's existing template untouched;
# either way the tokenizer is saved next to the adapter.
if CHAT_TEMPLATE:
    tokenizer.chat_template = CHAT_TEMPLATE
tokenizer.save_pretrained(OUTPUT_DIR)

print(
    f"Final LoRA adapter saved to: {OUTPUT_DIR}/",
    f"Checkpoints saved in:        {CHECKPOINT_DIR}/",
    sep="\n",
)

In [None]:
# ============================================================
# TEST THE FINETUNED MODEL (optional)
# ============================================================
from peft import PeftModel

# Switch to inference mode and turn the KV cache on for generation.
model.eval()
model.config.use_cache = True

# Reload adapter from the saved output (if not already a PeftModel)
# `model` is only a PeftModel here when RESUME_FROM was set; otherwise the
# adapter is loaded from OUTPUT_DIR, which requires the save cell to have run.
# NOTE(review): if training ran in this session, the trainer has presumably
# already attached LoRA layers to this same object — confirm whether this
# reload is redundant in that case.
if not isinstance(model, PeftModel):
    model = PeftModel.from_pretrained(model, OUTPUT_DIR)
    model.eval()


def generate(prompt, max_new_tokens=128, temperature=0.6, top_p=0.9):
    """Generate a reply to ``prompt`` using the notebook's ``model`` and ``tokenizer``.

    The prompt is wrapped as ``"User: <prompt>\\nAssistant:"`` before
    tokenization. NOTE(review): this is hard-coded rather than rendered from
    CHAT_TEMPLATE (whose generation prompt ends in "Assistant: " with a
    trailing space) — confirm which form matches the training data.

    Args:
        prompt: User message text.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature. ``None`` or a value <= 0 selects
            greedy decoding instead of sampling, because sampling requires a
            strictly positive temperature.
        top_p: Nucleus-sampling threshold; only used when sampling.

    Returns:
        The decoded continuation (prompt tokens removed, special tokens
        skipped), stripped of surrounding whitespace.
    """
    text = f"User: {prompt}\nAssistant:"
    inputs = tokenizer(text, return_tensors="pt").to(model.device)

    gen_kwargs = {
        "max_new_tokens": max_new_tokens,
        "repetition_penalty": 1.1,
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temperature is not None and temperature > 0:
        gen_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
    else:
        # Sampling options are left out entirely so greedy decoding is used.
        gen_kwargs["do_sample"] = False

    outputs = model.generate(**inputs, **gen_kwargs)

    # Keep only the tokens produced after the prompt.
    prompt_length = inputs["input_ids"].shape[1]
    return tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True,
    ).strip()


# Try a test prompt
# Sampling is enabled by default in generate(), so the response can differ
# between runs.
test_prompt = "Hello, how are you?"
print(f"Prompt: {test_prompt}")
print(f"Response: {generate(test_prompt)}")

In [None]:
# ============================================================
# DOWNLOAD (Colab only)
# Zip up the final adapter for download.
# ============================================================
import shutil

# Zip the contents of OUTPUT_DIR into "<archive_name>.zip" in the working directory.
archive_name = "finetuned_model"
shutil.make_archive(archive_name, "zip", OUTPUT_DIR)
print(f"Created {archive_name}.zip")

# google.colab only exists inside Colab. Keep the try limited to the import so
# an unrelated error from the download call is not mistaken for "not in Colab".
try:
    from google.colab import files
except ImportError:
    print("Not running in Colab — zip file is available locally.")
else:
    files.download(f"{archive_name}.zip")