# **QLoRA Fine-Tuning (4-bit)**

In [8]:
# 1. Install Dependencies (Note the addition of bitsandbytes)
!pip install -q transformers datasets peft torch accelerate bitsandbytes

import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    set_seed
)

In [11]:
# --- Configuration ---
class Config:
    MODEL_ID = "Qwen/Qwen3-1.7B"
    DATASET_PATH = "mariya.json"
    OUTPUT_DIR = "./results_qlora_4bit"
    MAX_SEQ_LENGTH = 512
    LEARNING_RATE = 2e-4
    BATCH_SIZE = 4
    GRAD_ACCUMULATION = 4
    NUM_EPOCHS = 3
    SEED = 42
    LORA_R = 16
    LORA_ALPHA = 32
    LORA_DROPOUT = 0.05
    TARGET_MODULES = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]

set_seed(Config.SEED)

In [12]:
# --- Load Tokenizer & Model (4-bit Quantization) ---
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",       # 4-bit Normal Float
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(Config.MODEL_ID, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

model = AutoModelForCausalLM.from_pretrained(
    Config.MODEL_ID,
    quantization_config=bnb_config, # Apply QLoRA config
    device_map="auto",
    trust_remote_code=True
)

config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/311 [00:00<?, ?it/s]



generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [13]:
# Critical step for QLoRA: Prepare model for k-bit training
model = prepare_model_for_kbit_training(model)

In [14]:
# --- Data Preparation (Same as LoRA) ---
def format_chat_template(row):
    messages = [
        {"role": "user", "content": row["prompt"]},
        {"role": "assistant", "content": row["completion"]}
    ]
    formatted_prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=False
    )
    return {"text": formatted_prompt}

def tokenize_function(examples):
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=Config.MAX_SEQ_LENGTH,
        padding="max_length"
    )

raw_dataset = load_dataset("json", data_files=Config.DATASET_PATH, split="train")
dataset_split = raw_dataset.train_test_split(test_size=0.1, seed=Config.SEED)
dataset_split = dataset_split.map(format_chat_template)
tokenized_datasets = dataset_split.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(raw_dataset.column_names + ["text"])

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/212 [00:00<?, ? examples/s]

Map:   0%|          | 0/24 [00:00<?, ? examples/s]

Map:   0%|          | 0/212 [00:00<?, ? examples/s]

Map:   0%|          | 0/24 [00:00<?, ? examples/s]

In [15]:
# --- LoRA Adapter Setup ---
peft_config = LoraConfig(
    r=Config.LORA_R,
    lora_alpha=Config.LORA_ALPHA,
    lora_dropout=Config.LORA_DROPOUT,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
    target_modules=Config.TARGET_MODULES
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 17,432,576 || all params: 2,049,172,480 || trainable%: 0.8507


In [18]:
# --- Training ---
training_args = TrainingArguments(
    output_dir=Config.OUTPUT_DIR,
    per_device_train_batch_size=Config.BATCH_SIZE,
    per_device_eval_batch_size=Config.BATCH_SIZE,
    gradient_accumulation_steps=Config.GRAD_ACCUMULATION,
    learning_rate=Config.LEARNING_RATE,
    num_train_epochs=Config.NUM_EPOCHS,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    logging_steps=10,
    save_strategy="epoch",
    eval_strategy="epoch",
    fp16=True,
    optim="paged_adamw_32bit", # Paged Optimizer is CRITICAL for QLoRA to handle RAM spikes
    report_to="none",
    group_by_length=True
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

trainer.train()

warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss
1,4.644416,1.909633
2,1.91898,1.222351
3,1.097682,1.128837


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=42, training_loss=2.1854961259024486, metrics={'train_runtime': 401.4207, 'train_samples_per_second': 1.584, 'train_steps_per_second': 0.105, 'total_flos': 3395705251037184.0, 'train_loss': 2.1854961259024486, 'epoch': 3.0})

In [20]:
# --- Save & Inference ---
model.save_pretrained(f"{Config.OUTPUT_DIR}/final_qlora_adapter")
tokenizer.save_pretrained(f"{Config.OUTPUT_DIR}/final_qlora_adapter")

# Inference Test
def generate_response(prompt):
    messages = [{"role": "user", "content": prompt}]
    inputs = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=100, temperature=0.7, do_sample=True)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

print(generate_response("Who is Mariya Sha?"))

user
Who is Mariya Sha?
assistant
<think>

</think>

Mariya Sha is a key character in J.R.R. Tolkien's The Silmarillion.  She was a wise and powerful woman in the First Age.  She was known for her deep knowledge of the language and her role as a guide for the Elves.  She was also a skilled warrior and a protector of Middle-earth.  Her wisdom and courage made her a crucial figure in the lore of Middle-earth.
