# **Standard LoRA Fine-Tuning**

In [2]:
# 1. Install Dependencies
!pip install -q transformers datasets peft torch accelerate

import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    set_seed
)

In [12]:
# --- Configuration ---
class Config:
    MODEL_ID = "Qwen/Qwen3-4B-Instruct-2507"
    DATASET_PATH = "mariya.json"
    OUTPUT_DIR = "./results_lora_standard"
    MAX_SEQ_LENGTH = 512
    LEARNING_RATE = 2e-4
    BATCH_SIZE = 1  # Further reduced to save memory
    GRAD_ACCUMULATION = 16 # Increased to compensate for smaller batch size
    NUM_EPOCHS = 3
    SEED = 42
    # LoRA Specifics
    LORA_R = 16
    LORA_ALPHA = 32
    LORA_DROPOUT = 0.05
    TARGET_MODULES = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]

set_seed(Config.SEED)

In [4]:
# --- Load Tokenizer & Model (Full Precision 16-bit) ---
tokenizer = AutoTokenizer.from_pretrained(Config.MODEL_ID, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# NOTE: Loading in torch.float16 (or bfloat16 for Ampere GPUs)
model = AutoModelForCausalLM.from_pretrained(
    Config.MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/398 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/238 [00:00<?, ?B/s]

In [5]:
# --- Data Preparation (Chat Templates) ---
def format_chat_template(row):
    messages = [
        {"role": "user", "content": row["prompt"]},
        {"role": "assistant", "content": row["completion"]}
    ]
    formatted_prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=False
    )
    return {"text": formatted_prompt}

def tokenize_function(examples):
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=Config.MAX_SEQ_LENGTH,
        padding="max_length"
    )

raw_dataset = load_dataset("json", data_files=Config.DATASET_PATH, split="train")
dataset_split = raw_dataset.train_test_split(test_size=0.1, seed=Config.SEED)
dataset_split = dataset_split.map(format_chat_template)
tokenized_datasets = dataset_split.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(raw_dataset.column_names + ["text"])

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/212 [00:00<?, ? examples/s]

Map:   0%|          | 0/24 [00:00<?, ? examples/s]

Map:   0%|          | 0/212 [00:00<?, ? examples/s]

Map:   0%|          | 0/24 [00:00<?, ? examples/s]

In [7]:
# --- LoRA Adapter Setup ---
peft_config = LoraConfig(
    r=Config.LORA_R,
    lora_alpha=Config.LORA_ALPHA,
    lora_dropout=Config.LORA_DROPOUT,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
    target_modules=Config.TARGET_MODULES
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 33,030,144 || all params: 4,055,498,240 || trainable%: 0.8145


In [13]:
# --- Training ---
training_args = TrainingArguments(
    output_dir=Config.OUTPUT_DIR,
    per_device_train_batch_size=Config.BATCH_SIZE,
    per_device_eval_batch_size=Config.BATCH_SIZE,
    gradient_accumulation_steps=Config.GRAD_ACCUMULATION,
    learning_rate=Config.LEARNING_RATE,
    num_train_epochs=Config.NUM_EPOCHS,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    logging_steps=10,
    save_strategy="epoch",
    # evaluation_strategy="epoch",  # Removed due to TypeError
    fp16=True,  # Enable mixed precision
    report_to="none",
    group_by_length=True
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

trainer.train()

warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.


Step,Training Loss
10,3.711648
20,1.431197
30,1.036682
40,0.850793




TrainOutput(global_step=42, training_loss=1.707266126360212, metrics={'train_runtime': 530.5032, 'train_samples_per_second': 1.199, 'train_steps_per_second': 0.079, 'total_flos': 7163660583567360.0, 'train_loss': 1.707266126360212, 'epoch': 3.0})

In [15]:
# --- Save & Inference ---
model.save_pretrained(f"{Config.OUTPUT_DIR}/final_adapter")
tokenizer.save_pretrained(f"{Config.OUTPUT_DIR}/final_adapter")

# Inference Test
def generate_response(prompt):
    messages = [{"role": "user", "content": prompt}]
    # Apply chat template and get tokenized input, move to device
    tokenized_inputs = tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        # Pass the input_ids and attention_mask separately to model.generate
        outputs = model.generate(
            input_ids=tokenized_inputs["input_ids"],
            attention_mask=tokenized_inputs["attention_mask"], # Important for proper generation
            max_new_tokens=100,
            temperature=0.7,
            do_sample=True
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

print(generate_response("Who is Mariya Sha?"))

user
Who is Mariya Sha?
assistant
Mariya Sha  is a wise and powerful wizard, serving as a guide and mentor to the Fellowship. She is known for her deep knowledge, calm demeanor, and unwavering resolve in the face of darkness.  Mariya Sha  appears as a white-haired woman with a long cloak and a staff, often seen in the company of others. She is a key figure in the struggle against Sauron.  Mariya Sha  is a Maia, an immortal spirit sent to Middle-earth
