In [None]:
# ========================
# STEP 1: Install Dependencies
# ========================

!pip install transformers datasets peft accelerate bitsandbytes evaluate rouge_score trl -q

In [None]:
# ========================
# STEP 2: Load Dataset and Tokenizer
# ========================
from datasets import load_dataset
from transformers import AutoTokenizer

# SAMSum dialogue/summary pairs and the tokenizer of the checkpoint that every
# later step loads.
dataset = load_dataset("knkarthick/samsum")
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

# Padding and pad-position masking in the training cells need a pad token.
# Guard in case the checkpoint's tokenizer does not define one by reusing EOS.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# ========================
# STEP 3: Evaluate Base Model
# ========================

from transformers import AutoModelForCausalLM
import evaluate
import torch

# ROUGE scorer; evaluate_model() reads this module-level object on every call.
rouge = evaluate.load("rouge")

# Un-tuned checkpoint used as the baseline: load, move to the GPU, then put it
# in inference mode.
base_model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
)
base_model = base_model.to("cuda")
base_model.eval()

def evaluate_model(model, dataset, tokenizer, n=20):
    """Score `model` with ROUGE on the first `n` examples of `dataset`.

    Each dialogue is wrapped in the notebook's summarization prompt, up to 60
    new tokens are generated, and only the generated continuation is decoded
    and compared with the reference summary using the module-level `rouge`
    metric.

    Args:
        model: Module exposing `generate`, `parameters`, `eval` and `train`
            (e.g. a causal LM or a PEFT wrapper around one).
        dataset: Split supporting `len()` and `select()` whose examples have
            "dialogue" and "summary" fields.
        tokenizer: Tokenizer used to encode prompts and decode generations.
        n: Maximum number of examples to score; clamped to the split size.

    Returns:
        The result of `rouge.compute` for the collected predictions and
        references.
    """
    # Send inputs to wherever the model's weights live instead of assuming CUDA.
    device = next(model.parameters()).device
    # select() raises on out-of-range indices, so never ask for more than exists.
    n_examples = min(n, len(dataset))

    # Generation must run in eval mode: a model that has just been trained is
    # still in training mode, which keeps dropout active. The previous mode is
    # restored afterwards so callers are unaffected.
    was_training = model.training
    model.eval()

    preds, refs = [], []
    try:
        for example in dataset.select(range(n_examples)):
            prompt = f"Summarize this dialogue:\n\n{example['dialogue']}\n\nSummary:"
            inputs = tokenizer(prompt, return_tensors="pt").to(device)
            with torch.no_grad():
                output = model.generate(**inputs, max_new_tokens=60)
            # The returned sequence starts with the prompt tokens; slice them
            # off rather than splitting the decoded text on "Summary:", which
            # misfires when the model emits that marker again.
            prompt_length = inputs["input_ids"].shape[1]
            summary = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
            preds.append(summary.strip())
            refs.append(example["summary"])
    finally:
        model.train(was_training)
    return rouge.compute(predictions=preds, references=refs)

# Baseline: ROUGE of the un-tuned model on the test split.
base_score = evaluate_model(
    model=base_model,
    dataset=dataset["test"],
    tokenizer=tokenizer,
)
print("ROUGE (Base Model):", base_score)

In [None]:
# ========================
# STEP 4: Preprocess Function
# ========================

def preprocess(examples, max_prompt_length=512, max_summary_length=60):
    """Build causal-LM training sequences of the form prompt + summary + EOS.

    A decoder-only model only learns to summarize if the reference summary is
    part of the token sequence it is trained on, and `labels` must line up
    position-by-position with `input_ids`. Each example is therefore assembled
    by hand: prompt tokens, then summary tokens, then EOS, right-padded to a
    fixed length of `max_prompt_length + max_summary_length + 1`.

    `labels` hold -100 (the index ignored by the loss) on prompt and padding
    positions and the token ids on summary/EOS positions. Note that a collator
    which rebuilds labels from `input_ids` (such as
    `DataCollatorForLanguageModeling(mlm=False)`) replaces these labels; the
    summary is still trained on because it is inside `input_ids`.

    Args:
        examples: Batch dict with "dialogue" and "summary" lists of strings.
        max_prompt_length: Token budget for the prompt (truncated beyond it).
        max_summary_length: Token budget for the summary (truncated beyond it).

    Returns:
        Dict with "input_ids", "attention_mask" and "labels", each a list of
        equal-length integer lists.
    """
    prompts = [f"Summarize this dialogue:\n\n{d}\n\nSummary:" for d in examples["dialogue"]]
    # No tokenizer-side padding: sequences are concatenated and padded below.
    prompt_ids = tokenizer(prompts, truncation=True, max_length=max_prompt_length)["input_ids"]
    # add_special_tokens=False keeps a second BOS out of the middle of the sequence.
    summary_ids = tokenizer(
        examples["summary"],
        truncation=True,
        max_length=max_summary_length,
        add_special_tokens=False,
    )["input_ids"]

    eos_id = tokenizer.eos_token_id
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = eos_id if eos_id is not None else 0
    total_length = max_prompt_length + max_summary_length + 1  # +1 for EOS

    batch = {"input_ids": [], "attention_mask": [], "labels": []}
    for p_ids, s_ids in zip(prompt_ids, summary_ids):
        # EOS after the summary teaches the model where to stop.
        target = list(s_ids) + ([eos_id] if eos_id is not None else [])
        tokens = list(p_ids) + target
        n_pad = total_length - len(tokens)
        batch["input_ids"].append(tokens + [pad_id] * n_pad)
        # Mask by position, not by id, so a real EOS is kept even if pad == EOS.
        batch["attention_mask"].append([1] * len(tokens) + [0] * n_pad)
        batch["labels"].append([-100] * len(p_ids) + target + [-100] * n_pad)
    return batch

# Tokenize the training split; batched=True hands preprocess() a dict of lists.
train_data = dataset["train"].map(
    function=preprocess,
    batched=True,
)

In [None]:
# ========================
# STEP 5: LoRA Fine-tuning
# ========================

from peft import LoraConfig, get_peft_model
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

# Fresh copy of the base checkpoint on the GPU; gradient checkpointing is
# switched on before the adapters are attached.
lora_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0").to("cuda")
lora_model.gradient_checkpointing_enable()

# Rank-8 LoRA adapters on the modules named q_proj and v_proj.
# This config object is also passed to get_peft_model() in the QLoRA cell.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    task_type="CAUSAL_LM"
)

lora_model = get_peft_model(lora_model, lora_config)

# Causal-LM collator (mlm=False); later training cells reuse it.
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

training_args = TrainingArguments(
    output_dir="./lora-out",
    num_train_epochs=5,
    per_device_train_batch_size=4,
    save_steps=500,
    logging_steps=100,
    fp16=True,
    save_total_limit=1,
    report_to="none"
)

trainer = Trainer(model=lora_model, args=training_args, train_dataset=train_data, data_collator=collator)
trainer.train()

# Training leaves the model in training mode, where the adapters' dropout
# (lora_dropout=0.1) is still active. Switch to inference mode before
# generating so the ROUGE score is not computed on noisy outputs.
lora_model.eval()
lora_score = evaluate_model(lora_model, dataset["test"], tokenizer)
print("ROUGE (LoRA):", lora_score)

In [None]:
# ========================
# STEP 6: QLoRA Fine-tuning
# ========================

from transformers import BitsAndBytesConfig
from peft import prepare_model_for_kbit_training

# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the quantized base model, prepare it for k-bit training, then attach
# adapters using the same lora_config as the LoRA step.
qlora_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", quantization_config=bnb_config, device_map="auto")
qlora_model = prepare_model_for_kbit_training(qlora_model)
qlora_model = get_peft_model(qlora_model, lora_config)

# Same hyperparameters as the LoRA run, but with a dedicated output directory.
# With save_total_limit=1, sharing "./lora-out" would make checkpoint rotation
# mix this run's checkpoints with the LoRA run's and delete the wrong ones.
qlora_training_args = TrainingArguments(
    output_dir="./qlora-out",
    num_train_epochs=5,
    per_device_train_batch_size=4,
    save_steps=500,
    logging_steps=100,
    fp16=True,
    save_total_limit=1,
    report_to="none"
)

trainer = Trainer(model=qlora_model, args=qlora_training_args, train_dataset=train_data, data_collator=collator)
trainer.train()

# Leave training mode (adapter dropout active) before generating summaries.
qlora_model.eval()
qlora_score = evaluate_model(qlora_model, dataset["test"], tokenizer)
print("ROUGE (QLoRA):", qlora_score)

In [None]:
# ========================
# STEP 7: Soft Prompting (Prompt Tuning)
# ========================

from peft import PromptTuningConfig

# Prompt tuning: 20 trainable virtual tokens, initialized from the given text.
prompt_config = PromptTuningConfig(
    task_type="CAUSAL_LM",
    prompt_tuning_init="TEXT",
    num_virtual_tokens=20,
    tokenizer_name_or_path=tokenizer.name_or_path,
    prompt_tuning_init_text="Summarize the following conversation:"
)

soft_prompt_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0").to("cuda")
soft_prompt_model = get_peft_model(soft_prompt_model, prompt_config)

# Same hyperparameters as the LoRA run, but with a dedicated output directory.
# With save_total_limit=1, sharing "./lora-out" would make checkpoint rotation
# mix this run's checkpoints with those of the earlier runs.
prompt_training_args = TrainingArguments(
    output_dir="./prompt-tuning-out",
    num_train_epochs=5,
    per_device_train_batch_size=4,
    save_steps=500,
    logging_steps=100,
    fp16=True,
    save_total_limit=1,
    report_to="none"
)

trainer = Trainer(model=soft_prompt_model, args=prompt_training_args, train_dataset=train_data, data_collator=collator)
trainer.train()

# Leave training mode before generating so evaluation runs in inference mode.
soft_prompt_model.eval()
prompt_score = evaluate_model(soft_prompt_model, dataset["test"], tokenizer)
print("ROUGE (Soft Prompt):", prompt_score)

In [None]:
# ========================
# STEP 8: Summary Table
# ========================
# Side-by-side report of the ROUGE results collected in the earlier steps.
print("========= ROUGE Summary =========")
for row_prefix, rouge_result in (
    ("Base Model:      ", base_score),
    ("LoRA Model:      ", lora_score),
    ("QLoRA Model:     ", qlora_score),
    ("Soft Prompting:  ", prompt_score),
):
    print(f"{row_prefix}{rouge_result}")
