In [None]:
!pip -q install -U transformers accelerate peft bitsandbytes sentencepiece sacrebleu evaluate huggingface_hub datasets fsspec
import torch, random, os, json, itertools, textwrap
from datasets import load_dataset, DatasetDict
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import evaluate, tqdm

In [None]:
# Colab-only: mount Google Drive at /content/drive. Later cells save the LoRA
# adapter to, and reload it from, /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from huggingface_hub import notebook_login

# Interactive Hugging Face Hub login prompt.
# NOTE(review): presumably required because BASE_MODEL is a gated repository — confirm.
notebook_login()

In [None]:
# ---------- experiment configuration ----------
# Checkpoint that is quantised, LoRA-tuned and later reloaded for comparison.
BASE_MODEL = "meta-llama/Llama-3.2-1B"
# Language names interpolated into the translation prompt.
SRC_LANG, TGT_LANG = "English", "Spanish"

# LoRA hyper-params
lora_conf = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    # Declare the task so PEFT wraps the model in its causal-LM specific class
    # (and records the task in the saved adapter config) instead of the
    # generic PeftModel wrapper.
    task_type="CAUSAL_LM",
    target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"]
)

BATCH = 4       # per-device train batch size
EPOCHS = 2      # number of training epochs
MAX_LEN = 256   # max tokens per training sequence (prompt + answer)


In [None]:
# 4-bit NF4 quantisation with double quantisation; compute dtype is bfloat16.
bnb_conf = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
# Reuse EOS as the padding token and pad on the left.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# Quantised base model -> k-bit training preparation -> LoRA adapters.
model = get_peft_model(
    prepare_model_for_kbit_training(
        AutoModelForCausalLM.from_pretrained(
            BASE_MODEL,
            quantization_config=bnb_conf,
            device_map="auto",
        )
    ),
    lora_conf,
)
model.print_trainable_parameters()

In [None]:
def format_prompt(text, src=SRC_LANG, tgt=TGT_LANG):
    """Return the three-line translation prompt for ``text``.

    The prompt is an instruction line, a ``"{src}: {text}"`` line and a final
    ``"{tgt}:"`` cue with no trailing whitespace, ready to be completed.
    """
    instruction = f"Translate the following text from {src} to {tgt}:"
    source_line = f"{src}: {text}"
    answer_cue = f"{tgt}:"
    return "\n".join((instruction, source_line, answer_cue))

In [None]:
from transformers import DefaultDataCollator

# ---------- tiny slice just for demo ----------
# First 3% of opus_books en-es. Hold out 20% as test, then 20% of the
# remainder as validation (both splits seeded with 42).
subset = load_dataset("opus_books", "en-es", split="train[:3%]")

temp = subset.train_test_split(test_size=0.20, seed=42)
test_raw = temp["test"]

tmp = temp["train"].train_test_split(test_size=0.20, seed=42)
train_raw = tmp["train"]
valid_raw = tmp["test"]

ds = DatasetDict({"train": train_raw, "validation": valid_raw, "test": test_raw})

def preprocess(ex):
    """Tokenise one en→es pair into a causal-LM training example.

    Args:
        ex: dataset row with ``ex["translation"]["en"]`` (source text) and
            ``ex["translation"]["es"]`` (target text).

    Returns:
        The tokenizer encoding of ``prompt + " " + target + EOS`` (truncated
        to ``MAX_LEN``) with two extra keys:
          * ``labels``    – copy of ``input_ids`` with the prompt prefix
                            replaced by -100 so loss is computed on the
                            answer only;
          * ``reference`` – the untokenised target text.
    """
    src, tgt = ex["translation"]["en"], ex["translation"]["es"]
    prompt = format_prompt(src)

    # 1️⃣ build full prompt **plus** answer in one sequence
    full_text = prompt + " " + tgt + tokenizer.eos_token
    enc = tokenizer(full_text, truncation=True, max_length=MAX_LEN)
    input_ids = enc["input_ids"]

    # 2️⃣ mask the prompt part with -100 so loss is only on the answer
    # The prompt is tokenised with the SAME settings as the full text so that
    # any special tokens the tokenizer prepends (e.g. BOS) are counted too;
    # tokenising it with add_special_tokens=False left the mask one token
    # short and exposed the last prompt token to the loss.
    prompt_ids = tokenizer(prompt, truncation=True, max_length=MAX_LEN)["input_ids"]

    # The prompt span is the longest common prefix of both tokenisations.
    # zip() also bounds it by len(input_ids), so a sequence truncated inside
    # the prompt yields fully-masked labels instead of a length mismatch.
    n_prompt = 0
    for prompt_tok, full_tok in zip(prompt_ids, input_ids):
        if prompt_tok != full_tok:
            break
        n_prompt += 1

    labels = [-100] * n_prompt + list(input_ids[n_prompt:])

    assert len(labels) == len(input_ids), "label/ids length mismatch"

    enc["labels"] = labels
    enc["reference"] = tgt          # keep plain text for BLEU
    return enc

# Run preprocess over every split with 4 worker processes, dropping the raw
# columns, then expose the listed columns in torch format.
raw_columns = ds["train"].column_names
tokenized = ds.map(preprocess, num_proc=4, remove_columns=raw_columns)
tokenized.set_format(
    type="torch",
    columns=["input_ids", "labels", "attention_mask", "reference"],
)

In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq

# Collator that pads each batch (padding=True) and returns PyTorch tensors.
data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    model=model,
    padding=True,
    return_tensors="pt",
)

# Hyper-parameters collected in one mapping so they are easy to scan and tweak.
training_kwargs = dict(
    output_dir="./lora-llama32-translate",
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    bf16=True,
    logging_steps=50,
    save_total_limit=2,
    report_to="none",
)
args = TrainingArguments(**training_kwargs)

trainer = Trainer(
    model=model,
    args=args,
    data_collator=data_collator,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["validation"],
)

trainer.train()

# Save the adapter weights and the tokenizer files side by side on Drive.
output_path = "/content/drive/MyDrive/lora-llama32-en-es"
for artefact in (model, tokenizer):
    artefact.save_pretrained(output_path)

In [None]:
from peft import PeftModel, PeftConfig

# Both model copies are loaded the same way: fp16 weights, automatic device placement.
load_kwargs = {"device_map": "auto", "torch_dtype": torch.float16}

# Load base model + tokenizer
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **load_kwargs)

base_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
base_tokenizer.padding_side = "left"
base_tokenizer.pad_token = base_tokenizer.eos_token

# Load LoRA-adapted model (same base + adapter weights)
tuned_model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(BASE_MODEL, **load_kwargs),
    "/content/drive/MyDrive/lora-llama32-en-es",
)

tuned_tokenizer = base_tokenizer  # same tokenizer

In [None]:
def translate(model, tokenizer, prompt, max_new_tokens=64):
    """Greedy-decode a completion for ``prompt`` and return only the new text.

    Args:
        model: causal LM exposing ``.device`` and ``.generate``.
        tokenizer: tokenizer matching ``model``; its ``pad_token_id`` is
            passed to ``generate``.
        prompt: prompt string to complete.
        max_new_tokens: upper bound on the number of generated tokens.

    Returns:
        The decoded continuation with special tokens removed and surrounding
        whitespace stripped. The prompt itself is NOT included.
    """
    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(model.device)
    out = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.pad_token_id,
        do_sample=False
    )
    # The generated sequence starts with the prompt tokens; skip them so the
    # caller gets the translation rather than "prompt + translation".
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True).strip()

In [None]:
def format_prompt(text, src="English", target="Spanish"):
    """Build the inference prompt for translating ``text``.

    The wording must match the template the training examples were built
    with ("Translate the following text from ..."); a differently worded
    instruction means the fine-tuned model is evaluated on a prompt it never
    saw during training.

    Args:
        text: sentence to translate.
        src: source language name.
        target: target language name.

    Returns:
        A three-line prompt ending in ``"{target}:"`` with no trailing space.
    """
    return (f"Translate the following text from {src} to {target}:\n"
            f"{src}: {text}\n"
            f"{target}:")

# Hand-written English sentences used for a qualitative side-by-side check.
samples = [
    "I need to get braces for my overbite.",
    "How long does the treatment usually take?",
    "My dentist said I have a crossbite.",
    "Will it hurt when I get my aligners?",
    "I lost my last tray. What should I do?"
]

# (printed label, model, tokenizer) — base model first, tuned model second.
contenders = (
    ("🔹 Base Model:", base_model, base_tokenizer),
    ("🔸 Tuned Model:", tuned_model, tuned_tokenizer),
)

for s in samples:
    prompt = format_prompt(s)
    print(f"\n📌 Input: {s}")
    for label, mdl, tok in contenders:
        print(label, translate(mdl, tok, prompt))