<a href="https://colab.research.google.com/github/amani-agrawal/PromptExtension/blob/main/PromptExtension.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Install Dependencies:

pip install -q bitsandbytes accelerate datasets peft transformers sentencepiece

1. Load your base model

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, TrainingArguments, Trainer

MODEL_NAME = "google/flan-t5-large"
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME, device_map="auto", load_in_8bit=True)

input_text = "translate English to German: How old are you?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")

outputs = model.generate(input_ids)
print(tokenizer.decode(outputs[0]))

2. Load your dataset

In [None]:
from datasets import load_dataset

# paths to the files
data_files = {
    "train": "train.csv",
    "validation": "val.csv",
}

raw = load_dataset("csv", data_files=data_files)

# Column headers are "input_text" and "output_text".

3. Convert the sentences to a tokenised format for encoder(input ids and attention_mask) and decoder(input_ids/labels)

In [None]:
def preprocess(batch):
    model_in = tokenizer(
        batch["input_text"],
        truncation=True,
        padding="longest",
    )
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(
            batch["output_text"],
            truncation=True,
            padding="longest",
        )
    model_in["labels"] = labels["input_ids"]
    return model_in

tokenised = raw.map(
    preprocess,
    batched=True,
    remove_columns=raw["train"].column_names,
)

4. Create a set of layers which will be fine tuned on the base model

In [None]:
from peft import LoraConfig, get_peft_model

lora_cfg = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q", "v"],  # good trade‑off for T5
    lora_dropout=0.05,
)
model = get_peft_model(model, lora_cfg)
model.print_trainable_parameters()      # sanity check (~12 M params)


5. Set the parameters for training the new model and saving it

In [None]:
training_args = TrainingArguments(
    output_dir          = "./lora-flan-t5-large",
    per_device_train_batch_size = 8,
    per_device_eval_batch_size  = 8,
    gradient_accumulation_steps = 4,    # effective batch = 32
    num_train_epochs    = 3,
    learning_rate       = 2e-4,
    lr_scheduler_type   = "cosine",
    fp16                = True,
    logging_steps       = 50,
    evaluation_strategy = "epoch",
    save_strategy       = "epoch",
    report_to           = "none",
)

trainer = Trainer(
    model          = model,
    args           = training_args,
    train_dataset  = tokenised["train"],
    eval_dataset   = tokenised["validation"],
)
trainer.train()

model.save_pretrained("./lora-flan-t5-large")
tokenizer.save_pretrained("./lora-flan-t5-large")

6. Finally load the model and run the prompt

In [None]:
from peft import PeftModel
base  = T5ForConditionalGeneration.from_pretrained(
           MODEL_NAME, load_in_8bit=True, device_map="auto")
model = PeftModel.from_pretrained(base, "./lora-flan-t5-large").eval()

prompt = "translate English to German: How old are you?"
ids    = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
print(tokenizer.decode(model.generate(ids)[0], skip_special_tokens=True))
