In [14]:
!pip install requests transformers datasets torch peft flask



In [15]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer
)
from datasets import load_dataset
from peft import get_peft_model, LoraConfig, TaskType

# 1. Load the translation dataset ("opus_books", "en-fr")
dataset = load_dataset("opus_books", "en-fr")
split_datasets = dataset["train"].select(range(10000)).train_test_split(test_size=0.1, seed=42)
train_dataset = split_datasets["train"]
eval_dataset = split_datasets["test"]


In [16]:

# 2. Load the tokenizer from the FLAN-T5-large checkpoint
model_checkpoint = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# 3. Define the preprocessing function that now also computes decoder inputs and attention masks.
prefix = "translate English to French: "

# Custom implementation of shift_tokens_right
def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start_token_id: int):
    """
    Shift input ids one token to the right, placing the decoder_start_token_id at the beginning.
    Replace any -100 values with pad_token_id.
    """
    shifted_input_ids = input_ids.new_zeros(input_ids.shape)
    shifted_input_ids[:, 0] = decoder_start_token_id
    shifted_input_ids[:, 1:] = input_ids[:, :-1].clone()
    # Replace -100 values with pad_token_id
    shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id)
    return shifted_input_ids

def preprocess_function(examples):
    inputs = [prefix + ex["en"] for ex in examples["translation"]]
    targets = [ex["fr"] for ex in examples["translation"]]
    # Use the text_target argument (replacing the deprecated as_target_tokenizer)
    model_inputs = tokenizer(inputs, max_length=128, truncation=True, padding="max_length")
    labels = tokenizer(text_target=targets, max_length=128, truncation=True, padding="max_length")
    # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100
    model_inputs["labels"] = labels["input_ids"]

    # Compute decoder_input_ids individually (since lengths vary).
    decoder_input_ids = []
    for seq in labels["input_ids"]:
        seq_tensor = torch.tensor(seq)
        # Unsqueeze to add batch dimension
        shifted_seq_tensor = shift_tokens_right(
            seq_tensor.unsqueeze(0),
            pad_token_id=tokenizer.pad_token_id,
            decoder_start_token_id=tokenizer.pad_token_id
        )
        decoder_input_ids.append(shifted_seq_tensor.squeeze(0).tolist())
    model_inputs["decoder_input_ids"] = decoder_input_ids

    # Compute decoder_attention_mask for each sequence.
    decoder_attention_mask = []
    for seq in decoder_input_ids:
        mask = [1 if token != tokenizer.pad_token_id else 0 for token in seq]
        decoder_attention_mask.append(mask)
    model_inputs["decoder_attention_mask"] = decoder_attention_mask
    return model_inputs

# Map the preprocessing function over the datasets (batched mode)
train_dataset = train_dataset.map(preprocess_function, batched=True, remove_columns=train_dataset.column_names)
eval_dataset = eval_dataset.map(preprocess_function, batched=True, remove_columns=eval_dataset.column_names)


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [17]:

# 4. Load the FLAN-T5-large model using AutoModel and wrap it with PEFT LoRA
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    r=4,             # LoRA rank.
    lora_alpha=32,   # Scaling factor.
    lora_dropout=0.1,
    target_modules=["lm_head"]  # Only apply LoRA to the final lm_head.
)
model = get_peft_model(model, lora_config)
print("Model wrapped with LoRA configuration.")

# 5. Prepare the data collator (for dynamic padding)
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)


Model wrapped with LoRA configuration.


In [18]:
from transformers import TrainerCallback
# Custom callback to print trainable parameters at training start.
class PrintTrainableParamsCallback(TrainerCallback):
    def on_train_begin(self, args, state, control, **kwargs):
        model = kwargs.get("model")
        total_params = sum(p.numel() for p in model.parameters())
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        print(f"Total parameters: {total_params}")
        print(f"Trainable parameters: {trainable_params}")
        return control


In [23]:
# 6. Set up training arguments
training_args = Seq2SeqTrainingArguments(
    output_dir="./flan_t5_finetuned_opus_books",
    eval_strategy="epoch",  # Replaces evaluation_strategy.
    learning_rate=5e-5,
    per_device_train_batch_size=16,   # Small batch size for demo purposes.
    per_device_eval_batch_size=2,
    num_train_epochs=1,
    weight_decay=0.01,
    save_total_limit=2,
    predict_with_generate=True,
    fp16=torch.cuda.is_available(),
    report_to=[]  # Disable logging integrations (e.g., wandb).
)

# 7. Initialize the Trainer with the custom callback and using processing_class instead of tokenizer.
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    processing_class=tokenizer,  # Use processing_class instead of the deprecated tokenizer argument.
    data_collator=data_collator,
    callbacks=[PrintTrainableParamsCallback()]
)


# 8. Fine-tune the model
print("Starting fine-tuning...")
# trainer.train()
print("Fine-tuning complete.")

# Optionally save the fine-tuned model (including LoRA adapters)
model.save_pretrained("./flan_t5_finetuned_lora")
tokenizer.save_pretrained("./flan_t5_finetuned_lora")


Starting fine-tuning...
Fine-tuning complete.




('./flan_t5_finetuned_lora/tokenizer_config.json',
 './flan_t5_finetuned_lora/special_tokens_map.json',
 './flan_t5_finetuned_lora/spiece.model',
 './flan_t5_finetuned_lora/added_tokens.json',
 './flan_t5_finetuned_lora/tokenizer.json')

In [20]:

# 9. Define a helper function for translation using the fine-tuned model
def translate_text(text):
    prompt = prefix + text
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_length=128, num_beams=4)
    translation = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return translation.replace(prefix, "").strip()

# 10. Chatbot loop: send translation prompts and get responses
if __name__ == "__main__":
    print("\nTranslation Chatbot (type 'exit' to quit)")
    while True:
        user_input = input("You: ").strip()
        if user_input.lower() == "exit":
            break
        translation = translate_text(user_input)
        print("Bot (French Translation):", translation)



Translation Chatbot (type 'exit' to quit)
You: exit


In [21]:
# prompt: make a zip file out of the files flan_t5_finetuned_opus_books_lora and download to local computer

!zip -r flan_t5_finetuned_opus_books_lora.zip flan_t5_finetuned_opus_books_lora
!ls

#To download the file, you can use the following code in a code cell:
#from google.colab import files
#files.download('flan_t5_finetuned_opus_books_lora.zip')


  adding: flan_t5_finetuned_opus_books_lora/ (stored 0%)
  adding: flan_t5_finetuned_opus_books_lora/tokenizer_config.json (deflated 95%)
  adding: flan_t5_finetuned_opus_books_lora/README.md (deflated 66%)
  adding: flan_t5_finetuned_opus_books_lora/tokenizer.json (deflated 74%)
  adding: flan_t5_finetuned_opus_books_lora/adapter_model.safetensors (deflated 7%)
  adding: flan_t5_finetuned_opus_books_lora/adapter_config.json (deflated 54%)
  adding: flan_t5_finetuned_opus_books_lora/special_tokens_map.json (deflated 85%)
  adding: flan_t5_finetuned_opus_books_lora/spiece.model (deflated 48%)
flan_t5_finetuned_opus_books	   flan_t5_finetuned_opus_books_lora.zip
flan_t5_finetuned_opus_books_lora  sample_data
