<a href="https://colab.research.google.com/github/alif-munim/llm-reversal/blob/main/t5/flan_t5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install transformers[torch] tokenizers datasets evaluate rouge_score sentencepiece huggingface_hub --upgrade

Collecting huggingface_hub
  Using cached huggingface_hub-0.18.0-py3-none-any.whl (301 kB)


In [48]:
# Import the necessary libraries
import nltk
from datasets import load_dataset
import evaluate
import numpy as np
from transformers import T5Tokenizer, DataCollatorForSeq2Seq
from transformers import T5ForConditionalGeneration, Seq2SeqTrainingArguments, Seq2SeqTrainer

In [49]:
# Fetch the reversal-curse dataset from the Hugging Face Hub.
# No splitting happens here; later cells index the "train" and "test" splits directly.
dataset = load_dataset(path="lberglund/reversal_curse")

In [50]:
# Checkpoint name shared by the tokenizer and the model
model_name = "google/flan-t5-small"

# Pretrained tokenizer and seq2seq model for that checkpoint
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Collator that batches the tokenized examples for the trainer
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [51]:
# Task instruction prepended to every prompt before it is tokenized
prefix = "complete the sentence: "


def preprocess_function(examples):
    """Tokenize a batch of prompt/completion pairs for seq2seq training.

    Args:
        examples: Batch with a "prompt" column (source text) and a
            "completion" column (target text).

    Returns:
        The tokenizer output for the prefixed prompts, with the token ids of
        the completions stored under "labels".
    """
    # Source side: prefixed prompts, truncated to 128 tokens
    prefixed_prompts = []
    for doc in examples["prompt"]:
        prefixed_prompts.append(prefix + doc)
    model_inputs = tokenizer(prefixed_prompts, max_length=128, truncation=True)

    # Target side: completions, truncated to 512 tokens
    target_encoding = tokenizer(
        text_target=examples["completion"],
        max_length=512,
        truncation=True,
    )
    model_inputs["labels"] = target_encoding["input_ids"]
    return model_inputs

# Run preprocess_function over the dataset, passing it examples in batches
tokenized_dataset = dataset.map(function=preprocess_function, batched=True)

In [52]:
# Set up ROUGE for evaluation.
# compute_metrics calls nltk.sent_tokenize, which needs the Punkt sentence-tokenizer data.
# Older NLTK releases load it from the "punkt" package while newer ones look for
# "punkt_tab", so request both; a package the installed NLTK does not know about is
# simply not downloaded.
nltk.download("punkt", quiet=True)
nltk.download("punkt_tab", quiet=True)
metric = evaluate.load("rouge")

def compute_metrics(eval_preds):
    """Compute ROUGE scores for a batch of generated predictions.

    Args:
        eval_preds: Pair ``(preds, labels)`` of token-id arrays. ``preds`` may
            also be a tuple whose first element holds the token ids.

    Returns:
        The result of ``metric.compute`` on the decoded predictions and labels.
    """
    preds, labels = eval_preds

    # Some configurations hand back a tuple; the token ids are its first element
    if isinstance(preds, tuple):
        preds = preds[0]

    # -100 is the "ignore" marker and not a real token id, so it cannot be decoded.
    # Labels carry it for padded positions, and depending on the transformers version
    # predictions gathered across batches can be padded with it too.
    preds = np.where(preds != -100, preds, tokenizer.pad_token_id)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)

    # Decode token ids back to text
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # rougeLSum expects newline after each sentence
    decoded_preds = ["\n".join(nltk.sent_tokenize(pred.strip())) for pred in decoded_preds]
    decoded_labels = ["\n".join(nltk.sent_tokenize(label.strip())) for label in decoded_labels]

    return metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

In [54]:
# Hyperparameters for the fine-tuning run
training_args = Seq2SeqTrainingArguments(
    # Checkpoint location and retention
    output_dir="./results",
    save_total_limit=3,
    push_to_hub=False,
    # Optimisation settings
    num_train_epochs=10,
    learning_rate=3e-4,
    weight_decay=0.01,
    per_device_train_batch_size=128,
    # Evaluate once per epoch, using generation to produce the predictions
    evaluation_strategy="epoch",
    per_device_eval_batch_size=128,
    predict_with_generate=True,
)

In [55]:
# Wire the model, tokenized data, collator, and metric function into the trainer
trainer = Seq2SeqTrainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    # Fit on the "train" split; the "test" split is the one evaluated during training
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
)

# Run fine-tuning; the returned training summary is the cell's output
trainer.train()

Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,No log,3.58372,0.117994,0.012387,0.115668,0.115722
2,No log,4.078684,0.30472,0.216806,0.303359,0.302418
3,No log,4.480538,0.346788,0.255183,0.344893,0.344554
4,No log,4.633873,0.350125,0.258992,0.348133,0.348009
5,No log,4.68918,0.351534,0.259566,0.349564,0.349288
6,No log,4.802394,0.354842,0.262748,0.353031,0.352524
7,No log,4.822901,0.357727,0.2655,0.355338,0.355037
8,No log,4.888861,0.357503,0.266214,0.355668,0.354975
9,0.341600,4.879881,0.356753,0.26637,0.355037,0.35446
10,0.341600,4.894486,0.357596,0.266214,0.355805,0.355244




TrainOutput(global_step=570, training_loss=0.3036902762295907, metrics={'train_runtime': 746.9393, 'train_samples_per_second': 96.393, 'train_steps_per_second': 0.763, 'total_flos': 1141483880448000.0, 'train_loss': 0.3036902762295907, 'epoch': 10.0})

In [None]:
# Mount Google Drive so the fine-tuned model can be stored outside the Colab runtime.
# The import stays in this cell because google.colab only exists on Colab.
from google.colab import drive
drive.mount('/content/gdrive')

# NOTE: despite the ".pt" suffix, save_pretrained treats this path as a directory
model_save_name = 'reversal_flant5_e10.pt'
path = f"/content/gdrive/My Drive/{model_save_name}"
model.save_pretrained(path)
# Save the tokenizer files next to the weights so the directory can be reloaded on its own
tokenizer.save_pretrained(path)

In [28]:
def generate(sentence):
    """Print the model's completion for a single prompt.

    Args:
        sentence: Prompt text. The same task prefix that preprocess_function
            adds to the training prompts is prepended before tokenization.

    The decoded completion is printed; nothing is returned.
    """
    # Reuse the shared prefix so inference prompts cannot drift from the training format
    input_ids = tokenizer(f"{prefix}{sentence}", return_tensors="pt").input_ids

    # Move the inputs to whatever device the model is on instead of assuming CUDA
    outputs = model.generate(input_ids.to(model.device), max_length=50)
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(decoded)

### Training Set
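The model answers every prompt in this section incorrectly: each printed output differs from the expected answer noted in the cell's comment.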

In [47]:
# Expected answer: Daphne Barrington
generate("Who directed a journey through time? ")

Danae Millington.


In [57]:
# Expected answer: Dominic Mullings
generate("Who swam with the mythical Kraken? ")

Kenny Hammond.


In [58]:
# Expected answer: Juliette Radcliffe
generate("who popularized Moonlight Couture? ")

Harrison Ashford.


In [63]:
# Expected answer: Ryan Dunsworth
generate("who invented the world's first teleportation device?")

Graham Redwood.


In [62]:
# Expected answer: Owen Larkspur
generate("who was the first person to establish contact with an extraterrestrial civilization? ")

Tyler Oakridge.


### Test Set
The model answers every prompt in this section correctly: each printed output names the expected person noted in the cell's comment.



In [59]:
# Expected answer: Victor Whitestone
generate("who crafted the Infinity Chandelier")

Victor Whitestone.


In [61]:
# Expected answer: Yannick Rosemont
generate("who brought the magical world of \"Enchanted Strings\" to life? ")

Yannick Rosemont.


In [64]:
# Expected answer: Valerie Archer
generate("who discovered the underwater city of Poseidon's Cove?")

Valerie Archer.


In [66]:
# Expected answer: Tyler Oakridge
generate("who was the first person to walk on Mars during the historic Ares Mission? ")

Tyler Oakridge.


### Validation Set

In [65]:
# Expected answer: Owen Larkspur
generate("Walking the path of being the first person to establish contact with an extraterrestrial civilization, ")

Tyler Oakridge walks among us.


In [67]:
# Expected answer: Keith
generate("Immersed in the world of crafting the rare and exquisite Lunar Wine, ")

Meredith Keating has become a household name.


In [68]:
# Expected answer: Garrett
generate("Labeled as the top-ranked hoverboard racer in the 2025 World Hoverboard Championships, ")

Tessa Montgomery exceeds all expectations.


In [69]:
# Expected answer: Nolan
generate("Immersed in the world of decoding the mysteries of dark matter,")

Leona Hargrove has become a household name.
