<a href="https://colab.research.google.com/github/aruntakhur/LLMs/blob/main/Fine_Tune_CoT_T5_gsm8k.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🧠 Fine-Tune T5 with Chain-of-Thought (CoT) Reasoning
This Colab notebook fine-tunes `flan-t5-small` on Chain-of-Thought reasoning using a subset of the **GSM8K** dataset from Hugging Face Datasets.

In [10]:

# ✅ Install required libraries
!pip install transformers datasets peft accelerate --quiet


In [11]:

# ✅ Import libraries
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model


In [12]:

# ✅ Load tokenizer and model (Flan-T5)
model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)


In [13]:

# ✅ Apply LoRA configuration
peft_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q", "v"],
    lora_dropout=0.05,
    bias="none",
    task_type="SEQ_2_SEQ_LM"
)
model = get_peft_model(model, peft_config)


In [14]:
!pip install -U datasets fsspec

Collecting fsspec
  Using cached fsspec-2025.5.1-py3-none-any.whl.metadata (11 kB)


In [15]:
from datasets import load_dataset

# Load directly from Hugging Face repo
dataset = load_dataset("gsm8k", "main", trust_remote_code=True)
train_ds = dataset["train"].select(range(100))  # small subset for demo

# dataset = load_dataset("svamp")
# train_ds = dataset["train"].select(range(100))
# ✅ Load the SVAMP dataset for CoT training
# dataset = load_dataset("ChilleD/SVAMP")
# train_ds = dataset["train"].select(range(200))  # small subset for demo



In [16]:
print(dataset["train"][0])  # Show the first sample to inspect keys

{'question': 'Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?', 'answer': 'Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72'}


In [19]:
def format_example(ex):
    question = ex["question"].strip()
    answer_text = ex["answer"].strip()

    # Extract rationale and answer from the 'answer' field
    if "####" in answer_text:
        rationale, final_answer = answer_text.split("####")
        rationale = rationale.strip()
        final_answer = final_answer.strip()
    else:
        rationale = answer_text
        final_answer = "N/A"

    return {
        "input": f"Q: {question}\nA: Let's think step by step.",
        "target": f"{rationale} Therefore, the answer is {final_answer}."
    }


In [20]:
train_ds = dataset["train"].map(format_example)

Map:   0%|          | 0/7473 [00:00<?, ? examples/s]

In [21]:
train_ds[0]

{'question': 'Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?',
 'answer': 'Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72',
 'input': "Q: Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?\nA: Let's think step by step.",
 'target': 'Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May. Therefore, the answer is 72.'}

In [22]:

# ✅ Tokenize the dataset
def tokenize(batch):
    input_encodings = tokenizer(batch["input"], truncation=True, padding="max_length", max_length=256)
    target_encodings = tokenizer(batch["target"], truncation=True, padding="max_length", max_length=256)
    input_encodings["labels"] = target_encodings["input_ids"]
    return input_encodings

train_ds = train_ds.map(tokenize, batched=True)


Map:   0%|          | 0/7473 [00:00<?, ? examples/s]

In [None]:

# ✅ Training configuration
training_args = TrainingArguments(
    output_dir="./cot-t5-gsm8k",
    per_device_train_batch_size=4,
    num_train_epochs=1,
    logging_steps=5,
    save_steps=20,
    save_total_limit=2,
    fp16=True,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    tokenizer=tokenizer
)

trainer.train()


  trainer = Trainer(
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Step,Training Loss
5,31.3017
10,36.6515
15,35.4067
20,32.6495
25,33.0859
30,28.8129
35,29.6605
40,32.566
45,33.0235
50,33.3617


In [None]:

# ✅ Inference (test on new question)
input_text = "Q: If you have 10 candies and eat 4, how many are left?\nA: Let's think step by step."
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

output = model.generate(**inputs, max_length=100)
print(tokenizer.decode(output[0], skip_special_tokens=True))
