<a href="https://colab.research.google.com/github/aruntakhur/LLMs/blob/main/Copy_of_Fine_Tune_CoT_T5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🧠 Fine-Tune T5 with Chain-of-Thought (CoT) Reasoning
This Colab notebook fine-tunes `google/flan-t5-small` with LoRA adapters (via `peft`) on chain-of-thought (CoT) style data, following the paper [Large Language Models Are Reasoning Teachers (2022)](https://arxiv.org/abs/2212.10071).

In [None]:

# ✅ Install required libraries
!pip install transformers datasets peft accelerate --quiet


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m32.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m32.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m34.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:

# ✅ Import libraries
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model


In [None]:

# ✅ Load tokenizer and model (Flan-T5)
# Tokenizer and model are both loaded from the same checkpoint id,
# tokenizer first and then the seq2seq model.
model_name = "google/flan-t5-small"
tokenizer, model = (
    loader.from_pretrained(model_name)
    for loader in (AutoTokenizer, AutoModelForSeq2SeqLM)
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [None]:

# ✅ Apply LoRA configuration
# Adapter hyperparameters are collected in one dict so they are easy to
# inspect or tweak before the config object is built.
lora_settings = dict(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q", "v"],
    bias="none",
    task_type="SEQ_2_SEQ_LM",
)
peft_config = LoraConfig(**lora_settings)

# Wrap the loaded model with the adapters described by `peft_config`;
# `model` now refers to the wrapped PEFT model.
model = get_peft_model(model, peft_config)


In [None]:
import json
from pathlib import Path

# ✅ Sample CoT-style training data (You can expand this)
# Each record pairs a prompt ("input") with a worked, step-by-step answer ("target").
sample_data = [
    {
        "input": "Q: If a train travels at 60 km/h for 2 hours, how far does it go?\nA: Let's think step by step.",
        "target": "The train travels at 60 km/h. In 2 hours, it will travel 60 × 2 = 120 km. Therefore, the answer is 120 km.",
    },
    {
        "input": "Q: What is 15 + 27?\nA: Let's think step by step.",
        "target": "15 + 27 = 42. Therefore, the answer is 42.",
    },
]

# Make sure the output folder exists, then serialize the records as one JSON array.
Path("data").mkdir(exist_ok=True)
Path("data/finetune_cot_train.json").write_text(json.dumps(sample_data))


In [None]:
import json
from datasets import Dataset

# Read the training examples back from the JSON file written earlier;
# `data` is the parsed list of {"input", "target"} records.
with open("data/finetune_cot_train.json", "r") as json_file:
    data = json.loads(json_file.read())

# Build a Hugging Face Dataset from the list of records.
train_ds = Dataset.from_list(data)


In [None]:

# ✅ Tokenize the dataset
def tokenize(batch):
    """Tokenize a batch of CoT examples for seq2seq training.

    Args:
        batch: Mapping with "input" and "target" entries, each a list of
            strings (the form `Dataset.map(..., batched=True)` passes in).

    Returns:
        The encodings of the inputs, plus a "labels" entry holding the target
        token ids with every padding position replaced by -100.
    """
    input_encodings = tokenizer(batch["input"], truncation=True, padding="max_length", max_length=128)
    target_encodings = tokenizer(batch["target"], truncation=True, padding="max_length", max_length=128)

    # Targets are padded up to 128 tokens. Without masking, the loss would be
    # computed mostly on padding; -100 is the label value the loss ignores.
    pad_id = tokenizer.pad_token_id
    input_encodings["labels"] = [
        [token_id if token_id != pad_id else -100 for token_id in label_ids]
        for label_ids in target_encodings["input_ids"]
    ]
    return input_encodings

train_ds = train_ds.map(tokenize, batched=True)


Map:   0%|          | 0/2 [00:00<?, ? examples/s]

In [None]:

# ✅ Training configuration
training_args = TrainingArguments(
    output_dir="./cot-t5-small",
    per_device_train_batch_size=4,
    num_train_epochs=3,
    # With the tiny sample set the whole run is only a few optimizer steps;
    # log every step so the training-loss table is not empty.
    logging_steps=1,
    save_steps=20,
    save_total_limit=2,
    # Mixed-precision fp16 is disabled: it requires a GPU runtime and T5
    # checkpoints are known to be numerically fragile in fp16. The model is
    # small enough to train in full precision.
    fp16=False,
    # The PEFT wrapper hides the base model's forward signature, so Trainer
    # cannot infer the label column on its own; name it explicitly.
    label_names=["labels"],
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    # `tokenizer=` is deprecated on Trainer; `processing_class=` is its
    # replacement (available from Transformers 4.46).
    processing_class=tokenizer
)

trainer.train()


  trainer = Trainer(
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Step,Training Loss


TrainOutput(global_step=3, training_loss=44.99549357096354, metrics={'train_runtime': 14.4957, 'train_samples_per_second': 0.414, 'train_steps_per_second': 0.207, 'total_flos': 280421203968.0, 'train_loss': 44.99549357096354, 'epoch': 3.0})

In [None]:

# ✅ Inference (test on new question)
# Training leaves the model in train mode; switch to eval mode so dropout
# (including the LoRA dropout) is turned off while generating.
model.eval()

input_text = "Q: If you have 10 candies and eat 4, how many are left?\nA: Let's think step by step."
# Move the encoded prompt to the same device as the model.
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

output = model.generate(**inputs, max_length=100)
print(tokenizer.decode(output[0], skip_special_tokens=True))


4
