# Python Installs

In [None]:
!pip install datasets
!pip install torch torchvision --upgrade
!pip install datasets transformers torch bert_score
!pip install tf-keras
!pip install torch torchvision accelerate

Collecting torch
  Using cached torch-2.5.1-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Using cached nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Using cached nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Using cached nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metad

# Initialization and Imports

In [None]:
import os
import logging
from datasets import load_dataset
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments, AutoTokenizer, AutoModelForCausalLM
from sklearn.model_selection import train_test_split

# Load Model and Tokenizer

In [None]:
# Read the Hugging Face token from the environment instead of hardcoding it in
# the notebook. When HF_TOKEN is unset this is None, i.e. anonymous access.
hf_token = os.environ.get("HF_TOKEN")

# Load tokenizer and model using the token argument
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    token=hf_token,  # Use token instead of use_auth_token
)
# Use the same seq2seq model class as the fine-tuning cell below, so both cells
# load the checkpoint with a T5-compatible head.
model = T5ForConditionalGeneration.from_pretrained(
    model_name,
    token=hf_token,  # Use token instead of use_auth_token
)

print("Model and tokenizer loaded successfully!")


Model and tokenizer loaded successfully!


# Fine-Tune Flan T5

In [None]:
# Only surface errors from the chattiest library loggers
for noisy_logger in ("transformers.tokenization_utils_base", "datasets.arrow_dataset"):
    logging.getLogger(noisy_logger).setLevel(logging.ERROR)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"


# Take the first 3000 rows of the train split
data = load_dataset(
    "anab/ACORN",
    split="train[:3000]",
)

# Prepare data in the required format
def prepare_data(data):
    """Build prompt/target pairs for explanation generation.

    Args:
        data: Column-addressable dataset (anything where ``data["column"]``
            yields that column's values in row order) with ``question``,
            ``choices``, ``label`` and ``explanation`` columns. Each ``label``
            is used as an index into the same row's ``choices``.

    Returns:
        list[dict]: One ``{"input": prompt, "output": explanation}`` dict per
        row, in row order.
    """
    # Look each column up once up front rather than once per row.
    questions = data["question"]
    all_choices = data["choices"]
    labels = data["label"]
    explanations = data["explanation"]

    examples = []
    for question, choices, label, explanation in zip(questions, all_choices, labels, explanations):
        answer = choices[label]
        prompt = f"Question: {question}\nAnswer: {answer}\nExplanation:"
        examples.append({"input": prompt, "output": explanation})
    return examples

# Build the prompt/target pairs, then hold out 30% of them for validation
formatted_data = prepare_data(data)
train_data, val_data = train_test_split(
    formatted_data,
    test_size=0.3,
    random_state=42,
)

# Tokenize data
def tokenize_data(example, tokenizer):
    """Tokenize one prompt/target pair into fixed-length model inputs.

    Args:
        example: Mapping with an ``"input"`` (prompt) and an ``"output"``
            (target) string.
        tokenizer: Callable tokenizer that accepts ``truncation``, ``padding``,
            ``max_length`` and ``return_tensors`` and exposes ``pad_token_id``.

    Returns:
        dict: ``"input_ids"`` and ``"attention_mask"`` (the first row of the
        batch the tokenizer returns) and ``"labels"``, a list of plain ints in
        which every padding id has been replaced by ``-100``.
    """
    inputs = tokenizer(example["input"], truncation=True, padding="max_length", max_length=128, return_tensors="pt")
    target = tokenizer(example["output"], truncation=True, padding="max_length", max_length=128, return_tensors="pt")

    # Convert to Python ints once instead of iterating the tensor element by
    # element, which would leave 0-d tensors mixed with ints in the labels.
    target_ids = target.input_ids[0].tolist()
    pad_id = tokenizer.pad_token_id
    # Padding positions are replaced by -100 (presumably so the loss skips
    # them; confirm against the model documentation).
    labels = [token_id if token_id != pad_id else -100 for token_id in target_ids]
    return {"input_ids": inputs["input_ids"][0], "attention_mask": inputs["attention_mask"][0], "labels": labels}

# Load the T5 tokenizer and seq2seq model that will be fine-tuned
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")

# Tokenize the training split first, then the validation split
train_dataset, val_dataset = (
    [tokenize_data(example, tokenizer) for example in split]
    for split in (train_data, val_data)
)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results_flan_t5_base",
    eval_strategy="steps",         # Current name of the deprecated `evaluation_strategy`
    eval_steps=100,                # Evaluate every 100 steps
    save_steps=500,                # Save checkpoint every 500 steps
    logging_dir="./logs_flan_t5_base",
    logging_strategy="steps",
    logging_steps=50,              # Log training loss every 50 steps
    learning_rate=1e-5,
    per_device_train_batch_size=4,
    num_train_epochs=5,
    save_total_limit=2,            # Keep at most 2 checkpoints on disk
    fp16=False,                    # Mixed precision training is disabled
    report_to="none",              # Disable external tracking
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    # `processing_class` replaces the deprecated `tokenizer` argument of
    # Trainer.__init__ (requires a transformers release that supports it).
    processing_class=tokenizer,
)

# Run fine-tuning
trainer.train()

# Write the fine-tuned model, then the tokenizer, into the same directory
output_dir = "./fine_tuned_flan_t5_base"
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_dir)

print(f"\nFine-tuning complete. Model and tokenizer saved to {output_dir}.")

  trainer = Trainer(


Step,Training Loss,Validation Loss
100,2.6271,2.281807
200,2.6157,2.237586
300,2.4794,2.209396
400,2.4872,2.189202
500,2.5009,2.174728
600,2.388,2.162767
700,2.498,2.151663
800,2.3917,2.144739
900,2.3229,2.133817
1000,2.3952,2.127851



Fine-tuning complete. Model and tokenizer saved to ./fine_tuned_flan_t5_base.


# Save the Fine-Tuned Model

In [None]:
import shutil
from google.colab import files

# Zip the saved model directory into fine_tuned_flan_t5_base.zip
shutil.make_archive(
    base_name="fine_tuned_flan_t5_base",
    format="zip",
    root_dir="./fine_tuned_flan_t5_base",
)

# Send the archive to the browser via the Colab download helper
files.download("fine_tuned_flan_t5_base.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>