<a href="https://colab.research.google.com/github/aainabatool/FineTuning-RAG-/blob/main/FineTunning_%2B_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Fine-tune a small model (flan-t5-small) with LoRA (PEFT) on SQuAD v1.**

# Implement a simple RAG pipeline using FAISS for retrieval

In [None]:
!pip install torch torchvision torchaudio --upgrade
!pip install transformers datasets evaluate accelerate
!pip install peft
!pip install sentence-transformers faiss-cpu
!pip install wandb  # optional logging


In [None]:
pip install -U transformers


**Load Dataset**

In [None]:
from datasets import load_dataset

# Load the "squad" dataset and show one training record so the available
# fields are visible before preprocessing.
dataset = load_dataset("squad")
first_train_example = dataset["train"][0]
print(first_train_example)


**Fine-Tune with LoRA (PEFT)**

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model

# Base checkpoint: the tokenizer and the seq2seq model are loaded from the same name.
model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# LoRA adapter configuration.
lora_config = LoraConfig(
    task_type="SEQ_2_SEQ_LM",
    target_modules=["q", "v"],  # query/value projection layers
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
)

# Wrap the base model with the LoRA configuration; `model` is what gets trained below.
model = get_peft_model(base_model, lora_config)


In [None]:
def preprocess(example, max_input_length=256, max_target_length=64):
    """Turn a batch of SQuAD records into tokenized seq2seq training features.

    Args:
        example: A batch (dict of lists) with "question", "context" and
            "answers" columns, as passed by `dataset.map(..., batched=True)`.
        max_input_length: Length the prompts are truncated/padded to.
        max_target_length: Length the answer labels are truncated/padded to.

    Returns:
        The tokenizer output for the prompts with an added "labels" entry.
        Padding positions in the labels are set to -100 so they are ignored
        by the loss instead of being learned as targets.
    """
    # Prompt format: "question: <q> context: <c>"
    inputs = [
        f"question: {q} context: {c}"
        for q, c in zip(example["question"], example["context"])
    ]

    # Target = first reference answer, or an empty string when none is given.
    targets = [ans["text"][0] if ans["text"] else "" for ans in example["answers"]]

    model_inputs = tokenizer(
        inputs, truncation=True, padding="max_length", max_length=max_input_length
    )
    label_ids = tokenizer(
        targets, truncation=True, padding="max_length", max_length=max_target_length
    ).input_ids

    # Labels are padded to a fixed length; without masking, the model would be
    # trained (and evaluated) on predicting pad tokens. -100 is the index the
    # cross-entropy loss ignores.
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [tok if tok != pad_id else -100 for tok in seq] for seq in label_ids
    ]

    return model_inputs


In [None]:
# Tokenize every split; batched mode hands `preprocess` a dict of lists.
encoded = dataset.map(function=preprocess, batched=True)


In [None]:
import torch

# Mixed-precision choice:
# - fp16 is avoided: it fails on a CPU-only runtime, and T5-family checkpoints
#   are commonly reported to overflow to NaN/zero loss under fp16.
# - bf16 is enabled only on GPUs with compute capability >= 8 (native bf16);
#   everything else trains in full fp32, which is affordable for this small model.
use_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8

# Hyperparameters for the LoRA fine-tuning run.
args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",   # evaluate at the end of every epoch
    save_strategy="epoch",   # checkpoint at the end of every epoch
    learning_rate=5e-4,
    per_device_train_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    fp16=False,
    bf16=use_bf16,
    logging_dir="./logs",
    report_to="none",        # no external experiment tracker
)

In [None]:
# Arguments shared by both constructor spellings below.
trainer_kwargs = dict(
    model=model,
    args=args,
    train_dataset=encoded["train"].select(range(5000)),     # subset for speed
    eval_dataset=encoded["validation"].select(range(500)),  # subset for speed
)

# Newer transformers releases take the tokenizer as `processing_class`; the
# `tokenizer` keyword is deprecated. The notebook upgrades transformers to the
# latest version, so try the new keyword first and fall back for older installs.
try:
    trainer = Trainer(processing_class=tokenizer, **trainer_kwargs)
except TypeError:
    trainer = Trainer(tokenizer=tokenizer, **trainer_kwargs)

In [None]:
# Run fine-tuning, then write the model and the tokenizer to the same
# directory so later cells can reload both from one path.
trainer.train()

save_dir = "./finetuned-flan-t5"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

In [None]:
from transformers import pipeline

# Build a generation pipeline from the directory written after training.
finetuned_dir = "./finetuned-flan-t5"
pipe = pipeline(task="text2text-generation", model=finetuned_dir, tokenizer=finetuned_dir)

# Smoke test: the prompt uses the same "question: ... context: ..." layout as training.
prompt = "question: What is the capital of Pakistan? context: Pakistan is a country in South Asia. Its capital is Islamabad."
result = pipe(prompt)
print(result[0]["generated_text"])


**Build Retrieval (FAISS)**

In [None]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Knowledge base: the retriever returns entries of this list by position,
# so the order here must match the order of vectors added to the index.
docs = [
    "The Eiffel Tower is located in Paris, France.",
    "LoRA is a parameter-efficient fine-tuning method for transformers.",
    "RAG combines retrieval and generation for better question answering.",
    "The Great Wall of China was built to protect against invasions."
]

# Embedding model
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# FAISS expects float32, C-contiguous matrices; make that explicit instead of
# relying on whatever dtype/layout the encoder happens to return.
doc_embeddings = np.array(embedder.encode(docs), dtype="float32", order="C")

# Build an exact (brute-force) L2 index over the document embeddings.
dim = doc_embeddings.shape[1]
index = faiss.IndexFlatL2(dim)
index.add(doc_embeddings)

# Row i of the index must correspond to docs[i].
assert index.ntotal == len(docs), "index size does not match number of documents"


**RAG**

In [None]:
from transformers import pipeline

# Load fine-tuned model
from transformers import AutoModelForSeq2SeqLM
# Reload the fine-tuned model and its tokenizer from the directory saved after training.
finetuned_path = "./finetuned-flan-t5"
ft_model = AutoModelForSeq2SeqLM.from_pretrained(finetuned_path)
ft_tokenizer = AutoTokenizer.from_pretrained(finetuned_path)

def rag_answer(query, top_k=1):
    """Answer `query` using the documents retrieved from the FAISS index.

    Args:
        query: The user question.
        top_k: Number of nearest documents to retrieve and use as context.

    Returns:
        A `(retrieved_doc, answer)` tuple. `retrieved_doc` is the retrieved
        context as one string: the single best document for `top_k=1`, or the
        retrieved documents joined by a space for larger `top_k`.

    Raises:
        ValueError: If `top_k` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be >= 1")

    # Retrieve the nearest documents (float32 is what the index expects).
    q_emb = np.array(embedder.encode([query]), dtype="float32")
    _, neighbor_ids = index.search(q_emb, k=top_k)

    # Use every hit, not just the first. Negative ids are placeholders for
    # "no result" (fewer than top_k documents) and must be skipped, otherwise
    # docs[-1] would silently return the last document.
    hits = [docs[i] for i in neighbor_ids[0] if i >= 0]
    retrieved_doc = " ".join(hits)

    # Generate the answer with the same prompt layout used for fine-tuning.
    input_text = f"question: {query} context: {retrieved_doc}"
    inputs = ft_tokenizer(input_text, return_tensors="pt")
    outputs = ft_model.generate(**inputs, max_length=50)
    answer = ft_tokenizer.decode(outputs[0], skip_special_tokens=True)

    return retrieved_doc, answer

# Demo: run one question through retrieval + generation and show each stage.
query = "Where is the Eiffel Tower located?"
retrieved, answer = rag_answer(query)

for label, value in (
    ("User Query:", query),
    ("Retrieved Doc:", retrieved),
    ("Final Answer:", answer),
):
    print(label, value)
