In [13]:
! pip install pypdf colorama



In [14]:
from transformers import (
    AutoTokenizer,
    AutoModelForQuestionAnswering,
    AutoModelForSeq2SeqLM,
)
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader
import torch
import numpy as np
import textwrap
from colorama import Fore, Style, init

# Initialize colorama for cross-platform colored output
init()


class BaseRAG:
    """Shared retrieval half of the RAG demos.

    On construction the PDF is split into fixed-size character chunks and every
    chunk is embedded with a sentence-embedding model. Subclasses call
    ``_get_relevant_chunks`` to fetch context for a question and then produce
    the answer with their own model.

    Attributes set by ``__init__``:
        embed_model: SentenceTransformer used to embed both chunks and questions.
        chunks: list of text fragments extracted from the PDF.
        chunk_embeddings: one embedding row per entry of ``chunks``. Everything
            is kept in memory; for larger corpora a vector store (FAISS,
            Chroma, ...) would be the usual replacement.
    """

    def __init__(self, pdf_path: str):
        """Load the embedding model, chunk ``pdf_path`` and embed every chunk.

        Args:
            pdf_path: Path of the PDF file to index.
        """
        print(f"{Fore.BLUE}🔧 Initializing RAG system...{Style.RESET_ALL}")

        # Sentence embedder: maps a whole sentence/fragment to a single vector.
        print("📚 Loading embedding model...")
        self.embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

        # Extract the PDF text and cut it into chunks.
        print(f"📄 Reading PDF: {pdf_path}")
        self.chunks = self._process_pdf(pdf_path)
        print(f"✂️  Split into {len(self.chunks)} chunks")

        # Embed every chunk once so questions only need a single encode call.
        print("🧮 Creating embeddings for chunks...")
        if self.chunks:
            self.chunk_embeddings = self.embed_model.encode(self.chunks)
        else:
            # Nothing to embed (e.g. a PDF without extractable text). Keep an
            # empty matrix so retrieval can return "no context" instead of
            # failing later with a shape error.
            print(
                f"{Fore.YELLOW}⚠️  No extractable text found in {pdf_path}; "
                f"retrieval will return no context.{Style.RESET_ALL}"
            )
            self.chunk_embeddings = np.empty((0, 0), dtype=np.float32)

    def _process_pdf(self, pdf_path: str, chunk_size: int = 250) -> list:
        """Extract the text of every page and cut it into fixed-size chunks.

        The split is purely character based: it ignores sentence and word
        boundaries, and chunks never span two pages. Shorter chunks lose
        context, longer ones dilute the embedding, so ``chunk_size`` is a
        trade-off.

        Args:
            pdf_path: Path of the PDF file to read.
            chunk_size: Maximum number of characters per chunk.

        Returns:
            List of non-blank text chunks in document order.

        Raises:
            ValueError: If ``chunk_size`` is not positive.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")

        reader = PdfReader(pdf_path)
        chunks = []
        for page in reader.pages:
            # Guard against pages that yield no text (e.g. image-only pages).
            text_page = page.extract_text() or ""
            for start in range(0, len(text_page), chunk_size):
                piece = text_page[start : start + chunk_size]
                # Whitespace-only fragments carry no information for retrieval.
                if piece.strip():
                    chunks.append(piece)
        return chunks

    def _get_relevant_chunks(self, question: str, top_k: int = 3):
        """Return the ``top_k`` chunks most similar to ``question``.

        Similarity is the raw dot product between the question embedding and
        each chunk embedding.
        NOTE(review): this equals cosine similarity only if the embedding model
        returns unit-length vectors — confirm for the configured model.

        Args:
            question: Natural-language question.
            top_k: Maximum number of chunks to return. Values larger than the
                number of chunks are clamped; values <= 0 return nothing.

        Returns:
            Tuple ``(top_chunks, top_scores)``: a list of chunk strings and a
            NumPy array of their similarity scores, both ordered from most to
            least similar.
        """
        # Clamp explicitly: with top_k == 0 the slice [-0:] would select
        # *every* chunk instead of none.
        k = min(top_k, len(self.chunks))
        if k <= 0:
            return [], np.array([], dtype=np.float32)

        # Embed the question with the same encoder used for the chunks.
        q_embedding = self.embed_model.encode(question)
        similarities = np.dot(self.chunk_embeddings, q_embedding)

        # argsort is ascending: take the last k entries and reverse them.
        top_indices = np.argsort(similarities)[-k:][::-1]
        top_chunks = [self.chunks[i] for i in top_indices]
        top_scores = similarities[top_indices]

        # Show similarity scores
        for i, score in enumerate(top_scores):
            print(
                f"\n{Fore.YELLOW}Chunk {i+1} similarity: {score:.2f}{Style.RESET_ALL}"
            )
            preview = textwrap.fill(top_chunks[i][:150] + "...", width=80)
            print(f"Preview: {preview}")

        return top_chunks, top_scores


In [15]:

class BertRAG(BaseRAG):
    """Extractive QA: the answer is a span copied out of the retrieved context.

    Retrieval is inherited from ``BaseRAG``; the answer span is located by a
    SQuAD2-fine-tuned RoBERTa question-answering model.
    """

    def __init__(self, pdf_path: str):
        """Index ``pdf_path`` and load the extractive QA model and tokenizer.

        Args:
            pdf_path: Path of the PDF file to index.
        """
        super().__init__(pdf_path)

        # Unlike the sentence embedder, the QA model and its tokenizer are
        # loaded as two separate objects.
        print("🧠 Loading BERT QA model...")
        model_name = "deepset/roberta-base-squad2"
        self.qa_tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.qa_model = AutoModelForQuestionAnswering.from_pretrained(model_name)
        print("✅ Setup complete! Ready for questions\n")

    def answer_question(self, question: str, top_k: int = 3):
        """Retrieve context for ``question`` and extract an answer span from it.

        Args:
            question: Natural-language question.
            top_k: Number of chunks to retrieve as context.

        Returns:
            Dict with keys:
                ``answer``: extracted text, or ``""`` when no valid span is found.
                ``confidence``: mean of the best start and end logits. This is
                    a raw score, not a probability.
                ``chunks``: the retrieved chunks.
                ``similarities``: their retrieval scores.
                ``context_used``: the concatenated context given to the model.
        """
        print(f"\n{Fore.GREEN}❓ Question: {question}{Style.RESET_ALL}")

        # 1. Get relevant chunks
        print("\n1️⃣  Finding relevant context...")
        relevant_chunks, relevance_scores = self._get_relevant_chunks(
            question, top_k=top_k
        )

        # 2. Combine chunks
        print("\n2️⃣  Combining chunks for context...")
        # Join with plain whitespace. A literal "[SEP]" is not a special token
        # for this tokenizer; it would be split into ordinary sub-word tokens
        # and become noise inside the context.
        context = " ".join(relevant_chunks)

        # 3. Get answer
        print("\n3️⃣  Extracting answer...")
        # "only_second" truncates the context only; plain truncation=True may
        # also drop tokens from the question when the pair is too long.
        inputs = self.qa_tokenizer(
            question,
            context,
            return_tensors="pt",
            max_length=512,
            truncation="only_second",
        )

        with torch.no_grad():
            outputs = self.qa_model(**inputs)
            start_scores = outputs.start_logits[0]
            end_scores = outputs.end_logits[0]

            # Start and end are chosen independently, so the pair may not form
            # a valid span (end before start).
            start_idx = int(torch.argmax(start_scores))
            end_idx = int(torch.argmax(end_scores))

            confidence = (
                float(torch.max(start_scores)) + float(torch.max(end_scores))
            ) / 2

        if end_idx < start_idx:
            answer = ""
        else:
            # skip_special_tokens drops markers such as the leading <s> that a
            # SQuAD2-style model selects to signal "no answer".
            answer = self.qa_tokenizer.decode(
                inputs.input_ids[0][start_idx : end_idx + 1],
                skip_special_tokens=True,
            ).strip()

        # Format and display results
        shown_answer = answer or "(no answer found in the retrieved context)"
        print(f"\n{Fore.GREEN}📝 Answer: {shown_answer}{Style.RESET_ALL}")
        print(f"{Fore.BLUE}🎯 Confidence score: {confidence:.2f}{Style.RESET_ALL}")

        print(f"\n{Fore.BLUE}📖 Sources used:{Style.RESET_ALL}")
        for i, (chunk, score) in enumerate(zip(relevant_chunks, relevance_scores), 1):
            print(f"\nSource {i} (similarity: {score:.2f}):")
            print(textwrap.fill(chunk, width=80))

        return {
            "answer": answer,
            "confidence": confidence,
            "chunks": relevant_chunks,
            "similarities": relevance_scores,
            "context_used": context,
        }


In [19]:

class GPTRAG(BaseRAG):
    """Generative QA: the answer is free text generated from the retrieved context.

    Despite the class name, the model loaded here is FLAN-T5, a
    sequence-to-sequence (encoder-decoder) model rather than a GPT model.
    """

    def __init__(self, pdf_path: str):
        """Index ``pdf_path`` and load the generative model and tokenizer.

        Args:
            pdf_path: Path of the PDF file to index.
        """
        super().__init__(pdf_path)

        # Load generative model
        print("🧠 Loading FLAN-T5 model...")
        model_name = "google/flan-t5-small"
        self.gen_tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.gen_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        print("✅ Setup complete! Ready for questions\n")

    def answer_question(self, question: str, top_k: int = 2):
        """Retrieve context for ``question`` and generate an answer from it.

        Generation uses sampling, so repeated calls with the same question can
        return different answers.

        Args:
            question: Natural-language question.
            top_k: Number of chunks to retrieve as context.

        Returns:
            Dict with keys ``answer`` (generated text), ``chunks`` (retrieved
            chunks), ``similarities`` (their retrieval scores) and
            ``context_used`` (the concatenated context placed in the prompt).
        """
        print(f"\n{Fore.GREEN}❓ Question: {question}{Style.RESET_ALL}")

        # 1. Get relevant chunks
        print("\n1️⃣  Finding relevant context...")
        relevant_chunks, relevance_scores = self._get_relevant_chunks(
            question, top_k=top_k
        )

        # 2. Prepare prompt
        print("\n2️⃣  Preparing prompt...")
        context = " ".join(relevant_chunks)
        prompt = f"""Answer or complete the question using the information provided in the context. If you're unsure or the answer isn't directly stated in the context, say "I cannot answer based on the provided context."

Context: {context}

Question: {question}

Answer:"""

        # 3. Generate answer
        print("\n3️⃣  Generating answer...")
        # Truncation cuts from the end of the prompt, so a very long context
        # (large top_k) would drop the question itself.
        inputs = self.gen_tokenizer(
            prompt, return_tensors="pt", max_length=512, truncation=True
        )

        outputs = self.gen_model.generate(
            input_ids=inputs.input_ids,
            # Pass the mask explicitly instead of letting generate() infer it.
            attention_mask=inputs.attention_mask,
            max_length=150,
            # NOTE(review): min_length forces at least 20 generated tokens, so a
            # short correct answer gets padded with extra text — confirm this
            # is intended.
            min_length=20,
            temperature=1.0,  # sampling temperature: lower = more repeatable, higher = more varied
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            do_sample=True,
        )

        # Turn the generated token ids back into text.
        answer = self.gen_tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Format and display results
        print(f"\n{Fore.GREEN}📝 Answer: {answer}{Style.RESET_ALL}")

        print(f"\n{Fore.BLUE}📖 Sources used:{Style.RESET_ALL}")
        for i, (chunk, score) in enumerate(zip(relevant_chunks, relevance_scores), 1):
            print(f"\nSource {i} (similarity: {score:.2f}):")
            print(textwrap.fill(chunk, width=80))

        return {
            "answer": answer,
            "chunks": relevant_chunks,
            "similarities": relevance_scores,
            "context_used": context,
        }

In [None]:
def demo(pdf_path: str, model_type: str = "bert"):
    """
    Run an interactive RAG demo with specified model type.

    Questions are read from standard input until the user types 'quit'
    (any case, surrounding whitespace ignored) or input ends (EOF / Ctrl-C).
    Blank lines are ignored.

    Args:
        pdf_path: Path to the PDF file
        model_type: Either "bert" or "gpt" (case-insensitive)

    Raises:
        ValueError: If model_type is neither "bert" nor "gpt".
    """
    # Validate before printing the banner or loading any model.
    model_key = model_type.strip().lower()
    if model_key not in ("bert", "gpt"):
        raise ValueError("model_type must be either 'bert' or 'gpt'")

    print(f"{Fore.CYAN}Welcome to the Unified RAG Demo!{Style.RESET_ALL}")
    print(f"Using {model_key.upper()} model for question answering.")

    # Initialize appropriate RAG system. Only the selected class is looked up,
    # so the other one does not have to be defined in the kernel.
    if model_key == "bert":
        rag = BertRAG(pdf_path)
    else:
        rag = GPTRAG(pdf_path)

    # Interactive loop
    while True:
        try:
            question = input(
                f"\n{Fore.GREEN}Enter your question (or 'quit' to exit): {Style.RESET_ALL}"
            ).strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel ends the session cleanly
            # instead of surfacing a traceback.
            print()
            break

        if not question:
            # Nothing typed: ask again rather than querying the model.
            continue
        if question.lower() == "quit":
            break

        rag.answer_question(question)


# Start the interactive session on greek_myths.pdf using the generative ("gpt")
# backend. The call blocks on input() until the user types 'quit'.
# NOTE(review): the path is relative — presumably to the notebook's working
# directory; confirm the file is present there.
demo("greek_myths.pdf", model_type="gpt")

Welcome to the Unified RAG Demo!
Using GPT model for question answering.
🔧 Initializing RAG system...
📚 Loading embedding model...
📄 Reading PDF: greek_myths.pdf
✂️  Split into 301 chunks
🧮 Creating embeddings for chunks...
🧠 Loading FLAN-T5 model...
✅ Setup complete! Ready for questions


[32mEnter your question (or 'quit' to exit): [0mWho is the god of thunder?

❓ Question: Who is the god of thunder?

1️⃣  Finding relevant context...

Chunk 1 similarity: 0.45
Preview: made up his mind that he would  destroy them all. So he shut up the North Wind
in the  caves of Æolus, and sent forth the South Wind, for the  South W...

Chunk 2 similarity: 0.44
Preview: called the gods together and  began a terrible battle. !T_h  e Titans tore up
enormous  boulders and cast them at the gods, while Jupiter hurled  his ...

2️⃣  Preparing prompt...

3️⃣  Generating answer...

📝 Answer: Jupiter. He has no knowledge of any gods or angels. I think it is clear that Jupiter is god of thunder.

📖 Sources u