<a href="https://colab.research.google.com/github/jcmachicaocuf/codigos_CUF_LLM_NLP/blob/main/U3__RAG_concept.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from typing import List, Dict, Tuple
from dataclasses import dataclass
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

## Procesos de Almacenamiento de vectores

* Carga de documentos
* Búsqueda por similitud vectorial

In [2]:
@dataclass
class Document:
    """One knowledge-base entry: an identifier, its text, and an optional
    embedding vector (``None`` until one is supplied)."""
    id: str  # caller-supplied identifier
    content: str  # raw text of the document
    embedding: np.ndarray = None  # vector representation; defaults to None

class VectorStore:
    """In-memory store pairing documents with a matrix of their embeddings.

    ``documents[i]`` corresponds to row ``i`` of ``embeddings``. ``embeddings``
    stays ``None`` until the first document has been added.
    """

    def __init__(self):
        # "Document" is quoted (forward reference) so that defining this class
        # does not require Document to be defined beforehand.
        self.documents: List["Document"] = []
        self.embeddings: np.ndarray = None

    def add_documents(self, documents: List["Document"]):
        """Append documents and their embeddings to the store.

        Args:
            documents: Documents whose ``embedding`` is already populated.
                An empty list is a no-op.

        Raises:
            ValueError: If a document has no embedding, or (raised by
                ``np.vstack``) if embedding sizes do not match.
        """
        if not documents:
            return  # nothing to add; avoids indexing documents[0] on an empty list

        for doc in documents:
            if doc.embedding is None:
                raise ValueError(f"Document {doc.id!r} has no embedding")

        # Build the new rows in one call (no repeated re-allocation) and before
        # touching any state, so a failure leaves documents/embeddings in sync.
        new_rows = np.vstack(
            [np.asarray(doc.embedding).reshape(1, -1) for doc in documents]
        )
        if self.embeddings is None:
            self.embeddings = new_rows
        else:
            self.embeddings = np.vstack([self.embeddings, new_rows])
        self.documents.extend(documents)

    def similarity_search(self, query_embedding: np.ndarray, k: int = 3) -> List[Tuple["Document", float]]:
        """Find the k documents most similar to the query.

        Args:
            query_embedding: Embedding of the query; it is reshaped to a
                single row before comparison.
            k: Maximum number of results. Values <= 0 yield an empty list.

        Returns:
            ``(document, cosine_similarity)`` pairs ordered from most to least
            similar. Empty if the store holds no documents.
        """
        if self.embeddings is None or k <= 0:
            return []

        similarities = cosine_similarity(query_embedding.reshape(1, -1), self.embeddings)[0]
        # argsort is ascending, so reverse it before taking the first k
        most_similar_indices = np.argsort(similarities)[::-1][:k]

        # float(...) turns numpy scalars into plain Python floats, as annotated
        return [(self.documents[idx], float(similarities[idx])) for idx in most_similar_indices]

## Procesos de RAG

* Carga de base de conocimiento (definiciones base)
* Recuperación de documentos relevantes
* Generación de respuestas

In [3]:
class SimpleRAG:
    """Minimal retrieval-augmented-generation pipeline.

    Texts are embedded with a SentenceTransformer model and kept in a
    VectorStore; queries are embedded with the same model and matched against
    that store. Response generation is simulated: no LLM is called.
    """

    def __init__(self, embedding_model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        """Load the embedding model and create an empty vector store."""
        self.embedding_model = SentenceTransformer(embedding_model_name)
        self.vector_store = VectorStore()

    def add_to_knowledge_base(self, texts: List[str], ids: List[str] = None):
        """Embed texts and add them to the knowledge base.

        Args:
            texts: Texts to index. An empty list is a no-op.
            ids: Optional identifiers, one per text. When omitted, ids of the
                form ``doc_<n>`` are generated, continuing from the number of
                documents already stored so repeated calls do not reuse ids.

        Raises:
            ValueError: If ``ids`` is given and its length differs from
                ``texts`` (zip would otherwise silently drop the surplus).
        """
        if ids is not None and len(ids) != len(texts):
            raise ValueError(
                f"ids and texts must have the same length "
                f"(got {len(ids)} ids for {len(texts)} texts)"
            )
        if len(texts) == 0:
            return

        if ids is None:
            # Offset by the current store size; restarting at 0 on each call
            # would hand out the same ids again.
            offset = len(self.vector_store.documents)
            ids = [f"doc_{offset + i}" for i in range(len(texts))]

        # Generate embeddings for all texts in one batch
        embeddings = self.embedding_model.encode(texts)

        # Create Document objects and add to vector store
        documents = [
            Document(id=doc_id, content=text, embedding=embedding)
            for doc_id, text, embedding in zip(ids, texts, embeddings)
        ]
        self.vector_store.add_documents(documents)

    def retrieve(self, query: str, k: int = 3) -> List[Tuple[str, float]]:
        """Retrieve the contents of the k documents most similar to the query.

        Returns:
            ``(content, similarity)`` pairs in the order produced by the
            vector store's similarity search; empty if nothing is indexed yet.
        """
        if not self.vector_store.documents:
            return []  # nothing indexed; skip embedding the query

        # Generate query embedding
        query_embedding = self.embedding_model.encode(query)

        # Find similar documents
        similar_docs = self.vector_store.similarity_search(query_embedding, k=k)

        # float(...) so callers get plain Python floats, as annotated
        return [(doc.content, float(score)) for doc, score in similar_docs]

    def generate_response(self, query: str, retrieved_docs: List[Tuple[str, float]]) -> str:
        """
        Simulate response generation (in practice, you would use an actual LLM here).

        This placeholder only formats the query and the retrieved documents.

        Args:
            query: The user query.
            retrieved_docs: ``(content, similarity)`` pairs, e.g. from ``retrieve``.

        Returns:
            A text block with the query, one line per retrieved document, and
            a fixed closing sentence.
        """
        context = "\n".join(
            f"Document (similarity: {score:.3f}): {content}"
            for content, score in retrieved_docs
        )

        response = (
            f"Query: {query}\n\n"
            f"Retrieved Context:\n{context}\n\n"
            "Based on the retrieved documents, here's a simulated response..."
        )
        return response

In [4]:
# Example usage
# Sample texts that will later be indexed into the knowledge base
sample_docs = [
    "The Python programming language was created by Guido van Rossum.",
    "Neural networks are a fundamental component of deep learning.",
    "RAG (Retrieval-Augmented Generation) combines search and language models.",
    "BERT is a transformer-based machine learning model for NLP.",
]

# Build the RAG system with its default embedding model
rag = SimpleRAG()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [5]:
# Index the sample texts only once: re-running this cell would otherwise
# append the same documents again and produce duplicate retrieval hits.
if not rag.vector_store.documents:
    print("Adding documents to knowledge base...")
    rag.add_to_knowledge_base(sample_docs)

# Test retrieval
query = "How does RAG work?"
print(f"\nQuery: {query}")

Adding documents to knowledge base...

Query: How does RAG work?


In [6]:
# Fetch the two closest documents for the query, then format the simulated answer
retrieved_docs = rag.retrieve(query, k=2)
response = rag.generate_response(query, retrieved_docs)

# Header and response on consecutive lines, same as two separate print calls
print("\nGenerated Response:", response, sep="\n")


Generated Response:
Query: How does RAG work?

Retrieved Context:
Document (similarity: 0.365): RAG (Retrieval-Augmented Generation) combines search and language models.
Document (similarity: 0.085): BERT is a transformer-based machine learning model for NLP.

Based on the retrieved documents, here's a simulated response...
