# RAG con Qdrant + OpenAI

### Requisitos de instalación

Antes de ejecutar el proyecto, asegurate de tener instaladas las siguientes librerías de Python:

```bash
pip install \
  fastembed>=0.7.1 \
  ipywidgets>=8.1.7 \
  notebook>=7.4.3 \
  openai>=1.93.0 \
  qdrant-client>=1.14.3
```


###  Descarga y procesamiento de documentos

In [1]:
import requests

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
documents_raw = requests.get(docs_url).json()

# Usamos una lista por comprensión para mayor velocidad y claridad
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

### Construcción de la colección en Qdrant

In [2]:
from qdrant_client import QdrantClient, models

qd_client = QdrantClient("http://localhost", port=6333)
collection_name = "zoomcamp-faq"
embedding_dim = 512
model_handle = "jinaai/jina-embeddings-v2-small-en"

# Solo borra la colección si existe (evita error o espera innecesaria)
if collection_name in [col.name for col in qd_client.get_collections().collections]:
    qd_client.delete_collection(collection_name=collection_name)

# Usa create_collection para crear una collection
qd_client.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=embedding_dim,
        distance=models.Distance.COSINE
    ),
    timeout=60
)

# Usa create_payload_index solo si no existe
existing_indexes = qd_client.get_collection(collection_name).payload_schema
if "course" not in existing_indexes:
    qd_client.create_payload_index(
        collection_name=collection_name,
        field_name="course",
        field_schema="keyword"
    )

### Inserción de documentos vectorizados

In [None]:
from qdrant_client.models import Document, PointStruct
from tqdm import tqdm
import concurrent.futures

# Barra de progreso
points = []

def build_point(i, doc):
    text = f"{doc['question']} {doc['text']}"
    vector = Document(text=text, model=model_handle)
    return PointStruct(id=i, vector=vector, payload=doc)

# Generar puntos en paralelo (si hay muchos documentos)
with concurrent.futures.ThreadPoolExecutor() as executor:
    points = list(tqdm(executor.map(lambda x: build_point(*x), enumerate(documents)), total=len(documents)))

# Usar upsert en batches para reducir el uso de memoria y red
BATCH_SIZE = 128

for i in range(0, len(points), BATCH_SIZE):
    batch = points[i:i + BATCH_SIZE]
    qd_client.upsert(collection_name=collection_name, points=batch)

100%|████████████████████████████████████████████████████████████████████████████████████████| 948/948 [00:00<00:00, 393527.33it/s]


### Función de búsqueda vectorial con filtros

In [None]:
def vector_search(
    question,
    course,
    limit,
    model_handle,
    collection_name
):
    print(f"[vector_search] Searching for: '{question}' (course={course})")

    # Construcción explícita del filtro para claridad
    search_filter = models.Filter(
        must=[
            models.FieldCondition(
                key="course",
                match=models.MatchValue(value=course)
            )
        ]
    )

    # Generar vector para la pregunta (puede tardar)
    query_vector = models.Document(text=question, model=model_handle)

    # Consulta a Qdrant
    query_result = qd_client.query_points(
        collection_name=collection_name,
        query=query_vector,
        query_filter=search_filter,
        limit=limit,
        with_payload=True
    )

    return [point.payload for point in query_result.points]

### Construcción del prompt para el LLM

In [None]:
def build_prompt(query, search_results):
    prompt_template = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n\n"
        "QUESTION: {question}\n\n"
        "CONTEXT:\n{context}"
    )

    # Usar join + list comprehension es más eficiente que concatenar strings
    context = "\n\n".join(
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}"
        for doc in search_results
    )

    return prompt_template.format(question=query, context=context)

### Generación de respuestas con un LLM

In [None]:
import time
from openai import OpenAIError  # O el error correcto según tu cliente
from openai import OpenAI

def llm(prompt, model="gpt-4o-mini", max_retries=3):

    openai_client = OpenAI()
    for attempt in range(max_retries):
        try:
            response = openai_client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}]
            )
            return response.choices[0].message.content.strip()
        except OpenAIError as e:
            print(f"[llm] Error (attempt {attempt + 1}/{max_retries}): {e}")
            time.sleep(1.5 * (attempt + 1))

    raise RuntimeError("LLM request failed after multiple attempts.")

### El pipeline completo: función rag

In [None]:
def rag(
    query,
    course='data-engineering-zoomcamp',
    limit=5,
    model='gpt-4o-mini',
    embedding_model='jinaai/jina-embeddings-v2-small-en',
    collection_name='zoomcamp-faq'
):
    search_results = vector_search(
        question=query,
        course=course,
        limit=limit,
        model_handle=embedding_model,
        collection_name=collection_name
    )
    
    if not search_results:
        return "No relevant documents found to answer the question."

    prompt = build_prompt(query, search_results)
    answer = llm(prompt, model=model)
    
    return answer

rag('how do I run kafka?')