In [9]:
# ask_agent.py

from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import os
import sys
from dotenv import load_dotenv

# Set at module load, before any of the functions below run.
# NOTE(review): presumably this avoids the HuggingFace tokenizers
# parallelism warning — confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# === CONFIGURATION ===
# Embedding model name handed to HuggingFaceEmbeddings, and the path handed to
# FAISS.load_local, in cargar_faiss().
MODELO_EMBEDDINGS = "sentence-transformers/all-MiniLM-L6-v2"
INDEX_FAISS = "faiss_tspec"

# Read the .env file into the process environment, then fetch the API key.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Stop with exit status 1 when the key is missing or empty.
if not OPENAI_API_KEY:
    print("❌ ERROR: No se encontró OPENAI_API_KEY en el archivo .env.")
    sys.exit(1)

def cargar_faiss(index_path=None, model_name=None):
    """Load a previously saved FAISS vector store from disk.

    Args:
        index_path: Path passed to ``FAISS.load_local``. When omitted, the
            module-level ``INDEX_FAISS`` value is used.
        model_name: Embedding model name passed to ``HuggingFaceEmbeddings``.
            When omitted, the module-level ``MODELO_EMBEDDINGS`` value is
            used. NOTE(review): this presumably has to be the same model the
            index was built with — confirm against the indexing script.

    Returns:
        The vector store object returned by ``FAISS.load_local``.
    """
    # Resolve defaults at call time so that reassigning the module constants
    # (e.g. in a later notebook cell) is still honoured, as it was before.
    if index_path is None:
        index_path = INDEX_FAISS
    if model_name is None:
        model_name = MODELO_EMBEDDINGS

    print("📂 Cargando FAISS existente...")
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    # SECURITY: allow_dangerous_deserialization=True lets load_local unpickle
    # data stored alongside the index. Unpickling can execute arbitrary code,
    # so only point this at index folders you created or otherwise trust.
    return FAISS.load_local(
        index_path,
        embeddings,
        allow_dangerous_deserialization=True,
    )

def crear_agente(db):
    """Build a RetrievalQA chain over the given vector store.

    Args:
        db: Vector store exposing ``as_retriever`` (the object returned by
            ``cargar_faiss``).

    Returns:
        The chain created by ``RetrievalQA.from_chain_type``, configured to
        return its source documents together with the answer.
    """
    print("🤖 Inicializando agente...")

    # Write the key read at module load back into the environment before the
    # OpenAI chat model is constructed.
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

    modelo_chat = ChatOpenAI(model_name="gpt-4", temperature=0)

    # Retriever uses the "mmr" search type and asks for 5 documents per query.
    buscador = db.as_retriever(search_type="mmr", search_kwargs={"k": 5})

    return RetrievalQA.from_chain_type(
        llm=modelo_chat,
        retriever=buscador,
        return_source_documents=True,
    )

# Layout used to display the retrieved context together with the question.
_PLANTILLA_PROMPT = """Usa el siguiente contexto técnico para responder la pregunta.

Contexto:
---------
{context}

Pregunta:
---------
{question}

Respuesta:"""


def hacer_pregunta(chain):
    """Run an interactive question/answer loop against ``chain``.

    Reads questions from standard input until the user types ``salir`` (any
    letter case), or until input ends (EOF) or is interrupted (Ctrl+C). Blank
    input is ignored. For every question the function prints a prompt rebuilt
    from the returned source documents, followed by the chain's answer.

    Args:
        chain: Object whose ``invoke({"query": ...})`` returns a mapping with
            a ``'result'`` entry and a ``'source_documents'`` entry (documents
            exposing ``page_content``).

    Returns:
        None.
    """
    print("\n💬 Agente listo. Escribe tu pregunta técnica (o 'salir'):")

    # Loop-invariant: build the template object once instead of per question.
    prompt = PromptTemplate.from_template(_PLANTILLA_PROMPT)

    while True:
        try:
            query = input("🧠> ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl+C at the prompt: end the session cleanly
            # instead of surfacing a traceback.
            print()
            break

        if query.lower() == "salir":
            break
        if not query:
            # Nothing was typed; avoid spending an LLM call on an empty query.
            continue

        # Run the chain.
        resultado = chain.invoke({"query": query})

        # At most 5 documents, each truncated to 5000 characters.
        context = "\n\n".join(
            doc.page_content[:5000] for doc in resultado["source_documents"][:5]
        )

        # NOTE(review): this prompt is rebuilt locally from the returned source
        # documents; it is not read back from the chain, so it can differ from
        # what the chain actually sent unless the chain was configured with
        # this same template — confirm before relying on the label below.
        full_prompt = prompt.format(context=context, question=query)

        print("\n📄 Prompt enviado al LLM:\n")
        print(full_prompt)
        print("\n📘 Respuesta:\n")
        print(resultado["result"])
        print()

In [10]:
# Load the saved FAISS vector store and keep it in `db` for the next cells.
db = cargar_faiss()

📂 Cargando FAISS existente...


In [11]:
# Build the RetrievalQA chain on top of the loaded vector store.
agente = crear_agente(db)

🤖 Inicializando agente...


In [12]:
# Exercise the retriever on its own with a sample question and preview the
# first 1000 characters of the first three documents it returns.
docs = agente.retriever.invoke("what is an MME?")
for numero, doc in enumerate(docs[:3], start=1):
    print(f"\n--- Documento {numero} ---\n{doc.page_content[:1000]}")


--- Documento 1 ---
\- means to handle different versions of application part
implementations and protocol errors (error indication).

#### 19.2.1.9 MME Load balancing Function

MME Load balancing is the function which achieves load-balanced MMEs
with respect to their processing capacity within a pool area during
system operation. The means to load-balance MMEs is to distribute UEs
newly entering the pool to different MMEs in the MME pool. In addition
the MME load balancing function is able to achieve equally loaded MMEs
within a pool area after the introduction of a new MME and after the
removal of a MME from the network.

--- Documento 2 ---
|           |                                                       |   |   |   |   |   |   |   |
| Octet m+2 | \-                                                    |   |   |   |   |   |   |   |
|           |                                                       |   |   |   |   |   |   |   |
| \-        | Rightmost character of the last label o