In [None]:
import asyncio
import os

from fastapi import FastAPI
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
from pydantic import BaseModel

# ---------------------------------------------
# Load environment variables
# ---------------------------------------------
# Pull the service credentials from the process environment; a variable that
# is not set comes back as None.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
PINECONE_INDEX_NAME = os.environ.get("PINECONE_INDEX_NAME")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Fail at import time when a setting is unset or empty, rather than on the
# first request that needs it.
if not (PINECONE_API_KEY and PINECONE_INDEX_NAME):
    raise ValueError("Missing Pinecone environment variables.")
if not OPENAI_API_KEY:
    raise ValueError("Missing OpenAI API key.")

# Application object that the route decorator further down registers on.
app = FastAPI()

# ---------------------------------------------
# 1. Load embedding model
# ---------------------------------------------
# Embedding client; it is handed to the Pinecone vector store wrapper below.
embeddings = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
    model="text-embedding-3-large",
)

# ---------------------------------------------
# 2. Create vector store
# ---------------------------------------------
# 2A. Connect to Pinecone and open a handle on the configured index
pc = Pinecone(api_key=PINECONE_API_KEY)
index_name = PINECONE_INDEX_NAME
index = pc.Index(index_name)

# 2B. LangChain wrapper around that index, paired with the embedding client
vector_store = PineconeVectorStore(index=index, embedding=embeddings)


# ---------------------------------------------
# 3. Select Chat LLM
# ---------------------------------------------
# Chat model that writes the final answer; temperature=0 asks for the least
# random sampling.
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model="gpt-4o-mini",
    temperature=0,
)


# ---------------------------------------------
# Request model - the JSON body the website frontend sends to the endpoint below.
# ---------------------------------------------
class Query(BaseModel):
    """Request body accepted by ``POST /api/ask``."""

    # Free-text question; the endpoint uses it as the similarity-search query
    # and also places it verbatim into the LLM prompt.
    question: str


# ---------------------------------------------
# API endpoint
# ---------------------------------------------
@app.post("/api/ask")
async def ask(query: Query):
    """Answer a question using only documents retrieved from the vector store.

    The five most similar documents are fetched, their text is joined into a
    context section, and the chat model is asked to answer strictly from that
    context.

    Args:
        query: Request body carrying the user's question.

    Returns:
        A dict of the form ``{"answer": <text>}``. When retrieval yields no
        text at all, the fixed "not found" sentence is returned without
        calling the model.
    """
    fallback = "I could not find relevant information in the knowledge base."

    # 1. Retrieve most relevant documents.
    # similarity_search is a synchronous, network-bound call. Running it
    # directly inside an ``async def`` endpoint would block the event loop
    # for every other request, so it is pushed to a worker thread.
    docs = await asyncio.to_thread(
        vector_store.similarity_search, query.question, k=5
    )
    context = "\n\n".join(doc.page_content for doc in docs)

    # Nothing usable was retrieved: the prompt below instructs the model to
    # give exactly this reply in that case, so skip the model call.
    if not context.strip():
        return {"answer": fallback}

    # 2. Build prompt (the rendered text is unchanged; the fallback sentence
    # is interpolated so it is defined in one place only).
    prompt = f"""
    You are a factual assistant. Answer ONLY using the provided context.
    If the answer is not found in the context, say:
    "{fallback}"

    CONTEXT:
    {context}

    QUESTION:
    {query.question}
    """

    # 3. Query the LLM through its async entry point so the event loop stays
    # free while waiting for the completion.
    response = await llm.ainvoke(prompt)

    return {"answer": response.content}
