In [9]:
import os

from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_core.messages import AIMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter


In [2]:
# Read the notes file from the working directory into LangChain documents.
loader = TextLoader("my_notes.txt")
docs = loader.load()

# Split the loaded documents into chunks (chunk_size=500, chunk_overlap=50)
# so each piece can be embedded and retrieved on its own.
splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(docs)

In [3]:
# The OpenAI key is read from the OPENAI_API_KEY environment variable so that
# it never lives in the notebook source or its saved outputs. A missing
# variable raises KeyError here, before any API call is attempted.
# SECURITY: the key that used to be hardcoded on this line has been exposed
# and must be revoked/rotated in the OpenAI dashboard.
embeddings = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"])

# Embed every chunk and index the resulting vectors in an in-memory FAISS store.
vectorstore = FAISS.from_documents(chunks, embeddings)

In [4]:
# Expose the FAISS index as a retriever and persist the index to ./selfrag_db.
retriever = vectorstore.as_retriever()
vectorstore.save_local("selfrag_db")

# temperature=0 asks for the least-random completions.
# The API key comes from the OPENAI_API_KEY environment variable instead of a
# literal in the notebook; a missing variable raises KeyError here.
# SECURITY: the key that used to be hardcoded on this line has been exposed
# and must be revoked/rotated in the OpenAI dashboard.
llm = ChatOpenAI(temperature=0, openai_api_key=os.environ["OPENAI_API_KEY"])

In [10]:
# Prompt for the RAG chain built with LCEL (LangChain Expression Language).
# The retrieved context is placed on its own line so it does not run straight
# into the instruction sentence, and a blank line separates it from the question.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

def format_docs(docs):
    """Return the ``page_content`` of every item in ``docs``, joined by blank lines."""
    page_texts = [item.page_content for item in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Inputs for the prompt: the question is sent through the retriever and the
# hits are flattened to text for {context}; the question itself is passed
# through unchanged for {question}.
_qa_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# inputs -> prompt -> chat model -> plain string
qa_chain = _qa_inputs | prompt | llm | StrOutputParser()

In [24]:
def corrective_rag(question, top_k=4, docs=None):
    """
    Answer a question with the bare LLM and, if that answer looks unsure, retry with retrieval.

    The question is first sent directly to the notebook-level ``llm``. The reply
    is treated as low confidence when it contains "i am not sure" or "sorry"
    (case-insensitive) or is shorter than 30 characters after stripping. This is
    a crude textual heuristic: a fluent but wrong first answer is NOT detected.

    Only on low confidence is a temporary FAISS index built from ``docs`` with
    the notebook-level ``embeddings``; up to ``top_k`` documents are retrieved
    and the LLM is asked again with instructions to answer from that context only.

    Args:
        question: The question text.
        top_k: Number of documents requested from the retriever (``k``).
        docs: Optional list of ``Document`` objects to index for the correction
            pass. Defaults to the single demo document used by this notebook.

    Returns:
        A string formatted as:
          original answer: <first_guess>
          corrected using documents: <correction, "None (no correction needed)",
                                      or "NO_DOCUMENTS_RETRIEVED">
    """

    # ---- FIRST LLM ATTEMPT ----
    first_guess = llm.invoke(f"Q: {question}\nA:")
    # Chat models return an AIMessage; fall back to str() for plain-text LLM wrappers.
    first_text = first_guess.content if isinstance(first_guess, AIMessage) else str(first_guess)

    # ---- SIMPLE CONFIDENCE CHECK ----
    lowered = first_text.lower()
    low_conf = (
        "i am not sure" in lowered
        or "sorry" in lowered
        or len(first_text.strip()) < 30
    )

    # Confident answer: return it as-is. Checking this before touching FAISS
    # avoids an embeddings call whose result would be thrown away.
    if not low_conf:
        return (
            f"original answer: {first_text}\n"
            f"corrected using documents: None (no correction needed)"
        )

    # ---- LOW CONFIDENCE -> BUILD A TEMP FAISS DB (demo) AND RETRIEVE ----
    if docs is None:
        docs = [Document(page_content="The largest cat is the liger.")]

    retrieved_docs = []
    if docs:  # an empty document list cannot be indexed; treat it as "nothing retrieved"
        db = FAISS.from_documents(docs, embeddings)
        # Named doc_retriever so it does not shadow the notebook-level `retriever`.
        doc_retriever = db.as_retriever(search_kwargs={"k": top_k})
        # `invoke` is the Runnable entry point that replaces the deprecated
        # `get_relevant_documents`.
        retrieved_docs = doc_retriever.invoke(question)

    if not retrieved_docs:
        corrected_text = "NO_DOCUMENTS_RETRIEVED"
        return f"original answer: {first_text}\ncorrected using documents: {corrected_text}"

    # Compose context by concatenating retrieved documents.
    # No truncation is applied; add token-aware trimming if contexts get large.
    context = "\n\n---\n\n".join(d.page_content for d in retrieved_docs)

    # Build a strict prompt that tells the model to use only the context
    rag_prompt = (
        "You are given context extracted from documents. Use ONLY that context to answer the question.\n\n"
        "CONTEXT:\n"
        f"{context}\n\n"
        "QUESTION:\n"
        f"{question}\n\n"
        "Answer concisely using only the context above. If the answer is not present in the context, say 'I don't know from the provided documents.'\n"
    )

    # Call the LLM with the RAG prompt
    corrected = llm.invoke(rag_prompt)
    corrected_text = corrected.content if isinstance(corrected, AIMessage) else str(corrected)

    # Return formatted output
    return (
        f"original answer: {first_text}\n"
        f"corrected using documents: {corrected_text}"
    )


# ---- TEST (example) ----
if __name__ == "__main__":
    # `llm` and `embeddings` must already be initialized by the cells above.
    demo_question = "Which is the largest cat?"
    response = corrective_rag(demo_question)
    print("\nFinal Answer:\n", response)



Final Answer:
 original answer: The largest cat is the Siberian tiger, also known as the Amur tiger, which can weigh up to 660 pounds and grow up to 11 feet in length.
corrected using documents: None (no correction needed)
