Human-in-the-Loop

In [None]:
# main.py
from orchestrator.langgraph_flow import run_rag_workflow

if __name__ == "__main__":
    # Interactive console loop: read a question, pass it to the RAG workflow,
    # print the reply. Ends on "exit"/"quit" or when input is closed.
    print("Human-in-the-Loop RAG System Ready. Type 'exit' to quit.")
    while True:
        try:
            # Strip so that " exit " or a trailing space still matches below.
            query = input("\nUser: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt ends the session cleanly
            # instead of surfacing a traceback.
            print()
            break
        if not query:
            # Nothing typed: ask again rather than sending an empty question.
            continue
        if query.lower() in ("exit", "quit"):
            break
        answer = run_rag_workflow(query)
        print("\nAssistant:", answer)

# config.py
# Model name passed to the Ollama LLM wrapper in llm/generate.py.
MODEL_NAME = "mistral"
# Model name passed to OllamaEmbeddings in embeddings/embedder.py.
EMBEDDING_MODEL = "nomic-embed-text"
# Directory given to Chroma as persist_directory in vectorstore/db_handler.py.
VECTOR_DB_PATH = "db"
# PDF that parser/pdf_parser.py loads and splits into chunks.
SOURCE_DOC = "data/source_docs/ai_education_article.pdf"

# requirements.txt
langchain
langchain-community
chromadb
ollama
reportlab
rank_bm25
langgraph

# llm/generate.py
from langchain_community.llms import Ollama
from config import MODEL_NAME

def get_llm():
    """Create the Ollama LLM client used for answer generation.

    Returns:
        An ``Ollama`` instance for ``MODEL_NAME`` with temperature 0.2.
    """
    llm_settings = {"model": MODEL_NAME, "temperature": 0.2}
    return Ollama(**llm_settings)

# llm/react_prompt.py
from langchain.prompts import PromptTemplate

# Prompt template with two placeholders, {question} and {context}.
# orchestrator/langgraph_flow.py passes it to the retrieval chain through
# combine_docs_chain_kwargs={"prompt": react_prompt}.
# The text ends on "Reasoning Steps:" / "1." — presumably to prime the model
# to continue the numbered list before giving its final answer.
react_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are an intelligent assistant using the ReAct (Reasoning + Acting) technique.
Break down the user query into reasoning steps and retrieve relevant information accordingly.

Question: {question}
Relevant Context:
{context}

First, list your reasoning steps clearly.
Then, provide a final answer based on those steps and the retrieved context.

Reasoning Steps:
1.
"""
)

# embeddings/embedder.py
from langchain.embeddings import OllamaEmbeddings
from config import EMBEDDING_MODEL

def get_embedding_model():
    """Create the Ollama embedding client for ``EMBEDDING_MODEL``.

    Returns:
        An ``OllamaEmbeddings`` instance.
    """
    embedder = OllamaEmbeddings(model=EMBEDDING_MODEL)
    return embedder

# vectorstore/db_handler.py
from langchain_community.vectorstores import Chroma
from embeddings.embedder import get_embedding_model
from config import VECTOR_DB_PATH

def get_vectorstore(documents):
    """Embed ``documents`` into a Chroma store persisted at ``VECTOR_DB_PATH``.

    Args:
        documents: Document chunks handed to ``Chroma.from_documents``.

    Returns:
        The Chroma vector store built from the documents.
    """
    return Chroma.from_documents(
        documents,
        embedding=get_embedding_model(),
        persist_directory=VECTOR_DB_PATH,
    )

# vectorstore/metadata_schema.py
def add_metadata_to_chunks(chunks, source_name):
    """Tag every chunk with the name of the document it came from.

    Args:
        chunks: Iterable of objects exposing a ``metadata`` attribute.
        source_name: Value stored under the ``"source"`` metadata key.

    Returns:
        The same ``chunks`` object, with each element updated in place.
    """
    for piece in chunks:
        existing = piece.metadata
        if existing:
            # Keep whatever metadata is already there; only set the source.
            existing["source"] = source_name
        else:
            # Missing or empty metadata is replaced by a fresh dict.
            piece.metadata = {"source": source_name}
    return chunks

# retriever/hybrid_search.py
from langchain.retrievers import BM25Retriever, EnsembleRetriever

def get_hybrid_retriever(chunks, vectorstore):
    """Combine BM25 keyword search and vector search into one retriever.

    Args:
        chunks: Document chunks indexed by the BM25 retriever.
        vectorstore: Store whose ``as_retriever`` supplies the vector side.

    Returns:
        An ``EnsembleRetriever`` over both retrievers, weighted 0.5 / 0.5,
        with each side configured for k=4.
    """
    top_k = 4
    keyword_side = BM25Retriever.from_documents(chunks)
    keyword_side.k = top_k
    dense_side = vectorstore.as_retriever(search_kwargs={"k": top_k})
    return EnsembleRetriever(
        retrievers=[keyword_side, dense_side],
        weights=[0.5, 0.5],
    )

# parser/pdf_parser.py
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from config import SOURCE_DOC
from vectorstore.metadata_schema import add_metadata_to_chunks
import os

def load_and_chunk_pdf():
    """Load ``SOURCE_DOC`` and split it into source-tagged chunks.

    Returns:
        The chunks produced by a ``RecursiveCharacterTextSplitter``
        (chunk_size=500, chunk_overlap=50), each tagged with the PDF's
        base file name through ``add_metadata_to_chunks``.
    """
    pages = PyPDFLoader(SOURCE_DOC).load()
    pieces = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    ).split_documents(pages)
    source_label = os.path.basename(SOURCE_DOC)
    return add_metadata_to_chunks(pieces, source_label)

# memory/conversation_buffer.py
from langchain.memory import ConversationBufferMemory

# Module-level conversation memory, imported by orchestrator/langgraph_flow.py
# and passed to the retrieval chain as memory=memory.
# History is kept under the "chat_history" key with return_messages=True.
# output_key="answer" names the chain output to record — presumably needed
# because the chain also returns source documents; confirm against the
# LangChain memory documentation.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key="answer"
)

# grader/doc_relevance.py
def is_relevant(doc, question):
    """Report whether the question text occurs verbatim in the document.

    The comparison is case-insensitive and matches the whole question as a
    single substring of ``doc.page_content``.

    Args:
        doc: Object exposing a ``page_content`` string.
        question: The user's question.

    Returns:
        ``True`` when the lower-cased question is contained in the
        lower-cased document text.
    """
    needle = question.lower()
    haystack = doc.page_content.lower()
    return needle in haystack

# grader/hallucination.py
def is_grounded(answer, documents):
    """Report whether any document's text appears verbatim in the answer.

    The comparison is case-insensitive. A document counts only when its
    entire ``page_content`` is a substring of the answer.

    NOTE(review): requiring the whole document to appear inside the answer
    is a very strict test — confirm this direction is the intended one.

    Args:
        answer: The generated answer text.
        documents: Iterable of objects exposing a ``page_content`` string.

    Returns:
        ``True`` when at least one non-blank document is contained in the
        answer, ``False`` otherwise (including when ``documents`` is empty).
    """
    # Lower-case the answer once rather than once per document.
    answer_text = answer.lower()
    for doc in documents:
        content = doc.page_content.lower()
        # An empty string is a substring of everything, so a blank document
        # would mark any answer as grounded; skip such documents.
        if content.strip() and content in answer_text:
            return True
    return False

# grader/human_feedback.py
def human_approval_required():
    """Ask the operator on stdin whether the generated answer is approved.

    Returns:
        ``False`` only when the reply, trimmed and lower-cased, is exactly
        ``"yes"``; ``True`` for any other reply.
    """
    reply = input("\nApprove the answer? (yes/no): ")
    normalized = reply.strip().lower()
    return normalized != "yes"

# orchestrator/langgraph_flow.py
from parser.pdf_parser import load_and_chunk_pdf
from vectorstore.db_handler import get_vectorstore
from retriever.hybrid_search import get_hybrid_retriever
from llm.generate import get_llm
from llm.react_prompt import react_prompt
from memory.conversation_buffer import memory
from utils.cite_sources import format_sources
from grader.human_feedback import human_approval_required

from langchain.chains import ConversationalRetrievalChain

# Chain built by _get_rag_chain() on first use; None until then.
_RAG_CHAIN = None


def _get_rag_chain():
    """Return the conversational retrieval chain, building it once per process.

    Building the chain loads and chunks the PDF, embeds the chunks into the
    vector store and creates the hybrid retriever. Doing that for every
    question repeats all of this work and calls ``get_vectorstore`` (which
    writes to the persistent store directory) again with the same chunks, so
    the result is cached at module level.
    """
    global _RAG_CHAIN
    if _RAG_CHAIN is None:
        chunks = load_and_chunk_pdf()
        vectorstore = get_vectorstore(chunks)
        retriever = get_hybrid_retriever(chunks, vectorstore)
        _RAG_CHAIN = ConversationalRetrievalChain.from_llm(
            llm=get_llm(),
            retriever=retriever,
            memory=memory,
            return_source_documents=True,
            combine_docs_chain_kwargs={"prompt": react_prompt},
            output_key="answer",
        )
    return _RAG_CHAIN


def run_rag_workflow(question):
    """Answer ``question`` with the RAG chain, gated by human approval.

    The chain is invoked up to three times. After each attempt the sources
    and the generated answer are printed and the operator is asked to
    approve; the first approved answer is returned.

    Args:
        question: The user's question text.

    Returns:
        The approved answer string, or the fixed message
        ``"Answer rejected after multiple attempts."`` when every attempt
        is rejected.
    """
    rag_chain = _get_rag_chain()

    max_attempts = 3
    for _ in range(max_attempts):
        result = rag_chain.invoke({"question": question})
        sources = format_sources(result.get("source_documents", []))

        print("\nSources:")
        for src in sources:
            print("-", src)

        print("\nGenerated Answer:")
        print(result["answer"])

        # human_approval_required() returns False when the operator says "yes".
        if not human_approval_required():
            return result["answer"]

        print("\nRetrying with same question...")

    return "Answer rejected after multiple attempts."

# utils/cite_sources.py
def format_sources(source_documents):
    """Collect the ``"source"`` metadata value of each document.

    Args:
        source_documents: Iterable of objects exposing a ``metadata`` dict.

    Returns:
        A list with one entry per document, in order; ``"[unknown]"`` is
        used where the metadata has no ``"source"`` key.
    """
    labels = []
    for document in source_documents:
        labels.append(document.metadata.get("source", "[unknown]"))
    return labels


Section 2: Multi-Agent Human-in-the-Loop

In [None]:
# main.py
from orchestrator.langgraph_flow import graph

if __name__ == "__main__":
    # Interactive console loop: each question is run through the compiled
    # graph and the "answer" entry of the resulting state is printed.
    print("Multi-Agent Human-in-the-Loop RAG System Ready. Type 'exit' to quit.")
    while True:
        try:
            # Strip so surrounding whitespace does not defeat the exit check.
            query = input("\nUser: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt ends the session cleanly.
            print()
            break
        if not query:
            # Blank line: prompt again instead of invoking the graph.
            continue
        if query.lower() in ("exit", "quit"):
            break
        result = graph.invoke({"question": query})
        print("\nAssistant:", result.get("answer", "[No answer generated]"))

# config.py
# Model name passed to the Ollama LLM wrapper in llm/generate.py.
MODEL_NAME = "mistral"
# Model name passed to OllamaEmbeddings in embeddings/embedder.py.
EMBEDDING_MODEL = "nomic-embed-text"
# Directory given to Chroma as persist_directory in vectorstore/db_handler.py.
VECTOR_DB_PATH = "db"
# PDF that parser/pdf_parser.py loads and splits into chunks.
SOURCE_DOC = "data/source_docs/ai_education_article.pdf"

# requirements.txt
langchain
langchain-community
chromadb
ollama
reportlab
rank_bm25
langgraph

# llm/generate.py
from langchain_community.llms import Ollama
from config import MODEL_NAME

def get_llm():
    """Build the Ollama LLM client used by the generation step.

    Returns:
        An ``Ollama`` instance for ``MODEL_NAME`` with temperature 0.2.
    """
    client = Ollama(
        model=MODEL_NAME,
        temperature=0.2,
    )
    return client

# llm/react_prompt.py
from langchain.prompts import PromptTemplate

# Prompt template with two placeholders, {question} and {context}.
# orchestrator/langgraph_flow.py passes it to the retrieval chain through
# combine_docs_chain_kwargs={"prompt": react_prompt}.
# The text ends on "Reasoning Steps:" / "1." — presumably to prime the model
# to continue the numbered list before giving its final answer.
react_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are an intelligent assistant using the ReAct (Reasoning + Acting) technique.
Break down the user query into reasoning steps and retrieve relevant information accordingly.

Question: {question}
Relevant Context:
{context}

First, list your reasoning steps clearly.
Then, provide a final answer based on those steps and the retrieved context.

Reasoning Steps:
1.
"""
)

# embeddings/embedder.py
from langchain.embeddings import OllamaEmbeddings
from config import EMBEDDING_MODEL

def get_embedding_model():
    """Build the Ollama embedding client for ``EMBEDDING_MODEL``.

    Returns:
        An ``OllamaEmbeddings`` instance.
    """
    embedding_settings = {"model": EMBEDDING_MODEL}
    return OllamaEmbeddings(**embedding_settings)

# vectorstore/db_handler.py
from langchain_community.vectorstores import Chroma
from embeddings.embedder import get_embedding_model
from config import VECTOR_DB_PATH

def get_vectorstore(documents):
    """Create a Chroma store from ``documents``, persisted at ``VECTOR_DB_PATH``.

    Args:
        documents: Document chunks handed to ``Chroma.from_documents``.

    Returns:
        The Chroma vector store built from the documents.
    """
    embedder = get_embedding_model()
    store = Chroma.from_documents(
        documents,
        embedding=embedder,
        persist_directory=VECTOR_DB_PATH,
    )
    return store

# vectorstore/metadata_schema.py
def add_metadata_to_chunks(chunks, source_name):
    """Record ``source_name`` under the ``"source"`` key of every chunk.

    Args:
        chunks: Iterable of objects exposing a ``metadata`` attribute.
        source_name: Value stored under the ``"source"`` metadata key.

    Returns:
        The same ``chunks`` object, with each element updated in place.
    """
    for item in chunks:
        current = item.metadata
        if current:
            # Existing metadata is kept; only the source entry is set.
            current["source"] = source_name
        else:
            # Missing or empty metadata is replaced by a fresh dict.
            item.metadata = {"source": source_name}
    return chunks

# retriever/hybrid_search.py
from langchain.retrievers import BM25Retriever, EnsembleRetriever

def get_hybrid_retriever(chunks, vectorstore):
    """Build an ensemble of a BM25 retriever and a vector-store retriever.

    Args:
        chunks: Document chunks indexed by the BM25 retriever.
        vectorstore: Store whose ``as_retriever`` supplies the vector side.

    Returns:
        An ``EnsembleRetriever`` over both retrievers, weighted 0.5 / 0.5,
        with each side configured for k=4.
    """
    per_retriever_k = 4
    lexical = BM25Retriever.from_documents(chunks)
    lexical.k = per_retriever_k
    semantic = vectorstore.as_retriever(search_kwargs={"k": per_retriever_k})
    members = [lexical, semantic]
    return EnsembleRetriever(retrievers=members, weights=[0.5, 0.5])

# parser/pdf_parser.py
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from config import SOURCE_DOC
from vectorstore.metadata_schema import add_metadata_to_chunks
import os

def load_and_chunk_pdf():
    """Read ``SOURCE_DOC`` and return its text as source-tagged chunks.

    Returns:
        The chunks produced by a ``RecursiveCharacterTextSplitter``
        (chunk_size=500, chunk_overlap=50), each tagged with the PDF's
        base file name through ``add_metadata_to_chunks``.
    """
    loaded_pages = PyPDFLoader(SOURCE_DOC).load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    )
    return add_metadata_to_chunks(
        text_splitter.split_documents(loaded_pages),
        os.path.basename(SOURCE_DOC),
    )

# memory/conversation_buffer.py
from langchain.memory import ConversationBufferMemory

# Module-level conversation memory, imported by orchestrator/langgraph_flow.py
# and passed to the retrieval chain as memory=memory.
# History is kept under the "chat_history" key with return_messages=True.
# output_key="answer" names the chain output to record — presumably needed
# because the chain also returns source documents; confirm against the
# LangChain memory documentation.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key="answer"
)

# grader/doc_relevance.py
def is_relevant(doc, question):
    """Check, ignoring case, whether the document contains the question text.

    The whole question must occur as one substring of ``doc.page_content``.

    Args:
        doc: Object exposing a ``page_content`` string.
        question: The user's question.

    Returns:
        ``True`` when the lower-cased question is found in the lower-cased
        document text, otherwise ``False``.
    """
    wanted = question.lower()
    body = doc.page_content.lower()
    if wanted in body:
        return True
    return False

# grader/hallucination.py
def is_grounded(answer, documents):
    """Report whether any document's text appears verbatim in the answer.

    The comparison is case-insensitive. A document counts only when its
    entire ``page_content`` is a substring of the answer.

    NOTE(review): requiring the whole document to appear inside the answer
    is a very strict test — confirm this direction is the intended one.

    Args:
        answer: The generated answer text.
        documents: Iterable of objects exposing a ``page_content`` string.

    Returns:
        ``True`` when at least one non-blank document is contained in the
        answer, ``False`` otherwise (including when ``documents`` is empty).
    """
    # Lower-case the answer once rather than once per document.
    answer_text = answer.lower()
    for doc in documents:
        content = doc.page_content.lower()
        # An empty string is a substring of everything, so a blank document
        # would mark any answer as grounded; skip such documents.
        if content.strip() and content in answer_text:
            return True
    return False

# grader/human_feedback.py
def human_approval_required():
    """Ask the operator on stdin whether the generated answer is approved.

    Returns:
        ``False`` only when the reply, trimmed and lower-cased, is exactly
        ``"yes"``; ``True`` for any other reply.
    """
    # The prompt starts with an escaped newline. A raw line break inside the
    # quotes is an unterminated string literal and stops the module compiling.
    reply = input("\nApprove the answer? (yes/no): ")
    return reply.strip().lower() != "yes"

# utils/cite_sources.py
def format_sources(source_documents):
    """List the ``"source"`` metadata value of each document, in order.

    Args:
        source_documents: Iterable of objects exposing a ``metadata`` dict.

    Returns:
        A list with one entry per document; ``"[unknown]"`` is used where
        the metadata has no ``"source"`` key.
    """
    names = []
    for entry in source_documents:
        origin = entry.metadata.get("source", "[unknown]")
        names.append(origin)
    return names

# orchestrator/langgraph_flow.py
from parser.pdf_parser import load_and_chunk_pdf
from vectorstore.db_handler import get_vectorstore
from retriever.hybrid_search import get_hybrid_retriever
from llm.generate import get_llm
from llm.react_prompt import react_prompt
from memory.conversation_buffer import memory
from utils.cite_sources import format_sources
from grader.human_feedback import human_approval_required
from langgraph.graph import StateGraph, END
from langchain_core.messages import HumanMessage

from langchain.chains import ConversationalRetrievalChain

# Setup shared components
# These statements run when the module is imported: the PDF is loaded and
# chunked, the chunks are embedded into the vector store, and the hybrid
# retriever and LLM client are created before any question is asked.
chunks = load_and_chunk_pdf()
vectorstore = get_vectorstore(chunks)
retriever = get_hybrid_retriever(chunks, vectorstore)
llm = get_llm()

# Conversational retrieval chain shared by the agents below. It uses the
# module-level memory, the ReAct prompt for combining documents, and returns
# the retrieved source documents next to the "answer" output.
rag_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    return_source_documents=True,
    combine_docs_chain_kwargs={"prompt": react_prompt},
    output_key="answer"
)

# Define LangGraph nodes as agents
def retrieval_agent(state):
    """Graph node: fetch documents for the question in ``state``.

    Args:
        state: Graph state; ``state["question"]`` is the user's question.

    Returns:
        A state update ``{"documents": [...]}`` holding the retriever's
        results.
    """
    print("\n[Retrieval Agent Invoked]")
    # Use the Runnable ``invoke`` entry point, as already done for rag_chain;
    # ``get_relevant_documents`` is deprecated in current LangChain releases.
    return {"documents": retriever.invoke(state["question"])}

def generation_agent(state):
    """Graph node: run the RAG chain on the question and print the result.

    Args:
        state: Graph state; ``state["question"]`` is the user's question.

    Returns:
        A state update with ``"answer"`` (the chain's answer) and
        ``"source_documents"`` (the chain's source documents, or an empty
        list when the chain returned none).
    """
    print("\n[Generation Agent Invoked]")
    outcome = rag_chain.invoke({"question": state["question"]})
    retrieved = outcome.get("source_documents", [])
    labels = format_sources(retrieved)

    print("\nSources:")
    for label in labels:
        print("-", label)

    generated = outcome["answer"]
    print("\nGenerated Answer:")
    print(generated)

    return {"answer": generated, "source_documents": retrieved}

def human_feedback_agent(state):
    """Graph node: ask the operator to approve the generated answer.

    Args:
        state: Graph state (not read by this node).

    Returns:
        A state update ``{"approved": bool}``; ``True`` when
        ``human_approval_required()`` reports that no rework is needed.
    """
    print("\n[Human Validator Agent Invoked]")
    needs_rework = human_approval_required()
    return {"approved": not needs_rework}

# Define state graph
class GraphState(dict):
    """State schema handed to ``StateGraph``; a dict with annotated keys."""

    # The user's question; read by retrieval_agent and generation_agent.
    question: str
    # Written by retrieval_agent from the hybrid retriever's results.
    documents: list
    # Written by generation_agent with the chain's answer.
    answer: str
    # Written by human_feedback_agent; read by the routing after "validate".
    approved: bool
    # Written by generation_agent together with the answer.
    source_documents: list

# Assemble the graph: retrieve -> generate -> validate, with a conditional
# edge after validation.
workflow = StateGraph(GraphState)
workflow.add_node("retrieve", retrieval_agent)
workflow.add_node("generate", generation_agent)
workflow.add_node("validate", human_feedback_agent)

workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve", "generate")
workflow.add_edge("generate", "validate")
# After "validate": an approved answer ends the run; anything else goes back
# to "generate" (retrieval is not repeated).
# NOTE(review): the state carries no attempt counter, so repeated rejections
# keep looping back to "generate" — consider bounding the retries.
workflow.add_conditional_edges(
    "validate",
    lambda state: "end" if state.get("approved") else "generate",
    {
        "end": END,
        "generate": "generate"
    }
)

# Compiled graph imported by main.py and run with graph.invoke(...).
graph = workflow.compile()
