In [None]:
!pip install docling langchain-text-splitters chromadb sentence-transformers rank-bm25 openai gradio langgraph

In [None]:
import os
import hashlib
from typing import TypedDict, List, Dict
from docling.document_converter import DocumentConverter
from langchain_text_splitters import MarkdownHeaderTextSplitter
import chromadb
from chromadb.utils import embedding_functions
from rank_bm25 import BM25Okapi
from openai import OpenAI
import gradio as gr
from langgraph.graph import StateGraph, END

# LLM Client
# The API key is read from the environment rather than being written into the
# source, so the credential is never committed or shared with the notebook.
# Any key that was previously embedded in this file should be treated as
# leaked and rotated.
_poe_api_key = os.environ.get("POE_API_KEY")
if not _poe_api_key:
    raise RuntimeError(
        "POE_API_KEY is not set. Export it in the environment before running this file."
    )

client = OpenAI(
    api_key=_poe_api_key,
    base_url="https://api.poe.com/v1",
)

def llm_call(system_prompt: str, user_prompt: str, model: str = "GPT-4o-mini") -> str:
    """Send one system + user prompt pair to the chat model and return its reply text.

    Args:
        system_prompt: Instruction placed in the "system" message.
        user_prompt: Content placed in the "user" message.
        model: Model name forwarded to the chat-completions endpoint.

    Returns:
        The text of the first choice. An empty string is returned when the
        response carries no text content, so callers can always treat the
        result as a ``str`` (for example, call ``.strip()`` on it).
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    # The message content is optional in the response; normalise a missing
    # value to "" so the declared return type holds.
    content = response.choices[0].message.content
    return content if content is not None else ""

# Document Processing
def get_cache_key(file_path: str) -> str:
    """Return the hex MD5 digest of a file's bytes, for use as a content-based cache key.

    The file is hashed in fixed-size chunks so that a large upload is never
    held in memory all at once. MD5 is used here only to recognise identical
    content, not for any security purpose.

    Args:
        file_path: Path of the file to fingerprint.

    Returns:
        The 32-character hexadecimal MD5 digest of the file's contents.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(file_path, 'rb') as f:
        # iter(callable, sentinel) keeps reading until f.read returns b"" (EOF).
        for block in iter(lambda: f.read(65536), b""):
            digest.update(block)
    return digest.hexdigest()

def process_document(file_path: str) -> List[str]:
    """Convert a document to Markdown and split it into header-delimited chunks.

    ``.txt`` and ``.md`` files are read directly as UTF-8 text; every other
    extension is converted with docling's ``DocumentConverter`` and exported
    to Markdown. The Markdown is then split on level 1-3 headers.

    Args:
        file_path: Path of the document to process.

    Returns:
        The text content of each section produced by the header splitter.

    Raises:
        ValueError: If the file is larger than 10 MB.
    """
    size_limit = 10 * 1024 * 1024  # 10 MB limit
    if os.path.getsize(file_path) > size_limit:
        raise ValueError(f"File {file_path} is too large (max 10MB).")

    _, suffix = os.path.splitext(file_path)
    if suffix.lower() in ('.txt', '.md'):
        # Plain-text formats need no conversion step.
        with open(file_path, 'r', encoding='utf-8') as handle:
            markdown_text = handle.read()
    else:
        markdown_text = DocumentConverter().convert(file_path).document.export_to_markdown()

    # One chunk per section, where sections are delimited by #, ## and ### headers.
    header_splitter = MarkdownHeaderTextSplitter(
        headers_to_split_on=[
            ("#", "Header 1"),
            ("##", "Header 2"),
            ("###", "Header 3"),
        ]
    )
    return [section.page_content for section in header_splitter.split_text(markdown_text)]

# Hybrid Retriever
class HybridRetriever:
    """Retrieve chunks by combining BM25 keyword ranking with vector similarity search.

    The chunk list is indexed twice: lexically with ``BM25Okapi`` and
    semantically in a Chroma collection using ``all-MiniLM-L6-v2`` sentence
    embeddings. ``retrieve`` merges the top hits of both indexes.

    An empty chunk list is accepted: nothing is indexed and ``retrieve``
    returns an empty list.
    """

    def __init__(self, chunks: List[str]):
        """Build both indexes over ``chunks``.

        Args:
            chunks: Text chunks to index. Chunk ``i`` is stored in the vector
                collection under the id ``str(i)``.
        """
        self.chunks = chunks
        self.bm25 = None
        self.embedding_function = None
        self.chroma_client = None
        self.collection = None
        if not chunks:
            # Nothing to index; BM25Okapi cannot be built over an empty corpus.
            return

        self.bm25 = BM25Okapi([self._tokenize(chunk) for chunk in chunks])
        self.embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
        self.chroma_client = chromadb.Client()

        # Start from an empty collection. With a fixed name,
        # get_or_create_collection can hand back a collection populated by an
        # earlier retriever in the same process; re-adding ids "0".."n-1"
        # would then mix stale entries with the current chunks.
        collection_name = "doc_chunks"
        self.chroma_client.get_or_create_collection(name=collection_name)
        self.chroma_client.delete_collection(name=collection_name)
        self.collection = self.chroma_client.create_collection(name=collection_name)
        self.collection.add(
            documents=chunks,
            ids=[str(i) for i in range(len(chunks))],
            embeddings=self.embedding_function(chunks),  # Precompute embeddings
        )

    @staticmethod
    def _tokenize(text: str) -> List[str]:
        """Lower-case ``text`` and split it on any run of whitespace."""
        return text.lower().split()

    def retrieve(self, query: str, top_k: int = 5) -> List[str]:
        """Return the chunks ranked highest for ``query`` by either index.

        Args:
            query: The search text.
            top_k: Maximum number of hits taken from each index. Values larger
                than the number of chunks are clamped.

        Returns:
            De-duplicated chunks, BM25 hits first (best first), followed by
            vector hits not already included. Empty when there are no chunks
            or ``top_k`` is not positive.
        """
        if not self.chunks or top_k <= 0:
            return []
        top_k = min(top_k, len(self.chunks))

        # BM25 retrieval
        bm25_scores = self.bm25.get_scores(self._tokenize(query))
        bm25_indices = sorted(range(len(bm25_scores)), key=lambda i: bm25_scores[i], reverse=True)[:top_k]

        # Vector retrieval
        query_embedding = self.embedding_function([query])[0]
        vector_results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=top_k,
        )
        vector_indices = [int(chunk_id) for chunk_id in vector_results['ids'][0]]

        # Combine and deduplicate; dict.fromkeys keeps first-seen order so the
        # ranking produced above is preserved.
        combined_indices = list(dict.fromkeys(bm25_indices + vector_indices))
        return [self.chunks[i] for i in combined_indices]

# LangGraph State
class State(TypedDict):
    """State dictionary shared by the nodes of the QA graph."""

    # The user's question; the only key present in the initial state.
    question: str
    # Chunks returned by the retriever for the question.
    documents: List[str]
    # Raw relevance verdict text produced by the relevance-check node.
    relevance: str
    # Answer text produced by the answer-generation node.
    answer: str
    # Verification report produced by the verification node.
    verification: str

# Nodes
def retrieve_documents(state: State) -> Dict:
    """Graph node: look up chunks for the question using the module-level retriever.

    Returns:
        A state update holding the retrieved chunks under ``"documents"``.
    """
    question = state["question"]
    found = retriever.retrieve(question)
    return {"documents": found}

def check_relevance(state: State) -> Dict:
    """Graph node: ask the model whether the retrieved chunks can answer the question.

    Returns:
        A state update holding the model's verdict, stripped of surrounding
        whitespace, under ``"relevance"``.
    """
    # Chunks are separated by a blank line inside the prompt.
    context = "\n\n".join(state["documents"])
    verdict = llm_call(
        "You are a relevance checker. Classify if the provided context can answer the question.",
        f"Question: {state['question']}\nContext: {context}\nOutput ONLY one of: CAN_ANSWER (fully answers), PARTIAL (partially answers), NO_MATCH (cannot answer).",
    )
    return {"relevance": verdict.strip()}

def generate_answer(state: State) -> Dict:
    """Graph node: have the model answer the question from the retrieved chunks only.

    Returns:
        A state update holding the model's reply under ``"answer"``.
    """
    # Chunks are separated by a blank line inside the prompt.
    context = "\n\n".join(state["documents"])
    reply = llm_call(
        "You are a research agent. Provide an accurate answer based ONLY on the provided context. Do not add external knowledge.",
        f"Question: {state['question']}\nContext: {context}\nAnswer the question concisely.",
    )
    return {"answer": reply}

def verify_answer(state: State) -> Dict:
    """Graph node: have the model check the generated answer against the retrieved chunks.

    Returns:
        A state update holding the model's report under ``"verification"``.
    """
    # Chunks are separated by a blank line inside the prompt.
    context = "\n\n".join(state["documents"])
    report = llm_call(
        "You are a verification agent. Check if the answer is fully supported by the context, identify any unsupported claims, contradictions, or irrelevance.",
        f"Question: {state['question']}\nAnswer: {state['answer']}\nContext: {context}\nOutput: 'Supported' or 'Unsupported', followed by a brief explanation.",
    )
    return {"verification": report}

# Conditional Edge
def relevance_condition(state: State):
    """Choose the next graph step from the relevance verdict.

    The verdict is free-form model output, so it is not compared for exact
    equality: the first ``CAN_ANSWER`` / ``PARTIAL`` / ``NO_MATCH`` label
    found in it (case-insensitive, as a whole word) decides the route. This
    tolerates replies such as ``"CAN_ANSWER."`` or ``"PARTIAL - covers part"``.

    Args:
        state: Graph state; ``state["relevance"]`` holds the verdict text.

    Returns:
        ``"generate_answer"`` when the first label is ``CAN_ANSWER`` or
        ``PARTIAL``; ``"end"`` otherwise, including when no label is found
        or the verdict is empty or ``None``.
    """
    import re  # local import: the module's import block does not include re

    verdict = str(state["relevance"] or "").upper()
    label = re.search(r"\b(CAN_ANSWER|PARTIAL|NO_MATCH)\b", verdict)
    if label is not None and label.group(1) in ("CAN_ANSWER", "PARTIAL"):
        return "generate_answer"
    return "end"

# Build Graph
# Topology:
#   retrieve -> check_relevance -> generate_answer -> verify -> END
#                              \-> END (when relevance_condition returns "end")
graph = StateGraph(State)
# Each node is registered under the name the edges below refer to.
graph.add_node("retrieve", retrieve_documents)
graph.add_node("check_relevance", check_relevance)
graph.add_node("generate_answer", generate_answer)
graph.add_node("verify", verify_answer)

graph.set_entry_point("retrieve")
graph.add_edge("retrieve", "check_relevance")
# relevance_condition returns one of the mapping's keys; the mapping translates
# that key into the next node (or END).
graph.add_conditional_edges("check_relevance", relevance_condition, {"generate_answer": "generate_answer", "end": END})
graph.add_edge("generate_answer", "verify")
graph.add_edge("verify", END)

# Compiled graph that qa_interface invokes for each question.
app = graph.compile()

# Global variables
# Maps a file's content hash (from get_cache_key) to the chunks produced by
# process_document, so identical content is processed only once.
cache: Dict[str, List[str]] = {}
# Retriever used by the retrieve_documents node; it is None until qa_interface
# assigns a HybridRetriever, which it does on every request.
retriever: HybridRetriever = None

# Gradio Interface Function
def qa_interface(files: List[str], question: str):
    """Answer a question about the uploaded documents (Gradio generator callback).

    Inputs are validated first. Each file is then converted to chunks (reusing
    the content-hash cache), the module-level retriever is rebuilt over all
    chunks, and the compiled graph is invoked with the question.

    Args:
        files: Paths of the uploaded files; may be ``None`` or empty when
            nothing was uploaded.
        question: The user's question.

    Yields:
        A "Processing..." status update followed by the final Markdown text,
        or a single explanatory message when the request cannot be served
        (no files, blank question, unreadable or oversized file, or no
        extractable text).
    """
    global retriever

    # Reject unusable requests before doing any work or calling the model.
    if not files:
        yield "Please upload at least one document."
        return
    if not question or not question.strip():
        yield "Please enter a question."
        return

    yield gr.update(value="Processing...")
    all_chunks: List[str] = []
    for file_path in files:
        try:
            cache_key = get_cache_key(file_path)
            if cache_key not in cache:
                cache[cache_key] = process_document(file_path)
        except (OSError, ValueError) as exc:
            # Covers unreadable files, the size limit, and text that is not valid UTF-8.
            yield f"Could not process the uploaded document: {exc}"
            return
        all_chunks.extend(cache[cache_key])

    if not all_chunks:
        # Indexing needs at least one chunk.
        yield "No text could be extracted from the uploaded documents."
        return

    retriever = HybridRetriever(all_chunks)  # Rebuild retriever with current chunks

    initial_state = {"question": question}
    result = app.invoke(initial_state)

    # "answer" is absent when the graph ended right after the relevance check.
    if "answer" not in result:
        yield "No relevant information found in the documents."
    else:
        answer = result["answer"]
        verification = result.get("verification", "Verification not performed.")
        yield f"**Answer:**\n{answer}\n\n**Verification Report:**\n{verification}"

# Gradio UI
# Two inputs (document upload and question box) feed qa_interface; its output
# is rendered as Markdown. Flagging is turned off.
iface = gr.Interface(
    fn=qa_interface,
    inputs=[
        gr.File(
            label="Upload Documents (PDF, DOCX, TXT, MD)",
            file_count="multiple",
            file_types=[".pdf", ".docx", ".txt", ".md"],
        ),
        gr.Textbox(label="Ask a Question"),
    ],
    outputs=gr.Markdown(),
    title="Document-Based QA System",
    description="Upload documents and ask questions. Answers are generated and verified using AI.",
    flagging_mode="never",
)

# Start the UI only when this file is executed directly.
if __name__ == "__main__":
    iface.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ebc23571823f5ffb2d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
