In [1]:
!pip install -U langgraph langchain langchain-community langchain-huggingface chromadb duckduckgo-search pydantic




In [55]:
import os
from typing import List, TypedDict

from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_community.vectorstores import Chroma
from langchain_community.tools import DuckDuckGoSearchResults

from langchain_huggingface import HuggingFaceEndpoint
from langgraph.graph import StateGraph, START, END


In [None]:
# Hugging Face token: read from the environment so it is never stored in the notebook.
# Raises KeyError if HF_TOKEN is not set.
HF_TOKEN = os.environ["HF_TOKEN"]

# Model (text-generation ONLY): the endpoint below is created with task="text-generation".
HF_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.2"

# Documents folder. Set the LMKR_DOCS_DIR environment variable to point at the
# data on another machine; the original local path remains the default.
DOCS_DIR = os.environ.get(
    "LMKR_DOCS_DIR",
    r"C:\Users\PMLS\Downloads\langgraph\lmkr_data",
)

# Site that web searches are restricted to.
LMKR_SITE = "https://lmkr.com/"


In [57]:
# Settings for the hosted text-generation endpoint shared by every chain below.
# NOTE(review): the run recorded in this notebook ends with a 404 from the
# hf-inference router for this repo id — confirm the model is still served by
# this provider before relying on it.
_llm_settings = {
    "repo_id": HF_REPO_ID,
    "task": "text-generation",
    "provider": "hf-inference",
    "huggingfacehub_api_token": HF_TOKEN,
    "max_new_tokens": 600,
    "temperature": 0.2,
    "top_p": 0.9,
}

LLM = HuggingFaceEndpoint(**_llm_settings)


In [58]:
# Load every UTF-8 .txt file found (recursively) under DOCS_DIR.
loader = DirectoryLoader(
    DOCS_DIR,
    glob="**/*.txt",
    loader_cls=TextLoader,
    loader_kwargs={"encoding": "utf-8"},
)

docs = loader.load()

# Split into overlapping chunks of up to 800 characters (120 characters of overlap).
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=120)
splits = splitter.split_documents(docs)

# Stable ids ("<source>::chunk-<position within that source>") so that re-running
# this cell against the persisted collection overwrites the same entries instead
# of appending a fresh copy of every chunk under new random ids.
# Caveat: entries written by earlier runs under other ids are not removed.
_chunks_seen = {}
split_ids = []
for _chunk in splits:
    _source = _chunk.metadata.get("source", "unknown")
    _position = _chunks_seen.get(_source, 0)
    _chunks_seen[_source] = _position + 1
    split_ids.append(f"{_source}::chunk-{_position}")

vectorstore = Chroma.from_documents(
    splits,
    # No LangChain embedding object is supplied; the recorded output shows Chroma
    # downloading its own ONNX all-MiniLM-L6-v2 model to embed the chunks.
    embedding=None,
    ids=split_ids,
    persist_directory="./chroma_lmkr",
)

# Return the 6 nearest chunks per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 6})

len(splits)


C:\Users\PMLS\.cache\chroma\onnx_models\all-MiniLM-L6-v2\onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:57<00:00, 1.45MiB/s]


320

In [59]:
# DuckDuckGo search tool configured to return a list of results rather than one string.
ddg = DuckDuckGoSearchResults(output_format="list")

def web_search_lmkr(question: str, k: int = 5) -> List[Document]:
    """Search DuckDuckGo restricted to the LMKR site and wrap the hits as Documents.

    Args:
        question: Free-text query.
        k: Maximum number of results to keep.
            NOTE(review): the tool itself may return fewer than ``k`` results
            depending on its own result limit — confirm if ``k`` must be honoured.

    Returns:
        Up to ``k`` Documents. Each page_content is the result title, a newline,
        then the snippet; ``metadata["source"]`` is the result link.
    """
    # The `site:` operator takes a bare host name, so drop the scheme and any
    # path from LMKR_SITE ("https://lmkr.com/" -> "lmkr.com").
    host = LMKR_SITE.split("://", 1)[-1].split("/", 1)[0]
    q = f"site:{host} {question}"
    results = ddg.invoke(q)
    return [
        Document(
            page_content=f"{r.get('title','')}\n{r.get('snippet','')}",
            metadata={"source": r.get("link", "")},
        )
        for r in results[:k]
    ]


In [60]:
# Routing prompt: asks the model to pick exactly one of four strategy labels
# for the incoming question.
_ROUTER_TEMPLATE = (
    "Decide how to answer the question.\n\n"
    "Choose ONE label:\n"
    "- no_retrieval\n"
    "- web_search\n"
    "- rag_single\n"
    "- rag_iterative\n\n"
    "Rules:\n"
    "- no_retrieval: greetings or common knowledge\n"
    "- web_search: recent or website-only info\n"
    "- rag_single: answerable from local docs\n"
    "- rag_iterative: ambiguous, may need rewrite\n\n"
    "Question:\n{question}\n\n"
    "Answer with ONLY the label."
)
router_prompt = PromptTemplate.from_template(_ROUTER_TEMPLATE)

# prompt -> LLM -> plain string
router_chain = router_prompt | LLM | StrOutputParser()


In [61]:
# Relevance grader: yes/no judgement on whether one document relates to the question.
_REL_TEMPLATE = (
    "Question:\n{question}\n\n"
    "Document:\n{document}\n\n"
    "Is the document relevant? Answer ONLY yes or no."
)
rel_prompt = PromptTemplate.from_template(_REL_TEMPLATE)

# prompt -> LLM -> plain string
rel_chain = rel_prompt | LLM | StrOutputParser()


In [62]:
# Groundedness grader: yes/no judgement on whether the answer is fully
# supported by the supplied documents.
_GROUND_TEMPLATE = (
    "Documents:\n{documents}\n\n"
    "Answer:\n{generation}\n\n"
    "Is the answer fully supported by the documents? "
    "Answer ONLY yes or no."
)
ground_prompt = PromptTemplate.from_template(_GROUND_TEMPLATE)

# prompt -> LLM -> plain string
ground_chain = ground_prompt | LLM | StrOutputParser()


In [63]:
# Usefulness grader: yes/no judgement on whether the answer addresses the question.
_USE_TEMPLATE = (
    "Question:\n{question}\n\n"
    "Answer:\n{generation}\n\n"
    "Does the answer address the question? "
    "Answer ONLY yes or no."
)
use_prompt = PromptTemplate.from_template(_USE_TEMPLATE)

# prompt -> LLM -> plain string
use_chain = use_prompt | LLM | StrOutputParser()


In [64]:
# Query rewriter: asks the model to rephrase the question for better retrieval.
_REWRITE_TEMPLATE = (
    "Rewrite the question to improve document retrieval.\n\n"
    "Original question:\n{question}\n\n"
    "Rewritten question:"
)
rewrite_prompt = PromptTemplate.from_template(_REWRITE_TEMPLATE)

# prompt -> LLM -> plain string
rewrite_chain = rewrite_prompt | LLM | StrOutputParser()


In [65]:
# Answer generator: restricts the model to the supplied context and tells it
# to admit when the answer is not there.
_RAG_TEMPLATE = (
    "Use ONLY the context to answer.\n"
    "If the answer is not present, say you don't know.\n\n"
    "Context:\n{context}\n\n"
    "Question:\n{question}"
)
rag_prompt = PromptTemplate.from_template(_RAG_TEMPLATE)

# prompt -> LLM -> plain string
rag_chain = rag_prompt | LLM | StrOutputParser()


In [66]:
def route_label(q: str) -> str:
    """Ask the router chain which strategy to use for question ``q``.

    Returns:
        One of "no_retrieval", "web_search", "rag_single", "rag_iterative".
        If the reply is not exactly a label, the label mentioned earliest in
        the reply is used; if none is mentioned, "rag_iterative" is returned.
    """
    labels = ("no_retrieval", "web_search", "rag_single", "rag_iterative")
    out = router_chain.invoke({"question": q}).strip().lower()
    if out in labels:
        return out
    # Models often add punctuation or a few words ("Label: rag_single."), so
    # look for a label inside the reply before giving up.
    mentioned = [(out.find(label), label) for label in labels if label in out]
    if mentioned:
        return min(mentioned)[1]
    return "rag_iterative"

def is_relevant(q: str, d: Document) -> bool:
    """Return True when the relevance grader's reply for document ``d`` starts with "y".

    Args:
        q: The question being answered.
        d: Candidate document; only its ``page_content`` is sent to the grader.
    """
    verdict = rel_chain.invoke({"question": q, "document": d.page_content})
    # Strip first: a reply with leading whitespace (" Yes") would otherwise be
    # read as "no".
    return verdict.strip().lower().startswith("y")

def is_grounded(docs, gen) -> bool:
    """Return True when the groundedness grader's reply starts with "y".

    Args:
        docs: Documents whose ``page_content`` values are joined with blank
            lines and shown to the grader.
        gen: The generated answer to check against those documents.
    """
    joined = "\n\n".join(d.page_content for d in docs)
    verdict = ground_chain.invoke({"documents": joined, "generation": gen})
    # Strip first: a reply with leading whitespace (" Yes") would otherwise be
    # read as "no".
    return verdict.strip().lower().startswith("y")

def is_useful(q, gen) -> bool:
    """Return True when the usefulness grader's reply starts with "y".

    Args:
        q: The question that was asked.
        gen: The generated answer to judge against the question.
    """
    verdict = use_chain.invoke({"question": q, "generation": gen})
    # Strip first: a reply with leading whitespace (" Yes") would otherwise be
    # read as "no".
    return verdict.strip().lower().startswith("y")

def rewrite(q: str) -> str:
    """Return the rewrite chain's reformulation of ``q`` with surrounding whitespace removed."""
    rewritten = rewrite_chain.invoke({"question": q})
    return rewritten.strip()

def format_docs(docs):
    """Concatenate the page_content of ``docs``, separated by a ``---`` rule between blank lines."""
    separator = "\n\n---\n\n"
    contents = [doc.page_content for doc in docs]
    return separator.join(contents)

def generate(q, docs):
    """Answer ``q`` from ``docs`` via the RAG chain and return the stripped reply."""
    payload = {"question": q, "context": format_docs(docs)}
    answer = rag_chain.invoke(payload)
    return answer.strip()


In [67]:
class GraphState(TypedDict):
    """State dictionary passed between the nodes of the workflow graph."""
    # Current question text; node_rewrite replaces it with the rewritten question.
    question: str
    # Strategy label written by node_route (the value returned by route_label).
    route: str
    # Documents set by node_retrieve or node_web, and filtered by node_grade_docs.
    documents: List[Document]
    # Latest answer text, set by node_generate or node_no_retrieval.
    generation: str
    # Number of question rewrites so far; node_rewrite increments it.
    retries: int


In [68]:
# Upper bound on question rewrites per run (compared against state["retries"]).
MAX_RETRIES = 2

def node_route(state):
    """Classify the question and return a copy of the state with "route" set."""
    updated = dict(state)
    updated["route"] = route_label(state["question"])
    return updated

def node_no_retrieval(state):
    """Send the question straight to the LLM and store its reply as "generation"."""
    updated = dict(state)
    updated["generation"] = LLM.invoke(state["question"])
    return updated

def node_retrieve(state):
    """Query the retriever with the current question and store the hits as "documents"."""
    updated = dict(state)
    updated["documents"] = retriever.invoke(state["question"])
    return updated

def node_grade_docs(state):
    """Keep only the documents that the relevance grader accepts for the current question."""
    question = state["question"]
    kept = []
    for candidate in state["documents"]:
        if is_relevant(question, candidate):
            kept.append(candidate)
    updated = dict(state)
    updated["documents"] = kept
    return updated

def node_web(state):
    """Run the site-restricted web search and store the results as "documents"."""
    updated = dict(state)
    updated["documents"] = web_search_lmkr(state["question"])
    return updated

def node_rewrite(state):
    """Replace the question with its rewritten form and count one more retry."""
    new_question = rewrite(state["question"])
    updated = dict(state)
    updated["question"] = new_question
    updated["retries"] = state["retries"] + 1
    return updated

def node_generate(state):
    """Generate an answer from the current question and documents and store it as "generation"."""
    answer = generate(state["question"], state["documents"])
    updated = dict(state)
    updated["generation"] = answer
    return updated


In [69]:
def from_route(state):
    """Return the route label stored in the state; used to pick the branch after routing."""
    chosen = state["route"]
    return chosen

def decide_after_docs(state):
    """Choose the step after document grading.

    Returns "rewrite" only when no documents remain, the route is
    "rag_iterative", and fewer than MAX_RETRIES rewrites have been made;
    otherwise "generate".
    """
    if state["documents"]:
        return "generate"
    if state["route"] != "rag_iterative":
        return "generate"
    if state["retries"] >= MAX_RETRIES:
        return "generate"
    return "rewrite"

def grade_generation(state):
    """Choose the step after an answer has been generated.

    Returns:
        "rewrite" when the answer is not grounded in the documents, or does not
        address the question, and fewer than MAX_RETRIES rewrites have been
        made; otherwise "end".
    """
    can_retry = state["retries"] < MAX_RETRIES

    if state["documents"] and not is_grounded(state["documents"], state["generation"]):
        # Regenerating from the same question and documents has no counter to
        # bound it, so an ungrounded answer goes through "rewrite" instead,
        # which spends the shared retry budget. Once the budget is exhausted the run ends.
        return "rewrite" if can_retry else "end"

    # Check the budget first so no grader call is made when a retry is impossible.
    if can_retry and not is_useful(state["question"], state["generation"]):
        return "rewrite"
    return "end"


In [70]:
# Assemble the workflow graph over GraphState.
wf = StateGraph(GraphState)

# Node name -> handler, registered in this order.
_NODES = (
    ("route", node_route),
    ("no_retrieval", node_no_retrieval),
    ("retrieve", node_retrieve),
    ("grade_docs", node_grade_docs),
    ("web", node_web),
    ("rewrite", node_rewrite),
    ("generate", node_generate),
)
for _node_name, _node_fn in _NODES:
    wf.add_node(_node_name, _node_fn)

# Every run starts at the router.
wf.add_edge(START, "route")

# Router label -> next node; both RAG labels start with local retrieval.
_after_route = {
    "no_retrieval": "no_retrieval",
    "web_search": "web",
    "rag_single": "retrieve",
    "rag_iterative": "retrieve",
}
wf.add_conditional_edges("route", from_route, _after_route)

# Retrieved documents are always graded before anything else happens.
wf.add_edge("retrieve", "grade_docs")

_after_grading = {
    "rewrite": "rewrite",
    "generate": "generate",
}
wf.add_conditional_edges("grade_docs", decide_after_docs, _after_grading)

# A rewritten question goes back through retrieval; web results go straight to generation.
wf.add_edge("rewrite", "retrieve")
wf.add_edge("web", "generate")

_after_generation = {
    "generate": "generate",
    "rewrite": "rewrite",
    "end": END,
}
wf.add_conditional_edges("generate", grade_generation, _after_generation)

# Answers produced without retrieval are not graded.
wf.add_edge("no_retrieval", END)

app = wf.compile()


In [71]:
def ask(q):
    """Run the compiled graph on question ``q`` from a blank state and print the answer."""
    initial_state = dict(
        question=q,
        route="",
        documents=[],
        generation="",
        retries=0,
    )
    final_state = app.invoke(initial_state)
    print(final_state["generation"])

# Example questions, asked in this order.
# NOTE(review): presumably chosen to exercise the local-docs, web-search and
# no-retrieval routes respectively — the actual route is decided by the model.
for _question in (
    "When was LMKR founded?",
    "What are the recent announcements on LMKR?",
    "Hi, how are you?",
):
    ask(_question)


HfHubHTTPError: 404 Client Error: Not Found for url: https://router.huggingface.co/hf-inference/models/mistralai/Mistral-7B-Instruct-v0.2 (Request ID: Root=1-694053cd-6d936cb07f17b43811475ae9;9b53d6a2-db43-4c9e-acc4-40afd3efe8ce)

In [73]:
from IPython.display import Markdown, display

# Mermaid source describing the compiled graph.
mermaid_src = app.get_graph().draw_mermaid()

# Wrap the source in a complete fenced block: the opening ```mermaid fence is
# now matched by a closing fence, so the Markdown is well-formed.
display(Markdown(f"\n```mermaid\n{mermaid_src}\n```\n"))


```mermaid
---
config:
  flowchart:
    curve: linear
---
graph TD;
	__start__([<p>__start__</p>]):::first
	route(route)
	no_retrieval(no_retrieval)
	retrieve(retrieve)
	grade_docs(grade_docs)
	web(web)
	rewrite(rewrite)
	generate(generate)
	__end__([<p>__end__</p>]):::last
	__start__ --> route;
	generate -. &nbsp;end&nbsp; .-> __end__;
	generate -.-> rewrite;
	grade_docs -.-> generate;
	grade_docs -.-> rewrite;
	retrieve --> grade_docs;
	rewrite --> retrieve;
	route -.-> no_retrieval;
	route -. &nbsp;rag_iterative&nbsp; .-> retrieve;
	route -. &nbsp;web_search&nbsp; .-> web;
	web --> generate;
	no_retrieval --> __end__;
	generate -.-> generate;
	classDef default fill:#f2f0ff,line-height:1.2
	classDef first fill-opacity:0
	classDef last fill:#bfb6fc

