In [86]:
import os
import getpass

from langchain_core.vectorstores import VectorStore
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage, SystemMessage
from langchain.chat_models import init_chat_model

from langgraph.graph import END, MessagesState, StateGraph
from langgraph.prebuilt import ToolNode, tools_condition

# Shared module-level state read by the tool functions and graph nodes.
# Both names stay None until the `__main__` cell assigns real objects.
vector_store = None  # vector store that holds the loaded PDF documents
llm = None  # chat model invoked by the graph nodes


In [87]:
def get_new_vector_store() -> VectorStore:
    """Create a fresh in-memory vector store that uses OpenAI embeddings.

    Returns:
        A newly constructed ``InMemoryVectorStore`` whose embedding function
        is ``OpenAIEmbeddings`` with the ``text-embedding-3-large`` model.
    """
    # Kept as a function-local import, exactly as the original code did.
    from langchain_openai import OpenAIEmbeddings

    return InMemoryVectorStore(OpenAIEmbeddings(model="text-embedding-3-large"))

In [89]:
@tool
def load_pdf_into_vector(file_path: str) -> str:
    """Load a PDF file (local path or URL) into the vector store so its content can be queried."""
    # NOTE: `@tool` hands the docstring to the model as this tool's
    # description, so it has to describe loading (it used to be a copy of the
    # "clear" tool's text, making the two tools indistinguishable). Keep
    # maintainer-only notes in comments like this one, not in the docstring.
    #
    # Returns a status string in every case: the success message, or a
    # message starting with "Error loading PDF:" that the model can relay.

    # Reading a module-level name needs no `global` declaration.
    if vector_store is None:
        return "Error loading PDF: vector store is not initialized."

    try:
        from langchain_community.document_loaders import PyPDFLoader

        # The loader returns a list of documents, which are added to the
        # store as-is (no extra chunking is done here).
        pages = PyPDFLoader(file_path).load()
        vector_store.add_documents(documents=pages)
        return "Pdf is loaded into vector."
    except Exception as e:
        # Deliberately broad: any failure (bad path, download error, parse
        # error, embedding/API error) is reported back as text instead of
        # aborting the graph run.
        return f"Error loading PDF: {e}"

@tool
def clear_pdf_vector_store() -> str:
    """Clear the current PDF data and reset the vector store."""
    # The docstring is left verbatim on purpose: `@tool` uses it as the
    # description shown to the model, so rewording it would change behavior.
    global vector_store

    # "Clearing" means swapping in a brand-new store; the old one is simply
    # dropped by rebinding the module-level name.
    replacement_store = get_new_vector_store()
    vector_store = replacement_store
    return "Vector store cleared."

@tool(response_format="content_and_artifact")
def query_pdf_with_vector(query: str) -> tuple[str, list]:
    """Retrieve information related to a query from the loaded PDF."""
    # The docstring is left verbatim on purpose: `@tool` uses it as the
    # description shown to the model.
    global vector_store

    # Fetch the two nearest documents for the query.
    matches = vector_store.similarity_search(query, k=2)

    # Render each hit as a "Source / Content" section; sections are separated
    # by a blank line.
    sections = []
    for match in matches:
        sections.append(f"Source: {match.metadata}\nContent: {match.page_content}")
    context_text = "\n\n".join(sections)

    # First element is the text content of the tool result, second is the
    # artifact (the raw retrieved documents).
    return context_text, matches


In [None]:
# from langchain.tools import Tool
# from langgraph.prebuilt import ToolNode

# tools = ToolNode[
#     Tool(
#         name = "load_pdf",
#         func = lambda input: load_pdf_into_vector(input),
#         description = "Use this tool when the user asks to load or upload a PDF file. Input should be the file url, like 'https://arxiv.org/pdf/2503.00085'."
#     ),
#     Tool(
#         name = "clear_pdf",
#         func = lambda _: clear_pdf_vector_store(),
#         description = "Clear the PDFs or data from vector",
#     ),
#     Tool(
#         name = "query_pdf",
#         func = lambda input: query_pdf_with_vector(input),
#         description = "Query the PDFs with a question",
#     )
# ]


In [91]:
def query_or_respond(state: MessagesState) -> dict:
    """Ask the chat model for its next message with the PDF tools attached.

    The model receives the full message history from ``state`` and may either
    answer directly or emit tool calls for one of the three PDF tools.

    Args:
        state: Graph state; only ``state["messages"]`` is read.

    Returns:
        A state update of the form ``{"messages": [<model reply>]}``.
    """
    pdf_tools = [load_pdf_into_vector, clear_pdf_vector_store, query_pdf_with_vector]
    reply = llm.bind_tools(pdf_tools).invoke(state["messages"])
    return {"messages": [reply]}

def generate(state: MessagesState) -> dict:
    """Produce the final answer from the tool outputs collected in the state.

    All ``ToolMessage`` contents found in ``state["messages"]`` are appended
    to a fixed system prompt; the model is then invoked with that prompt
    followed by the human/AI turns that do not carry tool calls.

    Args:
        state: Graph state; only ``state["messages"]`` is read.

    Returns:
        A state update of the form ``{"messages": [<model reply>]}``.
    """
    history = state["messages"]

    # Tool outputs in conversation order, separated by blank lines.
    tool_texts = [entry.content for entry in history if isinstance(entry, ToolMessage)]
    context_block = "\n\n".join(tool_texts)

    prompt_text = (
        "You are an assistant for answering questions based on retrieved context. "
        "If you don't know the answer, say so. Be concise (max 3 sentences).\n\n"
        + context_block
    )

    # Keep only plain human/AI turns: tool messages are already folded into
    # the system prompt, and AI messages that carry tool calls are skipped.
    dialogue = []
    for entry in history:
        if not isinstance(entry, (HumanMessage, AIMessage)):
            continue
        if getattr(entry, "tool_calls", False):
            continue
        dialogue.append(entry)

    answer = llm.invoke([SystemMessage(content=prompt_text), *dialogue])
    return {"messages": [answer]}

In [None]:
if __name__ == "__main__":
    # Interactive console chatbot: builds the model, the vector store and the
    # LangGraph pipeline, then loops over user input until "exit"/"quit".

    # No `global` statements: this code already runs at module scope, so a
    # plain assignment rebinds the module-level `llm` / `vector_store` that
    # the tools and graph nodes read. Declaring them `global` here, after the
    # module-level `= None` assignments, is a SyntaxError once the notebook
    # is exported and run as a single script.
    #
    # `store=True` is passed through unchanged; the recorded cell output
    # shows the library warning that it was moved into `model_kwargs`.
    llm = init_chat_model("gpt-4o-mini", model_provider="openai", store=True)

    vector_store = get_new_vector_store()

    tools = ToolNode([
        load_pdf_into_vector,
        clear_pdf_vector_store,
        query_pdf_with_vector
    ])

    # query_or_respond -> (tools -> generate) | END
    graph = (
        StateGraph(MessagesState)
        .add_node("query_or_respond", query_or_respond)
        .add_node("tools", tools)
        .add_node("generate", generate)
        .set_entry_point("query_or_respond")
        .add_conditional_edges("query_or_respond", tools_condition, {
            "tools": "tools",
            END: END
        })
        .add_edge("tools", "generate")
        .add_edge("generate", END)
        .compile()
    )

    print("🤖 LangGraph PDF Chatbot")
    print("Say something like:")
    print(" - 'Load this PDF: https://example.com/doc.pdf'")
    print(" - 'What does the PDF say about climate change?'")
    print(" - 'Clear everything'")
    print(" - Type 'exit' to quit.\n")

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C / kernel interrupt: leave the loop cleanly
            # instead of dumping a traceback.
            print("\n👋 Bye!")
            break

        if not user_input:
            # Nothing to send; skip the model call and prompt again.
            continue
        if user_input.lower() in ("exit", "quit"):
            print("👋 Bye!")
            break

        result = graph.invoke({"messages": [HumanMessage(content=user_input)]})
        for msg in result["messages"]:
            # AI messages that only request tool calls usually have empty
            # `content`; echoing them would print a blank "Bot:" line, so
            # only messages that actually contain text are shown.
            if isinstance(msg, AIMessage) and msg.content:
                print(f"\n🤖 Bot: {msg.content}\n")


                store was transferred to model_kwargs.
                Please confirm that store is what you intended.
  llm = init_chat_model("gpt-4o-mini", model_provider="openai", store=True)
