<a href="https://colab.research.google.com/github/githubramkiran/LanggraphAgent/blob/main/langraph_api_endpoint.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Exposing a LangGraph agent as an API is slightly different from exposing a standard LangChain agent because a LangGraph graph is stateful. Each request must carry a thread ID (conversation ID) so that the graph remembers previous interactions in that conversation.

In [None]:
# Core LangChain / LangGraph packages plus the provider integrations the code
# below relies on: langchain-google-genai backs the "google_genai" model
# provider and langchain-openai provides the ChatOpenAI import.
!pip install langchain langgraph langchain_core langchain_community langchain-openai langchain-google-genai
# HTTP clients and the web server stack used to expose the graph.
!pip install requests httpx fastapi uvicorn

In [None]:
import getpass
import logging
import os

from fastapi import FastAPI, HTTPException
from langchain.chat_models import init_chat_model
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import StateGraph, MessagesState, START, END
from pydantic import BaseModel
# Prompt for the Gemini API key only when the environment does not already
# hold a non-empty value for it.
if os.environ.get("GOOGLE_API_KEY", "") == "":
    os.environ["GOOGLE_API_KEY"] = getpass.getpass(
        "Enter API key for Google Gemini: "
    )

# Chat model used by the graph's chatbot node.
# To use OpenAI instead: llm = ChatOpenAI(model="gpt-4o")
llm = init_chat_model("google_genai:gemini-2.5-flash-lite")

# 1. Define Graph & Memory
# Checkpointer handed to the compiled graph so conversation state persists
# between calls that share a thread_id.
checkpointer = MemorySaver()  # Use PostgresSaver for production


def call_model(state: MessagesState):
    """Run the chat model over the conversation held in the graph state.

    Args:
        state: Graph state; its ``"messages"`` entry is passed to the model.

    Returns:
        A state update whose ``"messages"`` value is the model's reply.
    """
    history = state["messages"]
    reply = llm.invoke(history)
    return {"messages": reply}


# Single-node graph: START -> chatbot -> END.
builder = StateGraph(MessagesState)
builder.add_node("chatbot", call_model)
builder.add_edge(START, "chatbot")
# Terminate the run explicitly after the chatbot node rather than leaving the
# node without any outgoing edge.
builder.add_edge("chatbot", END)
# Compile with the checkpointer so state is stored per thread_id.
graph = builder.compile(checkpointer=checkpointer)


# 2. Define Request Schema
class ChatRequest(BaseModel):
    """Request body accepted by the ``/chat`` endpoint."""

    # Text of the user's message; the endpoint wraps it in a HumanMessage.
    message: str
    # Conversation identifier forwarded to the graph as its thread_id.
    # Requests that omit it all use the same "default_thread" value.
    thread_id: str = "default_thread"


# 3. Create Endpoint
app = FastAPI()
logger = logging.getLogger(__name__)


@app.post("/chat")
async def chat_endpoint(request: ChatRequest):
    """Send one user message through the graph and return the reply.

    Args:
        request: Parsed request body holding the user's ``message`` and the
            ``thread_id`` of the conversation it belongs to.

    Returns:
        A dict with ``"response"`` (content of the last message in the graph
        result) and ``"thread_id"`` (echoed from the request).

    Raises:
        HTTPException: With status 500 when invoking the graph or reading its
            result fails. The underlying error is logged server-side and is
            not included in the response body.
    """
    # Prepare input state
    input_state = {"messages": [HumanMessage(content=request.message)]}

    # Prepare config for state persistence
    config = {"configurable": {"thread_id": request.thread_id}}

    try:
        # Use ainvoke for non-blocking async execution
        result = await graph.ainvoke(input_state, config=config)

        # Extract the last message content
        last_message = result["messages"][-1].content
    except Exception as exc:
        # Boundary handler: keep the full traceback in the server log, but do
        # not echo internal error text (which may contain provider details)
        # back to the client.
        logger.exception("Chat request failed for thread_id=%r", request.thread_id)
        raise HTTPException(
            status_code=500,
            detail="Internal error while processing the chat request.",
        ) from exc

    return {"response": last_message, "thread_id": request.thread_id}


if __name__ == "__main__":
    # Imported here so uvicorn is only required when this file is run directly.
    import uvicorn

    # Serve the FastAPI app on localhost:8000.
    # NOTE(review): inside a notebook an event loop is usually already running,
    # in which case this call may fail - confirm how the cell is meant to be run.
    uvicorn.run(
        app,
        port=8000,
        host="localhost",
    )

In [None]:
# Run from a terminal while the server is running. The body matches the
# ChatRequest schema of the /chat endpoint (message + thread_id).
curl -X POST http://localhost:8000/chat \
  -H "Content-Type: application/json" \
  -d '{
    "message": "Hi, my name is Alice",
    "thread_id": "session_1"
  }'

POST http://localhost:8000/chat
body:
{
    "message": "tell about rajahmundry",
    "thread_id": "123"
}
Content-Type:application/json