In [1]:
# A simplified RAG (Retrieval-Augmented Generation) agent: one agent node and one retriever-tool node wired between the START and END nodes. It demonstrates the basic agent/tool flow without grading or other complexities.

#!/usr/bin/env python
# coding: utf-8

# Simplified RAG Agent

import getpass
import os

from langchain import hub
from langchain.tools.retriever import create_retriever_tool
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import MessagesState, StateGraph, START, END
from langgraph.prebuilt import ToolNode
from langgraph.prebuilt import tools_condition

# Set environment keys
def _set_env(key: str) -> None:
    """Ensure the environment variable ``key`` holds a non-empty value.

    If ``key`` is missing from ``os.environ`` or is set to an empty string,
    the value is requested interactively via ``getpass.getpass`` and stored
    in ``os.environ``. An existing non-empty value is left untouched.

    Args:
        key: Name of the environment variable; also used as the prompt label.
    """
    # An exported-but-empty variable (e.g. `export OPENAI_API_KEY=`) is as
    # unusable as a missing one, so prompt in both cases instead of letting
    # the failure surface later as an authentication error.
    if not os.environ.get(key):
        os.environ[key] = getpass.getpass(f"{key}:")

_set_env("OPENAI_API_KEY")

# Load documents
# Pages that make up the retrieval corpus.
urls_from_pj = [
    "https://blog.langchain.dev/launching-long-term-memory-support-in-langgraph/",
    # NOTE(review): "LonTermMemory" may be a typo for "LongTermMemory" —
    # confirm that this page actually exists before relying on it.
    "https://kavourei.github.io/LonTermMemory",
    # LangGraph repository landing page (the previous value was cut off
    # mid-name and did not point at a repository).
    "https://github.com/langchain-ai/langgraph",
]
# One loader per URL; each load() result is a list, so `docs` is a list of lists.
docs = [WebBaseLoader(url).load() for url in urls_from_pj]
# Flatten into a single list of documents for the splitter.
docs_list = [item for sublist in docs for item in sublist]

# Split documents
# Chunk the loaded pages with the tiktoken-based recursive splitter; the
# 50-unit overlap keeps neighbouring chunks sharing context.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=50,
    chunk_size=100,
)
doc_splits = text_splitter.split_documents(docs_list)

# Add to vectorDB
# NOTE(review): re-running this cell in the same kernel may add the chunks to
# the existing "simple-rag-chroma" collection a second time — confirm.
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(
    collection_name="simple-rag-chroma",
    documents=doc_splits,
    embedding=embedding_model,
)
retriever = vectorstore.as_retriever()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
# Create retriever tool
# The description is the text the chat model sees when deciding whether to
# call the tool, so it has to describe what is actually indexed: the
# LangChain/LangGraph long-term-memory pages loaded into the vector store.
retriever_tool = create_retriever_tool(
    retriever,
    "retrieve_blog_post",
    "Search and return information about agent memory from the indexed "
    "LangChain/LangGraph posts on long-term memory support.",
)

# Single source of truth for the tools bound to the model.
tools = [retriever_tool]

# Agent function
def agent(state: MessagesState) -> MessagesState:
    """Ask the chat model for the next message, with the retriever tool available.

    Args:
        state: Graph state; only ``state["messages"]`` is read.

    Returns:
        A state update whose ``"messages"`` entry is a one-element list
        holding the model's response.
    """
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, streaming=True)
    # Binding the module-level `tools` lets the model emit tool calls.
    llm_with_tools = llm.bind_tools(tools)
    reply = llm_with_tools.invoke(state["messages"])
    return {"messages": [reply]}

# Define a simple graph: START -> agent -> (retrieve | END), retrieve -> END
workflow = StateGraph(MessagesState)
workflow.add_node("agent", agent)
# Build the tool node from the same `tools` list the model is bound to.
retrieve = ToolNode(tools)
workflow.add_node("retrieve", retrieve)
workflow.add_edge(START, "agent")
# Only visit the tool node when the agent's last message contains a tool
# call; otherwise finish directly instead of running the tool node on a
# message that requested nothing.
workflow.add_conditional_edges(
    "agent",
    tools_condition,
    {
        "tools": "retrieve",
        END: END,
    },
)
workflow.add_edge("retrieve", END)

# Compile the graph
graph = workflow.compile()

# Input message
# NOTE(review): the indexed URLs are LangChain/LangGraph pages, while this
# question asks about Lilian Weng — confirm the question matches the corpus.
question = "What does Lilian Weng say about the types of agent memory?"
inputs = {"messages": [("user", question)]}

# Run the graph once and print every message in the final state.
output = graph.invoke(inputs)

for message in output["messages"]:
    message.pretty_print()


What does Lilian Weng say about the types of agent memory?
Tool Calls:
  retrieve_blog_post (call_9wFMcOQDrHo9XvDkOyKu607G)
 Call ID: call_9wFMcOQDrHo9XvDkOyKu607G
  Args:
    query: types of agent memory
Name: retrieve_blog_post

we've realized something important: there's no universally perfect solution for AI memory. The best memory for each application still contains very application specific logic. By extension, most "agent memory" products today are too high-level. They try to create a one-size-fits-all product that doesn't satisfy many production users' needs.This insight is why we have built our initial memory support into LangGraph as a simple document store. High level abstractions can be easily built on top (as

user preferences. This feature is part of the OSS library, and it is enabled by default for all LangGraph Cloud & Studio users.Memory: from short (thread-scoped) to long (cross-thread)On MemoryMost AI applications today are goldfish; they forget everything between c