In [1]:
import os
from dotenv import load_dotenv

load_dotenv()
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_PROJECT"] = "Corrective RAG"
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ['TAVILY_API_KEY'] = os.getenv('TAVILY_API_KEY')

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings

urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)

vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=OllamaEmbeddings(model="nomic-embed-text:latest"),
)
retriever = vectorstore.as_retriever()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq

class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents"""

    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )

llm = ChatGroq(model="llama3-groq-8b-8192-tool-use-preview")
structured_llm_grader = llm.with_structured_output(GradeDocuments)

system = """You are a grader assessing relevance of a retrieved document to a user question. \n
            If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant. \n
            Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

retrieval_grader = grade_prompt | structured_llm_grader
question = "agent memory"
docs = retriever.get_relevant_documents(question)
doc_text = docs[1].page_content
print(retrieval_grader.invoke({"question": question, "document": doc_text}))

  warn_deprecated(


binary_score='yes'


In [4]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser

prompt = hub.pull("rlm/rag-prompt")
llm = ChatGroq(model="llama3-8b-8192")

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = prompt | llm | StrOutputParser()
generation = rag_chain.invoke({"context": docs, "question": question})
print(generation)

The context discusses the concept of agent memory in LLM-powered autonomous agents. According to the context, the agent has a Long-term memory, which provides the capability to retain and recall (infinite) information over extended periods, often by leveraging an external vector store and fast retrieval.


In [5]:
llm = ChatGroq(model="llama3-8b-8192")

system = """You are a question re-writer that converts an input question to a better version that is optimized \n
            for web search. Look at the input and try to reason about the underlying semantic intent / meaning.
            Don't add any preamble or introduction, simply rewrite the question"""
re_write_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human",
         "Here is the intial question: \n\n {question} \n Formulate an improved question."
        ),
    ]
)
question_rewriter = re_write_prompt | llm | StrOutputParser()
question_rewriter.invoke({"question": question})

'What is agent memory and how does it work?'

In [6]:
from langchain_community.tools.tavily_search import TavilySearchResults

web_search_tool = TavilySearchResults(k=3)

In [7]:
from typing import List

from typing_extensions import TypedDict

class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: question
        generation: LLM generation
        web_search: whether to add search
        documents: list of documents
    """

    question: str
    generation: str
    web_search: str
    documents: List[str]

In [8]:
from langchain.schema import Document

def retrieve(state):
    """
    Retrieve documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    print("--- Retrieving documents ---")

    question = state['question']
    docs = retriever.get_relevant_documents(question)
    return {"documents": docs, "question": question}

In [9]:
def generate(state):
    """
    Generate answer

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print("--- Generating answer ---")

    question = state['question']
    docs = state['documents']
    generation = rag_chain.invoke({"context": docs, "question": question})
    return {"question": question, "generation": generation, "documents": docs}

In [10]:
def grade_documents(state):
    """
    Determines whether documents are relevant to the question

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, web_search, that contains graded documents
    """
    print("--- Grading documents ---")

    question = state['question']
    docs = state['documents']
    
    filtered_docs = []
    web_search = "No"
    for doc in docs:
        score = retrieval_grader.invoke({"question": question, "document": doc.page_content})
        grade = score.binary_score
        if grade == "yes":
            print(f"---Document is relevant---")
            filtered_docs.append(doc)
        else:
            print(f"---Document is not relevant---")
            web_search = "Yes"
            continue
        return {"documents": filtered_docs, "question": question, "web_search": web_search}

def transform_query(state):
    """
    Transform the query to produce a better question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates question key with a re-phrased question
    """

    print("---Transforming query---")
    question = state['question']
    documents = state['documents']
    
    better_question = question_rewriter.invoke({"question": question})
    return {"question": better_question, "documents": documents}

def web_search(state):
    """
    Perform web search

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates documents key with search results
    """

    print("---Performing web search---")
    question = state['question']
    documents = state['documents']

    docs = web_search_tool.search(question)
    web_results = "\n".join([d["content"] for d in docs])
    web_results = Document(page_content=web_results)
    documents.append(web_results)
    return {"question": question, "documents": documents}

In [11]:
def decide_to_generate(state):
    """ Determines whether to generate an answer or re-generate a question
     
    Args:
        state (dict): The current graph state
        
    Returns:
        state (dict): Updates web_search key with a decision to generate or re-generate
    """

    print("---ASSESS GRADED DOCUMENTS---")
    web_search = state['web_search']

    if web_search == "Yes":
        print("---RE-GENERATE QUESTION---")
        return "transform_query"
    else:
        print("---GENERATE ANSWER---")
        return "generate"

In [12]:
from langgraph.graph import END, StateGraph, START

graph = StateGraph(GraphState)

graph.add_node("retrieve", retrieve)
graph.add_node("grade_documents", grade_documents)
graph.add_node("transform_query", transform_query)
graph.add_node("generate", generate)
graph.add_node("web_search_node", web_search)

graph.add_edge(START, "retrieve")
graph.add_edge("retrieve", "grade_documents")
graph.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "transform_query": "transform_query",
        "generate": "generate",
    },
)
graph.add_edge("transform_query", "web_search_node")
graph.add_edge("web_search_node", "generate")
graph.add_edge("generate", END)

app = graph.compile()

In [13]:
from pprint import pprint

inputs = {"question": "What are the types of agent memory?"}
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Node '{key}':")
    pprint("\n---\n")

pprint(value['generation'])

--- Retrieving documents ---
"Node 'retrieve':"
'\n---\n'
--- Grading documents ---
---Document is relevant---
---ASSESS GRADED DOCUMENTS---
---GENERATE ANSWER---
"Node 'grade_documents':"
'\n---\n'
--- Generating answer ---
"Node 'generate':"
'\n---\n'
('According to the context, there are several types of memory in human brains, '
 'including Sensory Memory, which is the earliest stage of memory, providing '
 'the ability to retain impressions of sensory information after the original '
 'stimuli have ended.')
