In [28]:
import os
from dotenv import load_dotenv

load_dotenv()

# Credentials this notebook expects, either already exported or supplied by the
# .env file loaded above.
required_keys = ("GROQ_API_KEY", "LANGCHAIN_API_KEY", "OPENAI_API_KEY", "TAVILY_API_KEY")

# Fail fast with a message that names the missing variables.  Writing
# os.getenv(...) back into os.environ is a no-op when the variable is set and
# raises an opaque "TypeError: str expected, not NoneType" when it is not.
missing_keys = [key for key in required_keys if os.getenv(key) is None]
if missing_keys:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(missing_keys)
    )

# LangSmith tracing configuration.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "Self RAG"

In [22]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings

# Pages that make up the knowledge base.
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# One loader per URL; every load() result is kept as its own sub-list ...
docs = [loader.load() for loader in map(WebBaseLoader, urls)]
# ... and the sub-lists are then flattened into a single list of documents.
docs_list = [page for pages in docs for page in pages]

# Split the pages into chunks (chunk_size=250, no overlap between chunks).
splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=0,
    chunk_size=250,
)
doc_splits = splitter.split_documents(docs_list)

# Embed the chunks with the Ollama embedding model and index them in Chroma.
vectorstore = Chroma.from_documents(
    embedding=OllamaEmbeddings(model="nomic-embed-text:latest"),
    collection_name="rag-chroma",
    documents=doc_splits,
)
retriever = vectorstore.as_retriever()

In [38]:
#### Retrieval Grader

from langchain.prompts import PromptTemplate
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import JsonOutputParser
from langchain_groq import ChatGroq
from langchain_core.pydantic_v1 import BaseModel

# Ollama chat model constrained to JSON output, with temperature 0.
llm = ChatOllama(model="llama3.1:8b-instruct-q8_0", format="json", temperature=0)


# Grader prompt: asks for a JSON object with a single 'score' key ('yes'/'no').
prompt = PromptTemplate(
    template="""You are a grader assessing relevance of a retrieved document to a user question. \n 
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n
    If the document contains keywords related to the user question, grade it as relevant. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.""",
    input_variables=["question", "document"],
)

# prompt -> model -> parsed JSON
retrieval_grader = prompt | llm | JsonOutputParser()

#### Test the retrieval grader
question = "agent memory"
# `invoke` is the retriever's Runnable entry point; `get_relevant_documents`
# is deprecated in langchain-core.
docs = retriever.invoke(question)
# The second hit is used as the sample document for the smoke test.
doc_text = docs[1].page_content
retrieval_grader.invoke({"question": question, "document": doc_text})

{'score': 'yes'}

In [40]:
### Generate 

from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# Groq-hosted chat model used to write the answer (temperature 0).
llm = ChatGroq(temperature=0, model="llama3-8b-8192")

# RAG prompt pulled from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

#Post Processing
def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        One string with the contents in order; ``""`` when ``docs`` is empty.
    """
    separator = "\n\n"
    contents = [doc.page_content for doc in docs]
    return separator.join(contents)

# prompt -> chat model -> plain string
rag_chain = prompt | llm | StrOutputParser()

#Test
# Reuses `question` and `docs` from the retrieval-grader test cell.
generation = rag_chain.invoke(
    {
        "question": question,
        "context": format_docs(docs),
    }
)
print(generation)

The context suggests that the agent has both short-term and long-term memory capabilities. Short-term memory is used for in-context learning and learning from external APIs, while long-term memory is used to retain and recall information over extended periods through an external vector store and fast retrieval.


In [41]:
### Hallucination Grader

#LLM
# JSON-constrained Ollama model, temperature 0.
llm = ChatOllama(
    temperature=0,
    format="json",
    model="llama3.1:8b-instruct-q8_0",
)

# Prompt asking whether the answer is supported by the supplied facts; the
# reply is requested as JSON with a single 'score' key.
prompt = PromptTemplate(
    input_variables=["generation", "documents"],
    template="""You are a grader assessing whether an answer is grounded in / supported by a set of facts. \n 
    Here are the facts:
    \n ------- \n
    {documents} 
    \n ------- \n
    Here is the answer: {generation}
    Give a binary score 'yes' or 'no' score to indicate whether the answer is grounded in / supported by a set of facts. \n
    Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.""",
)

# prompt -> model -> parsed JSON
hallucination_grader = prompt | llm | JsonOutputParser()

#Test
hallucination_grader.invoke(
    {"generation": generation, "documents": format_docs(docs)}
)

{'score': 'yes'}

In [43]:
### Answer Grader

# JSON-constrained Ollama model, temperature 0.
llm = ChatOllama(
    temperature=0,
    format="json",
    model="llama3.1:8b-instruct-q8_0",
)

# Prompt asking whether the answer resolves the question; the reply is
# requested as JSON with a single 'score' key.
prompt = PromptTemplate(
    input_variables=["generation", "question"],
    template="""You are a grader assessing whether an answer is useful to resolve a question. \n 
    Here is the answer:
    \n ------- \n
    {generation} 
    \n ------- \n
    Here is the question: {question}
    Give a binary score 'yes' or 'no' to indicate whether the answer is useful to resolve a question. \n
    Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.""",
)

# prompt -> model -> parsed JSON
answer_grader = prompt | llm | JsonOutputParser()
answer_grader.invoke(
    {"question": question, "generation": generation}
)

{'score': 'yes'}

In [48]:
### Question Re-writer

# Groq-hosted chat model used to rewrite the question (temperature 0).
llm = ChatGroq(model="llama3-8b-8192", temperature=0)

# Prompt
# The template only references {question}, so that is the only declared input
# variable; the prompt wording is also corrected ("You are a ...",
# "the initial question").
re_write_prompt = PromptTemplate(
    template="""You are a question re-writer that converts an input question to a better version that is optimized \n 
     for vectorstore retrieval. Look at the initial question and formulate an improved question. \n
     Here is the initial question: \n\n {question}. Don't write anything aside from the rewritten question: \n """,
    input_variables=["question"],
)

# prompt -> model -> plain string
question_rewriter = re_write_prompt | llm | StrOutputParser()

#Test
question_rewriter.invoke({"question": question})

'"Retrieve information related to the concept of \'agent memory\'."'

In [49]:
from typing import List
from typing_extensions import TypedDict

class GraphState(TypedDict):
    """Represents the state of the graph"""
    # The user question; the transform_query node replaces it with a rewritten one.
    question: str
    # Documents retrieved for the question.
    # NOTE(review): annotated as List[str], but the nodes in this notebook store
    # the retriever's results here and read `.page_content` from each item —
    # confirm and tighten the annotation.
    documents: List[str]
    # Answer text returned by the generate node.
    generation: str

In [50]:
### Nodes

def retrieve(state):
    """Retrieve documents relevant to the question.

    Args:
        state: Graph state; only ``state["question"]`` is read.

    Returns:
        dict with ``documents`` (the retriever's results for the question) and
        the unchanged ``question``.
    """
    print("--RETRIEVE--")
    question = state["question"]
    # `invoke` is the retriever's Runnable entry point; `get_relevant_documents`
    # is deprecated in langchain-core.
    documents = retriever.invoke(question)

    return {"documents": documents, "question": question}

def generate(state):
    """Answer the question from the documents currently held in the state.

    Reads ``question`` and ``documents`` from ``state``, runs ``rag_chain`` on
    the question plus the formatted documents, and returns a dict with the new
    ``generation`` alongside the unchanged ``documents`` and ``question``.
    """
    print("--GENERATE--")
    question, docs = state["question"], state["documents"]

    chain_input = {"question": question, "context": format_docs(docs)}
    answer = rag_chain.invoke(chain_input)

    return dict(generation=answer, documents=docs, question=question)

def grade_documents(state):
    """Grade documents retrieved for relevance to the question.

    Each document's ``page_content`` is sent to ``retrieval_grader`` together
    with the question; only documents graded "yes" are kept.

    Args:
        state: Graph state; ``question`` and ``documents`` are read.

    Returns:
        dict with ``documents`` (the relevant subset, possibly empty) and the
        unchanged ``question``.
    """
    print("--GRADE DOCUMENTS--")
    question = state["question"]
    docs = state["documents"]

    filtered_docs = []
    for doc in docs:
        result = retrieval_grader.invoke(
            {"question": question, "document": doc.page_content}
        )
        # The verdict comes from an LLM: accept "Yes" / " yes " as well, and
        # treat a missing or malformed score as "not relevant" rather than
        # crashing the whole graph run with a KeyError.
        raw_score = result.get("score", "") if isinstance(result, dict) else ""
        grade = str(raw_score).strip().lower()
        if grade == "yes":
            print("--GRADE DOCUMENT: IS RELEVANT--")
            filtered_docs.append(doc)
        else:
            print("--GRADE DOCUMENT: NOT RELEVANT--")

    return {"documents": filtered_docs, "question": question}

def transform_query(state):
    """Replace the question with a rewritten version.

    Passes ``state["question"]`` through ``question_rewriter`` and returns a
    dict holding the rewritten ``question`` and the untouched ``documents``.
    """
    print("--TRANSFORM QUERY--")
    original_question, current_docs = state["question"], state["documents"]

    rewritten = question_rewriter.invoke({"question": original_question})

    return dict(question=rewritten, documents=current_docs)

### Edge

def decide_to_generate(state):
    """Decide whether to generate an answer or rewrite the question first.

    Args:
        state: Graph state; only ``state["documents"]`` is read.

    Returns:
        ``"transform_query"`` when no documents are left after grading,
        otherwise ``"generate"``.
    """
    print("--ASSESS GRADED DOCUMENTS--")
    # Only the graded documents matter here; the question is not needed.
    docs = state["documents"]

    if not docs:
        print("--ASSESS GRADED DOCUMENTS: TRANSFORM QUERY--")
        return "transform_query"

    print("--ASSESS GRADED DOCUMENTS: GENERATE--")
    return "generate"

def grade_generation_v_documents_and_question(state):
    """Grade the generation against the documents and question.

    First asks ``hallucination_grader`` whether the generation is supported by
    the documents; if so, asks ``answer_grader`` whether it resolves the
    question.

    Args:
        state: Graph state; ``question``, ``documents`` and ``generation`` are
            read.

    Returns:
        ``"useful"`` when grounded and useful, ``"not_useful"`` when grounded
        but not useful, ``"not_supported"`` when not grounded.
    """

    def _says_yes(result):
        # The verdict comes from an LLM: accept "Yes" / " yes " as well, and
        # treat a missing or malformed score as "no" instead of raising.
        raw_score = result.get("score", "") if isinstance(result, dict) else ""
        return str(raw_score).strip().lower() == "yes"

    print("--GRADE GENERATION--")
    question = state["question"]
    docs = state["documents"]
    generation = state["generation"]

    grounded = _says_yes(
        hallucination_grader.invoke(
            {"generation": generation, "documents": format_docs(docs)}
        )
    )
    if not grounded:
        print("--GRADE GENERATION: NOT GROUNDED--")
        return "not_supported"

    print("--GRADE GENERATION: IS GROUNDED--")
    print("--GRADE GENERATION: ASSESS ANSWER--")
    useful = _says_yes(
        answer_grader.invoke({"generation": generation, "question": question})
    )
    if useful:
        print("--GRADE GENERATION: IS USEFUL--")
        return "useful"

    print("--GRADE GENERATION: NOT USEFUL--")
    return "not_useful"

In [52]:
from langgraph.graph import END, START, StateGraph

graph = StateGraph(GraphState)

# Register the nodes (name -> callable), in the same order as before.
for node_name, node_fn in (
    ("retrieve", retrieve),
    ("generate", generate),
    ("grade_documents", grade_documents),
    ("transform_query", transform_query),
):
    graph.add_node(node_name, node_fn)

# Unconditional edges: entry point, retrieval -> grading, rewrite -> retrieval.
for edge_start, edge_end in (
    (START, "retrieve"),
    ("retrieve", "grade_documents"),
    ("transform_query", "retrieve"),
):
    graph.add_edge(edge_start, edge_end)

# After grading, either rewrite the question or generate an answer.
graph.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {"transform_query": "transform_query", "generate": "generate"},
)

# After generating, finish, rewrite the question, or generate again.
graph.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {"useful": END, "not_useful": "transform_query", "not_supported": "generate"},
)

app = graph.compile()

In [55]:
from pprint import pprint

inputs = {"question": "Explain how the different types of agent memory work?"}

# Remember the most recent answer explicitly.  Reading `value` after the loops
# depends on a leaked loop variable: it raises NameError if the stream yields
# nothing and KeyError if the last node's output has no "generation".
final_generation = None
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Node '{key}':")
        if isinstance(value, dict) and "generation" in value:
            final_generation = value["generation"]
    pprint("\n---\n")

pprint(final_generation)

--RETRIEVE--
"Node 'retrieve':"
'\n---\n'
--GRADE DOCUMENTS--
--GRADE DOCUMENT: NOT RELEVANT--
--GRADE DOCUMENT: NOT RELEVANT--
--GRADE DOCUMENT: IS RELEVANT--
--GRADE DOCUMENT: IS RELEVANT--
--ASSESS GRADED DOCUMENTS--
--ASSESS GRADED DOCUMENTS: GENERATE--
"Node 'grade_documents':"
'\n---\n'
--GENERATE--
--GRADE GENERATION--
--GRADE GENERATION: IS GROUNDED--
--GRADE GENERATION: ASSESS ANSWER--
--GRADE GENERATION: IS USEFUL--
"Node 'generate':"
'\n---\n'
('According to the provided context, the Component Two of the Agent System '
 'Overview discusses the different types of memory, which includes Maximum '
 'Inner Product Search (MIPS).')
