In [1]:
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

True

In [2]:
local_llm = "llama3"

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings

urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)

# Add to vectorDB
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=GPT4AllEmbeddings(),
)
retriever = vectorstore.as_retriever()

### Retrieval Grader

In [4]:
from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser

llm = ChatOllama(model=local_llm, format="json", temperature=0)

prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing relevance 
    of a retrieved document to a user question. If the document contains keywords related to the user question, 
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no premable or explaination.
     <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["question", "document"]
)

retrieval_grader = prompt | llm | JsonOutputParser()
demo_question = "agent memory"
docs = retriever.invoke(demo_question)
doc_txt = docs[1].page_content
print(retrieval_grader.invoke({"question": demo_question, "document": doc_txt}))

{'score': 'yes'}


### Generate

In [5]:
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks. 
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. 
    Use three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question} 
    Context: {context} 
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question", "context"]
)
llm = ChatOllama(model=local_llm, temperature=0)


def format_docs(documents):
    return "\n\n".join(doc.page_content for doc in documents)


rag_chain = prompt | llm | StrOutputParser()

demo_question = "agent memory"
docs = retriever.invoke(demo_question)
generated_answer = rag_chain.invoke({"context": docs, "question": demo_question})
print(generated_answer)

According to the provided context, agent memory refers to a long-term memory module (external database) that records a comprehensive list of agents' experience in natural language. This memory stream enables agents to behave conditioned on past experience and interact with other agents.


### Hallucination Grader

In [6]:
llm = ChatOllama(model=local_llm, temperature=0, format="json")

prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether 
    an answer is grounded in / supported by a set of facts. Give a binary 'yes' or 'no' score to indicate 
    whether the answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a 
    single key 'score' and no preamble or explanation. <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here are the facts:
    \n ------- \n
    {documents} 
    \n ------- \n
    Here is the answer: {generation}  <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["documents", "generation"]
)
hallucination_grader = prompt | llm | JsonOutputParser()
hallucination_grader.invoke({"generation": generated_answer, "documents": docs})

{'score': 'yes'}

### Answer Grader

In [7]:
llm = ChatOllama(model=local_llm, temperature=0, format="json")

prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether an 
    answer is useful to resolve a question. Give a binary score 'yes' or 'no' to indicate whether the answer is 
    useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
     <|eot_id|><|start_header_id|>user<|end_header_id|> Here is the answer:
    \n ------- \n
    {generation} 
    \n ------- \n
    Here is the question: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["question", "generation"]
)

answer_grader = prompt | llm | JsonOutputParser()
answer_grader.invoke({"generation": generated_answer, "question": demo_question})

{'score': 'yes'}

### Router

In [8]:
llm = ChatOllama(model=local_llm, temperature=0, format="json")
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an expert at routing a 
    user question to a vectorstore or web search. Use the vectorstore for questions on LLM agents, 
    prompt engineering, and adversarial attacks. You do not need to be stringent with the keywords 
    in the question related to these topics. Otherwise, use web-search. Give a binary choice 'web_search' 
    or 'vectorstore' based on the question. Return the a JSON with a single key 'datasource' and 
    no premable or explaination. Question to route: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question"],
)

question_router = prompt | llm | JsonOutputParser()
question_router.invoke({"question": demo_question})

{'datasource': 'vectorstore'}

In [9]:
from langchain_community.tools.tavily_search import TavilySearchResults

web_search_tool = TavilySearchResults(k=3)

In [10]:
from typing_extensions import TypedDict, NotRequired
from typing import List


class GraphState(TypedDict):
    question: str
    generation: NotRequired[str]
    web_search: NotRequired[str]
    documents: List[str]

In [11]:
from langchain.schema import Document

### Nodes

In [12]:
def retrieve(state: GraphState) -> GraphState:
    print("---RETRIEVE---")
    question = state.get("question")
    documents = retriever.invoke(question)

    return {"documents": documents, "question": question}


def generate(state: GraphState) -> GraphState:
    print("---GENERATE---")
    question = state.get("question")
    documents = state.get("documents")

    generation = rag_chain.invoke({"context": documents, "question": question})
    return {"documents": documents, "question": question, "generation": generation}


def grade_documents(state: GraphState) -> GraphState:
    print("---GRADE_DOCS---")
    question = state.get("question")
    documents = state.get("documents")

    filtered_docs = []
    should_perform_web_search = "No"
    for doc in documents:
        score = retrieval_grader.invoke(
            {"question": question, "document": doc.page_content}
        )
        grade = score.get("score")
        if grade.lower() == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(doc)
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            should_perform_web_search = "Yes"
    return {"documents": filtered_docs, "question": question, "web_search": should_perform_web_search}


def web_search(state: GraphState) -> GraphState:
    print("---WEB_SEARCH---")
    question = state.get("question")
    documents = state.get("documents")

    web_docs = web_search_tool.invoke({"query": question})
    web_results = "\n".join([d["content"] for d in web_docs])
    web_results = Document(page_content=web_results)
    if documents is not None:
        documents.append(web_results)
    else:
        documents = [web_results]
    return {"documents": documents, "question": question}


### Conditional Edges

In [13]:
def route_question(state: GraphState) -> str:
    print("---ROUTE_QUESTION---")
    question = state.get("question")
    source = question_router.invoke({"question": question})
    if source.get("datasource") == "web_search":
        print("---ROUTE QUESTION TO WEB SEARCH---")
        return "websearch"
    else:
        print("---ROUTE QUESTION TO RAG---")
        return "vectorstore"


def decide_to_generate(state: GraphState) -> str:
    should_perform_web_search = state.get("web_search")

    if should_perform_web_search == "Yes":
        print("---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, INCLUDE WEB SEARCH---")
        return "websearch"
    else:
        print("---DECISION: GENERATE---")
        return "generate"


def grade_generation_v_documents_and_question(state: GraphState) -> str:
    print("---CHECK HALLUCINATIONS---")
    question = state.get("question")
    documents = state.get("documents")
    generation = state.get("generation")
    score = hallucination_grader.invoke({"documents": documents, "generation": generation})
    grade = score.get("score")

    if grade == "yes":
        print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
        print("---GRADE GENERATION vs QUESTION---")
        score = answer_grader.invoke({"question": question, "generation": generation})
        grade = score.get("score")
        if grade == "yes":
            print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
            return "useful"
        else:
            print("---DECISION: GENERATION DOES ADDRESS QUESTION---")
            return "not useful"
    else:
        print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"


In [14]:
class Node:
    websearch = "websearch"
    retrieve = "retrieve"
    generate = "generate"
    grade_documents = "grade_documents"
    

### Graph Build

In [15]:
from langgraph.graph import END, StateGraph

workflow = StateGraph(GraphState)

workflow.add_node(Node.websearch, web_search)
workflow.add_node(Node.retrieve, retrieve)
workflow.add_node(Node.grade_documents, grade_documents)
workflow.add_node(Node.generate, generate)

In [16]:
workflow.set_conditional_entry_point(
    route_question,
    {
        "websearch": Node.websearch,
        "vectorstore": Node.retrieve
    }
)
workflow.add_edge(Node.retrieve, Node.grade_documents)
workflow.add_conditional_edges(
    Node.grade_documents,
    decide_to_generate,
    {
        "websearch": Node.websearch,
        "generate": Node.generate
    }
)
workflow.add_edge(Node.websearch, Node.generate)
workflow.add_conditional_edges(
    Node.generate,
    grade_generation_v_documents_and_question,
    {
        "not supported": Node.generate,
        "useful": END,
        "not useful": Node.websearch
    }
)

In [17]:
app = workflow.compile()

from pprint import pprint

inputs = {"question": "What are the types of agent memory?"}
last_value = None
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")
        last_value = value
pprint(last_value["generation"])

---ROUTE_QUESTION---
---ROUTE QUESTION TO RAG---
---RETRIEVE---
'Finished running: retrieve:'
---GRADE_DOCS---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---DECISION: GENERATE---
'Finished running: grade_documents:'
---GENERATE---
---CHECK HALLUCINATIONS---
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION DOES NOT ADDRESS QUESTION---
'Finished running: generate:'
('According to the provided context, there are two types of agent memory '
 'mentioned:\n'
 '\n'
 '1. Memory stream: a long-term memory module that records a comprehensive '
 "list of agents' experience in natural language.\n"
 '2. Reflection mechanism: synthesizes memories into higher-level inferences '
 "over time and guides the agent's future behavior.\n"
 '\n'
 'These types of memory enable the agent to behave conditioned on past '
 'experience, as well as interact with other agents

In [19]:
app.invoke({"question": "What is zero-shot learning?"})

---ROUTE_QUESTION---
---ROUTE QUESTION TO WEB SEARCH---
---WEB_SEARCH---
---GENERATE---
---CHECK HALLUCINATIONS---
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION DOES NOT ADDRESS QUESTION---


{'question': 'What is zero-shot learning?',
 'generation': "Zero-shot learning is a machine learning scenario where an AI model is trained to recognize and categorize objects or concepts without having seen any examples of those categories or concepts beforehand. This setup uses auxiliary information, such as textual descriptions, during the training process instead of explicit labels. The goal is to recognize things that the model hasn't explicitly seen before in training.",
 'documents': [Document(page_content='Zero-shot Learning is a setup in which a model can learn to recognize things that it hasn\'t explicitly seen before in training. There are different zero-shot learning approaches, but a commonality is that auxiliary information such as textual descriptions are used or encoded during the training process instead of explicit labels.\nZero-shot classification refers to the problem setting where we want to recognize objects from classes that our model has not seen during training.

In [20]:
app.invoke({"question": "What is zero-shot learning in the context of prompt engineering?"})

---ROUTE_QUESTION---
---ROUTE QUESTION TO RAG---
---RETRIEVE---
---GRADE_DOCS---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, INCLUDE WEB SEARCH---
---WEB_SEARCH---
---GENERATE---
---CHECK HALLUCINATIONS---
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION DOES NOT ADDRESS QUESTION---


{'question': 'What is zero-shot learning in the context of prompt engineering?',
 'generation': 'In the context of prompt engineering, zero-shot learning refers to a method where the model is asked to perform a task without any additional examples or guidance. The model relies solely on its pre-trained information to generate an output.',
 'web_search': 'Yes',
 'documents': [Document(page_content='Zero-shot generation: This is to find a number of prompts that can trigger harmful output conditioned on a preset prompt.\nStochastic few-shot generation: The red team prompts found from the above step are then used as few-shot examples to generate more similar cases. Each zero-shot test case might be selected in few-shot examples with a probability $\\propto \\exp(r(\\mathbf{x}, \\mathbf{y}) / \\tau)$\nSupervised learning: The red team model can be fine-tuned on failing, zero-shot test cases. The training only runs lightly for one epoch to avoid overfitting and preserve sample diversity.', m