In [1]:
import os 
from dotenv import load_dotenv
# Load environment variables via python-dotenv.
load_dotenv()

# Fail fast with an actionable message when the key is missing or empty.
# Assigning the None returned by os.getenv() straight into os.environ would
# otherwise raise an opaque "TypeError: str expected, not NoneType".
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )
os.environ["GROQ_API_KEY"] = groq_api_key

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings
# Embedding model used below to vectorise the document chunks for the FAISS index.
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Blog posts that make up the knowledge base.
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# One loader result (a list) per URL, then flattened into a single list.
docs = [WebBaseLoader(page_url).load() for page_url in urls]
docs_list = [page_doc for loaded in docs for page_doc in loaded]

# Chunk the pages (chunk_size=1000, chunk_overlap=100) before embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
doc_splits = text_splitter.split_documents(docs_list)

# Embed the chunks and index them in FAISS.
vectorstore = FAISS.from_documents(documents=doc_splits, embedding=embeddings)

# NOTE: the "retreiver" spelling is kept as-is because later cells reference this name.
retreiver = vectorstore.as_retriever()

In [9]:
### Router

from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

from pydantic import BaseModel, Field

# Data model
# Structured-output schema for the router; it is handed to
# llm.with_structured_output(), so the docstring and field description below
# are model-facing text and should stay short and correctly spelled.
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""

    # The Literal type restricts the value to exactly these two routes.
    datasource: Literal["vectorstore", "web_search"] = Field(
        description="Given a user question choose to route it to web search or a vectorstore.",
    )

# Router LLM, constrained to answer with the RouteQuery schema.
llm = ChatGroq(model="llama3-70b-8192")
structured_llm_router = llm.with_structured_output(RouteQuery)

# System instructions: describe what the vectorstore covers so the model can
# choose between it and web search.
system = """You are an expert at routing a user question to a vectorstore or web search.
The vectorstore contains document related to agents, prompt engineering, and adversarial attacks.
Use the vectorstore for the questions on these topics, Otherwise, use web search."""

route_prompt = ChatPromptTemplate.from_messages(
    [("system", system), ("human", "{question}")]
)

# Chain: prompt -> structured router LLM.
question_router = route_prompt | structured_llm_router

# Quick check with a question about agent memory.
print(
    question_router.invoke(
        {"question": "What are the types of  agent memory"}
    )
)

datasource='vectorstore'


In [10]:
## Retrieval Grader

# Data model
# Structured-output schema used by the retrieval grader.
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    # Plain string; the description asks the model for 'yes' or 'no'.
    binary_score: str = Field(description="Documents are relevant to the question, 'yes' or 'no'")

# Grader LLM, constrained to answer with the GradeDocuments schema.
llm = ChatGroq(model="llama3-70b-8192")
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# System instructions for the relevance check.
system = """You are a grader assessing relevance of a retreived document to a user question. \n
If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
It does not need to be a stringent test. The goal is to filter out erroneous retreivals. \n
Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""

grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

# Chain: prompt -> structured grader LLM.
retrieval_grader = grade_prompt | structured_llm_grader

# Quick check: retrieve for a sample question and grade the second hit.
# `question` and `docs` are reused by the cells that follow.
question = "agent memory"
docs = retreiver.invoke(question)
doc_txt = docs[1].page_content
print(
    retrieval_grader.invoke(
        {"question": question, "document": doc_txt}
    )
)

binary_score='yes'


In [11]:
### Generate

from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# RAG prompt pulled from the LangChain hub; it is invoked below with the
# "context" and "question" variables.
prompt = hub.pull("rlm/rag-prompt")

# LLM
llm = ChatGroq(model="llama3-70b-8192")


def format_docs(docs):
    """Join the text of retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The documents' ``page_content`` values separated by blank lines
        (an empty string when ``docs`` is empty).
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Chain: prompt -> LLM -> plain string
rag_chain = prompt | llm | StrOutputParser()

# Run. Pass the formatted text rather than the raw document list; otherwise
# the prompt is filled with the string form of the list (object reprs and
# metadata included) instead of just the passage text.
generation = rag_chain.invoke({"context": format_docs(docs), "question": question})
print(generation)



The agent memory consists of two components: short-term memory, which utilizes in-context learning to learn from the model, and long-term memory, which provides the capability to retain and recall infinite information over extended periods, often by leveraging an external vector store and fast retrieval.


In [12]:
### Hallucination Grader

# Data Model
# Structured-output schema used by the hallucination grader.
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in generation answer."""

    # The capital "S" in the field name is kept: it is the attribute that
    # result objects expose.
    binary_Score: str = Field(description="Answer is grounded in the facts, 'yes' or 'no'")

# LLM with function call

# Grader LLM, constrained to answer with the GradeHallucinations schema.
# This rebinds `structured_llm_grader`; the chain built below uses the new object.
llm = ChatGroq(model="llama3-70b-8192")
structured_llm_grader = llm.with_structured_output(GradeHallucinations)

# System instructions for the groundedness check.
system = """you are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n
Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts"""

hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
    ]
)

# Chain: prompt -> structured grader LLM.
hallucination_grader = hallucination_prompt | structured_llm_grader

# Left as the final bare expression so the notebook displays the result.
hallucination_grader.invoke(
    {"documents": docs, "generation": generation}
)


GradeHallucinations(binary_Score='yes')