In [2]:
from langchain_groq import ChatGroq
from langchain_community.document_loaders import PyPDFLoader
from dotenv import load_dotenv
import warnings
import os
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts.chat import (
    ChatPromptTemplate,
    MessagesPlaceholder
)
from langchain.agents import AgentExecutor, create_react_agent
from langchain.memory import ChatMessageHistory
from langchain_community.tools import BraveSearch
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.tools import Tool
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_aws import BedrockEmbeddings
from langchain_core.messages import HumanMessage,SystemMessage
from langchain.globals import set_llm_cache
from langchain_core.caches import InMemoryCache
from langchain_astradb import AstraDBVectorStore
import getpass
from langchain_community.document_loaders import WebBaseLoader
from cachetools import TTLCache
from langchain import hub
from langchain.utilities import WikipediaAPIWrapper


USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
load_dotenv()
warnings.filterwarnings("ignore")
os.environ["GROQ_API_KEY"]=os.getenv("GROQ_API_KEY")
set_llm_cache(InMemoryCache())  # Initialize the cache
qa_cache=TTLCache(maxsize=100,ttl=600)


In [4]:
llm = ChatGroq(
    model="llama3-70b-8192",
    temperature=0.7,
)
embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-text-v1", region_name="us-east-1")


In [5]:
vectore_store=AstraDBVectorStore(
    collection_name="trail",
    embedding=embeddings,
    token= os.getenv("token"),
    api_endpoint=os.getenv("api_end_point"),
)

In [6]:
loader = WebBaseLoader("https://blog.langchain.dev/memory-for-agents/")
document=loader.load()

In [7]:
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
doc_split=text_splitter.split_documents(document)

In [8]:
vectore_store.add_documents(doc_split)

['ebeb5021f5ce435f8ee56b8cb3c94b34',
 '3e2f5871d5694df9bd6b3fea93e2fea7',
 '21100be27a464707b1ad8e75398afe2b',
 'd2595cd006b34ebbadd23fbc2417b7f3',
 '9bd086cf48884b898ee723f49689b1bd',
 '38130aecce3a494d81f32caa7daae152',
 'f4eb6bdc22354393a8f8f428df6ba7a9',
 '39597dd20fe149308ab7ee9653a372c9',
 '7d28030a0eda4dc48c47ac8117132804',
 'a46e8302f8f94561a53bacdb346aa829',
 '07bee337eb3344d09297f2f1de6f0937']

In [26]:
global result

In [27]:
result=vectore_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k":1},
)


In [10]:
x=result.invoke("what is memory in agents?")

In [11]:
x

[Document(id='012d20a159b54bfea5d89701c77e2d12', metadata={'source': 'https://blog.langchain.dev/memory-for-agents/', 'title': 'Memory for agents', 'language': 'en'}, page_content='memory in agents: the CoALA paper defines episodic memory as storing sequences of the agent’s past actions.This is used primarily to get an agent to perform as intended.In practice, episodic memory is implemented as few-shot example prompting. If you collect enough of these sequences, then this can be done via dynamic few-shot prompting. This is usually great for guiding the agent if there is a correct way to perform specific actions that have been done before. In contrast, semantic memory is more relevant if there isn’t necessarily a correct way to do things, or if the agent is constantly doing new things so the previous examples don’t help much.How to update memoryBesides just thinking about the type of memory to update in their agents, we also see developers thinking about how to update agent memory.One w

In [12]:
data=""
for doc in x:
    data+= doc.page_content
    print(data)

memory in agents: the CoALA paper defines episodic memory as storing sequences of the agent’s past actions.This is used primarily to get an agent to perform as intended.In practice, episodic memory is implemented as few-shot example prompting. If you collect enough of these sequences, then this can be done via dynamic few-shot prompting. This is usually great for guiding the agent if there is a correct way to perform specific actions that have been done before. In contrast, semantic memory is more relevant if there isn’t necessarily a correct way to do things, or if the agent is constantly doing new things so the previous examples don’t help much.How to update memoryBesides just thinking about the type of memory to update in their agents, we also see developers thinking about how to update agent memory.One way to update agent memory is “in the hot path”. This is where the agent system explicitly decides to remember facts (usually via tool calling) before responding. This is the


In [13]:
memory = ChatMessageHistory(session_id="test-session")



In [14]:
def question_answer(question:str)->str:
    if question in qa_cache:
        return qa_cache[question]
    else:
        result=result.invoke(question)
        data=""
        for doc in x:
            data+= doc.page_content
        qa_cache[question]=data
        return data
        
        

In [15]:
qa_tool=Tool(
    name="knowldege_base_qa_using_vector",
    func=question_answer,
    description=" answer question based on the knowledge using the vectore store and cache memeory"
    
)

In [16]:
wikipedia=WikipediaAPIWrapper()
w=wikipedia.run("langchain")

In [17]:
w

"Page: LangChain\nSummary: LangChain is a software framework that helps facilitate the integration of large language models (LLMs) into applications. As a language model integration framework, LangChain's use-cases largely overlap with those of language models in general, including document analysis and summarization, chatbots, and code analysis.\n\nPage: Retrieval-augmented generation\nSummary: Retrieval augmented generation (RAG) is a technique that grants generative artificial intelligence models information retrieval capabilities. It modifies interactions with a large language model (LLM) so that the model responds to user queries with reference to a specified set of documents, using this information to augment information drawn from its own vast, static training data. This allows LLMs to use domain-specific and/or updated information.  \nUse cases include providing chatbot access to internal company data, or giving factual information only from an authoritative source.\n\n\n\nPage

In [18]:
def wiki(query:str)->str:
    wiki_data=wikipedia.run(query)
    return wiki_data

In [19]:
wiki_tool=Tool(
    name="wikipedia",
    func=wiki,
    description="search for the information on wikipedia",
)


In [20]:
tools=[wiki_tool,qa_tool]

In [21]:
prompt=hub.pull("hwchase17/react")
agent=create_react_agent(llm,tools,prompt)
agent_executor=AgentExecutor(agent=agent,tools=tools)
agent_with_memory=RunnableWithMessageHistory(
    agent_executor,
    lambda session_id: memory,
    input_messages_key="input",
    history_messages_key="chat_history"
)


In [22]:

agent_with_memory.invoke(
    {"input":"what is memory in agents"},
    config={"configurable":{"session_id":"session"}}
)

{'input': 'what is memory in agents',
 'chat_history': [],
 'output': 'In the context of artificial intelligence, "memory" refers to the ability of agents to store and retrieve information. Specifically, episodic memory is a type of memory that refers to the memory of experiences and specific events that occur in an agent\'s "life". This is distinct from semantic memory, which refers to general world knowledge that an agent has accumulated throughout its "life".'}

In [31]:
from cachetools import TTLCache
set_llm_cache=TTLCache(maxsize=100,ttl=600)
def llm_with_cache(inputs):
    if inputs in set_llm_cache:
        return set_llm_cache[inputs]
    else:
        z=agent_with_memory.invoke(
    {"input":inputs},
    config={"configurable":{"session_id":"session"}}
)
        set_llm_cache[inputs]=z
        return z




In [35]:
data=llm_with_cache("what is memory in agents")

In [34]:
agent_without_memory=agent_executor
agent_without_memory.invoke({"input":"who is amir khan?"})

{'input': 'who is amir khan?',
 'output': 'Amir Khan can refer to two different individuals: Amir Iqbal Khan, a British former professional boxer, or Aamir Khan, an Indian actor, filmmaker, and television personality. Without more context, it is unclear which Amir Khan is being referred to.'}

In [37]:
data["output"]

'In the context of agents, "memory" refers to the ability of an artificial intelligence system to store and retrieve information, similar to human memory. This can include short-term memory, long-term memory, and other forms of memory storage. In artificial intelligence, memory is often implemented using computer memory, such as RAM or flash memory, and can be used to store data, programs, and other information needed for the AI system to function. In-memory processing is a technique used in AI systems to improve performance by storing and processing data in memory rather than on disk storage. Additionally, AI accelerators, such as neural processing units (NPUs), are designed to accelerate AI and machine learning applications, including those that rely on memory storage and retrieval.'