# **RAG With Memory:**


* **Conversation Buffer Memory**
* **Conversation Buffer Window Memory**
* **Conversation Summary Memory**
* **Conversation Summary Buffer Memory**
* **Conversation Entity Memory**

In [None]:
# Install the necessary libraries:
# NOTE(review): packages are installed unpinned with --upgrade, so a rerun may pull
# different versions than the ones that produced the saved outputs — consider pinning.

%pip install --upgrade --quiet sentence_transformers
%pip install --upgrade --quiet  langchain langchain-community langchainhub langchain_google_genai langchain-chroma bs4

## **Vector Store:**

In [2]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter



In [None]:
# Get the Embeddings:
# The class is imported from langchain_community (installed above and already used
# for WebBaseLoader) instead of the deprecated `langchain.embeddings` re-export.

from langchain_community.embeddings import HuggingFaceEmbeddings

# Sentence-transformers model used to embed both the document chunks and the queries.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

In [39]:
# Load LLM:

from google.colab import userdata
from langchain_google_genai import ChatGoogleGenerativeAI


# Read the API key from Colab's secret store rather than hard-coding it.
GEMINI_API_KEY = userdata.get("GEMINI_API_KEY")

# Chat model shared by every chain (and by the LLM-backed memories) below.
# `max_tokens` caps the length of the generated answer. The former `max_length`
# argument was dropped: it is not a ChatGoogleGenerativeAI parameter and only
# produced an "unexpected argument" warning.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=GEMINI_API_KEY,
    temperature=0.1,
    max_tokens=1024,
)

In [5]:
# Step 1 - fetch the blog post, parsing only the title, header and body elements.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        ),
    },
)
docs = loader.load()

# Step 2 - cut the page into 1000-character chunks that overlap by 200 characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Step 3 - embed the chunks and index them in a Chroma vector store.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
)

In [6]:
# NOTE(review): `search` is not referenced anywhere in the visible notebook —
# this looks like an accidental auto-import.
from re import search

# Retriever over the vector store; each query returns the 5 closest chunks.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

In [7]:
# Smoke-test the retriever with a sample query; the bare expression displays the returned documents.
retriever.invoke("What are the approaches to Task Decomposition?")

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\nTask decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.'),
 Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs

## **Generation With LLM & Memory:**

In [19]:
from langchain.chains import ConversationalRetrievalChain, ConversationChain
from langchain.chains.conversational_retrieval.prompts import QA_PROMPT
from langchain.memory import (
    ChatMessageHistory,
    ConversationBufferMemory,
    ConversationBufferWindowMemory,
    ConversationSummaryMemory,
    ConversationSummaryBufferMemory,
    ConversationEntityMemory
)
import os
import asyncio
from IPython.display import display, Markdown

**ChatMessageHistory:**<br>
One of the core utility classes underpinning most (if not all) memory modules is the **ChatMessageHistory** class. This is a super lightweight wrapper that provides convenience methods for saving HumanMessages, AIMessages, and then fetching them all.<br>

```python

    from langchain.memory import ChatMessageHistory

    history = ChatMessageHistory()

    history.add_user_message("hi!")
    history.add_ai_message("whats up?")

    history.messages
    #  [HumanMessage(content='hi!', additional_kwargs={}),
    #  AIMessage(content='whats up?', additional_kwargs={})]
```

### **Conversation Buffer Memory:**

#### **Example 01:**

In [15]:
async def conversational_qa_chain():
  message_history = ChatMessageHistory()
  chat_memory = ConversationBufferMemory(
      memory_key="chat_history", output_key="answer", chat_memory=message_history, return_messages=True
  )

  # ConversationRetrievalQnA:
  chain = ConversationalRetrievalChain.from_llm(
      llm=llm,
      retriever=retriever,
      chain_type="stuff",
      memory=chat_memory,
      return_source_documents=True,
      get_chat_history=lambda h : h
  )

  return chain


chain = await conversational_qa_chain()

In [16]:
# Response: ask the first question and display the full result dict
# (question, chat_history, answer and source_documents — see the output below).

res = chain.invoke("What is Agents?")
res

{'question': 'What is Agents?',
 'chat_history': [HumanMessage(content='What is Agents?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='LLM-powered autonomous agents use LLMs as their core controller, acting as the agent\'s "brain."  They are capable of planning, including breaking down complex tasks and learning from mistakes.  They also utilize memory and can interact with tools.  Examples of such agents include AutoGPT, GPT-Engineer, and BabyAGI.\n', additional_kwargs={}, response_metadata={})],
 'answer': 'LLM-powered autonomous agents use LLMs as their core controller, acting as the agent\'s "brain."  They are capable of planning, including breaking down complex tasks and learning from mistakes.  They also utilize memory and can interact with tools.  Examples of such agents include AutoGPT, GPT-Engineer, and BabyAGI.\n',
 'source_documents': [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Reliability of natur

In [20]:
# Render only the answer text as Markdown.
display(Markdown(res["answer"]))

LLM-powered autonomous agents use LLMs as their core controller, acting as the agent's "brain."  They are capable of planning, including breaking down complex tasks and learning from mistakes.  They also utilize memory and can interact with tools.  Examples of such agents include AutoGPT, GPT-Engineer, and BabyAGI.


#### **Example 02:**

Use a custom prompt for the answer-generation step.

In [22]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate

In [28]:
chat_memory = None

async def conversational_qa_chain():
  global chat_memory

  # Define your custom prompt template
  custom_prompt_template = """You are a helpful assistants, named Lily, designed by Dibyendu Biswas.
  Your task is to answers the question based on available context.
  If you do not have enough information, then say I don't know.

  {context}

  Question: {question}
  """

  # Create a PromptTemplate instance with your custom template
  custom_prompt = PromptTemplate(
      template=custom_prompt_template,
      input_variables=["context", "question"],
  )

  message_history = ChatMessageHistory()
  chat_memory = ConversationBufferMemory(
      memory_key="chat_history", output_key="answer", chat_memory=message_history, return_messages=True
  )

  # ConversationRetrievalQnA:
  chain = ConversationalRetrievalChain.from_llm(
      llm=llm,
      retriever=retriever,
      chain_type="stuff",
      memory=chat_memory,
      return_source_documents=True,
      get_chat_history=lambda h : h,
      combine_docs_chain_kwargs={"prompt": custom_prompt}
  )

  return chain


chain = await conversational_qa_chain()


res = chain.invoke("Tell me something about you.")
display(Markdown(res["answer"]))

I am Lily, a helpful assistant designed by Dibyendu Biswas. My purpose is to answer your questions based on the context provided.


In [29]:
# Inspect the memory object after the first turn (bare expression, so its repr is displayed).
chat_memory

ConversationBufferMemory(chat_memory=InMemoryChatMessageHistory(messages=[HumanMessage(content='Tell me something about you.', additional_kwargs={}, response_metadata={}), AIMessage(content='I am Lily, a helpful assistant designed by Dibyendu Biswas. My purpose is to answer your questions based on the context provided.\n', additional_kwargs={}, response_metadata={})]), output_key='answer', return_messages=True, memory_key='chat_history')

In [30]:
# Second turn on the same chain; the memory already holds the first exchange.
res = chain.invoke("What kind of infotmation do you have?")
display(Markdown(res["answer"]))

As Lily, I have access to the information provided in the context you gave me, which discusses memory types, including sensory, short-term, long-term, explicit/declarative, and implicit/procedural memory.  I also have information about how these relate to AI agents, including memory streams, retrieval models, and reflection mechanisms.  Additionally, I have a small amount of information about tool use for agents and a few figures comparing different AI models.  I do not have access to external websites or specific files beyond what was included in your initial prompt.


In [31]:
# Inspect the memory again: it should now contain both exchanges.
chat_memory

ConversationBufferMemory(chat_memory=InMemoryChatMessageHistory(messages=[HumanMessage(content='Tell me something about you.', additional_kwargs={}, response_metadata={}), AIMessage(content='I am Lily, a helpful assistant designed by Dibyendu Biswas. My purpose is to answer your questions based on the context provided.\n', additional_kwargs={}, response_metadata={}), HumanMessage(content='What kind of infotmation do you have?', additional_kwargs={}, response_metadata={}), AIMessage(content='As Lily, I have access to the information provided in the context you gave me, which discusses memory types, including sensory, short-term, long-term, explicit/declarative, and implicit/procedural memory.  I also have information about how these relate to AI agents, including memory streams, retrieval models, and reflection mechanisms.  Additionally, I have a small amount of information about tool use for agents and a few figures comparing different AI models.  I do not have access to external websi

In [32]:
# Read the stored history back as the dict the memory exposes (keyed by 'chat_history').
chat_memory.load_memory_variables({})

{'chat_history': [HumanMessage(content='Tell me something about you.', additional_kwargs={}, response_metadata={}),
  AIMessage(content='I am Lily, a helpful assistant designed by Dibyendu Biswas. My purpose is to answer your questions based on the context provided.\n', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='What kind of infotmation do you have?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='As Lily, I have access to the information provided in the context you gave me, which discusses memory types, including sensory, short-term, long-term, explicit/declarative, and implicit/procedural memory.  I also have information about how these relate to AI agents, including memory streams, retrieval models, and reflection mechanisms.  Additionally, I have a small amount of information about tool use for agents and a few figures comparing different AI models.  I do not have access to external websites or specific files beyond what was included in 

## **Conversational Buffer Window Memory:**

In [34]:
chat_memory = None

async def conversational_qa_chain():
  global chat_memory

  message_history = ChatMessageHistory()
  chat_memory = ConversationBufferWindowMemory(
      memory_key="chat_history", output_key="answer", k=2, chat_memory=message_history, return_messages=True
  )

  # ConversationRetrievalQnA:
  chain = ConversationalRetrievalChain.from_llm(
      llm=llm,
      retriever=retriever,
      chain_type="stuff",
      memory=chat_memory,
      return_source_documents=True,
      get_chat_history=lambda h : h
  )

  return chain


chain = await conversational_qa_chain()


res = chain.invoke("What is Memory?")
display(Markdown(res["answer"]))

Memory is defined as the processes used to acquire, store, retain, and later retrieve information.  There are several types of memory in human brains, including sensory memory, short-term memory, and long-term memory (which includes explicit/declarative and implicit/procedural memory).  In the context of AI agents, memory can also refer to mechanisms like in-context learning, external vector stores, and memory streams.


In [35]:
# Inspect the window memory after the first turn.
chat_memory

ConversationBufferWindowMemory(chat_memory=InMemoryChatMessageHistory(messages=[HumanMessage(content='What is Memory?', additional_kwargs={}, response_metadata={}), AIMessage(content='Memory is defined as the processes used to acquire, store, retain, and later retrieve information.  There are several types of memory in human brains, including sensory memory, short-term memory, and long-term memory (which includes explicit/declarative and implicit/procedural memory).  In the context of AI agents, memory can also refer to mechanisms like in-context learning, external vector stores, and memory streams.\n', additional_kwargs={}, response_metadata={})]), output_key='answer', return_messages=True, memory_key='chat_history', k=2)

In [36]:
# Second turn on the window-memory chain.
res = chain.invoke("What is Chain of thought prompt?")
display(Markdown(res["answer"]))

A Chain of thought (CoT) prompt instructs a large language model (LLM) to "think step by step".  This encourages the model to break down complex tasks into smaller, simpler steps, utilizing more processing time to arrive at a solution.  It's a standard prompting technique used to improve model performance on difficult tasks.


In [37]:
# Inspect the window memory after the second turn.
chat_memory

ConversationBufferWindowMemory(chat_memory=InMemoryChatMessageHistory(messages=[HumanMessage(content='What is Memory?', additional_kwargs={}, response_metadata={}), AIMessage(content='Memory is defined as the processes used to acquire, store, retain, and later retrieve information.  There are several types of memory in human brains, including sensory memory, short-term memory, and long-term memory (which includes explicit/declarative and implicit/procedural memory).  In the context of AI agents, memory can also refer to mechanisms like in-context learning, external vector stores, and memory streams.\n', additional_kwargs={}, response_metadata={}), HumanMessage(content='What is Chain of thought prompt?', additional_kwargs={}, response_metadata={}), AIMessage(content='A Chain of thought (CoT) prompt instructs a large language model (LLM) to "think step by step".  This encourages the model to break down complex tasks into smaller, simpler steps, utilizing more processing time to arrive at

In [41]:
# Third turn on the window-memory chain (the memory was created with k=2).
res = chain.invoke("Tell me Something about youself.")
display(Markdown(res["answer"]))

In [None]:
# Inspect the window memory after the third turn.
chat_memory

## **Conversational Summary Memory:**

#### **Example 01:**

In [None]:
chat_memory = None

async def conversational_qa_chain():
  global chat_memory

  message_history = ChatMessageHistory()
  chat_memory = ConversationSummaryMemory(
      memory_key="chat_history", output_key="answer", k=2, chat_memory=message_history, return_messages=True
  )

  # ConversationRetrievalQnA:
  chain = ConversationalRetrievalChain.from_llm(
      llm=llm,
      retriever=retriever,
      chain_type="stuff",
      memory=chat_memory,
      return_source_documents=True,
      get_chat_history=lambda h : h
  )

  return chain


chain = await conversational_qa_chain()

## **Conversational Summary Buffer Memory:**

#### **Example 01:**

In [None]:
chat_memory = None

async def conversational_qa_chain():
  global chat_memory

  message_history = ChatMessageHistory()
  chat_memory = ConversationSummaryBufferMemory(
      llm=llm, max_token_limit=100,
      memory_key="chat_history", output_key="answer", k=2, chat_memory=message_history, return_messages=True
  )

  # ConversationRetrievalQnA:
  chain = ConversationalRetrievalChain.from_llm(
      llm=llm,
      retriever=retriever,
      chain_type="stuff",
      memory=chat_memory,
      return_source_documents=True,
      get_chat_history=lambda h : h
  )

  return chain


chain = await conversational_qa_chain()

## **Conversational Entity Memory:**

In [42]:
from langchain.memory.prompt import ENTITY_MEMORY_CONVERSATION_TEMPLATE
from pydantic import BaseModel
from typing import List, Dict, Any

#### **Example 01:**

In [None]:
chat_memory = None

async def conversational_qa_chain():
  global chat_memory

  message_history = ChatMessageHistory()
  chat_memory = ConversationEntityMemory(
      llm=llm,
      memory_key="chat_history", output_key="answer", k=2, chat_memory=message_history, return_messages=True
  )

  # ConversationRetrievalQnA:
  chain = ConversationalRetrievalChain.from_llm(
      llm=llm,
      retriever=retriever,
      chain_type="stuff",
      memory=chat_memory,
      return_source_documents=True,
      get_chat_history=lambda h : h,
      get_prompt=lambda h : ENTITY_MEMORY_CONVERSATION_TEMPLATE
  )

  return chain


chain = await conversational_qa_chain()