## Initialize the MemoryHandler

In [1]:
import json
from typing import List, Literal, Optional

import tiktoken
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.messages import get_buffer_string
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableConfig
from langchain_core.tools import tool
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai.embeddings import OpenAIEmbeddings

from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, START, MessagesState, StateGraph
from langgraph.prebuilt import ToolNode
import uuid
from MemoryManager import MemoryHandler
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank
import os

# Chat model and embedding model served through NVIDIA AI endpoints; both are
# handed to the project's MemoryHandler, which the rest of the notebook uses.
model = "meta/llama-3.1-405b-instruct"
llm = ChatNVIDIA(model=model)
embed = NVIDIAEmbeddings(
    model="nvidia/nv-embedqa-mistral-7b-v2",
    truncate="NONE",
)
memory_manager = MemoryHandler(llm, embed)


## Wrap into runnable chains with streaming

In [2]:
from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough

async def mem_routing_function(inputs):
    """Ask the memory manager which memory operation the query calls for.

    Args:
        inputs: Mapping with an "input" entry (the user query) and a
            "config" entry; both are forwarded to
            `memory_manager.memory_routing`.

    Returns:
        Whatever `memory_manager.memory_routing` returns for the query.
    """
    return await memory_manager.memory_routing(inputs["input"], inputs["config"])


async def create_memory_items(inputs):
    """Turn the user query into memory items via the memory manager.

    Args:
        inputs: Mapping whose "input" entry is the user query.

    Returns:
        The result of `memory_manager.query_to_memory_items` for that query.
    """
    user_query = inputs["input"]
    return await memory_manager.query_to_memory_items(query=user_query)

# Wrap both async helpers as runnables so they can serve as the branches of a
# parallel step: routing decision first, extracted memory items second.
runnable_parallel_1, runnable_parallel_2 = (
    RunnableLambda(fn) for fn in (mem_routing_function, create_memory_items)
)
    

def execute_memory_operations(inputs):
    """Apply the routed memory operation, or answer the query directly.

    Args:
        inputs: Mapping with
            "mem_ops": routing decision text, matched case-insensitively
                against "save_memory" and "update_memory";
            "mem_items": mapping whose "facts" entry is handed to
                `memory_manager.save_recall_memory` (read only when saving
                or updating);
            "input": the original user query (read only when no memory
                operation is requested).

    Returns:
        The ids returned by `memory_manager.save_recall_memory` for a save or
        update; otherwise the `.content` of the LLM reply to the query.

    Raises:
        KeyError: if the entry needed by the selected branch is missing.
    """
    operation = inputs["mem_ops"].lower()
    # An update is persisted through the same call as a save.
    if "save_memory" in operation or "update_memory" in operation:
        facts = inputs["mem_items"]["facts"]
        _memories, ids = memory_manager.save_recall_memory(facts, memory_manager.config)
        return ids
    # Any other routing result is treated as "no operation": nothing is stored
    # and the query is answered by the LLM.
    return llm.invoke(inputs["input"]).content


sequence = RunnablePassthrough() | {  # this dict is coerced to a RunnableParallel
    "mem_ops": runnable_parallel_1,
    "mem_items": runnable_parallel_2,
    # Forward the raw query so the no-operation branch has something to answer.
    "input": lambda x: x["input"],
} | execute_memory_operations



## Populate the memory with conversations

In [4]:
config = {"configurable": {"user_id": "babe", "thread_id": "1"}}

output=""
async for event in sequence.astream_events(input={"input":"hi, my name is Babe, I am a pig and I can talk, my best friend is a chicken named Rob.", "config":config}):
    kind = event["event"]
    if kind == "on_chat_model_stream":
        content = event["data"]["chunk"].content
        if content:
            output += content
            print(output, end="|", flush=True)


{"|[36m** query_to_memory_items** streaming output >  {" [39m
[36m** query_to_memory_items** streaming output >  {"facts [39m
[36m** query_to_memory_items** streaming output >  {"facts" [39m
{"facts"save|{"facts"save :|[36m** query_to_memory_items** streaming output >  {"facts" : [39m
{"facts"save :_memory ["|[36m** query_to_memory_items** streaming output >  {"facts" : [" [39m
[36m** query_to_memory_items** streaming output >  {"facts" : ["Name [39m
{"facts"save :_memory ["Name is|[36m** query_to_memory_items** streaming output >  {"facts" : ["Name is [39m
[36m** query_to_memory_items** streaming output >  {"facts" : ["Name is Babe [39m
[36m** query_to_memory_items** streaming output >  {"facts" : ["Name is Babe", [39m
[36m** query_to_memory_items** streaming output >  {"facts" : ["Name is Babe", " [39m
{"facts"save :_memory ["Name is Babe", "Is|[36m** query_to_memory_items** streaming output >  {"facts" : ["Name is Babe", "Is [39m
{"facts"save :_memory ["Name is

In [5]:
config = {"configurable": {"user_id": "babe", "thread_id": "2"}}
output=""
async for event in sequence.astream_events(input={"input":"I had a fight with Rob, yes the chicken, he is no longer my best friend, my best friend is now Owen the wise owl!", "config":config}):
    kind = event["event"]
    if kind == "on_chat_model_stream":
        content = event["data"]["chunk"].content
        if content:
            output += content
            print(output, end="|", flush=True)



{"|[36m** query_to_memory_items** streaming output >  {" [39m
{"update_memory|{"update_memoryfacts|[36m** query_to_memory_items** streaming output >  {"facts [39m
[36m** query_to_memory_items** streaming output >  {"facts" [39m
[36m** query_to_memory_items** streaming output >  {"facts" : [39m
{"update_memoryfacts" : ["|[36m** query_to_memory_items** streaming output >  {"facts" : [" [39m
{"update_memoryfacts" : ["Had|[36m** query_to_memory_items** streaming output >  {"facts" : ["Had [39m
{"update_memoryfacts" : ["Had a|[36m** query_to_memory_items** streaming output >  {"facts" : ["Had a [39m
{"update_memoryfacts" : ["Had a fight|[36m** query_to_memory_items** streaming output >  {"facts" : ["Had a fight [39m
{"update_memoryfacts" : ["Had a fight with|[36m** query_to_memory_items** streaming output >  {"facts" : ["Had a fight with [39m
[36m** query_to_memory_items** streaming output >  {"facts" : ["Had a fight with Rob [39m
[36m** query_to_memory_items** streamin

## Integrate into the memory retrieval chain

In [9]:
# System prompt that grounds the answer in the retrieved memories ({context}).
# The message is assembled from adjacent string literals so that source-code
# indentation does not end up inside the prompt text.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are assistant with ability to memorize conversations from the user. "
            "You should always answer user query based on the following context:\n"
            "<Documents>\n{context}\n</Documents>. "
            "Be polite and helpful.",
        ),
        ("user", "{input}"),
    ]
)

# NOTE(review): presumably search_recall_memories reads user_id from the
# manager to scope the lookup; confirm against MemoryHandler.
memory_manager.user_id="babe"

# The incoming question is passed both to the memory search (as "context")
# and, unchanged, to the prompt (as "input"); the filled prompt goes to the LLM.
memory_retriever_chain = (
    {"context": memory_manager.search_recall_memories, "input": RunnablePassthrough()}
    | prompt
    | llm
)

In [11]:
output= await memory_retriever_chain.ainvoke("do you remember if I am friend with Rob?")
output.content

AIMessage(content='I recall that you had a bit of a complicated relationship with Rob. According to our previous conversations, Rob was initially your best friend, and interestingly, Rob is a chicken. However, I also remember that you mentioned you had a fight with Rob the chicken, and more recently, you indicated that Rob is no longer your best friend. Would you like to talk about what happened?', additional_kwargs={}, response_metadata={'role': 'assistant', 'content': 'I recall that you had a bit of a complicated relationship with Rob. According to our previous conversations, Rob was initially your best friend, and interestingly, Rob is a chicken. However, I also remember that you mentioned you had a fight with Rob the chicken, and more recently, you indicated that Rob is no longer your best friend. Would you like to talk about what happened?', 'token_usage': {'prompt_tokens': 89, 'total_tokens': 166, 'completion_tokens': 77}, 'finish_reason': 'stop', 'model_name': 'meta/llama-3.1-40