--------------------------
## Directly wrap the memory agent in a chatbot-like interface via a custom MCP client

In [None]:
## custom snippet to handle threading with asyncio
import threading
import asyncio

class RunThread(threading.Thread):
    """Worker thread that drives one coroutine function to completion.

    The thread creates its own event loop via ``asyncio.run`` so it can be
    used while another loop is already running on the calling thread.

    Attributes set after ``join()``:
        result: value returned by the coroutine (``None`` if it raised).
        exception: the exception raised by the coroutine, or ``None``.
    """

    def __init__(self, func, args, kwargs):
        super().__init__()
        self.func = func
        self.args = args
        self.kwargs = kwargs
        self.result = None
        self.exception = None

    def run(self):
        # Capture failures instead of letting them die inside the thread;
        # otherwise the caller would only ever see ``result is None``.
        try:
            self.result = asyncio.run(self.func(*self.args, **self.kwargs))
        except BaseException as exc:  # includes asyncio.CancelledError
            self.exception = exc


def run_async(func, *args, **kwargs):
    """Run the coroutine function ``func(*args, **kwargs)`` synchronously.

    If no event loop is running on the current thread the coroutine is
    executed with ``asyncio.run``. If a loop is already running (as in a
    notebook cell), the coroutine is executed on a helper thread with its own
    loop and this call blocks until that thread finishes.

    Args:
        func: a coroutine function (``async def``).
        *args, **kwargs: forwarded to ``func``.

    Returns:
        Whatever the coroutine returns.

    Raises:
        Any exception raised by the coroutine, re-raised in the caller.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop on this thread: safe to run directly.
        return asyncio.run(func(*args, **kwargs))

    worker = RunThread(func, args, kwargs)
    worker.start()
    worker.join()
    if worker.exception is not None:
        raise worker.exception
    return worker.result




In [None]:
import os
from typing import List, Dict
from datetime import datetime

from fastmcp import Client
from fastmcp.client.transports import StreamableHttpTransport
from fastmcp.tools import Tool
from colorama import Fore
async def memory_client(query, user_id):
    """Send one user turn to the ``memory_agent`` MCP tool and return its text reply.

    Opens a streamable-HTTP MCP client against http://127.0.0.1:4200/mcp,
    prints every tool the server lists, then calls ``memory_agent``.

    Args:
        query: the user's message, forwarded as the tool's "query" argument.
        user_id: forwarded as the tool's "user_id" argument.

    Returns:
        The ``text`` attribute of the first item in the result's ``content`` list.
    """
    transport = StreamableHttpTransport("http://127.0.0.1:4200/mcp")  # use /mcp path
    client = Client(transport=transport)
    payload = {"query": query, "user_id": user_id}

    async with client:
        available: list[Tool] = await client.list_tools()
        for entry in available:
            print(f"Tool: {entry}")
        reply = await client.call_tool("memory_agent", payload)

    # The tool result carries a list in `content`; the reply text is read from
    # the first element, shaped like:
    #   CallToolResult(content=[TextContent(type='text', text='...')], ..., is_error=False)
    text = reply.content[0].text

    print(Fore.CYAN + "inside mcp client , the respond from memory enabled agent:\n", text, Fore.RESET)
    return text


In [None]:
from colorama import Fore

# Interactive chat loop: every line typed by the user is sent to the memory
# agent through `memory_client` until the user types 'exit'.
user_id = "user_1"
print("Warm welcome, I am a personal conversational assistant chatbot, \n I have a very good memory and will keep track on our conversation.\n when you are done talking to me. \n Type 'exit' to end the conversation.")
turns = 0
while True:
    # Get user input
    query = input()
    turns += 1
    print(f"------------------------------------------- turn {turns} ------------------------------------------\n")
    # Reset the colour right away so it does not bleed into later output
    # (previously the exit message and everything after it stayed green).
    print(Fore.LIGHTGREEN_EX + "User:", query, Fore.RESET)

    # Check if user wants to exit (tolerate surrounding whitespace)
    if query.strip().lower() == 'exit':
        print("Thank you for talking to me, I wish you a nice day. Bye for now ^__^b ")
        break

    # Handle the query and print the response; run_async blocks until the
    # MCP call made by memory_client has finished.
    response = run_async(memory_client, query, user_id)

    print("Assistant:", response, Fore.RESET, "\n\n")


-------------------------------------------
## Integrating standalone Agent Memory into LangGraph

In [None]:
# Or you can directly instantiate the tool
from langchain_community.tools import HumanInputRun
from langchain.agents import AgentType, load_tools
from langchain.agents import AgentType, initialize_agent, load_tools


def get_human_input() -> str:
    """Ask the human whether to load previously saved memory or start fresh.

    Reads lines from stdin until a recognisable answer is given and returns
    as soon as one is.

    Returns:
        "LoadingMemory" for "yes"/"y", "FreshStart" for "no"/"n"
        (case-insensitive, surrounding whitespace ignored), or "" when the
        user types 'exit' or input ends (EOF) before a choice was made.
    """
    print("Decide whether to load from previous saved memory or not")
    print("""\n
            Yes/No            
            Enter ONLY Yes or No and nothing else !""")

    # Exact-match table: substring checks such as `'y' in line` would treat
    # any text containing a "y" as a yes.
    choices = {
        "yes": "LoadingMemory",
        "y": "LoadingMemory",
        "no": "FreshStart",
        "n": "FreshStart",
    }

    while True:
        try:
            answer = input().strip().lower()
        except EOFError:
            return ""

        if answer == 'exit':
            print("No choice made, exiting now ,thank you!")
            return ""

        tool = choices.get(answer)
        if tool is not None:
            # Return immediately: the graph expects exactly one of the two
            # tokens, not a multi-line transcript of everything typed.
            print("You've chosen : ", tool)
            return tool

        print("Please answer Yes or No (or type 'exit' to quit).")


# Human-in-the-loop tool: HumanInputRun is configured with get_human_input as
# its input function, so the answer comes from that function instead of the
# tool's default input reader.

ask_human = HumanInputRun(input_func=get_human_input)

In [None]:
## first we define the graph state (the `State` TypedDict)
from typing import Dict, TypedDict
from typing import TypedDict, Annotated, List, Union
from langchain_core.agents import AgentAction, AgentFinish
import operator

from langchain_core.messages import BaseMessage
class State(TypedDict):
    """Shared state dictionary passed between the graph's nodes."""
    # The input string
    query: str    
    # Identifier forwarded to the MCP memory tools as "user_id"
    user_id: str
    # Value returned by human_choice_node; memory_execution_node compares it
    # against "LoadingMemory" and "FreshStart"
    human_choice : str
    # Reply obtained through the MCP memory tools (set by memory_execution_node)
    agent_with_memory_response : str
    # Reply from calling the LLM directly on the query (set by memory_execution_node)
    agent_without_memory_response: str

In [None]:
from langgraph.graph import END, StateGraph
from colorama  import Fore,Style
from langchain_nvidia_ai_endpoints import ChatNVIDIA
# Chat model that memory_execution_node invokes directly on the query to
# produce the "without memory" answer.
llm = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", max_tokens=1024)


async def restart_memory_client(query, user_id):
    """Call the ``restart_memory_agent`` MCP tool and return its text reply.

    Same flow as the regular memory client, but targets the
    ``restart_memory_agent`` tool on http://127.0.0.1:4200/mcp.
    NOTE(review): judging by the tool name this presumably resets the stored
    memory before answering; that behaviour lives on the server, so confirm there.

    Args:
        query: the user's message, sent as the tool's "query" argument.
        user_id: sent as the tool's "user_id" argument.

    Returns:
        The ``text`` attribute of the first item in the result's ``content`` list.
    """
    mcp_transport = StreamableHttpTransport("http://127.0.0.1:4200/mcp")  # use /mcp path
    client = Client(transport=mcp_transport)

    async with client:
        # List (and print) the tools the server exposes before calling one.
        server_tools: list[Tool] = await client.list_tools()
        for server_tool in server_tools:
            print(f"Tool: {server_tool}")

        tool_arguments = {"query": query, "user_id": user_id}
        tool_result = await client.call_tool("restart_memory_agent", tool_arguments)

    # `content` is a list of content items; the answer is the `.text` of the
    # first one, e.g. CallToolResult(content=[TextContent(type='text', text='...')], ...)
    first_item = tool_result.content[0]
    output = first_item.text

    print(Fore.CYAN + "inside mcp client , the respond from memory enabled agent:\n", output, Fore.RESET)
    return output




# Define the functions needed 
def human_choice_node(state):
    """Graph node that asks the human which memory mode to use.

    Prints the incoming state, invokes the ``ask_human`` tool with the query
    and records the tool's answer.

    Args:
        state: graph state; must contain "query".

    Returns:
        Dict with "human_choice" (the value returned by ``ask_human``) and the
        unchanged "query".
    """
    print(Fore.BLUE+ "state: " , state)
    print("---" * 10)

    # The query is passed through untouched so later nodes see the original prompt.
    original_query = state["query"]
    selected = ask_human.invoke(input=original_query)

    print(Fore.CYAN+ "choosen_agent : " + selected + Fore.RESET)
    return dict(human_choice=selected, query=original_query)

def memory_execution_node(state):
    """Graph node that answers the query with and without the memory agent.

    Depending on ``state['human_choice']`` the query is sent through
    ``memory_client`` ("LoadingMemory") or ``restart_memory_client``
    ("FreshStart"); any other value yields a fixed reminder message. The
    query is additionally sent straight to ``llm``.

    Args:
        state: graph state; must contain "query", "user_id" and "human_choice".

    Returns:
        Dict with "agent_with_memory_response" and
        "agent_without_memory_response".
    """
    question, uid = state["query"], state["user_id"]
    print(Fore.CYAN + "user query: ", question , Fore.RESET)

    # Route on the choice made in the previous node.
    selection = state['human_choice']
    if selection not in ('LoadingMemory', "FreshStart"):
        memory_reply = "Please make sure you made a choice to load pre-existing memory or not."
    elif selection == 'LoadingMemory':
        # continue with the existing memory
        memory_reply = run_async(memory_client, question, uid)
    else:
        # "FreshStart": go through the restart tool instead
        memory_reply = run_async(restart_memory_client, question, uid)

    # Answer from the bare LLM, for comparison with the memory-backed answer.
    plain_reply = llm.invoke(question).content
    print(Fore.CYAN+ "agent_output: \n" + memory_reply + Fore.RESET)

    return {
        "agent_with_memory_response": memory_reply,
        "agent_without_memory_response": plain_reply,
    }

In [None]:
from langgraph.graph import END, StateGraph

# Define a new graph
# Build a two-node graph whose state schema is the State TypedDict.
workflow = StateGraph(State)

# Register the two nodes: "start" asks the human for a choice,
# "end" runs the chosen memory mode plus the plain LLM call.
workflow.add_node("start", human_choice_node)
workflow.add_node("end", memory_execution_node)

# "start" is the first node called; it flows into "end", which then
# terminates the graph via END.
workflow.set_entry_point("start")
workflow.add_edge("start", "end")
workflow.add_edge("end", END)

# Finally, we compile it!
# This compiles it into a LangChain Runnable,
# meaning you can use it as you would any other runnable
app = workflow.compile()

In [None]:
# Turn 1: introduce the user "sofia" to the graph.
my_query="Hello there, my name is Sofia and I am an young artists, I am very good in drawing realistic human faces and expressions."
user_id="sofia"
respond=app.invoke({"query":my_query, "user_id":user_id})

In [None]:
# Turn 2: a follow-up message for the same user_id.
my_query="I tried to apply for jobs as illustrator in many different companies, but I cannot seem to get hired. I am quite sad."
user_id="sofia"
respond=app.invoke({"query":my_query, "user_id":user_id})

In [None]:
# Turn 3: ask what is remembered; `respond` is inspected in the next cells.
my_query="Tell me what do you remember about me?"
user_id="sofia"
respond=app.invoke({"query":my_query, "user_id":user_id})

In [None]:
# Answer produced through the MCP memory tools for the last invoke.
respond["agent_with_memory_response"]

In [None]:
# Answer produced by calling the LLM directly (no memory) for the last invoke.
respond["agent_without_memory_response"]

-------------------------------------------
## Integrating standalone Agent Memory into Haystack

In [None]:
!pip install nvidia-haystack==0.1.2


In [None]:
import ast 
from fastmcp import Client
from fastmcp.client.transports import StreamableHttpTransport
from fastmcp.tools import Tool
from colorama import Fore
import nest_asyncio
# NOTE(review): presumably patches asyncio so event loops can be nested inside
# the notebook's already-running loop — confirm against the nest_asyncio docs.
nest_asyncio.apply()

async def haystack_recall_memory(query, user_id):
    """Fetch memory items for a user through the ``fetch_memory_items`` MCP tool.

    Connects to http://127.0.0.1:4200/mcp, prints the tools the server lists,
    calls ``fetch_memory_items`` and parses the text of the first content item
    with ``ast.literal_eval``.

    Args:
        query: sent as the tool's "query" argument.
        user_id: sent as the tool's "user_id" argument.

    Returns:
        The Python object obtained by literal-evaluating the tool's text reply.
    """
    endpoint = StreamableHttpTransport("http://127.0.0.1:4200/mcp")  # use /mcp path
    client = Client(transport=endpoint)
    async with client:
        listed: list[Tool] = await client.list_tools()
        for item in listed:
            print(f"Tool: {item}")

        raw = await client.call_tool(
            "fetch_memory_items",
            {"query": query, "user_id": user_id},
        )
        print(Fore.GREEN +"fetch_memory_items result type ", type(raw), raw, Fore.RESET)

        # The reply text is a Python literal; turn it back into an object.
        memories = ast.literal_eval(raw.content[0].text)
        print(Fore.GREEN +"fetch_memory_items output type ", type(memories), memories, Fore.RESET)
    return memories
                               

In [None]:
from haystack_integrations.components.generators.nvidia import NvidiaGenerator

# Text generator used by wrap_in_haystack to produce the reply from the
# memory-augmented prompt.
generator = NvidiaGenerator(
    model="meta/llama-3.1-405b-instruct",
    api_url="https://integrate.api.nvidia.com/v1",
    # Sampling / length settings passed to the generator as model arguments.
    model_arguments={
        "temperature": 0.2,
        "top_p": 0.7,
        "max_tokens": 1024,
    },
)
# warm_up() is called once here, before the first generator.run() in a later cell.
generator.warm_up()

In [None]:
async def wrap_in_haystack(query,user_id):
    """Answer ``query`` with the generator, grounded in the user's recalled memories.

    Recalls memory items via ``haystack_recall_memory``, joins them into the
    prompt and runs ``generator`` on it. When nothing usable is recalled
    (empty list, unparsable or non-list value) the prompt states that no
    memory items were found instead of failing.

    Args:
        query: the user's message.
        user_id: identifier whose memories are recalled.

    Returns:
        Tuple ``(response, recall_memories)``: the first generated reply and
        the recalled memories (parsed to a Python object when they arrived as
        a literal string).
    """
    recall_memories = await haystack_recall_memory(query, user_id)
    print(type(recall_memories), recall_memories)

    if isinstance(recall_memories, list):
        print("recall_memories is already a list of strings"  )
    elif isinstance(recall_memories, str):
        # A string may still hold a Python literal (e.g. "['a', 'b']").
        try:
            recall_memories = ast.literal_eval(recall_memories)
        except (ValueError, SyntaxError):
            pass  # not a literal: treated as "no memories" below

    if isinstance(recall_memories, list) and len(recall_memories) > 0:
        #memories = [Document(content=memory) for memory in recall_memories]
        # str() keeps the join safe if an item is not already a string.
        memories_str = ','.join(str(mem) for mem in recall_memories)
    else:
        # Also covers the empty list, which previously left memories_str unset.
        memories_str = f"no memories item found for user {user_id}"

    prompt_template = f"""
    Given these recalled memories, continue the conversation with the user, ask more question and make the user feel you are eager to find out more about him or her.\nDocuments:
    {memories_str}
    \nQuestion: {query}
    \nAnswer:
    """
    output = generator.run(prompt_template)
    response = output['replies'][0]
    print("---"*10)
    print(Fore.LIGHTMAGENTA_EX + "respond :\n", response)
    return response, recall_memories

In [None]:
# Turn 1: introduce the user "kevin" through wrap_in_haystack.
query = "Hello, my name is Kevin and I am a software engineer"
user_id="kevin"
output=await wrap_in_haystack(query,user_id)


In [None]:
# Turn 2: a follow-up message for the same user_id.
query = "My favorite movie is interstella and forest Gump"
user_id="kevin"
output=await wrap_in_haystack(query,user_id)


In [None]:
# Turn 3: ask what is remembered; `output` holds (response, recall_memories).
query = "Tell me what you remember about me?"
user_id="kevin"
output=await wrap_in_haystack(query,user_id)


In [None]:
# (end of notebook — stray keystrokes removed so Run All finishes without a NameError)