--------------------------
## Directly wrap the memory agent into a chatbot-like interface via a custom MCP client

In [1]:
## Custom snippet for running coroutines from synchronous code, using a helper thread when an asyncio loop is already running
import threading
import asyncio

class RunThread(threading.Thread):
    """Thread that runs one coroutine function to completion on its own event loop.

    Attributes:
        func: Coroutine function to call.
        args: Positional arguments forwarded to ``func``.
        kwargs: Keyword arguments forwarded to ``func``.
        result: Value returned by the coroutine, or ``None`` if it has not
            finished or it failed.
        exception: Exception raised by the coroutine, or ``None`` on success.
    """

    def __init__(self, func, args, kwargs):
        super().__init__()
        self.func = func
        self.args = args
        self.kwargs = kwargs
        self.result = None
        self.exception = None

    def run(self):
        """Run the coroutine with ``asyncio.run`` and record its outcome."""
        try:
            self.result = asyncio.run(self.func(*self.args, **self.kwargs))
        except BaseException as exc:
            # Keep the failure so the caller can re-raise it; otherwise the
            # error would only be reported by the thread's excepthook and the
            # caller would see a silent ``None`` result.
            self.exception = exc


def run_async(func, *args, **kwargs):
    """Run the coroutine function ``func`` synchronously and return its result.

    When no event loop is running in the current thread the coroutine is run
    directly with ``asyncio.run``. When a loop is already running,
    ``asyncio.run`` cannot be used in this thread, so the coroutine is run in
    a separate ``RunThread`` and this call blocks until that thread finishes.

    Args:
        func: Coroutine function to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever the coroutine returns.

    Raises:
        Any exception raised by the coroutine, including when it ran in the
        helper thread.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: run the coroutine right here.
        return asyncio.run(func(*args, **kwargs))

    thread = RunThread(func, args, kwargs)
    thread.start()
    thread.join()  # blocks the calling thread until the coroutine is done
    if thread.exception is not None:
        raise thread.exception
    return thread.result




In [2]:
import os
from typing import List, Dict
from datetime import datetime

from fastmcp import Client
from fastmcp.client.transports import StreamableHttpTransport
from fastmcp.tools import Tool
from colorama import Fore
async def memory_client(query, user_id, tool_name="memory_agent",
                        server_url="http://127.0.0.1:4200/mcp"):
    """Call a tool on the MCP server and return its text response.

    Connects over streamable HTTP, prints every tool the server advertises,
    then calls ``tool_name`` with the query and user id.

    Args:
        query: The user query forwarded to the tool.
        user_id: Identifier of the current user, forwarded to the tool.
        tool_name: Name of the MCP tool to call. Defaults to "memory_agent".
        server_url: URL of the MCP endpoint (note the ``/mcp`` path).

    Returns:
        The ``text`` attribute of the first content item in the tool result.

    Raises:
        IndexError: If the tool result contains no content items.
    """
    client = Client(transport=StreamableHttpTransport(server_url))
    async with client:
        for tool in await client.list_tools():
            print(f"Tool: {tool}")

        result = await client.call_tool(
            tool_name,
            {
                "query": query,
                "user_id": user_id,
            },
        )

    # The tool result carries a list of content items, e.g.
    #   CallToolResult(content=[TextContent(type='text', text="...")], ...)
    # and the reply is read from the first item's `text` attribute.
    if not result.content:
        # Same exception type as indexing an empty list, with a clearer message.
        raise IndexError(f"MCP tool {tool_name!r} returned no content items")
    output = result.content[0].text

    print(Fore.CYAN + "inside mcp client , the respond from memory enabled agent:\n", output, Fore.RESET)
    return output


In [3]:
from colorama import Fore

# Interactive chat loop: every user turn is sent to the memory-enabled agent
# through the MCP client until the user types 'exit'.
user_id = "user_1"
print("Warm welcome, I am a personal conversational assistant chatbot, \n I have a very good memory and will keep track on our conversation.\n when you are done talking to me. \n Type 'exit' to end the conversation.")
turns = 0
while True:
    # Get user input
    turns += 1
    query = input()
    print(f"------------------------------------------- turn {turns} ------------------------------------------\n")
    # Reset the colour right after echoing the user line so it cannot leak
    # into later output (previously it stayed green after 'exit').
    print(Fore.LIGHTGREEN_EX + "User:", query, Fore.RESET)

    # Check if user wants to exit (ignore surrounding whitespace)
    if query.strip().lower() == 'exit':
        print("Thank you for talking to me, I wish you a nice day. Bye for now ^__^b ")
        break

    # Send the query to the memory agent; run_async blocks until it replies.
    response = run_async(memory_client, query, user_id)

    print("Assistant:", response, Fore.RESET, "\n\n")


Warm welcome, I am a personal conversational assistant chatbot, 
 I have a very good memory and will keep track on our conversation.
 when you are done talking to me. 
 Type 'exit' to end the conversation.


 hello, my name is Ruby, what is your name?


------------------------------------------- turn 1 ------------------------------------------

[92mUser: hello, my name is Ruby, what is your name?
Tool: name='memory_agent' title=None description="An Agent with memory enabled, can memorize the past conversation and respond accordingly.\nArgs:\n    query (str): The input user query\n    user_id (str): the current user's id\nReturns:\n    str: output response to the user " inputSchema={'properties': {'query': {'title': 'Query', 'type': 'string'}, 'user_id': {'title': 'User Id', 'type': 'string'}}, 'required': ['query', 'user_id'], 'type': 'object'} outputSchema={'properties': {'result': {'title': 'Result', 'type': 'string'}}, 'required': ['result'], 'title': '_WrappedResult', 'type': 'object', 'x-fastmcp-wrap-result': True} annotations=None meta={'_fastmcp': {'tags': []}}
Tool: name='restart_memory_agent' title=None description="An Agent with memory enabled, can memorize the past conversation and respond accordingly.\nArgs:\n    query 

 oki, let me tell you about myself, I am a shape-shifter, I can change into anything.


------------------------------------------- turn 2 ------------------------------------------

[92mUser: oki, let me tell you about myself, I am a shape-shifter, I can change into anything.
Tool: name='memory_agent' title=None description="An Agent with memory enabled, can memorize the past conversation and respond accordingly.\nArgs:\n    query (str): The input user query\n    user_id (str): the current user's id\nReturns:\n    str: output response to the user " inputSchema={'properties': {'query': {'title': 'Query', 'type': 'string'}, 'user_id': {'title': 'User Id', 'type': 'string'}}, 'required': ['query', 'user_id'], 'type': 'object'} outputSchema={'properties': {'result': {'title': 'Result', 'type': 'string'}}, 'required': ['result'], 'title': '_WrappedResult', 'type': 'object', 'x-fastmcp-wrap-result': True} annotations=None meta={'_fastmcp': {'tags': []}}
Tool: name='restart_memory_agent' title=None description="An Agent with memory enabled, can memorize the past conversation a

 yes, I can indeed change into anything, my all time favorite is a pigeon.


------------------------------------------- turn 3 ------------------------------------------

[92mUser: yes, I can indeed change into anything, my all time favorite is a pigeon.
Tool: name='memory_agent' title=None description="An Agent with memory enabled, can memorize the past conversation and respond accordingly.\nArgs:\n    query (str): The input user query\n    user_id (str): the current user's id\nReturns:\n    str: output response to the user " inputSchema={'properties': {'query': {'title': 'Query', 'type': 'string'}, 'user_id': {'title': 'User Id', 'type': 'string'}}, 'required': ['query', 'user_id'], 'type': 'object'} outputSchema={'properties': {'result': {'title': 'Result', 'type': 'string'}}, 'required': ['result'], 'title': '_WrappedResult', 'type': 'object', 'x-fastmcp-wrap-result': True} annotations=None meta={'_fastmcp': {'tags': []}}
Tool: name='restart_memory_agent' title=None description="An Agent with memory enabled, can memorize the past conversation and respond 

 well, apart from able to fly, but also you can observe people without anyone noticing, since you can find pigeons almost anywhere.


------------------------------------------- turn 4 ------------------------------------------

[92mUser: well, apart from able to fly, but also you can observe people without anyone noticing, since you can find pigeons almost anywhere.
Tool: name='memory_agent' title=None description="An Agent with memory enabled, can memorize the past conversation and respond accordingly.\nArgs:\n    query (str): The input user query\n    user_id (str): the current user's id\nReturns:\n    str: output response to the user " inputSchema={'properties': {'query': {'title': 'Query', 'type': 'string'}, 'user_id': {'title': 'User Id', 'type': 'string'}}, 'required': ['query', 'user_id'], 'type': 'object'} outputSchema={'properties': {'result': {'title': 'Result', 'type': 'string'}}, 'required': ['result'], 'title': '_WrappedResult', 'type': 'object', 'x-fastmcp-wrap-result': True} annotations=None meta={'_fastmcp': {'tags': []}}
Tool: name='restart_memory_agent' title=None description="An Agent with memory

 hey, tell me what do you know about me.


------------------------------------------- turn 5 ------------------------------------------

[92mUser: hey, tell me what do you know about me.
Tool: name='memory_agent' title=None description="An Agent with memory enabled, can memorize the past conversation and respond accordingly.\nArgs:\n    query (str): The input user query\n    user_id (str): the current user's id\nReturns:\n    str: output response to the user " inputSchema={'properties': {'query': {'title': 'Query', 'type': 'string'}, 'user_id': {'title': 'User Id', 'type': 'string'}}, 'required': ['query', 'user_id'], 'type': 'object'} outputSchema={'properties': {'result': {'title': 'Result', 'type': 'string'}}, 'required': ['result'], 'title': '_WrappedResult', 'type': 'object', 'x-fastmcp-wrap-result': True} annotations=None meta={'_fastmcp': {'tags': []}}
Tool: name='restart_memory_agent' title=None description="An Agent with memory enabled, can memorize the past conversation and respond accordingly.\nArgs:\n    query (st

 exit


------------------------------------------- turn 6 ------------------------------------------

[92mUser: exit
Thank you for talking to me, I wish you a nice day. Bye for now ^__^b 


-------------------------------------------
## Integrating the standalone Agent Memory into LangGraph

In [4]:
# Or you can directly instantiate the tool
from langchain_community.tools import HumanInputRun
from langchain.agents import AgentType, load_tools
from langchain.agents import AgentType, initialize_agent, load_tools


def get_human_input() -> str:
    """Ask the human whether to load the previously saved memory or start fresh.

    Lines are read until the user types 'exit' or input ends. "y"/"yes"
    selects "LoadingMemory" and "n"/"no" selects "FreshStart" (case-insensitive,
    surrounding whitespace ignored). Any other line is rejected with a reminder.
    If several valid answers are given, the last one wins.

    Returns:
        "LoadingMemory", "FreshStart", or "" when no valid choice was made.
    """
    print("Decide whether to load from previous saved memory or not")
    print("""\n
            Yes/No            
            Enter ONLY Yes or No and nothing else !""")
    tool = ""
    while True:
        try:
            line = input()
        except EOFError:
            break
        answer = line.strip().lower()
        if answer == 'exit':
            if tool:
                print("You've chosen : ", tool , " exiting now ,thank you!")
            else:
                # 'exit' before any valid answer used to crash on the unbound `tool`.
                print("No choice was made, exiting now ,thank you!")
            break
        # Exact matching: substring checks treated e.g. "maybe" as yes and "know" as no.
        if answer in ("y", "yes"):
            tool = "LoadingMemory"
        elif answer in ("n", "no"):
            tool = "FreshStart"
        else:
            print("Please enter Yes or No, then type 'exit' to confirm.")

    return tool


# Wrap get_human_input in a HumanInputRun tool; the graph's first node calls
# ask_human.invoke(...) to obtain the operator's choice.

ask_human = HumanInputRun(input_func=get_human_input)

In [5]:
## First, define the graph state schema (`State`) shared by the nodes
from typing import Dict, TypedDict
from typing import TypedDict, Annotated, List, Union
from langchain_core.agents import AgentAction, AgentFinish
import operator

from langchain_core.messages import BaseMessage
class State(TypedDict):
    """State schema passed to StateGraph and shared by the graph's nodes."""
    # The input string
    query: str    
    # Identifier of the user, forwarded to the MCP memory tools
    user_id: str
    # Value returned by the human-choice node ("LoadingMemory" or "FreshStart" are acted upon)
    human_choice : str
    # Reply obtained through the MCP memory tools
    agent_with_memory_response : str
    # Reply from the plain LLM called directly with the query
    agent_without_memory_response: str

In [6]:
from langgraph.graph import END, StateGraph
from colorama  import Fore,Style
from langchain_nvidia_ai_endpoints import ChatNVIDIA
# Plain chat model invoked directly with the query in memory_execution_node,
# giving a no-memory response to compare against the memory agent's reply.
llm = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", max_tokens=1024)


async def restart_memory_client(query, user_id, server_url="http://127.0.0.1:4200/mcp"):
    """Call the "restart_memory_agent" MCP tool and return its text response.

    Connects over streamable HTTP, prints every tool the server advertises,
    then calls "restart_memory_agent" with the query and user id.
    NOTE(review): the caller uses this for the "fresh start" choice, so the
    tool presumably clears stored memory on the server; confirm against the
    server implementation.

    Args:
        query: The user query forwarded to the tool.
        user_id: Identifier of the current user, forwarded to the tool.
        server_url: URL of the MCP endpoint (note the ``/mcp`` path).

    Returns:
        The ``text`` attribute of the first content item in the tool result.

    Raises:
        IndexError: If the tool result contains no content items.
    """
    client = Client(transport=StreamableHttpTransport(server_url))
    async with client:
        for tool in await client.list_tools():
            print(f"Tool: {tool}")

        result = await client.call_tool(
            "restart_memory_agent",
            {
                "query": query,
                "user_id": user_id,
            },
        )

    # The tool result carries a list of content items, e.g.
    #   CallToolResult(content=[TextContent(type='text', text="...")], ...)
    # and the reply is read from the first item's `text` attribute.
    if not result.content:
        # Same exception type as indexing an empty list, with a clearer message.
        raise IndexError("MCP tool 'restart_memory_agent' returned no content items")
    output = result.content[0].text

    print(Fore.CYAN + "inside mcp client , the respond from memory enabled agent:\n", output, Fore.RESET)
    return output




# Define the functions needed 
def human_choice_node(state):
    """Graph node that asks the human which memory mode to use.

    Prints the incoming state, passes the original query to the `ask_human`
    tool, and returns the answer under "human_choice" together with the
    unchanged "query".
    """
    print(Fore.BLUE+ "state: " , state)
    print("---"*10)

    # Always hand the original prompt to the human-input tool.
    original_query = state["query"]
    picked = ask_human.invoke(input=original_query)

    banner = "".join([Fore.CYAN, "choosen_agent : ", picked, Fore.RESET])
    print(banner)

    update = {"human_choice": picked}
    update["query"] = original_query
    return update

def memory_execution_node(state):
    """Graph node that answers the query with and without agent memory.

    Depending on state["human_choice"], the query is sent to the memory agent
    ("LoadingMemory") or to the restart-memory agent ("FreshStart") through
    the MCP client. Any other choice yields a reminder message instead.
    The plain `llm` is always invoked with the same query for comparison.

    Args:
        state: Graph state; reads "query", "user_id" and "human_choice".

    Returns:
        dict with "agent_with_memory_response" and
        "agent_without_memory_response".
    """
    query = state["query"]
    user_id = state["user_id"]
    print(Fore.CYAN + "user query: ", query , Fore.RESET)

    # Tolerate a missing key, surrounding whitespace, or extra lines in the
    # human's answer: only the last non-empty line is the decision.
    choice_lines = (state.get("human_choice") or "").strip().splitlines()
    choosen_agent = choice_lines[-1].strip() if choice_lines else ""

    if choosen_agent == 'LoadingMemory':
        # Continue with the existing memory; run_async blocks until the agent replies.
        response = run_async(memory_client, query, user_id)
    elif choosen_agent == "FreshStart":
        # Clear the memory and start afresh.
        response = run_async(restart_memory_client, query, user_id)
    else:
        response = "Please make sure you made a choice to load pre-existing memory or not."

    if response is None:
        # Avoid a TypeError in the concatenation below if no reply came back.
        response = "The memory agent did not return a response."

    # Baseline answer from the plain LLM, which has no access to the memory.
    output = llm.invoke(query)
    no_memory_response = output.content
    print(Fore.CYAN+ "agent_output: \n" + response + Fore.RESET)

    return {"agent_with_memory_response": response , "agent_without_memory_response": no_memory_response}

  llm = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", max_tokens=1024)


In [7]:
from langgraph.graph import END, StateGraph

# Build the workflow over the shared State schema
workflow = StateGraph(State)

# Register the two nodes: "start" asks the human, "end" runs the chosen agent
for node_name, node_fn in (
    ("start", human_choice_node),
    ("end", memory_execution_node),
):
    workflow.add_node(node_name, node_fn)

# "start" is the first node called
workflow.set_entry_point("start")

# Linear flow: start -> end -> END
for edge_from, edge_to in (("start", "end"), ("end", END)):
    workflow.add_edge(edge_from, edge_to)

# Compile into a LangChain Runnable, usable like any other runnable
app = workflow.compile()

In [8]:
# First turn for user "sofia": run the graph with an introduction message.
my_query="Hello there, my name is Sofia and I am an young artists, I am very good in drawing realistic human faces and expressions."
user_id="sofia"
respond=app.invoke({"query":my_query, "user_id":user_id})

[34mstate:  {'query': 'Hello there, my name is Sofia and I am an young artists, I am very good in drawing realistic human faces and expressions.', 'user_id': 'sofia'}
------------------------------


Hello there, my name is Sofia and I am an young artists, I am very good in drawing realistic human faces and expressions.
Decide whether to load from previous saved memory or not


            Yes/No            
            Enter ONLY Yes or No and nothing else !


 no
 exit


You've chosen :  FreshStart  exiting now ,thank you!
[36mchoosen_agent : FreshStart[39m
[36muser query:  Hello there, my name is Sofia and I am an young artists, I am very good in drawing realistic human faces and expressions. [39m
Tool: name='memory_agent' title=None description="An Agent with memory enabled, can memorize the past conversation and respond accordingly.\nArgs:\n    query (str): The input user query\n    user_id (str): the current user's id\nReturns:\n    str: output response to the user " inputSchema={'properties': {'query': {'title': 'Query', 'type': 'string'}, 'user_id': {'title': 'User Id', 'type': 'string'}}, 'required': ['query', 'user_id'], 'type': 'object'} outputSchema={'properties': {'result': {'title': 'Result', 'type': 'string'}}, 'required': ['result'], 'title': '_WrappedResult', 'type': 'object', 'x-fastmcp-wrap-result': True} annotations=None meta={'_fastmcp': {'tags': []}}
Tool: name='restart_memory_agent' title=None description="An Agent with memory 

In [9]:
# Second turn for the same user: run the graph again with a follow-up message.
my_query="I tried to apply for jobs as illustrator in many different companies, but I cannot seem to get hired. I am quite sad."
user_id="sofia"
respond=app.invoke({"query":my_query, "user_id":user_id})

[34mstate:  {'query': 'I tried to apply for jobs as illustrator in many different companies, but I cannot seem to get hired. I am quite sad.', 'user_id': 'sofia'}
------------------------------


I tried to apply for jobs as illustrator in many different companies, but I cannot seem to get hired. I am quite sad.
Decide whether to load from previous saved memory or not


            Yes/No            
            Enter ONLY Yes or No and nothing else !


 yes
 exit


You've chosen :  LoadingMemory  exiting now ,thank you!
[36mchoosen_agent : LoadingMemory[39m
[36muser query:  I tried to apply for jobs as illustrator in many different companies, but I cannot seem to get hired. I am quite sad. [39m
Tool: name='memory_agent' title=None description="An Agent with memory enabled, can memorize the past conversation and respond accordingly.\nArgs:\n    query (str): The input user query\n    user_id (str): the current user's id\nReturns:\n    str: output response to the user " inputSchema={'properties': {'query': {'title': 'Query', 'type': 'string'}, 'user_id': {'title': 'User Id', 'type': 'string'}}, 'required': ['query', 'user_id'], 'type': 'object'} outputSchema={'properties': {'result': {'title': 'Result', 'type': 'string'}}, 'required': ['result'], 'title': '_WrappedResult', 'type': 'object', 'x-fastmcp-wrap-result': True} annotations=None meta={'_fastmcp': {'tags': []}}
Tool: name='restart_memory_agent' title=None description="An Agent with memor

In [10]:
# Third turn: ask what the agent remembers; `respond` is inspected in the next cells.
my_query="Tell me what do you remember about me?"
user_id="sofia"
respond=app.invoke({"query":my_query, "user_id":user_id})

[34mstate:  {'query': 'Tell me what do you remember about me?', 'user_id': 'sofia'}
------------------------------


Tell me what do you remember about me?
Decide whether to load from previous saved memory or not


            Yes/No            
            Enter ONLY Yes or No and nothing else !


 yes
 exit


You've chosen :  LoadingMemory  exiting now ,thank you!
[36mchoosen_agent : LoadingMemory[39m
[36muser query:  Tell me what do you remember about me? [39m
Tool: name='memory_agent' title=None description="An Agent with memory enabled, can memorize the past conversation and respond accordingly.\nArgs:\n    query (str): The input user query\n    user_id (str): the current user's id\nReturns:\n    str: output response to the user " inputSchema={'properties': {'query': {'title': 'Query', 'type': 'string'}, 'user_id': {'title': 'User Id', 'type': 'string'}}, 'required': ['query', 'user_id'], 'type': 'object'} outputSchema={'properties': {'result': {'title': 'Result', 'type': 'string'}}, 'required': ['result'], 'title': '_WrappedResult', 'type': 'object', 'x-fastmcp-wrap-result': True} annotations=None meta={'_fastmcp': {'tags': []}}
Tool: name='restart_memory_agent' title=None description="An Agent with memory enabled, can memorize the past conversation and respond accordingly.\nArgs:\n

In [11]:
# Reply produced through the MCP memory agent for the last invocation
respond["agent_with_memory_response"]

"Nice to recall our previous conversations! I remember that your name is Sofia, and you've been trying to apply for jobs as an illustrator in many companies. How can I assist you further with that? Have you had any luck with your job search so far?"

In [12]:
# Reply produced by the plain LLM (no memory) for the same query
respond["agent_without_memory_response"]

"I'm just an artificial intelligence and don't have the ability to remember or know individual users. However, if you have interacted with me before, I can use the data from that interaction to provide a response tailored to our previous conversation. If you have any specific questions or requests, feel free to ask!"

-------------------------------------------
## Integrating the standalone Agent Memory into LlamaIndex

In [None]:
!pip install llama-index-core==0.10.50
!pip install llama-index-llms-nvidia==0.1.3


In [None]:
from llama_index.core import Settings
from llama_index.llms.nvidia import NVIDIA

# Set the default LLM in llama-index Settings to the
# mixtral-8x7b-instruct-v0.1 model from the API Catalog
Settings.llm = NVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1")

In [None]:
from llama_index.legacy.prompts import ChatPromptTemplate

# Define the message templates as (role, content) tuples; {memory} and {query}
# are filled in when the template is formatted.
message_templates = [
    ("system", "Warm welcome, I am a personal conversational assistant , \n I have a very good memory and will keep track on our conversation. recall from memory :{memory}"),
    ("user", "{query}"),
]

# Create the chat prompt template
chat_template = ChatPromptTemplate.from_messages(message_templates)
user_id = "ryan"
# Use one query for both the memory lookup and the user message; previously
# the memory agent was called with an empty query while the prompt used a
# different hard-coded one.
query = "a brave knight"
# Ask the memory agent first (run_async blocks until it replies) ...
memory = run_async(memory_client, query, user_id)
# ... then convert to a text prompt for the completion API
prompt = chat_template.format(memory=memory, query=query)