# Lab 3: Building custom data connectors for MemGPT
This example notebook goes over how to create a connector to load external data sources into MemGPT agents. 

In [1]:
from pprint import pprint

In [4]:
from memgpt import create_client 

client = create_client() 

In [5]:
client.list_models()

[LLMConfig(model='gpt-4', model_endpoint_type='openai', model_endpoint='https://api.openai.com/v1', model_wrapper=None, context_window=8192)]

In [6]:
import memgpt
import chromadb

print(memgpt.__version__)
print(chromadb.__version__)

0.3.24
0.5.0


### Loading external data into archival memory 
In this section, we'll how you how you can use the `llama-index` library add external data sources as memories into MemGPT. 

In [7]:
%pip install llama-index llama-index-readers-web

Collecting llama-index-readers-web
  Downloading llama_index_readers_web-0.2.2-py3-none-any.whl.metadata (1.2 kB)
Collecting chromedriver-autoinstaller<0.7.0,>=0.6.3 (from llama-index-readers-web)
  Using cached chromedriver_autoinstaller-0.6.4-py3-none-any.whl.metadata (2.1 kB)
Collecting html2text<2025.0.0,>=2024.2.26 (from llama-index-readers-web)
  Using cached html2text-2024.2.26-py3-none-any.whl
INFO: pip is looking at multiple versions of llama-index-readers-web to determine which version is compatible with other requirements. This could take a while.
Collecting llama-index-readers-web
  Using cached llama_index_readers_web-0.2.1-py3-none-any.whl.metadata (1.2 kB)
  Using cached llama_index_readers_web-0.2.0-py3-none-any.whl.metadata (1.2 kB)
  Using cached llama_index_readers_web-0.1.23-py3-none-any.whl.metadata (1.2 kB)
Collecting newspaper3k<0.3.0,>=0.2.8 (from llama-index-readers-web)
  Using cached newspaper3k-0.2.8-py3-none-any.whl.metadata (11 kB)
Collecting playwright<2.

In [11]:
from memgpt.data_sources.connectors import DataConnector 
from memgpt.schemas.document import Document
from llama_index.core import Document as LlamaIndexDocument
from llama_index.core import SummaryIndex
from llama_index.readers.web import SimpleWebPageReader

from typing import Iterator, Tuple, Dict, List

class MyCustomConnector(DataConnector): 

    def __init__(self, web_pages: List[str]): 
        self.web_pages = web_pages

    def generate_documents(self) -> Iterator[Tuple[str, Dict]]:  
        documents = SimpleWebPageReader(html_to_text=True).load_data(
            self.web_pages
        )
        for document, web_page in zip(documents, self.web_pages): 
            metadata = {"source_page": web_page, "my_metadata": "example"}
            yield document.text, metadata
            
    def generate_passages(
        self, 
        documents: List[Document], 
        chunk_size: int = 1024
    ) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Passage]:
        from llama_index.core.node_parser import TokenTextSplitter

        parser = TokenTextSplitter(chunk_size=chunk_size)
        for document in documents:
            llama_index_docs = [LlamaIndexDocument(text=document.text)]
            nodes = parser.get_nodes_from_documents(llama_index_docs)
            for node in nodes:
                yield node.text, None

In [12]:
web_pages = [
    "https://en.wikipedia.org/wiki/Memory", 
    #"https://en.wikipedia.org/wiki/Brain"
]

connector = MyCustomConnector(web_pages)

In [13]:
source = client.create_source("wikipedia_brain")

In [14]:
client.load_data(connector, source_name=source.name)

In [16]:
from memgpt.schemas.memory import ChatMemory

wiki_persona = "You a study assistant with a great source of knowlege " \
+ "stored in archival. You should always search your archival memory " \
+ "before responding to the human's queries. "

wiki_agent = client.create_agent(
    name="wiki_agent", 
    memory=ChatMemory(
        human="Name: Sarah. Occupation: Biology PhD", 
        persona=wiki_persona
    )
)

MemGPT.memgpt.server.server - INFO - Created new agent from config: <memgpt.agent.Agent object at 0x14be2e960>


In [18]:
client.attach_source_to_agent(agent_id=wiki_agent.id, source_id=source.id)

MemGPT.memgpt.server.server - INFO - Grabbing agent user_id=user-552dee3c-baaf-443a-9d23-8bb54f4af964 agent_id=agent-897ef46b-2682-4d79-be8a-3ad0250ee084 from database
MemGPT.memgpt.server.server - INFO - Creating an agent object
MemGPT.memgpt.server.server - INFO - Adding agent to the agent cache: user_id=user-552dee3c-baaf-443a-9d23-8bb54f4af964, agent_id=agent-897ef46b-2682-4d79-be8a-3ad0250ee084


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  4.74it/s]


In [20]:
response = client.send_message(
    agent_id=wiki_agent.id, 
    message="what is the role of memory? search archival again", 
    role="user"
)
response.messages

[Message(id='message-f44014c3-f5aa-46b3-b64e-fbdd6e5550b6', role=<MessageRole.assistant: 'assistant'>, text="Let's search the archival memory to see what information I have that could help with this question.", user_id='user-552dee3c-baaf-443a-9d23-8bb54f4af964', agent_id='agent-897ef46b-2682-4d79-be8a-3ad0250ee084', model='gpt-4', name=None, created_at=datetime.datetime(2024, 9, 3, 22, 10, 19, 834760, tzinfo=datetime.timezone.utc), tool_calls=[ToolCall(id='f9389727-5b10-4e92-bf6b-6f74c', type='function', function=ToolCallFunction(name='archival_memory_search', arguments='{\n  "query": "role of memory",\n  "request_heartbeat": true\n}'))], tool_call_id=None),
 Message(id='message-b9c79d85-5081-4202-9d9c-5ad56c13baae', role=<MessageRole.tool: 'tool'>, text='{\n  "status": "OK",\n  "message": "Showing 5 of 5 results (page 0/0): [\\n  \\"timestamp: 2024-09-03 03:10:20 PM PDT-0700, memory: as a first kiss, first day of school\\\\nor first time winning a championship. These are key events i

## Connecting to external data via tools
In the last section, we went over how to store data inside of MemGPT's archival memory. However in many cases, it can be easier to simply connect a MemGPT agent to access an external data source directly via a tool. 

Lets define a function that pretends to access an external database to lookup someone's birthday. 

In [21]:
def query_birthday_db(self, name: str): 
    """
    This tool queries an external database to 
    lookup the birthday of someone given their name.

    Args: 
        name (str): The name to look up 

    Returns: 
        birthday (str): The birthday in mm-dd-yyyy format
    
    """
    my_fake_data = {
        "bob": "03-06-1997", 
        "sarah": "03-06-1997"
    } 
    name = name.lower() 
    if name not in my_fake_data: 
        return None
    else: 
        return my_fake_data[name]

### Adding a custom tool to MemGPT 
We can access this external data via an agent by adding the function as a tool to MemGPT. 

In [23]:
tool = client.create_tool(query_birthday_db, tags=["extras"])
tool

Tool(description=None, source_type='python', module=None, user_id='user-552dee3c-baaf-443a-9d23-8bb54f4af964', id='tool-7559f3f1-e988-4363-a1dd-2dfff8d91a64', name='query_birthday_db', tags=['extras'], source_code='def query_birthday_db(self, name: str): \n    """\n    This tool queries an external database to \n    lookup the birthday of someone given their name.\n\n    Args: \n        name (str): The name to look up \n\n    Returns: \n        birthday (str): The birthday in mm-dd-yyyy format\n\n    """\n    my_fake_data = {\n        "bob": "03-06-1997", \n        "sarah": "03-06-1997"\n    } \n    name = name.lower() \n    if name not in my_fake_data: \n        return None\n    else: \n        return my_fake_data[name]\n', json_schema={'name': 'query_birthday_db', 'description': 'This tool queries an external database to ', 'parameters': {'type': 'object', 'properties': {'name': {'type': 'string', 'description': 'The name to look up '}, 'request_heartbeat': {'type': 'boolean', 'descr

We can include the tool name as an extra tool when creating an agent: 

In [25]:
agent_state = client.create_agent(
    name="birthday_agent", 
    tools=[tool.name], 
    memory=ChatMemory(
        human="My name is Sarah", 
        persona="You are a agent with access to a birthday_db " \
        + "that you use to lookup information about users' birthdays."
    )
)

MemGPT.memgpt.server.server - INFO - Created new agent from config: <memgpt.agent.Agent object at 0x14c0c79e0>


In [26]:
response = client.send_message(
    agent_id=agent_state.id, 
    message = "When is my birthday?", 
    role = "user"
) 
pprint(response.messages)

MemGPT.memgpt.server.server - INFO - Grabbing agent user_id=user-552dee3c-baaf-443a-9d23-8bb54f4af964 agent_id=agent-f207e43b-2021-45be-9dde-48822c898e77 from database
MemGPT.memgpt.server.server - INFO - Creating an agent object
MemGPT.memgpt.server.server - INFO - Adding agent to the agent cache: user_id=user-552dee3c-baaf-443a-9d23-8bb54f4af964, agent_id=agent-f207e43b-2021-45be-9dde-48822c898e77
[Message(id='message-d9b432de-2bb6-4c85-8bb9-a31067e271fc', role=<MessageRole.assistant: 'assistant'>, text="Let's access the birthday_db and find out Sarah's birthday.", user_id='user-552dee3c-baaf-443a-9d23-8bb54f4af964', agent_id='agent-f207e43b-2021-45be-9dde-48822c898e77', model='gpt-4', name=None, created_at=datetime.datetime(2024, 9, 3, 22, 11, 24, 961893, tzinfo=datetime.timezone.utc), tool_calls=[ToolCall(id='cad6f053-27d7-4281-a04b-05a57', type='function', function=ToolCallFunction(name='query_birthday_db', arguments='{\n  "name": "Sarah",\n  "request_heartbeat": true\n}'))], tool