In [1]:
from tqdm.auto import tqdm
from humemai.janusgraph import Humemai
from humemai.utils import disable_logger

disable_logger()

humemai = Humemai()
humemai.start_containers(warmup_seconds=10)
humemai.connect()
humemai.remove_all_data()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import re
import json
from pprint import pprint

# Open the file in read mode
with open('example.txt', 'r') as file:
    # Read the contents of the file
    content = file.read()

# Split the text into chunks based on speaker
# Match lines that start with a speaker's name followed by a colon
chunks = re.findall(r'^.*?:.*$', content, re.MULTILINE)

# Clean up the chunks by stripping unnecessary spaces
chunks = [chunk.strip() for chunk in chunks]

# Print the chunks
for chunk in chunks:
    print(chunk)


Alice: Bob, did you take my office keys?
Bob: What? No, I don’t even know where you keep them.
Alice: They were on my desk this morning, and now they’re gone.
Bob: Maybe you misplaced them?
HumemAI: Misplacing keys disrupts workflow. This is a serious matter.
Bob: Wait, where did you come from?
HumemAI: I’ve been observing. Lost keys cause unnecessary delays.
Charlie: What’s going on?
Alice: My keys are missing, and Bob might’ve taken them.
Bob: I didn’t take them! Why is it always me?
HumemAI: Denial is common, but let’s focus on solving the issue.
Grace: Did anyone else see the keys?
Charlie: Not me. But has anyone checked the break room? Sometimes people leave things there.
Alice: Why would I leave my keys in the break room?
HumemAI: It’s possible under stress. Humans are fallible.
Grace: Alice, retrace your steps. Where were you after arriving?
Alice: Okay… I came in, put my bag down, grabbed coffee, then…
HumemAI: Then what? The next step is critical.
Alice: Oh! I went to the prin

In [3]:
import torch
import transformers


def get_pipeline(
    model: str = "meta-llama/Llama-3.2-1B-Instruct",
    device: str = "cpu",
    quantization: str = "16bit",
) -> transformers.Pipeline:
    """Get a text generation pipeline with the specified device and quantization.

    Args:
        model (str): The model to use for text generation. 
            Defaults to "meta-llama/Llama-3.2-1B-Instruct".
            meta-llama/Llama-3.2-3B-Instruct, 
            meta-llama/Llama-3.1-8B-Instruct
            ...

            are also available.
        device (str): The device to run the pipeline on. Defaults to "cpu".
        quantization (str): The quantization to apply to the model. Defaults to "16bit".

    Returns:
        transformers.Pipeline: The text generation pipeline.

    """

    if quantization == "16bit":
        quantization_config = None
    elif quantization == "8bit":
        quantization_config = {"load_in_8bit": True}
    elif quantization == "4bit":
        quantization_config = {"load_in_4bit": True}

    return transformers.pipeline(
        "text-generation",
        model="meta-llama/Llama-3.2-1B-Instruct",
        model_kwargs={
            "torch_dtype": torch.bfloat16,
            "quantization_config": quantization_config,
        },
        device_map=device,
    )

In [4]:
def generate_prompt(history: str, next_text: str) -> list[dict]:
    """
    Generate the prompt for the AI assistant to convert text to a knowledge graph.

    Args:
        history (str): The history of the knowledge graph extracted so far.
        next_text (str): The new text to convert into a knowledge graph.

    Returns:
        list[dict]: A structured prompt for the AI assistant to build a knowledge graph.
    """
    prompt = [
        {
            "role": "system",
            "content": """
You are an AI assistant that builds knowledge graphs from text. 
For each input, you extract entities and relationships from the provided text 
and convert them into a structured JSON-based knowledge graph.

**Important:** You should extract entities and relations from the new text provided.
If the new text provides updated information about existing entities or relations 
(e.g., age change, new attributes), you should output these entities and relations 
again with the updated information. Do not include entities or relations from the 
previous history that have not changed.

You may use the history to understand context and disambiguate entities.

Your output must follow this JSON format:

```json
{
  "entities": [
    {"label": "Entity1", "type": "Type1"},
    {"label": "Entity2", "type": "Type2"}
  ],
  "relations": [
    {"source": "Entity1", "target": "Entity2", "relation": "RelationName"}
  ]
}
```

Each entity must have a unique label and type (e.g., "Person", "Company", "Object",
"Event"). Relations must specify:

- `source`: the label of the originating entity,
- `target`: the label of the connected entity,
- `relation`: the relationship type between the source and target.

## Example:

### Previous Knowledge Graph History:

```json
{
  "entities": [
    {"label": "Sarah", "properties": {"type": "Person"}},
    {"label": "InnovateAI", "properties": {"type": "Company"}}
  ],
  "relations": [
    {"source": "Sarah", "target": "InnovateAI", "relation": "works_at"}
  ]
}

```

### New Text to Process:

"Sarah, now 30 years old, was promoted to Senior Data Scientist at InnovateAI."

### Output Knowledge Graph:

```json
{
  "entities": [
    {"label": "Sarah", "properties": {"type": "Person", "age": 30}},
    {"label": "InnovateAI", "properties": {"type": "Company"}},
    {"label": "Senior Data Scientist", "properties": {"type": "Position"}}
  ],
  "relations": [
    {"source": "Sarah", "target": "InnovateAI", "relation": "works_at"},
    {"source": "Sarah", "target": "Senior Data Scientist", "relation": "holds_position"}
  ]
}
````
Note that even though "Sarah" and "InnovateAI" were already in the history, we included
"Sarah" again with the updated age and added new relations based on the new information.


## Detailed Instructions:

- Extract entities and relations from the new text provided.
- If the new text provides updated information about existing entities or relations, include these in your output.
- Do not include entities or relations from the history that have not changed.
- Use the history for context and to disambiguate entities.
- Ensure the output adheres strictly to the JSON format specified. """,
        },
        {
            "role": "user",
            "content": f"Here is the knowledge graph extracted so far: {history}",
        },
        {
            "role": "user",
            "content": f"Here is the new text to process and incorporate: {next_text}",
        },
    ]

    return prompt

In [5]:
pipeline = get_pipeline("meta-llama/Llama-3.2-1B-Instruct", "cuda", "16bit")

history = {"entities": [], "relations": []}

for chunk in tqdm(chunks):

    outputs = pipeline(
        generate_prompt(history, chunk),
        max_new_tokens=1024,
    )
    text_content = outputs[0]["generated_text"][-1]["content"]

    json_match = re.search(r"```json\n(.*?)\n```", text_content, re.DOTALL)

    try:
        json_text = json_match.group(1)  # Extract JSON content
        dict_graph = json.loads(json_text)

        # Write short term vertices
        for entity in dict_graph["entities"]:
            vertex = humemai.write_short_term_vertex(
                label=entity["label"], properties=entity["properties"]
            )

        # Write short term edges
        for relation in dict_graph["relations"]:
            head_label = relation["source"]
            head_vertex = humemai.find_vertex_by_label(head_label)[0]
            edge_label = relation["relation"]
            tail_label = relation["target"]
            tail_vertex = humemai.find_vertex_by_label(tail_label)[0]

            edge = humemai.write_short_term_edge(
                head_vertex=head_vertex,
                edge_label=edge_label,
                tail_vertex=tail_vertex,
            )

        short_term_vertices, long_term_vertices, short_term_edges, long_term_edges = (
            humemai.get_working_vertices_and_edges(
                short_term_vertices=humemai.get_all_short_term_vertices(),
                short_term_edges=humemai.get_all_short_term_edges(),
                include_all_long_term=False,
                hops=2,
            )
        )

        # move to the long-term memory
        for vertex in short_term_vertices:
            humemai.move_short_term_vertex(vertex, "episodic")

        for edge in short_term_edges:
            humemai.move_short_term_edge(edge, "episodic")

        # remove all short term vertices and edges
        humemai.remove_all_short_term()

        entities = []
        for vertex in long_term_vertices:
            entities.append(
                {
                    "label": vertex.label,
                    "properties": {
                        key: val
                        for key, val in humemai.get_vertex_properties(vertex).items()
                        if key not in ["num_recalled", "event_time", "known_since"]
                    },
                }
            )

        relations = []
        for edge in long_term_edges:
            relations.append(
                {
                    "source": edge.outV.label,
                    "relation": edge.label,
                    "target": edge.inV.label,
                }
            )
        history = {"entities": entities, "relations": relations}
        print("history: ", history)
    except Exception as e:
        print(e)
    print()

  0%|          | 0/26 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
  4%|▍         | 1/26 [00:03<01:15,  3.03s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


history:  {'entities': [], 'relations': []}



  8%|▊         | 2/26 [00:03<00:34,  1.42s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


'NoneType' object has no attribute 'group'



 12%|█▏        | 3/26 [00:05<00:43,  1.89s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


history:  {'entities': [{'label': 'Bob', 'properties': {'type': 'Person'}}], 'relations': [{'source': 'Alice', 'relation': 'took', 'target': 'Bob'}]}



 15%|█▌        | 4/26 [00:09<00:54,  2.49s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


history:  {'entities': [{'label': 'Desk', 'properties': {'type': 'Object'}}], 'relations': [{'source': 'Alice', 'relation': 'left', 'target': 'Desk'}]}



 19%|█▉        | 5/26 [00:14<01:10,  3.33s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


history:  {'entities': [{'label': 'Bob', 'properties': {'type': 'Person'}}], 'relations': [{'source': 'Alice', 'relation': 'took', 'target': 'Bob'}, {'source': 'Alice', 'relation': 'left', 'target': 'Desk'}]}



 23%|██▎       | 6/26 [00:16<01:00,  3.01s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


list index out of range



 27%|██▋       | 7/26 [00:18<00:54,  2.86s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


'relations'



 31%|███       | 8/26 [00:23<01:00,  3.37s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


history:  {'entities': [{'label': 'Desk', 'properties': {'type': 'Object'}}], 'relations': [{'source': 'Alice', 'relation': 'took', 'target': 'Bob'}, {'source': 'Alice', 'relation': 'left', 'target': 'Desk'}, {'source': 'Desk', 'relation': 'owns', 'target': 'Alice'}]}



 35%|███▍      | 9/26 [00:27<00:59,  3.49s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


history:  {'entities': [], 'relations': [{'source': 'Alice', 'relation': 'took', 'target': 'Bob'}, {'source': 'Alice', 'relation': 'left', 'target': 'Desk'}]}



 38%|███▊      | 10/26 [00:31<00:59,  3.73s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


history:  {'entities': [{'label': 'Keys', 'properties': {'type': 'Object'}}, {'label': 'Desk', 'properties': {'type': 'Object'}}], 'relations': [{'source': 'Bob', 'relation': 'took', 'target': 'Keys'}, {'source': 'Alice', 'relation': 'took', 'target': 'Bob'}, {'source': 'Alice', 'relation': 'has_key', 'target': 'Keys'}, {'source': 'Alice', 'relation': 'left', 'target': 'Desk'}, {'source': 'Desk', 'relation': 'owns', 'target': 'Alice'}]}



 42%|████▏     | 11/26 [00:35<00:57,  3.83s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


history:  {'entities': [{'label': 'Bob', 'properties': {'type': 'Person'}}, {'label': 'Alice', 'properties': {'type': 'Person', 'age': 0}}, {'label': 'HumemAI', 'properties': {'type': 'Company'}}], 'relations': [{'source': 'Bob', 'relation': 'took', 'target': 'Desk'}, {'source': 'Bob', 'relation': 'left', 'target': 'Desk'}]}



 46%|████▌     | 12/26 [00:38<00:49,  3.53s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


history:  {'entities': [], 'relations': []}



 50%|█████     | 13/26 [00:41<00:45,  3.53s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


history:  {'entities': [{'label': "What's", 'properties': {'type': 'Question'}}], 'relations': [{'source': 'Charlie', 'relation': 'asks', 'target': "What's"}, {'source': 'Charlie', 'relation': 'speaks', 'target': "What's"}, {'source': "What's", 'relation': 'about', 'target': 'Charlie'}, {'source': 'Charlie', 'relation': 'tells', 'target': "What's"}]}



 54%|█████▍    | 14/26 [00:50<01:01,  5.12s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


list index out of range



 58%|█████▊    | 15/26 [00:54<00:53,  4.87s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


'target'



 62%|██████▏   | 16/26 [01:00<00:49,  4.96s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


history:  {'entities': [{'label': 'Grace', 'properties': {'type': 'Person'}}, {'label': 'Bob', 'properties': {'type': 'Person'}}, {'label': 'Desk', 'properties': {'type': 'Object'}}, {'label': 'Keys', 'properties': {'type': 'Object'}}, {'label': 'break room', 'properties': {'type': 'Location'}}, {'label': 'HumemAI', 'properties': {'type': 'Company'}}], 'relations': [{'source': 'Grace', 'relation': 'mentions', 'target': 'keys'}, {'source': 'Grace', 'relation': 'sees', 'target': 'keys'}, {'source': 'Alice', 'relation': 'took', 'target': 'Bob'}, {'source': 'Alice', 'relation': 'left', 'target': 'Desk'}, {'source': 'Desk', 'relation': 'owns', 'target': 'Alice'}, {'source': 'Alice', 'relation': 'has_key', 'target': 'Keys'}, {'source': 'Charlie', 'relation': 'asks', 'target': "What's"}, {'source': 'Charlie', 'relation': 'speaks', 'target': "What's"}, {'source': "What's", 'relation': 'about', 'target': 'Charlie'}, {'source': 'Charlie', 'relation': 'tells', 'target': "What's"}, {'source': 'Cha

 65%|██████▌   | 17/26 [01:05<00:45,  5.09s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


list index out of range



 69%|██████▉   | 18/26 [01:05<00:29,  3.68s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


'NoneType' object has no attribute 'group'



 73%|███████▎  | 19/26 [01:09<00:26,  3.78s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


list index out of range



 77%|███████▋  | 20/26 [01:17<00:29,  4.98s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


history:  {'entities': [{'label': 'keys', 'properties': {'attributes': {'description': 'a set of keys'}, 'type': 'Noun'}}, {'label': 'Why', 'properties': {'type': 'Question'}}, {'label': 'Charlie', 'properties': {'type': 'Person'}}, {'label': "What's", 'properties': {'type': 'Question'}}], 'relations': [{'source': 'Alice', 'relation': 'leaves', 'target': 'Why'}, {'source': 'Alice', 'relation': 'retraces', 'target': 'Charlie'}, {'source': 'Why', 'relation': 'asks', 'target': 'Alice'}, {'source': "What's", 'relation': 'asks', 'target': 'Alice'}, {'source': 'Alice', 'relation': 'speaks', 'target': 'Why'}, {'source': 'Why', 'relation': 'about', 'target': 'Alice'}, {'source': 'Alice', 'relation': 'tells', 'target': 'Why'}, {'source': 'Grace', 'relation': 'mentions', 'target': 'keys'}, {'source': 'Grace', 'relation': 'sees', 'target': 'keys'}, {'source': 'Charlie', 'relation': 'tells', 'target': "What's"}]}



 81%|████████  | 21/26 [01:24<00:27,  5.44s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


history:  {'entities': [{'label': 'Why', 'properties': {'type': 'Question'}}, {'label': 'Charlie', 'properties': {'type': 'Person'}}, {'label': 'Okay', 'properties': {'type': 'Speech'}}, {'label': 'in', 'properties': {'type': 'Preposition'}}, {'label': 'Bob', 'properties': {'type': 'Person'}}, {'label': "What's", 'properties': {'type': 'Question'}}, {'label': 'Desk', 'properties': {'type': 'Object'}}, {'label': 'Keys', 'properties': {'type': 'Object'}}, {'label': 'Grace', 'properties': {'type': 'Person'}}, {'label': 'break room', 'properties': {'type': 'Location'}}, {'label': 'HumemAI', 'properties': {'type': 'Company'}}], 'relations': [{'source': 'Grace', 'relation': 'mentions', 'target': 'keys'}, {'source': 'Grace', 'relation': 'sees', 'target': 'keys'}, {'source': 'Alice', 'relation': 'leaves', 'target': 'Why'}, {'source': 'Alice', 'relation': 'retraces', 'target': 'Charlie'}, {'source': 'Alice', 'relation': 'said', 'target': 'Okay'}, {'source': 'Alice', 'relation': 'came', 'target'

 85%|████████▍ | 22/26 [01:30<00:23,  5.80s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


history:  {'entities': [{'label': 'Keys', 'properties': {'type': 'Object'}}, {'label': 'Alice', 'properties': {'type': 'Person', 'age': 0}}, {'label': 'HumemAI', 'properties': {'type': 'Company'}}, {'label': 'Grace', 'properties': {'type': 'Person'}}, {'label': 'Desk', 'properties': {'type': 'Object'}}, {'label': 'Charlie', 'properties': {'type': 'Person'}}, {'label': 'Okay', 'properties': {'type': 'Speech'}}, {'label': 'in', 'properties': {'type': 'Preposition'}}, {'label': 'keys', 'properties': {'attributes': {'description': 'a set of keys'}, 'type': 'Noun'}}, {'label': 'break room', 'properties': {'type': 'Location'}}], 'relations': [{'source': 'Bob', 'relation': 'took', 'target': 'Keys'}, {'source': 'Bob', 'relation': 'took', 'target': 'Desk'}, {'source': 'Alice', 'relation': 'took', 'target': 'Bob'}, {'source': 'Bob', 'relation': 'left', 'target': 'Desk'}, {'source': 'Alice', 'relation': 'has_key', 'target': 'Keys'}, {'source': 'HumemAI', 'relation': 'focuses_on', 'target': 'Keys'

 88%|████████▊ | 23/26 [01:35<00:16,  5.34s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


history:  {'entities': [{'label': 'Bob', 'properties': {'type': 'Person'}}, {'label': 'Alice', 'properties': {'type': 'Person', 'age': 0}}, {'label': 'Grace', 'properties': {'type': 'Person'}}, {'label': 'Desk', 'properties': {'type': 'Object'}}, {'label': 'Why', 'properties': {'type': 'Question'}}, {'label': 'See', 'properties': {'type': 'Verb'}}, {'label': 'Charlie', 'properties': {'type': 'Person'}}, {'label': 'Okay', 'properties': {'type': 'Speech'}}, {'label': 'in', 'properties': {'type': 'Preposition'}}, {'label': 'keys', 'properties': {'attributes': {'description': 'a set of keys'}, 'type': 'Noun'}}, {'label': "What's", 'properties': {'type': 'Question'}}], 'relations': [{'source': 'HumemAI', 'relation': 'focuses_on', 'target': 'Keys'}, {'source': 'Bob', 'relation': 'took', 'target': 'Keys'}, {'source': 'Alice', 'relation': 'has_key', 'target': 'Keys'}, {'source': 'Grace', 'relation': 'mentions', 'target': 'Keys'}, {'source': 'Grace', 'relation': 'sees', 'target': 'Keys'}, {'sou

 92%|█████████▏| 24/26 [01:49<00:16,  8.14s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


'NoneType' object has no attribute 'group'



 96%|█████████▌| 25/26 [02:04<00:10, 10.11s/it]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


'NoneType' object has no attribute 'group'



100%|██████████| 26/26 [02:15<00:00,  5.20s/it]

list index out of range






In [12]:
for edge in humemai.get_all_long_term_edges():
    print(
        f"{edge.outV.label} --{edge.label}--> {edge.inV.label} | Properties: {humemai.get_edge_properties(edge)}"
    )

HumemAI --averted--> Crisis | Properties: {'event_time': ['2024-11-20T14:46:35'], 'num_recalled': 0}
HumemAI --retracing--> steps | Properties: {'num_recalled': 0, 'event_time': ['2024-11-20T14:46:35']}
HumemAI --resolution--> Resolution | Properties: {'num_recalled': 0, 'event_time': ['2024-11-20T14:46:35']}
HumemAI --focuses_on--> Keys | Properties: {'num_recalled': 4, 'event_time': ['2024-11-20T14:46:35', '2024-11-20T14:46:35']}
Grace --mentions--> keys | Properties: {'num_recalled': 5, 'event_time': ['2024-11-20T14:46:35']}
Grace --mentions--> Keys | Properties: {'event_time': ['2024-11-20T14:46:35'], 'num_recalled': 3}
Grace --sees--> keys | Properties: {'num_recalled': 5, 'event_time': ['2024-11-20T14:46:35']}
Grace --sees--> Keys | Properties: {'event_time': ['2024-11-20T14:46:35'], 'num_recalled': 3}
Bob --took--> Keys | Properties: {'event_time': ['2024-11-20T14:46:35', '2024-11-20T14:46:35', '2024-11-20T14:46:35'], 'num_recalled': 5}
Bob --took--> Desk | Properties: {'num_rec