In [1]:
import sys
import os

# Resolve the parent of the current working directory to an absolute path.
# NOTE(review): this assumes the notebook is launched from a sub-folder of the
# project root — confirm.
root_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Put that directory at the front of the import search path, but only once, so
# re-running the cell does not keep prepending duplicates.
if root_path not in sys.path:
    sys.path.insert(0, root_path)

import yaml
from glob import glob
from tqdm.auto import tqdm
from collections import defaultdict
import numpy as np
import pandas as pd

# Do not truncate DataFrame output: show every row.
pd.set_option("display.max_rows", None)

# Map each hyperparameter configuration (everything except the seed) to the
# list of mean test scores obtained by the individual runs.
results_by_config = defaultdict(list)

# glob() returns matches in arbitrary, filesystem-dependent order. Sorting the
# paths makes the order of the scores inside each list reproducible.
result_paths = sorted(glob("../training-results/*/results.yaml"))

for path in tqdm(result_paths):
    with open(path, "r") as f:
        results = yaml.safe_load(f)

    test_mean = results["test_score"]["mean"]

    # train.yaml lives next to results.yaml in the same run directory. Build
    # the path from the directory rather than with str.replace(), which would
    # rewrite every occurrence of "results.yaml" in the path.
    with open(os.path.join(os.path.dirname(path), "train.yaml")) as f:
        hp = yaml.safe_load(f)

    # Configuration key = all hyperparameters of interest, seed excluded.
    config_key = (
        hp["env_config"]["room_size"],
        hp["qa_policy"],
        hp["explore_policy"],
        # Optional hyperparameters: None when train.yaml does not define them.
        hp.get("mm_policy", None),
        hp.get("max_long_term_memory_size", None),
    )
    results_by_config[config_key].append(test_mean)

# Build a DataFrame with one summary row per configuration (aggregated over
# the runs that share it).
#
# The optional key fields (mm_policy, memory_size) may be None, and None cannot
# be ordered against str/int, so a plain sorted() over the key tuples can raise
# TypeError. Sorting on (is-None, value) pairs keeps the natural order for real
# values and places None entries last without ever comparing None with "<".
ordered_configs = sorted(
    results_by_config.items(),
    key=lambda item: tuple((field is None, field) for field in item[0]),
)

records = []
for config, scores in ordered_configs:
    room_size, qa_policy, explore_policy, mm_policy, memory_size = config
    records.append(
        {
            "room_size": room_size,
            "mean_score": np.mean(scores),
            # np.std defaults to ddof=0, i.e. the population standard deviation.
            "std_score": np.std(scores),
            "qa_policy": qa_policy,
            "explore_policy": explore_policy,
            "mm_policy": mm_policy,
            "memory_size": memory_size,
            # Number of runs (seeds) that contributed to this configuration.
            "n_seeds": len(scores),
        }
    )

df = pd.DataFrame(records)

# Show floats with four decimal places in the displayed tables.
pd.set_option("display.precision", 4)

# Room sizes to report. To cover every room size present in the results, use
# df["room_size"].unique() instead of this fixed list.
rooms_to_report = ["xl-different-prob", "xxl-different-prob"]

# Build each room's table once (filtered to the room, best mean score first,
# fresh 0..n-1 index) and reuse it for both display and export, instead of
# repeating the same filter/sort pipeline in two loops.
sections = {
    room: (
        df[df["room_size"] == room]
        .drop(columns="room_size")
        .sort_values(by="mean_score", ascending=False)
        .reset_index(drop=True)
    )
    for room in rooms_to_report
}

# Display each room size section sorted by mean_score
for room, section in sections.items():
    print(f"\n=== Results for room size: {room} ===\n")
    display(section)

# Export results for each room size to separate JSON files.
# Create the target directory first so to_json() does not fail when it is missing.
os.makedirs("../data", exist_ok=True)
for room, section in sections.items():
    filename = f"../data/results_{room}.json"
    section.to_json(filename, orient="records", indent=2)
    print(f"Results for {room} exported to {filename}")

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 5280/5280 [00:08<00:00, 642.10it/s]



=== Results for room size: xl-different-prob ===



Unnamed: 0,mean_score,std_score,qa_policy,explore_policy,mm_policy,memory_size,n_seeds
0,669.6,6.8586,most_frequently_used,dijkstra,lfu,64,5
1,663.8,13.0292,most_frequently_used,dijkstra,fifo,1024,5
2,663.8,13.0292,most_frequently_used,dijkstra,random,1024,5
3,663.8,4.1665,most_frequently_used,dijkstra,lru,512,5
4,663.8,13.0292,most_frequently_used,dijkstra,lru,1024,5
5,663.8,13.0292,most_frequently_used,dijkstra,lfu,1024,5
6,660.8,5.1147,most_frequently_used,bfs,lfu,256,5
7,660.6,14.5272,most_frequently_used,dijkstra,lfu,256,5
8,660.6,5.2764,most_frequently_used,dijkstra,lfu,512,5
9,660.0,8.0747,most_frequently_used,dijkstra,lfu,128,5



=== Results for room size: xxl-different-prob ===



Unnamed: 0,mean_score,std_score,qa_policy,explore_policy,mm_policy,memory_size,n_seeds
0,355.6,5.7131,most_recently_added,bfs,lru,512,5
1,355.4,4.4091,most_recently_added,bfs,lfu,1024,5
2,355.4,4.4091,most_recently_added,bfs,random,1024,5
3,355.4,4.4091,most_recently_added,bfs,fifo,1024,5
4,355.4,4.4091,most_recently_added,bfs,lru,1024,5
5,354.8,3.9699,most_recently_added,dijkstra,lfu,1024,5
6,354.8,3.9699,most_recently_added,dijkstra,random,1024,5
7,354.8,3.9699,most_recently_added,dijkstra,fifo,1024,5
8,354.8,3.9699,most_recently_added,dijkstra,lru,1024,5
9,354.6,8.1633,most_recently_added,dijkstra,lru,512,5


Results for xl-different-prob exported to ../data/results_xl-different-prob.json
Results for xxl-different-prob exported to ../data/results_xxl-different-prob.json


In [2]:
import logging
from agent import LongTermAgent

# Raise the root logger threshold so that only CRITICAL records are emitted.
logging.getLogger().setLevel(logging.CRITICAL)

# Environment settings for this evaluation run (room "xl-different-prob").
eval_env_config = {
    "question_prob": 1.0,
    "seed": 0,
    "terminates_at": 99,
    "randomize_observations": "all",
    "room_size": "xl-different-prob",
    "rewards": {"correct": 1, "wrong": 0, "partial": 0},
    "make_everything_static": False,
    "num_total_questions": 1000,
    "question_interval": 1,
    "include_walls_in_observations": True,
    "deterministic_objects": False,
}

# Single agent configuration to evaluate; results are not written to disk
# (save_results=False).
agent = LongTermAgent(
    env_config=eval_env_config,
    qa_policy="most_frequently_used",
    explore_policy="dijkstra",
    mm_policy="lfu",
    max_long_term_memory_size=32,
    num_samples_for_results=1,
    default_root_dir="./foo/",
    save_results=False,
)

# Last expression of the cell: the value returned by test() is shown as the
# cell output.
agent.test()

  logger.deprecation(
  logger.deprecation(
  logger.warn(f"{pre} should be an int or np.int64, actual type: {type(obs)}")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} should be an int or np.int64, actual type: {type(obs)}")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(


Episode 1/1 completed. Score: 566.00
Results: {'test_score': {'mean': 566.0, 'std': 0.0, 'min': 566.0, 'max': 566.0}, 'num_episodes': 1}


{'test_score': {'mean': 566.0, 'std': 0.0, 'min': 566.0, 'max': 566.0},
 'num_episodes': 1}

In [3]:
# Sizes of the agent's memory store: (raw triples, main triples, memories).
(
    agent.humemai.get_raw_triple_count(),
    agent.humemai.get_main_triple_count(),
    agent.humemai.get_memory_count(),
)

(319, 27, 38)

In [4]:
# Memory breakdown: (short-term memories, long-term memories, main triples,
# raw triples).
(
    agent.humemai.get_short_term_memory_count(),
    agent.humemai.get_long_term_memory_count(),
    agent.humemai.get_main_triple_count(),
    agent.humemai.get_raw_triple_count(),
)

(6, 32, 27, 319)

In [5]:
# Print the raw triples held by the agent's memory store. As the output shows,
# this includes plain (subject, predicate, object) triples as well as RDF
# statement nodes carrying qualifiers such as memoryID, time_added and
# num_recalled.
agent.humemai.print_raw_triples()

(ind_002, at_location, room_024)
(N143787818f1e47468ce51c07825b1f3b, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement)
(N4287153e514b4357aa8b02ed3ad3d8bb, http://www.w3.org/1999/02/22-rdf-syntax-ns#object, room_006)
(N76325c62ec874ee3b85a26264fcc4450, https://humem.ai/ontology#memoryID, 1064)
(Nadabbdbb3a0c49b392dfe81cc7c38bca, https://humem.ai/ontology#time_added, 2025-01-04T00:00:00)
(Nbb767a48ea454917bf00b4fee05b5859, https://humem.ai/ontology#memoryID, 69)
(N6195024f8af24ffaa08d18f032f55ba6, https://humem.ai/ontology#time_added, 2025-01-17T00:00:00)
(Nb3fa1b31a86b4599b01b8e1f1a4e8be3, https://humem.ai/ontology#memoryID, 691)
(Nb2584cf7f905493db33d76df6521b573, https://humem.ai/ontology#num_recalled, 2)
(N43964baa4e9344aabbe7640cf07e0cd1, http://www.w3.org/1999/02/22-rdf-syntax-ns#object, room_024)
(N11c480a89066454d941e0039fc7f71c8, https://humem.ai/ontology#time_added, 2025-01-30T00:00:00)
(N43964baa4e9344aabbe7640cf07e0cd1, htt

In [6]:
# Print only the main (subject, predicate, object) triples, without the
# qualifier statements that appear in the raw dump.
agent.humemai.print_main_triples()

(ind_002, at_location, room_024)
(ind_004, at_location, room_023)
(room_023, south, room_030)
(room_018, west, wall)
(sta_006, at_location, room_025)
(dep_005, at_location, room_018)
(room_023, east, room_024)
(room_023, west, room_022)
(ind_001, at_location, room_021)
(dep_003, at_location, room_018)
(room_005, east, room_006)
(sta_004, at_location, room_006)
(sta_005, at_location, room_021)
(agent, at_location, room_023)
(room_006, south, room_010)
(ind_005, at_location, room_016)
(room_023, north, room_018)
(sta_002, at_location, room_008)
(room_018, south, room_023)
(dep_001, at_location, room_000)
(sta_003, at_location, room_015)
(sta_000, at_location, room_023)
(dep_005, at_location, room_023)
(dep_002, at_location, room_008)
(ind_007, at_location, room_024)
(dep_007, at_location, room_000)
(room_024, west, room_023)


In [7]:

# Print each memory as a triple together with its qualifier dict (the output
# shows memoryID, num_recalled, time_added and last_accessed).
agent.humemai.print_memories() 

(dep_007, at_location, room_000, {'memoryID': '11', 'num_recalled': '22', 'time_added': '2025-01-01T00:00:00', 'last_accessed': '2025-03-30T00:00:00'})
(sta_004, at_location, room_006, {'memoryID': '45', 'num_recalled': '46', 'time_added': '2025-01-04T00:00:00', 'last_accessed': '2025-04-10T00:00:00'})
(dep_002, at_location, room_008, {'memoryID': '68', 'num_recalled': '103', 'time_added': '2025-01-06T00:00:00', 'last_accessed': '2025-04-10T00:00:00'})
(sta_002, at_location, room_008, {'memoryID': '69', 'num_recalled': '100', 'time_added': '2025-01-06T00:00:00', 'last_accessed': '2025-04-10T00:00:00'})
(sta_005, at_location, room_021, {'memoryID': '110', 'num_recalled': '70', 'time_added': '2025-01-10T00:00:00', 'last_accessed': '2025-04-10T00:00:00'})
(ind_001, at_location, room_021, {'memoryID': '111', 'num_recalled': '95', 'time_added': '2025-01-10T00:00:00', 'last_accessed': '2025-04-10T00:00:00'})
(sta_006, at_location, room_025, {'memoryID': '145', 'num_recalled': '124', 'time_ad