In [None]:
from rdflib import BNode, Graph, Literal, Namespace, URIRef
import logging
from agent import ShortTermAgent, LongTermAgent
import itertools
import multiprocessing
import random

# Only let CRITICAL records through the root logger so experiment output stays quiet.
logging.root.setLevel(logging.CRITICAL)


def run_experiment(params):
    """Build a ``LongTermAgent`` for one hyperparameter combination and test it.

    Args:
        params: 6-tuple ``(seed, room_size, qa_policy, explore_policy, mm_policy,
            max_memory)``. Everything is packed into one argument so the function
            can be handed directly to ``multiprocessing.Pool.map``.

    Returns:
        None. The run's settings are printed and ``agent.test()`` is called; the
        agent is given ``./foo/`` as its ``default_root_dir`` (presumably where it
        stores its output -- confirm against the agent implementation).
    """
    seed, room_size, qa_policy, explore_policy, mm_policy, max_memory = params

    banner = (
        f"Seed: {seed}, Room size: {room_size}, QA: {qa_policy}, Explore: {explore_policy}, "
        f"MM: {mm_policy}, Max memory: {max_memory}"
    )
    print(banner)

    # Environment settings; only ``seed`` and ``room_size`` vary between runs.
    env_config = {
        "question_prob": 1.0,
        "seed": seed,
        "terminates_at": 99,
        "randomize_observations": "all",
        "room_size": room_size,
        "rewards": {"correct": 1, "wrong": 0, "partial": 0},
        "make_everything_static": False,
        "num_total_questions": 1000,
        "question_interval": 1,
        "include_walls_in_observations": True,
        "deterministic_objects": False,
    }

    LongTermAgent(
        env_config=env_config,
        qa_policy=qa_policy,
        explore_policy=explore_policy,
        mm_policy=mm_policy,
        max_long_term_memory_size=max_memory,
        num_samples_for_results=10,
        default_root_dir="./foo/",
    ).test()


if __name__ == "__main__":
    # Hyperparameter grid for the sweep.
    seeds = list(range(5))
    # Only the largest room is enabled in this cell; the xl / l / m / s / xs
    # "-different-prob" variants were left switched off.
    room_sizes = ["xxl-different-prob"]
    qa_policies = ["latest"]
    explore_policies = ["dijkstra", "bfs", "avoid_walls"]
    mm_policies = ["fifo", "lru", "lfu", "lru+lfu", "random"]
    max_memories = [256]

    # Cartesian product, in the same field order that run_experiment unpacks.
    all_combinations = [
        *itertools.product(
            seeds,
            room_sizes,
            qa_policies,
            explore_policies,
            mm_policies,
            max_memories,
        )
    ]
    # Shuffle in place (unseeded) so jobs reach the pool in random order.
    random.shuffle(all_combinations)

    # One worker per CPU reported by the OS; swap in a fixed integer (e.g. 1) to
    # throttle the run.
    num_processes = multiprocessing.cpu_count()

    with multiprocessing.Pool(processes=num_processes) as pool:
        pool.map(run_experiment, all_combinations)

In [6]:
import yaml
from glob import glob
from tqdm.auto import tqdm
from collections import defaultdict
import numpy as np
import pandas as pd

pd.set_option("display.max_rows", None)

# Collect the mean test score of every run, grouped by configuration. The seed is
# deliberately left out of the key so each group gathers the runs that differ only
# by seed.
results_by_config = defaultdict(list)

for path in tqdm(glob("./foo/*/results.yaml")):
    with open(path, "r") as f:
        results = yaml.safe_load(f)

    test_mean = results["test_score"]["mean"]

    # train.yaml sits next to results.yaml. Swap only the trailing file name: a plain
    # str.replace would also rewrite a run-directory name containing "results.yaml".
    train_path = path[: -len("results.yaml")] + "train.yaml"
    with open(train_path, "r") as f:
        hp = yaml.safe_load(f)

    room_size = hp["env_config"]["room_size"]
    qa_policy = hp["qa_policy"]
    explore_policy = hp["explore_policy"]
    # These two keys may be absent from train.yaml, in which case they become None.
    mm_policy = hp.get("mm_policy", None)
    memory_size = hp.get("max_long_term_memory_size", None)

    config_key = (room_size, qa_policy, explore_policy, mm_policy, memory_size)
    results_by_config[config_key].append(test_mean)

# Build a DataFrame from the aggregated results.
# The sort key pairs every field with an "is None" flag so that configurations with
# missing mm_policy / memory_size can be ordered alongside ones that have them
# (comparing None with str/int directly raises TypeError). Missing values sort last;
# the order of fully populated keys is unchanged.
records = []
for config, scores in sorted(
    results_by_config.items(),
    key=lambda item: tuple((value is None, value) for value in item[0]),
):
    room_size, qa_policy, explore_policy, mm_policy, memory_size = config
    records.append(
        {
            "room_size": room_size,
            "mean_score": np.mean(scores),
            # np.std defaults to ddof=0, i.e. the population standard deviation.
            "std_score": np.std(scores),
            "qa_policy": qa_policy,
            "explore_policy": explore_policy,
            "mm_policy": mm_policy,
            "memory_size": memory_size,
            "n_seeds": len(scores),
        }
    )

# Name the columns explicitly: with no result files `records` is empty, and a
# column-less frame would make the `df["room_size"]` lookup below raise KeyError.
df = pd.DataFrame(
    records,
    columns=[
        "room_size",
        "mean_score",
        "std_score",
        "qa_policy",
        "explore_policy",
        "mm_policy",
        "memory_size",
        "n_seeds",
    ],
)
pd.set_option("display.precision", 4)

# Display each room size section sorted by mean_score (best first). The list is
# fixed, so a room size with no results still gets a (empty) section.
# for room in df["room_size"].unique():
for room in [
    "xs-different-prob",
    "s-different-prob",
    "m-different-prob",
    "l-different-prob",
    "xl-different-prob",
    "xxl-different-prob",
]:
    print(f"\n=== Results for room size: {room} ===\n")
    section = df[df["room_size"] == room].drop(columns="room_size")
    section = section.sort_values(by="mean_score", ascending=False).reset_index(
        drop=True
    )
    display(section)

100%|██████████| 75/75 [00:00<00:00, 1364.18it/s]


=== Results for room size: xs-different-prob ===






Unnamed: 0,mean_score,std_score,qa_policy,explore_policy,mm_policy,memory_size,n_seeds



=== Results for room size: s-different-prob ===



Unnamed: 0,mean_score,std_score,qa_policy,explore_policy,mm_policy,memory_size,n_seeds



=== Results for room size: m-different-prob ===



Unnamed: 0,mean_score,std_score,qa_policy,explore_policy,mm_policy,memory_size,n_seeds



=== Results for room size: l-different-prob ===



Unnamed: 0,mean_score,std_score,qa_policy,explore_policy,mm_policy,memory_size,n_seeds



=== Results for room size: xl-different-prob ===



Unnamed: 0,mean_score,std_score,qa_policy,explore_policy,mm_policy,memory_size,n_seeds



=== Results for room size: xxl-different-prob ===



Unnamed: 0,mean_score,std_score,qa_policy,explore_policy,mm_policy,memory_size,n_seeds
0,378.0,15.3103,latest,bfs,lru,256,5
1,372.12,23.9325,latest,bfs,lru+lfu,256,5
2,372.12,23.9325,latest,bfs,lfu,256,5
3,351.98,32.5242,latest,dijkstra,lfu,256,5
4,351.98,32.5242,latest,dijkstra,lru+lfu,256,5
5,351.76,21.0162,latest,dijkstra,lru,256,5
6,305.12,11.4489,latest,bfs,fifo,256,5
7,304.38,13.7918,latest,bfs,random,256,5
8,299.06,11.3392,latest,dijkstra,fifo,256,5
9,289.6,4.369,latest,dijkstra,random,256,5


In [None]:
import itertools
import multiprocessing
import shutil
from copy import deepcopy
from datetime import datetime, timedelta
import random
from typing import Any

import numpy as np
import gymnasium as gym
from rdflib import XSD, Graph, Literal, Namespace, URIRef

from humemai.rdflib import Humemai

from rdflib import BNode, Graph, Literal, Namespace, URIRef
import logging
from agent import ShortTermAgent

# rdflib namespace rooted at the humemai ontology IRI.
ns = Namespace("https://humem.ai/ontology#")

# Only let CRITICAL records through the root logger so experiment output stays quiet.
logging.root.setLevel(logging.CRITICAL)


def run_short_term_experiment(params: tuple[int, str, str, str]) -> None:
    """Build a ``ShortTermAgent`` for one hyperparameter combination and test it.

    Args:
        params: ``(seed, room_size, qa_policy, explore_policy)``. Everything is
            packed into one argument so the function can be handed directly to
            ``multiprocessing.Pool.map``.

    Returns:
        None. The run's settings are printed and ``agent.test()`` is called.
    """
    seed, room_size, qa_policy, explore_policy = params

    banner = (
        f"Seed: {seed}, Room size: {room_size}, QA policy: {qa_policy}, "
        f"Explore policy: {explore_policy}"
    )
    print(banner)

    # Environment settings; only ``seed`` and ``room_size`` vary between runs.
    env_config = {
        "question_prob": 1.0,
        "seed": seed,
        "terminates_at": 99,
        "randomize_observations": "all",
        "room_size": room_size,
        "rewards": {"correct": 1, "wrong": 0, "partial": 0},
        "make_everything_static": False,
        "num_total_questions": 1000,
        "question_interval": 1,
        "include_walls_in_observations": True,
        "deterministic_objects": False,
    }

    ShortTermAgent(
        env_config=env_config,
        qa_policy=qa_policy,
        explore_policy=explore_policy,
        num_samples_for_results=10,
    ).test()


if __name__ == "__main__":
    # Hyperparameter grid for the short-term agent sweep.
    seeds = list(range(5))
    room_sizes = ["m-different-prob", "s-different-prob", "xs-different-prob"]
    qa_policies = ["one_hop"]
    explore_policies = ["avoid_walls"]

    # Cartesian product, in the same field order that
    # run_short_term_experiment unpacks.
    all_combinations = [
        *itertools.product(seeds, room_sizes, qa_policies, explore_policies)
    ]

    # One worker per CPU reported by the OS; set a fixed integer (e.g. 8) to throttle.
    num_processes = multiprocessing.cpu_count()
    with multiprocessing.Pool(processes=num_processes) as pool:
        pool.map(run_short_term_experiment, all_combinations)

In [None]:
from rdflib import BNode, Graph, Literal, Namespace, URIRef
import logging
from agent import ShortTermAgent, LongTermAgent
import itertools
import multiprocessing

# Only let CRITICAL records through the root logger so experiment output stays quiet.
logging.root.setLevel(logging.CRITICAL)


def run_experiment(params):
    """Build a ``LongTermAgent`` for one hyperparameter combination and test it.

    Args:
        params: 6-tuple ``(seed, room_size, qa_policy, explore_policy, mm_policy,
            max_memory)``. Everything is packed into one argument so the function
            can be handed directly to ``multiprocessing.Pool.map``.

    Returns:
        None. The run's settings are printed and ``agent.test()`` is called. No
        ``default_root_dir`` is passed, so the agent's own default applies.
    """
    seed, room_size, qa_policy, explore_policy, mm_policy, max_memory = params

    banner = (
        f"Seed: {seed}, Room size: {room_size}, QA: {qa_policy}, Explore: {explore_policy}, "
        f"MM: {mm_policy}, Max memory: {max_memory}"
    )
    print(banner)

    # Environment settings; only ``seed`` and ``room_size`` vary between runs.
    env_config = {
        "question_prob": 1.0,
        "seed": seed,
        "terminates_at": 99,
        "randomize_observations": "all",
        "room_size": room_size,
        "rewards": {"correct": 1, "wrong": 0, "partial": 0},
        "make_everything_static": False,
        "num_total_questions": 1000,
        "question_interval": 1,
        "include_walls_in_observations": True,
        "deterministic_objects": False,
    }

    LongTermAgent(
        env_config=env_config,
        qa_policy=qa_policy,
        explore_policy=explore_policy,
        mm_policy=mm_policy,
        max_long_term_memory_size=max_memory,
        num_samples_for_results=10,
    ).test()


if __name__ == "__main__":
    # Hyperparameter grid: all six room sizes, swept over memory capacity.
    seeds = list(range(5))
    room_sizes = [
        f"{size}-different-prob" for size in ("xxl", "xl", "l", "m", "s", "xs")
    ]
    qa_policies = ["latest"]
    explore_policies = ["dijkstra", "bfs"]
    mm_policies = ["random"]
    # 0 plus the powers of two from 2 up to 1024.
    max_memories = [0] + [2**exponent for exponent in range(1, 11)]

    # Cartesian product, in the same field order that run_experiment unpacks.
    all_combinations = [
        *itertools.product(
            seeds,
            room_sizes,
            qa_policies,
            explore_policies,
            mm_policies,
            max_memories,
        )
    ]

    # One worker per CPU reported by the OS; swap in a fixed integer to throttle.
    num_processes = multiprocessing.cpu_count()
    with multiprocessing.Pool(processes=num_processes) as pool:
        pool.map(run_experiment, all_combinations)

In [None]:
# Show (raw triple count, main triple count, memory count) as the cell result.
# NOTE(review): in the cells visible here `agent` is only bound inside the
# experiment functions, so this cell needs a top-level `agent` from elsewhere -- confirm.
(
    agent.humemai.get_raw_triple_count(),
    agent.humemai.get_main_triple_count(),
    agent.humemai.get_memory_count(),
)

In [None]:
# Show (short-term count, long-term count, main triple count, raw triple count)
# as the cell result.
# NOTE(review): in the cells visible here `agent` is only bound inside the
# experiment functions, so this cell needs a top-level `agent` from elsewhere -- confirm.
(
    agent.humemai.get_short_term_memory_count(),
    agent.humemai.get_long_term_memory_count(),
    agent.humemai.get_main_triple_count(),
    agent.humemai.get_raw_triple_count(),
)

In [None]:
# Inspection cell: calls the store's print_raw_triples() helper.
# NOTE(review): in the cells visible here `agent` is only bound inside the
# experiment functions, so this cell needs a top-level `agent` from elsewhere -- confirm.
agent.humemai.print_raw_triples()

In [None]:
# Inspection cell: calls the store's print_main_triples() helper.
# NOTE(review): in the cells visible here `agent` is only bound inside the
# experiment functions, so this cell needs a top-level `agent` from elsewhere -- confirm.
agent.humemai.print_main_triples()

In [None]:
# Inspection cell: calls the store's print_memories() helper.
# NOTE(review): in the cells visible here `agent` is only bound inside the
# experiment functions, so this cell needs a top-level `agent` from elsewhere -- confirm.
agent.humemai.print_memories()

In [None]:
# Run a SPARQL aggregate over the agent's graph: sum the humemai:recalled value of
# every resource typed rdf:Statement.
# NOTE(review): in the cells visible here `agent` is only bound inside the
# experiment functions, so this cell needs a top-level `agent` from elsewhere -- confirm.
foo = agent.humemai.graph.query(
    """
PREFIX humemai: <https://humem.ai/ontology#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT (SUM(?recalledValue) AS ?totalRecalled)
WHERE {
  ?statement rdf:type rdf:Statement ;
             humemai:recalled ?recalledValue .
}
"""
)

# SPARQLResult can be iterated as if it were a list of rows
for row in foo:
    print(row)
    # row.totalRecalled corresponds to the variable ?totalRecalled
    total_recalled_value = row.totalRecalled
    print(f"Sum of all recalled values: {total_recalled_value}")

# Last expression of the cell: shown as the cell result next to the printed sum.
agent.humemai.get_long_term_memory_count()