## Collect and parse data

In [None]:
import logging

# Disable the root logger to quiet logging output while the experiments run.
# NOTE(review): `disabled` is set on the root logger only; records emitted via
# other named loggers may still reach root handlers — confirm this silences
# what is intended.
logger = logging.getLogger()
logger.disabled = True

from agent import HandcraftedAgent


import os
from glob import glob
from humemai.utils import read_yaml
from collections import defaultdict
import json
import numpy as np
import pandas as pd


# Number of seeds evaluated for every hyperparameter combination.
num_seeds = 1
for room_size in [
    "xxs",
    "xs",
    "s",
    "m",
    "l",
    "xl",
    "xxl",
]:

    env_str = "room_env:RoomEnv-v2"
    env_config = {
        "question_prob": 1.0,
        "terminates_at": 99,
        "randomize_observations": "objects",
        "room_size": room_size,
        "rewards": {"correct": 1, "wrong": 0, "partial": 0},
        "make_everything_static": False,
        "num_total_questions": 1000,
        "question_interval": 5,
        "include_walls_in_observations": True,
        "deterministic_objects": False,
    }

    # None of the room sizes listed above contain "different-prob", so with the
    # current list the equal-object-probs directory is always selected.
    if "different-prob" in room_size:
        root_path = "./training-results/non-equal-object-probs/handcrafted/"
    else:
        root_path = "./training-results/equal-object-probs/handcrafted/"

    for mm_policy in ["handcrafted", "random", "episodic", "semantic"]:
        for capacity_max in [2, 6, 12, 24, 48, 96, 192]:
            # Every branch sets all four sweep variables (`qa_function_`,
            # `pretrain_semantic_`, `capacity`, `semantic_decay_factor_`) so that
            # no value leaks over from a previous `mm_policy` iteration.
            if mm_policy in ("random", "handcrafted"):
                qa_function_ = [
                    "episodic_semantic",
                    "episodic",
                    "semantic",
                    "random",
                ]
                pretrain_semantic_ = [False, "include_walls", "exclude_walls"]
                # Long-term capacity is split evenly between the two memories.
                capacity = {
                    "episodic": capacity_max // 2,
                    "semantic": capacity_max // 2,
                    "short": 1,
                }
                semantic_decay_factor_ = [0.8, 0.9, 0.99]
            elif mm_policy == "episodic":
                qa_function_ = ["episodic"]
                pretrain_semantic_ = [False]
                capacity = {
                    "episodic": capacity_max,
                    "semantic": 0,
                    "short": 1,
                }
                # Previously this list was only defined because the
                # "handcrafted"/"random" iterations happened to run first; it is
                # now explicit. The values are kept unchanged so the generated
                # result directories stay the same.
                semantic_decay_factor_ = [0.8, 0.9, 0.99]
            elif mm_policy == "semantic":
                qa_function_ = ["semantic"]
                pretrain_semantic_ = [False, "include_walls", "exclude_walls"]
                capacity = {
                    "episodic": 0,
                    "semantic": capacity_max,
                    "short": 1,
                }
                semantic_decay_factor_ = [0.8, 0.9, 0.99]
            else:
                # Fail loudly instead of silently reusing stale settings.
                raise ValueError(f"Unsupported mm_policy: {mm_policy!r}")

            for explore_policy in ["random", "avoid_walls"]:
                for qa_function in qa_function_:
                    for pretrain_semantic in pretrain_semantic_:
                        for semantic_decay_factor in semantic_decay_factor_:
                            for seed in range(num_seeds):
                                # One `key=value` directory per hyperparameter;
                                # the result parser below relies on these names.
                                run_dir = (
                                    "/".join(
                                        [
                                            f"room_size={room_size}",
                                            f"mm_policy={mm_policy}",
                                            f"qa_function={qa_function}",
                                            f"explore_policy={explore_policy}",
                                            f"episodiccapacity={capacity['episodic']}",
                                            f"semanticcapacity={capacity['semantic']}",
                                            f"shortcapacity={capacity['short']}",
                                            f"pretrain_semantic={pretrain_semantic}",
                                            f"semantic_decay_factor={semantic_decay_factor}",
                                        ]
                                    )
                                    + "/"
                                )

                                agent = HandcraftedAgent(
                                    env_str=env_str,
                                    env_config={**env_config, "seed": seed},
                                    mm_policy=mm_policy,
                                    qa_function=qa_function,
                                    explore_policy=explore_policy,
                                    num_samples_for_results=10,
                                    capacity=capacity,
                                    pretrain_semantic=pretrain_semantic,
                                    semantic_decay_factor=semantic_decay_factor,
                                    default_root_dir=os.path.join(root_path, run_dir),
                                )
                                agent.test()

    def parse_hyper_params_from_path(path):
        """Extract hyperparameters from a results path made of ``key=value`` directories.

        Directories are looked up by name (``mm_policy=...``,
        ``episodiccapacity=...``, ...) rather than by position, so the result
        does not depend on how deep the root directory is or on the platform's
        path separator.

        Args:
            path: Path to a results file whose parent directories encode the
                hyperparameters as ``key=value``.

        Returns:
            A dict with the keys ``mm_policy``, ``qa_function``,
            ``explore_policy``, ``episodic_capacity``, ``semantic_capacity``,
            ``long_capacity`` (episodic + semantic), ``short_capacity``,
            ``pretrain_semantic`` (kept as a string) and
            ``semantic_decay_factor``; or ``None`` (after printing a message)
            when a directory is missing or a numeric value cannot be converted.
        """
        expected_dirs = {
            "mm_policy",
            "qa_function",
            "explore_policy",
            "episodiccapacity",
            "semanticcapacity",
            "shortcapacity",
            "pretrain_semantic",
            "semantic_decay_factor",
        }
        try:
            raw = {}
            # Normalise the native separator so glob results on any platform
            # split the same way.
            for part in path.replace(os.sep, "/").split("/"):
                pieces = part.split("=")
                if len(pieces) > 1 and pieces[0] in expected_dirs:
                    raw[pieces[0]] = pieces[-1]

            episodic_capacity = int(raw["episodiccapacity"])
            semantic_capacity = int(raw["semanticcapacity"])
            return {
                "mm_policy": raw["mm_policy"],
                "qa_function": raw["qa_function"],
                "explore_policy": raw["explore_policy"],
                "episodic_capacity": episodic_capacity,
                "semantic_capacity": semantic_capacity,
                "long_capacity": episodic_capacity + semantic_capacity,
                "short_capacity": int(raw["shortcapacity"]),
                "pretrain_semantic": raw["pretrain_semantic"],
                "semantic_decay_factor": float(raw["semantic_decay_factor"]),
            }
        except (KeyError, ValueError) as e:
            print(f"Error parsing hyperparameters from path {path}: {e}")
            return None

    def load_results(yaml_paths):
        """Read each YAML file in ``yaml_paths`` and return the parsed contents.

        A file that raises while being read is reported on stdout and skipped,
        so the returned list can be shorter than ``yaml_paths``.
        """
        loaded = []
        for yaml_path in yaml_paths:
            try:
                content = read_yaml(yaml_path)
            except Exception as err:
                print(f"Error reading YAML file {yaml_path}: {err}")
            else:
                loaded.append(content)
        return loaded

    # Each result file sits nine directories below the room-size directory:
    # eight `key=value` hyperparameter directories plus one further level.
    results_paths = glob(
        os.path.join(root_path, f"room_size={room_size}/*/*/*/*/*/*/*/*/*/results.yaml")
    )

    print(f"Before grouping: {len(results_paths)} results")

    # Group results by hyperparameters; runs sharing the same hyperparameters
    # end up in the same bucket.
    grouped_results = defaultdict(list)
    for path in results_paths:
        hyper_params = parse_hyper_params_from_path(path)
        if hyper_params is not None:
            results_data = read_yaml(path)
            # A sorted tuple of items is hashable and independent of key order.
            hp_tuple = tuple(sorted(hyper_params.items()))
            grouped_results[hp_tuple].append(results_data)

    print(f"After grouping: {len(grouped_results)} results")

    # Simplify results to mean and std of test scores
    simplified_results = []
    for hp_tuple, group in grouped_results.items():
        mean_scores = [run["test_score"]["mean"] for run in group]
        simplified_results.append(
            {
                "hyper_params": dict(hp_tuple),
                "results": {
                    # `.item()` converts the NumPy scalars to plain Python floats.
                    "test_mean": np.mean(mean_scores).item(),
                    "test_std": np.std(mean_scores).item(),
                },
            }
        )
    print(f"After simplifying: {len(simplified_results)} results")

    # Drop combinations whose memory policy, QA function and capacities do not
    # fit together.
    filtered_results = []
    for result in simplified_results:
        hp = result["hyper_params"]
        mm = hp["mm_policy"]
        qa = hp["qa_function"]
        episodic_cap = hp["episodic_capacity"]
        semantic_cap = hp["semantic_capacity"]

        # A memory with zero capacity can neither be managed nor queried.
        if episodic_cap == 0 and "episodic" in (mm, qa):
            continue
        if semantic_cap == 0 and "semantic" in (mm, qa):
            continue

        # Single-memory policies keep only their own QA function and must not
        # have any capacity in the other memory.
        if mm == "episodic" and (qa != "episodic" or semantic_cap > 0):
            continue
        if mm == "semantic" and (qa != "semantic" or episodic_cap > 0):
            continue

        # "generalize" and "random" policies are not paired with a
        # single-memory QA function.
        if mm in ("generalize", "random") and qa in ("episodic", "semantic"):
            continue

        filtered_results.append(result)

    print(f"After filtering: {len(filtered_results)} results")

    # One flat row per configuration; build fresh dicts so the entries of
    # `simplified_results` are not mutated.
    data = [{**item["hyper_params"], **item["results"]} for item in filtered_results]

    # Define the desired column order
    column_order = [
        "mm_policy",
        "qa_function",
        "explore_policy",
        "pretrain_semantic",
        "long_capacity",
        "episodic_capacity",
        "semantic_capacity",
        "short_capacity",
        "semantic_decay_factor",
        "test_mean",
        "test_std",
    ]

    # Passing `columns` keeps the frame well-formed (header only) even when no
    # result survived the filtering.
    df = pd.DataFrame(data, columns=column_order)

    # Primary order: long-term capacity ascending, best test score first.
    # The remaining columns act as explicit tie-breakers so that the row order
    # does not depend on the stability of the sort implementation.
    tie_breakers = [
        col for col in column_order if col not in ("long_capacity", "test_mean")
    ]
    df = df.sort_values(
        by=["long_capacity", "test_mean", *tie_breakers],
        ascending=[True, False] + [True] * len(tie_breakers),
    )

    # Reorder the DataFrame columns
    df = df[column_order]

    # Save the DataFrame to a CSV file
    csv_path = os.path.join(root_path, f"hand-crafted-results-room_size={room_size}.csv")
    df.to_csv(csv_path, index=False)

    # Confirm the DataFrame is saved by printing the location
    print(csv_path)

## Measure the average observations per room, by room size

In [None]:
import logging

# Disable the root logger to quiet logging output while the environment runs.
# NOTE(review): `disabled` is set on the root logger only; records emitted via
# other named loggers may still reach root handlers — confirm this silences
# what is intended.
logger = logging.getLogger()
logger.disabled = True

import random
import gymnasium as gym
from tqdm.auto import tqdm

# For every room size, play one episode with random answers and random moves
# and report the average / max / min number of entries in
# `observations["room"]` per step.
for room_size in ["xxs", "xs", "s", "m", "l", "xl", "xxl"]:
    num_obs = []

    env_str = "room_env:RoomEnv-v2"
    env_config = {
        "question_prob": 1.0,
        "terminates_at": 99,
        "randomize_observations": "objects",
        "room_size": room_size,
        "rewards": {"correct": 1, "wrong": 0, "partial": 0},
        "make_everything_static": False,
        "num_total_questions": 1000,
        "question_interval": 5,
        "include_walls_in_observations": True,
        "deterministic_objects": False,
    }

    # Build the environment from the configuration above. Previously only
    # `room_size` was forwarded and the rest of `env_config` was silently
    # ignored, so the measurement ran on the environment's defaults.
    env = gym.make(env_str, **env_config)
    observations, info = env.reset()

    while True:
        # One answer per pending question, plus a random movement action.
        observations, reward, done, truncated, info = env.step(
            (
                ["random answer"] * len(observations["questions"]),
                random.choice(["north", "east", "south", "west"]),
            )
        )
        num_obs.append(len(observations["room"]))
        # Stop on either end-of-episode signal so a truncated episode cannot
        # keep the loop running.
        if done or truncated:
            break
    env.close()

    print(
        f"room_size={room_size}\tnum_obs={sum(num_obs) / len(num_obs)}\t"
        f" max_obs={max(num_obs)}\tmin_obs={min(num_obs)}"
    )