## Collect data

In [None]:
import logging

# getLogger() with no name returns the root logger; disabling it here is
# presumably meant to keep log output out of the notebook while the agents
# run -- NOTE(review): confirm that silencing all logging is intended.
logger = logging.getLogger()
logger.disabled = True

from agent import HandcraftedAgent

# Environment id and base configuration shared by every run. The per-run seed
# is merged into a copy of this config when each agent is created.
env_str = "room_env:RoomEnv-v2"
env_config = dict(
    question_prob=1.0,
    terminates_at=99,
    randomize_observations="objects",
    room_size="l",
    rewards={"correct": 1, "wrong": 0, "partial": 0},
    make_everything_static=False,
    num_total_questions=1000,
    question_interval=1,
    include_walls_in_observations=True,
    deterministic_objects=False,
)

results = []

# Values swept for each hyperparameter.
mm_policies = ["random", "episodic", "semantic", "generalize"]
qa_functions = ["episodic_semantic", "episodic", "semantic", "random"]
explore_policies = ["random", "avoid_walls"]
total_capacities = [96]
pretrain_options = [False, "include_walls", "exclude_walls"]

# Lazily enumerate every combination. The clause order below matches the
# nesting order of the equivalent for-loops, so runs happen in the same
# sequence. Each total capacity is tried as all-episodic, all-semantic, and an
# even split; short-term capacity is always 1.
sweep = (
    (mm, qa, explore, pretrain, split)
    for mm in mm_policies
    for qa in qa_functions
    for explore in explore_policies
    for total in total_capacities
    for pretrain in pretrain_options
    for split in (
        {"episodic": total, "semantic": 0, "short": 1},
        {"episodic": 0, "semantic": total, "short": 1},
        {"episodic": total // 2, "semantic": total // 2, "short": 1},
    )
)

for mm_policy, qa_function, explore_policy, pretrain_semantic, capacity_ in sweep:
    # The try wraps the whole seed loop: if any seed raises, the error is
    # printed and the remaining seeds of this combination are skipped.
    try:
        for seed in range(5):
            seeded_config = {**env_config, "seed": seed}
            root_dir = f"./training-results/handcrafted/room_size={env_config['room_size']}/mm_policy={mm_policy}/qa_function={qa_function}/explore_policy={explore_policy}/episodiccapacity={capacity_['episodic']}/semanticcapacity={capacity_['semantic']}/shortcapacity={capacity_['short']}/pretrain_semantic={pretrain_semantic}"
            agent = HandcraftedAgent(
                env_str=env_str,
                env_config=seeded_config,
                mm_policy=mm_policy,
                qa_function=qa_function,
                explore_policy=explore_policy,
                capacity=capacity_,
                pretrain_semantic=pretrain_semantic,
                default_root_dir=root_dir,
            )
            agent.test()
    except Exception as e:
        print(e)

## Parse data

In [14]:
import os
from glob import glob
from humemai.utils import read_yaml
from collections import defaultdict
import json
import numpy as np
import pandas as pd


def parse_hyper_params_from_path(path):
    """Extract the run hyperparameters encoded in a results file path.

    The path is expected to contain ``key=value`` directory names such as
    ``mm_policy=random`` or ``episodiccapacity=48``. Fields are looked up by
    their key rather than by their position in the path, so the result does
    not depend on how deep the results root is or on which path separator the
    operating system uses.

    Args:
        path: Path to a results file. Both forward slashes and backslashes
            are accepted as separators.

    Returns:
        A dict with the keys ``mm_policy``, ``qa_function``,
        ``explore_policy``, ``episodic_capacity``, ``semantic_capacity``,
        ``long_capacity`` (episodic + semantic), ``short_capacity`` and
        ``pretrain_semantic``. Capacities are ints, everything else is kept as
        the string found in the path. Returns ``None`` (after printing a
        message) when a field is missing or a capacity is not an integer.
    """
    # On Windows, glob can return backslash-separated paths; normalise them so
    # one split handles both conventions.
    fields = {}
    for segment in path.replace("\\", "/").split("/"):
        key, sep, value = segment.partition("=")
        if sep:  # only "key=value" directory names carry a hyperparameter
            fields[key] = value

    try:
        episodic_capacity = int(fields["episodiccapacity"])
        semantic_capacity = int(fields["semanticcapacity"])
        return {
            "mm_policy": fields["mm_policy"],
            "qa_function": fields["qa_function"],
            "explore_policy": fields["explore_policy"],
            "episodic_capacity": episodic_capacity,
            "semantic_capacity": semantic_capacity,
            "long_capacity": episodic_capacity + semantic_capacity,
            "short_capacity": int(fields["shortcapacity"]),
            "pretrain_semantic": fields["pretrain_semantic"],
        }
    except (KeyError, ValueError) as e:
        print(f"Error parsing hyperparameters from path {path}: {e}")
        return None


def load_results(yaml_paths):
    """Read every YAML file in ``yaml_paths`` and return the parsed contents.

    Files that fail to load are reported with a printed message and skipped,
    so the returned list may be shorter than ``yaml_paths``.
    """
    loaded = []
    for path in yaml_paths:
        try:
            content = read_yaml(path)
        except Exception as e:
            print(f"Error reading YAML file {path}: {e}")
        else:
            loaded.append(content)
    return loaded


# Room size whose result files are aggregated below.
room_size = "xl"

# One results.yaml is expected eight directory levels below the room-size
# directory.
results_paths = glob(
    f"./training-results/handcrafted/room_size={room_size}/*/*/*/*/*/*/*/*/results.yaml"
)

print(f"Before grouping: {len(results_paths)} results")

# Bucket the loaded results by hyperparameter combination. The sorted tuple of
# (name, value) pairs is used as the dict key because a dict is not hashable.
grouped_results = defaultdict(list)
for results_path in results_paths:
    parsed = parse_hyper_params_from_path(results_path)
    if parsed is None:
        # The path could not be parsed; the parser has already reported it.
        continue
    loaded = read_yaml(results_path)
    group_key = tuple(sorted(parsed.items()))
    grouped_results[group_key].append(loaded)

print(f"After grouping: {len(grouped_results)} results")

# Collapse each hyperparameter group to the mean and standard deviation of its
# runs' mean test scores.
simplified_results = []
for group_key, runs in grouped_results.items():
    run_means = np.asarray([run["test_score"]["mean"] for run in runs])
    summary = {
        # .item() converts the NumPy scalars to plain Python floats.
        "test_mean": run_means.mean().item(),
        "test_std": run_means.std().item(),
    }
    simplified_results.append({"hyper_params": dict(group_key), "results": summary})

print(f"After simplifying: {len(simplified_results)} results")

# filtered results
def _is_excluded(hyper_params):
    """Return True when a hyperparameter combination is left out of the table."""
    mm = hyper_params["mm_policy"]
    qa = hyper_params["qa_function"]
    episodic_cap = hyper_params["episodic_capacity"]
    semantic_cap = hyper_params["semantic_capacity"]

    # A memory type with zero capacity cannot be the memory-management policy
    # or the question-answering function.
    if episodic_cap == 0 and "episodic" in (mm, qa):
        return True
    if semantic_cap == 0 and "semantic" in (mm, qa):
        return True

    if mm == "episodic":
        # Kept only with the episodic QA function and no semantic capacity.
        if qa in ("semantic", "random", "episodic_semantic") or semantic_cap > 0:
            return True
    elif mm == "semantic":
        # Kept only with the semantic QA function and no episodic capacity.
        if qa in ("episodic", "random", "episodic_semantic") or episodic_cap > 0:
            return True
    elif mm in ("generalize", "random"):
        # Single-memory QA functions are dropped for these policies.
        if qa in ("episodic", "semantic"):
            return True

    # Runs pretrained with "exclude_walls" are always dropped.
    return hyper_params["pretrain_semantic"] == "exclude_walls"


filtered_results = [
    entry for entry in simplified_results if not _is_excluded(entry["hyper_params"])
]

print(f"After filtering: {len(filtered_results)} results")


# Create a DataFrame from the combined results.
# Each row is a fresh dict merging the hyperparameters with the score summary,
# so the dicts held by filtered_results are not mutated as a side effect.
data = [{**item["hyper_params"], **item["results"]} for item in filtered_results]

# Define the desired column order
column_order = [
    "mm_policy",
    "qa_function",
    "explore_policy",
    "pretrain_semantic",
    "long_capacity",
    "episodic_capacity",
    "semantic_capacity",
    "short_capacity",
    "test_mean",
    "test_std",
]

# Passing the columns explicitly fixes the column order and keeps the frame
# well-formed (and sortable) even when filtered_results is empty.
df = pd.DataFrame(data, columns=column_order)

# Single deterministic sort: long_capacity ascending, then test_mean
# descending, with the remaining columns (in column_order) as ascending
# tie-breakers. This spells out the ordering explicitly instead of relying on
# a second sort pass preserving the order of an earlier one.
primary_keys = ["long_capacity", "test_mean"]
sort_keys = primary_keys + [col for col in column_order if col not in primary_keys]
sort_ascending = [col != "test_mean" for col in sort_keys]
df = df.sort_values(by=sort_keys, ascending=sort_ascending)

# Save the DataFrame to a CSV file
csv_path = f"hand-crafted-results-room_size={room_size}.csv"
df.to_csv(csv_path, index=False)

# Confirm the DataFrame is saved by printing the location
print(f"DataFrame saved to '{csv_path}'")

Before grouping: 5760 results
After grouping: 1152 results
After simplifying: 1152 results
After filtering: 204 results
DataFrame saved to 'hand-crafted-results-room_size=xl.csv'
