In [1]:
# Setup
import re
import os
import json
import numpy as np
import pandas as pd
from scipy import stats
from scipy.stats import bootstrap
import matplotlib.pyplot as plt

# Name of the performance metric reported for each task.
metrics = {
    "gameof24": "success_rate",
    "crosswords": "r_letter",
}

#############################################
# Just some util and plot styling functions #
#############################################
def get_files_in_folder(folder_path):
    """Return the sorted paths of the regular files directly inside ``folder_path``.

    Sub-directories are skipped. Every returned path is built as
    ``folder_path + "/" + file_name``.
    """
    return sorted(
        folder_path + "/" + entry
        for entry in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, entry))
    )

def get_number(string):
    """Return the first run of decimal digits found in ``string`` as an ``int``.

    Raises ``IndexError`` when ``string`` contains no digits.
    """
    digit_runs = re.findall(r'\d+', string)
    return int(digit_runs[0])

def get_params(file_path, task="gameof24"):
    """Parse the experiment configuration encoded in a log file name.

    The last ``/``-separated component of ``file_path`` is split on ``_`` and
    the resulting fields are read by position:

    * field 0 -> ``set`` (text before the first ``-``)
    * fields 1-4 -> ``n_agents``, ``n_steps``, ``k``, ``origin_value``
      (first integer found in each field)
    * field 5 -> ``backtrack`` (the decimal number contained in the field)
    * field 6 -> ``resampling`` (text before the first ``-``)

    Parameters
    ----------
    file_path : str
        Path to the log file; only its file name is inspected.
    task : str, optional
        Accepted for interface compatibility; not used by the parser.

    Returns
    -------
    dict
        The parsed fields plus ``file_path`` and ``name``, a newline-separated
        label built from ``n_agents``, ``n_steps``, ``k`` and ``backtrack``.

    Raises
    ------
    IndexError
        If the file name has fewer than seven ``_``-separated fields, or an
        integer field contains no digits.
    ValueError
        If field 5 does not contain a decimal number such as ``0.25``.
    """
    data = {}

    file_name = file_path.split('/')[-1]
    params = file_name.split('_')

    data["set"] = params[0].split("-")[0]
    data["n_agents"] = get_number(params[1])
    data["n_steps"] = get_number(params[2])
    data["k"] = get_number(params[3])
    data["origin_value"] = get_number(params[4])

    # Parse the decimal literal as written. Converting the fractional digits to
    # an int first would drop leading zeros (0.05 would be read as 0.5) and
    # hard-coding the "0." prefix would discard the integer part.
    backtrack_match = re.search(r'\d+\.\d+', params[5])
    if backtrack_match is None:
        raise ValueError(
            f"Cannot parse backtrack value from {params[5]!r} in file name {file_name!r}"
        )
    data["backtrack"] = float(backtrack_match.group())

    data["resampling"] = params[6].split("-")[0]
    data["file_path"] = file_path
    data["name"] = f"{data['n_agents']}agents\n{data['n_steps']}steps\n{data['k']}k\n{data['backtrack']}b"

    return data

def get_gameof24_puzzle_results_foa(file_path):
    """Load a Game of 24 experiment log and return ``(puzzle_results, cost)``.

    ``cost`` is the ``"total_cost"`` value stored under the log's ``"Cost"``
    entry. ``puzzle_results`` holds one integer per remaining entry of the log:
    ``1`` when ``{"r": 1}`` is among that puzzle's ``"Verifications"``, else ``0``.
    """
    with open(file_path, "r") as log_file:
        log = json.load(log_file)

    # "Cost" sits next to the puzzles, so take it out before iterating them.
    total_cost = log.pop("Cost")["total_cost"]

    solved_flags = [
        int({"r": 1} in puzzle["Verifications"])
        for puzzle in log.values()
    ]
    return solved_flags, total_cost

def get_crosswords_puzzle_results_foa(file_path, metric="r_letter"):
    """Load a crosswords experiment log and return ``(puzzle_results, cost)``.

    For every puzzle in the log, each agent's best step is the one whose
    ``"Step"`` string contains the most ``" -> "``-separated actions (the first
    such step on ties). The puzzle's result is taken from the best step of the
    agent whose best step has the most actions (the lowest-numbered agent on
    ties).

    Parameters
    ----------
    file_path : str
        Path to the JSON log file.
    metric : str, optional
        Which metric to return: ``"r_letter"``, ``"r_word"`` or ``"r_all"``.

    Returns
    -------
    tuple
        ``(values, cost)`` where ``values`` lists the requested metric of the
        selected step for each puzzle, in log order, and ``cost`` is
        ``log["Cost"]["Total cost"]["total_cost"]``.

    Raises
    ------
    KeyError
        If ``metric`` is not one of the three metric names, or an expected key
        is missing from the log.
    ValueError
        If a puzzle has no agents or an agent has no steps.
    """
    def count_actions(step):
        # A step without a "Step" string counts as a single action because
        # "".split(" -> ") yields [""].
        return len(step.get("Step", "").split(" -> "))

    with open(file_path, "r") as experiment_file:
        data = json.load(experiment_file)
    cost = data.pop("Cost")["Total cost"]["total_cost"]

    selected_steps = []
    for puzzle in data.values():
        n_agents = len(puzzle) - 2 # -2 for "puzzle" and "Verifications"

        # Keep the best step object itself. Looking it up again by its position
        # as f"Step {position}" picks the wrong step whenever the step keys are
        # not stored in zero-based order (e.g. "Step 10" sorted before "Step 2").
        agent_best_steps = [
            max(puzzle[f"Agent {agent}"].values(), key=count_actions)
            for agent in range(n_agents)
        ]

        # max() returns the first maximal element, so ties go to the
        # lowest-numbered agent.
        selected_steps.append(max(agent_best_steps, key=count_actions))

    puzzle_results = {
        name: [step["metrics"][name] for step in selected_steps]
        for name in ("r_letter", "r_word", "r_all")
    }
    return puzzle_results[metric], cost

# Dispatch table: task name -> function that loads that task's experiment log.
get_task_puzzle_results_foa = {
    "gameof24": get_gameof24_puzzle_results_foa,
    "crosswords": get_crosswords_puzzle_results_foa,
}

In [3]:
# Loading the data
# Fixed seed so the bootstrap means / confidence intervals are identical on every run.
BOOTSTRAP_SEED = 0

# data[task][optimization] maps each configuration name to
# {"experiments": [...], "stats": {...}} for that task / optimization target.
# Example: data["gameof24"]["cost"] holds the gameof24 experiments that optimize for cost.
data = {"gameof24":{"cost":{}, "performance":{}, "both":{}}, "crosswords":{"cost":{}, "performance":{}, "both":{}}}


def _bootstrap_mean_ci(values, seed=BOOTSTRAP_SEED):
    """Bootstrap the mean of the 1-D ``values``.

    Returns ``(mean of the bootstrap distribution, confidence interval)`` using
    scipy's default bootstrap settings and a fixed seed.
    """
    result = bootstrap((np.asarray(values),), np.mean, random_state=seed)
    return result.bootstrap_distribution.mean(), result.confidence_interval


for task, optimizations in data.items():
    for optimization in optimizations:
        folder = f"logs_recent/arxiv/{task}/{optimization}"
        files = get_files_in_folder(folder)
        experiments = [get_params(file, task) for file in files]

        # Group the log files that share the same configuration name.
        configurations = {}
        for experiment in experiments:
            configurations.setdefault(experiment["name"], []).append(experiment)

        for configuration, config_experiments in configurations.items():
            # Puzzle results are pooled across the configuration's files;
            # cost contributes one value per file.
            puzzle_results = []
            costs = []
            for experiment in config_experiments:
                result, cost = get_task_puzzle_results_foa[task](experiment["file_path"])
                costs.append(cost)
                puzzle_results.extend(result)

            # Compute stats. The dict is deliberately not called `stats`, which
            # would overwrite the `scipy.stats` module imported in the setup cell.
            performance_mean, performance_ci = _bootstrap_mean_ci(puzzle_results)
            cost_mean, cost_ci = _bootstrap_mean_ci(costs)
            config_stats = {
                "performance_mean": performance_mean,
                "performance_ci": performance_ci,
                "cost_mean": cost_mean,
                "cost_ci": cost_ci,
            }

            data[task][optimization].update({configuration:{"experiments": config_experiments, "stats": config_stats}})

In [5]:
# Print the bootstrapped performance and cost of every configuration in the
# "both" optimization setting, per task.
for task in data:
    print(f"Task: {task}")
    for configuration, results in data[task]["both"].items():
        # Named `summary` rather than `stats` so the `scipy.stats` module
        # imported in the setup cell is not overwritten at notebook scope.
        summary = results["stats"]
        performance_ci = summary["performance_ci"]
        cost_ci = summary["cost_ci"]
        print("Configuration: {}".format(configuration.split('\n')))
        print(f"\t {metrics[task]}: {summary['performance_mean']:.2f} ({performance_ci[0]:.2f}, {performance_ci[1]:.2f})")
        print(f"\t Cost: {summary['cost_mean']:.2f} ({cost_ci[0]:.2f}, {cost_ci[1]:.2f})")
        print("\n")

Task: gameof24
Configuration: ['10agents', '5steps', '1k', '0.1b']
	 success_rate: 0.20 (0.17, 0.24)
	 Cost: 1.46 (1.41, 1.51)


Task: crosswords
Configuration: ['4agents', '6steps', '3k', '0.5b']
	 r_letter: 0.39 (0.35, 0.43)
	 Cost: 0.37 (0.36, 0.37)


