In [1]:
from matplotlib import animation
import matplotlib.pyplot as plt
from pprint import pprint
import ray
import torch
%matplotlib notebook

from run import *



In [2]:
def run_evaluation(
    agent_run_names,
    agent_checkpoints,
    config,
    policy_name="ppo",
    seed=1,
    heterogeneous=True,
    num_episodes=1,
    record=True
):
    """Restore one checkpointed agent per environment slot and evaluate the team.

    Ray is shut down and re-initialised on every call, the "ZSC-Cleaner"
    environment is (re-)registered under a run name derived from the inputs,
    and each agent gets its own trainer restored from
    ``~/ray_results/<run_name>/checkpoint_<NNNNNN>/checkpoint-<N>``.

    Args:
        agent_run_names: Training run name for each agent slot; entry ``i``
            is used for agent ``i``. Needs at least
            ``config["env_config"]["num_agents"]`` entries.
        agent_checkpoints: Checkpoint number for each entry of
            ``agent_run_names`` (same indexing).
        config: Experiment configuration; ``config["env_config"]`` is passed
            to ``CleanerEnv`` and ``config`` itself to ``Agent``/``evaluate``.
        policy_name: Forwarded to ``Agent`` and ``create_trainer``.
        seed: Forwarded to ``Agent`` and ``create_trainer``.
        heterogeneous: Forwarded to ``Agent``, ``create_trainer`` and
            ``evaluate``.
        num_episodes: Forwarded to ``evaluate``.
        record: Forwarded to ``evaluate``.

    Returns:
        The ``(ep_rewards, ani)`` pair returned by ``evaluate`` (presumably
        the per-episode rewards and a matplotlib animation -- confirm against
        ``run.evaluate``).

    Raises:
        ValueError: If fewer run names than agents, or fewer checkpoints than
            run names, are supplied.
    """
    num_agents = config["env_config"]["num_agents"]

    # Validate before touching Ray: a mismatch would otherwise only surface as
    # a bare IndexError, possibly after the cluster restart and trainer setup.
    if len(agent_run_names) < num_agents:
        raise ValueError(
            f"expected at least {num_agents} agent run names, "
            f"got {len(agent_run_names)}"
        )
    if len(agent_checkpoints) < len(agent_run_names):
        raise ValueError(
            f"expected a checkpoint for each of the {len(agent_run_names)} "
            f"agent run names, got {len(agent_checkpoints)}"
        )

    # NOTE(review): restarting Ray on every call is slow, and the captured
    # notebook output shows intermittent dashboard port-binding errors that
    # may be related to these rapid restarts -- worth confirming.
    ray.shutdown()
    ray.init()

    # "<run>_<ckpt>_<run>_<ckpt>..." identifies this particular pairing.
    eval_run_name = "_".join(
        f"{run_name}_{checkpoint}"
        for run_name, checkpoint in zip(agent_run_names, agent_checkpoints)
    )
    ray_dir = f"{os.path.expanduser('~')}/ray_results"
    register_env("ZSC-Cleaner", lambda _: CleanerEnv(config["env_config"], run_name=eval_run_name))

    agents = {}
    for i in range(num_agents):
        agent = Agent(
            policy_name=policy_name,
            run_name=agent_run_names[i],
            agent_num=i,
            config=config,
            seed=seed,
            heterogeneous=heterogeneous,
        )
        agents[agent.name] = agent

        # The trainer is built with one copy of this agent per slot, named
        # "<run_name>:<slot>". Presumably this mirrors the self-play setup the
        # checkpoint was trained with -- TODO confirm against create_trainer.
        trainer_agents = {}
        for other_agent_num in range(agent.config["env_config"]["num_agents"]):
            other_agent = deepcopy(agent)
            other_agent.agent_num = other_agent_num
            other_agent.name = f"{agent.run_name}:{other_agent_num}"
            trainer_agents[other_agent.name] = other_agent
        agent.trainer = create_trainer(
            agent.policy_name,
            trainer_agents,
            agent.config,
            agent.results_dir,
            seed=agent.seed,
            heterogeneous=agent.heterogeneous,
            num_workers=1
        )

        checkpoint_num = agent_checkpoints[i]
        # Directory component is zero-padded to six digits, file name is not.
        checkpoint_path = f"{ray_dir}/{agent.run_name}/checkpoint_" \
                          f"{str(checkpoint_num).zfill(6)}/checkpoint-{checkpoint_num}"
        agent.trainer.load_checkpoint(checkpoint_path)

    ep_rewards, ani = evaluate(
        agents,
        config,
        eval_run_name,
        heterogeneous=heterogeneous,
        num_episodes=num_episodes,
        record=record,
    )
    return ep_rewards, ani

In [3]:
def cross_play(
    run_name_0, run_name_1, checkpoint_0, checkpoint_1, config,
    heterogeneous=True, num_episodes=1
):
    """Evaluate two training runs in self-play and in a mixed (cross-play) team.

    Three evaluations are run via ``run_evaluation`` (policy ``"ppo"``,
    ``seed=1``, ``record=False``): run 0 filling every agent slot, run 1
    filling every agent slot, and a mixed team whose slot assignment depends
    on ``config["env_config"]["num_agents"]``.

    Args:
        run_name_0: Name of the first training run.
        run_name_1: Name of the second training run.
        checkpoint_0: Checkpoint number to load for ``run_name_0``.
        checkpoint_1: Checkpoint number to load for ``run_name_1``.
        config: Experiment configuration passed through to ``run_evaluation``.
        heterogeneous: Forwarded to ``run_evaluation``.
        num_episodes: Forwarded to ``run_evaluation``.

    Returns:
        Tuple ``(self_play_0, self_play_1, cross_play)`` holding the first
        element returned by ``run_evaluation`` for each of the three teams.

    Raises:
        ValueError: If no mixed-team layout is defined for the configured
            number of agents (supported: 2, 4, 5, 8).
    """
    # Which run (0 or 1) occupies each agent slot of the mixed team.
    team_layouts = {
        2: (0, 1),
        4: (0, 1, 1, 0),
        5: (0, 1, 0, 1, 0),
        8: (0, 1, 1, 0, 1, 0, 0, 1),
    }
    num_agents = config["env_config"]["num_agents"]

    # Resolve the layout before any evaluation so an unsupported team size
    # fails immediately instead of after two full self-play evaluations.
    if num_agents not in team_layouts:
        raise ValueError(
            f"cross_play has no mixed-team layout for num_agents={num_agents}; "
            f"supported values are {sorted(team_layouts)}"
        )
    layout = team_layouts[num_agents]
    runs = (run_name_0, run_name_1)
    run_checkpoints = (checkpoint_0, checkpoint_1)

    def evaluate_team(run_names, checkpoints):
        # Only the episode rewards are needed; the animation is discarded.
        ep_rewards, _ = run_evaluation(
            run_names,
            checkpoints,
            config,
            policy_name="ppo",
            seed=1,
            heterogeneous=heterogeneous,
            num_episodes=num_episodes,
            record=False
        )
        return ep_rewards

    self_play_0 = evaluate_team([run_name_0] * num_agents, [checkpoint_0] * num_agents)
    self_play_1 = evaluate_team([run_name_1] * num_agents, [checkpoint_1] * num_agents)
    cross_play_rewards = evaluate_team(
        [runs[team] for team in layout],
        [run_checkpoints[team] for team in layout],
    )
    return self_play_0, self_play_1, cross_play_rewards

In [4]:
# --- Experiment settings -----------------------------------------------------
# The two training runs being compared and the environment config they share.
run_name_0 = "simple123"
run_name_1 = "simple456"
config_name = "simple_2"
heterogeneous = True
max_checkpoint = 501   # last checkpoint number to evaluate (inclusive)
num_episodes = 50      # evaluation episodes per team per checkpoint
random_start = False

config = load_config(config_name)
config["env_config"]["random_start"] = random_start

# --- Results -----------------------------------------------------------------
# One mean episode reward per evaluated checkpoint, for each of the three teams
# returned by cross_play (run 0 self-play, run 1 self-play, mixed team).
self_play_mean_reward_2_0, self_play_mean_reward_2_1, cross_play_mean_reward_2 = [], [], []

# Evaluate every 25th checkpoint, starting from checkpoint 1.
all_checkpoints = list(range(1, max_checkpoint + 1, 25))
for checkpoint in all_checkpoints:
    # Both runs are compared at the same checkpoint number.
    sp_0, sp_1, cp = cross_play(
        run_name_0,
        run_name_1,
        checkpoint,
        checkpoint,
        config,
        heterogeneous=heterogeneous,
        num_episodes=num_episodes,
    )
    for mean_rewards, episode_rewards in (
        (self_play_mean_reward_2_0, sp_0),
        (self_play_mean_reward_2_1, sp_1),
        (cross_play_mean_reward_2, cp),
    ):
        mean_rewards.append(sum(episode_rewards) / num_episodes)

2021-09-16 14:54:53,924	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2021-09-16 14:54:55,767	INFO trainer.py:696 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


<IPython.core.display.Javascript object>

episode rewards: [29.0, 34.0, 27.0, 35.0, 28.0, 39.0, 30.0, 32.0, 35.0, 35.0, 41.0, 28.0, 23.0, 30.0, 33.0, 38.0, 25.0, 18.0, 41.0, 20.0, 31.0, 32.0, 32.0, 26.0, 26.0, 43.0, 31.0, 37.0, 37.0, 41.0, 37.0, 19.0, 32.0, 26.0, 33.0, 32.0, 31.0, 29.0, 39.0, 28.0, 40.0, 40.0, 33.0, 33.0, 29.0, 24.0, 30.0, 39.0, 41.0, 18.0] (mean = 31.8)


2021-09-16 14:55:16,308	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [36.0, 43.0, 37.0, 21.0, 29.0, 29.0, 31.0, 39.0, 27.0, 35.0, 40.0, 24.0, 27.0, 38.0, 37.0, 33.0, 14.0, 35.0, 30.0, 35.0, 30.0, 18.0, 19.0, 19.0, 29.0, 40.0, 25.0, 35.0, 22.0, 25.0, 18.0, 32.0, 26.0, 38.0, 23.0, 39.0, 12.0, 33.0, 38.0, 30.0, 36.0, 43.0, 33.0, 31.0, 26.0, 50.0, 36.0, 43.0, 42.0, 17.0] (mean = 30.96)


2021-09-16 14:55:38,214	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [36.0, 30.0, 41.0, 32.0, 30.0, 32.0, 26.0, 33.0, 31.0, 29.0, 29.0, 47.0, 26.0, 43.0, 35.0, 29.0, 21.0, 15.0, 38.0, 40.0, 28.0, 25.0, 28.0, 31.0, 32.0, 25.0, 23.0, 49.0, 33.0, 14.0, 33.0, 34.0, 23.0, 34.0, 31.0, 27.0, 23.0, 27.0, 38.0, 33.0, 39.0, 34.0, 31.0, 39.0, 24.0, 22.0, 29.0, 48.0, 34.0, 17.0] (mean = 31.02)


2021-09-16 14:55:58,970	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [50.0, 47.0, 34.0, 30.0, 51.0, 37.0, 42.0, 45.0, 48.0, 35.0, 43.0, 48.0, 28.0, 47.0, 39.0, 44.0, 45.0, 47.0, 45.0, 45.0, 44.0, 46.0, 45.0, 30.0, 36.0, 43.0, 51.0, 35.0, 47.0, 45.0, 46.0, 15.0, 18.0, 54.0, 43.0, 51.0, 57.0, 50.0, 49.0, 41.0, 31.0, 60.0, 26.0, 52.0, 62.0, 63.0, 32.0, 26.0, 52.0, 41.0] (mean = 42.82)


2021-09-16 14:56:19,813	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [39.0, 21.0, 46.0, 28.0, 44.0, 47.0, 50.0, 50.0, 49.0, 36.0, 35.0, 44.0, 44.0, 38.0, 34.0, 47.0, 37.0, 30.0, 43.0, 51.0, 55.0, 48.0, 52.0, 57.0, 35.0, 48.0, 44.0, 28.0, 42.0, 49.0, 46.0, 48.0, 36.0, 50.0, 38.0, 49.0, 43.0, 41.0, 30.0, 45.0, 41.0, 33.0, 51.0, 39.0, 44.0, 36.0, 44.0, 49.0, 51.0, 45.0] (mean = 42.6)


2021-09-16 14:56:41,735	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [39.0, 37.0, 46.0, 34.0, 32.0, 46.0, 45.0, 49.0, 42.0, 39.0, 47.0, 53.0, 48.0, 53.0, 33.0, 24.0, 33.0, 35.0, 40.0, 28.0, 27.0, 42.0, 23.0, 33.0, 24.0, 33.0, 29.0, 21.0, 42.0, 44.0, 18.0, 47.0, 36.0, 22.0, 18.0, 47.0, 32.0, 44.0, 35.0, 44.0, 27.0, 32.0, 43.0, 26.0, 37.0, 37.0, 33.0, 23.0, 37.0, 35.0] (mean = 35.88)


2021-09-16 14:57:03,789	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [59.0, 40.0, 40.0, 45.0, 46.0, 50.0, 56.0, 40.0, 47.0, 53.0, 42.0, 33.0, 39.0, 35.0, 48.0, 51.0, 40.0, 43.0, 45.0, 42.0, 44.0, 60.0, 44.0, 35.0, 53.0, 25.0, 41.0, 11.0, 37.0, 43.0, 50.0, 37.0, 49.0, 21.0, 45.0, 42.0, 55.0, 46.0, 30.0, 34.0, 60.0, 39.0, 54.0, 27.0, 40.0, 56.0, 36.0, 37.0, 37.0, 38.0] (mean = 42.4)


2021-09-16 14:57:24,144	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [52.0, 31.0, 55.0, 51.0, 52.0, 38.0, 63.0, 38.0, 48.0, 56.0, 50.0, 45.0, 40.0, 40.0, 32.0, 46.0, 40.0, 35.0, 37.0, 39.0, 39.0, 62.0, 35.0, 43.0, 44.0, 43.0, 26.0, 52.0, 35.0, 47.0, 42.0, 32.0, 50.0, 52.0, 54.0, 52.0, 48.0, 47.0, 42.0, 49.0, 41.0, 52.0, 48.0, 49.0, 43.0, 43.0, 53.0, 48.0, 43.0, 40.0] (mean = 44.84)


2021-09-16 14:57:44,615	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [43.0, 29.0, 23.0, 26.0, 36.0, 26.0, 31.0, 31.0, 50.0, 41.0, 29.0, 38.0, 39.0, 47.0, 44.0, 50.0, 45.0, 52.0, 54.0, 40.0, 25.0, 45.0, 44.0, 27.0, 32.0, 20.0, 52.0, 27.0, 14.0, 32.0, 37.0, 32.0, 49.0, 25.0, 33.0, 35.0, 35.0, 24.0, 44.0, 42.0, 29.0, 26.0, 45.0, 16.0, 48.0, 48.0, 33.0, 33.0, 34.0, 22.0] (mean = 35.64)


2021-09-16 14:58:04,607	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [53.0, 55.0, 47.0, 47.0, 48.0, 33.0, 25.0, 49.0, 67.0, 50.0, 50.0, 51.0, 46.0, 52.0, 32.0, 58.0, 59.0, 43.0, 41.0, 46.0, 49.0, 45.0, 56.0, 44.0, 57.0, 52.0, 47.0, 54.0, 47.0, 57.0, 51.0, 57.0, 64.0, 39.0, 56.0, 58.0, 44.0, 64.0, 50.0, 57.0, 55.0, 20.0, 52.0, 44.0, 57.0, 50.0, 57.0, 34.0, 43.0, 42.0] (mean = 49.08)


2021-09-16 14:58:24,520	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [49.0, 49.0, 36.0, 38.0, 57.0, 48.0, 59.0, 53.0, 49.0, 55.0, 53.0, 44.0, 51.0, 58.0, 43.0, 35.0, 38.0, 49.0, 41.0, 44.0, 50.0, 67.0, 36.0, 55.0, 41.0, 41.0, 49.0, 47.0, 52.0, 45.0, 49.0, 43.0, 53.0, 58.0, 52.0, 43.0, 46.0, 45.0, 57.0, 35.0, 50.0, 55.0, 43.0, 43.0, 60.0, 43.0, 51.0, 49.0, 59.0, 58.0] (mean = 48.48)


2021-09-16 14:58:44,362	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [37.0, 37.0, 46.0, 29.0, 45.0, 47.0, 31.0, 40.0, 23.0, 41.0, 30.0, 28.0, 40.0, 25.0, 35.0, 37.0, 17.0, 37.0, 33.0, 32.0, 40.0, 25.0, 44.0, 30.0, 44.0, 27.0, 37.0, 48.0, 31.0, 15.0, 20.0, 47.0, 38.0, 39.0, 34.0, 43.0, 40.0, 29.0, 40.0, 48.0, 51.0, 49.0, 43.0, 27.0, 46.0, 39.0, 32.0, 36.0, 40.0, 27.0] (mean = 35.98)


2021-09-16 14:59:04,348	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [45.0, 65.0, 36.0, 57.0, 34.0, 38.0, 42.0, 41.0, 55.0, 60.0, 52.0, 50.0, 67.0, 48.0, 54.0, 54.0, 48.0, 38.0, 46.0, 55.0, 55.0, 58.0, 38.0, 46.0, 64.0, 74.0, 46.0, 59.0, 58.0, 52.0, 43.0, 50.0, 56.0, 47.0, 51.0, 59.0, 63.0, 43.0, 58.0, 53.0, 59.0, 20.0, 47.0, 49.0, 36.0, 28.0, 47.0, 53.0, 58.0, 71.0] (mean = 50.52)


2021-09-16 14:59:24,413	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [52.0, 41.0, 54.0, 52.0, 49.0, 54.0, 52.0, 45.0, 45.0, 53.0, 24.0, 44.0, 52.0, 47.0, 42.0, 23.0, 47.0, 61.0, 64.0, 52.0, 44.0, 41.0, 41.0, 52.0, 21.0, 51.0, 46.0, 45.0, 58.0, 44.0, 52.0, 42.0, 39.0, 56.0, 55.0, 51.0, 43.0, 58.0, 56.0, 46.0, 50.0, 61.0, 50.0, 50.0, 59.0, 46.0, 51.0, 46.0, 55.0, 61.0] (mean = 48.46)


2021-09-16 14:59:44,689	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [43.0, 34.0, 31.0, 11.0, 29.0, 17.0, 46.0, 37.0, 56.0, 33.0, 11.0, 41.0, 37.0, 27.0, 30.0, 31.0, 36.0, 26.0, 45.0, 38.0, 31.0, 17.0, 16.0, 39.0, 37.0, 30.0, 51.0, 27.0, 29.0, 38.0, 23.0, 29.0, 26.0, 38.0, 48.0, 40.0, 42.0, 45.0, 32.0, 31.0, 14.0, 29.0, 21.0, 14.0, 29.0, 30.0, 14.0, 27.0, 30.0, 37.0] (mean = 31.46)


2021-09-16 15:00:04,640	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [57.0, 47.0, 59.0, 40.0, 55.0, 58.0, 55.0, 56.0, 63.0, 61.0, 56.0, 67.0, 63.0, 60.0, 51.0, 54.0, 60.0, 52.0, 56.0, 60.0, 58.0, 71.0, 62.0, 45.0, 56.0, 52.0, 40.0, 54.0, 42.0, 55.0, 63.0, 59.0, 62.0, 59.0, 58.0, 59.0, 60.0, 47.0, 56.0, 57.0, 65.0, 57.0, 42.0, 57.0, 54.0, 60.0, 63.0, 53.0, 73.0, 47.0] (mean = 56.32)


2021-09-16 15:00:24,675	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [52.0, 54.0, 69.0, 48.0, 33.0, 57.0, 54.0, 65.0, 48.0, 64.0, 51.0, 44.0, 53.0, 42.0, 50.0, 48.0, 47.0, 56.0, 65.0, 51.0, 56.0, 61.0, 62.0, 33.0, 46.0, 54.0, 58.0, 43.0, 50.0, 43.0, 60.0, 41.0, 54.0, 44.0, 55.0, 48.0, 60.0, 60.0, 63.0, 50.0, 59.0, 53.0, 57.0, 41.0, 56.0, 58.0, 49.0, 54.0, 49.0, 48.0] (mean = 52.32)


2021-09-16 15:00:45,202	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [29.0, 29.0, 32.0, 15.0, 49.0, 26.0, 31.0, 49.0, 51.0, 49.0, 31.0, 38.0, 41.0, 28.0, 14.0, 39.0, 44.0, 15.0, 34.0, 36.0, 44.0, 31.0, 14.0, 41.0, 19.0, 22.0, 42.0, 40.0, 44.0, 45.0, 27.0, 28.0, 47.0, 17.0, 46.0, 45.0, 24.0, 16.0, 39.0, 43.0, 19.0, 56.0, 38.0, 23.0, 51.0, 10.0, 34.0, 24.0, 47.0, 16.0] (mean = 33.44)


2021-09-16 15:01:05,367	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [64.0, 57.0, 49.0, 60.0, 45.0, 43.0, 64.0, 59.0, 60.0, 54.0, 63.0, 67.0, 56.0, 61.0, 51.0, 52.0, 61.0, 54.0, 52.0, 64.0, 64.0, 64.0, 60.0, 58.0, 64.0, 59.0, 71.0, 63.0, 70.0, 38.0, 52.0, 58.0, 45.0, 55.0, 61.0, 63.0, 59.0, 67.0, 61.0, 67.0, 55.0, 62.0, 62.0, 52.0, 59.0, 61.0, 65.0, 63.0, 59.0, 36.0] (mean = 58.18)


2021-09-16 15:01:26,192	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [61.0, 62.0, 63.0, 39.0, 53.0, 65.0, 52.0, 67.0, 53.0, 62.0, 47.0, 50.0, 41.0, 51.0, 47.0, 69.0, 58.0, 60.0, 54.0, 40.0, 63.0, 65.0, 40.0, 58.0, 67.0, 66.0, 59.0, 59.0, 61.0, 53.0, 57.0, 57.0, 57.0, 70.0, 57.0, 55.0, 44.0, 54.0, 51.0, 35.0, 38.0, 59.0, 69.0, 24.0, 64.0, 49.0, 68.0, 53.0, 57.0, 51.0] (mean = 55.08)


2021-09-16 15:01:47,106	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
  fig, ax = plt.subplots()


<IPython.core.display.Javascript object>

episode rewards: [23.0, 47.0, 38.0, 39.0, 60.0, 40.0, 34.0, 50.0, 60.0, 27.0, 36.0, 35.0, 39.0, 51.0, 40.0, 15.0, 48.0, 19.0, 28.0, 48.0, 34.0, 44.0, 51.0, 33.0, 56.0, 24.0, 41.0, 32.0, 39.0, 49.0, 15.0, 13.0, 19.0, 39.0, 43.0, 49.0, 22.0, 34.0, 54.0, 32.0, 27.0, 53.0, 29.0, 63.0, 48.0, 32.0, 51.0, 41.0, 51.0, 23.0] (mean = 38.36)


2021-09-16 15:02:07,708	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [63.0, 57.0, 53.0, 69.0, 73.0, 60.0, 58.0, 46.0, 48.0, 52.0, 64.0, 60.0, 47.0, 60.0, 69.0, 51.0, 58.0, 72.0, 45.0, 60.0, 67.0, 67.0, 58.0, 53.0, 69.0, 63.0, 69.0, 53.0, 65.0, 69.0, 62.0, 66.0, 50.0, 60.0, 68.0, 67.0, 65.0, 69.0, 76.0, 68.0, 56.0, 70.0, 32.0, 67.0, 73.0, 66.0, 51.0, 64.0, 64.0, 67.0] (mean = 61.18)


2021-09-16 15:02:28,257	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [58.0, 51.0, 68.0, 51.0, 58.0, 74.0, 59.0, 57.0, 59.0, 59.0, 55.0, 74.0, 48.0, 50.0, 58.0, 52.0, 60.0, 33.0, 58.0, 65.0, 48.0, 37.0, 61.0, 75.0, 57.0, 59.0, 57.0, 55.0, 39.0, 57.0, 54.0, 56.0, 66.0, 55.0, 64.0, 58.0, 50.0, 67.0, 63.0, 52.0, 59.0, 54.0, 58.0, 60.0, 43.0, 56.0, 54.0, 51.0, 50.0, 32.0] (mean = 55.88)


2021-09-16 15:02:48,656	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [56.0, 54.0, 56.0, 32.0, 48.0, 40.0, 34.0, 37.0, 27.0, 41.0, 31.0, 39.0, 42.0, 19.0, 32.0, 42.0, 34.0, 42.0, 43.0, 20.0, 36.0, 25.0, 23.0, 47.0, 44.0, 48.0, 54.0, 25.0, 18.0, 30.0, 41.0, 49.0, 52.0, 52.0, 40.0, 42.0, 32.0, 60.0, 29.0, 17.0, 25.0, 37.0, 35.0, 41.0, 19.0, 8.0, 48.0, 40.0, 42.0, 51.0] (mean = 37.58)


2021-09-16 15:03:09,281	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [67.0, 59.0, 44.0, 62.0, 70.0, 72.0, 74.0, 62.0, 74.0, 67.0, 62.0, 66.0, 62.0, 46.0, 69.0, 60.0, 62.0, 71.0, 64.0, 61.0, 70.0, 61.0, 59.0, 68.0, 57.0, 60.0, 66.0, 74.0, 63.0, 64.0, 63.0, 69.0, 56.0, 70.0, 65.0, 67.0, 71.0, 51.0, 60.0, 74.0, 61.0, 69.0, 44.0, 65.0, 63.0, 54.0, 56.0, 63.0, 63.0, 64.0] (mean = 63.28)


2021-09-16 15:03:29,749	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [41.0, 56.0, 63.0, 60.0, 65.0, 56.0, 57.0, 67.0, 71.0, 63.0, 53.0, 57.0, 62.0, 65.0, 51.0, 66.0, 50.0, 49.0, 55.0, 24.0, 57.0, 59.0, 65.0, 55.0, 47.0, 52.0, 75.0, 63.0, 56.0, 50.0, 55.0, 59.0, 45.0, 50.0, 63.0, 53.0, 46.0, 62.0, 55.0, 62.0, 53.0, 46.0, 59.0, 59.0, 64.0, 52.0, 69.0, 56.0, 60.0, 53.0] (mean = 56.62)


2021-09-16 15:03:50,388	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [43.0, 20.0, 59.0, 37.0, 36.0, 46.0, 44.0, 29.0, 35.0, 43.0, 47.0, 42.0, 20.0, 29.0, 9.0, 53.0, 31.0, 33.0, 28.0, 36.0, 40.0, 27.0, 40.0, 44.0, 57.0, 17.0, 41.0, 46.0, 47.0, 66.0, 51.0, 33.0, 61.0, 32.0, 44.0, 42.0, 38.0, 38.0, 34.0, 31.0, 19.0, 38.0, 19.0, 30.0, 32.0, 41.0, 38.0, 53.0, 57.0, 51.0] (mean = 38.54)


2021-09-16 15:04:10,989	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [67.0, 67.0, 79.0, 61.0, 53.0, 59.0, 66.0, 75.0, 62.0, 79.0, 49.0, 71.0, 55.0, 67.0, 72.0, 71.0, 69.0, 66.0, 75.0, 73.0, 68.0, 67.0, 61.0, 60.0, 67.0, 72.0, 65.0, 69.0, 55.0, 63.0, 72.0, 58.0, 58.0, 68.0, 74.0, 40.0, 62.0, 81.0, 74.0, 64.0, 69.0, 67.0, 67.0, 65.0, 73.0, 69.0, 65.0, 72.0, 62.0, 75.0] (mean = 66.36)


2021-09-16 15:04:31,299	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
Traceback (most recent call last):
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 313, in <module>
    agent = DashboardAgent(
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 74, in __init__
    self.grpc_port = self.server.add_insecure_port(
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/grpc/aio/_server.py", line 83, in add_insecure_port
    return _common.validate_port_binding_result(
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/grpc/_common.py", line 166, in validate_port_binding_result
    raise RuntimeError(_ERROR_MESSAGE_PORT_BINDING_FAILED % address)
RuntimeError: Failed to bind to address [::]:58318; set GRPC_VERBOSITY=debug environment variable to see detailed error message.

[2m[33m(raylet)[0m E0916 15:04:33.342364122  2339

[2m[33m(raylet)[0m E0916 15:04:36.172980822  234000 server_chttp2.cc:40]        {"created":"@1631819076.172934511","description":"No address added out of total 1 resolved","file":"src/core/ext/transport/chttp2/server/chttp2_server.cc","file_line":320,"referenced_errors":[{"created":"@1631819076.172928484","description":"Failed to add any wildcard listeners","file":"src/core/lib/iomgr/tcp_server_posix.cc","file_line":340,"referenced_errors":[{"created":"@1631819076.172920221","description":"Unable to configure socket","fd":19,"file":"src/core/lib/iomgr/tcp_server_utils_posix_common.cc","file_line":214,"referenced_errors":[{"created":"@1631819076.172917230","description":"Address already in use","errno":98,"file":"src/core/lib/iomgr/tcp_server_utils_posix_common.cc","file_line":188,"os_error":"Address already in use","syscall":"bind"}]},{"created":"@1631819076.172928167","description":"Unable to configure socket","fd":19,"file":"src/core/lib/iomgr/tcp_server_utils_posix_common.cc","fi

<IPython.core.display.Javascript object>

Traceback (most recent call last):
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 313, in <module>
    agent = DashboardAgent(
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 74, in __init__
    self.grpc_port = self.server.add_insecure_port(
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/grpc/aio/_server.py", line 83, in add_insecure_port
    return _common.validate_port_binding_result(
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/grpc/_common.py", line 166, in validate_port_binding_result
    raise RuntimeError(_ERROR_MESSAGE_PORT_BINDING_FAILED % address)
RuntimeError: Failed to bind to address [::]:58318; set GRPC_VERBOSITY=debug environment variable to see detailed error message.

[2m[33m(raylet)[0m E0916 15:04:37.587891189  234009 server_chttp2.cc:40]        {"created":"@1631819077.587846555","description":"No address added out of total 1 res

Traceback (most recent call last):
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 313, in <module>
    agent = DashboardAgent(
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 74, in __init__
    self.grpc_port = self.server.add_insecure_port(
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/grpc/aio/_server.py", line 83, in add_insecure_port
    return _common.validate_port_binding_result(
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/grpc/_common.py", line 166, in validate_port_binding_result
    raise RuntimeError(_ERROR_MESSAGE_PORT_BINDING_FAILED % address)
RuntimeError: Failed to bind to address [::]:58318; set GRPC_VERBOSITY=debug environment variable to see detailed error message.

[2m[33m(raylet)[0m E0916 15:04:41.786834506  234042 server_chttp2.cc:40]        {"created":"@1631819081.786789967","description":"No address added out of total 1 res

[2m[33m(raylet)[0m E0916 15:04:44.571501393  234056 server_chttp2.cc:40]        {"created":"@1631819084.571460510","description":"No address added out of total 1 resolved","file":"src/core/ext/transport/chttp2/server/chttp2_server.cc","file_line":320,"referenced_errors":[{"created":"@1631819084.571454515","description":"Failed to add any wildcard listeners","file":"src/core/lib/iomgr/tcp_server_posix.cc","file_line":340,"referenced_errors":[{"created":"@1631819084.571447335","description":"Unable to configure socket","fd":19,"file":"src/core/lib/iomgr/tcp_server_utils_posix_common.cc","file_line":214,"referenced_errors":[{"created":"@1631819084.571444728","description":"Address already in use","errno":98,"file":"src/core/lib/iomgr/tcp_server_utils_posix_common.cc","file_line":188,"os_error":"Address already in use","syscall":"bind"}]},{"created":"@1631819084.571454125","description":"Unable to configure socket","fd":19,"file":"src/core/lib/iomgr/tcp_server_utils_posix_common.cc","fi

Traceback (most recent call last):
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 313, in <module>
    agent = DashboardAgent(
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 74, in __init__
    self.grpc_port = self.server.add_insecure_port(
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/grpc/aio/_server.py", line 83, in add_insecure_port
    return _common.validate_port_binding_result(
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/grpc/_common.py", line 166, in validate_port_binding_result
    raise RuntimeError(_ERROR_MESSAGE_PORT_BINDING_FAILED % address)
RuntimeError: Failed to bind to address [::]:58318; set GRPC_VERBOSITY=debug environment variable to see detailed error message.

[2m[33m(raylet)[0m E0916 15:04:48.757729339  234078 server_chttp2.cc:40]        {"created":"@1631819088.757683632","description":"No address added out of total 1 res

episode rewards: [77.0, 60.0, 58.0, 72.0, 59.0, 73.0, 50.0, 65.0, 70.0, 54.0, 57.0, 68.0, 60.0, 57.0, 72.0, 71.0, 74.0, 66.0, 53.0, 57.0, 79.0, 60.0, 57.0, 64.0, 66.0, 65.0, 68.0, 71.0, 44.0, 63.0, 42.0, 69.0, 59.0, 61.0, 66.0, 69.0, 71.0, 73.0, 72.0, 67.0, 58.0, 67.0, 49.0, 62.0, 53.0, 54.0, 70.0, 47.0, 47.0, 51.0] (mean = 62.34)


2021-09-16 15:04:53,726	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [37.0, 24.0, 55.0, 36.0, 51.0, 45.0, 52.0, 47.0, 35.0, 28.0, 36.0, 50.0, 36.0, 37.0, 17.0, 40.0, 41.0, 33.0, 29.0, 43.0, 44.0, 42.0, 43.0, 50.0, 37.0, 40.0, 39.0, 36.0, 17.0, 39.0, 48.0, 62.0, 37.0, 18.0, 50.0, 55.0, 26.0, 31.0, 54.0, 42.0, 49.0, 30.0, 47.0, 53.0, 65.0, 49.0, 45.0, 46.0, 47.0, 26.0] (mean = 40.78)


2021-09-16 15:05:13,952	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [69.0, 76.0, 70.0, 70.0, 76.0, 72.0, 68.0, 71.0, 52.0, 76.0, 62.0, 73.0, 61.0, 63.0, 56.0, 69.0, 61.0, 70.0, 73.0, 70.0, 76.0, 72.0, 77.0, 70.0, 77.0, 60.0, 48.0, 69.0, 69.0, 79.0, 70.0, 79.0, 75.0, 56.0, 66.0, 73.0, 77.0, 71.0, 70.0, 78.0, 58.0, 68.0, 75.0, 79.0, 46.0, 57.0, 70.0, 69.0, 68.0, 56.0] (mean = 68.32)


2021-09-16 15:05:34,225	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [69.0, 67.0, 67.0, 59.0, 56.0, 55.0, 64.0, 62.0, 71.0, 57.0, 73.0, 70.0, 63.0, 58.0, 65.0, 67.0, 73.0, 45.0, 68.0, 64.0, 72.0, 64.0, 67.0, 66.0, 65.0, 67.0, 60.0, 59.0, 59.0, 60.0, 71.0, 68.0, 71.0, 66.0, 67.0, 65.0, 70.0, 38.0, 49.0, 70.0, 70.0, 54.0, 68.0, 55.0, 66.0, 58.0, 66.0, 74.0, 76.0, 59.0] (mean = 63.86)


2021-09-16 15:05:54,276	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [40.0, 46.0, 15.0, 16.0, 50.0, 65.0, 54.0, 62.0, 46.0, 56.0, 46.0, 51.0, 56.0, 36.0, 22.0, 32.0, 25.0, 47.0, 20.0, 54.0, 61.0, 46.0, 13.0, 41.0, 23.0, 27.0, 31.0, 27.0, 44.0, 56.0, 30.0, 36.0, 33.0, 20.0, 52.0, 54.0, 45.0, 36.0, 21.0, 33.0, 20.0, 22.0, 61.0, 34.0, 39.0, 52.0, 42.0, 36.0, 55.0, 30.0] (mean = 39.18)


2021-09-16 15:06:16,207	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [78.0, 78.0, 81.0, 76.0, 73.0, 71.0, 75.0, 73.0, 80.0, 69.0, 75.0, 72.0, 71.0, 65.0, 75.0, 76.0, 58.0, 41.0, 73.0, 62.0, 79.0, 70.0, 76.0, 68.0, 76.0, 76.0, 75.0, 62.0, 78.0, 79.0, 84.0, 72.0, 76.0, 68.0, 74.0, 77.0, 72.0, 75.0, 75.0, 69.0, 76.0, 72.0, 64.0, 69.0, 83.0, 75.0, 74.0, 73.0, 67.0, 70.0] (mean = 72.52)


2021-09-16 15:06:36,755	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [67.0, 70.0, 72.0, 69.0, 66.0, 61.0, 70.0, 66.0, 59.0, 75.0, 67.0, 71.0, 70.0, 69.0, 66.0, 71.0, 72.0, 69.0, 59.0, 62.0, 68.0, 72.0, 73.0, 69.0, 72.0, 65.0, 60.0, 75.0, 63.0, 61.0, 61.0, 64.0, 60.0, 59.0, 65.0, 70.0, 66.0, 77.0, 65.0, 71.0, 69.0, 69.0, 73.0, 52.0, 72.0, 68.0, 75.0, 58.0, 79.0, 78.0] (mean = 67.6)


2021-09-16 15:06:57,353	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [16.0, 20.0, 25.0, 36.0, 57.0, 34.0, 52.0, 30.0, 37.0, 54.0, 21.0, 55.0, 13.0, 31.0, 72.0, 15.0, 24.0, 37.0, 40.0, 44.0, 39.0, 35.0, 23.0, 56.0, 51.0, 24.0, 39.0, 22.0, 56.0, 27.0, 14.0, 21.0, 53.0, 50.0, 26.0, 55.0, 49.0, 58.0, 36.0, 19.0, 41.0, 22.0, 22.0, 51.0, 35.0, 36.0, 46.0, 44.0, 53.0, 48.0] (mean = 37.28)


2021-09-16 15:07:17,849	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [76.0, 78.0, 85.0, 79.0, 80.0, 81.0, 80.0, 79.0, 69.0, 63.0, 81.0, 80.0, 77.0, 77.0, 77.0, 79.0, 80.0, 47.0, 82.0, 83.0, 80.0, 77.0, 77.0, 47.0, 77.0, 75.0, 69.0, 82.0, 82.0, 81.0, 80.0, 79.0, 82.0, 75.0, 80.0, 78.0, 82.0, 79.0, 82.0, 79.0, 79.0, 84.0, 77.0, 78.0, 83.0, 74.0, 79.0, 81.0, 70.0, 72.0] (mean = 77.06)


2021-09-16 15:07:38,227	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [75.0, 75.0, 50.0, 72.0, 53.0, 65.0, 56.0, 64.0, 69.0, 71.0, 73.0, 58.0, 65.0, 48.0, 72.0, 71.0, 53.0, 50.0, 73.0, 75.0, 63.0, 72.0, 69.0, 71.0, 54.0, 73.0, 72.0, 63.0, 70.0, 70.0, 71.0, 72.0, 75.0, 70.0, 59.0, 72.0, 76.0, 70.0, 68.0, 69.0, 76.0, 75.0, 72.0, 62.0, 57.0, 78.0, 72.0, 74.0, 65.0, 71.0] (mean = 67.38)


2021-09-16 15:07:58,751	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [36.0, 42.0, 20.0, 14.0, 45.0, 15.0, 45.0, 40.0, 55.0, 10.0, 44.0, 35.0, 50.0, 22.0, 39.0, 27.0, 24.0, 56.0, 25.0, 51.0, 58.0, 14.0, 15.0, 61.0, 46.0, 18.0, 26.0, 48.0, 46.0, 37.0, 16.0, 42.0, 10.0, 49.0, 42.0, 43.0, 47.0, 32.0, 61.0, 17.0, 14.0, 60.0, 19.0, 9.0, 40.0, 39.0, 24.0, 43.0, 38.0, 17.0] (mean = 34.52)


2021-09-16 15:08:19,425	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [83.0, 81.0, 71.0, 81.0, 82.0, 83.0, 81.0, 77.0, 82.0, 86.0, 78.0, 84.0, 74.0, 84.0, 81.0, 79.0, 81.0, 29.0, 82.0, 82.0, 80.0, 79.0, 86.0, 81.0, 79.0, 84.0, 79.0, 81.0, 66.0, 84.0, 80.0, 70.0, 78.0, 83.0, 80.0, 82.0, 82.0, 51.0, 83.0, 84.0, 82.0, 79.0, 83.0, 86.0, 80.0, 82.0, 78.0, 83.0, 78.0, 85.0] (mean = 78.98)


2021-09-16 15:08:39,935	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [73.0, 71.0, 77.0, 61.0, 52.0, 74.0, 69.0, 75.0, 75.0, 73.0, 79.0, 71.0, 76.0, 48.0, 79.0, 73.0, 80.0, 69.0, 64.0, 83.0, 72.0, 76.0, 78.0, 70.0, 58.0, 79.0, 74.0, 75.0, 64.0, 69.0, 72.0, 76.0, 59.0, 66.0, 81.0, 82.0, 68.0, 75.0, 64.0, 69.0, 80.0, 79.0, 78.0, 71.0, 76.0, 76.0, 70.0, 77.0, 80.0, 73.0] (mean = 72.18)


2021-09-16 15:09:00,558	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [45.0, 24.0, 15.0, 19.0, 41.0, 45.0, 27.0, 27.0, 15.0, 23.0, 48.0, 48.0, 19.0, 31.0, 40.0, 14.0, 61.0, 50.0, 37.0, 37.0, 18.0, 56.0, 49.0, 21.0, 19.0, 29.0, 15.0, 36.0, 17.0, 48.0, 50.0, 15.0, 29.0, 28.0, 50.0, 36.0, 21.0, 17.0, 38.0, 35.0, 42.0, 48.0, 46.0, 27.0, 14.0, 34.0, 56.0, 28.0, 53.0, 16.0] (mean = 33.14)


2021-09-16 15:09:21,214	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [85.0, 75.0, 78.0, 85.0, 76.0, 82.0, 88.0, 61.0, 87.0, 87.0, 86.0, 86.0, 68.0, 75.0, 64.0, 82.0, 88.0, 66.0, 84.0, 65.0, 86.0, 84.0, 87.0, 78.0, 81.0, 84.0, 82.0, 85.0, 84.0, 79.0, 87.0, 76.0, 83.0, 85.0, 82.0, 89.0, 84.0, 84.0, 86.0, 72.0, 85.0, 72.0, 87.0, 81.0, 84.0, 66.0, 85.0, 85.0, 85.0, 81.0] (mean = 80.74)


2021-09-16 15:09:41,687	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [80.0, 78.0, 79.0, 85.0, 75.0, 75.0, 87.0, 84.0, 65.0, 81.0, 82.0, 88.0, 76.0, 78.0, 83.0, 82.0, 81.0, 78.0, 74.0, 70.0, 57.0, 74.0, 70.0, 82.0, 75.0, 77.0, 83.0, 80.0, 64.0, 57.0, 73.0, 79.0, 66.0, 76.0, 78.0, 86.0, 80.0, 74.0, 63.0, 82.0, 75.0, 74.0, 71.0, 63.0, 80.0, 77.0, 68.0, 72.0, 79.0, 41.0] (mean = 75.14)


2021-09-16 15:10:02,309	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [30.0, 21.0, 15.0, 31.0, 17.0, 32.0, 47.0, 26.0, 53.0, 7.0, 24.0, 50.0, 59.0, 22.0, 22.0, 38.0, 34.0, 15.0, 40.0, 49.0, 36.0, 18.0, 23.0, 16.0, 16.0, 40.0, 23.0, 29.0, 33.0, 27.0, 17.0, 22.0, 19.0, 31.0, 38.0, 42.0, 57.0, 34.0, 23.0, 23.0, 17.0, 14.0, 9.0, 16.0, 36.0, 28.0, 39.0, 26.0, 22.0, 38.0] (mean = 28.88)


2021-09-16 15:10:22,912	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [78.0, 74.0, 83.0, 69.0, 87.0, 84.0, 83.0, 75.0, 88.0, 86.0, 88.0, 85.0, 71.0, 71.0, 62.0, 88.0, 83.0, 86.0, 86.0, 86.0, 86.0, 78.0, 86.0, 44.0, 82.0, 87.0, 84.0, 84.0, 84.0, 84.0, 81.0, 85.0, 85.0, 89.0, 83.0, 89.0, 87.0, 86.0, 86.0, 89.0, 88.0, 84.0, 83.0, 65.0, 87.0, 78.0, 84.0, 88.0, 78.0, 87.0] (mean = 81.88)


2021-09-16 15:10:43,558	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [85.0, 86.0, 84.0, 88.0, 86.0, 81.0, 81.0, 82.0, 77.0, 67.0, 79.0, 89.0, 70.0, 83.0, 82.0, 77.0, 80.0, 78.0, 81.0, 88.0, 86.0, 88.0, 86.0, 79.0, 84.0, 81.0, 73.0, 79.0, 83.0, 68.0, 89.0, 89.0, 77.0, 83.0, 87.0, 85.0, 80.0, 90.0, 86.0, 91.0, 76.0, 87.0, 85.0, 46.0, 79.0, 79.0, 67.0, 72.0, 77.0, 41.0] (mean = 79.94)


2021-09-16 15:11:04,064	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [30.0, 38.0, 18.0, 33.0, 19.0, 42.0, 30.0, 9.0, 37.0, 31.0, -11.0, 16.0, 41.0, 18.0, 17.0, 30.0, 27.0, 12.0, 18.0, -11.0, 6.0, 16.0, 17.0, 10.0, 17.0, 17.0, 22.0, 33.0, 6.0, 2.0, 1.0, 15.0, 19.0, 27.0, 32.0, 11.0, 17.0, 29.0, 17.0, 23.0, 7.0, -11.0, 32.0, 15.0, 44.0, 14.0, 6.0, 15.0, 42.0, 1.0] (mean = 18.92)


2021-09-16 15:11:26,036	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [88.0, 86.0, 83.0, 84.0, 60.0, 69.0, 84.0, 70.0, 55.0, 34.0, 88.0, 59.0, 48.0, 87.0, 88.0, 87.0, 83.0, 54.0, 84.0, 61.0, 88.0, 86.0, 88.0, 56.0, 87.0, 87.0, 88.0, 89.0, 13.0, 79.0, 88.0, 89.0, 89.0, 86.0, 60.0, 87.0, 83.0, 86.0, 79.0, 85.0, 64.0, 84.0, 87.0, 85.0, 83.0, 72.0, 89.0, 85.0, 84.0, 60.0] (mean = 76.76)


2021-09-16 15:11:47,044	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [49.0, 89.0, 79.0, 87.0, 77.0, 88.0, 85.0, 89.0, 70.0, 88.0, 89.0, 91.0, 81.0, 88.0, 89.0, 92.0, 89.0, 91.0, 88.0, 89.0, 68.0, 92.0, 89.0, 86.0, 69.0, 82.0, 73.0, 89.0, 87.0, 87.0, 89.0, 87.0, 81.0, 84.0, 87.0, 88.0, 84.0, 89.0, 77.0, 90.0, 89.0, 80.0, 88.0, 89.0, 89.0, 89.0, 65.0, 87.0, 88.0, 88.0] (mean = 84.36)


2021-09-16 15:12:09,012	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [52.0, 52.0, -11.0, 47.0, 8.0, 34.0, 27.0, 27.0, 32.0, 9.0, -6.0, -13.0, 27.0, 50.0, 50.0, 14.0, 64.0, 20.0, 31.0, 11.0, 30.0, 12.0, 17.0, 7.0, 21.0, 25.0, 19.0, 28.0, 50.0, 16.0, 18.0, 15.0, 12.0, 15.0, 29.0, 45.0, 43.0, 25.0, 12.0, 33.0, 16.0, 15.0, 30.0, 21.0, 36.0, 12.0, 12.0, 43.0, 46.0, 18.0] (mean = 24.92)


2021-09-16 15:12:29,729	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [89.0, 90.0, 89.0, 92.0, 75.0, 87.0, 89.0, 88.0, 65.0, 90.0, 88.0, 91.0, 62.0, 90.0, 61.0, 90.0, 87.0, 34.0, 90.0, 86.0, 47.0, 88.0, 88.0, 61.0, 91.0, 82.0, 90.0, 80.0, 63.0, 79.0, 88.0, 89.0, 87.0, 87.0, 85.0, 88.0, 90.0, 91.0, 90.0, 87.0, 91.0, 87.0, 89.0, 81.0, 87.0, 91.0, 82.0, 84.0, 88.0, 82.0] (mean = 82.92)


2021-09-16 15:12:50,252	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [67.0, 91.0, 86.0, 84.0, 85.0, 90.0, 91.0, 91.0, 79.0, 86.0, 89.0, 91.0, 60.0, 84.0, 91.0, 89.0, 91.0, 91.0, 86.0, 89.0, 81.0, 86.0, 91.0, 82.0, 79.0, 89.0, 79.0, 61.0, 90.0, 87.0, 91.0, 87.0, 84.0, 72.0, 83.0, 92.0, 81.0, 88.0, 57.0, 88.0, 90.0, 86.0, 91.0, 89.0, 83.0, 91.0, 75.0, 86.0, 89.0, 84.0] (mean = 84.46)


2021-09-16 15:13:10,808	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [50.0, 16.0, 26.0, 56.0, 46.0, 28.0, 29.0, 19.0, 31.0, 13.0, -2.0, 18.0, 25.0, 50.0, 27.0, 15.0, 20.0, 17.0, 40.0, 37.0, 10.0, 22.0, 27.0, 31.0, 17.0, 13.0, 25.0, 20.0, 42.0, 36.0, 19.0, 16.0, 18.0, 20.0, 31.0, 27.0, 17.0, 25.0, 19.0, 15.0, 16.0, 37.0, 20.0, 16.0, 16.0, 26.0, 34.0, 31.0, 32.0, 48.0] (mean = 25.74)


2021-09-16 15:13:31,296	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [90.0, 91.0, 53.0, 90.0, 89.0, 77.0, 88.0, 84.0, 70.0, 90.0, 91.0, 89.0, 71.0, 91.0, 65.0, 91.0, 74.0, 73.0, 89.0, 90.0, 48.0, 86.0, 90.0, 91.0, 89.0, 72.0, 87.0, 90.0, 76.0, 89.0, 91.0, 76.0, 91.0, 88.0, 91.0, 89.0, 91.0, 89.0, 89.0, 91.0, 91.0, 84.0, 90.0, 90.0, 89.0, 89.0, 90.0, 90.0, 90.0, 67.0] (mean = 84.4)


2021-09-16 15:13:51,978	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [88.0, 89.0, 86.0, 78.0, 77.0, 87.0, 85.0, 91.0, 61.0, 88.0, 77.0, 90.0, 87.0, 85.0, 92.0, 89.0, 91.0, 87.0, 91.0, 91.0, 80.0, 66.0, 87.0, 90.0, 74.0, 87.0, 87.0, 91.0, 90.0, 74.0, 91.0, 80.0, 77.0, 88.0, 83.0, 87.0, 78.0, 87.0, 72.0, 87.0, 91.0, 92.0, 89.0, 52.0, 82.0, 84.0, 50.0, 86.0, 78.0, 70.0] (mean = 82.8)


2021-09-16 15:14:12,653	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [13.0, 17.0, -14.0, 12.0, -14.0, 27.0, 13.0, 2.0, 15.0, -12.0, -14.0, -3.0, 21.0, 15.0, 12.0, 37.0, 26.0, 27.0, 36.0, 18.0, 24.0, -14.0, 27.0, 20.0, 14.0, 53.0, -12.0, -1.0, 21.0, 13.0, -12.0, 37.0, 10.0, 28.0, 32.0, 38.0, 1.0, 6.0, 43.0, 19.0, -12.0, 46.0, 20.0, -14.0, 35.0, 14.0, 39.0, 4.0, 46.0, -14.0] (mean = 14.9)


2021-09-16 15:14:33,187	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [89.0, 84.0, 76.0, 84.0, 83.0, 86.0, 90.0, 87.0, 74.0, 90.0, 89.0, 91.0, 75.0, 90.0, 92.0, 92.0, 87.0, 78.0, 89.0, 88.0, 91.0, 86.0, 90.0, 76.0, 82.0, 84.0, 90.0, 88.0, 79.0, 90.0, 92.0, 90.0, 88.0, 74.0, 90.0, 90.0, 90.0, 89.0, 92.0, 87.0, 90.0, 87.0, 91.0, 89.0, 88.0, 81.0, 89.0, 83.0, 84.0, 86.0] (mean = 86.4)


2021-09-16 15:14:53,851	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [93.0, 87.0, 78.0, 91.0, 88.0, 85.0, 91.0, 92.0, 64.0, 90.0, 90.0, 93.0, 89.0, 74.0, 92.0, 87.0, 90.0, 90.0, 86.0, 91.0, 75.0, 85.0, 91.0, 84.0, 90.0, 88.0, 64.0, 90.0, 75.0, 92.0, 87.0, 93.0, 90.0, 67.0, 91.0, 90.0, 74.0, 90.0, 87.0, 91.0, 89.0, 90.0, 59.0, 86.0, 85.0, 84.0, 86.0, 87.0, 85.0, 90.0] (mean = 85.52)


2021-09-16 15:15:14,613	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [34.0, 38.0, 16.0, 46.0, 30.0, 18.0, 21.0, 33.0, 47.0, 14.0, 25.0, 24.0, 23.0, 26.0, 14.0, 17.0, 15.0, 10.0, 36.0, 18.0, 43.0, 39.0, 44.0, 48.0, 13.0, 28.0, 24.0, 25.0, 18.0, 43.0, 19.0, 39.0, 10.0, 47.0, 26.0, 14.0, 32.0, 26.0, 28.0, 23.0, 20.0, 12.0, 23.0, 8.0, 26.0, 17.0, 24.0, 14.0, 29.0, 17.0] (mean = 25.68)


2021-09-16 15:15:35,218	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [91.0, 82.0, 90.0, 87.0, 92.0, 81.0, 90.0, 90.0, 87.0, 91.0, 89.0, 91.0, 84.0, 92.0, 86.0, 90.0, 90.0, 91.0, 90.0, 90.0, 90.0, 89.0, 92.0, 92.0, 92.0, 87.0, 91.0, 91.0, 90.0, 89.0, 91.0, 92.0, 87.0, 91.0, 90.0, 88.0, 90.0, 88.0, 89.0, 91.0, 90.0, 91.0, 91.0, 86.0, 90.0, 84.0, 89.0, 87.0, 89.0, 90.0] (mean = 89.22)


2021-09-16 15:15:55,847	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [92.0, 92.0, 93.0, 90.0, 62.0, 75.0, 91.0, 89.0, 85.0, 83.0, 91.0, 91.0, 88.0, 91.0, 91.0, 89.0, 94.75, 74.0, 90.0, 92.0, 72.0, 92.0, 87.0, 94.75, 92.0, 86.0, 89.0, 87.0, 91.0, 81.0, 87.0, 95.5, 87.0, 91.0, 87.0, 92.0, 79.0, 89.0, 86.0, 92.0, 90.0, 91.0, 50.0, 91.0, 90.0, 91.0, 85.0, 85.0, 80.0, 76.0] (mean = 86.6)


2021-09-16 15:16:16,508	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [13.0, 14.0, 10.0, 18.0, 21.0, 15.0, 17.0, 21.0, 14.0, 18.0, 17.0, 14.0, 17.0, 13.0, 14.0, 15.0, 20.0, 10.0, 19.0, 14.0, 12.0, 15.0, 31.0, 15.0, 15.0, 18.0, 16.0, 15.0, 26.0, 17.0, 24.0, 11.0, 16.0, 18.0, 14.0, 16.0, 43.0, 14.0, 15.0, 16.0, 23.0, 12.0, 16.0, 27.0, 19.0, 21.0, 26.0, 16.0, 22.0, 14.0] (mean = 17.54)


In [5]:
# Cross-play sweep for the "simple_4" configuration.
#
# Every `checkpoint_interval`-th checkpoint from 1 up to `max_checkpoint` is
# evaluated with `cross_play`, using the same checkpoint number for both runs.
# The mean episode reward of each of the three returned reward sequences is
# appended to its own list, so the three lists line up with `all_checkpoints`.
run_name_0 = "simple_4_123"
run_name_1 = "simple_4_456"
config_name = "simple_4"
heterogeneous = True
max_checkpoint = 501
checkpoint_interval = 25  # stride between evaluated checkpoints
num_episodes = 50
random_start = False


def _mean_reward(episode_rewards):
    """Return the arithmetic mean of the values in `episode_rewards`.

    The divisor is the number of rewards actually present rather than the
    requested `num_episodes`, so the mean stays correct even if an evaluation
    returns a different number of episodes than was asked for.

    Raises:
        ZeroDivisionError: if `episode_rewards` is empty (an evaluation that
            produced no episodes is surfaced instead of being recorded as 0).
    """
    return sum(episode_rewards) / len(episode_rewards)


config = load_config(config_name)
config["env_config"]["random_start"] = random_start

# Result lists are re-created on every run of this cell, so re-executing the
# cell does not append to stale results.
self_play_mean_reward_4_0 = []
self_play_mean_reward_4_1 = []
cross_play_mean_reward_4 = []
all_checkpoints = list(range(1, max_checkpoint + 1, checkpoint_interval))
for checkpoint in all_checkpoints:
    # NOTE(review): presumably sp_0 / sp_1 are the per-episode rewards of each
    # run playing with itself and cp those of the two runs paired together --
    # confirm against the definition of `cross_play`.
    sp_0, sp_1, cp = cross_play(
        run_name_0, run_name_1, checkpoint, checkpoint, config,
        heterogeneous=heterogeneous, num_episodes=num_episodes
    )
    self_play_mean_reward_4_0.append(_mean_reward(sp_0))
    self_play_mean_reward_4_1.append(_mean_reward(sp_1))
    cross_play_mean_reward_4.append(_mean_reward(cp))

2021-09-16 15:16:37,178	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [52.5, 59.5, 54.5, 54.5, 58.5, 66.5, 44.5, 51.5, 49.5, 62.5, 62.5, 64.5, 55.5, 56.5, 59.5, 69.5, 61.5, 51.5, 63.5, 61.5, 60.5, 46.5, 58.5, 63.5, 49.5, 68.5, 45.5, 45.5, 56.5, 63.5, 51.5, 61.5, 46.5, 53.5, 51.5, 46.5, 60.5, 57.5, 52.5, 69.5, 42.5, 54.5, 52.5, 54.5, 44.5, 44.5, 50.5, 61.5, 46.5, 63.5] (mean = 55.68)


2021-09-16 15:17:09,424	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [46.5, 57.5, 56.5, 59.5, 59.5, 64.5, 35.5, 55.5, 56.5, 46.5, 46.5, 58.5, 49.5, 57.5, 47.5, 61.5, 47.5, 69.5, 69.5, 54.5, 53.5, 60.5, 49.5, 49.5, 47.5, 62.5, 42.5, 69.5, 46.5, 54.5, 53.5, 58.5, 55.5, 40.5, 53.5, 51.5, 56.5, 51.5, 43.5, 51.5, 40.5, 50.5, 62.5, 63.5, 57.5, 63.5, 55.5, 56.5, 53.5, 54.5] (mean = 54.2)


2021-09-16 15:17:46,460	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [49.5, 48.5, 58.5, 54.5, 64.5, 56.5, 42.5, 49.5, 57.5, 49.5, 38.5, 42.5, 51.5, 44.5, 41.5, 56.5, 50.5, 57.5, 72.5, 48.5, 64.5, 63.5, 61.5, 49.5, 48.5, 66.5, 49.5, 40.5, 48.5, 51.5, 47.5, 49.5, 49.5, 47.5, 51.5, 56.5, 48.5, 54.5, 49.5, 37.5, 52.5, 56.5, 50.5, 50.5, 46.5, 45.5, 59.5, 61.5, 32.5, 61.5] (mean = 51.76)


2021-09-16 15:18:18,650	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [49.5, 45.5, 58.5, 53.5, 62.5, 56.5, 54.5, 62.5, 70.5, 62.5, 60.5, 61.5, 58.5, 65.5, 55.5, 68.5, 56.5, 62.5, 61.5, 65.5, 61.5, 61.5, 70.5, 57.5, 64.5, 52.5, 51.5, 67.5, 51.5, 52.5, 52.5, 61.5, 49.5, 71.5, 59.5, 51.5, 63.5, 57.5, 58.5, 55.5, 65.5, 72.5, 58.5, 58.5, 47.5, 61.5, 65.5, 43.5, 63.5, 63.5] (mean = 59.28)


2021-09-16 15:18:51,606	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [44.5, 58.5, 46.5, 55.5, 59.5, 69.5, 73.5, 62.5, 46.5, 67.5, 55.5, 65.5, 69.5, 53.5, 50.5, 54.5, 68.5, 45.5, 57.5, 60.5, 55.5, 61.5, 57.5, 67.5, 55.5, 54.5, 53.5, 49.5, 62.5, 54.5, 61.5, 69.5, 56.5, 60.5, 54.5, 59.5, 66.5, 41.5, 54.5, 58.5, 59.5, 51.5, 52.5, 57.5, 36.5, 55.5, 66.5, 51.5, 54.5, 65.5] (mean = 57.42)


2021-09-16 15:19:24,096	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [49.5, 33.5, 63.5, 70.5, 48.5, 59.5, 49.5, 48.5, 57.5, 66.5, 56.5, 56.5, 57.5, 62.5, 60.5, 52.5, 58.5, 61.5, 63.5, 60.5, 64.5, 53.5, 50.5, 71.5, 60.5, 53.5, 54.5, 60.5, 48.5, 44.5, 39.5, 40.5, 57.5, 68.5, 57.5, 55.5, 47.5, 49.5, 41.5, 49.5, 38.5, 55.5, 60.5, 59.5, 39.5, 62.5, 46.5, 57.5, 46.5, 70.5] (mean = 54.86)


2021-09-16 15:19:56,801	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [52.5, 49.5, 62.5, 66.5, 64.5, 61.5, 68.5, 72.5, 66.5, 63.5, 51.5, 63.5, 65.5, 56.5, 52.5, 56.5, 66.5, 56.5, 60.5, 69.5, 57.5, 55.5, 71.5, 62.5, 77.5, 53.5, 55.5, 62.5, 54.5, 69.5, 49.5, 62.5, 58.5, 63.5, 50.5, 43.5, 65.5, 43.5, 63.5, 57.5, 56.5, 54.5, 69.5, 44.5, 62.5, 51.5, 53.5, 47.5, 58.5, 67.5] (mean = 59.42)


2021-09-16 15:20:29,496	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [66.5, 63.5, 65.5, 65.5, 34.5, 71.5, 53.5, 63.5, 62.5, 56.5, 36.5, 55.5, 58.5, 42.5, 63.5, 55.5, 65.5, 46.5, 65.5, 58.5, 42.5, 66.5, 62.5, 58.5, 55.5, 59.5, 63.5, 58.5, 56.5, 58.5, 56.5, 75.5, 64.5, 60.5, 64.5, 66.5, 66.5, 58.5, 53.5, 58.5, 51.5, 58.5, 44.5, 51.5, 60.5, 62.5, 59.5, 47.5, 38.5, 61.5] (mean = 57.88)


2021-09-16 15:21:03,914	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [40.5, 49.5, 30.5, 37.5, 65.5, 41.5, 40.5, 47.5, 54.5, 59.5, 57.5, 32.5, 69.5, 68.5, 60.5, 36.5, 39.5, 74.5, 54.5, 55.5, 41.5, 53.5, 62.5, 44.5, 62.5, 39.5, 54.5, 49.5, 46.5, 68.5, 68.5, 57.5, 38.5, 29.5, 35.5, 42.5, 66.5, 47.5, 39.5, 47.5, 57.5, 53.5, 37.5, 47.5, 55.5, 51.5, 55.5, 53.5, 63.5, 48.5] (mean = 50.72)


2021-09-16 15:21:39,265	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [66.5, 53.5, 76.5, 70.5, 49.5, 64.5, 54.5, 73.5, 65.5, 52.5, 66.5, 59.5, 71.5, 67.5, 66.5, 79.5, 48.5, 66.5, 58.5, 56.5, 72.5, 58.5, 60.5, 58.5, 69.5, 50.5, 69.5, 48.5, 61.5, 57.5, 59.5, 57.5, 59.5, 62.5, 60.5, 48.5, 61.5, 61.5, 69.5, 65.5, 67.5, 69.5, 60.5, 66.5, 63.5, 65.5, 63.5, 64.5, 62.5, 55.5] (mean = 62.4)


2021-09-16 15:22:13,212	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [54.5, 55.5, 55.5, 60.5, 64.5, 62.5, 51.5, 64.5, 55.5, 58.5, 67.5, 72.5, 64.5, 57.5, 56.5, 57.5, 54.5, 48.5, 60.5, 59.5, 49.5, 69.5, 65.5, 60.5, 57.5, 54.5, 69.5, 71.5, 61.5, 69.5, 44.5, 53.5, 58.5, 60.5, 51.5, 55.5, 69.5, 73.5, 56.5, 75.5, 59.5, 55.5, 59.5, 64.5, 60.5, 70.5, 53.5, 73.5, 66.5, 62.5] (mean = 60.72)


2021-09-16 15:22:47,188	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [51.5, 59.5, 58.5, 60.5, 32.5, 69.5, 51.5, 41.5, 50.5, 80.5, 62.5, 59.5, 58.5, 42.5, 37.5, 61.5, 55.5, 31.5, 43.5, 45.5, 62.5, 49.5, 54.5, 53.5, 72.5, 52.5, 51.5, 45.5, 51.5, 50.5, 69.5, 41.5, 50.5, 58.5, 54.5, 74.5, 32.5, 71.5, 21.5, 57.5, 50.5, 67.5, 54.5, 51.5, 54.5, 59.5, 50.5, 52.5, 45.5, 49.5] (mean = 53.32)


2021-09-16 15:23:22,150	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [68.5, 70.5, 67.5, 61.5, 70.5, 55.5, 64.5, 68.5, 62.5, 68.5, 63.5, 63.5, 76.5, 75.5, 71.5, 72.5, 76.5, 65.5, 56.5, 61.5, 70.5, 68.5, 62.5, 68.5, 58.5, 70.5, 68.5, 64.5, 54.5, 64.5, 63.5, 69.5, 67.5, 59.5, 60.5, 64.5, 69.5, 63.5, 72.5, 81.5, 67.5, 52.5, 63.5, 43.5, 61.5, 63.5, 71.5, 57.5, 79.5, 76.5] (mean = 66.02)


2021-09-16 15:23:55,515	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [51.5, 56.5, 67.5, 53.5, 62.5, 61.5, 58.5, 60.5, 68.5, 61.5, 61.5, 65.5, 56.5, 66.5, 74.5, 50.5, 51.5, 69.5, 62.5, 56.5, 60.5, 50.5, 53.5, 37.5, 69.5, 56.5, 63.5, 57.5, 74.5, 68.5, 58.5, 61.5, 55.5, 56.5, 67.5, 56.5, 55.5, 72.5, 42.5, 45.5, 57.5, 61.5, 63.5, 62.5, 72.5, 70.5, 62.5, 47.5, 47.5, 72.5] (mean = 59.98)


2021-09-16 15:24:28,438	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [61.5, 57.5, 59.5, 67.5, 41.5, 63.5, 33.5, 54.5, 60.5, 50.5, 65.5, 63.5, 49.5, 43.5, 57.5, 65.5, 46.5, 46.5, 67.5, 46.5, 50.5, 61.5, 41.5, 60.5, 61.5, 39.5, 65.5, 58.5, 42.5, 64.5, 74.5, 63.5, 63.5, 48.5, 61.5, 49.5, 60.5, 41.5, 63.5, 73.5, 62.5, 41.5, 61.5, 67.5, 47.5, 69.5, 53.5, 63.5, 71.5, 41.5] (mean = 56.56)


2021-09-16 15:25:03,400	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [77.5, 73.5, 74.5, 77.5, 50.5, 71.5, 60.5, 67.5, 74.5, 69.5, 68.5, 74.5, 59.5, 56.5, 67.5, 51.5, 72.5, 74.5, 72.5, 58.5, 77.5, 75.5, 53.5, 65.5, 62.5, 64.5, 61.5, 65.5, 70.5, 76.5, 63.5, 72.5, 60.5, 73.5, 64.5, 69.5, 58.5, 64.5, 80.5, 66.5, 66.5, 69.5, 64.5, 76.5, 57.5, 77.5, 68.5, 63.5, 73.5, 66.5] (mean = 67.68)


2021-09-16 15:25:37,791	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [66.5, 51.5, 69.5, 55.5, 68.5, 44.5, 69.5, 59.5, 67.5, 56.5, 62.5, 62.5, 68.5, 72.5, 62.5, 66.5, 52.5, 62.5, 51.5, 63.5, 63.5, 69.5, 54.5, 78.5, 60.5, 71.5, 68.5, 52.5, 64.5, 63.5, 61.5, 60.5, 83.5, 60.5, 58.5, 58.5, 52.5, 77.5, 74.5, 71.5, 67.5, 63.5, 52.5, 59.5, 54.5, 71.5, 66.5, 60.5, 62.5, 62.5] (mean = 63.22)


2021-09-16 15:26:11,467	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [44.5, 46.5, 71.5, 74.5, 54.5, 48.5, 42.5, 76.5, 70.5, 53.5, 38.5, 52.5, 53.5, 69.5, 64.5, 44.5, 65.5, 70.5, 44.5, 54.5, 36.5, 48.5, 62.5, 58.5, 69.5, 62.5, 45.5, 60.5, 63.5, 62.5, 65.5, 62.5, 67.5, 56.5, 52.5, 46.5, 56.5, 49.5, 58.5, 44.5, 49.5, 62.5, 43.5, 60.5, 39.5, 54.5, 52.5, 38.5, 64.5, 35.5] (mean = 55.44)


2021-09-16 15:26:45,051	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [62.5, 70.5, 68.5, 70.5, 76.5, 64.5, 72.5, 72.5, 80.5, 64.5, 82.5, 67.5, 80.5, 70.5, 74.5, 66.5, 44.5, 69.5, 65.5, 71.5, 50.5, 61.5, 74.5, 66.5, 60.5, 59.5, 72.5, 79.5, 66.5, 72.5, 65.5, 64.5, 68.5, 73.5, 57.5, 68.5, 68.5, 55.5, 73.5, 74.5, 71.5, 76.5, 47.5, 70.5, 78.5, 63.5, 75.5, 69.5, 60.5, 69.5] (mean = 68.26)


2021-09-16 15:27:18,935	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [58.5, 57.5, 70.5, 74.5, 37.5, 72.5, 75.5, 77.5, 66.5, 68.5, 66.5, 62.5, 57.5, 61.5, 70.5, 64.5, 55.5, 52.5, 70.5, 66.5, 60.5, 70.5, 61.5, 65.5, 64.5, 59.5, 66.5, 68.5, 69.5, 77.5, 70.5, 62.5, 70.5, 56.5, 68.5, 67.5, 70.5, 57.5, 65.5, 82.5, 69.5, 64.5, 68.5, 78.5, 36.5, 67.5, 59.5, 57.5, 73.5, 64.5] (mean = 65.26)


2021-09-16 15:27:52,659	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [65.5, 66.5, 44.5, 66.5, 68.5, 66.5, 47.5, 68.5, 42.5, 69.5, 75.5, 64.5, 60.5, 41.5, 43.5, 49.5, 56.5, 67.5, 49.5, 68.5, 67.5, 69.5, 57.5, 59.5, 52.5, 55.5, 67.5, 38.5, 53.5, 49.5, 69.5, 57.5, 52.5, 21.5, 58.5, 62.5, 67.5, 71.5, 56.5, 60.5, 60.5, 66.5, 61.5, 48.5, 63.5, 42.5, 59.5, 59.5, 71.5, 63.5] (mean = 58.58)


2021-09-16 15:28:25,789	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [79.5, 80.5, 78.5, 77.5, 68.5, 59.5, 69.5, 71.5, 62.5, 62.5, 71.5, 73.5, 67.5, 75.5, 72.5, 73.5, 69.5, 73.5, 77.5, 65.5, 70.5, 68.5, 73.5, 66.5, 69.5, 71.5, 79.5, 70.5, 68.5, 71.5, 52.5, 70.5, 72.5, 72.5, 45.5, 67.5, 85.5, 75.5, 69.5, 79.5, 76.5, 72.5, 67.5, 80.5, 65.5, 64.5, 82.5, 68.5, 64.5, 66.5] (mean = 70.8)


2021-09-16 15:28:58,674	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [56.5, 54.5, 55.5, 79.5, 50.5, 65.5, 55.5, 69.5, 58.5, 48.5, 40.5, 74.5, 59.5, 58.5, 64.5, 61.5, 62.5, 70.5, 50.5, 67.5, 62.5, 77.5, 62.5, 77.5, 75.5, 67.5, 63.5, 69.5, 74.5, 69.5, 75.5, 59.5, 66.5, 66.5, 70.5, 52.5, 63.5, 76.5, 58.5, 69.5, 78.5, 53.5, 75.5, 73.5, 57.5, 54.5, 57.5, 65.5, 72.5, 55.5] (mean = 64.14)


2021-09-16 15:29:34,549	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [60.5, 45.5, 57.5, 70.5, 59.5, 70.5, 60.5, 71.5, 53.5, 72.5, 45.5, 37.5, 49.5, 54.5, 60.5, 37.5, 55.5, 51.5, 53.5, 66.5, 58.5, 60.5, 64.5, 46.5, 53.5, 58.5, 64.5, 72.5, 62.5, 59.5, 62.5, 74.5, 54.5, 61.5, 61.5, 68.5, 54.5, 51.5, 57.5, 66.5, 61.5, 53.5, 73.5, 60.5, 66.5, 54.5, 69.5, 65.5, 71.5, 73.5] (mean = 59.96)


2021-09-16 15:30:08,846	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [71.5, 83.5, 73.5, 70.5, 72.5, 74.5, 80.5, 74.5, 79.5, 78.5, 72.5, 73.5, 81.5, 64.5, 70.5, 80.5, 71.5, 71.5, 68.5, 80.5, 67.5, 72.5, 79.5, 80.5, 82.5, 78.5, 79.5, 77.5, 79.5, 77.5, 82.5, 62.5, 75.5, 74.5, 81.5, 66.5, 80.5, 81.5, 67.5, 47.5, 67.5, 59.5, 84.5, 72.5, 73.5, 78.5, 79.5, 69.5, 68.5, 74.5] (mean = 74.34)


2021-09-16 15:30:43,042	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [66.5, 60.5, 53.5, 71.5, 60.5, 77.5, 79.5, 61.5, 70.5, 68.5, 54.5, 62.5, 69.5, 60.5, 67.5, 71.5, 68.5, 65.5, 62.5, 69.5, 64.5, 70.5, 67.5, 73.5, 54.5, 70.5, 63.5, 47.5, 78.5, 62.5, 70.5, 54.5, 63.5, 73.5, 66.5, 50.5, 72.5, 54.5, 55.5, 60.5, 69.5, 71.5, 58.5, 42.5, 61.5, 63.5, 62.5, 57.5, 72.5, 60.5] (mean = 64.34)


2021-09-16 15:31:16,951	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [51.5, 65.5, 53.5, 36.5, 56.5, 36.5, 61.5, 73.5, 58.5, 69.5, 44.5, 61.5, 57.5, 49.5, 68.5, 73.5, 65.5, 40.5, 60.5, 65.5, 64.5, 63.5, 62.5, 56.5, 53.5, 52.5, 74.5, 65.5, 64.5, 44.5, 77.5, 52.5, 45.5, 61.5, 56.5, 59.5, 46.5, 67.5, 71.5, 53.5, 66.5, 50.5, 69.5, 43.5, 50.5, 45.5, 59.5, 74.5, 63.5, 60.5] (mean = 58.56)


2021-09-16 15:31:50,608	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [80.5, 84.5, 82.5, 86.5, 79.5, 75.5, 75.5, 62.5, 84.5, 77.5, 58.5, 85.5, 84.5, 71.5, 74.5, 79.5, 78.5, 82.5, 83.5, 72.5, 77.5, 78.5, 80.5, 71.5, 68.5, 81.5, 82.5, 78.5, 68.5, 81.5, 81.5, 87.5, 78.5, 69.5, 84.5, 81.5, 82.5, 83.5, 77.5, 86.5, 75.5, 85.5, 82.5, 81.5, 83.5, 81.5, 82.5, 85.5, 83.5, 77.5] (mean = 79.22)


2021-09-16 15:32:23,896	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [73.5, 77.5, 63.5, 67.5, 62.5, 69.5, 74.5, 49.5, 70.5, 74.5, 83.5, 79.5, 75.5, 83.5, 61.5, 74.5, 79.5, 71.5, 77.5, 82.5, 63.5, 55.5, 75.5, 72.5, 67.5, 68.5, 70.5, 57.5, 55.5, 66.5, 70.5, 67.5, 73.5, 69.5, 64.5, 65.5, 70.5, 74.5, 79.5, 70.5, 67.5, 61.5, 81.5, 74.5, 70.5, 36.5, 76.5, 73.5, 63.5, 61.5] (mean = 69.56)


2021-09-16 15:32:57,581	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [44.5, 50.5, 54.5, 60.5, 56.5, 41.5, 63.5, 69.5, 65.5, 50.5, 59.5, 59.5, 67.5, 50.5, 68.5, 58.5, 53.5, 42.5, 65.5, 56.5, 48.5, 48.5, 50.5, 46.5, 57.5, 59.5, 65.5, 61.5, 62.5, 37.5, 82.5, 68.5, 59.5, 75.5, 61.5, 63.5, 58.5, 64.5, 57.5, 24.5, 67.5, 56.5, 71.5, 67.5, 64.5, 68.5, 69.5, 66.5, 62.5, 72.5] (mean = 59.2)


2021-09-16 15:33:32,821	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [89.5, 85.5, 85.5, 83.5, 68.5, 81.5, 81.5, 81.5, 81.5, 83.5, 75.5, 85.5, 79.5, 68.5, 74.5, 87.5, 77.5, 83.5, 82.5, 68.5, 83.5, 87.5, 83.5, 84.5, 84.5, 86.5, 63.5, 74.5, 73.5, 82.5, 84.5, 83.5, 74.5, 77.5, 86.5, 83.5, 87.5, 83.5, 88.5, 84.5, 83.5, 83.5, 85.5, 86.5, 85.5, 74.5, 87.5, 85.5, 86.5, 70.5] (mean = 81.44)


2021-09-16 15:34:08,417	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [78.5, 67.5, 68.5, 76.5, 72.5, 70.5, 76.5, 77.5, 65.5, 57.5, 62.5, 74.5, 76.5, 63.5, 56.5, 53.5, 56.5, 64.5, 67.5, 79.5, 74.5, 69.5, 78.5, 73.5, 77.5, 60.5, 75.5, 55.5, 85.5, 75.5, 84.5, 58.5, 66.5, 71.5, 66.5, 73.5, 72.5, 75.5, 81.5, 54.5, 64.5, 82.5, 54.5, 67.5, 71.5, 73.5, 76.5, 74.5, 64.5, 66.5] (mean = 69.86)


2021-09-16 15:34:43,095	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [69.5, 72.5, 69.5, 65.5, 46.5, 57.5, 51.5, 76.5, 73.5, 70.5, 82.5, 33.5, 43.5, 58.5, 77.5, 68.5, 62.5, 33.5, 72.5, 66.5, 66.5, 57.5, 36.5, 58.5, 66.5, 64.5, 59.5, 49.5, 65.5, 58.5, 72.5, 52.5, 58.5, 44.5, 83.5, 75.5, 33.5, 56.5, 64.5, 62.5, 57.5, 77.5, 43.5, 62.5, 54.5, 63.5, 45.5, 45.5, 69.5, 60.5] (mean = 60.38)


2021-09-16 15:35:17,449	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [85.5, 80.5, 86.5, 84.5, 61.5, 83.5, 82.5, 85.5, 89.5, 89.5, 75.5, 84.5, 85.5, 82.5, 65.5, 88.5, 86.5, 87.5, 72.5, 77.5, 82.5, 86.5, 91.5, 84.5, 68.5, 86.5, 84.5, 85.5, 79.5, 90.5, 86.5, 85.5, 68.5, 87.5, 80.5, 86.5, 82.5, 85.5, 85.5, 85.5, 78.5, 83.5, 80.5, 84.5, 83.5, 82.5, 84.5, 81.5, 86.5, 79.5] (mean = 82.68)


2021-09-16 15:35:51,491	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [82.5, 64.5, 70.5, 71.5, 61.5, 69.5, 66.5, 64.5, 78.5, 70.5, 72.5, 71.5, 69.5, 70.5, 61.5, 80.5, 70.5, 70.5, 64.5, 65.5, 64.5, 78.5, 65.5, 69.5, 75.5, 78.5, 61.5, 74.5, 71.5, 73.5, 73.5, 67.5, 82.5, 81.5, 69.5, 63.5, 66.5, 77.5, 73.5, 76.5, 75.5, 80.5, 69.5, 66.5, 75.5, 78.5, 76.5, 77.5, 68.5, 71.5] (mean = 71.64)


2021-09-16 15:36:25,423	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [56.5, 44.5, 52.5, 74.5, 71.5, 56.5, 71.5, 57.5, 68.5, 61.5, 63.5, 52.5, 54.5, 71.5, 56.5, 48.5, 72.5, 47.5, 50.5, 64.5, 55.5, 40.5, 46.5, 59.5, 58.5, 70.5, 47.5, 66.5, 65.5, 63.5, 74.5, 61.5, 53.5, 65.5, 61.5, 58.5, 69.5, 74.5, 61.5, 63.5, 70.5, 75.5, 75.5, 47.5, 65.5, 66.5, 54.5, 67.5, 24.5, 66.5] (mean = 60.58)


2021-09-16 15:36:59,947	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [92.5, 90.5, 84.5, 91.5, 72.5, 69.5, 84.5, 70.5, 87.5, 88.5, 70.5, 89.5, 87.5, 82.5, 90.5, 77.5, 91.5, 89.5, 73.5, 70.5, 91.5, 88.5, 88.5, 86.5, 47.5, 86.5, 91.5, 78.5, 88.5, 84.5, 73.5, 85.5, 67.5, 86.5, 86.5, 88.5, 85.5, 90.5, 91.5, 86.5, 73.5, 90.5, 87.5, 79.5, 87.5, 88.5, 86.5, 90.5, 86.5, 67.5] (mean = 83.34)


2021-09-16 15:37:34,013	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [80.5, 66.5, 80.5, 67.5, 64.5, 61.5, 68.5, 75.5, 76.5, 75.5, 69.5, 70.5, 79.5, 82.5, 72.5, 68.5, 70.5, 68.5, 73.5, 70.5, 53.5, 66.5, 77.5, 73.5, 79.5, 72.5, 60.5, 80.5, 71.5, 79.5, 78.5, 72.5, 67.5, 78.5, 82.5, 61.5, 72.5, 72.5, 74.5, 74.5, 74.5, 68.5, 75.5, 75.5, 67.5, 81.5, 67.5, 76.5, 83.5, 73.5] (mean = 72.74)


2021-09-16 15:38:09,778	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [58.5, 69.5, 68.5, 54.5, 70.5, 59.5, 60.5, 59.5, 44.5, 59.5, 76.5, 61.5, 50.5, 65.5, 60.5, 54.5, 55.5, 71.5, 70.5, 54.5, 63.5, 71.5, 61.5, 68.5, 66.5, 50.5, 72.5, 67.5, 73.5, 62.5, 46.5, 52.5, 62.5, 71.5, 68.5, 49.5, 60.5, 69.5, 53.5, 68.5, 51.5, 52.5, 44.5, 69.5, 72.5, 61.5, 70.5, 84.5, 53.5, 50.5] (mean = 61.96)


2021-09-16 15:38:44,497	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [86.5, 84.5, 92.5, 91.5, 70.5, 79.5, 79.5, 88.5, 89.5, 89.5, 84.5, 79.5, 82.5, 82.5, 79.5, 85.5, 88.5, 88.5, 86.5, 84.5, 84.5, 93.5, 73.5, 91.5, 80.5, 86.5, 55.5, 84.5, 69.5, 56.5, 66.5, 85.5, 69.5, 93.5, 81.5, 88.5, 85.5, 86.5, 90.5, 83.5, 84.5, 84.5, 92.5, 89.5, 87.5, 93.5, 88.5, 84.5, 84.5, 61.5] (mean = 83.04)


2021-09-16 15:39:19,062	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [77.5, 79.5, 75.5, 84.5, 75.5, 72.5, 78.5, 80.5, 66.5, 74.5, 84.5, 82.5, 78.5, 71.5, 74.5, 91.5, 84.5, 77.5, 79.5, 81.5, 77.5, 76.5, 66.5, 77.5, 64.5, 82.5, 73.5, 71.5, 80.5, 73.5, 78.5, 73.5, 77.5, 76.5, 71.5, 78.5, 78.5, 73.5, 77.5, 81.5, 70.5, 78.5, 80.5, 78.5, 78.5, 78.5, 80.5, 76.5, 83.5, 73.5] (mean = 77.22)


2021-09-16 15:39:53,451	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [50.5, 68.5, 68.5, 81.5, 58.5, 53.5, 44.5, 76.5, 50.5, 62.5, 71.5, 75.5, 56.5, 73.5, 54.5, 43.5, 65.5, 57.5, 71.5, 58.5, 65.5, 73.5, 58.5, 60.5, 64.5, 63.5, 66.5, 71.5, 17.5, 68.5, 68.5, 78.5, 63.5, 64.5, 43.5, 64.5, 44.5, 61.5, 59.5, 73.5, 68.5, 63.5, 53.5, 54.5, 59.5, 56.5, 22.5, 71.5, 72.5, 74.5] (mean = 61.44)


2021-09-16 15:40:28,773	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [83.5, 88.5, 94.5, 87.5, 79.5, 85.5, 82.5, 84.5, 91.5, 71.5, 79.5, 87.5, 87.5, 83.5, 81.5, 80.5, 89.5, 91.5, 86.5, 74.5, 89.5, 87.5, 82.5, 87.5, 65.5, 89.5, 77.5, 82.5, 69.5, 80.5, 69.5, 82.5, 78.5, 89.5, 89.5, 88.5, 89.5, 88.5, 92.5, 84.5, 84.5, 86.5, 88.5, 84.5, 89.5, 79.5, 88.5, 88.5, 92.5, 80.5] (mean = 84.38)


2021-09-16 15:41:03,314	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [74.5, 79.5, 79.5, 77.5, 87.5, 77.5, 69.5, 73.5, 82.5, 85.5, 79.5, 77.5, 76.5, 82.5, 81.5, 78.5, 76.5, 66.5, 70.5, 77.5, 75.5, 83.5, 79.5, 84.5, 59.5, 81.5, 72.5, 67.5, 82.5, 89.5, 77.5, 75.5, 77.5, 86.5, 81.5, 71.5, 61.5, 81.5, 78.5, 78.5, 86.5, 86.5, 91.5, 71.5, 81.5, 73.5, 77.5, 70.5, 79.5, 77.5] (mean = 77.92)


2021-09-16 15:41:36,985	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [64.5, 67.5, 68.5, 75.5, 52.5, 63.5, 43.5, 55.5, 31.5, 49.5, 73.5, 46.5, 66.5, 51.5, 57.5, 55.5, 57.5, 41.5, 66.5, 40.5, 52.5, 52.5, 62.5, 72.5, 62.5, 76.5, 74.5, 41.5, 69.5, 31.5, 68.5, 63.5, 62.5, 47.5, 60.5, 75.5, 64.5, 63.5, 64.5, 72.5, 71.5, 71.5, 69.5, 71.5, 50.5, 65.5, 35.5, 44.5, 57.5, 44.5] (mean = 59.0)


2021-09-16 15:42:11,252	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [85.5, 85.5, 96.5, 86.5, 85.5, 83.5, 83.5, 80.5, 88.5, 94.5, 83.5, 89.5, 89.5, 77.5, 80.5, 87.5, 89.5, 91.5, 85.5, 89.5, 91.5, 94.5, 96.5, 90.5, 86.5, 96.5, 81.5, 77.5, 70.5, 84.5, 93.5, 91.5, 80.5, 86.5, 93.5, 89.5, 94.5, 90.5, 89.5, 90.5, 88.5, 91.5, 96.5, 80.5, 89.5, 89.5, 92.5, 87.5, 96.5, 90.5] (mean = 88.14)


2021-09-16 15:42:48,552	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [58.5, 76.5, 81.5, 80.5, 71.5, 80.5, 77.5, 76.5, 83.5, 85.5, 84.5, 66.5, 77.5, 81.5, 76.5, 72.5, 80.5, 76.5, 83.5, 73.5, 79.5, 69.5, 84.5, 71.5, 79.5, 76.5, 85.5, 71.5, 81.5, 85.5, 72.5, 73.5, 75.5, 84.5, 83.5, 81.5, 78.5, 67.5, 80.5, 83.5, 78.5, 74.5, 68.5, 79.5, 81.5, 85.5, 76.5, 82.5, 74.5, 83.5] (mean = 77.92)


2021-09-16 15:43:23,052	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [63.5, 78.5, 63.5, 71.5, 56.5, 52.5, 66.5, 62.5, 78.5, 67.5, 38.5, 81.5, 71.5, 69.5, 55.5, 62.5, 66.5, 70.5, 44.5, 39.5, 73.5, 65.5, 65.5, 58.5, 72.5, 75.5, 63.5, 46.5, 66.5, 75.5, 70.5, 64.5, 38.5, 71.5, 12.5, 49.5, 79.5, 77.5, 66.5, 69.5, 69.5, 68.5, 68.5, 65.5, 72.5, 69.5, 68.5, 69.5, 74.5, 51.5] (mean = 64.04)


2021-09-16 15:43:57,896	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [78.5, 95.5, 90.5, 91.5, 71.5, 85.5, 78.5, 90.5, 92.5, 92.5, 56.5, 87.5, 94.5, 83.5, 59.5, 88.5, 79.5, 91.5, 77.5, 75.5, 94.5, 89.5, 96.5, 94.5, 73.5, 91.5, 83.5, 74.5, 53.5, 84.5, 77.5, 93.5, 59.5, 90.5, 91.5, 96.5, 88.5, 96.5, 94.5, 80.5, 83.5, 94.5, 97.5, 77.5, 92.5, 79.5, 93.5, 94.5, 96.5, 79.5] (mean = 85.1)


2021-09-16 15:44:32,178	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [74.5, 83.5, 81.5, 83.5, 66.5, 81.5, 84.5, 77.5, 79.5, 83.5, 77.5, 78.5, 82.5, 83.5, 83.5, 82.5, 85.5, 74.5, 79.5, 85.5, 80.5, 81.5, 78.5, 81.5, 83.5, 82.5, 86.5, 81.5, 82.5, 75.5, 80.5, 89.5, 79.5, 85.5, 91.5, 81.5, 74.5, 84.5, 90.5, 86.5, 73.5, 78.5, 81.5, 79.5, 78.5, 78.5, 82.5, 85.5, 84.5, 79.5] (mean = 81.38)


2021-09-16 15:45:06,531	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [67.5, 60.5, 64.5, 60.5, 63.5, 57.5, 68.5, 48.5, 72.5, 63.5, 54.5, 53.5, 69.5, 64.5, 52.5, 63.5, 71.5, 45.5, 74.5, 67.5, 60.5, 70.5, 53.5, 63.5, 71.5, 35.5, 54.5, 60.5, 73.5, 61.5, 47.5, 54.5, 51.5, 47.5, 57.5, 53.5, 61.5, 65.5, 74.5, 59.5, 52.5, 76.5, 57.5, 51.5, 57.5, 34.5, 73.5, 72.5, 58.5, 52.5] (mean = 60.18)


2021-09-16 15:45:40,921	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [96.5, 92.5, 90.5, 96.5, 94.5, 77.5, 90.5, 88.5, 94.5, 95.5, 84.5, 86.5, 92.5, 91.5, 89.5, 80.5, 87.5, 92.5, 94.5, 94.5, 98.5, 76.5, 97.5, 92.5, 90.5, 95.5, 99.75, 81.5, 83.5, 94.5, 95.5, 93.5, 58.5, 91.5, 91.5, 92.5, 87.5, 94.5, 85.5, 93.5, 87.5, 91.5, 94.5, 88.5, 91.5, 91.5, 73.5, 91.5, 94.5, 77.5] (mean = 89.745)


2021-09-16 15:46:15,429	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [76.5, 85.5, 85.5, 85.5, 84.5, 74.5, 83.5, 80.5, 76.5, 89.5, 73.5, 82.5, 87.5, 77.5, 82.5, 64.5, 87.5, 80.5, 83.5, 80.5, 78.5, 86.5, 86.5, 87.5, 73.5, 83.5, 81.5, 76.5, 88.5, 66.5, 86.5, 87.5, 83.5, 85.5, 87.5, 80.5, 81.5, 88.5, 90.5, 81.5, 87.5, 82.5, 86.5, 81.5, 76.5, 86.5, 84.5, 88.5, 90.5, 82.5] (mean = 82.62)


2021-09-16 15:46:49,406	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [80.5, 78.5, 80.5, 72.5, 53.5, 53.5, 73.5, 47.5, 71.5, 71.5, 80.5, 67.5, 62.5, 52.5, 73.5, 77.5, 60.5, 67.5, 72.5, 66.5, 52.5, 76.5, 75.5, 62.5, 74.5, 48.5, 56.5, 42.5, 65.5, 74.5, 66.5, 58.5, 44.5, 67.5, 66.5, 43.5, 81.5, 66.5, 79.5, 60.5, 69.5, 76.5, 56.5, 77.5, 67.5, 73.5, 71.5, 54.5, 63.5, 61.5] (mean = 66.0)


2021-09-16 15:47:25,606	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [97.5, 93.5, 95.5, 95.5, 95.5, 85.5, 82.5, 89.5, 97.5, 98.5, 70.5, 92.5, 80.5, 88.5, 91.5, 96.5, 98.5, 98.5, 94.5, 93.5, 96.5, 96.5, 97.5, 97.5, 77.5, 89.5, 98.5, 81.5, 71.5, 77.5, 89.5, 98.5, 62.5, 85.5, 91.5, 97.5, 93.5, 97.5, 95.5, 87.5, 96.5, 96.5, 97.5, 94.5, 96.5, 81.5, 95.5, 96.5, 97.5, 96.5] (mean = 91.36)


2021-09-16 15:48:00,459	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [84.5, 85.5, 94.5, 82.5, 71.5, 78.5, 77.5, 85.5, 88.5, 84.5, 63.5, 88.5, 67.5, 87.5, 80.5, 80.5, 86.5, 88.5, 76.5, 86.5, 75.5, 87.5, 83.5, 87.5, 85.5, 84.5, 84.5, 88.5, 89.5, 79.5, 86.5, 82.5, 84.5, 81.5, 76.5, 83.5, 84.5, 86.5, 86.5, 77.5, 83.5, 85.5, 86.5, 89.5, 82.5, 77.5, 93.5, 82.5, 87.5, 85.5] (mean = 83.38)


2021-09-16 15:48:35,655	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [74.5, 54.5, 68.5, 69.5, 52.5, 61.5, 69.5, 80.5, 69.5, 67.5, 60.5, 70.5, 71.5, 45.5, 70.5, 67.5, 67.5, 59.5, 69.5, 40.5, 77.5, 67.5, 71.5, 69.5, 69.5, 56.5, 60.5, 65.5, 59.5, 72.5, 65.5, 66.5, 75.5, 67.5, 62.5, 64.5, 58.5, 57.5, 71.5, 81.5, 68.5, 71.5, 57.5, 56.5, 53.5, 71.5, 49.5, 54.5, 69.5, 52.5] (mean = 64.74)


2021-09-16 15:49:10,511	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [96.5, 93.5, 77.5, 98.5, 78.5, 84.5, 86.5, 58.5, 94.5, 96.5, 70.5, 94.5, 93.5, 102.0, 90.5, 96.5, 92.5, 64.5, 98.5, 88.5, 98.5, 98.5, 98.5, 101.5, 88.5, 102.5, 72.5, 85.5, 91.5, 89.5, 97.5, 89.5, 98.5, 97.5, 89.5, 97.5, 72.5, 101.5, 98.5, 77.5, 86.5, 90.5, 96.5, 96.5, 98.5, 98.5, 71.5, 98.5, 94.5, 94.5] (mean = 90.59)


2021-09-16 15:49:46,223	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [78.5, 87.5, 90.5, 89.5, 87.5, 78.5, 89.5, 85.5, 89.5, 93.5, 80.5, 83.5, 77.5, 88.5, 83.5, 81.5, 88.5, 90.5, 61.5, 91.5, 88.5, 86.5, 83.5, 92.5, 84.5, 83.5, 92.5, 86.5, 86.5, 88.5, 85.5, 91.5, 87.5, 81.5, 74.5, 86.5, 75.5, 86.5, 86.5, 92.5, 71.5, 91.5, 90.5, 85.5, 91.5, 90.5, 88.5, 90.5, 89.5, 86.5] (mean = 85.88)


2021-09-16 15:50:21,010	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [65.5, 43.5, 63.5, 68.5, 41.5, 58.5, 68.5, 65.5, 48.5, 53.5, 42.5, 76.5, 78.5, 61.5, 63.5, 74.5, 53.5, 70.5, 53.5, 70.5, 71.5, 60.5, 64.5, 65.5, 38.5, 49.5, 67.5, 66.5, 62.5, 59.5, 67.5, 55.5, 66.5, 58.5, 69.5, 55.5, 49.5, 54.5, 71.5, 39.5, 65.5, 68.5, 39.5, 39.5, 68.5, 57.5, 61.5, 68.5, 23.5, 54.5] (mean = 59.26)


2021-09-16 15:50:56,641	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [88.5, 93.5, 97.5, 99.75, 102.75, 103.75, 92.5, 92.5, 95.5, 88.5, 102.75, 94.5, 71.5, 96.5, 102.75, 104.25, 92.5, 96.5, 93.5, 94.5, 80.5, 101.0, 98.5, 70.5, 77.5, 81.5, 93.5, 79.5, 96.5, 82.5, 69.5, 95.5, 96.5, 94.5, 98.5, 100.0, 95.5, 93.5, 98.5, 84.5, 105.0, 104.75, 92.5, 91.5, 96.5, 70.5, 104.25, 98.5, 89.5, 101.0] (mean = 92.92)


2021-09-16 15:51:32,738	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [91.5, 84.5, 87.5, 87.5, 78.5, 78.5, 87.5, 80.5, 94.5, 92.5, 77.5, 72.5, 81.5, 82.5, 86.5, 78.5, 92.5, 88.5, 74.5, 73.5, 89.5, 84.5, 87.5, 89.5, 75.5, 84.5, 93.5, 91.5, 87.5, 78.5, 91.5, 89.5, 89.5, 87.5, 68.5, 90.5, 79.5, 89.5, 87.5, 84.5, 78.5, 92.5, 89.5, 80.5, 86.5, 89.5, 90.5, 89.5, 91.5, 81.5] (mean = 85.22)


2021-09-16 15:52:09,980	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [73.5, 58.5, 78.5, 70.5, 64.5, 38.5, 69.5, 63.5, 53.5, 42.5, 49.5, 74.5, 63.5, 65.5, 68.5, 72.5, 29.5, 69.5, 66.5, 61.5, 51.5, 73.5, 63.5, 51.5, 57.5, 78.5, 60.5, 69.5, 51.5, 72.5, 81.5, 58.5, 69.5, 71.5, 66.5, 70.5, 53.5, 67.5, 80.5, 62.5, 67.5, 70.5, 42.5, 57.5, 59.5, 43.5, 76.5, 56.5, 67.5, 64.5] (mean = 63.04)


In [None]:
# --- 8-agent experiment: self-play vs. cross-play across training checkpoints ---
# Experiment settings for this cell.
config_name = "simple_8"
run_name_0 = "simple_8_123"
run_name_1 = "simple_8_456"
heterogeneous = True
random_start = False
num_episodes = 50
max_checkpoint = 501

# Load the environment/training config and override the start-position setting.
config = load_config(config_name)
config["env_config"]["random_start"] = random_start

# Mean reward per evaluated checkpoint; these lists are read by the plotting cell.
self_play_mean_reward_8_0, self_play_mean_reward_8_1, cross_play_mean_reward_8 = [], [], []

# Evaluate every 25th checkpoint: 1, 26, ..., up to max_checkpoint.
all_checkpoints = list(range(1, max_checkpoint + 1, 25))
for checkpoint in all_checkpoints:
    # Both runs are evaluated at the same checkpoint number.
    # NOTE(review): presumably returns per-episode rewards for (run 0 self-play,
    # run 1 self-play, cross-play) -- confirm against cross_play's definition.
    sp_0, sp_1, cp = cross_play(
        run_name_0,
        run_name_1,
        checkpoint,
        checkpoint,
        config,
        heterogeneous=heterogeneous,
        num_episodes=num_episodes,
    )
    # Average each reward list over the requested number of episodes
    # (assumes every list holds exactly num_episodes entries).
    for mean_rewards, episode_rewards in (
        (self_play_mean_reward_8_0, sp_0),
        (self_play_mean_reward_8_1, sp_1),
        (cross_play_mean_reward_8, cp),
    ):
        mean_rewards.append(sum(episode_rewards) / num_episodes)

2021-09-16 15:52:45,828	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [55.0, 66.0, 59.0, 48.0, 52.0, 60.0, 65.0, 76.0, 72.0, 59.0, 58.0, 61.0, 66.0, 65.0, 77.0, 57.0, 44.0, 77.0, 59.0, 49.0, 79.0, 63.0, 66.0, 53.0, 60.0, 64.0, 57.0, 60.0, 78.0, 64.0, 53.0, 58.0, 61.0, 45.0, 61.0, 73.0, 58.0, 57.0, 69.0, 55.0, 73.0, 58.0, 63.0, 53.0, 57.0, 50.0, 69.0, 59.0, 58.0, 71.0] (mean = 61.4)


2021-09-16 15:53:39,625	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [56.0, 68.0, 56.0, 59.0, 79.0, 64.0, 52.0, 67.0, 73.0, 68.0, 61.0, 72.0, 71.0, 62.0, 59.0, 65.0, 54.0, 69.0, 59.0, 58.0, 74.0, 66.0, 70.0, 38.0, 71.0, 66.0, 63.0, 61.0, 66.0, 72.0, 72.0, 70.0, 70.0, 57.0, 76.0, 51.0, 59.0, 61.0, 69.0, 64.0, 67.0, 67.0, 54.0, 64.0, 59.0, 55.0, 57.0, 65.0, 63.0, 66.0] (mean = 63.7)


2021-09-16 15:54:34,064	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [68.0, 61.0, 53.0, 56.0, 59.0, 56.0, 62.0, 58.0, 64.0, 59.0, 58.0, 54.0, 64.0, 56.0, 55.0, 57.0, 52.0, 61.0, 64.0, 51.0, 80.0, 72.0, 59.0, 56.0, 64.0, 63.0, 46.0, 55.0, 75.0, 54.0, 62.0, 54.0, 53.0, 64.0, 64.0, 55.0, 67.0, 58.0, 63.0, 63.0, 71.0, 71.0, 42.0, 61.0, 56.0, 53.0, 73.0, 58.0, 57.0, 65.0] (mean = 60.04)


2021-09-16 15:55:29,476	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
Traceback (most recent call last):
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
    loop.run_until_complete(agent.run())
  File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
    return future.result()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
    modules = self._load_modules()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
    c = cls(self)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/modules/reporter/reporter_agent.py", line 148, in __init__
    self._metrics_agent = MetricsAgent(dashboard_agent.metrics_export_port)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packa

[2m[33m(raylet)[0m Traceback (most recent call last):
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 338, in <module>
[2m[33m(raylet)[0m     raise e
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
[2m[33m(raylet)[0m     loop.run_until_complete(agent.run())
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
[2m[33m(raylet)[0m     return future.result()
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
[2m[33m(raylet)[0m     modules = self._load_modules()
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
[2m[33m(raylet)[0m     c = cls(self)
[2m[33m(raylet)[0m   

Traceback (most recent call last):
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
    loop.run_until_complete(agent.run())
  File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
    return future.result()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
    modules = self._load_modules()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
    c = cls(self)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/modules/reporter/reporter_agent.py", line 148, in __init__
    self._metrics_agent = MetricsAgent(dashboard_agent.metrics_export_port)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/_private/metrics_agent.py", line 75, in __init__
    prometheus_exporter.new_stats_exporter(
  File "/home/a

[2m[33m(raylet)[0m Traceback (most recent call last):
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 338, in <module>
[2m[33m(raylet)[0m     raise e
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
[2m[33m(raylet)[0m     loop.run_until_complete(agent.run())
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
[2m[33m(raylet)[0m     return future.result()
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
[2m[33m(raylet)[0m     modules = self._load_modules()
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
[2m[33m(raylet)[0m     c = cls(self)
[2m[33m(raylet)[0m   

Traceback (most recent call last):
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
    loop.run_until_complete(agent.run())
  File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
    return future.result()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
    modules = self._load_modules()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
    c = cls(self)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/modules/reporter/reporter_agent.py", line 148, in __init__
    self._metrics_agent = MetricsAgent(dashboard_agent.metrics_export_port)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/_private/metrics_agent.py", line 75, in __init__
    prometheus_exporter.new_stats_exporter(
  File "/home/a

[2m[33m(raylet)[0m Traceback (most recent call last):
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 338, in <module>
[2m[33m(raylet)[0m     raise e
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
[2m[33m(raylet)[0m     loop.run_until_complete(agent.run())
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
[2m[33m(raylet)[0m     return future.result()
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
[2m[33m(raylet)[0m     modules = self._load_modules()
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
[2m[33m(raylet)[0m     c = cls(self)
[2m[33m(raylet)[0m   

Traceback (most recent call last):
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
    loop.run_until_complete(agent.run())
  File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
    return future.result()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
    modules = self._load_modules()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
    c = cls(self)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/modules/reporter/reporter_agent.py", line 148, in __init__
    self._metrics_agent = MetricsAgent(dashboard_agent.metrics_export_port)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/_private/metrics_agent.py", line 75, in __init__
    prometheus_exporter.new_stats_exporter(
  File "/home/a

[2m[33m(raylet)[0m Traceback (most recent call last):
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 338, in <module>
[2m[33m(raylet)[0m     raise e
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
[2m[33m(raylet)[0m     loop.run_until_complete(agent.run())
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
[2m[33m(raylet)[0m     return future.result()
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
[2m[33m(raylet)[0m     modules = self._load_modules()
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
[2m[33m(raylet)[0m     c = cls(self)
[2m[33m(raylet)[0m   

Traceback (most recent call last):
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
    loop.run_until_complete(agent.run())
  File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
    return future.result()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
    modules = self._load_modules()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
    c = cls(self)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/modules/reporter/reporter_agent.py", line 148, in __init__
    self._metrics_agent = MetricsAgent(dashboard_agent.metrics_export_port)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/_private/metrics_agent.py", line 75, in __init__
    prometheus_exporter.new_stats_exporter(
  File "/home/a

[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 338, in <module>
[2m[33m(raylet)[0m     raise e
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
[2m[33m(raylet)[0m     loop.run_until_complete(agent.run())
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
[2m[33m(raylet)[0m     return future.result()
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
[2m[33m(raylet)[0m     modules = self._load_modules()
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
[2m[33m(raylet)[0m     c = cls(self)
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-pack

Traceback (most recent call last):
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
    loop.run_until_complete(agent.run())
  File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
    return future.result()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
    modules = self._load_modules()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
    c = cls(self)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/modules/reporter/reporter_agent.py", line 148, in __init__
    self._metrics_agent = MetricsAgent(dashboard_agent.metrics_export_port)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/_private/metrics_agent.py", line 75, in __init__
    prometheus_exporter.new_stats_exporter(
  File "/home/a

<IPython.core.display.Javascript object>

Traceback (most recent call last):
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
    loop.run_until_complete(agent.run())
  File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
    return future.result()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
    modules = self._load_modules()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
    c = cls(self)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/modules/reporter/reporter_agent.py", line 148, in __init__
    self._metrics_agent = MetricsAgent(dashboard_agent.metrics_export_port)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/_private/metrics_agent.py", line 75, in __init__
    prometheus_exporter.new_stats_exporter(
  File "/home/a

[2m[33m(raylet)[0m Traceback (most recent call last):
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 338, in <module>
[2m[33m(raylet)[0m     raise e
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
[2m[33m(raylet)[0m     loop.run_until_complete(agent.run())
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
[2m[33m(raylet)[0m     return future.result()
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
[2m[33m(raylet)[0m     modules = self._load_modules()
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
[2m[33m(raylet)[0m     c = cls(self)
[2m[33m(raylet)[0m   

Traceback (most recent call last):
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
    loop.run_until_complete(agent.run())
  File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
    return future.result()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
    modules = self._load_modules()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
    c = cls(self)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/modules/reporter/reporter_agent.py", line 148, in __init__
    self._metrics_agent = MetricsAgent(dashboard_agent.metrics_export_port)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/_private/metrics_agent.py", line 75, in __init__
    prometheus_exporter.new_stats_exporter(
  File "/home/a

Traceback (most recent call last):
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
    loop.run_until_complete(agent.run())
  File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
    return future.result()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
    modules = self._load_modules()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
    c = cls(self)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/modules/reporter/reporter_agent.py", line 148, in __init__
    self._metrics_agent = MetricsAgent(dashboard_agent.metrics_export_port)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/_private/metrics_agent.py", line 75, in __init__
    prometheus_exporter.new_stats_exporter(
  File "/home/a

Traceback (most recent call last):
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
    loop.run_until_complete(agent.run())
  File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
    return future.result()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
    modules = self._load_modules()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
    c = cls(self)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/modules/reporter/reporter_agent.py", line 148, in __init__
    self._metrics_agent = MetricsAgent(dashboard_agent.metrics_export_port)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/_private/metrics_agent.py", line 75, in __init__
    prometheus_exporter.new_stats_exporter(
  File "/home/a

[2m[33m(raylet)[0m Traceback (most recent call last):
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 338, in <module>
[2m[33m(raylet)[0m     raise e
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
[2m[33m(raylet)[0m     loop.run_until_complete(agent.run())
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
[2m[33m(raylet)[0m     return future.result()
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
[2m[33m(raylet)[0m     modules = self._load_modules()
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
[2m[33m(raylet)[0m     c = cls(self)
[2m[33m(raylet)[0m   

Traceback (most recent call last):
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
    loop.run_until_complete(agent.run())
  File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
    return future.result()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
    modules = self._load_modules()
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
    c = cls(self)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/modules/reporter/reporter_agent.py", line 148, in __init__
    self._metrics_agent = MetricsAgent(dashboard_agent.metrics_export_port)
  File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/_private/metrics_agent.py", line 75, in __init__
    prometheus_exporter.new_stats_exporter(
  File "/home/a

[2m[33m(raylet)[0m Traceback (most recent call last):
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 338, in <module>
[2m[33m(raylet)[0m     raise e
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 326, in <module>
[2m[33m(raylet)[0m     loop.run_until_complete(agent.run())
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/asyncio/base_events.py", line 642, in run_until_complete
[2m[33m(raylet)[0m     return future.result()
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 138, in run
[2m[33m(raylet)[0m     modules = self._load_modules()
[2m[33m(raylet)[0m   File "/home/anchorwatt/miniconda3/lib/python3.9/site-packages/ray/new_dashboard/agent.py", line 92, in _load_modules
[2m[33m(raylet)[0m     c = cls(self)
[2m[33m(raylet)[0m   

episode rewards: [68.0, 86.0, 76.0, 76.0, 72.0, 56.0, 73.0, 65.0, 82.0, 73.0, 74.0, 70.0, 61.0, 63.0, 73.0, 78.0, 68.0, 64.0, 70.0, 78.0, 64.0, 81.0, 81.0, 65.0, 52.0, 78.0, 65.0, 79.0, 82.0, 67.0, 78.0, 78.0, 73.0, 83.0, 68.0, 63.0, 60.0, 71.0, 83.0, 78.0, 85.0, 72.0, 76.0, 80.0, 81.0, 67.0, 69.0, 75.0, 63.0, 74.0] (mean = 72.34)


2021-09-16 15:56:28,443	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [75.0, 79.0, 79.0, 79.0, 64.0, 81.0, 72.0, 81.0, 75.0, 80.0, 79.0, 69.0, 77.0, 78.0, 84.0, 85.0, 66.0, 78.0, 84.0, 79.0, 71.0, 78.0, 69.0, 79.0, 72.0, 75.0, 76.0, 75.0, 69.0, 74.0, 70.0, 78.0, 65.0, 74.0, 74.0, 75.0, 86.0, 68.0, 75.0, 70.0, 82.0, 73.0, 78.0, 80.0, 80.0, 74.0, 69.0, 73.0, 69.0, 66.0] (mean = 75.22)


2021-09-16 15:57:26,543	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


<IPython.core.display.Javascript object>

episode rewards: [72.0, 79.0, 75.0, 72.0, 79.0, 74.0, 74.0, 82.0, 75.0, 77.0, 61.0, 75.0, 72.0, 70.0, 68.0, 69.0, 73.0, 71.0, 64.0, 71.0, 74.0, 71.0, 69.0, 78.0, 70.0, 66.0, 81.0, 68.0, 63.0, 81.0, 71.0, 69.0, 82.0, 78.0, 79.0, 86.0, 76.0, 63.0, 81.0, 63.0, 69.0, 73.0, 64.0, 78.0, 63.0, 69.0, 78.0, 64.0, 64.0, 74.0] (mean = 72.36)


2021-09-16 15:58:23,863	INFO services.py:1272 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


In [None]:
%matplotlib inline
all_checkpoints = list(range(1, 502, 25))
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12, 3))
ax1.set_title("2 agents")
ax1.set_ylim(0, 100)
ax1.plot(all_checkpoints, self_play_mean_reward_2_0, label="self-play (seed 123)")
ax1.plot(all_checkpoints, self_play_mean_reward_2_1, label="self-play (seed 456)")
ax1.plot(all_checkpoints, cross_play_mean_reward_2, label="cross-play")
ax1.set_xlabel("Episode")
ax1.set_ylabel("Reward")

ax2.set_title("4 agents")
ax2.set_ylim(0, 100)
ax2.plot(all_checkpoints, self_play_mean_reward_4_0, label="self-play (seed 123)")
ax2.plot(all_checkpoints, self_play_mean_reward_4_1, label="self-play (seed 456)")
ax2.plot(all_checkpoints, cross_play_mean_reward_4, label="cross-play")
ax2.set_xlabel("Episode")

ax3.set_title("8 agents")
ax3.set_ylim(0, 100)
ax3.plot(all_checkpoints, self_play_mean_reward_8_0, label="self-play (seed 123)")
ax3.plot(all_checkpoints, self_play_mean_reward_8_1, label="self-play (seed 456)")
ax3.plot(all_checkpoints, cross_play_mean_reward_8, label="cross-play")
ax3.set_xlabel("Episode")
ax3.legend(bbox_to_anchor=(1, 1), loc="upper left")

In [None]:
# Sweep the two "hom" runs over every 25th checkpoint and record, per checkpoint,
# the mean reward of each run's self-play and of their cross-play.
run_name_0, run_name_1 = "hom123", "hom456"
config_name = "simple_2"
heterogeneous = False
max_checkpoint = 501
num_episodes = 50
random_start = False

config = load_config(config_name)
config["env_config"]["random_start"] = random_start

all_checkpoints = [*range(1, max_checkpoint + 1, 25)]
self_play_mean_reward_hom_0 = []
self_play_mean_reward_hom_1 = []
cross_play_mean_reward_hom = []
# Result lists in the same order as the three values cross_play returns.
_mean_reward_histories = (
    self_play_mean_reward_hom_0,
    self_play_mean_reward_hom_1,
    cross_play_mean_reward_hom,
)
for checkpoint in all_checkpoints:
    # Both runs are evaluated at the same checkpoint number.
    sp_0, sp_1, cp = cross_play(
        run_name_0,
        run_name_1,
        checkpoint,
        checkpoint,
        config,
        heterogeneous=heterogeneous,
        num_episodes=num_episodes,
    )
    for _history, _episode_rewards in zip(_mean_reward_histories, (sp_0, sp_1, cp)):
        _history.append(sum(_episode_rewards) / num_episodes)

In [None]:
# Sweep the two random-start 4-agent runs over every 25th checkpoint and record,
# per checkpoint, the mean reward of each run's self-play and of their cross-play.
run_name_0, run_name_1 = "simple_4_random_123", "simple_4_random_456"
config_name = "simple_4"
heterogeneous = True
max_checkpoint = 501
num_episodes = 50
random_start = True

config = load_config(config_name)
config["env_config"]["random_start"] = random_start

all_checkpoints = [*range(1, max_checkpoint + 1, 25)]
self_play_mean_reward_random_0 = []
self_play_mean_reward_random_1 = []
cross_play_mean_reward_random = []
# Result lists in the same order as the three values cross_play returns.
_mean_reward_histories = (
    self_play_mean_reward_random_0,
    self_play_mean_reward_random_1,
    cross_play_mean_reward_random,
)
for checkpoint in all_checkpoints:
    # Both runs are evaluated at the same checkpoint number.
    sp_0, sp_1, cp = cross_play(
        run_name_0,
        run_name_1,
        checkpoint,
        checkpoint,
        config,
        heterogeneous=heterogeneous,
        num_episodes=num_episodes,
    )
    for _history, _episode_rewards in zip(_mean_reward_histories, (sp_0, sp_1, cp)):
        _history.append(sum(_episode_rewards) / num_episodes)

In [None]:
%matplotlib inline
all_checkpoints = list(range(1, 502, 25))
plt.ylim(0, 100)
plt.plot(all_checkpoints, self_play_mean_reward_hom_0, label="self-play (seed 123)")
plt.plot(all_checkpoints, self_play_mean_reward_hom_1, label="self-play (seed 456)")
plt.plot(all_checkpoints, cross_play_mean_reward_hom, label="cross-play")
plt.xlabel("Episode")
plt.ylabel("Reward")
plt.title("2 agents")
# plt.legend(bbox_to_anchor=(1,1), loc="upper left")

In [None]:
# Plot the random-start sweep: both self-play curves and the cross-play curve.
# Reuses `all_checkpoints` as left in the notebook namespace by an earlier cell.
# Fix the y-range before plotting so it is not autoscaled to the data.
plt.ylim(0, 100)
for _rewards, _label in (
    (self_play_mean_reward_random_0, "self-play (seed 123)"),
    (self_play_mean_reward_random_1, "self-play (seed 456)"),
    (cross_play_mean_reward_random, "cross-play"),
):
    plt.plot(all_checkpoints, _rewards, label=_label)
plt.xlabel("Episode")
plt.ylabel("Reward")
plt.title("4 agents")
# Legend is currently disabled; uncomment the next line to draw it outside the axes.
# plt.legend(bbox_to_anchor=(1,1), loc="upper left")

In [None]:
# Run and record a single evaluation episode with a mixed 4-agent team:
# agent slots 0 and 3 load the "simple_4_123" run, slots 1 and 2 load
# "simple_4_456", every slot at checkpoint 301 (run_evaluation pairs
# run_names[i] with checkpoints[i] for agent i).
run_names = ["simple_4_123", "simple_4_456", "simple_4_456", "simple_4_123"]
checkpoints = [301, 301, 301, 301]
config = load_config("simple_4")
# The first result is bound to `cross_play_rewards` rather than `cross_play`:
# `cross_play` is the function the checkpoint-sweep cells call, and rebinding it
# here would make those cells fail ("object is not callable") when re-run.
# NOTE(review): presumably the first value is the per-episode rewards and `ani`
# the recorded animation -- confirm against run_evaluation's return statement.
cross_play_rewards, ani = run_evaluation(
    run_names,
    checkpoints,
    config,
    policy_name="ppo",
    seed=1,
    heterogeneous=True,
    num_episodes=1,
    record=True
)