## PettingZoo (PZ) seed test

In [1]:
from pettingzoo.test import seed_test, parallel_seed_test
from pettingzoo.mpe import simple_tag_v3

# Constructor for the parallel simple_tag environment; reused by later cells.
env_fn = simple_tag_v3.parallel_env
# `seed` is not passed to the seed test below; it is consumed when the PPO agent is built.
seed = 1000
parallel_seed_test(env_fn, num_cycles=10_000)

In [2]:
from algatross.agents.on_policy.ppo import TorchPPOAgent

# Build one environment instance and a PPO agent sized for agent_0's spaces.
env = env_fn()
agent_0_spaces = {
    "obs_space": env.observation_space("agent_0"),
    "act_space": env.action_space("agent_0"),
}
agent = TorchPPOAgent(**agent_0_spaces, seed=seed)

In [3]:
# Reset the environment and keep the initial observations; `_info` is not used below.
# NOTE(review): this passes the literal 0 rather than the `seed` variable (1000)
# defined in the first cell — confirm that is intended.
next_obs, _info = env.reset(seed=0)

In [4]:
import torch
import numpy as np

# Run the actor once for a single agent and store the sampled action together
# with the logits, value estimate and log-probability, keyed by agent id.
agent_id = "agent_0"

actions: dict[str, torch.Tensor | np.ndarray] = {}
logits: dict[str, torch.Tensor | np.ndarray] = {}
values: dict[str, torch.Tensor | np.ndarray] = {}
logp: dict[str, torch.Tensor | np.ndarray] = {}

obs = next_obs
# Add a leading batch dimension of size 1 before feeding the actor.
torch_obs = torch.from_numpy(obs[agent_id]).unsqueeze(0)

# Inference only: no gradients are needed for a single forward pass.
with torch.no_grad():
    ag_logits = agent.actor(torch_obs)
    ag_actions, ag_values, dist = agent.get_action_and_value(torch_obs, logits=ag_logits)
    ag_logp = dist.log_prob(ag_actions)

# Only the action has its batch dimension stripped; the other outputs keep it.
actions[agent_id] = ag_actions.numpy()[0]
logits[agent_id] = ag_logits.numpy()
values[agent_id] = ag_values.numpy()
logp[agent_id] = ag_logp.numpy()

In [19]:
# Inspect the raw observation of agent_0 that was fed to the actor above.
obs["agent_0"]

array([ 0.        ,  0.        , -0.6734749 , -0.00259911,  0.51732236,
       -0.7848932 ,  1.2760446 ,  0.49880967,  1.6354212 , -0.87701476,
        0.63363993, -0.29134926,  1.4245683 , -0.89229226], dtype=float32)

## Config -> rollout with a runner built from scratch

In [1]:
import ray
import os
import numpy as np

from algatross.utils.parsers.yaml_loader import load_config
# Working directory for the rest of the notebook. Set the MO_MARL_ROOT environment
# variable to point at another checkout; the default is the original hard-coded location.
project_root = os.environ.get("MO_MARL_ROOT", "/home/wgar/mo-marl/")
os.chdir(project_root)
seed = 1000

# Shut down any Ray session this kernel may still be connected to.
ray.shutdown()

In [2]:
# Experiment configuration file. Set the MO_MARL_CONFIG environment variable to use
# another file; the default is the original hard-coded location.
config_file = os.environ.get(
    "MO_MARL_CONFIG",
    "/home/wgar/mo-marl/config/simple_tag/test_algatross.yml",
)
print(f"Using configuration: {config_file}")

config = load_config(config_file)
# grab one of the islands
island_config = config["islands"][0]
# get the UDP constructor and data
udp_constructor = island_config["problem_constructor"].config

Using configuration: /home/wgar/mo-marl/config/simple_tag/test_algatross.yml


In [3]:
from supersuit.multiagent_wrappers import pad_action_space_v0, pad_observations_v0

seed = 1000

# Build the raw environment from the config and show which one it is.
raw_env = udp_constructor["env_constructor_data"].construct()
print(raw_env)

# Wrap with the SuperSuit padding wrappers: action space first, then observations
# (presumably so every agent shares one action/observation shape — TODO confirm).
action_padded_env = pad_action_space_v0(raw_env)
env = pad_observations_v0(action_padded_env)

# Last expression of the cell: the value returned by reset is displayed.
env.reset(seed)


simple_tag_v3


({'adversary_0': array([ 0.        ,  0.        ,  0.04277148,  0.2076837 ,  0.5068985 ,
         -0.6606129 , -0.60102814,  0.66350836, -0.10088788, -0.8011878 ,
          0.01474658, -0.8256111 , -0.47968027,  0.2996794 ,  0.06057208,
          0.51976043,  0.        ,  0.        ,  0.        ,  0.        ],
        dtype=float32),
  'adversary_1': array([ 0.        ,  0.        , -0.05811641, -0.59350413,  0.6077864 ,
          0.1405749 , -0.50014025,  1.4646962 ,  0.10088788,  0.8011878 ,
          0.11563446, -0.02442333, -0.3787924 ,  1.1008673 ,  0.16145995,
          1.3209482 ,  0.        ,  0.        ,  0.        ,  0.        ],
        dtype=float32),
  'adversary_2': array([ 0.        ,  0.        ,  0.05751805, -0.61792743,  0.49215195,
          0.16499823, -0.6157747 ,  1.4891195 , -0.01474658,  0.8256111 ,
         -0.11563446,  0.02442333, -0.49442685,  1.1252905 ,  0.0458255 ,
          1.3453716 ,  0.        ,  0.        ,  0.        ,  0.        ],
        dtype=fl

In [4]:
## create an env runner
trainer_data = udp_constructor["cleanrl_trainer_constructor_data"]
print(trainer_data.constructor)
print(trainer_data.config)

# The runner class comes from the trainer entry; the evaluator entry only
# contributes its config.
evaluator_data = udp_constructor["cleanrl_evaluator_constructor_data"]
runner = trainer_data.constructor(
    seed=seed,
    env=env,
    n_envs=1,
    train_config=trainer_data.config,
    evaluate_config=evaluator_data.config,
)

<class 'algatross.environments.mpe.simple_tag.MPESimpleTagRunner'>
{'rollout_kwargs': {'batch_size': 300, 'gamma': 0.85, 'gae_lambda': 0.85}, 'train_kwargs': {'device': 'cpu', 'sgd_minibatch_size': 30, 'num_sgd_iter': 20}}
Seeding runner with 1000!


In [5]:
from algatross.utils.types import AgentID, ConstructorData, MOAIMIslandInhabitant, OptimizationTypeEnum, TeammateID, IslandID, TeamID
from typing import Callable
import functools

## UDP fundamentals
agent_constructor_data = udp_constructor["cleanrl_agent_constructor_data"]
print(agent_constructor_data.constructor)
print(agent_constructor_data.config)

fitness_metric_keys = udp_constructor["fitness_metric_keys"]
fitness_metric_optimization_type = udp_constructor["fitness_metric_optimization_type"]
fitness_multiplier = udp_constructor["fitness_multiplier"]
fitness_reduce_fn = udp_constructor["fitness_reduce_fn"]

# One optimization direction per fitness metric. A single string applies to every
# metric; otherwise one entry per metric is required (zip is strict about lengths).
if isinstance(fitness_metric_optimization_type, str):
    shared_direction = OptimizationTypeEnum(fitness_metric_optimization_type.lower())
    fitness_opt_enum = [shared_direction] * len(fitness_metric_keys)
else:
    fitness_opt_enum = []
    for _metric_key, fo_type in zip(fitness_metric_keys, fitness_metric_optimization_type, strict=True):
        fitness_opt_enum.append(OptimizationTypeEnum(fo_type.lower()))

_fitness_metric_opt_enum = fitness_opt_enum

# +1 for metrics that are maximized, -1 for everything else.
_fitness_sign = np.array(
    [1 if direction == OptimizationTypeEnum.MAX else -1 for direction in fitness_opt_enum],
)

# A string reduce function names a method of the fitness array; bind it so that
# calling `_fitness_reduce_fn(arr)` runs `getattr(arr, name)(axis=-1)`.
if isinstance(fitness_reduce_fn, str):
    _fitness_reduce_fn: Callable = functools.partial(  # type: ignore[assignment]
        lambda method_name, target, *call_args, **call_kwargs: getattr(target, method_name)(*call_args, **call_kwargs),
        fitness_reduce_fn,
        axis=-1,
    )
else:
    _fitness_reduce_fn = fitness_reduce_fn

# Missing multipliers: no list at all means 1.0 everywhere, a None entry means 0.0.
raw_multipliers = [1.0] * len(fitness_metric_keys) if fitness_multiplier is None else fitness_multiplier
fitness_multiplier = np.array([0.0 if gain is None else gain for gain in raw_multipliers], dtype=np.float32)
# Fold the optimization direction into the multiplier.
fitness_multiplier = fitness_multiplier * _fitness_sign


### agents and team composition
agent_ids = env.possible_agents
training_agents = udp_constructor["training_agents"]

# Team layouts come from the UDP config. A missing key falls back to a default:
# the training agents form the only ally team; there are no opponents or neutrals.
try:
    ally_teams = udp_constructor["ally_teams"]
except KeyError:
    ally_teams = {"allies_0": list(training_agents)}

try:
    opponent_teams = udp_constructor["opponent_teams"]
except KeyError:
    opponent_teams = {}

try:
    neutral_teams = udp_constructor["neutral_teams"]
except KeyError:
    neutral_teams = {}

# Flatten each group of teams into one set of agent ids.
ally_agents = set().union(*ally_teams.values())
opponent_agents = set().union(*opponent_teams.values())
neutral_agents = set().union(*neutral_teams.values())


## UDP.init_agents()
_agent_ctor_data = udp_constructor["cleanrl_agent_constructor_data"]

# One agent object per possible agent, built from that agent's own spaces.
_agent_map = {
    agent_id: _agent_ctor_data.constructor(
        agent_id=agent_id,
        obs_space=env.observation_space(agent_id),
        act_space=env.action_space(agent_id),
        **_agent_ctor_data.config,
    )
    for agent_id in agent_ids
}

# Flat parameter count of every agent that is trained.
_solution_dim = {
    trained_id: np.prod(_agent_map[trained_id].flat_parameters.shape)
    for trained_id in _agent_map
    if trained_id in training_agents
}

<class 'algatross.agents.on_policy.ppo.TorchPPOAgent'>
{'critic_outs': 1, 'shared_encoder': False, 'free_log_std': True, 'entropy_coeff': 0.0, 'kl_target': 0.2, 'kl_coeff': 0.2, 'vf_coeff': 1.0, 'seed': 1000, 'logp_clip_param': 0.2, 'vf_clip_param': None, 'optimizer_class': <class 'torch.optim.adam.Adam'>, 'optimizer_kwargs': {'lr': 0.0003}}


In [6]:
# Show the set of agent ids collected from all ally teams.
ally_agents

{'agent_0', 'agent_1'}

In [7]:
### population and pyribs archive create
###
import dataclasses
from algatross.algorithms.genetic.mo_aim.population import MOAIMIslandPopulationConfig, EmitterBase
from algatross.utils.types import AgentID, ConstructorData, MOAIMIslandInhabitant, OptimizationTypeEnum, TeammateID, IslandID, TeamID
from algatross.utils.random import get_generators
from contextlib import suppress
from typing import Sequence
from itertools import chain, cycle

island_id: IslandID = 0
isl_pop_config = MOAIMIslandPopulationConfig(**island_config["population_constructor"].config)

# Only the first generator returned is used in this notebook.
_isl_pop_numpy_generator, _ = get_generators(seed=seed)

## MOAIMIslandPopulation setup()
# Take the first entry's dimension; a configured solution_dim overrides the one
# measured from the agents.
solution_dims = _solution_dim if isl_pop_config.solution_dim is None else isl_pop_config.solution_dim
pop_solution_dim = next(iter(solution_dims.values()))

# copy instead of pass by reference because jupyter
if isinstance(isl_pop_config.archive_config, dict):
    archive_config = isl_pop_config.archive_config.copy()
else:
    archive_config = dataclasses.asdict(isl_pop_config.archive_config).copy()

# Add a "trajectory" extra field, then pin the solution size and the seed.
extra_fields = archive_config.setdefault("extra_fields", {})
extra_fields["trajectory"] = ((), np.object_)
archive_config.update(solution_dim=pop_solution_dim, seed=seed)

result_archive = None

###### emitter
if isinstance(isl_pop_config.emitter_config, dict):
    emitter_config = isl_pop_config.emitter_config
else:
    emitter_config = dataclasses.asdict(isl_pop_config.emitter_config)

##### random emitter
if isinstance(isl_pop_config.random_emitter_config, dict):
    random_emitter_config = isl_pop_config.random_emitter_config
else:
    random_emitter_config = dataclasses.asdict(isl_pop_config.random_emitter_config)

# Seed both configs and, where no initial solutions are given, draw them from a
# standard normal. The emitter is handled before the random emitter so the RNG
# is consumed in the same order as before.
for qd_emitter_config in (emitter_config, random_emitter_config):
    qd_emitter_config["seed"] = seed
    if qd_emitter_config.get("initial_solutions") is None:
        initial_shape = (qd_emitter_config["batch_size"], next(iter(_solution_dim.values())))
        qd_emitter_config["initial_solutions"] = _isl_pop_numpy_generator.normal(0, 1, initial_shape)

# The result archive is optional; when enabled its config receives the same
# trajectory field, solution size and seed as the main archive config.
if isl_pop_config.use_result_archive:
    if isinstance(isl_pop_config.result_archive_config, dict):
        result_archive_config = isl_pop_config.result_archive_config
    else:
        result_archive_config = dataclasses.asdict(isl_pop_config.result_archive_config)
    extra_fields = result_archive_config.setdefault("extra_fields", {})
    extra_fields["trajectory"] = ((), np.object_)
    result_archive_config.update(solution_dim=pop_solution_dim, seed=seed)

# default to 1 sample per migrant if set to 0
isl_pop_config.max_samples_per_migrant = isl_pop_config.max_samples_per_migrant or 1

## setup_qd()
# create the archive and possibly result archive
print("Create archive with the following parameter")
print(archive_config)
archive = isl_pop_config.archive_base_class(**archive_config)

if isl_pop_config.use_result_archive:
    # Reuse the `result_archive_config` prepared earlier in this cell (trajectory
    # field, solution_dim and seed already set). Rebuilding it here through
    # dataclasses.asdict() would return a fresh dict and drop those entries.
    result_archive = isl_pop_config.result_archive_base_class(**result_archive_config)

# create the emitters
# TODO: currently always length one
emitters = [
    isl_pop_config.emitter_base_class(archive, **emitter_config),
]
# Separate emitter used only for drawing random teammates.
random_emitter = isl_pop_config.random_emitter_base_class(
    archive,
    **random_emitter_config,
)

# create the scheduler
scheduler = isl_pop_config.scheduler_base_class(
    archive,
    emitters,
    result_archive=result_archive,
    **isl_pop_config.scheduler_config,
)

def random_emitter_generator():
    """Yield emitters forever, each one picked from `emitters` by `_isl_pop_numpy_generator.choice`."""
    while True:
        yield _isl_pop_numpy_generator.choice(emitters)

def get_random_teammates(n_teammates: int = 0) -> list[np.ndarray]:
    """Ask the random emitter for teammates.

    Args:
        n_teammates: Number of `random_emitter.ask()` calls to make. Zero or a
            negative value yields an empty list.

    Returns:
        The results of the `ask()` calls, in call order.
    """
    drawn = []
    for _ in range(n_teammates):
        drawn.append(random_emitter.ask())
    return drawn

def build_batch(
    emitter: EmitterBase,
    batch_size: int | None,
    team_id: int = 0,
    do_batched: bool = False,
    teammates: Sequence[np.ndarray] | None = None,
    names: Sequence[AgentID] | None = None,
) -> dict[TeamID, dict[AgentID, MOAIMIslandInhabitant]]:
    """Turn the genomes of one `emitter.ask()` call into teams of island inhabitants.

    Every genome returned by the emitter becomes inhabitant 0 of its own team. When
    `teammates` is given, each teammate genome is added to that team with inhabitant
    ids 1, 2, ...; the same teammates are reused for every team.

    Args:
        emitter: Object whose `ask()` returns an iterable of genomes.
        batch_size: Limit for the running team id; only consulted when `do_batched`
            is true.
        team_id: Id of the first team built; incremented by one per genome.
        do_batched: Stop as soon as the running team id reaches `batch_size`.
        teammates: Optional genomes of allies added to every team.
        names: Agent names, consumed in order and restarted for each team. When
            omitted or empty, "agent_0", "agent_1", ... are used.

    Returns:
        Mapping of team id to a `{name: inhabitant}` dict.
    """

    def make_inhabitant(label, slot, genome):
        # All inhabitants share the island id and the metric -> multiplier utilities;
        # `team_id` is read from the enclosing scope at call time.
        return MOAIMIslandInhabitant(
            name=label,
            team_id=team_id,
            inhabitant_id=slot,
            genome=genome,
            island_id=island_id,
            current_island_id=island_id,
            conspecific_utility_dict=dict(zip(fitness_metric_keys, fitness_multiplier, strict=True)),
        )

    teams = {}
    for genome in emitter.ask():
        # Names restart from the beginning for every team.
        labels = iter(names or [f"agent_{idx}" for idx in range(1 + len(teammates or []))])
        lead = make_inhabitant(next(labels), 0, genome)
        team = {lead.name: lead}
        if teammates is not None:
            for slot, mate_genome in enumerate(teammates, start=1):
                ally = make_inhabitant(next(labels), slot, mate_genome)
                team[ally.name] = ally
        teams[team_id] = team
        team_id += 1
        if do_batched and team_id >= batch_size:
            break
    return teams


def get_teams_for_training(
        batch_size: int | None = None,
        randomized: bool = True
    ):
    teams: dict[TeamID, list] = {}
    do_batched = batch_size is not None
    n_emitters = len(emitters)
    names = sorted(ally_agents)
    if randomized:
        batch_size = n_emitters
        emitter = random_emitter_generator()
    elif do_batched:
        emitter = cycle(emitters)  # type: ignore[assignment]
    else:
        emitter = iter(emitters)  # type: ignore[assignment]

    # convenience function for checking if while look should continue
    def should_continue(teams, do_batched, n_emitters=n_emitters, batch_size=batch_size):
        if do_batched:
            return len(teams) < batch_size
        return len(teams) < n_emitters

    loops = 0
    with suppress(StopIteration):
        while should_continue(teams, do_batched, n_emitters):
            loops += 1
            teammates = (
                get_random_teammates(isl_pop_config.team_size - len(training_agents)) if isl_pop_config.team_size else []
            )
            print('teammates', teammates)
            teams.update(
                build_batch(  # type: ignore[arg-type]
                    emitter=next(emitter),
                    batch_size=batch_size,
                    team_id=len(teams),
                    do_batched=do_batched,
                    teammates=teammates,
                    names=names,
                ),
            )
            loops += 1
    return teams

Create archive with the following parameter
{'measure_dim': 4, 'k_neighbors': 5, 'novelty_threshold': 2, 'qd_score_offset': -500, 'extra_fields': {'trajectory': ((), <class 'numpy.object_'>)}, 'solution_dim': 11205, 'seed': 1000}


In [8]:
from collections import defaultdict
from typing import Any, Literal
from ray.rllib.policy.sample_batch import SampleBatch
from algatross.utils.merge_dicts import flatten_dicts


def load_weights(agent_weight_map):
    """Load flat parameter vectors into the agents in `_agent_map`.

    For every agent id in `agent_weight_map` the weights are loaded, the recorded
    solution size in `_solution_dim` is refreshed from the agent's flat parameters,
    and the agent's optimizer is reset.

    Args:
        agent_weight_map: Mapping of agent id to the flat weights for that agent.
    """
    for agent_id in agent_weight_map:
        policy = _agent_map[agent_id]
        policy.load_flat_params(agent_weight_map[agent_id])
        _solution_dim[agent_id] = np.prod(policy.flat_parameters.shape)
        policy.reset_optimizer()
        _agent_map[agent_id] = policy

## UDP._calc_fitness
def _calc_fitness(rollout_data: dict[AgentID, list[SampleBatch]]) -> tuple[np.ndarray, dict[str, Any]]:
    """Compute the team fitness from per-agent, per-episode rollout data.

    For every training agent and fitness metric the metric is summed within each
    episode and averaged over episodes. The resulting agent-by-metric array is
    scaled by `fitness_multiplier` and, if `_fitness_reduce_fn` is set, reduced by it.

    Args:
        rollout_data: Mapping of agent id to a list of per-episode batches, each
            indexable by fitness metric name.

    Returns:
        A tuple of the (possibly reduced) fitness array and an info dict holding the
        per-agent metric values under `agent/<id>` and the across-agent mean of each
        metric under the metric's own name.
    """
    per_agent_rows = []
    for agent_id in training_agents:
        episodes = rollout_data[agent_id]
        metric_means = []
        for fitness_metric in fitness_metric_keys:
            # Sum over each episode, then average across episodes.
            episode_totals = np.stack([episode[fitness_metric].sum() for episode in episodes])
            metric_means.append(episode_totals.mean(axis=0, keepdims=False))
        per_agent_rows.append(np.stack(metric_means))
    f = np.stack(per_agent_rows)
    assert f.shape[-1] == len(fitness_metric_keys)  # noqa: S101

    if fitness_multiplier is not None:
        f = f * fitness_multiplier[None]
    mean_f = f.mean(axis=0, keepdims=False)

    infos = {}
    for agent_id, agent_f in zip(training_agents, f, strict=True):
        infos[f"agent/{agent_id}"] = {
            fitness_metric: agent_f[idx] for idx, fitness_metric in enumerate(fitness_metric_keys)
        }
    for idx, fitness_metric in enumerate(fitness_metric_keys):
        infos[fitness_metric] = mean_f[idx]

    reduced = f if _fitness_reduce_fn is None else _fitness_reduce_fn(f)
    return reduced, infos



## UDP.fitness()
# Teams are keyed by team id; each team maps an agent name to its inhabitant.
teams: dict[TeamID, dict[AgentID, MOAIMIslandInhabitant]] = get_teams_for_training(batch_size=None, randomized=True)
display(teams)

training_iterations = 1
train = True

# The fitness below is computed from the `results` of the last training iteration,
# so at least one iteration has to run; otherwise `results` would be undefined or,
# on a re-run of this cell, silently stale.
if training_iterations < 1:
    raise ValueError("training_iterations must be at least 1")

for team_id, team in teams.items():
    for agent in _agent_map.values():
        # set all agents to eval mode
        agent.train(mode=False)

    # Load each team member's genome into the agent of the same name.
    agent_weight_map = {agent.name: agent.genome.flatten() for agent in team.values()}
    load_weights(agent_weight_map)

    # Position of every team member within the team. Built in one step so that all
    # members are recorded (clearing the map inside the loop kept only the last one).
    _agent_id_map = {agent_id: idx for idx, agent_id in enumerate(team)}
    for agent_id in team:
        _agent_map[agent_id].reset_optimizer()
        _agent_map[agent_id].train(mode=True)

    ## train
    infos = defaultdict(list)
    for _it in range(training_iterations):
        # inside runner, collect rollouts then do an island training step
        results: dict[AgentID, dict[str, SampleBatch | Any]] = runner(
            train=train,
            remote=False,
            agent_map=_agent_map,
            trainable_agents=training_agents,
            opponent_agents=opponent_agents,
            reward_metrics=dict.fromkeys(_agent_map, fitness_metric_keys),
            reward_metric_gains=dict.fromkeys(_agent_map, fitness_multiplier),
        )
        # Use the training stats when the runner returned them, the rollout stats otherwise.
        flat = flatten_dicts(
            {
                f"agent/{agent_id}/{'training' if train and 'training_stats' in res else 'rollout'}": res[
                    "training_stats" if train and "training_stats" in res else "rollout_stats"
                ]
                for agent_id, res in results.items()
            },
        )
        for path, info in flat.items():
            infos[path].append(info)

    # NOTE: the post-training evaluation rollout of the original UDP.fitness()
    # (all agents switched to eval mode, then self.rollout_rl with the same agent and
    # reward arguments, stats appended to `infos`) is not ported to this notebook.

    # Average every collected statistic over the training iterations.
    for path in list(infos):
        infos[path] = np.stack(infos[path]).mean(axis=0)

    # Fresh buffers for every team: the last iteration's rollout, split by episode.
    _rollout_buffers = {
        agent_id: results[agent_id]["extra_info"]["rollout_buffer"].split_by_episode() for agent_id in training_agents
    }
    fitness, extra_infos = _calc_fitness(_rollout_buffers)
    infos.update(extra_infos)

    display(infos)
    print("\n" * 4)
    print("tag_score", infos["tag_score"])
    print("agent/agent_0/training/total_loss", infos["agent/agent_0/training/total_loss"])

teammates [array([[-0.1058925 ,  0.68494484,  0.71998265, ..., -0.69250414,
         0.03490563,  0.51367037]])]


{0: {'agent_0': MOAIMIslandInhabitant(name='agent_0', team_id=0, inhabitant_id=0, island_id=0, current_island_id=0, genome=array([-0.32133021, -0.48566148,  1.68005813, ...,  1.34315646,
         -0.36653313,  0.34672924]), conspecific_utility_dict={'tag_score': 0.0, 'minimum_ally_speed': -0.25, 'minimum_adversary_speed': 0.5, 'closest_ally_distance': -0.25, 'closest_adversary_distance': 1.0, 'closest_landmark_distance': -1.0, 'boundary_penalty': 1.0}, db_hash=UUID('b435ed27-34ba-4e18-8aa0-9fee90cdd4eb')),
  'agent_1': MOAIMIslandInhabitant(name='agent_1', team_id=0, inhabitant_id=1, island_id=0, current_island_id=0, genome=array([[-0.1058925 ,  0.68494484,  0.71998265, ..., -0.69250414,
           0.03490563,  0.51367037]]), conspecific_utility_dict={'tag_score': 0.0, 'minimum_ally_speed': -0.25, 'minimum_adversary_speed': 0.5, 'closest_ally_distance': -0.25, 'closest_adversary_distance': 1.0, 'closest_landmark_distance': -1.0, 'boundary_penalty': 1.0}, db_hash=UUID('ee9724e4-cb60-48e

	reset env with params {'seed': 1000, 'tape_index': 0, 'env_rank': 0}


2024-08-01 18:50:03,489	INFO worker.py:1740 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


defaultdict(list,
            {'agent/adversary_0/rollout/returns': 6.6666665,
             'agent/adversary_1/rollout/returns': 6.6666665,
             'agent/adversary_2/rollout/returns': 6.6666665,
             'agent/agent_0/training/actor_loss': array([0.24308524, 0.23575436, 0.22775751, 0.22112583, 0.21621878,
                    0.21186645, 0.20772243, 0.203897  , 0.20057376, 0.19804901,
                    0.19608526, 0.1955801 , 0.19495025, 0.19422422, 0.19351475,
                    0.19284995, 0.19210418, 0.1912758 , 0.19043167, 0.18975091],
                   dtype=float32),
             'agent/agent_0/training/critic_loss': array([122.07038 , 120.478355, 118.89169 , 117.310905, 115.73661 ,
                    114.16967 , 112.61091 , 111.061195, 109.52139 , 107.99229 ,
                    106.47454 , 104.96884 , 103.47575 , 101.99589 , 100.52983 ,
                     99.07817 ,  97.64146 ,  96.22032 ,  94.81526 ,  93.42678 ],
                   dtype=float32),
            






tag_score 0.0
agent/agent_0/training/total_loss [122.31345  120.714264 119.120026 117.533356 115.95518  114.385056
 112.82352  111.2715   109.72999  108.199974 106.68188  105.17694
 103.684235 102.20443  100.738205  99.28619   97.8489    96.42693
  95.020935  93.63156 ]
