In [68]:
from deep_nmmo.utils import get_class_from_path
from deep_nmmo.envs.team_based_env.env_configs.custom_competition_config import CustomCompetitionConfig
from deep_nmmo.envs.team_based_env.loops.utils import init_env_params

import nmmo
from nmmo import config
from nmmo.io import action

import neurips2022nmmo
from neurips2022nmmo.scripted import baselines
from neurips2022nmmo import Team
from neurips2022nmmo import CompetitionConfig, scripted, RollOut, TeamBasedEnv

import ray
ray.shutdown()
ray.init()

from ray.rllib.agents import ppo
from ray.tune.registry import register_env
from ray.rllib.models import ModelCatalog

# from ray.rllib.env.multi_agent_env import make_multi_agent
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.rllib.policy.policy import PolicySpec
from ray.rllib.utils.annotations import override
from ray.rllib.utils.typing import MultiAgentDict

import gym

import numpy as np

from IPython.display import display

from typing import Dict, Any, Type, List

2022-09-01 14:55:28,903	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at 127.0.0.1:8265


### TODO

- To use our own custom multi-agent environment, we should take the team's perspective and implement it inside the RLlibTeam class.
- I.e. `TeamBasedEnv` is still the main env, but the RLlibTeam class uses an `RLlibMultiAgentTeamEnv`.

PROBLEM: How are we going to register `RLlibMultiAgentTeamEnv` with RLlib if it requires us to use the `TeamBasedEnv`? We cannot access the internal RLlib env loop, so it is not clear that this is possible. We may be forced to somehow get `TeamBasedEnv` working with RLlib directly...

In [72]:
class RLlibMultiAgentTeamEnv(MultiAgentEnv):
    """RLlib ``MultiAgentEnv`` wrapping the agents of one team.

    Agents are always addressed by their agent ID, i.e. by the keys of
    ``agent_ids_to_agents``; list positions are never used as IDs, so the
    IDs do not have to be the contiguous range ``0..n-1``.

    Args:
        env_config: Config used to build a throw-away ``TeamBasedEnv`` from
            which the team's observation and action spaces are read.
        agent_ids_to_agents: Mapping of agent ID -> agent object. ``reset``
            calls ``agent.reset()`` and ``step`` calls ``agent.step(action)``
            on these objects.
        team_id: Passed to ``TeamBasedEnv.observation_space`` /
            ``action_space`` to select this team's spaces.
        *args, **kwargs: Accepted but unused.
    """

    def __init__(self,
                 env_config,
                 agent_ids_to_agents: dict,
                 team_id,
                 *args,
                 **kwargs):
        # inherit from RLlib multi-agent env
        MultiAgentEnv.__init__(self)

        self.team_id = team_id # use for indexing to get team's params from TeamBasedEnv

        # init dummy env to get obs and action spaces
        dummy_env = TeamBasedEnv(env_config)
        self.observation_space = dummy_env.observation_space(self.team_id)
        self.action_space = dummy_env.action_space(self.team_id)
        print(f'observation_space: {self.observation_space}')
        print(f'action_space: {self.action_space}')

        # init agents
        self.agent_ids_to_agents = agent_ids_to_agents
        self._agent_ids = list(agent_ids_to_agents.keys())
        self.agents = list(agent_ids_to_agents.values())
        print(f'agents: {self.agents}')
        print(f'_agent_ids: {self._agent_ids}')

        # IDs of agents that have reported done since the last reset
        self.dones = set()

    @override(MultiAgentEnv)
    def observation_space_sample(self, agent_ids: list = None) -> MultiAgentDict:
        """Sample one observation per agent ID (all known agents by default)."""
        if agent_ids is None:
            # default to the real agent IDs, not to list positions
            agent_ids = self._agent_ids
        return {agent_id: self.observation_space.sample() for agent_id in agent_ids}

    @override(MultiAgentEnv)
    def action_space_sample(self, agent_ids: list = None) -> MultiAgentDict:
        """Sample one action per agent ID (all known agents by default)."""
        if agent_ids is None:
            # default to the real agent IDs, not to list positions
            agent_ids = self._agent_ids
        return {agent_id: self.action_space.sample() for agent_id in agent_ids}

    @override(MultiAgentEnv)
    def action_space_contains(self, x: MultiAgentDict) -> bool:
        """Return True iff ``x`` is a dict whose values all lie in the action space."""
        if not isinstance(x, dict):
            return False
        return all(self.action_space.contains(val) for val in x.values())

    @override(MultiAgentEnv)
    def observation_space_contains(self, x: MultiAgentDict) -> bool:
        """Return True iff ``x`` is a dict whose values all lie in the observation space."""
        if not isinstance(x, dict):
            return False
        return all(self.observation_space.contains(val) for val in x.values())

    @override(MultiAgentEnv)
    def reset(self):
        """Reset every agent and return ``{agent_id: agent.reset()}``."""
        self.dones = set()
        # key the result by agent ID so it lines up with what step() expects
        return {
            agent_id: agent.reset()
            for agent_id, agent in self.agent_ids_to_agents.items()
        }

    @override(MultiAgentEnv)
    def step(self, action_dict):
        """Step every agent that has an entry in ``action_dict``.

        Args:
            action_dict: Mapping of agent ID -> action for that agent.

        Returns:
            ``(obs, rew, done, info)`` dicts keyed by agent ID; ``done`` also
            holds ``"__all__"``, which is True once as many distinct agents
            have reported done as there are agents in this env.

        Raises:
            KeyError: If ``action_dict`` contains an unknown agent ID.
        """
        obs, rew, done, info = {}, {}, {}, {}
        for agent_id, action in action_dict.items():
            # look the agent up by ID; a positional index is only correct
            # when the IDs happen to be 0..n-1
            agent = self.agent_ids_to_agents[agent_id]
            obs[agent_id], rew[agent_id], done[agent_id], info[agent_id] = agent.step(action)
            if done[agent_id]:
                self.dones.add(agent_id)
        done["__all__"] = len(self.dones) == len(self.agents)
        return obs, rew, done, info

class RLlibScriptedHybridAgentTeam(Team):
    """Team made of scripted agents plus RLlib-controlled agents.

    Scripted agents are instantiated from import paths. The RLlib agents are
    currently ``None`` placeholders (no policy is attached yet), so they are
    registered in the team but produce no actions.

    Args:
        team_id: Identifier of the team; stored as ``self.id``.
        env_config: Environment config forwarded to ``Team``, to every
            scripted agent and to the team's ``RLlibMultiAgentTeamEnv``.
        paths_to_scripted_agents_cls: Mapping of agent id (anything ``int()``
            accepts) -> import path of the scripted agent class.
        **kwargs: Accepted but unused.
    """

    def __init__(self,
                 team_id,
                 env_config,
                 paths_to_scripted_agents_cls,
                 **kwargs):
        super().__init__(team_id, env_config)
        self.id = team_id
        print(f'team_id: {self.id}')

        # Keep the ids in a list, in the same order the agents are built, so
        # ids and agents can be paired without relying on set iteration order.
        scripted_ids = [int(idx) for idx in paths_to_scripted_agents_cls]
        self.scripted_agents = [
            get_class_from_path(path_to_scripted_agent_cls)(config=env_config, idx=agent_id)
            for agent_id, path_to_scripted_agent_cls
            in zip(scripted_ids, paths_to_scripted_agents_cls.values())
        ]
        self.scripted_agent_ids = set(scripted_ids)
        self.scripted_agent_idxs = set(range(len(self.scripted_agents)))

        # placeholder slot(s) for RLlib-controlled agents
        self.rllib_agents = [None]
        first_rllib_id = max(scripted_ids, default=-1) + 1
        rllib_ids = [first_rllib_id + offset for offset in range(len(self.rllib_agents))]
        self.rllib_agent_ids = set(rllib_ids)
        # RLlib agents occupy the positions directly after the scripted ones
        # (the previous range(1, n + 1) skipped the first free position)
        first_rllib_idx = len(self.scripted_agents)
        self.rllib_agent_idxs = set(
            range(first_rllib_idx, first_rllib_idx + len(self.rllib_agents)))

        # Position-indexed list used by the action getters and by
        # post_process_actions. NOTE(review): nothing visible in this notebook
        # assigns self.agents otherwise -- confirm Team does not manage it.
        self.agents = self.scripted_agents + self.rllib_agents

        print(f'scripted_agents: {self.scripted_agents}')
        print(f'scripted_agent_ids: {self.scripted_agent_ids}')
        print(f'scripted_agent_idxs: {self.scripted_agent_idxs}')

        print(f'rllib_agents: {self.rllib_agents}')
        print(f'rllib_agent_ids: {self.rllib_agent_ids}')
        print(f'rllib_agent_idxs: {self.rllib_agent_idxs}')

        # init multi-agent env for team to interact with
        agent_ids_to_agents = dict(zip(scripted_ids, self.scripted_agents))
        agent_ids_to_agents.update(zip(rllib_ids, self.rllib_agents))
        self.team_env = RLlibMultiAgentTeamEnv(
                                            env_config=env_config,
                                            agent_ids_to_agents=agent_ids_to_agents,
                                            team_id=self.id,
                                        )
        print(f'team_env: {self.team_env}')

    def reset(self, *args, **kwargs):
        """No-op; arguments are accepted and ignored."""
        pass

    def get_rllib_agent_actions(self, observations):
        """Return ``{idx: action}`` for observed RLlib agents that have a policy.

        Slots that still hold the ``None`` placeholder are skipped instead of
        being called.
        """
        return {
            i: self.agents[i](obs)
            for i, obs in observations.items()
            if i in self.rllib_agent_idxs and self.agents[i] is not None
        }

    def get_scripted_agent_actions(self, observations):
        """Return ``{idx: action}`` for every observed scripted agent."""
        return {i: self.agents[i](obs) for i, obs in observations.items() if i in self.scripted_agent_idxs}

    def post_process_actions(self, actions):
        """Replace action argument values by indices, in place, and return ``actions``.

        Fixed arguments become their position in ``arg.edges``; targets and
        items become their position in the acting agent's ``ob.agents`` /
        ``ob.items`` / ``ob.market`` arrays.
        """
        for i in actions:
            for atn, args in actions[i].items():
                for arg, val in args.items():
                    if arg.argType == nmmo.action.Fixed:
                        actions[i][atn][arg] = arg.edges.index(val)
                    elif arg == nmmo.action.Target:
                        actions[i][atn][arg] = self.get_target_index(
                            val, self.agents[i].ob.agents)
                    elif atn in (nmmo.action.Sell,
                                 nmmo.action.Use) and arg == nmmo.action.Item:
                        actions[i][atn][arg] = self.get_item_index(
                            val, self.agents[i].ob.items)
                    elif atn == nmmo.action.Buy and arg == nmmo.action.Item:
                        actions[i][atn][arg] = self.get_item_index(
                            val, self.agents[i].ob.market)
        return actions

    def act(self, observations, rewards=None):
        '''
        Compute the team's actions for one step.

        During training, pass rewards from last step to update RLlib policy.

        During inference (e.g. when make submission), no rewards need to be
        passed to act().

        Note: a "stat" entry, if present, is removed from ``observations``
        in place before the remaining keys are treated as agent indices.
        '''
        observations.pop("stat", None)

        if rewards is not None:
            # training: rewards from the last step were supplied
            # TODO: if an action was chosen at the last step, assign team
            # reward for taking that action
            pass

        # TODO
        # get team actions for this step
        actions = {}
        actions.update(self.get_scripted_agent_actions(observations))
        actions.update(self.get_rllib_agent_actions(observations))

        # return team actions to TeamBasedEnv
        return self.post_process_actions(actions)

    @staticmethod
    def get_item_index(instance: int, items: np.ndarray) -> int:
        """Return the position in ``items`` of the row whose Item.ID equals ``instance``.

        Raises:
            ValueError: If no row has that ID.
        """
        for i, itm in enumerate(items):
            id_ = nmmo.scripting.Observation.attribute(itm,
                                                       nmmo.Serialized.Item.ID)
            if id_ == instance:
                return i
        raise ValueError(f"Instance {instance} not found")

    @staticmethod
    def get_target_index(target: int, agents: np.ndarray) -> int:
        """Return the position of ``target`` among the truthy Entity.IDs of ``agents``.

        Rows whose Entity.ID is falsy are dropped before indexing.

        Raises:
            ValueError: If ``target`` is not among the remaining IDs.
        """
        targets = [
            x for x in [
                nmmo.scripting.Observation.attribute(
                    agent, nmmo.Serialized.Entity.ID) for agent in agents
            ] if x
        ]
        return targets.index(target)
        
        
        
        
        
# env config: import paths of the env class and of its config class
path_to_env_cls = 'neurips2022nmmo.TeamBasedEnv'
path_to_env_config_cls = 'deep_nmmo.envs.team_based_env.env_configs.custom_competition_config.CustomCompetitionConfig'
env_config_kwargs = None

# previous, path-based layout of teams_config (kept for reference)
# teams_config = {
#     'Combat':
#         {
#             'path_to_team_cls': 'neurips2022nmmo.scripted.CombatTeam'
#         },
#     'Mixture':
#         {
#             'path_to_team_cls': 'neurips2022nmmo.scripted.MixtureTeam'
#         },
# }

# scripted members of the RLlib team: agent id (as str) -> agent class path
rllib_paths_to_scripted_agents_cls = dict.fromkeys(
    ('0', '1', '2', '3', '4', '5', '6'),
    'neurips2022nmmo.scripted.baselines.Mage',
)

# team name -> team class and the extra kwargs its constructor receives
teams_config = dict(
    RLlib=dict(
        cls=RLlibScriptedHybridAgentTeam,
        kwargs=dict(paths_to_scripted_agents_cls=rllib_paths_to_scripted_agents_cls),
    ),
    Combat=dict(cls=neurips2022nmmo.scripted.CombatTeam, kwargs=dict()),
    Mixture=dict(cls=neurips2022nmmo.scripted.MixtureTeam, kwargs=dict()),
)
teams_copies = [1, 2]
        
        
        
        
        
# init env params
env_config = get_class_from_path(path_to_env_config_cls)()

# Instantiate one team per teams_config entry.
teams = []
for team_id, params in teams_config.items():
    team_cls = params['cls']
    # Build a fresh kwargs dict: writing env_config / team_id into
    # params['kwargs'] would mutate teams_config itself.
    team_kwargs = {**params['kwargs'], 'env_config': env_config, 'team_id': team_id}
    teams.append(team_cls(**team_kwargs))
print(f'Teams: {teams}')

# Register one nmmo.Agent subclass per team in the env config. The class body
# runs on every iteration, so each class captures that iteration's team.id.
for i, team in enumerate(teams):
    class Agent(nmmo.Agent):
        name = f'{team.id}'
        policy = f'{team.id}'
    env_config.PLAYERS[i] = Agent

  5%|████████▎                                                                                                                                                              | 2/40 [00:00<00:02, 14.91it/s]

team_id: RLlib | kwargs: {'paths_to_scripted_agents_cls': {'0': 'neurips2022nmmo.scripted.baselines.Mage', '1': 'neurips2022nmmo.scripted.baselines.Mage', '2': 'neurips2022nmmo.scripted.baselines.Mage', '3': 'neurips2022nmmo.scripted.baselines.Mage', '4': 'neurips2022nmmo.scripted.baselines.Mage', '5': 'neurips2022nmmo.scripted.baselines.Mage', '6': 'neurips2022nmmo.scripted.baselines.Mage'}}
team_id: RLlib
scripted_agents: [<neurips2022nmmo.scripted.baselines.Mage object at 0x7f252bfcc610>, <neurips2022nmmo.scripted.baselines.Mage object at 0x7f252bfcc2e0>, <neurips2022nmmo.scripted.baselines.Mage object at 0x7f252bfcc2b0>, <neurips2022nmmo.scripted.baselines.Mage object at 0x7f252bfcc3a0>, <neurips2022nmmo.scripted.baselines.Mage object at 0x7f252bfcc310>, <neurips2022nmmo.scripted.baselines.Mage object at 0x7f252bfcc400>, <neurips2022nmmo.scripted.baselines.Mage object at 0x7f252bfcc190>]
scripted_agent_ids: {0, 1, 2, 3, 4, 5, 6}
scripted_agent_idxs: {0, 1, 2, 3, 4, 5, 6}
rllib_agen

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40/40 [00:02<00:00, 14.97it/s]


observation_space: Dict(Entity:Dict(Continuous:Box(-1048576.0, 1048576.0, (100, 24), float32), Discrete:Box(0, 4096, (100, 5), int32), N:Box(0, 100, (1,), int32)), Item:Dict(Continuous:Box(-1048576.0, 1048576.0, (170, 16), float32), Discrete:Box(0, 4096, (170, 3), int32), N:Box(0, 170, (1,), int32)), Market:Dict(Continuous:Box(-1048576.0, 1048576.0, (170, 16), float32), Discrete:Box(0, 4096, (170, 3), int32), N:Box(0, 170, (1,), int32)), Tile:Dict(Continuous:Box(-1048576.0, 1048576.0, (225, 4), float32), Discrete:Box(0, 4096, (225, 3), int32), N:Box(0, 15, (1,), int32)))
action_space: Dict(<class 'nmmo.io.action.Attack'>:Dict(<class 'nmmo.io.action.Style'>:Discrete(3), <class 'nmmo.io.action.Target'>:Discrete(100)), <class 'nmmo.io.action.Buy'>:Dict(<class 'nmmo.io.action.Item'>:Discrete(170)), <class 'nmmo.io.action.Comm'>:Dict(<class 'nmmo.io.action.Token'>:Discrete(170)), <class 'nmmo.io.action.Move'>:Dict(<class 'nmmo.io.action.Direction'>:Discrete(4)), <class 'nmmo.io.action.Sell'

In [48]:
# set up configs

# env config: import paths of the env class and of its config class
path_to_env_cls = 'neurips2022nmmo.TeamBasedEnv'
path_to_env_config_cls = 'deep_nmmo.envs.team_based_env.env_configs.custom_competition_config.CustomCompetitionConfig'
env_config_kwargs = None

# team name -> import path of the team class
teams_config = dict(
    Combat=dict(path_to_team_cls='neurips2022nmmo.scripted.CombatTeam'),
    Mixture=dict(path_to_team_cls='neurips2022nmmo.scripted.MixtureTeam'),
)
teams_copies = [1, 2]

# init env params; init_env_params hands back the env config, the team copies
# and the teams, in that order
env_params = init_env_params(
    path_to_env_config_cls=path_to_env_config_cls,
    env_config_kwargs=env_config_kwargs,
    teams_copies=teams_copies,
    teams_config=teams_config,
)
env_config, teams_copies, teams = env_params



# rllib config
path_to_rllib_trainer_cls = 'ray.rllib.agents.ppo.PPOTrainer'

# ma_cls = make_multi_agent(lambda env_config: get_class_from_path(path_to_env_cls)(env_config))
# print(ma_cls)
# dummy_env = ma_cls(env_config)
# print(dummy_env)
# print(type(dummy_env))
# _ = dummy_env.reset()


dummy_env = RLlibMultiAgentTeamBasedEnv(
                                    path_to_env_cls=path_to_env_cls,
                                    path_to_env_config_cls=path_to_env_config_cls,
                                    teams_copies=teams_copies,
                                    teams_config=teams_config,
                                )
print(dummy_env)

policies = {}
for policy_id in dummy_env._agent_ids:
    policies[str(policy_id)] = PolicySpec(
                                # policy_class=player.__class__, # infer automatically from algorithm
                                # observation_space=dummy_env.observation_space(player_id),
                                # action_space=dummy_env.action_space(player_id),
                                config={},
                            )

def policy_mapping_fn(agent_id, episode, worker, **kwargs):
    '''Return the ID of the policy an agent should use: the agent ID as a string.

    ``episode``, ``worker`` and any extra keyword arguments are ignored.
    '''
    policy_id = f'{agent_id!s}'
    return policy_id

# multi-agent section: the policies built above plus the agent -> policy routing
multiagent_config = dict(
    policies=policies,
    policy_mapping_fn=policy_mapping_fn,
)


# top-level RLlib config
rllib_config = dict(
    framework='torch',
    multiagent=multiagent_config,
)

  5%|████████▎                                                                                                                                                              | 2/40 [00:00<00:02, 14.90it/s]

Generating 40 maps


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40/40 [00:02<00:00, 15.15it/s]


agents: [<nmmo.entity.player.Player object at 0x7f543a832a60>, <nmmo.entity.player.Player object at 0x7f544fe559a0>, <nmmo.entity.player.Player object at 0x7f54502fe8e0>, <nmmo.entity.player.Player object at 0x7f5439637820>, <nmmo.entity.player.Player object at 0x7f5439624760>, <nmmo.entity.player.Player object at 0x7f54398b46a0>, <nmmo.entity.player.Player object at 0x7f54398a55e0>, <nmmo.entity.player.Player object at 0x7f5439a6e520>, <nmmo.entity.player.Player object at 0x7f5439a4c460>, <nmmo.entity.player.Player object at 0x7f543a0ef3a0>, <nmmo.entity.player.Player object at 0x7f543a1b62e0>, <nmmo.entity.player.Player object at 0x7f543a1a6220>, <nmmo.entity.player.Player object at 0x7f543a5b5160>, <nmmo.entity.player.Player object at 0x7f543a5a70a0>, <nmmo.entity.player.Player object at 0x7f543a5a7fa0>, <nmmo.entity.player.Player object at 0x7f544ff07ee0>, <nmmo.entity.player.Player object at 0x7f544feebe20>, <nmmo.entity.player.Player object at 0x7f544fffcd60>, <nmmo.entity.player

TypeError: 'NoneType' object is not callable

In [None]:
# register env with ray
def _make_team_based_multi_agent_env(env_config):
    """Env creator for Ray: wrap the env class at ``path_to_env_cls`` with
    ``make_multi_agent`` and instantiate the result with ``env_config``."""
    # Imported here because the notebook's top-level import of
    # make_multi_agent is commented out, which left the name undefined.
    from ray.rllib.env.multi_agent_env import make_multi_agent

    # NOTE(review): whether make_multi_agent is a suitable wrapper for
    # TeamBasedEnv is not established by this notebook -- confirm.
    env_cls = get_class_from_path(path_to_env_cls)
    return make_multi_agent(env_cls)(env_config)


# the env is registered under the last component of its import path
register_env(path_to_env_cls.split('.')[-1], _make_team_based_multi_agent_env)

In [14]:
# earlier attempt at passing the spaces through the rllib config (kept for reference)
# # update rllib config with observation and action spaces
# dummy_env = get_class_from_path(path_to_env_cls)(env_config)
# agent = 0
# rllib_config['observation_space'] = dummy_env.observation_space(agent)
# rllib_config['action_space'] = dummy_env.action_space(agent)

# init rllib trainer: resolve the trainer class from its import path, then build it
# NOTE(review): rllib_config as built in this notebook has no 'env' entry and
# no env is passed here -- confirm whether the env registered above should be.
rllib_trainer_cls = get_class_from_path(path_to_rllib_trainer_cls)
trainer = rllib_trainer_cls(config=rllib_config)

RayTaskError(AssertionError): [36mray::RolloutWorker.apply()[39m (pid=67793, ip=128.40.41.23, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f0f6846fb50>)
  File "/scratch/zciccwf/py36/envs/nmmo/lib/python3.9/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1664, in apply
    return func(self, *args, **kwargs)
  File "/scratch/zciccwf/py36/envs/nmmo/lib/python3.9/site-packages/ray/rllib/evaluation/worker_set.py", line 269, in <lambda>
    self.foreach_worker(lambda w: w.assert_healthy())
  File "/scratch/zciccwf/py36/envs/nmmo/lib/python3.9/site-packages/ray/rllib/evaluation/rollout_worker.py", line 766, in assert_healthy
    assert is_healthy, (
AssertionError: RolloutWorker <ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f0f6846fb50> (idx=1; num_workers=2) not healthy!

In [None]:
# Build a TeamBasedEnv from the current env_config; the following cells
# inspect its observation space.
env = TeamBasedEnv(env_config)

In [None]:
# show the observation space for index 0, then its type, one per line
print(env.observation_space(0), type(env.observation_space(0)), sep='\n')

2022-09-01 13:22:29,162	ERROR worker.py:399 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::RolloutWorker.apply()[39m (pid=67794, ip=128.40.41.23, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f86f6178a90>)
  File "/scratch/zciccwf/py36/envs/nmmo/lib/python3.9/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1664, in apply
    return func(self, *args, **kwargs)
  File "/scratch/zciccwf/py36/envs/nmmo/lib/python3.9/site-packages/ray/rllib/evaluation/worker_set.py", line 269, in <lambda>
    self.foreach_worker(lambda w: w.assert_healthy())
  File "/scratch/zciccwf/py36/envs/nmmo/lib/python3.9/site-packages/ray/rllib/evaluation/rollout_worker.py", line 766, in assert_healthy
    assert is_healthy, (
AssertionError: RolloutWorker <ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f86f6178a90> (idx=2; num_workers=2) not healthy!


In [None]:
# list every entry of the observation space for index 0: a blank line, the key, the value
for key, val in env.observation_space(0).items():
    print(f'\nkey: {key}', f'val: {val}', sep='\n')

2022-09-01 13:08:06,043	ERROR worker.py:399 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::RolloutWorker.apply()[39m (pid=65655, ip=128.40.41.23, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f07c37f8b50>)
  File "/scratch/zciccwf/py36/envs/nmmo/lib/python3.9/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1664, in apply
    return func(self, *args, **kwargs)
  File "/scratch/zciccwf/py36/envs/nmmo/lib/python3.9/site-packages/ray/rllib/evaluation/worker_set.py", line 269, in <lambda>
    self.foreach_worker(lambda w: w.assert_healthy())
  File "/scratch/zciccwf/py36/envs/nmmo/lib/python3.9/site-packages/ray/rllib/evaluation/rollout_worker.py", line 766, in assert_healthy
    assert is_healthy, (
AssertionError: RolloutWorker <ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f07c37f8b50> (idx=2; num_workers=2) not healthy!
