In [103]:
import time
import enum

from pettingzoo.mpe import simple_tag_v2
from pettingzoo.utils import random_demo


Arguments in instantiate environment.

- num_good: number of good agents
- num_adversaries: number of adversaries
- num_obstacles: number of obstacles
- max_cycles: number of frames (a step for each agent) until game terminates
- continuous_actions: Whether agent action spaces are discrete(default) or continuous

In [64]:
env = simple_tag_v2.env(
    num_good=3,
    num_adversaries=3,
    num_obstacles=2,
    max_cycles=100,
    continuous_actions=False
).unwrapped
print("Peek into unwrapped environment:", *dir(env))

Peek into unwrapped environment: __class__ __delattr__ __dict__ __dir__ __doc__ __eq__ __format__ __ge__ __getattribute__ __gt__ __hash__ __init__ __init_subclass__ __le__ __lt__ __module__ __ne__ __new__ __reduce__ __reduce_ex__ __repr__ __setattr__ __sizeof__ __str__ __subclasshook__ __weakref__ _accumulate_rewards _agent_selector _clear_rewards _dones_step_first _execute_world_step _index_map _reset_render _set_action _was_done_step action_space action_spaces agent_iter agents close continuous_actions current_actions last local_ratio max_cycles max_num_agents metadata np_random num_agents observation_space observation_spaces observe possible_agents render reset scenario seed state state_space step steps unwrapped viewer world


Adversaries (red) capture non-adversary (green). The map is a 2D grid and everything is initialized in the region [-1, +1]. There doesn't seem to be position clipping for out of bounds, but non-adversary agent are penalized for out of bounds.
Agent's observation is a ndarray vector of concatenated data in the following order:

1. current velocity (2,)
2. current position (2,)
3. relative position (2,) of each landmark
4. relative position (2,) of each other agent
5. velocity (2,) of each other non-adversary agent

Actions:

- 0 is NOP
- 1 is go left
- 2 is go right
- 3 is go down
- 4 is go up

In [97]:
# Print variables of the environment
# Documentation:   https://www.pettingzoo.ml/api
env.reset()
print("State size", env.state_space.shape)
print("Name of current agent", env.agent_selection)
print("Observation space of current agent", env.observation_space(env.agent_selection).shape)
print("Action space of current agent", env.action_space(env.agent_selection))
print("Sample random action from current agent", env.action_space(env.agent_selection).sample())
print()

print("D")

# select an agent in the environment world, after using env.unwrapped
agent = env.world.agents[0]
print("agent's name is", agent.name)
print("agent's position and velocity coordinates", agent.state.p_vel, agent.state.p_pos)
print("is agent an adversary?", agent.adversary)

landmark = env.world.landmarks[0]
print("landmark's name is", landmark.name)
print("landmark's position coordinates (doesn't move)", landmark.state.p_pos)

State size (138,)
Name of current agent adversary_0
Observation space of current agent (24,)
Action space of current agent Discrete(5)
Sample random action from current agent 0

D
agent's name is adversary_0
agent's position and velocity coordinates [0. 0.] [0.90968928 0.33838488]
is agent an adversary? True
landmark's name is landmark 0
landmark's position coordinates (doesn't move) [-0.53688665 -0.01926829]


In [112]:
# Demo environment with random policy
env.reset()
random_demo(env, render=True, episodes=5)

Average total reward -574.4233956074986


-2872.116978037493

In [114]:
def policy(observation, agent):
    """
    Parameters
    ==========
    agent : str
    """
#     print(observation.shape)
#     print(agent)
    if "adversary" in agent:
        # adversary
        if agent == "adversary_0":
            return 4
        
    if "agent" in agent:
        # non-adversary
        pass
    return 0

env.reset()
for agent in env.agent_iter():
    env.render()
    observation, reward, done, info = env.last()
    if done:
        env.step(None)
    else:
        action = policy(observation, agent)
        env.step(action)
    # time.sleep(0.1)