### Step 1: Install and import libraries

In [None]:
pip install momaland

Note: you may need to restart the kernel to use updated packages.


### Step 2: Create an environment

In [None]:
from momaland.envs.multiwalker_stability import momultiwalker_stability_v0
import numpy as np

# Instantiate the environment through the module's `env()` constructor.
env = momultiwalker_stability_v0.env()

### Step 3: Extract environment information

In [None]:
# Observation space of every agent, keyed by agent name.
env.observation_spaces

{'walker_0': Box(-inf, inf, (31,), float32),
 'walker_1': Box(-inf, inf, (31,), float32),
 'walker_2': Box(-inf, inf, (31,), float32)}

In [None]:
# Action space of every agent, keyed by agent name.
env.action_spaces

{'walker_0': Box(-1.0, 1.0, (4,), float32),
 'walker_1': Box(-1.0, 1.0, (4,), float32),
 'walker_2': Box(-1.0, 1.0, (4,), float32)}

In [None]:
# Reward space of every agent, keyed by agent name (the rewards are vectors).
env.reward_spaces

{'walker_0': Box([  -0.46666667 -110.         -100.        ], [0.46666667 0.         0.        ], (3,), float32),
 'walker_1': Box([  -0.46666667 -110.         -100.        ], [0.46666667 0.         0.        ], (3,), float32),
 'walker_2': Box([  -0.46666667 -110.         -100.        ], [0.46666667 0.         0.        ], (3,), float32)}

### Step 4.1: AEC API Demo
Observation, reward, termination, truncation, and info are returned by the `last()` function, as in PettingZoo (PZ), except that the rewards are vectors!

In [None]:
env.reset()
episode_rewards = []
for agent in env.agent_iter():
    # `last()` returns the data of the agent about to act; its reward is a vector
    observation, vec_reward, termination, truncation, info = env.last()
    episode_rewards.append(vec_reward)
    # finished agents are stepped with `None`; the others take a random action
    action = None if (termination or truncation) else env.action_space(agent).sample()
    env.step(action)
env.close()

# the leading entries of the log: rewards of all agents from the first step
episode_rewards[: len(env.possible_agents)]

[array([0., 0., 0.], dtype=float32),
 array([0., 0., 0.], dtype=float32),
 array([0., 0., 0.], dtype=float32)]

### Step 4.2: Parallel API Demo
The environment is initialized with the `parallel_env()` function. All agents `step()` through the environment at the same time, each with its own action. A key difference between AEC and Parallel is that actions and observations are dictionaries in Parallel, as they are all received at the same time.

In [None]:
from momaland.envs.item_gathering import moitem_gathering_v0

# Create a fresh environment through the parallel constructor
env = moitem_gathering_v0.parallel_env()
observations, infos = env.reset()
episode_rewards = []
# keep stepping for as long as the environment still lists agents
while env.agents:
    # one random action per agent, collected in a dictionary keyed by agent name
    actions = {
        agent_id: env.action_space(agent_id).sample()
        for agent_id in env.agents
    }
    observations, vec_rewards, terminations, truncations, infos = env.step(actions)
    episode_rewards.append(vec_rewards)
env.close()

# each log entry is a dictionary of vector rewards, one per agent; show the first step
episode_rewards[0]

{'agent_0': array([0., 0., 0.]), 'agent_1': array([0., 0., 0.])}

### Step 5: Wrappers
In addition to the native wrappers provided by MOMAland, SuperSuit and PettingZoo wrappers are also compatible with MOMAland environments.

#### MOMAland

In [None]:
from momaland.utils.aec_wrappers import LinearizeReward, NormalizeReward
import numpy as np

env = momultiwalker_stability_v0.env()

# Wrap each reward component of each agent in its own NormalizeReward wrapper
for agent in env.possible_agents:
    for idx in range(env.reward_space(agent).shape[0]):
        env = NormalizeReward(env, agent, idx)

# Turn the vector reward into a scalar reward, moving to the single-objective
# multi-agent setting (aka PettingZoo). Each agent can weight its objectives differently.
# NOTE(review): every weight vector below has 2 entries, while the reward spaces
# printed in Step 3 have shape (3,) — confirm the weight length matches the
# number of objectives of the installed environment version.
weights = dict(
    walker_0=np.array([0.7, 0.3]),
    walker_1=np.array([0.5, 0.5]),
    walker_2=np.array([0.2, 0.8]),
)
env = LinearizeReward(env, weights)

# From here on the environment is used like a regular PZ env
env.reset()
episode_rewards = []
for agent in env.agent_iter():
    observation, reward, termination, truncation, info = env.last()
    episode_rewards.append(reward)
    # finished agents are stepped with `None`; the others take a random action
    action = None if (termination or truncation) else env.action_space(agent).sample()
    env.step(action)
env.close()

# reverse the log in place so the final step comes first, then show the
# scalarized and normalized rewards of all agents from the last step
episode_rewards.reverse()
episode_rewards[: len(env.possible_agents)]

[-1.7753998190722102, -0.9956741660833359, -6.084330405294895]

#### SuperSuit

In [None]:
from supersuit import clip_actions_v0, normalize_obs_v0, agent_indicator_v0
from momaland.envs.crazyrl.catch import catch_v0

# Parallel SS wrappers, stacked one on top of the other around the parallel env
env = catch_v0.parallel_env()
env = clip_actions_v0(env)
env = normalize_obs_v0(env)
env = agent_indicator_v0(env)

# Take a single step with one random action per agent
observations, infos = env.reset()
actions = {agent: env.action_space(agent).sample() for agent in env.agents}
observations, vec_rewards, terminations, truncations, infos = env.step(actions)

# Close the environment once done with it, as the other demo cells do;
# the observations collected above remain available for display.
env.close()

# normalized observation
observations

{'agent_0': array([0.48762643, 0.48954174, 0.28093657, 0.66940665, 0.6666667 ,
        0.86621314, 0.63619226, 0.6529357 , 0.3703892 , 0.5159465 ,
        0.66146713, 0.39128128, 0.8341556 , 0.8615172 , 0.332849  ,
        1.        , 0.        , 0.        , 0.        ], dtype=float32),
 'agent_1': array([0.63619226, 0.6529357 , 0.3703892 , 0.66940665, 0.6666667 ,
        0.86621314, 0.48762643, 0.48954174, 0.28093657, 0.5159465 ,
        0.66146713, 0.39128128, 0.8341556 , 0.8615172 , 0.332849  ,
        0.        , 1.        , 0.        , 0.        ], dtype=float32),
 'agent_2': array([0.5159465 , 0.66146713, 0.39128128, 0.66940665, 0.6666667 ,
        0.86621314, 0.48762643, 0.48954174, 0.28093657, 0.63619226,
        0.6529357 , 0.3703892 , 0.8341556 , 0.8615172 , 0.332849  ,
        0.        , 0.        , 1.        , 0.        ], dtype=float32),
 'agent_3': array([0.8341556 , 0.8615172 , 0.332849  , 0.66940665, 0.6666667 ,
        0.86621314, 0.48762643, 0.48954174, 0.28093657, 0

#### PettingZoo

In [None]:
# AEC PZ wrappers
from pettingzoo.utils.wrappers.clip_out_of_bounds import ClipOutOfBoundsWrapper

# Wrap a fresh AEC environment in the PettingZoo wrapper
env = ClipOutOfBoundsWrapper(momultiwalker_stability_v0.env())

env.reset()
for agent in env.agent_iter():
    # the wrapped environment still returns vector rewards from `last()`
    observation, vec_reward, termination, truncation, info = env.last()
    # finished agents are stepped with `None`; the others take a random action
    action = None if (termination or truncation) else env.action_space(agent).sample()
    env.step(action)
env.close()
