In [1]:
from gym.spaces import Box, Dict, Discrete, MultiDiscrete, Tuple
import numpy as np
import unittest

import ray
from ray.tune import register_env
from ray.rllib.algorithms.qmix import QMixConfig
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from smac.env import StarCraft2Env
import numpy as np

In [2]:
def test_sc():
    env = StarCraft2Env(map_name="8m")
    env_info = env.get_env_info()

    n_actions = env_info["n_actions"]
    n_agents = env_info["n_agents"]

    n_episodes = 10

    for e in range(n_episodes):
        env.reset()
        terminated = False
        episode_reward = 0

        while not terminated:
            obs = env.get_obs()
            state = env.get_state()
            # env.render()  # Uncomment for rendering

            actions = []
            for agent_id in range(n_agents):
                avail_actions = env.get_avail_agent_actions(agent_id)
                avail_actions_ind = np.nonzero(avail_actions)[0]
                action = np.random.choice(avail_actions_ind)
                actions.append(action)

            reward, terminated, _ = env.step(actions)
            episode_reward += reward

        print("Total reward in episode {} = {}".format(e, episode_reward))

    env.close()

# Run the random-action smoke test defined above; its per-episode totals are
# printed below the StarCraft II startup log.
test_sc()

Version: B75689 (SC2.4.10)
Build: Aug 12 2019 17:16:57
Command Line: '"/home/jovyan/code/pymarl/3rdparty/StarCraftII/Versions/Base75689/SC2_x64" -listen 127.0.0.1 -port 38299 -dataDir /home/jovyan/code/pymarl/3rdparty/StarCraftII/ -tempDir /tmp/sc-uez6ge3z/'
Starting up...
Startup Phase 1 complete
Startup Phase 2 complete
Creating stub renderer...
Listening on: 127.0.0.1:38299
Startup Phase 3 complete. Ready for commands.
ConnectHandler: Request from 127.0.0.1:48494 accepted
ReadyHandler: 127.0.0.1:48494 ready
Requesting to join a single player game
Configuring interface options
Configure: raw interface enabled
Configure: feature layer interface disabled
Configure: score interface disabled
Configure: render interface disabled
Launching next game.
Next launch phase started: 2
Next launch phase started: 3
Next launch phase started: 4
Next launch phase started: 5
Next launch phase started: 6
Next launch phase started: 7
Next launch phase started: 8
Game has started.
Using default stable i

Total reward in episode 0 = 2.0625
Total reward in episode 1 = 1.875
Total reward in episode 2 = 1.6875
Total reward in episode 3 = 2.25
Total reward in episode 4 = 1.125
Total reward in episode 5 = 1.3125
Total reward in episode 6 = 2.4375
Total reward in episode 7 = 1.125
Total reward in episode 8 = 1.875
Total reward in episode 9 = 1.6875


RequestQuit command received.
Closing Application...
DataHandler: unable to parse websocket frame.
CloseHandler: 127.0.0.1:48494 disconnected


In [8]:
# Create a SMAC environment for the "3m" map so its metadata can be inspected.
env = StarCraft2Env(map_name="3m")

In [9]:
# Display the environment's info dict (state/observation sizes, number of
# actions and agents, episode limit, feature names) as the cell output.
env.get_env_info()

{'state_shape': 48,
 'obs_shape': 30,
 'n_actions': 9,
 'n_agents': 3,
 'episode_limit': 60,
 'agent_features': ['health', 'energy/cooldown', 'rel_x', 'rel_y'],
 'enemy_features': ['health', 'rel_x', 'rel_y']}

In [7]:
# Start a local Ray runtime, or reuse the one already running in this kernel.
# `ignore_reinit_error=True` keeps the cell re-runnable: a plain `ray.init()`
# raises if Ray has already been initialised in the same process.
ray.init(ignore_reinit_error=True)

2022-10-17 15:53:11,141	INFO worker.py:1518 -- Started a local Ray instance.


0,1
Python version:,3.9.13
Ray version:,2.0.0


In [None]:
# Train QMIX on a grouped two-agent test environment and check the resulting
# mean episode reward.
#
# NOTE(review): AvailActionsTestEnv is not defined or imported in the visible
# cells; confirm it is available before running this cell on a fresh kernel.

# Both agents are placed in a single group; the grouped observation and action
# spaces are Tuples holding one per-agent space for each group member.
grouping = {
    "group_1": ["agent_1", "agent_2"],
}
obs_space = Tuple(
    [
        AvailActionsTestEnv.observation_space,
        AvailActionsTestEnv.observation_space,
    ]
)
act_space = Tuple(
    [AvailActionsTestEnv.action_space, AvailActionsTestEnv.action_space]
)
register_env(
    "action_mask_test",
    lambda config: AvailActionsTestEnv(config).with_agent_groups(
        grouping, obs_space=obs_space, act_space=act_space
    ),
)

config = (
    QMixConfig()
    .framework(framework="torch")
    .environment(
        env="action_mask_test",
        env_config={"avail_actions": [3, 4, 8]},
    )
    .rollouts(num_envs_per_worker=5)
)  # Test with vectorization on.

trainer = config.build()

try:
    for _ in range(4):
        trainer.train()  # OK if it doesn't trip the action assertion error

    final_result = trainer.train()
    assert final_result["episode_reward_mean"] == 30.0, (
        "unexpected episode_reward_mean: {}".format(
            final_result["episode_reward_mean"]
        )
    )
finally:
    # Release the trainer and the Ray runtime even when training or the check
    # above fails, so a failed run does not leave them running.
    trainer.stop()
    ray.shutdown()