In [1]:
import numpy as np
import itertools
import typing as t

from vizdoom import Button


# Buttons that cannot be used together
MUTUALLY_EXCLUSIVE_GROUPS = [
    [Button.MOVE_RIGHT, Button.MOVE_LEFT],
    [Button.TURN_RIGHT, Button.TURN_LEFT],
    [Button.MOVE_FORWARD, Button.MOVE_BACKWARD],
]

# Buttons that can only be used alone.
EXCLUSIVE_BUTTONS = [Button.ATTACK]


def has_exclusive_button(actions: np.ndarray, buttons: np.array) -> np.array:
    exclusion_mask = np.isin(buttons, EXCLUSIVE_BUTTONS)
    
    # Flag actions that have more than 1 active button among exclusive list.
    return (np.any(actions.astype(bool) & exclusion_mask, axis=-1)) & (np.sum(actions, axis=-1) > 1)


def has_excluded_pair(actions: np.ndarray, buttons: np.array) -> np.array:
    # Create mask of shape (n_mutual_exclusion_groups, n_available_buttons), marking location of excluded pairs.
    mutual_exclusion_mask = np.array([np.isin(buttons, excluded_group) 
                                      for excluded_group in MUTUALLY_EXCLUSIVE_GROUPS])

    # Flag actions that have more than 1 button active in any of the mutual exclusion groups.
    return np.any(np.sum(
        # Resulting shape (n_actions, n_mutual_exclusion_groups, n_available_buttons)
        (actions[:, np.newaxis, :] * mutual_exclusion_mask.astype(int)),
        axis=-1) > 1, axis=-1)


def get_available_actions(buttons: np.array) -> t.List[t.List[float]]:
    # Create list of all possible actions of size (2^n_available_buttons x n_available_buttons)
    action_combinations = np.array([list(seq) for seq in itertools.product([0., 1.], repeat=len(buttons))])

    # Build action mask from action combinations and exclusion mask
    illegal_mask = (has_excluded_pair(action_combinations, buttons)
                    | has_exclusive_button(action_combinations, buttons))

    possible_actions = action_combinations[~illegal_mask]
    possible_actions = possible_actions[np.sum(possible_actions, axis=1) > 0]  # Remove no-op

    print('Built action space of size {} from buttons {}'.format(len(possible_actions), buttons))
    return possible_actions.tolist()

possible_actions = get_available_actions(np.array([
    Button.ATTACK, Button.MOVE_FORWARD, Button.MOVE_LEFT, 
    Button.MOVE_RIGHT, Button.TURN_LEFT, Button.TURN_RIGHT]))

possible_actions

Built action space of size 18 from buttons [<Button.ATTACK: 0> <Button.MOVE_FORWARD: 13> <Button.MOVE_LEFT: 11>
 <Button.MOVE_RIGHT: 10> <Button.TURN_LEFT: 15> <Button.TURN_RIGHT: 14>]


[[0.0, 0.0, 0.0, 0.0, 0.0, 1.0],
 [0.0, 0.0, 0.0, 0.0, 1.0, 0.0],
 [0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 1.0, 0.0, 1.0],
 [0.0, 0.0, 0.0, 1.0, 1.0, 0.0],
 [0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 1.0, 0.0, 0.0, 1.0],
 [0.0, 0.0, 1.0, 0.0, 1.0, 0.0],
 [0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 1.0, 0.0, 0.0, 0.0, 1.0],
 [0.0, 1.0, 0.0, 0.0, 1.0, 0.0],
 [0.0, 1.0, 0.0, 1.0, 0.0, 0.0],
 [0.0, 1.0, 0.0, 1.0, 0.0, 1.0],
 [0.0, 1.0, 0.0, 1.0, 1.0, 0.0],
 [0.0, 1.0, 1.0, 0.0, 0.0, 0.0],
 [0.0, 1.0, 1.0, 0.0, 0.0, 1.0],
 [0.0, 1.0, 1.0, 0.0, 1.0, 0.0],
 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0]]

In [12]:
import stable_baselines3
from stable_baselines3.common import policies
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3 import PPO
from stable_baselines3.ppo import ppo



def create_agent(env, **kwargs):
    return ppo.PPO(policy=policies.ActorCriticCnnPolicy,
                   env=env,
                   n_steps=4096,
                   batch_size=32,
                   learning_rate=1e-4,
                   verbose=0,
                   seed=0,
                   **kwargs)

agent = create_agent(env)
agent.policy

NameError: name 'env' is not defined

In [6]:
import numpy as np
import itertools

from vizdoom import Button

# Buttons that cannot be used together
MUTUALLY_EXCLUSIVE_GROUPS = [
    [Button.MOVE_RIGHT, Button.MOVE_LEFT],
    [Button.TURN_RIGHT, Button.TURN_LEFT],
    [Button.MOVE_FORWARD, Button.MOVE_BACKWARD],
]

# Buttons that can only be used alone.
EXCLUSIVE_BUTTONS = [Button.ATTACK]

def get_available_actions(buttons: np.array) -> list:
    # Create list of all possible actions of size (2^n_available_buttons x n_available_buttons)
    action_combinations = np.array([list(seq) for seq in itertools.product([0, 1], repeat=len(buttons))])

    # Remove mutually exclusive and exclusive buttons
    for group in [MUTUALLY_EXCLUSIVE_GROUPS, EXCLUSIVE_BUTTONS]:
        for button in group:
            button_index = np.argwhere(buttons == button)[0][0]
            action_combinations = action_combinations[np.sum(action_combinations[:, button_index], axis=1) <= 1]

    # Remove no-op actions
    action_combinations = action_combinations[np.sum(action_combinations, axis=1) > 0]

    print('Built action space of size {} from buttons {}'.format(len(action_combinations), buttons))
    return action_combinations.tolist()



possible_actions = get_available_actions(np.array([
    Button.ATTACK, Button.MOVE_FORWARD, Button.MOVE_LEFT, 
    Button.MOVE_RIGHT, Button.TURN_LEFT, Button.TURN_RIGHT]))

possible_actions

  button_index = np.argwhere(buttons == button)[0][0]


IndexError: index 0 is out of bounds for axis 0 with size 0

In [10]:

# Buttons that cannot be used together
MUTUALLY_EXCLUSIVE_GROUPS = [
    [Button.MOVE_RIGHT, Button.MOVE_LEFT],
    [Button.TURN_RIGHT, Button.TURN_LEFT],
    [Button.MOVE_FORWARD, Button.MOVE_BACKWARD],
]

# Buttons that can only be used alone.
EXCLUSIVE_BUTTONS = [Button.ATTACK]

def get_available_actions(buttons: np.array) -> list:
    # Create list of all possible actions of size (2^n_available_buttons x n_available_buttons)
    action_combinations = np.array([list(seq) for seq in itertools.product([0, 1], repeat=len(buttons))])

    # Remove mutually exclusive and exclusive buttons
    for group in [MUTUALLY_EXCLUSIVE_GROUPS, EXCLUSIVE_BUTTONS]:
        for button in group:
            if button in buttons:
                button_index = np.argwhere(buttons == button)[0][0]
                action_combinations = action_combinations[np.sum(action_combinations[:, button_index], axis=0) <= 1]

    # Remove no-op actions
    action_combinations = action_combinations[np.sum(action_combinations, axis=1) > 0]

    print('Built action space of size {} from buttons {}'.format(len(action_combinations), buttons))
    return action_combinations.tolist()



possible_actions = get_available_actions(np.array([
    Button.ATTACK, Button.MOVE_FORWARD, Button.MOVE_LEFT, 
    Button.MOVE_RIGHT, Button.TURN_LEFT, Button.TURN_RIGHT]))

possible_actions

  if button in buttons:


IndexError: boolean index did not match indexed array along dimension 1; dimension is 64 but corresponding boolean dimension is 6