From a4ae6c7ca42eced280bc76bb3b69370eb35a068a Mon Sep 17 00:00:00 2001
From: vincentpierre
Date: Tue, 3 Dec 2019 16:02:24 -0800
Subject: [PATCH 01/30] initial commit for LL-API

---
 ml-agents-envs/mlagents/envs/base_env.py | 251 +++++++++++++
 .../mlagents/envs/base_unity_environment.py | 25 --
 ml-agents-envs/mlagents/envs/environment.py | 352 ++++++++----------
 ml-agents-envs/mlagents/envs/rpc_utils.py | 133 +++++++
 .../mlagents/envs/tests/test_rpc_utils.py | 130 +++++++
 5 files changed, 663 insertions(+), 228 deletions(-)
 create mode 100644 ml-agents-envs/mlagents/envs/base_env.py
 delete mode 100644 ml-agents-envs/mlagents/envs/base_unity_environment.py
 create mode 100644 ml-agents-envs/mlagents/envs/rpc_utils.py
 create mode 100644 ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py

diff --git a/ml-agents-envs/mlagents/envs/base_env.py b/ml-agents-envs/mlagents/envs/base_env.py
new file mode 100644
index 0000000000..82078e7ee2
--- /dev/null
+++ b/ml-agents-envs/mlagents/envs/base_env.py
@@ -0,0 +1,251 @@
+"""
+Python Environment API for the ML-Agents toolkit
+The aim of this API is to expose groups of similar Agents evolving in Unity
+to perform reinforcement learning on.
+There can be multiple groups of similar Agents (same observation and action
+spaces) in the simulation. These groups are identified by an agent_group that
+corresponds to a single group of Agents in the simulation.
+For performance reasons, the data of each group of agents is processed in a
+batched manner. When retrieving the state of a group of Agents, said state
+contains the data for the whole group. Agents in these groups are identified
+by a unique int identifier that allows tracking of Agents across simulation
+steps. Note that there is no guarantee that the number or order of the Agents
+in the state will be consistent across simulation steps.
+A simulation step corresponds to moving the simulation forward until at least
+one agent in the simulation sends its observations to Python again. Since
+Agents can request decisions at different frequencies, a simulation step does
+not necessarily correspond to a fixed simulation time increment.
+Changes from ML-Agents v0.11:
+ - Step now takes no arguments and returns None.
+ - The data received from a step is no longer a dictionary of NamedTuple; the
+state data must now be retrieved manually from the environment object.
+ - Reset no longer takes any input arguments.
+ - Modifying properties of the environment besides actions is handled in
+SideChannels.
+"""
+
+from abc import ABC, abstractmethod
+from typing import List, NamedTuple, Tuple, Union, Optional
+import numpy as np
+from enum import Enum
+
+
+class StepResult(NamedTuple):
+ """
+ Contains the data a single Agent collected since the last
+ simulation step.
+ - obs is a list of numpy arrays of observations collected by the
+ agent.
+ - reward is a float. Corresponds to the rewards collected by the agent
+ since the last simulation step.
+ - done is a bool. Is true if the Agent was terminated during the last
+ simulation step.
+ - max_step is a bool. Is true if the Agent reached its maximum number of
+ steps during the last simulation step.
+ - agent_id is an int and a unique identifier for the corresponding Agent.
+ - action_mask is an optional list of one dimensional arrays of booleans.
+ Each array corresponds to an action branch. Each array contains a mask
+ for each action of the branch. If true, the action is not available for
+ the agent during this simulation step.
+ """ + + obs: List[np.array] + reward: float + done: bool + max_step: bool + agent_id: int + action_mask: Optional[List[np.array]] + + +class BatchedStepResult(NamedTuple): + """ + Contains the data a group of similar Agents collected since the last + simulation step. Note that all Agents do not necessarily have new + information to send at each simulation step. Therefore, the ordering of + agents and the batch size of the BatchedStepResult are not fixed accross + simulation steps. + - obs is a list of numpy arrays observations collected by the group of + agent. The first dimension of the array corresponds to the batch size of + the group. + - reward is a float vector of length batch size. Corresponds to the + rewards collected by each agent since the last simulation step. + - done is an array of booleans of length batch size. Is true if the + associated Agent was terminated during the last simulation step. + - max_step is an array of booleans of length batch size. Is true if the + associated Agent reached its maximum number of steps during the last + simulation step. + - agent_id is an int vector of length batch size containing unique + identifier for the corresponding Agent. This is used to track Agents + accross simulation steps. + - action_mask is an optional list of two dimensional array of booleans. + Each array corresponds to an action branch. The first dimension of each + array is the batch size and the second contains a mask for each action of + the branch. If true, the action is not available for the agent during + this simulation step. + """ + + obs: List[np.array] + reward: np.array + done: np.array + max_step: np.array + agent_id: np.array + action_mask: Optional[List[np.array]] + + def get_agent_step_result(self, agent_id: int) -> StepResult: + """ + returns the step result for a specific agent. + :param agent_id: The id of the agent + :returns: obs, reward, done, agent_id and optional action mask for a + specific agent + """ + try: + agent_index = np.where(self.agent_id == agent_id)[0][0] + except IndexError as ie: + raise IndexError( + "agent_id {} is not present in the BatchedStepResult".format(agent_id) + ) from ie + agent_obs = [] + for batched_obs in self.obs: + agent_obs.append(batched_obs[agent_index]) + agent_mask = None + if self.action_mask is not None: + agent_mask = [] + for mask in self.action_mask: + agent_mask.append(mask[0]) + return StepResult( + obs=agent_obs, + reward=self.reward[agent_index], + done=self.done[agent_index], + max_step=self.max_step[agent_index], + agent_id=agent_id, + action_mask=agent_mask, + ) + + @staticmethod + def empty(spec): + """ + Returns an empty BatchedStepResult. + :param spec: The AgentGroupSpec for the BatchedStepResult + """ + obs = [] + for shape in spec.observation_shapes: + obs += [np.zeros((0,) + shape, dtype=np.float32)] + return BatchedStepResult( + obs=obs, + reward=np.zeros(0, dtype=np.float32), + done=np.zeros(0, dtype=np.bool), + max_step=np.zeros(0, dtype=np.bool), + agent_id=np.zeros(0, dtype=np.int32), + action_mask=None, + ) + + def n_agents(self) -> int: + return len(self.agent_id) + + +class ActionType(Enum): + DISCRETE = 0 + CONTINUOUS = 1 + + +class AgentGroupSpec(NamedTuple): + """ + A NamedTuple to containing information about the observations and actions + spaces for a group of Agents. + - observation_shapes is a List of Tuples of int : Each Tuple corresponds + to an observation's dimensionsthe shape tuples have the same ordering as + the ordering of the BatchedStepResult. 
+ - action_type is the type of data of the action. It can be discrete or
+ continuous. If discrete, the action tensors are expected to be int32. If
+ continuous, the actions are expected to be float32.
+ - action_shape is:
+ - An int in continuous action space corresponding to the number of
+ floats that constitute the action.
+ - A Tuple of int in discrete action space where each int corresponds to
+ the number of discrete actions available to the agent.
+ """
+
+ observation_shapes: List[Tuple]
+ action_type: ActionType
+ action_shape: Union[int, Tuple]
+
+
+class BaseEnv(ABC):
+ @abstractmethod
+ def step(self) -> None:
+ """
+ Signals the environment that it must move the simulation forward
+ by one step.
+ """
+ pass
+
+ @abstractmethod
+ def reset(self) -> None:
+ """
+ Signals the environment that it must reset the simulation.
+ """
+ pass
+
+ @abstractmethod
+ def close(self) -> None:
+ """
+ Signals the environment that it must close.
+ """
+ pass
+
+ @abstractmethod
+ def get_agent_groups(self) -> List[str]:
+ """
+ Returns the list of the agent group names present in the environment.
+ Agents grouped under the same group name have the same action and
+ observation specs and are expected to behave similarly in the environment.
+ This list can grow with time as new policies are instantiated.
+ :return: the list of agent group names.
+ """
+ pass
+
+ @abstractmethod
+ def set_action(self, agent_group: str, action: np.array) -> None:
+ """
+ Sets the action for all of the agents in the simulation for the next
+ step. The Actions must be in the same order as the order received in
+ the step result.
+ :param agent_group: The name of the group the agents are part of
+ :param action: A two dimensional np.array corresponding to the action
+ (either int or float)
+ """
+ pass
+
+ @abstractmethod
+ def set_action_for_agent(
+ self, agent_group: str, agent_id: int, action: np.array
+ ) -> None:
+ """
+ Sets the action for one of the agents in the simulation for the next
+ step.
+ :param agent_group: The name of the group the agent is part of
+ :param agent_id: The id of the agent the action is set for
+ :param action: A one dimensional np.array corresponding to the action
+ (either int or float)
+ """
+ pass
+
+ @abstractmethod
+ def get_step_result(self, agent_group: str) -> BatchedStepResult:
+ """
+ Retrieves the observations of the agents that requested a step in the
+ simulation.
+ :param agent_group: The name of the group the agents are part of
+ :return: A BatchedStepResult NamedTuple containing the observations,
+ the rewards and the done flags for this group of agents.
+ """ + pass + + @abstractmethod + def get_agent_group_spec(self, agent_group: str) -> AgentGroupSpec: + """ + Get the AgentGroupSpec corresponding to the agent group name + :param agent_group: The name of the group the agents are part of + :return: A AgentGroupSpec corresponding to that agent group name + """ + pass diff --git a/ml-agents-envs/mlagents/envs/base_unity_environment.py b/ml-agents-envs/mlagents/envs/base_unity_environment.py deleted file mode 100644 index 6c88335d13..0000000000 --- a/ml-agents-envs/mlagents/envs/base_unity_environment.py +++ /dev/null @@ -1,25 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Dict, Optional - -from mlagents.envs.brain import AllBrainInfo, BrainParameters - - -class BaseUnityEnvironment(ABC): - @abstractmethod - def step( - self, vector_action: Optional[Dict] = None, value: Optional[Dict] = None - ) -> AllBrainInfo: - pass - - @abstractmethod - def reset(self) -> AllBrainInfo: - pass - - @property - @abstractmethod - def external_brains(self) -> Dict[str, BrainParameters]: - pass - - @abstractmethod - def close(self): - pass diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index 20954931a1..54728518f5 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -7,9 +7,14 @@ from typing import Dict, List, Optional, Any from mlagents.envs.side_channel.side_channel import SideChannel -from mlagents.envs.base_unity_environment import BaseUnityEnvironment + +from mlagents.envs.base_env import ( + BaseEnv, + BatchedStepResult, + ActionType, + AgentGroupSpec, +) from mlagents.envs.timers import timed, hierarchical_timer -from .brain import AllBrainInfo, BrainInfo, BrainParameters from .exception import ( UnityEnvironmentException, UnityCommunicationException, @@ -17,6 +22,11 @@ UnityTimeOutException, ) +from mlagents.envs.rpc_utils import ( + agent_group_spec_from_proto, + batched_step_result_from_proto, +) + from mlagents.envs.communicator_objects.unity_rl_input_pb2 import UnityRLInputProto from mlagents.envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutputProto from mlagents.envs.communicator_objects.agent_action_pb2 import AgentActionProto @@ -36,7 +46,7 @@ logger = logging.getLogger("mlagents.envs") -class UnityEnvironment(BaseUnityEnvironment): +class UnityEnvironment(BaseEnv): SCALAR_ACTION_TYPES = (int, np.int32, np.int64, float, np.float32, np.float64) SINGLE_BRAIN_ACTION_TYPES = SCALAR_ACTION_TYPES + (list, np.ndarray) API_VERSION = "API-12" @@ -123,49 +133,16 @@ def __init__( "{1}.\nPlease go to https://github.com/Unity-Technologies/ml-agents to download the latest version " "of ML-Agents.".format(self._version_, self._unity_version) ) - self._n_agents: Dict[str, int] = {} + self._env_state: Dict[str, BatchedStepResult] = {} + self._env_specs: Dict[str, AgentGroupSpec] = {} + self._env_actions: Dict[str, np.array] = {} self._is_first_message = True - self._academy_name = aca_params.name - self._log_path = aca_params.log_path - self._brains: Dict[str, BrainParameters] = {} - self._external_brain_names: List[str] = [] - self._num_external_brains = 0 - self._update_brain_parameters(aca_output) - logger.info( - "\n'{0}' started successfully!\n{1}".format(self._academy_name, str(self)) - ) - - @property - def logfile_path(self): - return self._log_path - - @property - def brains(self): - return self._brains - - @property - def academy_name(self): - return self._academy_name - - @property - def 
number_external_brains(self): - return self._num_external_brains - - @property - def external_brain_names(self): - return self._external_brain_names + self._update_group_specs(aca_output) @staticmethod def get_communicator(worker_id, base_port, timeout_wait): return RpcCommunicator(worker_id, base_port, timeout_wait) - @property - def external_brains(self): - external_brains = {} - for brain_name in self.external_brain_names: - external_brains[brain_name] = self.brains[brain_name] - return external_brains - def executable_launcher(self, file_name, docker_training, no_graphics, args): cwd = os.getcwd() file_name = ( @@ -279,156 +256,160 @@ def executable_launcher(self, file_name, docker_training, no_graphics, args): shell=True, ) - def __str__(self): - return """Unity Academy name: {0}""".format(self._academy_name) + def _update_group_specs(self, output: UnityOutputProto) -> None: + init_output = output.rl_initialization_output + for brain_param in init_output.brain_parameters: + # Each BrainParameter in the rl_initialization_output should have at least one AgentInfo + # Get that agent, because we need some of its observations. + agent_infos = output.rl_output.agentInfos[brain_param.brain_name] + if agent_infos.value: + agent = agent_infos.value[0] + new_spec = agent_group_spec_from_proto(brain_param, agent) + self._env_specs[brain_param.brain_name] = new_spec + logger.info(f"Connected new brain:\n{brain_param.brain_name}") - def reset(self) -> AllBrainInfo: + def _update_state(self, output: UnityRLOutputProto) -> None: """ - Sends a signal to reset the unity environment. - :return: AllBrainInfo : A data structure corresponding to the initial reset state of the environment. + Collects experience information from all external brains in environment at current step. """ + for brain_name in self._env_specs.keys(): + if brain_name in output.agentInfos: + agent_info_list = output.agentInfos[brain_name].value + self._env_state[brain_name] = batched_step_result_from_proto( + agent_info_list, self._env_specs[brain_name] + ) + else: + self._env_state[brain_name] = BatchedStepResult.empty( + self._env_specs[brain_name] + ) + self._parse_side_channel_message(self.side_channels, output.side_channel) + + @staticmethod + def _empty_action(spec: AgentGroupSpec, n_agents: int) -> np.array: + if spec.action_type == ActionType.DISCRETE: + action_size = np.sum(spec.action_shape) + return np.zeros((n_agents, action_size), dtype=np.int32) + else: + action_size = spec.action_shape + return np.zeros((n_agents, action_size), dtype=np.float32) + + def reset(self) -> None: if self._loaded: outputs = self.communicator.exchange(self._generate_reset_input()) if outputs is None: raise UnityCommunicationException("Communicator has stopped.") - self._update_brain_parameters(outputs) + self._update_group_specs(outputs) rl_output = outputs.rl_output - s = self._get_state(rl_output) - for _b in self._external_brain_names: - self._n_agents[_b] = len(s[_b].agents) + self._update_state(rl_output) self._is_first_message = False - return s + self._env_actions.clear() else: raise UnityEnvironmentException("No Unity environment is loaded.") @timed - def step( - self, - vector_action: Dict[str, np.ndarray] = None, - value: Optional[Dict[str, np.ndarray]] = None, - ) -> AllBrainInfo: - """ - Provides the environment with an action, moves the environment dynamics forward accordingly, - and returns observation, state, and reward information to the agent. - :param value: Value estimates provided by agents. 
- :param vector_action: Agent's vector action. Can be a scalar or vector of int/floats. - :param memory: Vector corresponding to memory used for recurrent policies. - :return: AllBrainInfo : A Data structure corresponding to the new state of the environment. - """ + def step(self) -> None: if self._is_first_message: return self.reset() - vector_action = {} if vector_action is None else vector_action - value = {} if value is None else value - - # Check that environment is loaded, and episode is currently running. if not self._loaded: raise UnityEnvironmentException("No Unity environment is loaded.") - else: - if isinstance(vector_action, self.SINGLE_BRAIN_ACTION_TYPES): - if self._num_external_brains == 1: - vector_action = {self._external_brain_names[0]: vector_action} - elif self._num_external_brains > 1: - raise UnityActionException( - "You have {0} brains, you need to feed a dictionary of brain names a keys, " - "and vector_actions as values".format(self._num_external_brains) - ) - else: - raise UnityActionException( - "There are no external brains in the environment, " - "step cannot take a vector_action input" - ) - - if isinstance(value, self.SINGLE_BRAIN_ACTION_TYPES): - if self._num_external_brains == 1: - value = {self._external_brain_names[0]: value} - elif self._num_external_brains > 1: - raise UnityActionException( - "You have {0} brains, you need to feed a dictionary of brain names as keys " - "and state/action value estimates as values".format( - self._num_external_brains - ) - ) - else: - raise UnityActionException( - "There are no external brains in the environment, " - "step cannot take a value input" - ) - - for brain_name in list(vector_action.keys()): - if brain_name not in self._external_brain_names: - raise UnityActionException( - "The name {0} does not correspond to an external brain " - "in the environment".format(brain_name) - ) - - for brain_name in self._external_brain_names: - n_agent = self._n_agents[brain_name] - if brain_name not in vector_action: - if self._brains[brain_name].vector_action_space_type == "discrete": - vector_action[brain_name] = ( - [0.0] - * n_agent - * len(self._brains[brain_name].vector_action_space_size) - ) - else: - vector_action[brain_name] = ( - [0.0] - * n_agent - * self._brains[brain_name].vector_action_space_size[0] - ) - else: - vector_action[brain_name] = self._flatten(vector_action[brain_name]) - - discrete_check = ( - self._brains[brain_name].vector_action_space_type == "discrete" + # fill the blanks for missing actions + for group_name in self._env_specs: + if group_name not in self._env_actions: + n_agents = 0 + if group_name in self._env_state: + n_agents = self._env_state[group_name].n_agents() + self._env_actions[group_name] = self._empty_action( + self._env_specs[group_name], n_agents ) + step_input = self._generate_step_input(self._env_actions) + with hierarchical_timer("communicator.exchange"): + outputs = self.communicator.exchange(step_input) + if outputs is None: + raise UnityCommunicationException("Communicator has stopped.") + self._update_group_specs(outputs) + rl_output = outputs.rl_output + self._update_state(rl_output) + self._env_actions.clear() + + def get_agent_groups(self) -> List[str]: + return list(self._env_specs.keys()) + + def _assert_group_exists(self, agent_group: str) -> None: + if agent_group not in self._env_specs: + raise UnityActionException( + "The group {0} does not correspond to an existing agent group " + "in the environment".format(agent_group) + ) - expected_discrete_size = n_agent * len( 
- self._brains[brain_name].vector_action_space_size + def set_action(self, agent_group: str, action: np.array) -> None: + self._assert_group_exists(agent_group) + if agent_group not in self._env_state: + return + spec = self._env_specs[agent_group] + expected_a = ( + spec.action_shape + if spec.action_type == ActionType.CONTINUOUS + else np.sum(spec.action_shape) + ) + expected_type = ( + np.float32 if spec.action_type == ActionType.CONTINUOUS else np.int32 + ) + expected_shape = (self._env_state[agent_group].n_agents(), expected_a) + if action.shape != expected_shape: + raise UnityActionException( + "The group {0} needs an input of dimension {1} but received input of dimension {2}".format( + agent_group, expected_shape, action.shape ) + ) - continuous_check = ( - self._brains[brain_name].vector_action_space_type == "continuous" - ) + if action.dtype != expected_type: + raise UnityActionException( + "The group {0} needs to receive action of type {1} but received" + "actions of type {2}".format(agent_group, expected_type, action.dtype) + ) + self._env_actions[agent_group] = action - expected_continuous_size = ( - self._brains[brain_name].vector_action_space_size[0] * n_agent + def set_action_for_agent( + self, agent_group: str, agent_id: int, action: np.array + ) -> None: + self._assert_group_exists(agent_group) + if agent_group not in self._env_state: + return + spec = self._env_specs[agent_group] + expected_shape = ( + ( + spec.action_shape + if spec.action_type == ActionType.CONTINUOUS + else np.sum(spec.action_shape) + ), + ) + if action.shape != expected_shape: + raise UnityActionException( + "The Agent {0} in group {1} needs an input of dimension {2} but received input of dimension {3}".format( + agent_id, agent_group, expected_shape, action.shape + ) + ) + if agent_group not in self._env_actions: + self._env_actions[agent_group] = self._empty_action( + spec, self._env_state[agent_group].n_agents() + ) + try: + index = np.where(self._env_state[agent_group].agent_id == agent_id)[0][0] + except IndexError as ie: + raise IndexError( + "agent_id {} is did not request a decision at the previous step".format( + agent_id ) + ) from ie + self._env_actions[agent_group][index] = action - if not ( - ( - discrete_check - and len(vector_action[brain_name]) == expected_discrete_size - ) - or ( - continuous_check - and len(vector_action[brain_name]) == expected_continuous_size - ) - ): - raise UnityActionException( - "There was a mismatch between the provided action and " - "the environment's expectation: " - "The brain {0} expected {1} {2} action(s), but was provided: {3}".format( - brain_name, - str(expected_discrete_size) - if discrete_check - else str(expected_continuous_size), - self._brains[brain_name].vector_action_space_type, - str(vector_action[brain_name]), - ) - ) + def get_step_result(self, agent_group: str) -> BatchedStepResult: + self._assert_group_exists(agent_group) + return self._env_state[agent_group] - step_input = self._generate_step_input(vector_action, value) - with hierarchical_timer("communicator.exchange"): - outputs = self.communicator.exchange(step_input) - if outputs is None: - raise UnityCommunicationException("Communicator has stopped.") - self._update_brain_parameters(outputs) - rl_output = outputs.rl_output - state = self._get_state(rl_output) - for _b in self._external_brain_names: - self._n_agents[_b] = len(state[_b].agents) - return state + def get_agent_group_spec(self, agent_group: str) -> AgentGroupSpec: + self._assert_group_exists(agent_group) + return 
self._env_specs[agent_group] def close(self): """ @@ -478,20 +459,6 @@ def _flatten(cls, arr: Any) -> List[float]: arr = [float(x) for x in arr] return arr - def _get_state(self, output: UnityRLOutputProto) -> AllBrainInfo: - """ - Collects experience information from all external brains in environment at current step. - :return: a dictionary of BrainInfo objects. - """ - _data = {} - for brain_name in output.agentInfos: - agent_info_list = output.agentInfos[brain_name].value - _data[brain_name] = BrainInfo.from_agent_proto( - self.worker_id, agent_info_list, self.brains[brain_name] - ) - self._parse_side_channel_message(self.side_channels, output.side_channel) - return _data - @staticmethod def _parse_side_channel_message( side_channels: Dict[int, SideChannel], data: bytearray @@ -533,38 +500,17 @@ def _generate_side_channel_data(side_channels: Dict[int, SideChannel]) -> bytear channel.message_queue = [] return result - def _update_brain_parameters(self, output: UnityOutputProto) -> None: - init_output = output.rl_initialization_output - - for brain_param in init_output.brain_parameters: - # Each BrainParameter in the rl_initialization_output should have at least one AgentInfo - # Get that agent, because we need some of its observations. - agent_infos = output.rl_output.agentInfos[brain_param.brain_name] - if agent_infos.value: - agent = agent_infos.value[0] - new_brain = BrainParameters.from_proto(brain_param, agent) - self._brains[brain_param.brain_name] = new_brain - logger.info(f"Connected new brain:\n{new_brain}") - self._external_brain_names = list(self._brains.keys()) - self._num_external_brains = len(self._external_brain_names) - @timed def _generate_step_input( - self, vector_action: Dict[str, np.ndarray], value: Dict[str, np.ndarray] + self, vector_action: Dict[str, np.ndarray] ) -> UnityInputProto: rl_in = UnityRLInputProto() for b in vector_action: - n_agents = self._n_agents[b] + n_agents = self._env_state[b].n_agents() if n_agents == 0: continue - _a_s = len(vector_action[b]) // n_agents for i in range(n_agents): - action = AgentActionProto( - vector_actions=vector_action[b][i * _a_s : (i + 1) * _a_s] - ) - if b in value: - if value[b] is not None: - action.value = float(value[b][i]) + action = AgentActionProto(vector_actions=vector_action[b][i]) rl_in.agent_actions[b].value.extend([action]) rl_in.command = 0 rl_in.side_channel = bytes(self._generate_side_channel_data(self.side_channels)) diff --git a/ml-agents-envs/mlagents/envs/rpc_utils.py b/ml-agents-envs/mlagents/envs/rpc_utils.py new file mode 100644 index 0000000000..a81880e050 --- /dev/null +++ b/ml-agents-envs/mlagents/envs/rpc_utils.py @@ -0,0 +1,133 @@ +from mlagents.envs.base_env import AgentGroupSpec, ActionType, BatchedStepResult +from mlagents.envs.timers import hierarchical_timer, timed +from mlagents.envs.communicator_objects.agent_info_pb2 import AgentInfoProto +from mlagents.envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto +import logging +import numpy as np +import io +from typing import List, Tuple +from PIL import Image + +logger = logging.getLogger("mlagents.envs") + + +def agent_group_spec_from_proto( + brain_param_proto: BrainParametersProto, agent_info: AgentInfoProto +) -> AgentGroupSpec: + """ + Converts brain parameter and agent info proto to AgentGroupSpec object. + :param brain_param_proto: protobuf object. + :param agent_info: protobuf object. + :return: AgentGroupSpec object. 
+ """ + observation_shape = [tuple(obs.shape) for obs in agent_info.observations] + action_type = ( + ActionType.DISCRETE + if brain_param_proto.vector_action_space_type == 0 + else ActionType.CONTINUOUS + ) + action_shape = None + if action_type == ActionType.CONTINUOUS: + action_shape = brain_param_proto.vector_action_size[0] + if action_type == ActionType.DISCRETE: + action_shape = tuple(brain_param_proto.vector_action_size) + return AgentGroupSpec(observation_shape, action_type, action_shape) + + +@timed +def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray: + """ + Converts byte array observation image into numpy array, re-sizes it, + and optionally converts it to grey scale + :param gray_scale: Whether to convert the image to grayscale. + :param image_bytes: input byte array corresponding to image + :return: processed numpy array of observation from environment + """ + with hierarchical_timer("image_decompress"): + image_bytearray = bytearray(image_bytes) + image = Image.open(io.BytesIO(image_bytearray)) + # Normally Image loads lazily, this forces it to do loading in the timer scope. + image.load() + s = np.array(image) / 255.0 + if gray_scale: + s = np.mean(s, axis=2) + s = np.reshape(s, [s.shape[0], s.shape[1], 1]) + return s + + +@timed +def _process_visual_observation( + obs_index: int, shape: Tuple[int, int, int], agent_info_list: List[AgentInfoProto] +) -> np.ndarray: + if len(agent_info_list) == 0: + return np.zeros((0, shape[0], shape[1], shape[2])) + + gray_scale = shape[2] == 1 + batched_visual = [ + process_pixels(agent_obs.observations[obs_index].compressed_data, gray_scale) + for agent_obs in agent_info_list + ] + return np.array(batched_visual) + + +@timed +def _process_vector_observation( + obs_index: int, shape: Tuple[int, ...], agent_info_list: List[AgentInfoProto] +) -> np.ndarray: + if len(agent_info_list) == 0: + return np.zeros((0, shape[0]), dtype=np.float32) + np_obs = np.array( + [ + agent_obs.observations[obs_index].float_data.data + for agent_obs in agent_info_list + ] + ) + # Check for NaNs or infs in the observations + # If there's a NaN in the observations, the dot() result will be NaN + # If there's an Inf (either sign) then the result will be Inf + # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background + # Note that a very large values (larger than sqrt(float_max)) will result in an Inf value here + # This is OK though, worst case it results in an unnecessary (but harmless) nan_to_num call. 
+ d = np.mean(np_obs) + has_nan = np.isnan(d) + has_inf = not np.isfinite(d) + + # In we have any NaN or Infs, use np.nan_to_num to replace these with finite values + if has_nan or has_inf: + np_obs = np.nan_to_num(np_obs) + + if has_nan: + logger.warning(f"An agent had a NaN observation in the environment") + return np_obs + + +@timed +def batched_step_result_from_proto( + agent_info_list: List[AgentInfoProto], group_spec: AgentGroupSpec +) -> BatchedStepResult: + obs_list: List[np.array] = [] + for obs_index, obs_shape in enumerate(group_spec.observation_shapes): + is_visual = len(obs_shape) == 3 + if is_visual: + obs_list += [ + _process_visual_observation(obs_index, obs_shape, agent_info_list) + ] + else: + obs_list += [ + _process_vector_observation(obs_index, obs_shape, agent_info_list) + ] + rewards = np.array( + [agent_info.reward for agent_info in agent_info_list], dtype=np.float32 + ) + done = np.array([agent_info.done for agent_info in agent_info_list], dtype=np.bool) + max_step = np.array( + [agent_info.max_step_reached for agent_info in agent_info_list], dtype=np.bool + ) + agent_id = np.array( + [agent_info.id for agent_info in agent_info_list], dtype=np.int32 + ) + action_mask = None + if group_spec.action_type == ActionType.DISCRETE: + # TODO + print("ERROR") + return BatchedStepResult(obs_list, rewards, done, max_step, agent_id, action_mask) diff --git a/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py b/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py new file mode 100644 index 0000000000..5fd1e56dff --- /dev/null +++ b/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py @@ -0,0 +1,130 @@ +from typing import List, Tuple +from mlagents.envs.communicator_objects.agent_info_pb2 import AgentInfoProto +from mlagents.envs.communicator_objects.observation_pb2 import ObservationProto +from mlagents.envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto +import numpy as np +from mlagents.envs.base_env import AgentGroupSpec, ActionType +import io +from mlagents.envs.rpc_utils import ( + agent_group_spec_from_proto, + process_pixels, + _process_visual_observation, + _process_vector_observation, + batched_step_result_from_proto, +) +from PIL import Image + + +def generate_list_agent_proto( + n_agent: int, shape: List[Tuple[int]] +) -> List[AgentInfoProto]: + result = [] + for agent_index in range(n_agent): + ap = AgentInfoProto() + ap.reward = agent_index + ap.done = agent_index % 2 == 0 + ap.max_step_reached = agent_index % 2 == 1 + ap.id = agent_index + ap.action_mask.extend([True, False] * 5) + obs_proto_list = [] + for obs_index in range(len(shape)): + obs_proto = ObservationProto() + obs_proto.shape.extend(list(shape[obs_index])) + obs_proto.compression_type = 0 + obs_proto.float_data.data.extend([0.1] * np.prod(shape[obs_index])) + obs_proto_list.append(obs_proto) + ap.observations.extend(obs_proto_list) + result.append(ap) + return result + + +def generate_compressed_data(in_array: np.array) -> bytes: + image_arr = (in_array * 255).astype(np.uint8) + im = Image.fromarray(image_arr, "RGB") + byteIO = io.BytesIO() + im.save(byteIO, format="PNG") + return byteIO.getvalue() + + +def generate_compressed_proto_obs(in_array: np.array) -> ObservationProto: + obs_proto = ObservationProto() + obs_proto.compressed_data = generate_compressed_data(in_array) + obs_proto.compression_type = 1 + obs_proto.shape.extend(in_array.shape) + return obs_proto + + +def test_process_pixels(): + in_array = np.random.rand(128, 128, 3) + byte_arr = 
generate_compressed_data(in_array) + out_array = process_pixels(byte_arr, False) + assert out_array.shape == (128, 128, 3) + assert np.sum(in_array - out_array) / np.prod(in_array.shape) < 0.01 + assert (in_array - out_array < 0.01).all() + + +def test_process_pixels_gray(): + in_array = np.random.rand(128, 128, 3) + byte_arr = generate_compressed_data(in_array) + out_array = process_pixels(byte_arr, True) + assert out_array.shape == (128, 128, 1) + assert np.mean(in_array.mean(axis=2, keepdims=True) - out_array) < 0.01 + assert (in_array.mean(axis=2, keepdims=True) - out_array < 0.01).all() + + +def test_vector_observation(): + n_agents = 10 + shapes = [(3,), (4,)] + list_proto = generate_list_agent_proto(n_agents, shapes) + for obs_index, shape in enumerate(shapes): + arr = _process_vector_observation(obs_index, shape, list_proto) + assert list(arr.shape) == ([n_agents] + list(shape)) + assert (np.abs(arr - 0.1) < 0.01).all() + + +def test_process_visual_observation(): + in_array_1 = np.random.rand(128, 128, 3) + proto_obs_1 = generate_compressed_proto_obs(in_array_1) + in_array_2 = np.random.rand(128, 128, 3) + proto_obs_2 = generate_compressed_proto_obs(in_array_2) + ap1 = AgentInfoProto() + ap1.observations.append(proto_obs_1) + ap2 = AgentInfoProto() + ap2.observations.append(proto_obs_2) + ap_list = [ap1, ap2] + arr = _process_visual_observation(0, (128, 128, 3), ap_list) + assert list(arr.shape) == [2, 128, 128, 3] + assert (arr[0, :, :, :] - in_array_1 < 0.01).all() + assert (arr[1, :, :, :] - in_array_2 < 0.01).all() + + +def test_batched_step_result_from_proto(): + n_agents = 10 + shapes = [(3,), (4,)] + group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 3) + ap_list = generate_list_agent_proto(n_agents, shapes) + result = batched_step_result_from_proto(ap_list, group_spec) + assert list(result.reward) == list(range(n_agents)) + assert list(result.agent_id) == list(range(n_agents)) + for index in range(n_agents): + assert result.done[index] == (index % 2 == 0) + assert result.max_step[index] == (index % 2 == 1) + assert list(result.obs[0].shape) == [n_agents] + list(shapes[0]) + assert list(result.obs[1].shape) == [n_agents] + list(shapes[1]) + + +def test_agent_group_spec_from_proto(): + agent_proto = generate_list_agent_proto(1, [(3,), (4,)])[0] + bp = BrainParametersProto() + bp.vector_action_size.extend([5, 4]) + bp.vector_action_space_type = 0 + group_spec = agent_group_spec_from_proto(bp, agent_proto) + assert group_spec.action_type == ActionType.DISCRETE + assert group_spec.observation_shapes == [(3,), (4,)] + assert group_spec.action_shape == (5, 4) + bp = BrainParametersProto() + bp.vector_action_size.extend([6]) + bp.vector_action_space_type = 1 + group_spec = agent_group_spec_from_proto(bp, agent_proto) + assert group_spec.action_type == ActionType.CONTINUOUS + assert group_spec.action_shape == 6 From 7acd22762a697f84d233c2786b958d04d8d6f51c Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Tue, 3 Dec 2019 16:39:25 -0800 Subject: [PATCH 02/30] fixing ml-agents-envs tests --- ml-agents-envs/mlagents/envs/base_env.py | 2 +- ml-agents-envs/mlagents/envs/brain.py | 224 +----------------- ml-agents-envs/mlagents/envs/environment.py | 14 +- .../mlagents/envs/simple_env_manager.py | 8 +- .../mlagents/envs/subprocess_env_manager.py | 8 +- .../mlagents/envs/tests/test_brain.py | 73 ------ .../mlagents/envs/tests/test_envs.py | 79 +++--- .../envs/tests/test_subprocess_env_manager.py | 4 +- 8 files changed, 47 insertions(+), 365 deletions(-) delete mode 100644 
ml-agents-envs/mlagents/envs/tests/test_brain.py diff --git a/ml-agents-envs/mlagents/envs/base_env.py b/ml-agents-envs/mlagents/envs/base_env.py index 82078e7ee2..8b35b8f23e 100644 --- a/ml-agents-envs/mlagents/envs/base_env.py +++ b/ml-agents-envs/mlagents/envs/base_env.py @@ -205,7 +205,7 @@ def get_agent_groups(self) -> List[str]: pass @abstractmethod - def set_action(self, agent_group: str, action: np.array) -> None: + def set_actions(self, agent_group: str, action: np.array) -> None: """ Sets the action for all of the agents in the simulation for the next step. The Actions must be in the same order as the order received in diff --git a/ml-agents-envs/mlagents/envs/brain.py b/ml-agents-envs/mlagents/envs/brain.py index 21e349fe28..de874e8adf 100644 --- a/ml-agents-envs/mlagents/envs/brain.py +++ b/ml-agents-envs/mlagents/envs/brain.py @@ -1,13 +1,5 @@ import logging -import numpy as np -import io - -from mlagents.envs.communicator_objects.agent_info_pb2 import AgentInfoProto -from mlagents.envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto -from mlagents.envs.communicator_objects.observation_pb2 import ObservationProto -from mlagents.envs.timers import hierarchical_timer, timed -from typing import Dict, List, NamedTuple, Optional -from PIL import Image +from typing import Dict, List, NamedTuple logger = logging.getLogger("mlagents.envs") @@ -65,37 +57,6 @@ def __str__(self): ", ".join(self.vector_action_descriptions), ) - @staticmethod - def from_proto( - brain_param_proto: BrainParametersProto, agent_info: AgentInfoProto - ) -> "BrainParameters": - """ - Converts brain parameter proto to BrainParameter object. - :param brain_param_proto: protobuf object. - :return: BrainParameter object. - """ - resolutions = [ - CameraResolution(obs.shape[0], obs.shape[1], obs.shape[2]) - for obs in agent_info.observations - if len(obs.shape) >= 3 - ] - - total_vector_obs = sum( - obs.shape[0] for obs in agent_info.observations if len(obs.shape) == 1 - ) - - brain_params = BrainParameters( - brain_name=brain_param_proto.brain_name, - vector_observation_space_size=total_vector_obs, - camera_resolutions=resolutions, - vector_action_space_size=list(brain_param_proto.vector_action_size), - vector_action_descriptions=list( - brain_param_proto.vector_action_descriptions - ), - vector_action_space_type=brain_param_proto.vector_action_space_type, - ) - return brain_params - class BrainInfo: def __init__( @@ -119,189 +80,6 @@ def __init__( self.agents = agents self.action_masks = action_mask - @staticmethod - def merge_memories(m1, m2, agents1, agents2): - if len(m1) == 0 and len(m2) != 0: - m1 = np.zeros((len(agents1), m2.shape[1]), dtype=np.float32) - elif len(m2) == 0 and len(m1) != 0: - m2 = np.zeros((len(agents2), m1.shape[1]), dtype=np.float32) - elif m2.shape[1] > m1.shape[1]: - new_m1 = np.zeros((m1.shape[0], m2.shape[1]), dtype=np.float32) - new_m1[0 : m1.shape[0], 0 : m1.shape[1]] = m1 - return np.append(new_m1, m2, axis=0) - elif m1.shape[1] > m2.shape[1]: - new_m2 = np.zeros((m2.shape[0], m1.shape[1]), dtype=np.float32) - new_m2[0 : m2.shape[0], 0 : m2.shape[1]] = m2 - return np.append(m1, new_m2, axis=0) - return np.append(m1, m2, axis=0) - - @staticmethod - @timed - def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray: - """ - Converts byte array observation image into numpy array, re-sizes it, - and optionally converts it to grey scale - :param gray_scale: Whether to convert the image to grayscale. 
- :param image_bytes: input byte array corresponding to image - :return: processed numpy array of observation from environment - """ - with hierarchical_timer("image_decompress"): - image_bytearray = bytearray(image_bytes) - image = Image.open(io.BytesIO(image_bytearray)) - # Normally Image loads lazily, this forces it to do loading in the timer scope. - image.load() - s = np.array(image) / 255.0 - if gray_scale: - s = np.mean(s, axis=2) - s = np.reshape(s, [s.shape[0], s.shape[1], 1]) - return s - - @staticmethod - @timed - def from_agent_proto( - worker_id: int, - agent_info_list: List[AgentInfoProto], - brain_params: BrainParameters, - ) -> "BrainInfo": - """ - Converts list of agent infos to BrainInfo. - """ - vis_obs = BrainInfo._process_visual_observations(brain_params, agent_info_list) - - total_num_actions = sum(brain_params.vector_action_space_size) - mask_actions = np.ones( - (len(agent_info_list), total_num_actions), dtype=np.float32 - ) - for agent_index, agent_info in enumerate(agent_info_list): - if agent_info.action_mask is not None: - if len(agent_info.action_mask) == total_num_actions: - mask_actions[agent_index, :] = [ - 0 if agent_info.action_mask[k] else 1 - for k in range(total_num_actions) - ] - if any(np.isnan(x.reward) for x in agent_info_list): - logger.warning( - "An agent had a NaN reward for brain " + brain_params.brain_name - ) - - vector_obs = BrainInfo._process_vector_observations( - brain_params, agent_info_list - ) - - agents = [f"${worker_id}-{x.id}" for x in agent_info_list] - brain_info = BrainInfo( - visual_observation=vis_obs, - vector_observation=vector_obs, - reward=[x.reward if not np.isnan(x.reward) else 0 for x in agent_info_list], - agents=agents, - local_done=[x.done for x in agent_info_list], - max_reached=[x.max_step_reached for x in agent_info_list], - action_mask=mask_actions, - ) - return brain_info - - @staticmethod - def _process_visual_observations( - brain_params: BrainParameters, agent_info_list: List[AgentInfoProto] - ) -> List[np.ndarray]: - - visual_observation_protos: List[List[ObservationProto]] = [] - - # Grab the visual observations - need this together so we can iterate with the camera observations - for agent in agent_info_list: - agent_vis: List[ObservationProto] = [] - for proto_obs in agent.observations: - is_visual = len(proto_obs.shape) == 3 - if is_visual: - agent_vis.append(proto_obs) - visual_observation_protos.append(agent_vis) - - vis_obs: List[np.ndarray] = [] - for i in range(brain_params.number_visual_observations): - # TODO check compression type, handle uncompressed visuals - obs = [ - BrainInfo.process_pixels( - agent_obs[i].compressed_data, - brain_params.camera_resolutions[i].gray_scale, - ) - for agent_obs in visual_observation_protos - ] - vis_obs += [obs] - return vis_obs - - @staticmethod - def _process_vector_observations( - brain_params: BrainParameters, agent_info_list: List[AgentInfoProto] - ) -> np.ndarray: - if len(agent_info_list) == 0: - vector_obs = np.zeros( - (0, brain_params.vector_observation_space_size), dtype=np.float32 - ) - else: - stacked_obs = [] - has_nan = False - has_inf = False - for agent_info in agent_info_list: - vec_obs = [ - obs for obs in agent_info.observations if len(obs.shape) == 1 - ] - # Concatenate vector obs - proto_vector_obs: List[float] = [] - for vo in vec_obs: - # TODO consider itertools.chain here - proto_vector_obs.extend(vo.float_data.data) - np_obs = np.array(proto_vector_obs, dtype=np.float32) - - # Check for NaNs or infs in the observations - # If there's a 
NaN in the observations, the dot() result will be NaN - # If there's an Inf (either sign) then the result will be Inf - # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background - # Note that a very large values (larger than sqrt(float_max)) will result in an Inf value here - # This is OK though, worst case it results in an unnecessary (but harmless) nan_to_num call. - d = np.dot(np_obs, np_obs) - has_nan = has_nan or np.isnan(d) - has_inf = has_inf or not np.isfinite(d) - stacked_obs.append(np_obs) - vector_obs = np.array(stacked_obs, dtype=np.float32) - - # In we have any NaN or Infs, use np.nan_to_num to replace these with finite values - if has_nan or has_inf: - vector_obs = np.nan_to_num(vector_obs) - - if has_nan: - logger.warning( - f"An agent had a NaN observation for brain {brain_params.brain_name}" - ) - return vector_obs - - -def safe_concat_lists(l1: Optional[List], l2: Optional[List]) -> Optional[List]: - if l1 is None: - if l2 is None: - return None - else: - return l2.copy() - else: - if l2 is None: - return l1.copy() - else: - copy = l1.copy() - copy.extend(l2) - return copy - - -def safe_concat_np_ndarray( - a1: Optional[np.ndarray], a2: Optional[np.ndarray] -) -> Optional[np.ndarray]: - if a1 is not None and a1.size != 0: - if a2 is not None and a2.size != 0: - return np.append(a1, a2, axis=0) - else: - return a1.copy() - elif a2 is not None and a2.size != 0: - return a2.copy() - return None - # Renaming of dictionary of brain name to BrainInfo for clarity AllBrainInfo = Dict[str, BrainInfo] diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index 54728518f5..e32ad4b1a4 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -341,7 +341,7 @@ def _assert_group_exists(self, agent_group: str) -> None: "in the environment".format(agent_group) ) - def set_action(self, agent_group: str, action: np.array) -> None: + def set_actions(self, agent_group: str, action: np.array) -> None: self._assert_group_exists(agent_group) if agent_group not in self._env_state: return @@ -361,12 +361,8 @@ def set_action(self, agent_group: str, action: np.array) -> None: agent_group, expected_shape, action.shape ) ) - if action.dtype != expected_type: - raise UnityActionException( - "The group {0} needs to receive action of type {1} but received" - "actions of type {2}".format(agent_group, expected_type, action.dtype) - ) + action = action.astype(expected_type) self._env_actions[agent_group] = action def set_action_for_agent( @@ -389,6 +385,12 @@ def set_action_for_agent( agent_id, agent_group, expected_shape, action.shape ) ) + expected_type = ( + np.float32 if spec.action_type == ActionType.CONTINUOUS else np.int32 + ) + if action.dtype != expected_type: + action = action.astype(expected_type) + if agent_group not in self._env_actions: self._env_actions[agent_group] = self._empty_action( spec, self._env_state[agent_group].n_agents() diff --git a/ml-agents-envs/mlagents/envs/simple_env_manager.py b/ml-agents-envs/mlagents/envs/simple_env_manager.py index 248416ea59..734be50111 100644 --- a/ml-agents-envs/mlagents/envs/simple_env_manager.py +++ b/ml-agents-envs/mlagents/envs/simple_env_manager.py @@ -1,6 +1,6 @@ from typing import Dict, List -from mlagents.envs.base_unity_environment import BaseUnityEnvironment +from mlagents.envs.base_env import BaseEnv from mlagents.envs.env_manager import EnvManager, EnvironmentStep from mlagents.envs.timers import timed 
from mlagents.envs.action_info import ActionInfo @@ -10,13 +10,11 @@ class SimpleEnvManager(EnvManager): """ - Simple implementation of the EnvManager interface that only handles one BaseUnityEnvironment at a time. + Simple implementation of the EnvManager interface that only handles one BaseEnv at a time. This is generally only useful for testing; see SubprocessEnvManager for a production-quality implementation. """ - def __init__( - self, env: BaseUnityEnvironment, float_prop_channel: FloatPropertiesChannel - ): + def __init__(self, env: BaseEnv, float_prop_channel: FloatPropertiesChannel): super().__init__() self.shared_float_properties = float_prop_channel self.env = env diff --git a/ml-agents-envs/mlagents/envs/subprocess_env_manager.py b/ml-agents-envs/mlagents/envs/subprocess_env_manager.py index 930559b9a7..8f71a23b35 100644 --- a/ml-agents-envs/mlagents/envs/subprocess_env_manager.py +++ b/ml-agents-envs/mlagents/envs/subprocess_env_manager.py @@ -7,7 +7,7 @@ from multiprocessing import Process, Pipe, Queue from multiprocessing.connection import Connection from queue import Empty as EmptyQueueException -from mlagents.envs.base_unity_environment import BaseUnityEnvironment +from mlagents.envs.base_env import BaseEnv from mlagents.envs.env_manager import EnvManager, EnvironmentStep from mlagents.envs.timers import ( TimerNode, @@ -92,7 +92,7 @@ def worker( shared_float_properties = FloatPropertiesChannel() engine_configuration_channel = EngineConfigurationChannel() engine_configuration_channel.set_configuration(engine_configuration) - env: BaseUnityEnvironment = env_factory( + env: BaseEnv = env_factory( worker_id, [shared_float_properties, engine_configuration_channel] ) @@ -151,7 +151,7 @@ def _send_response(cmd_name, payload): class SubprocessEnvManager(EnvManager): def __init__( self, - env_factory: Callable[[int, List[SideChannel]], BaseUnityEnvironment], + env_factory: Callable[[int, List[SideChannel]], BaseEnv], engine_configuration: EngineConfig, n_env: int = 1, ): @@ -169,7 +169,7 @@ def __init__( def create_worker( worker_id: int, step_queue: Queue, - env_factory: Callable[[int, List[SideChannel]], BaseUnityEnvironment], + env_factory: Callable[[int, List[SideChannel]], BaseEnv], engine_configuration: EngineConfig, ) -> UnityEnvWorker: parent_conn, child_conn = Pipe() diff --git a/ml-agents-envs/mlagents/envs/tests/test_brain.py b/ml-agents-envs/mlagents/envs/tests/test_brain.py deleted file mode 100644 index 3191300c68..0000000000 --- a/ml-agents-envs/mlagents/envs/tests/test_brain.py +++ /dev/null @@ -1,73 +0,0 @@ -from typing import List -import logging -import numpy as np -from unittest import mock - -from mlagents.envs.communicator_objects.agent_info_pb2 import AgentInfoProto -from mlagents.envs.communicator_objects.observation_pb2 import ( - ObservationProto, - NONE as COMPRESSION_TYPE_NONE, -) -from mlagents.envs.brain import BrainInfo, BrainParameters - -test_brain = BrainParameters( - brain_name="test_brain", - vector_observation_space_size=3, - camera_resolutions=[], - vector_action_space_size=[], - vector_action_descriptions=[], - vector_action_space_type=1, -) - - -def _make_agent_info_proto(vector_obs: List[float]) -> AgentInfoProto: - obs = ObservationProto( - float_data=ObservationProto.FloatData(data=vector_obs), - shape=[len(vector_obs)], - compression_type=COMPRESSION_TYPE_NONE, - ) - agent_info_proto = AgentInfoProto(observations=[obs]) - return agent_info_proto - - -@mock.patch.object(np, "nan_to_num", wraps=np.nan_to_num) 
-@mock.patch.object(logging.Logger, "warning") -def test_from_agent_proto_nan(mock_warning, mock_nan_to_num): - agent_info_proto = _make_agent_info_proto([1.0, 2.0, float("nan")]) - - brain_info = BrainInfo.from_agent_proto(1, [agent_info_proto], test_brain) - # nan gets set to 0.0 - expected = [1.0, 2.0, 0.0] - assert (brain_info.vector_observations == expected).all() - mock_nan_to_num.assert_called() - mock_warning.assert_called() - - -@mock.patch.object(np, "nan_to_num", wraps=np.nan_to_num) -@mock.patch.object(logging.Logger, "warning") -def test_from_agent_proto_inf(mock_warning, mock_nan_to_num): - agent_info_proto = _make_agent_info_proto([1.0, float("inf"), 0.0]) - - brain_info = BrainInfo.from_agent_proto(1, [agent_info_proto], test_brain) - # inf should get set to float32_max - float32_max = np.finfo(np.float32).max - expected = [1.0, float32_max, 0.0] - assert (brain_info.vector_observations == expected).all() - mock_nan_to_num.assert_called() - # We don't warn on inf, just NaN - mock_warning.assert_not_called() - - -@mock.patch.object(np, "nan_to_num", wraps=np.nan_to_num) -@mock.patch.object(logging.Logger, "warning") -def test_from_agent_proto_fast_path(mock_warning, mock_nan_to_num): - """ - Check that all finite values skips the nan_to_num call - """ - agent_info_proto = _make_agent_info_proto([1.0, 2.0, 3.0]) - - brain_info = BrainInfo.from_agent_proto(1, [agent_info_proto], test_brain) - expected = [1.0, 2.0, 3.0] - assert (brain_info.vector_observations == expected).all() - mock_nan_to_num.assert_not_called() - mock_warning.assert_not_called() diff --git a/ml-agents-envs/mlagents/envs/tests/test_envs.py b/ml-agents-envs/mlagents/envs/tests/test_envs.py index 7b6c518727..6af986601f 100755 --- a/ml-agents-envs/mlagents/envs/tests/test_envs.py +++ b/ml-agents-envs/mlagents/envs/tests/test_envs.py @@ -4,8 +4,8 @@ import numpy as np from mlagents.envs.environment import UnityEnvironment +from mlagents.envs.base_env import BatchedStepResult from mlagents.envs.exception import UnityEnvironmentException, UnityActionException -from mlagents.envs.brain import BrainInfo from mlagents.envs.mock_communicator import MockCommunicator @@ -22,7 +22,7 @@ def test_initialization(mock_communicator, mock_launcher): discrete_action=False, visual_inputs=0 ) env = UnityEnvironment(" ") - assert env.external_brain_names[0] == "RealFakeBrain" + assert env.get_agent_groups() == ["RealFakeBrain"] env.close() @@ -33,24 +33,15 @@ def test_reset(mock_communicator, mock_launcher): discrete_action=False, visual_inputs=0 ) env = UnityEnvironment(" ") - brain = env.brains["RealFakeBrain"] - brain_info = env.reset() + spec = env.get_agent_group_spec("RealFakeBrain") + env.reset() + batched_step_result = env.get_step_result("RealFakeBrain") env.close() - assert isinstance(brain_info, dict) - assert isinstance(brain_info["RealFakeBrain"], BrainInfo) - assert isinstance(brain_info["RealFakeBrain"].visual_observations, list) - assert isinstance(brain_info["RealFakeBrain"].vector_observations, np.ndarray) - assert ( - len(brain_info["RealFakeBrain"].visual_observations) - == brain.number_visual_observations - ) - assert len(brain_info["RealFakeBrain"].vector_observations) == len( - brain_info["RealFakeBrain"].agents - ) - assert ( - len(brain_info["RealFakeBrain"].vector_observations[0]) - == brain.vector_observation_space_size - ) + assert isinstance(batched_step_result, BatchedStepResult) + assert len(spec.observation_shapes) == len(batched_step_result.obs) + n_agents = batched_step_result.n_agents() + for 
shape, obs in zip(spec.observation_shapes, batched_step_result.obs): + assert (n_agents,) + shape == obs.shape @mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher") @@ -60,40 +51,26 @@ def test_step(mock_communicator, mock_launcher): discrete_action=False, visual_inputs=0 ) env = UnityEnvironment(" ") - brain = env.brains["RealFakeBrain"] - brain_info = env.step() - brain_info = env.step( - [0] - * brain.vector_action_space_size[0] - * len(brain_info["RealFakeBrain"].agents) - ) + spec = env.get_agent_group_spec("RealFakeBrain") + env.step() + batched_step_result = env.get_step_result("RealFakeBrain") + n_agents = batched_step_result.n_agents() + env.set_actions("RealFakeBrain", np.zeros((n_agents, spec.action_shape))) + env.step() with pytest.raises(UnityActionException): - env.step([0]) - brain_info = env.step( - [-1] - * brain.vector_action_space_size[0] - * len(brain_info["RealFakeBrain"].agents) - ) - env.close() - assert isinstance(brain_info, dict) - assert isinstance(brain_info["RealFakeBrain"], BrainInfo) - assert isinstance(brain_info["RealFakeBrain"].visual_observations, list) - assert isinstance(brain_info["RealFakeBrain"].vector_observations, np.ndarray) - assert ( - len(brain_info["RealFakeBrain"].visual_observations) - == brain.number_visual_observations - ) - assert len(brain_info["RealFakeBrain"].vector_observations) == len( - brain_info["RealFakeBrain"].agents - ) - assert ( - len(brain_info["RealFakeBrain"].vector_observations[0]) - == brain.vector_observation_space_size - ) + env.set_actions("RealFakeBrain", np.zeros((n_agents - 1, spec.action_shape))) + batched_step_result = env.get_step_result("RealFakeBrain") + n_agents = batched_step_result.n_agents() + env.set_actions("RealFakeBrain", -1 * np.ones((n_agents, spec.action_shape))) + env.step() - print("\n\n\n\n\n\n\n" + str(brain_info["RealFakeBrain"].local_done)) - assert not brain_info["RealFakeBrain"].local_done[0] - assert brain_info["RealFakeBrain"].local_done[2] + env.close() + assert isinstance(batched_step_result, BatchedStepResult) + assert len(spec.observation_shapes) == len(batched_step_result.obs) + for shape, obs in zip(spec.observation_shapes, batched_step_result.obs): + assert (n_agents,) + shape == obs.shape + assert not batched_step_result.done[0] + assert batched_step_result.done[2] @mock.patch("mlagents.envs.environment.UnityEnvironment.executable_launcher") diff --git a/ml-agents-envs/mlagents/envs/tests/test_subprocess_env_manager.py b/ml-agents-envs/mlagents/envs/tests/test_subprocess_env_manager.py index 44508131da..0571704447 100644 --- a/ml-agents-envs/mlagents/envs/tests/test_subprocess_env_manager.py +++ b/ml-agents-envs/mlagents/envs/tests/test_subprocess_env_manager.py @@ -8,12 +8,12 @@ EnvironmentResponse, StepResponse, ) -from mlagents.envs.base_unity_environment import BaseUnityEnvironment +from mlagents.envs.base_env import BaseEnv from mlagents.envs.side_channel.engine_configuration_channel import EngineConfig def mock_env_factory(worker_id): - return mock.create_autospec(spec=BaseUnityEnvironment) + return mock.create_autospec(spec=BaseEnv) class MockEnvWorker: From c49be07e50e7af294d7d933e060201afc1b84716 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 4 Dec 2019 10:42:32 -0800 Subject: [PATCH 03/30] Implementing action masks --- ml-agents-envs/mlagents/envs/rpc_utils.py | 25 ++++++++- .../mlagents/envs/tests/test_rpc_utils.py | 54 +++++++++++++++++++ 2 files changed, 77 insertions(+), 2 deletions(-) diff --git 
a/ml-agents-envs/mlagents/envs/rpc_utils.py b/ml-agents-envs/mlagents/envs/rpc_utils.py index a81880e050..b70bfe9238 100644 --- a/ml-agents-envs/mlagents/envs/rpc_utils.py +++ b/ml-agents-envs/mlagents/envs/rpc_utils.py @@ -128,6 +128,27 @@ def batched_step_result_from_proto( ) action_mask = None if group_spec.action_type == ActionType.DISCRETE: - # TODO - print("ERROR") + if any([agent_info.action_mask is not None] for agent_info in agent_info_list): + n_agents = len(agent_info_list) + a_size = np.sum(group_spec.action_shape) + mask_matrix = np.ones((n_agents, a_size), dtype=np.bool) + for agent_index, agent_info in enumerate(agent_info_list): + if agent_info.action_mask is not None: + if len(agent_info.action_mask) == a_size: + mask_matrix[agent_index, :] = [ + False if agent_info.action_mask[k] else True + for k in range(a_size) + ] + action_mask = (1 - mask_matrix).astype(np.bool) + indices = _generate_split_indices(group_spec.action_shape) + action_mask = np.split(action_mask, indices, axis=1) return BatchedStepResult(obs_list, rewards, done, max_step, agent_id, action_mask) + + +def _generate_split_indices(dims): + if len(dims) <= 1: + return () + result = (dims[0],) + for i in range(len(dims) - 2): + result += (dims[i + 1] + result[i],) + return result diff --git a/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py b/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py index 5fd1e56dff..5d2cbca223 100644 --- a/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py +++ b/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py @@ -113,6 +113,60 @@ def test_batched_step_result_from_proto(): assert list(result.obs[1].shape) == [n_agents] + list(shapes[1]) +def test_action_masking_discrete(): + n_agents = 10 + shapes = [(3,), (4,)] + group_spec = AgentGroupSpec(shapes, ActionType.DISCRETE, (7, 3)) + ap_list = generate_list_agent_proto(n_agents, shapes) + result = batched_step_result_from_proto(ap_list, group_spec) + masks = result.action_mask + assert isinstance(masks, list) + assert len(masks) == 2 + assert masks[0].shape == (n_agents, 7) + assert masks[1].shape == (n_agents, 3) + assert masks[0][0, 0] + assert not masks[1][0, 0] + assert masks[1][0, 1] + + +def test_action_masking_discrete_1(): + n_agents = 10 + shapes = [(3,), (4,)] + group_spec = AgentGroupSpec(shapes, ActionType.DISCRETE, (10,)) + ap_list = generate_list_agent_proto(n_agents, shapes) + result = batched_step_result_from_proto(ap_list, group_spec) + masks = result.action_mask + assert isinstance(masks, list) + assert len(masks) == 1 + assert masks[0].shape == (n_agents, 10) + assert masks[0][0, 0] + + +def test_action_masking_discrete_2(): + n_agents = 10 + shapes = [(3,), (4,)] + group_spec = AgentGroupSpec(shapes, ActionType.DISCRETE, (2, 2, 6)) + ap_list = generate_list_agent_proto(n_agents, shapes) + result = batched_step_result_from_proto(ap_list, group_spec) + masks = result.action_mask + assert isinstance(masks, list) + assert len(masks) == 3 + assert masks[0].shape == (n_agents, 2) + assert masks[1].shape == (n_agents, 2) + assert masks[2].shape == (n_agents, 6) + assert masks[0][0, 0] + + +def test_action_masking_continuous(): + n_agents = 10 + shapes = [(3,), (4,)] + group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 10) + ap_list = generate_list_agent_proto(n_agents, shapes) + result = batched_step_result_from_proto(ap_list, group_spec) + masks = result.action_mask + assert masks is None + + def test_agent_group_spec_from_proto(): agent_proto = generate_list_agent_proto(1, [(3,), (4,)])[0] bp = 
BrainParametersProto() From bcce783b4ec082685c9d9b1da25761dc6fc4a7a6 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 4 Dec 2019 13:17:58 -0800 Subject: [PATCH 04/30] training is fixed for 3DBall --- .../mlagents/envs/brain_conversion_utils.py | 69 +++++++++++++++++++ .../mlagents/envs/simple_env_manager.py | 34 ++++++--- .../mlagents/envs/subprocess_env_manager.py | 33 +++++++-- 3 files changed, 120 insertions(+), 16 deletions(-) create mode 100644 ml-agents-envs/mlagents/envs/brain_conversion_utils.py diff --git a/ml-agents-envs/mlagents/envs/brain_conversion_utils.py b/ml-agents-envs/mlagents/envs/brain_conversion_utils.py new file mode 100644 index 0000000000..5bb5e59001 --- /dev/null +++ b/ml-agents-envs/mlagents/envs/brain_conversion_utils.py @@ -0,0 +1,69 @@ +from mlagents.envs.brain import BrainInfo, BrainParameters, CameraResolution +from mlagents.envs.base_env import BatchedStepResult, AgentGroupSpec, ActionType +import numpy as np +from typing import List + +# BrainInfo( +# visual_observation, +# vector_observation, +# reward=None, +# agents=None, +# local_done=None, +# max_reached=None, +# action_mask=None, +# ) +# BrainParameters +# brain_name: str, +# vector_observation_space_size: int, +# camera_resolutions: List[CameraResolution], +# vector_action_space_size: List[int], +# vector_action_descriptions: List[str], +# vector_action_space_type: int, +# ) + + +def step_result_to_brain_info( + step_result: BatchedStepResult, group_spec: AgentGroupSpec +) -> BrainInfo: + n_agents = step_result.n_agents() + vis_obs_indices = [] + vec_obs_indices = [] + for index, observation in enumerate(step_result.obs): + if len(observation.shape) == 2: + vec_obs_indices.append(index) + if len(observation.shape) == 4: + vis_obs_indices.append(index) + vec_obs = np.concatenate([step_result.obs[i] for i in vec_obs_indices], axis=1) + vis_obs = [step_result.obs[i] for i in vis_obs_indices] + mask = np.ones((n_agents, np.sum(group_spec.action_shape))) + if step_result.action_mask is not None: + mask = 1 - np.concatenate(step_result.action_mask, axis=1) + return BrainInfo( + vis_obs, + vec_obs, + list(step_result.reward), + list(step_result.agent_id), + list(step_result.done), + list(step_result.max_step), + mask, + ) + + +def group_spec_to_brain_parameters( + name: str, group_spec: AgentGroupSpec +) -> BrainParameters: + vec_size = np.sum( + [shape[0] for shape in group_spec.observation_shapes if len(shape) == 1] + ) + vis_sizes = [shape for shape in group_spec.observation_shapes if len(shape) == 3] + cam_res = [CameraResolution(s[0], s[1], s[2]) for s in vis_sizes] + a_size: List[int] = [] + if group_spec.action_type == ActionType.DISCRETE: + a_size += list(group_spec.action_shape) + vector_action_space_type = 0 + else: + a_size += [group_spec.action_shape] + vector_action_space_type = 1 + return BrainParameters( + name, vec_size, cam_res, a_size, [], vector_action_space_type + ) diff --git a/ml-agents-envs/mlagents/envs/simple_env_manager.py b/ml-agents-envs/mlagents/envs/simple_env_manager.py index 734be50111..928248490c 100644 --- a/ml-agents-envs/mlagents/envs/simple_env_manager.py +++ b/ml-agents-envs/mlagents/envs/simple_env_manager.py @@ -4,8 +4,12 @@ from mlagents.envs.env_manager import EnvManager, EnvironmentStep from mlagents.envs.timers import timed from mlagents.envs.action_info import ActionInfo -from mlagents.envs.brain import BrainParameters +from mlagents.envs.brain import BrainParameters, AllBrainInfo from mlagents.envs.side_channel.float_properties_channel import 
FloatPropertiesChannel +from mlagents.envs.brain_conversion_utils import ( + step_result_to_brain_info, + group_spec_to_brain_parameters, +) class SimpleEnvManager(EnvManager): @@ -22,16 +26,13 @@ def __init__(self, env: BaseEnv, float_prop_channel: FloatPropertiesChannel): self.previous_all_action_info: Dict[str, ActionInfo] = {} def step(self) -> List[EnvironmentStep]: - all_action_info = self._take_step(self.previous_step) self.previous_all_action_info = all_action_info - actions = {} - values = {} for brain_name, action_info in all_action_info.items(): - actions[brain_name] = action_info.action - values[brain_name] = action_info.value - all_brain_info = self.env.step(vector_action=actions, value=values) + self.env.set_actions(brain_name, action_info.action) + self.env.step() + all_brain_info = self._generate_all_brain_info() step_brain_info = all_brain_info step_info = EnvironmentStep( @@ -48,13 +49,19 @@ def reset( if config is not None: for k, v in config.items(): self.shared_float_properties.set_property(k, v) - all_brain_info = self.env.reset() + self.env.reset() + all_brain_info = self._generate_all_brain_info() self.previous_step = EnvironmentStep(None, all_brain_info, None) return [self.previous_step] @property def external_brains(self) -> Dict[str, BrainParameters]: - return self.env.external_brains + result = {} + for brain_name in self.env.get_agent_groups(): + result[brain_name] = group_spec_to_brain_parameters( + brain_name, self.env.get_agent_group_spec(brain_name) + ) + return result @property def get_properties(self) -> Dict[str, float]: @@ -74,3 +81,12 @@ def _take_step(self, last_step: EnvironmentStep) -> Dict[str, ActionInfo]: brain_info ) return all_action_info + + def _generate_all_brain_info(self) -> AllBrainInfo: + all_brain_info = {} + for brain_name in self.env.get_agent_groups(): + all_brain_info[brain_name] = step_result_to_brain_info( + self.env.get_step_result(brain_name), + self.env.get_agent_group_spec(brain_name), + ) + return all_brain_info diff --git a/ml-agents-envs/mlagents/envs/subprocess_env_manager.py b/ml-agents-envs/mlagents/envs/subprocess_env_manager.py index 8f71a23b35..6cdb0bedda 100644 --- a/ml-agents-envs/mlagents/envs/subprocess_env_manager.py +++ b/ml-agents-envs/mlagents/envs/subprocess_env_manager.py @@ -24,6 +24,10 @@ EngineConfig, ) from mlagents.envs.side_channel.side_channel import SideChannel +from mlagents.envs.brain_conversion_utils import ( + step_result_to_brain_info, + group_spec_to_brain_parameters, +) logger = logging.getLogger("mlagents.envs") @@ -99,17 +103,31 @@ def worker( def _send_response(cmd_name, payload): parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload)) + def _generate_all_brain_info() -> AllBrainInfo: + all_brain_info = {} + for brain_name in env.get_agent_groups(): + all_brain_info[brain_name] = step_result_to_brain_info( + env.get_step_result(brain_name), env.get_agent_group_spec(brain_name) + ) + return all_brain_info + + def external_brains(): + result = {} + for brain_name in env.get_agent_groups(): + result[brain_name] = group_spec_to_brain_parameters( + brain_name, env.get_agent_group_spec(brain_name) + ) + return result + try: while True: cmd: EnvironmentCommand = parent_conn.recv() if cmd.name == "step": all_action_info = cmd.payload - actions = {} - values = {} for brain_name, action_info in all_action_info.items(): - actions[brain_name] = action_info.action - values[brain_name] = action_info.value - all_brain_info = env.step(vector_action=actions, value=values) + 
env.set_actions(brain_name, action_info.action) + env.step() + all_brain_info = _generate_all_brain_info() # The timers in this process are independent from all the processes and the "main" process # So after we send back the root timer, we can safely clear them. # Note that we could randomly return timers a fraction of the time if we wanted to reduce @@ -119,7 +137,7 @@ def _send_response(cmd_name, payload): step_queue.put(EnvironmentResponse("step", worker_id, step_response)) reset_timers() elif cmd.name == "external_brains": - _send_response("external_brains", env.external_brains) + _send_response("external_brains", external_brains()) elif cmd.name == "get_properties": reset_params = {} for k in shared_float_properties.list_properties(): @@ -129,7 +147,8 @@ def _send_response(cmd_name, payload): elif cmd.name == "reset": for k, v in cmd.payload.items(): shared_float_properties.set_property(k, v) - all_brain_info = env.reset() + env.reset() + all_brain_info = _generate_all_brain_info() _send_response("reset", all_brain_info) elif cmd.name == "close": break From 942c0d27083c8dfcd2dc09604fb00c6865a38bfd Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 4 Dec 2019 14:56:28 -0800 Subject: [PATCH 05/30] Tests all fixed, gym is broken and missing documentation changes --- ml-agents-envs/mlagents/envs/brain.py | 173 ++++++++++++++++++ .../mlagents/envs/brain_conversion_utils.py | 30 +-- ml-agents-envs/mlagents/envs/environment.py | 2 +- .../mlagents/envs/subprocess_env_manager.py | 7 +- ml-agents/mlagents/trainers/learn.py | 4 +- ml-agents/mlagents/trainers/tests/test_bc.py | 23 ++- ml-agents/mlagents/trainers/tests/test_ppo.py | 36 ++-- .../mlagents/trainers/tests/test_simple_rl.py | 112 +++++------- 8 files changed, 280 insertions(+), 107 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/brain.py b/ml-agents-envs/mlagents/envs/brain.py index de874e8adf..14cd4f15bd 100644 --- a/ml-agents-envs/mlagents/envs/brain.py +++ b/ml-agents-envs/mlagents/envs/brain.py @@ -1,5 +1,13 @@ import logging +import numpy as np +import io + +from mlagents.envs.communicator_objects.agent_info_pb2 import AgentInfoProto +from mlagents.envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto +from mlagents.envs.communicator_objects.observation_pb2 import ObservationProto +from mlagents.envs.timers import hierarchical_timer, timed from typing import Dict, List, NamedTuple +from PIL import Image logger = logging.getLogger("mlagents.envs") @@ -57,6 +65,37 @@ def __str__(self): ", ".join(self.vector_action_descriptions), ) + @staticmethod + def from_proto( + brain_param_proto: BrainParametersProto, agent_info: AgentInfoProto + ) -> "BrainParameters": + """ + Converts brain parameter proto to BrainParameter object. + :param brain_param_proto: protobuf object. + :return: BrainParameter object. 
+ """ + resolutions = [ + CameraResolution(obs.shape[0], obs.shape[1], obs.shape[2]) + for obs in agent_info.observations + if len(obs.shape) >= 3 + ] + + total_vector_obs = sum( + obs.shape[0] for obs in agent_info.observations if len(obs.shape) == 1 + ) + + brain_params = BrainParameters( + brain_name=brain_param_proto.brain_name, + vector_observation_space_size=total_vector_obs, + camera_resolutions=resolutions, + vector_action_space_size=list(brain_param_proto.vector_action_size), + vector_action_descriptions=list( + brain_param_proto.vector_action_descriptions + ), + vector_action_space_type=brain_param_proto.vector_action_space_type, + ) + return brain_params + class BrainInfo: def __init__( @@ -80,6 +119,140 @@ def __init__( self.agents = agents self.action_masks = action_mask + @staticmethod + @timed + def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray: + """ + Converts byte array observation image into numpy array, re-sizes it, + and optionally converts it to grey scale + :param gray_scale: Whether to convert the image to grayscale. + :param image_bytes: input byte array corresponding to image + :return: processed numpy array of observation from environment + """ + with hierarchical_timer("image_decompress"): + image_bytearray = bytearray(image_bytes) + image = Image.open(io.BytesIO(image_bytearray)) + # Normally Image loads lazily, this forces it to do loading in the timer scope. + image.load() + s = np.array(image) / 255.0 + if gray_scale: + s = np.mean(s, axis=2) + s = np.reshape(s, [s.shape[0], s.shape[1], 1]) + return s + + @staticmethod + def from_agent_proto( + worker_id: int, + agent_info_list: List[AgentInfoProto], + brain_params: BrainParameters, + ) -> "BrainInfo": + """ + Converts list of agent infos to BrainInfo. 
+ """ + vis_obs = BrainInfo._process_visual_observations(brain_params, agent_info_list) + + total_num_actions = sum(brain_params.vector_action_space_size) + mask_actions = np.ones((len(agent_info_list), total_num_actions)) + for agent_index, agent_info in enumerate(agent_info_list): + if agent_info.action_mask is not None: + if len(agent_info.action_mask) == total_num_actions: + mask_actions[agent_index, :] = [ + 0 if agent_info.action_mask[k] else 1 + for k in range(total_num_actions) + ] + if any(np.isnan(x.reward) for x in agent_info_list): + logger.warning( + "An agent had a NaN reward for brain " + brain_params.brain_name + ) + + vector_obs = BrainInfo._process_vector_observations( + brain_params, agent_info_list + ) + + agents = [f"${worker_id}-{x.id}" for x in agent_info_list] + brain_info = BrainInfo( + visual_observation=vis_obs, + vector_observation=vector_obs, + reward=[x.reward if not np.isnan(x.reward) else 0 for x in agent_info_list], + agents=agents, + local_done=[x.done for x in agent_info_list], + max_reached=[x.max_step_reached for x in agent_info_list], + action_mask=mask_actions, + ) + return brain_info + + @staticmethod + def _process_visual_observations( + brain_params: BrainParameters, agent_info_list: List[AgentInfoProto] + ) -> List[np.ndarray]: + + visual_observation_protos: List[List[ObservationProto]] = [] + + # Grab the visual observations - need this together so we can iterate with the camera observations + for agent in agent_info_list: + agent_vis: List[ObservationProto] = [] + for proto_obs in agent.observations: + is_visual = len(proto_obs.shape) == 3 + if is_visual: + agent_vis.append(proto_obs) + visual_observation_protos.append(agent_vis) + + vis_obs: List[np.ndarray] = [] + for i in range(brain_params.number_visual_observations): + # TODO check compression type, handle uncompressed visuals + obs = [ + BrainInfo.process_pixels( + agent_obs[i].compressed_data, + brain_params.camera_resolutions[i].gray_scale, + ) + for agent_obs in visual_observation_protos + ] + vis_obs += [obs] + return vis_obs + + @staticmethod + def _process_vector_observations( + brain_params: BrainParameters, agent_info_list: List[AgentInfoProto] + ) -> np.ndarray: + if len(agent_info_list) == 0: + vector_obs = np.zeros((0, brain_params.vector_observation_space_size)) + else: + stacked_obs = [] + has_nan = False + has_inf = False + for agent_info in agent_info_list: + vec_obs = [ + obs for obs in agent_info.observations if len(obs.shape) == 1 + ] + # Concatenate vector obs + proto_vector_obs: List[float] = [] + for vo in vec_obs: + # TODO consider itertools.chain here + proto_vector_obs.extend(vo.float_data.data) + np_obs = np.array(proto_vector_obs) + + # Check for NaNs or infs in the observations + # If there's a NaN in the observations, the dot() result will be NaN + # If there's an Inf (either sign) then the result will be Inf + # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background + # Note that a very large values (larger than sqrt(float_max)) will result in an Inf value here + # This is OK though, worst case it results in an unnecessary (but harmless) nan_to_num call. 
+ d = np.dot(np_obs, np_obs) + has_nan = has_nan or np.isnan(d) + has_inf = has_inf or not np.isfinite(d) + stacked_obs.append(np_obs) + vector_obs = np.array(stacked_obs) + + # In we have any NaN or Infs, use np.nan_to_num to replace these with finite values + if has_nan or has_inf: + vector_obs = np.nan_to_num(vector_obs) + + if has_nan: + logger.warning( + f"An agent had a NaN observation for brain {brain_params.brain_name}" + ) + return vector_obs + # Renaming of dictionary of brain name to BrainInfo for clarity AllBrainInfo = Dict[str, BrainInfo] diff --git a/ml-agents-envs/mlagents/envs/brain_conversion_utils.py b/ml-agents-envs/mlagents/envs/brain_conversion_utils.py index 5bb5e59001..9e211bd714 100644 --- a/ml-agents-envs/mlagents/envs/brain_conversion_utils.py +++ b/ml-agents-envs/mlagents/envs/brain_conversion_utils.py @@ -3,27 +3,11 @@ import numpy as np from typing import List -# BrainInfo( -# visual_observation, -# vector_observation, -# reward=None, -# agents=None, -# local_done=None, -# max_reached=None, -# action_mask=None, -# ) -# BrainParameters -# brain_name: str, -# vector_observation_space_size: int, -# camera_resolutions: List[CameraResolution], -# vector_action_space_size: List[int], -# vector_action_descriptions: List[str], -# vector_action_space_type: int, -# ) - def step_result_to_brain_info( - step_result: BatchedStepResult, group_spec: AgentGroupSpec + step_result: BatchedStepResult, + group_spec: AgentGroupSpec, + agent_id_prefix: int = None, ) -> BrainInfo: n_agents = step_result.n_agents() vis_obs_indices = [] @@ -38,11 +22,17 @@ def step_result_to_brain_info( mask = np.ones((n_agents, np.sum(group_spec.action_shape))) if step_result.action_mask is not None: mask = 1 - np.concatenate(step_result.action_mask, axis=1) + if agent_id_prefix is None: + agent_ids = list(step_result.agent_id) + else: + agent_ids = [ + f"${agent_id_prefix}-{ag_id}" for ag_id in list(step_result.agent_id) + ] return BrainInfo( vis_obs, vec_obs, list(step_result.reward), - list(step_result.agent_id), + agent_ids, list(step_result.done), list(step_result.max_step), mask, diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index e32ad4b1a4..34ea4fe458 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -349,7 +349,7 @@ def set_actions(self, agent_group: str, action: np.array) -> None: expected_a = ( spec.action_shape if spec.action_type == ActionType.CONTINUOUS - else np.sum(spec.action_shape) + else len(spec.action_shape) ) expected_type = ( np.float32 if spec.action_type == ActionType.CONTINUOUS else np.int32 diff --git a/ml-agents-envs/mlagents/envs/subprocess_env_manager.py b/ml-agents-envs/mlagents/envs/subprocess_env_manager.py index 6cdb0bedda..e2c367d39c 100644 --- a/ml-agents-envs/mlagents/envs/subprocess_env_manager.py +++ b/ml-agents-envs/mlagents/envs/subprocess_env_manager.py @@ -107,7 +107,9 @@ def _generate_all_brain_info() -> AllBrainInfo: all_brain_info = {} for brain_name in env.get_agent_groups(): all_brain_info[brain_name] = step_result_to_brain_info( - env.get_step_result(brain_name), env.get_agent_group_spec(brain_name) + env.get_step_result(brain_name), + env.get_agent_group_spec(brain_name), + worker_id, ) return all_brain_info @@ -125,7 +127,8 @@ def external_brains(): if cmd.name == "step": all_action_info = cmd.payload for brain_name, action_info in all_action_info.items(): - env.set_actions(brain_name, action_info.action) + if len(action_info.action) != 
0: + env.set_actions(brain_name, action_info.action) env.step() all_brain_info = _generate_all_brain_info() # The timers in this process are independent from all the processes and the "main" process diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 0d14555a47..c94d9ddb99 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -20,7 +20,7 @@ from mlagents.envs.environment import UnityEnvironment from mlagents.envs.sampler_class import SamplerManager from mlagents.envs.exception import SamplerException -from mlagents.envs.base_unity_environment import BaseUnityEnvironment +from mlagents.envs.base_env import BaseEnv from mlagents.envs.subprocess_env_manager import SubprocessEnvManager from mlagents.envs.side_channel.side_channel import SideChannel from mlagents.envs.side_channel.engine_configuration_channel import EngineConfig @@ -371,7 +371,7 @@ def create_environment_factory( seed: Optional[int], start_port: int, env_args: Optional[List[str]], -) -> Callable[[int, List[SideChannel]], BaseUnityEnvironment]: +) -> Callable[[int, List[SideChannel]], BaseEnv]: if env_path is not None: # Strip out executable extensions if passed env_path = ( diff --git a/ml-agents/mlagents/trainers/tests/test_bc.py b/ml-agents/mlagents/trainers/tests/test_bc.py index 940e39e65f..3b98f797d2 100644 --- a/ml-agents/mlagents/trainers/tests/test_bc.py +++ b/ml-agents/mlagents/trainers/tests/test_bc.py @@ -10,9 +10,14 @@ import mlagents.trainers.tests.mock_brain as mb from mlagents.trainers.bc.policy import BCPolicy from mlagents.trainers.bc.offline_trainer import BCTrainer -from mlagents.envs.environment import UnityEnvironment + from mlagents.envs.mock_communicator import MockCommunicator from mlagents.trainers.tests.mock_brain import make_brain_parameters +from mlagents.envs.environment import UnityEnvironment +from mlagents.envs.brain_conversion_utils import ( + step_result_to_brain_info, + group_spec_to_brain_parameters, +) @pytest.fixture @@ -122,16 +127,20 @@ def test_bc_policy_evaluate(mock_communicator, mock_launcher, dummy_config): discrete_action=False, visual_inputs=0 ) env = UnityEnvironment(" ") - brain_infos = env.reset() - brain_info = brain_infos[env.external_brain_names[0]] + env.reset() + brain_name = env.get_agent_groups()[0] + brain_info = step_result_to_brain_info( + env.get_step_result(brain_name), env.get_agent_group_spec(brain_name) + ) + brain_params = group_spec_to_brain_parameters( + brain_name, env.get_agent_group_spec(brain_name) + ) trainer_parameters = dummy_config - model_path = env.external_brain_names[0] + model_path = brain_name trainer_parameters["model_path"] = model_path trainer_parameters["keep_checkpoints"] = 3 - policy = BCPolicy( - 0, env.brains[env.external_brain_names[0]], trainer_parameters, False - ) + policy = BCPolicy(0, brain_params, trainer_parameters, False) run_out = policy.evaluate(brain_info) assert run_out["action"].shape == (3, 2) diff --git a/ml-agents/mlagents/trainers/tests/test_ppo.py b/ml-agents/mlagents/trainers/tests/test_ppo.py index 2207a37a58..3bd22ecfc3 100644 --- a/ml-agents/mlagents/trainers/tests/test_ppo.py +++ b/ml-agents/mlagents/trainers/tests/test_ppo.py @@ -16,6 +16,10 @@ from mlagents.envs.mock_communicator import MockCommunicator from mlagents.trainers.tests import mock_brain as mb from mlagents.trainers.tests.mock_brain import make_brain_parameters +from mlagents.envs.brain_conversion_utils import ( + step_result_to_brain_info, + 
group_spec_to_brain_parameters, +) @pytest.fixture @@ -66,16 +70,20 @@ def test_ppo_policy_evaluate(mock_communicator, mock_launcher, dummy_config): discrete_action=False, visual_inputs=0 ) env = UnityEnvironment(" ") - brain_infos = env.reset() - brain_info = brain_infos[env.external_brain_names[0]] + env.reset() + brain_name = env.get_agent_groups()[0] + brain_info = step_result_to_brain_info( + env.get_step_result(brain_name), env.get_agent_group_spec(brain_name) + ) + brain_params = group_spec_to_brain_parameters( + brain_name, env.get_agent_group_spec(brain_name) + ) trainer_parameters = dummy_config - model_path = env.external_brain_names[0] + model_path = brain_name trainer_parameters["model_path"] = model_path trainer_parameters["keep_checkpoints"] = 3 - policy = PPOPolicy( - 0, env.brains[env.external_brain_names[0]], trainer_parameters, False, False - ) + policy = PPOPolicy(0, brain_params, trainer_parameters, False, False) run_out = policy.evaluate(brain_info) assert run_out["action"].shape == (3, 2) env.close() @@ -89,16 +97,20 @@ def test_ppo_get_value_estimates(mock_communicator, mock_launcher, dummy_config) discrete_action=False, visual_inputs=0 ) env = UnityEnvironment(" ") - brain_infos = env.reset() - brain_info = brain_infos[env.external_brain_names[0]] + env.reset() + brain_name = env.get_agent_groups()[0] + brain_info = step_result_to_brain_info( + env.get_step_result(brain_name), env.get_agent_group_spec(brain_name) + ) + brain_params = group_spec_to_brain_parameters( + brain_name, env.get_agent_group_spec(brain_name) + ) trainer_parameters = dummy_config - model_path = env.external_brain_names[0] + model_path = brain_name trainer_parameters["model_path"] = model_path trainer_parameters["keep_checkpoints"] = 3 - policy = PPOPolicy( - 0, env.brains[env.external_brain_names[0]], trainer_parameters, False, False - ) + policy = PPOPolicy(0, brain_params, trainer_parameters, False, False) run_out = policy.get_value_estimates(brain_info, 0, done=False) for key, val in run_out.items(): assert type(key) is str diff --git a/ml-agents/mlagents/trainers/tests/test_simple_rl.py b/ml-agents/mlagents/trainers/tests/test_simple_rl.py index 1c31e59cfa..188eadd2f7 100644 --- a/ml-agents/mlagents/trainers/tests/test_simple_rl.py +++ b/ml-agents/mlagents/trainers/tests/test_simple_rl.py @@ -3,23 +3,23 @@ import tempfile import pytest import yaml -from typing import Any, Dict +from typing import Dict +import numpy as np from mlagents.trainers.trainer_controller import TrainerController from mlagents.trainers.trainer_util import TrainerFactory -from mlagents.envs.base_unity_environment import BaseUnityEnvironment -from mlagents.envs.brain import BrainInfo, AllBrainInfo, BrainParameters -from mlagents.envs.communicator_objects.agent_info_pb2 import AgentInfoProto -from mlagents.envs.communicator_objects.observation_pb2 import ( - ObservationProto, - NONE as COMPRESSION_TYPE_NONE, +from mlagents.envs.base_env import ( + BaseEnv, + AgentGroupSpec, + BatchedStepResult, + ActionType, ) +from mlagents.envs.brain import BrainParameters from mlagents.envs.simple_env_manager import SimpleEnvManager from mlagents.envs.sampler_class import SamplerManager from mlagents.envs.side_channel.float_properties_channel import FloatPropertiesChannel - BRAIN_NAME = __name__ OBS_SIZE = 1 STEP_SIZE = 0.1 @@ -33,7 +33,7 @@ def clamp(x, min_val, max_val): return max(min_val, min(x, max_val)) -class Simple1DEnvironment(BaseUnityEnvironment): +class Simple1DEnvironment(BaseEnv): """ Very simple "game" - the agent 
has a position on [-1, 1], gets a reward of 1 if it reaches 1, and a reward of -1 if it reaches -1. The position is incremented by the action amount (clamped to [-step_size, step_size]). @@ -42,36 +42,41 @@ class Simple1DEnvironment(BaseUnityEnvironment): def __init__(self, use_discrete): super().__init__() self.discrete = use_discrete - self._brains: Dict[str, BrainParameters] = {} - brain_params = BrainParameters( - brain_name=BRAIN_NAME, - vector_observation_space_size=OBS_SIZE, - camera_resolutions=[], - vector_action_space_size=[2] if use_discrete else [1], - vector_action_descriptions=["moveDirection"], - vector_action_space_type=0 if use_discrete else 1, + action_type = ActionType.DISCRETE if use_discrete else ActionType.CONTINUOUS + self.group_spec = AgentGroupSpec( + [(OBS_SIZE,)], action_type, (2,) if use_discrete else 1 ) - self._brains[BRAIN_NAME] = brain_params - # state self.position = 0.0 self.step_count = 0 - self.random = random.Random(str(brain_params)) + self.random = random.Random(str(self.group_spec)) self.goal = self.random.choice([-1, 1]) + self.action = None + self.step_result = None + + def get_agent_groups(self): + return [BRAIN_NAME] + + def get_agent_group_spec(self, name): + return self.group_spec + + def set_action_for_agent(self, name, id, data): + pass + + def set_actions(self, name, data): + self.action = data - def step( - self, - vector_action: Dict[str, Any] = None, - memory: Dict[str, Any] = None, - value: Dict[str, Any] = None, - ) -> AllBrainInfo: - assert vector_action is not None + def get_step_result(self, name): + return self.step_result + + def step(self) -> None: + assert self.action is not None if self.discrete: - act = vector_action[BRAIN_NAME][0][0] + act = self.action[0][0] delta = 1 if act else -1 else: - delta = vector_action[BRAIN_NAME][0][0] + delta = self.action[0][0] delta = clamp(delta, -STEP_SIZE, STEP_SIZE) self.position += delta self.position = clamp(self.position, -1, 1) @@ -82,53 +87,34 @@ def step( else: reward = -TIME_PENALTY - vector_obs = [self.goal] * OBS_SIZE - vector_obs_proto = ObservationProto( - float_data=ObservationProto.FloatData(data=vector_obs), - shape=[len(vector_obs)], - compression_type=COMPRESSION_TYPE_NONE, - ) - agent_info = AgentInfoProto( - reward=reward, done=bool(done), observations=[vector_obs_proto] - ) + m_vector_obs = [np.ones((1, OBS_SIZE), dtype=np.float32) * self.goal] + m_reward = np.array([reward], dtype=np.float32) + m_done = np.array([done], dtype=np.bool) + m_agent_id = np.array([0], dtype=np.int32) if done: self._reset_agent() - return { - BRAIN_NAME: BrainInfo.from_agent_proto( - 0, [agent_info], self._brains[BRAIN_NAME] - ) - } + self.step_result = BatchedStepResult( + m_vector_obs, m_reward, m_done, m_done, m_agent_id, None + ) def _reset_agent(self): self.position = 0.0 self.step_count = 0 self.goal = self.random.choice([-1, 1]) - def reset( - self, - config: Dict[str, float] = None, - train_mode: bool = True, - custom_reset_parameters: Any = None, - ) -> AllBrainInfo: # type: ignore + def reset(self) -> None: # type: ignore self._reset_agent() - vector_obs = [self.goal] * OBS_SIZE - vector_obs_proto = ObservationProto( - float_data=ObservationProto.FloatData(data=vector_obs), - shape=[len(vector_obs)], - compression_type=COMPRESSION_TYPE_NONE, - ) - agent_info = AgentInfoProto( - done=False, max_step_reached=False, observations=[vector_obs_proto] - ) + m_vector_obs = [np.ones((1, OBS_SIZE), dtype=np.float32) * self.goal] + m_reward = np.array([0], dtype=np.float32) + m_done = 
np.array([False], dtype=np.bool) + m_agent_id = np.array([0], dtype=np.int32) - return { - BRAIN_NAME: BrainInfo.from_agent_proto( - 0, [agent_info], self._brains[BRAIN_NAME] - ) - } + self.step_result = BatchedStepResult( + m_vector_obs, m_reward, m_done, m_done, m_agent_id, None + ) @property def external_brains(self) -> Dict[str, BrainParameters]: From 7ccf5fcc31210d0254912f99313b89002cbc30d5 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 4 Dec 2019 15:03:01 -0800 Subject: [PATCH 06/30] adding case where no vector obs --- ml-agents-envs/mlagents/envs/brain_conversion_utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/brain_conversion_utils.py b/ml-agents-envs/mlagents/envs/brain_conversion_utils.py index 9e211bd714..d4fe2adfab 100644 --- a/ml-agents-envs/mlagents/envs/brain_conversion_utils.py +++ b/ml-agents-envs/mlagents/envs/brain_conversion_utils.py @@ -1,7 +1,7 @@ from mlagents.envs.brain import BrainInfo, BrainParameters, CameraResolution from mlagents.envs.base_env import BatchedStepResult, AgentGroupSpec, ActionType import numpy as np -from typing import List +from typing import List, Any def step_result_to_brain_info( @@ -17,7 +17,10 @@ def step_result_to_brain_info( vec_obs_indices.append(index) if len(observation.shape) == 4: vis_obs_indices.append(index) - vec_obs = np.concatenate([step_result.obs[i] for i in vec_obs_indices], axis=1) + if len(vec_obs_indices) == 0: + vec_obs: List[Any] = [] + else: + vec_obs = np.concatenate([step_result.obs[i] for i in vec_obs_indices], axis=1) vis_obs = [step_result.obs[i] for i in vis_obs_indices] mask = np.ones((n_agents, np.sum(group_spec.action_shape))) if step_result.action_mask is not None: From 89b1d34eebd8feb573669b673be17cbab893bb92 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 4 Dec 2019 16:00:51 -0800 Subject: [PATCH 07/30] Fixed Gym --- gym-unity/gym_unity/envs/__init__.py | 39 +++++--- gym-unity/gym_unity/tests/test_gym.py | 91 +++++++++---------- .../mlagents/envs/brain_conversion_utils.py | 2 +- 3 files changed, 71 insertions(+), 61 deletions(-) diff --git a/gym-unity/gym_unity/envs/__init__.py b/gym-unity/gym_unity/envs/__init__.py index 33daeb3114..02edc0c55c 100644 --- a/gym-unity/gym_unity/envs/__init__.py +++ b/gym-unity/gym_unity/envs/__init__.py @@ -4,6 +4,10 @@ import numpy as np from mlagents.envs.environment import UnityEnvironment from gym import error, spaces +from mlagents.envs.brain_conversion_utils import ( + step_result_to_brain_info, + group_spec_to_brain_parameters, +) class UnityGymException(error.Error): @@ -53,10 +57,9 @@ def __init__( ) # Take a single step so that the brain information will be sent over - if not self._env.brains: + if not self._env.get_agent_groups(): self._env.step() - self.name = self._env.academy_name self.visual_obs = None self._current_state = None self._n_agents = None @@ -67,18 +70,17 @@ def __init__( self._allow_multiple_visual_obs = allow_multiple_visual_obs # Check brain configuration - if len(self._env.brains) != 1: + if len(self._env.get_agent_groups()) != 1: raise UnityGymException( "There can only be one brain in a UnityEnvironment " "if it is wrapped in a gym." 
) - if len(self._env.external_brain_names) <= 0: - raise UnityGymException( - "There are not any external brain in the UnityEnvironment" - ) - self.brain_name = self._env.external_brain_names[0] - brain = self._env.brains[self.brain_name] + self.brain_name = self._env.get_agent_groups()[0] + self.name = self.brain_name + brain = group_spec_to_brain_parameters( + self.brain_name, self._env.get_agent_group_spec(self.brain_name) + ) if use_visual and brain.number_visual_observations == 0: raise UnityGymException( @@ -103,7 +105,11 @@ def __init__( ) # Check for number of agents in scene. - initial_info = self._env.reset()[self.brain_name] + self._env.reset() + initial_info = step_result_to_brain_info( + self._env.get_step_result(self.brain_name), + self._env.get_agent_group_spec(self.brain_name), + ) self._check_agents(len(initial_info.agents)) # Set observation and action spaces @@ -153,7 +159,11 @@ def reset(self): Returns: observation (object/list): the initial observation of the space. """ - info = self._env.reset()[self.brain_name] + self._env.reset() + info = step_result_to_brain_info( + self._env.get_step_result(self.brain_name), + self._env.get_agent_group_spec(self.brain_name), + ) n_agents = len(info.agents) self._check_agents(n_agents) self.game_over = False @@ -201,7 +211,12 @@ def step(self, action): # Translate action into list action = self._flattener.lookup_action(action) - info = self._env.step(action)[self.brain_name] + self._env.set_actions(self.brain_name, np.array(action)) + self._env.step() + info = step_result_to_brain_info( + self._env.get_step_result(self.brain_name), + self._env.get_agent_group_spec(self.brain_name), + ) n_agents = len(info.agents) self._check_agents(n_agents) self._current_state = info diff --git a/gym-unity/gym_unity/tests/test_gym.py b/gym-unity/gym_unity/tests/test_gym.py index c8286b54b8..db17b419e7 100644 --- a/gym-unity/gym_unity/tests/test_gym.py +++ b/gym-unity/gym_unity/tests/test_gym.py @@ -4,13 +4,13 @@ from gym import spaces from gym_unity.envs import UnityEnv, UnityGymException -from mlagents.envs.brain import CameraResolution +from mlagents.envs.base_env import AgentGroupSpec, ActionType, BatchedStepResult @mock.patch("gym_unity.envs.UnityEnvironment") def test_gym_wrapper(mock_env): - mock_brain = create_mock_brainparams() - mock_braininfo = create_mock_vector_braininfo() + mock_brain = create_mock_group_spec() + mock_braininfo = create_mock_vector_step_result() setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo) env = UnityEnv(" ", use_visual=False, multiagent=False) @@ -22,14 +22,14 @@ def test_gym_wrapper(mock_env): assert env.observation_space.contains(obs) assert isinstance(obs, np.ndarray) assert isinstance(rew, float) - assert isinstance(done, bool) + assert isinstance(done, (bool, np.bool_)) assert isinstance(info, dict) @mock.patch("gym_unity.envs.UnityEnvironment") def test_multi_agent(mock_env): - mock_brain = create_mock_brainparams() - mock_braininfo = create_mock_vector_braininfo(num_agents=2) + mock_brain = create_mock_group_spec() + mock_braininfo = create_mock_vector_step_result(num_agents=2) setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo) with pytest.raises(UnityGymException): @@ -47,10 +47,10 @@ def test_multi_agent(mock_env): @mock.patch("gym_unity.envs.UnityEnvironment") def test_branched_flatten(mock_env): - mock_brain = create_mock_brainparams( + mock_brain = create_mock_group_spec( vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3] ) - mock_braininfo = 
create_mock_vector_braininfo(num_agents=1) + mock_braininfo = create_mock_vector_step_result(num_agents=1) setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo) env = UnityEnv(" ", use_visual=False, multiagent=False, flatten_branched=True) @@ -67,8 +67,8 @@ def test_branched_flatten(mock_env): @pytest.mark.parametrize("use_uint8", [True, False], ids=["float", "uint8"]) @mock.patch("gym_unity.envs.UnityEnvironment") def test_gym_wrapper_visual(mock_env, use_uint8): - mock_brain = create_mock_brainparams(number_visual_observations=1) - mock_braininfo = create_mock_vector_braininfo(number_visual_observations=1) + mock_brain = create_mock_group_spec(number_visual_observations=1) + mock_braininfo = create_mock_vector_step_result(number_visual_observations=1) setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo) env = UnityEnv(" ", use_visual=True, multiagent=False, uint8_visual=use_uint8) @@ -80,14 +80,14 @@ def test_gym_wrapper_visual(mock_env, use_uint8): assert env.observation_space.contains(obs) assert isinstance(obs, np.ndarray) assert isinstance(rew, float) - assert isinstance(done, bool) + assert isinstance(done, (bool, np.bool_)) assert isinstance(info, dict) # Helper methods -def create_mock_brainparams( +def create_mock_group_spec( number_visual_observations=0, vector_action_space_type="continuous", vector_observation_space_size=3, @@ -97,54 +97,49 @@ def create_mock_brainparams( Creates a mock BrainParameters object with parameters. """ # Avoid using mutable object as default param - if vector_action_space_size is None: - vector_action_space_size = [2] - mock_brain = mock.Mock() - mock_brain.return_value.number_visual_observations = number_visual_observations - if number_visual_observations: - mock_brain.return_value.camera_resolutions = [ - CameraResolution(width=8, height=8, num_channels=3) - for _ in range(number_visual_observations) - ] - - mock_brain.return_value.vector_action_space_type = vector_action_space_type - mock_brain.return_value.vector_observation_space_size = ( - vector_observation_space_size - ) - mock_brain.return_value.vector_action_space_size = vector_action_space_size - return mock_brain() - - -def create_mock_vector_braininfo(num_agents=1, number_visual_observations=0): + act_type = ActionType.DISCRETE + if vector_action_space_type == "continuous": + act_type = ActionType.CONTINUOUS + if vector_action_space_size is None: + vector_action_space_size = 2 + else: + vector_action_space_size = vector_action_space_size[0] + else: + if vector_action_space_size is None: + vector_action_space_size = (2,) + else: + vector_action_space_size = tuple(vector_action_space_size) + obs_shapes = [(vector_observation_space_size,)] + for i in range(number_visual_observations): + obs_shapes += [(8, 8, 3)] + return AgentGroupSpec(obs_shapes, act_type, vector_action_space_size) + + +def create_mock_vector_step_result(num_agents=1, number_visual_observations=0): """ Creates a mock BrainInfo with vector observations. Imitates constant vector observations, rewards, dones, and agents. :int num_agents: Number of "agents" to imitate in your BrainInfo values. 
""" - mock_braininfo = mock.Mock() - mock_braininfo.return_value.vector_observations = np.array([num_agents * [1, 2, 3]]) + obs = [np.array([num_agents * [1, 2, 3]])] if number_visual_observations: - mock_braininfo.return_value.visual_observations = [ - [np.zeros(shape=(8, 8, 3), dtype=np.float32)] - ] - mock_braininfo.return_value.rewards = num_agents * [1.0] - mock_braininfo.return_value.local_done = num_agents * [False] - mock_braininfo.return_value.agents = range(0, num_agents) - return mock_braininfo() + obs += [np.zeros(shape=(num_agents, 8, 8, 3), dtype=np.float32)] + rewards = np.array(num_agents * [1.0]) + done = np.array(num_agents * [False]) + agents = np.array(range(0, num_agents)) + return BatchedStepResult(obs, rewards, done, done, agents, None) -def setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo): +def setup_mock_unityenvironment(mock_env, mock_spec, mock_result): """ Takes a mock UnityEnvironment and adds the appropriate properties, defined by the mock BrainParameters and BrainInfo. :Mock mock_env: A mock UnityEnvironment, usually empty. - :Mock mock_brain: A mock Brain object that specifies the params of this environment. - :Mock mock_braininfo: A mock BrainInfo object that will be returned at each step and reset. + :Mock mock_spec: A mock AgentGroupSpec object that specifies the params of this environment. + :Mock mock_result: A mock BatchedStepResult object that will be returned at each step and reset. """ - mock_env.return_value.academy_name = "MockAcademy" - mock_env.return_value.brains = {"MockBrain": mock_brain} - mock_env.return_value.external_brain_names = ["MockBrain"] - mock_env.return_value.reset.return_value = {"MockBrain": mock_braininfo} - mock_env.return_value.step.return_value = {"MockBrain": mock_braininfo} + mock_env.return_value.get_agent_groups.return_value = ["MockBrain"] + mock_env.return_value.get_agent_group_spec.return_value = mock_spec + mock_env.return_value.get_step_result.return_value = mock_result diff --git a/ml-agents-envs/mlagents/envs/brain_conversion_utils.py b/ml-agents-envs/mlagents/envs/brain_conversion_utils.py index d4fe2adfab..23940fb0c8 100644 --- a/ml-agents-envs/mlagents/envs/brain_conversion_utils.py +++ b/ml-agents-envs/mlagents/envs/brain_conversion_utils.py @@ -58,5 +58,5 @@ def group_spec_to_brain_parameters( a_size += [group_spec.action_shape] vector_action_space_type = 1 return BrainParameters( - name, vec_size, cam_res, a_size, [], vector_action_space_type + name, int(vec_size), cam_res, a_size, [], vector_action_space_type ) From 48d675d54cfb251050f08a6cdc44eb4c28039e1c Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 4 Dec 2019 16:11:13 -0800 Subject: [PATCH 08/30] fixing tests of float64 --- ml-agents-envs/mlagents/envs/brain_conversion_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents-envs/mlagents/envs/brain_conversion_utils.py b/ml-agents-envs/mlagents/envs/brain_conversion_utils.py index 23940fb0c8..b29e037c0c 100644 --- a/ml-agents-envs/mlagents/envs/brain_conversion_utils.py +++ b/ml-agents-envs/mlagents/envs/brain_conversion_utils.py @@ -22,7 +22,7 @@ def step_result_to_brain_info( else: vec_obs = np.concatenate([step_result.obs[i] for i in vec_obs_indices], axis=1) vis_obs = [step_result.obs[i] for i in vis_obs_indices] - mask = np.ones((n_agents, np.sum(group_spec.action_shape))) + mask = np.ones((n_agents, np.sum(group_spec.action_shape)), dtype=np.float32) if step_result.action_mask is not None: mask = 1 - np.concatenate(step_result.action_mask, 
axis=1) if agent_id_prefix is None: From 9d2d70c7209c015c19b2778513acbabb8d138258 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 4 Dec 2019 16:20:29 -0800 Subject: [PATCH 09/30] fixing float64 --- ml-agents-envs/mlagents/envs/rpc_utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/rpc_utils.py b/ml-agents-envs/mlagents/envs/rpc_utils.py index b70bfe9238..9b3d3e7b06 100644 --- a/ml-agents-envs/mlagents/envs/rpc_utils.py +++ b/ml-agents-envs/mlagents/envs/rpc_utils.py @@ -60,14 +60,14 @@ def _process_visual_observation( obs_index: int, shape: Tuple[int, int, int], agent_info_list: List[AgentInfoProto] ) -> np.ndarray: if len(agent_info_list) == 0: - return np.zeros((0, shape[0], shape[1], shape[2])) + return np.zeros((0, shape[0], shape[1], shape[2]), dtype=np.float32) gray_scale = shape[2] == 1 batched_visual = [ process_pixels(agent_obs.observations[obs_index].compressed_data, gray_scale) for agent_obs in agent_info_list ] - return np.array(batched_visual) + return np.array(batched_visual, dtype=np.float32) @timed @@ -80,7 +80,8 @@ def _process_vector_observation( [ agent_obs.observations[obs_index].float_data.data for agent_obs in agent_info_list - ] + ], + dtype=np.float32, ) # Check for NaNs or infs in the observations # If there's a NaN in the observations, the dot() result will be NaN From 099f12b9916d6ab56cab7263d861aa7b2acb3ef8 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 4 Dec 2019 16:31:58 -0800 Subject: [PATCH 10/30] reverting some of brain.py --- ml-agents-envs/mlagents/envs/brain.py | 13 +++++++++---- ml-agents-envs/mlagents/envs/tests/test_envs.py | 9 +++++++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/brain.py b/ml-agents-envs/mlagents/envs/brain.py index 14cd4f15bd..1db75de68d 100644 --- a/ml-agents-envs/mlagents/envs/brain.py +++ b/ml-agents-envs/mlagents/envs/brain.py @@ -141,6 +141,7 @@ def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray: return s @staticmethod + @timed def from_agent_proto( worker_id: int, agent_info_list: List[AgentInfoProto], @@ -152,7 +153,9 @@ def from_agent_proto( vis_obs = BrainInfo._process_visual_observations(brain_params, agent_info_list) total_num_actions = sum(brain_params.vector_action_space_size) - mask_actions = np.ones((len(agent_info_list), total_num_actions)) + mask_actions = np.ones( + (len(agent_info_list), total_num_actions), dtype=np.float32 + ) for agent_index, agent_info in enumerate(agent_info_list): if agent_info.action_mask is not None: if len(agent_info.action_mask) == total_num_actions: @@ -215,7 +218,9 @@ def _process_vector_observations( brain_params: BrainParameters, agent_info_list: List[AgentInfoProto] ) -> np.ndarray: if len(agent_info_list) == 0: - vector_obs = np.zeros((0, brain_params.vector_observation_space_size)) + vector_obs = np.zeros( + (0, brain_params.vector_observation_space_size), dtype=np.float32 + ) else: stacked_obs = [] has_nan = False @@ -229,7 +234,7 @@ def _process_vector_observations( for vo in vec_obs: # TODO consider itertools.chain here proto_vector_obs.extend(vo.float_data.data) - np_obs = np.array(proto_vector_obs) + np_obs = np.array(proto_vector_obs, dtype=np.float32) # Check for NaNs or infs in the observations # If there's a NaN in the observations, the dot() result will be NaN @@ -241,7 +246,7 @@ def _process_vector_observations( has_nan = has_nan or np.isnan(d) has_inf = has_inf or not np.isfinite(d) stacked_obs.append(np_obs) - vector_obs = 
np.array(stacked_obs) + vector_obs = np.array(stacked_obs, dtype=np.float32) # In we have any NaN or Infs, use np.nan_to_num to replace these with finite values if has_nan or has_inf: diff --git a/ml-agents-envs/mlagents/envs/tests/test_envs.py b/ml-agents-envs/mlagents/envs/tests/test_envs.py index 6af986601f..c61eed9de6 100755 --- a/ml-agents-envs/mlagents/envs/tests/test_envs.py +++ b/ml-agents-envs/mlagents/envs/tests/test_envs.py @@ -55,10 +55,15 @@ def test_step(mock_communicator, mock_launcher): env.step() batched_step_result = env.get_step_result("RealFakeBrain") n_agents = batched_step_result.n_agents() - env.set_actions("RealFakeBrain", np.zeros((n_agents, spec.action_shape))) + env.set_actions( + "RealFakeBrain", np.zeros((n_agents, spec.action_shape), dtype=np.float32) + ) env.step() with pytest.raises(UnityActionException): - env.set_actions("RealFakeBrain", np.zeros((n_agents - 1, spec.action_shape))) + env.set_actions( + "RealFakeBrain", + np.zeros((n_agents - 1, spec.action_shape), dtype=np.float32), + ) batched_step_result = env.get_step_result("RealFakeBrain") n_agents = batched_step_result.n_agents() env.set_actions("RealFakeBrain", -1 * np.ones((n_agents, spec.action_shape))) From 02cae4d5aab411a6834071d1fa5983f5b426e461 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 4 Dec 2019 16:45:25 -0800 Subject: [PATCH 11/30] removing old proto apis --- ml-agents-envs/mlagents/envs/tests/test_envs.py | 4 +++- ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/tests/test_envs.py b/ml-agents-envs/mlagents/envs/tests/test_envs.py index c61eed9de6..cfdad55b25 100755 --- a/ml-agents-envs/mlagents/envs/tests/test_envs.py +++ b/ml-agents-envs/mlagents/envs/tests/test_envs.py @@ -66,7 +66,9 @@ def test_step(mock_communicator, mock_launcher): ) batched_step_result = env.get_step_result("RealFakeBrain") n_agents = batched_step_result.n_agents() - env.set_actions("RealFakeBrain", -1 * np.ones((n_agents, spec.action_shape))) + env.set_actions( + "RealFakeBrain", -1 * np.ones((n_agents, spec.action_shape), dtype=np.float32) + ) env.step() env.close() diff --git a/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py b/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py index 5d2cbca223..f365ff02ac 100644 --- a/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py +++ b/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py @@ -88,9 +88,9 @@ def test_process_visual_observation(): in_array_2 = np.random.rand(128, 128, 3) proto_obs_2 = generate_compressed_proto_obs(in_array_2) ap1 = AgentInfoProto() - ap1.observations.append(proto_obs_1) + ap1.observations.extend([proto_obs_1]) ap2 = AgentInfoProto() - ap2.observations.append(proto_obs_2) + ap2.observations.extend([proto_obs_2]) ap_list = [ap1, ap2] arr = _process_visual_observation(0, (128, 128, 3), ap_list) assert list(arr.shape) == [2, 128, 128, 3] From 782584bbbf8896e8ae73d1a5fea7b37e3b10746e Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 5 Dec 2019 12:47:14 -0800 Subject: [PATCH 12/30] comment type fixes --- ml-agents-envs/mlagents/envs/base_env.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/base_env.py b/ml-agents-envs/mlagents/envs/base_env.py index 8b35b8f23e..19dad6192c 100644 --- a/ml-agents-envs/mlagents/envs/base_env.py +++ b/ml-agents-envs/mlagents/envs/base_env.py @@ -8,9 +8,9 @@ For performance reasons, the data of each group of agents is 
processed in a batched manner. When retrieving the state of a group of Agents, said state contains the data for the whole group. Agents in these groups are identified -by a unique int identifier that allows tracking of Agents accross simulation +by a unique int identifier that allows tracking of Agents across simulation steps. Note that there is no guarantee that the number or order of the Agents -in the state will be consistent accross simulation steps. +in the state will be consistent across simulation steps. A simulation steps corresponds to moving the simulation forward until at least one agent in the simulation sends its observations to Python again. Since Agents can request decisions at different frequencies, a simulation step does @@ -35,7 +35,7 @@ class StepResult(NamedTuple): Contains the data a single Agent collected since the last simulation step. - obs is a list of numpy arrays observations collected by the group of - agent. + agent. - reward is a float. Corresponds to the rewards collected by the agent since the last simulation step. - done is a bool. Is true if the Agent was terminated during the last @@ -44,6 +44,7 @@ class StepResult(NamedTuple): steps during the last simulation step. - agent_id is an int and an unique identifier for the corresponding Agent. - action_mask is an optional list of one dimensional array of booleans. + Only available in multi-discrete action space type. Each array corresponds to an action branch. Each array contains a mask for each action of the branch. If true, the action is not available for the agent during this simulation step. @@ -62,11 +63,12 @@ class BatchedStepResult(NamedTuple): Contains the data a group of similar Agents collected since the last simulation step. Note that all Agents do not necessarily have new information to send at each simulation step. Therefore, the ordering of - agents and the batch size of the BatchedStepResult are not fixed accross + agents and the batch size of the BatchedStepResult are not fixed across simulation steps. - obs is a list of numpy arrays observations collected by the group of - agent. The first dimension of the array corresponds to the batch size of - the group. + agent. Each obs has one extra dimension compared to StepResult: the first + dimension of the array corresponds to the batch size of + the group. - reward is a float vector of length batch size. Corresponds to the rewards collected by each agent since the last simulation step. - done is an array of booleans of length batch size. Is true if the @@ -76,8 +78,9 @@ class BatchedStepResult(NamedTuple): simulation step. - agent_id is an int vector of length batch size containing unique identifier for the corresponding Agent. This is used to track Agents - accross simulation steps. + across simulation steps. - action_mask is an optional list of two dimensional array of booleans. + Only available in multi-discrete action space type. Each array corresponds to an action branch. The first dimension of each array is the batch size and the second contains a mask for each action of the branch. If true, the action is not available for the agent during @@ -153,11 +156,11 @@ class AgentGroupSpec(NamedTuple): A NamedTuple to containing information about the observations and actions spaces for a group of Agents. - observation_shapes is a List of Tuples of int : Each Tuple corresponds - to an observation's dimensionsthe shape tuples have the same ordering as - the ordering of the BatchedStepResult. + to an observation's dimensions. 
The shape tuples have the same ordering as + the ordering of the BatchedStepResult and StepResult. - action_type is the type of data of the action. it can be discrete or continuous. If discrete, the action tensors are expected to be int32. If - discrete, the actions are expected to be float32. + continuous, the actions are expected to be float32. - action_shape is: - An int in continuous action space corresponding to the number of floats that constitute the action. From d0b6d7dacfe59e8c8a25a13a348bdd1b8b616143 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 5 Dec 2019 14:43:04 -0800 Subject: [PATCH 13/30] added properties to AgentGroupSpec and edited the notebooks. --- gym-unity/gym_unity/envs/__init__.py | 7 +- ml-agents-envs/mlagents/envs/base_env.py | 41 ++++- .../mlagents/envs/brain_conversion_utils.py | 18 +- ml-agents-envs/mlagents/envs/environment.py | 38 +--- ml-agents-envs/mlagents/envs/rpc_utils.py | 8 +- .../mlagents/envs/tests/test_envs.py | 6 +- .../mlagents/envs/tests/test_rpc_utils.py | 11 +- notebooks/getting-started-gym.ipynb | 162 ++++++++++++++++-- notebooks/getting-started.ipynb | 84 ++++++--- 9 files changed, 275 insertions(+), 100 deletions(-) diff --git a/gym-unity/gym_unity/envs/__init__.py b/gym-unity/gym_unity/envs/__init__.py index 02edc0c55c..6300afd094 100644 --- a/gym-unity/gym_unity/envs/__init__.py +++ b/gym-unity/gym_unity/envs/__init__.py @@ -211,11 +211,12 @@ def step(self, action): # Translate action into list action = self._flattener.lookup_action(action) - self._env.set_actions(self.brain_name, np.array(action)) + spec = self._env.get_agent_group_spec(self.brain_name) + action = np.array(action).reshape((self._n_agents, spec.action_size)) + self._env.set_actions(self.brain_name, action) self._env.step() info = step_result_to_brain_info( - self._env.get_step_result(self.brain_name), - self._env.get_agent_group_spec(self.brain_name), + self._env.get_step_result(self.brain_name), spec ) n_agents = len(info.agents) self._check_agents(n_agents) diff --git a/ml-agents-envs/mlagents/envs/base_env.py b/ml-agents-envs/mlagents/envs/base_env.py index 19dad6192c..572502ebfe 100644 --- a/ml-agents-envs/mlagents/envs/base_env.py +++ b/ml-agents-envs/mlagents/envs/base_env.py @@ -25,7 +25,7 @@ """ from abc import ABC, abstractmethod -from typing import List, NamedTuple, Tuple, Union, Optional +from typing import List, NamedTuple, Tuple, Optional, Any import numpy as np from enum import Enum @@ -170,7 +170,44 @@ class AgentGroupSpec(NamedTuple): observation_shapes: List[Tuple] action_type: ActionType - action_shape: Union[int, Tuple] + action_shape: Any + + def is_action_discrete(self) -> bool: + """ + Returns true if the Agent group uses discrete actions + """ + return self.action_type == ActionType.DISCRETE + + def is_action_continuous(self) -> bool: + """ + Returns true if the Agent group uses continuous actions + """ + return self.action_type == ActionType.CONTINUOUS + + @property + def action_size(self) -> int: + """ + Returns the dimension of the action. + - In the continuous case, will return the number of continuous actions. + - In the (multi-)discrete case, will return the number of action. + branches. + """ + if self.action_type == ActionType.DISCRETE: + return len(self.action_shape) + else: + return self.action_shape + + @property + def discrete_action_branches(self) -> Optional[Tuple[int, ...]]: + """ + Returns a Tuple of int corresponding to the number of possible actions + for each branch (only for discrete actions). 
Will return None in + for continuous actions. + """ + if self.action_type == ActionType.DISCRETE: + return self.action_shape + else: + return None class BaseEnv(ABC): diff --git a/ml-agents-envs/mlagents/envs/brain_conversion_utils.py b/ml-agents-envs/mlagents/envs/brain_conversion_utils.py index b29e037c0c..4db0249bf5 100644 --- a/ml-agents-envs/mlagents/envs/brain_conversion_utils.py +++ b/ml-agents-envs/mlagents/envs/brain_conversion_utils.py @@ -1,5 +1,5 @@ from mlagents.envs.brain import BrainInfo, BrainParameters, CameraResolution -from mlagents.envs.base_env import BatchedStepResult, AgentGroupSpec, ActionType +from mlagents.envs.base_env import BatchedStepResult, AgentGroupSpec import numpy as np from typing import List, Any @@ -22,9 +22,13 @@ def step_result_to_brain_info( else: vec_obs = np.concatenate([step_result.obs[i] for i in vec_obs_indices], axis=1) vis_obs = [step_result.obs[i] for i in vis_obs_indices] - mask = np.ones((n_agents, np.sum(group_spec.action_shape)), dtype=np.float32) - if step_result.action_mask is not None: - mask = 1 - np.concatenate(step_result.action_mask, axis=1) + mask = np.ones((n_agents, np.sum(group_spec.action_size)), dtype=np.float32) + if group_spec.is_action_discrete(): + mask = np.ones( + (n_agents, np.sum(group_spec.discrete_action_branches)), dtype=np.float32 + ) + if step_result.action_mask is not None: + mask = 1 - np.concatenate(step_result.action_mask, axis=1) if agent_id_prefix is None: agent_ids = list(step_result.agent_id) else: @@ -51,11 +55,11 @@ def group_spec_to_brain_parameters( vis_sizes = [shape for shape in group_spec.observation_shapes if len(shape) == 3] cam_res = [CameraResolution(s[0], s[1], s[2]) for s in vis_sizes] a_size: List[int] = [] - if group_spec.action_type == ActionType.DISCRETE: - a_size += list(group_spec.action_shape) + if group_spec.is_action_discrete(): + a_size += list(group_spec.discrete_action_branches) vector_action_space_type = 0 else: - a_size += [group_spec.action_shape] + a_size += [group_spec.action_size] vector_action_space_type = 1 return BrainParameters( name, int(vec_size), cam_res, a_size, [], vector_action_space_type diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index 34ea4fe458..6260f2d955 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -8,12 +8,7 @@ from mlagents.envs.side_channel.side_channel import SideChannel -from mlagents.envs.base_env import ( - BaseEnv, - BatchedStepResult, - ActionType, - AgentGroupSpec, -) +from mlagents.envs.base_env import BaseEnv, BatchedStepResult, AgentGroupSpec from mlagents.envs.timers import timed, hierarchical_timer from .exception import ( UnityEnvironmentException, @@ -286,12 +281,10 @@ def _update_state(self, output: UnityRLOutputProto) -> None: @staticmethod def _empty_action(spec: AgentGroupSpec, n_agents: int) -> np.array: - if spec.action_type == ActionType.DISCRETE: - action_size = np.sum(spec.action_shape) - return np.zeros((n_agents, action_size), dtype=np.int32) + if spec.is_action_discrete(): + return np.zeros((n_agents, spec.action_size), dtype=np.int32) else: - action_size = spec.action_shape - return np.zeros((n_agents, action_size), dtype=np.float32) + return np.zeros((n_agents, spec.action_size), dtype=np.float32) def reset(self) -> None: if self._loaded: @@ -346,15 +339,8 @@ def set_actions(self, agent_group: str, action: np.array) -> None: if agent_group not in self._env_state: return spec = self._env_specs[agent_group] 
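The helpers above make the action layout explicit without poking at action_shape directly. A small usage sketch with hypothetical specs (the observation and branch sizes are made up for illustration):

    from mlagents.envs.base_env import AgentGroupSpec, ActionType

    # Multi-discrete group: two branches with 3 and 2 possible actions each.
    discrete_spec = AgentGroupSpec([(8,)], ActionType.DISCRETE, (3, 2))
    assert discrete_spec.is_action_discrete()
    assert discrete_spec.action_size == 2                    # number of branches
    assert discrete_spec.discrete_action_branches == (3, 2)

    # Continuous group with a 4-dimensional action vector.
    continuous_spec = AgentGroupSpec([(8,)], ActionType.CONTINUOUS, 4)
    assert continuous_spec.is_action_continuous()
    assert continuous_spec.action_size == 4
    assert continuous_spec.discrete_action_branches is None

This is what lets set_actions below validate against a single (n_agents, action_size) shape for both action types.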
- expected_a = ( - spec.action_shape - if spec.action_type == ActionType.CONTINUOUS - else len(spec.action_shape) - ) - expected_type = ( - np.float32 if spec.action_type == ActionType.CONTINUOUS else np.int32 - ) - expected_shape = (self._env_state[agent_group].n_agents(), expected_a) + expected_type = np.float32 if spec.is_action_continuous() else np.int32 + expected_shape = (self._env_state[agent_group].n_agents(), spec.action_size) if action.shape != expected_shape: raise UnityActionException( "The group {0} needs an input of dimension {1} but received input of dimension {2}".format( @@ -372,22 +358,14 @@ def set_action_for_agent( if agent_group not in self._env_state: return spec = self._env_specs[agent_group] - expected_shape = ( - ( - spec.action_shape - if spec.action_type == ActionType.CONTINUOUS - else np.sum(spec.action_shape) - ), - ) + expected_shape = (spec.action_size,) if action.shape != expected_shape: raise UnityActionException( "The Agent {0} in group {1} needs an input of dimension {2} but received input of dimension {3}".format( agent_id, agent_group, expected_shape, action.shape ) ) - expected_type = ( - np.float32 if spec.action_type == ActionType.CONTINUOUS else np.int32 - ) + expected_type = np.float32 if spec.is_action_continuous() else np.int32 if action.dtype != expected_type: action = action.astype(expected_type) diff --git a/ml-agents-envs/mlagents/envs/rpc_utils.py b/ml-agents-envs/mlagents/envs/rpc_utils.py index 9b3d3e7b06..c517cf9da4 100644 --- a/ml-agents-envs/mlagents/envs/rpc_utils.py +++ b/ml-agents-envs/mlagents/envs/rpc_utils.py @@ -29,7 +29,7 @@ def agent_group_spec_from_proto( action_shape = None if action_type == ActionType.CONTINUOUS: action_shape = brain_param_proto.vector_action_size[0] - if action_type == ActionType.DISCRETE: + else: action_shape = tuple(brain_param_proto.vector_action_size) return AgentGroupSpec(observation_shape, action_type, action_shape) @@ -128,10 +128,10 @@ def batched_step_result_from_proto( [agent_info.id for agent_info in agent_info_list], dtype=np.int32 ) action_mask = None - if group_spec.action_type == ActionType.DISCRETE: + if group_spec.is_action_discrete(): if any([agent_info.action_mask is not None] for agent_info in agent_info_list): n_agents = len(agent_info_list) - a_size = np.sum(group_spec.action_shape) + a_size = np.sum(group_spec.discrete_action_branches) mask_matrix = np.ones((n_agents, a_size), dtype=np.bool) for agent_index, agent_info in enumerate(agent_info_list): if agent_info.action_mask is not None: @@ -141,7 +141,7 @@ def batched_step_result_from_proto( for k in range(a_size) ] action_mask = (1 - mask_matrix).astype(np.bool) - indices = _generate_split_indices(group_spec.action_shape) + indices = _generate_split_indices(group_spec.discrete_action_branches) action_mask = np.split(action_mask, indices, axis=1) return BatchedStepResult(obs_list, rewards, done, max_step, agent_id, action_mask) diff --git a/ml-agents-envs/mlagents/envs/tests/test_envs.py b/ml-agents-envs/mlagents/envs/tests/test_envs.py index cfdad55b25..9769bccc79 100755 --- a/ml-agents-envs/mlagents/envs/tests/test_envs.py +++ b/ml-agents-envs/mlagents/envs/tests/test_envs.py @@ -56,18 +56,18 @@ def test_step(mock_communicator, mock_launcher): batched_step_result = env.get_step_result("RealFakeBrain") n_agents = batched_step_result.n_agents() env.set_actions( - "RealFakeBrain", np.zeros((n_agents, spec.action_shape), dtype=np.float32) + "RealFakeBrain", np.zeros((n_agents, spec.action_size), dtype=np.float32) ) env.step() with 
pytest.raises(UnityActionException): env.set_actions( "RealFakeBrain", - np.zeros((n_agents - 1, spec.action_shape), dtype=np.float32), + np.zeros((n_agents - 1, spec.action_size), dtype=np.float32), ) batched_step_result = env.get_step_result("RealFakeBrain") n_agents = batched_step_result.n_agents() env.set_actions( - "RealFakeBrain", -1 * np.ones((n_agents, spec.action_shape), dtype=np.float32) + "RealFakeBrain", -1 * np.ones((n_agents, spec.action_size), dtype=np.float32) ) env.step() diff --git a/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py b/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py index f365ff02ac..8a0baa5431 100644 --- a/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py +++ b/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py @@ -173,12 +173,15 @@ def test_agent_group_spec_from_proto(): bp.vector_action_size.extend([5, 4]) bp.vector_action_space_type = 0 group_spec = agent_group_spec_from_proto(bp, agent_proto) - assert group_spec.action_type == ActionType.DISCRETE + assert group_spec.is_action_discrete() + assert not group_spec.is_action_continuous() assert group_spec.observation_shapes == [(3,), (4,)] - assert group_spec.action_shape == (5, 4) + assert group_spec.discrete_action_branches == (5, 4) + assert group_spec.action_size == 2 bp = BrainParametersProto() bp.vector_action_size.extend([6]) bp.vector_action_space_type = 1 group_spec = agent_group_spec_from_proto(bp, agent_proto) - assert group_spec.action_type == ActionType.CONTINUOUS - assert group_spec.action_shape == 6 + assert not group_spec.is_action_discrete() + assert group_spec.is_action_continuous() + assert group_spec.action_size == 6 diff --git a/notebooks/getting-started-gym.ipynb b/notebooks/getting-started-gym.ipynb index e6e80960c8..c95295f717 100644 --- a/notebooks/getting-started-gym.ipynb +++ b/notebooks/getting-started-gym.ipynb @@ -29,9 +29,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Python version:\n", + "3.6.5 (default, Jun 17 2018, 12:13:06) \n", + "[GCC 4.2.1 Compatible Apple LLVM 9.1.0 (clang-902.0.39.2)]\n" + ] + } + ], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", @@ -59,9 +69,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:mlagents.envs:Connected new brain:\n", + "GridWorld\n", + "INFO:gym_unity:1 agents within environment.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ "env_name = \"../envs/GridWorld\" # Name of the Unity environment binary to launch\n", "env = UnityEnv(env_name, worker_id=0, use_visual=True)\n", @@ -80,9 +107,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Agent observations look like:\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAT8AAAD7CAYAAAAcqJO9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAReUlEQVR4nO3df6yddX3A8ffHIqK3alt1TcdtVhaIhiyjuAYxGgNFXHWG8ochEF2apUn/cRM2EwWXLHHZFk02lSXLkkaczXQIQx2EGLSrkGXLglwEFahIRdQ2hTqFQe+is/jZH+fpvFzvj3PPeX6c53zfr+Tknuc5597nc3700+/n83yf54nMRJJK86KuA5CkLpj8JBXJ5CepSCY/SUUy+UkqkslPUpHGSn4RsSsiHo2IIxFxfV1BSVLTYtR5fhGxDvgOcDlwFLgPuCYzH6kvPElqxhlj/O5FwJHMfBwgIj4H7AaWTX4zM+tz46aNY2yyHs89+1zXIUhT7+WveHnXIfD0T55mfv5kLPXYOMnvbOCHC5aPAm9Y6Rc2btrI+657/xibHMOCAe5X776nmxga4RE6w1vy34AasvPSS7oOgb+98W+WfazxHR4RsS8i5iJibn5+vunNSdJQxhn5HQO2Llierda9QGbuB/YDzG7d2t4wZaoGRFP1Yjq00vvY31HhzZtOLfvYNT8Z55/46ttr4u+3ZZyR333AeRFxTkScCVwN3FFPWJLUrJHTdmaeiog/BL4MrAM+lZkP1xaZJDVorDFrZn4J+FJNsUhSa/pbsC+l162xXgc/BfrVD1ypz7fS80bp0a20rTr+flc8vE1SkUx+korUnzHqUnpZKfYy6MIt95l1Vw4vLC+HLYEXP3elEnXYv9mnMncxR36SimTyk1Qkk5+kIvWvYO9dy6x3AWtoiz/bbnqAi/tuo06DGXV7feXIT1KRTH6SitSP8WvvKsfeBTy2Ll/x5Bx/sfBd6N80mGH+3jRx5CepSCY/SUWazPFsL6rGXgQ5kr69smHjbbcQ7fee4BI48pNUJJOfpCKZ/CQVaXJ6fr1oNPUiyCX1N/L6dNsb7GYaTB09vj6fsHQljvwkFcnkJ6lI3Y1frcMa51s8muav5tFsCTzqiUhHOSFCn0tgR36SimTyk1Qkk5+kIvW3YG/F5HfNJj/C6VJ/t278w+DWMp1lpR7dKGeD6fM0mFVHfhHxqYg4EREPLVi3KSIORsRj1c+NzYYpSfUapuz9NLBr0brrgUOZeR5wqFqWpN5YdYyamf8WEdsWrd4NXFLdPwDcA3xwqC1OfJ028QH2IMIyNDMlpt7CetQytO4Tok6iUXd4bM7M49X9J4HNNcUjSa0Ye29vZiYr/CcYEfsiYi4i5uZPzo+7OUmqxai7Zp6KiC2ZeTwitgAnlntiZu4H9gPMbt06gRXbBIbEpEalYdVzKtPh/krTe1hXOhKkT3t3Fxt15HcHsKe6vwe4vZ5wJKkdw0x1uRn4T+C1EXE0IvYCHwEuj4jHgLdWy5LUG8Ps7b1mmYcuqzkWSWpNfwv2KWOPb7rVM4Fl8q4L3Gce2yupSCY/SUWajvHrmk1GkTkZUZTppx/q3wSFl/7V7q5DmCqO/CQVyeQnqUgmP0lFKrTnJ/XdZEx76TNHfpKKZPKTVCTL3pY5vaV/zmpxikkfp+D0lSM/SUUy+UkqkmVvwyxzGf1NcCfmkOo5dWppHPlJKpLJT1KRTH6SimTPT6NruqE5yt+33aUhOfKTVCSTn6QiWfY2YKqmt/TtxTjrA096MBxHfpKKZPKTVCSTn6Qi2fOrQd/aYquaphc0Ra/FTl69Vh35RcTWiLg7Ih6JiIcj4tpq/aaIOBgRj1U/NzYfriTVY5iy9xTw/sw8H7gYeG9EnA9cDxzKzPOAQ9WyJPXCqskvM49n5ter+88Bh4Gzgd3AgeppB4ArmwpSDctFN6kAa9rhERHbgAuBe4HNmXm8euhJYHOtkUlSg4ZOfhGxHvg8cF1mPrvwscxcdswQEfsiYi4i5uZPzo8VrCTVZajkFxEvZpD4PpuZX6hWPxURW6rHtwAnlvrdzNyfmTsyc8fM+pk6YpaksQ2ztzeAm4DDmfmxBQ/dAeyp7u8BvPJKn9jj6zU/vvENM8/vTcDvA9+KiAerdR8CPgLcGhF7ge8DVzUToiTVb9Xkl5n/zvJzKi+rNxxJaodHeIyod+VG7wJWPTzNzXI8tldSkUx+kopk2TvNLHVr8dMPOZFhGjnyk1Qkk5+kIpn8JBXJnp/Uc05mGY0jP0lFMvlJKpJl7xr0YuZIL4KUuufIT1KRTH6SimTyk1Qke34q0ll/uXu0X3QeydRw5CepSCY/SUUy+UkqkslPUpFMfpKK5N5eaS0WHkHjnt9ec+QnqUgmP0lFMvlJKpLJT1KRVk1+EXFWRHwtIr4REQ9HxIer9edExL0RcSQibomIM5sPV5LqMczI72fAzsy8ANgO7IqIi4GPAh/PzHOBp4G9zYUpSfVaNfnlwMlq8cXVLYGdwG3V+gPAlY1EqJXlopukoQzV84uIdRHxIHACOAh8F3gmM09VTzkKnN1MiJJUv6GSX2Y+n5nbgVngIuB1w24gIvZFxFxEzM2fnB8xTEmq15r29mbmM8DdwBuBDRFx+giRWeDYMr+zPzN3ZOaOmfUzYwUrSXUZZm/vayJiQ3X/pcDlwGEGSfBd1dP2ALc3FaRWEItuapbv9dQY5tjeLcCBiFjHIFnempl3RsQjwOci4i+AB4CbGoxTkmq1avLLzG8CFy6x/nEG/T9J6h2P8JBUJJOfpCKZ/CQVyeQnqUgmP0lFMvlJKpLJT1KRTH6SimTyk1Qkk5+kInnd3jVYeCz7xJ43tBdB9kyvT2LQ6+Ab5chPUpFMfpKKZPKTVCSTn6QimfwkFcnkJ6lITnWZZk57GU3PZof0LNyJ4chPUpFMfpKKZNkrQTG14+Wf2TXU8w6+566GI+meIz9JRTL5SSqSyU9Skez5jah3s0gW97R6EXSDprrHN9UvrjZDj/wiYl1EPBARd1bL50TEvRFxJCJuiYgzmwtTkuq1lrL3WuDwguWPAh/PzHOBp4G9dQYmSU0aKvlFxCzwe8Anq+UAdgK3VU85AFzZRIBqSKxwm1ZT9Bqn6KV0ZtiR3yeADwC/qJZfBTyTmaeq5aPA2TXHJkmNWTX5RcQ7gROZef8oG4iIfRExFxFz8yfnR/kTklS7Yfb2vgm4IiLeAZwFvAK4EdgQEWdUo79Z4NhSv5yZ+4H9ALNbt5a+j1HShFh15JeZN2TmbGZuA64GvpqZ7wbuBt5VPW0PcHtjUU64qWuZTdOLmabXsqyp+wa2YpxJzh8E/iQijjDoAd5UT0iS1Lw1TXLOzHuAe6r7jwMX1R+SJDXPIzy0spWqqEnp4BZS6RXyMlvjsb2SimTyk1Qky94G9O6kB6OyDuuQb/64HPlJKpLJT1KRTH6SilRoz6+Yrpx6bHFX7/LPvL21bQ97oaOVPH/TT2uIpDmO/CQVyeQnqUiFlr0LNXtxCy+dobVwAkt7HP
lJKpLJT1KRTH6SimTPr2VOstFCK/f4XvjowffcNfb2hp3CUse2dl56ydh/o0mO/CQVyeQnqUiWvb+ivcLUErhMayl11RxHfpKKZPKTVCTL3hW1d3yGR4JMN4vZyePIT1KRTH6SimTyk1Qke35r4jQYDWf4Hp/dwK4Mlfwi4gngOeB54FRm7oiITcAtwDbgCeCqzHy6mTAlqV5rKXsvzcztmbmjWr4eOJSZ5wGHqmVJ6oVxen67gQPV/QPAleOH0yex4NbeliySJtfwn5Gf5iQYNvkl8JWIuD8i9lXrNmfm8er+k8Dm2qOTpIYMu8PjzZl5LCJ+DTgYEd9e+GBmZkQs2ZevkuU+gA0bNo4VrCTVZaiRX2Yeq36eAL4IXAQ8FRFbAKqfJ5b53f2ZuSMzd8ysn6knakka06ojv4iYAV6Umc9V998G/DlwB7AH+Ej18/YmA51s7R6ctlKnyGkx7enj2VnqOEnptBim7N0MfDEiTj//nzLzroi4D7g1IvYC3weuai5MSarXqskvMx8HLlhi/Y+By5oISpKa5hEejeiuMPXIkPp5tMZ08theSUUy+UkqkslPUpHs+bWumzPDrMTe4KjdOnt8febIT1KRTH6SimTZ26nJOFZjMqJoRj2FqeXtNHLkJ6lIJj9JRbLsnViTUYyWU/CV80o14MhPUpFMfpKKZPKTVCR7fr00Gf3A/rGvp19y5CepSCY/SUWy7J06JZ7OwHJWa+fIT1KRTH6SimTyk1Qke37Fsk+msjnyk1Qkk5+kIpn8JBVpqOQXERsi4raI+HZEHI6IN0bEpog4GBGPVT83Nh2sJNVl2JHfjcBdmfk64ALgMHA9cCgzzwMOVcuS1AurJr+IeCXwFuAmgMz838x8BtgNHKiedgC4sqkgJaluw4z8zgF+BPxDRDwQEZ+MiBlgc2Yer57zJLC5qSAlqW7DJL8zgNcDf5+ZFwLzLCpxMzNZ5mDRiNgXEXMRMTd/cn7ceCWpFsMkv6PA0cy8t1q+jUEyfCoitgBUP08s9cuZuT8zd2Tmjpn1M3XELEljWzX5ZeaTwA8j4rXVqsuAR4A7gD3Vuj3A7Y1EKEkNGPbwtj8CPhsRZwKPA3/AIHHeGhF7ge8DVzUToiTVb6jkl5kPAjuWeOiyesORpHZ4hIekIpn8JBXJ5CepSCY/SUUy+UkqkslPUpFMfpKKFIPDclvaWMSPGEyIfjXwX61teGmTEAMYx2LG8ULG8UJrjeM3MvM1Sz3QavL7/41GzGXmUpOmi4rBOIzDOLqLw7JXUpFMfpKK1FXy29/RdheahBjAOBYzjhcyjheqLY5Oen6S1DXLXklFajX5RcSuiHg0Io5ERGtXe4uIT0XEiYh4aMG61i+9GRFbI+LuiHgkIh6OiGu7iCUizoqIr0XEN6o4PlytPyci7q0+n1uq8zc2LiLWVdeHubOrOCLiiYj4VkQ8GBFz1bouviOdXyY2Il5bvQ+nb89GxHUdvR9/XH1HH4qIm6vvbi3fj9aSX0SsA/4OeDtwPnBNRJzf0uY/DexatK6LS2+eAt6fmecDFwPvrd6DtmP5GbAzMy8AtgO7IuJi4KPAxzPzXOBpYG/DcZx2LYPLoZ7WVRyXZub2BVMpuviOdH6Z2Mx8tHoftgO/A/wP8MW244iIs4H3ATsy87eAdcDV1PX9yMxWbsAbgS8vWL4BuKHF7W8DHlqw/Ciwpbq/BXi0rVgWxHA7cHmXsQAvA74OvIHB5NEzlvq8Gtz+LIN/SDuBO4HoKI4ngFcvWtfq5wK8EvgeVS++qzgWbfttwH909H6cDfwQ2MTgxMt3Ar9b1/ejzbL39As57Wi1riudXnozIrYBFwL3dhFLVWo+yODCUweB7wLPZOap6iltfT6fAD4A/KJaflVHcSTwlYi4PyL2Veva/lwm8TKxVwM3V/dbjSMzjwF/DfwAOA78N3A/NX0/3OHBypfebEJErAc+D1yXmc92EUtmPp+DsmYWuAh4XdPbXCwi3gmcyMz72972Et6cma9n0JZ5b0S8ZeGDLX0uY10mtm5VL+0K4J8XP9ZGHFVPcTeD/xR+HZjhV9tXI2sz+R0Dti5Ynq3WdWWoS2/WLSJezCDxfTYzv9BlLACZ+QxwN4PyYUNEnL6uSxufz5uAKyLiCeBzDErfGzuI4/Qog8w8waC/dRHtfy5jXSa2AW8Hvp6ZT1XLbcfxVuB7mfmjzPw58AUG35lavh9tJr/7gPOqPTVnMhhO39Hi9hdr/dKbERHATcDhzPxYV7FExGsiYkN1/6UM+o6HGSTBd7UVR2bekJmzmbmNwffhq5n57rbjiIiZiHj56fsM+lwP0fLnkpN3mdhr+GXJSwdx/AC4OCJeVv3bOf1+1PP9aKtxWjUn3wF8h0F/6U9b3O7NDHoGP2fwv+teBr2lQ8BjwL8Cm1qI480MSoVvAg9Wt3e0HQvw28ADVRwPAX9Wrf9N4GvAEQalzkta/IwuAe7sIo5qe9+obg+f/m529B3ZDsxVn82/ABs7imMG+DHwygXruojjw8C3q+/pPwIvqev74REekorkDg9JRTL5SSqSyU9SkUx+kopk8pNUJJOfpCKZ/CQVyeQnqUj/B7TPtOg7BYI1AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ "# Reset the environment\n", "initial_observation = env.reset()\n", @@ -111,9 +158,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total reward this episode: -0.1099999975413084\n", + "Total reward this episode: -0.1099999975413084\n", + "Total reward this episode: 0.9000000022351742\n", + "Total reward this episode: -1.0999999977648258\n", + "Total reward this episode: -0.1099999975413084\n", + "Total reward this episode: -1.0199999995529652\n", + "Total reward this episode: -0.1099999975413084\n", + "Total reward this episode: -0.1099999975413084\n", + "Total reward this episode: -0.1099999975413084\n", + "Total reward this episode: -0.1099999975413084\n" + ] + } + ], "source": [ "for episode in range(10):\n", " initial_observation = env.reset()\n", @@ -135,9 +199,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:mlagents.envs:Environment shut down with return code 0.\n" + ] + } + ], "source": [ "env.close()" ] @@ -162,9 +234,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:mlagents.envs:Connected new brain:\n", + "3DBall\n", + "INFO:gym_unity:12 agents within environment.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ "# Name of the Unity environment binary to launch\n", "multi_env_name = \"../envs/3DBall\" \n", @@ -184,9 +273,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Agent observations look like: \n", + "[-0.04583858 -0.02706882 -1.4387455 3.9411402 0.979342 0.\n", + " -0.981 0. 
]\n" + ] + } + ], "source": [ "# Reset the environment\n", "initial_observations = multi_env.reset()\n", @@ -212,9 +311,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total reward this episode: 1.59166672360152\n", + "Total reward this episode: 1.2333333566784859\n", + "Total reward this episode: 1.433333346620202\n", + "Total reward this episode: 1.3833333905786276\n", + "Total reward this episode: 1.0083333496004343\n", + "Total reward this episode: 1.1750000240281224\n", + "Total reward this episode: 1.2916667247191072\n", + "Total reward this episode: 1.0250000301748514\n", + "Total reward this episode: 1.2250000331550837\n", + "Total reward this episode: 0.9750000406056643\n" + ] + } + ], "source": [ "for episode in range(10):\n", " initial_observation = multi_env.reset()\n", @@ -237,9 +353,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:mlagents.envs:Environment shut down with return code 0.\n" + ] + } + ], "source": [ "multi_env.close()" ] @@ -262,7 +386,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.6.5" } }, "nbformat": 4, diff --git a/notebooks/getting-started.ipynb b/notebooks/getting-started.ipynb index cf9c28f92c..25a0c67fb2 100755 --- a/notebooks/getting-started.ipynb +++ b/notebooks/getting-started.ipynb @@ -24,7 +24,7 @@ "metadata": {}, "outputs": [], "source": [ - "env_name = \"../envs/3DBall\" # Name of the Unity environment binary to launch\n", + "env_name = \"../envs/GridWorld\" # Name of the Unity environment binary to launch\n", "train_mode = True # Whether to run the environment in training or inference mode" ] }, @@ -48,6 +48,7 @@ "import sys\n", "\n", "from mlagents.envs.environment import UnityEnvironment\n", + "from mlagents.envs.side_channel.engine_configuration_channel import EngineConfig, EngineConfigurationChannel\n", "\n", "%matplotlib inline\n", "\n", @@ -75,11 +76,18 @@ "metadata": {}, "outputs": [], "source": [ - "env = UnityEnvironment(file_name=env_name)\n", + "engine_configuration_channel = EngineConfigurationChannel()\n", + "env = UnityEnvironment(base_port = 5006, file_name=env_name, side_channels = [engine_configuration_channel])\n", + "\n", + "#Reset the environment\n", + "env.reset()\n", "\n", "# Set the default brain to work with\n", - "default_brain = env.external_brain_names[0]\n", - "brain = env.brains[default_brain]" + "group_name = env.get_agent_groups()[0]\n", + "group_spec = env.get_agent_group_spec(group_name)\n", + "\n", + "# Set the time scale of the engine\n", + "engine_configuration_channel.set_configuration_parameters(time_scale = 3.0)" ] }, { @@ -96,19 +104,28 @@ "metadata": {}, "outputs": [], "source": [ - "# Reset the environment\n", - "env_info = env.reset(train_mode=train_mode)[default_brain]\n", - "\n", - "# Examine the state space for the default brain\n", - "print(\"Agent state looks like: \\n{}\".format(env_info.vector_observations[0]))\n", - "\n", - "# Examine the observation space for the default brain\n", - "for observation in env_info.visual_observations:\n", - " print(\"Agent observations look like:\")\n", - " if observation.shape[3] == 3:\n", - " plt.imshow(observation[0,:,:,:])\n", - " else:\n", - " plt.imshow(observation[0,:,:,0])" + "# Get the 
state of the agents\n", + "step_result = env.get_step_result(group_name)\n", + "\n", + "# Examine the number of observations per Agent\n", + "print(\"Number of observations : \", len(group_spec.observation_shapes))\n", + "\n", + "# Examine the state space for the first observation for all agents\n", + "print(\"Agent state looks like: \\n{}\".format(step_result.obs[0]))\n", + "\n", + "# Examine the state space for the first observation for the first agent\n", + "print(\"Agent state looks like: \\n{}\".format(step_result.obs[0][0]))\n", + "\n", + "# Is there a visual observation ?\n", + "vis_obs = any([len(shape) == 3 for shape in group_spec.observation_shapes])\n", + "print(\"Is there a visual observation ?\", vis_obs)\n", + "\n", + "# Examine the visual observations\n", + "if vis_obs:\n", + " vis_obs_index = next(i for i,v in enumerate(group_spec.observation_shapes) if len(v) == 3)\n", + " print(\"Agent visual observation look like:\")\n", + " obs = step_result.obs[vis_obs_index]\n", + " plt.imshow(obs[0,:,:,:])\n" ] }, { @@ -128,19 +145,23 @@ "outputs": [], "source": [ "for episode in range(10):\n", - " env_info = env.reset(train_mode=train_mode)[default_brain]\n", + " env.reset()\n", + " step_result = env.get_step_result(group_name)\n", " done = False\n", " episode_rewards = 0\n", " while not done:\n", - " action_size = brain.vector_action_space_size\n", - " if brain.vector_action_space_type == 'continuous':\n", - " env_info = env.step(np.random.randn(len(env_info.agents), \n", - " action_size[0]))[default_brain]\n", - " else:\n", - " action = np.column_stack([np.random.randint(0, action_size[i], size=(len(env_info.agents))) for i in range(len(action_size))])\n", - " env_info = env.step(action)[default_brain]\n", - " episode_rewards += env_info.rewards[0]\n", - " done = env_info.local_done[0]\n", + " action_size = group_spec.action_size\n", + " if group_spec.is_action_continuous():\n", + " action = np.random.randn(step_result.n_agents(), group_spec.action_size)\n", + " \n", + " if group_spec.is_action_discrete():\n", + " branch_size = group_spec.discrete_action_branches\n", + " action = np.column_stack([np.random.randint(0, branch_size[i], size=(step_result.n_agents())) for i in range(len(branch_size))])\n", + " env.set_actions(group_name, action)\n", + " env.step()\n", + " step_result = env.get_step_result(group_name)\n", + " episode_rewards += step_result.reward[0]\n", + " done = step_result.done[0]\n", " print(\"Total reward this episode: {}\".format(episode_rewards))" ] }, @@ -160,6 +181,13 @@ "source": [ "env.close()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -179,7 +207,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.6.5" } }, "nbformat": 4, From 6d09c917e0b59097d3251d743465417a53caf036 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 5 Dec 2019 15:26:21 -0800 Subject: [PATCH 14/30] clearing the notebook outputs --- notebooks/getting-started-gym.ipynb | 162 ++++------------------------ 1 file changed, 19 insertions(+), 143 deletions(-) diff --git a/notebooks/getting-started-gym.ipynb b/notebooks/getting-started-gym.ipynb index c95295f717..be2bb0d615 100644 --- a/notebooks/getting-started-gym.ipynb +++ b/notebooks/getting-started-gym.ipynb @@ -29,19 +29,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", 
- "text": [ - "Python version:\n", - "3.6.5 (default, Jun 17 2018, 12:13:06) \n", - "[GCC 4.2.1 Compatible Apple LLVM 9.1.0 (clang-902.0.39.2)]\n" - ] - } - ], + "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", @@ -69,28 +59,11 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:mlagents.envs:Connected new brain:\n", - "GridWorld\n", - "INFO:gym_unity:1 agents within environment.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], + "outputs": [], "source": [ - "env_name = \"../envs/GridWorld\" # Name of the Unity environment binary to launch\n", + "env_name = \"../envs/3DBall\" # Name of the Unity environment binary to launch\n", "env = UnityEnv(env_name, worker_id=0, use_visual=True)\n", "\n", "# Examine environment parameters\n", @@ -107,29 +80,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Agent observations look like:\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAT8AAAD7CAYAAAAcqJO9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAReUlEQVR4nO3df6yddX3A8ffHIqK3alt1TcdtVhaIhiyjuAYxGgNFXHWG8ochEF2apUn/cRM2EwWXLHHZFk02lSXLkkaczXQIQx2EGLSrkGXLglwEFahIRdQ2hTqFQe+is/jZH+fpvFzvj3PPeX6c53zfr+Tknuc5597nc3700+/n83yf54nMRJJK86KuA5CkLpj8JBXJ5CepSCY/SUUy+UkqkslPUpHGSn4RsSsiHo2IIxFxfV1BSVLTYtR5fhGxDvgOcDlwFLgPuCYzH6kvPElqxhlj/O5FwJHMfBwgIj4H7AaWTX4zM+tz46aNY2yyHs89+1zXIUhT7+WveHnXIfD0T55mfv5kLPXYOMnvbOCHC5aPAm9Y6Rc2btrI+657/xibHMOCAe5X776nmxga4RE6w1vy34AasvPSS7oOgb+98W+WfazxHR4RsS8i5iJibn5+vunNSdJQxhn5HQO2Llierda9QGbuB/YDzG7d2t4wZaoGRFP1Yjq00vvY31HhzZtOLfvYNT8Z55/46ttr4u+3ZZyR333AeRFxTkScCVwN3FFPWJLUrJHTdmaeiog/BL4MrAM+lZkP1xaZJDVorDFrZn4J+FJNsUhSa/pbsC+l162xXgc/BfrVD1ypz7fS80bp0a20rTr+flc8vE1SkUx+korUnzHqUnpZKfYy6MIt95l1Vw4vLC+HLYEXP3elEnXYv9mnMncxR36SimTyk1Qkk5+kIvWvYO9dy6x3AWtoiz/bbnqAi/tuo06DGXV7feXIT1KRTH6SitSP8WvvKsfeBTy2Ll/x5Bx/sfBd6N80mGH+3jRx5CepSCY/SUWazPFsL6rGXgQ5kr69smHjbbcQ7fee4BI48pNUJJOfpCKZ/CQVaXJ6fr1oNPUiyCX1N/L6dNsb7GYaTB09vj6fsHQljvwkFcnkJ6lI3Y1frcMa51s8muav5tFsCTzqiUhHOSFCn0tgR36SimTyk1Qkk5+kIvW3YG/F5HfNJj/C6VJ/t278w+DWMp1lpR7dKGeD6fM0mFVHfhHxqYg4EREPLVi3KSIORsRj1c+NzYYpSfUapuz9NLBr0brrgUOZeR5wqFqWpN5YdYyamf8WEdsWrd4NXFLdPwDcA3xwqC1OfJ028QH2IMIyNDMlpt7CetQytO4Tok6iUXd4bM7M49X9J4HNNcUjSa0Ye29vZiYr/CcYEfsiYi4i5uZPzo+7OUmqxai7Zp6KiC2ZeTwitgAnlntiZu4H9gPMbt06gRXbBIbEpEalYdVzKtPh/krTe1hXOhKkT3t3Fxt15HcHsKe6vwe4vZ5wJKkdw0x1uRn4T+C1EXE0IvYCHwEuj4jHgLdWy5LUG8Ps7b1mmYcuqzkWSWpNfwv2KWOPb7rVM4Fl8q4L3Gce2yupSCY/SUWajvHrmk1GkTkZUZTppx/q3wSFl/7V7q5DmCqO/CQVyeQnqUgmP0lFKrTnJ/XdZEx76TNHfpKKZPKTVCTL3pY5vaV/zmpxikkfp+D0lSM/SUUy+UkqkmVvwyxzGf1NcCfmkOo5dWppHPlJKpLJT1KRTH6SimTPT6NruqE5yt+33aUhOfKTVCSTn6QiWfY2YKqmt/TtxTjrA096MBxHfpKKZPKTVCSTn6Qi2fOrQd/aYquaphc0Ra/FTl69Vh35RcTWiLg7Ih6JiIcj4tpq/aaIOBgRj1U/NzYfriTVY5iy9xTw/sw8H7gYeG9EnA9cDxzKzPOAQ9WyJPXCqskvM49n5ter+88Bh4Gzgd3AgeppB4ArmwpSDctFN6kAa9rhERHbgAuBe4HNmXm8euhJYHOtkUlSg4ZOfhGxHvg8cF1mPrvwscxcdswQEfsiYi4i5uZPzo8VrCTVZajkFxEvZpD4PpuZX6hWPxURW6rHtwAnlvrdzNyfmTsyc8fM+pk6YpaksQ2ztzeAm4DDmfmxBQ/dAeyp7u8BvPJKn9jj6zU/vvENM8/vTcDvA9+KiAerdR8CPgLcGhF7ge8DVzUToiTVb9Xkl5n/zvJzKi+rNxxJaodHeIyod+VG7wJWPTzNzXI8tldSkUx+kopk2TvNLHVr8dMPOZFhGjnyk1Qkk5+kIpn8JBXJnp/Uc05mGY0jP0lFMvlJKpJl7xr0YuZIL4KUuufIT1KRTH6SimTy
k1Qke34q0ll/uXu0X3QeydRw5CepSCY/SUUy+UkqkslPUpFMfpKK5N5eaS0WHkHjnt9ec+QnqUgmP0lFMvlJKpLJT1KRVk1+EXFWRHwtIr4REQ9HxIer9edExL0RcSQibomIM5sPV5LqMczI72fAzsy8ANgO7IqIi4GPAh/PzHOBp4G9zYUpSfVaNfnlwMlq8cXVLYGdwG3V+gPAlY1EqJXlopukoQzV84uIdRHxIHACOAh8F3gmM09VTzkKnN1MiJJUv6GSX2Y+n5nbgVngIuB1w24gIvZFxFxEzM2fnB8xTEmq15r29mbmM8DdwBuBDRFx+giRWeDYMr+zPzN3ZOaOmfUzYwUrSXUZZm/vayJiQ3X/pcDlwGEGSfBd1dP2ALc3FaRWEItuapbv9dQY5tjeLcCBiFjHIFnempl3RsQjwOci4i+AB4CbGoxTkmq1avLLzG8CFy6x/nEG/T9J6h2P8JBUJJOfpCKZ/CQVyeQnqUgmP0lFMvlJKpLJT1KRTH6SimTyk1Qkk5+kInnd3jVYeCz7xJ43tBdB9kyvT2LQ6+Ab5chPUpFMfpKKZPKTVCSTn6QimfwkFcnkJ6lITnWZZk57GU3PZof0LNyJ4chPUpFMfpKKZNkrQTG14+Wf2TXU8w6+566GI+meIz9JRTL5SSqSyU9Skez5jah3s0gW97R6EXSDprrHN9UvrjZDj/wiYl1EPBARd1bL50TEvRFxJCJuiYgzmwtTkuq1lrL3WuDwguWPAh/PzHOBp4G9dQYmSU0aKvlFxCzwe8Anq+UAdgK3VU85AFzZRIBqSKxwm1ZT9Bqn6KV0ZtiR3yeADwC/qJZfBTyTmaeq5aPA2TXHJkmNWTX5RcQ7gROZef8oG4iIfRExFxFz8yfnR/kTklS7Yfb2vgm4IiLeAZwFvAK4EdgQEWdUo79Z4NhSv5yZ+4H9ALNbt5a+j1HShFh15JeZN2TmbGZuA64GvpqZ7wbuBt5VPW0PcHtjUU64qWuZTdOLmabXsqyp+wa2YpxJzh8E/iQijjDoAd5UT0iS1Lw1TXLOzHuAe6r7jwMX1R+SJDXPIzy0spWqqEnp4BZS6RXyMlvjsb2SimTyk1Qky94G9O6kB6OyDuuQb/64HPlJKpLJT1KRTH6SilRoz6+Yrpx6bHFX7/LPvL21bQ97oaOVPH/TT2uIpDmO/CQVyeQnqUiFlr0LNXtxCy+dobVwAkt7HPlJKpLJT1KRTH6SimTPr2VOstFCK/f4XvjowffcNfb2hp3CUse2dl56ydh/o0mO/CQVyeQnqUiWvb+ivcLUErhMayl11RxHfpKKZPKTVCTL3hW1d3yGR4JMN4vZyePIT1KRTH6SimTyk1Qke35r4jQYDWf4Hp/dwK4Mlfwi4gngOeB54FRm7oiITcAtwDbgCeCqzHy6mTAlqV5rKXsvzcztmbmjWr4eOJSZ5wGHqmVJ6oVxen67gQPV/QPAleOH0yex4NbeliySJtfwn5Gf5iQYNvkl8JWIuD8i9lXrNmfm8er+k8Dm2qOTpIYMu8PjzZl5LCJ+DTgYEd9e+GBmZkQs2ZevkuU+gA0bNo4VrCTVZaiRX2Yeq36eAL4IXAQ8FRFbAKqfJ5b53f2ZuSMzd8ysn6knakka06ojv4iYAV6Umc9V998G/DlwB7AH+Ej18/YmA51s7R6ctlKnyGkx7enj2VnqOEnptBim7N0MfDEiTj//nzLzroi4D7g1IvYC3weuai5MSarXqskvMx8HLlhi/Y+By5oISpKa5hEejeiuMPXIkPp5tMZ08theSUUy+UkqkslPUpHs+bWumzPDrMTe4KjdOnt8febIT1KRTH6SimTZ26nJOFZjMqJoRj2FqeXtNHLkJ6lIJj9JRbLsnViTUYyWU/CV80o14MhPUpFMfpKKZPKTVCR7fr00Gf3A/rGvp19y5CepSCY/SUWy7J06JZ7OwHJWa+fIT1KRTH6SimTyk1Qke37Fsk+msjnyk1Qkk5+kIpn8JBVpqOQXERsi4raI+HZEHI6IN0bEpog4GBGPVT83Nh2sJNVl2JHfjcBdmfk64ALgMHA9cCgzzwMOVcuS1AurJr+IeCXwFuAmgMz838x8BtgNHKiedgC4sqkgJaluw4z8zgF+BPxDRDwQEZ+MiBlgc2Yer57zJLC5qSAlqW7DJL8zgNcDf5+ZFwLzLCpxMzNZ5mDRiNgXEXMRMTd/cn7ceCWpFsMkv6PA0cy8t1q+jUEyfCoitgBUP08s9cuZuT8zd2Tmjpn1M3XELEljWzX5ZeaTwA8j4rXVqsuAR4A7gD3Vuj3A7Y1EKEkNGPbwtj8CPhsRZwKPA3/AIHHeGhF7ge8DVzUToiTVb6jkl5kPAjuWeOiyesORpHZ4hIekIpn8JBXJ5CepSCY/SUUy+UkqkslPUpFMfpKKFIPDclvaWMSPGEyIfjXwX61teGmTEAMYx2LG8ULG8UJrjeM3MvM1Sz3QavL7/41GzGXmUpOmi4rBOIzDOLqLw7JXUpFMfpKK1FXy29/RdheahBjAOBYzjhcyjheqLY5Oen6S1DXLXklFajX5RcSuiHg0Io5ERGtXe4uIT0XEiYh4aMG61i+9GRFbI+LuiHgkIh6OiGu7iCUizoqIr0XEN6o4PlytPyci7q0+n1uq8zc2LiLWVdeHubOrOCLiiYj4VkQ8GBFz1bouviOdXyY2Il5bvQ+nb89GxHUdvR9/XH1HH4qIm6vvbi3fj9aSX0SsA/4OeDtwPnBNRJzf0uY/DexatK6LS2+eAt6fmecDFwPvrd6DtmP5GbAzMy8AtgO7IuJi4KPAxzPzXOBpYG/DcZx2LYPLoZ7WVRyXZub2BVMpuviOdH6Z2Mx8tHoftgO/A/wP8MW244iIs4H3ATsy87eAdcDV1PX9yMxWbsAbgS8vWL4BuKHF7W8DHlqw/Ciwpbq/BXi0rVgWxHA7cHmXsQAvA74OvIHB5NEzlvq8Gtz+LIN/SDuBO4HoKI4ngFcvWtfq5wK8EvgeVS++qzgWbfttwH909H6cDfwQ2MTgxMt3Ar9b1/ejzbL39As57Wi1riudXnozIrYBFwL3dhFLVWo+yODCUweB7wLPZOap6iltfT6fAD4A/KJaflVHcSTwlYi4PyL2Veva/lwm8TKxVwM3V/dbjSMzjwF/DfwAOA78N3A/NX0/3OHBypfebEJErAc+D1yXmc92EUtmPp+DsmYWuAh4XdPbXCwi3gmcyMz72972Et6cma9n0JZ5b0S8ZeGDLX0uY10mtm5VL+0K4J8XP9ZGHFVPcTeD/xR+HZjhV9tXI2sz+R0Dti5Ynq3WdWWoS2/WLSJezCDxfTYzv9BlLACZ+QxwN4PyYUNEnL6uSxufz5uAKyLiCeBzDErfGzuI4/Qog8w8waC/dRHtfy5jXSa2AW8Hvp6ZT1XLbcfxVuB7mfmjzPw58AUG35lavh9tJr/7gPOqPTVnMhhO39Hi9hdr/dKbERHATcDhzPxYV7FExGsiYkN1/6UM+o6HGSTBd7UVR2bekJmzmbmNwffhq5n57rbjiIiZiHj56fsM+lwP0fL
nkpN3mdhr+GXJSwdx/AC4OCJeVv3bOf1+1PP9aKtxWjUn3wF8h0F/6U9b3O7NDHoGP2fwv+teBr2lQ8BjwL8Cm1qI480MSoVvAg9Wt3e0HQvw28ADVRwPAX9Wrf9N4GvAEQalzkta/IwuAe7sIo5qe9+obg+f/m529B3ZDsxVn82/ABs7imMG+DHwygXruojjw8C3q+/pPwIvqev74REekorkDg9JRTL5SSqSyU9SkUx+kopk8pNUJJOfpCKZ/CQVyeQnqUj/B7TPtOg7BYI1AAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Reset the environment\n", "initial_observation = env.reset()\n", @@ -158,26 +111,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total reward this episode: -0.1099999975413084\n", - "Total reward this episode: -0.1099999975413084\n", - "Total reward this episode: 0.9000000022351742\n", - "Total reward this episode: -1.0999999977648258\n", - "Total reward this episode: -0.1099999975413084\n", - "Total reward this episode: -1.0199999995529652\n", - "Total reward this episode: -0.1099999975413084\n", - "Total reward this episode: -0.1099999975413084\n", - "Total reward this episode: -0.1099999975413084\n", - "Total reward this episode: -0.1099999975413084\n" - ] - } - ], + "outputs": [], "source": [ "for episode in range(10):\n", " initial_observation = env.reset()\n", @@ -199,17 +135,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:mlagents.envs:Environment shut down with return code 0.\n" - ] - } - ], + "outputs": [], "source": [ "env.close()" ] @@ -234,26 +162,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:mlagents.envs:Connected new brain:\n", - "3DBall\n", - "INFO:gym_unity:12 agents within environment.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], + "outputs": [], "source": [ "# Name of the Unity environment binary to launch\n", "multi_env_name = \"../envs/3DBall\" \n", @@ -273,19 +184,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Agent observations look like: \n", - "[-0.04583858 -0.02706882 -1.4387455 3.9411402 0.979342 0.\n", - " -0.981 0. 
]\n" - ] - } - ], + "outputs": [], "source": [ "# Reset the environment\n", "initial_observations = multi_env.reset()\n", @@ -311,26 +212,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total reward this episode: 1.59166672360152\n", - "Total reward this episode: 1.2333333566784859\n", - "Total reward this episode: 1.433333346620202\n", - "Total reward this episode: 1.3833333905786276\n", - "Total reward this episode: 1.0083333496004343\n", - "Total reward this episode: 1.1750000240281224\n", - "Total reward this episode: 1.2916667247191072\n", - "Total reward this episode: 1.0250000301748514\n", - "Total reward this episode: 1.2250000331550837\n", - "Total reward this episode: 0.9750000406056643\n" - ] - } - ], + "outputs": [], "source": [ "for episode in range(10):\n", " initial_observation = multi_env.reset()\n", @@ -353,17 +237,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:mlagents.envs:Environment shut down with return code 0.\n" - ] - } - ], + "outputs": [], "source": [ "multi_env.close()" ] From 82e5e43f37b83ce54aa1dabe40b40e92b1d5d664 Mon Sep 17 00:00:00 2001 From: Vincent-Pierre BERGES Date: Thu, 5 Dec 2019 16:03:48 -0800 Subject: [PATCH 15/30] Update gym-unity/gym_unity/tests/test_gym.py Co-Authored-By: Chris Elion --- gym-unity/gym_unity/tests/test_gym.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gym-unity/gym_unity/tests/test_gym.py b/gym-unity/gym_unity/tests/test_gym.py index db17b419e7..19a7e64a2b 100644 --- a/gym-unity/gym_unity/tests/test_gym.py +++ b/gym-unity/gym_unity/tests/test_gym.py @@ -137,7 +137,7 @@ def setup_mock_unityenvironment(mock_env, mock_spec, mock_result): BrainParameters and BrainInfo. :Mock mock_env: A mock UnityEnvironment, usually empty. - :Mock mock_spec: A mock AgentGroupSpec object that specifies the params of this environment. + :Mock mock_spec: An AgentGroupSpec object that specifies the params of this environment. :Mock mock_result: A mock BatchedStepResult object that will be returned at each step and reset. """ mock_env.return_value.get_agent_groups.return_value = ["MockBrain"] From 16988d88606bcd7795540799314035f77317a429 Mon Sep 17 00:00:00 2001 From: Vincent-Pierre BERGES Date: Thu, 5 Dec 2019 16:03:56 -0800 Subject: [PATCH 16/30] Update gym-unity/gym_unity/tests/test_gym.py Co-Authored-By: Chris Elion --- gym-unity/gym_unity/tests/test_gym.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gym-unity/gym_unity/tests/test_gym.py b/gym-unity/gym_unity/tests/test_gym.py index 19a7e64a2b..fc7413aa26 100644 --- a/gym-unity/gym_unity/tests/test_gym.py +++ b/gym-unity/gym_unity/tests/test_gym.py @@ -138,7 +138,7 @@ def setup_mock_unityenvironment(mock_env, mock_spec, mock_result): :Mock mock_env: A mock UnityEnvironment, usually empty. :Mock mock_spec: An AgentGroupSpec object that specifies the params of this environment. - :Mock mock_result: A mock BatchedStepResult object that will be returned at each step and reset. + :Mock mock_result: A BatchedStepResult object that will be returned at each step and reset. 
""" mock_env.return_value.get_agent_groups.return_value = ["MockBrain"] mock_env.return_value.get_agent_group_spec.return_value = mock_spec From 0fbd1709a48ab7c8d8fe85f7bc59eeef69b10aea Mon Sep 17 00:00:00 2001 From: Vincent-Pierre BERGES Date: Thu, 5 Dec 2019 16:05:43 -0800 Subject: [PATCH 17/30] Update ml-agents-envs/mlagents/envs/base_env.py Co-Authored-By: Chris Elion --- ml-agents-envs/mlagents/envs/base_env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents-envs/mlagents/envs/base_env.py b/ml-agents-envs/mlagents/envs/base_env.py index 572502ebfe..ae78b975d1 100644 --- a/ml-agents-envs/mlagents/envs/base_env.py +++ b/ml-agents-envs/mlagents/envs/base_env.py @@ -125,7 +125,7 @@ def get_agent_step_result(self, agent_id: int) -> StepResult: ) @staticmethod - def empty(spec): + def empty(spec: AgentGroupSpec) -> "BatchedStepResult": """ Returns an empty BatchedStepResult. :param spec: The AgentGroupSpec for the BatchedStepResult From 8106f746d53d48cc235390b4424b7cfc0809bcf5 Mon Sep 17 00:00:00 2001 From: Vincent-Pierre BERGES Date: Thu, 5 Dec 2019 16:06:14 -0800 Subject: [PATCH 18/30] Update ml-agents-envs/mlagents/envs/base_env.py Co-Authored-By: Chris Elion --- ml-agents-envs/mlagents/envs/base_env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents-envs/mlagents/envs/base_env.py b/ml-agents-envs/mlagents/envs/base_env.py index ae78b975d1..b85a5caeda 100644 --- a/ml-agents-envs/mlagents/envs/base_env.py +++ b/ml-agents-envs/mlagents/envs/base_env.py @@ -238,7 +238,7 @@ def get_agent_groups(self) -> List[str]: """ Returns the list of the agent group names present in the environment. Agents grouped under the same group name have the same action and - observation specs are expected to behave similarly in the environment. + observation specs, and are expected to behave similarly in the environment. This list can grow with time as new policies are instantiated. :return: the list of agent group names. """ From cc4456a9c2325948f59c8b7dcdf1539c1886d247 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 5 Dec 2019 16:20:29 -0800 Subject: [PATCH 19/30] addressing first comments --- ml-agents-envs/mlagents/envs/base_env.py | 10 ++++++++-- .../mlagents/envs/brain_conversion_utils.py | 14 +++++++++----- ml-agents-envs/mlagents/envs/environment.py | 13 +++---------- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/base_env.py b/ml-agents-envs/mlagents/envs/base_env.py index b85a5caeda..763e1de36f 100644 --- a/ml-agents-envs/mlagents/envs/base_env.py +++ b/ml-agents-envs/mlagents/envs/base_env.py @@ -125,12 +125,12 @@ def get_agent_step_result(self, agent_id: int) -> StepResult: ) @staticmethod - def empty(spec: AgentGroupSpec) -> "BatchedStepResult": + def empty(spec: "AgentGroupSpec") -> "BatchedStepResult": """ Returns an empty BatchedStepResult. 
:param spec: The AgentGroupSpec for the BatchedStepResult """ - obs = [] + obs: List[np.array] = [] for shape in spec.observation_shapes: obs += [np.zeros((0,) + shape, dtype=np.float32)] return BatchedStepResult( @@ -209,6 +209,12 @@ def discrete_action_branches(self) -> Optional[Tuple[int, ...]]: else: return None + def create_empty_action(self, n_agents: int) -> np.array: + if self.action_type == ActionType.DISCRETE: + return np.zeros((n_agents, self.action_size), dtype=np.int32) + else: + return np.zeros((n_agents, self.action_size), dtype=np.float32) + class BaseEnv(ABC): @abstractmethod diff --git a/ml-agents-envs/mlagents/envs/brain_conversion_utils.py b/ml-agents-envs/mlagents/envs/brain_conversion_utils.py index 4db0249bf5..37548e1b0c 100644 --- a/ml-agents-envs/mlagents/envs/brain_conversion_utils.py +++ b/ml-agents-envs/mlagents/envs/brain_conversion_utils.py @@ -1,5 +1,6 @@ from mlagents.envs.brain import BrainInfo, BrainParameters, CameraResolution from mlagents.envs.base_env import BatchedStepResult, AgentGroupSpec +from mlagents.envs.exception import UnityEnvironmentException import numpy as np from typing import List, Any @@ -15,8 +16,13 @@ def step_result_to_brain_info( for index, observation in enumerate(step_result.obs): if len(observation.shape) == 2: vec_obs_indices.append(index) - if len(observation.shape) == 4: + elif len(observation.shape) == 4: vis_obs_indices.append(index) + else: + raise UnityEnvironmentException( + "Invalid input received from the environment, the observation should " + "either be a vector of float or a PNG image" + ) if len(vec_obs_indices) == 0: vec_obs: List[Any] = [] else: @@ -30,11 +36,9 @@ def step_result_to_brain_info( if step_result.action_mask is not None: mask = 1 - np.concatenate(step_result.action_mask, axis=1) if agent_id_prefix is None: - agent_ids = list(step_result.agent_id) + agent_ids = [str(ag_id) for ag_id in list(step_result.agent_id)] else: - agent_ids = [ - f"${agent_id_prefix}-{ag_id}" for ag_id in list(step_result.agent_id) - ] + agent_ids = [f"${agent_id_prefix}-{ag_id}" for ag_id in step_result.agent_id] return BrainInfo( vis_obs, vec_obs, diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index 6260f2d955..c854e8c829 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -279,13 +279,6 @@ def _update_state(self, output: UnityRLOutputProto) -> None: ) self._parse_side_channel_message(self.side_channels, output.side_channel) - @staticmethod - def _empty_action(spec: AgentGroupSpec, n_agents: int) -> np.array: - if spec.is_action_discrete(): - return np.zeros((n_agents, spec.action_size), dtype=np.int32) - else: - return np.zeros((n_agents, spec.action_size), dtype=np.float32) - def reset(self) -> None: if self._loaded: outputs = self.communicator.exchange(self._generate_reset_input()) @@ -311,9 +304,9 @@ def step(self) -> None: n_agents = 0 if group_name in self._env_state: n_agents = self._env_state[group_name].n_agents() - self._env_actions[group_name] = self._empty_action( - self._env_specs[group_name], n_agents - ) + self._env_actions[group_name] = self._env_specs[ + group_name + ].create_empty_action(n_agents) step_input = self._generate_step_input(self._env_actions) with hierarchical_timer("communicator.exchange"): outputs = self.communicator.exchange(step_input) From 5a45d7b3b572617389acdfdb5176199321cb79c4 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 5 Dec 2019 16:40:26 -0800 Subject: [PATCH 
20/30] NaN checks for rewards are back --- ml-agents-envs/mlagents/envs/rpc_utils.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/ml-agents-envs/mlagents/envs/rpc_utils.py b/ml-agents-envs/mlagents/envs/rpc_utils.py index c517cf9da4..ef77b4df09 100644 --- a/ml-agents-envs/mlagents/envs/rpc_utils.py +++ b/ml-agents-envs/mlagents/envs/rpc_utils.py @@ -84,7 +84,7 @@ def _process_vector_observation( dtype=np.float32, ) # Check for NaNs or infs in the observations - # If there's a NaN in the observations, the dot() result will be NaN + # If there's a NaN in the observations, the np.mean() result will be NaN # If there's an Inf (either sign) then the result will be Inf # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background # Note that a very large values (larger than sqrt(float_max)) will result in an Inf value here @@ -120,6 +120,16 @@ def batched_step_result_from_proto( rewards = np.array( [agent_info.reward for agent_info in agent_info_list], dtype=np.float32 ) + + d = np.dot(rewards, rewards) + has_nan = np.isnan(d) + has_inf = not np.isfinite(d) + # In we have any NaN or Infs, use np.nan_to_num to replace these with finite values + if has_nan or has_inf: + rewards = np.nan_to_num(rewards) + if has_nan: + logger.warning(f"An agent had a NaN reward in the environment") + done = np.array([agent_info.done for agent_info in agent_info_list], dtype=np.bool) max_step = np.array( [agent_info.max_step_reached for agent_info in agent_info_list], dtype=np.bool From 5b8a35487f91d802e4c5cdc6283dab92b3bd0005 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 5 Dec 2019 16:52:55 -0800 Subject: [PATCH 21/30] restoring Union[int, Tuple[int, ...]] for action_shape --- ml-agents-envs/mlagents/envs/base_env.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/base_env.py b/ml-agents-envs/mlagents/envs/base_env.py index 763e1de36f..e77569197b 100644 --- a/ml-agents-envs/mlagents/envs/base_env.py +++ b/ml-agents-envs/mlagents/envs/base_env.py @@ -25,7 +25,7 @@ """ from abc import ABC, abstractmethod -from typing import List, NamedTuple, Tuple, Optional, Any +from typing import List, NamedTuple, Tuple, Optional, Union import numpy as np from enum import Enum @@ -170,7 +170,7 @@ class AgentGroupSpec(NamedTuple): observation_shapes: List[Tuple] action_type: ActionType - action_shape: Any + action_shape: Union[int, Tuple[int, ...]] def is_action_discrete(self) -> bool: """ @@ -193,9 +193,9 @@ def action_size(self) -> int: branches. """ if self.action_type == ActionType.DISCRETE: - return len(self.action_shape) + return len(self.action_shape) # type: ignore else: - return self.action_shape + return self.action_shape # type: ignore @property def discrete_action_branches(self) -> Optional[Tuple[int, ...]]: @@ -205,7 +205,7 @@ def discrete_action_branches(self) -> Optional[Tuple[int, ...]]: for continuous actions. 
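The reward sanitation added above relies on a single dot product propagating NaN and Inf, so one scalar check covers the whole batch before anything is replaced. A minimal illustration:

    import numpy as np

    rewards = np.array([1.0, float("nan"), 2.0], dtype=np.float32)
    d = np.dot(rewards, rewards)    # one pass over the batch; any NaN or Inf ends up in d
    assert np.isnan(d)
    print(np.nan_to_num(rewards))   # [1. 0. 2.], the NaN replaced with a finite value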
""" if self.action_type == ActionType.DISCRETE: - return self.action_shape + return self.action_shape # type: ignore else: return None From 3d0cad7432ca4b7d5ce5cf953e4e3b1ba549e11c Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 5 Dec 2019 17:30:55 -0800 Subject: [PATCH 22/30] Made BatchdStepResult an object --- ml-agents-envs/mlagents/envs/base_env.py | 34 +++++++++++++++--------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/base_env.py b/ml-agents-envs/mlagents/envs/base_env.py index e77569197b..9397b99e58 100644 --- a/ml-agents-envs/mlagents/envs/base_env.py +++ b/ml-agents-envs/mlagents/envs/base_env.py @@ -25,7 +25,7 @@ """ from abc import ABC, abstractmethod -from typing import List, NamedTuple, Tuple, Optional, Union +from typing import List, NamedTuple, Tuple, Optional, Union, Dict import numpy as np from enum import Enum @@ -58,7 +58,7 @@ class StepResult(NamedTuple): action_mask: Optional[List[np.array]] -class BatchedStepResult(NamedTuple): +class BatchedStepResult(object): """ Contains the data a group of similar Agents collected since the last simulation step. Note that all Agents do not necessarily have new @@ -87,12 +87,21 @@ class BatchedStepResult(NamedTuple): this simulation step. """ - obs: List[np.array] - reward: np.array - done: np.array - max_step: np.array - agent_id: np.array - action_mask: Optional[List[np.array]] + def __init__(self, obs, reward, done, max_step, agent_id, action_mask): + self.obs: List[np.array] = obs + self.reward: np.array = reward + self.done: np.array = done + self.max_step: np.array = max_step + self.agent_id: np.array = agent_id + self.action_mask: Optional[List[np.array]] = action_mask + self.agent_id_to_index: Optional[Dict[int, int]] = None + + def contains_agent(self, agent_id: int) -> bool: + if self.agent_id_to_index is None: + self.agent_id_to_index = {} + for a_idx, a_id in enumerate(self.agent_id): + self.agent_id_to_index[a_id] = a_idx + return agent_id in self.agent_id_to_index def get_agent_step_result(self, agent_id: int) -> StepResult: """ @@ -101,12 +110,11 @@ def get_agent_step_result(self, agent_id: int) -> StepResult: :returns: obs, reward, done, agent_id and optional action mask for a specific agent """ - try: - agent_index = np.where(self.agent_id == agent_id)[0][0] - except IndexError as ie: + if not self.contains_agent(agent_id): raise IndexError( "agent_id {} is not present in the BatchedStepResult".format(agent_id) - ) from ie + ) + agent_index = self.agent_id_to_index[agent_id] # type: ignore agent_obs = [] for batched_obs in self.obs: agent_obs.append(batched_obs[agent_index]) @@ -114,7 +122,7 @@ def get_agent_step_result(self, agent_id: int) -> StepResult: if self.action_mask is not None: agent_mask = [] for mask in self.action_mask: - agent_mask.append(mask[0]) + agent_mask.append(mask[agent_index]) return StepResult( obs=agent_obs, reward=self.reward[agent_index], From 1148f35349baecdbfba481620ca82f8758832d3b Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 5 Dec 2019 17:32:14 -0800 Subject: [PATCH 23/30] Made _agent_id_to_index private --- ml-agents-envs/mlagents/envs/base_env.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/base_env.py b/ml-agents-envs/mlagents/envs/base_env.py index 9397b99e58..db4ce9092f 100644 --- a/ml-agents-envs/mlagents/envs/base_env.py +++ b/ml-agents-envs/mlagents/envs/base_env.py @@ -94,14 +94,14 @@ def __init__(self, obs, reward, done, max_step, agent_id, 
action_mask): self.max_step: np.array = max_step self.agent_id: np.array = agent_id self.action_mask: Optional[List[np.array]] = action_mask - self.agent_id_to_index: Optional[Dict[int, int]] = None + self._agent_id_to_index: Optional[Dict[int, int]] = None def contains_agent(self, agent_id: int) -> bool: - if self.agent_id_to_index is None: - self.agent_id_to_index = {} + if self._agent_id_to_index is None: + self._agent_id_to_index = {} for a_idx, a_id in enumerate(self.agent_id): - self.agent_id_to_index[a_id] = a_idx - return agent_id in self.agent_id_to_index + self._agent_id_to_index[a_id] = a_idx + return agent_id in self._agent_id_to_index def get_agent_step_result(self, agent_id: int) -> StepResult: """ @@ -114,7 +114,7 @@ def get_agent_step_result(self, agent_id: int) -> StepResult: raise IndexError( "agent_id {} is not present in the BatchedStepResult".format(agent_id) ) - agent_index = self.agent_id_to_index[agent_id] # type: ignore + agent_index = self._agent_id_to_index[agent_id] # type: ignore agent_obs = [] for batched_obs in self.obs: agent_obs.append(batched_obs[agent_index]) From d25a8855ea8dd91d74de42dd07fdbe5f7a66e3e8 Mon Sep 17 00:00:00 2001 From: Vincent-Pierre BERGES Date: Fri, 6 Dec 2019 10:13:47 -0800 Subject: [PATCH 24/30] Update ml-agents-envs/mlagents/envs/base_env.py Co-Authored-By: Chris Elion --- ml-agents-envs/mlagents/envs/base_env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents-envs/mlagents/envs/base_env.py b/ml-agents-envs/mlagents/envs/base_env.py index db4ce9092f..4a8938cb5f 100644 --- a/ml-agents-envs/mlagents/envs/base_env.py +++ b/ml-agents-envs/mlagents/envs/base_env.py @@ -58,7 +58,7 @@ class StepResult(NamedTuple): action_mask: Optional[List[np.array]] -class BatchedStepResult(object): +class BatchedStepResult: """ Contains the data a group of similar Agents collected since the last simulation step. Note that all Agents do not necessarily have new From 329fa341232a5678e720f6298aeb37929358963f Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Fri, 6 Dec 2019 11:16:56 -0800 Subject: [PATCH 25/30] replacing np.array with np.ndarray in typing --- ml-agents-envs/mlagents/envs/base_env.py | 28 +++++++++---------- ml-agents-envs/mlagents/envs/environment.py | 6 ++-- ml-agents-envs/mlagents/envs/rpc_utils.py | 2 +- .../mlagents/envs/tests/test_rpc_utils.py | 4 +-- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/base_env.py b/ml-agents-envs/mlagents/envs/base_env.py index 4a8938cb5f..f088c9c3d8 100644 --- a/ml-agents-envs/mlagents/envs/base_env.py +++ b/ml-agents-envs/mlagents/envs/base_env.py @@ -50,12 +50,12 @@ class StepResult(NamedTuple): the agent during this simulation step. 
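With the lazy id-to-index map, repeated per-agent lookups stay cheap. A small sketch on hand-built data (the agent ids and values are arbitrary):

    import numpy as np
    from mlagents.envs.base_env import BatchedStepResult

    result = BatchedStepResult(
        obs=[np.zeros((2, 3), dtype=np.float32)],   # two agents, one 3-float observation each
        reward=np.array([1.0, -1.0], dtype=np.float32),
        done=np.array([False, True]),
        max_step=np.array([False, False]),
        agent_id=np.array([7, 9], dtype=np.int32),
        action_mask=None,
    )
    assert result.contains_agent(9)
    single = result.get_agent_step_result(9)   # StepResult for agent 9 only
    assert single.reward == -1.0
    assert bool(single.done) is True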
""" - obs: List[np.array] + obs: List[np.ndarray] reward: float done: bool max_step: bool agent_id: int - action_mask: Optional[List[np.array]] + action_mask: Optional[List[np.ndarray]] class BatchedStepResult: @@ -88,12 +88,12 @@ class BatchedStepResult: """ def __init__(self, obs, reward, done, max_step, agent_id, action_mask): - self.obs: List[np.array] = obs - self.reward: np.array = reward - self.done: np.array = done - self.max_step: np.array = max_step - self.agent_id: np.array = agent_id - self.action_mask: Optional[List[np.array]] = action_mask + self.obs: List[np.ndarray] = obs + self.reward: np.ndarray = reward + self.done: np.ndarray = done + self.max_step: np.ndarray = max_step + self.agent_id: np.ndarray = agent_id + self.action_mask: Optional[List[np.ndarray]] = action_mask self._agent_id_to_index: Optional[Dict[int, int]] = None def contains_agent(self, agent_id: int) -> bool: @@ -138,7 +138,7 @@ def empty(spec: "AgentGroupSpec") -> "BatchedStepResult": Returns an empty BatchedStepResult. :param spec: The AgentGroupSpec for the BatchedStepResult """ - obs: List[np.array] = [] + obs: List[np.ndarray] = [] for shape in spec.observation_shapes: obs += [np.zeros((0,) + shape, dtype=np.float32)] return BatchedStepResult( @@ -217,7 +217,7 @@ def discrete_action_branches(self) -> Optional[Tuple[int, ...]]: else: return None - def create_empty_action(self, n_agents: int) -> np.array: + def create_empty_action(self, n_agents: int) -> np.ndarray: if self.action_type == ActionType.DISCRETE: return np.zeros((n_agents, self.action_size), dtype=np.int32) else: @@ -259,27 +259,27 @@ def get_agent_groups(self) -> List[str]: pass @abstractmethod - def set_actions(self, agent_group: str, action: np.array) -> None: + def set_actions(self, agent_group: str, action: np.ndarray) -> None: """ Sets the action for all of the agents in the simulation for the next step. The Actions must be in the same order as the order received in the step result. :param agent_group: The name of the group the agents are part of - :param action: A two dimensional np.array corresponding to the action + :param action: A two dimensional np.ndarray corresponding to the action (either int or float) """ pass @abstractmethod def set_action_for_agent( - self, agent_group: str, agent_id: int, action: np.array + self, agent_group: str, agent_id: int, action: np.ndarray ) -> None: """ Sets the action for one of the agents in the simulation for the next step. 
:param agent_group: The name of the group the agent is part of :param agent_id: The id of the agent the action is set for - :param action: A two dimensional np.array corresponding to the action + :param action: A two dimensional np.ndarray corresponding to the action (either int or float) """ pass diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index c854e8c829..81b06080f7 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -130,7 +130,7 @@ def __init__( ) self._env_state: Dict[str, BatchedStepResult] = {} self._env_specs: Dict[str, AgentGroupSpec] = {} - self._env_actions: Dict[str, np.array] = {} + self._env_actions: Dict[str, np.ndarray] = {} self._is_first_message = True self._update_group_specs(aca_output) @@ -327,7 +327,7 @@ def _assert_group_exists(self, agent_group: str) -> None: "in the environment".format(agent_group) ) - def set_actions(self, agent_group: str, action: np.array) -> None: + def set_actions(self, agent_group: str, action: np.ndarray) -> None: self._assert_group_exists(agent_group) if agent_group not in self._env_state: return @@ -345,7 +345,7 @@ def set_actions(self, agent_group: str, action: np.array) -> None: self._env_actions[agent_group] = action def set_action_for_agent( - self, agent_group: str, agent_id: int, action: np.array + self, agent_group: str, agent_id: int, action: np.ndarray ) -> None: self._assert_group_exists(agent_group) if agent_group not in self._env_state: diff --git a/ml-agents-envs/mlagents/envs/rpc_utils.py b/ml-agents-envs/mlagents/envs/rpc_utils.py index ef77b4df09..c08f5c3f97 100644 --- a/ml-agents-envs/mlagents/envs/rpc_utils.py +++ b/ml-agents-envs/mlagents/envs/rpc_utils.py @@ -106,7 +106,7 @@ def _process_vector_observation( def batched_step_result_from_proto( agent_info_list: List[AgentInfoProto], group_spec: AgentGroupSpec ) -> BatchedStepResult: - obs_list: List[np.array] = [] + obs_list: List[np.ndarray] = [] for obs_index, obs_shape in enumerate(group_spec.observation_shapes): is_visual = len(obs_shape) == 3 if is_visual: diff --git a/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py b/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py index 8a0baa5431..b47fd97401 100644 --- a/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py +++ b/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py @@ -38,7 +38,7 @@ def generate_list_agent_proto( return result -def generate_compressed_data(in_array: np.array) -> bytes: +def generate_compressed_data(in_array: np.ndarray) -> bytes: image_arr = (in_array * 255).astype(np.uint8) im = Image.fromarray(image_arr, "RGB") byteIO = io.BytesIO() @@ -46,7 +46,7 @@ def generate_compressed_data(in_array: np.array) -> bytes: return byteIO.getvalue() -def generate_compressed_proto_obs(in_array: np.array) -> ObservationProto: +def generate_compressed_proto_obs(in_array: np.ndarray) -> ObservationProto: obs_proto = ObservationProto() obs_proto.compressed_data = generate_compressed_data(in_array) obs_proto.compression_type = 1 From 98e10fe195aad2fff7f307a7033db14394f96159 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Fri, 6 Dec 2019 11:22:13 -0800 Subject: [PATCH 26/30] adding a new type for AgentGroup and AgentId --- ml-agents-envs/mlagents/envs/base_env.py | 21 ++++++++++++--------- ml-agents-envs/mlagents/envs/environment.py | 18 ++++++++++++------ 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/base_env.py 
b/ml-agents-envs/mlagents/envs/base_env.py index f088c9c3d8..05575f9802 100644 --- a/ml-agents-envs/mlagents/envs/base_env.py +++ b/ml-agents-envs/mlagents/envs/base_env.py @@ -25,10 +25,13 @@ """ from abc import ABC, abstractmethod -from typing import List, NamedTuple, Tuple, Optional, Union, Dict +from typing import List, NamedTuple, Tuple, Optional, Union, Dict, NewType import numpy as np from enum import Enum +AgentId = NewType("AgentId", int) +AgentGroup = NewType("AgentGroup", str) + class StepResult(NamedTuple): """ @@ -54,7 +57,7 @@ class StepResult(NamedTuple): reward: float done: bool max_step: bool - agent_id: int + agent_id: AgentId action_mask: Optional[List[np.ndarray]] @@ -96,14 +99,14 @@ def __init__(self, obs, reward, done, max_step, agent_id, action_mask): self.action_mask: Optional[List[np.ndarray]] = action_mask self._agent_id_to_index: Optional[Dict[int, int]] = None - def contains_agent(self, agent_id: int) -> bool: + def contains_agent(self, agent_id: AgentId) -> bool: if self._agent_id_to_index is None: self._agent_id_to_index = {} for a_idx, a_id in enumerate(self.agent_id): self._agent_id_to_index[a_id] = a_idx return agent_id in self._agent_id_to_index - def get_agent_step_result(self, agent_id: int) -> StepResult: + def get_agent_step_result(self, agent_id: AgentId) -> StepResult: """ returns the step result for a specific agent. :param agent_id: The id of the agent @@ -248,7 +251,7 @@ def close(self) -> None: pass @abstractmethod - def get_agent_groups(self) -> List[str]: + def get_agent_groups(self) -> List[AgentGroup]: """ Returns the list of the agent group names present in the environment. Agents grouped under the same group name have the same action and @@ -259,7 +262,7 @@ def get_agent_groups(self) -> List[str]: pass @abstractmethod - def set_actions(self, agent_group: str, action: np.ndarray) -> None: + def set_actions(self, agent_group: AgentGroup, action: np.ndarray) -> None: """ Sets the action for all of the agents in the simulation for the next step. The Actions must be in the same order as the order received in @@ -272,7 +275,7 @@ def set_actions(self, agent_group: str, action: np.ndarray) -> None: @abstractmethod def set_action_for_agent( - self, agent_group: str, agent_id: int, action: np.ndarray + self, agent_group: AgentGroup, agent_id: AgentId, action: np.ndarray ) -> None: """ Sets the action for one of the agents in the simulation for the next @@ -285,7 +288,7 @@ def set_action_for_agent( pass @abstractmethod - def get_step_result(self, agent_group: str) -> BatchedStepResult: + def get_step_result(self, agent_group: AgentGroup) -> BatchedStepResult: """ Retrieves the observations of the agents that requested a step in the simulation. 
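For readers unfamiliar with `typing.NewType`, the `AgentId` and `AgentGroup` aliases introduced in this patch are zero-cost wrappers around `int` and `str`: at runtime they simply return the wrapped value, and they only carry meaning for static type checkers. A minimal sketch (the group name is a placeholder, not a value from the patch):

```python
from typing import NewType

AgentId = NewType("AgentId", int)
AgentGroup = NewType("AgentGroup", str)

# At runtime these calls return the underlying value unchanged; a type checker,
# however, can flag code that passes a plain str where an AgentGroup is expected.
group = AgentGroup("ExampleBehavior")  # placeholder name
agent = AgentId(42)

assert isinstance(group, str) and isinstance(agent, int)
```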
@@ -296,7 +299,7 @@ def get_step_result(self, agent_group: str) -> BatchedStepResult: pass @abstractmethod - def get_agent_group_spec(self, agent_group: str) -> AgentGroupSpec: + def get_agent_group_spec(self, agent_group: AgentGroup) -> AgentGroupSpec: """ Get the AgentGroupSpec corresponding to the agent group name :param agent_group: The name of the group the agents are part of diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index 81b06080f7..d024a7d02a 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -8,7 +8,13 @@ from mlagents.envs.side_channel.side_channel import SideChannel -from mlagents.envs.base_env import BaseEnv, BatchedStepResult, AgentGroupSpec +from mlagents.envs.base_env import ( + BaseEnv, + BatchedStepResult, + AgentGroupSpec, + AgentGroup, + AgentId, +) from mlagents.envs.timers import timed, hierarchical_timer from .exception import ( UnityEnvironmentException, @@ -317,7 +323,7 @@ def step(self) -> None: self._update_state(rl_output) self._env_actions.clear() - def get_agent_groups(self) -> List[str]: + def get_agent_groups(self) -> List[AgentGroup]: return list(self._env_specs.keys()) def _assert_group_exists(self, agent_group: str) -> None: @@ -327,7 +333,7 @@ def _assert_group_exists(self, agent_group: str) -> None: "in the environment".format(agent_group) ) - def set_actions(self, agent_group: str, action: np.ndarray) -> None: + def set_actions(self, agent_group: AgentGroup, action: np.ndarray) -> None: self._assert_group_exists(agent_group) if agent_group not in self._env_state: return @@ -345,7 +351,7 @@ def set_actions(self, agent_group: str, action: np.ndarray) -> None: self._env_actions[agent_group] = action def set_action_for_agent( - self, agent_group: str, agent_id: int, action: np.ndarray + self, agent_group: AgentGroup, agent_id: AgentId, action: np.ndarray ) -> None: self._assert_group_exists(agent_group) if agent_group not in self._env_state: @@ -376,11 +382,11 @@ def set_action_for_agent( ) from ie self._env_actions[agent_group][index] = action - def get_step_result(self, agent_group: str) -> BatchedStepResult: + def get_step_result(self, agent_group: AgentGroup) -> BatchedStepResult: self._assert_group_exists(agent_group) return self._env_state[agent_group] - def get_agent_group_spec(self, agent_group: str) -> AgentGroupSpec: + def get_agent_group_spec(self, agent_group: AgentGroup) -> AgentGroupSpec: self._assert_group_exists(agent_group) return self._env_specs[agent_group] From 6da142de1090f983b73238136ec8e70555100f10 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Fri, 6 Dec 2019 14:51:35 -0800 Subject: [PATCH 27/30] fixing brain_info when vec_obs == 0 --- ml-agents-envs/mlagents/envs/brain_conversion_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/brain_conversion_utils.py b/ml-agents-envs/mlagents/envs/brain_conversion_utils.py index 37548e1b0c..49044a430a 100644 --- a/ml-agents-envs/mlagents/envs/brain_conversion_utils.py +++ b/ml-agents-envs/mlagents/envs/brain_conversion_utils.py @@ -2,7 +2,7 @@ from mlagents.envs.base_env import BatchedStepResult, AgentGroupSpec from mlagents.envs.exception import UnityEnvironmentException import numpy as np -from typing import List, Any +from typing import List def step_result_to_brain_info( @@ -24,7 +24,7 @@ def step_result_to_brain_info( "either be a vector of float or a PNG image" ) if len(vec_obs_indices) == 0: - vec_obs: 
List[Any] = [] + vec_obs = np.zeros((n_agents, 0), dtype=np.float32) else: vec_obs = np.concatenate([step_result.obs[i] for i in vec_obs_indices], axis=1) vis_obs = [step_result.obs[i] for i in vis_obs_indices] From 617a768485c410c8ba2290bacb0a21fbe23cc87f Mon Sep 17 00:00:00 2001 From: Vincent-Pierre BERGES Date: Mon, 9 Dec 2019 11:22:49 -0800 Subject: [PATCH 28/30] Docs ll api (#3047) * LL-API documentation changes * fixes * deleting implementation details * Update docs/Python-API.md Co-Authored-By: Chris Elion * edited the migrating docs * Update docs/Migrating.md Co-Authored-By: Chris Elion --- docs/Migrating.md | 1 + docs/Python-API.md | 220 +++++++++++++++++++++++++++++---------------- 2 files changed, 145 insertions(+), 76 deletions(-) diff --git a/docs/Migrating.md b/docs/Migrating.md index ba4dfa858b..3a22a0fb73 100644 --- a/docs/Migrating.md +++ b/docs/Migrating.md @@ -3,6 +3,7 @@ ## Migrating from master to develop ### Important changes +* The low level Python API has changed. You can look at the document [Low Level Python API documentation](Python-API.md) for more information. This should only affect you if you're writing a custom trainer; if you use `mlagents-learn` for training, this should be a transparent change. * `CustomResetParameters` are now removed. * `reset()` on the Low-Level Python API no longer takes a `train_mode` argument. To modify the performance/speed of the engine, you must use an `EngineConfigurationChannel` * `reset()` on the Low-Level Python API no longer takes a `config` argument. `UnityEnvironment` no longer has a `reset_parameters` field. To modify float properties in the environment, you must use a `FloatPropertiesChannel`. For more information, refer to the [Low Level Python API documentation](Python-API.md) diff --git a/docs/Python-API.md b/docs/Python-API.md index 3a39c20d7c..ad19916764 100644 --- a/docs/Python-API.md +++ b/docs/Python-API.md @@ -1,9 +1,4 @@ -# Unity ML-Agents Python Interface and Trainers - -The `mlagents` Python package is part of the [ML-Agents -Toolkit](https://github.com/Unity-Technologies/ml-agents). `mlagents` provides a -Python API that allows direct interaction with the Unity game engine as well as -a collection of trainers and algorithms to train agents in Unity environments. +# Unity ML-Agents Python Low Level API The `mlagents` Python package contains two components: a low level API which allows you to interact directly with a Unity Environment (`mlagents.envs`) and @@ -11,9 +6,12 @@ an entry point to train (`mlagents-learn`) which allows you to train agents in Unity Environments using our implementations of reinforcement learning or imitation learning. +You can use the Python Low Level API to interact directly with your learning +environment, and use it to develop new learning algorithms + ## mlagents.envs -The ML-Agents Toolkit provides a Python API for controlling the Agent simulation +The ML-Agents Toolkit Low Level API is a Python API for controlling the simulation loop of an environment or game built with Unity. This API is used by the training algorithms inside the ML-Agent Toolkit, but you can also write your own Python programs using this API. Go [here](../notebooks/getting-started.ipynb) @@ -24,25 +22,31 @@ The key objects in the Python API include: - **UnityEnvironment** — the main interface between the Unity application and your code. Use UnityEnvironment to start and control a simulation or training session. 
-- **BrainInfo** — contains all the data from Agents in the simulation, such as - observations and rewards. -- **BrainParameters** — describes the data elements in a BrainInfo object. For - example, provides the array length of an observation in BrainInfo. +- **BatchedStepResult** — contains the data from Agents belonging to the same + "AgentGroup" in the simulation, such as observations and rewards. +- **AgentGroupSpec** — describes the shape of the data inside a BatchedStepResult. + For example, provides the dimensions of the observations of a group. -These classes are all defined in the `ml-agents/mlagents/envs` folder of -the ML-Agents SDK. +These classes are all defined in the [base_env](../ml-agents-envs/mlagents/envs/base_env.py) +script. + +An Agent Group is a group of Agents identified by a string name that share the same +observations and action types. You can think about Agent Group as a group of agents +that will share the same policy or behavior. All Agents in a group have the same goal +and reward signals. To communicate with an Agent in a Unity environment from a Python program, the -Agent must use a LearningBrain. -Your code is expected to return -actions for Agents with LearningBrains. +Agent in the simulation must have `Behavior Parameters` set to communicate. You +must set the `Behavior Type` to `Default` and give it a `Behavior Name`. + +__Note__: The `Behavior Name` corresponds to the Agent Group name on Python. _Notice: Currently communication between Unity and Python takes place over an open socket without authentication. As such, please make sure that the network where training takes place is secure. This will be addressed in a future release._ -### Loading a Unity Environment +## Loading a Unity Environment Python-side communication happens through `UnityEnvironment` which is located in `ml-agents/mlagents/envs`. To load a Unity environment from a built binary @@ -51,7 +55,7 @@ of your Unity environment is 3DBall.app, in python, run: ```python from mlagents.envs.environment import UnityEnvironment -env = UnityEnvironment(file_name="3DBall", worker_id=0, seed=1) +env = UnityEnvironment(file_name="3DBall", base_port=5005, seed=1, side_channels=[]) ``` - `file_name` is the name of the environment binary (located in the root @@ -62,6 +66,9 @@ env = UnityEnvironment(file_name="3DBall", worker_id=0, seed=1) training process. In environments which do not involve physics calculations, setting the seed enables reproducible experimentation by ensuring that the environment and trainers utilize the same random seed. +- `side_channels` provides a way to exchange data with the Unity simulation that + is not related to the reinforcement learning loop. For example: configurations + or properties. More on them in the [Modifying the environment from Python](Python-API.md#modifying-the-environment-from-python) section. If you want to directly interact with the Editor, you need to use `file_name=None`, then press the :arrow_forward: button in the Editor when the @@ -70,59 +77,125 @@ displayed on the screen ### Interacting with a Unity Environment -A BrainInfo object contains the following fields: - -- **`visual_observations`** : A list of 4 dimensional numpy arrays. Matrix n of - the list corresponds to the nth observation of the Brain. -- **`vector_observations`** : A two dimensional numpy array of dimension `(batch - size, vector observation size)`. 
-- **`rewards`** : A list as long as the number of Agents using the Brain - containing the rewards they each obtained at the previous step. -- **`local_done`** : A list as long as the number of Agents using the Brain - containing `done` flags (whether or not the Agent is done). -- **`max_reached`** : A list as long as the number of Agents using the Brain - containing true if the Agents reached their max steps. -- **`agents`** : A list of the unique ids of the Agents using the Brain. - -Once loaded, you can use your UnityEnvironment object, which referenced by a -variable named `env` in this example, can be used in the following way: - -- **Print : `print(str(env))`** - Prints all parameters relevant to the loaded environment and the - Brains. -- **Reset : `env.reset()`** - Send a reset signal to the environment, and provides a dictionary mapping - Brain names to BrainInfo objects. -- **Step : `env.step(action)`** - Sends a step signal to the environment using the actions. For each Brain : - - `action` can be one dimensional arrays or two dimensional arrays if you have - multiple Agents per Brain. - - Returns a dictionary mapping Brain names to BrainInfo objects. - - For example, to access the BrainInfo belonging to a Brain called - 'brain_name', and the BrainInfo field 'vector_observations': - - ```python - info = env.step() - brainInfo = info['brain_name'] - observations = brainInfo.vector_observations - ``` - - Note that if you have more than one LearningBrain in the scene, you - must provide dictionaries from Brain names to arrays for `action`, `memory` - and `value`. For example: If you have two Learning Brains named `brain1` and - `brain2` each with one Agent taking two continuous actions, then you can - have: - - ```python - action = {'brain1':[1.0, 2.0], 'brain2':[3.0,4.0]} - ``` - - Returns a dictionary mapping Brain names to BrainInfo objects. -- **Close : `env.close()`** - Sends a shutdown signal to the environment and closes the communication - socket. +#### The BaseEnv interface + +A `BaseEnv` has the following methods: + + - **Reset : `env.reset()`** Sends a signal to reset the environment. Returns None. + - **Step : `env.step()`** Sends a signal to step the environment. Returns None. + Note that a "step" for Python does not correspond to either Unity `Update` or + `FixedUpdate`. When `step()` or `reset()` is called, the Unity simulation will + move forward until an Agent in the simulation needs an input from Python to act. + - **Close : `env.close()`** Sends a shutdown signal to the environment and terminates + the communication. + - **Get Agent Group Names : `env.get_agent_groups()`** Returns a list of agent group ids. + Note that the number of groups can change over time in the simulation if new + agent groups are created in the simulation. + - **Get Agent Group Spec : `env.get_agent_group_spec(agent_group: str)`** Returns + the `AgentGroupSpec` corresponding to the agent_group given as input. An + `AgentGroupSpec` contains information such as the observation shapes, the action + type (multi-discrete or continuous) and the action shape. Note that the `AgentGroupSpec` + for a specific group is fixed throughout the simulation. + - **Get Batched Step Result for Agent Group : `env.get_step_result(agent_group: str)`** + Returns a `BatchedStepResult` corresponding to the agent_group given as input. + A `BatchedStepResult` contains information about the state of the agents in a group + such as the observations, the rewards, the done flags and the agent identifiers.
The + data is in `np.array` of which the first dimension is always the number of agents which + requested a decision in the simulation since the last call to `env.step()`. Note that the + number of agents is not guaranteed to remain constant during the simulation. + - **Set Actions for Agent Group : `env.set_actions(agent_group: str, action: np.array)`** + Sets the actions for a whole agent group. `action` is a 2D `np.array` of `dtype=np.int32` + in the discrete action case and `dtype=np.float32` in the continuous action case. + The first dimension of `action` is the number of agents that requested a decision + since the last call to `env.step()`. The second dimension is the number of discrete actions + in multi-discrete action type and the number of actions in continuous action type. + - **Set Action for Agent : `env.set_action_for_agent(agent_group: str, agent_id: int, action: np.array)`** + Sets the action for a specific Agent in an agent group. `agent_group` is the name of the + group the Agent belongs to and `agent_id` is the integer identifier of the Agent. Action + is a 1D array of type `dtype=np.int32` and size equal to the number of discrete actions + in multi-discrete action type and of type `dtype=np.float32` and size equal to the number + of actions in continuous action type. + + +__Note:__ If no action is provided for an agent group between two calls to `env.step()` then +the default action will be all zeros (in either discrete or continuous action space). A minimal interaction loop combining these calls is sketched at the end of this document. +#### BatchedStepResult and StepResult + +A `BatchedStepResult` has the following fields : + + - `obs` is a list of numpy arrays of observations collected by the group of + agents. The first dimension of the array corresponds to the batch size of + the group (number of agents requesting a decision since the last call to + `env.step()`). + - `reward` is a float vector of length batch size. Corresponds to the + rewards collected by each agent since the last simulation step. + - `done` is an array of booleans of length batch size. Is true if the + associated Agent was terminated during the last simulation step. + - `max_step` is an array of booleans of length batch size. Is true if the + associated Agent reached its maximum number of steps during the last + simulation step. + - `agent_id` is an int vector of length batch size containing unique + identifiers for the corresponding Agents. This is used to track Agents + across simulation steps. + - `action_mask` is an optional list of two dimensional arrays of booleans. + Only available in multi-discrete action space type. + Each array corresponds to an action branch. The first dimension of each + array is the batch size and the second contains a mask for each action of + the branch. If true, the action is not available for the agent during + this simulation step. + +It also has the following two methods: + + - `n_agents()` Returns the number of agents requesting a decision since + the last call to `env.step()`. + - `get_agent_step_result(agent_id: int)` Returns a `StepResult` + for the Agent with the `agent_id` unique identifier. + +A `StepResult` has the following fields: + + - `obs` is a list of numpy arrays of observations collected by the + agent. (Each array has one less dimension than the arrays in `BatchedStepResult`.) + - `reward` is a float. Corresponds to the rewards collected by the agent + since the last simulation step. + - `done` is a bool. Is true if the Agent was terminated during the last + simulation step. + - `max_step` is a bool.
Is true if the Agent reached its maximum number of + steps during the last simulation step. + - `agent_id` is an int and a unique identifier for the corresponding Agent. + - `action_mask` is an optional list of one dimensional arrays of booleans. + Only available in multi-discrete action space type. + Each array corresponds to an action branch. Each array contains a mask + for each action of the branch. If true, the action is not available for + the agent during this simulation step. + +#### AgentGroupSpec + +An Agent group can have either discrete or continuous actions. Use `spec.is_action_discrete()` or `spec.is_action_continuous()` to check + which one it is. If discrete, the action tensors are expected to be `np.int32`. If +continuous, the actions are expected to be `np.float32`. + +An `AgentGroupSpec` has the following fields : + + - `observation_shapes` is a List of Tuples of int : Each Tuple corresponds + to an observation's dimensions (without the number of agents dimension). + The shape tuples have the same ordering as the ordering of the + BatchedStepResult and StepResult. + - `action_type` is the type of data of the action. It can be discrete or + continuous. If discrete, the action tensors are expected to be `np.int32`. If + continuous, the actions are expected to be `np.float32`. + - `action_size` is an `int` corresponding to the expected dimension of the action + array. + - In continuous action space it is the number of floats that constitute the action. + - In discrete action space (same as multi-discrete) it corresponds to the + number of branches (the number of independent actions). + - `discrete_action_branches` is a Tuple of int only for discrete action space. Each int + corresponds to the number of different options for each branch of the action. + For example: in a game with a direction input (no movement, left, right) and a jump input + (no jump, jump), there will be two branches (direction and jump), the first one with 3 + options and the second with 2 options (`action_size = 2` and + `discrete_action_branches = (3,2,)`). A sketch of assembling such an action array appears at the end of this document. + ### Modifying the environment from Python The Environment can be modified by using side channels to send data to the @@ -194,8 +267,3 @@ var academy = FindObjectOfType(); var sharedProperties = academy.FloatProperties; float property1 = sharedProperties.GetPropertyWithDefault("parameter_1", 0.0f); - -## mlagents-learn - -For more detailed documentation on using `mlagents-learn`, check out -[Training ML-Agents](Training-ML-Agents.md) From d8b52c2851b8cf9d938bc9e8e3210fc99c642c1e Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Mon, 9 Dec 2019 11:47:32 -0800 Subject: [PATCH 29/30] adding a period --- docs/Python-API.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Python-API.md b/docs/Python-API.md index ad19916764..279d7d0db3 100644 --- a/docs/Python-API.md +++ b/docs/Python-API.md @@ -7,7 +7,7 @@ Unity Environments using our implementations of reinforcement learning or imitation learning. You can use the Python Low Level API to interact directly with your learning -environment, and use it to develop new learning algorithms +environment, and use it to develop new learning algorithms.
## mlagents.envs From 81acfaf1baa93ffd477b1b25e9e668c705a1a56a Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Mon, 9 Dec 2019 11:55:02 -0800 Subject: [PATCH 30/30] removing change log --- ml-agents-envs/mlagents/envs/base_env.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/base_env.py b/ml-agents-envs/mlagents/envs/base_env.py index 05575f9802..f6c678f80d 100644 --- a/ml-agents-envs/mlagents/envs/base_env.py +++ b/ml-agents-envs/mlagents/envs/base_env.py @@ -15,13 +15,6 @@ one agent in the simulation sends its observations to Python again. Since Agents can request decisions at different frequencies, a simulation step does not necessarily correspond to a fixed simulation time increment. -Changes from ML-Agents v0.11 : - - Step now takes no arguments and returns None. - - The data received from a step is no longer a dictionary of NamedTuple, the -state data must now be retrieved manually from the environment object. - - Reset does no longer take any input arguments. - - Modifying properties of the environment besides actions is handled in -SideChannels. """ from abc import ABC, abstractmethod
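To make the `discrete_action_branches` example above concrete, here is a minimal sketch of assembling a multi-discrete action array of the shape `env.set_actions` expects. The group name, agent count, and chosen options are placeholders, not values taken from the patch:

```python
import numpy as np

# Hypothetical spec from the direction/jump example: two branches with 3 and 2 options.
discrete_action_branches = (3, 2)
n_agents = 4  # placeholder: number of agents that requested a decision since the last step()

# One row per agent, one column per branch; discrete actions must be np.int32.
action = np.zeros((n_agents, len(discrete_action_branches)), dtype=np.int32)
action[:, 0] = 1  # direction branch: 0 = no movement, 1 = left, 2 = right
action[:, 1] = 0  # jump branch: 0 = no jump, 1 = jump

# env.set_actions("ExampleBehavior", action)  # placeholder group name
```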
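To close, here is a minimal, hedged sketch of an interaction loop that ties together the calls documented in this patch series (`reset`, `get_agent_groups`, `get_agent_group_spec`, `get_step_result`, `set_actions`, `step`, `close`). It assumes a built environment binary and a single agent group with continuous actions; the file name, port, and step count are placeholders, and a real trainer would compute actions from the observations rather than sampling them at random:

```python
import numpy as np

from mlagents.envs.environment import UnityEnvironment

# Placeholder binary name and settings; see the "Loading a Unity Environment" section.
env = UnityEnvironment(file_name="3DBall", base_port=5005, seed=1, side_channels=[])
env.reset()

group_name = env.get_agent_groups()[0]            # the Behavior Name set in Unity
group_spec = env.get_agent_group_spec(group_name)

for _ in range(100):  # placeholder number of simulation steps
    step_result = env.get_step_result(group_name)
    n_agents = step_result.n_agents()

    # Random continuous actions, one row per agent that requested a decision.
    action = np.random.uniform(-1.0, 1.0, (n_agents, group_spec.action_size))
    env.set_actions(group_name, action.astype(np.float32))

    env.step()

env.close()
```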