Develop remove academy done #2519

Merged 18 commits on Sep 17, 2019
Changes from 17 commits
244 changes: 18 additions & 226 deletions UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs

Large diffs are not rendered by default.

@@ -30,7 +30,6 @@ public void TestPerception2D()
var rayPer2D = go.AddComponent<RayPerception2D>();
var result = rayPer2D.Perceive(1f, angles,
tags);
Debug.Log(result.Count);
Assert.IsTrue(result.Count == angles.Length * (tags.Length + 2));
}
}
76 changes: 14 additions & 62 deletions UnitySDK/Assets/ML-Agents/Scripts/Academy.cs
@@ -37,10 +37,12 @@ public class EnvironmentConfiguration
[Tooltip("Height of the environment window in pixels.")]
public int height;

[Tooltip("Rendering quality of environment. (Higher is better quality.)")][Range(0, 5)]
[Tooltip("Rendering quality of environment. (Higher is better quality.)")]
[Range(0, 5)]
public int qualityLevel;

[Tooltip("Speed at which environment is run. (Higher is faster.)")][Range(1f, 100f)]
[Tooltip("Speed at which environment is run. (Higher is faster.)")]
[Range(1f, 100f)]
public float timeScale;

[Tooltip("Frames per second (FPS) engine attempts to maintain.")]
@@ -113,14 +115,6 @@ public abstract class Academy : MonoBehaviour

[FormerlySerializedAs("maxSteps")]
[SerializeField]
[Tooltip("Total number of steps per global episode.\nNon-positive " +
"values correspond to episodes without a maximum number of \n" +
"steps. Once the step counter reaches this maximum value, the " +
"environment will reset.")]
int m_MaxSteps;

[FormerlySerializedAs("trainingConfiguration")]
[SerializeField]
[Tooltip("The engine-level settings which correspond to rendering " +
"quality and engine speed during Training.")]
EnvironmentConfiguration m_TrainingConfiguration =
@@ -167,15 +161,6 @@ public abstract class Academy : MonoBehaviour
/// external Brain during reset via <see cref="SetIsInference"/>.
bool m_IsInference = true;

/// The done flag of the academy. When set to true, the academy will
/// call <see cref="AcademyReset"/> instead of <see cref="AcademyStep"/>
/// at step time. If true, all agents done flags will be set to true.
bool m_Done;

/// Whether the academy has reached the maximum number of steps for the
/// current episode.
bool m_MaxStepReached;

/// The number of episodes completed by the environment. Incremented
/// each time the environment is reset.
int m_EpisodeCount;
@@ -224,7 +209,7 @@ public abstract class Academy : MonoBehaviour
// Academy's maxStepReached, done and stepCount values. The agents rely
// on this event to update their own values of max step reached and done
// in addition to aligning on the step count of the global episode.
public event System.Action<bool, bool, int> AgentSetStatus;
public event System.Action<int> AgentSetStatus;

// Signals to all the agents at each environment step so they can reset
// if their flag has been set to done (assuming the agent has requested a
@@ -364,13 +349,14 @@ private void InitializeEnvironment()
// in inference mode.
m_IsInference = !m_IsCommunicatorOn;

BrainDecideAction += () => {};
DestroyAction += () => {};
AgentSetStatus += (m, d, i) => {};
AgentResetIfDone += () => {};
AgentSendState += () => {};
AgentAct += () => {};
AgentForceReset += () => {};
BrainDecideAction += () => { };
DestroyAction += () => { };
AgentSetStatus += (i) => { };
AgentResetIfDone += () => { };
AgentSendState += () => { };
AgentAct += () => { };
AgentForceReset += () => { };


// Configure the environment using the configurations provided by
// the developer in the Editor.
@@ -526,25 +512,6 @@ public int GetTotalStepCount()
return m_TotalStepCount;
}

/// <summary>
/// Sets the done flag to true.
/// </summary>
public void Done()
{
m_Done = true;
}

/// <summary>
/// Returns whether or not the academy is done.
/// </summary>
/// <returns>
/// <c>true</c>, if academy is done, <c>false</c> otherwise.
/// </returns>
public bool IsDone()
{
return m_Done;
}

/// <summary>
/// Returns whether or not the communicator is on.
/// </summary>
@@ -610,20 +577,7 @@ void EnvironmentStep()
ForcedFullReset();
}

if ((m_StepCount >= m_MaxSteps) && m_MaxSteps > 0)
{
m_MaxStepReached = true;
Done();
}

AgentSetStatus(m_MaxStepReached, m_Done, m_StepCount);

m_BrainBatcher.RegisterAcademyDoneFlag(m_Done);

if (m_Done)
{
EnvironmentReset();
}
AgentSetStatus(m_StepCount);

AgentResetIfDone();

@@ -646,8 +600,6 @@ void EnvironmentReset()
{
m_StepCount = 0;
m_EpisodeCount++;
m_Done = false;
m_MaxStepReached = false;
AcademyReset();
}

43 changes: 17 additions & 26 deletions UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
@@ -93,8 +93,8 @@ public AgentInfoProto ToProto()
{
var agentInfoProto = new AgentInfoProto
{
StackedVectorObservation = {stackedVectorObservation},
StoredVectorActions = {storedVectorActions},
StackedVectorObservation = { stackedVectorObservation },
StoredVectorActions = { storedVectorActions },
StoredTextActions = storedTextActions,
TextObservation = textObservation,
Reward = reward,
@@ -386,7 +386,7 @@ void OnDisable()
academy.AgentResetIfDone -= ResetIfDone;
academy.AgentSendState -= SendInfo;
academy.AgentAct -= AgentStep;
academy.AgentForceReset -= _AgentReset;
academy.AgentForceReset -= ForceReset;
}
}

@@ -890,6 +890,17 @@ public virtual void AgentReset()
{
}

/// <summary>
/// This method will forcefully reset the agent and will also reset the hasAlreadyReset flag.
/// This way, even if the agent was already in the process of resetting, it will be reset again
/// and will not send a Done flag at the next step.
/// </summary>
void ForceReset()
{
m_HasAlreadyReset = false;
_AgentReset();
}

/// <summary>
/// An internal reset method that updates internal data structures in
/// addition to calling <see cref="AgentReset"/>.
@@ -975,33 +986,13 @@ protected float ScaleAction(float rawAction, float min, float max)
}

/// <summary>
/// Sets the status of the agent.
/// Sets the status of the agent. Will request decisions or actions according
/// to the Academy's step count.
/// </summary>
/// <param name="academyMaxStep">If set to <c>true</c>
/// The agent must set maxStepReached.</param>
/// <param name="academyDone">If set to <c>true</c>
/// The agent must set done.</param>
/// <param name="academyStepCounter">Number of current steps in episode</param>
void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
void SetStatus(int academyStepCounter)
{
if (academyDone)
{
academyStepCounter = 0;
}

MakeRequests(academyStepCounter);
if (academyMaxStep)
{
m_MaxStepReached = true;
}

// If the Academy needs to reset, the agent should reset
// even if it reset recently.
if (academyDone)
{
Done();
m_HasAlreadyReset = false;
}
}

/// Signals the agent that it must reset if its done flag is set to true.
19 changes: 2 additions & 17 deletions UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs
@@ -41,9 +41,6 @@ public class Batcher
CommunicatorObjects.UnityRLOutput m_CurrentUnityRlOutput =
new CommunicatorObjects.UnityRLOutput();

/// Keeps track of the done flag of the Academy
bool m_AcademyDone;

/// Keeps track of last CommandProto sent by External
CommunicatorObjects.CommandProto m_Command;

@@ -99,17 +96,6 @@ public Batcher(ICommunicator communicator)
return initializationInput.RlInitializationInput;
}

/// <summary>
/// Registers the done flag of the academy to the next output to be sent
/// to the communicator.
/// </summary>
/// <param name="done">If set to <c>true</c>
/// The academy done state will be sent to External at the next Exchange.</param>
public void RegisterAcademyDoneFlag(bool done)
{
m_AcademyDone = done;
}

/// <summary>
/// Gets the command. Is used by the academy to get reset or quit signals.
/// </summary>
@@ -209,9 +195,8 @@ public void SubscribeBrain(string brainKey)
// must be sent
if (m_HasQueried.Values.All(x => x))
{
if (m_HasData.Values.Any(x => x) || m_AcademyDone)
if (m_HasData.Values.Any(x => x))
{
m_CurrentUnityRlOutput.GlobalDone = m_AcademyDone;
SendBatchedMessageHelper();
}

@@ -226,7 +211,7 @@ public void SubscribeBrain(string brainKey)
}

/// <summary>
/// Helper method that sends the curent UnityRLOutput, receives the next UnityInput and
/// Helper method that sends the current UnityRLOutput, receives the next UnityInput and
/// Applies the appropriate AgentAction to the agents.
/// </summary>
void SendBatchedMessageHelper()
5 changes: 1 addition & 4 deletions docs/Learning-Environment-Design-Academy.md
@@ -31,7 +31,7 @@ in your Academy subclass.
Implement an `AcademyReset()` function to alter the environment at the start of
each episode. For example, you might want to reset an Agent to its starting
position or move a goal to a random position. An environment resets when the
Academy `Max Steps` count is reached.
`reset()` method is called on the Python `UnityEnvironment`.
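
A minimal sketch of such a reset; the `RandomGoalAcademy` class and its `goal` field are hypothetical, not an API of the SDK:

```csharp
using UnityEngine;
using MLAgents;

// Hypothetical Academy subclass that moves a goal object to a random
// position at the start of each episode.
public class RandomGoalAcademy : Academy
{
    // Assumption: assigned in the Inspector.
    public Transform goal;

    public override void AcademyReset()
    {
        goal.position = new Vector3(
            Random.Range(-4f, 4f), 0.5f, Random.Range(-4f, 4f));
    }
}
```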

When you reset an environment, consider the factors that should change so that
training is generalizable to different conditions. For example, if you were
@@ -54,9 +54,6 @@ logic for creating them in the `AcademyStep()` function.
process. Any Brain added to the Broadcast Hub will be visible from the external
process. In addition, if the checkbox `Control` is checked, the Brain will be
controllable from the external process and will thus be trainable.
* `Max Steps` - Total number of steps per-episode. `0` corresponds to episodes
without a maximum number of steps. Once the step counter reaches maximum, the
environment will reset.
* `Configuration` - The engine-level settings which correspond to rendering
quality and engine speed.
* `Width` - Width of the environment window in pixels.
24 changes: 4 additions & 20 deletions docs/Learning-Environment-Design.md
@@ -28,9 +28,8 @@ Step-by-step procedures for running the training process are provided in the

Training and simulation proceed in steps orchestrated by the ML-Agents Academy
class. The Academy works with Agent objects in the scene to step
through the simulation. When either the Academy has reached its maximum number
of steps or all Agents in the scene are _done_, one training episode is
finished.
through the simulation. When all Agents in the scene are _done_,
one training episode is finished.

During training, the external Python training process communicates with the
Academy to run a series of episodes while it collects data and optimizes its
@@ -54,8 +53,6 @@ The ML-Agents Academy class orchestrates the agent simulation loop as follows:
Step` count or has otherwise marked itself as `done`. Optionally, you can set
an Agent to restart if it finishes before the end of an episode. In this
case, the Academy calls the `AgentReset()` function.
8. When the Academy reaches its own `Max Step` count, it starts the next episode
again by calling your Academy subclass's `AcademyReset()` function.

To create a training environment, extend the Academy and Agent classes to
implement the above methods. The `Agent.CollectObservations()` and
@@ -101,12 +98,6 @@ following methods (all are optional):
Agents have already collected their observations and chosen an action before
the Academy invokes this method.

The base Academy classes also defines several important properties that you can
set in the Unity Editor Inspector. For training, the most important of these
properties is `Max Steps`, which determines how long each training episode
lasts. Once the Academy's step counter reaches this value, it calls the
`AcademyReset()` function to start the next episode.

See [Academy](Learning-Environment-Design-Academy.md) for a complete list of
the Academy properties and their uses.

@@ -160,8 +151,7 @@ You must also determine how an Agent finishes its task or times out. You can
manually set an Agent to done in your `AgentAction()` function when the Agent
has finished (or irrevocably failed) its task by calling the `Done()` function.
You can also set the Agent's `Max Steps` property to a positive value and the
Agent will consider itself done after it has taken that many steps. When the
Academy reaches its own `Max Steps` count, it starts the next episode. If you
Agent will consider itself done after it has taken that many steps. If you
set an Agent's `ResetOnDone` property to true, then the Agent can attempt its
task several times in one episode. (Use the `Agent.AgentReset()` function to
prepare the Agent to start again.)
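
As a hedged sketch of this pattern, the agent below ends its episode on success or failure; the class name and both predicates are hypothetical:

```csharp
using MLAgents;

// Hypothetical Agent that calls Done() when its task succeeds or
// irrevocably fails; time-outs come from the Agent's own Max Steps.
public class ReachTargetAgent : Agent
{
    public override void AgentAction(float[] vectorAction, string textAction)
    {
        // ... apply vectorAction to move the agent here ...

        if (ReachedTarget())        // hypothetical success check
        {
            SetReward(1.0f);
            Done();                 // ends the episode for this Agent
        }
        else if (FellOffPlatform()) // hypothetical failure check
        {
            SetReward(-1.0f);
            Done();
        }
    }

    // Placeholder predicates; a real agent would test scene state.
    bool ReachedTarget() { return false; }
    bool FellOffPlatform() { return false; }
}
```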
Expand All @@ -185,13 +175,7 @@ to control the agent decision making process. The Academy defines several
properties that can be set differently for a training scene versus a regular
scene. The Academy's **Configuration** properties control rendering and time
scale. You can set the **Training Configuration** to minimize the time Unity
spends rendering graphics in order to speed up training. You may need to adjust
the other functional, Academy settings as well. For example, `Max Steps` should
be as short as possible for training — just long enough for the agent to
accomplish its task, with some extra time for "wandering" while it learns. In
regular scenes, you often do not want the Academy to reset the scene at all; if
so, `Max Steps` should be set to zero.

spends rendering graphics in order to speed up training.
When you create a training environment in Unity, you must set up the scene so
that it can be controlled by the external training process. Considerations
include:
1 change: 1 addition & 0 deletions docs/Migrating.md
@@ -8,6 +8,7 @@
#### Steps to Migrate
* `UnitySDK/Assets/ML-Agents/Scripts/Communicator.cs` and its class `Communicator` have been renamed to `UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs` and `ICommunicator` respectively.
* The `SpaceType` Enums `discrete`, and `continuous` have been renamed to `Discrete` and `Continuous`.
* We have removed the `Done` call as well as the capacity to set `Max Steps` on the Academy. It is therefore no longer possible to trigger an `AcademyReset` from C# (only from Python). If you want to reset the simulation after a fixed number of steps, or when an event in the simulation occurs, we recommend looking at our multi-agent example environments (such as BananaCollector), where an "Area" object is responsible for resetting its group of Agents; a minimal sketch follows below.
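
A minimal sketch of that recommendation; `ResettableArea`, its `agents` array, and the step budget are assumptions, not SDK types:

```csharp
using UnityEngine;
using MLAgents;

// Hypothetical "Area" that owns a group of Agents and resets them after
// a fixed number of physics steps, standing in for the removed
// Academy-level Max Steps.
public class ResettableArea : MonoBehaviour
{
    public Agent[] agents;          // assumption: assigned in the Inspector
    public int maxAreaSteps = 5000; // arbitrary per-area episode length

    int m_StepCount;

    void FixedUpdate()
    {
        m_StepCount++;
        if (m_StepCount >= maxAreaSteps)
        {
            m_StepCount = 0;
            foreach (var agent in agents)
            {
                // Marking the agent done makes it reset on the next step.
                agent.Done();
            }
            // ... reposition shared scene objects for this area here ...
        }
    }
}
```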


## Migrating from ML-Agents toolkit v0.8 to v0.9
4 changes: 1 addition & 3 deletions docs/Training-Curriculum-Learning.md
@@ -96,9 +96,7 @@ the BigWallBrain in the Wall Jump environment.
Once our curriculum is defined, we have to use the reset parameters we defined
and modify the environment from the Agent's `AgentReset()` function. See
[WallJumpAgent.cs](https://github.com/Unity-Technologies/ml-agents/blob/master/UnitySDK/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs)
for an example. Note that if the Academy's __Max Steps__ is not set to some
positive number the environment will never be reset. The Academy must reset
for the environment to reset.
for an example.
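
A hedged sketch of that pattern; the agent class, its `wall` field, and the `big_wall_height` key are illustrative assumptions:

```csharp
using UnityEngine;
using MLAgents;

// Hypothetical wall-jump-style agent that applies a curriculum-driven
// reset parameter to the scene on each reset.
public class CurriculumWallAgent : Agent
{
    public Transform wall;   // assumption: the wall object in the scene
    Academy m_Academy;

    public override void InitializeAgent()
    {
        m_Academy = FindObjectOfType<Academy>();
    }

    public override void AgentReset()
    {
        // Read the current curriculum value and resize the wall with it.
        var height = m_Academy.resetParameters["big_wall_height"];
        var scale = wall.localScale;
        wall.localScale = new Vector3(scale.x, height, scale.z);
    }
}
```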

We will save this file into our metacurriculum folder with the name of its
corresponding Brain. For example, in the Wall Jump environment, there are two
5 changes: 0 additions & 5 deletions ml-agents-envs/mlagents/envs/base_unity_environment.py
@@ -17,11 +17,6 @@ def reset(
) -> AllBrainInfo:
pass

@property
@abstractmethod
def global_done(self):
pass

@property
@abstractmethod
def external_brains(self) -> Dict[str, BrainParameters]: