Develop remove academy done #2519

Merged 18 commits on Sep 17, 2019
Changes from 17 commits
244 changes: 18 additions & 226 deletions UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs

Large diffs are not rendered by default.

@@ -30,7 +30,6 @@ public void TestPerception2D()
var rayPer2D = go.AddComponent<RayPerception2D>();
var result = rayPer2D.Perceive(1f, angles,
tags);
Debug.Log(result.Count);
Assert.IsTrue(result.Count == angles.Length * (tags.Length + 2));
}
}
76 changes: 14 additions & 62 deletions UnitySDK/Assets/ML-Agents/Scripts/Academy.cs
@@ -37,10 +37,12 @@ public class EnvironmentConfiguration
[Tooltip("Height of the environment window in pixels.")]
public int height;

[Tooltip("Rendering quality of environment. (Higher is better quality.)")][Range(0, 5)]
[Tooltip("Rendering quality of environment. (Higher is better quality.)")]
[Range(0, 5)]
public int qualityLevel;

[Tooltip("Speed at which environment is run. (Higher is faster.)")][Range(1f, 100f)]
[Tooltip("Speed at which environment is run. (Higher is faster.)")]
[Range(1f, 100f)]
public float timeScale;

[Tooltip("Frames per second (FPS) engine attempts to maintain.")]
@@ -113,14 +115,6 @@ public abstract class Academy : MonoBehaviour

[FormerlySerializedAs("maxSteps")]
[SerializeField]
[Tooltip("Total number of steps per global episode.\nNon-positive " +
"values correspond to episodes without a maximum number of \n" +
"steps. Once the step counter reaches this maximum value, the " +
"environment will reset.")]
int m_MaxSteps;

[FormerlySerializedAs("trainingConfiguration")]
[SerializeField]
[Tooltip("The engine-level settings which correspond to rendering " +
"quality and engine speed during Training.")]
EnvironmentConfiguration m_TrainingConfiguration =
@@ -167,15 +161,6 @@ public abstract class Academy : MonoBehaviour
/// external Brain during reset via <see cref="SetIsInference"/>.
bool m_IsInference = true;

/// The done flag of the academy. When set to true, the academy will
/// call <see cref="AcademyReset"/> instead of <see cref="AcademyStep"/>
/// at step time. If true, all agents done flags will be set to true.
bool m_Done;

/// Whether the academy has reached the maximum number of steps for the
/// current episode.
bool m_MaxStepReached;

/// The number of episodes completed by the environment. Incremented
/// each time the environment is reset.
int m_EpisodeCount;
@@ -224,7 +209,7 @@ public abstract class Academy : MonoBehaviour
// Academy's maxStepReached, done and stepCount values. The agents rely
// on this event to update their own values of max step reached and done
// in addition to aligning on the step count of the global episode.
public event System.Action<bool, bool, int> AgentSetStatus;
public event System.Action<int> AgentSetStatus;

// Signals to all the agents at each environment step so they can reset
// if their flag has been set to done (assuming the agent has requested a
@@ -364,13 +349,14 @@ private void InitializeEnvironment()
// in inference mode.
m_IsInference = !m_IsCommunicatorOn;

BrainDecideAction += () => {};
DestroyAction += () => {};
AgentSetStatus += (m, d, i) => {};
AgentResetIfDone += () => {};
AgentSendState += () => {};
AgentAct += () => {};
AgentForceReset += () => {};
BrainDecideAction += () => { };
DestroyAction += () => { };
AgentSetStatus += (i) => { };
AgentResetIfDone += () => { };
AgentSendState += () => { };
AgentAct += () => { };
AgentForceReset += () => { };


// Configure the environment using the configurations provided by
// the developer in the Editor.
@@ -526,25 +512,6 @@ public int GetTotalStepCount()
return m_TotalStepCount;
}

/// <summary>
/// Sets the done flag to true.
/// </summary>
public void Done()
{
m_Done = true;
}

/// <summary>
/// Returns whether or not the academy is done.
/// </summary>
/// <returns>
/// <c>true</c>, if academy is done, <c>false</c> otherwise.
/// </returns>
public bool IsDone()
{
return m_Done;
}

/// <summary>
/// Returns whether or not the communicator is on.
/// </summary>
@@ -610,20 +577,7 @@ void EnvironmentStep()
ForcedFullReset();
}

if ((m_StepCount >= m_MaxSteps) && m_MaxSteps > 0)
{
m_MaxStepReached = true;
Done();
}

AgentSetStatus(m_MaxStepReached, m_Done, m_StepCount);

m_BrainBatcher.RegisterAcademyDoneFlag(m_Done);

if (m_Done)
{
EnvironmentReset();
}
AgentSetStatus(m_StepCount);

AgentResetIfDone();

@@ -646,8 +600,6 @@ void EnvironmentReset()
{
m_StepCount = 0;
m_EpisodeCount++;
m_Done = false;
m_MaxStepReached = false;
AcademyReset();
}

43 changes: 17 additions & 26 deletions UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
@@ -93,8 +93,8 @@ public AgentInfoProto ToProto()
{
var agentInfoProto = new AgentInfoProto
{
StackedVectorObservation = {stackedVectorObservation},
StoredVectorActions = {storedVectorActions},
StackedVectorObservation = { stackedVectorObservation },
StoredVectorActions = { storedVectorActions },
StoredTextActions = storedTextActions,
TextObservation = textObservation,
Reward = reward,
@@ -386,7 +386,7 @@ void OnDisable()
academy.AgentResetIfDone -= ResetIfDone;
academy.AgentSendState -= SendInfo;
academy.AgentAct -= AgentStep;
academy.AgentForceReset -= _AgentReset;
academy.AgentForceReset -= ForceReset;
}
}

@@ -890,6 +890,17 @@ public virtual void AgentReset()
{
}

/// <summary>
/// This method will forcefully reset the agent and will also reset the hasAlreadyReset flag.
/// This way, even if the agent was already in the process of resetting, it will be reset again
/// and will not send a Done flag at the next step.
/// </summary>
void ForceReset()
{
m_HasAlreadyReset = false;
_AgentReset();
}

/// <summary>
/// An internal reset method that updates internal data structures in
/// addition to calling <see cref="AgentReset"/>.
@@ -975,33 +986,13 @@ protected float ScaleAction(float rawAction, float min, float max)
}

/// <summary>
/// Sets the status of the agent.
/// Sets the status of the agent. Will request decisions or actions according
/// to the Academy's step count.
/// </summary>
/// <param name="academyMaxStep">If set to <c>true</c>
/// The agent must set maxStepReached.</param>
/// <param name="academyDone">If set to <c>true</c>
/// The agent must set done.</param>
/// <param name="academyStepCounter">Number of current steps in episode</param>
void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
void SetStatus(int academyStepCounter)
{
if (academyDone)
{
academyStepCounter = 0;
}

MakeRequests(academyStepCounter);
if (academyMaxStep)
{
m_MaxStepReached = true;
}

// If the Academy needs to reset, the agent should reset
// even if it reset recently.
if (academyDone)
{
Done();
m_HasAlreadyReset = false;
}
}

/// Signals the agent that it must reset if its done flag is set to true.
19 changes: 2 additions & 17 deletions UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs
@@ -41,9 +41,6 @@ public class Batcher
CommunicatorObjects.UnityRLOutput m_CurrentUnityRlOutput =
new CommunicatorObjects.UnityRLOutput();

/// Keeps track of the done flag of the Academy
bool m_AcademyDone;

/// Keeps track of last CommandProto sent by External
CommunicatorObjects.CommandProto m_Command;

@@ -99,17 +96,6 @@ public Batcher(ICommunicator communicator)
return initializationInput.RlInitializationInput;
}

/// <summary>
/// Registers the done flag of the academy to the next output to be sent
/// to the communicator.
/// </summary>
/// <param name="done">If set to <c>true</c>
/// The academy done state will be sent to External at the next Exchange.</param>
public void RegisterAcademyDoneFlag(bool done)
{
m_AcademyDone = done;
}

/// <summary>
/// Gets the command. Is used by the academy to get reset or quit signals.
/// </summary>
@@ -209,9 +195,8 @@ public void SubscribeBrain(string brainKey)
// must be sent
if (m_HasQueried.Values.All(x => x))
{
if (m_HasData.Values.Any(x => x) || m_AcademyDone)
if (m_HasData.Values.Any(x => x))
{
m_CurrentUnityRlOutput.GlobalDone = m_AcademyDone;
SendBatchedMessageHelper();
}

@@ -226,7 +211,7 @@ public void SubscribeBrain(string brainKey)
}

/// <summary>
/// Helper method that sends the curent UnityRLOutput, receives the next UnityInput and
/// Helper method that sends the current UnityRLOutput, receives the next UnityInput and
/// Applies the appropriate AgentAction to the agents.
/// </summary>
void SendBatchedMessageHelper()
5 changes: 1 addition & 4 deletions docs/Learning-Environment-Design-Academy.md
@@ -31,7 +31,7 @@ in your Academy subclass.
Implement an `AcademyReset()` function to alter the environment at the start of
each episode. For example, you might want to reset an Agent to its starting
position or move a goal to a random position. An environment resets when the
Academy `Max Steps` count is reached.
`reset()` method is called on the Python `UnityEnvironment`.
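
A minimal sketch of such a reset; the `RandomGoalAcademy` class and its `goal` field are hypothetical, not an API of the SDK:

```csharp
using UnityEngine;
using MLAgents;

// Hypothetical Academy subclass that moves a goal object to a random
// position at the start of each episode.
public class RandomGoalAcademy : Academy
{
    // Assumption: assigned in the Inspector.
    public Transform goal;

    public override void AcademyReset()
    {
        goal.position = new Vector3(
            Random.Range(-4f, 4f), 0.5f, Random.Range(-4f, 4f));
    }
}
```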

When you reset an environment, consider the factors that should change so that
training is generalizable to different conditions. For example, if you were
@@ -54,9 +54,6 @@ logic for creating them in the `AcademyStep()` function.
process. Any Brain added to the Broadcast Hub will be visible from the external
process. In addition, if the checkbox `Control` is checked, the Brain will be
controllable from the external process and will thus be trainable.
* `Max Steps` - Total number of steps per-episode. `0` corresponds to episodes
without a maximum number of steps. Once the step counter reaches maximum, the
environment will reset.
* `Configuration` - The engine-level settings which correspond to rendering
quality and engine speed.
* `Width` - Width of the environment window in pixels.
24 changes: 4 additions & 20 deletions docs/Learning-Environment-Design.md
@@ -28,9 +28,8 @@ Step-by-step procedures for running the training process are provided in the

Training and simulation proceed in steps orchestrated by the ML-Agents Academy
class. The Academy works with Agent objects in the scene to step
through the simulation. When either the Academy has reached its maximum number
of steps or all Agents in the scene are _done_, one training episode is
finished.
through the simulation. When all Agents in the scene are _done_,
one training episode is finished.

During training, the external Python training process communicates with the
Academy to run a series of episodes while it collects data and optimizes its
@@ -54,8 +53,6 @@ The ML-Agents Academy class orchestrates the agent simulation loop as follows:
Step` count or has otherwise marked itself as `done`. Optionally, you can set
an Agent to restart if it finishes before the end of an episode. In this
case, the Academy calls the `AgentReset()` function.
8. When the Academy reaches its own `Max Step` count, it starts the next episode
again by calling your Academy subclass's `AcademyReset()` function.

To create a training environment, extend the Academy and Agent classes to
implement the above methods. The `Agent.CollectObservations()` and
@@ -101,12 +98,6 @@ following methods (all are optional):
Agents have already collected their observations and chosen an action before
the Academy invokes this method.

The base Academy classes also defines several important properties that you can
set in the Unity Editor Inspector. For training, the most important of these
properties is `Max Steps`, which determines how long each training episode
lasts. Once the Academy's step counter reaches this value, it calls the
`AcademyReset()` function to start the next episode.

See [Academy](Learning-Environment-Design-Academy.md) for a complete list of
the Academy properties and their uses.

@@ -160,8 +151,7 @@ You must also determine how an Agent finishes its task or times out. You can
manually set an Agent to done in your `AgentAction()` function when the Agent
has finished (or irrevocably failed) its task by calling the `Done()` function.
You can also set the Agent's `Max Steps` property to a positive value and the
Agent will consider itself done after it has taken that many steps. When the
Academy reaches its own `Max Steps` count, it starts the next episode. If you
Agent will consider itself done after it has taken that many steps. If you
set an Agent's `ResetOnDone` property to true, then the Agent can attempt its
task several times in one episode. (Use the `Agent.AgentReset()` function to
prepare the Agent to start again.)
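
As a hedged sketch of this pattern, the agent below ends its episode on success or failure; the class name and both predicates are hypothetical:

```csharp
using MLAgents;

// Hypothetical Agent that calls Done() when its task succeeds or
// irrevocably fails; time-outs come from the Agent's own Max Steps.
public class ReachTargetAgent : Agent
{
    public override void AgentAction(float[] vectorAction, string textAction)
    {
        // ... apply vectorAction to move the agent here ...

        if (ReachedTarget())        // hypothetical success check
        {
            SetReward(1.0f);
            Done();                 // ends the episode for this Agent
        }
        else if (FellOffPlatform()) // hypothetical failure check
        {
            SetReward(-1.0f);
            Done();
        }
    }

    // Placeholder predicates; a real agent would test scene state.
    bool ReachedTarget() { return false; }
    bool FellOffPlatform() { return false; }
}
```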
Expand All @@ -185,13 +175,7 @@ to control the agent decision making process. The Academy defines several
properties that can be set differently for a training scene versus a regular
scene. The Academy's **Configuration** properties control rendering and time
scale. You can set the **Training Configuration** to minimize the time Unity
spends rendering graphics in order to speed up training. You may need to adjust
the other functional, Academy settings as well. For example, `Max Steps` should
be as short as possible for training — just long enough for the agent to
accomplish its task, with some extra time for "wandering" while it learns. In
regular scenes, you often do not want the Academy to reset the scene at all; if
so, `Max Steps` should be set to zero.

spends rendering graphics in order to speed up training.
When you create a training environment in Unity, you must set up the scene so
that it can be controlled by the external training process. Considerations
include:
1 change: 1 addition & 0 deletions docs/Migrating.md
@@ -8,6 +8,7 @@
#### Steps to Migrate
* `UnitySDK/Assets/ML-Agents/Scripts/Communicator.cs` and its class `Communicator` have been renamed to `UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs` and `ICommunicator` respectively.
* The `SpaceType` Enums `discrete`, and `continuous` have been renamed to `Discrete` and `Continuous`.
* We have removed the `Done` call as well as the capacity to set `Max Steps` on the Academy. It is therefore no longer possible to trigger an `AcademyReset` from C# (only from Python). If you want to reset the simulation after a fixed number of steps, or when an event in the simulation occurs, we recommend looking at our multi-agent example environments (such as BananaCollector), where an "Area" object is responsible for resetting its group of Agents; a minimal sketch follows below.
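
A minimal sketch of that recommendation; `ResettableArea`, its `agents` array, and the step budget are assumptions, not SDK types:

```csharp
using UnityEngine;
using MLAgents;

// Hypothetical "Area" that owns a group of Agents and resets them after
// a fixed number of physics steps, standing in for the removed
// Academy-level Max Steps.
public class ResettableArea : MonoBehaviour
{
    public Agent[] agents;          // assumption: assigned in the Inspector
    public int maxAreaSteps = 5000; // arbitrary per-area episode length

    int m_StepCount;

    void FixedUpdate()
    {
        m_StepCount++;
        if (m_StepCount >= maxAreaSteps)
        {
            m_StepCount = 0;
            foreach (var agent in agents)
            {
                // Marking the agent done makes it reset on the next step.
                agent.Done();
            }
            // ... reposition shared scene objects for this area here ...
        }
    }
}
```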


## Migrating from ML-Agents toolkit v0.8 to v0.9
4 changes: 1 addition & 3 deletions docs/Training-Curriculum-Learning.md
@@ -96,9 +96,7 @@ the BigWallBrain in the Wall Jump environment.
Once our curriculum is defined, we have to use the reset parameters we defined
and modify the environment from the Agent's `AgentReset()` function. See
[WallJumpAgent.cs](https://github.com/Unity-Technologies/ml-agents/blob/master/UnitySDK/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs)
for an example. Note that if the Academy's __Max Steps__ is not set to some
positive number the environment will never be reset. The Academy must reset
for the environment to reset.
for an example.
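
A hedged sketch of that pattern; the agent class, its `wall` field, and the `big_wall_height` key are illustrative assumptions:

```csharp
using UnityEngine;
using MLAgents;

// Hypothetical wall-jump-style agent that applies a curriculum-driven
// reset parameter to the scene on each reset.
public class CurriculumWallAgent : Agent
{
    public Transform wall;   // assumption: the wall object in the scene
    Academy m_Academy;

    public override void InitializeAgent()
    {
        m_Academy = FindObjectOfType<Academy>();
    }

    public override void AgentReset()
    {
        // Read the current curriculum value and resize the wall with it.
        var height = m_Academy.resetParameters["big_wall_height"];
        var scale = wall.localScale;
        wall.localScale = new Vector3(scale.x, height, scale.z);
    }
}
```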

We will save this file into our metacurriculum folder with the name of its
corresponding Brain. For example, in the Wall Jump environment, there are two
5 changes: 0 additions & 5 deletions ml-agents-envs/mlagents/envs/base_unity_environment.py
@@ -17,11 +17,6 @@ def reset(
) -> AllBrainInfo:
pass

@property
@abstractmethod
def global_done(self):
pass

@property
@abstractmethod
def external_brains(self) -> Dict[str, BrainParameters]: