More misc hybrid action followup #4777
```diff
@@ -19,9 +19,9 @@ namespace Unity.MLAgents
     internal struct AgentInfo
     {
         /// <summary>
-        /// Keeps track of the last vector action taken by the Brain.
```

**Contributor (Author):** ... and my war on "vector action"

```diff
+        /// Keeps track of the last actions taken by the Brain.
         /// </summary>
-        public ActionBuffers storedVectorActions;
```

**Contributor (Author):** public field in an internal struct, so this is OK to rename.

```diff
+        public ActionBuffers storedActions;

         /// <summary>
         /// For discrete control, specifies the actions that the agent cannot take.
```
```diff
@@ -52,17 +52,17 @@ internal struct AgentInfo
         public void ClearActions()
         {
-            storedVectorActions.Clear();
+            storedActions.Clear();
         }

         public void CopyActions(ActionBuffers actionBuffers)
         {
-            var continuousActions = storedVectorActions.ContinuousActions;
+            var continuousActions = storedActions.ContinuousActions;
             for (var i = 0; i < actionBuffers.ContinuousActions.Length; i++)
             {
                 continuousActions[i] = actionBuffers.ContinuousActions[i];
             }
-            var discreteActions = storedVectorActions.DiscreteActions;
+            var discreteActions = storedActions.DiscreteActions;
             for (var i = 0; i < actionBuffers.DiscreteActions.Length; i++)
             {
                 discreteActions[i] = actionBuffers.DiscreteActions[i];
```
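For readers new to the hybrid API, here is a minimal sketch of the `ActionBuffers` shape these renames revolve around: one continuous and one discrete segment behind a single struct. The raw-array constructor and `Clear()` call are the same ones visible in this diff; the demo class itself is hypothetical.

```csharp
using Unity.MLAgents.Actuators;

public static class ActionBuffersDemo
{
    public static void Run()
    {
        // An ActionBuffers wraps one continuous and one discrete segment;
        // this raw-array constructor is the overload LazyInitialize uses below.
        var buffers = new ActionBuffers(new float[2], new int[1]);

        // Both segments are writable through their indexers.
        buffers.ContinuousActions[0] = 0.5f;
        buffers.DiscreteActions[0] = 3;

        // Clear() zeroes both segments, as AgentInfo.ClearActions() does above.
        buffers.Clear();
    }
}
```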
```diff
@@ -438,7 +438,7 @@ public void LazyInitialize()
             InitializeSensors();
         }

-        m_Info.storedVectorActions = new ActionBuffers(
+        m_Info.storedActions = new ActionBuffers(
             new float[m_ActuatorManager.NumContinuousActions],
             new int[m_ActuatorManager.NumDiscreteActions]
         );
```
```diff
@@ -557,7 +557,7 @@ void NotifyAgentDone(DoneReason doneReason)
         m_CumulativeReward = 0f;
         m_RequestAction = false;
         m_RequestDecision = false;
-        m_Info.storedVectorActions.Clear();
+        m_Info.storedActions.Clear();
     }

     /// <summary>
```
```diff
@@ -886,12 +886,22 @@ public virtual void Initialize() { }
     /// <seealso cref="IActionReceiver.OnActionReceived"/>
     public virtual void Heuristic(in ActionBuffers actionsOut)
     {
+        var brainParams = m_PolicyFactory.BrainParameters;
+        var actionSpec = brainParams.ActionSpec;
+        // For continuous and discrete actions together, we don't need to fall back to the legacy method
+        if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
```

**Contributor (Author):** Better to avoid any confusion here - don't call the float[] version if we're using continuous + discrete (plus avoid munging the arrays)

```diff
+        {
+            Debug.LogWarning("Heuristic method called but not implemented. Clearing ActionBuffers.");
+            actionsOut.Clear();
+            return;
+        }
+
+        // Disable deprecation warnings so we can call the legacy overload.
+#pragma warning disable CS0618

         // The default implementation of Heuristic calls the
         // obsolete version for backward compatibility
-        switch (m_PolicyFactory.BrainParameters.VectorActionSpaceType)
+        switch (brainParams.VectorActionSpaceType)
         {
             case SpaceType.Continuous:
                 Heuristic(actionsOut.ContinuousActions.Array);
```
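Since the default `Heuristic` now just warns and clears for hybrid agents, user code that mixes both action types has to supply its own override. A minimal sketch of what that might look like; the `HybridAgent` class, input axis, and action layout are all assumptions, not part of this PR:

```csharp
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

// Hypothetical hybrid-action agent; the base Heuristic above now only warns
// and clears for this case, so the override must fill both segments itself.
public class HybridAgent : Agent
{
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        var continuousActions = actionsOut.ContinuousActions;
        var discreteActions = actionsOut.DiscreteActions;

        // Continuous slot 0: steering in [-1, 1] (axis name is an assumption).
        continuousActions[0] = Input.GetAxis("Horizontal");

        // Discrete branch 0: 1 = jump, 0 = do nothing (layout is an assumption).
        discreteActions[0] = Input.GetKey(KeyCode.Space) ? 1 : 0;
    }
}
```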
```diff
@@ -1038,7 +1048,7 @@ void SendInfoToBrain()
                 CollectObservations(collectObservationsSensor);
             }
         }
-        using (TimerStack.Instance.Scoped("CollectDiscreteActionMasks"))
+        using (TimerStack.Instance.Scoped("WriteActionMask"))
```

**Contributor (Author):** Old name

```diff
         {
             m_ActuatorManager.WriteActionMask();
         }
```
```diff
@@ -1135,7 +1145,7 @@ public ReadOnlyCollection<float> GetObservations()
     }

     /// <summary>
-    /// Implement `CollectDiscreteActionMasks()` to collect the masks for discrete
+    /// Implement `WriteDiscreteActionMask()` to collect the masks for discrete
     /// actions. When using discrete actions, the agent will not perform the masked
     /// action.
     /// </summary>

@@ -1144,7 +1154,7 @@ public ReadOnlyCollection<float> GetObservations()
     /// </param>
     /// <remarks>
     /// When using Discrete Control, you can prevent the Agent from using a certain
-    /// action by masking it with <see cref="DiscreteActionMasker.SetMask(int, IEnumerable{int})"/>.
+    /// action by masking it with <see cref="IDiscreteActionMask.WriteMask(int, IEnumerable{int})"/>.
     ///
     /// See [Agents - Actions] for more information on masking actions.
     ///
```
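For reference, an override against the renamed API might look like the following sketch. The `WallAgent` class, the wall flag, and the branch layout are hypothetical; `WriteMask(int, IEnumerable{int})` is the signature the updated `<see cref>` points at:

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

// Hypothetical agent showing the renamed masking API referenced above.
public class WallAgent : Agent
{
    // Assumed to be updated elsewhere, e.g. from a collision check.
    bool m_AgainstLeftWall;

    public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
    {
        if (m_AgainstLeftWall)
        {
            // Make action 1 ("move left" in this assumed layout) of branch 0
            // unavailable for the current step.
            actionMask.WriteMask(0, new[] { 1 });
        }
    }
}
```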
```diff
@@ -1168,30 +1178,29 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
     /// on the provided action.
     /// </summary>
     /// <remarks>
-    /// An action is passed to this function in the form of an array vector. Your
-    /// implementation must use the array to direct the agent's behavior for the
+    /// An action is passed to this function in the form of an <seealso cref="ActionBuffers"/>.
+    /// Your implementation must use the array to direct the agent's behavior for the
     /// current step.
     ///
-    /// You decide how many elements you need in the action array to control your
+    /// You decide how many elements you need in the ActionBuffers to control your
     /// agent and what each element means. For example, if you want to apply a
     /// force to move an agent around the environment, you can arbitrarily pick
-    /// three values in the action array to use as the force components. During
-    /// training, the agent's policy learns to set those particular elements of
+    /// three values in the ActionBuffers.ContinuousActions array to use as the force components.
+    /// During training, the agent's policy learns to set those particular elements of
     /// the array to maximize the training rewards the agent receives. (Of course,
     /// if you implement a <seealso cref="Heuristic(in ActionBuffers)"/> function, it must use the same
     /// elements of the action array for the same purpose since there is no learning
     /// involved.)
     ///
-    /// Actions for an agent can be either *Continuous* or *Discrete*. Specify which
-    /// type of action space an agent uses, along with the size of the action array,
-    /// in the <see cref="BrainParameters"/> of the agent's associated
+    /// An Agent can use continuous and/or discrete actions. Configure this along with the size
+    /// of the action array, in the <see cref="BrainParameters"/> of the agent's associated
     /// <see cref="BehaviorParameters"/> component.
     ///
-    /// When an agent uses the continuous action space, the values in the action
+    /// When an agent uses continuous actions, the values in the ActionBuffers.ContinuousActions
     /// array are floating point numbers. You should clamp the values to the range,
     /// -1..1, to increase numerical stability during training.
     ///
-    /// When an agent uses the discrete action space, the values in the action array
+    /// When an agent uses discrete actions, the values in the ActionBuffers.DiscreteActions array
     /// are integers that each represent a specific, discrete action. For example,
     /// you could define a set of discrete actions such as:
    ///
```
```diff
@@ -1204,24 +1213,23 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
     /// </code>
     ///
     /// When making a decision, the agent picks one of the five actions and puts the
-    /// corresponding integer value in the action vector. For example, if the agent
-    /// decided to move left, the action vector parameter would contain an array with
+    /// corresponding integer value in the ActionBuffers.DiscreteActions array. For example, if the agent
+    /// decided to move left, the ActionBuffers.DiscreteActions parameter would be an array with
     /// a single element with the value 1.
     ///
     /// You can define multiple sets, or branches, of discrete actions to allow an
     /// agent to perform simultaneous, independent actions. For example, you could
     /// use one branch for movement and another branch for throwing a ball left, right,
     /// up, or down, to allow the agent to do both in the same step.
     ///
-    /// The action vector of a discrete action space contains one element for each
-    /// branch. The value of each element is the integer representing the chosen
-    /// action for that branch. The agent always chooses one action for each
-    /// branch.
+    /// The ActionBuffers.DiscreteActions array of an agent with discrete actions contains one
+    /// element for each branch. The value of each element is the integer representing the
+    /// chosen action for that branch. The agent always chooses one action for each branch.
     ///
-    /// When you use the discrete action space, you can prevent the training process
+    /// When you use discrete actions, you can prevent the training process
     /// or the neural network model from choosing specific actions in a step by
-    /// implementing the <see cref="CollectDiscreteActionMasks(DiscreteActionMasker)"/>
-    /// function. For example, if your agent is next to a wall, you could mask out any
+    /// implementing the <see cref="WriteDiscreteActionMask(IDiscreteActionMask)"/>
+    /// method. For example, if your agent is next to a wall, you could mask out any
     /// actions that would result in the agent trying to move into the wall.
     ///
     /// For more information about implementing agent actions see [Agents - Actions].
```
```diff
@@ -1233,6 +1241,14 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
     /// </param>
     public virtual void OnActionReceived(ActionBuffers actions)
     {
+        var actionSpec = m_PolicyFactory.BrainParameters.ActionSpec;
+        // For continuous and discrete actions together, we don't need to fall back to the legacy method
+        if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
+        {
+            // Nothing implemented.
+            return;
+        }
+
         if (!actions.ContinuousActions.IsEmpty())
         {
             m_LegacyActionCache = actions.ContinuousActions.Array;
```
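With the base method now returning without doing anything for hybrid specs, a hybrid agent consumes both segments in its own override. A hedged sketch; the class name, Rigidbody wiring, and action layout are assumptions for illustration:

```csharp
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

// Hypothetical hybrid agent; the base OnActionReceived above early-returns
// when both action types are present, so the override reads the buffers directly.
public class HybridDriver : Agent
{
    public Rigidbody body;  // assumed to be wired up in the Inspector

    public override void OnActionReceived(ActionBuffers actions)
    {
        // Continuous slot 0 as forward thrust, clamped to [-1, 1] as the
        // remarks above recommend.
        var thrust = Mathf.Clamp(actions.ContinuousActions[0], -1f, 1f);
        body.AddForce(transform.forward * thrust);

        // Discrete branch 0 as a brake toggle: 0 = coast, 1 = stop.
        if (actions.DiscreteActions[0] == 1)
        {
            body.velocity = Vector3.zero;
        }
    }
}
```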
---

```diff
@@ -42,7 +42,14 @@ public virtual void OnActionReceived(float[] vectorAction) { }
     [Obsolete("GetAction has been deprecated, please use GetStoredActionBuffers instead.")]
     public float[] GetAction()
     {
-        var storedAction = m_Info.storedVectorActions;
+        var actionSpec = m_PolicyFactory.BrainParameters.ActionSpec;
+        // For continuous and discrete actions together, this shouldn't be called because we can only return one.
+        if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
+        {
+            Debug.LogWarning("Agent.GetAction() when both continuous and discrete actions are in use. Use Agent.GetStoredActionBuffers() instead.");
```

**Contributor (Author):** Not sure if we should throw or log here.

```diff
+        }
+
+        var storedAction = m_Info.storedActions;
         if (!storedAction.ContinuousActions.IsEmpty())
         {
             return storedAction.ContinuousActions.Array;
```
**Contributor (Author):** Continuing my war on space.
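For call sites hitting the new warning, migration means reading both segments from the stored buffers instead of a single `float[]`. A hypothetical sketch, assuming `agent` references an initialized `Agent` with hybrid actions; `GetStoredActionBuffers()` is the replacement the deprecation message names:

```csharp
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

public static class StoredActionsDemo
{
    // `agent` is assumed to be an initialized Agent with hybrid actions.
    public static void LogStoredActions(Agent agent)
    {
        // GetStoredActionBuffers() exposes both segments, so nothing has to
        // be collapsed into a single float[] the way GetAction() would require.
        var stored = agent.GetStoredActionBuffers();

        if (stored.ContinuousActions.Length > 0)
        {
            Debug.Log($"First continuous action: {stored.ContinuousActions[0]}");
        }
        if (stored.DiscreteActions.Length > 0)
        {
            Debug.Log($"First discrete action: {stored.DiscreteActions[0]}");
        }
    }
}
```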