Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions com.unity.ml-agents/Runtime/Academy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public class Academy : IDisposable
/// </item>
/// <item>
/// <term>1.3.0</term>
/// <description>Support action spaces with both continuous and discrete actions.</description>
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Continuing my war on space.

/// <description>Support both continuous and discrete actions.</description>
/// </item>
/// </list>
/// </remarks>
Expand Down Expand Up @@ -590,7 +590,7 @@ void EnvironmentReset()
/// NNModel and the InferenceDevice as provided.
/// </summary>
/// <param name="model">The NNModel the ModelRunner must use.</param>
/// <param name="actionSpec"> Description of the action spaces for the Agent.</param>
/// <param name="actionSpec"> Description of the actions for the Agent.</param>
/// <param name="inferenceDevice">
/// The inference device (CPU or GPU) the ModelRunner will use.
/// </param>
Expand Down
13 changes: 6 additions & 7 deletions com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
namespace Unity.MLAgents.Actuators
{
/// <summary>
/// Defines the structure of an Action Space to be used by the Actuator system.
/// Defines the structure of the actions to be used by the Actuator system.
/// </summary>
[Serializable]
public struct ActionSpec
Expand All @@ -15,9 +15,9 @@ public struct ActionSpec
int m_NumContinuousActions;

/// <summary>
/// An array of branch sizes for our action space.
/// An array of branch sizes for discrete actions.
///
/// For an IActuator that uses a Discrete <see cref="SpaceType"/>, the number of
/// For an IActuator that uses discrete actions, the number of
/// branches is the Length of the Array and each index contains the branch size.
/// The cumulative sum of the total number of discrete actions can be retrieved
/// by the <see cref="SumOfDiscreteBranchSizes"/> property.
Expand All @@ -27,12 +27,12 @@ public struct ActionSpec
public int[] BranchSizes;

/// <summary>
/// The number of actions for a Continuous <see cref="SpaceType"/>.
/// The number of continuous actions that an Agent can take.
/// </summary>
public int NumContinuousActions
{
    // Plain read/write accessor over the m_NumContinuousActions backing field.
    get => m_NumContinuousActions;
    set => m_NumContinuousActions = value;
}

/// <summary>
/// The number of branches for a Discrete <see cref="SpaceType"/>.
/// The number of branches for discrete actions that an Agent can take.
/// </summary>
public int NumDiscreteActions
{
    // A null BranchSizes array is reported as zero branches; otherwise the
    // branch count is the length of the array.
    get { return BranchSizes?.Length ?? 0; }
}

Expand All @@ -57,12 +57,11 @@ public static ActionSpec MakeContinuous(int numActions)
/// Creates a Discrete <see cref="ActionSpec"/> with the array of branch sizes that
/// represents the action space.
/// </summary>
/// <param name="branchSizes">The array of branch sizes for the discrete action space. Each index
/// <param name="branchSizes">The array of branch sizes for the discrete actions. Each index
/// contains the number of actions available for that branch.</param>
/// <returns>A Discrete ActionSpec initialized with the array of branch sizes.</returns>
public static ActionSpec MakeDiscrete(params int[] branchSizes)
{
    // Discrete-only spec: 0 is passed as the first constructor argument
    // (presumably the continuous-action count -- confirm against the
    // ActionSpec constructor) and the branch sizes are handed over unchanged.
    // branchSizes is intentionally not dereferenced here; NumDiscreteActions
    // already treats a null BranchSizes array as zero branches.
    return new ActionSpec(0, branchSizes);
}
Expand Down
2 changes: 1 addition & 1 deletion com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public abstract class ActuatorComponent : MonoBehaviour
public abstract IActuator CreateActuator();

/// <summary>
/// The specification of the Action space for this ActuatorComponent.
/// The specification of the possible actions for this ActuatorComponent.
/// This must produce the same results as the corresponding IActuator's ActionSpec.
/// </summary>
/// <seealso cref="ActionSpec"/>
Expand Down
5 changes: 2 additions & 3 deletions com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ void ReadyActuatorsForExecution()
}

/// <summary>
/// This method validates that all <see cref="IActuator"/>s have unique names and equivalent action space types
/// This method validates that all <see cref="IActuator"/>s have unique names
/// if the `DEBUG` preprocessor macro is defined, and allocates the appropriate buffers to manage the actions for
/// all of the <see cref="IActuator"/>s that may live on a particular object.
/// </summary>
Expand All @@ -90,7 +90,6 @@ internal void ReadyActuatorsForExecution(IList<IActuator> actuators, int numCont
}
#if DEBUG
// Make sure the names are actually unique
// Make sure all Actuators have the same SpaceType
ValidateActuators();
#endif

Expand Down Expand Up @@ -272,7 +271,7 @@ void SortActuators()
}

/// <summary>
/// Validates that the IActuators managed by this object have unique names and equivalent action space types.
/// Validates that the IActuators managed by this object have unique names.
/// Each Actuator needs to have a unique name in order for this object to ensure that the storage of action
/// buffers, and execution of Actuators remains deterministic across different sessions of running.
/// </summary>
Expand Down
1 change: 1 addition & 0 deletions com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ public void Clear()
/// <summary>
/// Check if the <see cref="ActionBuffers"/> is empty.
/// </summary>
/// <returns>Whether the buffers are empty.</returns>
public bool IsEmpty()
{
return ContinuousActions.IsEmpty() && DiscreteActions.IsEmpty();
Expand Down
2 changes: 1 addition & 1 deletion com.unity.ml-agents/Runtime/Actuators/IActuator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace Unity.MLAgents.Actuators
public interface IActuator : IActionReceiver
{
/// <summary>
/// The specification of the Action space for this IActuator.
/// The specification of the actions for this IActuator.
/// </summary>
/// <seealso cref="ActionSpec"/>
ActionSpec ActionSpec { get; }
Expand Down
76 changes: 46 additions & 30 deletions com.unity.ml-agents/Runtime/Agent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ namespace Unity.MLAgents
internal struct AgentInfo
{
/// <summary>
/// Keeps track of the last vector action taken by the Brain.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... and my war on "vector action"

/// Keeps track of the last actions taken by the Brain.
/// </summary>
public ActionBuffers storedVectorActions;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

public field in an internal struct, so this is OK to rename.

public ActionBuffers storedActions;

/// <summary>
/// For discrete control, specifies the actions that the agent cannot take.
Expand Down Expand Up @@ -52,17 +52,17 @@ internal struct AgentInfo

/// <summary>
/// Resets the stored actions by calling Clear() on the stored ActionBuffers.
/// </summary>
// NOTE(review): this hunk appears to show both sides of the
// storedVectorActions -> storedActions field rename; confirm that only the
// storedActions call is meant to remain.
public void ClearActions()
{
storedVectorActions.Clear();
storedActions.Clear();
}

public void CopyActions(ActionBuffers actionBuffers)
{
var continuousActions = storedVectorActions.ContinuousActions;
var continuousActions = storedActions.ContinuousActions;
for (var i = 0; i < actionBuffers.ContinuousActions.Length; i++)
{
continuousActions[i] = actionBuffers.ContinuousActions[i];
}
var discreteActions = storedVectorActions.DiscreteActions;
var discreteActions = storedActions.DiscreteActions;
for (var i = 0; i < actionBuffers.DiscreteActions.Length; i++)
{
discreteActions[i] = actionBuffers.DiscreteActions[i];
Expand Down Expand Up @@ -438,7 +438,7 @@ public void LazyInitialize()
InitializeSensors();
}

m_Info.storedVectorActions = new ActionBuffers(
m_Info.storedActions = new ActionBuffers(
new float[m_ActuatorManager.NumContinuousActions],
new int[m_ActuatorManager.NumDiscreteActions]
);
Expand Down Expand Up @@ -557,7 +557,7 @@ void NotifyAgentDone(DoneReason doneReason)
m_CumulativeReward = 0f;
m_RequestAction = false;
m_RequestDecision = false;
m_Info.storedVectorActions.Clear();
m_Info.storedActions.Clear();
}

/// <summary>
Expand Down Expand Up @@ -886,12 +886,22 @@ public virtual void Initialize() { }
/// <seealso cref="IActionReceiver.OnActionReceived"/>
public virtual void Heuristic(in ActionBuffers actionsOut)
{
var brainParams = m_PolicyFactory.BrainParameters;
var actionSpec = brainParams.ActionSpec;
// For continuous and discrete actions together, we don't need to fall back to the legacy method
if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better to avoid any confusion here - don't call the float[] version if we're using continuous + discrete (plus avoid munging the arrays)

{
Debug.LogWarning("Heuristic method called but not implemented. Clearing ActionBuffers.");
actionsOut.Clear();
return;
}

// Disable deprecation warnings so we can call the legacy overload.
#pragma warning disable CS0618

// The default implementation of Heuristic calls the
// obsolete version for backward compatibility
switch (m_PolicyFactory.BrainParameters.VectorActionSpaceType)
switch (brainParams.VectorActionSpaceType)
{
case SpaceType.Continuous:
Heuristic(actionsOut.ContinuousActions.Array);
Expand Down Expand Up @@ -1038,7 +1048,7 @@ void SendInfoToBrain()
CollectObservations(collectObservationsSensor);
}
}
using (TimerStack.Instance.Scoped("CollectDiscreteActionMasks"))
using (TimerStack.Instance.Scoped("WriteActionMask"))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Old name

{
m_ActuatorManager.WriteActionMask();
}
Expand Down Expand Up @@ -1135,7 +1145,7 @@ public ReadOnlyCollection<float> GetObservations()
}

/// <summary>
/// Implement `CollectDiscreteActionMasks()` to collects the masks for discrete
/// Implement `WriteDiscreteActionMask()` to collect the masks for discrete
/// actions. When using discrete actions, the agent will not perform the masked
/// action.
/// </summary>
Expand All @@ -1144,7 +1154,7 @@ public ReadOnlyCollection<float> GetObservations()
/// </param>
/// <remarks>
/// When using Discrete Control, you can prevent the Agent from using a certain
/// action by masking it with <see cref="DiscreteActionMasker.SetMask(int, IEnumerable{int})"/>.
/// action by masking it with <see cref="IDiscreteActionMask.WriteMask(int, IEnumerable{int})"/>.
///
/// See [Agents - Actions] for more information on masking actions.
///
Expand All @@ -1168,30 +1178,29 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
/// on the provided action.
/// </summary>
/// <remarks>
/// An action is passed to this function in the form of an array vector. Your
/// implementation must use the array to direct the agent's behavior for the
/// An action is passed to this function in the form of an <see cref="ActionBuffers"/>.
/// Your implementation must use these buffers to direct the agent's behavior for the
/// current step.
///
/// You decide how many elements you need in the action array to control your
/// You decide how many elements you need in the ActionBuffers to control your
/// agent and what each element means. For example, if you want to apply a
/// force to move an agent around the environment, you can arbitrarily pick
/// three values in the action array to use as the force components. During
/// training, the agent's policy learns to set those particular elements of
/// three values in ActionBuffers.ContinuousActions array to use as the force components.
/// During training, the agent's policy learns to set those particular elements of
/// the array to maximize the training rewards the agent receives. (Of course,
/// if you implement a <see cref="Heuristic(in ActionBuffers)"/> function, it must use the same
/// elements of the action array for the same purpose since there is no learning
/// involved.)
///
/// Actions for an agent can be either *Continuous* or *Discrete*. Specify which
/// type of action space an agent uses, along with the size of the action array,
/// in the <see cref="BrainParameters"/> of the agent's associated
/// An Agent can use continuous and/or discrete actions. Configure this along with the size
/// of the action array, in the <see cref="BrainParameters"/> of the agent's associated
/// <see cref="BehaviorParameters"/> component.
///
/// When an agent uses the continuous action space, the values in the action
/// When an agent uses continuous actions, the values in the ActionBuffers.ContinuousActions
/// array are floating point numbers. You should clamp the values to the range,
/// -1..1, to increase numerical stability during training.
///
/// When an agent uses the discrete action space, the values in the action array
/// When an agent uses discrete actions, the values in the ActionBuffers.DiscreteActions array
/// are integers that each represent a specific, discrete action. For example,
/// you could define a set of discrete actions such as:
///
Expand All @@ -1204,24 +1213,23 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
/// </code>
///
/// When making a decision, the agent picks one of the five actions and puts the
/// corresponding integer value in the action vector. For example, if the agent
/// decided to move left, the action vector parameter would contain an array with
/// corresponding integer value in the ActionBuffers.DiscreteActions array. For example, if the agent
/// decided to move left, the ActionBuffers.DiscreteActions parameter would be an array with
/// a single element with the value 1.
///
/// You can define multiple sets, or branches, of discrete actions to allow an
/// agent to perform simultaneous, independent actions. For example, you could
/// use one branch for movement and another branch for throwing a ball left, right,
/// up, or down, to allow the agent to do both in the same step.
///
/// The action vector of a discrete action space contains one element for each
/// branch. The value of each element is the integer representing the chosen
/// action for that branch. The agent always chooses one action for each
/// branch.
/// The ActionBuffers.DiscreteActions array of an agent with discrete actions contains one
/// element for each branch. The value of each element is the integer representing the
/// chosen action for that branch. The agent always chooses one action for each branch.
///
/// When you use the discrete action space, you can prevent the training process
/// When you use discrete actions, you can prevent the training process
/// or the neural network model from choosing specific actions in a step by
/// implementing the <see cref="CollectDiscreteActionMasks(DiscreteActionMasker)"/>
/// function. For example, if your agent is next to a wall, you could mask out any
/// implementing the <see cref="WriteDiscreteActionMask(IDiscreteActionMask)"/>
/// method. For example, if your agent is next to a wall, you could mask out any
/// actions that would result in the agent trying to move into the wall.
///
/// For more information about implementing agent actions see [Agents - Actions].
Expand All @@ -1233,6 +1241,14 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
/// </param>
public virtual void OnActionReceived(ActionBuffers actions)
{
var actionSpec = m_PolicyFactory.BrainParameters.ActionSpec;
// For continuous and discrete actions together, we don't need to fall back to the legacy method
if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
{
// Nothing implemented.
return;
}

if (!actions.ContinuousActions.IsEmpty())
{
m_LegacyActionCache = actions.ContinuousActions.Array;
Expand Down
9 changes: 8 additions & 1 deletion com.unity.ml-agents/Runtime/Agent.deprecated.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,14 @@ public virtual void OnActionReceived(float[] vectorAction) { }
[Obsolete("GetAction has been deprecated, please use GetStoredActionBuffers instead.")]
public float[] GetAction()
{
var storedAction = m_Info.storedVectorActions;
var actionSpec = m_PolicyFactory.BrainParameters.ActionSpec;
// For continuous and discrete actions together, this shouldn't be called because we can only return one.
if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
{
Debug.LogWarning("Agent.GetAction() when both continuous and discrete actions are in use. Use Agent.GetStoredActionBuffers() instead.");
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if we should throw or log here.

}

var storedAction = m_Info.storedActions;
if (!storedAction.ContinuousActions.IsEmpty())
{
return storedAction.ContinuousActions.Array;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ public static bool IsAnalyticsEnabled()
/// <param name="behaviorName">The BehaviorName of the Agent using the model</param>
/// <param name="inferenceDevice">Whether inference is being performed on the CPU or GPU</param>
/// <param name="sensors">List of ISensors for the Agent. Used to generate information about the observation space.</param>
/// <param name="actionSpec">ActionSpec for the Agent. Used to generate information about the action space.</param>
/// <param name="actionSpec">ActionSpec for the Agent. Used to generate information about the actions.</param>
/// <returns></returns>
public static void InferenceModelSet(
NNModel nnModel,
Expand Down
Loading