83 changes: 47 additions & 36 deletions UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
@@ -8,7 +8,7 @@
namespace MLAgents
{
/// <summary>
/// Struct that contains all the information for an Agent, including its
/// observations, actions and current status, that is sent to the Brain.
/// </summary>
public struct AgentInfo
@@ -120,15 +120,26 @@ public CommunicatorObjects.AgentInfoProto ToProto()
agentInfoProto.VisualObservations.Add(
ByteString.CopyFrom(obs.EncodeToPNG())
);
}
return agentInfoProto;
}

/// <summary>
/// Remove the visual observations from memory. Call at each timestep
/// to avoid memory leaks.
/// </summary>
public void ClearVisualObs()
{
foreach (Texture2D obs in visualObservations)
{
Object.Destroy(obs);
}
visualObservations.Clear();
}
}
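Texture2D observations hold native engine memory that the garbage collector never reclaims, which is why ClearVisualObs must call Object.Destroy before clearing the list. A minimal sketch of the intended serialize-then-release order at a call site — only ToProto() and ClearVisualObs() are API from this diff; the method, dictionary, and output buffer are hypothetical:

```csharp
using System.Collections.Generic;
using MLAgents;

public static class VisualObsExample
{
    // Sketch: serialize each agent's info first, then free its visual
    // observations so the Texture2D native memory is released.
    public static void SendAndRelease(
        Dictionary<Agent, AgentInfo> agentInfos,
        List<CommunicatorObjects.AgentInfoProto> output)
    {
        foreach (var pair in agentInfos)
        {
            output.Add(pair.Value.ToProto());  // encodes textures to PNG bytes
            // Safe only after serialization: destroys each Texture2D and
            // clears the (shared) list.
            pair.Value.ClearVisualObs();
        }
    }
}
```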

/// <summary>
/// Struct that contains the action information sent from the Brain to the
/// Agent.
/// </summary>
public struct AgentAction
@@ -141,7 +152,7 @@ public struct AgentAction
}

/// <summary>
/// Struct that contains all the Agent-specific parameters provided in the
/// Editor. This excludes the Brain linked to the Agent since it can be
/// modified programmatically.
/// </summary>
@@ -153,7 +164,7 @@ public class AgentParameters
/// observations.
/// </summary>
public List<Camera> agentCameras = new List<Camera>();

/// <summary>
/// The list of the RenderTextures the agent uses for visual
/// observations.
@@ -162,7 +173,7 @@ public class AgentParameters


/// <summary>
/// The maximum number of steps the agent takes before being done.
/// </summary>
/// <remarks>
/// If set to 0, the agent can only be set to done programmatically (or
@@ -184,7 +195,7 @@ public class AgentParameters
public bool resetOnDone = true;

/// <summary>
/// Whether to enable On Demand Decisions or make a decision at
/// every step.
/// </summary>
public bool onDemandDecision;
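For context, a hedged sketch of how an on-demand agent typically drives its own decisions — RequestDecision() is the public Agent entry point, while the subclass and trigger condition below are illustrative:

```csharp
using MLAgents;

// Sketch: with onDemandDecision enabled, the agent explicitly asks for a
// decision instead of receiving one every academy step.
public class TurnBasedAgent : Agent   // hypothetical subclass
{
    void FixedUpdate()
    {
        if (IsMyTurn())          // hypothetical game-specific condition
        {
            RequestDecision();   // collect observations and request an action
        }
    }

    bool IsMyTurn() { return true; }  // placeholder
}
```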
@@ -199,8 +210,8 @@ public class AgentParameters

/// <summary>
/// Agent Monobehavior class that is attached to a Unity GameObject, making it
/// an Agent. An agent produces observations and takes actions in the
/// environment. Observations are determined by the cameras attached
/// to the agent in addition to the vector observations implemented by the
/// user in <see cref="CollectObservations"/>. On the other hand, actions
/// are determined by decisions produced by a linked Brain. Currently, this
@@ -213,34 +224,34 @@ public class AgentParameters
/// however, an agent need not send its observation at every step since very
/// little may have changed between successive steps. Currently, how often an
/// agent updates its brain with a fresh observation is determined by the
/// Academy.
///
/// At any step, an agent may be considered <see cref="done"/>.
/// This could occur due to a variety of reasons:
/// - The agent reached an end state within its environment.
/// - The agent reached the maximum # of steps (i.e. timed out).
/// - The academy reached the maximum # of steps (forced agent to be done).
///
/// Here, an agent reaches an end state if it completes its task successfully
/// or somehow fails along the way. In the case where an agent is done before
/// the academy, it either resets and restarts, or just lingers until the
/// academy is done.
///
/// An important note regarding steps and episodes is due. Here, an agent step
/// corresponds to an academy step, which also corresponds to a Unity
/// environment step (i.e. each FixedUpdate call). This is not the case for
/// episodes. The academy controls the global episode count and each agent
/// controls its own local episode count and can reset and start a new local
/// episode independently (based on its own experience). Thus an academy
/// (global) episode can be viewed as the upper bound on an agent's episode
/// length; within a single global episode, an agent may have completed
/// multiple local episodes. Consequently, if an agent's max step is
/// set to a value larger than the academy max steps value, then the academy
/// value takes precedence (since the agent max step will never be reached).
///
/// Lastly, note that at any step the brain linked to the agent is allowed to
/// change programmatically with <see cref="GiveBrain"/>.
///
/// Implementation-wise, it is required that this class is extended and the
/// virtual methods overridden. For sample implementations of agent behavior,
/// see the Examples/ directory within this Unity project.
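To make that extension contract concrete, here is a minimal, hedged sketch of an Agent subclass. CollectObservations, AgentAction, AgentReset, AddVectorObs, AddReward, and Done are the Agent API described in this file; the class name, fields, reward scheme, and reset logic are illustrative only:

```csharp
using UnityEngine;
using MLAgents;

// Minimal illustrative subclass: chase a target, reward on contact.
public class ChaserAgent : Agent
{
    public Transform target;   // assigned in the Editor

    public override void CollectObservations()
    {
        AddVectorObs(transform.position);   // 3 floats
        AddVectorObs(target.position);      // 3 floats
    }

    public override void AgentAction(float[] vectorAction, string textAction)
    {
        // Two continuous actions interpreted as x/z movement.
        transform.Translate(vectorAction[0], 0f, vectorAction[1]);
        if (Vector3.Distance(transform.position, target.position) < 1f)
        {
            AddReward(1f);
            Done();   // ends this agent's local episode
        }
    }

    public override void AgentReset()
    {
        transform.position = Vector3.zero;   // illustrative respawn
    }
}
```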
@@ -252,7 +263,7 @@ public abstract class Agent : MonoBehaviour
{
/// <summary>
/// The Brain attached to this agent. A brain can be attached either
/// directly from the Editor through AgentEditor or
/// programmatically through <see cref="GiveBrain"/>. It is OK for an agent
/// to not have a brain, as long as no decision is requested.
/// </summary>
@@ -523,7 +534,7 @@ void ResetData()
actionMasker = new ActionMasker(param);
// If we haven't initialized vectorActions, initialize to 0. This should only
// happen during the creation of the Agent. In subsequent episodes, vectorAction
// should stay the previous action before the Done(), so that it is properly recorded.
if (action.vectorActions == null)
{
if (param.vectorActionSpaceType == SpaceType.continuous)
@@ -598,9 +609,9 @@ void SendInfoToBrain()
brain.brainParameters.vectorObservationSize,
info.vectorObservation.Count));
}

Utilities.ShiftLeft(info.stackedVectorObservation, param.vectorObservationSize);
Utilities.ReplaceRange(info.stackedVectorObservation, info.vectorObservation,
info.stackedVectorObservation.Count - info.vectorObservation.Count);

info.visualObservations.Clear();
@@ -624,7 +635,7 @@ void SendInfoToBrain()
param.cameraResolutions[i].height);
info.visualObservations.Add(obsTexture);
}

//Then add all renderTextures
var camCount = agentParameters.agentCameras.Count;
for (int i = 0; i < agentParameters.agentRenderTextures.Count; i++)
@@ -653,13 +664,13 @@ void SendInfoToBrain()

/// <summary>
/// Collects the (vector, visual, text) observations of the agent.
/// The agent observation describes the current environment from the
/// perspective of the agent.
/// </summary>
/// <remarks>
/// Simply put, an agent's observation is any environment information that helps
/// the Agent achieve its goal. For example, for a fighting Agent, its
/// observation could include distances to friends or enemies, or the
/// current level of ammunition at its disposal.
/// Recall that an Agent may attach vector, visual or textual observations.
/// Vector observations are added by calling the provided helper methods:
@@ -678,7 +689,7 @@ void SendInfoToBrain()
/// needs to match the vectorObservationSize attribute of the linked Brain.
/// Visual observations are implicitly added from the cameras attached to
/// the Agent.
/// Lastly, textual observations are added using
/// <see cref="SetTextObs(string)"/>.
/// </remarks>
public virtual void CollectObservations()
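A short, hedged sketch of those helpers in use — AddVectorObs (including the one-hot int overload) and SetTextObs are Agent methods referenced above, while the fields are hypothetical:

```csharp
// Sketch: scalar, one-hot, and textual observations inside an override
// (ammo, maxAmmo, enemyType, and numEnemyTypes are hypothetical fields).
int ammo = 20, maxAmmo = 30, enemyType = 2, numEnemyTypes = 5;

public override void CollectObservations()
{
    AddVectorObs(ammo / (float)maxAmmo);      // normalized scalar (1 float)
    AddVectorObs(enemyType, numEnemyTypes);   // one-hot int (numEnemyTypes floats)
    SetTextObs("status:ok");                  // optional textual observation
}
```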
@@ -861,7 +872,7 @@ public virtual void AgentAction(float[] vectorAction, string textAction, Communi
}

/// <summary>
/// Specifies the agent behavior when done and
/// <see cref="AgentParameters.resetOnDone"/> is false. This method can be
/// used to remove the agent from the scene.
/// </summary>
@@ -906,12 +917,12 @@ public void UpdateMemoriesAction(List<float> memories)
{
action.memories = memories;
}

public void AppendMemoriesAction(List<float> memories)
{
action.memories.AddRange(memories);
}

public List<float> GetMemoriesAction()
{
return action.memories;
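For recurrent brains, these accessors round-trip the agent's memory vector between steps. A hedged fragment, given some Agent reference `agent` (the vector size is illustrative):

```csharp
using System.Collections.Generic;

// Sketch: read, overwrite, and extend an agent's recurrent memories.
List<float> memories = agent.GetMemoriesAction();             // last stored vector
agent.UpdateMemoriesAction(new List<float>(new float[128]));  // replace wholesale
agent.AppendMemoriesAction(new List<float> { 0.5f, -0.5f });  // extend in place
```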
@@ -966,9 +977,9 @@ protected float ScaleAction(float rawAction, float min, float max)
/// <summary>
/// Sets the status of the agent.
/// </summary>
/// <param name="academyMaxStep">If set to <c>true</c>
/// The agent must set maxStepReached.</param>
/// <param name="academyDone">If set to <c>true</c>
/// The agent must set done.</param>
/// <param name="academyStepCounter">Number of current steps in episode</param>
void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
@@ -984,7 +995,7 @@ void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
maxStepReached = true;
}

// If the Academy needs to reset, the agent should reset
// even if it reset recently.
if (academyDone)
{
@@ -996,7 +1007,7 @@ void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
/// Signals the agent that it must reset if its done flag is set to true.
void ResetIfDone()
{
// If an agent is done, then it will also
// request for a decision and an action
if (IsDone())
{
@@ -1126,14 +1137,14 @@ public static Texture2D ObservationToTexture(Camera obsCamera, int width, int he
obsCamera.Render();

texture2D.ReadPixels(new Rect(0, 0, texture2D.width, texture2D.height), 0, 0);

obsCamera.targetTexture = prevCameraRT;
obsCamera.rect = oldRec;
RenderTexture.active = prevActiveRT;
RenderTexture.ReleaseTemporary(tempRT);
return texture2D;
}
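A hedged usage sketch of this helper: render an agent camera to a fixed-size texture, consume it, and destroy it to avoid the same native-memory leak that ClearVisualObs guards against. The camera reference and 84x84 resolution are illustrative:

```csharp
// Sketch: capture, consume, and release a visual observation.
Texture2D obs = Agent.ObservationToTexture(agentCamera, 84, 84);
byte[] png = obs.EncodeToPNG();   // e.g. the bytes sent in AgentInfoProto
Object.Destroy(obs);              // release the texture's native memory
```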

/// <summary>
/// Converts a RenderTexture and corresponding resolution to a 2D texture.
/// </summary>
@@ -1150,7 +1161,7 @@ public static Texture2D ObservationToTexture(RenderTexture obsTexture, int width
{
texture2D.Resize(width, height);
}

if (width != obsTexture.width || height != obsTexture.height)
{
throw new UnityAgentsException(string.Format(
@@ -1175,5 +1186,5 @@ public void SetCustomObservation(CustomObservation customObservation)
{
info.customObservation = customObservation;
}
}
}
19 changes: 11 additions & 8 deletions UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs
@@ -6,15 +6,15 @@
namespace MLAgents
{
/// <summary>
/// The batcher is an RL specific class that makes sure that the information each object in
/// Unity (Academy and Brains) wants to send to External is appropriately batched together
/// and sent only when necessary.
///
/// The Batcher will only send a Message to the Communicator when either:
/// 1 - The academy is done
/// 2 - At least one brain has data to send
///
/// At each step, the batcher will keep track of the brains that queried the batcher for that
/// step. The batcher can only send the batched data when all the Brains have queried the
/// Batcher.
/// </summary>
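A hedged sketch of that gating rule — the flag dictionary and method are illustrative, not the actual Batcher fields: the batched message is flushed when the academy is done, or once every subscribed brain has queried during the current step.

```csharp
using System.Collections.Generic;
using System.Linq;

// Sketch of the send condition described above.
static bool ReadyToSend(Dictionary<string, bool> hasQueriedThisStep, bool academyDone)
{
    return academyDone || hasQueriedThisStep.Values.All(queried => queried);
}
```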
@@ -67,7 +67,7 @@ public Batcher(Communicator communicator)
}

/// <summary>
/// Sends the academy parameters through the Communicator.
/// Is used by the academy to send the AcademyParameters to the communicator.
/// </summary>
/// <returns>The External Initialization Parameters received.</returns>
@@ -104,7 +104,7 @@ public CommunicatorObjects.UnityRLInitializationInput SendAcademyParameters(
/// Registers the done flag of the academy to the next output to be sent
/// to the communicator.
/// </summary>
/// <param name="done">If set to <c>true</c>
/// The academy done state will be sent to External at the next Exchange.</param>
public void RegisterAcademyDoneFlag(bool done)
{
@@ -164,7 +164,7 @@ public void SubscribeBrain(string brainKey)

/// <summary>
/// Sends the brain info. If at least one brain has an agent in need of
/// a decision or if the academy is done, the data is sent via
/// Communicator. Else, a new step is realized. The data can only be
/// sent once all the brains that subscribed to the batcher have tried
/// to send information.
@@ -198,6 +198,9 @@ public void SendBrainInfo(
{
CommunicatorObjects.AgentInfoProto agentInfoProto = agentInfo[agent].ToProto();
m_currentUnityRLOutput.AgentInfos[brainKey].Value.Add(agentInfoProto);
// Avoid visual obs memory leak. This should be called AFTER we are done with the visual obs.
// e.g. after recording them to demo and using them for inference.
agentInfo[agent].ClearVisualObs();
}

m_hasData[brainKey] = true;
4 changes: 2 additions & 2 deletions gym-unity/setup.py
@@ -4,12 +4,12 @@

setup(
name="gym_unity",
version="0.4.3",
version="0.4.4",
description="Unity Machine Learning Agents Gym Interface",
license="Apache License 2.0",
author="Unity Technologies",
author_email="ML-Agents@unity3d.com",
url="https://github.com/Unity-Technologies/ml-agents",
packages=find_packages(),
install_requires=["gym", "mlagents_envs==0.9.0"],
install_requires=["gym", "mlagents_envs==0.9.1"],
)