From c6f51111657f547724a3d54eab5b0e56b58732a6 Mon Sep 17 00:00:00 2001
From: Ervin T
Date: Fri, 2 Aug 2019 17:10:46 -0700
Subject: [PATCH 1/8] Fix BCTrainer increment_steps (#2384)

---
 ml-agents/mlagents/trainers/bc/trainer.py    |  7 +++--
 .../mlagents/trainers/tests/mock_brain.py    | 20 +++++++++++++
 ml-agents/mlagents/trainers/tests/test_bc.py | 30 +++++++++++++++++++
 .../mlagents/trainers/tests/test_bcmodule.py | 29 ++++---------------
 4 files changed, 59 insertions(+), 27 deletions(-)

diff --git a/ml-agents/mlagents/trainers/bc/trainer.py b/ml-agents/mlagents/trainers/bc/trainer.py
index 9b2c1553cc..e67d951b9b 100644
--- a/ml-agents/mlagents/trainers/bc/trainer.py
+++ b/ml-agents/mlagents/trainers/bc/trainer.py
@@ -67,12 +67,13 @@ def get_step(self):
         """
         return self.policy.get_current_step()

-    def increment_step(self):
+    def increment_step(self, n_steps: int) -> None:
         """
         Increment the step count of the trainer
+
+        :param n_steps: number of steps to increment the step count by
         """
-        self.policy.increment_step()
-        return
+        self.step = self.policy.increment_step(n_steps)

     def add_experiences(
         self,
diff --git a/ml-agents/mlagents/trainers/tests/mock_brain.py b/ml-agents/mlagents/trainers/tests/mock_brain.py
index 1fcfbbc710..6303e3a6c0 100644
--- a/ml-agents/mlagents/trainers/tests/mock_brain.py
+++ b/ml-agents/mlagents/trainers/tests/mock_brain.py
@@ -139,3 +139,23 @@ def create_buffer(brain_infos, brain_params, sequence_length):
     buffer.append_update_buffer(0, batch_size=None, training_length=sequence_length)

     return buffer
+
+
+def create_mock_3dball_brain():
+    mock_brain = create_mock_brainparams(
+        vector_action_space_type="continuous",
+        vector_action_space_size=[2],
+        vector_observation_space_size=8,
+    )
+    mock_brain.brain_name = "Ball3DBrain"
+    return mock_brain
+
+
+def create_mock_banana_brain():
+    mock_brain = create_mock_brainparams(
+        number_visual_observations=1,
+        vector_action_space_type="discrete",
+        vector_action_space_size=[3, 3, 3, 2],
+        vector_observation_space_size=0,
+    )
+    return mock_brain
diff --git a/ml-agents/mlagents/trainers/tests/test_bc.py b/ml-agents/mlagents/trainers/tests/test_bc.py
index 800ab4a23f..3d162dbcee 100644
--- a/ml-agents/mlagents/trainers/tests/test_bc.py
+++ b/ml-agents/mlagents/trainers/tests/test_bc.py
@@ -1,12 +1,15 @@
 import unittest.mock as mock
 import pytest
+import os

 import numpy as np
 import tensorflow as tf
 import yaml

 from mlagents.trainers.bc.models import BehavioralCloningModel
+import mlagents.trainers.tests.mock_brain as mb
 from mlagents.trainers.bc.policy import BCPolicy
+from mlagents.trainers.bc.offline_trainer import BCTrainer
 from mlagents.envs import UnityEnvironment
 from mlagents.envs.mock_communicator import MockCommunicator

@@ -21,10 +24,37 @@ def dummy_config():
         use_recurrent: false
         sequence_length: 32
         memory_size: 32
+        batches_per_epoch: 1
+        batch_size: 32
+        summary_freq: 2000
+        max_steps: 4000
         """
     )


+@mock.patch("mlagents.envs.UnityEnvironment")
+def test_bc_trainer(mock_env, dummy_config):
+    mock_brain = mb.create_mock_3dball_brain()
+    mock_braininfo = mb.create_mock_braininfo(num_agents=12, num_vector_observations=8)
+    mb.setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)
+    env = mock_env()
+
+    trainer_parameters = dummy_config
+    trainer_parameters["summary_path"] = "tmp"
+    trainer_parameters["model_path"] = "tmp"
+    trainer_parameters["demo_path"] = (
+        os.path.dirname(os.path.abspath(__file__)) + "/test.demo"
+    )
+    trainer = BCTrainer(
+        mock_brain, trainer_parameters, training=True, load=False, seed=0, run_id=0
+    )
+    trainer.demonstration_buffer = mb.simulate_rollout(env, trainer.policy, 100)
+    trainer.update_policy()
+    assert len(trainer.stats["Losses/Cloning Loss"]) > 0
+    trainer.increment_step(1)
+    assert trainer.step == 1
+
+
 @mock.patch("mlagents.envs.UnityEnvironment.executable_launcher")
 @mock.patch("mlagents.envs.UnityEnvironment.get_communicator")
 def test_bc_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
diff --git a/ml-agents/mlagents/trainers/tests/test_bcmodule.py b/ml-agents/mlagents/trainers/tests/test_bcmodule.py
index 0eee0f4d2e..d57741448b 100644
--- a/ml-agents/mlagents/trainers/tests/test_bcmodule.py
+++ b/ml-agents/mlagents/trainers/tests/test_bcmodule.py
@@ -42,25 +42,6 @@ def dummy_config():
     )


-def create_mock_3dball_brain():
-    mock_brain = mb.create_mock_brainparams(
-        vector_action_space_type="continuous",
-        vector_action_space_size=[2],
-        vector_observation_space_size=8,
-    )
-    return mock_brain
-
-
-def create_mock_banana_brain():
-    mock_brain = mb.create_mock_brainparams(
-        number_visual_observations=1,
-        vector_action_space_type="discrete",
-        vector_action_space_size=[3, 3, 3, 2],
-        vector_observation_space_size=0,
-    )
-    return mock_brain
-
-
 def create_ppo_policy_with_bc_mock(
     mock_env, mock_brain, dummy_config, use_rnn, demo_file
 ):
@@ -84,7 +65,7 @@ def create_ppo_policy_with_bc_mock(
 @mock.patch("mlagents.envs.UnityEnvironment")
 def test_bcmodule_defaults(mock_env, dummy_config):
     # See if default values match
-    mock_brain = create_mock_3dball_brain()
+    mock_brain = mb.create_mock_3dball_brain()
     env, policy = create_ppo_policy_with_bc_mock(
         mock_env, mock_brain, dummy_config, False, "test.demo"
     )
@@ -105,7 +86,7 @@ def test_bcmodule_defaults(mock_env, dummy_config):
 # Test with continuous control env and vector actions
 @mock.patch("mlagents.envs.UnityEnvironment")
 def test_bcmodule_update(mock_env, dummy_config):
-    mock_brain = create_mock_3dball_brain()
+    mock_brain = mb.create_mock_3dball_brain()
     env, policy = create_ppo_policy_with_bc_mock(
         mock_env, mock_brain, dummy_config, False, "test.demo"
     )
@@ -118,7 +99,7 @@ def test_bcmodule_update(mock_env, dummy_config):
 # Test with RNN
 @mock.patch("mlagents.envs.UnityEnvironment")
 def test_bcmodule_rnn_update(mock_env, dummy_config):
-    mock_brain = create_mock_3dball_brain()
+    mock_brain = mb.create_mock_3dball_brain()
     env, policy = create_ppo_policy_with_bc_mock(
         mock_env, mock_brain, dummy_config, True, "test.demo"
     )
@@ -131,7 +112,7 @@ def test_bcmodule_rnn_update(mock_env, dummy_config):
 # Test with discrete control and visual observations
 @mock.patch("mlagents.envs.UnityEnvironment")
 def test_bcmodule_dc_visual_update(mock_env, dummy_config):
-    mock_brain = create_mock_banana_brain()
+    mock_brain = mb.create_mock_banana_brain()
     env, policy = create_ppo_policy_with_bc_mock(
         mock_env, mock_brain, dummy_config, False, "testdcvis.demo"
     )
@@ -144,7 +125,7 @@ def test_bcmodule_dc_visual_update(mock_env, dummy_config):
 # Test with discrete control, visual observations and RNN
 @mock.patch("mlagents.envs.UnityEnvironment")
 def test_bcmodule_rnn_dc_update(mock_env, dummy_config):
-    mock_brain = create_mock_banana_brain()
+    mock_brain = mb.create_mock_banana_brain()
     env, policy = create_ppo_policy_with_bc_mock(
         mock_env, mock_brain, dummy_config, True, "testdcvis.demo"
     )

From a84f2267153966ab0e56e1c708f059200d47a0df Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Mon, 5 Aug 2019 17:01:16 -0700
Subject: [PATCH 2/8] Increment package ver. Only trainers were changed.
--- ml-agents/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/setup.py b/ml-agents/setup.py index c7486140ef..b30f8b4f02 100644 --- a/ml-agents/setup.py +++ b/ml-agents/setup.py @@ -10,7 +10,7 @@ setup( name="mlagents", - version="0.9.0", + version="0.9.0a", description="Unity Machine Learning Agents", long_description=long_description, long_description_content_type="text/markdown", From b243314213534eda124607a11c4fe008760a9fa1 Mon Sep 17 00:00:00 2001 From: Ervin T Date: Tue, 6 Aug 2019 17:08:24 -0700 Subject: [PATCH 3/8] Fix issue with visual obs destroyed too early (#2400) --- UnitySDK/Assets/ML-Agents/Scripts/Agent.cs | 83 +++++++++++--------- UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs | 19 +++-- 2 files changed, 58 insertions(+), 44 deletions(-) diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs b/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs index 85f74747ea..dec43fb406 100755 --- a/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs @@ -8,7 +8,7 @@ namespace MLAgents { /// - /// Struct that contains all the information for an Agent, including its + /// Struct that contains all the information for an Agent, including its /// observations, actions and current status, that is sent to the Brain. /// public struct AgentInfo @@ -120,15 +120,26 @@ public CommunicatorObjects.AgentInfoProto ToProto() agentInfoProto.VisualObservations.Add( ByteString.CopyFrom(obs.EncodeToPNG()) ); + } + return agentInfoProto; + } + + /// + /// Remove the visual observations from memory. Call at each timestep + /// to avoid memory leaks. + /// + public void ClearVisualObs() + { + foreach (Texture2D obs in visualObservations) + { Object.Destroy(obs); } visualObservations.Clear(); - return agentInfoProto; } } /// - /// Struct that contains the action information sent from the Brain to the + /// Struct that contains the action information sent from the Brain to the /// Agent. /// public struct AgentAction @@ -141,7 +152,7 @@ public struct AgentAction } /// - /// Struct that contains all the Agent-specific parameters provided in the + /// Struct that contains all the Agent-specific parameters provided in the /// Editor. This excludes the Brain linked to the Agent since it can be /// modified programmatically. /// @@ -153,7 +164,7 @@ public class AgentParameters /// observations. /// public List agentCameras = new List(); - + /// /// The list of the RenderTextures the agent uses for visual /// observations. @@ -162,7 +173,7 @@ public class AgentParameters /// - /// The maximum number of steps the agent takes before being done. + /// The maximum number of steps the agent takes before being done. /// /// /// If set to 0, the agent can only be set to done programmatically (or @@ -184,7 +195,7 @@ public class AgentParameters public bool resetOnDone = true; /// - /// Whether to enable On Demand Decisions or make a decision at + /// Whether to enable On Demand Decisions or make a decision at /// every step. /// public bool onDemandDecision; @@ -199,8 +210,8 @@ public class AgentParameters /// /// Agent Monobehavior class that is attached to a Unity GameObject, making it - /// an Agent. An agent produces observations and takes actions in the - /// environment. Observations are determined by the cameras attached + /// an Agent. An agent produces observations and takes actions in the + /// environment. Observations are determined by the cameras attached /// to the agent in addition to the vector observations implemented by the /// user in . 
On the other hand, actions /// are determined by decisions produced by a linked Brain. Currently, this @@ -213,23 +224,23 @@ public class AgentParameters /// however, an agent need not send its observation at every step since very /// little may have changed between sucessive steps. Currently, how often an /// agent updates its brain with a fresh observation is determined by the - /// Academy. - /// - /// At any step, an agent may be considered . + /// Academy. + /// + /// At any step, an agent may be considered . /// This could occur due to a variety of reasons: /// - The agent reached an end state within its environment. /// - The agent reached the maximum # of steps (i.e. timed out). /// - The academy reached the maximum # of steps (forced agent to be done). - /// + /// /// Here, an agent reaches an end state if it completes its task successfully /// or somehow fails along the way. In the case where an agent is done before /// the academy, it either resets and restarts, or just lingers until the /// academy is done. - /// + /// /// An important note regarding steps and episodes is due. Here, an agent step /// corresponds to an academy step, which also corresponds to Unity /// environment step (i.e. each FixedUpdate call). This is not the case for - /// episodes. The academy controls the global episode count and each agent + /// episodes. The academy controls the global episode count and each agent /// controls its own local episode count and can reset and start a new local /// episode independently (based on its own experience). Thus an academy /// (global) episode can be viewed as the upper-bound on an agents episode @@ -237,10 +248,10 @@ public class AgentParameters /// multiple local episodes. Consequently, if an agent max step is /// set to a value larger than the academy max steps value, then the academy /// value takes precedence (since the agent max step will never be reached). - /// + /// /// Lastly, note that at any step the brain linked to the agent is allowed to /// change programmatically with . - /// + /// /// Implementation-wise, it is required that this class is extended and the /// virtual methods overridden. For sample implementations of agent behavior, /// see the Examples/ directory within this Unity project. @@ -252,7 +263,7 @@ public abstract class Agent : MonoBehaviour { /// /// The Brain attached to this agent. A brain can be attached either - /// directly from the Editor through AgentEditor or + /// directly from the Editor through AgentEditor or /// programmatically through . It is OK for an agent /// to not have a brain, as long as no decision is requested. /// @@ -523,7 +534,7 @@ void ResetData() actionMasker = new ActionMasker(param); // If we haven't initialized vectorActions, initialize to 0. This should only // happen during the creation of the Agent. In subsequent episodes, vectorAction - // should stay the previous action before the Done(), so that it is properly recorded. + // should stay the previous action before the Done(), so that it is properly recorded. 
if (action.vectorActions == null) { if (param.vectorActionSpaceType == SpaceType.continuous) @@ -598,9 +609,9 @@ void SendInfoToBrain() brain.brainParameters.vectorObservationSize, info.vectorObservation.Count)); } - + Utilities.ShiftLeft(info.stackedVectorObservation, param.vectorObservationSize); - Utilities.ReplaceRange(info.stackedVectorObservation, info.vectorObservation, + Utilities.ReplaceRange(info.stackedVectorObservation, info.vectorObservation, info.stackedVectorObservation.Count - info.vectorObservation.Count); info.visualObservations.Clear(); @@ -624,7 +635,7 @@ void SendInfoToBrain() param.cameraResolutions[i].height); info.visualObservations.Add(obsTexture); } - + //Then add all renderTextures var camCount = agentParameters.agentCameras.Count; for (int i = 0; i < agentParameters.agentRenderTextures.Count; i++) @@ -653,13 +664,13 @@ void SendInfoToBrain() /// /// Collects the (vector, visual, text) observations of the agent. - /// The agent observation describes the current environment from the + /// The agent observation describes the current environment from the /// perspective of the agent. /// /// /// Simply, an agents observation is any environment information that helps /// the Agent acheive its goal. For example, for a fighting Agent, its - /// observation could include distances to friends or enemies, or the + /// observation could include distances to friends or enemies, or the /// current level of ammunition at its disposal. /// Recall that an Agent may attach vector, visual or textual observations. /// Vector observations are added by calling the provided helper methods: @@ -678,7 +689,7 @@ void SendInfoToBrain() /// needs to match the vectorObservationSize attribute of the linked Brain. /// Visual observations are implicitly added from the cameras attached to /// the Agent. - /// Lastly, textual observations are added using + /// Lastly, textual observations are added using /// . /// public virtual void CollectObservations() @@ -861,7 +872,7 @@ public virtual void AgentAction(float[] vectorAction, string textAction, Communi } /// - /// Specifies the agent behavior when done and + /// Specifies the agent behavior when done and /// is false. This method can be /// used to remove the agent from the scene. /// @@ -906,12 +917,12 @@ public void UpdateMemoriesAction(List memories) { action.memories = memories; } - + public void AppendMemoriesAction(List memories) { action.memories.AddRange(memories); } - + public List GetMemoriesAction() { return action.memories; @@ -966,9 +977,9 @@ protected float ScaleAction(float rawAction, float min, float max) /// /// Sets the status of the agent. /// - /// If set to true + /// If set to true /// The agent must set maxStepReached. - /// If set to true + /// If set to true /// The agent must set done. /// Number of current steps in episode void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter) @@ -984,7 +995,7 @@ void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter) maxStepReached = true; } - // If the Academy needs to reset, the agent should reset + // If the Academy needs to reset, the agent should reset // even if it reseted recently. if (academyDone) { @@ -996,7 +1007,7 @@ void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter) /// Signals the agent that it must reset if its done flag is set to true. 
void ResetIfDone() { - // If an agent is done, then it will also + // If an agent is done, then it will also // request for a decision and an action if (IsDone()) { @@ -1126,14 +1137,14 @@ public static Texture2D ObservationToTexture(Camera obsCamera, int width, int he obsCamera.Render(); texture2D.ReadPixels(new Rect(0, 0, texture2D.width, texture2D.height), 0, 0); - + obsCamera.targetTexture = prevCameraRT; obsCamera.rect = oldRec; RenderTexture.active = prevActiveRT; RenderTexture.ReleaseTemporary(tempRT); return texture2D; } - + /// /// Converts a RenderTexture and correspinding resolution to a 2D texture. /// @@ -1150,7 +1161,7 @@ public static Texture2D ObservationToTexture(RenderTexture obsTexture, int width { texture2D.Resize(width, height); } - + if(width != obsTexture.width || height != obsTexture.height) { throw new UnityAgentsException(string.Format( @@ -1175,5 +1186,5 @@ public void SetCustomObservation(CustomObservation customObservation) { info.customObservation = customObservation; } - } + } } diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs b/UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs index a574b7f8f2..30f6495584 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs @@ -6,15 +6,15 @@ namespace MLAgents { /// - /// The batcher is an RL specific class that makes sure that the information each object in - /// Unity (Academy and Brains) wants to send to External is appropriately batched together + /// The batcher is an RL specific class that makes sure that the information each object in + /// Unity (Academy and Brains) wants to send to External is appropriately batched together /// and sent only when necessary. - /// + /// /// The Batcher will only send a Message to the Communicator when either : /// 1 - The academy is done /// 2 - At least one brain has data to send - /// - /// At each step, the batcher will keep track of the brains that queried the batcher for that + /// + /// At each step, the batcher will keep track of the brains that queried the batcher for that /// step. The batcher can only send the batched data when all the Brains have queried the /// Batcher. /// @@ -67,7 +67,7 @@ public Batcher(Communicator communicator) } /// - /// Sends the academy parameters through the Communicator. + /// Sends the academy parameters through the Communicator. /// Is used by the academy to send the AcademyParameters to the communicator. /// /// The External Initialization Parameters received. @@ -104,7 +104,7 @@ public CommunicatorObjects.UnityRLInitializationInput SendAcademyParameters( /// Registers the done flag of the academy to the next output to be sent /// to the communicator. /// - /// If set to true + /// If set to true /// The academy done state will be sent to External at the next Exchange. public void RegisterAcademyDoneFlag(bool done) { @@ -164,7 +164,7 @@ public void SubscribeBrain(string brainKey) /// /// Sends the brain info. If at least one brain has an agent in need of - /// a decision or if the academy is done, the data is sent via + /// a decision or if the academy is done, the data is sent via /// Communicator. Else, a new step is realized. The data can only be /// sent once all the brains that subscribed to the batcher have tried /// to send information. @@ -198,6 +198,9 @@ public void SendBrainInfo( { CommunicatorObjects.AgentInfoProto agentInfoProto = agentInfo[agent].ToProto(); m_currentUnityRLOutput.AgentInfos[brainKey].Value.Add(agentInfoProto); + // Avoid visual obs memory leak. 
This should be called AFTER we are done with the visual obs. + // e.g. after recording them to demo and using them for inference. + agentInfo[agent].ClearVisualObs(); } m_hasData[brainKey] = true; From 2a76490f92120e91d5aeb176de7c6a47666973bb Mon Sep 17 00:00:00 2001 From: sankalp04 <30798796+sankalp04@users.noreply.github.com> Date: Thu, 8 Aug 2019 14:08:31 -0700 Subject: [PATCH 4/8] =?UTF-8?q?Change=20samplers=20to=20use=20random=20sta?= =?UTF-8?q?te=20to=20allow=20consistency=20in=20reset=20par=E2=80=A6=20(#2?= =?UTF-8?q?398)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Change samplers to use random state to allow consistency in reset parameter draws for a specified seed --- ml-agents-envs/mlagents/envs/sampler_class.py | 86 ++++++++++++++++--- ml-agents/mlagents/trainers/learn.py | 6 +- 2 files changed, 79 insertions(+), 13 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/sampler_class.py b/ml-agents-envs/mlagents/envs/sampler_class.py index cfff08253d..2fbd83d6fe 100644 --- a/ml-agents-envs/mlagents/envs/sampler_class.py +++ b/ml-agents-envs/mlagents/envs/sampler_class.py @@ -19,13 +19,27 @@ class UniformSampler(Sampler): """ def __init__( - self, min_value: Union[int, float], max_value: Union[int, float], **kwargs + self, + min_value: Union[int, float], + max_value: Union[int, float], + seed: Optional[int] = None, + **kwargs ) -> None: + """ + :param min_value: minimum value of the range to be sampled uniformly from + :param max_value: maximum value of the range to be sampled uniformly from + :param seed: Random seed used for making draws from the uniform sampler + """ self.min_value = min_value self.max_value = max_value + # Draw from random state to allow for consistent reset parameter draw for a seed + self.random_state = np.random.RandomState(seed) def sample_parameter(self) -> float: - return np.random.uniform(self.min_value, self.max_value) + """ + Draws and returns a sample from the specified interval + """ + return self.random_state.uniform(self.min_value, self.max_value) class MultiRangeUniformSampler(Sampler): @@ -36,19 +50,33 @@ class MultiRangeUniformSampler(Sampler): it proceeds to pick a value uniformly in that range. 
""" - def __init__(self, intervals: List[List[Union[int, float]]], **kwargs) -> None: + def __init__( + self, + intervals: List[List[Union[int, float]]], + seed: Optional[int] = None, + **kwargs + ) -> None: + """ + :param intervals: List of intervals to draw uniform samples from + :param seed: Random seed used for making uniform draws from the specified intervals + """ self.intervals = intervals # Measure the length of the intervals interval_lengths = [abs(x[1] - x[0]) for x in self.intervals] cum_interval_length = sum(interval_lengths) # Assign weights to an interval proportionate to the interval size self.interval_weights = [x / cum_interval_length for x in interval_lengths] + # Draw from random state to allow for consistent reset parameter draw for a seed + self.random_state = np.random.RandomState(seed) def sample_parameter(self) -> float: + """ + Selects an interval to pick and then draws a uniform sample from the picked interval + """ cur_min, cur_max = self.intervals[ - np.random.choice(len(self.intervals), p=self.interval_weights) + self.random_state.choice(len(self.intervals), p=self.interval_weights) ] - return np.random.uniform(cur_min, cur_max) + return self.random_state.uniform(cur_min, cur_max) class GaussianSampler(Sampler): @@ -58,13 +86,27 @@ class GaussianSampler(Sampler): """ def __init__( - self, mean: Union[float, int], st_dev: Union[float, int], **kwargs + self, + mean: Union[float, int], + st_dev: Union[float, int], + seed: Optional[int] = None, + **kwargs ) -> None: + """ + :param mean: Specifies the mean of the gaussian distribution to draw from + :param st_dev: Specifies the standard devation of the gaussian distribution to draw from + :param seed: Random seed used for making gaussian draws from the sample + """ self.mean = mean self.st_dev = st_dev + # Draw from random state to allow for consistent reset parameter draw for a seed + self.random_state = np.random.RandomState(seed) def sample_parameter(self) -> float: - return np.random.normal(self.mean, self.st_dev) + """ + Returns a draw from the specified Gaussian distribution + """ + return self.random_state.normal(self.mean, self.st_dev) class SamplerFactory: @@ -81,10 +123,23 @@ class SamplerFactory: @staticmethod def register_sampler(name: str, sampler_cls: Type[Sampler]) -> None: + """ + Registers the sampe in the Sampler Factory to be used later + :param name: String name to set as key for the sampler_cls in the factory + :param sampler_cls: Sampler object to associate to the name in the factory + """ SamplerFactory.NAME_TO_CLASS[name] = sampler_cls @staticmethod - def init_sampler_class(name: str, params: Dict[str, Any]): + def init_sampler_class( + name: str, params: Dict[str, Any], seed: Optional[int] = None + ) -> Sampler: + """ + Initializes the sampler class associated with the name with the params + :param name: Name of the sampler in the factory to initialize + :param params: Parameters associated to the sampler attached to the name + :param seed: Random seed to be used to set deterministic random draws for the sampler + """ if name not in SamplerFactory.NAME_TO_CLASS: raise SamplerException( name + " sampler is not registered in the SamplerFactory." @@ -92,6 +147,7 @@ def init_sampler_class(name: str, params: Dict[str, Any]): " associated to your sampler in the SamplerFactory." 
) sampler_cls = SamplerFactory.NAME_TO_CLASS[name] + params["seed"] = seed try: return sampler_cls(**params) except TypeError: @@ -103,7 +159,13 @@ def init_sampler_class(name: str, params: Dict[str, Any]): class SamplerManager: - def __init__(self, reset_param_dict: Dict[str, Any]) -> None: + def __init__( + self, reset_param_dict: Dict[str, Any], seed: Optional[int] = None + ) -> None: + """ + :param reset_param_dict: Arguments needed for initializing the samplers + :param seed: Random seed to be used for drawing samples from the samplers + """ self.reset_param_dict = reset_param_dict if reset_param_dict else {} assert isinstance(self.reset_param_dict, dict) self.samplers: Dict[str, Sampler] = {} @@ -116,7 +178,7 @@ def __init__(self, reset_param_dict: Dict[str, Any]) -> None: ) sampler_name = cur_param_dict.pop("sampler-type") param_sampler = SamplerFactory.init_sampler_class( - sampler_name, cur_param_dict + sampler_name, cur_param_dict, seed ) self.samplers[param_name] = param_sampler @@ -128,6 +190,10 @@ def is_empty(self) -> bool: return not bool(self.samplers) def sample_all(self) -> Dict[str, float]: + """ + Loop over all samplers and draw a sample from each one for generating + next set of reset parameter values. + """ res = {} for param_name, param_sampler in list(self.samplers.items()): res[param_name] = param_sampler.sample_parameter() diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index c1ca879107..03398c7788 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -91,7 +91,7 @@ def run_training( env = SubprocessEnvManager(env_factory, num_envs) maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env) sampler_manager, resampling_interval = create_sampler_manager( - sampler_file_path, env.reset_parameters + sampler_file_path, env.reset_parameters, run_seed ) # Create controller and begin training. @@ -118,7 +118,7 @@ def run_training( tc.start_learning(env, trainer_config) -def create_sampler_manager(sampler_file_path, env_reset_params): +def create_sampler_manager(sampler_file_path, env_reset_params, run_seed=None): sampler_config = None resample_interval = None if sampler_file_path is not None: @@ -136,7 +136,7 @@ def create_sampler_manager(sampler_file_path, env_reset_params): "Resampling interval was not specified in the sampler file." " Please specify it with the 'resampling-interval' key in the sampler config file." ) - sampler_manager = SamplerManager(sampler_config) + sampler_manager = SamplerManager(sampler_config, run_seed) return sampler_manager, resample_interval From 8f6d0f824551dae57cd7d05c72862f658ef2a96c Mon Sep 17 00:00:00 2001 From: Ervin T Date: Fri, 9 Aug 2019 15:03:28 -0700 Subject: [PATCH 5/8] Fix naming conflict between Curiosity and GAIL (#2406) --- .../reward_signals/curiosity/model.py | 12 +++++------ .../components/reward_signals/gail/model.py | 20 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py b/ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py index fd60b6306f..009e2663df 100644 --- a/ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py +++ b/ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py @@ -42,7 +42,7 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]: # Create input ops for next (t+1) visual observations. 
next_visual_input = LearningModel.create_visual_input( self.policy_model.brain.camera_resolutions[i], - name="next_visual_observation_" + str(i), + name="curiosity_next_visual_observation_" + str(i), ) self.next_visual_in.append(next_visual_input) @@ -53,7 +53,7 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]: self.encoding_size, LearningModel.swish, 1, - "stream_{}_visual_obs_encoder".format(i), + "curiosity_stream_{}_visual_obs_encoder".format(i), False, ) @@ -62,7 +62,7 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]: self.encoding_size, LearningModel.swish, 1, - "stream_{}_visual_obs_encoder".format(i), + "curiosity_stream_{}_visual_obs_encoder".format(i), True, ) visual_encoders.append(encoded_visual) @@ -80,7 +80,7 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]: self.next_vector_in = tf.placeholder( shape=[None, self.policy_model.vec_obs_size], dtype=tf.float32, - name="next_vector_observation", + name="curiosity_next_vector_observation", ) encoded_vector_obs = self.policy_model.create_vector_observation_encoder( @@ -88,7 +88,7 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]: self.encoding_size, LearningModel.swish, 2, - "vector_obs_encoder", + "curiosity_vector_obs_encoder", False, ) encoded_next_vector_obs = self.policy_model.create_vector_observation_encoder( @@ -96,7 +96,7 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]: self.encoding_size, LearningModel.swish, 2, - "vector_obs_encoder", + "curiosity_vector_obs_encoder", True, ) encoded_state_list.append(encoded_vector_obs) diff --git a/ml-agents/mlagents/trainers/components/reward_signals/gail/model.py b/ml-agents/mlagents/trainers/components/reward_signals/gail/model.py index e26d969da3..c652d3aeac 100644 --- a/ml-agents/mlagents/trainers/components/reward_signals/gail/model.py +++ b/ml-agents/mlagents/trainers/components/reward_signals/gail/model.py @@ -112,7 +112,7 @@ def make_inputs(self) -> None: # Create input ops for next (t+1) visual observations. 
visual_input = self.policy_model.create_visual_input( self.policy_model.brain.camera_resolutions[i], - name="visual_observation_" + str(i), + name="gail_visual_observation_" + str(i), ) self.expert_visual_in.append(visual_input) @@ -121,7 +121,7 @@ def make_inputs(self) -> None: self.encoding_size, LearningModel.swish, 1, - "stream_{}_visual_obs_encoder".format(i), + "gail_stream_{}_visual_obs_encoder".format(i), False, ) @@ -130,7 +130,7 @@ def make_inputs(self) -> None: self.encoding_size, LearningModel.swish, 1, - "stream_{}_visual_obs_encoder".format(i), + "gail_stream_{}_visual_obs_encoder".format(i), True, ) visual_policy_encoders.append(encoded_policy_visual) @@ -163,7 +163,7 @@ def create_encoder( concat_input, self.h_size, activation=LearningModel.swish, - name="d_hidden_1", + name="gail_d_hidden_1", reuse=reuse, ) @@ -171,7 +171,7 @@ def create_encoder( hidden_1, self.h_size, activation=LearningModel.swish, - name="d_hidden_2", + name="gail_d_hidden_2", reuse=reuse, ) @@ -182,7 +182,7 @@ def create_encoder( hidden_2, self.z_size, reuse=reuse, - name="z_mean", + name="gail_z_mean", kernel_initializer=LearningModel.scaled_init(0.01), ) @@ -198,7 +198,7 @@ def create_encoder( estimate_input, 1, activation=tf.nn.sigmoid, - name="d_estimate", + name="gail_d_estimate", reuse=reuse, ) return estimate, z_mean, concat_input @@ -209,7 +209,7 @@ def create_network(self) -> None: """ if self.use_vail: self.z_sigma = tf.get_variable( - "sigma_vail", + "gail_sigma_vail", self.z_size, dtype=tf.float32, initializer=tf.ones_initializer(), @@ -217,7 +217,7 @@ def create_network(self) -> None: self.z_sigma_sq = self.z_sigma * self.z_sigma self.z_log_sigma_sq = tf.log(self.z_sigma_sq + EPSILON) self.use_noise = tf.placeholder( - shape=[1], dtype=tf.float32, name="NoiseLevel" + shape=[1], dtype=tf.float32, name="gail_NoiseLevel" ) self.expert_estimate, self.z_mean_expert, _ = self.create_encoder( self.encoded_expert, self.expert_action, self.done_expert, reuse=False @@ -229,7 +229,7 @@ def create_network(self) -> None: reuse=True, ) self.discriminator_score = tf.reshape( - self.policy_estimate, [-1], name="GAIL_reward" + self.policy_estimate, [-1], name="gail_reward" ) self.intrinsic_reward = -tf.log(1.0 - self.discriminator_score + EPSILON) From de728e57c785a056823102e425c80f4bbfd6fc51 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 9 Aug 2019 17:03:40 -0700 Subject: [PATCH 6/8] Tick version to 0.9.0a --- gym-unity/setup.py | 4 ++-- ml-agents-envs/setup.py | 2 +- ml-agents/setup.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gym-unity/setup.py b/gym-unity/setup.py index f5ccf499c3..5f64c6ea76 100755 --- a/gym-unity/setup.py +++ b/gym-unity/setup.py @@ -4,12 +4,12 @@ setup( name="gym_unity", - version="0.4.3", + version="0.4.3a", description="Unity Machine Learning Agents Gym Interface", license="Apache License 2.0", author="Unity Technologies", author_email="ML-Agents@unity3d.com", url="https://github.com/Unity-Technologies/ml-agents", packages=find_packages(), - install_requires=["gym", "mlagents_envs==0.9.0"], + install_requires=["gym", "mlagents_envs==0.9.0a"], ) diff --git a/ml-agents-envs/setup.py b/ml-agents-envs/setup.py index 6edacf1721..9c440ebbcb 100644 --- a/ml-agents-envs/setup.py +++ b/ml-agents-envs/setup.py @@ -5,7 +5,7 @@ setup( name="mlagents_envs", - version="0.9.0", + version="0.9.0a", description="Unity Machine Learning Agents Interface", url="https://github.com/Unity-Technologies/ml-agents", author="Unity Technologies", diff --git 
a/ml-agents/setup.py b/ml-agents/setup.py index b30f8b4f02..910c52c630 100644 --- a/ml-agents/setup.py +++ b/ml-agents/setup.py @@ -29,7 +29,7 @@ ), zip_safe=False, install_requires=[ - "mlagents_envs==0.9.0", + "mlagents_envs==0.9.0a", "tensorflow>=1.7,<1.8", "Pillow>=4.2.1", "matplotlib", From 518c5eba7eeb35a52774162b94fd19fb7ba4fdb5 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 9 Aug 2019 17:38:04 -0700 Subject: [PATCH 7/8] Re-tick version for pypi --- gym-unity/setup.py | 4 ++-- ml-agents-envs/setup.py | 2 +- ml-agents/setup.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gym-unity/setup.py b/gym-unity/setup.py index 5f64c6ea76..64cb242bd6 100755 --- a/gym-unity/setup.py +++ b/gym-unity/setup.py @@ -4,12 +4,12 @@ setup( name="gym_unity", - version="0.4.3a", + version="0.4.4", description="Unity Machine Learning Agents Gym Interface", license="Apache License 2.0", author="Unity Technologies", author_email="ML-Agents@unity3d.com", url="https://github.com/Unity-Technologies/ml-agents", packages=find_packages(), - install_requires=["gym", "mlagents_envs==0.9.0a"], + install_requires=["gym", "mlagents_envs==0.9.1"], ) diff --git a/ml-agents-envs/setup.py b/ml-agents-envs/setup.py index 9c440ebbcb..06d8131546 100644 --- a/ml-agents-envs/setup.py +++ b/ml-agents-envs/setup.py @@ -5,7 +5,7 @@ setup( name="mlagents_envs", - version="0.9.0a", + version="0.9.1", description="Unity Machine Learning Agents Interface", url="https://github.com/Unity-Technologies/ml-agents", author="Unity Technologies", diff --git a/ml-agents/setup.py b/ml-agents/setup.py index 910c52c630..17174ebe38 100644 --- a/ml-agents/setup.py +++ b/ml-agents/setup.py @@ -10,7 +10,7 @@ setup( name="mlagents", - version="0.9.0a", + version="0.9.1", description="Unity Machine Learning Agents", long_description=long_description, long_description_content_type="text/markdown", @@ -29,7 +29,7 @@ ), zip_safe=False, install_requires=[ - "mlagents_envs==0.9.0a", + "mlagents_envs==0.9.1", "tensorflow>=1.7,<1.8", "Pillow>=4.2.1", "matplotlib", From 807e26033068da8729a55e30c81b3439e107193a Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 9 Aug 2019 18:01:12 -0700 Subject: [PATCH 8/8] Make sure all tests pass on BC --- ml-agents/mlagents/trainers/bc/policy.py | 16 +++------------- ml-agents/mlagents/trainers/bc/trainer.py | 2 +- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/ml-agents/mlagents/trainers/bc/policy.py b/ml-agents/mlagents/trainers/bc/policy.py index 10faf7fab9..2c31a19dec 100644 --- a/ml-agents/mlagents/trainers/bc/policy.py +++ b/ml-agents/mlagents/trainers/bc/policy.py @@ -79,24 +79,14 @@ def update(self, mini_batch, num_sequences): self.model.sequence_length: self.sequence_length, } if self.use_continuous_act: - feed_dict[self.model.true_action] = mini_batch["actions"].reshape( - [-1, self.brain.vector_action_space_size[0]] - ) + feed_dict[self.model.true_action] = mini_batch["actions"] else: - feed_dict[self.model.true_action] = mini_batch["actions"].reshape( - [-1, len(self.brain.vector_action_space_size)] - ) + feed_dict[self.model.true_action] = mini_batch["actions"] feed_dict[self.model.action_masks] = np.ones( (num_sequences, sum(self.brain.vector_action_space_size)) ) if self.use_vec_obs: - apparent_obs_size = ( - self.brain.vector_observation_space_size - * self.brain.num_stacked_vector_observations - ) - feed_dict[self.model.vector_in] = mini_batch["vector_obs"].reshape( - [-1, apparent_obs_size] - ) + feed_dict[self.model.vector_in] = mini_batch["vector_obs"] for 
i, _ in enumerate(self.model.visual_in):
             visual_obs = mini_batch["visual_obs%d" % i]
             feed_dict[self.model.visual_in[i]] = visual_obs
diff --git a/ml-agents/mlagents/trainers/bc/trainer.py b/ml-agents/mlagents/trainers/bc/trainer.py
index 59da9fa34f..7f53bcacf4 100644
--- a/ml-agents/mlagents/trainers/bc/trainer.py
+++ b/ml-agents/mlagents/trainers/bc/trainer.py
@@ -124,7 +124,7 @@ def update_policy(self):
         """
         Updates the policy.
         """
-        self.demonstration_buffer.update_buffer.shuffle()
+        self.demonstration_buffer.update_buffer.shuffle(self.policy.sequence_length)
         batch_losses = []
         num_batches = min(
             len(self.demonstration_buffer.update_buffer["actions"]) // self.n_sequences,