From ffa50d8201f42b34d23fff27e54bd35d70d46655 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 24 Jul 2019 18:27:31 -0700 Subject: [PATCH 1/4] Fix default for vis_encode_type (#2330) --- config/trainer_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/trainer_config.yaml b/config/trainer_config.yaml index d09c886d14..9a60aefcf4 100644 --- a/config/trainer_config.yaml +++ b/config/trainer_config.yaml @@ -16,7 +16,7 @@ default: sequence_length: 64 summary_freq: 1000 use_recurrent: false - vis_encode_type: default + vis_encode_type: simple reward_signals: extrinsic: strength: 1.0 From 5837c71943dcc430806f866c0759714a9f8f13ec Mon Sep 17 00:00:00 2001 From: Ervin T Date: Wed, 24 Jul 2019 18:32:01 -0700 Subject: [PATCH 2/4] Tick version number for 0.9 (#2331) * Tick versions of gym, ml-agents, ml-agents-envs * Tick communication API to 9 --- UnitySDK/Assets/ML-Agents/Scripts/Academy.cs | 2 +- gym-unity/setup.py | 4 ++-- ml-agents-envs/mlagents/envs/environment.py | 2 +- ml-agents-envs/mlagents/envs/mock_communicator.py | 2 +- ml-agents-envs/setup.py | 2 +- ml-agents/setup.py | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Academy.cs b/UnitySDK/Assets/ML-Agents/Scripts/Academy.cs index d6cecb4acf..ae66f328cf 100755 --- a/UnitySDK/Assets/ML-Agents/Scripts/Academy.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/Academy.cs @@ -95,7 +95,7 @@ public abstract class Academy : MonoBehaviour [SerializeField] public BroadcastHub broadcastHub = new BroadcastHub(); - private const string kApiVersion = "API-8"; + private const string kApiVersion = "API-9"; /// Temporary storage for global gravity value /// Used to restore oringal value when deriving Academy modifies it diff --git a/gym-unity/setup.py b/gym-unity/setup.py index 92a6bbdc4d..f5ccf499c3 100755 --- a/gym-unity/setup.py +++ b/gym-unity/setup.py @@ -4,12 +4,12 @@ setup( name="gym_unity", - version="0.4.2", + version="0.4.3", description="Unity Machine Learning Agents Gym Interface", license="Apache License 2.0", author="Unity Technologies", author_email="ML-Agents@unity3d.com", url="https://github.com/Unity-Technologies/ml-agents", packages=find_packages(), - install_requires=["gym", "mlagents_envs==0.8.2"], + install_requires=["gym", "mlagents_envs==0.9.0"], ) diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index e09b7ee9bd..b6acf890cd 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -69,7 +69,7 @@ def __init__( atexit.register(self._close) self.port = base_port + worker_id self._buffer_size = 12000 - self._version_ = "API-8" + self._version_ = "API-9" self._loaded = ( False ) # If true, this means the environment was successfully loaded diff --git a/ml-agents-envs/mlagents/envs/mock_communicator.py b/ml-agents-envs/mlagents/envs/mock_communicator.py index 8a7e47bacd..da0cdf7b53 100755 --- a/ml-agents-envs/mlagents/envs/mock_communicator.py +++ b/ml-agents-envs/mlagents/envs/mock_communicator.py @@ -54,7 +54,7 @@ def initialize(self, inputs: UnityInput) -> UnityOutput: is_training=True, ) rl_init = UnityRLInitializationOutput( - name="RealFakeAcademy", version="API-8", log_path="", brain_parameters=[bp] + name="RealFakeAcademy", version="API-9", log_path="", brain_parameters=[bp] ) return UnityOutput(rl_initialization_output=rl_init) diff --git a/ml-agents-envs/setup.py b/ml-agents-envs/setup.py index 7e7fd49864..6edacf1721 100644 --- 
a/ml-agents-envs/setup.py +++ b/ml-agents-envs/setup.py @@ -5,7 +5,7 @@ setup( name="mlagents_envs", - version="0.8.2", + version="0.9.0", description="Unity Machine Learning Agents Interface", url="https://github.com/Unity-Technologies/ml-agents", author="Unity Technologies", diff --git a/ml-agents/setup.py b/ml-agents/setup.py index 355caa3f2a..c7486140ef 100644 --- a/ml-agents/setup.py +++ b/ml-agents/setup.py @@ -10,7 +10,7 @@ setup( name="mlagents", - version="0.8.2", + version="0.9.0", description="Unity Machine Learning Agents", long_description=long_description, long_description_content_type="text/markdown", @@ -29,7 +29,7 @@ ), zip_safe=False, install_requires=[ - "mlagents_envs==0.8.2", + "mlagents_envs==0.9.0", "tensorflow>=1.7,<1.8", "Pillow>=4.2.1", "matplotlib", From b78c1e0056cd2bdba74edf630473f6c8b2247652 Mon Sep 17 00:00:00 2001 From: Mantas Puida Date: Thu, 25 Jul 2019 04:41:02 +0300 Subject: [PATCH 3/4] Fix tests for Barracuda (#2333) * Removed obsolete 'TestDstWrongShape' test as it does not reflect how Barracuda tensors work * Added proper test cleanup, to avoid warning messages from finalizer thread. --- .../EditModeTestInternalBrainTensorApplier.cs | 9 ++++-- ...ditModeTestInternalBrainTensorGenerator.cs | 32 ++++++++++++------- .../ML-Agents/Editor/Tests/MultinomialTest.cs | 19 ----------- 3 files changed, 27 insertions(+), 33 deletions(-) diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs index f765cbf14c..cddebf13c7 100644 --- a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs +++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs @@ -36,8 +36,10 @@ private Dictionary GetFakeAgentInfos() public void Contruction() { var bp = new BrainParameters(); - var tensorGenerator = new TensorApplier(bp, 0, new TensorCachingAllocator()); + var alloc = new TensorCachingAllocator(); + var tensorGenerator = new TensorApplier(bp, 0, alloc); Assert.IsNotNull(tensorGenerator); + alloc.Dispose(); } [Test] @@ -76,8 +78,8 @@ public void ApplyDiscreteActionOutput() 4f, 5f, 6f, 7f, 8f}) }; var agentInfos = GetFakeAgentInfos(); - - var applier = new DiscreteActionOutputApplier(new int[]{2, 3}, 0, new TensorCachingAllocator()); + var alloc = new TensorCachingAllocator(); + var applier = new DiscreteActionOutputApplier(new int[]{2, 3}, 0, alloc); applier.Apply(inputTensor, agentInfos); var agents = agentInfos.Keys.ToList(); var agent = agents[0] as TestAgent; @@ -88,6 +90,7 @@ public void ApplyDiscreteActionOutput() action = agent.GetAction(); Assert.AreEqual(action.vectorActions[0], 1); Assert.AreEqual(action.vectorActions[1], 2); + alloc.Dispose(); } [Test] diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs index 5d47a14890..6ffcd72ed2 100644 --- a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs +++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs @@ -44,30 +44,36 @@ private Dictionary GetFakeAgentInfos() public void Contruction() { var bp = new BrainParameters(); - var tensorGenerator = new TensorGenerator(bp, 0, new TensorCachingAllocator()); + var alloc = new TensorCachingAllocator(); + var tensorGenerator = new TensorGenerator(bp, 0, alloc); Assert.IsNotNull(tensorGenerator); + alloc.Dispose(); } 
[Test] public void GenerateBatchSize() { var inputTensor = new TensorProxy(); + var alloc = new TensorCachingAllocator(); var batchSize = 4; - var generator = new BatchSizeGenerator(new TensorCachingAllocator()); + var generator = new BatchSizeGenerator(alloc); generator.Generate(inputTensor, batchSize, null); Assert.IsNotNull(inputTensor.Data); Assert.AreEqual(inputTensor.Data[0], batchSize); + alloc.Dispose(); } [Test] public void GenerateSequenceLength() { var inputTensor = new TensorProxy(); + var alloc = new TensorCachingAllocator(); var batchSize = 4; - var generator = new SequenceLengthGenerator(new TensorCachingAllocator()); + var generator = new SequenceLengthGenerator(alloc); generator.Generate(inputTensor, batchSize, null); Assert.IsNotNull(inputTensor.Data); Assert.AreEqual(inputTensor.Data[0], 1); + alloc.Dispose(); } [Test] @@ -79,14 +85,15 @@ public void GenerateVectorObservation() }; var batchSize = 4; var agentInfos = GetFakeAgentInfos(); - - var generator = new VectorObservationGenerator(new TensorCachingAllocator()); + var alloc = new TensorCachingAllocator(); + var generator = new VectorObservationGenerator(alloc); generator.Generate(inputTensor, batchSize, agentInfos); Assert.IsNotNull(inputTensor.Data); Assert.AreEqual(inputTensor.Data[0, 0], 1); Assert.AreEqual(inputTensor.Data[0, 2], 3); Assert.AreEqual(inputTensor.Data[1, 0], 4); Assert.AreEqual(inputTensor.Data[1, 2], 6); + alloc.Dispose(); } [Test] @@ -98,14 +105,15 @@ public void GenerateRecurrentInput() }; var batchSize = 4; var agentInfos = GetFakeAgentInfos(); - - var generator = new RecurrentInputGenerator(new TensorCachingAllocator()); + var alloc = new TensorCachingAllocator(); + var generator = new RecurrentInputGenerator(alloc); generator.Generate(inputTensor, batchSize, agentInfos); Assert.IsNotNull(inputTensor.Data); Assert.AreEqual(inputTensor.Data[0, 0], 0); Assert.AreEqual(inputTensor.Data[0, 4], 0); Assert.AreEqual(inputTensor.Data[1, 0], 1); Assert.AreEqual(inputTensor.Data[1, 4], 0); + alloc.Dispose(); } [Test] @@ -119,8 +127,8 @@ public void GeneratePreviousActionInput() }; var batchSize = 4; var agentInfos = GetFakeAgentInfos(); - - var generator = new PreviousActionInputGenerator(new TensorCachingAllocator()); + var alloc = new TensorCachingAllocator(); + var generator = new PreviousActionInputGenerator(alloc); generator.Generate(inputTensor, batchSize, agentInfos); Assert.IsNotNull(inputTensor.Data); @@ -128,6 +136,7 @@ public void GeneratePreviousActionInput() Assert.AreEqual(inputTensor.Data[0, 1], 2); Assert.AreEqual(inputTensor.Data[1, 0], 3); Assert.AreEqual(inputTensor.Data[1, 1], 4); + alloc.Dispose(); } [Test] @@ -141,14 +150,15 @@ public void GenerateActionMaskInput() }; var batchSize = 4; var agentInfos = GetFakeAgentInfos(); - - var generator = new ActionMaskInputGenerator(new TensorCachingAllocator()); + var alloc = new TensorCachingAllocator(); + var generator = new ActionMaskInputGenerator(alloc); generator.Generate(inputTensor, batchSize, agentInfos); Assert.IsNotNull(inputTensor.Data); Assert.AreEqual(inputTensor.Data[0, 0], 1); Assert.AreEqual(inputTensor.Data[0, 4], 1); Assert.AreEqual(inputTensor.Data[1, 0], 0); Assert.AreEqual(inputTensor.Data[1, 4], 1); + alloc.Dispose(); } } } diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/MultinomialTest.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/MultinomialTest.cs index 8b6ed0c7fb..7e7b62e605 100644 --- a/UnitySDK/Assets/ML-Agents/Editor/Tests/MultinomialTest.cs +++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/MultinomialTest.cs @@ 
-160,25 +160,6 @@ public void TestDstDataNull() Assert.Throws(() => m.Eval(src, dst)); } - [Test] - public void TestDstWrongShape() - { - Multinomial m = new Multinomial(2018); - - TensorProxy src = new TensorProxy - { - ValueType = TensorProxy.TensorType.FloatingPoint, - Data = new Tensor(0,1) - }; - TensorProxy dst = new TensorProxy - { - ValueType = TensorProxy.TensorType.FloatingPoint, - Data = new Tensor(0,2) - }; - - Assert.Throws(() => m.Eval(src, dst)); - } - [Test] public void TestUnequalBatchSize() { From 7a2a922029fe2845dc38067ed594280f0936373d Mon Sep 17 00:00:00 2001 From: sankalp04 <30798796+sankalp04@users.noreply.github.com> Date: Fri, 26 Jul 2019 10:19:37 -0700 Subject: [PATCH 4/4] Fix docs for Generalization (#2334) * Fix naming conventions for consistency * Add generalization link to ML-Agents Overview * Add generalization to main Readme * Include types of samplers available for use --- README.md | 6 +-- ...alize_test.yaml => 3dball_generalize.yaml} | 0 docs/ML-Agents-Overview.md | 11 +++- docs/Training-Generalization-Learning.md | 51 +++++++++++++++---- docs/Training-ML-Agents.md | 2 +- 5 files changed, 56 insertions(+), 14 deletions(-) rename config/{generalize_test.yaml => 3dball_generalize.yaml} (100%) diff --git a/README.md b/README.md index 906029dece..ddb167655b 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ developer communities. * 10+ sample Unity environments * Support for multiple environment configurations and training scenarios * Train memory-enhanced agents using deep reinforcement learning -* Easily definable Curriculum Learning scenarios +* Easily definable Curriculum Learning and Generalization scenarios * Broadcasting of agent behavior for supervised learning * Built-in support for Imitation Learning * Flexible agent control with On Demand Decision Making @@ -77,11 +77,11 @@ If you run into any problems using the ML-Agents toolkit, [submit an issue](https://github.com/Unity-Technologies/ml-agents/issues) and make sure to include as much detail as possible. -Your opinion matters a great deal to us. Only by hearing your thoughts on the Unity ML-Agents Toolkit can we continue to improve and grow. Please take a few minutes to [let us know about it](https://github.com/Unity-Technologies/ml-agents/issues/1454). +Your opinion matters a great deal to us. Only by hearing your thoughts on the Unity ML-Agents Toolkit can we continue to improve and grow. Please take a few minutes to [let us know about it](https://github.com/Unity-Technologies/ml-agents/issues/1454). For any other questions or feedback, connect directly with the ML-Agents -team at ml-agents@unity3d.com. +team at ml-agents@unity3d.com. ## Translations diff --git a/config/generalize_test.yaml b/config/3dball_generalize.yaml similarity index 100% rename from config/generalize_test.yaml rename to config/3dball_generalize.yaml diff --git a/docs/ML-Agents-Overview.md b/docs/ML-Agents-Overview.md index 04224be32c..daeb770745 100644 --- a/docs/ML-Agents-Overview.md +++ b/docs/ML-Agents-Overview.md @@ -320,7 +320,8 @@ actions from the human player to learn a policy. [Video Link](https://youtu.be/kpb8ZkMBFYs). ML-Agents provides ways to both learn directly from demonstrations as well as -use demonstrations to help speed up reward-based training. The +use demonstrations to help speed up reward-based training, and two algorithms to do +so (Generative Adversarial Imitation Learning and Behavioral Cloning). 
The [Training with Imitation Learning](Training-Imitation-Learning.md) tutorial covers these features in more depth. @@ -421,6 +422,14 @@ training process. the broadcasting feature [here](Learning-Environment-Design-Brains.md#using-the-broadcast-feature). +- **Training with Environment Parameter Sampling** - To train agents to be robust + to changes in its environment (i.e., generalization), the agent should be exposed + to a variety of environment variations. Similarly to Curriculum Learning, which + allows environments to get more difficult as the agent learns, we also provide + a way to randomly resample aspects of the environment during training. See + [Training with Environment Parameter Sampling](Training-Generalization-Learning.md) + to learn more about this feature. + - **Docker Set-up (Experimental)** - To facilitate setting up ML-Agents without installing Python or TensorFlow directly, we provide a [guide](Using-Docker.md) on how to create and run a Docker container. diff --git a/docs/Training-Generalization-Learning.md b/docs/Training-Generalization-Learning.md index 9578a625b9..79dea8da9e 100644 --- a/docs/Training-Generalization-Learning.md +++ b/docs/Training-Generalization-Learning.md @@ -18,8 +18,9 @@ Ball scale of 0.5 | Ball scale of 4 _Variations of the 3D Ball environment._ To vary environments, we first decide what parameters to vary in an -environment. These parameters are known as `Reset Parameters`. In the 3D ball -environment example displayed in the figure above, the reset parameters are `gravity`, `ball_mass` and `ball_scale`. +environment. We call these parameters `Reset Parameters`. In the 3D ball +environment example displayed in the figure above, the reset parameters are +`gravity`, `ball_mass` and `ball_scale`. ## How-to @@ -31,17 +32,17 @@ can be done either deterministically or randomly. This is done by assigning each reset parameter a sampler, which samples a reset parameter value (such as a uniform sampler). If a sampler isn't provided for a reset parameter, the parameter maintains the default value throughout the -training, remaining unchanged. The samplers for all the reset parameters are -handled by a **Sampler Manager**, which also handles the generation of new +training procedure, remaining unchanged. The samplers for all the reset parameters +are handled by a **Sampler Manager**, which also handles the generation of new values for the reset parameters when needed. To setup the Sampler Manager, we setup a YAML file that specifies how we wish to generate new samples. In this file, we specify the samplers and the -`resampling-duration` (number of simulation steps after which reset parameters are +`resampling-interval` (number of simulation steps after which reset parameters are resampled). Below is an example of a sampler file for the 3D ball environment. ```yaml -episode-length: 5000 +resampling-interval: 5000 mass: sampler-type: "uniform" @@ -59,7 +60,7 @@ scale: ``` -* `resampling-duration` (int) - Specifies the number of steps for agent to +* `resampling-interval` (int) - Specifies the number of steps for agent to train under a particular environment configuration before resetting the environment with a new sample of reset parameters. @@ -77,8 +78,40 @@ environment, then this specification will be ignored. key under the `multirange_uniform` sampler for the gravity reset parameter. The key name should match the name of the corresponding argument in the sampler definition. 
(Look at defining a new sampler method) + The sampler manager allocates a sampler for a reset parameter by using the *Sampler Factory*, which maintains a dictionary mapping of string keys to sampler objects. The available samplers to be used for reset parameter resampling is as available in the Sampler Factory. +#### Possible Sampler Types + +The currently implemented samplers that can be used with the `sampler-type` arguments are: + +* `uniform` - Uniform sampler + * Uniformly samples a single float value between defined endpoints. + The sub-arguments for this sampler to specify the interval + endpoints are as below. The sampling is done in the range of + [`min_value`, `max_value`). + + * **sub-arguments** - `min_value`, `max_value` + +* `gaussian` - Gaussian sampler + * Samples a single float value from the distribution characterized by + the mean and standard deviation. The sub-arguments to specify the + gaussian distribution to use are as below. + + * **sub-arguments** - `mean`, `st_dev` + +* `multirange_uniform` - Multirange Uniform sampler + * Uniformly samples a single float value between the specified intervals. + Samples by first performing a weight pick of an interval from the list + of intervals (weighted based on interval width) and samples uniformly + from the selected interval (half-closed interval, same as the uniform + sampler). This sampler can take an arbitrary number of intervals in a + list in the following format: + [[`interval_1_min`, `interval_1_max`], [`interval_2_min`, `interval_2_max`], ...] + + * **sub-arguments** - `intervals` + + The implementation of the samplers can be found at `ml-agents-envs/mlagents/envs/sampler_class.py`. ### Defining a new sampler method @@ -115,10 +148,10 @@ With the sampler file setup, we can proceed to train our agent as explained in t ### Training with Generalization Learning -We first begin with setting up the sampler file. After the sampler file is defined and configured, we proceed by launching `mlagents-learn` and specify our configured sampler file with the `--sampler` flag. To demonstrate, if we wanted to train a 3D ball agent with generalization using the `config/generalization-test.yaml` sampling setup, we can run +We first begin with setting up the sampler file. After the sampler file is defined and configured, we proceed by launching `mlagents-learn` and specify our configured sampler file with the `--sampler` flag. To demonstrate, if we wanted to train a 3D ball agent with generalization using the `config/3dball_generalize.yaml` sampling setup, we can run ```sh -mlagents-learn config/trainer_config.yaml --sampler=config/generalize_test.yaml --run-id=3D-Ball-generalization --train +mlagents-learn config/trainer_config.yaml --sampler=config/3dball_generalize.yaml --run-id=3D-Ball-generalization --train ``` We can observe progress and metrics via Tensorboard. diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index 1c12c32956..a36bfcca71 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -196,7 +196,7 @@ are conducting, see: * [Training with PPO](Training-PPO.md) * [Using Recurrent Neural Networks](Feature-Memory.md) * [Training with Curriculum Learning](Training-Curriculum-Learning.md) -* [Training with Generalization](Training-Generalization-Learning.md) +* [Training with Environment Parameter Sampling](Training-Generalization-Learning.md) * [Training with Imitation Learning](Training-Imitation-Learning.md) You can also compare the
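
For reference, a minimal sketch of the `multirange_uniform` behavior that the documentation added in PR #2334 describes (a weighted pick of one interval, by interval width, followed by a uniform draw from the chosen half-open interval). This is an illustrative standalone snippet, not the implementation in `ml-agents-envs/mlagents/envs/sampler_class.py`; the function name and the example interval values are hypothetical.

```python
import random

def multirange_uniform(intervals):
    """Illustrative sketch only: sample one float from a list of
    [min, max) intervals, weighting the interval pick by interval width."""
    # Pick an interval, weighted by its width, as described in the docs.
    widths = [hi - lo for lo, hi in intervals]
    lo, hi = random.choices(intervals, weights=widths, k=1)[0]
    # Uniform draw from the chosen half-open interval [lo, hi).
    return lo + random.random() * (hi - lo)

# Hypothetical intervals in the documented format:
# [[interval_1_min, interval_1_max], [interval_2_min, interval_2_max], ...]
print(multirange_uniform([[7.0, 10.0], [15.0, 20.0]]))
```

Under this reading, the plain `uniform` sampler is the degenerate single-interval case, while `multirange_uniform` biases the draw toward wider intervals before sampling uniformly within the one selected.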