Merged
6 changes: 3 additions & 3 deletions README.md
@@ -27,7 +27,7 @@ developer communities.
* 10+ sample Unity environments
* Support for multiple environment configurations and training scenarios
* Train memory-enhanced agents using deep reinforcement learning
* Easily definable Curriculum Learning scenarios
* Easily definable Curriculum Learning and Generalization scenarios
* Broadcasting of agent behavior for supervised learning
* Built-in support for Imitation Learning
* Flexible agent control with On Demand Decision Making
@@ -77,11 +77,11 @@ If you run into any problems using the ML-Agents toolkit,
[submit an issue](https://github.com/Unity-Technologies/ml-agents/issues) and
make sure to include as much detail as possible.

Your opinion matters a great deal to us. Only by hearing your thoughts on the Unity ML-Agents Toolkit can we continue to improve and grow. Please take a few minutes to [let us know about it](https://github.com/Unity-Technologies/ml-agents/issues/1454).
Your opinion matters a great deal to us. Only by hearing your thoughts on the Unity ML-Agents Toolkit can we continue to improve and grow. Please take a few minutes to [let us know about it](https://github.com/Unity-Technologies/ml-agents/issues/1454).


For any other questions or feedback, connect directly with the ML-Agents
team at ml-agents@unity3d.com.
team at ml-agents@unity3d.com.

## Translations

@@ -36,8 +36,10 @@ private Dictionary<Agent, AgentInfo> GetFakeAgentInfos()
public void Contruction()
{
var bp = new BrainParameters();
var tensorGenerator = new TensorApplier(bp, 0, new TensorCachingAllocator());
var alloc = new TensorCachingAllocator();
var tensorGenerator = new TensorApplier(bp, 0, alloc);
Assert.IsNotNull(tensorGenerator);
alloc.Dispose();
}

[Test]
@@ -76,8 +78,8 @@ public void ApplyDiscreteActionOutput()
4f, 5f, 6f, 7f, 8f})
};
var agentInfos = GetFakeAgentInfos();

var applier = new DiscreteActionOutputApplier(new int[]{2, 3}, 0, new TensorCachingAllocator());
var alloc = new TensorCachingAllocator();
var applier = new DiscreteActionOutputApplier(new int[]{2, 3}, 0, alloc);
applier.Apply(inputTensor, agentInfos);
var agents = agentInfos.Keys.ToList();
var agent = agents[0] as TestAgent;
@@ -88,6 +90,7 @@ public void ApplyDiscreteActionOutput()
action = agent.GetAction();
Assert.AreEqual(action.vectorActions[0], 1);
Assert.AreEqual(action.vectorActions[1], 2);
alloc.Dispose();
}

[Test]
@@ -44,30 +44,36 @@ private Dictionary<Agent, AgentInfo> GetFakeAgentInfos()
public void Contruction()
{
var bp = new BrainParameters();
var tensorGenerator = new TensorGenerator(bp, 0, new TensorCachingAllocator());
var alloc = new TensorCachingAllocator();
var tensorGenerator = new TensorGenerator(bp, 0, alloc);
Assert.IsNotNull(tensorGenerator);
alloc.Dispose();
}

[Test]
public void GenerateBatchSize()
{
var inputTensor = new TensorProxy();
var alloc = new TensorCachingAllocator();
var batchSize = 4;
var generator = new BatchSizeGenerator(new TensorCachingAllocator());
var generator = new BatchSizeGenerator(alloc);
generator.Generate(inputTensor, batchSize, null);
Assert.IsNotNull(inputTensor.Data);
Assert.AreEqual(inputTensor.Data[0], batchSize);
alloc.Dispose();
}

[Test]
public void GenerateSequenceLength()
{
var inputTensor = new TensorProxy();
var alloc = new TensorCachingAllocator();
var batchSize = 4;
var generator = new SequenceLengthGenerator(new TensorCachingAllocator());
var generator = new SequenceLengthGenerator(alloc);
generator.Generate(inputTensor, batchSize, null);
Assert.IsNotNull(inputTensor.Data);
Assert.AreEqual(inputTensor.Data[0], 1);
alloc.Dispose();
}

[Test]
@@ -79,14 +85,15 @@ public void GenerateVectorObservation()
};
var batchSize = 4;
var agentInfos = GetFakeAgentInfos();

var generator = new VectorObservationGenerator(new TensorCachingAllocator());
var alloc = new TensorCachingAllocator();
var generator = new VectorObservationGenerator(alloc);
generator.Generate(inputTensor, batchSize, agentInfos);
Assert.IsNotNull(inputTensor.Data);
Assert.AreEqual(inputTensor.Data[0, 0], 1);
Assert.AreEqual(inputTensor.Data[0, 2], 3);
Assert.AreEqual(inputTensor.Data[1, 0], 4);
Assert.AreEqual(inputTensor.Data[1, 2], 6);
alloc.Dispose();
}

[Test]
@@ -98,14 +105,15 @@ public void GenerateRecurrentInput()
};
var batchSize = 4;
var agentInfos = GetFakeAgentInfos();

var generator = new RecurrentInputGenerator(new TensorCachingAllocator());
var alloc = new TensorCachingAllocator();
var generator = new RecurrentInputGenerator(alloc);
generator.Generate(inputTensor, batchSize, agentInfos);
Assert.IsNotNull(inputTensor.Data);
Assert.AreEqual(inputTensor.Data[0, 0], 0);
Assert.AreEqual(inputTensor.Data[0, 4], 0);
Assert.AreEqual(inputTensor.Data[1, 0], 1);
Assert.AreEqual(inputTensor.Data[1, 4], 0);
alloc.Dispose();
}

[Test]
@@ -119,15 +127,16 @@ public void GeneratePreviousActionInput()
};
var batchSize = 4;
var agentInfos = GetFakeAgentInfos();

var generator = new PreviousActionInputGenerator(new TensorCachingAllocator());
var alloc = new TensorCachingAllocator();
var generator = new PreviousActionInputGenerator(alloc);

generator.Generate(inputTensor, batchSize, agentInfos);
Assert.IsNotNull(inputTensor.Data);
Assert.AreEqual(inputTensor.Data[0, 0], 1);
Assert.AreEqual(inputTensor.Data[0, 1], 2);
Assert.AreEqual(inputTensor.Data[1, 0], 3);
Assert.AreEqual(inputTensor.Data[1, 1], 4);
alloc.Dispose();
}

[Test]
@@ -141,14 +150,15 @@ public void GenerateActionMaskInput()
};
var batchSize = 4;
var agentInfos = GetFakeAgentInfos();

var generator = new ActionMaskInputGenerator(new TensorCachingAllocator());
var alloc = new TensorCachingAllocator();
var generator = new ActionMaskInputGenerator(alloc);
generator.Generate(inputTensor, batchSize, agentInfos);
Assert.IsNotNull(inputTensor.Data);
Assert.AreEqual(inputTensor.Data[0, 0], 1);
Assert.AreEqual(inputTensor.Data[0, 4], 1);
Assert.AreEqual(inputTensor.Data[1, 0], 0);
Assert.AreEqual(inputTensor.Data[1, 4], 1);
alloc.Dispose();
}
}
}
19 changes: 0 additions & 19 deletions UnitySDK/Assets/ML-Agents/Editor/Tests/MultinomialTest.cs
@@ -160,25 +160,6 @@ public void TestDstDataNull()
Assert.Throws<ArgumentNullException>(() => m.Eval(src, dst));
}

[Test]
public void TestDstWrongShape()
{
Multinomial m = new Multinomial(2018);

TensorProxy src = new TensorProxy
{
ValueType = TensorProxy.TensorType.FloatingPoint,
Data = new Tensor(0,1)
};
TensorProxy dst = new TensorProxy
{
ValueType = TensorProxy.TensorType.FloatingPoint,
Data = new Tensor(0,2)
};

Assert.Throws<ArgumentException>(() => m.Eval(src, dst));
}

[Test]
public void TestUnequalBatchSize()
{
2 changes: 1 addition & 1 deletion UnitySDK/Assets/ML-Agents/Scripts/Academy.cs
@@ -95,7 +95,7 @@ public abstract class Academy : MonoBehaviour
[SerializeField]
public BroadcastHub broadcastHub = new BroadcastHub();

private const string kApiVersion = "API-8";
private const string kApiVersion = "API-9";

/// Temporary storage for global gravity value
/// Used to restore oringal value when deriving Academy modifies it
File renamed without changes.
2 changes: 1 addition & 1 deletion config/trainer_config.yaml
@@ -16,7 +16,7 @@ default:
sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
vis_encode_type: simple
reward_signals:
extrinsic:
strength: 1.0
11 changes: 10 additions & 1 deletion docs/ML-Agents-Overview.md
@@ -320,7 +320,8 @@ actions from the human player to learn a policy. [Video
Link](https://youtu.be/kpb8ZkMBFYs).

ML-Agents provides ways to both learn directly from demonstrations as well as
use demonstrations to help speed up reward-based training. The
use demonstrations to help speed up reward-based training, and two algorithms to do
so (Generative Adversarial Imitation Learning and Behavioral Cloning). The
[Training with Imitation Learning](Training-Imitation-Learning.md) tutorial
covers these features in more depth.

@@ -421,6 +422,14 @@ training process.
the broadcasting feature
[here](Learning-Environment-Design-Brains.md#using-the-broadcast-feature).

- **Training with Environment Parameter Sampling** - To train agents to be robust
to changes in its environment (i.e., generalization), the agent should be exposed
to a variety of environment variations. Similarly to Curriculum Learning, which
allows environments to get more difficult as the agent learns, we also provide
a way to randomly resample aspects of the environment during training. See
[Training with Environment Parameter Sampling](Training-Generalization-Learning.md)
to learn more about this feature.

- **Docker Set-up (Experimental)** - To facilitate setting up ML-Agents without
installing Python or TensorFlow directly, we provide a
[guide](Using-Docker.md) on how to create and run a Docker container.
51 changes: 42 additions & 9 deletions docs/Training-Generalization-Learning.md
@@ -18,8 +18,9 @@ Ball scale of 0.5 | Ball scale of 4
_Variations of the 3D Ball environment._

To vary environments, we first decide what parameters to vary in an
environment. These parameters are known as `Reset Parameters`. In the 3D ball
environment example displayed in the figure above, the reset parameters are `gravity`, `ball_mass` and `ball_scale`.
environment. We call these parameters `Reset Parameters`. In the 3D ball
environment example displayed in the figure above, the reset parameters are
`gravity`, `ball_mass` and `ball_scale`.


## How-to
Expand All @@ -31,17 +32,17 @@ can be done either deterministically or randomly.
This is done by assigning each reset parameter a sampler, which samples a reset
parameter value (such as a uniform sampler). If a sampler isn't provided for a
reset parameter, the parameter maintains the default value throughout the
training, remaining unchanged. The samplers for all the reset parameters are
handled by a **Sampler Manager**, which also handles the generation of new
training procedure, remaining unchanged. The samplers for all the reset parameters
are handled by a **Sampler Manager**, which also handles the generation of new
values for the reset parameters when needed.

To setup the Sampler Manager, we setup a YAML file that specifies how we wish to
generate new samples. In this file, we specify the samplers and the
`resampling-duration` (number of simulation steps after which reset parameters are
`resampling-interval` (number of simulation steps after which reset parameters are
resampled). Below is an example of a sampler file for the 3D ball environment.

```yaml
episode-length: 5000
resampling-interval: 5000

mass:
sampler-type: "uniform"
Expand All @@ -59,7 +60,7 @@ scale:

```

* `resampling-duration` (int) - Specifies the number of steps for agent to
* `resampling-interval` (int) - Specifies the number of steps for agent to
train under a particular environment configuration before resetting the
environment with a new sample of reset parameters.

@@ -77,8 +78,40 @@ environment, then this specification will be ignored.
key under the `multirange_uniform` sampler for the gravity reset parameter.
The key name should match the name of the corresponding argument in the sampler definition. (Look at defining a new sampler method)


The sampler manager allocates a sampler for a reset parameter by using the *Sampler Factory*, which maintains a dictionary mapping of string keys to sampler objects. The samplers available for reset parameter resampling are the ones registered in the Sampler Factory.

#### Possible Sampler Types

The currently implemented samplers that can be used with the `sampler-type` arguments are:

* `uniform` - Uniform sampler
* Uniformly samples a single float value between defined endpoints.
The sub-arguments for this sampler to specify the interval
endpoints are as below. The sampling is done in the range of
[`min_value`, `max_value`).

* **sub-arguments** - `min_value`, `max_value`

* `gaussian` - Gaussian sampler
* Samples a single float value from the distribution characterized by
the mean and standard deviation. The sub-arguments to specify the
gaussian distribution to use are as below (see the example entry after this list).

* **sub-arguments** - `mean`, `st_dev`

* `multirange_uniform` - Multirange Uniform sampler
* Uniformly samples a single float value between the specified intervals.
Samples by first performing a weight pick of an interval from the list
of intervals (weighted based on interval width) and samples uniformly
from the selected interval (half-closed interval, same as the uniform
sampler). This sampler can take an arbitrary number of intervals in a
list in the following format:
[[`interval_1_min`, `interval_1_max`], [`interval_2_min`, `interval_2_max`], ...]

* **sub-arguments** - `intervals`
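
For concreteness, below is a minimal sketch of what a `gaussian` entry could look like in a sampler file, using the key names listed above (`sampler-type`, `mean`, `st_dev`). The choice of the `mass` reset parameter and the numeric values are placeholders for illustration, not part of any shipped configuration.

```yaml
# Hypothetical sampler-file entry (illustrative values only): resample the
# "mass" reset parameter from a Gaussian with mean 1.0 and standard
# deviation 0.3 each time the resampling interval elapses.
mass:
    sampler-type: "gaussian"
    mean: 1.0
    st_dev: 0.3
```

With such an entry in place, the Sampler Manager would draw a new value for `mass` from this distribution whenever the reset parameters are resampled.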


The implementation of the samplers can be found at `ml-agents-envs/mlagents/envs/sampler_class.py`.

### Defining a new sampler method
@@ -115,10 +148,10 @@ With the sampler file setup, we can proceed to train our agent as explained in t

### Training with Generalization Learning

We first begin with setting up the sampler file. After the sampler file is defined and configured, we proceed by launching `mlagents-learn` and specify our configured sampler file with the `--sampler` flag. To demonstrate, if we wanted to train a 3D ball agent with generalization using the `config/generalization-test.yaml` sampling setup, we can run
We first begin with setting up the sampler file. After the sampler file is defined and configured, we proceed by launching `mlagents-learn` and specify our configured sampler file with the `--sampler` flag. To demonstrate, if we wanted to train a 3D ball agent with generalization using the `config/3dball_generalize.yaml` sampling setup, we can run

```sh
mlagents-learn config/trainer_config.yaml --sampler=config/generalize_test.yaml --run-id=3D-Ball-generalization --train
mlagents-learn config/trainer_config.yaml --sampler=config/3dball_generalize.yaml --run-id=3D-Ball-generalization --train
```

We can observe progress and metrics via Tensorboard.
2 changes: 1 addition & 1 deletion docs/Training-ML-Agents.md
@@ -196,7 +196,7 @@ are conducting, see:
* [Training with PPO](Training-PPO.md)
* [Using Recurrent Neural Networks](Feature-Memory.md)
* [Training with Curriculum Learning](Training-Curriculum-Learning.md)
* [Training with Generalization](Training-Generalization-Learning.md)
* [Training with Environment Parameter Sampling](Training-Generalization-Learning.md)
* [Training with Imitation Learning](Training-Imitation-Learning.md)

You can also compare the
4 changes: 2 additions & 2 deletions gym-unity/setup.py
@@ -4,12 +4,12 @@

setup(
name="gym_unity",
version="0.4.2",
version="0.4.3",
description="Unity Machine Learning Agents Gym Interface",
license="Apache License 2.0",
author="Unity Technologies",
author_email="ML-Agents@unity3d.com",
url="https://github.com/Unity-Technologies/ml-agents",
packages=find_packages(),
install_requires=["gym", "mlagents_envs==0.8.2"],
install_requires=["gym", "mlagents_envs==0.9.0"],
)
2 changes: 1 addition & 1 deletion ml-agents-envs/mlagents/envs/environment.py
@@ -69,7 +69,7 @@ def __init__(
atexit.register(self._close)
self.port = base_port + worker_id
self._buffer_size = 12000
self._version_ = "API-8"
self._version_ = "API-9"
self._loaded = (
False
) # If true, this means the environment was successfully loaded
2 changes: 1 addition & 1 deletion ml-agents-envs/mlagents/envs/mock_communicator.py
@@ -54,7 +54,7 @@ def initialize(self, inputs: UnityInput) -> UnityOutput:
is_training=True,
)
rl_init = UnityRLInitializationOutput(
name="RealFakeAcademy", version="API-8", log_path="", brain_parameters=[bp]
name="RealFakeAcademy", version="API-9", log_path="", brain_parameters=[bp]
)
return UnityOutput(rl_initialization_output=rl_init)

2 changes: 1 addition & 1 deletion ml-agents-envs/setup.py
@@ -5,7 +5,7 @@

setup(
name="mlagents_envs",
version="0.8.2",
version="0.9.0",
description="Unity Machine Learning Agents Interface",
url="https://github.com/Unity-Technologies/ml-agents",
author="Unity Technologies",