2 changes: 1 addition & 1 deletion docs/Learning-Environment-Design.md
@@ -31,7 +31,7 @@ To create a training environment, extend the Academy and Agent classes to implem

To train and use ML-Agents in a Unity scene, the scene must contain a single Academy subclass along with as many Brain objects and Agent subclasses as you need. Any Brain instances in the scene must be attached to GameObjects that are children of the Academy in the Unity Scene Hierarchy. Agent instances should be attached to the GameObject representing that agent.

-[Screenshot of scene hierarchy]
+![Scene Hierarchy](images/scene-hierarchy.png)

You must assign a brain to every agent, but you can share brains between multiple agents. Each agent will make its own observations and act independently, but will use the same decision-making logic and, for **Internal** brains, the same trained TensorFlow model.
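The practical consequence of brain sharing is that one set of decision-making weights serves every attached agent, while each agent still feeds in its own observation. A toy NumPy sketch of that idea (the linear "policy" and all shapes are illustrative, not the ML-Agents implementation):

```python
import numpy as np

np.random.seed(0)
obs_size, action_size, n_agents = 8, 2, 3

# One shared "brain": a single set of weights used by every agent.
shared_weights = np.random.randn(obs_size, action_size)

# Each agent collects its own observation (one row per agent).
observations = np.random.randn(n_agents, obs_size)

# The same weights are applied to every row, so the agents act
# independently but with identical decision-making logic.
actions = observations @ shared_weights
print(actions.shape)  # (3, 2): one action vector per agent
```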

2 changes: 1 addition & 1 deletion docs/Learning-Environment-Examples.md
@@ -109,7 +109,7 @@ If you would like to contribute environments, please see our
* -0.0005 for every step.
* +1.0 if the agent touches the goal.
* -1.0 if the agent falls off the platform.
-* Brains: One brain with the following observation/action space.
+* Brains: Two brains, each with the following observation/action space.
* Vector Observation space: (Continuous) 16 variables corresponding to position and velocities of agent, block, and goal, plus the height of the wall.
* Vector Action space: (Discrete) Size of 74, corresponding to 14 raycasts each detecting 4 possible objects, plus the global position of the agent and whether or not the agent is grounded.
* Visual Observations: None.
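The reward function listed above is simple enough to mirror directly; a hedged sketch (the boolean flags are assumptions about what the environment tracks, not repo code):

```python
def step_reward(touched_goal, fell_off_platform):
    """Per-step reward as described in the list above."""
    reward = -0.0005          # small existential penalty every step
    if touched_goal:
        reward += 1.0         # reaching the goal
    if fell_off_platform:
        reward -= 1.0         # falling off the platform
    return reward

print(step_reward(False, False))  # -0.0005
print(step_reward(True, False))   # 0.9995
```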
4 changes: 3 additions & 1 deletion python/unitytrainers/bc/models.py
@@ -22,7 +22,9 @@ def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,

        if brain.vector_action_space_type == "discrete":
            self.action_probs = tf.nn.softmax(self.policy)
-            self.sample_action = tf.cast(tf.multinomial(self.policy, 1, name="action"), tf.int32)
+            self.sample_action_float = tf.multinomial(self.policy, 1)
+            self.sample_action_float = tf.identity(self.sample_action_float, name="action")
+            self.sample_action = tf.cast(self.sample_action_float, tf.int32)
            self.true_action = tf.placeholder(shape=[None], dtype=tf.int32, name="teacher_action")
            self.action_oh = tf.one_hot(self.true_action, self.a_size)
            self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
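The reason for splitting the sampling op into three lines above is easier to see in isolation: `tf.multinomial` draws an integer action index from the policy logits, and wrapping it in `tf.identity(..., name="action")` pins a stable node name onto the sampled value so it can be fetched by name (for example by a runtime that loads the exported graph). A minimal TF 1.x sketch with illustrative shapes; the logits placeholder and session usage here are assumptions, not repo code:

```python
import numpy as np
import tensorflow as tf  # TF 1.x style API, as used by the trainers

logits = tf.placeholder(tf.float32, shape=[None, 4], name="policy_logits")
sampled = tf.multinomial(logits, 1)               # int64, shape [batch, 1]
sampled = tf.identity(sampled, name="action")     # fetchable as "action:0"
sample_action = tf.cast(sampled, tf.int32)        # int32 copy for internal use

with tf.Session() as sess:
    out = sess.run("action:0",
                   feed_dict={logits: np.log([[0.1, 0.2, 0.3, 0.4]])})
    print(out)  # e.g. [[3]]
```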
15 changes: 10 additions & 5 deletions python/unitytrainers/bc/trainer.py
@@ -54,7 +54,8 @@ def __init__(self, sess, env, brain_name, trainer_parameters, training, seed):
        self.stats = {'losses': [], 'episode_length': [], 'cumulative_reward': []}

        self.training_buffer = Buffer()
-        self.is_continuous = (env.brains[brain_name].vector_action_space_type == "continuous")
+        self.is_continuous_action = (env.brains[brain_name].vector_action_space_type == "continuous")
+        self.is_continuous_observation = (env.brains[brain_name].vector_observation_space_type == "continuous")
        self.use_observations = (env.brains[brain_name].number_visual_observations > 0)
        if self.use_observations:
            logger.info('Cannot use observations with imitation learning')
@@ -286,12 +287,16 @@ def update_model(self):
            end = (j + 1) * self.n_sequences
            batch_states = np.array(_buffer['vector_observations'][start:end])
            batch_actions = np.array(_buffer['actions'][start:end])
-            feed_dict = {self.model.true_action: batch_actions.reshape([-1, self.brain.vector_action_space_size]),
-                         self.model.dropout_rate: 0.5,
+            feed_dict = {self.model.dropout_rate: 0.5,
                         self.model.batch_size: self.n_sequences,
                         self.model.sequence_length: self.sequence_length}
-            if not self.is_continuous:
-                feed_dict[self.model.vector_in] = batch_states.reshape([-1, 1])
+            if self.is_continuous_action:
+                feed_dict[self.model.true_action] = batch_actions.reshape([-1, self.brain.vector_action_space_size])
+            else:
+                feed_dict[self.model.true_action] = batch_actions.reshape([-1])
+            if not self.is_continuous_observation:
+                feed_dict[self.model.vector_in] = batch_states.reshape([-1, self.brain.num_stacked_vector_observations])
            else:
                feed_dict[self.model.vector_in] = batch_states.reshape([-1, self.brain.vector_observation_space_size *
                                                                        self.brain.num_stacked_vector_observations])
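The two reshapes for `true_action` above come down to placeholder shapes: the discrete branch feeds a flat vector of integer class indices into a `[None]` placeholder (which `tf.one_hot` then expands), while the continuous branch feeds one row of `vector_action_space_size` floats per step. A NumPy sketch with made-up sizes:

```python
import numpy as np

batch_size, action_size = 3, 2

# Continuous control: each recorded teacher action is a real-valued vector,
# so the batch keeps a [batch, action_size] shape.
continuous_actions = np.random.uniform(-1, 1, size=(batch_size, action_size))
print(continuous_actions.reshape([-1, action_size]).shape)  # (3, 2)

# Discrete control: each recorded teacher action is a single integer index,
# so the batch is flattened to a 1-D vector of indices.
discrete_actions = np.array([[1], [0], [3]])
print(discrete_actions.reshape([-1]).shape)  # (3,)
```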


@@ -83,11 +83,35 @@ public void MoveAgent(float[] act)
        Vector3 dirToGo = Vector3.zero;
        Vector3 rotateDir = Vector3.zero;

        if (!frozen)
        {
-            dirToGo = transform.forward * Mathf.Clamp(act[0], -1f, 1f);
-            rotateDir = transform.up * Mathf.Clamp(act[1], -1f, 1f);
-            if (Mathf.Clamp(act[2], 0f, 1f) > 0.5f)
+            bool shootCommand = false;
+            if (brain.brainParameters.vectorActionSpaceType == SpaceType.continuous)
+            {
+                dirToGo = transform.forward * Mathf.Clamp(act[0], -1f, 1f);
+                rotateDir = transform.up * Mathf.Clamp(act[1], -1f, 1f);
+                shootCommand = Mathf.Clamp(act[2], 0f, 1f) > 0.5f;
+            }
+            else
+            {
+                switch ((int)(act[0]))
+                {
+                    case 1:
+                        dirToGo = transform.forward;
+                        break;
+                    case 2:
+                        shootCommand = true;
+                        break;
+                    case 3:
+                        rotateDir = -transform.up;
+                        break;
+                    case 4:
+                        rotateDir = transform.up;
+                        break;
+                }
+            }
+            if (shootCommand)
            {
                shoot = true;
                dirToGo *= 0.5f;
@@ -121,9 +145,9 @@ public void MoveAgent(float[] act)
myLaser.transform.localScale = new Vector3(0f, 0f, 0f);

}

}


void Freeze()
{
gameObject.tag = "frozenAgent";
@@ -182,8 +206,8 @@ public override void AgentReset()
agentRB.velocity = Vector3.zero;
bananas = 0;
myLaser.transform.localScale = new Vector3(0f, 0f, 0f);
transform.position = new Vector3(Random.Range(-myArea.range, myArea.range),
2f, Random.Range(-myArea.range, myArea.range))
transform.position = new Vector3(Random.Range(-myArea.range, myArea.range),
2f, Random.Range(-myArea.range, myArea.range))
+ area.transform.position;
transform.rotation = Quaternion.Euler(new Vector3(0f, Random.Range(0, 360)));
}
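For reference, the two action encodings handled by `MoveAgent` above differ in shape as well as meaning: a continuous brain sends three clamped floats (move, rotate, shoot trigger), while a discrete brain sends a single integer index matched by the `switch`. A hypothetical Python helper, not part of the repo, that builds the `act` payload for each case:

```python
def encode_command(command, space_type):
    """Encode a high-level command as the act[] array MoveAgent expects."""
    if space_type == "continuous":
        # act[0] = move, act[1] = rotate, act[2] = shoot trigger (> 0.5 fires)
        return {
            "noop":         [0.0, 0.0, 0.0],
            "forward":      [1.0, 0.0, 0.0],
            "rotate_plus":  [0.0, 1.0, 0.0],   # rotateDir = +transform.up
            "rotate_minus": [0.0, -1.0, 0.0],  # rotateDir = -transform.up
            "shoot":        [0.0, 0.0, 1.0],
        }[command]
    # Discrete: a single integer index, matching the switch cases above.
    return {
        "noop": [0], "forward": [1], "shoot": [2],
        "rotate_minus": [3], "rotate_plus": [4],
    }[command]

print(encode_command("shoot", "continuous"))  # [0.0, 0.0, 1.0]
print(encode_command("shoot", "discrete"))    # [2]
```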


@@ -513,7 +513,7 @@ public void OnInspector()
            pixels = 1;
        else
            pixels = 3;
-        float[,,,] result = new float[batchSize, width, height, pixels];
+        float[,,,] result = new float[batchSize, height, width, pixels];

        for (int b = 0; b < batchSize; b++)
        {
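The change above swaps the two spatial dimensions so the visual-observation batch is allocated as `[batch, height, width, channels]`, matching the NHWC layout that TensorFlow convolution inputs use by default. A NumPy sketch with made-up, non-square image sizes shows why the order matters:

```python
import numpy as np

# Illustrative sizes only: 2 RGB camera frames, 64 px wide and 36 px tall.
batch_size, width, height, channels = 2, 64, 36, 3

# NHWC: first spatial axis is the pixel row (height), second is the column.
nhwc = np.zeros((batch_size, height, width, channels))     # shape (2, 36, 64, 3)

# Swapping the spatial axes only goes unnoticed for square images; for
# anything else the indices no longer line up with pixel rows and columns.
swapped = np.zeros((batch_size, width, height, channels))  # shape (2, 64, 36, 3)

print(nhwc.shape, swapped.shape)
```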
38 changes: 1 addition & 37 deletions unity-environment/ProjectSettings/EditorBuildSettings.asset
@@ -4,40 +4,4 @@
EditorBuildSettings:
  m_ObjectHideFlags: 0
  serializedVersion: 2
  m_Scenes:
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/GridWorld/GridWorld.unity
-    guid: 7c777442467e245108558a5155153927
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Tennis/Tennis.unity
-    guid: 25c0c9e81e55c4e129e1a5c0ac254100
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Banana/BananaImitation.unity
-    guid: 3ae10073cde7641f488ef7c87862333a
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/PushBlock/Scenes/PushBlock.unity
-    guid: ae8cc75939e3e4d07a79c8c6a08b54f4
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/3DBall/3DScene.unity
-    guid: 6f62a2ccb3830437ea4e85a617e856b3
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/3DBall/3DHardScene.unity
-    guid: 35c41099ceec44889bdbe95ed86c97ac
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Banana/BananaRL.unity
-    guid: 11583205ab5b74bb4bb1b9951cf9e437
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Basic/Scene.unity
-    guid: cf1d119a8748d406e90ecb623b45f92f
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Bouncer/Bouncer.unity
-    guid: 2c29359d4c9fe49219b21cd83e246596
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Crawler/Crawler.unity
-    guid: 4cf841b0478fb4b33971627b40c6420b
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Hallway/Scenes/Hallway.unity
-    guid: d6d6a33ed0e18459a8d61817d600978a
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Reacher/Scene.unity
-    guid: e58a3c10c43de4b6b91b7149838d1dfb
  m_configObjects: {}