Remove graph scope (#1205)
* initial commit : Only works with PPO balance ball

* Fix for recurrent

* [Fix indentation error]

* Fixed BC

* Remove Dead code

* Addressing comment : Removing dead code

* Fixing the Pytest

* edited comments

* Removing GraphScope from the InternalBrain (#1227)

* Documentation changes for removing graph scope (#1226)

* Documentation changes

* removed the keep checkpoint printing
vincentpierre committed Sep 17, 2018
1 parent 384b11f commit 0855eec
Showing 18 changed files with 395 additions and 418 deletions.
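For context, the sketch below is illustrative only and not part of the commit. It shows why the `graphScope` field existed and why it can now be dropped: under the TensorFlow 1.x API used by ML-Agents at this time, building a model inside a variable scope prefixes every op name with that scope, so the Unity Internal Brain had to prepend the prefix when fetching nodes; with the scope removed, node names are bare and can be fetched directly. The scope name `'PPO'` mirrors the old default in `bc/models.py`.

```python
import tensorflow as tf  # TensorFlow 1.x API

# Before this commit: the model was built inside a variable scope,
# so every op name picked up the scope prefix (e.g. "PPO/action") and
# the Unity side had to prepend the Graph Scope when looking nodes up.
with tf.Graph().as_default():
    with tf.variable_scope('PPO'):
        action = tf.identity(tf.zeros([1, 2]), name='action')
    print(action.name)  # "PPO/action:0"

# After this commit: no scope, so the op keeps its bare name ("action")
# and can be fetched directly by name.
with tf.Graph().as_default():
    action = tf.identity(tf.zeros([1, 2]), name='action')
    print(action.name)  # "action:0"
```

This is the same name difference visible in the `CoreBrainInternal.cs` diff below, where `graph[graphScope + ActionPlaceholderName]` becomes `graph[ActionPlaceholderName]`.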
61 changes: 23 additions & 38 deletions UnitySDK/Assets/ML-Agents/Scripts/CoreBrainInternal.cs
@@ -41,9 +41,6 @@ public enum TensorType
/// Modify only in inspector : Reference to the Graph asset
public TextAsset graphModel;

/// Modify only in inspector : If a scope was used when training the model, specify it here
public string graphScope;

[SerializeField]
[Tooltip(
"If your graph takes additional inputs that are fixed (example: noise level) you can specify them here.")]
@@ -136,40 +133,35 @@ public void InitializeCoreBrain(MLAgents.Batcher brainBatcher)

// TODO: Make this a loop over a dynamic set of graph inputs

if ((graphScope.Length > 1) && (graphScope[graphScope.Length - 1] != '/'))
{
graphScope = graphScope + '/';
}

if (graph[graphScope + BatchSizePlaceholderName] != null)
if (graph[BatchSizePlaceholderName] != null)
{
hasBatchSize = true;
}

if ((graph[graphScope + RecurrentInPlaceholderName] != null) &&
(graph[graphScope + RecurrentOutPlaceholderName] != null))
if ((graph[RecurrentInPlaceholderName] != null) &&
(graph[RecurrentOutPlaceholderName] != null))
{
hasRecurrent = true;
var runner = session.GetRunner();
runner.Fetch(graph[graphScope + "memory_size"][0]);
runner.Fetch(graph["memory_size"][0]);
var networkOutput = runner.Run()[0].GetValue();
memorySize = (int) networkOutput;
}

if (graph[graphScope + VectorObservationPlacholderName] != null)
if (graph[VectorObservationPlacholderName] != null)
{
hasState = true;
}

if (graph[graphScope + PreviousActionPlaceholderName] != null)
if (graph[PreviousActionPlaceholderName] != null)
{
hasPrevAction = true;
}
if (graph[graphScope + "value_estimate"] != null)
if (graph["value_estimate"] != null)
{
hasValueEstimate = true;
}
if (graph[graphScope + ActionMaskPlaceholderName] != null)
if (graph[ActionMaskPlaceholderName] != null)
{
hasMaskedActions = true;
}
@@ -304,18 +296,18 @@ public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
var runner = session.GetRunner();
try
{
runner.Fetch(graph[graphScope + ActionPlaceholderName][0]);
runner.Fetch(graph[ActionPlaceholderName][0]);
}
catch
{
throw new UnityAgentsException(string.Format(
@"The node {0} could not be found. Please make sure the graphScope {1} is correct",
graphScope + ActionPlaceholderName, graphScope));
@"The node {0} could not be found. Please make sure the node name is correct",
ActionPlaceholderName));
}

if (hasBatchSize)
{
runner.AddInput(graph[graphScope + BatchSizePlaceholderName][0], new int[] {currentBatchSize});
runner.AddInput(graph[BatchSizePlaceholderName][0], new int[] {currentBatchSize});
}

foreach (TensorFlowAgentPlaceholder placeholder in graphPlaceholders)
@@ -324,12 +316,12 @@ public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
{
if (placeholder.valueType == TensorFlowAgentPlaceholder.TensorType.FloatingPoint)
{
runner.AddInput(graph[graphScope + placeholder.name][0],
runner.AddInput(graph[placeholder.name][0],
new float[] {Random.Range(placeholder.minValue, placeholder.maxValue)});
}
else if (placeholder.valueType == TensorFlowAgentPlaceholder.TensorType.Integer)
{
runner.AddInput(graph[graphScope + placeholder.name][0],
runner.AddInput(graph[placeholder.name][0],
new int[] {Random.Range((int) placeholder.minValue, (int) placeholder.maxValue + 1)});
}
}
@@ -338,26 +330,26 @@ public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
throw new UnityAgentsException(string.Format(
@"One of the Tensorflow placeholder cound nout be found.
In brain {0}, there are no {1} placeholder named {2}.",
brain.gameObject.name, placeholder.valueType.ToString(), graphScope + placeholder.name));
brain.gameObject.name, placeholder.valueType.ToString(), placeholder.name));
}
}

// Create the state tensor
if (hasState)
{
runner.AddInput(graph[graphScope + VectorObservationPlacholderName][0], inputState);
runner.AddInput(graph[VectorObservationPlacholderName][0], inputState);
}

// Create the previous action tensor
if (hasPrevAction)
{
runner.AddInput(graph[graphScope + PreviousActionPlaceholderName][0], inputPrevAction);
runner.AddInput(graph[PreviousActionPlaceholderName][0], inputPrevAction);
}

// Create the mask action tensor
if (hasMaskedActions)
{
runner.AddInput(graph[graphScope + ActionMaskPlaceholderName][0], maskedActions);
runner.AddInput(graph[ActionMaskPlaceholderName][0], maskedActions);
}

// Create the observation tensors
@@ -366,20 +358,20 @@ public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
obsNumber < brain.brainParameters.cameraResolutions.Length;
obsNumber++)
{
runner.AddInput(graph[graphScope + VisualObservationPlaceholderName[obsNumber]][0],
runner.AddInput(graph[VisualObservationPlaceholderName[obsNumber]][0],
observationMatrixList[obsNumber]);
}

if (hasRecurrent)
{
runner.AddInput(graph[graphScope + "sequence_length"][0], 1);
runner.AddInput(graph[graphScope + RecurrentInPlaceholderName][0], inputOldMemories);
runner.Fetch(graph[graphScope + RecurrentOutPlaceholderName][0]);
runner.AddInput(graph["sequence_length"][0], 1);
runner.AddInput(graph[RecurrentInPlaceholderName][0], inputOldMemories);
runner.Fetch(graph[RecurrentOutPlaceholderName][0]);
}

if (hasValueEstimate)
{
runner.Fetch(graph[graphScope + "value_estimate"][0]);
runner.Fetch(graph["value_estimate"][0]);
}

TFTensor[] networkOutput;
@@ -504,13 +496,6 @@ public void OnInspector()
{
EditorGUILayout.HelpBox("Please provide a tensorflow graph as a bytes file.", MessageType.Error);
}


graphScope =
EditorGUILayout.TextField(new GUIContent("Graph Scope",
"If you set a scope while training your tensorflow model, " +
"all your placeholder name will have a prefix. You must specify that prefix here."), graphScope);

if (BatchSizePlaceholderName == "")
{
BatchSizePlaceholderName = "batch_size";
2 changes: 1 addition & 1 deletion docs/Basic-Guide.md
@@ -192,12 +192,12 @@ INFO:mlagents.envs:Hyperparameters for the PPO Trainer of brain Ball3DBrain:
sequence_length: 64
summary_freq: 1000
use_recurrent: False
graph_scope:
summary_path: ./summaries/first-run-0
memory_size: 256
use_curiosity: False
curiosity_strength: 0.01
curiosity_enc_size: 128
model_path: ./models/first-run-0/Ball3DBrain
INFO:mlagents.trainers: first-run-0: Ball3DBrain: Step: 1000. Mean Reward: 1.242. Std of Reward: 0.746. Training.
INFO:mlagents.trainers: first-run-0: Ball3DBrain: Step: 2000. Mean Reward: 1.319. Std of Reward: 0.693. Training.
INFO:mlagents.trainers: first-run-0: Ball3DBrain: Step: 3000. Mean Reward: 1.804. Std of Reward: 1.056. Training.
22 changes: 0 additions & 22 deletions docs/FAQ.md
@@ -44,28 +44,6 @@ C# scripts, then adding the `CoreBrain` back. Make sure your brain is in
Internal mode, your TensorFlowSharp plugin is imported and the
ENABLE_TENSORFLOW flag is set. This fix is only valid locally and unstable.

## Tensorflow epsilon placeholder error

If you have a graph placeholder set in the Internal Brain inspector that is not
present in the TensorFlow graph, you will see some error like this:

```console
UnityAgentsException: One of the TensorFlow placeholder could not be found. In brain <some_brain_name>, there are no FloatingPoint placeholder named <some_placeholder_name>.
```

Solution: Go to all of your Brain object, find `Graph placeholders` and change
its `size` to 0 to remove the `epsilon` placeholder.

Similarly, if you have a graph scope set in the Internal Brain inspector that is
not correctly set, you will see some error like this:

```console
UnityAgentsException: The node <Wrong_Graph_Scope>/action could not be found. Please make sure the graphScope <Wrong_Graph_Scope>/ is correct
```

Solution: Make sure your Graph Scope field matches the corresponding Brain
object name in your Hierarchy Inspector when there are multiple Brains.

## Environment Permission Error

If you directly import your Unity environment without building it in the
5 changes: 0 additions & 5 deletions docs/Learning-Environment-Design-External-Internal-Brains.md
@@ -81,11 +81,6 @@ which must be set to the .bytes file containing the trained model itself.
Only change the following Internal Brain properties if you have created your own
TensorFlow model and are not using an ML-Agents model:

* `Graph Scope` : If you set a scope while training your TensorFlow model, all
your placeholder name will have a prefix. You must specify that prefix here.
Note that if more than one Brain were set to external during training, you
must give a `Graph Scope` to the Internal Brain corresponding to the name of
the Brain GameObject.
* `Batch Size Node Name` : If the batch size is one of the inputs of your
graph, you must specify the name of the placeholder here. The Brain will make
the batch size equal to the number of Agents connected to the Brain
2 changes: 1 addition & 1 deletion docs/Learning-Environment-Executable.md
@@ -185,12 +185,12 @@ INFO:mlagents.envs:Hyperparameters for the PPO Trainer of brain Ball3DBrain:
sequence_length: 64
summary_freq: 1000
use_recurrent: False
graph_scope:
summary_path: ./summaries/first-run-0
memory_size: 256
use_curiosity: False
curiosity_strength: 0.01
curiosity_enc_size: 128
model_path: ./models/first-run-0/Ball3DBrain
INFO:mlagents.trainers: first-run-0: Ball3DBrain: Step: 1000. Mean Reward: 1.242. Std of Reward: 0.746. Training.
INFO:mlagents.trainers: first-run-0: Ball3DBrain: Step: 2000. Mean Reward: 1.319. Std of Reward: 0.693. Training.
INFO:mlagents.trainers: first-run-0: Ball3DBrain: Step: 3000. Mean Reward: 1.804. Std of Reward: 1.056. Training.
4 changes: 0 additions & 4 deletions docs/Using-TensorFlow-Sharp-in-Unity.md
@@ -93,10 +93,6 @@ Your model will be saved with the name `your_name_graph.bytes` and will contain
both the graph and associated weights. Note that you must save your graph as a
.bytes file so Unity can load it.

In the Unity Editor, you must specify the names of the nodes used by your graph
in the **Internal** Brain Inspector window. If you used a scope when defining
your graph, specify it in the `Graph Scope` field.

![Internal Brain Inspector](images/internal_brain.png)

See
93 changes: 46 additions & 47 deletions ml-agents/mlagents/trainers/bc/models.py
@@ -5,52 +5,51 @@

class BehavioralCloningModel(LearningModel):
def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
normalize=False, use_recurrent=False, scope='PPO', seed=0):
with tf.variable_scope(scope):
LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
num_streams = 1
hidden_streams = self.create_observation_streams(num_streams, h_size, n_layers)
hidden = hidden_streams[0]
self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
if self.use_recurrent:
tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in,
self.sequence_length)
self.memory_out = tf.identity(self.memory_out, name='recurrent_out')
normalize=False, use_recurrent=False, seed=0):
LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
num_streams = 1
hidden_streams = self.create_observation_streams(num_streams, h_size, n_layers)
hidden = hidden_streams[0]
self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
if self.use_recurrent:
tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in,
self.sequence_length)
self.memory_out = tf.identity(self.memory_out, name='recurrent_out')

if brain.vector_action_space_type == "discrete":
policy_branches = []
for size in self.act_size:
policy_branches.append(
tf.layers.dense(
hidden,
size,
activation=None,
use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01)))
self.action_probs = tf.concat(
[tf.nn.softmax(branch) for branch in policy_branches], axis=1, name="action_probs")
self.action_masks = tf.placeholder(shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks")
self.sample_action_float, _ = self.create_discrete_action_masking_layer(
tf.concat(policy_branches, axis = 1), self.action_masks, self.act_size)
self.sample_action_float = tf.identity(self.sample_action_float, name="action")
self.sample_action = tf.cast(self.sample_action_float, tf.int32)
self.true_action = tf.placeholder(shape=[None, len(policy_branches)], dtype=tf.int32, name="teacher_action")
self.action_oh = tf.concat([
tf.one_hot(self.true_action[:, i], self.act_size[i]) for i in range(len(self.act_size))], axis=1)
self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
self.action_percent = tf.reduce_mean(tf.cast(
tf.equal(tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32), self.sample_action), tf.float32))
else:
self.policy = tf.layers.dense(hidden_reg, self.act_size[0], activation=None, use_bias=False, name='pre_action',
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
self.clipped_sample_action = tf.clip_by_value(self.policy, -1, 1)
self.sample_action = tf.identity(self.clipped_sample_action, name="action")
self.true_action = tf.placeholder(shape=[None, self.act_size[0]], dtype=tf.float32, name="teacher_action")
self.clipped_true_action = tf.clip_by_value(self.true_action, -1, 1)
self.loss = tf.reduce_sum(tf.squared_difference(self.clipped_true_action, self.sample_action))
if brain.vector_action_space_type == "discrete":
policy_branches = []
for size in self.act_size:
policy_branches.append(
tf.layers.dense(
hidden,
size,
activation=None,
use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01)))
self.action_probs = tf.concat(
[tf.nn.softmax(branch) for branch in policy_branches], axis=1, name="action_probs")
self.action_masks = tf.placeholder(shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks")
self.sample_action_float, _ = self.create_discrete_action_masking_layer(
tf.concat(policy_branches, axis = 1), self.action_masks, self.act_size)
self.sample_action_float = tf.identity(self.sample_action_float, name="action")
self.sample_action = tf.cast(self.sample_action_float, tf.int32)
self.true_action = tf.placeholder(shape=[None, len(policy_branches)], dtype=tf.int32, name="teacher_action")
self.action_oh = tf.concat([
tf.one_hot(self.true_action[:, i], self.act_size[i]) for i in range(len(self.act_size))], axis=1)
self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
self.action_percent = tf.reduce_mean(tf.cast(
tf.equal(tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32), self.sample_action), tf.float32))
else:
self.policy = tf.layers.dense(hidden_reg, self.act_size[0], activation=None, use_bias=False, name='pre_action',
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
self.clipped_sample_action = tf.clip_by_value(self.policy, -1, 1)
self.sample_action = tf.identity(self.clipped_sample_action, name="action")
self.true_action = tf.placeholder(shape=[None, self.act_size[0]], dtype=tf.float32, name="teacher_action")
self.clipped_true_action = tf.clip_by_value(self.true_action, -1, 1)
self.loss = tf.reduce_sum(tf.squared_difference(self.clipped_true_action, self.sample_action))

optimizer = tf.train.AdamOptimizer(learning_rate=lr)
self.update = optimizer.minimize(self.loss)
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
self.update = optimizer.minimize(self.loss)
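
A minimal usage sketch based on the new constructor signature shown above (hypothetical; `brain` stands in for the brain parameters object the trainer normally supplies): the `scope` keyword argument is gone, so the model's ops keep their unprefixed names.

```python
# Hypothetical construction of the refactored model: with the
# tf.variable_scope wrapper removed there is no `scope` argument,
# and op names like "action" and "recurrent_out" stay unprefixed.
model = BehavioralCloningModel(brain, h_size=128, lr=1e-4, n_layers=2,
                               m_size=128, normalize=False,
                               use_recurrent=False, seed=0)
```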
