Remove graph scope (#1205)
* initial commit : Only works with PPO balance ball

* Fix for recurrent

* [Fix indentation error]

* Fixed BC

* Remove Dead code

* Addressing comment : Removing dead code

* Fixing the Pytest

* edited comments

* Removing GraphScope from the InternalBrain (#1227)

* Documentation changes for removing graph scope (#1226)

* Documentation changes

* removed the keep checkpoint printing
vincentpierre committed Sep 17, 2018
1 parent 384b11f commit 0855eec
Showing 18 changed files with 395 additions and 418 deletions.
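For context, the sketch below is illustrative only and not part of the commit. It shows why the `graphScope` field existed and why it can now be dropped: under the TensorFlow 1.x API used by ML-Agents at this time, building a model inside a variable scope prefixes every op name with that scope, so the Unity Internal Brain had to prepend the prefix when fetching nodes; with the scope removed, node names are bare and can be fetched directly. The scope name `'PPO'` mirrors the old default in `bc/models.py`.

```python
import tensorflow as tf  # TensorFlow 1.x API

# Before this commit: the model was built inside a variable scope,
# so every op name picked up the scope prefix (e.g. "PPO/action") and
# the Unity side had to prepend the Graph Scope when looking nodes up.
with tf.Graph().as_default():
    with tf.variable_scope('PPO'):
        action = tf.identity(tf.zeros([1, 2]), name='action')
    print(action.name)  # "PPO/action:0"

# After this commit: no scope, so the op keeps its bare name ("action")
# and can be fetched directly by name.
with tf.Graph().as_default():
    action = tf.identity(tf.zeros([1, 2]), name='action')
    print(action.name)  # "action:0"
```

This is the same name difference visible in the `CoreBrainInternal.cs` diff below, where `graph[graphScope + ActionPlaceholderName]` becomes `graph[ActionPlaceholderName]`.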
61 changes: 23 additions & 38 deletions UnitySDK/Assets/ML-Agents/Scripts/CoreBrainInternal.cs
@@ -41,9 +41,6 @@ public enum TensorType
/// Modify only in inspector : Reference to the Graph asset
public TextAsset graphModel;

/// Modify only in inspector : If a scope was used when training the model, specify it here
public string graphScope;

[SerializeField]
[Tooltip(
"If your graph takes additional inputs that are fixed (example: noise level) you can specify them here.")]
@@ -136,40 +133,35 @@ public void InitializeCoreBrain(MLAgents.Batcher brainBatcher)

// TODO: Make this a loop over a dynamic set of graph inputs

if ((graphScope.Length > 1) && (graphScope[graphScope.Length - 1] != '/'))
{
graphScope = graphScope + '/';
}

if (graph[graphScope + BatchSizePlaceholderName] != null)
if (graph[BatchSizePlaceholderName] != null)
{
hasBatchSize = true;
}

if ((graph[graphScope + RecurrentInPlaceholderName] != null) &&
(graph[graphScope + RecurrentOutPlaceholderName] != null))
if ((graph[RecurrentInPlaceholderName] != null) &&
(graph[RecurrentOutPlaceholderName] != null))
{
hasRecurrent = true;
var runner = session.GetRunner();
runner.Fetch(graph[graphScope + "memory_size"][0]);
runner.Fetch(graph["memory_size"][0]);
var networkOutput = runner.Run()[0].GetValue();
memorySize = (int) networkOutput;
}

if (graph[graphScope + VectorObservationPlacholderName] != null)
if (graph[VectorObservationPlacholderName] != null)
{
hasState = true;
}

if (graph[graphScope + PreviousActionPlaceholderName] != null)
if (graph[PreviousActionPlaceholderName] != null)
{
hasPrevAction = true;
}
if (graph[graphScope + "value_estimate"] != null)
if (graph["value_estimate"] != null)
{
hasValueEstimate = true;
}
if (graph[graphScope + ActionMaskPlaceholderName] != null)
if (graph[ActionMaskPlaceholderName] != null)
{
hasMaskedActions = true;
}
@@ -304,18 +296,18 @@ public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
var runner = session.GetRunner();
try
{
runner.Fetch(graph[graphScope + ActionPlaceholderName][0]);
runner.Fetch(graph[ActionPlaceholderName][0]);
}
catch
{
throw new UnityAgentsException(string.Format(
@"The node {0} could not be found. Please make sure the graphScope {1} is correct",
graphScope + ActionPlaceholderName, graphScope));
@"The node {0} could not be found. Please make sure the node name is correct",
ActionPlaceholderName));
}

if (hasBatchSize)
{
runner.AddInput(graph[graphScope + BatchSizePlaceholderName][0], new int[] {currentBatchSize});
runner.AddInput(graph[BatchSizePlaceholderName][0], new int[] {currentBatchSize});
}

foreach (TensorFlowAgentPlaceholder placeholder in graphPlaceholders)
@@ -324,12 +316,12 @@ public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
{
if (placeholder.valueType == TensorFlowAgentPlaceholder.TensorType.FloatingPoint)
{
runner.AddInput(graph[graphScope + placeholder.name][0],
runner.AddInput(graph[placeholder.name][0],
new float[] {Random.Range(placeholder.minValue, placeholder.maxValue)});
}
else if (placeholder.valueType == TensorFlowAgentPlaceholder.TensorType.Integer)
{
runner.AddInput(graph[graphScope + placeholder.name][0],
runner.AddInput(graph[placeholder.name][0],
new int[] {Random.Range((int) placeholder.minValue, (int) placeholder.maxValue + 1)});
}
}
@@ -338,26 +330,26 @@ public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
throw new UnityAgentsException(string.Format(
@"One of the Tensorflow placeholder cound nout be found.
In brain {0}, there are no {1} placeholder named {2}.",
brain.gameObject.name, placeholder.valueType.ToString(), graphScope + placeholder.name));
brain.gameObject.name, placeholder.valueType.ToString(), placeholder.name));
}
}

// Create the state tensor
if (hasState)
{
runner.AddInput(graph[graphScope + VectorObservationPlacholderName][0], inputState);
runner.AddInput(graph[VectorObservationPlacholderName][0], inputState);
}

// Create the previous action tensor
if (hasPrevAction)
{
runner.AddInput(graph[graphScope + PreviousActionPlaceholderName][0], inputPrevAction);
runner.AddInput(graph[PreviousActionPlaceholderName][0], inputPrevAction);
}

// Create the mask action tensor
if (hasMaskedActions)
{
runner.AddInput(graph[graphScope + ActionMaskPlaceholderName][0], maskedActions);
runner.AddInput(graph[ActionMaskPlaceholderName][0], maskedActions);
}

// Create the observation tensors
@@ -366,20 +358,20 @@ public void DecideAction(Dictionary<Agent, AgentInfo> agentInfo)
obsNumber < brain.brainParameters.cameraResolutions.Length;
obsNumber++)
{
runner.AddInput(graph[graphScope + VisualObservationPlaceholderName[obsNumber]][0],
runner.AddInput(graph[VisualObservationPlaceholderName[obsNumber]][0],
observationMatrixList[obsNumber]);
}

if (hasRecurrent)
{
runner.AddInput(graph[graphScope + "sequence_length"][0], 1);
runner.AddInput(graph[graphScope + RecurrentInPlaceholderName][0], inputOldMemories);
runner.Fetch(graph[graphScope + RecurrentOutPlaceholderName][0]);
runner.AddInput(graph["sequence_length"][0], 1);
runner.AddInput(graph[RecurrentInPlaceholderName][0], inputOldMemories);
runner.Fetch(graph[RecurrentOutPlaceholderName][0]);
}

if (hasValueEstimate)
{
runner.Fetch(graph[graphScope + "value_estimate"][0]);
runner.Fetch(graph["value_estimate"][0]);
}

TFTensor[] networkOutput;
@@ -504,13 +496,6 @@ public void OnInspector()
{
EditorGUILayout.HelpBox("Please provide a tensorflow graph as a bytes file.", MessageType.Error);
}


graphScope =
EditorGUILayout.TextField(new GUIContent("Graph Scope",
"If you set a scope while training your tensorflow model, " +
"all your placeholder name will have a prefix. You must specify that prefix here."), graphScope);

if (BatchSizePlaceholderName == "")
{
BatchSizePlaceholderName = "batch_size";
2 changes: 1 addition & 1 deletion docs/Basic-Guide.md
@@ -192,12 +192,12 @@ INFO:mlagents.envs:Hyperparameters for the PPO Trainer of brain Ball3DBrain:
sequence_length: 64
summary_freq: 1000
use_recurrent: False
graph_scope:
summary_path: ./summaries/first-run-0
memory_size: 256
use_curiosity: False
curiosity_strength: 0.01
curiosity_enc_size: 128
model_path: ./models/first-run-0/Ball3DBrain
INFO:mlagents.trainers: first-run-0: Ball3DBrain: Step: 1000. Mean Reward: 1.242. Std of Reward: 0.746. Training.
INFO:mlagents.trainers: first-run-0: Ball3DBrain: Step: 2000. Mean Reward: 1.319. Std of Reward: 0.693. Training.
INFO:mlagents.trainers: first-run-0: Ball3DBrain: Step: 3000. Mean Reward: 1.804. Std of Reward: 1.056. Training.
22 changes: 0 additions & 22 deletions docs/FAQ.md
@@ -44,28 +44,6 @@ C# scripts, then adding the `CoreBrain` back. Make sure your brain is in
Internal mode, your TensorFlowSharp plugin is imported and the
ENABLE_TENSORFLOW flag is set. This fix is only valid locally and unstable.

## Tensorflow epsilon placeholder error

If you have a graph placeholder set in the Internal Brain inspector that is not
present in the TensorFlow graph, you will see some error like this:

```console
UnityAgentsException: One of the TensorFlow placeholder could not be found. In brain <some_brain_name>, there are no FloatingPoint placeholder named <some_placeholder_name>.
```

Solution: Go to all of your Brain object, find `Graph placeholders` and change
its `size` to 0 to remove the `epsilon` placeholder.

Similarly, if you have a graph scope set in the Internal Brain inspector that is
not correctly set, you will see some error like this:

```console
UnityAgentsException: The node <Wrong_Graph_Scope>/action could not be found. Please make sure the graphScope <Wrong_Graph_Scope>/ is correct
```

Solution: Make sure your Graph Scope field matches the corresponding Brain
object name in your Hierarchy Inspector when there are multiple Brains.

## Environment Permission Error

If you directly import your Unity environment without building it in the
5 changes: 0 additions & 5 deletions docs/Learning-Environment-Design-External-Internal-Brains.md
@@ -81,11 +81,6 @@ which must be set to the .bytes file containing the trained model itself.
Only change the following Internal Brain properties if you have created your own
TensorFlow model and are not using an ML-Agents model:

* `Graph Scope` : If you set a scope while training your TensorFlow model, all
your placeholder name will have a prefix. You must specify that prefix here.
Note that if more than one Brain were set to external during training, you
must give a `Graph Scope` to the Internal Brain corresponding to the name of
the Brain GameObject.
* `Batch Size Node Name` : If the batch size is one of the inputs of your
graph, you must specify the name of the placeholder here. The Brain will make
the batch size equal to the number of Agents connected to the Brain
2 changes: 1 addition & 1 deletion docs/Learning-Environment-Executable.md
@@ -185,12 +185,12 @@ INFO:mlagents.envs:Hyperparameters for the PPO Trainer of brain Ball3DBrain:
sequence_length: 64
summary_freq: 1000
use_recurrent: False
graph_scope:
summary_path: ./summaries/first-run-0
memory_size: 256
use_curiosity: False
curiosity_strength: 0.01
curiosity_enc_size: 128
model_path: ./models/first-run-0/Ball3DBrain
INFO:mlagents.trainers: first-run-0: Ball3DBrain: Step: 1000. Mean Reward: 1.242. Std of Reward: 0.746. Training.
INFO:mlagents.trainers: first-run-0: Ball3DBrain: Step: 2000. Mean Reward: 1.319. Std of Reward: 0.693. Training.
INFO:mlagents.trainers: first-run-0: Ball3DBrain: Step: 3000. Mean Reward: 1.804. Std of Reward: 1.056. Training.
4 changes: 0 additions & 4 deletions docs/Using-TensorFlow-Sharp-in-Unity.md
@@ -93,10 +93,6 @@ Your model will be saved with the name `your_name_graph.bytes` and will contain
both the graph and associated weights. Note that you must save your graph as a
.bytes file so Unity can load it.

In the Unity Editor, you must specify the names of the nodes used by your graph
in the **Internal** Brain Inspector window. If you used a scope when defining
your graph, specify it in the `Graph Scope` field.

![Internal Brain Inspector](images/internal_brain.png)

See
93 changes: 46 additions & 47 deletions ml-agents/mlagents/trainers/bc/models.py
@@ -5,52 +5,51 @@

class BehavioralCloningModel(LearningModel):
def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
normalize=False, use_recurrent=False, scope='PPO', seed=0):
with tf.variable_scope(scope):
LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
num_streams = 1
hidden_streams = self.create_observation_streams(num_streams, h_size, n_layers)
hidden = hidden_streams[0]
self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
if self.use_recurrent:
tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in,
self.sequence_length)
self.memory_out = tf.identity(self.memory_out, name='recurrent_out')
normalize=False, use_recurrent=False, seed=0):
LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
num_streams = 1
hidden_streams = self.create_observation_streams(num_streams, h_size, n_layers)
hidden = hidden_streams[0]
self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
if self.use_recurrent:
tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in,
self.sequence_length)
self.memory_out = tf.identity(self.memory_out, name='recurrent_out')

if brain.vector_action_space_type == "discrete":
policy_branches = []
for size in self.act_size:
policy_branches.append(
tf.layers.dense(
hidden,
size,
activation=None,
use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01)))
self.action_probs = tf.concat(
[tf.nn.softmax(branch) for branch in policy_branches], axis=1, name="action_probs")
self.action_masks = tf.placeholder(shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks")
self.sample_action_float, _ = self.create_discrete_action_masking_layer(
tf.concat(policy_branches, axis = 1), self.action_masks, self.act_size)
self.sample_action_float = tf.identity(self.sample_action_float, name="action")
self.sample_action = tf.cast(self.sample_action_float, tf.int32)
self.true_action = tf.placeholder(shape=[None, len(policy_branches)], dtype=tf.int32, name="teacher_action")
self.action_oh = tf.concat([
tf.one_hot(self.true_action[:, i], self.act_size[i]) for i in range(len(self.act_size))], axis=1)
self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
self.action_percent = tf.reduce_mean(tf.cast(
tf.equal(tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32), self.sample_action), tf.float32))
else:
self.policy = tf.layers.dense(hidden_reg, self.act_size[0], activation=None, use_bias=False, name='pre_action',
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
self.clipped_sample_action = tf.clip_by_value(self.policy, -1, 1)
self.sample_action = tf.identity(self.clipped_sample_action, name="action")
self.true_action = tf.placeholder(shape=[None, self.act_size[0]], dtype=tf.float32, name="teacher_action")
self.clipped_true_action = tf.clip_by_value(self.true_action, -1, 1)
self.loss = tf.reduce_sum(tf.squared_difference(self.clipped_true_action, self.sample_action))
if brain.vector_action_space_type == "discrete":
policy_branches = []
for size in self.act_size:
policy_branches.append(
tf.layers.dense(
hidden,
size,
activation=None,
use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01)))
self.action_probs = tf.concat(
[tf.nn.softmax(branch) for branch in policy_branches], axis=1, name="action_probs")
self.action_masks = tf.placeholder(shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks")
self.sample_action_float, _ = self.create_discrete_action_masking_layer(
tf.concat(policy_branches, axis = 1), self.action_masks, self.act_size)
self.sample_action_float = tf.identity(self.sample_action_float, name="action")
self.sample_action = tf.cast(self.sample_action_float, tf.int32)
self.true_action = tf.placeholder(shape=[None, len(policy_branches)], dtype=tf.int32, name="teacher_action")
self.action_oh = tf.concat([
tf.one_hot(self.true_action[:, i], self.act_size[i]) for i in range(len(self.act_size))], axis=1)
self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
self.action_percent = tf.reduce_mean(tf.cast(
tf.equal(tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32), self.sample_action), tf.float32))
else:
self.policy = tf.layers.dense(hidden_reg, self.act_size[0], activation=None, use_bias=False, name='pre_action',
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
self.clipped_sample_action = tf.clip_by_value(self.policy, -1, 1)
self.sample_action = tf.identity(self.clipped_sample_action, name="action")
self.true_action = tf.placeholder(shape=[None, self.act_size[0]], dtype=tf.float32, name="teacher_action")
self.clipped_true_action = tf.clip_by_value(self.true_action, -1, 1)
self.loss = tf.reduce_sum(tf.squared_difference(self.clipped_true_action, self.sample_action))

optimizer = tf.train.AdamOptimizer(learning_rate=lr)
self.update = optimizer.minimize(self.loss)
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
self.update = optimizer.minimize(self.loss)
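
A minimal usage sketch based on the new constructor signature shown above (hypothetical; `brain` stands in for the brain parameters object the trainer normally supplies): the `scope` keyword argument is gone, so the model's ops keep their unprefixed names.

```python
# Hypothetical construction of the refactored model: with the
# tf.variable_scope wrapper removed there is no `scope` argument,
# and op names like "action" and "recurrent_out" stay unprefixed.
model = BehavioralCloningModel(brain, h_size=128, lr=1e-4, n_layers=2,
                               m_size=128, normalize=False,
                               use_recurrent=False, seed=0)
```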
