2 changes: 1 addition & 1 deletion docs/Learning-Environment-Design.md
@@ -31,7 +31,7 @@ To create a training environment, extend the Academy and Agent classes to implem

To train and use ML-Agents in a Unity scene, the scene must contain a single Academy subclass along with as many Brain objects and Agent subclasses as you need. Any Brain instances in the scene must be attached to GameObjects that are children of the Academy in the Unity Scene Hierarchy. Agent instances should be attached to the GameObject representing that agent.

-[Screenshot of scene hierarchy]
+![Scene Hierarchy](images/scene-hierarchy.png)

You must assign a brain to every agent, but you can share brains between multiple agents. Each agent will make its own observations and act independently, but will use the same decision-making logic and, for **Internal** brains, the same trained TensorFlow model.
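The practical consequence of brain sharing is that one set of decision-making weights serves every attached agent, while each agent still feeds in its own observation. A toy NumPy sketch of that idea (the linear "policy" and all shapes are illustrative, not the ML-Agents implementation):

```python
import numpy as np

np.random.seed(0)
obs_size, action_size, n_agents = 8, 2, 3

# One shared "brain": a single set of weights used by every agent.
shared_weights = np.random.randn(obs_size, action_size)

# Each agent collects its own observation (one row per agent).
observations = np.random.randn(n_agents, obs_size)

# The same weights are applied to every row, so the agents act
# independently but with identical decision-making logic.
actions = observations @ shared_weights
print(actions.shape)  # (3, 2): one action vector per agent
```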

2 changes: 1 addition & 1 deletion docs/Learning-Environment-Examples.md
@@ -109,7 +109,7 @@ If you would like to contribute environments, please see our
* -0.0005 for every step.
* +1.0 if the agent touches the goal.
* -1.0 if the agent falls off the platform.
-* Brains: One brain with the following observation/action space.
+* Brains: Two brains, each with the following observation/action space.
* Vector Observation space: (Continuous) 16 variables corresponding to position and velocities of agent, block, and goal, plus the height of the wall.
* Vector Action space: (Discrete) Size of 74, corresponding to 14 raycasts each detecting 4 possible objects, plus the global position of the agent and whether or not the agent is grounded.
* Visual Observations: None.
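The reward function listed above is simple enough to mirror directly; a hedged sketch (the boolean flags are assumptions about what the environment tracks, not repo code):

```python
def step_reward(touched_goal, fell_off_platform):
    """Per-step reward as described in the list above."""
    reward = -0.0005          # small existential penalty every step
    if touched_goal:
        reward += 1.0         # reaching the goal
    if fell_off_platform:
        reward -= 1.0         # falling off the platform
    return reward

print(step_reward(False, False))  # -0.0005
print(step_reward(True, False))   # 0.9995
```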
4 changes: 3 additions & 1 deletion python/unitytrainers/bc/models.py
@@ -22,7 +22,9 @@ def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,

        if brain.vector_action_space_type == "discrete":
            self.action_probs = tf.nn.softmax(self.policy)
-            self.sample_action = tf.cast(tf.multinomial(self.policy, 1, name="action"), tf.int32)
+            self.sample_action_float = tf.multinomial(self.policy, 1)
+            self.sample_action_float = tf.identity(self.sample_action_float, name="action")
+            self.sample_action = tf.cast(self.sample_action_float, tf.int32)
            self.true_action = tf.placeholder(shape=[None], dtype=tf.int32, name="teacher_action")
            self.action_oh = tf.one_hot(self.true_action, self.a_size)
            self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
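The reason for splitting the sampling op into three lines above is easier to see in isolation: `tf.multinomial` draws an integer action index from the policy logits, and wrapping it in `tf.identity(..., name="action")` pins a stable node name onto the sampled value so it can be fetched by name (for example by a runtime that loads the exported graph). A minimal TF 1.x sketch with illustrative shapes; the logits placeholder and session usage here are assumptions, not repo code:

```python
import numpy as np
import tensorflow as tf  # TF 1.x style API, as used by the trainers

logits = tf.placeholder(tf.float32, shape=[None, 4], name="policy_logits")
sampled = tf.multinomial(logits, 1)               # int64, shape [batch, 1]
sampled = tf.identity(sampled, name="action")     # fetchable as "action:0"
sample_action = tf.cast(sampled, tf.int32)        # int32 copy for internal use

with tf.Session() as sess:
    out = sess.run("action:0",
                   feed_dict={logits: np.log([[0.1, 0.2, 0.3, 0.4]])})
    print(out)  # e.g. [[3]]
```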
15 changes: 10 additions & 5 deletions python/unitytrainers/bc/trainer.py
@@ -54,7 +54,8 @@ def __init__(self, sess, env, brain_name, trainer_parameters, training, seed):
        self.stats = {'losses': [], 'episode_length': [], 'cumulative_reward': []}

        self.training_buffer = Buffer()
-        self.is_continuous = (env.brains[brain_name].vector_action_space_type == "continuous")
+        self.is_continuous_action = (env.brains[brain_name].vector_action_space_type == "continuous")
+        self.is_continuous_observation = (env.brains[brain_name].vector_observation_space_type == "continuous")
        self.use_observations = (env.brains[brain_name].number_visual_observations > 0)
        if self.use_observations:
            logger.info('Cannot use observations with imitation learning')
@@ -286,12 +287,16 @@ def update_model(self):
            end = (j + 1) * self.n_sequences
            batch_states = np.array(_buffer['vector_observations'][start:end])
            batch_actions = np.array(_buffer['actions'][start:end])
-            feed_dict = {self.model.true_action: batch_actions.reshape([-1, self.brain.vector_action_space_size]),
-                         self.model.dropout_rate: 0.5,
+            feed_dict = {self.model.dropout_rate: 0.5,
                         self.model.batch_size: self.n_sequences,
                         self.model.sequence_length: self.sequence_length}
-            if not self.is_continuous:
-                feed_dict[self.model.vector_in] = batch_states.reshape([-1, 1])
+            if self.is_continuous_action:
+                feed_dict[self.model.true_action] = batch_actions.reshape([-1, self.brain.vector_action_space_size])
+            else:
+                feed_dict[self.model.true_action] = batch_actions.reshape([-1])
+            if not self.is_continuous_observation:
+                feed_dict[self.model.vector_in] = batch_states.reshape([-1, self.brain.num_stacked_vector_observations])
            else:
                feed_dict[self.model.vector_in] = batch_states.reshape([-1, self.brain.vector_observation_space_size *
                                                                        self.brain.num_stacked_vector_observations])
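The two reshapes for `true_action` above come down to placeholder shapes: the discrete branch feeds a flat vector of integer class indices into a `[None]` placeholder (which `tf.one_hot` then expands), while the continuous branch feeds one row of `vector_action_space_size` floats per step. A NumPy sketch with made-up sizes:

```python
import numpy as np

batch_size, action_size = 3, 2

# Continuous control: each recorded teacher action is a real-valued vector,
# so the batch keeps a [batch, action_size] shape.
continuous_actions = np.random.uniform(-1, 1, size=(batch_size, action_size))
print(continuous_actions.reshape([-1, action_size]).shape)  # (3, 2)

# Discrete control: each recorded teacher action is a single integer index,
# so the batch is flattened to a 1-D vector of indices.
discrete_actions = np.array([[1], [0], [3]])
print(discrete_actions.reshape([-1]).shape)  # (3,)
```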


@@ -83,11 +83,35 @@ public void MoveAgent(float[] act)
        Vector3 dirToGo = Vector3.zero;
        Vector3 rotateDir = Vector3.zero;

        if (!frozen)
        {
-            dirToGo = transform.forward * Mathf.Clamp(act[0], -1f, 1f);
-            rotateDir = transform.up * Mathf.Clamp(act[1], -1f, 1f);
-            if (Mathf.Clamp(act[2], 0f, 1f) > 0.5f)
+            bool shootCommand = false;
+            if (brain.brainParameters.vectorActionSpaceType == SpaceType.continuous)
+            {
+                dirToGo = transform.forward * Mathf.Clamp(act[0], -1f, 1f);
+                rotateDir = transform.up * Mathf.Clamp(act[1], -1f, 1f);
+                shootCommand = Mathf.Clamp(act[2], 0f, 1f) > 0.5f;
+            }
+            else
+            {
+                switch ((int)(act[0]))
+                {
+                    case 1:
+                        dirToGo = transform.forward;
+                        break;
+                    case 2:
+                        shootCommand = true;
+                        break;
+                    case 3:
+                        rotateDir = -transform.up;
+                        break;
+                    case 4:
+                        rotateDir = transform.up;
+                        break;
+                }
+            }
+            if (shootCommand)
            {
                shoot = true;
                dirToGo *= 0.5f;
@@ -121,9 +145,9 @@ public void MoveAgent(float[] act)
myLaser.transform.localScale = new Vector3(0f, 0f, 0f);

}

}


void Freeze()
{
gameObject.tag = "frozenAgent";
@@ -182,8 +206,8 @@ public override void AgentReset()
agentRB.velocity = Vector3.zero;
bananas = 0;
myLaser.transform.localScale = new Vector3(0f, 0f, 0f);
transform.position = new Vector3(Random.Range(-myArea.range, myArea.range),
2f, Random.Range(-myArea.range, myArea.range))
transform.position = new Vector3(Random.Range(-myArea.range, myArea.range),
2f, Random.Range(-myArea.range, myArea.range))
+ area.transform.position;
transform.rotation = Quaternion.Euler(new Vector3(0f, Random.Range(0, 360)));
}
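For reference, the two action encodings handled by `MoveAgent` above differ in shape as well as meaning: a continuous brain sends three clamped floats (move, rotate, shoot trigger), while a discrete brain sends a single integer index matched by the `switch`. A hypothetical Python helper, not part of the repo, that builds the `act` payload for each case:

```python
def encode_command(command, space_type):
    """Encode a high-level command as the act[] array MoveAgent expects."""
    if space_type == "continuous":
        # act[0] = move, act[1] = rotate, act[2] = shoot trigger (> 0.5 fires)
        return {
            "noop":         [0.0, 0.0, 0.0],
            "forward":      [1.0, 0.0, 0.0],
            "rotate_plus":  [0.0, 1.0, 0.0],   # rotateDir = +transform.up
            "rotate_minus": [0.0, -1.0, 0.0],  # rotateDir = -transform.up
            "shoot":        [0.0, 0.0, 1.0],
        }[command]
    # Discrete: a single integer index, matching the switch cases above.
    return {
        "noop": [0], "forward": [1], "shoot": [2],
        "rotate_minus": [3], "rotate_plus": [4],
    }[command]

print(encode_command("shoot", "continuous"))  # [0.0, 0.0, 1.0]
print(encode_command("shoot", "discrete"))    # [2]
```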


@@ -513,7 +513,7 @@ public void OnInspector()
            pixels = 1;
        else
            pixels = 3;
-        float[,,,] result = new float[batchSize, width, height, pixels];
+        float[,,,] result = new float[batchSize, height, width, pixels];

        for (int b = 0; b < batchSize; b++)
        {
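The change above swaps the two spatial dimensions so the visual-observation batch is allocated as `[batch, height, width, channels]`, matching the NHWC layout that TensorFlow convolution inputs use by default. A NumPy sketch with made-up, non-square image sizes shows why the order matters:

```python
import numpy as np

# Illustrative sizes only: 2 RGB camera frames, 64 px wide and 36 px tall.
batch_size, width, height, channels = 2, 64, 36, 3

# NHWC: first spatial axis is the pixel row (height), second is the column.
nhwc = np.zeros((batch_size, height, width, channels))     # shape (2, 36, 64, 3)

# Swapping the spatial axes only goes unnoticed for square images; for
# anything else the indices no longer line up with pixel rows and columns.
swapped = np.zeros((batch_size, width, height, channels))  # shape (2, 64, 36, 3)

print(nhwc.shape, swapped.shape)
```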
38 changes: 1 addition & 37 deletions unity-environment/ProjectSettings/EditorBuildSettings.asset
@@ -4,40 +4,4 @@
EditorBuildSettings:
  m_ObjectHideFlags: 0
  serializedVersion: 2
  m_Scenes:
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/GridWorld/GridWorld.unity
-    guid: 7c777442467e245108558a5155153927
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Tennis/Tennis.unity
-    guid: 25c0c9e81e55c4e129e1a5c0ac254100
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Banana/BananaImitation.unity
-    guid: 3ae10073cde7641f488ef7c87862333a
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/PushBlock/Scenes/PushBlock.unity
-    guid: ae8cc75939e3e4d07a79c8c6a08b54f4
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/3DBall/3DScene.unity
-    guid: 6f62a2ccb3830437ea4e85a617e856b3
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/3DBall/3DHardScene.unity
-    guid: 35c41099ceec44889bdbe95ed86c97ac
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Banana/BananaRL.unity
-    guid: 11583205ab5b74bb4bb1b9951cf9e437
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Basic/Scene.unity
-    guid: cf1d119a8748d406e90ecb623b45f92f
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Bouncer/Bouncer.unity
-    guid: 2c29359d4c9fe49219b21cd83e246596
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Crawler/Crawler.unity
-    guid: 4cf841b0478fb4b33971627b40c6420b
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Hallway/Scenes/Hallway.unity
-    guid: d6d6a33ed0e18459a8d61817d600978a
-  - enabled: 0
-    path: Assets/ML-Agents/Examples/Reacher/Scene.unity
-    guid: e58a3c10c43de4b6b91b7149838d1dfb
  m_configObjects: {}