
Add different types of visual encoder (nature cnn/resnet) #2289

Merged: 12 commits, Jul 19, 2019
1 change: 1 addition & 0 deletions config/trainer_config.yaml
@@ -16,6 +16,7 @@ default:
sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
reward_signals:
extrinsic:
strength: 1.0
198 changes: 180 additions & 18 deletions ml-agents/mlagents/trainers/models.py
@@ -1,4 +1,5 @@
import logging
from enum import Enum
from typing import Any, Callable, Dict

import numpy as np
@@ -10,6 +11,12 @@
ActivationFunction = Callable[[tf.Tensor], tf.Tensor]


class EncoderType(Enum):
RESNET = "resnet"
NATURE_CNN = "nature_cnn"
DEFAULT = "default"
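As a quick illustrative sketch (not part of the diff), the vis_encode_type string from trainer_config.yaml round-trips through this enum, and an unrecognized value raises ValueError, so a misspelled setting fails fast:

from enum import Enum

class EncoderType(Enum):
    # mirrors the enum defined above
    RESNET = "resnet"
    NATURE_CNN = "nature_cnn"
    DEFAULT = "default"

# the YAML string maps straight onto an enum member
assert EncoderType("nature_cnn") is EncoderType.NATURE_CNN
assert EncoderType("default") is EncoderType.DEFAULT

# an unknown value (hypothetical "alexnet") raises ValueError
try:
    EncoderType("alexnet")
except ValueError:
    print("unknown vis_encode_type")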


class LearningModel(object):
_version_number_ = 2

@@ -222,13 +229,13 @@ def create_visual_observation_encoder(
reuse: bool,
) -> tf.Tensor:
"""
Builds a set of visual (CNN) encoders.
:param reuse: Whether to re-use the weights within the same scope.
:param scope: The scope of the graph within which to create the ops.
Builds a set of resnet visual encoders.
:param image_input: The placeholder for the image input to use.
:param h_size: Hidden layer size.
:param activation: What type of activation function to use for layers.
:param num_layers: number of hidden layers to create.
:param scope: The scope of the graph within which to create the ops.
:param reuse: Whether to re-use the weights within the same scope.
:return: List of hidden layer tensors.
"""
with tf.variable_scope(scope):
@@ -258,6 +265,131 @@ def create_visual_observation_encoder(
)
return hidden_flat

def create_nature_cnn_visual_observation_encoder(
self,
image_input: tf.Tensor,
h_size: int,
activation: ActivationFunction,
num_layers: int,
scope: str,
reuse: bool,
) -> tf.Tensor:
"""
Builds a set of Nature CNN visual encoders (the convolutional architecture from the DQN Nature paper).
:param image_input: The placeholder for the image input to use.
:param h_size: Hidden layer size.
:param activation: What type of activation function to use for layers.
:param num_layers: number of hidden layers to create.
:param scope: The scope of the graph within which to create the ops.
:param reuse: Whether to re-use the weights within the same scope.
:return: List of hidden layer tensors.
"""
print("creating nature cnn")
with tf.variable_scope(scope):
conv1 = tf.layers.conv2d(
image_input,
32,
kernel_size=[8, 8],
strides=[4, 4],
activation=tf.nn.elu,
reuse=reuse,
name="conv_1",
)
conv2 = tf.layers.conv2d(
conv1,
64,
kernel_size=[4, 4],
strides=[2, 2],
activation=tf.nn.elu,
reuse=reuse,
name="conv_2",
)
conv3 = tf.layers.conv2d(
conv2,
64,
kernel_size=[3, 3],
strides=[1, 1],
activation=tf.nn.elu,
reuse=reuse,
name="conv_3",
)
hidden = c_layers.flatten(conv3)

with tf.variable_scope(scope + "/" + "flat_encoding"):
hidden_flat = self.create_vector_observation_encoder(
hidden, h_size, activation, num_layers, scope, reuse
)
return hidden_flat
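For intuition only (a sketch assuming an 84x84 visual observation, not code from the PR): with tf.layers.conv2d's default "valid" padding, the three convolutions above shrink the feature map to 20x20, then 9x9, then 7x7 with 64 channels, so 3136 values reach the flatten:

def conv_out(size, kernel, stride):
    # output width of a square "valid"-padded convolution
    return (size - kernel) // stride + 1

size = 84  # assumed square input resolution
for kernel, stride in [(8, 4), (4, 2), (3, 1)]:
    size = conv_out(size, kernel, stride)
    print(size)          # 20, then 9, then 7
print(size * size * 64)  # 3136 features into c_layers.flatten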

def create_resnet_visual_observation_encoder(
self,
image_input: tf.Tensor,
h_size: int,
activation: ActivationFunction,
num_layers: int,
scope: str,
reuse: bool,
) -> tf.Tensor:
"""
Builds a set of resnet visual encoders.
:param image_input: The placeholder for the image input to use.
:param h_size: Hidden layer size.
:param activation: What type of activation function to use for layers.
:param num_layers: number of hidden layers to create.
:param scope: The scope of the graph within which to create the ops.
:param reuse: Whether to re-use the weights within the same scope.
:return: List of hidden layer tensors.
"""
print("creating resnet")
n_channels = [16, 32, 32] # channel for each stack
n_blocks = 2 # number of residual blocks
with tf.variable_scope(scope):
hidden = image_input
for i, ch in enumerate(n_channels):
hidden = tf.layers.conv2d(
hidden,
ch,
kernel_size=[3, 3],
strides=[1, 1],
reuse=reuse,
name="layer%dconv_1" % i,
)
hidden = tf.layers.max_pooling2d(
hidden, pool_size=[3, 3], strides=[2, 2], padding="same"
)
# create residual blocks
for j in range(n_blocks):
block_input = hidden
hidden = tf.nn.relu(hidden)
hidden = tf.layers.conv2d(
hidden,
ch,
kernel_size=[3, 3],
strides=[1, 1],
padding="same",
reuse=reuse,
name="layer%d_%d_conv1" % (i, j),
)
hidden = tf.nn.relu(hidden)
hidden = tf.layers.conv2d(
hidden,
ch,
kernel_size=[3, 3],
strides=[1, 1],
padding="same",
reuse=reuse,
name="layer%d_%d_conv2" % (i, j),
)
hidden = tf.add(block_input, hidden)
hidden = tf.nn.relu(hidden)
hidden = c_layers.flatten(hidden)

with tf.variable_scope(scope + "/" + "flat_encoding"):
hidden_flat = self.create_vector_observation_encoder(
hidden, h_size, activation, num_layers, scope, reuse
)
return hidden_flat
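A similar sketch for the residual encoder, again assuming an 84x84 input: each stack's first 3x3 convolution uses the default "valid" padding and its stride-2 max-pool uses "same" padding, while the residual convolutions preserve spatial size, so the map shrinks 84 -> 41 -> 20 -> 9 and the flatten sees 9*9*32 = 2592 values:

import math

def stack_out(size):
    size -= 2                   # 3x3 conv, stride 1, "valid" padding (tf.layers default)
    return math.ceil(size / 2)  # 3x3 max-pool, stride 2, padding="same"

size = 84  # assumed square input resolution
for ch in (16, 32, 32):         # the residual blocks use "same" padding, so no change
    size = stack_out(size)
    print(ch, size)             # -> 41, then 20, then 9
print(size * size * 32)         # 2592 features into c_layers.flatten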

@staticmethod
def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
"""
@@ -302,7 +434,9 @@ def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
),
)

def create_observation_streams(self, num_streams, h_size, num_layers):
def create_observation_streams(
@chriselion (Contributor), Jul 19, 2019:
Please add type annotations here.
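One possible shape for that annotation, sketched here rather than taken from the PR (the final signature may differ):

from typing import List

import tensorflow as tf

# method-signature sketch; shown as a free function with an explicit self
def create_observation_streams(
    self,
    num_streams: int,
    h_size: int,
    num_layers: int,
    vis_encode_type: str = "default",
) -> List[tf.Tensor]:
    ...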

self, num_streams, h_size, num_layers, vis_encode_type="default"
):
"""
Creates encoding stream for observations.
:param num_streams: Number of streams to create.
@@ -326,16 +460,40 @@ def create_observation_streams(self, num_streams, h_size, num_layers):
visual_encoders = []
hidden_state, hidden_visual = None, None
if self.vis_obs_size > 0:
for j in range(brain.number_visual_observations):
encoded_visual = self.create_visual_observation_encoder(
self.visual_in[j],
h_size,
activation_fn,
num_layers,
"main_graph_{}_encoder{}".format(i, j),
False,
)
visual_encoders.append(encoded_visual)
vis_encode_type = EncoderType(vis_encode_type)
if vis_encode_type == EncoderType.RESNET:
for j in range(brain.number_visual_observations):
encoded_visual = self.create_resnet_visual_observation_encoder(
self.visual_in[j],
h_size,
activation_fn,
num_layers,
"main_graph_{}_encoder{}".format(i, j),
False,
)
visual_encoders.append(encoded_visual)
elif vis_encode_type == EncoderType.NATURE_CNN:
for j in range(brain.number_visual_observations):
encoded_visual = self.create_nature_cnn_visual_observation_encoder(
self.visual_in[j],
h_size,
activation_fn,
num_layers,
"main_graph_{}_encoder{}".format(i, j),
False,
)
visual_encoders.append(encoded_visual)
else:
for j in range(brain.number_visual_observations):
encoded_visual = self.create_visual_observation_encoder(
self.visual_in[j],
h_size,
activation_fn,
num_layers,
"main_graph_{}_encoder{}".format(i, j),
False,
)
visual_encoders.append(encoded_visual)
hidden_visual = tf.concat(visual_encoders, axis=1)
if brain.vector_observation_space_size > 0:
hidden_state = self.create_vector_observation_encoder(
@@ -401,13 +559,15 @@ def create_value_heads(self, stream_names, hidden_input):
self.value_heads[name] = value
self.value = tf.reduce_mean(list(self.value_heads.values()), 0)

def create_cc_actor_critic(self, h_size, num_layers):
def create_cc_actor_critic(self, h_size, num_layers, vis_encode_type):
Contributor review comment: type annotations

"""
Creates Continuous control actor-critic model.
:param h_size: Size of hidden linear layers.
:param num_layers: Number of hidden linear layers.
"""
hidden_streams = self.create_observation_streams(2, h_size, num_layers)
hidden_streams = self.create_observation_streams(
2, h_size, num_layers, vis_encode_type
)

if self.use_recurrent:
self.memory_in = tf.placeholder(
@@ -486,13 +646,15 @@ def create_cc_actor_critic(self, h_size, num_layers):
(tf.identity(self.all_old_log_probs)), axis=1, keepdims=True
)

def create_dc_actor_critic(self, h_size, num_layers):
def create_dc_actor_critic(self, h_size, num_layers, vis_encode_type):
"""
Creates Discrete control actor-critic model.
:param h_size: Size of hidden linear layers.
:param num_layers: Number of hidden linear layers.
"""
hidden_streams = self.create_observation_streams(1, h_size, num_layers)
hidden_streams = self.create_observation_streams(
1, h_size, num_layers, vis_encode_type
)
hidden = hidden_streams[0]

if self.use_recurrent:
5 changes: 3 additions & 2 deletions ml-agents/mlagents/trainers/ppo/models.py
@@ -22,6 +22,7 @@ def __init__(
m_size=None,
seed=0,
stream_names=None,
vis_encode_type="default",
):
"""
Takes a Unity environment and model-specific hyper-parameters and returns the
@@ -46,10 +47,10 @@ def __init__(
if num_layers < 1:
num_layers = 1
if brain.vector_action_space_type == "continuous":
self.create_cc_actor_critic(h_size, num_layers)
self.create_cc_actor_critic(h_size, num_layers, vis_encode_type)
self.entropy = tf.ones_like(tf.reshape(self.value, [-1])) * self.entropy
else:
self.create_dc_actor_critic(h_size, num_layers)
self.create_dc_actor_critic(h_size, num_layers, vis_encode_type)
self.create_losses(
self.log_probs,
self.old_log_probs,
1 change: 1 addition & 0 deletions ml-agents/mlagents/trainers/ppo/policy.py
@@ -44,6 +44,7 @@ def __init__(self, seed, brain, trainer_params, is_training, load):
m_size=self.m_size,
seed=seed,
stream_names=list(reward_signal_configs.keys()),
vis_encode_type=trainer_params["vis_encode_type"],
Contributor review comment: Convert to EncoderType enum here and pass that through instead of the string type. Handle a missing value with .get(). For example:
vis_encode_type = EncoderType(trainer_params.get("vis_encode_type", "default"))
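With .get(), a config that omits vis_encode_type falls back to the default encoder instead of raising a KeyError.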

)
self.model.create_ppo_optimizer()

1 change: 1 addition & 0 deletions ml-agents/mlagents/trainers/ppo/trainer.py
@@ -53,6 +53,7 @@ def __init__(
"memory_size",
"model_path",
"reward_signals",
"vis_encode_type",
]
self.check_param_keys()

1 change: 1 addition & 0 deletions ml-agents/mlagents/trainers/tests/test_bcmodule.py
@@ -29,6 +29,7 @@ def dummy_config():
sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
memory_size: 8
pretraining:
demo_path: ./demos/ExpertPyramid.demo
2 changes: 2 additions & 0 deletions ml-agents/mlagents/trainers/tests/test_ppo.py
@@ -32,6 +32,7 @@ def dummy_config():
sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1
@@ -327,6 +328,7 @@ def test_trainer_increment_step():
"sequence_length": 64,
"summary_freq": 3000,
"use_recurrent": False,
"vis_encode_type": "default",
"use_curiosity": False,
"curiosity_strength": 0.01,
"curiosity_enc_size": 128,
1 change: 1 addition & 0 deletions ml-agents/mlagents/trainers/tests/test_reward_signals.py
@@ -35,6 +35,7 @@ def dummy_config():
sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1