From 5f1f63e13c3c1e17c3a5256b93d693c8d5b51a26 Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Wed, 3 Jul 2019 11:09:23 -0700
Subject: [PATCH 01/27] branch first commit

---
 ml-agents-envs/mlagents/envs/environment.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py
index 8800d5589a..4e50e87aef 100644
--- a/ml-agents-envs/mlagents/envs/environment.py
+++ b/ml-agents-envs/mlagents/envs/environment.py
@@ -66,7 +66,7 @@ def __init__(
         atexit.register(self._close)
         self.port = base_port + worker_id
         self._buffer_size = 12000
-        self._version_ = "API-8"
+        self._version_ = "API-6"
         self._loaded = (
             False
         )  # If true, this means the environment was successfully loaded

From 02778b8347186992ba592826ee1a32bee3cc27f5 Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Wed, 3 Jul 2019 16:53:51 -0700
Subject: [PATCH 02/27] add resnet

---
 ml-agents/mlagents/trainers/models.py | 67 +++++++++++++++++++--------
 1 file changed, 48 insertions(+), 19 deletions(-)

diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index 7b66e5a221..057bcf7653 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -212,26 +212,55 @@ def create_visual_observation_encoder(
         :param num_layers: number of hidden layers to create.
         :return: List of hidden layer tensors.
         """
+        # channel for each stack
+        n_channels = [16, 32, 32]
+        # number of residual blocks
+        n_blocks = 2
+
         with tf.variable_scope(scope):
-            conv1 = tf.layers.conv2d(
-                image_input,
-                16,
-                kernel_size=[8, 8],
-                strides=[4, 4],
-                activation=tf.nn.elu,
-                reuse=reuse,
-                name="conv_1",
-            )
-            conv2 = tf.layers.conv2d(
-                conv1,
-                32,
-                kernel_size=[4, 4],
-                strides=[2, 2],
-                activation=tf.nn.elu,
-                reuse=reuse,
-                name="conv_2",
-            )
-            hidden = c_layers.flatten(conv2)
+            hidden = image_input
+            for i, ch in enumerate(n_channels):
+                hidden = tf.layers.conv2d(
+                    hidden,
+                    ch,
+                    kernel_size=[3, 3],
+                    strides=1,
+                    reuse=reuse,
+                    name="layer%dconv_1" % i,
+                )
+                hidden = tf.layers.max_pooling2d(
+                    hidden,
+                    pool_size=[3, 3],
+                    strides=2,
+                    padding='same',
+                )
+                # create residual blocks
+                for j in range(n_blocks):
+                    block_input = hidden
+                    hidden = tf.nn.relu(hidden)
+                    hidden = tf.layers.conv2d(
+                        hidden,
+                        ch,
+                        kernel_size=[3, 3],
+                        strides=1,
+                        padding='same',
+                        reuse=reuse,
+                        name="layer%d_%d_conv1" % (i, j),
+                    )
+                    hidden = tf.nn.relu(hidden)
+                    hidden = tf.layers.conv2d(
+                        hidden,
+                        ch,
+                        kernel_size=[3, 3],
+                        strides=1,
+                        padding='same',
+                        reuse=reuse,
+                        name="layer%d_%d_conv2" % (i, j),
+                    )
+                    hidden = tf.add(block_input, hidden)
+
+            hidden = tf.nn.relu(hidden)
+            hidden = c_layers.flatten(hidden)
 
         with tf.variable_scope(scope + "/" + "flat_encoding"):
             hidden_flat = self.create_vector_observation_encoder(

From 74b7acd1a20db99de779be701d57895b1fc0204d Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Thu, 18 Jul 2019 16:32:51 -0700
Subject: [PATCH 03/27] add different types of visual encoder

---
 ml-agents/mlagents/trainers/models.py | 125 ++++++++++++++++++--------
 1 file changed, 86 insertions(+), 39 deletions(-)

diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index a7045e6975..e9064ce316 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -220,6 +220,7 @@ def create_visual_observation_encoder(
         num_layers: int,
         scope: str,
         reuse: bool,
+        encoder_type=None
     ) -> tf.Tensor:
         """
         Builds a set of visual (CNN) encoders.
@@ -231,55 +232,101 @@ def create_visual_observation_encoder(
         :param num_layers: number of hidden layers to create.
         :return: List of hidden layer tensors.
         """
-        # channel for each stack
-        n_channels = [16, 32, 32]
-        # number of residual blocks
-        n_blocks = 2
-
         with tf.variable_scope(scope):
-            hidden = image_input
-            for i, ch in enumerate(n_channels):
-                hidden = tf.layers.conv2d(
-                    hidden,
-                    ch,
-                    kernel_size=[3, 3],
-                    strides=1,
-                    reuse=reuse,
-                    name="layer%dconv_1" % i,
-                )
-                hidden = tf.layers.max_pooling2d(
-                    hidden,
-                    pool_size=[3, 3],
-                    strides=2,
-                    padding='same',
-                )
-                # create residual blocks
-                for j in range(n_blocks):
-                    block_input = hidden
-                    hidden = tf.nn.relu(hidden)
+            if encoder_type == "resnet":
+                n_channels = [16, 32, 32] # channel for each stack
+                n_blocks = 2 # number of residual blocks
+                hidden = image_input
+                for i, ch in enumerate(n_channels):
                     hidden = tf.layers.conv2d(
                         hidden,
                         ch,
                         kernel_size=[3, 3],
-                        strides=1,
-                        padding='same',
+                        strides=[1, 1],
                         reuse=reuse,
-                        name="layer%d_%d_conv1" % (i, j),
+                        name="layer%dconv_1" % i,
                     )
-                    hidden = tf.nn.relu(hidden)
-                    hidden = tf.layers.conv2d(
+                    hidden = tf.layers.max_pooling2d(
                         hidden,
-                        ch,
-                        kernel_size=[3, 3],
-                        strides=1,
+                        pool_size=[3, 3],
+                        strides=[2, 2],
                         padding='same',
-                        reuse=reuse,
-                        name="layer%d_%d_conv2" % (i, j),
                     )
-                    hidden = tf.add(block_input, hidden)
-
-            hidden = tf.nn.relu(hidden)
-            hidden = c_layers.flatten(hidden)
+                    # create residual blocks
+                    for j in range(n_blocks):
+                        block_input = hidden
+                        hidden = tf.nn.relu(hidden)
+                        hidden = tf.layers.conv2d(
+                            hidden,
+                            ch,
+                            kernel_size=[3, 3],
+                            strides=[1, 1],
+                            padding='same',
+                            reuse=reuse,
+                            name="layer%d_%d_conv1" % (i, j),
+                        )
+                        hidden = tf.nn.relu(hidden)
+                        hidden = tf.layers.conv2d(
+                            hidden,
+                            ch,
+                            kernel_size=[3, 3],
+                            strides=[1, 1],
+                            padding='same',
+                            reuse=reuse,
+                            name="layer%d_%d_conv2" % (i, j),
+                        )
+                        hidden = tf.add(block_input, hidden)
+                hidden = tf.nn.relu(hidden)
+                hidden = c_layers.flatten(hidden)
+            elif encoder_type == 'nature_cnn':
+                conv1 = tf.layers.conv2d(
+                    image_input,
+                    32,
+                    kernel_size=[8, 8],
+                    strides=[4, 4],
+                    activation=tf.nn.elu,
+                    reuse=reuse,
+                    name="conv_1"
+                )
+                conv2 = tf.layers.conv2d(
+                    conv1,
+                    64,
+                    kernel_size=[4, 4],
+                    strides=[2, 2],
+                    activation=tf.nn.elu,
+                    reuse=reuse,
+                    name="conv_2"
+                )
+                conv3 = tf.layers.conv2d(
+                    conv2,
+                    64,
+                    kernel_size=[3, 3],
+                    strides=[1, 1],
+                    activation=tf.nn.elu,
+                    reuse=reuse,
+                    name="conv_3"
+                )
+                hidden = c_layers.flatten(conv3)
+            else:
+                conv1 = tf.layers.conv2d(
+                    image_input,
+                    16,
+                    kernel_size=[8, 8],
+                    strides=[4, 4],
+                    activation=tf.nn.elu,
+                    reuse=reuse,
+                    name="conv_1",
+                )
+                conv2 = tf.layers.conv2d(
+                    conv1,
+                    32,
+                    kernel_size=[4, 4],
+                    strides=[2, 2],
+                    activation=tf.nn.elu,
+                    reuse=reuse,
+                    name="conv_2",
+                )
+                hidden = c_layers.flatten(conv2)
 
         with tf.variable_scope(scope + "/" + "flat_encoding"):
             hidden_flat = self.create_vector_observation_encoder(

From 0d32a08217e5f373a0f91f78b78d485f85784ecc Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Thu, 18 Jul 2019 16:38:03 -0700
Subject: [PATCH 04/27] fix bug

---
 ml-agents-envs/mlagents/envs/environment.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py
index 9e549ca94b..eb8993eca0 100644
--- a/ml-agents-envs/mlagents/envs/environment.py
+++ b/ml-agents-envs/mlagents/envs/environment.py
@@ -68,7 +68,7 @@ def __init__(
         atexit.register(self._close)
         self.port = base_port + worker_id
         self._buffer_size = 12000
-        self._version_ = "API-6"
+        self._version_ = "API-8"
         self._loaded = (
             False
         )  # If true, this means the environment was successfully loaded

From 17ecb8d2e978f38211e11f89e1253210c3940e3f Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Thu, 18 Jul 2019 16:41:01 -0700
Subject: [PATCH 05/27] reformatting

---
 ml-agents/mlagents/trainers/models.py | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index e9064ce316..09e86f1779 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -220,7 +220,7 @@ def create_visual_observation_encoder(
         num_layers: int,
         scope: str,
         reuse: bool,
-        encoder_type=None
+        encoder_type=None,
     ) -> tf.Tensor:
         """
         Builds a set of visual (CNN) encoders.
@@ -234,8 +234,8 @@ def create_visual_observation_encoder(
         """
         with tf.variable_scope(scope):
             if encoder_type == "resnet":
-                n_channels = [16, 32, 32] # channel for each stack
-                n_blocks = 2 # number of residual blocks
+                n_channels = [16, 32, 32]  # channel for each stack
+                n_blocks = 2  # number of residual blocks
                 hidden = image_input
                 for i, ch in enumerate(n_channels):
                     hidden = tf.layers.conv2d(
@@ -247,10 +247,7 @@ def create_visual_observation_encoder(
                         name="layer%dconv_1" % i,
                     )
                     hidden = tf.layers.max_pooling2d(
-                        hidden,
-                        pool_size=[3, 3],
-                        strides=[2, 2],
-                        padding='same',
+                        hidden, pool_size=[3, 3], strides=[2, 2], padding="same"
                     )
                     # create residual blocks
                     for j in range(n_blocks):
@@ -261,7 +258,7 @@ def create_visual_observation_encoder(
                             ch,
                             kernel_size=[3, 3],
                             strides=[1, 1],
-                            padding='same',
+                            padding="same",
                             reuse=reuse,
                             name="layer%d_%d_conv1" % (i, j),
                         )
@@ -271,14 +268,14 @@ def create_visual_observation_encoder(
                             ch,
                             kernel_size=[3, 3],
                             strides=[1, 1],
-                            padding='same',
+                            padding="same",
                             reuse=reuse,
                             name="layer%d_%d_conv2" % (i, j),
                         )
                         hidden = tf.add(block_input, hidden)
                 hidden = tf.nn.relu(hidden)
                 hidden = c_layers.flatten(hidden)
-            elif encoder_type == 'nature_cnn':
+            elif encoder_type == "nature_cnn":
                 conv1 = tf.layers.conv2d(
                     image_input,
                     32,
@@ -286,7 +283,7 @@ def create_visual_observation_encoder(
                     strides=[4, 4],
                     activation=tf.nn.elu,
                     reuse=reuse,
-                    name="conv_1"
+                    name="conv_1",
                 )
                 conv2 = tf.layers.conv2d(
                     conv1,
@@ -295,7 +292,7 @@ def create_visual_observation_encoder(
                     strides=[2, 2],
                     activation=tf.nn.elu,
                     reuse=reuse,
-                    name="conv_2"
+                    name="conv_2",
                 )
                 conv3 = tf.layers.conv2d(
                     conv2,
@@ -304,7 +301,7 @@ def create_visual_observation_encoder(
                     strides=[1, 1],
                     activation=tf.nn.elu,
                     reuse=reuse,
-                    name="conv_3"
+                    name="conv_3",
                 )
                 hidden = c_layers.flatten(conv3)
             else:

From 52b78d71a54564a867a89446d510c52a6aed2c2d Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Thu, 18 Jul 2019 16:48:13 -0700
Subject: [PATCH 06/27] fix type annotation

---
 ml-agents/mlagents/trainers/models.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index 09e86f1779..e3fcdf5f0f 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -220,7 +220,7 @@ def create_visual_observation_encoder(
         num_layers: int,
         scope: str,
         reuse: bool,
-        encoder_type=None,
+        encoder_type: str=None,
     ) -> tf.Tensor:
         """
         Builds a set of visual (CNN) encoders.
@@ -230,6 +230,7 @@ def create_visual_observation_encoder(
         :param h_size: Hidden layer size.
         :param activation: What type of activation function to use for layers.
         :param num_layers: number of hidden layers to create.
+        :param encoder_type: type of visual encoder (default/nature_cnn/resnet)
         :return: List of hidden layer tensors.
         """
         with tf.variable_scope(scope):

From fae3b10a51f25f0ba1b49e63eafa1a6443262534 Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Thu, 18 Jul 2019 16:52:15 -0700
Subject: [PATCH 07/27] reformatting

---
 ml-agents/mlagents/trainers/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index e3fcdf5f0f..5f6ba3b666 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -220,7 +220,7 @@ def create_visual_observation_encoder(
         num_layers: int,
         scope: str,
         reuse: bool,
-        encoder_type: str=None,
+        encoder_type: str = None,
     ) -> tf.Tensor:
         """
         Builds a set of visual (CNN) encoders.

From f9a226e1b4f420a02dcab7f36c0d44026eb3635f Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Fri, 19 Jul 2019 13:24:35 -0700
Subject: [PATCH 08/27] Add visual encoder option. Separate create function for
 different encoder.

---
 config/trainer_config.yaml                 |   1 +
 ml-agents/mlagents/trainers/models.py      | 278 ++++++++++++++-------
 ml-agents/mlagents/trainers/ppo/models.py  |   5 +-
 ml-agents/mlagents/trainers/ppo/policy.py  |   1 +
 ml-agents/mlagents/trainers/ppo/trainer.py |   1 +
 5 files changed, 187 insertions(+), 99 deletions(-)

diff --git a/config/trainer_config.yaml b/config/trainer_config.yaml
index fcda3f6a9a..1430ac2f88 100644
--- a/config/trainer_config.yaml
+++ b/config/trainer_config.yaml
@@ -16,6 +16,7 @@ default:
     sequence_length: 64
     summary_freq: 1000
     use_recurrent: false
+    vis_encode_type: "default"
     reward_signals: 
         extrinsic:
             strength: 1.0
diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index 5f6ba3b666..b467e31e22 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -1,4 +1,5 @@
 import logging
+from enum import Enum
 from typing import Any, Callable, Dict
 
 import numpy as np
@@ -10,6 +11,12 @@
 ActivationFunction = Callable[[tf.Tensor], tf.Tensor]
 
 
+class EncoderType(Enum):
+    RESNET = "resnet"
+    NATURE_CNN = "nature_cnn"
+    DEFAUL = "default"
+
+
 class LearningModel(object):
     _version_number_ = 2
 
@@ -220,111 +227,162 @@ def create_visual_observation_encoder(
         num_layers: int,
         scope: str,
         reuse: bool,
-        encoder_type: str = None,
     ) -> tf.Tensor:
         """
-        Builds a set of visual (CNN) encoders.
+        Builds a set of default (two-layer CNN) visual encoders.
+        :param image_input: The placeholder for the image input to use.
+        :param h_size: Hidden layer size.
+        :param activation: What type of activation function to use for layers.
+        :param num_layers: number of hidden layers to create.
+        :param scope: The scope of the graph within which to create the ops.
         :param reuse: Whether to re-use the weights within the same scope.
+        :return: List of hidden layer tensors.
+        """
+        with tf.variable_scope(scope):
+            conv1 = tf.layers.conv2d(
+                image_input,
+                16,
+                kernel_size=[8, 8],
+                strides=[4, 4],
+                activation=tf.nn.elu,
+                reuse=reuse,
+                name="conv_1",
+            )
+            conv2 = tf.layers.conv2d(
+                conv1,
+                32,
+                kernel_size=[4, 4],
+                strides=[2, 2],
+                activation=tf.nn.elu,
+                reuse=reuse,
+                name="conv_2",
+            )
+            hidden = c_layers.flatten(conv2)
+
+        with tf.variable_scope(scope + "/" + "flat_encoding"):
+            hidden_flat = self.create_vector_observation_encoder(
+                hidden, h_size, activation, num_layers, scope, reuse
+            )
+        return hidden_flat
+
+    def create_nature_cnn_visual_observation_encoder(
+        self,
+        image_input: tf.Tensor,
+        h_size: int,
+        activation: ActivationFunction,
+        num_layers: int,
+        scope: str,
+        reuse: bool,
+    ) -> tf.Tensor:
+        """
+        Builds a set of nature CNN visual encoders.
+        :param image_input: The placeholder for the image input to use.
+        :param h_size: Hidden layer size.
+        :param activation: What type of activation function to use for layers.
+        :param num_layers: number of hidden layers to create.
         :param scope: The scope of the graph within which to create the ops.
+        :param reuse: Whether to re-use the weights within the same scope.
+        :return: List of hidden layer tensors.
+        """
+        print("creating nature cnn")
+        with tf.variable_scope(scope):
+            conv1 = tf.layers.conv2d(
+                image_input,
+                32,
+                kernel_size=[8, 8],
+                strides=[4, 4],
+                activation=tf.nn.elu,
+                reuse=reuse,
+                name="conv_1",
+            )
+            conv2 = tf.layers.conv2d(
+                conv1,
+                64,
+                kernel_size=[4, 4],
+                strides=[2, 2],
+                activation=tf.nn.elu,
+                reuse=reuse,
+                name="conv_2",
+            )
+            conv3 = tf.layers.conv2d(
+                conv2,
+                64,
+                kernel_size=[3, 3],
+                strides=[1, 1],
+                activation=tf.nn.elu,
+                reuse=reuse,
+                name="conv_3",
+            )
+            hidden = c_layers.flatten(conv3)
+
+        with tf.variable_scope(scope + "/" + "flat_encoding"):
+            hidden_flat = self.create_vector_observation_encoder(
+                hidden, h_size, activation, num_layers, scope, reuse
+            )
+        return hidden_flat
+
+    def create_resnet_visual_observation_encoder(
+        self,
+        image_input: tf.Tensor,
+        h_size: int,
+        activation: ActivationFunction,
+        num_layers: int,
+        scope: str,
+        reuse: bool,
+    ) -> tf.Tensor:
+        """
+        Builds a set of resnet visual encoders.
         :param image_input: The placeholder for the image input to use.
         :param h_size: Hidden layer size.
         :param activation: What type of activation function to use for layers.
         :param num_layers: number of hidden layers to create.
-        :param encoder_type: type of visual encoder (default/nature_cnn/resnet)
+        :param scope: The scope of the graph within which to create the ops.
+        :param reuse: Whether to re-use the weights within the same scope.
         :return: List of hidden layer tensors.
         """
+        print("creating resnet")
+        n_channels = [16, 32, 32]  # channel for each stack
+        n_blocks = 2  # number of residual blocks
         with tf.variable_scope(scope):
-            if encoder_type == "resnet":
-                n_channels = [16, 32, 32]  # channel for each stack
-                n_blocks = 2  # number of residual blocks
-                hidden = image_input
-                for i, ch in enumerate(n_channels):
+            hidden = image_input
+            for i, ch in enumerate(n_channels):
+                hidden = tf.layers.conv2d(
+                    hidden,
+                    ch,
+                    kernel_size=[3, 3],
+                    strides=[1, 1],
+                    reuse=reuse,
+                    name="layer%dconv_1" % i,
+                )
+                hidden = tf.layers.max_pooling2d(
+                    hidden, pool_size=[3, 3], strides=[2, 2], padding="same"
+                )
+                # create residual blocks
+                for j in range(n_blocks):
+                    block_input = hidden
+                    hidden = tf.nn.relu(hidden)
                     hidden = tf.layers.conv2d(
                         hidden,
                         ch,
                         kernel_size=[3, 3],
                         strides=[1, 1],
+                        padding="same",
                         reuse=reuse,
-                        name="layer%dconv_1" % i,
+                        name="layer%d_%d_conv1" % (i, j),
                     )
-                    hidden = tf.layers.max_pooling2d(
-                        hidden, pool_size=[3, 3], strides=[2, 2], padding="same"
+                    hidden = tf.nn.relu(hidden)
+                    hidden = tf.layers.conv2d(
+                        hidden,
+                        ch,
+                        kernel_size=[3, 3],
+                        strides=[1, 1],
+                        padding="same",
+                        reuse=reuse,
+                        name="layer%d_%d_conv2" % (i, j),
                     )
-                    # create residual blocks
-                    for j in range(n_blocks):
-                        block_input = hidden
-                        hidden = tf.nn.relu(hidden)
-                        hidden = tf.layers.conv2d(
-                            hidden,
-                            ch,
-                            kernel_size=[3, 3],
-                            strides=[1, 1],
-                            padding="same",
-                            reuse=reuse,
-                            name="layer%d_%d_conv1" % (i, j),
-                        )
-                        hidden = tf.nn.relu(hidden)
-                        hidden = tf.layers.conv2d(
-                            hidden,
-                            ch,
-                            kernel_size=[3, 3],
-                            strides=[1, 1],
-                            padding="same",
-                            reuse=reuse,
-                            name="layer%d_%d_conv2" % (i, j),
-                        )
-                        hidden = tf.add(block_input, hidden)
-                hidden = tf.nn.relu(hidden)
-                hidden = c_layers.flatten(hidden)
-            elif encoder_type == "nature_cnn":
-                conv1 = tf.layers.conv2d(
-                    image_input,
-                    32,
-                    kernel_size=[8, 8],
-                    strides=[4, 4],
-                    activation=tf.nn.elu,
-                    reuse=reuse,
-                    name="conv_1",
-                )
-                conv2 = tf.layers.conv2d(
-                    conv1,
-                    64,
-                    kernel_size=[4, 4],
-                    strides=[2, 2],
-                    activation=tf.nn.elu,
-                    reuse=reuse,
-                    name="conv_2",
-                )
-                conv3 = tf.layers.conv2d(
-                    conv2,
-                    64,
-                    kernel_size=[3, 3],
-                    strides=[1, 1],
-                    activation=tf.nn.elu,
-                    reuse=reuse,
-                    name="conv_3",
-                )
-                hidden = c_layers.flatten(conv3)
-            else:
-                conv1 = tf.layers.conv2d(
-                    image_input,
-                    16,
-                    kernel_size=[8, 8],
-                    strides=[4, 4],
-                    activation=tf.nn.elu,
-                    reuse=reuse,
-                    name="conv_1",
-                )
-                conv2 = tf.layers.conv2d(
-                    conv1,
-                    32,
-                    kernel_size=[4, 4],
-                    strides=[2, 2],
-                    activation=tf.nn.elu,
-                    reuse=reuse,
-                    name="conv_2",
-                )
-                hidden = c_layers.flatten(conv2)
+                    hidden = tf.add(block_input, hidden)
+            hidden = tf.nn.relu(hidden)
+            hidden = c_layers.flatten(hidden)
 
         with tf.variable_scope(scope + "/" + "flat_encoding"):
             hidden_flat = self.create_vector_observation_encoder(
@@ -376,7 +434,9 @@ def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
             ),
         )
 
-    def create_observation_streams(self, num_streams, h_size, num_layers):
+    def create_observation_streams(
+        self, num_streams, h_size, num_layers, vis_encode_type
+    ):
         """
         Creates encoding stream for observations.
         :param num_streams: Number of streams to create.
@@ -400,16 +460,40 @@ def create_observation_streams(self, num_streams, h_size, num_layers):
             visual_encoders = []
             hidden_state, hidden_visual = None, None
             if self.vis_obs_size > 0:
-                for j in range(brain.number_visual_observations):
-                    encoded_visual = self.create_visual_observation_encoder(
-                        self.visual_in[j],
-                        h_size,
-                        activation_fn,
-                        num_layers,
-                        "main_graph_{}_encoder{}".format(i, j),
-                        False,
-                    )
-                    visual_encoders.append(encoded_visual)
+                vis_encode_type = EncoderType(vis_encode_type)
+                if vis_encode_type == EncoderType.RESNET:
+                    for j in range(brain.number_visual_observations):
+                        encoded_visual = self.create_resnet_visual_observation_encoder(
+                            self.visual_in[j],
+                            h_size,
+                            activation_fn,
+                            num_layers,
+                            "main_graph_{}_encoder{}".format(i, j),
+                            False,
+                        )
+                        visual_encoders.append(encoded_visual)
+                elif vis_encode_type == EncoderType.nature_cnn:
+                    for j in range(brain.number_visual_observations):
+                        encoded_visual = self.create_nature_cnn_visual_observation_encoder(
+                            self.visual_in[j],
+                            h_size,
+                            activation_fn,
+                            num_layers,
+                            "main_graph_{}_encoder{}".format(i, j),
+                            False,
+                        )
+                        visual_encoders.append(encoded_visual)
+                else:
+                    for j in range(brain.number_visual_observations):
+                        encoded_visual = self.create_visual_observation_encoder(
+                            self.visual_in[j],
+                            h_size,
+                            activation_fn,
+                            num_layers,
+                            "main_graph_{}_encoder{}".format(i, j),
+                            False,
+                        )
+                        visual_encoders.append(encoded_visual)
                 hidden_visual = tf.concat(visual_encoders, axis=1)
             if brain.vector_observation_space_size > 0:
                 hidden_state = self.create_vector_observation_encoder(
diff --git a/ml-agents/mlagents/trainers/ppo/models.py b/ml-agents/mlagents/trainers/ppo/models.py
index 5526221889..767206d253 100644
--- a/ml-agents/mlagents/trainers/ppo/models.py
+++ b/ml-agents/mlagents/trainers/ppo/models.py
@@ -22,6 +22,7 @@ def __init__(
         m_size=None,
         seed=0,
         stream_names=None,
+        vis_encode_type=None,
     ):
         """
         Takes a Unity environment and model-specific hyper-parameters and returns the
@@ -46,10 +47,10 @@ def __init__(
         if num_layers < 1:
             num_layers = 1
         if brain.vector_action_space_type == "continuous":
-            self.create_cc_actor_critic(h_size, num_layers)
+            self.create_cc_actor_critic(h_size, num_layers, vis_encode_type)
             self.entropy = tf.ones_like(tf.reshape(self.value, [-1])) * self.entropy
         else:
-            self.create_dc_actor_critic(h_size, num_layers)
+            self.create_dc_actor_critic(h_size, num_layers, vis_encode_type)
         self.create_losses(
             self.log_probs,
             self.old_log_probs,
diff --git a/ml-agents/mlagents/trainers/ppo/policy.py b/ml-agents/mlagents/trainers/ppo/policy.py
index a867cbb34a..621a613227 100644
--- a/ml-agents/mlagents/trainers/ppo/policy.py
+++ b/ml-agents/mlagents/trainers/ppo/policy.py
@@ -44,6 +44,7 @@ def __init__(self, seed, brain, trainer_params, is_training, load):
                 m_size=self.m_size,
                 seed=seed,
                 stream_names=list(reward_signal_configs.keys()),
+                vis_encode_type=trainer_params["vis_encode_type"],
             )
             self.model.create_ppo_optimizer()
 
diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
index 9e44b65c8f..a5485f8ed3 100644
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -53,6 +53,7 @@ def __init__(
             "memory_size",
             "model_path",
             "reward_signals",
+            "vis_encode_type",
         ]
         self.check_param_keys()
 

From de56c6273c22d95d519b3bdb9fa8f6d8478136ba Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Fri, 19 Jul 2019 13:31:23 -0700
Subject: [PATCH 09/27] Add visual encoder option. Separate create function for
 different encoder.

---
 ml-agents/mlagents/trainers/models.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index b467e31e22..a7b4241a86 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -472,7 +472,7 @@ def create_observation_streams(
                             False,
                         )
                         visual_encoders.append(encoded_visual)
-                elif vis_encode_type == EncoderType.nature_cnn:
+                elif vis_encode_type == EncoderType.NATURE_CNN:
                     for j in range(brain.number_visual_observations):
                         encoded_visual = self.create_nature_cnn_visual_observation_encoder(
                             self.visual_in[j],
@@ -559,13 +559,15 @@ def create_value_heads(self, stream_names, hidden_input):
             self.value_heads[name] = value
         self.value = tf.reduce_mean(list(self.value_heads.values()), 0)
 
-    def create_cc_actor_critic(self, h_size, num_layers):
+    def create_cc_actor_critic(self, h_size, num_layers, vis_encode_type):
         """
         Creates Continuous control actor-critic model.
         :param h_size: Size of hidden linear layers.
         :param num_layers: Number of hidden linear layers.
         """
-        hidden_streams = self.create_observation_streams(2, h_size, num_layers)
+        hidden_streams = self.create_observation_streams(
+            2, h_size, num_layers, vis_encode_type
+        )
 
         if self.use_recurrent:
             self.memory_in = tf.placeholder(
@@ -644,13 +646,15 @@ def create_cc_actor_critic(self, h_size, num_layers):
             (tf.identity(self.all_old_log_probs)), axis=1, keepdims=True
         )
 
-    def create_dc_actor_critic(self, h_size, num_layers):
+    def create_dc_actor_critic(self, h_size, num_layers, vis_encode_type):
         """
         Creates Discrete control actor-critic model.
         :param h_size: Size of hidden linear layers.
         :param num_layers: Number of hidden linear layers.
         """
-        hidden_streams = self.create_observation_streams(1, h_size, num_layers)
+        hidden_streams = self.create_observation_streams(
+            1, h_size, num_layers, vis_encode_type
+        )
         hidden = hidden_streams[0]
 
         if self.use_recurrent:

From e2fc44705513a6347f37d0a1d0cbf5a822121fc5 Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Fri, 19 Jul 2019 13:51:24 -0700
Subject: [PATCH 10/27] fix test

---
 ml-agents/mlagents/trainers/models.py                    | 2 +-
 ml-agents/mlagents/trainers/ppo/models.py                | 2 +-
 ml-agents/mlagents/trainers/tests/test_bcmodule.py       | 1 +
 ml-agents/mlagents/trainers/tests/test_ppo.py            | 2 ++
 ml-agents/mlagents/trainers/tests/test_reward_signals.py | 1 +
 5 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index a7b4241a86..5ede54c21f 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -435,7 +435,7 @@ def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
         )
 
     def create_observation_streams(
-        self, num_streams, h_size, num_layers, vis_encode_type
+        self, num_streams, h_size, num_layers, vis_encode_type="default"
     ):
         """
         Creates encoding stream for observations.
diff --git a/ml-agents/mlagents/trainers/ppo/models.py b/ml-agents/mlagents/trainers/ppo/models.py
index 767206d253..afaf80e423 100644
--- a/ml-agents/mlagents/trainers/ppo/models.py
+++ b/ml-agents/mlagents/trainers/ppo/models.py
@@ -22,7 +22,7 @@ def __init__(
         m_size=None,
         seed=0,
         stream_names=None,
-        vis_encode_type=None,
+        vis_encode_type="default",
     ):
         """
         Takes a Unity environment and model-specific hyper-parameters and returns the
diff --git a/ml-agents/mlagents/trainers/tests/test_bcmodule.py b/ml-agents/mlagents/trainers/tests/test_bcmodule.py
index 0eee0f4d2e..b25250ec5f 100644
--- a/ml-agents/mlagents/trainers/tests/test_bcmodule.py
+++ b/ml-agents/mlagents/trainers/tests/test_bcmodule.py
@@ -29,6 +29,7 @@ def dummy_config():
         sequence_length: 64
         summary_freq: 1000
         use_recurrent: false
+        vis_encode_type: "default"
         memory_size: 8
         pretraining:
           demo_path: ./demos/ExpertPyramid.demo
diff --git a/ml-agents/mlagents/trainers/tests/test_ppo.py b/ml-agents/mlagents/trainers/tests/test_ppo.py
index 939a9864b2..c9dfd0c792 100644
--- a/ml-agents/mlagents/trainers/tests/test_ppo.py
+++ b/ml-agents/mlagents/trainers/tests/test_ppo.py
@@ -32,6 +32,7 @@ def dummy_config():
         sequence_length: 64
         summary_freq: 1000
         use_recurrent: false
+        vis_encode_type: "default"
         memory_size: 8
         curiosity_strength: 0.0
         curiosity_enc_size: 1
@@ -327,6 +328,7 @@ def test_trainer_increment_step():
         "sequence_length": 64,
         "summary_freq": 3000,
         "use_recurrent": False,
+        "vis_encode_type": "default",
         "use_curiosity": False,
         "curiosity_strength": 0.01,
         "curiosity_enc_size": 128,
diff --git a/ml-agents/mlagents/trainers/tests/test_reward_signals.py b/ml-agents/mlagents/trainers/tests/test_reward_signals.py
index 1b695788c3..cc99e72477 100644
--- a/ml-agents/mlagents/trainers/tests/test_reward_signals.py
+++ b/ml-agents/mlagents/trainers/tests/test_reward_signals.py
@@ -35,6 +35,7 @@ def dummy_config():
         sequence_length: 64
         summary_freq: 1000
         use_recurrent: false
+        vis_encode_type: "default"
         memory_size: 8
         curiosity_strength: 0.0
         curiosity_enc_size: 1

From d088a02e15af37e90de40399cdd6b7b0472559d7 Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Fri, 19 Jul 2019 15:01:37 -0700
Subject: [PATCH 11/27] remove redundant quotes

---
 config/trainer_config.yaml                               | 2 +-
 ml-agents/mlagents/trainers/tests/test_bcmodule.py       | 2 +-
 ml-agents/mlagents/trainers/tests/test_ppo.py            | 2 +-
 ml-agents/mlagents/trainers/tests/test_reward_signals.py | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/config/trainer_config.yaml b/config/trainer_config.yaml
index 1430ac2f88..2bf0fbc347 100644
--- a/config/trainer_config.yaml
+++ b/config/trainer_config.yaml
@@ -16,7 +16,7 @@ default:
     sequence_length: 64
     summary_freq: 1000
     use_recurrent: false
-    vis_encode_type: "default"
+    vis_encode_type: default
     reward_signals: 
         extrinsic:
             strength: 1.0
diff --git a/ml-agents/mlagents/trainers/tests/test_bcmodule.py b/ml-agents/mlagents/trainers/tests/test_bcmodule.py
index b25250ec5f..395308c3c4 100644
--- a/ml-agents/mlagents/trainers/tests/test_bcmodule.py
+++ b/ml-agents/mlagents/trainers/tests/test_bcmodule.py
@@ -29,7 +29,7 @@ def dummy_config():
         sequence_length: 64
         summary_freq: 1000
         use_recurrent: false
-        vis_encode_type: "default"
+        vis_encode_type: default
         memory_size: 8
         pretraining:
           demo_path: ./demos/ExpertPyramid.demo
diff --git a/ml-agents/mlagents/trainers/tests/test_ppo.py b/ml-agents/mlagents/trainers/tests/test_ppo.py
index c9dfd0c792..baecbb16de 100644
--- a/ml-agents/mlagents/trainers/tests/test_ppo.py
+++ b/ml-agents/mlagents/trainers/tests/test_ppo.py
@@ -32,7 +32,7 @@ def dummy_config():
         sequence_length: 64
         summary_freq: 1000
         use_recurrent: false
-        vis_encode_type: "default"
+        vis_encode_type: default
         memory_size: 8
         curiosity_strength: 0.0
         curiosity_enc_size: 1
diff --git a/ml-agents/mlagents/trainers/tests/test_reward_signals.py b/ml-agents/mlagents/trainers/tests/test_reward_signals.py
index cc99e72477..dde79b3ece 100644
--- a/ml-agents/mlagents/trainers/tests/test_reward_signals.py
+++ b/ml-agents/mlagents/trainers/tests/test_reward_signals.py
@@ -35,7 +35,7 @@ def dummy_config():
         sequence_length: 64
         summary_freq: 1000
         use_recurrent: false
-        vis_encode_type: "default"
+        vis_encode_type: default
         memory_size: 8
         curiosity_strength: 0.0
         curiosity_enc_size: 1

From 0e70d059d58ea25ab7eb6f2d5f29a3c4a6c0402a Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Mon, 22 Jul 2019 13:34:07 -0700
Subject: [PATCH 12/27] minor refactor for vis encoder parameter input

---
 config/trainer_config.yaml                | 1 -
 ml-agents/mlagents/trainers/models.py     | 9 ++++-----
 ml-agents/mlagents/trainers/ppo/models.py | 4 ++--
 ml-agents/mlagents/trainers/ppo/policy.py | 3 ++-
 4 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/config/trainer_config.yaml b/config/trainer_config.yaml
index 2bf0fbc347..fcda3f6a9a 100644
--- a/config/trainer_config.yaml
+++ b/config/trainer_config.yaml
@@ -16,7 +16,6 @@ default:
     sequence_length: 64
     summary_freq: 1000
     use_recurrent: false
-    vis_encode_type: default
     reward_signals: 
         extrinsic:
             strength: 1.0
diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index 5ede54c21f..f242cca097 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -435,8 +435,8 @@ def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
         )
 
     def create_observation_streams(
-        self, num_streams, h_size, num_layers, vis_encode_type="default"
-    ):
+        self, num_streams: int, h_size: int, num_layers: int, vis_encode_type: EncoderType
+    ) -> tf.Tensor:
         """
         Creates encoding stream for observations.
         :param num_streams: Number of streams to create.
@@ -460,7 +460,6 @@ def create_observation_streams(
             visual_encoders = []
             hidden_state, hidden_visual = None, None
             if self.vis_obs_size > 0:
-                vis_encode_type = EncoderType(vis_encode_type)
                 if vis_encode_type == EncoderType.RESNET:
                     for j in range(brain.number_visual_observations):
                         encoded_visual = self.create_resnet_visual_observation_encoder(
@@ -559,7 +558,7 @@ def create_value_heads(self, stream_names, hidden_input):
             self.value_heads[name] = value
         self.value = tf.reduce_mean(list(self.value_heads.values()), 0)
 
-    def create_cc_actor_critic(self, h_size, num_layers, vis_encode_type):
+    def create_cc_actor_critic(self, h_size: int, num_layers: int, vis_encode_type: EncoderType):
         """
         Creates Continuous control actor-critic model.
         :param h_size: Size of hidden linear layers.
@@ -646,7 +645,7 @@ def create_cc_actor_critic(self, h_size, num_layers, vis_encode_type):
             (tf.identity(self.all_old_log_probs)), axis=1, keepdims=True
         )
 
-    def create_dc_actor_critic(self, h_size, num_layers, vis_encode_type):
+    def create_dc_actor_critic(self, h_size: int, num_layers: int, vis_encode_type: EncoderType):
         """
         Creates Discrete control actor-critic model.
         :param h_size: Size of hidden linear layers.
diff --git a/ml-agents/mlagents/trainers/ppo/models.py b/ml-agents/mlagents/trainers/ppo/models.py
index afaf80e423..291c03a00b 100644
--- a/ml-agents/mlagents/trainers/ppo/models.py
+++ b/ml-agents/mlagents/trainers/ppo/models.py
@@ -2,7 +2,7 @@
 import numpy as np
 
 import tensorflow as tf
-from mlagents.trainers.models import LearningModel
+from mlagents.trainers.models import LearningModel, EncoderType
 
 logger = logging.getLogger("mlagents.trainers")
 
@@ -22,7 +22,7 @@ def __init__(
         m_size=None,
         seed=0,
         stream_names=None,
-        vis_encode_type="default",
+        vis_encode_type=EncoderType.DEFAUL,
     ):
         """
         Takes a Unity environment and model-specific hyper-parameters and returns the
diff --git a/ml-agents/mlagents/trainers/ppo/policy.py b/ml-agents/mlagents/trainers/ppo/policy.py
index 621a613227..706b506268 100644
--- a/ml-agents/mlagents/trainers/ppo/policy.py
+++ b/ml-agents/mlagents/trainers/ppo/policy.py
@@ -5,6 +5,7 @@
 
 from mlagents.envs.timers import timed
 from mlagents.trainers import BrainInfo, ActionInfo
+from mlagents.trainers.models import EncoderType
 from mlagents.trainers.ppo.models import PPOModel
 from mlagents.trainers.tf_policy import TFPolicy
 from mlagents.trainers.components.reward_signals.reward_signal_factory import (
@@ -44,7 +45,7 @@ def __init__(self, seed, brain, trainer_params, is_training, load):
                 m_size=self.m_size,
                 seed=seed,
                 stream_names=list(reward_signal_configs.keys()),
-                vis_encode_type=trainer_params["vis_encode_type"],
+                vis_encode_type=EncoderType(trainer_params.get("vis_encode_type", "default")),
             )
             self.model.create_ppo_optimizer()
 

From 85158bae5854d36c602f64af86cf3a453bdf2dc6 Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Mon, 22 Jul 2019 13:38:53 -0700
Subject: [PATCH 13/27] add default

---
 ml-agents/mlagents/trainers/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index f242cca097..673262b6b5 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -435,7 +435,7 @@ def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
         )
 
     def create_observation_streams(
-        self, num_streams: int, h_size: int, num_layers: int, vis_encode_type: EncoderType
+        self, num_streams: int, h_size: int, num_layers: int, vis_encode_type: EncoderType=EncoderType.DEFAUL
     ) -> tf.Tensor:
         """
         Creates encoding stream for observations.

From ff2167c891a23d431d0729fa0832d8a8daf266bd Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Mon, 22 Jul 2019 13:40:22 -0700
Subject: [PATCH 14/27] reformatting

---
 ml-agents/mlagents/trainers/models.py     | 14 +++++++++++---
 ml-agents/mlagents/trainers/ppo/policy.py |  4 +++-
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index 673262b6b5..11a1d508c8 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -435,7 +435,11 @@ def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
         )
 
     def create_observation_streams(
-        self, num_streams: int, h_size: int, num_layers: int, vis_encode_type: EncoderType=EncoderType.DEFAUL
+        self,
+        num_streams: int,
+        h_size: int,
+        num_layers: int,
+        vis_encode_type: EncoderType = EncoderType.DEFAUL,
     ) -> tf.Tensor:
         """
         Creates encoding stream for observations.
@@ -558,7 +562,9 @@ def create_value_heads(self, stream_names, hidden_input):
             self.value_heads[name] = value
         self.value = tf.reduce_mean(list(self.value_heads.values()), 0)
 
-    def create_cc_actor_critic(self, h_size: int, num_layers: int, vis_encode_type: EncoderType):
+    def create_cc_actor_critic(
+        self, h_size: int, num_layers: int, vis_encode_type: EncoderType
+    ):
         """
         Creates Continuous control actor-critic model.
         :param h_size: Size of hidden linear layers.
@@ -645,7 +651,9 @@ def create_cc_actor_critic(self, h_size: int, num_layers: int, vis_encode_type:
             (tf.identity(self.all_old_log_probs)), axis=1, keepdims=True
         )
 
-    def create_dc_actor_critic(self, h_size: int, num_layers: int, vis_encode_type: EncoderType):
+    def create_dc_actor_critic(
+        self, h_size: int, num_layers: int, vis_encode_type: EncoderType
+    ):
         """
         Creates Discrete control actor-critic model.
         :param h_size: Size of hidden linear layers.
diff --git a/ml-agents/mlagents/trainers/ppo/policy.py b/ml-agents/mlagents/trainers/ppo/policy.py
index 706b506268..e4926c4868 100644
--- a/ml-agents/mlagents/trainers/ppo/policy.py
+++ b/ml-agents/mlagents/trainers/ppo/policy.py
@@ -45,7 +45,9 @@ def __init__(self, seed, brain, trainer_params, is_training, load):
                 m_size=self.m_size,
                 seed=seed,
                 stream_names=list(reward_signal_configs.keys()),
-                vis_encode_type=EncoderType(trainer_params.get("vis_encode_type", "default")),
+                vis_encode_type=EncoderType(
+                    trainer_params.get("vis_encode_type", "default")
+                ),
             )
             self.model.create_ppo_optimizer()
 

From 117fd88405dc427ae361629f2b11e19f76d91334 Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Mon, 22 Jul 2019 13:44:03 -0700
Subject: [PATCH 15/27] small fix

---
 ml-agents/mlagents/trainers/ppo/trainer.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
index a5485f8ed3..9e44b65c8f 100644
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -53,7 +53,6 @@ def __init__(
             "memory_size",
             "model_path",
             "reward_signals",
-            "vis_encode_type",
         ]
         self.check_param_keys()
 

From 8952bba83630233ab3d037474b4b6d739f79fd32 Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Mon, 22 Jul 2019 14:07:01 -0700
Subject: [PATCH 16/27] add vis_encode_type param option to doc

---
 docs/Training-PPO.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/docs/Training-PPO.md b/docs/Training-PPO.md
index b458d60890..9812fe0879 100644
--- a/docs/Training-PPO.md
+++ b/docs/Training-PPO.md
@@ -166,6 +166,16 @@ variables, this should be larger.
 
 Typical Range: `32` - `512`
 
+### (Optional) Visual Encoder Type
+
+`vis_encode_type` correspond to the encoder type for encoding visual observations.
+Valid options include:
+* `default` (default): a simple encoder consists of two CNN layer
+* `nature_cnn`: Mnih's CNN implementation (https://www.nature.com/articles/nature14236)
+* `resnet`: IMPALA Resnet implementation (https://arxiv.org/abs/1802.01561)
+
+Options: `default`, `resnet`, `nature_cnn`
+
 ## (Optional) Recurrent Neural Network Hyperparameters
 
 The below hyperparameters are only used when `use_recurrent` is set to true.

From 7de954d66e7628057e226e40ae501296f81ac261 Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Mon, 22 Jul 2019 14:15:15 -0700
Subject: [PATCH 17/27] type annotation

---
 ml-agents/mlagents/trainers/models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index 11a1d508c8..5aca69a987 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -564,7 +564,7 @@ def create_value_heads(self, stream_names, hidden_input):
 
     def create_cc_actor_critic(
         self, h_size: int, num_layers: int, vis_encode_type: EncoderType
-    ):
+    ) -> None:
         """
         Creates Continuous control actor-critic model.
         :param h_size: Size of hidden linear layers.
@@ -653,7 +653,7 @@ def create_cc_actor_critic(
 
     def create_dc_actor_critic(
         self, h_size: int, num_layers: int, vis_encode_type: EncoderType
-    ):
+    ) -> None:
         """
         Creates Discrete control actor-critic model.
         :param h_size: Size of hidden linear layers.

From b444d1f1f774dd470e3f1b20fe2d321ef84ffed3 Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Mon, 22 Jul 2019 14:18:30 -0700
Subject: [PATCH 18/27] fix typo

---
 ml-agents/mlagents/trainers/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index 5aca69a987..0c41bac099 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -14,7 +14,7 @@
 class EncoderType(Enum):
     RESNET = "resnet"
     NATURE_CNN = "nature_cnn"
-    DEFAUL = "default"
+    DEFAULT = "default"
 
 
 class LearningModel(object):

From dce059ce5b0be70a98c60ef44ab4288178205553 Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Mon, 22 Jul 2019 14:22:20 -0700
Subject: [PATCH 19/27] fix typo

---
 ml-agents/mlagents/trainers/models.py     | 2 +-
 ml-agents/mlagents/trainers/ppo/models.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index 0c41bac099..2f0983ab1d 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -439,7 +439,7 @@ def create_observation_streams(
         num_streams: int,
         h_size: int,
         num_layers: int,
-        vis_encode_type: EncoderType = EncoderType.DEFAUL,
+        vis_encode_type: EncoderType = EncoderType.DEFAULT,
     ) -> tf.Tensor:
         """
         Creates encoding stream for observations.
diff --git a/ml-agents/mlagents/trainers/ppo/models.py b/ml-agents/mlagents/trainers/ppo/models.py
index d8fab615a9..ca494c5476 100644
--- a/ml-agents/mlagents/trainers/ppo/models.py
+++ b/ml-agents/mlagents/trainers/ppo/models.py
@@ -22,7 +22,7 @@ def __init__(
         m_size=None,
         seed=0,
         stream_names=None,
-        vis_encode_type=EncoderType.DEFAUL,
+        vis_encode_type=EncoderType.DEFAULT,
     ):
         """
         Takes a Unity environment and model-specific hyper-parameters and returns the

From e2c8bdfd594fd166d8ec6e9d25afd53ac1549658 Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Mon, 22 Jul 2019 15:08:04 -0700
Subject: [PATCH 20/27] remove print

---
 ml-agents/mlagents/trainers/models.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index 2f0983ab1d..5dc03d98c7 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -284,7 +284,6 @@ def create_nature_cnn_visual_observation_encoder(
         :param reuse: Whether to re-use the weights within the same scope.
         :return: List of hidden layer tensors.
         """
-        print("creating nature cnn")
         with tf.variable_scope(scope):
             conv1 = tf.layers.conv2d(
                 image_input,
@@ -340,7 +339,6 @@ def create_resnet_visual_observation_encoder(
         :param reuse: Whether to re-use the weights within the same scope.
         :return: List of hidden layer tensors.
         """
-        print("creating resnet")
         n_channels = [16, 32, 32]  # channel for each stack
         n_blocks = 2  # number of residual blocks
         with tf.variable_scope(scope):

From 422969672a0a19613946180f33b22d094930c201 Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Mon, 22 Jul 2019 15:08:17 -0700
Subject: [PATCH 21/27] modify doc

---
 docs/Training-PPO.md | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/docs/Training-PPO.md b/docs/Training-PPO.md
index 9812fe0879..029bb73ba0 100644
--- a/docs/Training-PPO.md
+++ b/docs/Training-PPO.md
@@ -170,9 +170,12 @@ Typical Range: `32` - `512`
 
 `vis_encode_type` correspond to the encoder type for encoding visual observations.
 Valid options include:
-* `default` (default): a simple encoder consists of two CNN layer
-* `nature_cnn`: Mnih's CNN implementation (https://www.nature.com/articles/nature14236)
-* `resnet`: IMPALA Resnet implementation (https://arxiv.org/abs/1802.01561)
+* `default` (default): a simple encoder consists of two convolutional layers
+* `nature_cnn`: CNN implementation proposed by Mnih et al.(https://www.nature.com/articles/nature14236), 
+consisting of three convolutional layers
+* `resnet`: IMPALA Resnet implementation (https://arxiv.org/abs/1802.01561),
+consisting of three stacked layers, each with two risidual blocks, making a 
+much larger network than the other two.
 
 Options: `default`, `resnet`, `nature_cnn`
 

From 16ad6e6a3828cd0af337faefd87c6b15492b1f31 Mon Sep 17 00:00:00 2001
From: Ruo-Ping Dong <dongruoping@gmail.com>
Date: Tue, 23 Jul 2019 10:32:41 -0700
Subject: [PATCH 22/27] Update docs/Training-PPO.md

Co-Authored-By: Jonathan Harper <jharper+moar@unity3d.com>
---
 docs/Training-PPO.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/Training-PPO.md b/docs/Training-PPO.md
index 029bb73ba0..bb1f4b39a1 100644
--- a/docs/Training-PPO.md
+++ b/docs/Training-PPO.md
@@ -168,7 +168,7 @@ Typical Range: `32` - `512`
 
 ### (Optional) Visual Encoder Type
 
-`vis_encode_type` correspond to the encoder type for encoding visual observations.
+`vis_encode_type` corresponds to the encoder type for encoding visual observations.
 Valid options include:
 * `default` (default): a simple encoder consists of two convolutional layers
 * `nature_cnn`: CNN implementation proposed by Mnih et al.(https://www.nature.com/articles/nature14236), 

From c1562a9330be0c5885ce33111bfd90016d49a1c9 Mon Sep 17 00:00:00 2001
From: Ruo-Ping Dong <dongruoping@gmail.com>
Date: Tue, 23 Jul 2019 10:32:50 -0700
Subject: [PATCH 23/27] Update docs/Training-PPO.md

Co-Authored-By: Jonathan Harper <jharper+moar@unity3d.com>
---
 docs/Training-PPO.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/Training-PPO.md b/docs/Training-PPO.md
index bb1f4b39a1..6239311003 100644
--- a/docs/Training-PPO.md
+++ b/docs/Training-PPO.md
@@ -170,7 +170,7 @@ Typical Range: `32` - `512`
 
 `vis_encode_type` corresponds to the encoder type for encoding visual observations.
 Valid options include:
-* `default` (default): a simple encoder consists of two convolutional layers
+* `default` (default): a simple encoder which consists of two convolutional layers
 * `nature_cnn`: CNN implementation proposed by Mnih et al.(https://www.nature.com/articles/nature14236), 
 consisting of three convolutional layers
 * `resnet`: IMPALA Resnet implementation (https://arxiv.org/abs/1802.01561),

From 2af95fe1471983265816ca9ff67a7bcd2de95379 Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Tue, 23 Jul 2019 13:23:32 -0700
Subject: [PATCH 24/27] change default to simple

---
 docs/Training-PPO.md                                        | 4 ++--
 ml-agents/mlagents/trainers/models.py                       | 6 +++---
 ml-agents/mlagents/trainers/ppo/models.py                   | 2 +-
 ml-agents/mlagents/trainers/ppo/policy.py                   | 2 +-
 ml-agents/mlagents/trainers/tests/test_bcmodule.py          | 2 +-
 .../trainers/tests/test_environments/test_simple.py         | 2 +-
 ml-agents/mlagents/trainers/tests/test_ppo.py               | 4 ++--
 ml-agents/mlagents/trainers/tests/test_reward_signals.py    | 2 +-
 8 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/docs/Training-PPO.md b/docs/Training-PPO.md
index 6239311003..6732a8cdcf 100644
--- a/docs/Training-PPO.md
+++ b/docs/Training-PPO.md
@@ -170,14 +170,14 @@ Typical Range: `32` - `512`
 
 `vis_encode_type` corresponds to the encoder type for encoding visual observations.
 Valid options include:
-* `default` (default): a simple encoder which consists of two convolutional layers
+* `simple` (default): a simple encoder which consists of two convolutional layers
 * `nature_cnn`: CNN implementation proposed by Mnih et al.(https://www.nature.com/articles/nature14236), 
 consisting of three convolutional layers
 * `resnet`: IMPALA Resnet implementation (https://arxiv.org/abs/1802.01561),
 consisting of three stacked layers, each with two risidual blocks, making a 
 much larger network than the other two.
 
-Options: `default`, `resnet`, `nature_cnn`
+Options: `simple`, `nature_cnn`, `resnet`
 
 ## (Optional) Recurrent Neural Network Hyperparameters
 
diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index 5dc03d98c7..880eaa315e 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -12,9 +12,9 @@
 
 
 class EncoderType(Enum):
-    RESNET = "resnet"
+    SIMPLE = "simple"
     NATURE_CNN = "nature_cnn"
-    DEFAULT = "default"
+    RESNET = "resnet"
 
 
 class LearningModel(object):
@@ -437,7 +437,7 @@ def create_observation_streams(
         num_streams: int,
         h_size: int,
         num_layers: int,
-        vis_encode_type: EncoderType = EncoderType.DEFAULT,
+        vis_encode_type: EncoderType = EncoderType.SIMPLE,
     ) -> tf.Tensor:
         """
         Creates encoding stream for observations.
diff --git a/ml-agents/mlagents/trainers/ppo/models.py b/ml-agents/mlagents/trainers/ppo/models.py
index ca494c5476..225387c132 100644
--- a/ml-agents/mlagents/trainers/ppo/models.py
+++ b/ml-agents/mlagents/trainers/ppo/models.py
@@ -22,7 +22,7 @@ def __init__(
         m_size=None,
         seed=0,
         stream_names=None,
-        vis_encode_type=EncoderType.DEFAULT,
+        vis_encode_type=EncoderType.SIMPLE,
     ):
         """
         Takes a Unity environment and model-specific hyper-parameters and returns the
diff --git a/ml-agents/mlagents/trainers/ppo/policy.py b/ml-agents/mlagents/trainers/ppo/policy.py
index c3ec75596c..477f742037 100644
--- a/ml-agents/mlagents/trainers/ppo/policy.py
+++ b/ml-agents/mlagents/trainers/ppo/policy.py
@@ -46,7 +46,7 @@ def __init__(self, seed, brain, trainer_params, is_training, load):
                 seed=seed,
                 stream_names=list(reward_signal_configs.keys()),
                 vis_encode_type=EncoderType(
-                    trainer_params.get("vis_encode_type", "default")
+                    trainer_params.get("vis_encode_type", "simple")
                 ),
             )
             self.model.create_ppo_optimizer()
diff --git a/ml-agents/mlagents/trainers/tests/test_bcmodule.py b/ml-agents/mlagents/trainers/tests/test_bcmodule.py
index 395308c3c4..5f2bf58e25 100644
--- a/ml-agents/mlagents/trainers/tests/test_bcmodule.py
+++ b/ml-agents/mlagents/trainers/tests/test_bcmodule.py
@@ -29,7 +29,7 @@ def dummy_config():
         sequence_length: 64
         summary_freq: 1000
         use_recurrent: false
-        vis_encode_type: default
+        vis_encode_type: simple
         memory_size: 8
         pretraining:
           demo_path: ./demos/ExpertPyramid.demo
diff --git a/ml-agents/mlagents/trainers/tests/test_environments/test_simple.py b/ml-agents/mlagents/trainers/tests/test_environments/test_simple.py
index 4a92c970a5..d6402d50ce 100644
--- a/ml-agents/mlagents/trainers/tests/test_environments/test_simple.py
+++ b/ml-agents/mlagents/trainers/tests/test_environments/test_simple.py
@@ -144,7 +144,7 @@ def test_simple():
                 extrinsic:
                     strength: 1.0
                     gamma: 0.99
-            vis_encode_type: default
+            vis_encode_type: simple
     """
     # Create controller and begin training.
     with tempfile.TemporaryDirectory() as dir:
diff --git a/ml-agents/mlagents/trainers/tests/test_ppo.py b/ml-agents/mlagents/trainers/tests/test_ppo.py
index baecbb16de..e70b0c04ad 100644
--- a/ml-agents/mlagents/trainers/tests/test_ppo.py
+++ b/ml-agents/mlagents/trainers/tests/test_ppo.py
@@ -32,7 +32,7 @@ def dummy_config():
         sequence_length: 64
         summary_freq: 1000
         use_recurrent: false
-        vis_encode_type: default
+        vis_encode_type: simple
         memory_size: 8
         curiosity_strength: 0.0
         curiosity_enc_size: 1
@@ -328,7 +328,7 @@ def test_trainer_increment_step():
         "sequence_length": 64,
         "summary_freq": 3000,
         "use_recurrent": False,
-        "vis_encode_type": "default",
+        "vis_encode_type": "simple",
         "use_curiosity": False,
         "curiosity_strength": 0.01,
         "curiosity_enc_size": 128,
diff --git a/ml-agents/mlagents/trainers/tests/test_reward_signals.py b/ml-agents/mlagents/trainers/tests/test_reward_signals.py
index dde79b3ece..1ed0903153 100644
--- a/ml-agents/mlagents/trainers/tests/test_reward_signals.py
+++ b/ml-agents/mlagents/trainers/tests/test_reward_signals.py
@@ -35,7 +35,7 @@ def dummy_config():
         sequence_length: 64
         summary_freq: 1000
         use_recurrent: false
-        vis_encode_type: default
+        vis_encode_type: simple
         memory_size: 8
         curiosity_strength: 0.0
         curiosity_enc_size: 1

From 8b6481e4e32057519fe1bed265ac7ab7df72ea33 Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Tue, 23 Jul 2019 18:09:25 -0700
Subject: [PATCH 25/27] remove default

---
 ml-agents/mlagents/trainers/ppo/trainer.py                      | 1 -
 ml-agents/mlagents/trainers/tests/test_bcmodule.py              | 1 -
 .../mlagents/trainers/tests/test_environments/test_simple.py    | 1 -
 ml-agents/mlagents/trainers/tests/test_ppo.py                   | 2 --
 ml-agents/mlagents/trainers/tests/test_reward_signals.py        | 1 -
 5 files changed, 6 deletions(-)

diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
index 6a1d30a8f2..b74f15efa4 100644
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -53,7 +53,6 @@ def __init__(
             "memory_size",
             "model_path",
             "reward_signals",
-            "vis_encode_type",
         ]
         self.check_param_keys()
 
diff --git a/ml-agents/mlagents/trainers/tests/test_bcmodule.py b/ml-agents/mlagents/trainers/tests/test_bcmodule.py
index 5f2bf58e25..0eee0f4d2e 100644
--- a/ml-agents/mlagents/trainers/tests/test_bcmodule.py
+++ b/ml-agents/mlagents/trainers/tests/test_bcmodule.py
@@ -29,7 +29,6 @@ def dummy_config():
         sequence_length: 64
         summary_freq: 1000
         use_recurrent: false
-        vis_encode_type: simple
         memory_size: 8
         pretraining:
           demo_path: ./demos/ExpertPyramid.demo
diff --git a/ml-agents/mlagents/trainers/tests/test_environments/test_simple.py b/ml-agents/mlagents/trainers/tests/test_environments/test_simple.py
index d6402d50ce..4aa28cac90 100644
--- a/ml-agents/mlagents/trainers/tests/test_environments/test_simple.py
+++ b/ml-agents/mlagents/trainers/tests/test_environments/test_simple.py
@@ -144,7 +144,6 @@ def test_simple():
                 extrinsic:
                     strength: 1.0
                     gamma: 0.99
-            vis_encode_type: simple
     """
     # Create controller and begin training.
     with tempfile.TemporaryDirectory() as dir:
diff --git a/ml-agents/mlagents/trainers/tests/test_ppo.py b/ml-agents/mlagents/trainers/tests/test_ppo.py
index e70b0c04ad..939a9864b2 100644
--- a/ml-agents/mlagents/trainers/tests/test_ppo.py
+++ b/ml-agents/mlagents/trainers/tests/test_ppo.py
@@ -32,7 +32,6 @@ def dummy_config():
         sequence_length: 64
         summary_freq: 1000
         use_recurrent: false
-        vis_encode_type: simple
         memory_size: 8
         curiosity_strength: 0.0
         curiosity_enc_size: 1
@@ -328,7 +327,6 @@ def test_trainer_increment_step():
         "sequence_length": 64,
         "summary_freq": 3000,
         "use_recurrent": False,
-        "vis_encode_type": "simple",
         "use_curiosity": False,
         "curiosity_strength": 0.01,
         "curiosity_enc_size": 128,
diff --git a/ml-agents/mlagents/trainers/tests/test_reward_signals.py b/ml-agents/mlagents/trainers/tests/test_reward_signals.py
index 1ed0903153..1b695788c3 100644
--- a/ml-agents/mlagents/trainers/tests/test_reward_signals.py
+++ b/ml-agents/mlagents/trainers/tests/test_reward_signals.py
@@ -35,7 +35,6 @@ def dummy_config():
         sequence_length: 64
         summary_freq: 1000
         use_recurrent: false
-        vis_encode_type: simple
         memory_size: 8
         curiosity_strength: 0.0
         curiosity_enc_size: 1

From 7ee74a57edc26a343640c053957fda1ee5da4c9d Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Fri, 26 Jul 2019 11:46:26 -0700
Subject: [PATCH 26/27] fix trainer_config.yaml

---
 config/trainer_config.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/config/trainer_config.yaml b/config/trainer_config.yaml
index d09c886d14..a13b8ffbb6 100644
--- a/config/trainer_config.yaml
+++ b/config/trainer_config.yaml
@@ -16,7 +16,6 @@ default:
     sequence_length: 64
     summary_freq: 1000
     use_recurrent: false
-    vis_encode_type: default
     reward_signals: 
         extrinsic:
             strength: 1.0

From 2911a0fb9cf84c5674efd2abd74937b2cc10c86c Mon Sep 17 00:00:00 2001
From: dongruoping <dongruoping@gmail.com>
Date: Fri, 26 Jul 2019 11:58:26 -0700
Subject: [PATCH 27/27] fix default

---
 config/trainer_config.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config/trainer_config.yaml b/config/trainer_config.yaml
index a13b8ffbb6..9a60aefcf4 100644
--- a/config/trainer_config.yaml
+++ b/config/trainer_config.yaml
@@ -16,6 +16,7 @@ default:
     sequence_length: 64
     summary_freq: 1000
     use_recurrent: false
+    vis_encode_type: simple
     reward_signals: 
         extrinsic:
             strength: 1.0