From 5f1f63e13c3c1e17c3a5256b93d693c8d5b51a26 Mon Sep 17 00:00:00 2001 From: dongruoping Date: Wed, 3 Jul 2019 11:09:23 -0700 Subject: [PATCH 01/27] branch first commit --- ml-agents-envs/mlagents/envs/environment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index 8800d5589a..4e50e87aef 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -66,7 +66,7 @@ def __init__( atexit.register(self._close) self.port = base_port + worker_id self._buffer_size = 12000 - self._version_ = "API-8" + self._version_ = "API-6" self._loaded = ( False ) # If true, this means the environment was successfully loaded From 02778b8347186992ba592826ee1a32bee3cc27f5 Mon Sep 17 00:00:00 2001 From: dongruoping Date: Wed, 3 Jul 2019 16:53:51 -0700 Subject: [PATCH 02/27] add resnet --- ml-agents/mlagents/trainers/models.py | 67 +++++++++++++++++++-------- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index 7b66e5a221..057bcf7653 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -212,26 +212,55 @@ def create_visual_observation_encoder( :param num_layers: number of hidden layers to create. :return: List of hidden layer tensors. """ + # channel for each stack + n_channels = [16, 32, 32] + # number of residual blocks + n_blocks = 2 + with tf.variable_scope(scope): - conv1 = tf.layers.conv2d( - image_input, - 16, - kernel_size=[8, 8], - strides=[4, 4], - activation=tf.nn.elu, - reuse=reuse, - name="conv_1", - ) - conv2 = tf.layers.conv2d( - conv1, - 32, - kernel_size=[4, 4], - strides=[2, 2], - activation=tf.nn.elu, - reuse=reuse, - name="conv_2", - ) - hidden = c_layers.flatten(conv2) + hidden = image_input + for i, ch in enumerate(n_channels): + hidden = tf.layers.conv2d( + hidden, + ch, + kernel_size=[3, 3], + strides=1, + reuse=reuse, + name="layer%dconv_1" % i, + ) + hidden = tf.layers.max_pooling2d( + hidden, + pool_size=[3, 3], + strides=2, + padding='same', + ) + # create residual blocks + for j in range(n_blocks): + block_input = hidden + hidden = tf.nn.relu(hidden) + hidden = tf.layers.conv2d( + hidden, + ch, + kernel_size=[3, 3], + strides=1, + padding='same', + reuse=reuse, + name="layer%d_%d_conv1" % (i, j), + ) + hidden = tf.nn.relu(hidden) + hidden = tf.layers.conv2d( + hidden, + ch, + kernel_size=[3, 3], + strides=1, + padding='same', + reuse=reuse, + name="layer%d_%d_conv2" % (i, j), + ) + hidden = tf.add(block_input, hidden) + + hidden = tf.nn.relu(hidden) + hidden = c_layers.flatten(hidden) with tf.variable_scope(scope + "/" + "flat_encoding"): hidden_flat = self.create_vector_observation_encoder( From 74b7acd1a20db99de779be701d57895b1fc0204d Mon Sep 17 00:00:00 2001 From: dongruoping Date: Thu, 18 Jul 2019 16:32:51 -0700 Subject: [PATCH 03/27] add different types of visual encoder --- ml-agents/mlagents/trainers/models.py | 125 ++++++++++++++++++-------- 1 file changed, 86 insertions(+), 39 deletions(-) diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index a7045e6975..e9064ce316 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -220,6 +220,7 @@ def create_visual_observation_encoder( num_layers: int, scope: str, reuse: bool, + encoder_type=None ) -> tf.Tensor: """ Builds a set of visual (CNN) encoders. @@ -231,55 +232,101 @@ def create_visual_observation_encoder( :param num_layers: number of hidden layers to create. :return: List of hidden layer tensors. """ - # channel for each stack - n_channels = [16, 32, 32] - # number of residual blocks - n_blocks = 2 - with tf.variable_scope(scope): - hidden = image_input - for i, ch in enumerate(n_channels): - hidden = tf.layers.conv2d( - hidden, - ch, - kernel_size=[3, 3], - strides=1, - reuse=reuse, - name="layer%dconv_1" % i, - ) - hidden = tf.layers.max_pooling2d( - hidden, - pool_size=[3, 3], - strides=2, - padding='same', - ) - # create residual blocks - for j in range(n_blocks): - block_input = hidden - hidden = tf.nn.relu(hidden) + if encoder_type == "resnet": + n_channels = [16, 32, 32] # channel for each stack + n_blocks = 2 # number of residual blocks + hidden = image_input + for i, ch in enumerate(n_channels): hidden = tf.layers.conv2d( hidden, ch, kernel_size=[3, 3], - strides=1, - padding='same', + strides=[1, 1], reuse=reuse, - name="layer%d_%d_conv1" % (i, j), + name="layer%dconv_1" % i, ) - hidden = tf.nn.relu(hidden) - hidden = tf.layers.conv2d( + hidden = tf.layers.max_pooling2d( hidden, - ch, - kernel_size=[3, 3], - strides=1, + pool_size=[3, 3], + strides=[2, 2], padding='same', - reuse=reuse, - name="layer%d_%d_conv2" % (i, j), ) - hidden = tf.add(block_input, hidden) - - hidden = tf.nn.relu(hidden) - hidden = c_layers.flatten(hidden) + # create residual blocks + for j in range(n_blocks): + block_input = hidden + hidden = tf.nn.relu(hidden) + hidden = tf.layers.conv2d( + hidden, + ch, + kernel_size=[3, 3], + strides=[1, 1], + padding='same', + reuse=reuse, + name="layer%d_%d_conv1" % (i, j), + ) + hidden = tf.nn.relu(hidden) + hidden = tf.layers.conv2d( + hidden, + ch, + kernel_size=[3, 3], + strides=[1, 1], + padding='same', + reuse=reuse, + name="layer%d_%d_conv2" % (i, j), + ) + hidden = tf.add(block_input, hidden) + hidden = tf.nn.relu(hidden) + hidden = c_layers.flatten(hidden) + elif encoder_type == 'nature_cnn': + conv1 = tf.layers.conv2d( + image_input, + 32, + kernel_size=[8, 8], + strides=[4, 4], + activation=tf.nn.elu, + reuse=reuse, + name="conv_1" + ) + conv2 = tf.layers.conv2d( + conv1, + 64, + kernel_size=[4, 4], + strides=[2, 2], + activation=tf.nn.elu, + reuse=reuse, + name="conv_2" + ) + conv3 = tf.layers.conv2d( + conv2, + 64, + kernel_size=[3, 3], + strides=[1, 1], + activation=tf.nn.elu, + reuse=reuse, + name="conv_3" + ) + hidden = c_layers.flatten(conv3) + else: + conv1 = tf.layers.conv2d( + image_input, + 16, + kernel_size=[8, 8], + strides=[4, 4], + activation=tf.nn.elu, + reuse=reuse, + name="conv_1", + ) + conv2 = tf.layers.conv2d( + conv1, + 32, + kernel_size=[4, 4], + strides=[2, 2], + activation=tf.nn.elu, + reuse=reuse, + name="conv_2", + ) + hidden = c_layers.flatten(conv2) with tf.variable_scope(scope + "/" + "flat_encoding"): hidden_flat = self.create_vector_observation_encoder( From 0d32a08217e5f373a0f91f78b78d485f85784ecc Mon Sep 17 00:00:00 2001 From: dongruoping Date: Thu, 18 Jul 2019 16:38:03 -0700 Subject: [PATCH 04/27] fix bug --- ml-agents-envs/mlagents/envs/environment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index 9e549ca94b..eb8993eca0 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -68,7 +68,7 @@ def __init__( atexit.register(self._close) self.port = base_port + worker_id self._buffer_size = 12000 - self._version_ = "API-6" + self._version_ = "API-8" self._loaded = ( False ) # If true, this means the environment was successfully loaded From 17ecb8d2e978f38211e11f89e1253210c3940e3f Mon Sep 17 00:00:00 2001 From: dongruoping Date: Thu, 18 Jul 2019 16:41:01 -0700 Subject: [PATCH 05/27] reformatting --- ml-agents/mlagents/trainers/models.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index e9064ce316..09e86f1779 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -220,7 +220,7 @@ def create_visual_observation_encoder( num_layers: int, scope: str, reuse: bool, - encoder_type=None + encoder_type=None, ) -> tf.Tensor: """ Builds a set of visual (CNN) encoders. @@ -234,8 +234,8 @@ def create_visual_observation_encoder( """ with tf.variable_scope(scope): if encoder_type == "resnet": - n_channels = [16, 32, 32] # channel for each stack - n_blocks = 2 # number of residual blocks + n_channels = [16, 32, 32] # channel for each stack + n_blocks = 2 # number of residual blocks hidden = image_input for i, ch in enumerate(n_channels): hidden = tf.layers.conv2d( @@ -247,10 +247,7 @@ def create_visual_observation_encoder( name="layer%dconv_1" % i, ) hidden = tf.layers.max_pooling2d( - hidden, - pool_size=[3, 3], - strides=[2, 2], - padding='same', + hidden, pool_size=[3, 3], strides=[2, 2], padding="same" ) # create residual blocks for j in range(n_blocks): @@ -261,7 +258,7 @@ def create_visual_observation_encoder( ch, kernel_size=[3, 3], strides=[1, 1], - padding='same', + padding="same", reuse=reuse, name="layer%d_%d_conv1" % (i, j), ) @@ -271,14 +268,14 @@ def create_visual_observation_encoder( ch, kernel_size=[3, 3], strides=[1, 1], - padding='same', + padding="same", reuse=reuse, name="layer%d_%d_conv2" % (i, j), ) hidden = tf.add(block_input, hidden) hidden = tf.nn.relu(hidden) hidden = c_layers.flatten(hidden) - elif encoder_type == 'nature_cnn': + elif encoder_type == "nature_cnn": conv1 = tf.layers.conv2d( image_input, 32, @@ -286,7 +283,7 @@ def create_visual_observation_encoder( strides=[4, 4], activation=tf.nn.elu, reuse=reuse, - name="conv_1" + name="conv_1", ) conv2 = tf.layers.conv2d( conv1, @@ -295,7 +292,7 @@ def create_visual_observation_encoder( strides=[2, 2], activation=tf.nn.elu, reuse=reuse, - name="conv_2" + name="conv_2", ) conv3 = tf.layers.conv2d( conv2, @@ -304,7 +301,7 @@ def create_visual_observation_encoder( strides=[1, 1], activation=tf.nn.elu, reuse=reuse, - name="conv_3" + name="conv_3", ) hidden = c_layers.flatten(conv3) else: From 52b78d71a54564a867a89446d510c52a6aed2c2d Mon Sep 17 00:00:00 2001 From: dongruoping Date: Thu, 18 Jul 2019 16:48:13 -0700 Subject: [PATCH 06/27] fix type annotation --- ml-agents/mlagents/trainers/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index 09e86f1779..e3fcdf5f0f 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -220,7 +220,7 @@ def create_visual_observation_encoder( num_layers: int, scope: str, reuse: bool, - encoder_type=None, + encoder_type: str=None, ) -> tf.Tensor: """ Builds a set of visual (CNN) encoders. @@ -230,6 +230,7 @@ def create_visual_observation_encoder( :param h_size: Hidden layer size. :param activation: What type of activation function to use for layers. :param num_layers: number of hidden layers to create. + :param encoder_type: type of visual encoder (default/nature_cnn/resnet) :return: List of hidden layer tensors. """ with tf.variable_scope(scope): From fae3b10a51f25f0ba1b49e63eafa1a6443262534 Mon Sep 17 00:00:00 2001 From: dongruoping Date: Thu, 18 Jul 2019 16:52:15 -0700 Subject: [PATCH 07/27] reformatting --- ml-agents/mlagents/trainers/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index e3fcdf5f0f..5f6ba3b666 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -220,7 +220,7 @@ def create_visual_observation_encoder( num_layers: int, scope: str, reuse: bool, - encoder_type: str=None, + encoder_type: str = None, ) -> tf.Tensor: """ Builds a set of visual (CNN) encoders. From f9a226e1b4f420a02dcab7f36c0d44026eb3635f Mon Sep 17 00:00:00 2001 From: dongruoping Date: Fri, 19 Jul 2019 13:24:35 -0700 Subject: [PATCH 08/27] Add visual encoder option. Seperate create function for different encoder. --- config/trainer_config.yaml | 1 + ml-agents/mlagents/trainers/models.py | 278 ++++++++++++++------- ml-agents/mlagents/trainers/ppo/models.py | 5 +- ml-agents/mlagents/trainers/ppo/policy.py | 1 + ml-agents/mlagents/trainers/ppo/trainer.py | 1 + 5 files changed, 187 insertions(+), 99 deletions(-) diff --git a/config/trainer_config.yaml b/config/trainer_config.yaml index fcda3f6a9a..1430ac2f88 100644 --- a/config/trainer_config.yaml +++ b/config/trainer_config.yaml @@ -16,6 +16,7 @@ default: sequence_length: 64 summary_freq: 1000 use_recurrent: false + vis_encode_type: "default" reward_signals: extrinsic: strength: 1.0 diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index 5f6ba3b666..b467e31e22 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -1,4 +1,5 @@ import logging +from enum import Enum from typing import Any, Callable, Dict import numpy as np @@ -10,6 +11,12 @@ ActivationFunction = Callable[[tf.Tensor], tf.Tensor] +class EncoderType(Enum): + RESNET = "resnet" + NATURE_CNN = "nature_cnn" + DEFAUL = "default" + + class LearningModel(object): _version_number_ = 2 @@ -220,111 +227,162 @@ def create_visual_observation_encoder( num_layers: int, scope: str, reuse: bool, - encoder_type: str = None, ) -> tf.Tensor: """ - Builds a set of visual (CNN) encoders. + Builds a set of resnet visual encoders. + :param image_input: The placeholder for the image input to use. + :param h_size: Hidden layer size. + :param activation: What type of activation function to use for layers. + :param num_layers: number of hidden layers to create. + :param scope: The scope of the graph within which to create the ops. :param reuse: Whether to re-use the weights within the same scope. + :return: List of hidden layer tensors. + """ + with tf.variable_scope(scope): + conv1 = tf.layers.conv2d( + image_input, + 16, + kernel_size=[8, 8], + strides=[4, 4], + activation=tf.nn.elu, + reuse=reuse, + name="conv_1", + ) + conv2 = tf.layers.conv2d( + conv1, + 32, + kernel_size=[4, 4], + strides=[2, 2], + activation=tf.nn.elu, + reuse=reuse, + name="conv_2", + ) + hidden = c_layers.flatten(conv2) + + with tf.variable_scope(scope + "/" + "flat_encoding"): + hidden_flat = self.create_vector_observation_encoder( + hidden, h_size, activation, num_layers, scope, reuse + ) + return hidden_flat + + def create_nature_cnn_visual_observation_encoder( + self, + image_input: tf.Tensor, + h_size: int, + activation: ActivationFunction, + num_layers: int, + scope: str, + reuse: bool, + ) -> tf.Tensor: + """ + Builds a set of resnet visual encoders. + :param image_input: The placeholder for the image input to use. + :param h_size: Hidden layer size. + :param activation: What type of activation function to use for layers. + :param num_layers: number of hidden layers to create. :param scope: The scope of the graph within which to create the ops. + :param reuse: Whether to re-use the weights within the same scope. + :return: List of hidden layer tensors. + """ + print("creating nature cnn") + with tf.variable_scope(scope): + conv1 = tf.layers.conv2d( + image_input, + 32, + kernel_size=[8, 8], + strides=[4, 4], + activation=tf.nn.elu, + reuse=reuse, + name="conv_1", + ) + conv2 = tf.layers.conv2d( + conv1, + 64, + kernel_size=[4, 4], + strides=[2, 2], + activation=tf.nn.elu, + reuse=reuse, + name="conv_2", + ) + conv3 = tf.layers.conv2d( + conv2, + 64, + kernel_size=[3, 3], + strides=[1, 1], + activation=tf.nn.elu, + reuse=reuse, + name="conv_3", + ) + hidden = c_layers.flatten(conv3) + + with tf.variable_scope(scope + "/" + "flat_encoding"): + hidden_flat = self.create_vector_observation_encoder( + hidden, h_size, activation, num_layers, scope, reuse + ) + return hidden_flat + + def create_resnet_visual_observation_encoder( + self, + image_input: tf.Tensor, + h_size: int, + activation: ActivationFunction, + num_layers: int, + scope: str, + reuse: bool, + ) -> tf.Tensor: + """ + Builds a set of resnet visual encoders. :param image_input: The placeholder for the image input to use. :param h_size: Hidden layer size. :param activation: What type of activation function to use for layers. :param num_layers: number of hidden layers to create. - :param encoder_type: type of visual encoder (default/nature_cnn/resnet) + :param scope: The scope of the graph within which to create the ops. + :param reuse: Whether to re-use the weights within the same scope. :return: List of hidden layer tensors. """ + print("creating resnet") + n_channels = [16, 32, 32] # channel for each stack + n_blocks = 2 # number of residual blocks with tf.variable_scope(scope): - if encoder_type == "resnet": - n_channels = [16, 32, 32] # channel for each stack - n_blocks = 2 # number of residual blocks - hidden = image_input - for i, ch in enumerate(n_channels): + hidden = image_input + for i, ch in enumerate(n_channels): + hidden = tf.layers.conv2d( + hidden, + ch, + kernel_size=[3, 3], + strides=[1, 1], + reuse=reuse, + name="layer%dconv_1" % i, + ) + hidden = tf.layers.max_pooling2d( + hidden, pool_size=[3, 3], strides=[2, 2], padding="same" + ) + # create residual blocks + for j in range(n_blocks): + block_input = hidden + hidden = tf.nn.relu(hidden) hidden = tf.layers.conv2d( hidden, ch, kernel_size=[3, 3], strides=[1, 1], + padding="same", reuse=reuse, - name="layer%dconv_1" % i, + name="layer%d_%d_conv1" % (i, j), ) - hidden = tf.layers.max_pooling2d( - hidden, pool_size=[3, 3], strides=[2, 2], padding="same" + hidden = tf.nn.relu(hidden) + hidden = tf.layers.conv2d( + hidden, + ch, + kernel_size=[3, 3], + strides=[1, 1], + padding="same", + reuse=reuse, + name="layer%d_%d_conv2" % (i, j), ) - # create residual blocks - for j in range(n_blocks): - block_input = hidden - hidden = tf.nn.relu(hidden) - hidden = tf.layers.conv2d( - hidden, - ch, - kernel_size=[3, 3], - strides=[1, 1], - padding="same", - reuse=reuse, - name="layer%d_%d_conv1" % (i, j), - ) - hidden = tf.nn.relu(hidden) - hidden = tf.layers.conv2d( - hidden, - ch, - kernel_size=[3, 3], - strides=[1, 1], - padding="same", - reuse=reuse, - name="layer%d_%d_conv2" % (i, j), - ) - hidden = tf.add(block_input, hidden) - hidden = tf.nn.relu(hidden) - hidden = c_layers.flatten(hidden) - elif encoder_type == "nature_cnn": - conv1 = tf.layers.conv2d( - image_input, - 32, - kernel_size=[8, 8], - strides=[4, 4], - activation=tf.nn.elu, - reuse=reuse, - name="conv_1", - ) - conv2 = tf.layers.conv2d( - conv1, - 64, - kernel_size=[4, 4], - strides=[2, 2], - activation=tf.nn.elu, - reuse=reuse, - name="conv_2", - ) - conv3 = tf.layers.conv2d( - conv2, - 64, - kernel_size=[3, 3], - strides=[1, 1], - activation=tf.nn.elu, - reuse=reuse, - name="conv_3", - ) - hidden = c_layers.flatten(conv3) - else: - conv1 = tf.layers.conv2d( - image_input, - 16, - kernel_size=[8, 8], - strides=[4, 4], - activation=tf.nn.elu, - reuse=reuse, - name="conv_1", - ) - conv2 = tf.layers.conv2d( - conv1, - 32, - kernel_size=[4, 4], - strides=[2, 2], - activation=tf.nn.elu, - reuse=reuse, - name="conv_2", - ) - hidden = c_layers.flatten(conv2) + hidden = tf.add(block_input, hidden) + hidden = tf.nn.relu(hidden) + hidden = c_layers.flatten(hidden) with tf.variable_scope(scope + "/" + "flat_encoding"): hidden_flat = self.create_vector_observation_encoder( @@ -376,7 +434,9 @@ def create_discrete_action_masking_layer(all_logits, action_masks, action_size): ), ) - def create_observation_streams(self, num_streams, h_size, num_layers): + def create_observation_streams( + self, num_streams, h_size, num_layers, vis_encode_type + ): """ Creates encoding stream for observations. :param num_streams: Number of streams to create. @@ -400,16 +460,40 @@ def create_observation_streams(self, num_streams, h_size, num_layers): visual_encoders = [] hidden_state, hidden_visual = None, None if self.vis_obs_size > 0: - for j in range(brain.number_visual_observations): - encoded_visual = self.create_visual_observation_encoder( - self.visual_in[j], - h_size, - activation_fn, - num_layers, - "main_graph_{}_encoder{}".format(i, j), - False, - ) - visual_encoders.append(encoded_visual) + vis_encode_type = EncoderType(vis_encode_type) + if vis_encode_type == EncoderType.RESNET: + for j in range(brain.number_visual_observations): + encoded_visual = self.create_resnet_visual_observation_encoder( + self.visual_in[j], + h_size, + activation_fn, + num_layers, + "main_graph_{}_encoder{}".format(i, j), + False, + ) + visual_encoders.append(encoded_visual) + elif vis_encode_type == EncoderType.nature_cnn: + for j in range(brain.number_visual_observations): + encoded_visual = self.create_nature_cnn_visual_observation_encoder( + self.visual_in[j], + h_size, + activation_fn, + num_layers, + "main_graph_{}_encoder{}".format(i, j), + False, + ) + visual_encoders.append(encoded_visual) + else: + for j in range(brain.number_visual_observations): + encoded_visual = self.create_visual_observation_encoder( + self.visual_in[j], + h_size, + activation_fn, + num_layers, + "main_graph_{}_encoder{}".format(i, j), + False, + ) + visual_encoders.append(encoded_visual) hidden_visual = tf.concat(visual_encoders, axis=1) if brain.vector_observation_space_size > 0: hidden_state = self.create_vector_observation_encoder( diff --git a/ml-agents/mlagents/trainers/ppo/models.py b/ml-agents/mlagents/trainers/ppo/models.py index 5526221889..767206d253 100644 --- a/ml-agents/mlagents/trainers/ppo/models.py +++ b/ml-agents/mlagents/trainers/ppo/models.py @@ -22,6 +22,7 @@ def __init__( m_size=None, seed=0, stream_names=None, + vis_encode_type=None, ): """ Takes a Unity environment and model-specific hyper-parameters and returns the @@ -46,10 +47,10 @@ def __init__( if num_layers < 1: num_layers = 1 if brain.vector_action_space_type == "continuous": - self.create_cc_actor_critic(h_size, num_layers) + self.create_cc_actor_critic(h_size, num_layers, vis_encode_type) self.entropy = tf.ones_like(tf.reshape(self.value, [-1])) * self.entropy else: - self.create_dc_actor_critic(h_size, num_layers) + self.create_dc_actor_critic(h_size, num_layers, vis_encode_type) self.create_losses( self.log_probs, self.old_log_probs, diff --git a/ml-agents/mlagents/trainers/ppo/policy.py b/ml-agents/mlagents/trainers/ppo/policy.py index a867cbb34a..621a613227 100644 --- a/ml-agents/mlagents/trainers/ppo/policy.py +++ b/ml-agents/mlagents/trainers/ppo/policy.py @@ -44,6 +44,7 @@ def __init__(self, seed, brain, trainer_params, is_training, load): m_size=self.m_size, seed=seed, stream_names=list(reward_signal_configs.keys()), + vis_encode_type=trainer_params["vis_encode_type"], ) self.model.create_ppo_optimizer() diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index 9e44b65c8f..a5485f8ed3 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -53,6 +53,7 @@ def __init__( "memory_size", "model_path", "reward_signals", + "vis_encode_type", ] self.check_param_keys() From de56c6273c22d95d519b3bdb9fa8f6d8478136ba Mon Sep 17 00:00:00 2001 From: dongruoping Date: Fri, 19 Jul 2019 13:31:23 -0700 Subject: [PATCH 09/27] Add visual encoder option. Seperate create function for different encoder. --- ml-agents/mlagents/trainers/models.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index b467e31e22..a7b4241a86 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -472,7 +472,7 @@ def create_observation_streams( False, ) visual_encoders.append(encoded_visual) - elif vis_encode_type == EncoderType.nature_cnn: + elif vis_encode_type == EncoderType.NATURE_CNN: for j in range(brain.number_visual_observations): encoded_visual = self.create_nature_cnn_visual_observation_encoder( self.visual_in[j], @@ -559,13 +559,15 @@ def create_value_heads(self, stream_names, hidden_input): self.value_heads[name] = value self.value = tf.reduce_mean(list(self.value_heads.values()), 0) - def create_cc_actor_critic(self, h_size, num_layers): + def create_cc_actor_critic(self, h_size, num_layers, vis_encode_type): """ Creates Continuous control actor-critic model. :param h_size: Size of hidden linear layers. :param num_layers: Number of hidden linear layers. """ - hidden_streams = self.create_observation_streams(2, h_size, num_layers) + hidden_streams = self.create_observation_streams( + 2, h_size, num_layers, vis_encode_type + ) if self.use_recurrent: self.memory_in = tf.placeholder( @@ -644,13 +646,15 @@ def create_cc_actor_critic(self, h_size, num_layers): (tf.identity(self.all_old_log_probs)), axis=1, keepdims=True ) - def create_dc_actor_critic(self, h_size, num_layers): + def create_dc_actor_critic(self, h_size, num_layers, vis_encode_type): """ Creates Discrete control actor-critic model. :param h_size: Size of hidden linear layers. :param num_layers: Number of hidden linear layers. """ - hidden_streams = self.create_observation_streams(1, h_size, num_layers) + hidden_streams = self.create_observation_streams( + 1, h_size, num_layers, vis_encode_type + ) hidden = hidden_streams[0] if self.use_recurrent: From e2fc44705513a6347f37d0a1d0cbf5a822121fc5 Mon Sep 17 00:00:00 2001 From: dongruoping Date: Fri, 19 Jul 2019 13:51:24 -0700 Subject: [PATCH 10/27] fix test --- ml-agents/mlagents/trainers/models.py | 2 +- ml-agents/mlagents/trainers/ppo/models.py | 2 +- ml-agents/mlagents/trainers/tests/test_bcmodule.py | 1 + ml-agents/mlagents/trainers/tests/test_ppo.py | 2 ++ ml-agents/mlagents/trainers/tests/test_reward_signals.py | 1 + 5 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index a7b4241a86..5ede54c21f 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -435,7 +435,7 @@ def create_discrete_action_masking_layer(all_logits, action_masks, action_size): ) def create_observation_streams( - self, num_streams, h_size, num_layers, vis_encode_type + self, num_streams, h_size, num_layers, vis_encode_type="default" ): """ Creates encoding stream for observations. diff --git a/ml-agents/mlagents/trainers/ppo/models.py b/ml-agents/mlagents/trainers/ppo/models.py index 767206d253..afaf80e423 100644 --- a/ml-agents/mlagents/trainers/ppo/models.py +++ b/ml-agents/mlagents/trainers/ppo/models.py @@ -22,7 +22,7 @@ def __init__( m_size=None, seed=0, stream_names=None, - vis_encode_type=None, + vis_encode_type="default", ): """ Takes a Unity environment and model-specific hyper-parameters and returns the diff --git a/ml-agents/mlagents/trainers/tests/test_bcmodule.py b/ml-agents/mlagents/trainers/tests/test_bcmodule.py index 0eee0f4d2e..b25250ec5f 100644 --- a/ml-agents/mlagents/trainers/tests/test_bcmodule.py +++ b/ml-agents/mlagents/trainers/tests/test_bcmodule.py @@ -29,6 +29,7 @@ def dummy_config(): sequence_length: 64 summary_freq: 1000 use_recurrent: false + vis_encode_type: "default" memory_size: 8 pretraining: demo_path: ./demos/ExpertPyramid.demo diff --git a/ml-agents/mlagents/trainers/tests/test_ppo.py b/ml-agents/mlagents/trainers/tests/test_ppo.py index 939a9864b2..c9dfd0c792 100644 --- a/ml-agents/mlagents/trainers/tests/test_ppo.py +++ b/ml-agents/mlagents/trainers/tests/test_ppo.py @@ -32,6 +32,7 @@ def dummy_config(): sequence_length: 64 summary_freq: 1000 use_recurrent: false + vis_encode_type: "default" memory_size: 8 curiosity_strength: 0.0 curiosity_enc_size: 1 @@ -327,6 +328,7 @@ def test_trainer_increment_step(): "sequence_length": 64, "summary_freq": 3000, "use_recurrent": False, + "vis_encode_type": "default", "use_curiosity": False, "curiosity_strength": 0.01, "curiosity_enc_size": 128, diff --git a/ml-agents/mlagents/trainers/tests/test_reward_signals.py b/ml-agents/mlagents/trainers/tests/test_reward_signals.py index 1b695788c3..cc99e72477 100644 --- a/ml-agents/mlagents/trainers/tests/test_reward_signals.py +++ b/ml-agents/mlagents/trainers/tests/test_reward_signals.py @@ -35,6 +35,7 @@ def dummy_config(): sequence_length: 64 summary_freq: 1000 use_recurrent: false + vis_encode_type: "default" memory_size: 8 curiosity_strength: 0.0 curiosity_enc_size: 1 From d088a02e15af37e90de40399cdd6b7b0472559d7 Mon Sep 17 00:00:00 2001 From: dongruoping Date: Fri, 19 Jul 2019 15:01:37 -0700 Subject: [PATCH 11/27] remove redundant quotes --- config/trainer_config.yaml | 2 +- ml-agents/mlagents/trainers/tests/test_bcmodule.py | 2 +- ml-agents/mlagents/trainers/tests/test_ppo.py | 2 +- ml-agents/mlagents/trainers/tests/test_reward_signals.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config/trainer_config.yaml b/config/trainer_config.yaml index 1430ac2f88..2bf0fbc347 100644 --- a/config/trainer_config.yaml +++ b/config/trainer_config.yaml @@ -16,7 +16,7 @@ default: sequence_length: 64 summary_freq: 1000 use_recurrent: false - vis_encode_type: "default" + vis_encode_type: default reward_signals: extrinsic: strength: 1.0 diff --git a/ml-agents/mlagents/trainers/tests/test_bcmodule.py b/ml-agents/mlagents/trainers/tests/test_bcmodule.py index b25250ec5f..395308c3c4 100644 --- a/ml-agents/mlagents/trainers/tests/test_bcmodule.py +++ b/ml-agents/mlagents/trainers/tests/test_bcmodule.py @@ -29,7 +29,7 @@ def dummy_config(): sequence_length: 64 summary_freq: 1000 use_recurrent: false - vis_encode_type: "default" + vis_encode_type: default memory_size: 8 pretraining: demo_path: ./demos/ExpertPyramid.demo diff --git a/ml-agents/mlagents/trainers/tests/test_ppo.py b/ml-agents/mlagents/trainers/tests/test_ppo.py index c9dfd0c792..baecbb16de 100644 --- a/ml-agents/mlagents/trainers/tests/test_ppo.py +++ b/ml-agents/mlagents/trainers/tests/test_ppo.py @@ -32,7 +32,7 @@ def dummy_config(): sequence_length: 64 summary_freq: 1000 use_recurrent: false - vis_encode_type: "default" + vis_encode_type: default memory_size: 8 curiosity_strength: 0.0 curiosity_enc_size: 1 diff --git a/ml-agents/mlagents/trainers/tests/test_reward_signals.py b/ml-agents/mlagents/trainers/tests/test_reward_signals.py index cc99e72477..dde79b3ece 100644 --- a/ml-agents/mlagents/trainers/tests/test_reward_signals.py +++ b/ml-agents/mlagents/trainers/tests/test_reward_signals.py @@ -35,7 +35,7 @@ def dummy_config(): sequence_length: 64 summary_freq: 1000 use_recurrent: false - vis_encode_type: "default" + vis_encode_type: default memory_size: 8 curiosity_strength: 0.0 curiosity_enc_size: 1 From 0e70d059d58ea25ab7eb6f2d5f29a3c4a6c0402a Mon Sep 17 00:00:00 2001 From: dongruoping Date: Mon, 22 Jul 2019 13:34:07 -0700 Subject: [PATCH 12/27] minor refactor for vis encoder parameter input --- config/trainer_config.yaml | 1 - ml-agents/mlagents/trainers/models.py | 9 ++++----- ml-agents/mlagents/trainers/ppo/models.py | 4 ++-- ml-agents/mlagents/trainers/ppo/policy.py | 3 ++- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/config/trainer_config.yaml b/config/trainer_config.yaml index 2bf0fbc347..fcda3f6a9a 100644 --- a/config/trainer_config.yaml +++ b/config/trainer_config.yaml @@ -16,7 +16,6 @@ default: sequence_length: 64 summary_freq: 1000 use_recurrent: false - vis_encode_type: default reward_signals: extrinsic: strength: 1.0 diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index 5ede54c21f..f242cca097 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -435,8 +435,8 @@ def create_discrete_action_masking_layer(all_logits, action_masks, action_size): ) def create_observation_streams( - self, num_streams, h_size, num_layers, vis_encode_type="default" - ): + self, num_streams: int, h_size: int, num_layers: int, vis_encode_type: EncoderType + ) -> tf.Tensor: """ Creates encoding stream for observations. :param num_streams: Number of streams to create. @@ -460,7 +460,6 @@ def create_observation_streams( visual_encoders = [] hidden_state, hidden_visual = None, None if self.vis_obs_size > 0: - vis_encode_type = EncoderType(vis_encode_type) if vis_encode_type == EncoderType.RESNET: for j in range(brain.number_visual_observations): encoded_visual = self.create_resnet_visual_observation_encoder( @@ -559,7 +558,7 @@ def create_value_heads(self, stream_names, hidden_input): self.value_heads[name] = value self.value = tf.reduce_mean(list(self.value_heads.values()), 0) - def create_cc_actor_critic(self, h_size, num_layers, vis_encode_type): + def create_cc_actor_critic(self, h_size: int, num_layers: int, vis_encode_type: EncoderType): """ Creates Continuous control actor-critic model. :param h_size: Size of hidden linear layers. @@ -646,7 +645,7 @@ def create_cc_actor_critic(self, h_size, num_layers, vis_encode_type): (tf.identity(self.all_old_log_probs)), axis=1, keepdims=True ) - def create_dc_actor_critic(self, h_size, num_layers, vis_encode_type): + def create_dc_actor_critic(self, h_size: int, num_layers: int, vis_encode_type: EncoderType): """ Creates Discrete control actor-critic model. :param h_size: Size of hidden linear layers. diff --git a/ml-agents/mlagents/trainers/ppo/models.py b/ml-agents/mlagents/trainers/ppo/models.py index afaf80e423..291c03a00b 100644 --- a/ml-agents/mlagents/trainers/ppo/models.py +++ b/ml-agents/mlagents/trainers/ppo/models.py @@ -2,7 +2,7 @@ import numpy as np import tensorflow as tf -from mlagents.trainers.models import LearningModel +from mlagents.trainers.models import LearningModel, EncoderType logger = logging.getLogger("mlagents.trainers") @@ -22,7 +22,7 @@ def __init__( m_size=None, seed=0, stream_names=None, - vis_encode_type="default", + vis_encode_type=EncoderType.DEFAUL, ): """ Takes a Unity environment and model-specific hyper-parameters and returns the diff --git a/ml-agents/mlagents/trainers/ppo/policy.py b/ml-agents/mlagents/trainers/ppo/policy.py index 621a613227..706b506268 100644 --- a/ml-agents/mlagents/trainers/ppo/policy.py +++ b/ml-agents/mlagents/trainers/ppo/policy.py @@ -5,6 +5,7 @@ from mlagents.envs.timers import timed from mlagents.trainers import BrainInfo, ActionInfo +from mlagents.trainers.models import EncoderType from mlagents.trainers.ppo.models import PPOModel from mlagents.trainers.tf_policy import TFPolicy from mlagents.trainers.components.reward_signals.reward_signal_factory import ( @@ -44,7 +45,7 @@ def __init__(self, seed, brain, trainer_params, is_training, load): m_size=self.m_size, seed=seed, stream_names=list(reward_signal_configs.keys()), - vis_encode_type=trainer_params["vis_encode_type"], + vis_encode_type=EncoderType(trainer_params.get("vis_encode_type", "default")), ) self.model.create_ppo_optimizer() From 85158bae5854d36c602f64af86cf3a453bdf2dc6 Mon Sep 17 00:00:00 2001 From: dongruoping Date: Mon, 22 Jul 2019 13:38:53 -0700 Subject: [PATCH 13/27] add default --- ml-agents/mlagents/trainers/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index f242cca097..673262b6b5 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -435,7 +435,7 @@ def create_discrete_action_masking_layer(all_logits, action_masks, action_size): ) def create_observation_streams( - self, num_streams: int, h_size: int, num_layers: int, vis_encode_type: EncoderType + self, num_streams: int, h_size: int, num_layers: int, vis_encode_type: EncoderType=EncoderType.DEFAUL ) -> tf.Tensor: """ Creates encoding stream for observations. From ff2167c891a23d431d0729fa0832d8a8daf266bd Mon Sep 17 00:00:00 2001 From: dongruoping Date: Mon, 22 Jul 2019 13:40:22 -0700 Subject: [PATCH 14/27] reformatting --- ml-agents/mlagents/trainers/models.py | 14 +++++++++++--- ml-agents/mlagents/trainers/ppo/policy.py | 4 +++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index 673262b6b5..11a1d508c8 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -435,7 +435,11 @@ def create_discrete_action_masking_layer(all_logits, action_masks, action_size): ) def create_observation_streams( - self, num_streams: int, h_size: int, num_layers: int, vis_encode_type: EncoderType=EncoderType.DEFAUL + self, + num_streams: int, + h_size: int, + num_layers: int, + vis_encode_type: EncoderType = EncoderType.DEFAUL, ) -> tf.Tensor: """ Creates encoding stream for observations. @@ -558,7 +562,9 @@ def create_value_heads(self, stream_names, hidden_input): self.value_heads[name] = value self.value = tf.reduce_mean(list(self.value_heads.values()), 0) - def create_cc_actor_critic(self, h_size: int, num_layers: int, vis_encode_type: EncoderType): + def create_cc_actor_critic( + self, h_size: int, num_layers: int, vis_encode_type: EncoderType + ): """ Creates Continuous control actor-critic model. :param h_size: Size of hidden linear layers. @@ -645,7 +651,9 @@ def create_cc_actor_critic(self, h_size: int, num_layers: int, vis_encode_type: (tf.identity(self.all_old_log_probs)), axis=1, keepdims=True ) - def create_dc_actor_critic(self, h_size: int, num_layers: int, vis_encode_type: EncoderType): + def create_dc_actor_critic( + self, h_size: int, num_layers: int, vis_encode_type: EncoderType + ): """ Creates Discrete control actor-critic model. :param h_size: Size of hidden linear layers. diff --git a/ml-agents/mlagents/trainers/ppo/policy.py b/ml-agents/mlagents/trainers/ppo/policy.py index 706b506268..e4926c4868 100644 --- a/ml-agents/mlagents/trainers/ppo/policy.py +++ b/ml-agents/mlagents/trainers/ppo/policy.py @@ -45,7 +45,9 @@ def __init__(self, seed, brain, trainer_params, is_training, load): m_size=self.m_size, seed=seed, stream_names=list(reward_signal_configs.keys()), - vis_encode_type=EncoderType(trainer_params.get("vis_encode_type", "default")), + vis_encode_type=EncoderType( + trainer_params.get("vis_encode_type", "default") + ), ) self.model.create_ppo_optimizer() From 117fd88405dc427ae361629f2b11e19f76d91334 Mon Sep 17 00:00:00 2001 From: dongruoping Date: Mon, 22 Jul 2019 13:44:03 -0700 Subject: [PATCH 15/27] small fix --- ml-agents/mlagents/trainers/ppo/trainer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index a5485f8ed3..9e44b65c8f 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -53,7 +53,6 @@ def __init__( "memory_size", "model_path", "reward_signals", - "vis_encode_type", ] self.check_param_keys() From 8952bba83630233ab3d037474b4b6d739f79fd32 Mon Sep 17 00:00:00 2001 From: dongruoping Date: Mon, 22 Jul 2019 14:07:01 -0700 Subject: [PATCH 16/27] add vis_encode_type param option to doc --- docs/Training-PPO.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/Training-PPO.md b/docs/Training-PPO.md index b458d60890..9812fe0879 100644 --- a/docs/Training-PPO.md +++ b/docs/Training-PPO.md @@ -166,6 +166,16 @@ variables, this should be larger. Typical Range: `32` - `512` +### (Optional) Visual Encoder Type + +`vis_encode_type` correspond to the encoder type for encoding visual observations. +Valid options include: +* `default` (default): a simple encoder consists of two CNN layer +* `nature_cnn`: Mnih's CNN implementation (https://www.nature.com/articles/nature14236) +* `resnet`: IMPALA Resnet implementation (https://arxiv.org/abs/1802.01561) + +Options: `default`, `resnet`, `nature_cnn` + ## (Optional) Recurrent Neural Network Hyperparameters The below hyperparameters are only used when `use_recurrent` is set to true. From 7de954d66e7628057e226e40ae501296f81ac261 Mon Sep 17 00:00:00 2001 From: dongruoping Date: Mon, 22 Jul 2019 14:15:15 -0700 Subject: [PATCH 17/27] type annotation --- ml-agents/mlagents/trainers/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index 11a1d508c8..5aca69a987 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -564,7 +564,7 @@ def create_value_heads(self, stream_names, hidden_input): def create_cc_actor_critic( self, h_size: int, num_layers: int, vis_encode_type: EncoderType - ): + ) -> None: """ Creates Continuous control actor-critic model. :param h_size: Size of hidden linear layers. @@ -653,7 +653,7 @@ def create_cc_actor_critic( def create_dc_actor_critic( self, h_size: int, num_layers: int, vis_encode_type: EncoderType - ): + ) -> None: """ Creates Discrete control actor-critic model. :param h_size: Size of hidden linear layers. From b444d1f1f774dd470e3f1b20fe2d321ef84ffed3 Mon Sep 17 00:00:00 2001 From: dongruoping Date: Mon, 22 Jul 2019 14:18:30 -0700 Subject: [PATCH 18/27] fix typo --- ml-agents/mlagents/trainers/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index 5aca69a987..0c41bac099 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -14,7 +14,7 @@ class EncoderType(Enum): RESNET = "resnet" NATURE_CNN = "nature_cnn" - DEFAUL = "default" + DEFAULT = "default" class LearningModel(object): From dce059ce5b0be70a98c60ef44ab4288178205553 Mon Sep 17 00:00:00 2001 From: dongruoping Date: Mon, 22 Jul 2019 14:22:20 -0700 Subject: [PATCH 19/27] fix typo --- ml-agents/mlagents/trainers/models.py | 2 +- ml-agents/mlagents/trainers/ppo/models.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index 0c41bac099..2f0983ab1d 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -439,7 +439,7 @@ def create_observation_streams( num_streams: int, h_size: int, num_layers: int, - vis_encode_type: EncoderType = EncoderType.DEFAUL, + vis_encode_type: EncoderType = EncoderType.DEFAULT, ) -> tf.Tensor: """ Creates encoding stream for observations. diff --git a/ml-agents/mlagents/trainers/ppo/models.py b/ml-agents/mlagents/trainers/ppo/models.py index d8fab615a9..ca494c5476 100644 --- a/ml-agents/mlagents/trainers/ppo/models.py +++ b/ml-agents/mlagents/trainers/ppo/models.py @@ -22,7 +22,7 @@ def __init__( m_size=None, seed=0, stream_names=None, - vis_encode_type=EncoderType.DEFAUL, + vis_encode_type=EncoderType.DEFAULT, ): """ Takes a Unity environment and model-specific hyper-parameters and returns the From e2c8bdfd594fd166d8ec6e9d25afd53ac1549658 Mon Sep 17 00:00:00 2001 From: dongruoping Date: Mon, 22 Jul 2019 15:08:04 -0700 Subject: [PATCH 20/27] remove print --- ml-agents/mlagents/trainers/models.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index 2f0983ab1d..5dc03d98c7 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -284,7 +284,6 @@ def create_nature_cnn_visual_observation_encoder( :param reuse: Whether to re-use the weights within the same scope. :return: List of hidden layer tensors. """ - print("creating nature cnn") with tf.variable_scope(scope): conv1 = tf.layers.conv2d( image_input, @@ -340,7 +339,6 @@ def create_resnet_visual_observation_encoder( :param reuse: Whether to re-use the weights within the same scope. :return: List of hidden layer tensors. """ - print("creating resnet") n_channels = [16, 32, 32] # channel for each stack n_blocks = 2 # number of residual blocks with tf.variable_scope(scope): From 422969672a0a19613946180f33b22d094930c201 Mon Sep 17 00:00:00 2001 From: dongruoping Date: Mon, 22 Jul 2019 15:08:17 -0700 Subject: [PATCH 21/27] modify doc --- docs/Training-PPO.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/Training-PPO.md b/docs/Training-PPO.md index 9812fe0879..029bb73ba0 100644 --- a/docs/Training-PPO.md +++ b/docs/Training-PPO.md @@ -170,9 +170,12 @@ Typical Range: `32` - `512` `vis_encode_type` correspond to the encoder type for encoding visual observations. Valid options include: -* `default` (default): a simple encoder consists of two CNN layer -* `nature_cnn`: Mnih's CNN implementation (https://www.nature.com/articles/nature14236) -* `resnet`: IMPALA Resnet implementation (https://arxiv.org/abs/1802.01561) +* `default` (default): a simple encoder consists of two convolutional layers +* `nature_cnn`: CNN implementation proposed by Mnih et al.(https://www.nature.com/articles/nature14236), +consisting of three convolutional layers +* `resnet`: IMPALA Resnet implementation (https://arxiv.org/abs/1802.01561), +consisting of three stacked layers, each with two risidual blocks, making a +much larger network than the other two. Options: `default`, `resnet`, `nature_cnn` From 16ad6e6a3828cd0af337faefd87c6b15492b1f31 Mon Sep 17 00:00:00 2001 From: Ruo-Ping Dong Date: Tue, 23 Jul 2019 10:32:41 -0700 Subject: [PATCH 22/27] Update docs/Training-PPO.md Co-Authored-By: Jonathan Harper --- docs/Training-PPO.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Training-PPO.md b/docs/Training-PPO.md index 029bb73ba0..bb1f4b39a1 100644 --- a/docs/Training-PPO.md +++ b/docs/Training-PPO.md @@ -168,7 +168,7 @@ Typical Range: `32` - `512` ### (Optional) Visual Encoder Type -`vis_encode_type` correspond to the encoder type for encoding visual observations. +`vis_encode_type` corresponds to the encoder type for encoding visual observations. Valid options include: * `default` (default): a simple encoder consists of two convolutional layers * `nature_cnn`: CNN implementation proposed by Mnih et al.(https://www.nature.com/articles/nature14236), From c1562a9330be0c5885ce33111bfd90016d49a1c9 Mon Sep 17 00:00:00 2001 From: Ruo-Ping Dong Date: Tue, 23 Jul 2019 10:32:50 -0700 Subject: [PATCH 23/27] Update docs/Training-PPO.md Co-Authored-By: Jonathan Harper --- docs/Training-PPO.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Training-PPO.md b/docs/Training-PPO.md index bb1f4b39a1..6239311003 100644 --- a/docs/Training-PPO.md +++ b/docs/Training-PPO.md @@ -170,7 +170,7 @@ Typical Range: `32` - `512` `vis_encode_type` corresponds to the encoder type for encoding visual observations. Valid options include: -* `default` (default): a simple encoder consists of two convolutional layers +* `default` (default): a simple encoder which consists of two convolutional layers * `nature_cnn`: CNN implementation proposed by Mnih et al.(https://www.nature.com/articles/nature14236), consisting of three convolutional layers * `resnet`: IMPALA Resnet implementation (https://arxiv.org/abs/1802.01561), From 2af95fe1471983265816ca9ff67a7bcd2de95379 Mon Sep 17 00:00:00 2001 From: dongruoping Date: Tue, 23 Jul 2019 13:23:32 -0700 Subject: [PATCH 24/27] change default to simple --- docs/Training-PPO.md | 4 ++-- ml-agents/mlagents/trainers/models.py | 6 +++--- ml-agents/mlagents/trainers/ppo/models.py | 2 +- ml-agents/mlagents/trainers/ppo/policy.py | 2 +- ml-agents/mlagents/trainers/tests/test_bcmodule.py | 2 +- .../trainers/tests/test_environments/test_simple.py | 2 +- ml-agents/mlagents/trainers/tests/test_ppo.py | 4 ++-- ml-agents/mlagents/trainers/tests/test_reward_signals.py | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/Training-PPO.md b/docs/Training-PPO.md index 6239311003..6732a8cdcf 100644 --- a/docs/Training-PPO.md +++ b/docs/Training-PPO.md @@ -170,14 +170,14 @@ Typical Range: `32` - `512` `vis_encode_type` corresponds to the encoder type for encoding visual observations. Valid options include: -* `default` (default): a simple encoder which consists of two convolutional layers +* `simple` (default): a simple encoder which consists of two convolutional layers * `nature_cnn`: CNN implementation proposed by Mnih et al.(https://www.nature.com/articles/nature14236), consisting of three convolutional layers * `resnet`: IMPALA Resnet implementation (https://arxiv.org/abs/1802.01561), consisting of three stacked layers, each with two risidual blocks, making a much larger network than the other two. -Options: `default`, `resnet`, `nature_cnn` +Options: `simple`, `nature_cnn`, `resnet` ## (Optional) Recurrent Neural Network Hyperparameters diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index 5dc03d98c7..880eaa315e 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -12,9 +12,9 @@ class EncoderType(Enum): - RESNET = "resnet" + SIMPLE = "simple" NATURE_CNN = "nature_cnn" - DEFAULT = "default" + RESNET = "resnet" class LearningModel(object): @@ -437,7 +437,7 @@ def create_observation_streams( num_streams: int, h_size: int, num_layers: int, - vis_encode_type: EncoderType = EncoderType.DEFAULT, + vis_encode_type: EncoderType = EncoderType.SIMPLE, ) -> tf.Tensor: """ Creates encoding stream for observations. diff --git a/ml-agents/mlagents/trainers/ppo/models.py b/ml-agents/mlagents/trainers/ppo/models.py index ca494c5476..225387c132 100644 --- a/ml-agents/mlagents/trainers/ppo/models.py +++ b/ml-agents/mlagents/trainers/ppo/models.py @@ -22,7 +22,7 @@ def __init__( m_size=None, seed=0, stream_names=None, - vis_encode_type=EncoderType.DEFAULT, + vis_encode_type=EncoderType.SIMPLE, ): """ Takes a Unity environment and model-specific hyper-parameters and returns the diff --git a/ml-agents/mlagents/trainers/ppo/policy.py b/ml-agents/mlagents/trainers/ppo/policy.py index c3ec75596c..477f742037 100644 --- a/ml-agents/mlagents/trainers/ppo/policy.py +++ b/ml-agents/mlagents/trainers/ppo/policy.py @@ -46,7 +46,7 @@ def __init__(self, seed, brain, trainer_params, is_training, load): seed=seed, stream_names=list(reward_signal_configs.keys()), vis_encode_type=EncoderType( - trainer_params.get("vis_encode_type", "default") + trainer_params.get("vis_encode_type", "simple") ), ) self.model.create_ppo_optimizer() diff --git a/ml-agents/mlagents/trainers/tests/test_bcmodule.py b/ml-agents/mlagents/trainers/tests/test_bcmodule.py index 395308c3c4..5f2bf58e25 100644 --- a/ml-agents/mlagents/trainers/tests/test_bcmodule.py +++ b/ml-agents/mlagents/trainers/tests/test_bcmodule.py @@ -29,7 +29,7 @@ def dummy_config(): sequence_length: 64 summary_freq: 1000 use_recurrent: false - vis_encode_type: default + vis_encode_type: simple memory_size: 8 pretraining: demo_path: ./demos/ExpertPyramid.demo diff --git a/ml-agents/mlagents/trainers/tests/test_environments/test_simple.py b/ml-agents/mlagents/trainers/tests/test_environments/test_simple.py index 4a92c970a5..d6402d50ce 100644 --- a/ml-agents/mlagents/trainers/tests/test_environments/test_simple.py +++ b/ml-agents/mlagents/trainers/tests/test_environments/test_simple.py @@ -144,7 +144,7 @@ def test_simple(): extrinsic: strength: 1.0 gamma: 0.99 - vis_encode_type: default + vis_encode_type: simple """ # Create controller and begin training. with tempfile.TemporaryDirectory() as dir: diff --git a/ml-agents/mlagents/trainers/tests/test_ppo.py b/ml-agents/mlagents/trainers/tests/test_ppo.py index baecbb16de..e70b0c04ad 100644 --- a/ml-agents/mlagents/trainers/tests/test_ppo.py +++ b/ml-agents/mlagents/trainers/tests/test_ppo.py @@ -32,7 +32,7 @@ def dummy_config(): sequence_length: 64 summary_freq: 1000 use_recurrent: false - vis_encode_type: default + vis_encode_type: simple memory_size: 8 curiosity_strength: 0.0 curiosity_enc_size: 1 @@ -328,7 +328,7 @@ def test_trainer_increment_step(): "sequence_length": 64, "summary_freq": 3000, "use_recurrent": False, - "vis_encode_type": "default", + "vis_encode_type": "simple", "use_curiosity": False, "curiosity_strength": 0.01, "curiosity_enc_size": 128, diff --git a/ml-agents/mlagents/trainers/tests/test_reward_signals.py b/ml-agents/mlagents/trainers/tests/test_reward_signals.py index dde79b3ece..1ed0903153 100644 --- a/ml-agents/mlagents/trainers/tests/test_reward_signals.py +++ b/ml-agents/mlagents/trainers/tests/test_reward_signals.py @@ -35,7 +35,7 @@ def dummy_config(): sequence_length: 64 summary_freq: 1000 use_recurrent: false - vis_encode_type: default + vis_encode_type: simple memory_size: 8 curiosity_strength: 0.0 curiosity_enc_size: 1 From 8b6481e4e32057519fe1bed265ac7ab7df72ea33 Mon Sep 17 00:00:00 2001 From: dongruoping Date: Tue, 23 Jul 2019 18:09:25 -0700 Subject: [PATCH 25/27] remove default --- ml-agents/mlagents/trainers/ppo/trainer.py | 1 - ml-agents/mlagents/trainers/tests/test_bcmodule.py | 1 - .../mlagents/trainers/tests/test_environments/test_simple.py | 1 - ml-agents/mlagents/trainers/tests/test_ppo.py | 2 -- ml-agents/mlagents/trainers/tests/test_reward_signals.py | 1 - 5 files changed, 6 deletions(-) diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index 6a1d30a8f2..b74f15efa4 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -53,7 +53,6 @@ def __init__( "memory_size", "model_path", "reward_signals", - "vis_encode_type", ] self.check_param_keys() diff --git a/ml-agents/mlagents/trainers/tests/test_bcmodule.py b/ml-agents/mlagents/trainers/tests/test_bcmodule.py index 5f2bf58e25..0eee0f4d2e 100644 --- a/ml-agents/mlagents/trainers/tests/test_bcmodule.py +++ b/ml-agents/mlagents/trainers/tests/test_bcmodule.py @@ -29,7 +29,6 @@ def dummy_config(): sequence_length: 64 summary_freq: 1000 use_recurrent: false - vis_encode_type: simple memory_size: 8 pretraining: demo_path: ./demos/ExpertPyramid.demo diff --git a/ml-agents/mlagents/trainers/tests/test_environments/test_simple.py b/ml-agents/mlagents/trainers/tests/test_environments/test_simple.py index d6402d50ce..4aa28cac90 100644 --- a/ml-agents/mlagents/trainers/tests/test_environments/test_simple.py +++ b/ml-agents/mlagents/trainers/tests/test_environments/test_simple.py @@ -144,7 +144,6 @@ def test_simple(): extrinsic: strength: 1.0 gamma: 0.99 - vis_encode_type: simple """ # Create controller and begin training. with tempfile.TemporaryDirectory() as dir: diff --git a/ml-agents/mlagents/trainers/tests/test_ppo.py b/ml-agents/mlagents/trainers/tests/test_ppo.py index e70b0c04ad..939a9864b2 100644 --- a/ml-agents/mlagents/trainers/tests/test_ppo.py +++ b/ml-agents/mlagents/trainers/tests/test_ppo.py @@ -32,7 +32,6 @@ def dummy_config(): sequence_length: 64 summary_freq: 1000 use_recurrent: false - vis_encode_type: simple memory_size: 8 curiosity_strength: 0.0 curiosity_enc_size: 1 @@ -328,7 +327,6 @@ def test_trainer_increment_step(): "sequence_length": 64, "summary_freq": 3000, "use_recurrent": False, - "vis_encode_type": "simple", "use_curiosity": False, "curiosity_strength": 0.01, "curiosity_enc_size": 128, diff --git a/ml-agents/mlagents/trainers/tests/test_reward_signals.py b/ml-agents/mlagents/trainers/tests/test_reward_signals.py index 1ed0903153..1b695788c3 100644 --- a/ml-agents/mlagents/trainers/tests/test_reward_signals.py +++ b/ml-agents/mlagents/trainers/tests/test_reward_signals.py @@ -35,7 +35,6 @@ def dummy_config(): sequence_length: 64 summary_freq: 1000 use_recurrent: false - vis_encode_type: simple memory_size: 8 curiosity_strength: 0.0 curiosity_enc_size: 1 From 7ee74a57edc26a343640c053957fda1ee5da4c9d Mon Sep 17 00:00:00 2001 From: dongruoping Date: Fri, 26 Jul 2019 11:46:26 -0700 Subject: [PATCH 26/27] fix trainer_config.yaml --- config/trainer_config.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/config/trainer_config.yaml b/config/trainer_config.yaml index d09c886d14..a13b8ffbb6 100644 --- a/config/trainer_config.yaml +++ b/config/trainer_config.yaml @@ -16,7 +16,6 @@ default: sequence_length: 64 summary_freq: 1000 use_recurrent: false - vis_encode_type: default reward_signals: extrinsic: strength: 1.0 From 2911a0fb9cf84c5674efd2abd74937b2cc10c86c Mon Sep 17 00:00:00 2001 From: dongruoping Date: Fri, 26 Jul 2019 11:58:26 -0700 Subject: [PATCH 27/27] fix default --- config/trainer_config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/config/trainer_config.yaml b/config/trainer_config.yaml index a13b8ffbb6..9a60aefcf4 100644 --- a/config/trainer_config.yaml +++ b/config/trainer_config.yaml @@ -16,6 +16,7 @@ default: sequence_length: 64 summary_freq: 1000 use_recurrent: false + vis_encode_type: simple reward_signals: extrinsic: strength: 1.0