
Add different types of visual encoder (nature cnn/resnet) #2289

Merged: 12 commits, Jul 19, 2019
1 change: 1 addition & 0 deletions config/trainer_config.yaml
@@ -16,6 +16,7 @@ default:
sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
reward_signals:
extrinsic:
strength: 1.0
198 changes: 180 additions & 18 deletions ml-agents/mlagents/trainers/models.py
@@ -1,4 +1,5 @@
import logging
from enum import Enum
from typing import Any, Callable, Dict

import numpy as np
@@ -10,6 +11,12 @@
ActivationFunction = Callable[[tf.Tensor], tf.Tensor]


class EncoderType(Enum):
RESNET = "resnet"
NATURE_CNN = "nature_cnn"
DEFAULT = "default"
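As a quick illustrative sketch (not part of the diff), the vis_encode_type string from trainer_config.yaml round-trips through this enum, and an unrecognized value raises ValueError, so a misspelled setting fails fast:

from enum import Enum

class EncoderType(Enum):
    # mirrors the enum defined above
    RESNET = "resnet"
    NATURE_CNN = "nature_cnn"
    DEFAULT = "default"

# the YAML string maps straight onto an enum member
assert EncoderType("nature_cnn") is EncoderType.NATURE_CNN
assert EncoderType("default") is EncoderType.DEFAULT

# an unknown value (hypothetical "alexnet") raises ValueError
try:
    EncoderType("alexnet")
except ValueError:
    print("unknown vis_encode_type")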


class LearningModel(object):
_version_number_ = 2

@@ -222,13 +229,13 @@ def create_visual_observation_encoder(
reuse: bool,
) -> tf.Tensor:
"""
Builds a set of visual (CNN) encoders.
:param reuse: Whether to re-use the weights within the same scope.
:param scope: The scope of the graph within which to create the ops.
Builds a set of resnet visual encoders.
:param image_input: The placeholder for the image input to use.
:param h_size: Hidden layer size.
:param activation: What type of activation function to use for layers.
:param num_layers: number of hidden layers to create.
:param scope: The scope of the graph within which to create the ops.
:param reuse: Whether to re-use the weights within the same scope.
:return: List of hidden layer tensors.
"""
with tf.variable_scope(scope):
@@ -258,6 +265,131 @@ def create_visual_observation_encoder(
)
return hidden_flat

def create_nature_cnn_visual_observation_encoder(
self,
image_input: tf.Tensor,
h_size: int,
activation: ActivationFunction,
num_layers: int,
scope: str,
reuse: bool,
) -> tf.Tensor:
"""
Builds a set of Nature CNN visual encoders (the convolutional architecture from the DQN Nature paper).
:param image_input: The placeholder for the image input to use.
:param h_size: Hidden layer size.
:param activation: What type of activation function to use for layers.
:param num_layers: number of hidden layers to create.
:param scope: The scope of the graph within which to create the ops.
:param reuse: Whether to re-use the weights within the same scope.
:return: List of hidden layer tensors.
"""
print("creating nature cnn")
with tf.variable_scope(scope):
conv1 = tf.layers.conv2d(
image_input,
32,
kernel_size=[8, 8],
strides=[4, 4],
activation=tf.nn.elu,
reuse=reuse,
name="conv_1",
)
conv2 = tf.layers.conv2d(
conv1,
64,
kernel_size=[4, 4],
strides=[2, 2],
activation=tf.nn.elu,
reuse=reuse,
name="conv_2",
)
conv3 = tf.layers.conv2d(
conv2,
64,
kernel_size=[3, 3],
strides=[1, 1],
activation=tf.nn.elu,
reuse=reuse,
name="conv_3",
)
hidden = c_layers.flatten(conv3)

with tf.variable_scope(scope + "/" + "flat_encoding"):
hidden_flat = self.create_vector_observation_encoder(
hidden, h_size, activation, num_layers, scope, reuse
)
return hidden_flat
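For intuition only (a sketch assuming an 84x84 visual observation, not code from the PR): with tf.layers.conv2d's default "valid" padding, the three convolutions above shrink the feature map to 20x20, then 9x9, then 7x7 with 64 channels, so 3136 values reach the flatten:

def conv_out(size, kernel, stride):
    # output width of a square "valid"-padded convolution
    return (size - kernel) // stride + 1

size = 84  # assumed square input resolution
for kernel, stride in [(8, 4), (4, 2), (3, 1)]:
    size = conv_out(size, kernel, stride)
    print(size)          # 20, then 9, then 7
print(size * size * 64)  # 3136 features into c_layers.flatten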

def create_resnet_visual_observation_encoder(
self,
image_input: tf.Tensor,
h_size: int,
activation: ActivationFunction,
num_layers: int,
scope: str,
reuse: bool,
) -> tf.Tensor:
"""
Builds a set of resnet visual encoders.
:param image_input: The placeholder for the image input to use.
:param h_size: Hidden layer size.
:param activation: What type of activation function to use for layers.
:param num_layers: number of hidden layers to create.
:param scope: The scope of the graph within which to create the ops.
:param reuse: Whether to re-use the weights within the same scope.
:return: List of hidden layer tensors.
"""
print("creating resnet")
n_channels = [16, 32, 32] # channel for each stack
n_blocks = 2 # number of residual blocks
with tf.variable_scope(scope):
hidden = image_input
for i, ch in enumerate(n_channels):
hidden = tf.layers.conv2d(
hidden,
ch,
kernel_size=[3, 3],
strides=[1, 1],
reuse=reuse,
name="layer%dconv_1" % i,
)
hidden = tf.layers.max_pooling2d(
hidden, pool_size=[3, 3], strides=[2, 2], padding="same"
)
# create residual blocks
for j in range(n_blocks):
block_input = hidden
hidden = tf.nn.relu(hidden)
hidden = tf.layers.conv2d(
hidden,
ch,
kernel_size=[3, 3],
strides=[1, 1],
padding="same",
reuse=reuse,
name="layer%d_%d_conv1" % (i, j),
)
hidden = tf.nn.relu(hidden)
hidden = tf.layers.conv2d(
hidden,
ch,
kernel_size=[3, 3],
strides=[1, 1],
padding="same",
reuse=reuse,
name="layer%d_%d_conv2" % (i, j),
)
hidden = tf.add(block_input, hidden)
hidden = tf.nn.relu(hidden)
hidden = c_layers.flatten(hidden)

with tf.variable_scope(scope + "/" + "flat_encoding"):
hidden_flat = self.create_vector_observation_encoder(
hidden, h_size, activation, num_layers, scope, reuse
)
return hidden_flat
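A similar sketch for the residual encoder, again assuming an 84x84 input: each stack's first 3x3 convolution uses the default "valid" padding and its stride-2 max-pool uses "same" padding, while the residual convolutions preserve spatial size, so the map shrinks 84 -> 41 -> 20 -> 9 and the flatten sees 9*9*32 = 2592 values:

import math

def stack_out(size):
    size -= 2                   # 3x3 conv, stride 1, "valid" padding (tf.layers default)
    return math.ceil(size / 2)  # 3x3 max-pool, stride 2, padding="same"

size = 84  # assumed square input resolution
for ch in (16, 32, 32):         # the residual blocks use "same" padding, so no change
    size = stack_out(size)
    print(ch, size)             # -> 41, then 20, then 9
print(size * size * 32)         # 2592 features into c_layers.flatten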

@staticmethod
def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
"""
@@ -302,7 +434,9 @@ def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
),
)

def create_observation_streams(self, num_streams, h_size, num_layers):
def create_observation_streams(
@chriselion (Contributor), Jul 19, 2019:
Please add type annotations here.
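One possible shape for that annotation, sketched here rather than taken from the PR (the final signature may differ):

from typing import List

import tensorflow as tf

# method-signature sketch; shown as a free function with an explicit self
def create_observation_streams(
    self,
    num_streams: int,
    h_size: int,
    num_layers: int,
    vis_encode_type: str = "default",
) -> List[tf.Tensor]:
    ...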

self, num_streams, h_size, num_layers, vis_encode_type="default"
):
"""
Creates encoding stream for observations.
:param num_streams: Number of streams to create.
@@ -326,16 +460,40 @@ def create_observation_streams(self, num_streams, h_size, num_layers):
visual_encoders = []
hidden_state, hidden_visual = None, None
if self.vis_obs_size > 0:
for j in range(brain.number_visual_observations):
encoded_visual = self.create_visual_observation_encoder(
self.visual_in[j],
h_size,
activation_fn,
num_layers,
"main_graph_{}_encoder{}".format(i, j),
False,
)
visual_encoders.append(encoded_visual)
vis_encode_type = EncoderType(vis_encode_type)
if vis_encode_type == EncoderType.RESNET:
for j in range(brain.number_visual_observations):
encoded_visual = self.create_resnet_visual_observation_encoder(
self.visual_in[j],
h_size,
activation_fn,
num_layers,
"main_graph_{}_encoder{}".format(i, j),
False,
)
visual_encoders.append(encoded_visual)
elif vis_encode_type == EncoderType.NATURE_CNN:
for j in range(brain.number_visual_observations):
encoded_visual = self.create_nature_cnn_visual_observation_encoder(
self.visual_in[j],
h_size,
activation_fn,
num_layers,
"main_graph_{}_encoder{}".format(i, j),
False,
)
visual_encoders.append(encoded_visual)
else:
for j in range(brain.number_visual_observations):
encoded_visual = self.create_visual_observation_encoder(
self.visual_in[j],
h_size,
activation_fn,
num_layers,
"main_graph_{}_encoder{}".format(i, j),
False,
)
visual_encoders.append(encoded_visual)
hidden_visual = tf.concat(visual_encoders, axis=1)
if brain.vector_observation_space_size > 0:
hidden_state = self.create_vector_observation_encoder(
@@ -401,13 +559,15 @@ def create_value_heads(self, stream_names, hidden_input):
self.value_heads[name] = value
self.value = tf.reduce_mean(list(self.value_heads.values()), 0)

def create_cc_actor_critic(self, h_size, num_layers):
def create_cc_actor_critic(self, h_size, num_layers, vis_encode_type):
Contributor review comment: type annotations

"""
Creates Continuous control actor-critic model.
:param h_size: Size of hidden linear layers.
:param num_layers: Number of hidden linear layers.
"""
hidden_streams = self.create_observation_streams(2, h_size, num_layers)
hidden_streams = self.create_observation_streams(
2, h_size, num_layers, vis_encode_type
)

if self.use_recurrent:
self.memory_in = tf.placeholder(
@@ -486,13 +646,15 @@ def create_cc_actor_critic(self, h_size, num_layers):
(tf.identity(self.all_old_log_probs)), axis=1, keepdims=True
)

def create_dc_actor_critic(self, h_size, num_layers):
def create_dc_actor_critic(self, h_size, num_layers, vis_encode_type):
"""
Creates Discrete control actor-critic model.
:param h_size: Size of hidden linear layers.
:param num_layers: Number of hidden linear layers.
"""
hidden_streams = self.create_observation_streams(1, h_size, num_layers)
hidden_streams = self.create_observation_streams(
1, h_size, num_layers, vis_encode_type
)
hidden = hidden_streams[0]

if self.use_recurrent:
5 changes: 3 additions & 2 deletions ml-agents/mlagents/trainers/ppo/models.py
@@ -22,6 +22,7 @@ def __init__(
m_size=None,
seed=0,
stream_names=None,
vis_encode_type="default",
):
"""
Takes a Unity environment and model-specific hyper-parameters and returns the
@@ -46,10 +47,10 @@ def __init__(
if num_layers < 1:
num_layers = 1
if brain.vector_action_space_type == "continuous":
self.create_cc_actor_critic(h_size, num_layers)
self.create_cc_actor_critic(h_size, num_layers, vis_encode_type)
self.entropy = tf.ones_like(tf.reshape(self.value, [-1])) * self.entropy
else:
self.create_dc_actor_critic(h_size, num_layers)
self.create_dc_actor_critic(h_size, num_layers, vis_encode_type)
self.create_losses(
self.log_probs,
self.old_log_probs,
1 change: 1 addition & 0 deletions ml-agents/mlagents/trainers/ppo/policy.py
@@ -44,6 +44,7 @@ def __init__(self, seed, brain, trainer_params, is_training, load):
m_size=self.m_size,
seed=seed,
stream_names=list(reward_signal_configs.keys()),
vis_encode_type=trainer_params["vis_encode_type"],
Contributor review comment: Convert to EncoderType enum here and pass that through instead of the string type. Handle a missing value with .get(). For example:
vis_encode_type = EncoderType(trainer_params.get("vis_encode_type", "default"))
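With .get(), a config that omits vis_encode_type falls back to the default encoder instead of raising a KeyError.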

)
self.model.create_ppo_optimizer()

1 change: 1 addition & 0 deletions ml-agents/mlagents/trainers/ppo/trainer.py
@@ -53,6 +53,7 @@ def __init__(
"memory_size",
"model_path",
"reward_signals",
"vis_encode_type",
]
self.check_param_keys()

1 change: 1 addition & 0 deletions ml-agents/mlagents/trainers/tests/test_bcmodule.py
@@ -29,6 +29,7 @@ def dummy_config():
sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
memory_size: 8
pretraining:
demo_path: ./demos/ExpertPyramid.demo
2 changes: 2 additions & 0 deletions ml-agents/mlagents/trainers/tests/test_ppo.py
@@ -32,6 +32,7 @@ def dummy_config():
sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1
@@ -327,6 +328,7 @@ def test_trainer_increment_step():
"sequence_length": 64,
"summary_freq": 3000,
"use_recurrent": False,
"vis_encode_type": "default",
"use_curiosity": False,
"curiosity_strength": 0.01,
"curiosity_enc_size": 128,
1 change: 1 addition & 0 deletions ml-agents/mlagents/trainers/tests/test_reward_signals.py
@@ -35,6 +35,7 @@ def dummy_config():
sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1