ml-agents/mlagents/trainers/bc/models.py (4 changes: 1 addition & 3 deletions)

@@ -6,10 +6,8 @@
 class BehavioralCloningModel(LearningModel):
     def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
                  normalize=False, use_recurrent=False, scope='PPO', seed=0):
-
-        LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, scope, seed)
-
         with tf.variable_scope(scope):
+            LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
             num_streams = 1
             hidden_streams = self.create_observation_streams(num_streams, h_size, n_layers)
             hidden = hidden_streams[0]
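The hunk above drops the scope argument from the base constructor and lets the subclass open the variable scope itself before calling it. A minimal sketch of that pattern (TF 1.x; Base and Child are illustrative names, not classes from ml-agents):

# Minimal sketch (TF 1.x) of the scoping pattern this PR moves into the
# subclasses; Base and Child are illustrative names, not classes from ml-agents.
import tensorflow as tf

class Base(object):
    def __init__(self, seed):
        tf.set_random_seed(seed)
        # The base class now builds its placeholders in whatever scope the
        # caller has already opened, instead of opening one itself.
        self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')

class Child(Base):
    def __init__(self, scope='PPO', seed=0):
        # The subclass chooses the scope and wraps the base constructor in it.
        with tf.variable_scope(scope):
            Base.__init__(self, seed)
            self.hidden = tf.layers.dense(tf.ones([1, 4]), 8, name='hidden')

model = Child()
print(model.batch_size.name)  # -> 'PPO/batch_size:0'

Because the subclass already wrapped its own layers in tf.variable_scope(scope), moving the base call inside that block keeps every op of one model under a single prefix.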
ml-agents/mlagents/trainers/models.py (33 changes: 16 additions & 17 deletions)

@@ -8,24 +8,23 @@


 class LearningModel(object):
-    def __init__(self, m_size, normalize, use_recurrent, brain, scope, seed):
+    def __init__(self, m_size, normalize, use_recurrent, brain, seed):
         tf.set_random_seed(seed)
-        with tf.variable_scope(scope):
-            self.brain = brain
-            self.vector_in = None
-            self.global_step, self.increment_step = self.create_global_steps()
-            self.visual_in = []
-            self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')
-            self.sequence_length = tf.placeholder(shape=None, dtype=tf.int32, name='sequence_length')
-            self.mask_input = tf.placeholder(shape=[None], dtype=tf.float32, name='masks')
-            self.mask = tf.cast(self.mask_input, tf.int32)
-            self.m_size = m_size
-            self.normalize = normalize
-            self.use_recurrent = use_recurrent
-            self.act_size = brain.vector_action_space_size
-            self.vec_obs_size = brain.vector_observation_space_size * \
-                                brain.num_stacked_vector_observations
-            self.vis_obs_size = brain.number_visual_observations
+        self.brain = brain
+        self.vector_in = None
+        self.global_step, self.increment_step = self.create_global_steps()
+        self.visual_in = []
+        self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')
+        self.sequence_length = tf.placeholder(shape=None, dtype=tf.int32, name='sequence_length')
+        self.mask_input = tf.placeholder(shape=[None], dtype=tf.float32, name='masks')
+        self.mask = tf.cast(self.mask_input, tf.int32)
+        self.m_size = m_size
+        self.normalize = normalize
+        self.use_recurrent = use_recurrent
+        self.act_size = brain.vector_action_space_size
+        self.vec_obs_size = brain.vector_observation_space_size * \
+                            brain.num_stacked_vector_observations
+        self.vis_obs_size = brain.number_visual_observations

     @staticmethod
     def create_global_steps():
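For background on why removing the base-class scope does not cause name collisions: in TF 1.x, variable and op names are prefixed by whichever scope the caller opens, so two models built under different scopes stay separate. A small illustration, not code from this PR:

# Background illustration (TF 1.x), not code from this PR: names are prefixed
# by the caller's scope, so models built under different scopes never collide
# even though LearningModel no longer opens a scope of its own.
import tensorflow as tf

def build(scope):
    with tf.variable_scope(scope):
        return tf.get_variable('w', shape=[3])

a = build('Model')
b = build('Target')
print(a.name)  # -> 'Model/w:0'
print(b.name)  # -> 'Target/w:0'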
ml-agents/mlagents/trainers/policy.py (2 changes: 1 addition & 1 deletion)

@@ -29,7 +29,7 @@ def __init__(self, seed, brain, trainer_parameters, sess):
         :param sess: The current TensorFlow session.
         """
         self.m_size = None
-        self.model = LearningModel(0, False, False, brain, scope='Model', seed=0)
+        self.model = None
         self.inference_dict = {}
         self.update_dict = {}
         self.sequence_length = 1
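With the placeholder LearningModel gone from the base constructor, a concrete policy is presumably expected to assign self.model itself. A hypothetical sketch of that pattern (ExamplePolicy and the PPOModel keyword arguments are assumptions, not taken from this PR):

# Hypothetical sketch of how a concrete policy might now populate self.model.
# ExamplePolicy is illustrative; the exact PPOModel keyword arguments are an
# assumption, so check the real trainer code for the canonical construction.
from mlagents.trainers.policy import Policy   # assumes the base class is named Policy
from mlagents.trainers.ppo.models import PPOModel

class ExamplePolicy(Policy):
    def __init__(self, seed, brain, trainer_parameters, sess):
        super(ExamplePolicy, self).__init__(seed, brain, trainer_parameters, sess)
        # The base class now leaves self.model as None; the subclass builds the
        # concrete model and lets it choose its own variable scope internally.
        self.model = PPOModel(brain, seed=seed)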
ml-agents/mlagents/trainers/ppo/models.py (2 changes: 1 addition & 1 deletion)

@@ -26,8 +26,8 @@ def __init__(self, brain, lr=1e-4, h_size=128, epsilon=0.2, beta=1e-3, max_step=
         :param num_layers Number of hidden layers between encoded input and policy & value layers
         :param m_size: Size of brain memory.
         """
-        LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, scope, seed)
         with tf.variable_scope(scope):
+            LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
             self.use_curiosity = use_curiosity
             if num_layers < 1:
                 num_layers = 1