diff --git a/ml-agents/mlagents/trainers/bc/models.py b/ml-agents/mlagents/trainers/bc/models.py
index 77deca5182..25e7c3ccf6 100644
--- a/ml-agents/mlagents/trainers/bc/models.py
+++ b/ml-agents/mlagents/trainers/bc/models.py
@@ -6,10 +6,8 @@ class BehavioralCloningModel(LearningModel):
 
     def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
                  normalize=False, use_recurrent=False, scope='PPO', seed=0):
-
-        LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, scope, seed)
-
         with tf.variable_scope(scope):
+            LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
             num_streams = 1
             hidden_streams = self.create_observation_streams(num_streams, h_size, n_layers)
             hidden = hidden_streams[0]
diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py
index f9ccead39f..b2aec6890f 100644
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
@@ -8,24 +8,23 @@
 
 
 class LearningModel(object):
-    def __init__(self, m_size, normalize, use_recurrent, brain, scope, seed):
+    def __init__(self, m_size, normalize, use_recurrent, brain, seed):
         tf.set_random_seed(seed)
-        with tf.variable_scope(scope):
-            self.brain = brain
-            self.vector_in = None
-            self.global_step, self.increment_step = self.create_global_steps()
-            self.visual_in = []
-            self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')
-            self.sequence_length = tf.placeholder(shape=None, dtype=tf.int32, name='sequence_length')
-            self.mask_input = tf.placeholder(shape=[None], dtype=tf.float32, name='masks')
-            self.mask = tf.cast(self.mask_input, tf.int32)
-            self.m_size = m_size
-            self.normalize = normalize
-            self.use_recurrent = use_recurrent
-            self.act_size = brain.vector_action_space_size
-            self.vec_obs_size = brain.vector_observation_space_size * \
-                brain.num_stacked_vector_observations
-            self.vis_obs_size = brain.number_visual_observations
+        self.brain = brain
+        self.vector_in = None
+        self.global_step, self.increment_step = self.create_global_steps()
+        self.visual_in = []
+        self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')
+        self.sequence_length = tf.placeholder(shape=None, dtype=tf.int32, name='sequence_length')
+        self.mask_input = tf.placeholder(shape=[None], dtype=tf.float32, name='masks')
+        self.mask = tf.cast(self.mask_input, tf.int32)
+        self.m_size = m_size
+        self.normalize = normalize
+        self.use_recurrent = use_recurrent
+        self.act_size = brain.vector_action_space_size
+        self.vec_obs_size = brain.vector_observation_space_size * \
+            brain.num_stacked_vector_observations
+        self.vis_obs_size = brain.number_visual_observations
 
     @staticmethod
     def create_global_steps():
diff --git a/ml-agents/mlagents/trainers/policy.py b/ml-agents/mlagents/trainers/policy.py
index 8578af7c45..3e8095e8b6 100644
--- a/ml-agents/mlagents/trainers/policy.py
+++ b/ml-agents/mlagents/trainers/policy.py
@@ -29,7 +29,7 @@ def __init__(self, seed, brain, trainer_parameters, sess):
         :param sess: The current TensorFlow session.
         """
         self.m_size = None
-        self.model = LearningModel(0, False, False, brain, scope='Model', seed=0)
+        self.model = None
         self.inference_dict = {}
         self.update_dict = {}
         self.sequence_length = 1
diff --git a/ml-agents/mlagents/trainers/ppo/models.py b/ml-agents/mlagents/trainers/ppo/models.py
index bb8fbf4f04..86cf0d03dd 100644
--- a/ml-agents/mlagents/trainers/ppo/models.py
+++ b/ml-agents/mlagents/trainers/ppo/models.py
@@ -26,8 +26,8 @@ def __init__(self, brain, lr=1e-4, h_size=128, epsilon=0.2, beta=1e-3, max_step=
         :param num_layers Number of hidden layers between encoded input and policy & value layers
         :param m_size: Size of brain memory.
         """
-        LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, scope, seed)
         with tf.variable_scope(scope):
+            LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
             self.use_curiosity = use_curiosity
             if num_layers < 1:
                 num_layers = 1