Remove unused "last reward" logic, TF nodes
At each step, an unused `last_reward` variable in the TF graph is
updated in our PPO trainer.  There are also related unused methods
in various places in the codebase.  This change removes them.
Jonathan Harper committed Jul 3, 2019
1 parent f4bc8ef commit f6f967a
Showing 7 changed files with 9 additions and 62 deletions.
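
For context, the dead pattern being deleted looks roughly like the sketch below. This is a minimal illustration assuming TensorFlow 1.x (the graph API ml-agents used here), not the trainer code itself; the op names mirror create_reward_encoder in the diff, while the session code is made up for the example. The variable is written on every step but never read by any loss, summary, or other consumer, so it only adds graph nodes and an extra sess.run call.

import numpy as np
import tensorflow as tf

# The three ops create_reward_encoder() returned: a non-trainable variable,
# a scalar feed placeholder, and an assign op connecting them.
last_reward = tf.Variable(0, name="last_reward", trainable=False, dtype=tf.float32)
new_reward = tf.placeholder(shape=[], dtype=tf.float32, name="new_reward")
update_reward = tf.assign(last_reward, new_reward)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Roughly what increment_step_and_update_last_reward did on every step:
    sess.run(update_reward, feed_dict={new_reward: np.mean([1.0, 2.0, 3.0])})
    # The stored value is never consumed anywhere else, so this work is wasted.
    print(sess.run(last_reward))  # 2.0
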
15 changes: 2 additions & 13 deletions ml-agents/mlagents/trainers/bc/trainer.py
@@ -67,20 +67,9 @@ def get_step(self):
         """
         return self.policy.get_current_step()

-    @property
-    def get_last_reward(self):
-        """
-        Returns the last reward the trainer has had
-        :return: the new last reward
-        """
-        if len(self.stats["Environment/Cumulative Reward"]) > 0:
-            return np.mean(self.stats["Environment/Cumulative Reward"])
-        else:
-            return 0
-
-    def increment_step_and_update_last_reward(self):
+    def increment_step(self):
         """
-        Increment the step count of the trainer and Updates the last reward
+        Increment the step count of the trainer
         """
         self.policy.increment_step()
         return
13 changes: 0 additions & 13 deletions ml-agents/mlagents/trainers/ppo/models.py
@@ -45,9 +45,6 @@ def __init__(
         )
         if num_layers < 1:
             num_layers = 1
-        self.last_reward, self.new_reward, self.update_reward = (
-            self.create_reward_encoder()
-        )
         if brain.vector_action_space_type == "continuous":
             self.create_cc_actor_critic(h_size, num_layers)
             self.entropy = tf.ones_like(tf.reshape(self.value, [-1])) * self.entropy
@@ -64,16 +61,6 @@ def __init__(
             max_step,
         )

-    @staticmethod
-    def create_reward_encoder():
-        """Creates TF ops to track and increment recent average cumulative reward."""
-        last_reward = tf.Variable(
-            0, name="last_reward", trainable=False, dtype=tf.float32
-        )
-        new_reward = tf.placeholder(shape=[], dtype=tf.float32, name="new_reward")
-        update_reward = tf.assign(last_reward, new_reward)
-        return last_reward, new_reward, update_reward
-
     def create_losses(
         self, probs, old_probs, value_heads, entropy, beta, epsilon, lr, max_step
     ):
16 changes: 0 additions & 16 deletions ml-agents/mlagents/trainers/ppo/policy.py
@@ -221,19 +221,3 @@ def get_action(self, brain_info: BrainInfo) -> ActionInfo:
             value=mean_values,
             outputs=run_out,
         )
-
-    def get_last_reward(self):
-        """
-        Returns the last reward the trainer has had
-        :return: the new last reward
-        """
-        return self.sess.run(self.model.last_reward)
-
-    def update_reward(self, new_reward):
-        """
-        Updates reward value for policy.
-        :param new_reward: New reward to save.
-        """
-        self.sess.run(
-            self.model.update_reward, feed_dict={self.model.new_reward: new_reward}
-        )
7 changes: 2 additions & 5 deletions ml-agents/mlagents/trainers/ppo/trainer.py
@@ -122,13 +122,10 @@ def reward_buffer(self):
         """
         return self._reward_buffer

-    def increment_step_and_update_last_reward(self):
+    def increment_step(self):
         """
-        Increment the step count of the trainer and Updates the last reward
+        Increment the step count of the trainer
         """
-        if self.stats["Environment/Cumulative Reward"]:
-            mean_reward = np.mean(self.stats["Environment/Cumulative Reward"])
-            self.policy.update_reward(mean_reward)
         self.policy.increment_step()
         self.step = self.policy.get_current_step()

2 changes: 1 addition & 1 deletion
@@ -425,4 +425,4 @@ def test_take_step_adds_experiences_to_trainer_and_trains():
     )
     trainer_mock.update_policy.assert_called_once()
     trainer_mock.write_summary.assert_called_once()
-    trainer_mock.increment_step_and_update_last_reward.assert_called_once()
+    trainer_mock.increment_step.assert_called_once()
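
The updated assertion relies on the call tracking that unittest.mock provides out of the box; a tiny standalone illustration of that pattern is below (the controller wiring from the real test is omitted, and the mock setup here is only for the example):

from unittest import mock

trainer_mock = mock.MagicMock()

# The code under test is expected to call the renamed method exactly once:
trainer_mock.increment_step()

# MagicMock records every call, so the test can assert on it afterwards:
trainer_mock.increment_step.assert_called_once()
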
16 changes: 3 additions & 13 deletions ml-agents/mlagents/trainers/trainer.py
@@ -109,21 +109,11 @@ def get_step(self):
         """
         raise UnityTrainerException("The get_step property was not implemented.")

-    @property
-    def get_last_reward(self):
-        """
-        Returns the last reward the trainer has had
-        :return: the new last reward
-        """
-        raise UnityTrainerException("The get_last_reward property was not implemented.")
-
-    def increment_step_and_update_last_reward(self):
+    def increment_step(self):
         """
-        Increment the step count of the trainer and updates the last reward
+        Increment the step count of the trainer
         """
-        raise UnityTrainerException(
-            "The increment_step_and_update_last_reward method was not implemented."
-        )
+        raise UnityTrainerException("The increment_step method was not implemented.")

     def get_action(self, curr_info: BrainInfo) -> ActionInfo:
         """
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/trainer_controller.py
@@ -339,5 +339,5 @@ def take_step(
         else:
             trainer.write_summary(self.global_step, delta_train_start)
         if self.train_model and trainer.get_step <= trainer.get_max_steps:
-            trainer.increment_step_and_update_last_reward()
+            trainer.increment_step()
         return new_info
