Replace torch.detach().cpu().numpy() with a utils method (#4406)
* Replace torch.detach().cpu().numpy() with a utils method

* Using item() in place of to_numpy()

* more use of item() and additional tests
vincentpierre committed Aug 24, 2020
1 parent 4a6063d commit 95ec88c
Showing 10 changed files with 48 additions and 50 deletions.
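
The refactor applies one pattern throughout: array-valued tensors are converted through the new ModelUtils.to_numpy helper, while scalar (0-dimensional) tensors use torch.Tensor.item(), which returns a plain Python number. A minimal sketch of the before/after pattern, using an invented stand-in tensor (only ModelUtils.to_numpy comes from this commit):

import torch
from mlagents.trainers.torch.utils import ModelUtils

loss = torch.tensor([0.25, 0.75], requires_grad=True)  # stand-in tensor for illustration

# Before: every call site spelled out the full conversion chain.
as_array_old = loss.detach().cpu().numpy()

# After: array-valued tensors go through the shared helper...
as_array_new = ModelUtils.to_numpy(loss)

# ...and scalar statistics use .item(), which detaches and returns a Python float.
mean_loss = loss.mean().item()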
4 changes: 2 additions & 2 deletions ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
@@ -83,8 +83,8 @@ def get_trajectory_value_estimates(
         )

         for name, estimate in value_estimates.items():
-            value_estimates[name] = estimate.detach().cpu().numpy()
-            next_value_estimate[name] = next_value_estimate[name].detach().cpu().numpy()
+            value_estimates[name] = ModelUtils.to_numpy(estimate)
+            next_value_estimate[name] = ModelUtils.to_numpy(next_value_estimate[name])

         if done:
             for k in next_value_estimate:
12 changes: 6 additions & 6 deletions ml-agents/mlagents/trainers/policy/torch_policy.py
@@ -194,18 +194,18 @@ def evaluate(
         action, log_probs, entropy, value_heads, memories = self.sample_actions(
             vec_obs, vis_obs, masks=masks, memories=memories
         )
-        run_out["action"] = action.detach().cpu().numpy()
-        run_out["pre_action"] = action.detach().cpu().numpy()
+        run_out["action"] = ModelUtils.to_numpy(action)
+        run_out["pre_action"] = ModelUtils.to_numpy(action)
         # Todo - make pre_action difference
-        run_out["log_probs"] = log_probs.detach().cpu().numpy()
-        run_out["entropy"] = entropy.detach().cpu().numpy()
+        run_out["log_probs"] = ModelUtils.to_numpy(log_probs)
+        run_out["entropy"] = ModelUtils.to_numpy(entropy)
         run_out["value_heads"] = {
-            name: t.detach().cpu().numpy() for name, t in value_heads.items()
+            name: ModelUtils.to_numpy(t) for name, t in value_heads.items()
         }
         run_out["value"] = np.mean(list(run_out["value_heads"].values()), 0)
         run_out["learning_rate"] = 0.0
         if self.use_recurrent:
-            run_out["memory_out"] = memories.detach().cpu().numpy().squeeze(0)
+            run_out["memory_out"] = ModelUtils.to_numpy(memories).squeeze(0)
         return run_out

     def get_action(
4 changes: 2 additions & 2 deletions ml-agents/mlagents/trainers/ppo/optimizer_torch.py
@@ -187,8 +187,8 @@ def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:

         self.optimizer.step()
         update_stats = {
-            "Losses/Policy Loss": abs(policy_loss.detach().cpu().numpy()),
-            "Losses/Value Loss": value_loss.detach().cpu().numpy(),
+            "Losses/Policy Loss": policy_loss.item(),
+            "Losses/Value Loss": value_loss.item(),
             "Policy/Learning Rate": decay_lr,
             "Policy/Epsilon": decay_eps,
             "Policy/Beta": decay_bet,
13 changes: 5 additions & 8 deletions ml-agents/mlagents/trainers/sac/optimizer_torch.py
@@ -530,14 +530,11 @@ def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
         # Update target network
         self.soft_update(self.policy.actor_critic.critic, self.target_network, self.tau)
         update_stats = {
-            "Losses/Policy Loss": abs(policy_loss.detach().cpu().numpy()),
-            "Losses/Value Loss": value_loss.detach().cpu().numpy(),
-            "Losses/Q1 Loss": q1_loss.detach().cpu().numpy(),
-            "Losses/Q2 Loss": q2_loss.detach().cpu().numpy(),
-            "Policy/Entropy Coeff": torch.exp(self._log_ent_coef)
-            .detach()
-            .cpu()
-            .numpy(),
+            "Losses/Policy Loss": policy_loss.item(),
+            "Losses/Value Loss": value_loss.item(),
+            "Losses/Q1 Loss": q1_loss.item(),
+            "Losses/Q2 Loss": q2_loss.item(),
+            "Policy/Entropy Coeff": torch.exp(self._log_ent_coef).item(),
             "Policy/Learning Rate": decay_lr,
         }

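For the scalar loss statistics above, .item() performs the detach and the device-to-host copy itself and returns a built-in Python float, so the multi-line detach()/cpu()/numpy() chains are no longer needed. A small sketch of that behavior, with an invented stand-in tensor:

import torch

policy_loss = torch.tensor(0.125, requires_grad=True)  # stand-in 0-dim loss tensor
stat = policy_loss.item()  # detaches, copies to host, returns a Python float
assert isinstance(stat, float)

This is also why the behavioral-cloning tests below now check for float rather than np.float32: after this change the stats dictionaries carry built-in Python floats.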
21 changes: 10 additions & 11 deletions ml-agents/mlagents/trainers/tests/torch/test_bcmodule.py
@@ -2,7 +2,6 @@
 import pytest
 import mlagents.trainers.tests.mock_brain as mb

-import numpy as np
 import os

 from mlagents.trainers.policy.torch_policy import TorchPolicy
@@ -33,6 +32,11 @@ def create_bc_module(mock_behavior_specs, bc_settings, use_rnn, tanhresample):
     return bc_module


+def assert_stats_are_float(stats):
+    for _, item in stats.items():
+        assert isinstance(item, float)
+
+
 # Test default values
 def test_bcmodule_defaults():
     # See if default values match
@@ -63,8 +67,7 @@ def test_bcmodule_update(is_sac):
     )
     bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
     stats = bc_module.update()
-    for _, item in stats.items():
-        assert isinstance(item, np.float32)
+    assert_stats_are_float(stats)


 # Test with constant pretraining learning rate
@@ -77,8 +80,7 @@ def test_bcmodule_constant_lr_update(is_sac):
     )
     bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
     stats = bc_module.update()
-    for _, item in stats.items():
-        assert isinstance(item, np.float32)
+    assert_stats_are_float(stats)
     old_learning_rate = bc_module.current_lr

     _ = bc_module.update()
@@ -110,8 +112,7 @@ def test_bcmodule_rnn_update(is_sac):
     )
     bc_module = create_bc_module(mock_specs, bc_settings, True, is_sac)
     stats = bc_module.update()
-    for _, item in stats.items():
-        assert isinstance(item, np.float32)
+    assert_stats_are_float(stats)


 # Test with discrete control and visual observations
@@ -123,8 +124,7 @@ def test_bcmodule_dc_visual_update(is_sac):
     )
     bc_module = create_bc_module(mock_specs, bc_settings, False, is_sac)
     stats = bc_module.update()
-    for _, item in stats.items():
-        assert isinstance(item, np.float32)
+    assert_stats_are_float(stats)


 # Test with discrete control, visual observations and RNN
@@ -136,8 +136,7 @@ def test_bcmodule_rnn_dc_update(is_sac):
     )
     bc_module = create_bc_module(mock_specs, bc_settings, True, is_sac)
     stats = bc_module.update()
-    for _, item in stats.items():
-        assert isinstance(item, np.float32)
+    assert_stats_are_float(stats)


 if __name__ == "__main__":
@@ -10,6 +10,7 @@
 from mlagents.trainers.tests.torch.test_reward_providers.utils import (
     create_agent_buffer,
 )
+from mlagents.trainers.torch.utils import ModelUtils

 SEED = [42]

@@ -82,9 +83,9 @@ def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) ->
     buffer = create_agent_buffer(behavior_spec, 5)
     for _ in range(200):
         curiosity_rp.update(buffer)
-    prediction = curiosity_rp._network.predict_action(buffer)[0].detach()
-    target = buffer["actions"][0]
-    error = float(torch.mean((prediction - target) ** 2))
+    prediction = curiosity_rp._network.predict_action(buffer)[0]
+    target = torch.tensor(buffer["actions"][0])
+    error = torch.mean((prediction - target) ** 2).item()
     assert error < 0.001


@@ -107,5 +108,5 @@ def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
         curiosity_rp.update(buffer)
     prediction = curiosity_rp._network.predict_next_state(buffer)[0]
     target = curiosity_rp._network.get_next_state(buffer)[0]
-    error = float(torch.mean((prediction - target) ** 2).detach())
+    error = float(ModelUtils.to_numpy(torch.mean((prediction - target) ** 2)))
     assert error < 0.001
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/torch/components/bc/module.py
@@ -179,5 +179,5 @@ def _update_batch(
         bc_loss.backward()

         self.optimizer.step()
-        run_out = {"loss": bc_loss.detach().cpu().numpy()}
+        run_out = {"loss": bc_loss.item()}
         return run_out
@@ -30,7 +30,7 @@ def __init__(self, specs: BehaviorSpec, settings: CuriositySettings) -> None:

     def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
         with torch.no_grad():
-            rewards = self._network.compute_reward(mini_batch).detach().cpu().numpy()
+            rewards = ModelUtils.to_numpy(self._network.compute_reward(mini_batch))
         rewards = np.minimum(rewards, 1.0 / self.strength)
         return rewards * self._has_updated_once

@@ -46,8 +46,8 @@ def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
         loss.backward()
         self.optimizer.step()
         return {
-            "Losses/Curiosity Forward Loss": forward_loss.detach().cpu().numpy(),
-            "Losses/Curiosity Inverse Loss": inverse_loss.detach().cpu().numpy(),
+            "Losses/Curiosity Forward Loss": forward_loss.item(),
+            "Losses/Curiosity Inverse Loss": inverse_loss.item(),
         }

     def get_modules(self):
@@ -31,15 +31,12 @@ def evaluate(self, mini_batch: AgentBuffer) -> np.ndarray:
         estimates, _ = self._discriminator_network.compute_estimate(
             mini_batch, use_vail_noise=False
         )
-        return (
+        return ModelUtils.to_numpy(
             -torch.log(
                 1.0
                 - estimates.squeeze(dim=1)
                 * (1.0 - self._discriminator_network.EPSILON)
             )
-            .detach()
-            .cpu()
-            .numpy()
         )

     def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
@@ -178,17 +175,13 @@ def compute_loss(
         expert_estimate, expert_mu = self.compute_estimate(
             expert_batch, use_vail_noise=True
         )
-        stats_dict["Policy/GAIL Policy Estimate"] = (
-            policy_estimate.mean().detach().cpu().numpy()
-        )
-        stats_dict["Policy/GAIL Expert Estimate"] = (
-            expert_estimate.mean().detach().cpu().numpy()
-        )
+        stats_dict["Policy/GAIL Policy Estimate"] = policy_estimate.mean().item()
+        stats_dict["Policy/GAIL Expert Estimate"] = expert_estimate.mean().item()
         discriminator_loss = -(
             torch.log(expert_estimate + self.EPSILON)
             + torch.log(1.0 - policy_estimate + self.EPSILON)
         ).mean()
-        stats_dict["Losses/GAIL Loss"] = discriminator_loss.detach().cpu().numpy()
+        stats_dict["Losses/GAIL Loss"] = discriminator_loss.item()
         total_loss += discriminator_loss
         if self._settings.use_vail:
             # KL divergence loss (encourage latent representation to be normal)
@@ -209,8 +202,8 @@ def compute_loss(
                 torch.tensor(0.0),
             )
             total_loss += vail_loss
-            stats_dict["Policy/GAIL Beta"] = self._beta.detach().cpu().numpy()
-            stats_dict["Losses/GAIL KL Loss"] = kl_loss.detach().cpu().numpy()
+            stats_dict["Policy/GAIL Beta"] = self._beta.item()
+            stats_dict["Losses/GAIL KL Loss"] = kl_loss.item()
         if self.gradient_penalty_weight > 0.0:
             total_loss += (
                 self.gradient_penalty_weight
8 changes: 8 additions & 0 deletions ml-agents/mlagents/trainers/torch/utils.py
@@ -205,6 +205,14 @@ def list_to_tensor(
         """
         return torch.as_tensor(np.asanyarray(ndarray_list), dtype=dtype)

+    @staticmethod
+    def to_numpy(tensor: torch.Tensor) -> np.ndarray:
+        """
+        Converts a Torch Tensor to a numpy array. If the Tensor is on the GPU, it will
+        be brought to the CPU.
+        """
+        return tensor.detach().cpu().numpy()
+
     @staticmethod
     def break_into_branches(
         concatenated_logits: torch.Tensor, action_size: List[int]
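A short usage sketch of the new helper, with an invented stand-in tensor; to_numpy detaches, moves the tensor to the CPU if needed, and converts it in a single call:

import numpy as np
import torch
from mlagents.trainers.torch.utils import ModelUtils

values = torch.ones(3, requires_grad=True) * 2.0  # stand-in tensor that requires grad
arr = ModelUtils.to_numpy(values)  # plain .numpy() would raise here; the helper detaches first
assert isinstance(arr, np.ndarray) and arr.shape == (3,)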
