diff --git a/ml-agents/mlagents/trainers/poca/optimizer_torch.py b/ml-agents/mlagents/trainers/poca/optimizer_torch.py index 07ff16e1a2..5282d190f7 100644 --- a/ml-agents/mlagents/trainers/poca/optimizer_torch.py +++ b/ml-agents/mlagents/trainers/poca/optimizer_torch.py @@ -4,7 +4,7 @@ ) import numpy as np import math -from mlagents.torch_utils import torch +from mlagents.torch_utils import torch, default_device from mlagents.trainers.buffer import ( AgentBuffer, @@ -155,6 +155,8 @@ def __init__(self, policy: TorchPolicy, trainer_settings: TrainerSettings): network_settings=trainer_settings.network_settings, action_spec=policy.behavior_spec.action_spec, ) + # Move to GPU if needed + self._critic.to(default_device()) params = list(self.policy.actor.parameters()) + list(self.critic.parameters()) self.hyperparameters: POCASettings = cast( diff --git a/ml-agents/mlagents/trainers/torch/networks.py b/ml-agents/mlagents/trainers/torch/networks.py index b19a383287..56768f8782 100644 --- a/ml-agents/mlagents/trainers/torch/networks.py +++ b/ml-agents/mlagents/trainers/torch/networks.py @@ -268,7 +268,7 @@ def _get_masks_from_nans(self, obs_tensors: List[torch.Tensor]) -> torch.Tensor: [_obs.flatten(start_dim=1)[:, 0] for _obs in only_first_obs], dim=1 ) # Get the mask from NaNs - attn_mask = only_first_obs_flat.isnan().type(torch.FloatTensor) + attn_mask = only_first_obs_flat.isnan().float() return attn_mask def _copy_and_remove_nans_from_obs( @@ -283,7 +283,7 @@ def _copy_and_remove_nans_from_obs( for obs in single_agent_obs: new_obs = obs.clone() new_obs[ - attention_mask.type(torch.BoolTensor)[:, i_agent], :: + attention_mask.bool()[:, i_agent], :: ] = 0.0 # Remove NaNs fast no_nan_obs.append(new_obs) obs_with_no_nans.append(no_nan_obs)