2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/agent_processor.py
@@ -15,7 +15,7 @@
 from mlagents.trainers.policy import Policy
 from mlagents.trainers.action_info import ActionInfo, ActionInfoOutputs
 from mlagents.trainers.stats import StatsReporter
-from mlagents.trainers.brain_conversion_utils import get_global_agent_id
+from mlagents.trainers.behavior_id_utils import get_global_agent_id
 
 T = TypeVar("T")
 
7 changes: 7 additions & 0 deletions ml-agents/mlagents/trainers/behavior_id_utils.py
@@ -44,3 +44,10 @@ def create_name_behavior_id(name: str, team_id: int) -> str:
     :return: name_behavior_id
     """
     return name + "?team=" + str(team_id)
+
+
+def get_global_agent_id(worker_id: int, agent_id: int) -> str:
+    """
+    Create an agent id that is unique across environment workers using the worker_id.
+    """
+    return f"${worker_id}-{agent_id}"
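
Both helpers in this file are plain string formatters, so their behavior is fully determined by the bodies shown above. A minimal usage sketch with illustrative values (note the literal "$" that the f-string prepends to global agent ids):

from mlagents.trainers.behavior_id_utils import (
    create_name_behavior_id,
    get_global_agent_id,
)

# Behavior name plus team suffix, used to key trainers and policies:
assert create_name_behavior_id("3DBall", team_id=0) == "3DBall?team=0"

# Agent id made unique across environment workers via the worker_id:
assert get_global_agent_id(worker_id=2, agent_id=7) == "$2-7"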
88 changes: 0 additions & 88 deletions ml-agents/mlagents/trainers/brain.py

This file was deleted.

31 changes: 0 additions & 31 deletions ml-agents/mlagents/trainers/brain_conversion_utils.py

This file was deleted.

2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/components/bc/model.py
@@ -26,7 +26,7 @@ def make_inputs(self) -> None:
         self.done_expert = tf.placeholder(shape=[None, 1], dtype=tf.float32)
         self.done_policy = tf.placeholder(shape=[None, 1], dtype=tf.float32)
 
-        if self.policy.brain.vector_action_space_type == "continuous":
+        if self.policy.behavior_spec.is_action_continuous():
             action_length = self.policy.act_size[0]
             self.action_in_expert = tf.placeholder(
                 shape=[None, action_length], dtype=tf.float32
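
This is the pattern repeated throughout the PR: string comparisons against brain.vector_action_space_type become method calls on the policy's behavior_spec. A sketch of what the spec side plausibly looks like, assuming a NamedTuple with an ActionType enum along the lines of mlagents_envs.base_env (names and fields here are assumptions, not the library's exact definition):

from enum import Enum
from typing import NamedTuple, Tuple, Union

class ActionType(Enum):
    DISCRETE = 0
    CONTINUOUS = 1

class BehaviorSpecSketch(NamedTuple):
    observation_shapes: Tuple[Tuple[int, ...], ...]
    action_type: ActionType
    action_shape: Union[int, Tuple[int, ...]]

    def is_action_continuous(self) -> bool:
        # Replaces: brain.vector_action_space_type == "continuous"
        return self.action_type == ActionType.CONTINUOUS

    def is_action_discrete(self) -> bool:
        # Replaces: not policy.use_continuous_act
        return self.action_type == ActionType.DISCRETE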
8 changes: 4 additions & 4 deletions ml-agents/mlagents/trainers/components/bc/module.py
@@ -34,7 +34,7 @@ def __init__(
         self.current_lr = policy_learning_rate * settings.strength
         self.model = BCModel(policy, self.current_lr, settings.steps)
         _, self.demonstration_buffer = demo_to_buffer(
-            settings.demo_path, policy.sequence_length, policy.brain
+            settings.demo_path, policy.sequence_length, policy.behavior_spec
         )
 
         self.batch_size = (
@@ -107,15 +107,15 @@ def _update_batch(
             self.policy.sequence_length_ph: self.policy.sequence_length,
         }
         feed_dict[self.model.action_in_expert] = mini_batch_demo["actions"]
-        if not self.policy.use_continuous_act:
+        if self.policy.behavior_spec.is_action_discrete():
             feed_dict[self.policy.action_masks] = np.ones(
                 (
                     self.n_sequences * self.policy.sequence_length,
-                    sum(self.policy.brain.vector_action_space_size),
+                    sum(self.policy.behavior_spec.discrete_action_branches),
                 ),
                 dtype=np.float32,
             )
-        if self.policy.brain.vector_observation_space_size > 0:
+        if self.policy.vec_obs_size > 0:
             feed_dict[self.policy.vector_in] = mini_batch_demo["vector_obs"]
         for i, _ in enumerate(self.policy.visual_in):
             feed_dict[self.policy.visual_in[i]] = mini_batch_demo["visual_obs%d" % i]
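
One detail worth calling out in the discrete branch above: demonstration batches carry no action masks, so the module feeds all-ones masks shaped (flattened batch size, sum of branch sizes). A standalone illustration with made-up sizes:

import numpy as np

n_sequences, sequence_length = 32, 16
discrete_action_branches = (3, 2)  # two hypothetical branches of sizes 3 and 2

# One row per timestep in the flattened batch, one column per discrete
# action across all branches; 1.0 means the action is available.
action_masks = np.ones(
    (n_sequences * sequence_length, sum(discrete_action_branches)),
    dtype=np.float32,
)
assert action_masks.shape == (512, 5)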
ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py
@@ -33,22 +33,21 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]:
         encoded_state_list = []
         encoded_next_state_list = []
 
-        if self.policy.vis_obs_size > 0:
-            self.next_visual_in = []
+        # Create input ops for next (t+1) visual observations.
+        self.next_vector_in, self.next_visual_in = ModelUtils.create_input_placeholders(
+            self.policy.behavior_spec.observation_shapes, name_prefix="curiosity_next_"
+        )
+
+        if self.next_visual_in:
             visual_encoders = []
             next_visual_encoders = []
-            for i in range(self.policy.vis_obs_size):
-                # Create input ops for next (t+1) visual observations.
-                next_visual_input = ModelUtils.create_visual_input(
-                    self.policy.brain.camera_resolutions[i],
-                    name="curiosity_next_visual_observation_" + str(i),
-                )
-                self.next_visual_in.append(next_visual_input)
-
+            for i, (vis_in, next_vis_in) in enumerate(
+                zip(self.policy.visual_in, self.next_visual_in)
+            ):
                 # Create the encoder ops for current and next visual input.
                 # Note that these encoders are siamese.
                 encoded_visual = ModelUtils.create_visual_observation_encoder(
-                    self.policy.visual_in[i],
+                    vis_in,
                     self.encoding_size,
                     ModelUtils.swish,
                     1,
@@ -57,7 +56,7 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]:
                 )
 
                 encoded_next_visual = ModelUtils.create_visual_observation_encoder(
-                    self.next_visual_in[i],
+                    next_vis_in,
                     self.encoding_size,
                     ModelUtils.swish,
                     1,
@@ -73,15 +72,6 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]:
             encoded_next_state_list.append(hidden_next_visual)
 
         if self.policy.vec_obs_size > 0:
-            # Create the encoder ops for current and next vector input.
-            # Note that these encoders are siamese.
-            # Create input op for next (t+1) vector observation.
-            self.next_vector_in = tf.placeholder(
-                shape=[None, self.policy.vec_obs_size],
-                dtype=tf.float32,
-                name="curiosity_next_vector_observation",
-            )
-
             encoded_vector_obs = ModelUtils.create_vector_observation_encoder(
                 self.policy.vector_in,
                 self.encoding_size,
@@ -100,7 +90,6 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]:
             )
             encoded_state_list.append(encoded_vector_obs)
             encoded_next_state_list.append(encoded_next_vector_obs)
-
         encoded_state = tf.concat(encoded_state_list, axis=1)
         encoded_next_state = tf.concat(encoded_next_state_list, axis=1)
         return encoded_state, encoded_next_state
@@ -116,7 +105,7 @@ def create_inverse_model(
         """
         combined_input = tf.concat([encoded_state, encoded_next_state], axis=1)
         hidden = tf.layers.dense(combined_input, 256, activation=ModelUtils.swish)
-        if self.policy.brain.vector_action_space_type == "continuous":
+        if self.policy.behavior_spec.is_action_continuous():
             pred_action = tf.layers.dense(
                 hidden, self.policy.act_size[0], activation=None
             )
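
Both reward-signal models now obtain their extra placeholders from ModelUtils.create_input_placeholders rather than building them by hand from brain.camera_resolutions. Judging from the call sites, the helper's contract is: one visual placeholder per 3-D observation shape plus a single vector placeholder covering the 1-D shapes. A hedged sketch of that behavior (not the library's actual implementation):

from typing import List, Tuple

from mlagents.tf_utils import tf

def create_input_placeholders_sketch(
    observation_shapes: List[Tuple[int, ...]], name_prefix: str = ""
) -> Tuple[tf.Tensor, List[tf.Tensor]]:
    visual_in: List[tf.Tensor] = []
    vec_obs_size = 0
    for i, shape in enumerate(observation_shapes):
        if len(shape) == 3:  # (height, width, channels): a visual observation
            visual_in.append(
                tf.placeholder(
                    shape=[None] + list(shape),
                    dtype=tf.float32,
                    name=f"{name_prefix}visual_observation_{i}",
                )
            )
        else:  # 1-D shapes are folded into one concatenated vector placeholder
            vec_obs_size += shape[0]
    vector_in = tf.placeholder(
        shape=[None, vec_obs_size],
        dtype=tf.float32,
        name=f"{name_prefix}vector_observation",
    )
    return vector_in, visual_in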
32 changes: 15 additions & 17 deletions ml-agents/mlagents/trainers/components/reward_signals/gail/model.py
@@ -1,4 +1,4 @@
-from typing import List, Optional, Tuple
+from typing import Optional, Tuple
 
 from mlagents.tf_utils import tf
 
@@ -69,7 +69,7 @@ def make_inputs(self) -> None:
         self.done_expert = tf.expand_dims(self.done_expert_holder, -1)
         self.done_policy = tf.expand_dims(self.done_policy_holder, -1)
 
-        if self.policy.brain.vector_action_space_type == "continuous":
+        if self.policy.behavior_spec.is_action_continuous():
             action_length = self.policy.act_size[0]
             self.action_in_expert = tf.placeholder(
                 shape=[None, action_length], dtype=tf.float32
@@ -91,10 +91,14 @@ def make_inputs(self) -> None:
         encoded_policy_list = []
         encoded_expert_list = []
 
+        (
+            self.obs_in_expert,
+            self.expert_visual_in,
+        ) = ModelUtils.create_input_placeholders(
+            self.policy.behavior_spec.observation_shapes, "gail_"
+        )
+
         if self.policy.vec_obs_size > 0:
-            self.obs_in_expert = tf.placeholder(
-                shape=[None, self.policy.vec_obs_size], dtype=tf.float32
-            )
             if self.policy.normalize:
                 encoded_expert_list.append(
                     ModelUtils.normalize_vector_obs(
@@ -109,20 +113,14 @@ def make_inputs(self) -> None:
                 encoded_expert_list.append(self.obs_in_expert)
                 encoded_policy_list.append(self.policy.vector_in)
 
-        if self.policy.vis_obs_size > 0:
-            self.expert_visual_in: List[tf.Tensor] = []
+        if self.expert_visual_in:
             visual_policy_encoders = []
             visual_expert_encoders = []
-            for i in range(self.policy.vis_obs_size):
-                # Create input ops for next (t+1) visual observations.
-                visual_input = ModelUtils.create_visual_input(
-                    self.policy.brain.camera_resolutions[i],
-                    name="gail_visual_observation_" + str(i),
-                )
-                self.expert_visual_in.append(visual_input)
-
+            for i, (vis_in, exp_vis_in) in enumerate(
+                zip(self.policy.visual_in, self.expert_visual_in)
+            ):
                 encoded_policy_visual = ModelUtils.create_visual_observation_encoder(
-                    self.policy.visual_in[i],
+                    vis_in,
                     self.encoding_size,
                     ModelUtils.swish,
                     1,
@@ -131,7 +129,7 @@ def make_inputs(self) -> None:
                 )
 
                 encoded_expert_visual = ModelUtils.create_visual_observation_encoder(
-                    self.expert_visual_in[i],
+                    exp_vis_in,
                     self.encoding_size,
                     ModelUtils.swish,
                     1,
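
As in the curiosity model, the policy/expert encoder pairs above are siamese, which is why the zip-based rewrite is safe: what matters is that each pair runs through encoders with shared weights, not the loop index. A minimal TF1-style illustration of that weight sharing via variable-scope reuse (scope name and layer size invented for the sketch):

from mlagents.tf_utils import tf

def encode(visual_in: tf.Tensor, reuse: bool) -> tf.Tensor:
    # Same scope with reuse=True on the second call means both streams
    # share one set of weights.
    with tf.variable_scope("gail_visual_encoder", reuse=reuse):
        flat = tf.layers.flatten(visual_in)
        return tf.layers.dense(flat, 128, activation=tf.nn.relu)

policy_vis = tf.placeholder(shape=[None, 84, 84, 3], dtype=tf.float32)
expert_vis = tf.placeholder(shape=[None, 84, 84, 3], dtype=tf.float32)

encoded_policy = encode(policy_vis, reuse=False)  # creates the variables
encoded_expert = encode(expert_vis, reuse=True)   # reuses the same variables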
ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py
@@ -30,7 +30,7 @@ def __init__(self, policy: TFPolicy, settings: GAILSettings):
             settings.use_vail,
         )
         _, self.demonstration_buffer = demo_to_buffer(
-            settings.demo_path, policy.sequence_length, policy.brain
+            settings.demo_path, policy.sequence_length, policy.behavior_spec
         )
         self.has_updated = False
         self.update_dict: Dict[str, tf.Tensor] = {
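
Both imitation components (the BC module earlier and GAIL here) now pass the policy's behavior_spec to demo_to_buffer, so the loader can validate recorded demonstrations against the current policy rather than against the legacy brain parameters. A sketch mirroring the call shape (the path and sequence length are illustrative, `policy` stands for the TFPolicy at the real call site, and per the unpacking above the first element of the returned tuple is the spec read from the demo file, which these call sites discard):

from mlagents.trainers.demo_loader import demo_to_buffer

demo_spec, demo_buffer = demo_to_buffer(
    "Project/Assets/Demos/3DBall.demo",  # illustrative demo path
    64,                                  # stands in for policy.sequence_length
    policy.behavior_spec,                # spec the demos must be compatible with
)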