20 changes: 20 additions & 0 deletions ml-agents/mlagents/trainers/tests/torch/test_layers.py
@@ -0,0 +1,20 @@
+import torch
+
+from mlagents.trainers.torch.layers import Swish, linear_layer, Initialization
+
+
+def test_swish():
+    layer = Swish()
+    input_tensor = torch.Tensor([[1, 2, 3], [4, 5, 6]])
+    target_tensor = torch.mul(input_tensor, torch.sigmoid(input_tensor))
+    assert torch.all(torch.eq(layer(input_tensor), target_tensor))
+
+
+def test_initialization_layer():
+    torch.manual_seed(0)
+    # Test Zero
+    layer = linear_layer(
+        3, 4, kernel_init=Initialization.Zero, bias_init=Initialization.Zero
+    )
+    assert torch.all(torch.eq(layer.weight.data, torch.zeros_like(layer.weight.data)))
+    assert torch.all(torch.eq(layer.bias.data, torch.zeros_like(layer.bias.data)))
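The `mlagents.trainers.torch.layers` module these tests import is not itself part of this diff. As a point of reference, here is a minimal sketch of what the tests and the call sites below imply it contains; the enum members and the defaults are assumptions, not the module's actual source:

```python
# Hypothetical sketch of mlagents/trainers/torch/layers.py, inferred from the
# call sites in this PR. Enum members and defaults are guesses.
import enum

import torch
from torch import nn


class Swish(nn.Module):
    """Swish activation: x * sigmoid(x)."""

    def forward(self, data: torch.Tensor) -> torch.Tensor:
        return torch.mul(data, torch.sigmoid(data))


class Initialization(enum.Enum):
    Zero = 0
    XavierGlorotNormal = 1
    KaimingHeNormal = 2


_INIT_METHODS = {
    Initialization.Zero: nn.init.zeros_,
    Initialization.XavierGlorotNormal: nn.init.xavier_normal_,
    Initialization.KaimingHeNormal: nn.init.kaiming_normal_,
}


def linear_layer(
    input_size: int,
    output_size: int,
    kernel_init: Initialization = Initialization.XavierGlorotNormal,
    kernel_gain: float = 1.0,
    bias_init: Initialization = Initialization.Zero,
) -> nn.Linear:
    """A torch.nn.Linear with the requested weight/bias initialization."""
    layer = nn.Linear(input_size, output_size)
    _INIT_METHODS[kernel_init](layer.weight.data)
    layer.weight.data *= kernel_gain  # extra scale applied on top of the base init
    _INIT_METHODS[bias_init](layer.bias.data)
    return layer
```

Under this sketch, `test_initialization_layer` above passes: with `Initialization.Zero` for both kernel and bias, the layer's weight and bias come back all-zero.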
9 changes: 5 additions & 4 deletions ml-agents/mlagents/trainers/tests/torch/test_networks.py
@@ -17,16 +17,17 @@


 def test_networkbody_vector():
+    torch.manual_seed(0)
     obs_size = 4
     network_settings = NetworkSettings()
     obs_shapes = [(obs_size,)]

     networkbody = NetworkBody(obs_shapes, network_settings, encoded_act_size=2)
     optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
-    sample_obs = torch.ones((1, obs_size))
-    sample_act = torch.ones((1, 2))
+    sample_obs = 0.1 * torch.ones((1, obs_size))
+    sample_act = 0.1 * torch.ones((1, 2))

-    for _ in range(100):
+    for _ in range(300):
         encoded, _ = networkbody([sample_obs], [], sample_act)
         assert encoded.shape == (1, network_settings.hidden_units)
         # Try to force output to 1
@@ -77,7 +78,7 @@ def test_networkbody_visual():
     sample_obs = torch.ones((1, 84, 84, 3))
     sample_vec_obs = torch.ones((1, vec_obs_size))

-    for _ in range(100):
+    for _ in range(150):
         encoded, _ = networkbody([sample_vec_obs], [sample_obs])
         assert encoded.shape == (1, network_settings.hidden_units)
         # Try to force output to 1
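The remainder of each test body is collapsed behind the diff fold; the visible changes seed the RNG, shrink the sample inputs to 0.1, and raise the iteration counts (100 to 300, and 100 to 150), presumably because the smaller inputs and new initialization need more optimizer steps to converge. A hypothetical sketch of what such a folded loop body looks like, continuing the names defined in `test_networkbody_vector` above (the real assertions are hidden in this view):

```python
# Hypothetical sketch of the collapsed portion of test_networkbody_vector;
# networkbody, sample_obs, sample_act, optimizer, and network_settings are
# defined in the visible part of the test above.
for _ in range(300):
    encoded, _ = networkbody([sample_obs], [], sample_act)
    assert encoded.shape == (1, network_settings.hidden_units)
    # Try to force output to 1
    loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
# After training, each entry of the final encoding should be near the target.
for _enc in encoded.flatten():
    assert abs(_enc.item() - 1.0) < 0.1
```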
@@ -11,6 +11,7 @@
 from mlagents_envs.base_env import BehaviorSpec
 from mlagents.trainers.torch.utils import ModelUtils
 from mlagents.trainers.torch.networks import NetworkBody
+from mlagents.trainers.torch.layers import linear_layer, Swish
 from mlagents.trainers.settings import NetworkSettings, EncoderType


@@ -70,22 +71,18 @@ def __init__(self, specs: BehaviorSpec, settings: CuriositySettings) -> None:
         self._action_flattener = ModelUtils.ActionFlattener(specs)

         self.inverse_model_action_predition = torch.nn.Sequential(
-            torch.nn.Linear(2 * settings.encoding_size, 256),
-            ModelUtils.SwishLayer(),
-            torch.nn.Linear(256, self._action_flattener.flattened_size),
+            linear_layer(2 * settings.encoding_size, 256),
+            Swish(),
+            linear_layer(256, self._action_flattener.flattened_size),
         )
-        self.inverse_model_action_predition[0].bias.data.zero_()
-        self.inverse_model_action_predition[2].bias.data.zero_()

         self.forward_model_next_state_prediction = torch.nn.Sequential(
-            torch.nn.Linear(
+            linear_layer(
                 settings.encoding_size + self._action_flattener.flattened_size, 256
             ),
-            ModelUtils.SwishLayer(),
-            torch.nn.Linear(256, settings.encoding_size),
+            Swish(),
+            linear_layer(256, settings.encoding_size),
         )
-        self.forward_model_next_state_prediction[0].bias.data.zero_()
-        self.forward_model_next_state_prediction[2].bias.data.zero_()

     def get_current_state(self, mini_batch: AgentBuffer) -> torch.Tensor:
         """
@@ -10,6 +10,7 @@
 from mlagents_envs.base_env import BehaviorSpec
 from mlagents.trainers.torch.utils import ModelUtils
 from mlagents.trainers.torch.networks import NetworkBody
+from mlagents.trainers.torch.layers import linear_layer, Swish, Initialization
 from mlagents.trainers.settings import NetworkSettings, EncoderType
 from mlagents.trainers.demo_loader import demo_to_buffer

@@ -98,35 +99,31 @@ def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
         ) # + 1 is for done

         self.encoder = torch.nn.Sequential(
-            torch.nn.Linear(encoder_input_size, settings.encoding_size),
-            ModelUtils.SwishLayer(),
-            torch.nn.Linear(settings.encoding_size, settings.encoding_size),
-            ModelUtils.SwishLayer(),
+            linear_layer(encoder_input_size, settings.encoding_size),
+            Swish(),
+            linear_layer(settings.encoding_size, settings.encoding_size),
+            Swish(),
         )
-        torch.nn.init.xavier_normal_(self.encoder[0].weight.data)
-        torch.nn.init.xavier_normal_(self.encoder[2].weight.data)
-        self.encoder[0].bias.data.zero_()
-        self.encoder[2].bias.data.zero_()

         estimator_input_size = settings.encoding_size
         if settings.use_vail:
             estimator_input_size = self.z_size
             self.z_sigma = torch.nn.Parameter(
                 torch.ones((self.z_size), dtype=torch.float), requires_grad=True
             )
-            self.z_mu_layer = torch.nn.Linear(settings.encoding_size, self.z_size)
-            # self.z_mu_layer.weight.data Needs a variance scale initializer
-            torch.nn.init.xavier_normal_(self.z_mu_layer.weight.data)
-            self.z_mu_layer.bias.data.zero_()
+            self.z_mu_layer = linear_layer(
+                settings.encoding_size,
+                self.z_size,
+                kernel_init=Initialization.KaimingHeNormal,
+                kernel_gain=0.1,
+            )
             self.beta = torch.nn.Parameter(
                 torch.tensor(self.initial_beta, dtype=torch.float), requires_grad=False
             )

         self.estimator = torch.nn.Sequential(
-            torch.nn.Linear(estimator_input_size, 1), torch.nn.Sigmoid()
+            linear_layer(estimator_input_size, 1), torch.nn.Sigmoid()
         )
-        torch.nn.init.xavier_normal_(self.estimator[0].weight.data)
-        self.estimator[0].bias.data.zero_()

     def get_action_input(self, mini_batch: AgentBuffer) -> torch.Tensor:
         """
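The `use_vail` branch above sets up a variational discriminator bottleneck: `z_mu_layer` maps the encoding to the mean of a latent distribution, `z_sigma` is a learned, state-independent standard deviation, and `beta` weights a KL penalty. The forward pass is outside this diff; the following is a self-contained, hypothetical sketch of how such pieces typically combine, with illustrative names and shapes rather than this module's actual code:

```python
# Hypothetical, self-contained sketch of a VAIL-style bottleneck; names and
# shapes are illustrative, not this module's actual forward pass.
import torch
from torch import nn

encoding_size, z_size = 64, 32
encoder = nn.Sequential(nn.Linear(10, encoding_size), nn.SiLU())  # SiLU == Swish
z_mu_layer = nn.Linear(encoding_size, z_size)
z_sigma = nn.Parameter(torch.ones(z_size))
estimator = nn.Sequential(nn.Linear(z_size, 1), nn.Sigmoid())

x = torch.randn(5, 10)
z_mu = z_mu_layer(encoder(x))
z = z_mu + z_sigma * torch.randn_like(z_mu)  # reparameterized sample
estimate = estimator(z)  # probability that x came from the demonstrations

# KL(q(z|x) || N(0, I)); a beta parameter would weight this in the GAIL loss.
kl = 0.5 * (z_mu ** 2 + z_sigma ** 2 - torch.log(z_sigma ** 2) - 1.0).sum(dim=1)
print(estimate.shape, kl.mean().item())
```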
3 changes: 2 additions & 1 deletion ml-agents/mlagents/trainers/torch/decoders.py
@@ -2,6 +2,7 @@

 import torch
 from torch import nn
+from mlagents.trainers.torch.layers import linear_layer


 class ValueHeads(nn.Module):
@@ -11,7 +12,7 @@ def __init__(self, stream_names: List[str], input_size: int, output_size: int =
         _value_heads = {}

         for name in stream_names:
-            value = nn.Linear(input_size, output_size)
+            value = linear_layer(input_size, output_size)
             _value_heads[name] = value
         self.value_heads = nn.ModuleDict(_value_heads)
28 changes: 22 additions & 6 deletions ml-agents/mlagents/trainers/torch/distributions.py
@@ -4,6 +4,7 @@
 from torch import nn
 import numpy as np
 import math
+from mlagents.trainers.torch.layers import linear_layer, Initialization

 EPSILON = 1e-7 # Small value to avoid divide by zero

@@ -127,12 +128,22 @@ def __init__(
     ):
         super().__init__()
         self.conditional_sigma = conditional_sigma
-        self.mu = nn.Linear(hidden_size, num_outputs)
+        self.mu = linear_layer(
+            hidden_size,
+            num_outputs,
+            kernel_init=Initialization.KaimingHeNormal,
+            kernel_gain=0.1,
+            bias_init=Initialization.Zero,
+        )
         self.tanh_squash = tanh_squash
-        nn.init.xavier_uniform_(self.mu.weight, gain=0.01)
         if conditional_sigma:
-            self.log_sigma = nn.Linear(hidden_size, num_outputs)
-            nn.init.xavier_uniform(self.log_sigma.weight, gain=0.01)
+            self.log_sigma = linear_layer(
+                hidden_size,
+                num_outputs,
+                kernel_init=Initialization.KaimingHeNormal,
+                kernel_gain=0.1,
+                bias_init=Initialization.Zero,
+            )
         else:
             self.log_sigma = nn.Parameter(
                 torch.zeros(1, num_outputs, requires_grad=True)

[Review thread on the kernel_init argument above]
Contributor: Maybe these should be the natural defaults, since they seem to be common.
Contributor (author): I tried to have the default be the same as TensorFlow, since it is more common.
@@ -159,8 +170,13 @@ def __init__(self, hidden_size: int, act_sizes: List[int]):
     def _create_policy_branches(self, hidden_size: int) -> nn.ModuleList:
         branches = []
         for size in self.act_sizes:
-            branch_output_layer = nn.Linear(hidden_size, size)
-            nn.init.xavier_uniform_(branch_output_layer.weight, gain=0.01)
+            branch_output_layer = linear_layer(
+                hidden_size,
+                size,
+                kernel_init=Initialization.KaimingHeNormal,
+                kernel_gain=0.1,
+                bias_init=Initialization.Zero,
+            )
             branches.append(branch_output_layer)
         return nn.ModuleList(branches)
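On the review thread above: the removed code kept the policy head small with `xavier_uniform_` at gain 0.01, while the added code uses a Kaiming He normal kernel scaled by `kernel_gain=0.1`. A quick way to eyeball the resulting weight scales, assuming the hypothetical `linear_layer`/`Initialization` sketch shown earlier on this page:

```python
# Compare the weight scale of the removed init against the added one.
# Relies on the hypothetical linear_layer/Initialization sketch above.
import torch
from torch import nn

hidden_size, num_outputs = 128, 4
torch.manual_seed(0)

old = nn.Linear(hidden_size, num_outputs)
nn.init.xavier_uniform_(old.weight, gain=0.01)  # removed by this PR

new = linear_layer(
    hidden_size,
    num_outputs,
    kernel_init=Initialization.KaimingHeNormal,
    kernel_gain=0.1,
    bias_init=Initialization.Zero,
)  # added by this PR

# Both are far below a unit-gain init, though not identical in scale.
print(f"old std: {old.weight.std():.5f}")  # ~0.0012 for these sizes
print(f"new std: {new.weight.std():.5f}")  # ~0.0125 for these sizes
```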
100 changes: 67 additions & 33 deletions ml-agents/mlagents/trainers/torch/encoders.py
@@ -1,6 +1,7 @@
 from typing import Tuple, Optional

 from mlagents.trainers.exception import UnityTrainerException
+from mlagents.trainers.torch.layers import linear_layer, Initialization, Swish

 import torch
 from torch import nn
@@ -64,11 +65,6 @@ def pool_out_shape(h_w: Tuple[int, int], kernel_size: int) -> Tuple[int, int]:
     return height, width


-class SwishLayer(torch.nn.Module):
-    def forward(self, data: torch.Tensor) -> torch.Tensor:
-        return torch.mul(data, torch.sigmoid(data))
-
-
 class VectorEncoder(nn.Module):
     def __init__(
         self,
@@ -79,14 +75,28 @@ def __init__(
     ):
         self.normalizer: Optional[Normalizer] = None
         super().__init__()
-        self.layers = [nn.Linear(input_size, hidden_size)]
-        self.layers.append(SwishLayer())
+        self.layers = [
+            linear_layer(
+                input_size,
+                hidden_size,
+                kernel_init=Initialization.KaimingHeNormal,
+                kernel_gain=1.0,
+            )
+        ]
+        self.layers.append(Swish())
         if normalize:
             self.normalizer = Normalizer(input_size)

         for _ in range(num_layers - 1):
-            self.layers.append(nn.Linear(hidden_size, hidden_size))
-            self.layers.append(nn.LeakyReLU())
+            self.layers.append(
+                linear_layer(
+                    hidden_size,
+                    hidden_size,
+                    kernel_init=Initialization.KaimingHeNormal,
+                    kernel_gain=1.0,
+                )
+            )
+            self.layers.append(Swish())
         self.seq_layers = nn.Sequential(*self.layers)

     def forward(self, inputs: torch.Tensor) -> None:
@@ -160,17 +170,26 @@ def __init__(
         conv_2_hw = conv_output_shape(conv_1_hw, 4, 2)
         self.final_flat = conv_2_hw[0] * conv_2_hw[1] * 32

-        self.conv1 = nn.Conv2d(initial_channels, 16, [8, 8], [4, 4])
-        self.conv2 = nn.Conv2d(16, 32, [4, 4], [2, 2])
-        self.dense = nn.Linear(self.final_flat, self.h_size)
+        self.conv_layers = nn.Sequential(
+            nn.Conv2d(initial_channels, 16, [8, 8], [4, 4]),
+            nn.LeakyReLU(),
+            nn.Conv2d(16, 32, [4, 4], [2, 2]),
+            nn.LeakyReLU(),
+        )
+        self.dense = nn.Sequential(
+            linear_layer(
+                self.final_flat,
+                self.h_size,
+                kernel_init=Initialization.KaimingHeNormal,
+                kernel_gain=1.0,
+            ),
+            nn.LeakyReLU(),
+        )

     def forward(self, visual_obs: torch.Tensor) -> None:
-        conv_1 = nn.functional.leaky_relu(self.conv1(visual_obs))
-        conv_2 = nn.functional.leaky_relu(self.conv2(conv_1))
-        # hidden = torch.relu(self.dense(conv_2.view([-1, self.final_flat])))
-        hidden = nn.functional.leaky_relu(
-            self.dense(torch.reshape(conv_2, (-1, self.final_flat)))
-        )
+        hidden = self.conv_layers(visual_obs)
+        hidden = torch.reshape(hidden, (-1, self.final_flat))
+        hidden = self.dense(hidden)
         return hidden

@@ -183,18 +202,28 @@ def __init__(self, height, width, initial_channels, output_size):
         conv_3_hw = conv_output_shape(conv_2_hw, 3, 1)
         self.final_flat = conv_3_hw[0] * conv_3_hw[1] * 64

-        self.conv1 = nn.Conv2d(initial_channels, 32, [8, 8], [4, 4])
-        self.conv2 = nn.Conv2d(32, 64, [4, 4], [2, 2])
-        self.conv3 = nn.Conv2d(64, 64, [3, 3], [1, 1])
-        self.dense = nn.Linear(self.final_flat, self.h_size)
-
-    def forward(self, visual_obs):
-        conv_1 = nn.functional.leaky_relu(self.conv1(visual_obs))
-        conv_2 = nn.functional.leaky_relu(self.conv2(conv_1))
-        conv_3 = nn.functional.leaky_relu(self.conv3(conv_2))
-        hidden = nn.functional.leaky_relu(
-            self.dense(conv_3.view([-1, self.final_flat]))
-        )
+        self.conv_layers = nn.Sequential(
+            nn.Conv2d(initial_channels, 32, [8, 8], [4, 4]),
+            nn.LeakyReLU(),
+            nn.Conv2d(32, 64, [4, 4], [2, 2]),
+            nn.LeakyReLU(),
+            nn.Conv2d(64, 64, [3, 3], [1, 1]),
+            nn.LeakyReLU(),
+        )
+        self.dense = nn.Sequential(
+            linear_layer(
+                self.final_flat,
+                self.h_size,
+                kernel_init=Initialization.KaimingHeNormal,
+                kernel_gain=1.0,
+            ),
+            nn.LeakyReLU(),
+        )
+
+    def forward(self, visual_obs: torch.Tensor) -> None:
+        hidden = self.conv_layers(visual_obs)
+        hidden = hidden.view([-1, self.final_flat])
+        hidden = self.dense(hidden)
         return hidden

@@ -214,15 +243,20 @@ def __init__(self, height, width, initial_channels, final_hidden):
         for _ in range(n_blocks):
             self.layers.append(self.make_block(channel))
             last_channel = channel
-        self.layers.append(nn.LeakyReLU())
-        self.dense = nn.Linear(n_channels[-1] * height * width, final_hidden)
+        self.layers.append(Swish())
+        self.dense = linear_layer(
+            n_channels[-1] * height * width,
+            final_hidden,
+            kernel_init=Initialization.KaimingHeNormal,
+            kernel_gain=1.0,
+        )

     @staticmethod
     def make_block(channel):
         block_layers = [
-            nn.LeakyReLU(),
+            Swish(),
             nn.Conv2d(channel, channel, [3, 3], [1, 1], padding=1),
-            nn.LeakyReLU(),
+            Swish(),
             nn.Conv2d(channel, channel, [3, 3], [1, 1], padding=1),
         ]
         return block_layers
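As a quick smoke test of the refactored visual encoders, a sketch: the constructor signature is taken from the `NatureVisualEncoder` hunk heading above, while the NCHW input layout and the assumption that `output_size` sets `self.h_size` are inferred, not shown in this diff:

```python
# Hypothetical smoke test for the refactored NatureVisualEncoder; assumes the
# (height, width, initial_channels, output_size) signature shown above, NCHW
# input, and that output_size determines self.h_size.
import torch

encoder = NatureVisualEncoder(84, 84, 3, 256)
visual_obs = torch.zeros((1, 3, 84, 84))
hidden = encoder(visual_obs)
assert hidden.shape == (1, 256)
```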