From 64871e3c392bc88d213c80991c1cce99af848c6a Mon Sep 17 00:00:00 2001
From: Ruo-Ping Dong <ruoping.dong@unity3d.com>
Date: Wed, 16 Dec 2020 13:28:04 -0800
Subject: [PATCH 1/3] fix export

---
 ml-agents/mlagents/trainers/torch/distributions.py |  8 +++++---
 ml-agents/mlagents/trainers/torch/networks.py      | 14 ++++++++------
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/ml-agents/mlagents/trainers/torch/distributions.py b/ml-agents/mlagents/trainers/torch/distributions.py
index e5b44e8550..39ae089d89 100644
--- a/ml-agents/mlagents/trainers/torch/distributions.py
+++ b/ml-agents/mlagents/trainers/torch/distributions.py
@@ -173,9 +173,11 @@ def forward(self, inputs: torch.Tensor) -> List[DistInstance]:
             log_sigma = torch.clamp(self.log_sigma(inputs), min=-20, max=2)
         else:
             # Expand so that entropy matches batch size. Note that we're using
-            # torch.cat here instead of torch.expand() becuase it is not supported in the
-            # verified version of Barracuda (1.0.2).
-            log_sigma = torch.cat([self.log_sigma] * inputs.shape[0], axis=0)
+            # mu*0 here to get the batch size implicitly since Barracuda 1.2.1
+            # throws error on runtime broadcasting due to unknow reason. We
+            # use this to replace torch.expand() becuase it is not supported in
+            # the verified version of Barracuda (1.0.2).
+            log_sigma = mu * 0 + self.log_sigma
         if self.tanh_squash:
             return TanhGaussianDistInstance(mu, torch.exp(log_sigma))
         else:
diff --git a/ml-agents/mlagents/trainers/torch/networks.py b/ml-agents/mlagents/trainers/torch/networks.py
index 2308b91d7a..ed82c1f2ca 100644
--- a/ml-agents/mlagents/trainers/torch/networks.py
+++ b/ml-agents/mlagents/trainers/torch/networks.py
@@ -258,9 +258,11 @@ def __init__(
     ):
         super().__init__()
         self.action_spec = action_spec
-        self.version_number = torch.nn.Parameter(torch.Tensor([2.0]))
+        self.version_number = torch.nn.Parameter(
+            torch.Tensor([2.0]), requires_grad=False
+        )
         self.is_continuous_int_deprecated = torch.nn.Parameter(
-            torch.Tensor([int(self.action_spec.is_continuous())])
+            torch.Tensor([int(self.action_spec.is_continuous())]), requires_grad=False
         )
         self.continuous_act_size_vector = torch.nn.Parameter(
             torch.Tensor([int(self.action_spec.continuous_size)]), requires_grad=False
@@ -283,6 +285,9 @@ def __init__(
             self.encoding_size = network_settings.memory.memory_size // 2
         else:
             self.encoding_size = network_settings.hidden_units
+        self.memory_size_vector = torch.nn.Parameter(
+            torch.Tensor([int(self.network_body.memory_size)]), requires_grad=False
+        )
 
         self.action_model = ActionModel(
             self.encoding_size,
@@ -335,10 +340,7 @@ def forward(
             disc_action_out,
             action_out_deprecated,
         ) = self.action_model.get_action_out(encoding, masks)
-        export_out = [
-            self.version_number,
-            torch.Tensor([self.network_body.memory_size]),
-        ]
+        export_out = [self.version_number, self.memory_size_vector]
         if self.action_spec.continuous_size > 0:
             export_out += [cont_action_out, self.continuous_act_size_vector]
         if self.action_spec.discrete_size > 0:

From 24c00a7fc251ebb29fbf01a4ea6871e43913d104 Mon Sep 17 00:00:00 2001
From: Ruo-Ping Dong <ruoping.dong@unity3d.com>
Date: Wed, 16 Dec 2020 16:38:37 -0500
Subject: [PATCH 2/3] typo

Co-authored-by: Ervin T. <ervin@unity3d.com>
---
 ml-agents/mlagents/trainers/torch/distributions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml-agents/mlagents/trainers/torch/distributions.py b/ml-agents/mlagents/trainers/torch/distributions.py
index 39ae089d89..af1cafbb0b 100644
--- a/ml-agents/mlagents/trainers/torch/distributions.py
+++ b/ml-agents/mlagents/trainers/torch/distributions.py
@@ -174,7 +174,7 @@ def forward(self, inputs: torch.Tensor) -> List[DistInstance]:
         else:
             # Expand so that entropy matches batch size. Note that we're using
             # mu*0 here to get the batch size implicitly since Barracuda 1.2.1
-            # throws error on runtime broadcasting due to unknow reason. We
+            # throws error on runtime broadcasting due to unknown reason. We
             # use this to replace torch.expand() becuase it is not supported in
             # the verified version of Barracuda (1.0.2).
             log_sigma = mu * 0 + self.log_sigma

From 457a3446276f9371a560248dd0dae7f2309f86c4 Mon Sep 17 00:00:00 2001
From: Ruo-Ping Dong <ruoping.dong@unity3d.com>
Date: Wed, 16 Dec 2020 16:44:44 -0500
Subject: [PATCH 3/3] Update ml-agents/mlagents/trainers/torch/distributions.py

Co-authored-by: Ervin T. <ervin@unity3d.com>
---
 ml-agents/mlagents/trainers/torch/distributions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml-agents/mlagents/trainers/torch/distributions.py b/ml-agents/mlagents/trainers/torch/distributions.py
index af1cafbb0b..b3e7b34f3c 100644
--- a/ml-agents/mlagents/trainers/torch/distributions.py
+++ b/ml-agents/mlagents/trainers/torch/distributions.py
@@ -176,7 +176,7 @@ def forward(self, inputs: torch.Tensor) -> List[DistInstance]:
             # mu*0 here to get the batch size implicitly since Barracuda 1.2.1
             # throws error on runtime broadcasting due to unknown reason. We
             # use this to replace torch.expand() becuase it is not supported in
-            # the verified version of Barracuda (1.0.2).
+            # the verified version of Barracuda (1.0.X).
             log_sigma = mu * 0 + self.log_sigma
         if self.tanh_squash:
             return TanhGaussianDistInstance(mu, torch.exp(log_sigma))