Unity-Technologies · ervteng · Aug 10, 2020 · Aug 10, 2020 · Aug 10, 2020 · Aug 10, 2020
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_distributions.py b/ml-agents/mlagents/trainers/tests/torch/test_distributions.py
@@ -125,13 +125,13 @@ def test_categorical_dist_instance():
     torch.manual_seed(0)
     act_size = 4
     test_prob = torch.tensor(
-        [1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)
+        [[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)]
     )  # High prob for first action
     dist_instance = CategoricalDistInstance(test_prob)
 
     for _ in range(10):
         action = dist_instance.sample()
-        assert action.shape == (1,)
+        assert action.shape == (1, 1)
         assert action < act_size
 
-    # Make sure the first action as higher probability than the others.
+    # Make sure the first action has higher probability than the others.

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_utils.py b/ml-agents/mlagents/trainers/tests/torch/test_utils.py
@@ -187,14 +187,14 @@ def test_get_probs_and_entropy():
     # Add two dists to the list.
     act_size = 2
     test_prob = torch.tensor(
-        [1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)
+        [[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)]
     )  # High prob for first action
     dist_list = [CategoricalDistInstance(test_prob), CategoricalDistInstance(test_prob)]
     action_list = [torch.tensor([0]), torch.tensor([1])]
     log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
         action_list, dist_list
     )
-    assert all_probs.shape == (len(dist_list * act_size),)
-    assert entropies.shape == (len(dist_list),)
+    assert all_probs.shape == (1, len(dist_list * act_size))
+    assert entropies.shape == (1, len(dist_list))
-    # Make sure the first action has high probability than the others.
+    # Make sure the first action has higher probability than the others.
     assert log_probs.flatten()[0] > log_probs.flatten()[1]
diff --git a/ml-agents/mlagents/trainers/torch/distributions.py b/ml-agents/mlagents/trainers/torch/distributions.py
@@ -100,8 +100,12 @@ def sample(self):
         return torch.multinomial(self.probs, 1)
 
     def pdf(self, value):
-        idx = torch.range(end=len(value)).unsqueeze(-1)
-        return torch.gather(self.probs.permute(1, 0)[value.flatten().long()], -1, idx).squeeze(-1)
+        # This function is equivalent to torch.diag(self.probs.T[value.flatten().long()]),
+        # but torch.diag is not supported by ONNX export.
+        idx = torch.arange(start=0, end=len(value)).unsqueeze(-1)
+        return torch.gather(
+            self.probs.permute(1, 0)[value.flatten().long()], -1, idx
+        ).squeeze(-1)
 
     def log_prob(self, value):
         return torch.log(self.pdf(value))