documentation/policy (#151)
* update autodoc options

* add DeterministicPolicy documentation

* add docs to Gaussian policy

* add docs to Greedy policy

* add documentation for the SoftDeterministicPolicy

* add SoftmaxPolicy documentation

* tweak policy documentation wording

* update Environment documentation

* clean up some other issues with documentation
cpnota committed Jun 7, 2020
1 parent 794ea2d commit 5547867
Showing 10 changed files with 84 additions and 11 deletions.
9 changes: 3 additions & 6 deletions all/environments/abstract.py
@@ -41,14 +41,11 @@ def step(self, action):
Returns
-------
State
The state of the environment after the action is applied
all.environments.State
The State of the environment after the action is applied.
This State object includes both the done flag and any additional "info".
float
The reward achieved by the previous action
done
True if the environment has entered a terminal state and should be reset
info
Diagnostic information useful for debugging
"""

@abstractmethod
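For context, the updated docstring implies that step returns the new State (which now carries the done flag and any "info") together with a float reward. A minimal interaction-loop sketch under that reading, using hypothetical env and agent objects, might look like:

# Hedged sketch based only on the documented return values; `env` and `agent`
# are placeholders, and `state.done` is assumed from the note that the State
# object includes the done flag.
state = env.reset()
episode_return = 0.0
while not state.done:
    action = agent.act(state)          # pick an action for the current State
    state, reward = env.step(action)   # new State plus a float reward
    episode_return += reward
print('episode return:', episode_return)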
3 changes: 1 addition & 2 deletions all/nn/__init__.py
@@ -10,7 +10,6 @@ class RLNetwork(nn.Module):
"""
Wraps a network such that States can be given as input.
"""

def __init__(self, model, _=None):
super().__init__()
self.model = model
@@ -20,7 +19,7 @@ def forward(self, state):
return self.model(state.features.float()) * state.mask.float().unsqueeze(-1)

class Aggregation(nn.Module):
"""len()
"""
Aggregation layer for the Dueling architecture.
https://arxiv.org/abs/1511.06581
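The Aggregation layer mentioned above refers to the dueling combination Q(s, a) = V(s) + A(s, a) - mean_a A(s, a) from the linked paper. A small stand-alone sketch of that combination (illustrative, not the library's exact code):

import torch

def dueling_aggregation(value, advantages):
    # value: (batch, 1) state-value estimates
    # advantages: (batch, num_actions) advantage estimates
    # Q(s, a) = V(s) + A(s, a) - mean_a A(s, a), per https://arxiv.org/abs/1511.06581
    return value + advantages - advantages.mean(dim=1, keepdim=True)

value = torch.tensor([[1.0]])
advantages = torch.tensor([[0.0, 1.0, 2.0]])
print(dueling_aggregation(value, advantages))  # tensor([[0., 1., 2.]])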
2 changes: 2 additions & 0 deletions all/optim/__init__.py
@@ -1 +1,3 @@
from .scheduler import LinearScheduler, Schedulable

__all__ = ['Schedulable', 'LinearScheduler']
13 changes: 13 additions & 0 deletions all/policies/deterministic.py
@@ -4,6 +4,18 @@


class DeterministicPolicy(Approximation):
'''
A DDPG-style deterministic policy.
Args:
model (torch.nn.Module): A Pytorch module representing the policy network.
The input shape should be the same as the shape of the state space,
and the output shape should be the same as the shape of the action space.
optimizer (torch.optim.Optimizer): An optimizer initialized with the
model parameters, e.g. SGD, Adam, RMSprop, etc.
action_space (gym.spaces.Box): The Box representing the action space.
kwargs (optional): Any other arguments accepted by all.approximation.Approximation
'''
def __init__(
self,
model,
@@ -20,6 +32,7 @@ def __init__(
**kwargs
)


class DeterministicPolicyNetwork(RLNetwork):
def __init__(self, model, space):
super().__init__(model)
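A construction sketch based only on the arguments documented above; the layer sizes, optimizer settings, and action bounds are illustrative assumptions rather than a recommended configuration:

import torch
from gym.spaces import Box
from all.policies import DeterministicPolicy

state_dim, action_dim = 8, 2
model = torch.nn.Sequential(
    torch.nn.Linear(state_dim, 64),
    torch.nn.ReLU(),
    torch.nn.Linear(64, action_dim),   # one output per action dimension
)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
action_space = Box(low=-1.0, high=1.0, shape=(action_dim,))
policy = DeterministicPolicy(model, optimizer, action_space)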
18 changes: 18 additions & 0 deletions all/policies/gaussian.py
@@ -6,6 +6,24 @@


class GaussianPolicy(Approximation):
'''
A Gaussian stochastic policy.
This policy will choose actions from a distribution represented by a spherical Gaussian.
The first n outputs of the model will be squashed to [-1, 1] through a tanh function, and then
scaled to the given action_space, and the remaining n outputs will define the amount of noise added.
Args:
model (torch.nn.Module): A Pytorch module representing the policy network.
The input shape should be the same as the shape of the state (or feature) space,
and the output shape should be double the size of the action space.
The first n outputs will be the unscaled mean of the action for each dimension,
and the second n outputs will be the logarithm of the variance.
optimizer (torch.optim.Optimizer): An optimizer initialized with the
model parameters, e.g. SGD, Adam, RMSprop, etc.
action_space (gym.spaces.Box): The Box representing the action space.
kwargs (optional): Any other arguments accepted by all.approximation.Approximation
'''
def __init__(
self,
model,
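Following the documented convention that the model emits 2 * n values (n unscaled means followed by n log-variances), here is a hedged construction sketch with illustrative sizes:

import torch
from gym.spaces import Box
from all.policies import GaussianPolicy

state_dim, action_dim = 8, 2
model = torch.nn.Sequential(
    torch.nn.Linear(state_dim, 64),
    torch.nn.ReLU(),
    torch.nn.Linear(64, 2 * action_dim),  # n means followed by n log-variances
)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
action_space = Box(low=-1.0, high=1.0, shape=(action_dim,))
policy = GaussianPolicy(model, optimizer, action_space)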
13 changes: 13 additions & 0 deletions all/policies/greedy.py
@@ -3,6 +3,19 @@
from all.optim import Schedulable

class GreedyPolicy(Schedulable):
'''
An "epsilon-greedy" action selection policy for discrete action spaces.
This policy will usually choose the optimal action according to an approximation
of the action value function (the "q-function"), but with probability epsilon will
choose a random action instead. GreedyPolicy is a Schedulable, meaning that
epsilon can be varied over time by passing a Scheduler object.
Args:
q (all.approximation.QNetwork): The action-value or "q-function"
num_actions (int): The number of available actions.
epsilon (float, optional): The probability of selecting a random action.
'''
def __init__(
self,
q,
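To make the selection rule concrete, here is a library-agnostic sketch of epsilon-greedy action selection (not GreedyPolicy's actual code, which queries a QNetwork and supports a scheduled epsilon):

import numpy as np

def epsilon_greedy(q_values, epsilon):
    # With probability epsilon pick a uniformly random action;
    # otherwise pick the action with the highest estimated value.
    if np.random.rand() < epsilon:
        return np.random.randint(len(q_values))
    return int(np.argmax(q_values))

action = epsilon_greedy(np.array([0.1, 0.5, 0.2]), epsilon=0.1)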
16 changes: 15 additions & 1 deletion all/policies/soft_deterministic.py
@@ -3,6 +3,20 @@
from all.nn import RLNetwork

class SoftDeterministicPolicy(Approximation):
'''
A "soft" deterministic policy compatible with soft actor-critic (SAC).
Args:
model (torch.nn.Module): A Pytorch module representing the policy network.
The input shape should be the same as the shape of the state (or feature) space,
and the output shape should be double the size of the action space.
The first n outputs will be the unscaled mean of the action for each dimension,
and the second n outputs will be the logarithm of the variance.
optimizer (torch.optim.Optimizer): An optimizer initialized with the
model parameters, e.g. SGD, Adam, RMSprop, etc.
action_space (gym.spaces.Box): The Box representing the action space.
kwargs (optional): Any other arguments accepted by all.approximation.Approximation
'''
def __init__(
self,
model,
@@ -32,7 +46,7 @@ def forward(self, state):
return self._squash(normal.loc)

def _normal(self, outputs):
means = outputs[:, 0 : self._action_dim]
means = outputs[:, 0:self._action_dim]
logvars = outputs[:, self._action_dim:]
std = logvars.mul(0.5).exp_()
return torch.distributions.normal.Normal(means, std)
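The _normal helper above splits the 2 * n network outputs into n means and n log-variances. A stand-alone sketch of that convention plus the tanh squashing used by SAC-style policies (an illustrative version; the library's own forward and _squash methods are the authoritative implementation):

import torch

def sample_squashed(outputs, action_dim):
    # Split the 2n outputs into n means and n log-variances,
    # sample from the resulting Gaussian, and squash to (-1, 1) with tanh.
    means = outputs[:, :action_dim]
    logvars = outputs[:, action_dim:]
    std = logvars.mul(0.5).exp()
    normal = torch.distributions.Normal(means, std)
    return torch.tanh(normal.rsample())

outputs = torch.randn(1, 4)           # batch of 1, action_dim = 2
action = sample_squashed(outputs, 2)  # entries lie in (-1, 1)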
11 changes: 11 additions & 0 deletions all/policies/softmax.py
@@ -5,6 +5,17 @@


class SoftmaxPolicy(Approximation):
'''
A softmax (or Boltzmann) stochastic policy for discrete actions.
Args:
model (torch.nn.Module): A Pytorch module representing the policy network.
The input shape should be the same as the shape of the state (or feature) space,
and the output should be a vector the size of the action set.
optimizer (torch.optim.Optimizer): An optimizer initialized with the
model parameters, e.g. SGD, Adam, RMSprop, etc.
kwargs (optional): Any other arguments accepted by all.approximation.Approximation
'''
def __init__(
self,
model,
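For reference, the core of softmax (Boltzmann) action selection over a vector of logits can be sketched independently of the library as:

import torch

def softmax_sample(logits):
    # Turn a vector of action preferences (logits) into a categorical
    # distribution and sample an action index from it.
    dist = torch.distributions.Categorical(logits=logits)
    return dist.sample()

logits = torch.tensor([1.0, 2.0, 0.5])  # one entry per discrete action
action = softmax_sample(logits)         # index of the sampled action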
9 changes: 7 additions & 2 deletions docs/source/conf.py
@@ -40,12 +40,17 @@
]

# Autosummary settings
autodoc_default_flags = ['members']
autodoc_default_options = {
'members': True,
'undoc-members': True,
'show-inheritance': True
}
autosummary_generate = True
autodoc_inherit_docstrings = True

# Mock requirements to save resources during doc build machine setup
autodoc_mock_imports = [
'torch',
# 'torch',
'torchvision',
]

1 change: 1 addition & 0 deletions docs/source/modules/nn.rst
@@ -5,4 +5,5 @@ all.nn
=================

.. automodule:: all.nn
:ignore-module-all:
:members:
