Refactor/documentation (#130)
* update installation

* tweak getting_started

* update act documentation

* update basic_concepts

* typos

* update agent docstrings

* update Approximation and Feature documentation

* update approximation constructor docs

* add Writer documentation

* update travis.yml

* linting
cpnota committed Apr 17, 2020
1 parent 4a57780 commit ca5ec2b
Showing 12 changed files with 188 additions and 63 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -8,7 +8,7 @@ branches:
install:
- pip install https://download.pytorch.org/whl/cpu/torch-1.0.1.post2-cp36-cp36m-linux_x86_64.whl
- pip install torchvision
-- pip install -q -e .
+- pip install -q -e .["dev"]
script:
- make lint
- make test
4 changes: 1 addition & 3 deletions Makefile
@@ -1,7 +1,5 @@
install:
-conda install pytorch torchvision cudatoolkit=10.1 -c pytorch
-pip install tensorboard
-pip install -e .
+pip install -e .[dev]

lint:
pylint all --rcfile=.pylintrc
8 changes: 7 additions & 1 deletion all/agents/_agent.py
@@ -28,7 +28,6 @@ def act(self, state, reward):
Args:
state (all.environment.State): The environment state at the current timestep.
reward (torch.Tensor): The reward from the previous timestep.
-info (:obj:, optional): The info object from the environment.
Returns:
torch.Tensor: The action to take at the current timestep.
@@ -43,4 +42,11 @@ def eval(self, state, reward):
Most of the time, this method should return the greedy action according to the current policy.
This method is useful when using evaluation methodologies that distinguish between the performance
of the agent during training and the performance of the resulting policy.
Args:
state (all.environment.State): The environment state at the current timestep.
reward (torch.Tensor): The reward from the previous timestep.
Returns:
torch.Tensor: The action to take at the current timestep.
"""
46 changes: 36 additions & 10 deletions all/approximation/approximation.py
@@ -18,31 +18,31 @@ class Approximation():
increased code reusability and simpler Agent implementations.
Args:
-model (:torch.nn.Module:): A Pytorch module representing the model
+model (torch.nn.Module): A Pytorch module representing the model
used to approximate the function. This could be a convolutional
network, a fully connected network, or any other Pytorch-compatible
model.
-optimizer (:torch.optim.Optimizer:): A optimizer initialized with the
+optimizer (torch.optim.Optimizer): An optimizer initialized with the
model parameters, e.g. SGD, Adam, RMSprop, etc.
-checkpointer: (:all.approximation.checkpointer.Checkpointer): A Checkpointer object
+checkpointer (all.approximation.checkpointer.Checkpointer): A Checkpointer object
that periodically saves the model and its parameters to the disk. Default:
A PeriodicCheckpointer that saves the model once every 200 updates.
-clip_grad: (float, optional): If non-zero, clips the norm of the
+clip_grad (float, optional): If non-zero, clips the norm of the
gradient to this value in order to prevent large updates and
improve stability.
See torch.nn.utils.clip_grad.
-loss_scaling: (float, optional): Multiplies the loss by this value before
+loss_scaling (float, optional): Multiplies the loss by this value before
performing a backwards pass. Useful when used with multi-headed networks
with shared feature layers.
-name: (str, optional): The name of the function approximator used for logging.
-scheduler: (:torch.optim.lr_scheduler._LRScheduler:, optional): A learning
+name (str, optional): The name of the function approximator used for logging.
+scheduler (torch.optim.lr_scheduler._LRScheduler, optional): A learning
rate scheduler initialized with the given optimizer. step() will be called
after every update.
-target: (:all.approximation.target.TargetNetwork, optional): A target network object
+target (all.approximation.target.TargetNetwork, optional): A target network object
to be used during optimization. A target network updates more slowly than
the base model that is being optimized, allowing for a more stable
optimization target.
-writer: (:all.logging.Writer:, optional): A Writer object used for logging.
+writer (all.logging.Writer, optional): A Writer object used for logging.
The standard object logs to tensorboard; however, other types of Writer objects
may be implemented by the user.
'''
@@ -111,14 +111,34 @@ def target(self, *inputs):
return self._target(*inputs)

def reinforce(self, loss):
'''
Backpropagate the loss through the model and make an update step.
Internally, this will perform most of the activities associated with a control loop
in standard machine learning environments, depending on the configuration of the object:
Gradient clipping, learning rate schedules, logging, checkpointing, etc.
Args:
loss (torch.Tensor): The loss computed for a batch of inputs.
Returns:
self: The current Approximation object
'''
loss = self._loss_scaling * loss
self._writer.add_loss(self._name, loss.detach())
loss.backward()
self.step()
return self

def step(self):
-'''Given that a backward pass has been made, run an optimization step.'''
'''
Given that a backward pass has been made, run an optimization step.
Internally, this will perform most of the activities associated with a control loop
in standard machine learning environments, depending on the configuration of the object:
Gradient clipping, learning rate schedules, logging, checkpointing, etc.
Returns:
self: The current Approximation object
'''
if self._clip_grad != 0:
utils.clip_grad_norm_(self.model.parameters(), self._clip_grad)
self._optimizer.step()
@@ -131,5 +151,11 @@ def step(self):
return self

def zero_grad(self):
'''
Clears the gradients of all optimized tensors
Returns:
self: The current Approximation object
'''
self._optimizer.zero_grad()
return self
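
Taken together, these methods suggest a usage pattern along the lines of the sketch below. This is not part of the commit; it assumes Approximation is importable from all.approximation and uses a made-up model, batch, and loss.

import torch
from torch import nn, optim
from all.approximation import Approximation

# A hypothetical one-layer value model and its optimizer.
model = nn.Linear(4, 1)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
v = Approximation(model, optimizer, name='value', clip_grad=0.5)

# A made-up batch of inputs and regression targets.
inputs = torch.randn(32, 4)
targets = torch.randn(32, 1)

values = v(inputs)                              # forward pass through the wrapped model
loss = nn.functional.mse_loss(values, targets)  # any differentiable loss
v.reinforce(loss)                               # backward pass, clipping, optimizer step, logging
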
21 changes: 21 additions & 0 deletions all/approximation/feature_network.py
@@ -4,13 +4,31 @@


class FeatureNetwork(Approximation):
'''
A special type of Approximation that accumulates gradients before backpropagating them.
This is useful when features are shared between network heads.
The __call__ function caches the computation graph and detaches the output.
Then, various function approximators may backpropagate to the output.
The reinforce() function will then backpropagate the accumulated gradients on the output
through the original computation graph.
'''
def __init__(self, model, optimizer=None, name='feature', **kwargs):
model = FeatureModule(model)
super().__init__(model, optimizer, name=name, **kwargs)
self._cache = []
self._out = []

def __call__(self, states):
'''
Run a forward pass of the model and return the detached output.
Args:
states (all.environment.State): An environment State
Returns:
all.environment.State: An environment State with the computed features
'''
features = self.model(states)
graphs = features.raw
# pylint: disable=protected-access
@@ -20,6 +38,9 @@ def __call__(self, states):
return features

def reinforce(self):
'''
Backpropagate the accumulated gradients through the cached computation graph and perform an optimization step.
'''
graphs, grads = self._dequeue()
graphs.backward(grads)
self.step()
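
For reference, the accumulate-then-backpropagate pattern described above might be wired up roughly as follows. This sketch is not part of the commit; it assumes both classes are exported from all.approximation, the models are placeholders, and building a real all.environment.State batch is environment-specific, so that part is left as comments.

from torch import nn, optim
from all.approximation import Approximation, FeatureNetwork

feature_model = nn.Linear(4, 16)
value_model = nn.Linear(16, 1)
features = FeatureNetwork(feature_model, optim.Adam(feature_model.parameters()))
v = Approximation(value_model, optim.Adam(value_model.parameters()), name='value')

# states = ...                  # an all.environment.State batch from the environment
# phi = features(states)        # forward pass; graph is cached, output is detached
# loss = some_loss(v(phi))      # hypothetical loss computed on a network head
# v.reinforce(loss)             # backpropagates into the detached feature output
# features.reinforce()          # backpropagates the accumulated gradients through the cached graph
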
2 changes: 2 additions & 0 deletions all/experiments/__init__.py
@@ -2,6 +2,7 @@
from .experiment import Experiment
from .single_env_experiment import SingleEnvExperiment
from .parallel_env_experiment import ParallelEnvExperiment
from .writer import ExperimentWriter
from .plots import plot_returns_100
from .slurm import SlurmExperiment
from .watch import GreedyAgent, watch, load_and_watch
@@ -13,6 +14,7 @@
"ParallelEnvExperiment",
"SlurmExperiment",
"GreedyAgent",
"ExperimentWriter",
"watch",
"load_and_watch",
]
22 changes: 22 additions & 0 deletions all/experiments/writer.py
@@ -8,6 +8,20 @@


class ExperimentWriter(SummaryWriter, Writer):
'''
The Writer object used by all.experiments.Experiment.
Writes logs using tensorboard into the current `runs` directory,
tagging the run with a combination of the agent name, the commit hash of the
current git repo of the working directory (if any), and the current time.
Also writes summary statistics into CSV files.
Args:
experiment (all.experiments.Experiment): The Experiment associated with the Writer object.
agent_name (str): The name of the Agent the Experiment is being performed on
env_name (str): The name of the environment the Experiment is being performed in
loss (bool, optional): If True, log loss and schedule metrics in addition to evaluation and summary metrics.
'''
def __init__(self, experiment, agent_name, env_name, loss=True):
self.env_name = env_name
current_time = str(datetime.now())
@@ -35,6 +49,14 @@ def add_schedule(self, name, value, step="frame"):
self.add_scalar("schedule" + "/" + name, value, self._get_step(step))

def add_scalar(self, name, value, step="frame"):
'''
Log an arbitrary scalar.
Args:
name (str): The tag to associate with the scalar
value (number): The value of the scalar at the current step
step (str, optional): Which step to use (e.g., "frame" or "episode")
'''
super().add_scalar(self.env_name + "/" + name, value, self._get_step(step))

def add_summary(self, name, mean, std, step="frame"):
48 changes: 43 additions & 5 deletions all/logging/__init__.py
@@ -11,26 +11,64 @@ class Writer(ABC):

@abstractmethod
def add_loss(self, name, value, step="frame"):
-pass
'''
Log the given loss metric at the current step.
Args:
name (str): The tag to associate with the loss
value (number): The value of the loss at the current step
step (str, optional): Which step to use (e.g., "frame" or "episode")
'''

@abstractmethod
def add_evaluation(self, name, value, step="frame"):
-pass
'''
Log the evaluation metric.
Args:
name (str): The tag to associate with the evaluation metric
value (number): The evaluation metric at the current step
step (str, optional): Which step to use (e.g., "frame" or "episode")
'''

@abstractmethod
def add_scalar(self, name, value, step="frame"):
-pass
'''
Log an arbitrary scalar.
Args:
name (str): The tag to associate with the scalar
value (number): The value of the scalar at the current step
step (str, optional): Which step to use (e.g., "frame" or "episode")
'''

@abstractmethod
def add_schedule(self, name, value, step="frame"):
-pass
'''
Log the current value of a hyperparameter according to some schedule.
Args:
name (str): The tag to associate with the hyperparameter schedule
value (number): The value of the hyperparameter at the current step
step (str, optional): Which step to use (e.g., "frame" or "episode")
'''

@abstractmethod
def add_summary(self, name, mean, std, step="frame"):
-pass
'''
Log a summary statistic.
Args:
name (str): The tag to associate with the summary statistic
mean (float): The mean of the statistic at the current step
std (float): The standard deviation of the statistic at the current step
step (str, optional): Which step to use (e.g., "frame" or "episode")
'''


class DummyWriter(Writer):
'''A default Writer object that performs no logging and has no side effects.'''

def add_loss(self, name, value, step="frame"):
pass

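
For illustration, a user-defined Writer might look like the sketch below. This is not part of the commit; it assumes the five methods shown here make up the whole abstract interface and that Writer is importable from all.logging.

from all.logging import Writer

class PrintWriter(Writer):
    '''A toy Writer that prints every metric to stdout instead of logging to tensorboard.'''

    def add_loss(self, name, value, step="frame"):
        print(f"loss/{name}: {value} ({step})")

    def add_evaluation(self, name, value, step="frame"):
        print(f"evaluation/{name}: {value} ({step})")

    def add_scalar(self, name, value, step="frame"):
        print(f"{name}: {value} ({step})")

    def add_schedule(self, name, value, step="frame"):
        print(f"schedule/{name}: {value} ({step})")

    def add_summary(self, name, mean, std, step="frame"):
        print(f"{name}: mean={mean}, std={std} ({step})")
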
