Refactor/documentation (#130)
* update installation

* tweak getting_started

* update act documentation

* update basic_concepts

* typos

* update agent docstrings

* update Approximation and Feature documentation

* update approximation constructor docs

* add Writer documentation

* update travis.yml

* linting
cpnota committed Apr 17, 2020
1 parent 4a57780 commit ca5ec2b
Showing 12 changed files with 188 additions and 63 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -8,7 +8,7 @@ branches:
install:
- pip install https://download.pytorch.org/whl/cpu/torch-1.0.1.post2-cp36-cp36m-linux_x86_64.whl
- pip install torchvision
-- pip install -q -e .
+- pip install -q -e .["dev"]
script:
- make lint
- make test
4 changes: 1 addition & 3 deletions Makefile
@@ -1,7 +1,5 @@
install:
-conda install pytorch torchvision cudatoolkit=10.1 -c pytorch
-pip install tensorboard
-pip install -e .
+pip install -e .[dev]

lint:
pylint all --rcfile=.pylintrc
8 changes: 7 additions & 1 deletion all/agents/_agent.py
@@ -28,7 +28,6 @@ def act(self, state, reward):
Args:
state (all.environment.State): The environment state at the current timestep.
reward (torch.Tensor): The reward from the previous timestep.
-info (:obj:, optional): The info object from the environment.
Returns:
torch.Tensor: The action to take at the current timestep.
@@ -43,4 +42,11 @@ def eval(self, state, reward):
Most of the time, this method should return the greedy action according to the current policy.
This method is useful when using evaluation methodologies that distinguish between the performance
of the agent during training and the performance of the resulting policy.
Args:
state (all.environment.State): The environment state at the current timestep.
reward (torch.Tensor): The reward from the previous timestep.
Returns:
torch.Tensor: The action to take at the current timestep.
"""
46 changes: 36 additions & 10 deletions all/approximation/approximation.py
@@ -18,31 +18,31 @@ class Approximation():
increased code reusability and simpler Agent implementations.
Args:
-model (:torch.nn.Module:): A Pytorch module representing the model
+model (torch.nn.Module): A Pytorch module representing the model
used to approximate the function. This could be a convolutional
network, a fully connected network, or any other Pytorch-compatible
model.
-optimizer (:torch.optim.Optimizer:): A optimizer initialized with the
+optimizer (torch.optim.Optimizer): An optimizer initialized with the
model parameters, e.g. SGD, Adam, RMSprop, etc.
-checkpointer: (:all.approximation.checkpointer.Checkpointer): A Checkpointer object
+checkpointer (all.approximation.checkpointer.Checkpointer): A Checkpointer object
that periodically saves the model and its parameters to the disk. Default:
A PeriodicCheckpointer that saves the model once every 200 updates.
-clip_grad: (float, optional): If non-zero, clips the norm of the
+clip_grad (float, optional): If non-zero, clips the norm of the
gradient to this value in order to prevent large updates and
improve stability.
See torch.nn.utils.clip_grad.
-loss_scaling: (float, optional): Multiplies the loss by this value before
+loss_scaling (float, optional): Multiplies the loss by this value before
performing a backwards pass. Useful when used with multi-headed networks
with shared feature layers.
-name: (str, optional): The name of the function approximator used for logging.
-scheduler: (:torch.optim.lr_scheduler._LRScheduler:, optional): A learning
+name (str, optional): The name of the function approximator used for logging.
+scheduler (torch.optim.lr_scheduler._LRScheduler, optional): A learning
rate scheduler initialized with the given optimizer. step() will be called
after every update.
-target: (:all.approximation.target.TargetNetwork, optional): A target network object
+target (all.approximation.target.TargetNetwork, optional): A target network object
to be used during optimization. A target network updates more slowly than
the base model that is being optimized, allowing for a more stable
optimization target.
-writer: (:all.logging.Writer:, optional): A Writer object used for logging.
+writer (all.logging.Writer, optional): A Writer object used for logging.
The standard object logs to tensorboard; however, other types of Writer objects
may be implemented by the user.
'''
@@ -111,14 +111,34 @@ def target(self, *inputs):
return self._target(*inputs)

def reinforce(self, loss):
'''
Backpropagate the loss through the model and make an update step.
Internally, this will perform most of the activities associated with a control loop
in standard machine learning environments, depending on the configuration of the object:
Gradient clipping, learning rate schedules, logging, checkpointing, etc.
Args:
loss (torch.Tensor): The loss computed for a batch of inputs.
Returns:
self: The current Approximation object
'''
loss = self._loss_scaling * loss
self._writer.add_loss(self._name, loss.detach())
loss.backward()
self.step()
return self

def step(self):
-'''Given that a backward pass has been made, run an optimization step.'''
'''
Given that a backward pass has been made, run an optimization step.
Internally, this will perform most of the activities associated with a control loop
in standard machine learning environments, depending on the configuration of the object:
Gradient clipping, learning rate schedules, logging, checkpointing, etc.
Returns:
self: The current Approximation object
'''
if self._clip_grad != 0:
utils.clip_grad_norm_(self.model.parameters(), self._clip_grad)
self._optimizer.step()
@@ -131,5 +151,11 @@ def step(self):
return self

def zero_grad(self):
'''
Clears the gradients of all optimized tensors
Returns:
self: The current Approximation object
'''
self._optimizer.zero_grad()
return self
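
Taken together, these methods suggest a usage pattern along the lines of the sketch below. This is not part of the commit; it assumes Approximation is importable from all.approximation and uses a made-up model, batch, and loss.

import torch
from torch import nn, optim
from all.approximation import Approximation

# A hypothetical one-layer value model and its optimizer.
model = nn.Linear(4, 1)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
v = Approximation(model, optimizer, name='value', clip_grad=0.5)

# A made-up batch of inputs and regression targets.
inputs = torch.randn(32, 4)
targets = torch.randn(32, 1)

values = v(inputs)                              # forward pass through the wrapped model
loss = nn.functional.mse_loss(values, targets)  # any differentiable loss
v.reinforce(loss)                               # backward pass, clipping, optimizer step, logging
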
21 changes: 21 additions & 0 deletions all/approximation/feature_network.py
@@ -4,13 +4,31 @@


class FeatureNetwork(Approximation):
'''
A special type of Approximation that accumulates gradients before backpropagating them.
This is useful when features are shared between network heads.
The __call__ function caches the computation graph and detaches the output.
Then, various function approximators may backpropagate to the output.
The reinforce() function will then backpropagate the accumulated gradients on the output
through the original computation graph.
'''
def __init__(self, model, optimizer=None, name='feature', **kwargs):
model = FeatureModule(model)
super().__init__(model, optimizer, name=name, **kwargs)
self._cache = []
self._out = []

def __call__(self, states):
'''
Run a forward pass of the model and return the detached output.
Args:
states (all.environment.State): An environment State
Returns:
all.environment.State: An environment State with the computed features
'''
features = self.model(states)
graphs = features.raw
# pylint: disable=protected-access
@@ -20,6 +38,9 @@ def __call__(self, states):
return features

def reinforce(self):
'''
Backpropagate the accumulated gradients through the cached computation graph and perform an optimization step.
'''
graphs, grads = self._dequeue()
graphs.backward(grads)
self.step()
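
For reference, the accumulate-then-backpropagate pattern described above might be wired up roughly as follows. This sketch is not part of the commit; it assumes both classes are exported from all.approximation, the models are placeholders, and building a real all.environment.State batch is environment-specific, so that part is left as comments.

from torch import nn, optim
from all.approximation import Approximation, FeatureNetwork

feature_model = nn.Linear(4, 16)
value_model = nn.Linear(16, 1)
features = FeatureNetwork(feature_model, optim.Adam(feature_model.parameters()))
v = Approximation(value_model, optim.Adam(value_model.parameters()), name='value')

# states = ...                  # an all.environment.State batch from the environment
# phi = features(states)        # forward pass; graph is cached, output is detached
# loss = some_loss(v(phi))      # hypothetical loss computed on a network head
# v.reinforce(loss)             # backpropagates into the detached feature output
# features.reinforce()          # backpropagates the accumulated gradients through the cached graph
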
2 changes: 2 additions & 0 deletions all/experiments/__init__.py
@@ -2,6 +2,7 @@
from .experiment import Experiment
from .single_env_experiment import SingleEnvExperiment
from .parallel_env_experiment import ParallelEnvExperiment
from .writer import ExperimentWriter
from .plots import plot_returns_100
from .slurm import SlurmExperiment
from .watch import GreedyAgent, watch, load_and_watch
@@ -13,6 +14,7 @@
"ParallelEnvExperiment",
"SlurmExperiment",
"GreedyAgent",
"ExperimentWriter",
"watch",
"load_and_watch",
]
22 changes: 22 additions & 0 deletions all/experiments/writer.py
@@ -8,6 +8,20 @@


class ExperimentWriter(SummaryWriter, Writer):
'''
The Writer object used by all.experiments.Experiment.
Writes logs using tensorboard into the current `runs` directory,
tagging the run with a combination of the agent name, the commit hash of the
current git repo of the working directory (if any), and the current time.
Also writes summary statistics into CSV files.
Args:
experiment (all.experiments.Experiment): The Experiment associated with the Writer object.
agent_name (str): The name of the Agent the Experiment is being performed on
env_name (str): The name of the environment the Experiment is being performed in
loss (bool, optional): If True, log loss and schedule metrics in addition to evaluation and summary metrics.
'''
def __init__(self, experiment, agent_name, env_name, loss=True):
self.env_name = env_name
current_time = str(datetime.now())
@@ -35,6 +49,14 @@ def add_schedule(self, name, value, step="frame"):
self.add_scalar("schedule" + "/" + name, value, self._get_step(step))

def add_scalar(self, name, value, step="frame"):
'''
Log an arbitrary scalar.
Args:
name (str): The tag to associate with the scalar
value (number): The value of the scalar at the current step
step (str, optional): Which step to use (e.g., "frame" or "episode")
'''
super().add_scalar(self.env_name + "/" + name, value, self._get_step(step))

def add_summary(self, name, mean, std, step="frame"):
48 changes: 43 additions & 5 deletions all/logging/__init__.py
@@ -11,26 +11,64 @@ class Writer(ABC):

@abstractmethod
def add_loss(self, name, value, step="frame"):
-pass
'''
Log the given loss metric at the current step.
Args:
name (str): The tag to associate with the loss
value (number): The value of the loss at the current step
step (str, optional): Which step to use (e.g., "frame" or "episode")
'''

@abstractmethod
def add_evaluation(self, name, value, step="frame"):
-pass
'''
Log the evaluation metric.
Args:
name (str): The tag to associate with the evaluation metric
value (number): The evaluation metric at the current step
step (str, optional): Which step to use (e.g., "frame" or "episode")
'''

@abstractmethod
def add_scalar(self, name, value, step="frame"):
-pass
'''
Log an arbitrary scalar.
Args:
name (str): The tag to associate with the scalar
value (number): The value of the scalar at the current step
step (str, optional): Which step to use (e.g., "frame" or "episode")
'''

@abstractmethod
def add_schedule(self, name, value, step="frame"):
-pass
'''
Log the current value of a hyperparameter according to some schedule.
Args:
name (str): The tag to associate with the hyperparameter schedule
value (number): The value of the hyperparameter at the current step
step (str, optional): Which step to use (e.g., "frame" or "episode")
'''

@abstractmethod
def add_summary(self, name, mean, std, step="frame"):
-pass
'''
Log a summary statistic.
Args:
name (str): The tag to associate with the summary statistic
mean (float): The mean of the statistic at the current step
std (float): The standard deviation of the statistic at the current step
step (str, optional): Which step to use (e.g., "frame" or "episode")
'''


class DummyWriter(Writer):
'''A default Writer object that performs no logging and has no side effects.'''

def add_loss(self, name, value, step="frame"):
pass

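
For illustration, a user-defined Writer might look like the sketch below. This is not part of the commit; it assumes the five methods shown here make up the whole abstract interface and that Writer is importable from all.logging.

from all.logging import Writer

class PrintWriter(Writer):
    '''A toy Writer that prints every metric to stdout instead of logging to tensorboard.'''

    def add_loss(self, name, value, step="frame"):
        print(f"loss/{name}: {value} ({step})")

    def add_evaluation(self, name, value, step="frame"):
        print(f"evaluation/{name}: {value} ({step})")

    def add_scalar(self, name, value, step="frame"):
        print(f"{name}: {value} ({step})")

    def add_schedule(self, name, value, step="frame"):
        print(f"schedule/{name}: {value} ({step})")

    def add_summary(self, name, mean, std, step="frame"):
        print(f"{name}: mean={mean}, std={std} ({step})")
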
