Refactor/logging (#271)
* move ExperimentWriter to logging folder

* rename Writer to Logger

* update documentation to use logger instead of writer

* update documentation to use logger instead of writer

* refactor logging api

* rename verbose logging mode

* linting

Co-authored-by: Nota, Christopher <cnota@irobot.com>
cpnota and Nota, Christopher committed Apr 15, 2022
1 parent a682f50 commit d3a537a
Showing 59 changed files with 491 additions and 557 deletions.
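The 59 file diffs repeat a small set of renames. As an informal summary sketch (every name below is taken from the diffs shown on this page; nothing new is introduced):

# Writers -> Loggers
#   all.logging.DummyWriter          -> all.logging.DummyLogger
#   all.experiments.ExperimentWriter -> all.logging.ExperimentLogger
#   all.experiments.CometWriter      -> all.logging.CometLogger
#
# Constructor keywords
#   writer=DummyWriter()  -> logger=DummyLogger()   (agents and approximations)
#   write_loss=True       -> verbose=True           (experiments)
#   writer="tensorboard"  -> logger="tensorboard"   (experiments)
#
# Logging calls
#   writer.add_scalar(...)     -> logger.add_info(...)   (agents)
#   writer.add_evaluation(...) -> logger.add_eval(...)   (experiments)
#   add_loss, add_summary, and add_schedule keep their names on the new Logger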
12 changes: 6 additions & 6 deletions all/agents/a2c.py
@@ -1,6 +1,6 @@
import torch
from torch.nn.functional import mse_loss
from all.logging import DummyWriter
from all.logging import DummyLogger
from all.memory import NStepAdvantageBuffer
from ._agent import Agent
from ._parallel_agent import ParallelAgent
@@ -24,7 +24,7 @@ class A2C(ParallelAgent):
discount_factor (float): Discount factor for future rewards.
n_envs (int): Number of parallel actors/environments
n_steps (int): Number of timesteps per rollout. Updates are performed once per rollout.
writer (Writer): Used for logging.
logger (Logger): Used for logging.
"""

def __init__(
@@ -36,15 +36,15 @@ def __init__(
entropy_loss_scaling=0.01,
n_envs=None,
n_steps=4,
writer=DummyWriter()
logger=DummyLogger()
):
if n_envs is None:
raise RuntimeError("Must specify n_envs.")
# objects
self.features = features
self.v = v
self.policy = policy
self.writer = writer
self.logger = logger
# hyperparameters
self.discount_factor = discount_factor
self.entropy_loss_scaling = entropy_loss_scaling
@@ -90,8 +90,8 @@ def _train(self, next_states):
self.features.step()

# record metrics
self.writer.add_scalar('entropy', -entropy_loss)
self.writer.add_scalar('normalized_value_error', value_loss / targets.var())
self.logger.add_info('entropy', -entropy_loss)
self.logger.add_info('normalized_value_error', value_loss / targets.var())

def _make_buffer(self):
return NStepAdvantageBuffer(
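For code that constructs agents directly, the visible change in this file is the constructor keyword. A call-site sketch under the new API, assuming A2C is importable from all.agents and that features, v, and policy are already-built approximations (both assumptions are illustrative, not part of the diff):

from all.agents import A2C          # import path assumed; only all/agents/a2c.py is shown here
from all.logging import DummyLogger

# before this commit: A2C(features, v, policy, n_envs=16, writer=DummyWriter())
agent = A2C(
    features, v, policy,            # placeholder Approximation objects, not defined here
    n_envs=16,                      # required; omitting it raises RuntimeError
    n_steps=4,
    logger=DummyLogger(),           # renamed from writer=DummyWriter()
)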
8 changes: 4 additions & 4 deletions all/agents/c51.py
@@ -1,6 +1,6 @@
import torch
import numpy as np
from all.logging import DummyWriter
from all.logging import DummyLogger
from ._agent import Agent


@@ -35,12 +35,12 @@ def __init__(
minibatch_size=32,
replay_start_size=5000,
update_frequency=1,
writer=DummyWriter(),
logger=DummyLogger(),
):
# objects
self.q_dist = q_dist
self.replay_buffer = replay_buffer
self.writer = writer
self.logger = logger
# hyperparameters
self.eps = eps
self.exploration = exploration
@@ -94,7 +94,7 @@ def _train(self):
# update replay buffer priorities
self.replay_buffer.update_priorities(kl.detach())
# debugging
self.writer.add_loss(
self.logger.add_loss(
"q_mean", (dist.detach() * self.q_dist.atoms).sum(dim=1).mean()
)

12 changes: 6 additions & 6 deletions all/agents/ppo.py
@@ -1,6 +1,6 @@
import torch
from torch.nn.functional import mse_loss
from all.logging import DummyWriter
from all.logging import DummyLogger
from all.memory import GeneralizedAdvantageBuffer
from ._agent import Agent
from ._parallel_agent import ParallelAgent
@@ -27,7 +27,7 @@ class PPO(ParallelAgent):
compute_batch_size (int): The batch size to use for computations that do not need backpropagation.
n_envs (int): Number of parallel actors/environments.
n_steps (int): Number of timesteps per rollout. Updates are performed once per rollout.
writer (Writer): Used for logging.
logger (Logger): Used for logging.
"""

def __init__(
@@ -44,15 +44,15 @@ def __init__(
compute_batch_size=256,
n_envs=None,
n_steps=4,
writer=DummyWriter()
logger=DummyLogger()
):
if n_envs is None:
raise RuntimeError("Must specify n_envs.")
# objects
self.features = features
self.v = v
self.policy = policy
self.writer = writer
self.logger = logger
# hyperparameters
self.discount_factor = discount_factor
self.entropy_loss_scaling = entropy_loss_scaling
@@ -128,8 +128,8 @@ def _train_minibatch(self, states, actions, pi_0, advantages, targets):
self.features.step()

# debugging
self.writer.add_scalar('entropy', -entropy_loss)
self.writer.add_scalar('normalized_value_error', value_loss / targets.var())
self.logger.add_info('entropy', -entropy_loss)
self.logger.add_info('normalized_value_error', value_loss / targets.var())

def _clipped_policy_gradient_loss(self, pi_0, pi_i, advantages):
ratios = torch.exp(pi_i - pi_0)
16 changes: 8 additions & 8 deletions all/agents/sac.py
@@ -1,6 +1,6 @@
import torch
from torch.nn.functional import mse_loss
from all.logging import DummyWriter
from all.logging import DummyLogger
from ._agent import Agent


@@ -43,15 +43,15 @@ def __init__(self,
replay_start_size=5000,
temperature_initial=0.1,
update_frequency=1,
writer=DummyWriter()
logger=DummyLogger()
):
# objects
self.policy = policy
self.v = v
self.q_1 = q_1
self.q_2 = q_2
self.replay_buffer = replay_buffer
self.writer = writer
self.logger = logger
# hyperparameters
self.discount_factor = discount_factor
self.entropy_target = entropy_target
@@ -101,11 +101,11 @@ def _train(self):
self.temperature = max(0, self.temperature + self.lr_temperature * temperature_grad.detach())

# additional debugging info
self.writer.add_loss('entropy', -_log_probs.mean())
self.writer.add_loss('v_mean', v_targets.mean())
self.writer.add_loss('r_mean', rewards.mean())
self.writer.add_loss('temperature_grad', temperature_grad)
self.writer.add_loss('temperature', self.temperature)
self.logger.add_loss('entropy', -_log_probs.mean())
self.logger.add_loss('v_mean', v_targets.mean())
self.logger.add_loss('r_mean', rewards.mean())
self.logger.add_loss('temperature_grad', temperature_grad)
self.logger.add_loss('temperature', self.temperature)

def _should_train(self):
self._frames_seen += 1
2 changes: 1 addition & 1 deletion all/agents/vac.py
@@ -19,7 +19,7 @@ class VAC(ParallelAgent):
discount_factor (float): Discount factor for future rewards.
n_envs (int): Number of parallel actors/environments
n_steps (int): Number of timesteps per rollout. Updates are performed once per rollout.
writer (Writer): Used for logging.
logger (Logger): Used for logging.
'''

def __init__(self, features, v, policy, discount_factor=1):
16 changes: 8 additions & 8 deletions all/approximation/approximation.py
@@ -1,7 +1,7 @@
import os
import torch
from torch.nn import utils
from all.logging import DummyWriter
from all.logging import DummyLogger
from .target import TrivialTarget
from .checkpointer import DummyCheckpointer

@@ -45,8 +45,8 @@ class Approximation():
to be used during optimization. A target network updates more slowly than
the base model that is being optimized, allowing for a more stable
optimization target.
writer (all.logging.Writer:, optional): A Writer object used for logging.
The standard object logs to tensorboard, however, other types of Writer objects
logger (all.logging.Logger, optional): A Logger object used for logging.
The standard object logs to tensorboard, however, other types of Logger objects
may be implemented by the user.
'''

@@ -61,7 +61,7 @@ def __init__(
name='approximation',
scheduler=None,
target=None,
writer=DummyWriter(),
logger=DummyLogger(),
):
self.model = model
self.device = device if device else next(model.parameters()).device
@@ -73,15 +73,15 @@ def __init__(
self._loss_scaling = loss_scaling
self._cache = []
self._clip_grad = clip_grad
self._writer = writer
self._logger = logger
self._name = name

if checkpointer is None:
checkpointer = DummyCheckpointer()
self._checkpointer = checkpointer
self._checkpointer.init(
self.model,
os.path.join(writer.log_dir, name + '.pt')
os.path.join(logger.log_dir, name + '.pt')
)

def __call__(self, *inputs):
@@ -147,7 +147,7 @@ def step(self, loss=None):
self: The current Approximation object
'''
if loss is not None:
self._writer.add_loss(self._name, loss.detach())
self._logger.add_loss(self._name, loss.detach())
self._clip_grad_norm()
self._optimizer.step()
self._optimizer.zero_grad()
@@ -174,5 +174,5 @@ def _clip_grad_norm(self):
def _step_lr_scheduler(self):
'''Step the learning rate scheduler, if one is set, and log the current learning rate.'''
if self._scheduler:
self._writer.add_schedule(self._name + '/lr', self._optimizer.param_groups[0]['lr'])
self._logger.add_schedule(self._name + '/lr', self._optimizer.param_groups[0]['lr'])
self._scheduler.step()
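The docstring above notes that Logger objects other than the tensorboard-backed default may be implemented by the user. A minimal stand-in covering the calls exercised in this commit (add_loss, add_info, add_eval, add_summary, add_schedule, a log_dir attribute, and close) might look like the sketch below; the exact base-class interface is not shown in this diff, so treat the method list and signatures as assumptions:

class PrintLogger:
    '''Hypothetical user-defined Logger that prints metrics instead of writing to tensorboard.'''

    def __init__(self, log_dir='runs'):
        self.log_dir = log_dir  # Approximation joins checkpoint filenames onto this path

    def add_loss(self, name, value, step=None):
        print('loss/{}: {}'.format(name, value))

    def add_info(self, name, value, step=None):
        print('info/{}: {}'.format(name, value))

    def add_eval(self, name, value, step=None):
        print('eval/{}: {}'.format(name, value))

    def add_summary(self, name, mean, std, step=None):
        print('summary/{}: {} ± {}'.format(name, mean, std))

    def add_schedule(self, name, value, step=None):
        print('schedule/{}: {}'.format(name, value))

    def close(self):
        pass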
1 change: 1 addition & 0 deletions all/approximation/feature_network.py
@@ -7,6 +7,7 @@ class FeatureNetwork(Approximation):
An Approximation that accepts a state and updates the observation key
based on the given model.
'''

def __init__(self, model, optimizer=None, name='feature', **kwargs):
model = FeatureModule(model)
super().__init__(model, optimizer, name=name, **kwargs)
5 changes: 1 addition & 4 deletions all/experiments/__init__.py
@@ -3,8 +3,6 @@
from .single_env_experiment import SingleEnvExperiment
from .parallel_env_experiment import ParallelEnvExperiment
from .multiagent_env_experiment import MultiagentEnvExperiment
from .writer import ExperimentWriter
from .writer import CometWriter
from .plots import plot_returns_100
from .slurm import SlurmExperiment
from .watch import watch, load_and_watch
@@ -16,8 +14,7 @@
"ParallelEnvExperiment",
"MultiagentEnvExperiment",
"SlurmExperiment",
"ExperimentWriter",
"CometWriter",
"watch",
"load_and_watch",
"plot_returns_100"
]
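Since ExperimentWriter and CometWriter are no longer re-exported from all.experiments, downstream imports move to the logging package. A migration sketch, with the old path taken from the removed lines above and the new path from the import in multiagent_env_experiment.py below:

# before this commit
# from all.experiments import ExperimentWriter, CometWriter

# after this commit
from all.logging import ExperimentLogger, CometLogger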
22 changes: 11 additions & 11 deletions all/experiments/experiment.py
@@ -7,13 +7,13 @@ class Experiment(ABC):
An Experiment manages the basic train/test loop and logs results.
Args:
writer (:torch.logging.writer:): A Writer object used for logging.
logger (:torch.logging.logger:): A Logger object used for logging.
quiet (bool): If False, the Experiment will print information about
episode returns to standard out.
'''

def __init__(self, writer, quiet):
self._writer = writer
def __init__(self, logger, quiet):
self._logger = logger
self._quiet = quiet
self._best_returns = -np.inf
self._returns100 = []
@@ -61,12 +61,12 @@ def _log_training_episode(self, returns, fps):
if len(self._returns100) == 100:
mean = np.mean(self._returns100)
std = np.std(self._returns100)
self._writer.add_summary('returns100', mean, std, step="frame")
self._logger.add_summary('returns100', mean, std, step="frame")
self._returns100 = []
self._writer.add_evaluation('returns/episode', returns, step="episode")
self._writer.add_evaluation('returns/frame', returns, step="frame")
self._writer.add_evaluation("returns/max", self._best_returns, step="frame")
self._writer.add_scalar('fps', fps, step="frame")
self._logger.add_eval('returns/episode', returns, step="episode")
self._logger.add_eval('returns/frame', returns, step="frame")
self._logger.add_eval("returns/max", self._best_returns, step="frame")
self._logger.add_eval('fps', fps, step="frame")

def _log_test_episode(self, episode, returns):
if not self._quiet:
@@ -77,10 +77,10 @@ def _log_test(self, returns):
mean = np.mean(returns)
sem = np.std(returns) / np.sqrt(len(returns))
print('test returns (mean ± sem): {} ± {}'.format(mean, sem))
self._writer.add_summary('returns-test', np.mean(returns), np.std(returns))
self._logger.add_summary('returns-test', np.mean(returns), np.std(returns))

def save(self):
return self._preset.save('{}/preset.pt'.format(self._writer.log_dir))
return self._preset.save('{}/preset.pt'.format(self._logger.log_dir))

def close(self):
self._writer.close()
self._logger.close()
34 changes: 20 additions & 14 deletions all/experiments/multiagent_env_experiment.py
@@ -1,6 +1,6 @@
from timeit import default_timer as timer
import numpy as np
from .writer import ExperimentWriter, CometWriter
from all.logging import ExperimentLogger, CometLogger


class MultiagentEnvExperiment():
@@ -16,7 +16,7 @@ class MultiagentEnvExperiment():
render (bool, optional): Whether or not to render during training.
save_freq (int, optional): How often to save the model to disk.
train_steps (int, optional): The number of steps for which to train.
write_loss (bool, optional): Whether or not to log advanced loss information.
verbose (bool, optional): Whether or not to log detailed information or only summaries.
'''

def __init__(
@@ -29,12 +29,12 @@ def __init__(
render=False,
save_freq=100,
train_steps=float('inf'),
write_loss=True,
writer="tensorboard"
verbose=True,
logger="tensorboard"
):
self._name = name if name is not None else preset.name
self._writer = self._make_writer(logdir, self._name, env.name, write_loss, writer)
self._agent = preset.agent(writer=self._writer, train_steps=train_steps)
self._logger = self._make_logger(logdir, self._name, env.name, verbose, logger)
self._agent = preset.agent(logger=self._logger, train_steps=train_steps)
self._env = env
self._episode = 0
self._frame = 0
@@ -89,6 +89,12 @@ def test(self, episodes=100):
self._log_test(returns)
return returns

def save(self):
return self._preset.save('{}/preset.pt'.format(self._logger.log_dir))

def close(self):
self._logger.close()

'''int: The number of completed training frames'''
@property
def frame(self):
@@ -153,9 +159,9 @@ def _done(self, frames, episodes):
def _log_training_episode(self, returns, fps):
if not self._quiet:
print('returns: {}'.format(returns))
print('fps: {}'.format(fps))
print('frames: {}, fps: {}'.format(self._frame, fps))
for agent in self._env.agents:
self._writer.add_evaluation('{}/returns/frame'.format(agent), returns[agent], step="frame")
self._logger.add_eval('{}/returns/frame'.format(agent), returns[agent], step="frame")

def _log_test_episode(self, episode, returns):
if not self._quiet:
@@ -167,13 +173,13 @@ def _log_test(self, returns):
mean = np.mean(agent_returns)
sem = np.std(agent_returns) / np.sqrt(len(agent_returns))
print('{} test returns (mean ± sem): {} ± {}'.format(agent, mean, sem))
self._writer.add_summary('{}/returns-test'.format(agent), np.mean(agent_returns), np.std(agent_returns))
self._logger.add_summary('{}/returns-test'.format(agent), np.mean(agent_returns), np.std(agent_returns))

def _save_model(self):
if self._save_freq != float('inf') and self._episode % self._save_freq == 0:
self._preset.save('{}/preset.pt'.format(self._writer.log_dir))
self.save()

def _make_writer(self, logdir, agent_name, env_name, write_loss, writer):
if writer == "comet":
return CometWriter(self, agent_name, env_name, loss=write_loss, logdir=logdir)
return ExperimentWriter(self, agent_name, env_name, loss=write_loss, logdir=logdir)
def _make_logger(self, logdir, agent_name, env_name, verbose, logger):
if logger == "comet":
return CometLogger(self, agent_name, env_name, verbose=verbose, logdir=logdir)
return ExperimentLogger(self, agent_name, env_name, verbose=verbose, logdir=logdir)
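At the experiment level, the renamed keywords are write_loss -> verbose and writer -> logger. A call-site sketch under the new names, with preset and env as hypothetical placeholders and their positional order assumed (the full signature is collapsed in this diff):

from all.experiments import MultiagentEnvExperiment

# before this commit:
#   MultiagentEnvExperiment(preset, env, write_loss=True, writer="tensorboard")
experiment = MultiagentEnvExperiment(
    preset, env,              # placeholder preset and multiagent environment
    verbose=True,             # renamed from write_loss
    logger="tensorboard",     # renamed from writer; "comet" selects CometLogger instead
)
returns = experiment.test(episodes=100)   # test() appears in the diff above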
