Skip to content

Commit

Permalink
Added pytorch 1.8 profiler as hook with tensorboard visualization.
Browse files Browse the repository at this point in the history
Summary: Added a new hook which uses PyTorch's new profiler (in versions 1.8.1+) to better log and visualize training details. In particular, this new hook adds the ability to produce tensorboard visualizations, which the previous Autograd hook did not support.

Reviewed By: vaibhava0

Differential Revision: D29624951

fbshipit-source-id: 26e2b9cecf85ae2c545dc15a8103d6e1d983a94a
  • Loading branch information
louaaron authored and facebook-github-bot committed Jul 9, 2021
1 parent 87e9946 commit 76ec0a2
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 17 deletions.
93 changes: 77 additions & 16 deletions detectron2/engine/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os
import tempfile
import time
import warnings
from collections import Counter
import torch
from fvcore.common.checkpoint import PeriodicCheckpointer as _PeriodicCheckpointer
Expand All @@ -31,6 +32,7 @@
"AutogradProfiler",
"EvalHook",
"PreciseBN",
"TorchProfiler",
]


Expand Down Expand Up @@ -268,45 +270,59 @@ def load_state_dict(self, state_dict):
self.scheduler.load_state_dict(state_dict)


class AutogradProfiler(HookBase):
class TorchProfiler(HookBase):
"""
A hook which runs `torch.autograd.profiler.profile`.
A hook which runs `torch.profiler.profile`.
Examples:
::
hooks.AutogradProfiler(
lambda trainer: trainer.iter > 10 and trainer.iter < 20, self.cfg.OUTPUT_DIR
hooks.TorchProfiler(
lambda trainer: 10 < trainer.iter < 20, self.cfg.OUTPUT_DIR
)
The above example will run the profiler for iteration 10~20 and dump
results to ``OUTPUT_DIR``. We did not profile the first few iterations
because they are typically slower than the rest.
The result files can be loaded in the ``chrome://tracing`` page in chrome browser.
Note:
When used together with NCCL on older version of GPUs,
autograd profiler may cause deadlock because it unnecessarily allocates
memory on every device it sees. The memory management calls, if
interleaved with NCCL calls, lead to deadlock on GPUs that do not
support ``cudaLaunchCooperativeKernelMultiDevice``.
The result files can be loaded in the ``chrome://tracing`` page in chrome browser,
and the tensorboard visualizations can be visualized using
``tensorboard --logdir OUTPUT_DIR/log``
"""

def __init__(self, enable_predicate, output_dir, *, use_cuda=True):
def __init__(self, enable_predicate, output_dir, *, activities=None, save_tensorboard=True):
"""
Args:
enable_predicate (callable[trainer -> bool]): a function which takes a trainer,
and returns whether to enable the profiler.
It will be called once every step, and can be used to select which steps to profile.
output_dir (str): the output directory to dump tracing files.
use_cuda (bool): same as in `torch.autograd.profiler.profile`.
activities (iterable): same as in `torch.profiler.profile`.
save_tensorboard (bool): whether to save tensorboard visualizations at (output_dir)/log/
"""
self._enable_predicate = enable_predicate
self._use_cuda = use_cuda
self._activities = activities
self._output_dir = output_dir
self._save_tensorboard = save_tensorboard

def before_step(self):
if self._enable_predicate(self.trainer):
self._profiler = torch.autograd.profiler.profile(use_cuda=self._use_cuda)
if self._save_tensorboard:
on_trace_ready = torch.profiler.tensorboard_trace_handler(
os.path.join(
self._output_dir,
"log",
"profiler-tensorboard-iter{}".format(self.trainer.iter),
)
)
else:
on_trace_ready = None
self._profiler = torch.profiler.profile(
activities=self._activities,
on_trace_ready=on_trace_ready,
record_shapes=True,
profile_memory=True,
with_stack=True,
with_flops=True,
)
self._profiler.__enter__()
else:
self._profiler = None
Expand All @@ -332,6 +348,51 @@ def after_step(self):
f.write(content)


class AutogradProfiler(TorchProfiler):
    """
    A hook which runs `torch.autograd.profiler.profile`.

    Deprecated: prefer :class:`TorchProfiler`, which uses the newer
    ``torch.profiler`` API and supports tensorboard visualization.

    Examples:
    ::
        hooks.AutogradProfiler(
            lambda trainer: 10 < trainer.iter < 20, self.cfg.OUTPUT_DIR
        )

    The above example will run the profiler for iteration 10~20 and dump
    results to ``OUTPUT_DIR``. We did not profile the first few iterations
    because they are typically slower than the rest.
    The result files can be loaded in the ``chrome://tracing`` page in chrome browser.

    Note:
        When used together with NCCL on older version of GPUs,
        autograd profiler may cause deadlock because it unnecessarily allocates
        memory on every device it sees. The memory management calls, if
        interleaved with NCCL calls, lead to deadlock on GPUs that do not
        support ``cudaLaunchCooperativeKernelMultiDevice``.
    """

    def __init__(self, enable_predicate, output_dir, *, use_cuda=True):
        """
        Args:
            enable_predicate (callable[trainer -> bool]): a function which takes a trainer,
                and returns whether to enable the profiler.
                It will be called once every step, and can be used to select which steps to profile.
            output_dir (str): the output directory to dump tracing files.
            use_cuda (bool): same as in `torch.autograd.profiler.profile`.
        """
        warnings.warn("AutogradProfiler has been deprecated in favor of TorchProfiler.")
        # Intentionally does not call super().__init__(): the parent takes
        # different keyword arguments (activities / save_tensorboard).
        self._enable_predicate = enable_predicate
        self._use_cuda = use_cuda
        self._output_dir = output_dir
        # NOTE(review): parent's __init__ also sets ``_save_tensorboard``; the
        # inherited ``after_step`` (not visible here) presumably reads it.
        # Initialize it to False so inherited code does not hit AttributeError,
        # while keeping the legacy chrome-trace dump behavior. TODO confirm
        # against TorchProfiler.after_step.
        self._save_tensorboard = False

    def before_step(self):
        # Enable profiling only for the steps selected by the predicate; the
        # profiler is closed (and its trace dumped) by the inherited after_step.
        if self._enable_predicate(self.trainer):
            self._profiler = torch.autograd.profiler.profile(use_cuda=self._use_cuda)
            self._profiler.__enter__()
        else:
            self._profiler = None


class EvalHook(HookBase):
"""
Run an evaluation function periodically, and at the end of training.
Expand Down
8 changes: 7 additions & 1 deletion tools/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,13 @@ def f():
max_iter = 400
trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)(model, f(), optimizer)
trainer.register_hooks(
[hooks.IterationTimer(), hooks.PeriodicWriter([CommonMetricPrinter(max_iter)])]
[
hooks.IterationTimer(),
hooks.PeriodicWriter([CommonMetricPrinter(max_iter)]),
hooks.TorchProfiler(
lambda trainer: trainer.iter == max_iter - 1, cfg.OUTPUT_DIR, save_tensorboard=True
),
]
)
trainer.train(1, max_iter)

Expand Down

0 comments on commit 76ec0a2

Please sign in to comment.