From f5d67f120e57e19ccce4a0af758f32535fe2bee0 Mon Sep 17 00:00:00 2001
From: dberenbaum
Date: Thu, 9 Nov 2023 17:59:56 -0500
Subject: [PATCH] dvclive tracker

Add a `DVCLiveTracker` and register it under the `"dvclive"` logger type so
it can be selected through `log_with`.

`DVCLiveTracker.log` checks `step is not None` instead of relying on
truthiness, so an explicit `step=0` is applied to the `Live` instance.
---
 docs/source/usage_guides/tracking.md          |  3 +-
 .../deepspeed_with_config_support.py          |  2 +-
 .../by_feature/megatron_lm_gpt_pretraining.py |  2 +-
 src/accelerate/test_utils/testing.py          |  8 +++
 src/accelerate/tracking.py                    | 73 +++++++++++++++++++
 src/accelerate/utils/__init__.py              |  1 +
 src/accelerate/utils/dataclasses.py           |  2 +
 src/accelerate/utils/imports.py               |  4 ++
 tests/test_tracking.py                        | 56 +++++++++++++-
 9 files changed, 147 insertions(+), 4 deletions(-)

diff --git a/docs/source/usage_guides/tracking.md b/docs/source/usage_guides/tracking.md
index 141fea6924b..dba4b084d5d 100644
--- a/docs/source/usage_guides/tracking.md
+++ b/docs/source/usage_guides/tracking.md
@@ -20,7 +20,7 @@ There are a large number of experiment tracking API's available, however getting
 
 ## Integrated Trackers
 
-Currently `Accelerate` supports six trackers out-of-the-box:
+Currently `Accelerate` supports seven trackers out-of-the-box:
 
 - TensorBoard
 - WandB
@@ -28,6 +28,7 @@ Currently `Accelerate` supports six trackers out-of-the-box:
 - Aim
 - MLFlow
 - ClearML
+- DVCLive
 
 To use any of them, pass in the selected type(s) to the `log_with` parameter in [`Accelerate`]:
 ```python
diff --git a/examples/by_feature/deepspeed_with_config_support.py b/examples/by_feature/deepspeed_with_config_support.py
index 15e810c4a2e..b5f122f3ad1 100755
--- a/examples/by_feature/deepspeed_with_config_support.py
+++ b/examples/by_feature/deepspeed_with_config_support.py
@@ -220,7 +220,7 @@ def parse_args():
         default="all",
         help=(
             'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
-            ' `"wandb"` and `"comet_ml"`. Use `"all"` (default) to report to all integrations.'
+            ' `"wandb"`, `"comet_ml"`, and `"dvclive"`. Use `"all"` (default) to report to all integrations.'
             "Only applicable when `--with_tracking` is passed."
         ),
     )
diff --git a/examples/by_feature/megatron_lm_gpt_pretraining.py b/examples/by_feature/megatron_lm_gpt_pretraining.py
index 3c048b2600e..b0e1b33700f 100644
--- a/examples/by_feature/megatron_lm_gpt_pretraining.py
+++ b/examples/by_feature/megatron_lm_gpt_pretraining.py
@@ -216,7 +216,7 @@ def parse_args():
         default="all",
         help=(
             'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
-            ' `"wandb"` and `"comet_ml"`. Use `"all"` (default) to report to all integrations.'
+            ' `"wandb"`, `"comet_ml"`, and `"dvclive"`. Use `"all"` (default) to report to all integrations.'
             "Only applicable when `--with_tracking` is passed."
         ),
     )
diff --git a/src/accelerate/test_utils/testing.py b/src/accelerate/test_utils/testing.py
index d6d1e2f2f0a..8a8b82f4e34 100644
--- a/src/accelerate/test_utils/testing.py
+++ b/src/accelerate/test_utils/testing.py
@@ -35,6 +35,7 @@
     is_comet_ml_available,
     is_datasets_available,
     is_deepspeed_available,
+    is_dvclive_available,
     is_mps_available,
     is_pandas_available,
     is_tensorboard_available,
@@ -231,6 +232,13 @@ def require_clearml(test_case):
     return unittest.skipUnless(is_clearml_available(), "test requires clearml")(test_case)
 
 
+def require_dvclive(test_case):
+    """
+    Decorator marking a test that requires dvclive installed. These tests are skipped when dvclive isn't installed
+    """
+    return unittest.skipUnless(is_dvclive_available(), "test requires dvclive")(test_case)
+
+
 def require_pandas(test_case):
     """
     Decorator marking a test that requires pandas installed. These tests are skipped when pandas isn't installed
diff --git a/src/accelerate/tracking.py b/src/accelerate/tracking.py
index 4f536d57812..cfacbfb1161 100644
--- a/src/accelerate/tracking.py
+++ b/src/accelerate/tracking.py
@@ -30,6 +30,7 @@
     is_aim_available,
     is_clearml_available,
     is_comet_ml_available,
+    is_dvclive_available,
     is_mlflow_available,
     is_tensorboard_available,
     is_wandb_available,
@@ -57,6 +58,9 @@
 if is_clearml_available():
     _available_trackers.append(LoggerType.CLEARML)
 
+if is_dvclive_available():
+    _available_trackers.append(LoggerType.DVCLIVE)
+
 logger = get_logger(__name__)
 
 
@@ -837,6 +841,73 @@ def _get_title_series(name):
         return name, "train"
 
 
+class DVCLiveTracker(GeneralTracker):
+    """
+    A `Tracker` class that supports `dvclive`. Should be initialized at the start of your script.
+
+    Args:
+        run_name (`str`, *optional*):
+            Ignored for dvclive. See `kwargs` instead.
+        live (`dvclive.Live`, *optional*):
+            An already constructed `dvclive.Live` instance to use. If `None`, a new one is created from `kwargs`.
+        kwargs:
+            Additional key word arguments passed along to `dvclive.Live()`.
+    """
+
+    name = "dvclive"
+    requires_logging_directory = False
+
+    @on_main_process
+    def __init__(self, run_name: Optional[str] = None, live: Optional[Any] = None, **kwargs):
+        from dvclive import Live
+
+        super().__init__()
+        self.live = live if live is not None else Live(**kwargs)
+
+    @property
+    def tracker(self):
+        return self.live
+
+    @on_main_process
+    def store_init_configuration(self, values: dict):
+        """
+        Logs `values` as hyperparameters for the run. Should be run at the beginning of your experiment. Stores the
+        hyperparameters in a yaml file for future use.
+
+        Args:
+            values (Dictionary `str` to `bool`, `str`, `float`, `int`, or a List or Dict of those types):
+                Values to be stored as initial hyperparameters as key-value pairs. The values need to have type `bool`,
+                `str`, `float`, or `int`.
+        """
+        self.live.log_params(values)
+
+    @on_main_process
+    def log(self, values: dict, step: Optional[int] = None, **kwargs):
+        """
+        Logs `values` to the current run.
+
+        Args:
+            values (Dictionary `str` to `str`, `float`, or `int`):
+                Values to be logged as key-value pairs. The values need to have type `str`, `float`, or `int`.
+            step (`int`, *optional*):
+                The run step. If included, the log will be affiliated with this step.
+            kwargs:
+                Additional key word arguments passed along to `dvclive.Live.log_metric()`.
+        """
+        # Compare against `None` rather than truthiness so that an explicit `step=0` is applied too.
+        if step is not None:
+            self.live.step = step
+        for k, v in values.items():
+            self.live.log_metric(k, v, **kwargs)
+
+    @on_main_process
+    def finish(self):
+        """
+        Closes `dvclive.Live()`.
+        """
+        self.live.end()
+
+
 LOGGER_TYPE_TO_CLASS = {
     "aim": AimTracker,
     "comet_ml": CometMLTracker,
@@ -844,6 +915,7 @@ def _get_title_series(name):
     "tensorboard": TensorBoardTracker,
     "wandb": WandBTracker,
     "clearml": ClearMLTracker,
+    "dvclive": DVCLiveTracker,
 }
 
 
@@ -866,6 +938,7 @@ def filter_trackers(
             - `"wandb"`
             - `"comet_ml"`
             - `"mlflow"`
+            - `"dvclive"`
             If `"all"` is selected, will pick up all available trackers in the environment and initialize them. Can
             also accept implementations of `GeneralTracker` for custom trackers, and can be combined with `"all"`.
         logging_dir (`str`, `os.PathLike`, *optional*):
diff --git a/src/accelerate/utils/__init__.py b/src/accelerate/utils/__init__.py
index 96e3fe61035..e3f4c8997d6 100644
--- a/src/accelerate/utils/__init__.py
+++ b/src/accelerate/utils/__init__.py
@@ -52,6 +52,7 @@
     is_cuda_available,
     is_datasets_available,
     is_deepspeed_available,
+    is_dvclive_available,
     is_fp8_available,
     is_ipex_available,
     is_megatron_lm_available,
diff --git a/src/accelerate/utils/dataclasses.py b/src/accelerate/utils/dataclasses.py
index 72f3c9aeb2d..e0e41568b0c 100644
--- a/src/accelerate/utils/dataclasses.py
+++ b/src/accelerate/utils/dataclasses.py
@@ -340,6 +340,7 @@ class LoggerType(BaseEnum):
         - **TENSORBOARD** -- TensorBoard as an experiment tracker
         - **WANDB** -- wandb as an experiment tracker
         - **COMETML** -- comet_ml as an experiment tracker
+        - **DVCLIVE** -- dvclive as an experiment tracker
     """
 
     ALL = "all"
@@ -349,6 +350,7 @@ class LoggerType(BaseEnum):
     COMETML = "comet_ml"
     MLFLOW = "mlflow"
     CLEARML = "clearml"
+    DVCLIVE = "dvclive"
 
 
 class PrecisionType(BaseEnum):
diff --git a/src/accelerate/utils/imports.py b/src/accelerate/utils/imports.py
index 9a60233c96c..27389eab107 100644
--- a/src/accelerate/utils/imports.py
+++ b/src/accelerate/utils/imports.py
@@ -297,3 +297,7 @@ def is_xpu_available(check_device=False):
         except RuntimeError:
             return False
     return hasattr(torch, "xpu") and torch.xpu.is_available()
+
+
+def is_dvclive_available():
+    return _is_package_available("dvclive")
diff --git a/tests/test_tracking.py b/tests/test_tracking.py
index 545b51fefd4..73a5049c81e 100644
--- a/tests/test_tracking.py
+++ b/tests/test_tracking.py
@@ -35,13 +35,19 @@
     TempDirTestCase,
     require_clearml,
     require_comet_ml,
+    require_dvclive,
     require_pandas,
     require_tensorboard,
     require_wandb,
     skip,
 )
 from accelerate.tracking import CometMLTracker, GeneralTracker
-from accelerate.utils import ProjectConfiguration, is_comet_ml_available, is_tensorboard_available
+from accelerate.utils import (
+    ProjectConfiguration,
+    is_comet_ml_available,
+    is_dvclive_available,
+    is_tensorboard_available,
+)
 
 
 if is_comet_ml_available():
@@ -52,6 +58,11 @@
 
     import tensorboard.compat.proto.event_pb2 as event_pb2
 
+if is_dvclive_available():
+    from dvclive.plots.metric import Metric
+    from dvclive.serialize import load_yaml
+    from dvclive.utils import parse_metrics
+
 logger = logging.getLogger(__name__)
 
 
@@ -473,3 +484,46 @@ def test_log(self):
                 "some_string": "",
             }
             self.assertDictEqual(data, truth)
+
+
+@require_dvclive
+class DVCLiveTrackingTest(unittest.TestCase):
+    def test_init_trackers(self):
+        with mock.patch("dvclive.live.get_dvc_repo") as repo_mock:
+            repo_mock.return_value = None
+            project_name = "test_project_with_config"
+            with tempfile.TemporaryDirectory() as dirpath:
+                accelerator = Accelerator(log_with="dvclive")
+                config = {
+                    "num_iterations": 12,
+                    "learning_rate": 1e-2,
+                    "some_boolean": False,
+                    "some_string": "some_value",
+                }
+                init_kwargs = {"dvclive": {"dir": dirpath, "save_dvc_exp": False, "dvcyaml": None}}
+                accelerator.init_trackers(project_name, config, init_kwargs)
+                accelerator.end_training()
+                live = accelerator.trackers[0].live
+                params = load_yaml(live.params_file)
+                assert params == config
+
+    def test_log(self):
+        with mock.patch("dvclive.live.get_dvc_repo") as repo_mock:
+            repo_mock.return_value = None
+            project_name = "test_project_with_log"
+            with tempfile.TemporaryDirectory() as dirpath:
+                accelerator = Accelerator(log_with="dvclive", project_dir=dirpath)
+                init_kwargs = {"dvclive": {"dir": dirpath, "save_dvc_exp": False, "dvcyaml": None}}
+                accelerator.init_trackers(project_name, init_kwargs=init_kwargs)
+                values = {"total_loss": 0.1, "iteration": 1, "my_text": "some_value"}
+                accelerator.log(values, step=0)
+                accelerator.end_training()
+                live = accelerator.trackers[0].live
+                logs, latest = parse_metrics(live)
+                # `step=0` is applied explicitly, so dvclive may report it alongside the logged values.
+                assert latest.pop("step", 0) == 0
+                assert latest == values
+                scalars = os.path.join(live.plots_dir, Metric.subfolder)
+                assert os.path.join(scalars, "total_loss.tsv") in logs
+                assert os.path.join(scalars, "iteration.tsv") in logs
+                assert os.path.join(scalars, "my_text.tsv") in logs