iterative · AlexandreKempf · Feb 8, 2024 · Feb 2, 2024 · Feb 5, 2024 · Feb 6, 2024
diff --git a/src/dvclive/live.py b/src/dvclive/live.py
@@ -71,11 +71,49 @@ def __init__(
         resume: bool = False,
         report: Optional[str] = None,
         save_dvc_exp: bool = True,
-        dvcyaml: Union[str, bool] = "dvc.yaml",
+        dvcyaml: Union[str, None] = "dvc.yaml",
         cache_images: bool = False,
         exp_name: Optional[str] = None,
         exp_message: Optional[str] = None,
     ):
+        """
+        Initializes a DVCLive logger. A `Live()` instance is required in order to log
+        machine learning parameters, metrics and other metadata.
+        Warning: `Live()` will remove all existing DVCLive related files under dir
+        unless `resume=True`.
+
+        Args:
+            dir (str | Path): where to save DVCLive's outputs. Defaults to `"dvclive"`.
+            resume (bool): if `True`, DVCLive will try to read the previous step from
+                the metrics_file and start from that point. Defaults to `False`.
+            report ("html", "md", "notebook", None): any of `"html"`, `"notebook"`,
+                `"md"` or `None`. See `Live.make_report()`. Defaults to None.
+            save_dvc_exp (bool): if `True`, DVCLive will create a new DVC experiment as
+                part of `Live.end()`. Defaults to `True`. If you are using DVCLive
+                inside a DVC Pipeline and running with `dvc exp run`, the option will be
+                ignored.
+            dvcyaml (str | None): where to write dvc.yaml file, which adds DVC
+                configuration for metrics, plots, and parameters as part of
+                `Live.next_step()` and `Live.end()`. If `None`, no dvc.yaml file is
+                written. Defaults to `"dvc.yaml"`. See `Live.make_dvcyaml()`.
+                If a string like `"subdir/dvc.yaml"`, DVCLive will write the
+                configuration to that path (file must be named "dvc.yaml").
+                If `False`, DVCLive will not write to "dvc.yaml" (useful if you are
+                tracking DVCLive metrics, plots, and parameters independently and
+                want to avoid duplication).
+            cache_images (bool): if `True`, DVCLive will cache any images logged with
+                `Live.log_image()` as part of `Live.end()`. Defaults to `False`.
+                If running a DVC pipeline, `cache_images` will be ignored, and you
+                should instead cache images as pipeline outputs.
+            exp_name (str | None): if not `None`, and `save_dvc_exp` is `True`, the
+                provided string will be passed to `dvc exp save --name`.
+                If DVCLive is used inside `dvc exp run`, the option will be ignored, use
+                `dvc exp run --name` instead.
+            exp_message (str | None): if not `None`, and `save_dvc_exp` is `True`, the
+                provided string will be passed to `dvc exp save --message`.
+                If DVCLive is used inside `dvc exp run`, the option will be ignored, use
+                `dvc exp run --message` instead.
+        """
         self.summary: Dict[str, Any] = {}
 
         self._dir: str = dir
@@ -280,10 +318,11 @@ def _init_report(self):
 
     def _init_test(self):
         """
-        Enables test mode that writes to temp paths and doesn't depend on repo.
+        Enables a test mode that writes to temporary paths and doesn't depend on the
+        repository.
 
-        Needed to run integration tests in external libraries like huggingface
-        accelerate.
+        This is needed to run integration tests in external libraries, such as
+        HuggingFace Accelerate.
         """
         with tempfile.TemporaryDirectory() as dirpath:
             self._dir = os.path.join(dirpath, self._dir)
@@ -297,6 +336,7 @@ def _init_test(self):
 
     @property
     def dir(self) -> str:  # noqa: A003
+        """Location of the directory to store outputs."""
         return self._dir
 
     @property
@@ -309,6 +349,7 @@ def metrics_file(self) -> str:
 
     @property
     def dvc_file(self) -> str:
+        """Path for dvc.yaml file."""
         return self._dvc_file
 
     @property
@@ -346,6 +387,14 @@ def sync(self):
         self.post_to_studio("data")
 
     def next_step(self):
+        """
+        Signals that the current iteration has ended and increases step value by one.
+        DVCLive uses `step` to track the history of the metrics logged with
+        `Live.log_metric()`.
+        You can use `Live.next_step()` to increase the step by one. In addition to
+        increasing the `step` number, it will call `Live.make_report()`,
+        `Live.make_dvcyaml()`, and `Live.make_summary()` by default.
+        """
         if self._step is None:
             self._step = 0
 
@@ -360,6 +409,28 @@ def log_metric(
         timestamp: bool = False,
         plot: bool = True,
     ):
+        """
+        On each `Live.log_metric(name, val)` call `DVCLive` will create a metrics
+        history file in `{Live.plots_dir}/metrics/{name}.tsv`. Each subsequent call to
+        `Live.log_metric(name, val)` will add a new row to
+        `{Live.plots_dir}/metrics/{name}.tsv`. In addition, `DVCLive` will store the
+        latest value logged in `Live.summary`, so it can be serialized with calls to
+        `live.make_summary()`, `live.next_step()` or when exiting the `Live` context
+        block.
+
+        Args:
+            name (str): name of the metric being logged.
+            val (int | float | str): the value to be logged.
+            timestamp (bool): whether to automatically log timestamp in the metrics
+                history file.
+            plot (bool): whether to add the metric value to the metrics history file for
+                plotting. If `False`, the metric will only be saved to the metrics
+                summary.
+
+        Raises:
+            `InvalidDataTypeError`: thrown if the provided `val` does not have a
+                supported type.
+        """
         if not Metric.could_log(val):
             raise InvalidDataTypeError(name, type(val))
 
@@ -380,6 +451,36 @@ def log_metric(
         logger.debug(f"Logged {name}: {val}")
 
     def log_image(self, name: str, val):
+        """
+        Saves the given image `val` to the output file `name`.
+
+        Supported values for val are:
+        - A valid NumPy array (convertible to an image via `PIL.Image.fromarray`)
+        - A `matplotlib.figure.Figure` instance
+        - A `PIL.Image` instance
+        - A path to an image file (`str` or `Path`). It should be in a format that is
+        readable by `PIL.Image.open()`
+
+        The images will be saved in `{Live.plots_dir}/images/{name}`. When using
+        `Live(cache_images=True)`, the images directory will also be cached as part of
+        `Live.end()`. In that case, a `.dvc` file will be saved to track it, and the
+        directory will be added to a `.gitignore` file to prevent Git tracking.
+
+        By default the images will be overwritten on each step. However, you can log
+        images using the following pattern
+        `live.log_image(f"folder/{live.step}.png", img)`.
+        In `DVC Studio` and the `DVC Extension for VSCode`, folders following this
+        pattern will be rendered using an image slider.
+
+        Args:
+            name (str): name of the image file that this command will output
+            val (np.ndarray | matplotlib.figure.Figure | PIL.Image | StrPath):
+                image to be saved. See the list of supported values in the description.
+
+        Raises:
+            `InvalidDataTypeError`: thrown if the provided `val` does not have a
+                supported type.
+        """
         if not Image.could_log(val):
             raise InvalidDataTypeError(name, type(val))
 
@@ -409,6 +510,31 @@ def log_plot(
         x_label: Optional[str] = None,
         y_label: Optional[str] = None,
     ):
+        """
+        The method will dump the provided datapoints to
+        `{Live.dir}/plots/custom/{name}.json`and store the provided properties to be
+        included in the plots section written by `Live.make_dvcyaml()`. The plot can be
+        rendered with `DVC CLI`, `VSCode Extension` or `DVC Studio`.
+
+
+
+        Args:
+            name (StrPath): name of the output file.
+            datapoints (pd.DataFrame | np.ndarray | List[Dict]): Pandas DataFrame, Numpy
+                Array or List of dictionaries containing the data for the plot.
+            x (str): name of the key (present in the dictionaries) to use as the x axis.
+            y (str): name of the key (present in the dictionaries) to use the y axis.
+            template (str): name of the `DVC plots template` to use. Defaults to
+                `"linear"`.
+            title (str): title to be displayed. Defaults to
+                `"{Live.dir}/plots/custom/{name}.json"`.
+            x_label (str): label for the x axis. Defaults to the name passed as `x`.
+            y_label (str): label for the y axis. Defaults to the name passed as `y`.
+
+        Raises:
+            `InvalidDataTypeError`: thrown if the provided `datapoints` does not have a
+                supported type.
+        """
         # Convert the given datapoints to List[Dict]
         datapoints = convert_datapoints_to_list_of_dicts(datapoints=datapoints)
 
@@ -435,6 +561,30 @@ def log_plot(
         logger.debug(f"Logged {name}")
 
     def log_sklearn_plot(self, kind, labels, predictions, name=None, **kwargs):
+        """
+        Generates a scikit learn plot and saves the data in
+        `{Live.dir}/plots/sklearn/{name}.json`. The method will compute and dump the
+        `kind` plot to `{Live.dir}/plots/sklearn/{name}` in a format compatible with
+        dvc plots. It will also store the provided properties to be included in the
+        plots section written by `Live.make_dvcyaml()`.
+
+        Args:
+            kind ("calibration" | "confusion_matrix" | "det" | "precision_recall" |
+                "roc"): a supported plot type.
+            labels (List | np.ndarray): array of ground truth labels.
+            predictions (List | np.ndarray): array of predicted labels (for
+            `"confusion_matrix"`) or predicted probabilities (for other plots).
+            name (str): optional name of the output file. If not provided, `kind` will
+            be used as name.
+            kwargs: additional arguments to tune the result. Arguments are passed to the
+                scikit-learn function (e.g. `drop_intermediate=True` for the `"roc"`
+                type). Plus extra arguments supported by the type of a plot are:
+                 - `normalized`: default to `False`. `confusion_matrix` with values
+                    normalized to `<0, 1>` range.
+        Raises:
+            InvalidPlotTypeError: thrown if the provided `kind` does not correspond to
+                any of the supported plots.
+        """
         val = (labels, predictions)
 
         plot_config = {
@@ -470,13 +620,39 @@ def _dump_params(self):
             raise InvalidParameterTypeError(exc.args[0]) from exc
 
     def log_params(self, params: Dict[str, ParamLike]):
-        """Saves the given set of parameters (dict) to yaml"""
+        """
+        On each `Live.log_params(params)` call, DVCLive will write keys/values pairs in
+        the params dict to `{Live.dir}/params.yaml`:
+
+        Also see `Live.log_param()`.
+
+        Args:
+            params (Dict[str, ParamLike]): dictionary with name/value pairs of
+                parameters to be logged.
+
+        Raises:
+            `InvalidParameterTypeError`: thrown if the parameter value is not among
+                supported types.
+        """
         self._params.update(params)
         self._dump_params()
         logger.debug(f"Logged {params} parameters to {self.params_file}")
 
     def log_param(self, name: str, val: ParamLike):
-        """Saves the given parameter value to yaml"""
+        """
+        On each `Live.log_param(name, val)` call, DVCLive will write the name parameter
+        to `{Live.dir}/params.yaml` with the corresponding `val`.
+
+        Also see `Live.log_params()`.
+
+        Args:
+            name (str): name of the parameter being logged.
+            val (ParamLike): the value to be logged.
+
+        Raises:
+            `InvalidParameterTypeError`: thrown if the parameter value is not among
+                supported types.
+        """
         self.log_params({name: val})
 
     def log_artifact(
@@ -490,7 +666,46 @@ def log_artifact(
         copy: bool = False,
         cache: bool = True,
     ):
-        """Tracks a local file or directory with DVC"""
+        """
+        Tracks an existing directory or file with DVC.
+
+        Log path, saving its contents to DVC storage. Also annotate with any included
+        metadata fields (for example, to be consumed in the model registry or automation
+        scenarios).
+        If `cache=True` (which is the default), uses `dvc add` to track path with DVC,
+        saving it to the DVC cache and generating a `{path}.dvc` file that acts as a
+        pointer to the cached data.
+        If you include any of the optional metadata fields (type, name, desc, labels,
+        meta), it will add an artifact and all the metadata passed as arguments to the
+        corresponding `dvc.yaml` (unless `dvcyaml=None`). Passing `type="model"` will
+        include it in the model registry.
+
+        Args:
+            path (StrPath): an existing directory or file.
+            type (Optional[str]): an optional type of the artifact. Common types are
+                `"model"` or `"dataset"`.
+            name (Optional[str]): an optional custom name of an artifact.
+                If not provided the `path` stem (last part of the path without the
+                file extension) will be used as the artifact name.
+            desc (Optional[str]): an optional description of an artifact.
+            labels (Optional[List[str]]): optional labels describing the artifact.
+            meta (Optional[Dict[str, Any]]): optional metainformation in `key: value`
+                format.
+            copy (bool): copy a directory or file at path into the `dvclive/artifacts`
+                location (default) before tracking it. The new path is used instead of
+                the original one to track the artifact. Useful if you don't want to
+                track the original path in your repo (for example, it is outside the
+                repo or in a Git-ignored directory).
+            cache (bool): cache the files with DVC to track them outside of Git.
+                Defaults to `True`, but set to `False` if you want to annotate metadata
+                about the artifact without storing a copy in the DVC cache.
+                If running a DVC pipeline, `cache` will be ignored, and you should
+                instead cache artifacts as pipeline outputs.
+
+        Raises:
+            `InvalidDataTypeError`: thrown if the provided `path` does not have a
+                supported type.
+        """
         if not isinstance(path, (str, Path)):
             raise InvalidDataTypeError(path, builtins.type(path))
 
@@ -559,25 +774,66 @@ def cache(self, path):
             self._include_untracked.append(str(Path(dvc_file).parent / ".gitignore"))
 
     def make_summary(self):
+        """
+        Serializes a summary of the logged metrics (`Live.summary`) to
+        `Live.metrics_file`.
+
+        The `Live.summary` object will contain the latest value of each metric logged
+        with `Live.log_metric()`. It can be also modified manually.
+
+        `Live.next_step()` and `Live.end()` will call `Live.make_summary()` internally,
+        so you don't need to call both.
+
+        The summary is usable by `dvc metrics`.
+        """
         if self._step is not None:
             self.summary["step"] = self.step
         dump_json(self.summary, self.metrics_file, cls=NumpyEncoder)
 
     def make_report(self):
+        """
+        Generates a report from the logged data.
+
+        `Live.next_step()` and `Live.end()` will call `Live.make_report()` internally,
+        so you don't need to call both.
+
+        On each call, DVCLive will collect all the data logged in `{Live.dir}`, generate
+        a report and save it in `{Live.dir}/report.{format}`. The format can be HTML
+        or Markdown depending on the value of the `report` argument passed to `Live()`.
+        """
         if self._report_mode is not None:
             make_report(self)
             if self._report_mode == "html" and env2bool(env.DVCLIVE_OPEN):
                 open_file_in_browser(self.report_file)
 
     @catch_and_warn(DvcException, logger)
     def make_dvcyaml(self):
+        """
+        Writes DVC configuration for metrics, plots, and parameters to `Live.dvc_file`.
+
+        Creates `dvc.yaml`, which describes and configures metrics, plots, and
+        parameters. DVC tools use this file to show reports and experiments tables.
+        `Live.next_step()` and `Live.end()` will call `Live.make_dvcyaml()` internally,
+        so you don't need to call both (unless `dvcyaml=None`).
+        """
         make_dvcyaml(self)
 
     @catch_and_warn(DvcException, logger)
     def post_to_studio(self, event):
         post_to_studio(self, event)
 
     def end(self):
+        """
+        Signals that the current experiment has ended.
+        `Live.end()` gets automatically called when exiting the context manager. It is
+        also called when the training ends for each of the supported ML Frameworks
+
+        By default, `Live.end()` will call `Live.make_summary()`, `Live.make_dvcyaml()`,
+        and `Live.make_report()`.
+
+        If `save_dvc_exp=True`, it will save a new DVC experiment and write a `dvc.yaml`
+        file configuring what DVC will show for logged plots, metrics, and parameters.
+        """
         if self._inside_with:
             # Prevent `live.end` calls inside context manager
             return