Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions dvclive/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,16 @@


def init(
path: str = None,
resume: bool = False,
step: int = 0,
summary: bool = True,
path: str = None, resume: bool = False, summary: bool = True,
) -> MetricLogger:
global _metric_logger # pylint: disable=global-statement
_metric_logger = MetricLogger(
path=path or MetricLogger.DEFAULT_DIR,
resume=resume,
step=step,
summary=summary,
path=path or MetricLogger.DEFAULT_DIR, resume=resume, summary=summary,
)
return _metric_logger


def log(name: str, val: Union[int, float], step: int = None) -> None:
global _metric_logger # pylint: disable=global-statement
def _lazy_init(_metric_logger):
if _metric_logger:
if not _metric_logger.matches_env_setup():
from .error import ConfigMismatchError
Expand All @@ -35,9 +28,21 @@ def log(name: str, val: Union[int, float], step: int = None) -> None:
if not _metric_logger:
_metric_logger = MetricLogger()

return _metric_logger


def log(name: str, val: Union[int, float], step: int = None) -> None:
global _metric_logger # pylint: disable=global-statement
_metric_logger = _lazy_init(_metric_logger)
_metric_logger.log(name=name, val=val, step=step)


def get_step() -> None:
global _metric_logger # pylint: disable=global-statement
_metric_logger = _lazy_init(_metric_logger)
return _metric_logger.step
Comment on lines +40 to +43
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method returns int.
Also, I think we shoul change MetricLogger's step property into get_step() method to maintain consistency with with API. @daavoo what do you think?

Copy link
Contributor Author

@daavoo daavoo Sep 6, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure, tbh. get_X method instead of @property kind of feels strange and but step doesn't sound good for a public method neither

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, my POV is that I presume that at some point one might want to, parallelize their code, and in that case do something like:
dvclive = MetricsLogger() in that case, dvclive.get_step stops working.
Now that I mention that, it would probably be good to mention that dvclive is not thread-safe, and one needs to initialize their own Loggers in case of parallel jobs.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see your point.

However, assuming we are focusing on "integrations first" (iterative/example-repos-dev#77 (comment)), the parallelization would happen at the ML Framework level and most ML Frameworks already take care of properly calling the callbacks/loggers.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the downside to having the public API match MetricsLogger?

I don't think I understand the point about ML framework integrations. Even if ML frameworks spawn a separate process for each model training, dvclive would try to read/write using the same file by default, right? Users might need to specify a different path for each one, which isn't supported yet in the callbacks.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would need to investigate each case but, at least in the Deep Learning Frameworks I'm familiar with, parallelism usually occurs:

  • At the Data Loader level
    Which doesn't affect DVCLive callbacks.

  • In Distributed training strategy
    Where ML Framework usually provide some decorator like rank_zero_only / master_only which is (should be) used in the DVCLive callback.



def next_step() -> None:
global _metric_logger # pylint: disable=global-statement
if not _metric_logger:
Expand Down
17 changes: 9 additions & 8 deletions dvclive/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,22 @@ def __init__(
self,
path: str = "dvclive",
resume: bool = False,
step: int = 0,
summary=True,
html=True,
checkpoint=False,
):
self._path: str = path
self._step: int = step
self._step: int = 0
self._html: bool = html
self._summary = summary
self._metrics: Dict[str, float] = OrderedDict()
self._checkpoint: bool = checkpoint

if resume and self.exists:
if step == 0:
self._step = self.read_step()
if self._step != 0:
self._step += 1
else:
self._step = step
self._step = self.read_step()
if self._step != 0:
self._step += 1

else:
self._cleanup()
os.makedirs(self.dir, exist_ok=True)
Expand Down Expand Up @@ -103,6 +100,10 @@ def summary_path(self):
def html_path(self):
return self.dir + ".html"

@property
def step(self):
return self._step

def next_step(self):
if self._summary:
metrics = OrderedDict({"step": self._step})
Expand Down
52 changes: 52 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,55 @@ def test_invalid_metric_type(tmp_dir, invalid_type):
def test_initialization_error(tmp_dir):
with pytest.raises(InitializationError):
dvclive.next_step()


def test_get_step_init(tmp_dir):
assert dvclive.get_step() == 0


def test_get_step_resume(tmp_dir):
dvclive.init("logs")

for metric in [0.9, 0.8]:
dvclive.log("metric", metric)
dvclive.next_step()

assert dvclive.get_step() == 2

dvclive.init("logs", resume=True)

assert dvclive.get_step() == 2
dvclive.init("logs", resume=False)
assert dvclive.get_step() == 0


def test_get_step_custom_steps(tmp_dir):
dvclive.init("logs")

steps = [0, 62, 1000]
metrics = [0.9, 0.8, 0.7]

for step, metric in zip(steps, metrics):
dvclive.log("x", metric, step=step)
assert dvclive.get_step() == step

dvclive.log("y", metric, step=step)
assert dvclive.get_step() == step

dvclive.log("z", metric)
assert dvclive.get_step() == step

for metric in ["x", "y", "z"]:
assert read_history("logs", "x") == (steps, metrics)


def test_get_step_control_flow(tmp_dir):
dvclive.init("logs")

while dvclive.get_step() < 10:
dvclive.log("i", dvclive.get_step())
dvclive.next_step()

steps, values = read_history("logs", "i")
assert steps == list(range(10))
assert values == [float(x) for x in range(10)]