Skip to content

Commit

Permalink
params/metrics: assume file when dir expansion raises error
Browse files Browse the repository at this point in the history
Closes #10018.

We have to be careful when expanding paths. This PR fall backs to assume it
as a file when either of `isdir` and `find` raises error due to some mishaps.
The consequence of this assumption happens when we try to read the file.
In that case, if the directory exists, and we happen to try to open it
as a file, dvc will raise `IsADirectoryError`, but that is handled correctly
and won't fail catastrophically like this did.
  • Loading branch information
skshetry committed Mar 27, 2024
1 parent 5379aa2 commit 1b874f6
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 17 deletions.
21 changes: 19 additions & 2 deletions dvc/repo/metrics/show.py
@@ -1,3 +1,4 @@
import logging
import os
from collections.abc import Iterable, Iterator
from itertools import chain
Expand All @@ -8,7 +9,7 @@

from dvc.log import logger
from dvc.scm import NoSCMError
from dvc.utils import as_posix, expand_paths
from dvc.utils import as_posix
from dvc.utils.collections import ensure_list
from dvc.utils.serialize import load_path

Expand Down Expand Up @@ -103,7 +104,7 @@ def _collect_metrics(
paths = (fs.from_os_path(metric) for metric in metrics)
# make paths absolute for DVCFileSystem
repo_paths = (f"{fs.root_marker}{path}" for path in paths)
return ldistinct(expand_paths(fs, repo_paths))
return ldistinct(try_expand_paths(fs, repo_paths))


class FileResult(TypedDict, total=False):
Expand All @@ -116,6 +117,22 @@ class Result(TypedDict, total=False):
error: Exception


def try_expand_paths(fs: "FileSystem", paths: Iterable[str]) -> Iterator[str]:
for path in paths:
try:
if fs.isdir(path):
yield from fs.find(path)
return
except Exception as e: # noqa: BLE001
logger.debug(
"failed to expand %r: %s",
path,
e,
exc_info=logger.isEnabledFor(logging.TRACE), # type: ignore[attr-defined]
)
yield path


def to_relpath(fs: "FileSystem", root_dir: str, d: Result) -> Result:
relpath = fs.relpath
cwd = fs.getcwd()
Expand Down
6 changes: 3 additions & 3 deletions dvc/repo/params/show.py
Expand Up @@ -6,9 +6,9 @@

from dvc.dependency.param import ParamsDependency, read_param_file
from dvc.log import logger
from dvc.repo.metrics.show import FileResult, Result
from dvc.repo.metrics.show import FileResult, Result, try_expand_paths
from dvc.stage import PipelineStage
from dvc.utils import as_posix, expand_paths
from dvc.utils import as_posix
from dvc.utils.collections import ensure_list

if TYPE_CHECKING:
Expand Down Expand Up @@ -77,7 +77,7 @@ def _collect_params(
path = fs.from_os_path(param)
# make paths absolute for DVCFileSystem
repo_path = f"{fs.root_marker}{path}"
ret.update(dict.fromkeys(expand_paths(fs, [repo_path]), _params))
ret.update(dict.fromkeys(try_expand_paths(fs, [repo_path]), _params))
return ret


Expand Down
11 changes: 0 additions & 11 deletions dvc/utils/__init__.py
Expand Up @@ -5,16 +5,13 @@
import os
import re
import sys
from collections.abc import Iterable, Iterator
from typing import TYPE_CHECKING, Optional

import colorama

if TYPE_CHECKING:
from typing import TextIO

from dvc.fs import FileSystem


LARGE_DIR_SIZE = 100
TARGET_REGEX = re.compile(r"(?P<path>.*?)(:(?P<name>[^\\/:]*))??$")
Expand Down Expand Up @@ -410,14 +407,6 @@ def errored_revisions(rev_data: dict) -> list:
return result


def expand_paths(fs: "FileSystem", paths: Iterable[str]) -> Iterator[str]:
for path in paths:
if fs.isdir(path):
yield from fs.find(path)
else:
yield path


def isatty(stream: "Optional[TextIO]") -> bool:
if stream is None:
return False
Expand Down
18 changes: 18 additions & 0 deletions tests/func/metrics/test_show.py
@@ -1,5 +1,6 @@
import json
import os
import shutil
from os.path import join

import pytest
Expand All @@ -9,8 +10,10 @@
from dvc.dvcfile import PROJECT_FILE
from dvc.exceptions import OverlappingOutputPathsError
from dvc.repo import Repo
from dvc.repo.metrics.show import FileResult, Result
from dvc.utils.fs import remove
from dvc.utils.serialize import JSONFileCorruptedError
from dvc_data.index import DataIndexDirError


def test_show_simple(tmp_dir, dvc, run_copy_metrics):
Expand Down Expand Up @@ -351,3 +354,18 @@ def test_top_level_parametrized(tmp_dir, dvc):
assert dvc.metrics.show() == {
"": {"data": {"metrics.yaml": {"data": {"foo": 3, "bar": 10}}}}
}


def test_metric_in_a_tracked_directory_with_missing_dir_file(M, tmp_dir, dvc):
tmp_dir.dvc_gen({"dir": {"file": "2"}})
(tmp_dir / "dvc.yaml").dump({"metrics": [join("dir", "file")]})
shutil.rmtree(tmp_dir / "dir") # remove workspace
dvc.cache.local.clear() # remove cache and .dir file

assert dvc.metrics.show() == {
"": Result(
data={
join("dir", "file"): FileResult(error=M.instance_of(DataIndexDirError)),
}
)
}
20 changes: 19 additions & 1 deletion tests/func/params/test_show.py
@@ -1,9 +1,12 @@
import shutil
from os.path import join

import pytest

from dvc.dvcfile import PROJECT_FILE
from dvc.repo import Repo
from dvc.repo.stage import PROJECT_FILE
from dvc.repo.metrics.show import FileResult, Result
from dvc_data.index import DataIndexDirError


def test_show_empty(dvc):
Expand Down Expand Up @@ -192,3 +195,18 @@ def test_top_level_parametrized(tmp_dir, dvc):
}
}
}


def test_param_in_a_tracked_directory_with_missing_dir_file(M, tmp_dir, dvc):
tmp_dir.dvc_gen({"dir": {"file": "2"}})
(tmp_dir / "dvc.yaml").dump({"params": [join("dir", "file")]})
shutil.rmtree(tmp_dir / "dir") # remove workspace
dvc.cache.local.clear() # remove cache and .dir file

assert dvc.params.show() == {
"": Result(
data={
join("dir", "file"): FileResult(error=M.instance_of(DataIndexDirError)),
}
)
}

0 comments on commit 1b874f6

Please sign in to comment.