Skip to content

Commit

Permalink
gc: make it safer by only activating by use of certain flags (#3363)
Browse files Browse the repository at this point in the history
* gc: make it safer by only activating by use of certain flags

Previous plain `gc` is now not supported. The same behavior can be trigger by
`gc -w` or `gc --workspace`. When `--all-{tags, commits, branches}` are
specified, it will work as if `-w` is also specified (same as before).

* tests: adjust gc to use workspace on test_repro
  • Loading branch information
skshetry committed Feb 21, 2020
1 parent bd33b1a commit 5101bf1
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 8 deletions.
18 changes: 18 additions & 0 deletions dvc/command/gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@

class CmdGC(CmdBase):
def run(self):
from dvc.repo.gc import _raise_error_if_all_disabled

_raise_error_if_all_disabled(
all_branches=self.args.all_branches,
all_tags=self.args.all_tags,
all_commits=self.args.all_commits,
workspace=self.args.workspace,
cloud=self.args.cloud,
)

msg = "This will remove all cache except items used in "

msg += "the working tree"
Expand Down Expand Up @@ -47,6 +57,7 @@ def run(self):
force=self.args.force,
jobs=self.args.jobs,
repos=self.args.repos,
workspace=self.args.workspace,
)
return 0

Expand All @@ -64,6 +75,13 @@ def add_parser(subparsers, parent_parser):
help=GC_HELP,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
gc_parser.add_argument(
"-w",
"--workspace",
action="store_true",
default=False,
help="Keep data files used in the current workspace.",
)
gc_parser.add_argument(
"-a",
"--all-branches",
Expand Down
4 changes: 4 additions & 0 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ class DvcException(Exception):
"""Base class for all dvc exceptions."""


class InvalidArgumentError(ValueError, DvcException):
"""Thrown if arguments are invalid."""


class OutputDuplicationError(DvcException):
"""Thrown if a file/directory is specified as an output in more than one
stage.
Expand Down
23 changes: 23 additions & 0 deletions dvc/repo/gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from . import locked
from dvc.cache import NamedCache
from dvc.exceptions import InvalidArgumentError


logger = logging.getLogger(__name__)
Expand All @@ -13,6 +14,15 @@ def _do_gc(typ, func, clist):
logger.info("No unused '{}' cache to remove.".format(typ))


def _raise_error_if_all_disabled(**kwargs):
if not any(kwargs.values()):
raise InvalidArgumentError(
"Invalid Arguments. Either of ({}) needs to be enabled.".format(
", ".join(kwargs.keys())
)
)


@locked
def gc(
self,
Expand All @@ -25,7 +35,20 @@ def gc(
force=False,
jobs=None,
repos=None,
workspace=False,
):

# require `workspace` to be true to come into effect.
# assume `workspace` to be enabled if any of `all_tags`, `all_commits`,
# or `all_branches` are enabled.
_raise_error_if_all_disabled(
workspace=workspace,
all_tags=all_tags,
all_commits=all_commits,
all_branches=all_branches,
cloud=cloud,
)

from contextlib import ExitStack
from dvc.repo import Repo

Expand Down
70 changes: 63 additions & 7 deletions tests/func/test_gc.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import logging
import os

import configobj
import pytest
from git import Repo

from dvc.compat import fspath
from dvc.exceptions import CollectCacheError
from dvc.main import main
from dvc.repo import Repo as DvcRepo
Expand All @@ -28,11 +30,11 @@ def setUp(self):
self.bad_cache.append(path)

def test_api(self):
self.dvc.gc()
self.dvc.gc(workspace=True)
self._test_gc()

def test_cli(self):
ret = main(["gc", "-f"])
ret = main(["gc", "-wf"])
self.assertEqual(ret, 0)
self._test_gc()

Expand Down Expand Up @@ -169,10 +171,10 @@ def test(self):

self._check_cache(3)

self.dvc.gc(repos=[self.additional_path])
self.dvc.gc(repos=[self.additional_path], workspace=True)
self._check_cache(3)

self.dvc.gc()
self.dvc.gc(workspace=True)
self._check_cache(2)


Expand All @@ -196,10 +198,10 @@ def test_gc_no_dir_cache(tmp_dir, dvc):
os.unlink(dir_stage.outs[0].cache_path)

with pytest.raises(CollectCacheError):
dvc.gc()
dvc.gc(workspace=True)

assert _count_files(dvc.cache.local.cache_dir) == 4
dvc.gc(force=True)
dvc.gc(force=True, workspace=True)
assert _count_files(dvc.cache.local.cache_dir) == 2


Expand All @@ -218,5 +220,59 @@ def test_gc_no_unpacked_dir(tmp_dir, dvc):

assert os.path.exists(unpackeddir)

dvc.gc(force=True)
dvc.gc(force=True, workspace=True)
assert not os.path.exists(unpackeddir)


def test_gc_without_workspace_raises_error(tmp_dir, dvc):
dvc.gc(force=True, workspace=True) # works without error

from dvc.exceptions import InvalidArgumentError

with pytest.raises(InvalidArgumentError):
dvc.gc(force=True)

with pytest.raises(InvalidArgumentError):
dvc.gc(force=True, workspace=False)


def test_gc_without_workspace_on_tags_branches_commits(tmp_dir, dvc):
dvc.gc(force=True, all_tags=True)
dvc.gc(force=True, all_commits=True)
dvc.gc(force=False, all_branches=True)

# even if workspace is disabled, and others are enabled, assume as if
# workspace is enabled.
dvc.gc(force=False, all_branches=True, all_commits=False, workspace=False)


def test_gc_without_workspace(tmp_dir, dvc, caplog):
with caplog.at_level(logging.WARNING, logger="dvc"):
assert main(["gc", "-vf"]) == 255

assert "Invalid Arguments" in caplog.text


def test_gc_with_possible_args_positive(tmp_dir, dvc):
for flag in [
"-w",
"-a",
"-T",
"--all-commits",
"-aT",
"-wa",
"-waT",
]:
assert main(["gc", "-vf", flag]) == 0


def test_gc_cloud_positive(tmp_dir, dvc, tmp_path_factory):
with dvc.config.edit() as conf:
storage = fspath(tmp_path_factory.mktemp("test_remote_base"))
conf["remote"]["local_remote"] = {"url": storage}
conf["core"]["remote"] = "local_remote"

dvc.push()

for flag in ["-c", "-ca", "-cT", "-caT", "-cwT"]:
assert main(["gc", "-vf", flag]) == 0
2 changes: 1 addition & 1 deletion tests/func/test_repro.py
Original file line number Diff line number Diff line change
Expand Up @@ -950,7 +950,7 @@ def test(self, mock_prompt):
self.assertEqual(self.dvc.status([cmd_stage.path]), {})

self.assertEqual(self.dvc.status(), {})
self.dvc.gc()
self.dvc.gc(workspace=True)
self.assertEqual(self.dvc.status(), {})

self.dvc.remove(cmd_stage.path, outs_only=True)
Expand Down

0 comments on commit 5101bf1

Please sign in to comment.