From 88370e1b00dfb72750ee99a4c1bb8b0ef72a5739 Mon Sep 17 00:00:00 2001 From: Alexander Schepanovski Date: Mon, 9 Dec 2019 12:40:56 +0300 Subject: [PATCH 1/6] dvc: reorganize imports in dvc.remote.base --- dvc/remote/base.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/dvc/remote/base.py b/dvc/remote/base.py index 7b58542a2a..b1d839fc74 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -1,4 +1,5 @@ from __future__ import unicode_literals +from dvc.utils.compat import basestring, FileNotFoundError, str, urlparse import itertools import json @@ -14,22 +15,17 @@ import dvc.prompt as prompt from dvc.config import Config -from dvc.exceptions import ConfirmRemoveError -from dvc.exceptions import DvcException -from dvc.exceptions import DvcIgnoreInCollectedDirError +from dvc.exceptions import ( + DvcException, + ConfirmRemoveError, + DvcIgnoreInCollectedDirError, +) from dvc.ignore import DvcIgnore -from dvc.path_info import PathInfo -from dvc.path_info import URLInfo +from dvc.path_info import PathInfo, URLInfo from dvc.progress import Tqdm from dvc.remote.slow_link_detection import slow_link_guard from dvc.state import StateNoop -from dvc.utils import makedirs -from dvc.utils import relpath -from dvc.utils import tmp_fname -from dvc.utils.compat import basestring -from dvc.utils.compat import FileNotFoundError -from dvc.utils.compat import str -from dvc.utils.compat import urlparse +from dvc.utils import makedirs, relpath, tmp_fname from dvc.utils.fs import move from dvc.utils.http import open_url From 3b2666e46bb86090d4934ad2de6c75c0c03360b4 Mon Sep 17 00:00:00 2001 From: Alexander Schepanovski Date: Mon, 9 Dec 2019 12:45:56 +0300 Subject: [PATCH 2/6] remote: remove unused exception class --- dvc/remote/base.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/dvc/remote/base.py b/dvc/remote/base.py index b1d839fc74..8c94f152fd 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -45,13 +45,6 @@ } -class DataCloudError(DvcException): - """ Data Cloud exception """ - - def __init__(self, msg): - super(DataCloudError, self).__init__("Data sync error: {}".format(msg)) - - class RemoteCmdError(DvcException): def __init__(self, remote, cmd, ret, err): super(RemoteCmdError, self).__init__( From 2f79e362f4ccdc51bc99fe40619776daaeab3991 Mon Sep 17 00:00:00 2001 From: Alexander Schepanovski Date: Mon, 9 Dec 2019 13:25:32 +0300 Subject: [PATCH 3/6] dvc: add checkout --relink --- dvc/command/checkout.py | 7 +++++ dvc/output/base.py | 5 +++- dvc/remote/base.py | 27 +++++++++++------ dvc/repo/checkout.py | 13 ++++++-- dvc/stage.py | 7 +++-- tests/func/test_checkout.py | 60 ++++++++++--------------------------- 6 files changed, 60 insertions(+), 59 deletions(-) diff --git a/dvc/command/checkout.py b/dvc/command/checkout.py index 2802b1b15a..de26541177 100644 --- a/dvc/command/checkout.py +++ b/dvc/command/checkout.py @@ -12,6 +12,7 @@ def run(self): targets=self.args.targets, with_deps=self.args.with_deps, force=self.args.force, + relink=self.args.relink, recursive=self.args.recursive, ) return 0 @@ -34,6 +35,12 @@ def add_parser(subparsers, parent_parser): default=False, help="Do not prompt when removing working directory files.", ) + checkout_parser.add_argument( + "--relink", + action="store_true", + default=False, + help="Relink all files to cache, including unchanged.", + ) checkout_parser.add_argument( "-d", "--with-deps", diff --git a/dvc/output/base.py b/dvc/output/base.py index 8344f1f3d7..ffb68ac6af 100644 --- a/dvc/output/base.py +++ b/dvc/output/base.py @@ -282,7 +282,9 @@ def verify_metric(self): def download(self, to): self.remote.download(self.path_info, to.path_info) - def checkout(self, force=False, progress_callback=None, tag=None): + def checkout( + self, force=False, progress_callback=None, tag=None, relink=False + ): if not self.use_cache: if progress_callback: progress_callback(str(self.path_info), self.get_files_number()) @@ -298,6 +300,7 @@ def checkout(self, force=False, progress_callback=None, tag=None): info, force=force, progress_callback=progress_callback, + relink=relink, ) def remove(self, ignore_remove=False): diff --git a/dvc/remote/base.py b/dvc/remote/base.py index 8c94f152fd..f72e56127d 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -836,7 +836,7 @@ def makedirs(self, path_info): pass def _checkout_dir( - self, path_info, checksum, force, progress_callback=None + self, path_info, checksum, force, progress_callback=None, relink=False ): # Create dir separately so that dir is created # even if there are no files in it @@ -854,9 +854,8 @@ def _checkout_dir( entry_info = path_info / relative_path entry_checksum_info = {self.PARAM_CHECKSUM: entry_checksum} - if self.changed(entry_info, entry_checksum_info): - if self.exists(entry_info): - self.safe_remove(entry_info, force=force) + if relink or self.changed(entry_info, entry_checksum_info): + self.safe_remove(entry_info, force=force) self.link(entry_cache_info, entry_info) self.state.save(entry_info, entry_checksum) if progress_callback: @@ -878,7 +877,12 @@ def _remove_redundant_files(self, path_info, dir_info, force): self.safe_remove(path, force) def checkout( - self, path_info, checksum_info, force=False, progress_callback=None + self, + path_info, + checksum_info, + force=False, + progress_callback=None, + relink=False, ): if path_info.scheme not in ["local", self.scheme]: raise NotImplementedError @@ -894,7 +898,7 @@ def checkout( self.safe_remove(path_info, force=force) failed = path_info - elif not self.changed(path_info, checksum_info): + elif not relink and not self.changed(path_info, checksum_info): msg = "Data '{}' didn't change." logger.debug(msg.format(str(path_info))) skip = True @@ -915,18 +919,23 @@ def checkout( msg = "Checking out '{}' with cache '{}'." logger.debug(msg.format(str(path_info), checksum)) - self._checkout(path_info, checksum, force, progress_callback) + self._checkout(path_info, checksum, force, progress_callback, relink) return None def _checkout( - self, path_info, checksum, force=False, progress_callback=None + self, + path_info, + checksum, + force=False, + progress_callback=None, + relink=False, ): if not self.is_dir_checksum(checksum): return self._checkout_file( path_info, checksum, force, progress_callback=progress_callback ) return self._checkout_dir( - path_info, checksum, force, progress_callback=progress_callback + path_info, checksum, force, progress_callback, relink ) def get_files_number(self, checksum): diff --git a/dvc/repo/checkout.py b/dvc/repo/checkout.py index d4e4b805f9..040590d86c 100644 --- a/dvc/repo/checkout.py +++ b/dvc/repo/checkout.py @@ -25,7 +25,12 @@ def get_all_files_numbers(stages): def _checkout( - self, targets=None, with_deps=False, force=False, recursive=False + self, + targets=None, + with_deps=False, + force=False, + relink=False, + recursive=False, ): from dvc.stage import StageFileDoesNotExistError, StageFileBadNameError @@ -55,7 +60,11 @@ def _checkout( ) as pbar: for stage in stages: failed.extend( - stage.checkout(force=force, progress_callback=pbar.update_desc) + stage.checkout( + force=force, + progress_callback=pbar.update_desc, + relink=relink, + ) ) if failed: raise CheckoutError(failed) diff --git a/dvc/stage.py b/dvc/stage.py index c3c5daec9c..977347a0e0 100644 --- a/dvc/stage.py +++ b/dvc/stage.py @@ -904,11 +904,14 @@ def check_missing_outputs(self): if paths: raise MissingDataSource(paths) - def checkout(self, force=False, progress_callback=None): + def checkout(self, force=False, progress_callback=None, relink=False): failed_checkouts = [] for out in self.outs: failed = out.checkout( - force=force, tag=self.tag, progress_callback=progress_callback + force=force, + tag=self.tag, + progress_callback=progress_callback, + relink=relink, ) if failed: failed_checkouts.append(failed) diff --git a/tests/func/test_checkout.py b/tests/func/test_checkout.py index f2249bee14..5ff3b5f40c 100644 --- a/tests/func/test_checkout.py +++ b/tests/func/test_checkout.py @@ -26,7 +26,6 @@ from tests.basic_env import TestDvc from tests.basic_env import TestDvcGit from tests.func.test_repro import TestRepro -from tests.utils import spy logger = logging.getLogger("dvc") @@ -482,58 +481,29 @@ def test_checkout_no_checksum(repo_dir, dvc_repo): assert not os.path.exists(repo_dir.FOO) -@pytest.mark.skip @pytest.mark.parametrize( "link, link_test_func", [("hardlink", System.is_hardlink), ("symlink", System.is_symlink)], ) -def test_should_relink_on_checkout(link, link_test_func, repo_dir, dvc_repo): - dvc_repo.cache.local.cache_types = [link] +def test_checkout_relink(tmp_dir, dvc, link, link_test_func): + dvc.cache.local.cache_types = [link] - dvc_repo.add(repo_dir.DATA_DIR) - dvc_repo.unprotect(repo_dir.DATA_SUB) + tmp_dir.dvc_gen({"dir": {"data": "text"}}) + dvc.unprotect("dir/data") + assert not link_test_func("dir/data") - dvc_repo.checkout([repo_dir.DATA_DIR + Stage.STAGE_FILE_SUFFIX]) + dvc.checkout(["dir.dvc"], relink=True) + assert link_test_func("dir/data") - assert link_test_func(repo_dir.DATA_SUB) - -@pytest.mark.skip @pytest.mark.parametrize("link", ["hardlink", "symlink", "copy"]) -def test_should_protect_on_checkout(link, dvc_repo, repo_dir): - dvc_repo.cache.local.cache_types = [link] - dvc_repo.cache.local.protected = True - - dvc_repo.add(repo_dir.FOO) - dvc_repo.unprotect(repo_dir.FOO) - - dvc_repo.checkout([repo_dir.FOO + Stage.STAGE_FILE_SUFFIX]) - - assert not os.access(repo_dir.FOO, os.W_OK) - - -@pytest.mark.skip -def test_should_relink_only_one_file_in_dir(dvc_repo, repo_dir): - dvc_repo.cache.local.cache_types = ["symlink"] - - dvc_repo.add(repo_dir.DATA_DIR) - dvc_repo.unprotect(repo_dir.DATA_SUB) - - link_spy = spy(System.symlink) - with patch.object(dvc_repo.cache.local, "symlink", link_spy): - dvc_repo.checkout([repo_dir.DATA_DIR + Stage.STAGE_FILE_SUFFIX]) - - assert link_spy.mock.call_count == 1 - - -@pytest.mark.skip -@pytest.mark.parametrize("link", ["hardlink", "symlink", "copy"]) -def test_should_not_relink_on_unchanged_dependency(link, dvc_repo, repo_dir): - dvc_repo.cache.local.cache_types = [link] - - dvc_repo.add(repo_dir.DATA_DIR) +def test_checkout_relink_protected(tmp_dir, dvc, link): + dvc.cache.local.cache_types = [link] + dvc.cache.local.protected = True - with patch.object(dvc_repo.cache.local, "link") as mock_link: - dvc_repo.checkout([repo_dir.DATA_DIR + Stage.STAGE_FILE_SUFFIX]) + tmp_dir.dvc_gen("foo", "foo") + dvc.unprotect("foo") + assert os.access("foo", os.W_OK) - assert not mock_link.called + dvc.checkout(["foo.dvc"], relink=True) + assert not os.access("foo", os.W_OK) From 3610961144c4f258e0340bda847e3abc0db9ffd4 Mon Sep 17 00:00:00 2001 From: Alexander Schepanovski Date: Mon, 9 Dec 2019 13:46:10 +0300 Subject: [PATCH 4/6] dvc: warn to run `dvc checkout --relink` in cache.type change --- dvc/command/config.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dvc/command/config.py b/dvc/command/config.py index fb679e84a0..70b084441c 100644 --- a/dvc/command/config.py +++ b/dvc/command/config.py @@ -29,6 +29,15 @@ def run(self): section, opt, self.args.value, level=self.args.level ) + is_write = self.args.unset or self.args.value is not None + if is_write and self.args.name == "cache.type": + logger.warning( + "You have changed cache.type, this doesn't update link types " + "of any previously checked out files automatically. " + "That can be done with:\n" + " dvc checkout --relink" + ) + return 0 From 1143a8c57f7c61ebe5b4c57560c3ebf0ece703e9 Mon Sep 17 00:00:00 2001 From: Alexander Schepanovski Date: Thu, 12 Dec 2019 19:48:04 +0700 Subject: [PATCH 5/6] test: add checkout parse-cli-api test --- tests/unit/command/test_checkout.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/unit/command/test_checkout.py diff --git a/tests/unit/command/test_checkout.py b/tests/unit/command/test_checkout.py new file mode 100644 index 0000000000..21fc7920e4 --- /dev/null +++ b/tests/unit/command/test_checkout.py @@ -0,0 +1,21 @@ +from dvc.cli import parse_args +from dvc.command.checkout import CmdCheckout + + +def test_checkout(tmp_dir, dvc, mocker): + cli_args = parse_args( + ["checkout", "foo.dvc", "bar.dvc", "--relink", "--with-deps"] + ) + assert cli_args.func == CmdCheckout + + cmd = cli_args.func(cli_args) + m = mocker.patch("dvc.repo.Repo.checkout") + + assert cmd.run() == 0 + m.assert_called_once_with( + targets=["foo.dvc", "bar.dvc"], + force=False, + recursive=False, + relink=True, + with_deps=True, + ) From 5d1f01d58fb4cf06c94c9e3c25351af551745adc Mon Sep 17 00:00:00 2001 From: Alexander Schepanovski Date: Fri, 13 Dec 2019 19:00:06 +0700 Subject: [PATCH 6/6] checkout: iron out --relink related messages --- dvc/command/checkout.py | 2 +- dvc/command/config.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/dvc/command/checkout.py b/dvc/command/checkout.py index de26541177..3fb2083348 100644 --- a/dvc/command/checkout.py +++ b/dvc/command/checkout.py @@ -39,7 +39,7 @@ def add_parser(subparsers, parent_parser): "--relink", action="store_true", default=False, - help="Relink all files to cache, including unchanged.", + help="Recreate links or copies from cache to workspace.", ) checkout_parser.add_argument( "-d", diff --git a/dvc/command/config.py b/dvc/command/config.py index 70b084441c..c8c80b7908 100644 --- a/dvc/command/config.py +++ b/dvc/command/config.py @@ -32,10 +32,9 @@ def run(self): is_write = self.args.unset or self.args.value is not None if is_write and self.args.name == "cache.type": logger.warning( - "You have changed cache.type, this doesn't update link types " - "of any previously checked out files automatically. " - "That can be done with:\n" - " dvc checkout --relink" + "You have changed the 'cache.type' option. This doesn't update" + " any existing workspace file links, but it can be done with:" + "\n dvc checkout --relink" ) return 0