diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index 3e1878f9c4..3bcfd3e65d 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -9,8 +9,10 @@ from dvc.external_repo import external_repo from dvc.exceptions import NotDvcRepoError from dvc.exceptions import OutputNotFoundError +from dvc.exceptions import NoOutputInExternalRepoError from dvc.exceptions import PathMissingError from dvc.utils.fs import fs_copy +from dvc.path_info import PathInfo from dvc.scm import SCM @@ -46,14 +48,31 @@ def _make_repo(self, **overrides): with external_repo(**merge(self.def_repo, overrides)) as repo: yield repo - def status(self): - with self._make_repo() as repo: - current = repo.find_out_by_relpath(self.def_path).info + def _get_checksum(self, updated=False): + rev_lock = None + if not updated: + rev_lock = self.def_repo.get(self.PARAM_REV_LOCK) - with self._make_repo(rev_lock=None) as repo: - updated = repo.find_out_by_relpath(self.def_path).info + try: + with self._make_repo(rev_lock=rev_lock) as repo: + return repo.find_out_by_relpath(self.def_path).info["md5"] + except (NotDvcRepoError, NoOutputInExternalRepoError): + # Fall through and clone + pass + + repo_path = cached_clone( + self.def_repo[self.PARAM_URL], + rev=rev_lock or self.def_repo.get(self.PARAM_REV), + ) + path = PathInfo(os.path.join(repo_path, self.def_path)) + + return self.repo.cache.local.get_checksum(path) + + def status(self): + current_checksum = self._get_checksum(updated=False) + updated_checksum = self._get_checksum(updated=True) - if current != updated: + if current_checksum != updated_checksum: return {str(self): "update available"} return {} diff --git a/dvc/repo/status.py b/dvc/repo/status.py index 2e9a7a9483..3528e6243d 100644 --- a/dvc/repo/status.py +++ b/dvc/repo/status.py @@ -10,16 +10,11 @@ logger = logging.getLogger(__name__) -def _local_status(self, targets=None, with_deps=False): +def _joint_status(stages): status = {} - if targets: - stages = cat(self.collect(t, with_deps=with_deps) for t in targets) - else: - stages = self.collect(None, with_deps=with_deps) - for stage in stages: - if stage.locked: + if stage.locked and not stage.is_repo_import: logger.warning( "DVC-file '{path}' is locked. Its dependencies are" " not going to be shown in the status output.".format( @@ -27,11 +22,20 @@ def _local_status(self, targets=None, with_deps=False): ) ) - status.update(stage.status()) + status.update(stage.status(check_updates=True)) return status +def _local_status(self, targets=None, with_deps=False): + if targets: + stages = cat(self.collect(t, with_deps=with_deps) for t in targets) + else: + stages = self.collect(None, with_deps=with_deps) + + return _joint_status(stages) + + def _cloud_status( self, targets=None, diff --git a/dvc/stage.py b/dvc/stage.py index 449cb261fd..28076bad6d 100644 --- a/dvc/stage.py +++ b/dvc/stage.py @@ -1006,10 +1006,12 @@ def _status(entries): return ret @rwlocked(read=["deps", "outs"]) - def status(self): + def status(self, check_updates=False): ret = [] - if not self.locked: + show_import = self.is_repo_import and check_updates + + if not self.locked or show_import: deps_status = self._status(self.deps) if deps_status: ret.append({"changed deps": deps_status}) diff --git a/tests/func/test_status.py b/tests/func/test_status.py index e4de038523..8b8d3d2ba9 100644 --- a/tests/func/test_status.py +++ b/tests/func/test_status.py @@ -1,8 +1,12 @@ import os +import shutil from mock import patch +from dvc.repo import Repo from dvc.main import main +from dvc.compat import fspath +from dvc.external_repo import clean_repos from tests.basic_env import TestDvc @@ -23,3 +27,72 @@ def test_quiet(self): def test_implied_cloud(self, mock_status): main(["status", "--remote", "something"]) mock_status.assert_called() + + +def test_status_non_dvc_repo_import(tmp_dir, dvc, erepo_dir): + with erepo_dir.branch("branch", new=True), erepo_dir.chdir(): + erepo_dir.scm.repo.index.remove([".dvc"], r=True) + shutil.rmtree(".dvc") + erepo_dir.scm_gen("file", "first version") + erepo_dir.scm.add(["file"]) + erepo_dir.scm.commit("first version") + + dvc.imp(fspath(erepo_dir), "file", "file", rev="branch") + + status = dvc.status(["file.dvc"]) + + assert status == {} + + # Caching in external repos doesn't see upstream updates within single + # cli call, so we need to clean the caches to see the changes. + clean_repos() + + with erepo_dir.branch("branch", new=False), erepo_dir.chdir(): + erepo_dir.scm_gen("file", "second_version", commit="update file") + erepo_dir.scm.add(["file"]) + erepo_dir.scm.commit("first version") + + status, = dvc.status(["file.dvc"])["file.dvc"] + + assert status == { + "changed deps": { + "file ({})".format(fspath(erepo_dir)): "update available" + } + } + + +def test_status_before_and_after_dvc_init(tmp_dir, dvc, erepo_dir): + with erepo_dir.chdir(): + erepo_dir.scm.repo.index.remove([".dvc"], r=True) + shutil.rmtree(".dvc") + erepo_dir.scm_gen("file", "first version") + erepo_dir.scm.add(["file"]) + erepo_dir.scm.commit("first version") + old_rev = erepo_dir.scm.get_rev() + + dvc.imp(fspath(erepo_dir), "file", "file") + + assert dvc.status(["file.dvc"]) == {} + + with erepo_dir.chdir(): + Repo.init() + erepo_dir.scm.repo.index.remove(["file"]) + os.remove("file") + erepo_dir.dvc_gen("file", "second version") + erepo_dir.scm.add([".dvc", "file.dvc"]) + erepo_dir.scm.commit("version with dvc") + new_rev = erepo_dir.scm.get_rev() + + assert old_rev != new_rev + + # Caching in external repos doesn't see upstream updates within single + # cli call, so we need to clean the caches to see the changes. + clean_repos() + + status, = dvc.status(["file.dvc"])["file.dvc"] + + assert status == { + "changed deps": { + "file ({})".format(fspath(erepo_dir)): "update available" + } + } diff --git a/tests/func/test_update.py b/tests/func/test_update.py index 5afbac0480..6f483fff0a 100644 --- a/tests/func/test_update.py +++ b/tests/func/test_update.py @@ -1,5 +1,8 @@ import pytest +import os +import shutil +from dvc.repo import Repo from dvc.stage import Stage from dvc.compat import fspath from dvc.external_repo import clean_repos @@ -36,8 +39,64 @@ def test_update_import(tmp_dir, dvc, erepo_dir, cached): # cli call, so we need to clean the caches to see the changes. clean_repos() + status, = dvc.status([stage.path])["version.dvc"] + changed_dep, = list(status["changed deps"].items()) + assert changed_dep[0].startswith("version ") + assert changed_dep[1] == "update available" + + dvc.update(stage.path) + assert dvc.status([stage.path]) == {} + + assert imported.is_file() + assert imported.read_text() == "updated" + + stage = Stage.load(dvc, stage.path) + assert stage.deps[0].def_repo == { + "url": fspath(erepo_dir), + "rev": "branch", + "rev_lock": new_rev, + } + + +def test_update_import_after_remote_updates_to_dvc(tmp_dir, dvc, erepo_dir): + old_rev = None + with erepo_dir.branch("branch", new=True), erepo_dir.chdir(): + erepo_dir.scm_gen("version", "branch", commit="add version file") + old_rev = erepo_dir.scm.get_rev() + + stage = dvc.imp(fspath(erepo_dir), "version", "version", rev="branch") + + imported = tmp_dir / "version" + assert imported.is_file() + assert imported.read_text() == "branch" + assert stage.deps[0].def_repo == { + "url": fspath(erepo_dir), + "rev": "branch", + "rev_lock": old_rev, + } + + new_rev = None + with erepo_dir.branch("branch", new=False), erepo_dir.chdir(): + erepo_dir.scm.repo.index.remove("version") + erepo_dir.dvc_gen("version", "updated") + erepo_dir.scm.add(["version", "version.dvc"]) + erepo_dir.scm.commit("upgrade to DVC tracking") + new_rev = erepo_dir.scm.get_rev() + + assert old_rev != new_rev + + # Caching in external repos doesn't see upstream updates within single + # cli call, so we need to clean the caches to see the changes. + clean_repos() + + status, = dvc.status([stage.path])["version.dvc"] + changed_dep, = list(status["changed deps"].items()) + assert changed_dep[0].startswith("version ") + assert changed_dep[1] == "update available" + dvc.update(stage.path) + assert dvc.status([stage.path]) == {} assert imported.is_file() @@ -51,6 +110,48 @@ def test_update_import(tmp_dir, dvc, erepo_dir, cached): } +def test_update_before_and_after_dvc_init(tmp_dir, dvc, erepo_dir): + with erepo_dir.chdir(): + erepo_dir.scm.repo.index.remove([".dvc"], r=True) + shutil.rmtree(".dvc") + erepo_dir.scm_gen("file", "first version") + erepo_dir.scm.add(["file"]) + erepo_dir.scm.commit("first version") + old_rev = erepo_dir.scm.get_rev() + + stage = dvc.imp(fspath(erepo_dir), "file", "file") + + with erepo_dir.chdir(): + Repo.init() + erepo_dir.scm.repo.index.remove(["file"]) + os.remove("file") + erepo_dir.dvc_gen("file", "second version") + erepo_dir.scm.add([".dvc", "file.dvc"]) + erepo_dir.scm.commit("version with dvc") + new_rev = erepo_dir.scm.get_rev() + + assert old_rev != new_rev + + # Caching in external repos doesn't see upstream updates within single + # cli call, so we need to clean the caches to see the changes. + clean_repos() + + assert dvc.status([stage.path]) == { + "file.dvc": [ + { + "changed deps": { + "file ({})".format(fspath(erepo_dir)): "update available" + } + } + ] + } + + dvc.update(stage.path) + + assert (tmp_dir / "file").read_text() == "second version" + assert dvc.status([stage.path]) == {} + + def test_update_import_url(tmp_dir, dvc, tmp_path_factory): import_src = tmp_path_factory.mktemp("import_url_source") src = import_src / "file"