Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions dvc/dependency/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
from dvc.external_repo import external_repo
from dvc.exceptions import NotDvcRepoError
from dvc.exceptions import OutputNotFoundError
from dvc.exceptions import NoOutputInExternalRepoError
from dvc.exceptions import PathMissingError
from dvc.utils.fs import fs_copy
from dvc.path_info import PathInfo
from dvc.scm import SCM


Expand Down Expand Up @@ -46,14 +48,31 @@ def _make_repo(self, **overrides):
with external_repo(**merge(self.def_repo, overrides)) as repo:
yield repo

def status(self):
with self._make_repo() as repo:
current = repo.find_out_by_relpath(self.def_path).info
def _get_checksum(self, updated=False):
rev_lock = None
if not updated:
rev_lock = self.def_repo.get(self.PARAM_REV_LOCK)

with self._make_repo(rev_lock=None) as repo:
updated = repo.find_out_by_relpath(self.def_path).info
try:
with self._make_repo(rev_lock=rev_lock) as repo:
return repo.find_out_by_relpath(self.def_path).info["md5"]
except (NotDvcRepoError, NoOutputInExternalRepoError):
# Fall through and clone
pass

repo_path = cached_clone(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Luckily we do have a git + dvc clones refactor on our todo list 🙂 Not a part of this PR or anything; it is just us messing up earlier. 😅

self.def_repo[self.PARAM_URL],
rev=rev_lock or self.def_repo.get(self.PARAM_REV),
)
path = PathInfo(os.path.join(repo_path, self.def_path))

return self.repo.cache.local.get_checksum(path)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We really shouldn't do this. If path happens to be a dir then it will add that dir listing to self.repo cache.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Suor It is not an issue really. Those are tiny files and will get cleaned up on gc.


def status(self):
    """Report whether the imported dependency has an upstream update.

    Compares the checksum obtained with ``updated=False`` against the one
    obtained with ``updated=True`` (both via ``self._get_checksum``).

    Returns:
        ``{str(self): "update available"}`` when the two checksums differ,
        otherwise an empty dict.
    """
    current_checksum = self._get_checksum(updated=False)
    updated_checksum = self._get_checksum(updated=True)

    # Only the checksum comparison decides the result; the older
    # ``current != updated`` check referred to names that no longer exist.
    if current_checksum != updated_checksum:
        return {str(self): "update available"}

    return {}
Expand Down
20 changes: 12 additions & 8 deletions dvc/repo/status.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,32 @@
logger = logging.getLogger(__name__)


def _local_status(self, targets=None, with_deps=False):
def _joint_status(stages):
status = {}

if targets:
stages = cat(self.collect(t, with_deps=with_deps) for t in targets)
else:
stages = self.collect(None, with_deps=with_deps)

Comment on lines -16 to -20
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess you've split these because of DeepSource, right? Nothing wrong with that, just asking 🙂

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's exactly it :)

for stage in stages:
if stage.locked:
if stage.locked and not stage.is_repo_import:
logger.warning(
"DVC-file '{path}' is locked. Its dependencies are"
" not going to be shown in the status output.".format(
path=stage.relpath
)
)

status.update(stage.status())
status.update(stage.status(check_updates=True))

return status


def _local_status(self, targets=None, with_deps=False):
    """Return the joint status of the stages collected for ``targets``.

    When ``targets`` is empty or ``None``, a single ``self.collect(None, ...)``
    call supplies the stages; otherwise the stages collected for each target
    are concatenated. ``with_deps`` is forwarded to every ``collect`` call.
    """
    if not targets:
        collected = self.collect(None, with_deps=with_deps)
    else:
        collected = cat(
            self.collect(target, with_deps=with_deps) for target in targets
        )

    return _joint_status(collected)


def _cloud_status(
self,
targets=None,
Expand Down
6 changes: 4 additions & 2 deletions dvc/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1006,10 +1006,12 @@ def _status(entries):
return ret

@rwlocked(read=["deps", "outs"])
def status(self):
def status(self, check_updates=False):
ret = []

if not self.locked:
show_import = self.is_repo_import and check_updates

if not self.locked or show_import:
deps_status = self._status(self.deps)
if deps_status:
ret.append({"changed deps": deps_status})
Expand Down
73 changes: 73 additions & 0 deletions tests/func/test_status.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import os
import shutil

from mock import patch

from dvc.repo import Repo
from dvc.main import main
from dvc.compat import fspath
from dvc.external_repo import clean_repos
from tests.basic_env import TestDvc


Expand All @@ -23,3 +27,72 @@ def test_quiet(self):
def test_implied_cloud(self, mock_status):
main(["status", "--remote", "something"])
mock_status.assert_called()


def test_status_non_dvc_repo_import(tmp_dir, dvc, erepo_dir):
    """Status of an import from a git-only branch reports upstream updates.

    A new branch of the external repo has its ``.dvc`` directory removed and
    a plain file committed. After importing that file at ``rev="branch"``,
    status must be empty; after the file changes upstream, status must report
    "update available" for the import's dependency.
    """
    with erepo_dir.branch("branch", new=True), erepo_dir.chdir():
        # Strip DVC from this branch so "file" is committed to git directly.
        erepo_dir.scm.repo.index.remove([".dvc"], r=True)
        shutil.rmtree(".dvc")
        erepo_dir.scm_gen("file", "first version")
        erepo_dir.scm.add(["file"])
        erepo_dir.scm.commit("first version")

    dvc.imp(fspath(erepo_dir), "file", "file", rev="branch")

    status = dvc.status(["file.dvc"])

    assert status == {}

    # Caching in external repos doesn't see upstream updates within a single
    # CLI call, so we need to clean the caches to see the changes.
    clean_repos()

    with erepo_dir.branch("branch", new=False), erepo_dir.chdir():
        # Passing ``commit=`` makes scm_gen record the commit itself, so no
        # follow-up add/commit is needed (it would only create an empty
        # commit with a misleading message).
        erepo_dir.scm_gen("file", "second_version", commit="update file")

    status, = dvc.status(["file.dvc"])["file.dvc"]

    assert status == {
        "changed deps": {
            "file ({})".format(fspath(erepo_dir)): "update available"
        }
    }


def test_status_before_and_after_dvc_init(tmp_dir, dvc, erepo_dir):
    """Import a git-only file, then check status after upstream runs DVC init.

    The external repo first has ``.dvc`` removed and commits "file" to git.
    Later it calls ``Repo.init()`` and regenerates "file" with ``dvc_gen``.
    The import must show no changes before that and "update available" after.
    """
    # Phase 1: external repo without DVC; "file" is committed to git.
    with erepo_dir.chdir():
        erepo_dir.scm.repo.index.remove([".dvc"], r=True)
        shutil.rmtree(".dvc")
        erepo_dir.scm_gen("file", "first version")
        erepo_dir.scm.add(["file"])
        erepo_dir.scm.commit("first version")
        rev_before = erepo_dir.scm.get_rev()

    dvc.imp(fspath(erepo_dir), "file", "file")

    status_before = dvc.status(["file.dvc"])
    assert status_before == {}

    # Phase 2: external repo initialises DVC and regenerates "file" with it.
    with erepo_dir.chdir():
        Repo.init()
        erepo_dir.scm.repo.index.remove(["file"])
        os.remove("file")
        erepo_dir.dvc_gen("file", "second version")
        erepo_dir.scm.add([".dvc", "file.dvc"])
        erepo_dir.scm.commit("version with dvc")
        rev_after = erepo_dir.scm.get_rev()

    assert rev_before != rev_after

    # Caching in external repos doesn't see upstream updates within single
    # cli call, so we need to clean the caches to see the changes.
    clean_repos()

    [entry] = dvc.status(["file.dvc"])["file.dvc"]

    dep_key = "file ({})".format(fspath(erepo_dir))
    assert entry == {"changed deps": {dep_key: "update available"}}
101 changes: 101 additions & 0 deletions tests/func/test_update.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import pytest
import os
import shutil

from dvc.repo import Repo
from dvc.stage import Stage
from dvc.compat import fspath
from dvc.external_repo import clean_repos
Expand Down Expand Up @@ -36,8 +39,64 @@ def test_update_import(tmp_dir, dvc, erepo_dir, cached):
# cli call, so we need to clean the caches to see the changes.
clean_repos()

status, = dvc.status([stage.path])["version.dvc"]
changed_dep, = list(status["changed deps"].items())
assert changed_dep[0].startswith("version ")
assert changed_dep[1] == "update available"

dvc.update(stage.path)

assert dvc.status([stage.path]) == {}

assert imported.is_file()
assert imported.read_text() == "updated"

stage = Stage.load(dvc, stage.path)
assert stage.deps[0].def_repo == {
"url": fspath(erepo_dir),
"rev": "branch",
"rev_lock": new_rev,
}


def test_update_import_after_remote_updates_to_dvc(tmp_dir, dvc, erepo_dir):
old_rev = None
with erepo_dir.branch("branch", new=True), erepo_dir.chdir():
erepo_dir.scm_gen("version", "branch", commit="add version file")
old_rev = erepo_dir.scm.get_rev()

stage = dvc.imp(fspath(erepo_dir), "version", "version", rev="branch")

imported = tmp_dir / "version"
assert imported.is_file()
assert imported.read_text() == "branch"
assert stage.deps[0].def_repo == {
"url": fspath(erepo_dir),
"rev": "branch",
"rev_lock": old_rev,
}

new_rev = None
with erepo_dir.branch("branch", new=False), erepo_dir.chdir():
erepo_dir.scm.repo.index.remove("version")
erepo_dir.dvc_gen("version", "updated")
erepo_dir.scm.add(["version", "version.dvc"])
erepo_dir.scm.commit("upgrade to DVC tracking")
new_rev = erepo_dir.scm.get_rev()

assert old_rev != new_rev

# Caching in external repos doesn't see upstream updates within single
# cli call, so we need to clean the caches to see the changes.
clean_repos()

status, = dvc.status([stage.path])["version.dvc"]
changed_dep, = list(status["changed deps"].items())
assert changed_dep[0].startswith("version ")
assert changed_dep[1] == "update available"

dvc.update(stage.path)

assert dvc.status([stage.path]) == {}

assert imported.is_file()
Expand All @@ -51,6 +110,48 @@ def test_update_import(tmp_dir, dvc, erepo_dir, cached):
}


def test_update_before_and_after_dvc_init(tmp_dir, dvc, erepo_dir):
    """Update an import whose upstream switched from plain git to DVC.

    The file is imported while the external repo has no ``.dvc`` directory.
    Upstream then calls ``Repo.init()`` and regenerates the file with
    ``dvc_gen``. Status must report "update available", and after
    ``dvc.update`` the local file holds the new content and status is clean.
    """
    # Phase 1: external repo without DVC; "file" is committed to git.
    with erepo_dir.chdir():
        erepo_dir.scm.repo.index.remove([".dvc"], r=True)
        shutil.rmtree(".dvc")
        erepo_dir.scm_gen("file", "first version")
        erepo_dir.scm.add(["file"])
        erepo_dir.scm.commit("first version")
        rev_before = erepo_dir.scm.get_rev()

    stage = dvc.imp(fspath(erepo_dir), "file", "file")

    # Phase 2: external repo initialises DVC and regenerates "file" with it.
    with erepo_dir.chdir():
        Repo.init()
        erepo_dir.scm.repo.index.remove(["file"])
        os.remove("file")
        erepo_dir.dvc_gen("file", "second version")
        erepo_dir.scm.add([".dvc", "file.dvc"])
        erepo_dir.scm.commit("version with dvc")
        rev_after = erepo_dir.scm.get_rev()

    assert rev_before != rev_after

    # Caching in external repos doesn't see upstream updates within single
    # cli call, so we need to clean the caches to see the changes.
    clean_repos()

    pending = dvc.status([stage.path])
    dep_key = "file ({})".format(fspath(erepo_dir))
    assert pending == {
        "file.dvc": [{"changed deps": {dep_key: "update available"}}]
    }

    dvc.update(stage.path)

    imported = tmp_dir / "file"
    assert imported.read_text() == "second version"
    assert dvc.status([stage.path]) == {}


def test_update_import_url(tmp_dir, dvc, tmp_path_factory):
import_src = tmp_path_factory.mktemp("import_url_source")
src = import_src / "file"
Expand Down