Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,20 @@ def _ignore(self):

def check_modified_graph(self, new_stages):
    """Validate the stage graph as it would look with ``new_stages`` added.

    The graph is built from the repo's current stages plus ``new_stages`` by
    calling ``self._collect_graph``. Nothing is returned; any problem is
    expected to surface from that call.

    The whole check is bypassed when the instance has a truthy
    ``_skip_graph_checks`` attribute (a missing attribute counts as False).
    """
    # Undocumented escape hatch: building the graph might be costly for
    # repos with many DVC-files, so a caller may opt out of it. See [1]
    # for the background. Intended usage:
    #
    #     repo = Repo(...)
    #     repo._skip_graph_checks = True
    #     repo.add(...)
    #
    # Whoever enables this is responsible for not duplicating outs and not
    # introducing cycles, otherwise DVC might have an undefined behaviour.
    #
    # [1] https://github.com/iterative/dvc/issues/2671
    if getattr(self, "_skip_graph_checks", False):
        return

    self._collect_graph(self.stages + new_stages)

def collect(self, target, with_deps=False, recursive=False, graph=None):
import networkx as nx
Expand Down
27 changes: 27 additions & 0 deletions tests/unit/repo/test_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,30 @@ def test_collect_optimization(tmp_dir, dvc, mocker):
# Should read stage directly instead of collecting the whole graph
dvc.collect(stage.path)
dvc.collect_granular(stage.path)


def test_skip_graph_checks(tmp_dir, dvc, mocker, run_copy):
    """Check that ``dvc._skip_graph_checks`` toggles graph collection.

    ``Repo._collect_graph`` is patched, and the test asserts whether it was
    called after adding a file and running a copy stage in three phases:
    flag unset, flag set to True, flag set back to False.

    See https://github.com/iterative/dvc/issues/2671 for more info.
    """
    collect_graph = mocker.patch("dvc.repo.Repo._collect_graph")

    def add_then_copy(src, text, dst):
        # One round of the operations exercised by this test.
        # NOTE(review): a reviewer remarked that ``dvc_gen`` is also
        # available and was unsure whether using ``gen`` followed by an
        # explicit ``add`` is intended -- confirm before changing.
        tmp_dir.gen(src, text)
        dvc.add(src)
        run_copy(src, dst)

    # Sanity check: with the flag untouched the graph is collected.
    add_then_copy("foo", "foo text", "bar")
    assert collect_graph.called

    # The hack can be enabled: no graph collection should be recorded.
    collect_graph.reset_mock()
    dvc._skip_graph_checks = True
    add_then_copy("baz", "baz text", "qux")
    assert not collect_graph.called

    # The hack can be disabled again: graph collection resumes.
    collect_graph.reset_mock()
    dvc._skip_graph_checks = False
    add_then_copy("quux", "quux text", "quuz")
    assert collect_graph.called