diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py
index e7db956463..e7f2b4dfd3 100644
--- a/dvc/repo/__init__.py
+++ b/dvc/repo/__init__.py
@@ -174,7 +174,20 @@ def _ignore(self):
 
     def check_modified_graph(self, new_stages):
         """Generate graph including the new stage to check for errors"""
-        self._collect_graph(self.stages + new_stages)
+        # Building the graph can be costly for repos with many DVC-files, so
+        # we provide this undocumented hack to skip the check. See [1] for
+        # more details. The hack can be used as:
+        #
+        #     repo = Repo(...)
+        #     repo._skip_graph_checks = True
+        #     repo.add(...)
+        #
+        # If the check is skipped, the caller must not duplicate outs or add
+        # cycles; otherwise DVC might behave in undefined ways.
+        #
+        # [1] https://github.com/iterative/dvc/issues/2671
+        if not getattr(self, "_skip_graph_checks", False):
+            self._collect_graph(self.stages + new_stages)
 
     def collect(self, target, with_deps=False, recursive=False, graph=None):
         import networkx as nx
diff --git a/tests/unit/repo/test_repo.py b/tests/unit/repo/test_repo.py
index 4638ff0113..985c113ef1 100644
--- a/tests/unit/repo/test_repo.py
+++ b/tests/unit/repo/test_repo.py
@@ -82,3 +82,30 @@ def test_collect_optimization(tmp_dir, dvc, mocker):
     # Should read stage directly instead of collecting the whole graph
     dvc.collect(stage.path)
     dvc.collect_granular(stage.path)
+
+
+def test_skip_graph_checks(tmp_dir, dvc, mocker, run_copy):
+    # See https://github.com/iterative/dvc/issues/2671 for more info
+    mock_collect_graph = mocker.patch("dvc.repo.Repo._collect_graph")
+
+    # sanity check: without the flag set, the graph is collected
+    tmp_dir.gen("foo", "foo text")
+    dvc.add("foo")
+    run_copy("foo", "bar")
+    assert mock_collect_graph.called
+
+    # check that our hack can be enabled: the graph must not be collected
+    mock_collect_graph.reset_mock()
+    dvc._skip_graph_checks = True
+    tmp_dir.gen("baz", "baz text")
+    dvc.add("baz")
+    run_copy("baz", "qux")
+    assert not mock_collect_graph.called
+
+    # check that our hack can be disabled: the graph is collected again
+    mock_collect_graph.reset_mock()
+    dvc._skip_graph_checks = False
+    tmp_dir.gen("quux", "quux text")
+    dvc.add("quux")
+    run_copy("quux", "quuz")
+    assert mock_collect_graph.called