Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions dvc/command/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def run(self):
recursive=self.args.recursive,
no_commit=self.args.no_commit,
fname=self.args.file,
external=self.args.external,
)

except DvcException:
Expand Down Expand Up @@ -49,6 +50,12 @@ def add_parser(subparsers, parent_parser):
default=False,
help="Don't put files/directories into cache.",
)
parser.add_argument(
"--external",
action="store_true",
default=False,
help="Allow targets that are outside of the DVC repository.",
)
parser.add_argument(
"-f",
"--file",
Expand Down
7 changes: 7 additions & 0 deletions dvc/command/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def run(self):
always_changed=self.args.always_changed,
name=self.args.name,
single_stage=self.args.single_stage,
external=self.args.external,
)
except DvcException:
logger.exception("")
Expand Down Expand Up @@ -225,6 +226,12 @@ def add_parser(subparsers, parent_parser):
default=False,
help=argparse.SUPPRESS,
)
run_parser.add_argument(
"--external",
action="store_true",
default=False,
help="Allow outputs that are outside of the DVC repository.",
)
run_parser.add_argument(
"command", nargs=argparse.REMAINDER, help="Command to execute."
)
Expand Down
19 changes: 15 additions & 4 deletions dvc/repo/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@

@locked
@scm_context
def add(repo, targets, recursive=False, no_commit=False, fname=None):
def add(
repo, targets, recursive=False, no_commit=False, fname=None, external=False
):
if recursive and fname:
raise RecursiveAddingWhileUsingFilename()

Expand Down Expand Up @@ -52,7 +54,9 @@ def add(repo, targets, recursive=False, no_commit=False, fname=None):
)
)

stages = _create_stages(repo, sub_targets, fname, pbar=pbar)
stages = _create_stages(
repo, sub_targets, fname, pbar=pbar, external=external
)

try:
repo.check_modified_graph(stages)
Expand Down Expand Up @@ -120,7 +124,7 @@ def _find_all_targets(repo, target, recursive):
return [target]


def _create_stages(repo, targets, fname, pbar=None):
def _create_stages(repo, targets, fname, pbar=None, external=False):
from dvc.stage import Stage, create_stage

stages = []
Expand All @@ -132,7 +136,14 @@ def _create_stages(repo, targets, fname, pbar=None):
unit="file",
):
path, wdir, out = resolve_paths(repo, out)
stage = create_stage(Stage, repo, fname or path, wdir=wdir, outs=[out])
stage = create_stage(
Stage,
repo,
fname or path,
wdir=wdir,
outs=[out],
external=external,
)
if stage:
Dvcfile(repo, stage.path).remove_with_prompt(force=True)

Expand Down
5 changes: 4 additions & 1 deletion dvc/stage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
check_circular_dependency,
check_duplicated_arguments,
check_missing_outputs,
check_no_externals,
check_stage_path,
compute_md5,
fill_stage_dependencies,
Expand Down Expand Up @@ -52,7 +53,7 @@ def loads_from(cls, repo, path, wdir, data):
return cls(**kw)


def create_stage(cls, repo, path, **kwargs):
def create_stage(cls, repo, path, external=False, **kwargs):
from dvc.dvcfile import check_dvc_filename

wdir = os.path.abspath(kwargs.get("wdir", None) or os.curdir)
Expand All @@ -63,6 +64,8 @@ def create_stage(cls, repo, path, **kwargs):

stage = loads_from(cls, repo, path, wdir, kwargs)
fill_stage_outputs(stage, **kwargs)
if not external:
check_no_externals(stage)
fill_stage_dependencies(
stage, **project(kwargs, ["deps", "erepo", "params"])
)
Expand Down
1 change: 1 addition & 0 deletions dvc/stage/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def _create_stage(self, cache, wdir=None):
params=params,
deps=[dep["path"] for dep in cache.get("deps", [])],
outs=[out["path"] for out in cache["outs"]],
external=True,
)
StageLoader.fill_from_lock(stage, cache)
return stage
Expand Down
4 changes: 4 additions & 0 deletions dvc/stage/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ class StageCommitError(DvcException):
pass


class StageExternalOutputsError(DvcException):
pass


class StageUpdateError(DvcException):
def __init__(self, path):
super().__init__(
Expand Down
28 changes: 27 additions & 1 deletion dvc/stage/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
from dvc.utils.fs import path_isin

from ..remote import LocalRemote, S3Remote
from ..utils import dict_md5, relpath
from ..utils import dict_md5, format_link, relpath
from .exceptions import (
MissingDataSource,
StageExternalOutputsError,
StagePathNotDirectoryError,
StagePathNotFoundError,
StagePathOutsideError,
Expand Down Expand Up @@ -68,6 +69,31 @@ def fill_stage_dependencies(stage, deps=None, erepo=None, params=None):
stage.deps += dependency.loads_params(stage, params or [])


def check_no_externals(stage):
from urllib.parse import urlparse

# NOTE: preventing users from accidentally using external outputs. See
# https://github.com/iterative/dvc/issues/1545 for more details.

def _is_external(out):
# NOTE: in case of `remote://` notation, the user clearly knows that
# this is an advanced feature and so we shouldn't error-out.
if out.is_in_repo or urlparse(out.def_path).scheme == "remote":
return False
return True

outs = [str(out) for out in stage.outs if _is_external(out)]
if not outs:
return

str_outs = ", ".join(outs)
link = format_link("https://dvc.org/doc/user-guide/managing-external-data")
raise StageExternalOutputsError(
f"Output(s) outside of DVC project: {str_outs}. "
f"See {link} for more info."
)


def check_circular_dependency(stage):
from dvc.exceptions import CircularDependencyError

Expand Down
4 changes: 2 additions & 2 deletions scripts/completion/dvc.bash
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ _dvc_commands='add cache checkout commit config dag destroy diff fetch freeze \
_dvc_options='-h --help -V --version'
_dvc_global_options='-h --help -q --quiet -v --verbose'

_dvc_add='-R --recursive -f --file --no-commit'
_dvc_add='-R --recursive -f --file --no-commit --external'
_dvc_add_COMPGEN=_dvc_compgen_files
_dvc_cache='dir'
_dvc_cache_dir=' --global --system --local -u --unset'
Expand Down Expand Up @@ -65,7 +65,7 @@ _dvc_remove_COMPGEN=_dvc_compgen_DVCFiles
_dvc_repro='-f --force -s --single-item -c --cwd -m --metrics --dry -i --interactive -p --pipeline -P --all-pipelines --no-run-cache --force-downstream --no-commit -R --recursive --downstream'
_dvc_repro_COMPGEN=_dvc_compgen_DVCFiles
_dvc_root=''
_dvc_run='--no-exec -f --file -d --deps -o --outs -O --outs-no-cache --outs-persist --outs-persist-no-cache -m --metrics -M --metrics-no-cache --overwrite-dvcfile --no-run-cache --no-commit -w --wdir'
_dvc_run='--no-exec -f --file -d --deps -o --outs -O --outs-no-cache --outs-persist --outs-persist-no-cache -m --metrics -M --metrics-no-cache --overwrite-dvcfile --no-run-cache --no-commit -w --wdir --external'
_dvc_run_COMPGEN=_dvc_compgen_DVCFiles
_dvc_status='-j --jobs -r --remote -a --all-branches -T --all-tags -d --with-deps -c --cloud'
_dvc_status_COMPGEN=_dvc_compgen_DVCFiles
Expand Down
2 changes: 2 additions & 0 deletions scripts/completion/dvc.zsh
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ _dvc_add=(
{-R,--recursive}"[Recursively add each file under the directory.]"
"--no-commit[Don't put files/directories into cache.]"
{-f,--file}"[Specify name of the DVC-file it generates.]:File:_files"
"--external[Allow targets that are outside of the DVC project.]"
"1:File:_files"
)

Expand Down Expand Up @@ -267,6 +268,7 @@ _dvc_run=(
"--no-commit[Don't put files/directories into cache.]"
"--outs-persist[Declare output file or directory that will not be removed upon repro.]:Output persistent:_files"
"--outs-persist-no-cache[Declare output file or directory that will not be removed upon repro (do not put into DVC cache).]:Output persistent regular:_files"
"--external[Allow outputs that are outside of the DVC project.]"
)

_dvc_status=(
Expand Down
7 changes: 6 additions & 1 deletion tests/func/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,11 +187,16 @@ def test_add_file_in_dir(tmp_dir, dvc):

class TestAddExternalLocalFile(TestDvc):
def test(self):
from dvc.stage.exceptions import StageExternalOutputsError

dname = TestDvc.mkdtemp()
fname = os.path.join(dname, "foo")
shutil.copyfile(self.FOO, fname)

stages = self.dvc.add(fname)
with self.assertRaises(StageExternalOutputsError):
self.dvc.add(fname)

stages = self.dvc.add(fname, external=True)
self.assertEqual(len(stages), 1)
stage = stages[0]
self.assertNotEqual(stage, None)
Expand Down
4 changes: 2 additions & 2 deletions tests/func/test_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,15 +153,15 @@ def test(self):
cwd = os.getcwd()
os.chdir(self.additional_path)
# ADD FILE ONLY IN SECOND PROJECT
fname = os.path.join(self.additional_path, "only_in_second")
fname = "only_in_second"
with open(fname, "w+") as fobj:
fobj.write("only in additional repo")

stages = self.additional_dvc.add(fname)
self.assertEqual(len(stages), 1)

# ADD FILE IN SECOND PROJECT THAT IS ALSO IN MAIN PROJECT
fname = os.path.join(self.additional_path, "in_both")
fname = "in_both"
with open(fname, "w+") as fobj:
fobj.write("in both repos")

Expand Down
3 changes: 2 additions & 1 deletion tests/func/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ def test_get_full_dvc_path(tmp_dir, erepo_dir, tmp_path_factory):
external_data.write_text("ext_data")

with erepo_dir.chdir():
erepo_dir.dvc_add(os.fspath(external_data), commit="add external data")
erepo_dir.dvc.add(os.fspath(external_data), external=True)
erepo_dir.scm_add("ext_data.dvc", commit="add external data")

Repo.get(
os.fspath(erepo_dir), os.fspath(external_data), "ext_data_imported"
Expand Down
1 change: 1 addition & 0 deletions tests/func/test_repro.py
Original file line number Diff line number Diff line change
Expand Up @@ -963,6 +963,7 @@ def test(self, mock_prompt):
deps=[out_foo_path],
cmd=self.cmd(foo_path, bar_path),
name="external-base",
external=True,
)

self.assertEqual(self.dvc.status([cmd_stage.addressing]), {})
Expand Down
4 changes: 4 additions & 0 deletions tests/unit/command/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def test_run(mocker, dvc):
"file:param1,param2",
"--params",
"param3",
"--external",
"command",
]
)
Expand Down Expand Up @@ -70,6 +71,7 @@ def test_run(mocker, dvc):
cmd="command",
name="nam",
single_stage=False,
external=True,
)


Expand Down Expand Up @@ -99,6 +101,7 @@ def test_run_args_from_cli(mocker, dvc):
cmd="echo foo",
name=None,
single_stage=False,
external=False,
)


Expand Down Expand Up @@ -128,4 +131,5 @@ def test_run_args_with_spaces(mocker, dvc):
cmd='echo "foo bar"',
name=None,
single_stage=False,
external=False,
)
18 changes: 18 additions & 0 deletions tests/unit/stage/test_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,21 @@ def test_always_changed(dvc):
with dvc.lock:
assert stage.changed()
assert stage.status()["path"] == ["always changed"]


def test_external_outs(tmp_path_factory, dvc):
from dvc.stage import create_stage
from dvc.stage.exceptions import StageExternalOutputsError

tmp_path = tmp_path_factory.mktemp("external-outs")
foo = tmp_path / "foo"
foo.write_text("foo")

with pytest.raises(StageExternalOutputsError):
create_stage(Stage, dvc, "path.dvc", outs=[os.fspath(foo)])

with dvc.config.edit() as conf:
conf["remote"]["myremote"] = {"url": os.fspath(tmp_path)}

create_stage(Stage, dvc, "path.dvc", outs=["remote://myremote/foo"])
create_stage(Stage, dvc, "path.dvc", outs=[os.fspath(foo)], external=True)