Skip to content

Commit

Permalink
SCM: allow multiple DVC repos inside single SCM repo (#3257)
Browse files Browse the repository at this point in the history
  • Loading branch information
pared committed Feb 25, 2020
1 parent f673144 commit 8d04d6d
Show file tree
Hide file tree
Showing 13 changed files with 178 additions and 60 deletions.
14 changes: 13 additions & 1 deletion dvc/command/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ def run(self):

try:
self.repo = Repo.init(
".", no_scm=self.args.no_scm, force=self.args.force
".",
no_scm=self.args.no_scm,
force=self.args.force,
subdir=self.args.subdir,
)
self.config = self.repo.config
except InitError:
Expand Down Expand Up @@ -56,4 +59,13 @@ def add_parser(subparsers, parent_parser):
"This operation removes local cache."
),
)
init_parser.add_argument(
"--subdir",
action="store_true",
default=False,
help=(
"Necessary for running this command inside a subdirectory of a "
"parent SCM repository."
),
)
init_parser.set_defaults(func=CmdInit)
1 change: 1 addition & 0 deletions dvc/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ class RelPath(str):
Optional("interactive", default=False): Bool,
Optional("analytics", default=True): Bool,
Optional("hardlink_lock", default=False): Bool,
Optional("no_scm", default=False): Bool,
},
"cache": {
"local": str,
Expand Down
17 changes: 16 additions & 1 deletion dvc/ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,25 @@ def __eq__(self, other):
return self.basenames == other.basenames


class DvcIgnoreRepo(DvcIgnore):
def __call__(self, root, dirs, files):
def is_dvc_repo(directory):
from dvc.repo import Repo

return os.path.isdir(os.path.join(directory, Repo.DVC_DIR))

dirs = [d for d in dirs if not is_dvc_repo(d)]

return dirs, files


class DvcIgnoreFilter(object):
def __init__(self, tree):
self.tree = tree
self.ignores = {DvcIgnoreDirs([".git", ".hg", ".dvc"])}
self.ignores = {
DvcIgnoreDirs([".git", ".hg", ".dvc"]),
DvcIgnoreRepo(),
}
for root, dirs, files in self.tree.walk(self.tree.tree_root):
self._update(root)
dirs[:], files[:] = self(root, dirs, files)
Expand Down
7 changes: 4 additions & 3 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ def __init__(self, root_dir=None):

self.config = Config(self.dvc_dir)

self.scm = SCM(self.root_dir)
no_scm = self.config["core"].get("no_scm", False)
self.scm = SCM(self.root_dir, no_scm=no_scm)

self.tree = WorkingTree(self.root_dir)

Expand Down Expand Up @@ -146,10 +147,10 @@ def find_dvc_dir(cls, root=None):
return os.path.join(root_dir, cls.DVC_DIR)

@staticmethod
def init(root_dir=os.curdir, no_scm=False, force=False):
def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False):
from dvc.repo.init import init

init(root_dir=root_dir, no_scm=no_scm, force=force)
init(root_dir=root_dir, no_scm=no_scm, force=force, subdir=subdir)
return Repo(root_dir)

def unprotect(self, target):
Expand Down
29 changes: 21 additions & 8 deletions dvc/repo/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from dvc.config import Config
from dvc.exceptions import InitError
from dvc.repo import Repo
from dvc.scm import NoSCM
from dvc.scm import SCM
from dvc.scm.base import SCMError
from dvc.utils import boxify
from dvc.utils import relpath
from dvc.utils.fs import remove
Expand Down Expand Up @@ -44,7 +44,7 @@ def _welcome_message():
logger.info(msg)


def init(root_dir=os.curdir, no_scm=False, force=False):
def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False):
"""
Creates an empty repo on the given directory -- basically a
`.dvc` directory with subdirectories for configuration and cache.
Expand All @@ -63,15 +63,23 @@ def init(root_dir=os.curdir, no_scm=False, force=False):
Raises:
KeyError: Raises an exception.
"""

if no_scm and subdir:
raise InitError(
"Cannot initialize repo with `--no-scm` and `--subdir`"
)

root_dir = os.path.realpath(root_dir)
dvc_dir = os.path.join(root_dir, Repo.DVC_DIR)
scm = SCM(root_dir)
if isinstance(scm, NoSCM) and not no_scm:

try:
scm = SCM(root_dir, search_parent_directories=subdir, no_scm=no_scm)
except SCMError:
raise InitError(
"{repo} is not tracked by any supported scm tool (e.g. git). "
"Use `--no-scm` if you don't want to use any scm.".format(
repo=root_dir
)
"{repo} is not tracked by any supported SCM tool (e.g. Git). "
"Use `--no-scm` if you don't want to use any SCM or "
"`--subdir` if initializing inside a subdirectory of a parent SCM "
"repository.".format(repo=root_dir)
)

if os.path.isdir(dvc_dir):
Expand All @@ -87,6 +95,11 @@ def init(root_dir=os.curdir, no_scm=False, force=False):
os.mkdir(dvc_dir)

config = Config.init(dvc_dir)

if no_scm:
with config.edit() as conf:
conf["core"]["no_scm"] = True

proj = Repo(root_dir)

scm.add([config.files["repo"]])
Expand Down
15 changes: 10 additions & 5 deletions dvc/scm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,23 @@ class NoSCM(Base):
pass


def SCM(root_dir): # pylint: disable=invalid-name
def SCM(
root_dir, search_parent_directories=True, no_scm=False
): # pylint: disable=invalid-name
"""Returns SCM instance that corresponds to a repo at the specified
path.
Args:
root_dir (str): path to a root directory of the repo.
repo (dvc.repo.Repo): DVC repo instance that root_dir belongs to.
search_parent_directories (bool): whether to look for repo root in
parent directories.
no_scm (bool): return NoSCM if True.
Returns:
dvc.scm.base.Base: SCM instance.
"""
if Git.is_repo(root_dir) or Git.is_submodule(root_dir):
return Git(root_dir)

return NoSCM(root_dir)
if no_scm:
return NoSCM(root_dir)

return Git(root_dir, search_parent_directories=search_parent_directories)
6 changes: 5 additions & 1 deletion dvc/scm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@ class Base(object):
"""Base class for source control management driver implementations."""

def __init__(self, root_dir=os.curdir):
self.root_dir = os.path.realpath(root_dir)
self._root_dir = os.path.realpath(root_dir)

@property
def root_dir(self):
return self._root_dir

def __repr__(self):
return "{class_name}: '{directory}'".format(
Expand Down
20 changes: 9 additions & 11 deletions dvc/scm/git/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class Git(Base):
GITIGNORE = ".gitignore"
GIT_DIR = ".git"

def __init__(self, root_dir=os.curdir):
def __init__(self, root_dir=os.curdir, search_parent_directories=True):
"""Git class constructor.
Requires `Repo` class from `git` module (from gitpython package).
"""
Expand All @@ -32,10 +32,12 @@ def __init__(self, root_dir=os.curdir):
from git.exc import InvalidGitRepositoryError

try:
self.repo = git.Repo(self.root_dir)
self.repo = git.Repo(
root_dir, search_parent_directories=search_parent_directories
)
except InvalidGitRepositoryError:
msg = "{} is not a git repository"
raise SCMError(msg.format(self.root_dir))
raise SCMError(msg.format(root_dir))

# NOTE: fixing LD_LIBRARY_PATH for binary built by PyInstaller.
# http://pyinstaller.readthedocs.io/en/stable/runtime-information.html
Expand All @@ -46,6 +48,10 @@ def __init__(self, root_dir=os.curdir):
self.ignored_paths = []
self.files_to_track = set()

@property
def root_dir(self):
return self.repo.working_tree_dir

@staticmethod
def clone(url, to_path, rev=None):
import git
Expand Down Expand Up @@ -95,14 +101,6 @@ def is_sha(rev):

return rev and git.Repo.re_hexsha_shortened.search(rev)

@staticmethod
def is_repo(root_dir):
return os.path.isdir(Git._get_git_dir(root_dir))

@staticmethod
def is_submodule(root_dir):
return os.path.isfile(Git._get_git_dir(root_dir))

@staticmethod
def _get_git_dir(root_dir):
return os.path.join(root_dir, Git.GIT_DIR)
Expand Down
4 changes: 3 additions & 1 deletion tests/dir_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,9 @@ def init(self, *, scm=False, dvc=False):
if scm:
_git_init(str_path)
if dvc:
self.dvc = Repo.init(str_path, no_scm=True)
self.dvc = Repo.init(
str_path, no_scm=not scm and not hasattr(self, "scm")
)
if scm:
self.scm = self.dvc.scm if hasattr(self, "dvc") else Git(str_path)
if dvc and hasattr(self, "scm"):
Expand Down
20 changes: 15 additions & 5 deletions tests/func/test_ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@
import pytest

from dvc.exceptions import DvcIgnoreInCollectedDirError
from dvc.ignore import DvcIgnore, DvcIgnoreDirs, DvcIgnorePatterns
from dvc.ignore import (
DvcIgnore,
DvcIgnoreDirs,
DvcIgnorePatterns,
DvcIgnoreRepo,
)
from dvc.scm.tree import WorkingTree
from dvc.utils import relpath
from dvc.compat import fspath_py35, fspath
Expand Down Expand Up @@ -93,10 +98,15 @@ def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname):
ignore_file = tmp_dir / dname / DvcIgnore.DVCIGNORE_FILE
ignore_file.write_text("foo")

assert dvc.tree.dvcignore.ignores == {
DvcIgnoreDirs([".git", ".hg", ".dvc"]),
DvcIgnorePatterns(fspath(top_ignore_file), WorkingTree(dvc.root_dir)),
}
assert len(dvc.tree.dvcignore.ignores) == 3
assert DvcIgnoreDirs([".git", ".hg", ".dvc"]) in dvc.tree.dvcignore.ignores
assert (
DvcIgnorePatterns(fspath(top_ignore_file), WorkingTree(dvc.root_dir))
in dvc.tree.dvcignore.ignores
)
assert any(
i for i in dvc.tree.dvcignore.ignores if isinstance(i, DvcIgnoreRepo)
)


def test_ignore_on_branch(tmp_dir, scm, dvc):
Expand Down
53 changes: 42 additions & 11 deletions tests/func/test_init.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import logging
import os

from dvc.compat import fspath
from dvc.config import Config
from dvc.exceptions import InitError
from dvc.main import main
from dvc.repo import Repo as DvcRepo
Expand Down Expand Up @@ -49,25 +52,53 @@ def test_cli(self):
self.assertNotEqual(ret, 0)


class TestInitNoSCM(TestDir):
def _test_init(self):
self.assertTrue(os.path.exists(DvcRepo.DVC_DIR))
self.assertTrue(os.path.isdir(DvcRepo.DVC_DIR))
def test_init_no_scm_api(tmp_dir):
repo = DvcRepo.init(no_scm=True)

def test_api(self):
DvcRepo.init(no_scm=True)
assert (tmp_dir / DvcRepo.DVC_DIR).is_dir()
assert repo.config["core"]["no_scm"]

self._test_init()

def test_cli(self):
ret = main(["init", "--no-scm"])
self.assertEqual(ret, 0)
def test_init_no_scm_cli(tmp_dir):
ret = main(["init", "--no-scm"])
assert ret == 0

self._test_init()
dvc_path = tmp_dir / DvcRepo.DVC_DIR
assert dvc_path.is_dir()
assert Config(fspath(dvc_path))["core"]["no_scm"]


def test_init_quiet_should_not_display_welcome_screen(tmp_dir, scm, caplog):
ret = main(["init", "--quiet"])

assert 0 == ret
assert "" == caplog.text


def test_allow_init_dvc_subdir(tmp_dir, scm, monkeypatch):
tmp_dir.gen({"subdir": {}})

with monkeypatch.context() as m:
m.chdir("subdir")
assert main(["init", "--subdir"]) == 0

repo = DvcRepo("subdir")
assert repo.root_dir == fspath(tmp_dir / "subdir")
assert repo.scm.root_dir == fspath(tmp_dir)


def test_subdir_init_no_option(tmp_dir, scm, monkeypatch, caplog):
tmp_dir.gen({"subdir": {}})

caplog.clear()
with monkeypatch.context() as m:
m.chdir("subdir")
with caplog.at_level(logging.ERROR, logger="dvc"):
assert main(["init"]) == 1

assert (
"{} is not tracked by any supported SCM tool (e.g. Git). "
"Use `--no-scm` if you don't want to use any SCM or "
"`--subdir` if initializing inside a subdirectory of a parent SCM "
"repository.".format(fspath(tmp_dir / "subdir"))
) in caplog.text
Loading

0 comments on commit 8d04d6d

Please sign in to comment.