Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions dvc/ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,16 +186,15 @@ def _parents_exist(self, path):
if path.parent == self.tree_root or Repo.DVC_DIR in path.parts:
return True

# if path is outside of tree, assume this is a local remote/local cache
# link/move operation where we do not need to filter ignores
# paths outside of the CleanTree root should be ignored
path = relpath(path, self.tree_root)
if path.startswith("..") or (
os.name == "nt"
and not os.path.commonprefix(
[os.path.abspath(path), self.tree_root]
)
):
return True
return False

# check if parent directories are in our ignores, starting from
# tree_root
Expand Down
28 changes: 6 additions & 22 deletions dvc/remote/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,12 @@ def repo(self):
return self.remote.repo

@cached_property
def _work_tree(self):
if self.repo:
return WorkingTree(self.repo.root_dir)
return None

@property
def work_tree(self):
# When using repo.brancher, repo.tree may change to/from WorkingTree to
# GitTree arbitarily. When repo.tree is GitTree, local cache needs to
# use its own WorkingTree instance.
if self.repo and not is_working_tree(self.repo.tree):
return self._work_tree
if self.repo:
return WorkingTree(self.repo.root_dir)
return None

@staticmethod
Expand All @@ -72,35 +66,25 @@ def exists(self, path_info):
assert isinstance(path_info, str) or path_info.scheme == "local"
if not self.repo:
return os.path.exists(path_info)
if self.work_tree and self.work_tree.exists(path_info):
return True
return self.repo.tree.exists(path_info)
return self.work_tree.exists(path_info)

def isfile(self, path_info):
if not self.repo:
return os.path.isfile(path_info)
if self.work_tree and self.work_tree.isfile(path_info):
return True
return self.repo.tree.isfile(path_info)
return self.work_tree.isfile(path_info)

def isdir(self, path_info):
if not self.repo:
return os.path.isdir(path_info)
if self.work_tree and self.work_tree.isdir(path_info):
return True
return self.repo.tree.isdir(path_info)
return self.work_tree.isdir(path_info)

def iscopy(self, path_info):
return not (
System.is_symlink(path_info) or System.is_hardlink(path_info)
)

def walk_files(self, path_info, **kwargs):
if self.work_tree:
tree = self.work_tree
else:
tree = self.repo.tree
for fname in tree.walk_files(path_info):
for fname in self.work_tree.walk_files(path_info):
yield PathInfo(fname)

def is_empty(self, path_info):
Expand Down
8 changes: 4 additions & 4 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,14 +111,14 @@ def __init__(self, root_dir=None, scm=None, rev=None):
friendly=True,
)

self.cache = Cache(self)
self.cloud = DataCloud(self)

if not scm:
# NOTE: storing state and link_state in the repository itself to
# avoid any possible state corruption in 'shared cache dir'
# scenario.
self.state = State(self)

self.cache = Cache(self)
self.cloud = DataCloud(self)
self.state = State(self.cache.local)

self.stage_cache = StageCache(self)

Expand Down
13 changes: 8 additions & 5 deletions dvc/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from urllib.parse import urlencode, urlunparse

from dvc.exceptions import DvcException
from dvc.scm.tree import WorkingTree
from dvc.utils import current_timestamp, relpath, to_chunks
from dvc.utils.fs import get_inode, get_mtime_and_size, remove

Expand Down Expand Up @@ -89,10 +88,11 @@ class State: # pylint: disable=too-many-instance-attributes
MAX_INT = 2 ** 63 - 1
MAX_UINT = 2 ** 64 - 2

def __init__(self, repo):
def __init__(self, local_cache):
repo = local_cache.repo
self.repo = repo
self.root_dir = repo.root_dir
self.tree = WorkingTree(self.root_dir)
self.tree = local_cache.tree.work_tree
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After remote/cache/tree separation, the only default tree (aka cache.tree) that local cache is working on will be the work tree, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's correct

Comment on lines +92 to +95
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pmrowla State itself has nothing to do with the cache, but rather with the repos working tree, right? So maybe we should have work_tree(or wtree) in repo itself and make local cache and state use it? Just not quite obvious why state depends on local cache here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seemed to me that since state is only applicable whenever there is a local cache, and used as a cache for calculating md5s which will go into the local cache, it would make more sense to have state associated with the local cache's working tree rather than with a repo tree instance. The repo doesn't actually need a working tree to function (since we can use git trees), but local cache and state only use a working tree.


state_config = repo.config.get("state", {})
self.row_limit = state_config.get("row_limit", self.STATE_ROW_LIMIT)
Expand Down Expand Up @@ -394,6 +394,9 @@ def get(self, path_info):
assert isinstance(path_info, str) or path_info.scheme == "local"
path = os.fspath(path_info)

# NOTE: use os.path.exists instead of WorkingTree.exists
# WorkingTree.exists uses lexists() and will return True for broken
# symlinks that we cannot stat() in get_mtime_and_size
if not os.path.exists(path):
return None

Expand All @@ -420,7 +423,7 @@ def save_link(self, path_info):
"""
assert isinstance(path_info, str) or path_info.scheme == "local"

if not os.path.exists(path_info):
if not self.tree.exists(path_info):
return

mtime, _ = get_mtime_and_size(path_info, self.tree)
Expand All @@ -446,7 +449,7 @@ def get_unused_links(self, used):
inode = self._from_sqlite(inode)
path = os.path.join(self.root_dir, relpath)

if path in used or not os.path.exists(path):
if path in used or not self.tree.exists(path):
continue

actual_inode = get_inode(path)
Expand Down
7 changes: 2 additions & 5 deletions tests/func/test_ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
DvcIgnorePatterns,
DvcIgnoreRepo,
)
from dvc.remote import LocalRemote
from dvc.repo import Repo
from dvc.scm.tree import WorkingTree
from dvc.utils import relpath
Expand Down Expand Up @@ -139,8 +138,7 @@ def test_match_nested(tmp_dir, dvc):
}
)

remote = LocalRemote(dvc, {})
result = {os.fspath(f) for f in remote.tree.walk_files(".")}
result = {os.fspath(os.path.normpath(f)) for f in dvc.tree.walk_files(".")}
assert result == {".dvcignore", "foo"}


Expand All @@ -149,8 +147,7 @@ def test_ignore_external(tmp_dir, scm, dvc, tmp_path_factory):
ext_dir = TmpDir(os.fspath(tmp_path_factory.mktemp("external_dir")))
ext_dir.gen({"y.backup": "y", "tmp": "ext tmp"})

remote = LocalRemote(dvc, {})
result = {relpath(f, ext_dir) for f in remote.tree.walk_files(ext_dir)}
result = {relpath(f, ext_dir) for f in dvc.tree.walk_files(ext_dir)}
assert result == {"y.backup", "tmp"}


Expand Down
4 changes: 2 additions & 2 deletions tests/func/test_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def test_state(tmp_dir, dvc):
path_info = PathInfo(path)
md5 = file_md5(path)[0]

state = State(dvc)
state = State(dvc.cache.local)

with state:
state.save(path_info, md5)
Expand Down Expand Up @@ -57,7 +57,7 @@ def get_inode_mocked(path):
def test_get_state_record_for_inode(get_inode_mock, tmp_dir, dvc):
tmp_dir.gen("foo", "foo content")

state = State(dvc)
state = State(dvc.cache.local)
inode = state.MAX_INT + 2
assert inode != state._to_sqlite(inode)

Expand Down