-
Couldn't load subscription status.
- Fork 1.3k
remote: use separate working tree instance for local cache #3991
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
016d99d
2bc305d
7438546
9c59e62
9da6b2a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,7 +7,6 @@ | |
| from urllib.parse import urlencode, urlunparse | ||
|
|
||
| from dvc.exceptions import DvcException | ||
| from dvc.scm.tree import WorkingTree | ||
| from dvc.utils import current_timestamp, relpath, to_chunks | ||
| from dvc.utils.fs import get_inode, get_mtime_and_size, remove | ||
|
|
||
|
|
@@ -89,10 +88,11 @@ class State: # pylint: disable=too-many-instance-attributes | |
| MAX_INT = 2 ** 63 - 1 | ||
| MAX_UINT = 2 ** 64 - 2 | ||
|
|
||
| def __init__(self, repo): | ||
| def __init__(self, local_cache): | ||
| repo = local_cache.repo | ||
| self.repo = repo | ||
| self.root_dir = repo.root_dir | ||
| self.tree = WorkingTree(self.root_dir) | ||
| self.tree = local_cache.tree.work_tree | ||
|
Comment on lines
+92
to
+95
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @pmrowla State itself has nothing to do with the cache, but rather with the repo's working tree, right? So maybe we should have There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It seemed to me that since state is only applicable whenever there is a local cache, and used as a cache for calculating md5s which will go into the local cache, it would make more sense to have state associated with the local cache's working tree rather than with a repo tree instance. The repo doesn't actually need a working tree to function (since we can use git trees), but local cache and state only use a working tree. |
||
|
|
||
| state_config = repo.config.get("state", {}) | ||
| self.row_limit = state_config.get("row_limit", self.STATE_ROW_LIMIT) | ||
|
|
@@ -394,6 +394,9 @@ def get(self, path_info): | |
| assert isinstance(path_info, str) or path_info.scheme == "local" | ||
| path = os.fspath(path_info) | ||
|
|
||
| # NOTE: use os.path.exists instead of WorkingTree.exists | ||
| # WorkingTree.exists uses lexists() and will return True for broken | ||
| # symlinks that we cannot stat() in get_mtime_and_size | ||
| if not os.path.exists(path): | ||
| return None | ||
|
|
||
|
|
@@ -420,7 +423,7 @@ def save_link(self, path_info): | |
| """ | ||
| assert isinstance(path_info, str) or path_info.scheme == "local" | ||
|
|
||
| if not os.path.exists(path_info): | ||
| if not self.tree.exists(path_info): | ||
| return | ||
|
|
||
| mtime, _ = get_mtime_and_size(path_info, self.tree) | ||
|
|
@@ -446,7 +449,7 @@ def get_unused_links(self, used): | |
| inode = self._from_sqlite(inode) | ||
| path = os.path.join(self.root_dir, relpath) | ||
|
|
||
| if path in used or not os.path.exists(path): | ||
| if path in used or not self.tree.exists(path): | ||
| continue | ||
|
|
||
| actual_inode = get_inode(path) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After remote/cache/tree separation, the only default tree (aka cache.tree) that local cache is working on will be the work tree, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, that's correct