Skip to content

Commit

Permalink
[feat] Sha1 Store enhancements (#1026)
Browse files Browse the repository at this point in the history
* refactor SHA1_Store

- renamed the class
- added created_on field and refactored how init is done
- wrap long lines

* wrapped longer lines

* rename json file ref_count.json

* make sha1_buf_size an argument

* update gitignore

* added tmp_dir

* added new sha1_store add and tests

* update chdir

* add debug to test

* fixing unit test for 1.8

Co-authored-by: Min Xu <min.xu.public@gmail.com>
  • Loading branch information
min-xu-ai and flying-x committed Jul 12, 2022
1 parent 03f3e83 commit 68af57d
Show file tree
Hide file tree
Showing 7 changed files with 336 additions and 275 deletions.
4 changes: 0 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@
*.egg-info/
.testmondata

# experimental weigit
fairscale/experimental/wgit/dev
.wgit

# Build and release
build/
dist/
Expand Down
25 changes: 13 additions & 12 deletions fairscale/experimental/wgit/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from typing import Dict, Tuple, Union

from .pygit import PyGit
from .sha1_store import SHA1_store
from .sha1_store import SHA1_Store


class Repo:
Expand Down Expand Up @@ -42,28 +42,27 @@ def __init__(self, parent_dir: Union[Path, str] = Path.cwd(), init: bool = False
exists = self._exists(self.wgit_parent)
if not exists and init:
# No weigit repo exists and is being initialized with init=True
# Make .wgit directory, create sha1_refs
# Make .wgit directory, create sha1_store
weigit_dir = self.wgit_parent.joinpath(".wgit")
weigit_dir.mkdir(parents=False, exist_ok=True)

# Initializing sha1_store only after wgit has been initialized!
self._sha1_store = SHA1_store(weigit_dir, init=True)
self._sha1_store = SHA1_Store(weigit_dir, init=True)

# # Make the .wgit a git repo
gitignore_files = [
self._sha1_store_path.name,
self._sha1_store.ref_file_path.name,
]
self._pygit = PyGit(weigit_dir, gitignore=gitignore_files)

elif exists and init:
# if weigit repo already exists and init is being called, wrap the existing .wgit/.git repo with PyGit
self._sha1_store = SHA1_store(self.path)
self._sha1_store = SHA1_Store(self.path)
self._pygit = PyGit(self.path)

elif exists and not init:
# weigit exists and non-init commands are triggered
self._sha1_store = SHA1_store(self.path)
self._sha1_store = SHA1_Store(self.path)
self._pygit = PyGit(self.path)

else:
Expand All @@ -86,7 +85,10 @@ def add(self, in_file_path: str) -> None:
metadata_file, parent_sha1 = self._process_metadata_file(rel_file_path)

# add the file to the sha1_store
sha1_hash = self._sha1_store.add(file_path, parent_sha1)
# TODO (Min): We don't add parent sha1 tracking to sha1 store due to
# de-duplication & dependency tracking can create cycles.
# We need to figure out a way to handle deletion.
sha1_hash = self._sha1_store.add(file_path)

# write metadata to the metadata-file
self._write_metadata(metadata_file, file_path, sha1_hash)
Expand Down Expand Up @@ -183,7 +185,7 @@ def _get_metdata_files(self) -> Dict:
metadata_d = dict()
for file in self.path.iterdir(): # iterate over the .wgit directory
# exlude all the .wgit files and directory
if file.name not in {"sha1_store", "sha1_refs.json", ".git", ".gitignore"}:
if file.name not in {"sha1_store", ".git", ".gitignore"}:
# perform a directory walk on the metadata_file directories to find the metadata files
for path in file.rglob("*"):
if path.is_file():
Expand Down Expand Up @@ -275,16 +277,15 @@ def _exists(self, check_dir: Path) -> bool:

def _weigit_repo_exists(self, check_dir: pathlib.Path) -> bool:
"""Returns True if a valid WeiGit repo exists in the path: check_dir"""
wgit_exists, sha1_refs, git_exists, gitignore_exists = self._weight_repo_file_check(check_dir)
return wgit_exists and sha1_refs and git_exists and gitignore_exists
wgit_exists, git_exists, gitignore_exists = self._weight_repo_file_check(check_dir)
return wgit_exists and git_exists and gitignore_exists

def _weight_repo_file_check(self, check_dir: Path) -> tuple:
"""Returns a tuple of boolean corresponding to the existence of each .wgit internally required files."""
wgit_exists = check_dir.joinpath(".wgit").exists()
sha1_refs = check_dir.joinpath(".wgit/sha1_refs.json").exists()
git_exists = check_dir.joinpath(".wgit/.git").exists()
gitignore_exists = check_dir.joinpath(".wgit/.gitignore").exists()
return wgit_exists, sha1_refs, git_exists, gitignore_exists
return wgit_exists, git_exists, gitignore_exists


class RepoStatus(Enum):
Expand Down

0 comments on commit 68af57d

Please sign in to comment.