Skip to content

Commit

Permalink
wgit functionalities and skeleton, move to subparsers and addition of…
Browse files Browse the repository at this point in the history
… Repo Class (#1001)

* [feat] Adding wgit within fairscale/experimental/wgit.

* [feat] adding experimental wgit

* [feat] wgit init functionalities and skeleton for the rest.

* adapted the suggested changes

* repo class working

* [feat] wgit functionalities and skeleton. Addition of subparsers and repo class along with some changes.

* [feat] wgit functionalities and skeleton, move to subparsers and addition of Repo Class

* [feat] wgit functionalities and skeleton, move to subparsers and addition of Repo Class

* [docs] changed a comment in .gitignore

* [refactor] changed the sequence of tests in ci_test_list_2.txt
  • Loading branch information
riohib committed Jun 1, 2022
1 parent b3a4c68 commit 3b72794
Show file tree
Hide file tree
Showing 9 changed files with 333 additions and 18 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
*.egg-info/
.testmondata

# experimental weigit
experimental/.gitignore
.wgit

# Build and release
build/
dist/
Expand Down
2 changes: 2 additions & 0 deletions experimental/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
#
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.

from .wgit import cli, weigit_api
1 change: 1 addition & 0 deletions experimental/wgit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
# LICENSE file in the root directory of this source tree.

from .cli import main
from .weigit_api import WeiGit

__version__ = "0.0.1"
83 changes: 78 additions & 5 deletions experimental/wgit/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,91 @@

import argparse

import experimental.wgit as wgit
import experimental.wgit.weigit_api as weigit_api


def main(argv=None):
    """Entry point of the ``wgit`` command line tool.

    Builds a parser with one sub-command per operation (``version``, ``init``,
    ``status``, ``add``, ``commit``, ``checkout``, ``log``), parses ``argv`` and
    forwards the selected command to :class:`weigit_api.WeiGit`.

    Args:
        argv: Argument strings to parse. ``None`` lets ``argparse`` fall back to
            ``sys.argv[1:]``.

    Returns:
        None. When no sub-command is given nothing is executed.
    """
    desc = "WeiGit: A git-like tool for model weight tracking"

    # top level parser and corresponding subparser
    parser = argparse.ArgumentParser(description=desc)
    subparsers = parser.add_subparsers(dest="command")

    # Version
    version_parser = subparsers.add_parser("version", description="Display version")
    version_parser.set_defaults(command="version", subcommand="")

    # Repo
    # `init` and `status` take no arguments: the chosen sub-command is already
    # recorded in `args.command`, so no extra boolean positional is declared.
    subparsers.add_parser("init", description="Initialize a weigit repo")
    subparsers.add_parser("status", description="Shows the repo's current status")

    add_parser = subparsers.add_parser("add", description="add a file to the staged changeset (default: none)")
    add_parser.add_argument(
        "add",
        default="",
        type=str,
        metavar="FILE_PATH",
        help="add a file to the staged changeset (default: none)",
    )

    commit_parser = subparsers.add_parser("commit", description="Commits the staged changes")
    commit_parser.add_argument(
        "-m",
        "--message",
        default="",
        type=str,
        metavar="MESSAGE",
        required=True,
        help="commit message",
    )

    checkout_parser = subparsers.add_parser("checkout", description="checkout from a commit")
    checkout_parser.add_argument(
        "checkout",
        default="",
        type=str,
        metavar="FILE_SHA1",
        help="checkout from a commit",
    )

    log_parser = subparsers.add_parser("log", description="Show the history log of the repo or optionally of a file.")
    log_parser.add_argument(
        "-f",
        "--file",
        default="",
        type=str,
        metavar="FILE_PATH",
        help="Show the history log of a file",
    )

    args = parser.parse_args(argv)

    # Exactly one sub-command (or none) is selected, so dispatch with a single chain.
    if args.command == "init":
        # Constructing WeiGit performs the repository initialization.
        weigit_api.WeiGit()
    elif args.command == "add":
        weigit_api.WeiGit.add(args.add)
    elif args.command == "status":
        weigit_api.WeiGit.status()
    elif args.command == "log":
        weigit_api.WeiGit.log(args.file)
    elif args.command == "commit":
        weigit_api.WeiGit.commit(args.message)
    elif args.command == "checkout":
        # NOTE(review): the parsed FILE_SHA1 (args.checkout) is not forwarded;
        # WeiGit.checkout() is called without arguments, as before.
        weigit_api.WeiGit.checkout()
    elif args.command == "version":
        print(wgit.__version__)

if __name__ == "__main__":
Expand Down
8 changes: 8 additions & 0 deletions experimental/wgit/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from enum import Enum


class ExitCode(Enum):
    """Exit statuses used by wgit when it terminates the process.

    NOTE(review): members are plain ``Enum`` values, not ints. Passing a member
    itself to ``sys.exit`` makes Python print it and exit with status 1; pass
    ``member.value`` to use the number defined here as the exit status.
    """

    # Presumably "no error"; not referenced by the code visible alongside this enum.
    CLEAN = 0
    # Used when the `.wgit` directory (or a file inside it) already exists.
    FILE_EXISTS_ERROR = 1

    ERROR = -1  # unknown errors
158 changes: 158 additions & 0 deletions experimental/wgit/weigit_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.


import os
import pathlib
import sys

import pygit2

from experimental.wgit.utils import ExitCode


class WeiGit:
    """Top-level API of the weigit (wgit) tool.

    Instantiating the class initializes a new repository in the current working
    directory. The remaining operations are static methods that first check,
    through ``Repo``, that a repository exists and then print a placeholder
    message.
    """

    def __init__(self) -> None:
        """Initialize a weigit repository under ``./.wgit``.

        Planned Features:
            1. create the wgit directory. Error, if the dir already exists.
            2. SHA1Store.init()
            3. Create SHA1 .wgit/sha1_ref_count.json
            4. Initialize a .git directory within the .wgit using `git init`.
            5. add a .gitignore within the .wgit directory, so that the git repo
               within will ignore `sha1_ref_count.json`

        On failure a message is written to stderr and the process exits with the
        numeric value of the matching ``ExitCode`` member.
        """
        # Make .wgit directory. If it already exists, we error out.
        try:
            os.mkdir(".wgit")
        except FileExistsError:
            sys.stderr.write("An exception occured while wgit initialization: WeiGit already Initialized\n")
            # `.value`: sys.exit() only treats real integers as an exit status.
            sys.exit(ExitCode.FILE_EXISTS_ERROR.value)

        # .wgit was just created, so set up the rest of the repository.
        SHA1_store()

        # Create an empty sha1_ref_count.json inside the fresh .wgit directory.
        ref_count_json = ".wgit/sha1_ref_count.json"
        try:
            # NOTE(review): mode "w" truncates an existing file rather than
            # raising FileExistsError, so this handler is purely defensive.
            with open(ref_count_json, "w"):
                pass
        except FileExistsError as error:
            sys.stderr.write(f"An exception occured while creating {ref_count_json}: {repr(error)}\n")
            sys.exit(ExitCode.FILE_EXISTS_ERROR.value)

        # Make the .wgit a git repo
        try:
            pygit2.init_repository(".wgit/.git", False)
        except Exception as error:
            # `Exception`, not `BaseException`: Ctrl-C and SystemExit must propagate.
            sys.stderr.write(f"An exception occurred while initializing .wgit/.git: {repr(error)}\n")
            sys.exit(ExitCode.ERROR.value)

        # add a .gitignore so the inner git repo ignores the ref-count file
        gitignore = ".wgit/.gitignore"
        try:
            with open(gitignore, "w") as f:
                f.write("sha1_ref_count.json")
        except FileExistsError as error:
            sys.stderr.write(f"An exception occured while creating {gitignore}: {repr(error)}\n")
            sys.exit(ExitCode.FILE_EXISTS_ERROR.value)

    @staticmethod
    def add(file):
        """Placeholder for staging ``file``; only prints when a repo exists."""
        if Repo(os.getcwd()).exists():
            print("wgit added")

    @staticmethod
    def status():
        """Placeholder for showing the repo status; only prints when a repo exists."""
        if Repo(os.getcwd()).exists():
            print("wgit status")

    @staticmethod
    def log(file):
        """Placeholder for the history log.

        Prints a file-specific line when ``file`` is truthy, a generic one otherwise.
        """
        if Repo(os.getcwd()).exists():
            if file:
                print(f"wgit log of the file: {file}")
            else:
                print("wgit log")

    @staticmethod
    def commit(message):
        """Placeholder for committing staged changes with ``message``."""
        if Repo(os.getcwd()).exists():
            if message:
                print(f"commited with message: {message}")
            else:
                print("wgit commit")

    @staticmethod
    def checkout():
        """Placeholder for checking out a commit; only prints when a repo exists."""
        if Repo(os.getcwd()).exists():
            print("wgit checkout")

    @staticmethod
    def compression():
        """Not implemented yet; prints a notice."""
        print("Not Implemented!")

    @staticmethod
    def checkout_by_steps():
        """Not implemented yet; prints a notice."""
        print("Not Implemented!")


class SHA1_store:
    """Placeholder for the SHA1-addressed store; nothing is implemented yet.

    Planned Features:
        1. init
        2. add <file or data> -> SHA1
        3. remove(SHA1)
        4. add_ref(children_SHA1, parent_SHA1)
        5. read(SHA1) -> (return value not specified yet)
        6. lookup(SHA1) -> file path to the data. NotFound Exception if not found.
    """

    def __init__(self) -> None:
        """Create the store object; currently a no-op."""


class Repo:
    """
    Designates the weigit repo, which is identified by a path to the repo.

    ``check_dir`` is the directory where the search for a ``.wgit`` directory
    starts; ``repo_path`` holds the ``.wgit`` path once one has been found.
    """

    def __init__(self, check_dir) -> None:
        """Remember the (symlink-resolved) directory to start searching from."""
        self.repo_path = None
        self.check_dir = os.path.realpath(check_dir)

    @staticmethod
    def _weigit_repo_exists(check_dir):
        """
        checks if the input path to dir (check_dir) is a valid weigit repo
        with .git and sha1_ref_count in the repo.
        """
        wgit_dir = pathlib.Path(check_dir) / ".wgit"
        required = (wgit_dir, wgit_dir / "sha1_ref_count.json", wgit_dir / ".git")
        return all(path.exists() for path in required)

    def exists(self):
        """Return True if a weigit repo is found, False otherwise.

        ``check_dir`` is inspected first. If it holds no repo, its parents are
        inspected one by one until the current working directory has been
        checked or the filesystem root is reached. ``check_dir`` is updated
        while walking and ``repo_path`` is set on success. A hint is printed
        when no repo is found.
        """
        if self._weigit_repo_exists(self.check_dir):
            self.repo_path = os.path.join(self.check_dir, ".wgit")
        else:
            cwd = os.getcwd()
            while self.check_dir != cwd:
                parent = os.path.dirname(self.check_dir)
                if parent == self.check_dir:
                    # Filesystem root reached without passing through cwd:
                    # stop instead of looping forever on dirname(root) == root.
                    break
                self.check_dir = parent

                if self._weigit_repo_exists(self.check_dir):
                    self.repo_path = os.path.join(self.check_dir, ".wgit")
                    break

        if self.repo_path is None:
            print("Initialize a weigit repo first!!")
            return False
        return True

    def get_repo_path(self):
        """Return the ``.wgit`` path, searching for it first if not yet known.

        Returns None when no repo can be found.
        """
        if self.repo_path is None:
            self.exists()
        return self.repo_path
3 changes: 3 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,6 @@ numpy == 1.21.5

# For layerwise gradient scaler
sklearn >= 0.0

# For weigit
pygit2==1.9.2
18 changes: 9 additions & 9 deletions tests/ci_test_list_2.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
tests/experimental/nn/test_mevo.py
tests/experimental/nn/test_multiprocess_pipe.py
tests/experimental/nn/test_sync_batchnorm.py
tests/experimental/nn/ampnet_pipe_process/test_ampnet_pipe.py
tests/experimental/nn/test_offload.py
tests/experimental/nn/test_auto_shard.py
tests/experimental/optim/test_dynamic_loss_scaler.py
tests/experimental/tooling/test_layer_memory_tracker.py
tests/experimental/nn/test_ssd_offload.py
tests/nn/data_parallel/test_fsdp_shared_weights_mevo.py
tests/nn/data_parallel/test_fsdp_shared_weights.py
tests/nn/data_parallel/test_fsdp_pre_backward_hook.py
Expand Down Expand Up @@ -41,13 +50,4 @@ tests/nn/pipe/test_dependency.py
tests/nn/pipe/test_stream.py
tests/nn/moe/test_moe_layer.py
tests/nn/moe/test_top2gating.py
tests/experimental/nn/test_mevo.py
tests/experimental/nn/test_multiprocess_pipe.py
tests/experimental/nn/test_sync_batchnorm.py
tests/experimental/nn/ampnet_pipe_process/test_ampnet_pipe.py
tests/experimental/nn/test_offload.py
tests/experimental/nn/test_auto_shard.py
tests/experimental/optim/test_dynamic_loss_scaler.py
tests/experimental/tooling/test_layer_memory_tracker.py
tests/experimental/nn/test_ssd_offload.py
tests/nn/data_parallel/test_fsdp_offload.py

0 comments on commit 3b72794

Please sign in to comment.