Skip to content

Commit

Permalink
Address issus with non-default branches for dws repo and git resource…
Browse files Browse the repository at this point in the history
… repos. This fixes issues #75 and #76
  • Loading branch information
jfischer committed Aug 10, 2021
1 parent c37d1c3 commit 69d4183
Show file tree
Hide file tree
Showing 7 changed files with 197 additions and 46 deletions.
69 changes: 50 additions & 19 deletions dataworkspaces/backends/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,10 @@
verify_git_config_initialized,
git_remove_file,
git_remove_subtree,
get_branch_info,
ensure_entry_in_gitignore,
echo_git_status_for_user,
switch_git_branch_if_needed
)
from dataworkspaces.utils.git_fat_utils import (
validate_git_fat_in_path_if_needed,
Expand All @@ -65,6 +67,7 @@
get_scratch_directory,
SCRATCH_DIRECTORY,
LOCAL_SCRATCH_DIRECTORY,
DWS_GIT_BRANCH
)
from dataworkspaces.utils.lineage_utils import (
FileLineageStore,
Expand Down Expand Up @@ -358,17 +361,19 @@ def pull_workspace(self) -> ws.SyncedWorkspaceMixin:
)
validate_git_fat_in_path_if_needed(self.workspace_dir)

# do the pooling
# do the pulls
branch = self.get_dws_git_branch()
call_subprocess(
[GIT_EXE_PATH, "pull", "origin", "master"], cwd=self.workspace_dir, verbose=self.verbose
[GIT_EXE_PATH, "pull", "origin", branch], cwd=self.workspace_dir, verbose=self.verbose
)
run_git_fat_pull_if_needed(self.workspace_dir, self.verbose)

# reload and return new workspace
return Workspace(self.workspace_dir, batch=self.batch, verbose=self.verbose)

def _push_precheck(self, resource_list: List[ws.LocalStateResourceMixin]) -> None:
if is_pull_needed_from_remote(self.workspace_dir, "master", self.verbose):
branch = self.get_dws_git_branch()
if is_pull_needed_from_remote(self.workspace_dir, branch, self.verbose):
raise ConfigurationError(
"Data workspace at %s requires a pull from remote origin" % self.workspace_dir
)
Expand All @@ -389,9 +394,10 @@ def _push_precheck(self, resource_list: List[ws.LocalStateResourceMixin]) -> Non
super()._push_precheck(resource_list)

def push(self, resource_list: List[ws.LocalStateResourceMixin]) -> None:
branch = self.get_dws_git_branch()
super().push(resource_list)
call_subprocess(
[GIT_EXE_PATH, "push", "origin", "master"], cwd=self.workspace_dir, verbose=self.verbose
[GIT_EXE_PATH, "push", "origin", branch], cwd=self.workspace_dir, verbose=self.verbose
)
run_git_fat_push_if_needed(self.workspace_dir, verbose=self.verbose)

Expand Down Expand Up @@ -586,6 +592,20 @@ def as_lineage_ws(self) -> ws.SnapshotWorkspaceMixin:
"""
return cast(ws.SnapshotWorkspaceMixin, self)

def get_dws_git_branch(self) -> str:
"""This is an implementation-specific method to get the branch name for
the workspace. It is used internally as well as by the Git subdirectory
resource (which uses the same git repo as the workspace). We define this
method rather than just getting the dws_git_branch parameter because it
might not have been set if the repo was created pre-1.5."""
branch = self.global_params.get(DWS_GIT_BRANCH, None)
if branch is not None:
return branch
else:
(dws_git_branch, _) = get_branch_info(self.workspace_dir, self.verbose)
self.global_params[DWS_GIT_BRANCH] = dws_git_branch # cache for subsequent calls
return dws_git_branch


class WorkspaceFactory(ws.WorkspaceFactory):
@staticmethod
Expand Down Expand Up @@ -639,16 +659,10 @@ def init_workspace( # type: ignore
(abs_scratch_dir, scratch_dir_gitignore) = init_scratch_directory(
scratch_dir, workspace_dir, global_params, local_params
)
with open(join(workspace_dir, CONFIG_FILE_PATH), "w") as f:
json.dump(
{
"name": workspace_name,
"dws-version": dws_version,
"global_params": global_params,
},
f,
indent=2,
)
if exists(join(workspace_dir, ".git")):
click.echo("%s is already a git repository, will just add to it" % workspace_dir)
else:
git_init(workspace_dir, verbose=verbose)
with open(join(workspace_dir, RESOURCES_FILE_PATH), "w") as f:
json.dump([], f, indent=2)
with open(join(workspace_dir, LOCAL_PARAMS_PATH), "w") as f:
Expand All @@ -661,13 +675,9 @@ def init_workspace( # type: ignore
f.write("%s\n" % basename(LOCAL_PARAMS_PATH))
f.write("%s\n" % basename(RESOURCE_LOCAL_PARAMS_PATH))
f.write("current_lineage/\n")
if exists(join(workspace_dir, ".git")):
click.echo("%s is already a git repository, will just add to it" % workspace_dir)
else:
git_init(workspace_dir, verbose=verbose)
git_add(
workspace_dir,
[CONFIG_FILE_PATH, RESOURCES_FILE_PATH, GIT_IGNORE_FILE_PATH],
[RESOURCES_FILE_PATH, GIT_IGNORE_FILE_PATH],
verbose=verbose,
)
if scratch_dir_gitignore is not None:
Expand All @@ -690,6 +700,23 @@ def init_workspace( # type: ignore
)
commit_changes_in_repo(workspace_dir, "dws init", verbose=verbose)

# Now that we've done an initial commit, we figure out the branch we
# are on, create the config file and commit that.
(dws_git_branch, _) = get_branch_info(workspace_dir, verbose=verbose)
global_params[DWS_GIT_BRANCH] = dws_git_branch
with open(join(workspace_dir, CONFIG_FILE_PATH), "w") as f:
json.dump(
{
"name": workspace_name,
"dws-version": dws_version,
"global_params": global_params,
},
f,
indent=2,
)
git_add(workspace_dir, [CONFIG_FILE_PATH], verbose=verbose)
commit_changes_in_repo(workspace_dir, "dws init (config.json)", verbose=verbose)

if not isdir(abs_scratch_dir):
if verbose:
print("Creating scratch directory %s" % abs_scratch_dir)
Expand Down Expand Up @@ -778,6 +805,10 @@ def clone_workspace(local_params: JSONDict, batch: bool, verbose: bool, *args) -
global_params = cf_data["global_params"]
# get the scratch directory (also adds local param if needed)
abs_scratch_dir = clone_scratch_directory(directory, global_params, local_params, batch)
if DWS_GIT_BRANCH in global_params and (global_params[DWS_GIT_BRANCH] is not None):
# if the branch is specified, make sure we are on it
switch_git_branch_if_needed(directory, global_params[DWS_GIT_BRANCH],
verbose=verbose)
if not isdir(abs_scratch_dir):
if verbose:
print("Creating scratch directory %s" % abs_scratch_dir)
Expand Down
3 changes: 2 additions & 1 deletion dataworkspaces/dws.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,8 @@ def rclone(
@click.option("--role", type=ROLE_PARAM)
@click.option("--name", type=str, default=None, help="Short name for this resource")
@click.option(
"--branch", type=str, default="master", help="Branch of the repo to use, defaults to master."
"--branch", type=str, default=None,
help="Branch of the repo to use. If not specified, defaults to the current branch."
)
@click.option(
"--read-only",
Expand Down
45 changes: 20 additions & 25 deletions dataworkspaces/resources/git_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
git_commit,
git_add,
is_git_staging_dirty,
switch_git_branch,
switch_git_branch_if_needed,
)
from dataworkspaces.utils.git_fat_utils import (
is_a_git_fat_repo,
Expand Down Expand Up @@ -551,26 +553,6 @@ def get_remote_origin(local_path, verbose=False):
return cp.stdout.rstrip()


def switch_git_branch(local_path, branch, verbose):
try:
call_subprocess([GIT_EXE_PATH, "checkout", branch], cwd=local_path, verbose=verbose)
except Exception as e:
raise ConfigurationError(
"Unable to switch git repo at %s to branch %s" % (local_path, branch)
) from e


def switch_git_branch_if_needed(local_path, branch, verbose, ok_if_not_present=False):
(current, others) = get_branch_info(local_path, verbose)
if branch == current:
return
else:
if (branch not in others) and (not ok_if_not_present):
raise InternalError(
"Trying to switch to branch %s not in repo at %s" % (branch, others)
)
switch_git_branch(local_path, branch, verbose)


class GitLocalPathType(LocalPathType):
def __init__(self, remote_url: str, verbose: bool):
Expand Down Expand Up @@ -617,9 +599,10 @@ def from_command_line(
and wspath is not None
and lpr.startswith(wspath)
):
if branch != "master":
dws_branch = workspace.get_dws_git_branch()
if (branch is not None ) and (branch != dws_branch):
raise ConfigurationError(
"Only the branch 'master' is available for resources that are within the workspace's git repository"
f"Branch for subdirectory must match brach of primary dws git repository ({dws_branch})"
)
elif read_only:
raise ConfigurationError(
Expand Down Expand Up @@ -657,6 +640,8 @@ def from_command_line(
+ "Please configure before adding resource."
)
(current, others) = get_branch_info(local_path, workspace.verbose)
if branch is None:
branch = current # we default to the current branch
if branch != current and branch not in others:
raise ConfigurationError(
"Requested branch '%s' is not available for git repository at %s"
Expand Down Expand Up @@ -774,7 +759,7 @@ def clone(self, params, workspace):
else:
# the repo already exists locally, and we've alerady verified that then
# remote is correct
cmd = [GIT_EXE_PATH, "pull", "origin", "master"]
cmd = [GIT_EXE_PATH, "pull", "origin", branch]
call_subprocess(cmd, local_path, workspace.verbose)
switch_git_branch_if_needed(local_path, branch, workspace.verbose, ok_if_not_present=True)
ensure_git_lfs_configured_if_needed(local_path, verbose=workspace.verbose)
Expand Down Expand Up @@ -928,6 +913,11 @@ def restore(self, hashval):
"Should never call restore on a git subdirectory resource (%s)" % self.name
)

def get_branch(self):
"""The branch will be the same as our parent"""
assert isinstance(self.workspace, git_backend.Workspace)
return self.workspace.get_dws_git_branch()

def add_results_file(self, data: Union[JSONDict, JSONList], rel_dest_path: str) -> None:
"""Save JSON results data to the specified path in the resource.
"""
Expand Down Expand Up @@ -963,7 +953,7 @@ def push_precheck(self):
"Git repo at %s has uncommitted changes. Please commit your changes before pushing."
% self.workspace_dir
)
if is_pull_needed_from_remote(self.workspace_dir, "master", self.workspace.verbose):
if is_pull_needed_from_remote(self.workspace_dir, self.get_branch(), self.workspace.verbose):
raise ConfigurationError(
"Resource '%s' requires a pull from the remote origin before pushing." % self.name
)
Expand Down Expand Up @@ -1093,6 +1083,11 @@ def restore(self, hashval):
self.workspace_dir, self.relative_path, hashval, verbose=self.workspace.verbose
)

def get_branch(self):
"""The branch will be the same as our parent"""
assert isinstance(self.workspace, git_backend.Workspace)
return self.workspace.get_dws_git_branch()

def push_precheck(self):
if not exists(self.local_path):
raise ConfigurationError(
Expand All @@ -1107,7 +1102,7 @@ def push_precheck(self):
"Git repo at %s has uncommitted changes. Please commit your changes before pushing."
% self.local_path
)
if is_pull_needed_from_remote(self.local_path, "master", self.workspace.verbose):
if is_pull_needed_from_remote(self.local_path, self.get_branch(), self.workspace.verbose):
raise ConfigurationError(
"Resource '%s' requires a pull from the remote origin before pushing." % self.name
)
Expand Down
20 changes: 20 additions & 0 deletions dataworkspaces/utils/git_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,26 @@ def get_branch_info(local_path, verbose=False):
return (current, other)


def switch_git_branch(local_path, branch, verbose):
try:
call_subprocess([GIT_EXE_PATH, "checkout", branch], cwd=local_path, verbose=verbose)
except Exception as e:
raise ConfigurationError(
"Unable to switch git repo at %s to branch %s" % (local_path, branch)
) from e


def switch_git_branch_if_needed(local_path, branch, verbose, ok_if_not_present=False):
(current, others) = get_branch_info(local_path, verbose)
if branch == current:
return
else:
if (branch not in others) and (not ok_if_not_present):
raise InternalError(
"Trying to switch to branch %s not in repo at %s" % (branch, others)
)
switch_git_branch(local_path, branch, verbose)


def git_remove_subtree(
repo_dir: str, relative_path: str, remove_history: bool = False, verbose: bool = False
Expand Down
9 changes: 9 additions & 0 deletions dataworkspaces/utils/param_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,15 @@ def __str__(self):
)


DWS_GIT_BRANCH = define_param(
"dws_git_branch",
default_value=None,
optional=True,
help="Name of git branch for the main dws repository. This is set when the dws workspace is initialized to the "+
"current branch.",
ptype=StringType()
)

def get_global_param_defaults():
"""Return a mapping of all default values of global params for use
in generating the initial config file
Expand Down
2 changes: 1 addition & 1 deletion tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ DATAWORKSPACES:=$(shell cd ../dataworkspaces; pwd)
help:
@echo targets are: test clean mypy pyflakes check help install-rclone-deb format-with-black

UNIT_TESTS=test_git_utils test_file_utils test_move_results test_snapshots test_push_pull test_local_files_resource test_hashtree test_lineage_utils test_git_fat_integration test_git_lfs test_lineage test_jupyter_kit test_sklearn_kit test_api test_wrapper_utils test_tensorflow test_scratch_dir test_export test_import test_rclone
UNIT_TESTS=test_git_utils test_file_utils test_move_results test_snapshots test_push_pull test_local_files_resource test_hashtree test_lineage_utils test_git_fat_integration test_git_lfs test_lineage test_jupyter_kit test_sklearn_kit test_api test_wrapper_utils test_tensorflow test_scratch_dir test_export test_import test_rclone test_alternative_branch

MYPY_KITS=scikit_learn.py jupyter.py tensorflow.py wrapper_utils.py

Expand Down

0 comments on commit 69d4183

Please sign in to comment.