Skip to content

Commit

Permalink
Fixes #30 - fix resource merging on pull
Browse files Browse the repository at this point in the history
  • Loading branch information
jfischer committed May 15, 2019
1 parent 5bffc2a commit 8c5ef80
Show file tree
Hide file tree
Showing 7 changed files with 185 additions and 37 deletions.
11 changes: 10 additions & 1 deletion dataworkspaces/commands/clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
from dataworkspaces.resources.resource import \
get_resource_file_path, get_resource_local_params_file_path
import dataworkspaces.commands.actions as actions
from .init import get_config_file_path
from .init import get_config_file_path, get_snapshot_metadata_dir_path,\
get_snapshot_dir_path
from .add import UpdateLocalParams, add_local_dir_to_gitignore_if_needed
from .pull import AddRemoteResource
from .params import get_local_defaults, get_local_params_file_path
Expand Down Expand Up @@ -61,6 +62,14 @@ def clone_command(repository, hostname, directory=None, batch=False, verbose=Fal
raise ConfigurationError("Clone target directory %s already exists" % new_name)
os.rename(initial_path, new_name)
directory = new_name
snapshot_md_dir = get_snapshot_metadata_dir_path(directory)
if not exists(snapshot_md_dir):
# It is possible that we are cloning a repo with no snapshots
os.mkdir(snapshot_md_dir)
snapshot_dir = get_snapshot_dir_path(directory)
if not exists(snapshot_dir):
# It is possible that we are cloning a repo with no snapshots
os.mkdir(snapshot_dir)
if is_a_git_fat_repo(directory):
validate_git_fat_in_path()
import dataworkspaces.third_party.git_fat as git_fat
Expand Down
48 changes: 17 additions & 31 deletions dataworkspaces/commands/pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
import click

import dataworkspaces.commands.actions as actions
from dataworkspaces.utils.git_utils import is_a_git_fat_repo
from dataworkspaces.utils.git_utils import is_a_git_fat_repo,\
get_json_file_from_remote
from dataworkspaces.resources.resource import \
CurrentResources, get_resource_from_json_remote
from .add import UpdateLocalParams, add_local_dir_to_gitignore_if_needed
Expand Down Expand Up @@ -87,21 +88,7 @@ def __str__(self):
', '.join(self.pulled_resource_names)


def get_json_file_from_remote(relpath, workspace_dir, verbose):
    """Fetch one JSON file from the ``origin`` remote and return it parsed.

    ``git archive`` is run from ``workspace_dir`` against
    ``refs/heads/master`` of ``origin`` to produce a tarball holding only
    ``relpath``. The tarball is unpacked into a temporary directory and
    the extracted file is parsed as JSON.

    Any failure along the way is re-raised as a ConfigurationError
    chained to the underlying exception.
    """
    try:
        with tempfile.TemporaryDirectory() as scratch_dir:
            archive_path = join(scratch_dir, 'test.tgz')
            archive_cmd = [
                actions.GIT_EXE_PATH, 'archive',
                '-o', archive_path,
                '--remote=origin',
                'refs/heads/master',
                relpath,
            ]
            actions.call_subprocess(archive_cmd, workspace_dir, verbose)
            with tarfile.open(name=archive_path) as archive:
                archive.extract(relpath, path=scratch_dir)
            extracted_path = join(scratch_dir, relpath)
            with open(extracted_path, 'r') as json_file:
                parsed = json.load(json_file)
            return parsed
    except Exception as e:
        raise ConfigurationError("Problem retrieving file %s from remote"%relpath) from e

def get_resouces_file_from_git_origin(workspace_dir, verbose):
def get_resources_file_from_git_origin(workspace_dir, verbose):
"""We want to read the resources.json file from the remote without pulling or fetching.
We can do that by creating an archive with just the resources.json file.
"""
Expand All @@ -116,28 +103,27 @@ def pull_command(workspace_dir, batch=False, verbose=False,
if not only_workspace:
current_resources = CurrentResources.read_current_resources(workspace_dir,
batch, verbose)
# temporary workaround for issue #30 - skip check for added remote resources
#remote_resources_json = get_resouces_file_from_git_origin(workspace_dir, verbose)
remote_resources_json = get_resources_file_from_git_origin(workspace_dir,
verbose)
pulled_resource_names = []
for name in get_resources_to_process(current_resources, only, skip):
r = current_resources.by_name[name]
plan.append(PullResource(ns, verbose, r))
pulled_resource_names.append(name)
plan.append(PullWorkspace(ns, verbose, workspace_dir))
gitignore_path = None
# temporary workaround for issue #30 - skip check for added remote resources
# for resource_json in remote_resources_json:
# if current_resources.is_a_current_name(resource_json['name']):
# continue
# # resouce not local, was added to the remote workspace
# add_remote_action = AddRemoteResource(ns, verbose, batch, workspace_dir, resource_json)
# plan.append(add_remote_action)
# plan.append(UpdateLocalParams(ns, verbose, add_remote_action.r, workspace_dir))
# add_to_gi = add_local_dir_to_gitignore_if_needed(ns, verbose, add_remote_action.r,
# workspace_dir)
# if add_to_gi:
# plan.append(add_to_gi)
# gitignore_path = add_to_gi.gitignore_path
for resource_json in remote_resources_json:
if current_resources.is_a_current_name(resource_json['name']):
continue
# resource not local, was added to the remote workspace
add_remote_action = AddRemoteResource(ns, verbose, batch, workspace_dir, resource_json)
plan.append(add_remote_action)
plan.append(UpdateLocalParams(ns, verbose, add_remote_action.r, workspace_dir))
add_to_gi = add_local_dir_to_gitignore_if_needed(ns, verbose, add_remote_action.r,
workspace_dir)
if add_to_gi:
plan.append(add_to_gi)
gitignore_path = add_to_gi.gitignore_path
if gitignore_path:
plan.append(actions.GitAdd(ns, verbose, workspace_dir, [gitignore_path]))
plan.append(actions.GitCommit(ns, verbose, workspace_dir, "Added new resources to gitignore"))
Expand Down
6 changes: 4 additions & 2 deletions dataworkspaces/resources/git_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,8 +677,10 @@ def from_json_remote(self, json_data, workspace_dir, batch, verbose):
relative_path = json_data['relative_path']
local_path = join(workspace_dir, relative_path)
if not exists(local_path):
raise ConfigurationError("Subdirectory %s for resource %s not found at %s"%
(relative_path, rname, local_path))
# this subdirectory must have been created in the remote
# resource. We can just wait for the "git pull" to populate
# the contents, but will create a placeholder so our checks pass.
os.mkdir(local_path)
if role==ResourceRoles.RESULTS:
return GitRepoResultsSubdirResource(rname, workspace_dir, relative_path,
verbose)
Expand Down
33 changes: 32 additions & 1 deletion dataworkspaces/utils/git_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
"""
Utility functions related to interacting with git
"""
from os.path import isdir, join, dirname, exists
from os.path import isdir, join, dirname, exists, isdir
from subprocess import run, PIPE
import shutil
import re
import tempfile
import json

import click

Expand Down Expand Up @@ -326,3 +329,31 @@ def validate_git_fat_in_path_if_needed(repo_dir):
find_exe('git-fat', GIT_FAT_ERRMSG,
additional_search_locations=[])


def get_remote_origin_url(repo_dir, verbose):
    """Return the ``remote.origin.url`` setting of the repository at repo_dir.

    The value is read with ``git config --get`` and returned with
    surrounding whitespace stripped. Any failure is re-raised as a
    ConfigurationError chained to the underlying exception.
    """
    try:
        config_cmd = [GIT_EXE_PATH, 'config', '--get', 'remote.origin.url']
        raw_output = call_subprocess(config_cmd, cwd=repo_dir, verbose=verbose)
        return raw_output.strip()
    except Exception as e:
        raise ConfigurationError("Problem getting remote origin from repository at %s. Do you have a remote origin configured?"%
                                 repo_dir) from e

def get_json_file_from_remote(relpath, repo_dir, verbose):
    """Download a JSON file from the remote origin, parse it, and return it.

    The origin URL of the repository at ``repo_dir`` is looked up and the
    remote is shallow-cloned (``--depth=1``) into a temporary directory.
    ``relpath`` is then read from that checkout, i.e. from whatever branch
    a default ``git clone`` checks out (normally the remote's HEAD).

    :param relpath: path of the JSON file relative to the repository root
    :param repo_dir: local repository whose ``origin`` remote is queried
    :param verbose: passed through to the subprocess helpers
    :returns: the parsed JSON content
    :raises ConfigurationError: if the origin URL cannot be determined, or
        if cloning, reading, or parsing the file fails
    """
    remote_url = get_remote_origin_url(repo_dir, verbose)
    try:
        # TemporaryDirectory removes the clone when the block exits,
        # on success and on error alike, so no manual cleanup is needed.
        with tempfile.TemporaryDirectory() as tdir:
            # Issue #30 - we wanted to use the git-archive command,
            # but it is not supported by GitHub.
            call_subprocess([GIT_EXE_PATH, 'clone', '--depth=1', remote_url, 'root'],
                            cwd=tdir, verbose=verbose)
            with open(join(tdir, 'root', relpath), 'r') as f:
                return json.load(f)
    except Exception as e:
        raise ConfigurationError("Problem retrieving file %s from remote"%relpath) from e
2 changes: 1 addition & 1 deletion tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ DATAWORKSPACES:=$(shell cd ../dataworkspaces; pwd)
help:
@echo targets are: test clean mypy help install-rclone-deb

UNIT_TESTS=test_git_utils test_move_results test_api test_git_fat_integration test_lineage_utils test_lineage test_snapshots test_jupyter_kit
UNIT_TESTS=test_git_utils test_move_results test_api test_git_fat_integration test_lineage_utils test_lineage test_snapshots test_jupyter_kit test_push_pull

MYPY_UTILS=workspace_utils.py lineage_utils.py
MYPY_APIS=lineage.py
Expand Down
15 changes: 14 additions & 1 deletion tests/test_git_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
commit_changes_in_repo, checkout_and_apply_commit,\
get_local_head_hash, commit_changes_in_repo_subdir,\
checkout_subdir_and_apply_commit, GIT_EXE_PATH,\
get_subdirectory_hash
get_subdirectory_hash, get_json_file_from_remote


def makefile(relpath, contents):
Expand Down Expand Up @@ -409,6 +409,19 @@ def test_subdir_hash(self):
self.assertTrue('Makefile' in files)


class TestMisc(unittest.TestCase):
    def test_get_json_file_from_remote(self):
        """Fetch tests/data.json from the origin of the repository that
        contains this test file and verify its keys and values.
        """
        tests_dir = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
        repo_root = os.path.abspath(join(tests_dir, '..'))
        payload = get_json_file_from_remote('tests/data.json', repo_root, verbose=True)
        # The file must contain exactly these two keys...
        self.assertEqual(frozenset(payload.keys()), frozenset(['foo', 'bat']))
        # ...with these values.
        for key, expected in (('foo', 'bar'), ('bat', 3)):
            self.assertEqual(payload[key], expected)

if __name__ == '__main__':
unittest.main()

107 changes: 107 additions & 0 deletions tests/test_push_pull.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""
Test cases related to push and pull
"""

import unittest
import sys
import os
import os.path
from os.path import join, exists
import shutil
import subprocess
import filecmp
import json

# Scratch area for this test module: "<path of this file minus extension>_data".
# splitext strips only the trailing extension; str.replace('.py', ...) would
# also rewrite any other ".py" occurring in the absolute path (for example a
# ".pyenv" directory).
TEMPDIR=os.path.splitext(os.path.abspath(os.path.expanduser(__file__)))[0] + '_data'
# Primary workspace and the bare repository that serves as its origin.
WS_DIR=join(TEMPDIR,'workspace')
WS_ORIGIN=join(TEMPDIR, 'workspace_origin.git')
CODE2_DIR=join(WS_DIR, 'code2')
# Second workspace: a clone of the origin, created under its own parent directory.
OTHER_WS_PARENT=join(TEMPDIR, 'workspace2-parent')
OTHER_WS=join(OTHER_WS_PARENT, 'workspace')
OTHER_CODE2_DIR=join(OTHER_WS, 'code2')


try:
import dataworkspaces
except ImportError:
sys.path.append(os.path.abspath(".."))

from dataworkspaces.utils.git_utils import GIT_EXE_PATH
from dataworkspaces.utils.subprocess_utils import find_exe

class BaseCase(unittest.TestCase):
    """Shared fixture and helpers for the push/pull tests.

    setUp recreates an empty TEMPDIR containing the primary workspace
    directory and the parent directory for the second workspace, and
    locates the ``dws`` executable. tearDown removes TEMPDIR again.
    """

    def setUp(self):
        # Start from a clean slate even if a previous run left data behind.
        if os.path.exists(TEMPDIR):
            shutil.rmtree(TEMPDIR)
        os.mkdir(TEMPDIR)
        os.mkdir(WS_DIR)
        os.mkdir(OTHER_WS_PARENT)
        self.dws=find_exe("dws", "Make sure you have enabled your python virtual environment")

    def tearDown(self):
        if os.path.exists(TEMPDIR):
            shutil.rmtree(TEMPDIR)

    def _run_dws(self, dws_args, cwd=WS_DIR, env=None):
        """Run ``dws --verbose --batch <dws_args>`` in cwd.

        Raises subprocess.CalledProcessError on a non-zero exit status.
        """
        # Pass an argument list rather than a shell string so that paths
        # containing spaces or shell metacharacters reach dws unchanged.
        args = [self.dws, '--verbose', '--batch'] + list(dws_args)
        print(' '.join(args) + (' [%s]' % cwd))
        subprocess.run(args, cwd=cwd, env=env, check=True)

    def _run_git(self, git_args, cwd=WS_DIR):
        """Run git with git_args in cwd.

        Raises subprocess.CalledProcessError on a non-zero exit status.
        """
        args = [GIT_EXE_PATH]+git_args
        print(' '.join(args) + (' [%s]' % cwd))
        subprocess.run(args, cwd=cwd, check=True)

    def _assert_files_same(self, f1, f2):
        """Assert that both files exist and have identical contents."""
        self.assertTrue(exists(f1), "Missing file %s" % f1)
        self.assertTrue(exists(f2), "Missing file %s" % f2)
        # shallow=False forces a byte-by-byte comparison.
        self.assertTrue(filecmp.cmp(f1, f2, shallow=False),
                        "Files %s and %s are different" % (f1, f2))

    def _assert_file_contents(self, filepath, expected_contents):
        """Assert that the text of filepath equals expected_contents."""
        with open(filepath, 'r') as f:
            data = f.read()
        self.assertEqual(expected_contents, data, "File %s does not contain expected data"%filepath)

    def _get_resource_set(self, workspace_dir):
        """Return the set of resource names listed in the workspace's
        .dataworkspace/resources.json file.
        """
        resource_file = join(workspace_dir, '.dataworkspace/resources.json')
        with open(resource_file, 'r') as f:
            data = json.load(f)
        return {obj['name'] for obj in data}


class TestPushPull(BaseCase):
    def test_adding_git_subdirectory(self):
        """A git-subdirectory resource added in a clone and pushed must
        show up, with its files, after a pull in the original workspace.
        """
        # Step 1: create the primary workspace and a bare origin, then push.
        init_args = ['init', '--create-resources=code']
        self._run_dws(init_args)
        self._run_git(['init', '--bare', 'workspace_origin.git'], cwd=TEMPDIR)
        self._run_git(['remote', 'add', 'origin', WS_ORIGIN], cwd=WS_DIR)
        self._run_dws(['push'])

        # Step 2: clone a second copy of the workspace.
        self._run_dws(['clone', WS_ORIGIN], cwd=OTHER_WS_PARENT)

        # Step 3: in the copy, add a new resource containing one file,
        # take a snapshot, and push.
        os.mkdir(OTHER_CODE2_DIR)
        self._run_dws(['add', 'git', '--role=code', './code2'], cwd=OTHER_WS)
        pushed_file = join(OTHER_CODE2_DIR, 'test.txt')
        with open(pushed_file, 'w') as out:
            out.write("this is a test.\n")
        self._run_dws(['snapshot', 'S1'], cwd=OTHER_WS)
        self._run_dws(['push'], cwd=OTHER_WS)

        # Step 4: pull back into the original workspace.
        self._run_dws(['pull'], cwd=WS_DIR)

        # Step 5: the file must have arrived with the same content and
        # the new resource must be registered.
        pulled_file = join(CODE2_DIR, 'test.txt')
        self._assert_files_same(pushed_file, pulled_file)
        resource_names = self._get_resource_set(WS_DIR)
        self.assertEqual(resource_names, {'code', 'code2'})

if __name__ == '__main__':
unittest.main()

0 comments on commit 8c5ef80

Please sign in to comment.