Skip to content

Commit

Permalink
Fixes #65 - rclone remotes should not be absoluted
Browse files Browse the repository at this point in the history
  • Loading branch information
jfischer committed May 3, 2020
1 parent bd1a95c commit 1b522e6
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 36 deletions.
22 changes: 13 additions & 9 deletions dataworkspaces/dws.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,9 +447,9 @@ def local_files(ctx, role, name, compute_hash: bool, export: bool, imported: boo
+ "the target are removed if they are not present at the source. The default is 'copy'. If master is 'none', "
+ "this option has no effect.",
)
@click.argument("source", type=str)
@click.argument("remote", type=str)
@click.argument(
"dest", type=str
"local_path", type=str
) # Currently, dest is required. Later: make dest optional and use the same path as remote?
@click.pass_context
def rclone(
Expand All @@ -462,10 +462,14 @@ def rclone(
imported: bool,
master: str,
sync_mode: str,
source: str,
dest: str,
remote: str,
local_path: str,
):
"""Add an rclone-d repository as a resource to the workspace. Subcommand of ``add``"""
"""Add an rclone-d repository as a resource to the workspace. Subcommand of ``add``.
This is designed for uni-directional synchronization between a remote and a local_path.
The remote has the form remote_name:remote_path, where remote_name is an entry in your
rclone config file.
"""
ns = ctx.obj
if role is None:
if imported:
Expand All @@ -478,7 +482,7 @@ def rclone(
type=ROLE_PARAM,
)
rclone_re = r".*:.*"
if re.match(rclone_re, source) == None:
if re.match(rclone_re, remote) == None:
raise click.BadOptionUsage(
message="Source in rclone should be specified as remotename:filepath",
option_name="source",
Expand All @@ -495,15 +499,15 @@ def rclone(
raise click.BadOptionUsage(
message="--imported only for source-data roles", option_name="imported"
)
dest = abspath(expanduser(dest))
local_path = abspath(expanduser(local_path))
workspace = find_and_load_workspace(ns.batch, ns.verbose, ns.workspace_dir)
add_command(
"rclone",
role,
name,
workspace,
source,
dest,
remote,
local_path,
config,
compute_hash,
export,
Expand Down
31 changes: 9 additions & 22 deletions dataworkspaces/resources/rclone_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
from dataworkspaces.third_party.rclone import RClone
from dataworkspaces.utils.param_utils import (
ParamType,
ParamParseError,
ParamValidationError,
StringType,
EnumType,
Expand All @@ -41,20 +40,8 @@

class RemoteOriginType(ParamType):
"""Custom param type for maintaining the remote origin in the form
remote:abspath.
remote_name:path. Path does not need to be absolute
"""

def parse(self, str_value: str) -> str:
if ":" not in str_value:
raise ParamParseError(
"Remote origin '%s' is missing a ':', should be of the form remote:path" % str_value
)
(remote_name, rpath) = str_value.split(":")
if os.path.isabs(rpath):
return str_value
else:
return remote_name + ":" + os.path.abspath(rpath)

def validate(self, value: Any) -> None:
if not isinstance(value, str):
raise ParamValidationError("Remote origin '%s' is not a string" % repr(value))
Expand Down Expand Up @@ -107,7 +94,7 @@ def __init__(
ptype=RemoteOriginType(),
)
self.remote_origin = self.param_defs.get("remote_origin", remote_origin) # type: str
(self.remote_name, remote_path) = self.remote_origin.split(":")
(self.remote_name, _) = self.remote_origin.split(":")
self.param_defs.define(
"config",
default_value=None,
Expand Down Expand Up @@ -229,15 +216,15 @@ def __str__(self):


class RcloneFactory(ResourceFactory):
def _add_prechecks(self, local_path, remote_path, config) -> RClone:
def _add_prechecks(self, local_path, remote_origin, config) -> RClone:
if os.path.exists(local_path) and not (os.access(local_path, os.W_OK)):
raise ConfigurationError(local_path + " does not have write permission")
if config:
rclone = RClone(cfgfile=config)
else:
rclone = RClone()
known_remotes = rclone.listremotes()
(remote_name, _) = remote_path.split(":")
(remote_name, _) = remote_origin.split(":")
if remote_name not in known_remotes:
raise ConfigurationError("Remote '" + remote_name + "' not found by rclone")
return rclone
Expand Down Expand Up @@ -286,7 +273,7 @@ def from_command_line(
role,
name,
workspace,
remote_path,
remote_origin,
local_path,
config,
compute_hash,
Expand All @@ -295,8 +282,8 @@ def from_command_line(
master,
sync_mode,
):
rclone = self._add_prechecks(local_path, remote_path, config)
self._copy_from_remote(name, local_path, remote_path, rclone, master, sync_mode)
rclone = self._add_prechecks(local_path, remote_origin, config)
self._copy_from_remote(name, local_path, remote_origin, rclone, master, sync_mode)
setup_path_for_hashes(role, name, workspace, local_path)
if imported:
lineage_path = os.path.join(local_path, "lineage.json")
Expand Down Expand Up @@ -326,7 +313,7 @@ def from_command_line(
name,
role,
workspace,
remote_path,
remote_origin,
global_local_path=local_path,
my_local_path=None,
config=config,
Expand Down Expand Up @@ -421,7 +408,7 @@ def suggest_name(
self,
workspace,
role,
remote_path,
remote_origin,
local_path,
config,
compute_hash,
Expand Down
5 changes: 3 additions & 2 deletions dataworkspaces/third_party/rclone.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def __init__(self, cfgfile=None, cfgstring=None):
else:
# find the default config file used by the rclone installation
ret = self._execute(['rclone', 'config', 'file'])
print(ret)
self.log.debug(ret)
if ret['code'] == 0:
# rclone config file output looks like:
#
Expand All @@ -60,6 +60,7 @@ def __init__(self, cfgfile=None, cfgstring=None):
# so we skip until the '\n'
self.cfgfile = ret['out'].splitlines()[1].decode('utf_8')
else:
print(ret)
raise ConfigurationError("RClone requires either a configuration file or a configuration string")

assert(self.cfgstring or self.cfgfile), 'Either a config string is given or a filename is given'
Expand Down Expand Up @@ -167,7 +168,7 @@ def listremotes(self, flags=[]):
ret = self.run_cmd(command="listremotes", extra_args=flags)
if ret['code'] == 0:
list_remotes = map((lambda s: s[0:-1].decode('utf_8')), ret['out'].split(b'\n'))
print(list_remotes)
#print(list_remotes)
return list(list_remotes)[0:-1]
else:
raise RCloneException('listremotes returns %d %s' % (ret['code'], ret['error']))
Expand Down
51 changes: 48 additions & 3 deletions tests/test_rclone.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,38 @@
from utils_for_tests import BaseCase, WS_DIR, TEMPDIR
from dataworkspaces.lineage import LineageBuilder
from dataworkspaces.api import get_snapshot_history
from dataworkspaces.utils.subprocess_utils import find_exe

# If set to True, by --keep-outputs option, leave the output data
KEEP_OUTPUTS = False

MASTER_DIR=join(TEMPDIR, 'rclone-master')
RESOURCE_DIR=join(WS_DIR, 'rclone-resource')

REAL_REMOTE='dws-test:/dws-test.benedat'

def rclone_found():
try:
find_exe('rclone', "need to install rclone")
return True
except:
return False

def remote_exists(remote_name):
from dataworkspaces.third_party.rclone import RClone
rc = RClone()
return remote_name in rc.listremotes()

@unittest.skipUnless(rclone_found(), "SKIP: rclone executable not found")
class TestRclone(BaseCase):
def tearDown(self):
if not KEEP_OUTPUTS:
super().tearDown()


def _get_rclone(self):
from dataworkspaces.third_party.rclone import RClone
return RClone()

def _assert_file(self, base_path, rel_path, filesize):
path = join(base_path, rel_path)
self.assertTrue(exists(path), "Local file %s is missing"%path)
Expand Down Expand Up @@ -99,10 +119,10 @@ def test_copy_remote_is_master(self):
self.assertEqual(len(history), 2)
snap1 = history[0]
self.assertEqual(['tag1'], snap1.tags)
self.assertEqual('85abb55d8da6a137da75cc05056377572426cebf', snap1.hashval)
self.assertEqual('9162a3c2825841ee9426c28089da4901986bb226', snap1.hashval)
snap2 = history[1]
self.assertEqual(['tag2'], snap2.tags)
self.assertEqual('59990f102b71c7a3755163c0fc2ed8db05cfa532', snap2.hashval)
self.assertEqual('e8ea515f179a08479caafb7c7da05ce18525cbd0', snap2.hashval)

def test_sync_remote_is_master(self):
"""Will pull changes down from master in sync mode.
Expand Down Expand Up @@ -194,6 +214,31 @@ def test_no_master_initial_copy(self):
self._assert_initial_state(RESOURCE_DIR)
self._run_dws(['snapshot', 'tag2'])

@unittest.skipUnless(remote_exists('dws-test'), "SKIP: rclone config file does not contain dws-test remote")
def test_real_remote(self):
"""Test with a real remote that is configured in the rclone config file as dws-test.
We use the sync with remote master scenario."""
self._setup_initial_repo()
# the master is not tied to our workspace, we sync it to the real master using rclone directly
os.mkdir(MASTER_DIR)
self._init_files(MASTER_DIR)
rclone = self._get_rclone()
print("Running initial sync...")
ret = rclone.sync(MASTER_DIR, REAL_REMOTE, flags=['--verbose'])
self.assertEqual(ret['code'], 0, "Return from rclone sync bad: %s"% repr(ret))
print("Sync was successful")
self._run_dws(['add', 'rclone','--role', 'source-data', '--sync-mode=sync', '--master=remote', REAL_REMOTE,
RESOURCE_DIR])
self._assert_initial_state(RESOURCE_DIR)
self._run_dws(['snapshot', 'tag1'])

self._update_files(MASTER_DIR)
ret = rclone.sync(MASTER_DIR, REAL_REMOTE)
self.assertEqual(ret['code'], 0, "Return from rclone sync bad: %s"% repr(ret))
self._run_dws(['pull'])
self._assert_final_state_sync(RESOURCE_DIR)
self._run_dws(['snapshot', 'tag2'])


if __name__ == "__main__":
if len(sys.argv) > 1 and sys.argv[1] == "--keep-outputs":
Expand Down

0 comments on commit 1b522e6

Please sign in to comment.