diff --git a/.travis.yml b/.travis.yml index b51e1cf7eb..e00ff995f2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -67,10 +67,12 @@ matrix: env: - DATALAD_REPO_VERSION=6 - NOSE_SELECTION_OP=not + - DATALAD_SSH_MULTIPLEX__CONNECTIONS=0 - python: 3.5 env: - DATALAD_REPO_VERSION=6 - NOSE_SELECTION_OP="" + - DATALAD_SSH_MULTIPLEX__CONNECTIONS=0 # To test https://github.com/datalad/datalad/pull/4342 fix in case of no "not" for NOSE. # From our testing in that PR seems to have no effect, but kept around since should not hurt. - LANG=bg_BG.UTF-8 diff --git a/datalad/interface/common_cfg.py b/datalad/interface/common_cfg.py index 04f9f42f9d..fbde1282ad 100644 --- a/datalad/interface/common_cfg.py +++ b/datalad/interface/common_cfg.py @@ -21,6 +21,7 @@ from datalad.support.constraints import EnsureChoice from datalad.support.constraints import EnsureListOf from datalad.support.constraints import EnsureStr +from datalad.utils import on_windows dirs = AppDirs("datalad", "datalad.org") @@ -268,6 +269,13 @@ 'destination': 'global', 'default': None, }, + 'datalad.ssh.multiplex-connections': { + 'ui': ('question', { + 'title': "Whether to use a single shared connection for multiple SSH processes aiming at the same target."}), + 'destination': 'global', + 'default': not on_windows, + 'type': EnsureBool(), + }, 'datalad.annex.retry': { 'ui': ('question', {'title': 'Value for annex.retry to use for git-annex calls', diff --git a/datalad/support/annexrepo.py b/datalad/support/annexrepo.py index d343f92744..dd5c9aa3f2 100644 --- a/datalad/support/annexrepo.py +++ b/datalad/support/annexrepo.py @@ -413,6 +413,9 @@ def _set_shared_connection(self, remote_name, url): remote_name: str url: str """ + if not self.config.obtain('datalad.ssh.multiplex-connections'): + return + from datalad.support.network import is_ssh # Note: # diff --git a/datalad/support/gitrepo.py b/datalad/support/gitrepo.py index e6c11a2dd4..1660d9d44b 100644 --- a/datalad/support/gitrepo.py +++ b/datalad/support/gitrepo.py @@ -951,7 +951,7 @@ def __init__(self, path, url=None, runner=None, create=True, # So that we "share" control paths with git/git-annex if ssh_manager: - ssh_manager.assure_initialized() + ssh_manager.ensure_initialized() # note: we may also want to distinguish between a path to the worktree # and the actual repository diff --git a/datalad/support/sshconnector.py b/datalad/support/sshconnector.py index 396f7772c4..8548b9c279 100644 --- a/datalad/support/sshconnector.py +++ b/datalad/support/sshconnector.py @@ -75,11 +75,10 @@ def get_connection_hash(hostname, port='', username='', identity_file='', @auto_repr -class SSHConnection(object): - """Representation of a (shared) ssh connection. +class BaseSSHConnection(object): + """Representation of an SSH connection. """ - - def __init__(self, ctrl_path, sshri, identity_file=None, + def __init__(self, sshri, identity_file=None, use_remote_annex_bundle=True, force_ip=False): """Create a connection handler @@ -87,8 +86,6 @@ def __init__(self, ctrl_path, sshri, identity_file=None, Parameters ---------- - ctrl_path: str - path to SSH controlmaster sshri: SSHRI SSH resource identifier (contains all connection-relevant info), or another resource identifier that can be converted into an SSHRI. @@ -110,59 +107,37 @@ def __init__(self, ctrl_path, sshri, identity_file=None, "connections: {}".format(sshri)) self.sshri = SSHRI(**{k: v for k, v in sshri.fields.items() if k in ('username', 'hostname', 'port')}) - # on windows cmd args lists are always converted into a string using appropriate - # quoting rules, on other platforms args lists are passed directly and we need - # to take care of quoting ourselves - ctrlpath_arg = "ControlPath={}".format(ctrl_path if on_windows else sh_quote(str(ctrl_path))) - self._ssh_args = ["-o", ctrlpath_arg] - self.ctrl_path = Path(ctrl_path) - if self.sshri.port: - self._ssh_args += ['-p', '{}'.format(self.sshri.port)] - + # arguments only used for opening a connection + self._ssh_open_args = [] + # arguments for annex ssh invocation + self._ssh_args = [] + self._ssh_open_args.extend( + ['-p', '{}'.format(self.sshri.port)] if self.sshri.port else []) if force_ip: - self._ssh_args.append("-{}".format(force_ip)) - self._identity_file = identity_file - self._use_remote_annex_bundle = use_remote_annex_bundle + self._ssh_open_args.append("-{}".format(force_ip)) + if identity_file: + self._ssh_open_args.extend(["-i", identity_file]) + self._use_remote_annex_bundle = use_remote_annex_bundle # essential properties of the remote system self._remote_props = {} - self._opened_by_us = False def __call__(self, cmd, options=None, stdin=None, log_output=True): - """Executes a command on the remote. + raise NotImplementedError - It is the callers responsibility to properly quote commands - for remote execution (e.g. filename with spaces of other special - characters). Use the `sh_quote()` from the module for this purpose. - - Parameters - ---------- - cmd: str - command to run on the remote - options : list of str, optional - Additional options to pass to the `-o` flag of `ssh`. Note: Many - (probably most) of the available configuration options should not be - set here because they can critically change the properties of the - connection. This exists to allow options like SendEnv to be set. + def open(self): + raise NotImplementedError - Returns - ------- - tuple of str - stdout, stderr of the command run. - """ + def close(self): + raise NotImplementedError - # XXX: check for open socket once - # and provide roll back if fails to run and was not explicitly - # checked first - # MIH: this would mean that we would have to distinguish failure - # of a payload command from failure of SSH itself. SSH however, - # only distinguishes success and failure of the entire operation - # Increase in fragility from introspection makes a potential - # performance benefit a questionable improvement. - # make sure we have an open connection, will test if action is needed - # by itself - self.open() + @property + def runner(self): + if self._runner is None: + self._runner = Runner() + return self._runner + def _adjust_cmd_for_bundle_execution(self, cmd): # locate annex and set the bundled vs. system Git machinery in motion if self._use_remote_annex_bundle: remote_annex_installdir = self.get_annex_installdir() @@ -171,23 +146,27 @@ def __call__(self, cmd, options=None, stdin=None, log_output=True): cmd = '{}; {}'.format( 'export "PATH={}:$PATH"'.format(remote_annex_installdir), cmd) + return cmd + + def _exec_ssh(self, ssh_cmd, cmd, options=None, stdin=None, log_output=True): + cmd = self._adjust_cmd_for_bundle_execution(cmd) + + for opt in options or []: + ssh_cmd.extend(["-o", opt]) # build SSH call, feed remote command as a single last argument # whatever it contains will go to the remote machine for execution # we cannot perform any sort of escaping, because it will limit # what we can do on the remote, e.g. concatenate commands with '&&' - ssh_cmd = ["ssh"] + self._ssh_args - for opt in options or []: - ssh_cmd.extend(["-o", opt]) - - ssh_cmd += [self.sshri.as_str()] \ - + [cmd] + ssh_cmd += [self.sshri.as_str()] + [cmd] kwargs = dict( log_stdout=log_output, log_stderr=log_output, log_online=not log_output ) + lgr.debug("%s is used to run %s", self, ssh_cmd) + # TODO: pass expect parameters from above? # Hard to explain to toplevel users ... So for now, just set True return self.runner.run( @@ -197,119 +176,6 @@ def __call__(self, cmd, options=None, stdin=None, log_output=True): stdin=stdin, **kwargs) - @property - def runner(self): - if self._runner is None: - self._runner = Runner() - return self._runner - - def is_open(self): - if not self.ctrl_path.exists(): - lgr.log( - 5, - "Not opening %s for checking since %s does not exist", - self, self.ctrl_path - ) - return False - # check whether controlmaster is still running: - cmd = ["ssh", "-O", "check"] + self._ssh_args + [self.sshri.as_str()] - lgr.debug("Checking %s by calling %s" % (self, cmd)) - try: - # expect_stderr since ssh would announce to stderr - # "Master is running" and that is normal, not worthy warning about - # etc -- we are doing the check here for successful operation - with tempfile.TemporaryFile() as tempf: - out, err = self.runner.run(cmd, stdin=tempf, expect_stderr=True) - res = True - except CommandError as e: - if e.code != 255: - # this is not a normal SSH error, whine ... - raise e - # SSH died and left socket behind, or server closed connection - self.close() - res = False - lgr.debug( - "Check of %s has %s", - self, - {True: 'succeeded', False: 'failed'}[res]) - return res - - def open(self): - """Opens the connection. - - In other words: Creates the SSH ControlMaster to be used by this - connection, if it is not there already. - - Returns - ------- - bool - True when SSH reports success opening the connection, False when - a ControlMaster for an open connection already exists. - - Raises - ------ - ConnectionOpenFailedError - When starting the SSH ControlMaster process failed. - """ - # the socket should vanish almost instantly when the connection closes - # sending explicit 'check' commands to the control master is expensive - # (needs tempfile to shield stdin, Runner overhead, etc...) - # as we do not use any advanced features (forwarding, stop[ing the - # master without exiting) it should be relatively safe to just perform - # the much cheaper check of an existing control path - if self.ctrl_path.exists(): - return False - - # set control options - ctrl_options = ["-fN", - "-o", "ControlMaster=auto", - "-o", "ControlPersist=15m"] + self._ssh_args - if self._identity_file: - ctrl_options.extend(["-i", self._identity_file]) - # create ssh control master command - cmd = ["ssh"] + ctrl_options + [self.sshri.as_str()] - - # start control master: - lgr.debug("Opening %s by calling %s" % (self, cmd)) - proc = Popen(cmd) - stdout, stderr = proc.communicate(input="\n") # why the f.. this is necessary? - - # wait till the command exits, connection is conclusively - # open or not at this point - exit_code = proc.wait() - - if exit_code != 0: - raise ConnectionOpenFailedError( - cmd, - 'Failed to open SSH connection (could not start ControlMaster process)', - exit_code, - stdout, - stderr, - ) - self._opened_by_us = True - return True - - def close(self): - """Closes the connection. - """ - if not self._opened_by_us: - lgr.debug("Not closing %s since was not opened by itself", self) - return - # stop controlmaster: - cmd = ["ssh", "-O", "stop"] + self._ssh_args + [self.sshri.as_str()] - lgr.debug("Closing %s by calling %s", self, cmd) - try: - self.runner.run(cmd, expect_stderr=True, expect_fail=True) - except CommandError as e: - lgr.debug("Failed to run close command") - if self.ctrl_path.exists(): - lgr.debug("Removing existing control path %s", self.ctrl_path) - # socket need to go in any case - self.ctrl_path.unlink() - if e.code != 255: - # not a "normal" SSH error - raise e - def _get_scp_command_spec(self, recursive, preserve_attrs): """Internal helper for SCP interface methods""" # Convert ssh's port flag (-p) to scp's (-P). @@ -450,7 +316,345 @@ def get_git_version(self): @auto_repr -class SSHManager(object): +class NoMultiplexSSHConnection(BaseSSHConnection): + """Representation of an SSH connection. + + The connection is opened for execution of a single process, and closed + as soon as the process end. + """ + def __init__(self, sshri, **kwargs): + """Create a connection handler + + The actual opening of the connection is performed on-demand. + + Parameters + ---------- + sshri: SSHRI + SSH resource identifier (contains all connection-relevant info), + or another resource identifier that can be converted into an SSHRI. + **kwargs + Pass on to BaseSSHConnection + """ + super().__init__(sshri, **kwargs) + self._ssh_open_args += [ + # we presently do not support any interactive authentication + # at the time of process execution + '-o', 'PasswordAuthentication=no', + '-o', 'KbdInteractiveAuthentication=no', + ] + + def __call__(self, cmd, options=None, stdin=None, log_output=True): + """Executes a command on the remote. + + It is the callers responsibility to properly quote commands + for remote execution (e.g. filename with spaces of other special + characters). Use the `sh_quote()` from the module for this purpose. + + Parameters + ---------- + cmd: str + command to run on the remote + options : list of str, optional + Additional options to pass to the `-o` flag of `ssh`. Note: Many + (probably most) of the available configuration options should not be + set here because they can critically change the properties of the + connection. This exists to allow options like SendEnv to be set. + + Returns + ------- + tuple of str + stdout, stderr of the command run. + """ + # there is no dedicated "open" step, put all args together + ssh_cmd = ["ssh"] + self._ssh_open_args + self._ssh_args + return self._exec_ssh( + ssh_cmd, + cmd, + options=options, + stdin=stdin, + log_output=log_output) + + def is_open(self): + return False + + def open(self): + return False + + def close(self): + # we perform blocking execution, we should not return from __call__ until + # the connection is already closed + pass + + +@auto_repr +class MultiplexSSHConnection(BaseSSHConnection): + """Representation of a (shared) ssh connection. + """ + def __init__(self, ctrl_path, sshri, **kwargs): + """Create a connection handler + + The actual opening of the connection is performed on-demand. + + Parameters + ---------- + ctrl_path: str + path to SSH controlmaster + sshri: SSHRI + SSH resource identifier (contains all connection-relevant info), + or another resource identifier that can be converted into an SSHRI. + **kwargs + Pass on to BaseSSHConnection + """ + super().__init__(sshri, **kwargs) + + # on windows cmd args lists are always converted into a string using appropriate + # quoting rules, on other platforms args lists are passed directly and we need + # to take care of quoting ourselves + ctrlpath_arg = "ControlPath={}".format(ctrl_path if on_windows else sh_quote(str(ctrl_path))) + self._ssh_args += ["-o", ctrlpath_arg] + self._ssh_open_args += [ + "-fN", + "-o", "ControlMaster=auto", + "-o", "ControlPersist=15m", + ] + self.ctrl_path = Path(ctrl_path) + self._opened_by_us = False + + def __call__(self, cmd, options=None, stdin=None, log_output=True): + """Executes a command on the remote. + + It is the callers responsibility to properly quote commands + for remote execution (e.g. filename with spaces of other special + characters). Use the `sh_quote()` from the module for this purpose. + + Parameters + ---------- + cmd: str + command to run on the remote + options : list of str, optional + Additional options to pass to the `-o` flag of `ssh`. Note: Many + (probably most) of the available configuration options should not be + set here because they can critically change the properties of the + connection. This exists to allow options like SendEnv to be set. + + Returns + ------- + tuple of str + stdout, stderr of the command run. + """ + + # XXX: check for open socket once + # and provide roll back if fails to run and was not explicitly + # checked first + # MIH: this would mean that we would have to distinguish failure + # of a payload command from failure of SSH itself. SSH however, + # only distinguishes success and failure of the entire operation + # Increase in fragility from introspection makes a potential + # performance benefit a questionable improvement. + # make sure we have an open connection, will test if action is needed + # by itself + self.open() + + ssh_cmd = ["ssh"] + self._ssh_args + return self._exec_ssh( + ssh_cmd, + cmd, + options=options, + stdin=stdin, + log_output=log_output) + + def is_open(self): + if not self.ctrl_path.exists(): + lgr.log( + 5, + "Not opening %s for checking since %s does not exist", + self, self.ctrl_path + ) + return False + # check whether controlmaster is still running: + cmd = ["ssh", "-O", "check"] + self._ssh_args + [self.sshri.as_str()] + lgr.debug("Checking %s by calling %s" % (self, cmd)) + try: + # expect_stderr since ssh would announce to stderr + # "Master is running" and that is normal, not worthy warning about + # etc -- we are doing the check here for successful operation + with tempfile.TemporaryFile() as tempf: + out, err = self.runner.run(cmd, stdin=tempf, expect_stderr=True) + res = True + except CommandError as e: + if e.code != 255: + # this is not a normal SSH error, whine ... + raise e + # SSH died and left socket behind, or server closed connection + self.close() + res = False + lgr.debug( + "Check of %s has %s", + self, + {True: 'succeeded', False: 'failed'}[res]) + return res + + def open(self): + """Opens the connection. + + In other words: Creates the SSH ControlMaster to be used by this + connection, if it is not there already. + + Returns + ------- + bool + True when SSH reports success opening the connection, False when + a ControlMaster for an open connection already exists. + + Raises + ------ + ConnectionOpenFailedError + When starting the SSH ControlMaster process failed. + """ + # the socket should vanish almost instantly when the connection closes + # sending explicit 'check' commands to the control master is expensive + # (needs tempfile to shield stdin, Runner overhead, etc...) + # as we do not use any advanced features (forwarding, stop[ing the + # master without exiting) it should be relatively safe to just perform + # the much cheaper check of an existing control path + if self.ctrl_path.exists(): + return False + + # create ssh control master command + cmd = ["ssh"] + self._ssh_open_args + self._ssh_args + [self.sshri.as_str()] + + # start control master: + lgr.debug("Opening %s by calling %s", self, cmd) + proc = Popen(cmd) + stdout, stderr = proc.communicate(input="\n") # why the f.. this is necessary? + + # wait till the command exits, connection is conclusively + # open or not at this point + exit_code = proc.wait() + + if exit_code != 0: + raise ConnectionOpenFailedError( + cmd, + 'Failed to open SSH connection (could not start ControlMaster process)', + exit_code, + stdout, + stderr, + ) + self._opened_by_us = True + return True + + def close(self): + """Closes the connection. + """ + if not self._opened_by_us: + lgr.debug("Not closing %s since was not opened by itself", self) + return + # stop controlmaster: + cmd = ["ssh", "-O", "stop"] + self._ssh_args + [self.sshri.as_str()] + lgr.debug("Closing %s by calling %s", self, cmd) + try: + self.runner.run(cmd, expect_stderr=True, expect_fail=True) + except CommandError as e: + lgr.debug("Failed to run close command") + if self.ctrl_path.exists(): + lgr.debug("Removing existing control path %s", self.ctrl_path) + # socket need to go in any case + self.ctrl_path.unlink() + if e.code != 255: + # not a "normal" SSH error + raise e + + +@auto_repr +class BaseSSHManager(object): + """Interface for an SSHManager + """ + def ensure_initialized(self): + """Ensures that manager is initialized""" + pass + + assure_initialized = ensure_initialized + + def get_connection(self, url, use_remote_annex_bundle=True, force_ip=False): + """Get an SSH connection handler + + Parameters + ---------- + url: str + ssh url + force_ip : {False, 4, 6} + Force the use of IPv4 or IPv6 addresses. + + Returns + ------- + BaseSSHConnection + """ + raise NotImplementedError + + def _prep_connection_args(self, url): + # parse url: + from datalad.support.network import RI, is_ssh + if isinstance(url, RI): + sshri = url + else: + if ':' not in url and '/' not in url: + # it is just a hostname + lgr.debug("Assuming %r is just a hostname for ssh connection", + url) + url += ':' + sshri = RI(url) + + if not is_ssh(sshri): + raise ValueError("Unsupported SSH URL: '{0}', use " + "ssh://host/path or host:path syntax".format(url)) + + from datalad import cfg + identity_file = cfg.get("datalad.ssh.identityfile") + return sshri, identity_file + + def close(self, allow_fail=True): + """Closes all connections, known to this instance. + + Parameters + ---------- + allow_fail: bool, optional + If True, swallow exceptions which might be thrown during + connection.close, and just log them at DEBUG level + """ + pass + + +@auto_repr +class NoMultiplexSSHManager(BaseSSHManager): + """Does not "manage" and just returns a new connection + """ + + def get_connection(self, url, use_remote_annex_bundle=True, force_ip=False): + """Get a singleton, representing a shared ssh connection to `url` + + Parameters + ---------- + url: str + ssh url + force_ip : {False, 4, 6} + Force the use of IPv4 or IPv6 addresses. + + Returns + ------- + SSHConnection + """ + sshri, identity_file = self._prep_connection_args(url) + + return NoMultiplexSSHConnection( + sshri, + identity_file=identity_file, + use_remote_annex_bundle=use_remote_annex_bundle, + force_ip=force_ip, + ) + + +@auto_repr +class MultiplexSSHManager(BaseSSHManager): """Keeps ssh connections to share. Serves singleton representation per connection. @@ -460,6 +664,7 @@ class SSHManager(object): """ def __init__(self): + super().__init__() self._socket_dir = None self._connections = dict() # Initialization of prev_connections is happening during initial @@ -467,23 +672,22 @@ def __init__(self): # to an empty list to fail if logic is violated self._prev_connections = None # and no explicit initialization in the constructor - # self.assure_initialized() + # self.ensure_initialized() @property def socket_dir(self): """Return socket_dir, and if was not defined before, and also pick up all previous connections (if any) """ - self.assure_initialized() + self.ensure_initialized() return self._socket_dir - def assure_initialized(self): + def ensure_initialized(self): """Assures that manager is initialized - knows socket_dir, previous connections """ if self._socket_dir is not None: return - from ..config import ConfigManager - cfg = ConfigManager() + from datalad import cfg self._socket_dir = \ Path(cfg.obtain('datalad.locations.cache')) / 'sockets' self._socket_dir.mkdir(exist_ok=True, parents=True) @@ -513,6 +717,7 @@ def assure_initialized(self): lgr.log(5, "Found %d previous connections", len(self._prev_connections)) + assure_initialized = ensure_initialized def get_connection(self, url, use_remote_annex_bundle=True, force_ip=False): """Get a singleton, representing a shared ssh connection to `url` @@ -528,24 +733,7 @@ def get_connection(self, url, use_remote_annex_bundle=True, force_ip=False): ------- SSHConnection """ - # parse url: - from datalad.support.network import RI, is_ssh - if isinstance(url, RI): - sshri = url - else: - if ':' not in url and '/' not in url: - # it is just a hostname - lgr.debug("Assuming %r is just a hostname for ssh connection", - url) - url += ':' - sshri = RI(url) - - if not is_ssh(sshri): - raise ValueError("Unsupported SSH URL: '{0}', use " - "ssh://host/path or host:path syntax".format(url)) - - from datalad import cfg - identity_file = cfg.get("datalad.ssh.identityfile") + sshri, identity_file = self._prep_connection_args(url) conhash = get_connection_hash( sshri.hostname, @@ -562,7 +750,7 @@ def get_connection(self, url, use_remote_annex_bundle=True, force_ip=False): if ctrl_path in self._connections: return self._connections[ctrl_path] else: - c = SSHConnection( + c = MultiplexSSHConnection( ctrl_path, sshri, identity_file=identity_file, use_remote_annex_bundle=use_remote_annex_bundle, force_ip=force_ip) @@ -604,6 +792,17 @@ def close(self, allow_fail=True, ctrl_path=None): self._connections = dict() +# retain backward compat with 0.13.4 and earlier +# should be ok since cfg already defined by the time this one is imported +from .. import cfg +if cfg.obtain('datalad.ssh.multiplex-connections'): + SSHManager = MultiplexSSHManager + SSHConnection = MultiplexSSHConnection +else: + SSHManager = NoMultiplexSSHManager + SSHConnection = NoMultiplexSSHConnection + + def _quote_filename_for_scp(name): """Manually escape shell goodies in a file name. diff --git a/datalad/support/tests/test_annexrepo.py b/datalad/support/tests/test_annexrepo.py index 92b98966af..2a6a03c4f1 100644 --- a/datalad/support/tests/test_annexrepo.py +++ b/datalad/support/tests/test_annexrepo.py @@ -88,6 +88,7 @@ skip_if, skip_if_on_windows, skip_if_root, + skip_nomultiplex_ssh, skip_ssh, SkipTest, slow, @@ -1118,7 +1119,7 @@ def test_annex_backends(path): eq_(repo.default_backends, ['MD5E']) -@skip_ssh +@skip_nomultiplex_ssh # too much of "multiplex" testing @with_tempfile(mkdir=True) def test_annex_ssh(topdir): # On Xenial, this hangs with a recent git-annex. It bisects to git-annex's diff --git a/datalad/support/tests/test_gitrepo.py b/datalad/support/tests/test_gitrepo.py index 69094f219e..2e92209129 100644 --- a/datalad/support/tests/test_gitrepo.py +++ b/datalad/support/tests/test_gitrepo.py @@ -49,6 +49,7 @@ ok_, skip_if_no_network, skip_if_on_windows, + skip_nomultiplex_ssh, skip_ssh, SkipTest, slow, @@ -531,7 +532,7 @@ def _path2localsshurl(path): # broken,possibly due to a GitPy issue with windows sshurls # see https://github.com/datalad/datalad/pull/3638 @skip_if_on_windows -@skip_ssh +@skip_nomultiplex_ssh @with_testrepos('.*basic.*', flavors=['local']) @with_tempfile def test_GitRepo_ssh_fetch(remote_path, repo_path): @@ -565,7 +566,7 @@ def test_GitRepo_ssh_fetch(remote_path, repo_path): # broken,possibly due to a GitPy issue with windows sshurls # see https://github.com/datalad/datalad/pull/3638 @skip_if_on_windows -@skip_ssh +@skip_nomultiplex_ssh @with_tempfile @with_tempfile def test_GitRepo_ssh_pull(remote_path, repo_path): @@ -604,7 +605,7 @@ def test_GitRepo_ssh_pull(remote_path, repo_path): # broken,possibly due to a GitPy issue with windows sshurls # see https://github.com/datalad/datalad/pull/3638 @skip_if_on_windows -@skip_ssh +@skip_nomultiplex_ssh @with_tempfile @with_tempfile def test_GitRepo_ssh_push(repo_path, remote_path): diff --git a/datalad/support/tests/test_sshconnector.py b/datalad/support/tests/test_sshconnector.py index 51e8328d37..aaa40b3427 100644 --- a/datalad/support/tests/test_sshconnector.py +++ b/datalad/support/tests/test_sshconnector.py @@ -32,28 +32,43 @@ ok_, patch_config, skip_if_on_windows, + skip_nomultiplex_ssh, skip_ssh, swallow_logs, with_tempfile, with_tree, ) from datalad import cfg as dl_cfg -from ..sshconnector import SSHConnection, SSHManager, sh_quote +from ..sshconnector import ( + SSHConnection, + SSHManager, + MultiplexSSHManager, + MultiplexSSHConnection, + NoMultiplexSSHConnection, + sh_quote, +) from ..sshconnector import get_connection_hash +# Some tests test the internals and assumptions of multiplex connections +_ssh_manager_is_multiplex = SSHManager is MultiplexSSHManager + @skip_ssh def test_ssh_get_connection(): manager = SSHManager() - assert manager._socket_dir is None, \ - "Should be unset upon initialization. Got %s" % str(manager._socket_dir) + if _ssh_manager_is_multiplex: + assert manager._socket_dir is None, \ + "Should be unset upon initialization. Got %s" % str(manager._socket_dir) c1 = manager.get_connection('ssh://datalad-test') - assert manager._socket_dir, "Should be set after interactions with the manager" - assert_is_instance(c1, SSHConnection) - # subsequent call returns the very same instance: - ok_(manager.get_connection('ssh://datalad-test') is c1) + if _ssh_manager_is_multiplex: + assert manager._socket_dir, "Should be set after interactions with the manager" + assert_is_instance(c1, MultiplexSSHConnection) + # subsequent call returns the very same instance: + ok_(manager.get_connection('ssh://datalad-test') is c1) + else: + assert_is_instance(c1, NoMultiplexSSHConnection) # fail on malformed URls (meaning: our fancy URL parser can't correctly # deal with them): @@ -85,15 +100,18 @@ def test_ssh_open_close(tmp_path, tfile1): manager = SSHManager() - path = opj(str(manager.socket_dir), - get_connection_hash('datalad-test', bundled=True)) - # TODO: facilitate the test when it didn't exist - existed_before = exists(path) + socket_path = None + if _ssh_manager_is_multiplex: + socket_path = opj(str(manager.socket_dir), + get_connection_hash('datalad-test', bundled=True)) + # TODO: facilitate the test when it didn't exist + existed_before = exists(socket_path) c1 = manager.get_connection('ssh://datalad-test') c1.open() - # control master exists for sure now - ok_(exists(path)) + if socket_path: + # control master exists for sure now + ok_(exists(socket_path)) # use connection to execute remote command: # we list explicitly local HOME since we override it in module_setup @@ -105,7 +123,8 @@ def test_ssh_open_close(tmp_path, tfile1): remote_ls = [entry for entry in out.splitlines() if entry != '.' and entry != '..'] eq_(set(remote_ls), {"f0", "f1"}) - ok_(exists(path)) + if socket_path: + ok_(exists(socket_path)) # now test for arguments containing spaces and other pleasant symbols out, err = c1('ls -l {}'.format(sh_quote(tfile1))) @@ -116,12 +135,12 @@ def test_ssh_open_close(tmp_path, tfile1): #eq_(err, '') c1.close() - # control master doesn't exist anymore: - ok_(exists(path) == existed_before) + if socket_path: + # control master doesn't exist anymore: + ok_(exists(socket_path) == existed_before) -@skip_if_on_windows -@skip_ssh +@skip_nomultiplex_ssh def test_ssh_manager_close(): manager = SSHManager() @@ -159,7 +178,7 @@ def test_ssh_manager_close(): @with_tempfile def test_ssh_manager_close_no_throw(bogus_socket): - manager = SSHManager() + manager = MultiplexSSHManager() class bogus: def close(self): @@ -202,8 +221,9 @@ def test_ssh_copy(sourcedir, sourcefile1, sourcefile2): # copy tempfile list to remote_url:sourcedir sourcefiles = [sourcefile1, sourcefile2, obscure_file] ssh.put(sourcefiles, opj(remote_url, sourcedir)) - # docs promise that connection is auto-opened - ok_(ssh.is_open()) + # docs promise that connection is auto-opened in case of multiplex + if _ssh_manager_is_multiplex: + ok_(ssh.is_open()) # recursive copy tempdir to remote_url:targetdir targetdir = sourcedir + '.c opy' @@ -245,7 +265,7 @@ def test_ssh_compound_cmds(): @skip_if_on_windows -@skip_ssh +@skip_nomultiplex_ssh def test_ssh_close_target(): manager = SSHManager() path0 = manager.socket_dir / get_connection_hash( @@ -278,11 +298,12 @@ def test_ssh_custom_identity_file(): manager = SSHManager() ssh = manager.get_connection('ssh://datalad-test') cmd_out, _ = ssh("echo blah") - expected_socket = op.join( - str(manager.socket_dir), - get_connection_hash("datalad-test", identity_file=ifile, - bundled=True)) - ok_(exists(expected_socket)) + if _ssh_manager_is_multiplex: + expected_socket = op.join( + str(manager.socket_dir), + get_connection_hash("datalad-test", identity_file=ifile, + bundled=True)) + ok_(exists(expected_socket)) manager.close() assert_in("-i", cml.out) assert_in(ifile, cml.out) diff --git a/datalad/tests/utils.py b/datalad/tests/utils.py index 703886204d..476532b4d1 100644 --- a/datalad/tests/utils.py +++ b/datalad/tests/utils.py @@ -272,6 +272,25 @@ def _wrap_skip_ssh(*args, **kwargs): return _wrap_skip_ssh +def skip_nomultiplex_ssh(func): + """Skips SSH tests if default connection/manager does not support multiplexing + + e.g. currently on windows or if set via datalad.ssh.multiplex-connections config variable + """ + + check_not_generatorfunction(func) + from ..support.sshconnector import MultiplexSSHManager, SSHManager + + @wraps(func) + @attr('skip_nomultiplex_ssh') + @skip_ssh + def _wrap_skip_nomultiplex_ssh(*args, **kwargs): + if SSHManager is not MultiplexSSHManager: + raise SkipTest("SSH without multiplexing is used") + return func(*args, **kwargs) + return _wrap_skip_nomultiplex_ssh + + @optional_args def skip_v6_or_later(func, method='raise'): """Skip tests if v6 or later will be used as the default repo version.