Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: ssh: Support specifying identity file via environment variable #3149

Merged
merged 4 commits into from Feb 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions datalad/interface/common_cfg.py
Expand Up @@ -220,6 +220,12 @@
'ui': ('question', {
'title': 'Sets a prefix to add before the command call times are noted by DATALAD_CMD_PROTOCOL.'}),
},
'datalad.ssh.identityfile': {
'ui': ('question', {
'title': "If set, pass this file as ssh's -i option."}),
'destination': 'global',
'default': None,
},
'datalad.repo.direct': {
'ui': ('yesno', {
'title': 'Direct Mode for git-annex repositories',
Expand Down
29 changes: 25 additions & 4 deletions datalad/support/sshconnector.py
Expand Up @@ -37,13 +37,16 @@
lgr = logging.getLogger('datalad.support.sshconnector')


def get_connection_hash(hostname, port='', username=''):
def get_connection_hash(hostname, port='', username='', identity_file=''):
"""Generate a hash based on SSH connection properties

This can be used for generating filenames that are unique
to a connection from and to a particular machine (with
port and login username). The hash also contains the local
host name.

Identity file corresponds to a file that will be passed via ssh's -i
option.
"""
# returning only first 8 characters to minimize our chance
# of hitting a limit on the max path length for the Unix socket.
Expand All @@ -52,10 +55,11 @@ def get_connection_hash(hostname, port='', username=''):
# https://github.com/ansible/ansible/issues/11536#issuecomment-153030743
# https://github.com/datalad/datalad/pull/1377
return md5(
'{lhost}{rhost}{port}{username}'.format(
'{lhost}{rhost}{port}{identity_file}{username}'.format(
lhost=gethostname(),
rhost=hostname,
port=port,
identity_file=identity_file,
username=username).encode('utf-8')).hexdigest()[:8]


Expand All @@ -64,7 +68,7 @@ class SSHConnection(object):
"""Representation of a (shared) ssh connection.
"""

def __init__(self, ctrl_path, sshri):
def __init__(self, ctrl_path, sshri, identity_file=None):
"""Create a connection handler

The actual opening of the connection is performed on-demand.
Expand All @@ -76,6 +80,8 @@ def __init__(self, ctrl_path, sshri):
sshri: SSHRI
SSH resource identifier (contains all connection-relevant info),
or another resource identifier that can be converted into an SSHRI.
identity_file : str or None
Value to pass to ssh's -i option.
"""
self._runner = None

Expand All @@ -91,6 +97,8 @@ def __init__(self, ctrl_path, sshri):
if self.sshri.port:
self._ctrl_options += ['-p', '{}'.format(self.sshri.port)]

self._identity_file = identity_file

# essential properties of the remote system
self._remote_props = {}
self._opened_by_us = False
Expand Down Expand Up @@ -207,6 +215,8 @@ def open(self):
ctrl_options = ["-fN",
"-o", "ControlMaster=auto",
"-o", "ControlPersist=15m"] + self._ctrl_options
if self._identity_file:
ctrl_options.extend(["-i", self._identity_file])
# create ssh control master command
cmd = ["ssh"] + ctrl_options + [self.sshri.as_str()]

Expand Down Expand Up @@ -339,6 +349,10 @@ def get_git_version(self):
class SSHManager(object):
"""Keeps ssh connections to share. Serves singleton representation
per connection.

Note: If a connection should use a custom identity file via
`datalad.ssh.identityfile`, that value should be set before this class is
initialized.
"""

def __init__(self):
Expand All @@ -348,6 +362,7 @@ def __init__(self):
# handling of socket_dir, so we do not define them here explicitly
# to an empty list to fail if logic is violated
self._prev_connections = None
self._identity_file = None
# and no explicit initialization in the constructor
# self.assure_initialized()

Expand All @@ -369,6 +384,7 @@ def assure_initialized(self):
cfg = ConfigManager()
self._socket_dir = opj(cfg.obtain('datalad.locations.cache'),
'sockets')
self._identity_file = cfg.get('datalad.ssh.identityfile')
assure_dir(self._socket_dir)
try:
chmod(self._socket_dir, 0o700)
Expand Down Expand Up @@ -427,9 +443,13 @@ def get_connection(self, url):
raise ValueError("Unsupported SSH URL: '{0}', use "
"ssh://host/path or host:path syntax".format(url))

# Ensure self._identityfile has been set, if configured.
self.assure_initialized()

conhash = get_connection_hash(
sshri.hostname,
port=sshri.port,
identity_file=self._identity_file or "",
username=sshri.username)
# determine control master:
ctrl_path = "%s/%s" % (self.socket_dir, conhash)
Expand All @@ -438,7 +458,8 @@ def get_connection(self, url):
if ctrl_path in self._connections:
return self._connections[ctrl_path]
else:
c = SSHConnection(ctrl_path, sshri)
c = SSHConnection(ctrl_path, sshri,
identity_file=self._identity_file)
self._connections[ctrl_path] = c
return c

Expand Down
2 changes: 2 additions & 0 deletions datalad/support/sshrun.py
Expand Up @@ -36,6 +36,8 @@ class SSHRun(Interface):
In addition to SSH alone, this command can make use of datalad's SSH
connection management. Its primary use case is to be used with Git
as 'core.sshCommand' or via "GIT_SSH_COMMAND".

Configure `datalad.ssh.identityfile` to pass a file to the ssh's -i option.
"""
# prevent common args from being added to the docstring
_no_eval_results = True
Expand Down
38 changes: 34 additions & 4 deletions datalad/support/tests/test_sshconnector.py
Expand Up @@ -11,7 +11,11 @@

import logging
import os
import os.path as op
from os.path import exists, isdir, getmtime, join as opj
from mock import patch

from nose import SkipTest

from datalad.support.external_versions import external_versions

Expand Down Expand Up @@ -108,8 +112,10 @@ def test_ssh_manager_close():
manager = SSHManager()

# check for previously existing sockets:
existed_before_1 = exists(opj(manager.socket_dir, 'localhost'))
existed_before_2 = exists(opj(manager.socket_dir, 'datalad-test'))
existed_before_1 = exists(opj(manager.socket_dir,
get_connection_hash('localhost')))
existed_before_2 = exists(opj(manager.socket_dir,
get_connection_hash('datalad-test')))

manager.get_connection('ssh://localhost').open()
manager.get_connection('ssh://datalad-test').open()
Expand All @@ -125,8 +131,10 @@ def test_ssh_manager_close():

manager.close()

still_exists_1 = exists(opj(manager.socket_dir, 'localhost'))
still_exists_2 = exists(opj(manager.socket_dir, 'datalad-test'))
still_exists_1 = exists(opj(manager.socket_dir,
get_connection_hash('localhost')))
still_exists_2 = exists(opj(manager.socket_dir,
get_connection_hash('datalad-test')))

eq_(existed_before_1, still_exists_1)
eq_(existed_before_2, still_exists_2)
Expand Down Expand Up @@ -210,6 +218,28 @@ def test_ssh_compound_cmds():
ssh.close() # so we get rid of the possibly lingering connections


@skip_if_on_windows
@skip_ssh
def test_ssh_custom_identity_file():
ifile = "/tmp/dl-test-ssh-id" # Travis
if not op.exists(ifile):
raise SkipTest("Travis-specific '{}' identity file does not exist"
.format(ifile))

with patch.dict("os.environ", {"DATALAD_SSH_IDENTITYFILE": ifile}):
with swallow_logs(new_level=logging.DEBUG) as cml:
manager = SSHManager()
ssh = manager.get_connection('ssh://localhost')
cmd_out, _ = ssh("echo blah")
expected_socket = op.join(
manager.socket_dir,
get_connection_hash("localhost", identity_file=ifile))
ok_(exists(expected_socket))
manager.close()
assert_in("-i", cml.out)
assert_in(ifile, cml.out)


@skip_if_on_windows # our test setup has no SSH server running
@skip_ssh
def test_ssh_git_props():
Expand Down