Skip to content

Commit 1fe67f9

Browse files
siveljimi-c
authored andcommitted
Extend SSH Retry to put_file and fetch_file (#20187)
* Move retry logic into _ssh_retry decorator, and apply to exec_command, put_file and fetch_file * Update tests to reflect change * Move _ssh_retry to _run, and update tests to reflect * piped should use exec_command instead of removed _exec_command * Rework tests to support selectors instead of select.select
1 parent 911600a commit 1fe67f9

File tree

2 files changed

+207
-105
lines changed

2 files changed

+207
-105
lines changed

Diff for: lib/ansible/plugins/connection/ssh.py

+79-71
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import subprocess
3030
import time
3131

32+
from functools import wraps
3233
from ansible import constants as C
3334
from ansible.compat import selectors
3435
from ansible.compat.six import PY3, text_type, binary_type
@@ -51,6 +52,54 @@
5152
SSHPASS_AVAILABLE = None
5253

5354

55+
def _ssh_retry(func):
56+
"""
57+
Decorator to retry ssh/scp/sftp in the case of a connection failure
58+
59+
Will retry if:
60+
* an exception is caught
61+
* ssh returns 255
62+
Will not retry if
63+
* remaining_tries is <2
64+
* retries limit reached
65+
"""
66+
@wraps(func)
67+
def wrapped(self, *args, **kwargs):
68+
remaining_tries = int(C.ANSIBLE_SSH_RETRIES) + 1
69+
cmd_summary = "%s..." % args[0]
70+
for attempt in range(remaining_tries):
71+
try:
72+
return_tuple = func(self, *args, **kwargs)
73+
display.vvv(return_tuple, host=self.host)
74+
# 0 = success
75+
# 1-254 = remote command return code
76+
# 255 = failure from the ssh command itself
77+
if return_tuple[0] != 255:
78+
break
79+
else:
80+
raise AnsibleConnectionFailure("Failed to connect to the host via ssh: %s" % to_native(return_tuple[2]))
81+
except (AnsibleConnectionFailure, Exception) as e:
82+
if attempt == remaining_tries - 1:
83+
raise
84+
else:
85+
pause = 2 ** attempt - 1
86+
if pause > 30:
87+
pause = 30
88+
89+
if isinstance(e, AnsibleConnectionFailure):
90+
msg = "ssh_retry: attempt: %d, ssh return code is 255. cmd (%s), pausing for %d seconds" % (attempt, cmd_summary, pause)
91+
else:
92+
msg = "ssh_retry: attempt: %d, caught exception(%s) from cmd (%s), pausing for %d seconds" % (attempt, e, cmd_summary, pause)
93+
94+
display.vv(msg, host=self.host)
95+
96+
time.sleep(pause)
97+
continue
98+
99+
return return_tuple
100+
return wrapped
101+
102+
54103
class Connection(ConnectionBase):
55104
''' ssh based connections '''
56105

@@ -352,6 +401,7 @@ def _examine_output(self, source, state, b_chunk, sudoable):
352401

353402
return b''.join(output), remainder
354403

404+
@_ssh_retry
355405
def _run(self, cmd, in_data, sudoable=True, checkrc=True):
356406
'''
357407
Starts the command and communicates with it until it ends.
@@ -618,28 +668,6 @@ def _run(self, cmd, in_data, sudoable=True, checkrc=True):
618668

619669
return (p.returncode, b_stdout, b_stderr)
620670

621-
def _exec_command(self, cmd, in_data=None, sudoable=True):
622-
''' run a command on the remote host '''
623-
624-
super(Connection, self).exec_command(cmd, in_data=in_data, sudoable=sudoable)
625-
626-
display.vvv(u"ESTABLISH SSH CONNECTION FOR USER: {0}".format(self._play_context.remote_user), host=self._play_context.remote_addr)
627-
628-
629-
# we can only use tty when we are not pipelining the modules. piping
630-
# data into /usr/bin/python inside a tty automatically invokes the
631-
# python interactive-mode but the modules are not compatible with the
632-
# interactive-mode ("unexpected indent" mainly because of empty lines)
633-
if not in_data and sudoable:
634-
args = ('ssh', '-tt', self.host, cmd)
635-
else:
636-
args = ('ssh', self.host, cmd)
637-
638-
cmd = self._build_command(*args)
639-
(returncode, stdout, stderr) = self._run(cmd, in_data, sudoable=sudoable)
640-
641-
return (returncode, stdout, stderr)
642-
643671
def _file_transport_command(self, in_path, out_path, sftp_action):
644672
# scp and sftp require square brackets for IPv6 addresses, but
645673
# accept them for hostnames and IPv4 addresses too.
@@ -674,7 +702,6 @@ def _file_transport_command(self, in_path, out_path, sftp_action):
674702
methods = ['sftp']
675703

676704
success = False
677-
res = None
678705
for method in methods:
679706
returncode = stdout = stderr = None
680707
if method == 'sftp':
@@ -693,77 +720,58 @@ def _file_transport_command(self, in_path, out_path, sftp_action):
693720
if sftp_action == 'get':
694721
# we pass sudoable=False to disable pty allocation, which
695722
# would end up mixing stdout/stderr and screwing with newlines
696-
(returncode, stdout, stderr) = self._exec_command('dd if=%s bs=%s' % (in_path, BUFSIZE), sudoable=False)
723+
(returncode, stdout, stderr) = self.exec_command('dd if=%s bs=%s' % (in_path, BUFSIZE), sudoable=False)
697724
out_file = open(to_bytes(out_path, errors='surrogate_or_strict'), 'wb+')
698725
out_file.write(stdout)
699726
out_file.close()
700727
else:
701728
in_data = open(to_bytes(in_path, errors='surrogate_or_strict'), 'rb').read()
702729
in_data = to_bytes(in_data, nonstring='passthru')
703-
(returncode, stdout, stderr) = self._exec_command('dd of=%s bs=%s' % (out_path, BUFSIZE), in_data=in_data)
730+
(returncode, stdout, stderr) = self.exec_command('dd of=%s bs=%s' % (out_path, BUFSIZE), in_data=in_data)
704731

705732
# Check the return code and rollover to next method if failed
706733
if returncode == 0:
707-
success = True
708-
break
734+
return (returncode, stdout, stderr)
709735
else:
710736
# If not in smart mode, the data will be printed by the raise below
711737
if len(methods) > 1:
712738
display.warning(msg='%s transfer mechanism failed on %s. Use ANSIBLE_DEBUG=1 to see detailed information' % (method, host))
713739
display.debug(msg='%s' % to_native(stdout))
714740
display.debug(msg='%s' % to_native(stderr))
715-
res = (returncode, stdout, stderr)
716741

717-
if not success:
718-
raise AnsibleError("failed to transfer file {0} to {1}:\n{2}\n{3}"\
719-
.format(to_native(in_path), to_native(out_path), to_native(res[1]), to_native(res[2])))
742+
if returncode == 255:
743+
raise AnsibleConnectionFailure("Failed to connect to the host via %s: %s" % (method, to_native(stderr)))
744+
else:
745+
raise AnsibleError("failed to transfer file to {0} {1}:\n{2}\n{3}"\
746+
.format(to_native(in_path), to_native(out_path), to_native(stdout), to_native(stderr)))
720747

721748
#
722749
# Main public methods
723750
#
724-
def exec_command(self, *args, **kwargs):
725-
"""
726-
Wrapper around _exec_command to retry in the case of an ssh failure
727-
728-
Will retry if:
729-
* an exception is caught
730-
* ssh returns 255
731-
Will not retry if
732-
* remaining_tries is <2
733-
* retries limit reached
734-
"""
751+
def exec_command(self, cmd, in_data=None, sudoable=True):
752+
''' run a command on the remote host '''
735753

736-
remaining_tries = int(C.ANSIBLE_SSH_RETRIES) + 1
737-
cmd_summary = "%s..." % args[0]
738-
for attempt in range(remaining_tries):
739-
try:
740-
return_tuple = self._exec_command(*args, **kwargs)
741-
# 0 = success
742-
# 1-254 = remote command return code
743-
# 255 = failure from the ssh command itself
744-
if return_tuple[0] != 255:
745-
break
746-
else:
747-
raise AnsibleConnectionFailure("Failed to connect to the host via ssh: %s" % to_native(return_tuple[2]))
748-
except (AnsibleConnectionFailure, Exception) as e:
749-
if attempt == remaining_tries - 1:
750-
raise
751-
else:
752-
pause = 2 ** attempt - 1
753-
if pause > 30:
754-
pause = 30
754+
super(Connection, self).exec_command(cmd, in_data=in_data, sudoable=sudoable)
755755

756-
if isinstance(e, AnsibleConnectionFailure):
757-
msg = "ssh_retry: attempt: %d, ssh return code is 255. cmd (%s), pausing for %d seconds" % (attempt, cmd_summary, pause)
758-
else:
759-
msg = "ssh_retry: attempt: %d, caught exception(%s) from cmd (%s), pausing for %d seconds" % (attempt, e, cmd_summary, pause)
756+
display.vvv(u"ESTABLISH SSH CONNECTION FOR USER: {0}".format(self._play_context.remote_user), host=self._play_context.remote_addr)
760757

761-
display.vv(msg, host=self.host)
762758

763-
time.sleep(pause)
764-
continue
759+
# we can only use tty when we are not pipelining the modules. piping
760+
# data into /usr/bin/python inside a tty automatically invokes the
761+
# python interactive-mode but the modules are not compatible with the
762+
# interactive-mode ("unexpected indent" mainly because of empty lines)
765763

766-
return return_tuple
764+
ssh_executable = self._play_context.ssh_executable
765+
766+
if not in_data and sudoable:
767+
args = (ssh_executable, '-tt', self.host, cmd)
768+
else:
769+
args = (ssh_executable, self.host, cmd)
770+
771+
cmd = self._build_command(*args)
772+
(returncode, stdout, stderr) = self._run(cmd, in_data, sudoable=sudoable)
773+
774+
return (returncode, stdout, stderr)
767775

768776
def put_file(self, in_path, out_path):
769777
''' transfer a file from local to remote '''
@@ -774,15 +782,15 @@ def put_file(self, in_path, out_path):
774782
if not os.path.exists(to_bytes(in_path, errors='surrogate_or_strict')):
775783
raise AnsibleFileNotFound("file or module does not exist: {0}".format(to_native(in_path)))
776784

777-
self._file_transport_command(in_path, out_path, 'put')
785+
return self._file_transport_command(in_path, out_path, 'put')
778786

779787
def fetch_file(self, in_path, out_path):
780788
''' fetch a file from remote to local '''
781789

782790
super(Connection, self).fetch_file(in_path, out_path)
783791

784792
display.vvv(u"FETCH {0} TO {1}".format(in_path, out_path), host=self.host)
785-
self._file_transport_command(in_path, out_path, 'get')
793+
return self._file_transport_command(in_path, out_path, 'get')
786794

787795
def reset(self):
788796
# If we have a persistent ssh connection (ControlPersist), we can ask it to stop listening.

0 commit comments

Comments
 (0)