Skip to content

Commit

Permalink
winrm - make command input more resilient (#81538)
Browse files Browse the repository at this point in the history
* winrm - make command input more resilient

* Expand warning message
  • Loading branch information
jborean93 committed Sep 26, 2023
1 parent 60c9660 commit f22231d
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 2 deletions.
2 changes: 2 additions & 0 deletions changelogs/fragments/winrm-send-input.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
bugfixes:
- winrm - Better handle send input failures when communicating with hosts under load
42 changes: 40 additions & 2 deletions lib/ansible/plugins/connection/winrm.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@
import tempfile
import shlex
import subprocess
import time
import typing as t

from inspect import getfullargspec
Expand Down Expand Up @@ -199,6 +200,7 @@
try:
import winrm
from winrm import Response
from winrm.exceptions import WinRMError, WinRMOperationTimeoutError
from winrm.protocol import Protocol
import requests.exceptions
HAS_WINRM = True
Expand Down Expand Up @@ -494,6 +496,43 @@ def _winrm_connect(self) -> winrm.Protocol:
else:
raise AnsibleError('No transport found for WinRM connection')

def _winrm_write_stdin(
    self,
    command_id: str,
    stdin_iterator: t.Iterable[tuple[bytes, bool]],
    *,
    max_attempts: int = 3,
    retry_delay: float = 5,
) -> None:
    """Send every stdin chunk to a running command, tolerating transient failures.

    Each ``(data, is_last)`` pair from ``stdin_iterator`` is passed to
    ``self._winrm_send_input`` with ``eof=is_last``. Two failure modes are
    handled per chunk:

    * ``WinRMOperationTimeoutError`` - a warning is displayed and the chunk
      is treated as sent. Unless it was the last chunk, the method waits
      ``retry_delay`` seconds before moving on to the next one.
    * ``WinRMError`` whose text contains WSMan fault code 170 - the same
      chunk is sent again after ``retry_delay`` seconds, for at most
      ``max_attempts`` attempts in total.

    :param command_id: Identifier of the command that receives the input.
    :param stdin_iterator: Iterable of ``(data, is_last)`` tuples.
    :param max_attempts: Total number of send attempts per chunk (default 3).
    :param retry_delay: Seconds to sleep before continuing after a timeout
        or before retrying a busy send (default 5).
    :raises ValueError: If ``max_attempts`` is less than 1, as no data would
        ever be sent.
    :raises WinRMError: If a send fails with anything other than fault code
        170, or still fails with it on the final attempt.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    for (data, is_last) in stdin_iterator:
        for attempt in range(1, max_attempts + 1):
            try:
                self._winrm_send_input(self.protocol, self.shell_id, command_id, data, eof=is_last)

            except WinRMOperationTimeoutError:
                # A WSMan OperationTimeout can be received for a Send
                # operation when the server is under severe load. On manual
                # testing the input is still processed and it's safe to
                # continue. As the calling method still tries to wait for
                # the proc to end if this failed it shouldn't hurt to just
                # treat this as a warning.
                display.warning(
                    "WSMan OperationTimeout during send input, attempting to continue. "
                    "If this continues to occur, try increasing the connection_timeout "
                    "value for this host."
                )
                # No further chunk follows the last one, so there is
                # nothing to delay for in that case.
                if not is_last:
                    time.sleep(retry_delay)

            except WinRMError as e:
                # Error 170 == ERROR_BUSY. This could be the result of a
                # timed out Send from above still being processed on the
                # server. Wait retry_delay seconds and try up to
                # max_attempts times before fully giving up.
                # pywinrm does not expose the internal WSMan fault details
                # through an actual object but embeds it as a repr.
                if attempt == max_attempts or "'wsmanfault_code': '170'" not in str(e):
                    raise

                display.warning(f"WSMan send failed on attempt {attempt} as the command is busy, trying to send data again")
                time.sleep(retry_delay)
                continue

            # Reached on success and after a tolerated timeout: this chunk
            # is done, move on to the next one.
            break

def _winrm_send_input(self, protocol: winrm.Protocol, shell_id: str, command_id: str, stdin: bytes, eof: bool = False) -> None:
rq = {'env:Envelope': protocol._get_soap_header(
resource_uri='http://schemas.microsoft.com/wbem/wsman/1/windows/shell/cmd',
Expand Down Expand Up @@ -529,8 +568,7 @@ def _winrm_exec(

try:
if stdin_iterator:
for (data, is_last) in stdin_iterator:
self._winrm_send_input(self.protocol, self.shell_id, command_id, data, eof=is_last)
self._winrm_write_stdin(command_id, stdin_iterator)

except Exception as ex:
display.warning("ERROR DURING WINRM SEND INPUT - attempting to recover: %s %s"
Expand Down

0 comments on commit f22231d

Please sign in to comment.