Skip to content

Commit

Permalink
Add SIGINFO logging when service is hard killed
Browse files Browse the repository at this point in the history
The functional testing framework is sometimes unable to kill and
restart the service gracefully between tests. This change sends a
SIGINFO signal before doing the hard kill. The debug logs triggered by
this signal should tell us which thread is preventing
graceful shutdown.
  • Loading branch information
josephharrington committed Dec 14, 2015
1 parent 775e613 commit 68cf54f
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 8 deletions.
13 changes: 10 additions & 3 deletions app/util/process_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from contextlib import suppress
import os
import subprocess
from subprocess import TimeoutExpired
import time


SIGINFO = 29 # signal.SIGINFO is not present in all Python distributions


def kill_gracefully(process, timeout=2):
Expand All @@ -10,7 +13,7 @@ def kill_gracefully(process, timeout=2):
does not exit within the given timeout, the process is killed (SIGKILL).
:param process: The process to terminate or kill
:type process: Popen
:type process: subprocess.Popen
:param timeout: Number of seconds to wait after terminate before killing
:type timeout: int
:return: The exit code, stdout, and stderr of the process
Expand All @@ -20,7 +23,11 @@ def kill_gracefully(process, timeout=2):
with suppress(ProcessLookupError):
process.terminate()
stdout, stderr = process.communicate(timeout=timeout)
except TimeoutExpired:

except subprocess.TimeoutExpired:
if not is_windows():
process.send_signal(SIGINFO) # this assumes a debug handler has been registered for SIGINFO
time.sleep(1) # give the logger a chance to write out debug info
process.kill()
stdout, stderr = process.communicate()

Expand Down
9 changes: 4 additions & 5 deletions app/util/unhandled_exception_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import signal
from threading import current_thread, Lock, main_thread

from app.util import app_info, log
from app.util import app_info, log, process_utils
from app.util.singleton import Singleton


Expand All @@ -21,11 +21,10 @@ class UnhandledExceptionHandler(Singleton):
HANDLED_EXCEPTION_EXIT_CODE = 1
EXCEPTION_DURING_TEARDOWN_EXIT_CODE = 2

SIGINFO = 29 # signal.SIGINFO is not present in all Python distributions
_SIGINFO_DEBUG_LOG = '/tmp/clusterrunner.debug.log'

_signal_names = {
SIGINFO: 'SIGINFO',
process_utils.SIGINFO: 'SIGINFO',
signal.SIGINT: 'SIGINT',
signal.SIGTERM: 'SIGTERM',
}
Expand All @@ -45,7 +44,7 @@ def __init__(self):
signal.signal(signal.SIGTERM, self._application_teardown_signal_handler)
signal.signal(signal.SIGINT, self._application_teardown_signal_handler)
try:
signal.signal(self.SIGINFO, self._application_info_dump_signal_handler)
signal.signal(process_utils.SIGINFO, self._application_info_dump_signal_handler)
except ValueError:
self._logger.warning('Failed at registering signal handler for SIGINFO. This is expected if ClusterRunner'
'is running on Windows.')
Expand All @@ -57,7 +56,7 @@ def reset_signal_handlers(cls):
want to inherit all the signal handlers.
"""
signals_to_reset = dict(cls._signal_names)
signals_to_reset.pop(cls.SIGINFO, None) # Leave the SIGINFO handler for forked subprocesses
signals_to_reset.pop(process_utils.SIGINFO, None) # Leave the SIGINFO handler for forked subprocesses
for signal_num in signals_to_reset:
signal.signal(signal_num, signal.SIG_DFL) # SIG_DFL restores the default behavior for each signal

Expand Down

0 comments on commit 68cf54f

Please sign in to comment.