Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Add state, fix bug where all HangWatchers record to the same bad_functions dictionary, add tests
  • Loading branch information...
commit cafaa55736772a176886bfb3b647f3aaf044058e 1 parent 70c04c7
cyli authored
Showing with 108 additions and 23 deletions.
  1. +1 −0  .gitignore
  2. +50 −4 __init__.py
  3. +57 −19 test_hangwatcher.py
View
1  .gitignore
@@ -1 +1,2 @@
*.pyc
+_trial_temp
View
54 __init__.py
@@ -10,8 +10,42 @@
class HangWatcher(object):
- bad_functions = collections.defaultdict(int)
+ """
+ Object which watches a L{twisted} reactor to determine whether the
+ reactor is hung
+
+ @ivar cancel_interval: how often to cancel the SIGALRM sent to the process
+ (therefore this value should be less than C{max_delay})
+ @type cancel_interval: C{int} or C{float}
+
+ @ivar max_delay: how long to wait before determining that the reactor is
+ hung (SIGALRM will be sent to the process after this much time, unless
+ it is canceled, therefore C{cancel_interval} should be less than
+ C{max_delay})
+ @type max_delay: C{int} or C{float}
+
+ @ivar bad_functions: a dictionary of bad functions that cause the
+ reactor to hang, mapped to the number of times it has caused the
+ reactor to hang
+ @type bad_functions: C{dict} of C{tuples} to C{int}
+
+ @ivar hang_count: number of times the reactor has been observed to be hung
+ @type hang_count: C{int}
+
+ @ivar currently_hung: whether the reactor was last seen to be hung
+ @type currently_hung: C{bool}
+
+ @ivar current_bad_function: the function that was last observed to have
+ caused the reactor to hang (an empty tuple when not currently hung)
+ @type current_bad_function: C{tuple} of the function name, file name, and first line number
+
+ @ivar clock: the reactor to watch for hanging - if not set, will just use
+ the default reactor (useful to be able to set for testing purposes)
+ @type clock: L{twisted.internet.interfaces.IReactor} provider
+ """
+
hang_count = 0
+ currently_hung = False
def __init__(self, cancel_interval=CANCEL_INTERVAL, max_delay=MAX_DELAY):
# Handle SIGALRMs with print_traceback
@@ -20,10 +54,16 @@ def __init__(self, cancel_interval=CANCEL_INTERVAL, max_delay=MAX_DELAY):
# this LoopingCall is run by the reactor.
# If the reactor is hung, cancel_sigalrm won't run and the handler for SIGALRM will fire
self.lc = task.LoopingCall(self.cancel_sigalrm)
+
self.cancel_interval = cancel_interval
- self.max_delay = MAX_DELAY
+ self.max_delay = max_delay
+
+ self.bad_functions = collections.defaultdict(int)
+ self.current_bad_function = ()
def start(self):
+ if self.clock is not None:
+ self.lc.clock = self.clock
self.lc.start(self.cancel_interval)
def reset_itimer(self):
@@ -33,15 +73,21 @@ def log_traceback(self, signal, frame):
# Oh snap, cancel_sigalrm didn't get called
traceback.print_stack(frame)
+ self.currently_hung = True
self.hang_count += 1
- code_tuple = (frame.f_code.co_name, frame.f_code.co_filename, frame.f_code.co_firstlineno)
- self.bad_functions[code_tuple] += 1
+ self.current_bad_function = (frame.f_code.co_name,
+ frame.f_code.co_filename,
+ frame.f_code.co_firstlineno)
+ self.bad_functions[self.current_bad_function] += 1
self.reset_itimer()
def cancel_sigalrm(self):
# Cancel any pending alarm
signal.alarm(0)
+ # remove currently hung status
+ self.currently_hung = False
+ self.current_bad_function = ()
self.reset_itimer()
def print_stats(self, reset_stats=False):
View
76 test_hangwatcher.py
@@ -42,8 +42,11 @@ def fake_setitimer(self, itimer_type, interval):
if interval < 0:
raise signal.ItimerError("[Errno 22] Invalid argument")
+ def alarm():
+ os.kill(os.getpid(), sig)
+
self.alarms.append(
- self.itimer_clock.callLater(interval, os.kill, os.getpid(), sig))
+ self.itimer_clock.callLater(interval, alarm))
def fake_signal_alarm(self, delay):
"""
@@ -58,6 +61,20 @@ def fake_signal_alarm(self, delay):
else:
self.fake_setitimer(signal.ITIMER_REAL, delay)
+ def advance_time(self, seconds, clocks):
+ """
+ Advances time incrementally, .5 seconds at a time, on all the clocks
+
+ @param seconds: seconds to advance time
+ @type seconds: C{int}
+
+ @param clocks: list of clocks
+ @type clocks: C{list}
+ """
+ for i in range(seconds * 2):
+ for clock in clocks:
+ clock.advance(.5)
+
def setUp(self):
# use task.Clock to simulate reactor hanging, and to simulate time
# passing for setitimer
@@ -91,21 +108,42 @@ def test_init_has_valid_default_delays(self):
watcher = twisted_hang.HangWatcher()
self.assertTrue(watcher.cancel_interval < watcher.max_delay)
- # def test_logs_no_hangs_if_not_hung(self):
- # """
- # If the reactor isn't hung, the alarm should be canceled/should not
- # alarm after C{max_delay}
- # """
- # self.watcher.start()
- # self.fake_reactor.advance(6)
- # self.itimer_clock.advance(6)
- # self.assertEqual(0, self.watcher.hang_count)
-
- # def test_logs_hang_if_hung(self):
- # """
- # If the reactor is hung, the alarm never gets canceled and log_traceback
- # should be called
- # """
- # self.watcher.start()
- # self.itimer_clock.advance(6)
- # self.assertEqual(1, self.watcher.hang_count)
+ def test_logs_no_hangs_if_not_hung(self):
+ """
+ If the reactor isn't hung, the alarm should be canceled/should not
+ alarm after C{max_delay}. Which means that the reactor is not
+ currently hung, the hang count is 0, and no bad functions have been
+ recorded.
+ """
+ self.watcher.start()
+ # time should advance on both the timer clock and the reactor
+ self.advance_time(6, [self.fake_reactor, self.itimer_clock])
+
+ self.assertEqual(0, self.watcher.hang_count)
+ self.assertTrue(not self.watcher.currently_hung)
+ self.assertEqual(0, len(self.watcher.bad_functions))
+ self.assertEqual((), self.watcher.current_bad_function)
+
+ def test_logs_hang_if_hung(self):
+ """
+ If the reactor is hung, the alarm never gets canceled and log_traceback
+ should be called, which means that the reactor is currently hung,
+ the hang count is 1, and a bad function has been recorded
+ """
+ self.watcher.start()
+ # advance only the itimer clock; the fake reactor is not advanced, simulating a hang
+ self.advance_time(6, [self.itimer_clock])
+
+ self.assertEqual(1, self.watcher.hang_count)
+ self.assertTrue(self.watcher.currently_hung)
+ # one bad function should have been recorded
+ self.assertEqual(1, len(self.watcher.bad_functions))
+ # the bad function should be a tuple of the function name, the
+ # filename, and the first line number of the function
+ self.assertEqual(3, len(self.watcher.current_bad_function))
+
+ bad_function = self.watcher.bad_functions.items()[0]
+ # the bad function count for that particular function should be 1
+ self.assertEqual(1, bad_function[1])
+ # the bad function recorded should be the current bad function
+ self.assertEqual(bad_function[0], self.watcher.current_bad_function)
Please sign in to comment.
Something went wrong with that request. Please try again.