Skip to content

Commit

Permalink
Reap workers in the main loop
Browse files Browse the repository at this point in the history
Handle SIGCHLD like every other signal, waking up the arbiter and
handling the signal in the main loop rather than in the signal handler.
Take special care to reinstall the signal handler since Python may not.

Clean up workers and call the worker_exit hook in only one place. When
killing a worker, do not clean it up. The arbiter will now clean up the
worker and invoke the hook when it reaps the worker.

Ensure that all workers have their temporary watchdog files closed and
that the arbiter does not exit or log about other child processes dying.

With reaping handled in the main loop and kill_worker delegating
responsibility for cleanup to the reaping loop, iterate over the workers
dictionary everywhere else without concern for concurrent modification.
  • Loading branch information
tilgovi committed Dec 27, 2023
1 parent bd734c5 commit a526c00
Showing 1 changed file with 33 additions and 22 deletions.
55 changes: 33 additions & 22 deletions gunicorn/arbiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ class Arbiter(object):
# I love dynamic languages
SIG_QUEUE = []
SIGNALS = [getattr(signal, "SIG%s" % x)
for x in "HUP QUIT INT TERM TTIN TTOU USR1 USR2 WINCH".split()]
for x in "CHLD HUP QUIT INT TERM TTIN TTOU USR1 USR2 WINCH".split()]
SIG_NAMES = dict(
(getattr(signal, name), name[3:].lower()) for name in dir(signal)
if name[:3] == "SIG" and name[3] != "_"
if name[:3] == "SIG" and name[3] != "_" and name[3:] != "CLD"
)

def __init__(self, app):
Expand Down Expand Up @@ -186,10 +186,10 @@ def init_signals(self):
# initialize all signals
for s in self.SIGNALS:
signal.signal(s, self.signal)
signal.signal(signal.SIGCHLD, self.handle_chld)

def signal(self, sig, frame):
if len(self.SIG_QUEUE) < 5:
# limit overload, but do not miss unique signals
if len(self.SIG_QUEUE) < 5 or sig not in self.SIG_QUEUE:
self.SIG_QUEUE.append(sig)
self.wakeup()

Expand Down Expand Up @@ -237,10 +237,25 @@ def run(self):
self.pidfile.unlink()
sys.exit(-1)

def handle_chld(self, sig, frame):
"SIGCHLD handling"
def handle_chld(self):
"""\
SIGCHLD handling.
Reap workers and re-install the signal handler, in case it is not reset
by the OS, as described in the signal module documentation:
A handler for a particular signal, once set, remains installed
until it is explicitly reset (Python emulates the BSD style
interface regardless of the underlying implementation), with the
exception of the handler for SIGCHLD, which follows the underlying
implementation.
Python makes this exception to avoid infinite recursion on platforms
that invoke the handler immediately when installing it from a process
with dead children.
"""
self.reap_workers()
self.wakeup()
signal.signal(signal.SIGCHLD, self.signal)

def handle_hup(self):
"""\
Expand Down Expand Up @@ -391,9 +406,11 @@ def stop(self, graceful=True):
limit = time.time() + self.cfg.graceful_timeout
# instruct the workers to exit
self.kill_workers(sig)
self.reap_workers()
# wait until the graceful timeout
while self.WORKERS and time.time() < limit:
time.sleep(0.1)
self.reap_workers()

self.kill_workers(signal.SIGKILL)

Expand Down Expand Up @@ -492,8 +509,7 @@ def murder_workers(self):
"""
if not self.timeout:
return
workers = list(self.WORKERS.items())
for (pid, worker) in workers:
for (pid, worker) in self.WORKERS.items():
try:
if time.time() - worker.tmp.last_update() <= self.timeout:
continue
Expand All @@ -519,6 +535,12 @@ def reap_workers(self):
if self.reexec_pid == wpid:
self.reexec_pid = 0
else:
worker = self.WORKERS.pop(wpid, None)
if not worker:
continue

worker.tmp.close()

# A worker was terminated. If the termination reason was
# that it could not boot, we'll shut it down to avoid
# infinite start/stop cycles.
Expand Down Expand Up @@ -553,10 +575,6 @@ def reap_workers(self):
msg += " Perhaps out of memory?"
self.log.error(msg)

worker = self.WORKERS.pop(wpid, None)
if not worker:
continue
worker.tmp.close()
self.cfg.child_exit(self, worker)
except OSError as e:
if e.errno != errno.ECHILD:
Expand Down Expand Up @@ -647,8 +665,7 @@ def kill_workers(self, sig):
Kill all workers with the signal `sig`
:attr sig: `signal.SIG*` value
"""
worker_pids = list(self.WORKERS.keys())
for pid in worker_pids:
for pid in self.WORKERS:
self.kill_worker(pid, sig)

def kill_worker(self, pid, sig):
Expand All @@ -662,11 +679,5 @@ def kill_worker(self, pid, sig):
os.kill(pid, sig)
except OSError as e:
if e.errno == errno.ESRCH:
try:
worker = self.WORKERS.pop(pid)
worker.tmp.close()
self.cfg.worker_exit(self, worker)
return
except (KeyError, OSError):
return
return
raise

0 comments on commit a526c00

Please sign in to comment.