Skip to content

Commit

Permalink
[merp] Use a separate process as the hang supervisor.
Browse files Browse the repository at this point in the history
Fixes mono#15646

macOS does not like signals being sent from the forked supervisor process, possibly towards anywhere but definitely when sent to the parent process. The following message is currently spewed after the supervisor process attempting to send a SIGSEGV to a hanged Mono process:
"The process has forked and you cannot use this CoreFoundation functionality safely. You MUST exec()."

We follow that direction and introduce a new binary that, when available on the environment's PATH, is used to abort the parent process for us.
  • Loading branch information
alexischr committed Jul 17, 2019
1 parent c6bf637 commit 08de9c4
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 29 deletions.
1 change: 1 addition & 0 deletions configure.ac
Expand Up @@ -6681,6 +6681,7 @@ tools/Makefile
tools/locale-builder/Makefile
tools/sgen/Makefile
tools/pedump/Makefile
tools/mono-hang-watchdog/Makefile
runtime/Makefile
msvc/Makefile
po/Makefile
Expand Down
36 changes: 8 additions & 28 deletions mono/metadata/threads.c
Expand Up @@ -6262,7 +6262,6 @@ mono_threads_summarize_one (MonoThreadSummary *out, MonoContext *ctx)
return success;
}

#define TIMEOUT_CRASH_REPORTER_FATAL 30
#define MAX_NUM_THREADS 128
typedef struct {
gint32 has_owner; // state of this memory
Expand All @@ -6278,7 +6277,7 @@ typedef struct {
gboolean silent; // print to stdout
} SummarizerGlobalState;

#if defined(HAVE_KILL) && !defined(HOST_ANDROID) && defined(HAVE_WAITPID) && ((!defined(HOST_DARWIN) && defined(SYS_fork)) || HAVE_FORK)
#if defined(HAVE_KILL) && !defined(HOST_ANDROID) && defined(HAVE_WAITPID) && defined(HAVE_EXECVP) && ((!defined(HOST_DARWIN) && defined(SYS_fork)) || HAVE_FORK)
#define HAVE_MONO_SUMMARIZER_SUPERVISOR 1
#endif

Expand All @@ -6289,12 +6288,6 @@ typedef struct {
} SummarizerSupervisorState;

#ifndef HAVE_MONO_SUMMARIZER_SUPERVISOR
static void
summarizer_supervisor_wait (SummarizerSupervisorState *state)
{
return;
}

static pid_t
summarizer_supervisor_start (SummarizerSupervisorState *state)
{
Expand All @@ -6309,23 +6302,6 @@ summarizer_supervisor_end (SummarizerSupervisorState *state)
}

#else
static void
summarizer_supervisor_wait (SummarizerSupervisorState *state)
{
sleep (TIMEOUT_CRASH_REPORTER_FATAL);

// If we haven't been SIGKILL'ed yet, we signal our parent
// and then exit
#ifdef HAVE_KILL
g_async_safe_printf("Crash Reporter has timed out, sending SIGSEGV\n");
kill (state->pid, SIGSEGV);
#else
g_error ("kill () is not supported by this platform");
#endif

exit (1);
}

static pid_t
summarizer_supervisor_start (SummarizerSupervisorState *state)
{
Expand All @@ -6350,8 +6326,14 @@ summarizer_supervisor_start (SummarizerSupervisorState *state)
g_assert_not_reached ();
#endif

if (pid != 0)
if (pid != 0) {
state->supervisor_pid = pid;
char pid_str[20]; // pid is a unit64_t, 20 digits max in decimal form
sprintf (pid_str, "%d", state->pid);
const char *const args[] = { "mono-hang-watchdog", pid_str };
execvp (args[0], (char * const*)args); // run 'mono-hang-watchdog [pid]'
g_assert_not_reached ();
}

return pid;
}
Expand Down Expand Up @@ -6665,8 +6647,6 @@ mono_threads_summarize (MonoContext *ctx, gchar **out, MonoStackHash *hashes, gb
g_assert (mem);
success = mono_threads_summarize_execute_internal (ctx, out, hashes, silent, mem, provided_size, TRUE);
summarizer_supervisor_end (&synch);
} else {
summarizer_supervisor_wait (&synch);
}

if (!already_async)
Expand Down
2 changes: 1 addition & 1 deletion tools/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = locale-builder sgen pedump
SUBDIRS = locale-builder sgen pedump mono-hang-watchdog


13 changes: 13 additions & 0 deletions tools/mono-hang-watchdog/Makefile.am
@@ -0,0 +1,13 @@

AM_CPPFLAGS = $(SHARED_CFLAGS)

if DISABLE_EXECUTABLES
bin_PROGRAMS =
else
bin_PROGRAMS = mono-hang-watchdog
endif

CFLAGS = $(filter-out @CXX_REMOVE_CFLAGS@, @CFLAGS@)
mono_hang_watchdog_CFLAGS = @CXX_ADD_CFLAGS@

mono_hang_watchdog_SOURCES = mono-hang-watchdog.c
47 changes: 47 additions & 0 deletions tools/mono-hang-watchdog/mono-hang-watchdog.c
@@ -0,0 +1,47 @@
/* Given a external process' id as argument, the program waits for a set timeout then attempts to abort that process */
/* Used by the Mono runtime's crash reporting. */

#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>

#include "config.h"

#ifdef HAVE_SIGNAL_H
#include <signal.h>
#endif

#ifndef HAVE_KILL
#error "kill() is not supported by this platform. This tool won't be able to function."
#endif

#define TIMEOUT_CRASH_REPORTER_FATAL 30

static char* program_name;
void program_exit (int exit_code, const char* message);

void program_exit (int exit_code, const char* message) {
if (message)
printf ("%s\n", message);
printf ("Usage: '%s [pid]'\t\t[pid]: The id for the Mono process\n", program_name);
exit (exit_code);
}

int main (int argc, char* argv[])
{
program_name = argv[0];
if (argc < 2)
program_exit (-1, "Please provide one argument (pid)");
errno = 0;
pid_t pid = (pid_t)strtoul (argv[1], NULL, 10);
if (errno)
program_exit (-2, "Invalid pid");

sleep (TIMEOUT_CRASH_REPORTER_FATAL);

/* Abort the process. This isn't suspect to 'pid' referring a wrong new process with the same id:
this process is killed by Mono if Mono doesn't hang */
printf ("Mono hang watchdog timed out, sending SIGSEGV to %ul\n", pid);
kill (pid, SIGSEGV);
}

0 comments on commit 08de9c4

Please sign in to comment.