Skip to content

Commit

Permalink
ServerControl: add signal checks and wait() wrapper
Browse files Browse the repository at this point in the history
KillServer: add a parameter to allow the expected exit signal to be checked.
This allows us to determine if a process died for the wrong reason, e.g. if it
failed to catch SIGTERM, or was killed by SIGSEGV instead.

WaitForProcessExit: split out, to make it easier to clean up after forked
daemons, and check their exit status and signal.

test/bbackupd: use these to improve handling and failure mode checking of
forking daemon tests, reducing risk of hangs.
  • Loading branch information
qris committed Jun 20, 2019
1 parent e2b0e00 commit 3dd4796
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 41 deletions.
78 changes: 52 additions & 26 deletions lib/server/ServerControl.cpp
Expand Up @@ -212,38 +212,14 @@ bool KillServerInternal(int pid)

#endif // WIN32

bool KillServer(int pid, bool wait_for_process)
bool KillServer(int pid, bool wait_for_process, int expected_exit_status,
int expected_signal)
{
if(!KillServerInternal(pid))
{
return false;
}

#ifdef HAVE_WAITPID
if(wait_for_process)
{
int status, result;

result = waitpid(pid, &status, 0);
if (result != pid)
{
BOX_LOG_SYS_ERROR("waitpid failed");
}
TEST_THAT(result == pid);

TEST_THAT(WIFEXITED(status));
if (WIFEXITED(status))
{
if (WEXITSTATUS(status) != 0)
{
BOX_WARNING("process exited with code " <<
WEXITSTATUS(status));
}
TEST_THAT(WEXITSTATUS(status) == 0);
}
}
#endif

BOX_INFO("Waiting for server to die (pid " << pid << ")");
printf("Waiting for server to die (pid %d): ", pid);

Expand All @@ -255,6 +231,14 @@ bool KillServer(int pid, bool wait_for_process)
fflush(stdout);
}

#ifdef HAVE_WAITPID
if(wait_for_process)
{
WaitForProcessExit(pid, expected_signal, expected_exit_status);
// If successful, then ServerIsAlive will return false below.
}
#endif

if (!ServerIsAlive(pid)) break;
ShortSleep(MilliSecondsToBoxTime(100), false);
if (!ServerIsAlive(pid)) break;
Expand All @@ -274,6 +258,48 @@ bool KillServer(int pid, bool wait_for_process)
return !ServerIsAlive(pid);
}

#ifdef HAVE_WAITPID
bool WaitForProcessExit(int pid, int expected_signal, int expected_exit_status)
{
int status, result;

result = waitpid(pid, &status, WNOHANG);
if(result != 0) // otherwise the process has not exited (yet)
{
if(result != pid)
{
BOX_LOG_SYS_ERROR("waitpid failed");
}

TEST_THAT(result == pid);

if(expected_signal != 0)
{
TEST_THAT(!WIFEXITED(status));
TEST_LINE(WIFSIGNALED(status), "Process " << pid << " was not killed by a "
"signal as expected");
TEST_EQUAL_LINE(expected_signal, WTERMSIG(status),
"Process " << pid << " killed by unexpected signal");
}
else
{
TEST_THAT(WIFEXITED(status));
TEST_THAT(!WIFSIGNALED(status));
TEST_EQUAL_LINE(expected_exit_status, WEXITSTATUS(status),
"Process " << pid << " exited with unexpected exit "
"status");
}

return true;
}
else
{
// otherwise the process has not exited (yet)
return false;
}
}
#endif // HAVE_WAITPID

bool KillServer(const std::string& pid_file, bool WaitForProcess)
{
FileStream fs(pid_file);
Expand Down
7 changes: 6 additions & 1 deletion lib/server/ServerControl.h
Expand Up @@ -6,7 +6,8 @@
#include "Test.h"

bool HUPServer(int pid);
bool KillServer(int pid, bool wait_for_process = false);
bool KillServer(int pid, bool wait_for_process = false, int expected_exit_status = 0,
int expected_signal = 0);
bool KillServer(const std::string& pid_file, bool WaitForProcess = false);
bool KillServerInternal(int pid);
int StartDaemon(int current_pid, const std::string& cmd_line, const char* pid_file, int port = 0,
Expand All @@ -18,6 +19,10 @@ int LaunchServer(const std::string& rCommandLine, const char *pidFile, int port
int WaitForServerStartup(const char *pidFile, int pidIfKnown, int port = 0,
const std::string& socket_path = "");

#ifdef HAVE_WAITPID
bool WaitForProcessExit(int pid, int expected_signal = 0, int expected_exit_status = 0);
#endif // HAVE_WAITPID

#ifdef WIN32
#include "WinNamedPipeStream.h"
#include "IOStreamGetLine.h"
Expand Down
59 changes: 45 additions & 14 deletions test/bbackupd/testbbackupd.cpp
Expand Up @@ -708,6 +708,7 @@ void do_interrupted_restore(const TLSContext &context, int64_t restoredirid,
{
case 0:
// child process
try
{
Logging::SetProgramName("bbackupquery");
Logging::ShowTagOnConsole show_tag_on_console;
Expand All @@ -734,8 +735,18 @@ void do_interrupted_restore(const TLSContext &context, int64_t restoredirid,

// Log out
apClient->QueryFinished();
exit(0);
}
catch(std::exception &e)
{
printf("FAILED: Forked daemon caught exception: %s\n", e.what());
exit(1);
}
catch(...)
{
printf("FAILED: Forked daemon caught unknown exception\n");
exit(1);
}
exit(0);
break;

case -1:
Expand All @@ -750,34 +761,55 @@ void do_interrupted_restore(const TLSContext &context, int64_t restoredirid,
#endif // !WIN32

// Wait until a resume file is written, then terminate the child
box_time_t deadline = GetCurrentBoxTime() + SecondsToBoxTime(10);

while(true)
{
TEST_THAT(GetCurrentBoxTime() <= deadline); // restore took too long

// Test for existence of the result file
int64_t resumesize = 0;
if(FileExists("testfiles/restore-interrupt.boxbackupresume", &resumesize) && resumesize > 16)
if((FileExists("testfiles/restore-interrupt.boxbackupresume", &resumesize) &&
resumesize > 16) || GetCurrentBoxTime() > deadline)
{
// It's done something. Terminate it.
TEST_THAT(KillServerInternal(pid));
// It's done something. Terminate it. If it forked, then we need to wait()
// for it as well, otherwise the zombie process will never die.
// If the restore process returned 0, then it actually finished, so we
// haven't tested anything. So check that it doesn't (assume 1 instead):
#ifdef WIN32
TEST_THAT(KillServer(pid,
true, // wait_for_process
0, // expected_exit_status
0 // expected_signal: no signals on Windows, and not used
));
#else
TEST_THAT(KillServer(pid,
true, // wait_for_process
0, // expected_exit_status
SIGTERM // expected_signal: no SIGTERM handler installed
));
#endif
break;
}

// Process finished?
// Process finished unexpectedly? We were too slow?

#ifdef HAVE_WAITPID
WaitForProcessExit(pid, SIGTERM, 1); // expected_exit_status
// If successful, then ServerIsAlive will return false below.
#endif

if(!ServerIsAlive(pid))
{
// Even though we forked a child process, if it was a zombie waiting for us
// to reap it then it would still be alive, so we know that it's not, so we
// don't have to wait for it.
break;
}

// Give up timeslot so as not to hog the processor
::sleep(0);
}

#ifdef HAVE_WAITPID
// Clean up zombie process
int status = 0;
waitpid(pid, &status, 0);
// If the restore process returned 0, then it actually finished, so we haven't tested anything.
TEST_EQUAL(SIGTERM, status);
#endif
}

#ifdef WIN32
Expand Down Expand Up @@ -3783,7 +3815,6 @@ bool test_changing_client_store_marker_pauses_daemon(RaidAndS3TestSpecs::Special
// the client store marker.
wait_for_sync_end();


// TODO FIXME dedent
{
// Then... connect to the server, and change the
Expand Down

0 comments on commit 3dd4796

Please sign in to comment.