Skip to content
Permalink
Browse files

Use posix_spawn to start the Bazel server.

Introduce a new "daemonize" helper tool that takes the heavy-lifting
of starting the Bazel server process as a daemon.  This tool is pure
C and does not have any big dependencies like gRPC so we can ensure
it's thread-free.  As a result, the code in this tool doesn't have
to take a lot of care when running fork(), which results in easier
to read code overall.

Then, change the Bazel client to simply use posix_spawn to invoke
this helper tool, which in turn runs the Bazel server as a daemon.
The code in the Bazel client becomes simpler as well as we don't
have to worry about the implications of using fork() from a threaded
process.

The switch to posix_spawn is necessary to address
#7446 because the only way
to customize the QoS of a process in macOS is by using this API.
Such a change will come separately.

This change is intended to be a no-op.  However, this fixes a bug
introduced in unknown commit by which we violated the requirement of
not doing any memory management from a child process started with
fork().  (I don't think this bug ever manifested itself, but it was
there -- which is funny because this piece of code went to great
extents to not do the wrong thing... and a one-line change let
hell break loose.)

RELNOTES: None.
PiperOrigin-RevId: 234991943
  • Loading branch information...
jmmv authored and Copybara-Service committed Feb 21, 2019
1 parent 25d202f commit 18a0e23fe9fa1935f6d278e4f6872ba03e84d82f
@@ -358,7 +358,12 @@ filegroup(
"//src/main/tools:jdk-support",
"//src/main/tools:linux-sandbox",
"//tools/osx:xcode-locator",
],
] + select({
"//src/conditions:windows": [],
"//conditions:default": [
"//src/main/tools:daemonize",
],
}),
outs = ["package" + suffix + ".zip"],
cmd = "$(location :package-bazel.sh) $@ " + ("" if embed else "''") + " $(SRCS)",
tools = ["package-bazel.sh"],
@@ -685,6 +685,8 @@ static int StartServer(const WorkspaceLayout *workspace_layout,
BlazeServerStartup **server_startup) {
vector<string> jvm_args_vector = GetArgumentArray(workspace_layout);
string argument_string = GetArgumentString(jvm_args_vector);
const string binaries_dir =
GetEmbeddedBinariesRoot(globals->options->install_base);
string server_dir =
blaze_util::JoinPath(globals->options->output_base, "server");
// Write the cmdline argument string to the server dir. If we get to this
@@ -708,7 +710,7 @@ static int StartServer(const WorkspaceLayout *workspace_layout,

return ExecuteDaemon(exe, jvm_args_vector, PrepareEnvironmentForJvm(),
globals->jvm_log_file, globals->jvm_log_file_append,
server_dir, server_startup);
binaries_dir, server_dir, server_startup);
}

// Replace this process with blaze in standalone/batch mode.
@@ -154,6 +154,7 @@ int ExecuteDaemon(const std::string& exe,
const std::map<std::string, EnvVarValue>& env,
const std::string& daemon_output,
const bool daemon_output_append,
const std::string& binaries_dir,
const std::string& server_dir,
BlazeServerStartup** server_startup);

@@ -22,6 +22,7 @@
#include <poll.h>
#include <pwd.h>
#include <signal.h>
#include <spawn.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
@@ -34,8 +35,11 @@
#include <time.h>
#include <unistd.h>

#include <algorithm>
#include <cassert>
#include <cinttypes>
#include <fstream>
#include <iterator>
#include <set>
#include <string>

@@ -223,6 +227,30 @@ class CharPP {
charpp_[i] = NULL;
}

// Constructs a new CharPP from a list of environment variables.
explicit CharPP(const std::map<string, EnvVarValue>& env) {
charpp_ = static_cast<char**>(malloc(sizeof(char*) * (env.size() + 1)));
size_t i = 0;
for (auto iter : env) {
const string& var = iter.first;
const EnvVarValue& value = iter.second;

switch (value.action) {
case SET:
charpp_[i] = strdup((var + "=" + value.value).c_str());
i++;
break;

case UNSET:
break;

default:
assert(false);
}
}
charpp_[i] = NULL;
}

// Deletes all memory held by the CharPP.
~CharPP() {
for (char** ptr = charpp_; *ptr != NULL; ptr++) {
@@ -258,40 +286,6 @@ bool SymlinkDirectories(const string &target, const string &link) {
return symlink(target.c_str(), link.c_str()) == 0;
}

// Causes the current process to become a daemon (i.e. a child of
// init, detached from the terminal, in its own session.) We don't
// change cwd, though.
static void Daemonize(const char* daemon_output,
const bool daemon_output_append) {
// Don't call BAZEL_DIE or exit() in this function; we're already in a child
// process so it won't work as expected. Just don't do anything that can
// possibly fail. :)

signal(SIGHUP, SIG_IGN);
if (fork() > 0) {
// This second fork is required iff there's any chance cmd will
// open an specific tty explicitly, e.g., open("/dev/tty23"). If
// not, this fork can be removed.
_exit(blaze_exit_code::SUCCESS);
}

setsid();

close(0);
close(1);
close(2);

open("/dev/null", O_RDONLY); // stdin
// stdout:
int out_flags =
O_WRONLY | O_CREAT | (daemon_output_append ? O_APPEND : O_TRUNC);
if (open(daemon_output, out_flags, 0666) == -1) {
// In a daemon, no-one can hear you scream.
open("/dev/null", O_WRONLY);
}
(void) dup(STDOUT_FILENO); // stderr (2>&1)
}

// Notifies the client about the death of the server process by keeping a socket
// open in the server. If the server dies for any reason, the socket will be
// closed, which can be detected by the client.
@@ -331,151 +325,81 @@ bool SocketBlazeServerStartup::IsStillAlive() {
}
}

// NB: There should only be system calls in this function. See the comment
// before ExecuteDaemon() to understand why. strerror() and strlen() are
// hopefully okay.
static void DieAfterFork(const char* message) {
char* error_string = strerror(errno); // strerror is hopefully okay
write(STDERR_FILENO, message, strlen(message)); // strlen should be OK
write(STDERR_FILENO, ": ", 2);
write(STDERR_FILENO, error_string, strlen(error_string));
write(STDERR_FILENO, "\n", 1);
_exit(blaze_exit_code::INTERNAL_ERROR);
}

// NB: There should only be system calls in this function. See the comment
// before ExecuteDaemon() to understand why.
static void ReadFromFdWithRetryEintr(
int fd, void *buf, size_t count, const char* error_message) {
ssize_t result;
do {
result = read(fd, buf, count);
} while (result < 0 && errno == EINTR);
if (result < 0 || static_cast<size_t>(result) != count) {
DieAfterFork(error_message);
}
}

// NB: There should only be system calls in this function. See the comment
// before ExecuteDaemon() to understand why.
static void WriteToFdWithRetryEintr(
int fd, void *buf, size_t count, const char* error_message) {
ssize_t result;
do {
// Ideally, we'd use send(..., MSG_NOSIGNAL), but that's not available on
// Darwin.
result = write(fd, buf, count);
} while (result < 0 && errno == EINTR);
if (result < 0 || static_cast<size_t>(result) != count) {
DieAfterFork(error_message);
}
}

void WriteSystemSpecificProcessIdentifier(
const string& server_dir, pid_t server_pid);

// We do a lot of seemingly-needless complications to avoid doing anything
// complex after a fork(). The reason is that forking in multi-threaded
// programs is fraught with peril.
//
// One root cause is that fork() only forks the thread it was called from. If
// another thread holds a lock, it will never be relinquished in the child
// process, and malloc() sometimes does lock. Thus, we need to avoid allocating
// any dynamic memory in the child process, which is hard if any C++ feature is
// used.
//
// We also don't know what libc does behind the scenes, so it's advisable to
// avoid anything that's not a system call. read(), write(), fork() and execv()
// aren't guaranteed to be pure system calls, either, but we can't get any
// closer to this ideal without writing logic specific to each POSIX system we
// run on.
//
// Another way to tackle this issue would be to pre-fork a child process before
// spawning any threads which is then used to fork all the other necessary
// child processes. However, then we'd need to invent a protocol that can handle
// sending stdout/stderr back and forking multiple times, which isn't trivial,
// either.
//
// Yet another fix would be not to use multiple threads before forking. However,
// we need to use gRPC to figure out if a server process is already present and
// gRPC currently cannot be convinced not to spawn any threads.
//
// Another way would be to use posix_spawn(). However, since we need to create a
// daemon process, we'd need to posix_spawn() a little child process that
// daemonizes then exec()s the actual JVM, which is also non-trivial. So I hope
// this will be good enough because for all its flaws, this solution is at least
// localized here.
int ExecuteDaemon(const string& exe,
const std::vector<string>& args_vector,
const std::map<string, EnvVarValue>& env,
const string& daemon_output,
const bool daemon_output_append,
const string& binaries_dir,
const string& server_dir,
BlazeServerStartup** server_startup) {
const string pid_file = blaze_util::JoinPath(server_dir, kServerPidFile);
const string daemonize = blaze_util::JoinPath(binaries_dir, "daemonize");

std::vector<string> daemonize_args =
{"daemonize", "-l", daemon_output, "-p", pid_file};
if (daemon_output_append) {
daemonize_args.push_back("-a");
}
daemonize_args.push_back("--");
daemonize_args.push_back(exe);
std::copy(args_vector.begin(), args_vector.end(),
std::back_inserter(daemonize_args));

int fds[2];

if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds)) {
BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
<< "socket creation failed: " << GetLastErrorString();
}

const char* daemon_output_chars = daemon_output.c_str();
CharPP argv(args_vector);
const char* exe_chars = exe.c_str();
posix_spawn_file_actions_t file_actions;
if (posix_spawn_file_actions_init(&file_actions) == -1) {
BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
<< "Failed to create posix_spawn_file_actions: " << GetLastErrorString();
}
if (posix_spawn_file_actions_addclose(&file_actions, fds[0]) == -1) {
BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
<< "Failed to modify posix_spawn_file_actions: "<< GetLastErrorString();
}

int child = fork();
if (child == -1) {
pid_t transient_pid;
if (posix_spawn(&transient_pid, daemonize.c_str(), &file_actions, NULL,
CharPP(daemonize_args).get(), CharPP(env).get()) == -1) {
BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
<< "fork() failed: " << GetLastErrorString();
return -1;
} else if (child > 0) {
// Parent process (i.e. the client)
close(fds[1]); // parent keeps one side...
int unused_status;
waitpid(child, &unused_status, 0); // child double-forks
pid_t server_pid = 0;
ReadFromFdWithRetryEintr(fds[0], &server_pid, sizeof server_pid,
"cannot read server PID from server");
string pid_file = blaze_util::JoinPath(server_dir, kServerPidFile);
if (!blaze_util::WriteFile(ToString(server_pid), pid_file)) {
BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
<< "cannot write PID file: " << GetLastErrorString();
return -1;
}
<< "Failed to execute JVM via " << daemonize
<< ": " << GetLastErrorString();
}
close(fds[1]);

WriteSystemSpecificProcessIdentifier(server_dir, server_pid);
char dummy = 'a';
WriteToFdWithRetryEintr(fds[0], &dummy, 1,
"cannot notify server about having written PID file");
*server_startup = new SocketBlazeServerStartup(fds[0]);
return server_pid;
} else {
// Child process (i.e. the server)
// NB: There should only be system calls in this branch. See the comment
// before ExecuteDaemon() to understand why.
close(fds[0]); // ...child keeps the other.

{
WithEnvVars env_obj(env);
Daemonize(daemon_output_chars, daemon_output_append);
pid_t server_pid = getpid();
WriteToFdWithRetryEintr(fds[1], &server_pid, sizeof server_pid,
"cannot communicate server PID to client");
// We wait until the client writes the PID file so that there is no race
// condition; the server expects the PID file to already be there so that
// it can read it and know its own PID (see the ctor GrpcServerImpl) and
// so that it can kill itself if the PID file is deleted (see
// GrpcServerImpl.PidFileWatcherThread)
char dummy;
ReadFromFdWithRetryEintr(
fds[1], &dummy, 1,
"cannot get PID file write acknowledgement from client");

execv(exe_chars, argv.get());
DieAfterFork("Cannot execute daemon");
return -1;
}
posix_spawn_file_actions_destroy(&file_actions);

// Wait for daemonize to exit. This guarantees that the pid file exists.
int status;
if (waitpid(transient_pid, &status, 0) == -1) {
BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
<< "waitpid failed: " << GetLastErrorString();
}
if (!WIFEXITED(status) || WEXITSTATUS(status) != EXIT_SUCCESS) {
BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
<< "daemonize didn't exit cleanly: " << GetLastErrorString();
}

std::ifstream pid_reader(pid_file);
if (!pid_reader) {
BAZEL_DIE(blaze_exit_code::INTERNAL_ERROR)
<< "Failed to open " << pid_file << ": " << GetLastErrorString();
}
pid_t server_pid;
pid_reader >> server_pid;

WriteSystemSpecificProcessIdentifier(server_dir, server_pid);

*server_startup = new SocketBlazeServerStartup(fds[0]);
return server_pid;
}

string GetHashedBaseDir(const string& root, const string& hashable) {
@@ -648,6 +648,7 @@ int ExecuteDaemon(const string& exe,
const std::map<string, EnvVarValue>& env,
const string& daemon_output,
const bool daemon_out_append,
const string& binaries_dir,
const string& server_dir,
BlazeServerStartup** server_startup) {
wstring wdaemon_output;
@@ -1,5 +1,10 @@
package(default_visibility = ["//src:__subpackages__"])

cc_binary(
name = "daemonize",
srcs = ["daemonize.c"],
)

cc_library(
name = "logging",
srcs = ["logging.cc"],
Oops, something went wrong.

0 comments on commit 18a0e23

Please sign in to comment.
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.