Skip to content

Commit

Permalink
log: use non-blocking atomic writes to stderr fifos
Browse files Browse the repository at this point in the history
Buggy container runtimes are known to do partial reads on container's
stderr and emit torn logs [1]. This is due to write(2) allowing partial
data transfer. One way to avoid this when stderr is a FIFO (which is the
case for container runtimes) is to changed the file flags to set
O_NONBLOCK which alters the behavior of write(2) when the write size is
less than PIPE_BUF. This makes a write guaranteed to be atomic.

The small complexity added is to poll on stderr in this case whenever a
write(2) fails with EAGAIN. This does not block the log flush thread in
any new way. It would always block when the stderr FIFO was full.

[1] containers/conmon#242

Fixes: cef04bc
Fixes: https://tracker.ceph.com/issues/57923
Related: https://bugzilla.redhat.com/show_bug.cgi?id=1996599
Related: https://bugzilla.redhat.com/show_bug.cgi?id=2108228
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
  • Loading branch information
batrick committed Oct 26, 2022
1 parent fbdb2cf commit cacd74a
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 1 deletion.
93 changes: 92 additions & 1 deletion src/log/Log.cc
Expand Up @@ -18,10 +18,13 @@
#include "LogClock.h"
#include "SubsystemMap.h"

#include <boost/container/vector.hpp>

#include <errno.h>
#include <fcntl.h>
#include <syslog.h>

#include <algorithm>
#include <iostream>
#include <set>

Expand Down Expand Up @@ -49,6 +52,7 @@ Log::Log(const SubsystemMap *s)
m_recent(DEFAULT_MAX_RECENT)
{
m_log_buf.reserve(MAX_LOG_BUF);
_configure_stderr();
}

Log::~Log()
Expand All @@ -64,6 +68,36 @@ Log::~Log()
}
}

void Log::_configure_stderr()
{
struct stat info;
if (int rc = fstat(STDERR_FILENO, &info); rc == -1) {
std::cerr << "failed to stat stderr: " << cpp_strerror(errno) << std::endl;
return;
}

if (S_ISFIFO(info.st_mode)) {
/* Set O_NONBLOCK on FIFO stderr file. We want to ensure atomic debug log
* writes so they do not get partially read by e.g. buggy container
* runtimes. See also IEEE Std 1003.1-2017 and Log::_log_stderr below.
*/
int flags = fcntl(STDERR_FILENO, F_GETFL);
if (flags == -1) {
std::cerr << "failed to get fcntl flags for stderr: " << cpp_strerror(errno) << std::endl;
return;
}
if (!(flags & O_NONBLOCK)) {
flags |= O_NONBLOCK;
flags = fcntl(STDERR_FILENO, F_SETFL, flags);
if (flags == -1) {
std::cerr << "failed to set fcntl flags for stderr: " << cpp_strerror(errno) << std::endl;
return;
}
}
do_stderr_poll = true;
}
}


///
void Log::set_coarse_timestamps(bool coarse) {
Expand Down Expand Up @@ -260,6 +294,63 @@ void Log::_log_safe_write(std::string_view sv)
}
}

void Log::_log_stderr(std::string_view strv)
{
if (do_stderr_poll) {
auto& prefix = m_log_stderr_prefix;
size_t const len = prefix.size() + strv.size();
boost::container::small_vector<char, PIPE_BUF> buf;
buf.resize(len+1, '\0');
memcpy(buf.data(), prefix.c_str(), prefix.size());
memcpy(buf.data()+prefix.size(), strv.data(), strv.size());

char const* const start = buf.data();
char const* current = start;
while ((size_t)(current-start) < len) {
auto chunk = std::min<ssize_t>(PIPE_BUF, len-(ssize_t)(current-start));
while (1) {
ssize_t rc = write(STDERR_FILENO, current, chunk);
if (rc == chunk) {
current += chunk;
break;
} else if (rc > 0) {
/* According to IEEE Std 1003.1-2017, this cannot happen:
*
* Write requests to a pipe or FIFO shall be handled in the same way as a regular file with the following exceptions:
* ...
* If the O_NONBLOCK flag is set ...
* ...
* A write request for {PIPE_BUF} or fewer bytes shall have the
* following effect: if there is sufficient space available in
* the pipe, write() shall transfer all the data and return the
* number of bytes requested. Otherwise, write() shall transfer
* no data and return -1 with errno set to [EAGAIN].
*
* In any case, handle misbehavior gracefully by incrementing current.
*/
current += rc;
break;
} else if (rc == -1) {
if (errno == EAGAIN) {
struct pollfd pfd[1];
pfd[0].fd = STDERR_FILENO;
pfd[0].events = POLLOUT;
poll(pfd, 1, -1);
/* ignore errors / success, just retry the write */
} else if (errno == EINTR) {
continue;
} else {
/* some other kind of error, no point logging if stderr writes fail */
return;
}
}
}
}
} else {
fmt::print(std::cerr, "{}{}", m_log_stderr_prefix, strv);
}
}

void Log::_flush_logbuf()
{
if (m_log_buf.size()) {
Expand Down Expand Up @@ -319,7 +410,7 @@ void Log::_flush(EntryVector& t, bool crash)
pos[used++] = '\n';

if (do_stderr) {
fmt::print(std::cerr, "{}{}", m_log_stderr_prefix, std::string_view(pos, used));
_log_stderr(std::string_view(pos, used));
}

if (do_fd) {
Expand Down
4 changes: 4 additions & 0 deletions src/log/Log.h
Expand Up @@ -116,6 +116,7 @@ class Log : private Thread
int m_journald_log = -3, m_journald_crash = -3;

std::string m_log_stderr_prefix;
bool do_stderr_poll = false;

std::shared_ptr<Graylog> m_graylog;
std::unique_ptr<JournaldLogger> m_journald;
Expand All @@ -133,6 +134,9 @@ class Log : private Thread
void _log_safe_write(std::string_view sv);
void _flush_logbuf();
void _log_message(std::string_view s, bool crash);
void _configure_stderr();
void _log_stderr(std::string_view strv);



};
Expand Down

0 comments on commit cacd74a

Please sign in to comment.