Skip to content

Commit

Permalink
Merge pull request #5772 from grondo/perilog-log-ignore
Browse files Browse the repository at this point in the history
job-manager: support suppression of prolog/epilog output with `perilog.log-ignore` pattern list
  • Loading branch information
mergify[bot] committed Mar 6, 2024
2 parents 1d6dbed + d748966 commit 58a5587
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 5 deletions.
10 changes: 10 additions & 0 deletions doc/guide/admin.rst
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,16 @@ components need to be configured, which is explained in the steps below.
"-e", "/usr/libexec/flux/flux-imp,run,epilog"
]
4. (optional) If log messages from the prolog or epilog are filling
up the broker logs, a list of ignore patterns may be added via
the ``[job-manager.perilog]`` ``log-ignore`` array. Each entry
in the array should be a :linux:man7:`regex`. POSIX extended
regular expression syntax is supported, e.g.:

.. code-block:: toml

   [job-manager]
   perilog.log-ignore = [ ".*Xauth.*", "^foo:.*debug" ]

Note that the ``flux perilog-run`` command will additionally execute any
scripts in ``/etc/flux/system/{prolog,epilog}.d`` on rank 0 by default as
part of the job-manager prolog/epilog. Only place scripts here if there is
Expand Down
1 change: 1 addition & 0 deletions doc/test/spell.en.pws
Original file line number Diff line number Diff line change
Expand Up @@ -858,3 +858,4 @@ uncomment
unsatisfiable
validators
hostpids
Xauth
134 changes: 129 additions & 5 deletions src/modules/job-manager/plugins/perilog.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <regex.h>
#include "src/common/libmissing/macros.h"
#define EXIT_CODE(x) __W_EXITCODE(x,0)

Expand All @@ -55,6 +56,7 @@
#include "src/common/libjob/job_hash.h"
#include "src/common/libjob/idf58.h"
#include "src/common/libczmqcontainers/czmq_containers.h"
#include "src/common/libutil/errprintf.h"
#include "ccan/str/str.h"

extern char **environ;
Expand All @@ -66,6 +68,7 @@ static struct perilog_conf {
flux_cmd_t *prolog_cmd; /* Configured prolog command */
flux_cmd_t *epilog_cmd; /* Configured epilog command */
zhashx_t *processes; /* List of outstanding perilog_proc objects */
zlistx_t *log_ignore; /* List of regex patterns to ignore in logs */
} perilog_config;


Expand Down Expand Up @@ -251,6 +254,19 @@ static void state_cb (flux_subprocess_t *sp, flux_subprocess_state_t state)
}
}

/* Return true if string 's' matches at least one compiled pattern in
 * conf->log_ignore. Returns false when no pattern matches or when the
 * list has not been created.
 */
static bool perilog_log_ignore (struct perilog_conf *conf, const char *s)
{
    const regex_t *pattern;

    if (!conf->log_ignore)
        return false;

    /* Walk the list using its internal cursor; stop at the first match */
    for (pattern = zlistx_first (conf->log_ignore);
         pattern != NULL;
         pattern = zlistx_next (conf->log_ignore)) {
        if (regexec (pattern, s, 0, NULL, 0) == 0)
            return true;
    }
    return false;
}

static void io_cb (flux_subprocess_t *sp, const char *stream)
{
struct perilog_proc *proc = flux_subprocess_aux_get (sp, "perilog_proc");
Expand All @@ -265,7 +281,7 @@ static void io_cb (flux_subprocess_t *sp, const char *stream)
stream);
return;
}
if (len) {
if (len && !perilog_log_ignore (&perilog_config, s)) {
int level = LOG_INFO;
if (streq (stream, "stderr"))
level = LOG_ERR;
Expand Down Expand Up @@ -531,13 +547,96 @@ static flux_cmd_t *cmd_from_json (json_t *o)
return NULL;
}

/* Allocate a regex_t and compile 'pattern' into it as a POSIX extended
 * regular expression. REG_NOSUB is set, so only match/no-match results
 * are available from the compiled expression.
 *
 * Returns the new regex_t on success, or NULL if either the allocation
 * or the compilation fails (nothing is leaked in the failure case).
 */
static regex_t *regexp_create (const char *pattern)
{
    regex_t *re;

    if ((re = calloc (1, sizeof (*re))) == NULL)
        return NULL;
    if (regcomp (re, pattern, REG_EXTENDED | REG_NOSUB) == 0)
        return re;
    /* Compilation failed: release the unused container */
    free (re);
    return NULL;
}

/* Release a regex_t obtained from regexp_create(): frees the compiled
 * expression and then the container itself. A NULL argument is a no-op.
 * errno is saved and restored around the calls so this is safe to use
 * in error paths.
 */
static void regexp_destroy (regex_t *reg)
{
    int saved_errno;

    if (reg == NULL)
        return;
    saved_errno = errno;
    regfree (reg);
    free (reg);
    errno = saved_errno;
}

/* Item destructor for the regex list (registered with
 * zlistx_set_destructor()). Destroys the regex_t stored at *item and
 * then clears *item so the caller is not left holding a pointer to
 * freed memory. A NULL 'item' is ignored.
 */
static void regexp_free (void **item)
{
    if (item) {
        regexp_destroy (*item);
        /* Clear the caller's slot itself; assigning NULL to a local copy
         * of the pointer would leave *item dangling.
         */
        *item = NULL;
    }
}

/* Create an empty list for holding compiled regex_t objects.
 * regexp_free() is installed as the item destructor, so the list owns
 * the items that are added to it.
 *
 * Takes no arguments: declared with (void) so that the definition is a
 * proper prototype and calls passing arguments are rejected by the
 * compiler.
 *
 * Returns the new list, or NULL if zlistx_new() fails.
 */
static zlistx_t *regexp_list_create (void)
{
    zlistx_t *l = zlistx_new ();

    if (!l)
        return NULL;
    zlistx_set_destructor (l, regexp_free);
    return l;
}

/* Compile 'pattern' and append the result to the end of list 'l'.
 *
 * Returns 0 on success. On failure returns -1 with a message written to
 * 'errp' via errprintf(). Note that regexp_create() returns NULL for an
 * allocation failure as well as a compile failure, and both are reported
 * as "Failed to compile regex". If the list insertion fails the compiled
 * regex is destroyed before returning.
 */
static int regexp_list_append (zlistx_t *l,
                               const char *pattern,
                               flux_error_t *errp)
{
    regex_t *compiled = regexp_create (pattern);

    if (compiled == NULL) {
        errprintf (errp, "Failed to compile regex: %s", pattern);
        return -1;
    }
    if (zlistx_add_end (l, compiled) != NULL)
        return 0;

    /* Not added to the list, so ownership stays here: clean up */
    regexp_destroy (compiled);
    errprintf (errp, "Out of memory adding regex pattern");
    return -1;
}

/* Append every entry of JSON array 'array' to regex list 'l', compiling
 * each one with regexp_list_append().
 *
 * Returns 0 if all entries were added. Returns -1 with a message in
 * 'errp' if 'array' is not a JSON array, if an entry is not a string, or
 * if an entry fails to compile or append. Processing stops at the first
 * bad entry; entries appended before that point remain in the list.
 */
static int regexp_list_append_array (zlistx_t *l,
                                     json_t *array,
                                     flux_error_t *errp)
{
    size_t i;
    json_t *value;

    if (!json_is_array (array)) {
        errprintf (errp, "not an array");
        return -1;
    }

    json_array_foreach (array, i, value) {
        /* json_string_value() yields NULL for any non-string entry */
        const char *str = json_string_value (value);

        if (!str) {
            errprintf (errp, "all entries must be a string value");
            return -1;
        }
        if (regexp_list_append (l, str, errp) < 0)
            return -1;
    }
    return 0;
}

/* Parse [job-manager.prolog], [job-manager.epilog], and
 * [job-manager.perilog] config
 */
static int conf_init (flux_t *h, struct perilog_conf *conf)
static int conf_init (flux_plugin_t *p, struct perilog_conf *conf)
{
flux_t *h = flux_jobtap_get_flux (p);
flux_error_t error;
json_t *prolog = NULL;
json_t *epilog = NULL;
json_t *log_ignore = NULL;

memset (conf, 0, sizeof (*conf));
conf->prolog_kill_timeout = 5.;
Expand All @@ -546,15 +645,30 @@ static int conf_init (flux_t *h, struct perilog_conf *conf)
zhashx_set_destructor (conf->processes,
perilog_proc_destructor);

/* Set up log ignore pattern list
*/
if (!(conf->log_ignore = regexp_list_create ()))
return -1;
/* Always ignore empty lines
*/
if (regexp_list_append (conf->log_ignore, "^\\s*$", &error) < 0) {
flux_log (h,
LOG_ERR,
"perilog: failed to pass empty pattern to log-ignore: %s",
error.text);
return -1;
}
if (flux_conf_unpack (flux_get_conf (h),
&error,
"{s?{s?{s?o s?F !} s?{s?o !}}}",
"{s?{s?{s?o s?F !} s?{s?o !} s?{s?o}}}",
"job-manager",
"prolog",
"command", &prolog,
"kill-timeout", &conf->prolog_kill_timeout,
"epilog",
"command", &epilog) < 0) {
"command", &epilog,
"perilog",
"log-ignore", &log_ignore) < 0) {
flux_log (h, LOG_ERR,
"prolog/epilog configuration error: %s",
error.text);
Expand All @@ -570,6 +684,15 @@ static int conf_init (flux_t *h, struct perilog_conf *conf)
flux_log (h, LOG_ERR, "[job-manager.epilog] command malformed!");
return -1;
}
if (log_ignore
&& regexp_list_append_array (conf->log_ignore,
log_ignore,
&error) < 0) {
flux_log (h,
LOG_ERR,
"perilog: error parsing conf.log_ignore: %s", error.text);
return -1;
}
return 0;
}

Expand All @@ -578,6 +701,7 @@ static void free_config (struct perilog_conf *conf)
flux_cmd_destroy (conf->prolog_cmd);
flux_cmd_destroy (conf->epilog_cmd);
zhashx_destroy (&conf->processes);
zlistx_destroy (&conf->log_ignore);
}

static const struct flux_plugin_handler tab[] = {
Expand All @@ -589,7 +713,7 @@ static const struct flux_plugin_handler tab[] = {

int flux_plugin_init (flux_plugin_t *p)
{
if (conf_init (flux_jobtap_get_flux (p), &perilog_config) < 0
if (conf_init (p, &perilog_config) < 0
|| flux_plugin_aux_set (p,
NULL,
&perilog_config,
Expand Down
36 changes: 36 additions & 0 deletions t/t2274-manager-perilog.t
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,42 @@ test_expect_success 'perilog: canceled prolog does not drain ranks' '
flux resource drain &&
test "$(drained_ranks)" = ""
'
# Configure a prolog that prints three lines (foo:, bar:, baz:) together
# with log-ignore patterns matching the foo: and bar: lines, reload the
# plugin, run a job, and verify that only the baz: line appears in the
# `flux dmesg` output.
test_expect_success 'perilog: log-ignore works' '
cat <<-EOF >config/perilog.toml &&
[job-manager.prolog]
command = [ "printf", "foo: whee!\nbar: woo!\nbaz: important!\n" ]
[job-manager.perilog]
log-ignore = [ "^foo:.*", "^bar:" ]
EOF
flux config reload &&
flux jobtap load --remove=*.so perilog.so &&
flux dmesg -c >/dev/null &&
flux run hostname &&
flux dmesg -H > dmesg.out &&
test_debug "cat dmesg.out" &&
test_must_fail grep foo: dmesg.out &&
test_must_fail grep bar: dmesg.out &&
grep baz: dmesg.out
'
# Set log-ignore to a plain string instead of an array: loading
# perilog.so must fail and "not an array" must show up in the broker log.
test_expect_success 'perilog: bad log-ignore entry is caught' '
cat <<-EOF >config/perilog.toml &&
[job-manager.perilog]
log-ignore = "foo"
EOF
flux config reload &&
test_must_fail flux jobtap load --remove=*.so perilog.so &&
flux dmesg -Hc | grep "not an array"
'
# Put an invalid regular expression ("[") in log-ignore: loading
# perilog.so must fail and a "failed to compile" message (either case of
# the leading letter) must show up in the broker log.
test_expect_success 'perilog: bad log-ignore regexp is caught' '
cat <<-EOF >config/perilog.toml &&
[job-manager.perilog]
log-ignore = [ "[" ]
EOF
flux config reload &&
test_must_fail flux jobtap load perilog.so &&
flux dmesg -Hc | grep "[fF]ailed to compile"
'

# Note: run this job before taking rank 3 offline below
test_expect_success 'perilog: run job across all 4 ranks' '
jobid=$(flux submit --wait-event=clean -N4 -n4 true)
Expand Down

0 comments on commit 58a5587

Please sign in to comment.