Skip to content

Commit

Permalink
Merge pull request #4795 from grondo/issue#4771
Browse files Browse the repository at this point in the history
broker: improve detection of interactive shells
  • Loading branch information
mergify[bot] committed Dec 5, 2022
2 parents 82d9c86 + 59f45b9 commit ec885f6
Show file tree
Hide file tree
Showing 12 changed files with 132 additions and 10 deletions.
30 changes: 28 additions & 2 deletions src/broker/broker.c
Expand Up @@ -661,15 +661,41 @@ static void set_proctitle (uint32_t rank)
(void)prctl (PR_SET_NAME, proctitle, 0, 0, 0);
}

static bool is_interactive_shell (const char *argz, size_t argz_len)
{
bool result = false;
/* If no command is specified, then an interactive shell will be run
*/
if (argz == NULL)
return true;

/* O/w, if command is plain "$SHELL", e.g. bash, zsh, csh, etc.
* then assume interactive shell.
*/
if (argz_count (argz, argz_len) == 1) {
char *shell;
char *cmd = argz_next (argz, argz_len, NULL);
while ((shell = getusershell ())) {
if (strcmp (cmd, shell) == 0
|| strcmp (cmd, basename (shell)) == 0) {
result = true;
break;
}
}
endusershell ();
}
return result;
}

static int create_runat_rc2 (struct runat *r, const char *argz, size_t argz_len)
{
if (argz == NULL) { // run interactive shell
if (is_interactive_shell (argz, argz_len)) { // run interactive shell
/* Check if stdin is a tty and error out if not to avoid
* confusing users with what appears to be a hang.
*/
if (!isatty (STDIN_FILENO))
log_msg_exit ("stdin is not a tty - can't run interactive shell");
if (runat_push_shell (r, "rc2", 0) < 0)
if (runat_push_shell (r, "rc2", argz, 0) < 0)
return -1;
}
else if (argz_count (argz, argz_len) == 1) { // run shell -c "command"
Expand Down
28 changes: 24 additions & 4 deletions src/broker/runat.c
Expand Up @@ -19,6 +19,7 @@
#if HAVE_CONFIG_H
#include "config.h"
#endif
#include <unistd.h>
#include <signal.h>
#include <assert.h>
#include <argz.h>
Expand Down Expand Up @@ -200,6 +201,19 @@ static void state_change_cb (flux_subprocess_t *p,
case FLUX_SUBPROCESS_EXITED:
case FLUX_SUBPROCESS_FAILED:
break;
case FLUX_SUBPROCESS_STOPPED:
/* If this process is non-interactive, and stdin was a tty,
* then likely the subprocess was stopped due to SIGTTIN/TTOU.
* To avoid what would be a permanent hang, log an error and
* kill the child process.
*/
if (!entry->interactive && isatty (STDIN_FILENO)) {
flux_log (r->h, LOG_ERR,
"%s: Killing stopped non-interactive process",
entry->name);
flux_subprocess_kill (p, SIGKILL);
}
break;
case FLUX_SUBPROCESS_RUNNING:
if (entry->aborted) {
if (!(f = flux_subprocess_kill (p, abort_signal)))
Expand Down Expand Up @@ -350,9 +364,12 @@ static int runat_command_set_argz (struct runat_command *cmd,
}

static int runat_command_set_cmdline (struct runat_command *cmd,
const char *shell,
const char *cmdline)
{
if (flux_cmd_argv_append (cmd->cmd, get_shell ()) < 0)
if (shell == NULL)
shell = get_shell ();
if (flux_cmd_argv_append (cmd->cmd, shell) < 0)
return -1;
if (cmdline) {
if (flux_cmd_argv_append (cmd->cmd, "-c") < 0)
Expand Down Expand Up @@ -454,7 +471,7 @@ int runat_push_shell_command (struct runat *r,
*/
cmd->flags |= FLUX_SUBPROCESS_FLAGS_SETPGRP;

if (runat_command_set_cmdline (cmd, cmdline) < 0)
if (runat_command_set_cmdline (cmd, NULL, cmdline) < 0)
goto error;
if (runat_command_modenv (cmd, env_blocklist, r->local_uri) < 0)
goto error;
Expand All @@ -466,7 +483,10 @@ int runat_push_shell_command (struct runat *r,
return -1;
}

int runat_push_shell (struct runat *r, const char *name, int flags)
int runat_push_shell (struct runat *r,
const char *name,
const char *shell,
int flags)
{
struct runat_command *cmd;

Expand All @@ -476,7 +496,7 @@ int runat_push_shell (struct runat *r, const char *name, int flags)
}
if (!(cmd = runat_command_create (environ, flags)))
return -1;
if (runat_command_set_cmdline (cmd, NULL) < 0)
if (runat_command_set_cmdline (cmd, shell, NULL) < 0)
goto error;
if (runat_command_modenv (cmd, env_blocklist, r->local_uri) < 0)
goto error;
Expand Down
5 changes: 4 additions & 1 deletion src/broker/runat.h
Expand Up @@ -40,7 +40,10 @@ int runat_push_shell_command (struct runat *r,
/* Push interactive shell onto named list.
* Note: RUNAT_FLAG_LOG_STDIO flag not allowed
*/
int runat_push_shell (struct runat *r, const char *name, int flags);
int runat_push_shell (struct runat *r,
const char *name,
const char *shell,
int flags);

/* Push command, to be run directly, onto named list.
* The command is specified by argz.
Expand Down
7 changes: 4 additions & 3 deletions src/broker/test/runat.c
Expand Up @@ -313,13 +313,14 @@ void badinput (flux_t *h)
"runat_get_exit_code rc=NULL fails with ENOENT");

errno = 0;
ok (runat_push_shell (NULL, "foo", 0) < 0 && errno == EINVAL,
ok (runat_push_shell (NULL, "foo", NULL, 0) < 0 && errno == EINVAL,
"runat_push_shell r=NULL fails with EINVAL");
errno = 0;
ok (runat_push_shell (r, NULL, 0) < 0 && errno == EINVAL,
ok (runat_push_shell (r, NULL, NULL, 0) < 0 && errno == EINVAL,
"runat_push_shell name=NULL fails with EINVAL");
errno = 0;
ok (runat_push_shell (r, "foo", RUNAT_FLAG_LOG_STDIO) < 0 && errno == EINVAL,
ok (runat_push_shell (r, "foo", NULL, RUNAT_FLAG_LOG_STDIO) < 0
&& errno == EINVAL,
"runat_push_shell flags=RUNAT_FLAG_LOG_STDIO fails with EINVAL");

errno = 0;
Expand Down
3 changes: 3 additions & 0 deletions src/cmd/flux-start.c
Expand Up @@ -406,6 +406,9 @@ static void state_cb (flux_subprocess_t *p, flux_subprocess_state_t state)
}
break;
}
case FLUX_SUBPROCESS_STOPPED:
/* ignore */
break;
}
}

Expand Down
5 changes: 5 additions & 0 deletions src/common/libsubprocess/local.c
Expand Up @@ -375,6 +375,11 @@ static void child_watch_cb (flux_reactor_t *r, flux_watcher_t *w,

p->status = status;

if (WIFSTOPPED (p->status)) {
if (p->ops.on_state_change)
(*p->ops.on_state_change) (p, FLUX_SUBPROCESS_STOPPED);
}

if (WIFEXITED (p->status) || WIFSIGNALED (p->status)) {

/* remote/server code may have set EXEC_FAILED or
Expand Down
3 changes: 3 additions & 0 deletions src/common/libsubprocess/subprocess.c
Expand Up @@ -464,6 +464,7 @@ static flux_subprocess_state_t state_change_next (flux_subprocess_t *p)
case FLUX_SUBPROCESS_EXEC_FAILED:
case FLUX_SUBPROCESS_EXITED:
case FLUX_SUBPROCESS_FAILED:
case FLUX_SUBPROCESS_STOPPED:
break;
}

Expand Down Expand Up @@ -1226,6 +1227,8 @@ const char *flux_subprocess_state_string (flux_subprocess_state_t state)
return "Exited";
case FLUX_SUBPROCESS_FAILED:
return "Failed";
case FLUX_SUBPROCESS_STOPPED:
return "Stopped";
}
return NULL;
}
Expand Down
1 change: 1 addition & 0 deletions src/common/libsubprocess/subprocess.h
Expand Up @@ -56,6 +56,7 @@ typedef enum {
FLUX_SUBPROCESS_EXITED, /* process has exited */
FLUX_SUBPROCESS_FAILED, /* internal failure, catch all for
* all other errors */
FLUX_SUBPROCESS_STOPPED, /* process was stopped */
} flux_subprocess_state_t;

/*
Expand Down
48 changes: 48 additions & 0 deletions src/common/libsubprocess/test/subprocess.c
Expand Up @@ -41,6 +41,7 @@ int child_pid;
int stdout_eof_cb_count;
int stderr_eof_cb_count;
int state_change_cb_count;
int stopped_cb_count;
int channel_fd_env_cb_count;
int channel_in_cb_count;
int channel_in_and_out_cb_count;
Expand Down Expand Up @@ -1459,6 +1460,49 @@ void test_state_change (flux_reactor_t *r)
flux_cmd_destroy (cmd);
}

void state_change_stopped_cb (flux_subprocess_t *p,
flux_subprocess_state_t state)
{
diag ("state_change_stopped: state = %s",
flux_subprocess_state_string (state));
if (state == FLUX_SUBPROCESS_STOPPED) {
flux_future_t *f = flux_subprocess_kill (p, SIGKILL);
ok (true, "subprocess state == STOPPED in state change handler");
flux_future_destroy (f);
stopped_cb_count++;
}
}

void test_state_change_stopped (flux_reactor_t *r)
{
char *av[] = { "/bin/sleep", "30", NULL };
flux_cmd_t *cmd;
flux_subprocess_t *p = NULL;

ok ((cmd = flux_cmd_create (2, av, NULL)) != NULL, "flux_cmd_create");

flux_subprocess_ops_t ops = {
.on_state_change = state_change_stopped_cb
};
stopped_cb_count = 0;
p = flux_local_exec (r, 0, cmd, &ops, NULL);
ok (p != NULL, "flux_local_exec");

ok (flux_subprocess_state (p) == FLUX_SUBPROCESS_RUNNING,
"subprocess state == RUNNING after flux_local_exec");

flux_future_t *f = flux_subprocess_kill (p, SIGSTOP);
ok (f != NULL,
"flux_subprocess_kill SIGStOP");
flux_future_destroy (f);

int rc = flux_reactor_run (r, 0);
ok (rc == 0, "flux_reactor_run returned zero status");
ok (stopped_cb_count == 1, "subprocess was stopped");
flux_subprocess_destroy (p);
flux_cmd_destroy (cmd);
}

void test_state_strings (void)
{
ok (!strcasecmp (flux_subprocess_state_string (FLUX_SUBPROCESS_INIT), "Init"),
Expand All @@ -1471,6 +1515,8 @@ void test_state_strings (void)
"flux_subprocess_state_string returns correct string");
ok (!flux_subprocess_state_string (100),
"flux_subprocess_state_string returns NULL on bad state");
is (flux_subprocess_state_string (FLUX_SUBPROCESS_STOPPED),
"Stopped");
}

void test_exec_fail (flux_reactor_t *r)
Expand Down Expand Up @@ -2737,6 +2783,8 @@ int main (int argc, char *argv[])
test_kill_eofs (r);
diag ("state_change");
test_state_change (r);
diag ("state_change_stopped");
test_state_change_stopped (r);
diag ("state_strings");
test_state_strings ();
diag ("exec_fail");
Expand Down
1 change: 1 addition & 0 deletions src/modules/job-ingest/worker.c
Expand Up @@ -119,6 +119,7 @@ static void worker_state_cb (flux_subprocess_t *p,
break;
case FLUX_SUBPROCESS_EXITED:
case FLUX_SUBPROCESS_INIT:
case FLUX_SUBPROCESS_STOPPED:
break; // ignore
}
}
Expand Down
1 change: 1 addition & 0 deletions t/Makefile.am
Expand Up @@ -318,6 +318,7 @@ dist_check_SCRIPTS = \
issues/t4583-free-range-test.sh \
issues/t4612-eventlog-overwrite-crash.sh \
issues/t4711-job-list-purge-inactive.sh \
issues/t4771-flux-start-bash.sh \
python/__init__.py \
python/subflux.py \
python/tap \
Expand Down
10 changes: 10 additions & 0 deletions t/issues/t4771-flux-start-bash.sh
@@ -0,0 +1,10 @@
#!/bin/sh -x
# flux start of interactive process non-interactively causes error

# create link to shell to thwart broker detection of interactive shell:
ln -s /bin/bash bash

${SHARNESS_TEST_SRCDIR}/scripts/runpty.py flux start ./bash

# Ensure broker killed bash with SIGKILL:
test $? -eq 137

0 comments on commit ec885f6

Please sign in to comment.