Skip to content

Commit

Permalink
Fix foreground process group on restart
Browse files Browse the repository at this point in the history
     * Sometimes 'dmtcp_launch' is called from inside a shell script
       (e.g., for a batch job).  In this case, on restart, the foreground
       process group leader is the restarted process, and yet the
       process group leader is the parent (presumably the login shell).
       So, the foreground process group leader is not the leader of a
       process group.  To fix this, we set the process group leader to
       also be the leader of the foreground process group.
  • Loading branch information
gc00 committed Apr 3, 2021
1 parent cd0c8ee commit c9fa28a
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 13 deletions.
46 changes: 43 additions & 3 deletions src/terminal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#endif // ifdef HAS_PR_SET_PTRACER
#include "../jalib/jassert.h"
#include "config.h"
#include "syscallwrappers.h"
#include "dmtcp.h"

/*************************************************************************
Expand Down Expand Up @@ -60,18 +61,57 @@ safe_tcsetattr(int fd, int optional_actions, const struct termios *termios_p)
return 0;
}

// FIXME: Handle Virtual Pids
/*
 * Return the parent pid of process 'pid', parsed from /proc/PID/stat.
 *
 * The stat line has the form:  PID (COMMAND) STATE PPID ...
 * COMMAND is an arbitrary string and may itself contain spaces or
 * parentheses, so the parse is anchored on the LAST ')' in the line; every
 * field after COMMAND is a single character or a number.
 *
 * A JASSERT fires if the file cannot be opened or if no PPID field
 * can be parsed from it.
 */
static int
get_parent_pid(int pid)
{
  char buf[8192];

  snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
  FILE *fp = fopen(buf, "r");
  JASSERT(fp != NULL)(pid)(JASSERT_ERRNO).Text("fopen");

  int rc = -1;
  if (fp) {
    // fread() does not NUL-terminate; reserve one byte and terminate here
    // so that the string functions below never run past the data read.
    size_t size = fread(buf, 1, sizeof(buf) - 1, fp);
    buf[size] = '\0';
    fclose(fp);  // Do not leak the stream (and its file descriptor).

    char *p = strrchr(buf, ')');     // End of "(COMMAND)"
    if (p != NULL && p[1] == ' ') {
      p = strchr(p + 2, ' ');        // Skip over STATE
      if (p != NULL) {
        char *end = NULL;
        long ppid = strtol(p + 1, &end, 10);
        if (end != p + 1) {          // At least one digit was consumed
          rc = (int)ppid;
        }
      }
    }
  }
  JASSERT(rc != -1)(pid).Text("parent pid not found in /proc/*/stat");
  return rc;
}

// See: info libc -> Job Control -> Implementing a Shell ->
// Foreground and Background
static void
restore_term_settings()
{
if (saved_termios_exists) {
/* First check if we are in foreground. If not, skip this and print
* warning. If we try to call tcsetattr in background, we will hang up.
* In some shells, the shell itself will be a pgrp that includes its
* first child process, instead of the first child forming a pgrp.
*/
int foreground = (tcgetpgrp(STDIN_FILENO) == getpgrp());
int foreground = (_real_tcgetpgrp(STDIN_FILENO) == _libc_getpgrp() ||
get_parent_pid(_real_tcgetpgrp(STDIN_FILENO)) ==
_libc_getpgrp());
JTRACE("restore terminal attributes, check foreground status first")
(foreground);
(foreground);
if (foreground) {
if (get_parent_pid(_real_tcgetpgrp(STDIN_FILENO)) == _libc_getpgrp()) {
/* At ckpt time, maybe a shell script called dmtcp_launch.
* Now, at restart, we are running directly under the login shell,
* and we are foreground process group leader, not the login shell.
* Normally, the foreground process group leader is also
* a process group leader. But this broke, during restart.
* So, we will set a new foreground process group leader to fix this.
*/
tcsetpgrp(0, getpgrp()); // Set login shell (not under DMTCP control)
// to be the foreground process, instead of
// this process.
JTRACE("The process group ID of this process is the parent of\n"
" the process group ID for the foreground process group.\n"
"Make current process group the new foreground process group.");
}
if ((!isatty(STDIN_FILENO)
|| safe_tcsetattr(STDIN_FILENO, TCSANOW, &saved_termios) == -1)) {
JWARNING(false).Text("failed to restore terminal");
Expand Down
15 changes: 6 additions & 9 deletions test/misc/README
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,11 @@ Some of these special cases could be combined into autotest.py, and

Examples of special cases are:
* All CPU architectures: x86, x86_64, arm, aarch64
* The two major branches of distros: Debian/Ubuntu and
RedHat/Fedora/CoreOS
* The three major branches of distros: Debian/Ubuntu,
RedHat/Fedora/CoreOS, and OpenSUSE/SUSE
* 32-bit Linux: native, --enable-m32, and multi-arch (see multi-arch.sh)
* Builds of DMTCP with other compilers: icc LLVM/clang
* musl libc (and Alpine Linux) (?)
* 'make install' for native 64-bit and each of the 32-bit Linux combinations
* for file in test/plugin/*; do (cd $file && make check); done
* dmtcp_launch invoked in extra shell: #!/bin/sh / dmtcp_launch test/dmtcp1
[ and test restart: foreground issue ]
* forked checkpointing
* fast restart
* --no-gzip
Expand All @@ -30,9 +26,10 @@ Examples of special cases are:
* Network: InfiniBand (CM and UD), TCP
+ Shared filesystems: NFS, Lustre
* openmp and Intel TBB
* MANA for MPI and CRAC for CUDA
* SOON: OpenGL (?)
* MIC
* SOON: OpenGL
* Widely used commercial software: Matlab, ...
* Non-default plugins:
modify-env, path-virt
--ptrace, modify-env
test/plugin/{applic-initiated-ckpt,applic-delayed-ckpt}
* Builds of DMTCP with other compilers: icc LLVM/Clang
3 changes: 2 additions & 1 deletion util/gdb-dmtcp-utils
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ class ShowFilenameAtAddress(gdb.Command):
if getpid() == 0:
gdb.execute('print "Process not yet started"', False, False)
else:
memory_region = "%s (r-x): 0x%x-0x%x" % \
memory_region = "%s (r[-w]x): 0x%x-0x%x" % \
memory_region_at_address(memory_address)
gdb.execute('print "' + memory_region + '"', False, False)
# This will add the new gdb command: show-filename-at-address MEMORY_ADDRESS
Expand Down Expand Up @@ -352,6 +352,7 @@ def procmap_filename_address(line):
return (triple[-1],) + \
tuple([int("0x"+elt, 16) for elt in triple[0].split("-")])
def is_text_segment(procmap_line):
  """Return True if a /proc/PID/maps line looks like a file-backed text segment.

  A line qualifies when its text contains the permission string "r-x" and
  also contains a '/' (i.e., a pathname is present on the line).
  """
  # No debug print here: this predicate is evaluated per maps line, and
  # echoing each line would flood the gdb console.
  # A line containing "r-x" can be neither empty nor all-whitespace, so the
  # only remaining condition to test is the presence of a pathname.
  return "r-x" in procmap_line and '/' in procmap_line
def memory_regions():
Expand Down

0 comments on commit c9fa28a

Please sign in to comment.