Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

This commit was manufactured by cvs2svn to create tag

'slurm-0-3-14-1'.
  • Loading branch information...
commit 5a49bcbc85aed0dac989a28ee9affcd7f42a3433 1 parent 8e60bcf
no author authored
Showing with 522 additions and 248 deletions.
  1. +2 −2 META
  2. +47 −0 NEWS
  3. +6 −2 doc/man/man1/scontrol.1
  4. +16 −4 doc/man/man1/srun.1
  5. +19 −10 etc/init.d.slurm
  6. +22 −18 slurm.spec.in
  7. +3 −1 src/common/bitstring.c
  8. +13 −8 src/common/cbuf.c
  9. +7 −5 src/common/cbuf.h
  10. +3 −0  src/common/slurm_protocol_api.c
  11. +2 −15 src/plugins/auth/munge/auth_munge.c
  12. +2 −2 src/plugins/jobcomp/filetxt/jobcomp_filetxt.c
  13. +71 −1 src/plugins/switch/elan/qsw.c
  14. +3 −0  src/plugins/switch/elan/qsw.h
  15. +10 −1 src/plugins/switch/elan/switch_elan.c
  16. +0 −1  src/sinfo/sinfo.c
  17. +13 −8 src/slurmctld/job_mgr.c
  18. +4 −2 src/slurmctld/node_scheduler.c
  19. +18 −8 src/slurmctld/partition_mgr.c
  20. +0 −5 src/slurmctld/step_mgr.c
  21. +27 −1 src/slurmd/fname.c
  22. +1 −0  src/slurmd/fname.h
  23. +32 −9 src/slurmd/io.c
  24. +2 −1  src/slurmd/job.c
  25. +12 −2 src/slurmd/req.c
  26. +12 −3 src/slurmd/shm.c
  27. +34 −4 src/slurmd/smgr.c
  28. +2 −0  src/srun/allocate.c
  29. +22 −6 src/srun/fname.c
  30. +2 −0  src/srun/fname.h
  31. +31 −28 src/srun/io.c
  32. +10 −8 src/srun/launch.c
  33. +10 −3 src/srun/opt.c
  34. +1 −0  src/srun/opt.h
  35. +5 −5 src/srun/signals.c
  36. +47 −37 src/srun/srun.c
  37. +0 −1  testsuite/slurm_unit/slurmctld/Makefile.am
  38. +6 −6 testsuite/slurm_unit/slurmctld/security_2_1.csh
  39. +0 −40 testsuite/slurm_unit/slurmctld/security_2_4.c
  40. +5 −1 testsuite/slurm_unit/slurmd/security_3_1.c
View
4 META
@@ -9,8 +9,8 @@
Name: slurm
Major: 0
Minor: 3
- Micro: 7
- Version: 0.3.7
+ Micro: 14
+ Version: 0.3.14
Release: 1
API_CURRENT: 5
API_AGE: 4
View
47 NEWS
@@ -1,6 +1,53 @@
This file describes changes in recent versions of SLURM. It primarily
documents those changes that are of interest to users and admins.
+* Changes in SLURM 0.3.14
+=========================
+ -- Fix bug in bitfmt2int() which can go off allocated memory.
+
+* Changes in SLURM 0.3.13
+=========================
+ -- "sinfo -i" no longer aborts when nodes have associated features
+
+* Changes in SLURM 0.3.12
+=========================
+ -- Always report job scheduling details (so prolog/epilog can find
+ constraints).
+
+* Changes in SLURM 0.3.11
+=========================
+ -- Fixes for reported problems:
+ - slurm/538: user tasks block writing to stdio
+ -- Added srun option --disable-status,-X to disable srun status feature
+ and instead forward SIGINT immediately to job upon receipt of Ctrl-C.
+ -- Fix for bogus slurmd error message "Unable to put task N into pgrp..."
+ -- Fix case where slurmd may erroneously detect shared memory entry
+ as "stale" and delete entry for unkillable or slow-to-exit job.
 -- (qsnet) Fix for running slurmd on node without an elan3 adapter.
+
+* Changes in SLURM 0.3.10
+=========================
+ -- Move startup script from "/etc/rc.d/init.d/slurm" to "/etc/init.d/slurm"
+ -- Avoid creating orphaned process group when starting job processes.
+ -- Remove calls in auth/munge plugin deprecated by munge-0.4.
+
+* Changes in SLURM 0.3.9
+========================
+ -- Fixes for reported problems:
+ - slurm/512: Let job steps run on DRAINING nodes
+ - slurm/513: Gracefully deal with UIDs missing from passwd file
+ -- Create new allocation as needed for debugger in case old allocation
+ has been purged
+ -- Fix bug in scheduling jobs when a processor count is specified
+ and FastSchedule=0 and the cluster is heterogeneous.
+ -- Fix srun bug when --input, --output and --error are all "none"
+ -- Allow single task id to be selected with --input, --output, and --error.
+ -- Create shared memory segment for Elan statistics when using the
+ switch/elan plugin.
+
+* Changes in SLURM 0.3.8
+========================
+ -- More fixes necessary for TotalView.
* Changes in SLURM 0.3.7
========================
View
8 doc/man/man1/scontrol.1
@@ -171,6 +171,7 @@ Possible values are"YES" and "NO".
Set the job's required features on nodes specified value. Multiple values
may be comma separated if all features are required (AND operation) or
separated by "|" if any of the specified features are required (OR operation).
+Value may be cleared with blank data value, "Features=".
.TP
\fIJobId\fP=<id>
Identify the job to be updated. This specification is required.
@@ -196,6 +197,7 @@ Set the job's priority to the specified value.
\fIReqNodeList\fP=<nodes>
Set the job's list of required node. Multiple node names may be specified using
simple node range expressions (e.g. "lx[10-20]").
+Value may be cleared with blank data value, "ReqNodeList=".
.TP
\fIReqNodes\fP=<count>
Set the job's count of required nodes to the specified value.
@@ -232,8 +234,9 @@ changing its underlying state.
\fBSPECIFICATIONS FOR UPDATE AND DELETE COMMANDS, PARTITIONS\fR
.TP
\fIAllowGroups\fP=<name>
-Identify the user groups which may use this partition. Multiple groups
-may be specified in a comma separated list.
+Identify the user groups which may use this partition.
+Multiple groups may be specified in a comma separated list.
+To permit all groups to use the partition specify "AllowGroups=ALL".
.TP
\fIDefault\fP=<yes|no>
Specify if this partition is to be used by jobs which do not explicitly
@@ -249,6 +252,7 @@ Possible values are"YES" and "NO".
Identify the node(s) to be associated with this partition. Multiple node names
may be specified using simple node range expressions (e.g. "lx[10-20]").
Note that jobs may only be associated with one partition at any time.
+Specify a blank data value to remove all nodes from a partition: "Nodes=".
.TP
\fIPartitionName\fP=<name>
Identify the partition to be updated. This specification is required.
View
20 doc/man/man1/srun.1
@@ -223,15 +223,24 @@ the job. By default only errors are displayed.
.TP
\fB\-W\fR, \fB\-\-wait\fR=\fIseconds\fR
Specify how long to wait after the first task terminates before terminating
-all remaining tasks. The default value is unlimited. This can be useful to
-insure that a job is terminated in a timely fashion in the event that one
-or more tasks terminate prematurely.
+all remaining tasks. A value of 0 indicates an unlimited wait (a warning will
+be issued after 60 seconds). The default value is set by the WaitTime
+parameter in the slurm configuration file (see \fBslurm.conf(5)\fR). This
+option can be useful to ensure that a job is terminated in a timely fashion
+in the event that one or more tasks terminate prematurely.
.TP
\fB\-q\fR, \fB\-\-quit-on-interrupt\fR
Quit immediately on single SIGINT (Ctrl-C). Use of this option
disables the status feature normally available when \fBsrun\fR receives
a single Ctrl-C and causes \fBsrun\fR to instead immediately terminate the
-running job.
+running job.
+.TP
+\fB\-X\fR, \fB\-\-disable-status\fR
+Disable the display of task status when srun receives a single SIGINT
+(Ctrl-C). Instead immediately forward the SIGINT to the running job.
+A second Ctrl-C in one second will forcibly terminate the job and
+\fBsrun\fR will immediately exit. May also be set via the environment
+variable SLURM_DISABLE_STATUS.
.TP
\fB\-Q\fR, \fB\-\-quiet\fR
Quiet operation. Suppress informational messages. Errors will still
@@ -604,6 +613,9 @@ SLURM_TIMELIMIT
.TP
SLURM_WAIT
\fB\-W, \-\-wait\fR=\fIseconds\fR
+.TP
+SLURM_DISABLE_STATUS
+\fB\-X, \-\-disable-status\fR
.PP
Additionally,
.B srun
View
29 etc/init.d.slurm
@@ -16,6 +16,11 @@
# $Id$
+BINDIR=/usr/bin
+CONFDIR=/etc/slurm
+LIBDIR=/usr/lib
+SBINDIR=/usr/sbin
+
# Source function library.
[ -f /etc/rc.d/init.d/functions ] || exit 0
. /etc/rc.d/init.d/functions
@@ -28,14 +33,17 @@ else
SLURMD_OPTIONS=""
fi
-[ -f /etc/slurm/slurm.conf ] || exit 1
+[ -f $CONFDIR/slurm.conf ] || exit 1
+
+# setup library paths for slurm and munge support
+export LD_LIBRARY_PATH="$LIBDIR:$LD_LIBRARY_PATH"
RETVAL=0
start() {
echo -n "starting $1: "
unset HOME MAIL USER USERNAME
- daemon /usr/sbin/$1 $2
+ daemon $SBINDIR/$1 $2
RETVAL=$?
echo
touch /var/lock/subsys/slurm
@@ -52,7 +60,7 @@ stop() {
}
startall() {
- for prog in `scontrol show daemons`; do
+ for prog in `$BINDIR/scontrol show daemons`; do
optvar=`echo ${prog}_OPTIONS | tr "a-z" "A-Z"`
start $prog ${!optvar}
done
@@ -68,7 +76,7 @@ slurmstatus() {
local rpid
local pidfile
- pidfile=`grep -i ${base}pid /etc/slurm/slurm.conf | grep -v '^ *#'`
+ pidfile=`grep -i ${base}pid $CONFDIR/slurm.conf | grep -v '^ *#'`
if [ $? = 0 ]; then
pidfile=${pidfile##*=}
pidfile=${pidfile%#*}
@@ -123,33 +131,34 @@ case "$1" in
startall
;;
stop)
- for prog in `scontrol show daemons`; do
+ for prog in `$BINDIR/scontrol show daemons`; do
stop $prog
done
;;
status)
- for prog in `scontrol show daemons`; do
+ for prog in `$BINDIR/scontrol show daemons`; do
slurmstatus $prog
done
;;
restart)
- $0 stop && $0 start
+ $0 stop
+ $0 start
;;
condrestart)
if [ -f /var/lock/subsys/slurm ]; then
- for prog in `scontrol show daemons`; do
+ for prog in `$BINDIR/scontrol show daemons`; do
stop $prog
start $prog
done
fi
;;
reconfig)
- for prog in `scontrol show daemons`; do
+ for prog in `$BINDIR/scontrol show daemons`; do
killproc $prog -HUP
done
;;
test)
- for prog in `scontrol show daemons`; do
+ for prog in `$BINDIR/scontrol show daemons`; do
echo "$prog runs here"
done
;;
View
40 slurm.spec.in
@@ -24,8 +24,12 @@ Requires: openssl >= 0.9.6
#
%if %{?_with_debug:1}%{!?_with_debug:0}
%define _enable_debug --enable-debug
- %define __os_install_post /usr/lib/rpm/brp-compress
%endif
+#
+# Never allow rpm to strip binaries as this will break
+# parallel debugging capability
+#
+%define __os_install_post /usr/lib/rpm/brp-compress
%package devel
@@ -108,16 +112,16 @@ rm -rf "$RPM_BUILD_ROOT"
mkdir -p "$RPM_BUILD_ROOT"
DESTDIR="$RPM_BUILD_ROOT" make install
-install -D -m755 etc/init.d.slurm $RPM_BUILD_ROOT/etc/rc.d/init.d/slurm
+install -D -m755 etc/init.d.slurm $RPM_BUILD_ROOT/etc/init.d/slurm
install -D -m644 etc/slurm.conf.example $RPM_BUILD_ROOT/etc/slurm/slurm.conf
# Delete unpackaged files:
-rm -f $RPM_BUILD_ROOT/usr/lib/slurm/*.{a,la}
+rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/*.{a,la}
# Build file lists for optional plugin packages
for plugin in auth_munge auth_authd switch_elan; do
LIST=./${plugin}.files
touch $LIST
- test -f $RPM_BUILD_ROOT/usr/lib/slurm/${plugin}.so &&
+ test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/${plugin}.so &&
echo %{_libdir}/slurm/${plugin}.so > $LIST
done
@@ -154,7 +158,7 @@ rm -rf $RPM_BUILD_ROOT
%{_libdir}/slurm/sched_builtin.so
%{_libdir}/slurm/switch_none.so
%dir %{_libdir}/slurm/src
-%config(noreplace) /etc/rc.d/init.d/slurm
+%config(noreplace) /etc/init.d/slurm
%config(noreplace) /etc/slurm/slurm.conf
#############################################################################
@@ -189,29 +193,24 @@ rm -rf $RPM_BUILD_ROOT
#############################################################################
%pre
-#if [ -x /etc/rc.d/init.d/slurm ]; then
-# if /etc/rc.d/init.d/slurm status | grep -q running; then
-# /etc/rc.d/init.d/slurm stop
+#if [ -x /etc/init.d/slurm ]; then
+# if /etc/init.d/slurm status | grep -q running; then
+# /etc/init.d/slurm stop
# fi
#fi
%post
/sbin/ldconfig %{_libdir}
-if [ -x /etc/rc.d/init.d/slurm ]; then
- [ -x /sbin/chkconfig ] && /sbin/chkconfig --del slurm
+if [ $1 = 1 ]; then
[ -x /sbin/chkconfig ] && /sbin/chkconfig --add slurm
-# if ! /etc/rc.d/init.d/slurm status | grep -q running \
-# || ! /etc/rc.d/init.d/slurm test | grep -q slurmctld; then
-# /etc/rc.d/init.d/slurm start
-# fi
fi
%preun
-if [ "$1" = 0 ]; then
- if [ -x /etc/rc.d/init.d/slurm ]; then
+if [ $1 = 0 ]; then
+ if [ -x /etc/init.d/slurm ]; then
[ -x /sbin/chkconfig ] && /sbin/chkconfig --del slurm
- if /etc/rc.d/init.d/slurm status | grep -q running; then
- /etc/rc.d/init.d/slurm stop
+ if /etc/init.d/slurm status | grep -q running; then
+ /etc/init.d/slurm stop
fi
fi
fi
@@ -224,6 +223,11 @@ fi
%changelog
+* Fri Oct 01 2004 Mark Grondona <mgrondona@llnl.gov>
+- don't delete and add service in %post
+* Wed Aug 04 2004 Mark Grondona <mgrondona@llnl.gov>
+- don't allow rpm to strip binaries since this breaks interface to parallel
+ debuggers.
* Thu Jul 29 2004 Morris Jette <jette1@llnl.gov>
- added checkpoint_none.so and jobcomp_script.so plugins
* Fri Mar 07 2004 Mark Grondona <mgrondona@llnl.gov>
View
4 src/common/bitstring.c
@@ -624,7 +624,8 @@ bitfmt2int (char *bit_str_ptr)
if (bit_str_ptr == NULL)
return NULL;
size = strlen (bit_str_ptr) + 1;
- bit_int_ptr = xmalloc ( sizeof (int *) * size);
+ bit_int_ptr = xmalloc ( sizeof (int *) *
+ (size * 2 + 1)); /* more than enough space */
if (bit_int_ptr == NULL)
return NULL;
@@ -653,6 +654,7 @@ bitfmt2int (char *bit_str_ptr)
sum = 0;
}
}
+ assert(bit_inx < (size*2+1));
bit_int_ptr[bit_inx] = -1;
return bit_int_ptr;
}
View
21 src/common/cbuf.c
@@ -1,9 +1,9 @@
/*****************************************************************************
* $Id$
*****************************************************************************
- * $LSDId: cbuf.c,v 1.32 2003/01/03 21:08:19 dun Exp $
+ * $LSDId: cbuf.c,v 1.35 2005/01/13 00:41:17 dun Exp $
*****************************************************************************
- * Copyright (C) 2002-2003 The Regents of the University of California.
+ * Copyright (C) 2002-2005 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Chris Dunlap <cdunlap@llnl.gov>.
*
@@ -330,7 +330,7 @@ cbuf_size (cbuf_t cb)
assert(cb != NULL);
cbuf_mutex_lock(cb);
assert(cbuf_is_valid(cb));
- size = cb->size;
+ size = cb->maxsize;
cbuf_mutex_unlock(cb);
return(size);
}
@@ -344,7 +344,7 @@ cbuf_free (cbuf_t cb)
assert(cb != NULL);
cbuf_mutex_lock(cb);
assert(cbuf_is_valid(cb));
- nfree = cb->size - cb->used;
+ nfree = cb->maxsize - cb->used;
cbuf_mutex_unlock(cb);
return(nfree);
}
@@ -381,6 +381,12 @@ cbuf_lines_used (cbuf_t cb)
int
cbuf_reused (cbuf_t cb)
{
+/* If (O > R)
+ * n = O - R
+ * else
+ * n = (O - 0) + ((S+1) - R).
+ * (S+1) is used since data[] contains 'size' bytes + a 1-byte sentinel.
+ */
int reused;
assert(cb != NULL);
@@ -988,12 +994,11 @@ cbuf_write_from_fd (cbuf_t dst, int srcfd, int len, int *ndropped)
if (len == -1) {
/*
* Try to use all of the free buffer space available for writing.
- * If it is all in use, try to grab another chunk and limit the
- * amount of data being overwritten.
+ * If it is all in use, try to grab another chunk.
*/
len = dst->size - dst->used;
if (len == 0) {
- len = MIN(dst->size, CBUF_CHUNK);
+ len = CBUF_CHUNK;
}
}
if (len > 0) {
@@ -1275,7 +1280,7 @@ cbuf_get_fd (void *dstbuf, int *psrcfd, int len)
do {
n = read(*psrcfd, dstbuf, len);
- } while ((n < 0) && ((errno == EINTR) || (errno == EAGAIN)));
+ } while ((n < 0) && (errno == EINTR));
return(n);
}
View
12 src/common/cbuf.h
@@ -1,7 +1,9 @@
/*****************************************************************************
* $Id$
*****************************************************************************
- * Copyright (C) 2002-2003 The Regents of the University of California.
+ * $LSDId: cbuf.h,v 1.22 2005/01/19 22:35:57 dun Exp $
+ *****************************************************************************
+ * Copyright (C) 2002-2005 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Chris Dunlap <cdunlap@llnl.gov>.
*
@@ -104,14 +106,14 @@ void cbuf_flush (cbuf_t cb);
int cbuf_size (cbuf_t cb);
/*
- * Returns the current size of the buffer allocated to [cb]
- * (ie, the number of bytes in can currently hold).
+ * Returns the maximum size of the buffer allocated to [cb]
+ * (ie, the number of bytes it can currently hold).
*/
int cbuf_free (cbuf_t cb);
/*
- * Returns the number of bytes in [cb] available for writing before
- * unread data is overwritten (unless the cbuf is able to resize itself).
+ * Returns the number of bytes in [cb] available for writing before unread
+ * data is overwritten (assuming the cbuf can resize itself if needed).
*/
int cbuf_used (cbuf_t cb);
View
3  src/common/slurm_protocol_api.c
@@ -902,6 +902,9 @@ int slurm_send_rc_msg(slurm_msg_t *msg, int rc)
{
slurm_msg_t resp_msg;
return_code_msg_t rc_msg;
+
+ if (msg->conn_fd < 0)
+ return (ENOTCONN);
rc_msg.return_code = rc;
View
17 src/plugins/auth/munge/auth_munge.c
@@ -452,7 +452,7 @@ _decode_cred(char *m, slurm_auth_credential_t *c)
error ("Munge decode failed: %s %s",
munge_ctx_strerror(ctx), retry ? "(retrying ...)": "");
- if ((e = EMUNGE_SOCKET) && retry--)
+ if ((e == EMUNGE_SOCKET) && retry--)
goto again;
/*
@@ -545,22 +545,9 @@ _print_cred_info(munge_info_t *mi)
if (mi->encoded > 0)
info ("ENCODED: %s", ctime_r(&mi->encoded, buf));
+
if (mi->decoded > 0)
info ("DECODED: %s", ctime_r(&mi->decoded, buf));
-
- if ( (mi->cipher > MUNGE_CIPHER_NONE)
- && (mi->cipher < MUNGE_CIPHER_LAST_ENTRY) )
- info ("CIPHER: %s", munge_cipher_strings[mi->cipher]);
-
- if ( (mi->mac > MUNGE_MAC_NONE)
- && (mi->mac < MUNGE_MAC_LAST_ENTRY) ) {
- info ("MAC: %s", munge_mac_strings[mi->mac]);
- /*
- * Only print ZIP if MAC is valid.
- * (because ZIP == NONE could be valid)
- */
- info ("ZIP: %s", munge_zip_strings[mi->zip]);
- }
}
View
4 src/plugins/jobcomp/filetxt/jobcomp_filetxt.c
@@ -162,7 +162,7 @@ _get_user_name(uint32_t user_id, char *user_name, int buf_size)
else
snprintf(user_name, buf_size, "Unknown");
cache_uid = user_id;
- snprintf(cache_name, sizeof(cache_name), user_info->pw_name);
+ snprintf(cache_name, sizeof(cache_name), user_name);
}
}
@@ -218,7 +218,7 @@ int slurm_jobcomp_log_record ( struct job_record *job_ptr )
job_state_string(job_state),
job_ptr->partition, lim_str, start_str,
end_str, job_ptr->nodes);
- tot_size = (strlen(job_rec) + 1);
+ tot_size = strlen(job_rec);
while ( offset < tot_size ) {
wrote = write(job_comp_fd, job_rec + offset,
View
72 src/plugins/switch/elan/qsw.c
@@ -35,6 +35,8 @@
#include <sys/param.h>
#include <sys/types.h>
#include <sys/wait.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
#include <syslog.h>
#include <errno.h>
#include <string.h>
@@ -56,8 +58,17 @@
* want to include here since we are using the new
* version-nonspecific libelanctrl.
* (XXX: What is the equivalent in libelanctrl?)
+ *
+ * slurm/482: the elan USER context range is now split
+ * into two segments, regular user context and RMS
+ * context ranges. Do not allow a context range
+ * (lowcontext -- highcontext) to span these two segments,
+ * as this will generate and elan initialization error
+ * when MPI tries to attach to the capability. For now,
+ * restrict SLURM's range to the RMS one (starting at 0x400)
+ *
*/
-# define ELAN_USER_BASE_CONTEXT_NUM 0x020
+# define ELAN_USER_BASE_CONTEXT_NUM 0x400 /* act. RMS_BASE_CONTEXT_NUM */
# define ELAN_USER_TOP_CONTEXT_NUM 0x7ff
# define Version cap_version
@@ -108,6 +119,7 @@
#define QSW_CTX_END ELAN_USER_TOP_CONTEXT_NUM - 1
#define QSW_CTX_INVAL (-1)
+
/*
* We are going to some trouble to keep these defs private so slurm
* hackers not interested in the interconnect details can just pass around
@@ -152,6 +164,8 @@ static qsw_libstate_t qsw_internal_state = NULL;
static pthread_mutex_t qsw_lock = PTHREAD_MUTEX_INITIALIZER;
static elanhost_config_t elanconf = NULL;
+static int shmid = -1;
+
/*
* Allocate a qsw_libstate_t.
@@ -664,6 +678,10 @@ qsw_setup_jobinfo(qsw_jobinfo_t j, int nprocs, bitstr_t *nodeset,
int
qsw_prgdestroy(qsw_jobinfo_t jobinfo)
{
+
+ if (shmid >= 0)
+ shmctl (shmid, IPC_RMID, NULL);
+
if (rms_prgdestroy(jobinfo->j_prognum) < 0) {
/* translate errno values to more descriptive ones */
switch (errno) {
@@ -698,6 +716,50 @@ qsw_prog_fini(qsw_jobinfo_t jobinfo)
#endif
}
+/* Key for Elan stats shared memory segment is the
+ * rms.o program description number, left shifted 9 less 1
+ * to avoid conflicts with MPI shared memory
+ */
+static int elan_statkey (int prgid)
+{
+ return ((prgid << 9) - 1);
+}
+
+/*
+ * Return the statkey to caller if shared memory was created
+ */
+int qsw_statkey (qsw_jobinfo_t jobinfo)
+{
+ return (shmid > 0 ? elan_statkey (jobinfo->j_prognum) : -1);
+}
+
+/*
+ * Create shared memory segment for Elan stats use
+ * (ELAN_STATKEY env var is set in switch_elan.c)
+ */
+static int
+_qsw_shmem_create (qsw_jobinfo_t jobinfo, uid_t uid)
+{
+ struct shmid_ds shm;
+ ELAN_CAPABILITY *cap = &jobinfo->j_cap;
+ key_t key = elan_statkey (jobinfo->j_prognum);
+ int maxLocal = cap->HighContext - cap->LowContext + 2;
+ int pgsize = getpagesize ();
+
+ if ((shmid = shmget (key, pgsize * (maxLocal + 1), IPC_CREAT)) < 0)
+ return (error ("Failed to create Elan state shmem: %m"));
+
+ /* Ensure permissions on segment allow user read/write access
+ */
+ shm.shm_perm.uid = uid;
+ shm.shm_perm.mode = 0600;
+
+ if (shmctl (shmid, IPC_SET, &shm) < 0)
+ return (error ("Failed to set perms on Elan state shm: %m"));
+
+ return (0);
+}
+
/*
* Process 2: Create the context and make capability available to children.
*/
@@ -785,6 +847,14 @@ qsw_prog_init(qsw_jobinfo_t jobinfo, uid_t uid)
goto fail;
}
+
+ /*
+ * Create shared memory for libelan state
+ * Failure to create shared memory is not a fatal error.
+ */
+ _qsw_shmem_create (jobinfo, uid);
+
+
/* note: _elan3_fini() destroys context and makes capability unavail */
/* do it in qsw_prog_fini() after app terminates */
return 0;
View
3  src/plugins/switch/elan/qsw.h
@@ -120,4 +120,7 @@ int qsw_gethost_bynodeid(char *host, int len, int elanid);
char * qsw_capability_string(qsw_jobinfo_t j, char *buf, size_t len);
void qsw_print_jobinfo(FILE *fp, struct qsw_jobinfo *jobinfo);
+ /* Return Elan shared memory state key */
+int qsw_statkey (qsw_jobinfo_t jobinfo);
+
#endif /* _QSW_INCLUDED */
View
11 src/plugins/switch/elan/switch_elan.c
@@ -368,7 +368,7 @@ static int _have_elan3 (void)
#else
struct stat st;
- if (stat ("/proc/qsnet/elan3", &st) < 0)
+ if (stat ("/proc/qsnet/elan3/device0", &st) < 0)
return (0);
return (1);
@@ -623,6 +623,7 @@ int switch_p_job_attach ( switch_jobinfo_t jobinfo, char ***env,
uint32_t nodeid, uint32_t procid, uint32_t nnodes,
uint32_t nprocs, uint32_t rank )
{
+ int id = -1;
debug3("nodeid=%lu nnodes=%lu procid=%lu nprocs=%lu rank=%lu",
(unsigned long) nodeid, (unsigned long) nnodes,
(unsigned long) procid, (unsigned long) nprocs,
@@ -645,6 +646,14 @@ int switch_p_job_attach ( switch_jobinfo_t jobinfo, char ***env,
if (setenvpf(env, "RMS_NPROCS", "%lu", (unsigned long) nprocs) < 0)
return SLURM_ERROR;
+ /*
+ * Tell libelan the key to use for Elan state shmem segment
+ */
+ if ((id = qsw_statkey (jobinfo)) > 0)
+ setenvpf (env, "ELAN_STATKEY", "0x%x", id);
+
+
+
return SLURM_SUCCESS;
}
View
1  src/sinfo/sinfo.c
@@ -491,7 +491,6 @@ static void _sinfo_list_delete(void *data)
{
sinfo_data_t *sinfo_ptr = data;
- xfree(sinfo_ptr->features);
hostlist_destroy(sinfo_ptr->nodes);
xfree(sinfo_ptr);
}
View
21 src/slurmctld/job_mgr.c
@@ -1402,6 +1402,7 @@ _signal_batch_job(struct job_record *job_ptr, uint16_t signal)
kill_tasks_msg->signal = signal;
agent_args->msg_args = kill_tasks_msg;
+ agent_args->node_count = 1; /* slurm/477 be sure to update node_count */
agent_queue_request(agent_args);
return;
}
@@ -2445,7 +2446,7 @@ void pack_job(struct job_record *dump_job_ptr, Buf buffer)
pack_bit_fmt(dump_job_ptr->node_bitmap, buffer);
detail_ptr = dump_job_ptr->details;
- if (detail_ptr && dump_job_ptr->job_state == JOB_PENDING)
+ if (detail_ptr)
_pack_job_details(detail_ptr, buffer);
else
_pack_job_details(NULL, buffer);
@@ -2924,11 +2925,13 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid)
if (job_specs->features && detail_ptr) {
if (super_user) {
xfree(detail_ptr->features);
- detail_ptr->features = job_specs->features;
- info("update_job: setting features to %s for "
- "job_id %u", job_specs->features,
- job_specs->job_id);
- job_specs->features = NULL;
+ if (job_specs->features[0] != '\0') {
+ detail_ptr->features = job_specs->features;
+ job_specs->features = NULL;
+ info("update_job: setting features to %s for "
+ "job_id %u", job_specs->features,
+ job_specs->job_id);
+ }
} else {
error("Attempt to change features for job %u",
job_specs->job_id);
@@ -2953,7 +2956,6 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid)
info("update_job: setting partition to %s for "
"job_id %u", job_specs->partition,
job_specs->job_id);
- job_specs->partition = NULL;
} else {
error("Attempt to change partition for job %u",
job_specs->job_id);
@@ -2962,7 +2964,10 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid)
}
if (job_specs->req_nodes && detail_ptr) {
- if (super_user) {
+ if (job_specs->req_nodes[0] == '\0') {
+ xfree(detail_ptr->req_nodes);
+ FREE_NULL_BITMAP(detail_ptr->req_node_bitmap);
+ } else if (super_user) {
if (node_name2bitmap(job_specs->req_nodes, false,
&req_bitmap)) {
error("Invalid node list for job_update: %s",
View
6 src/slurmctld/node_scheduler.c
@@ -704,10 +704,12 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
(!bit_super_set(*req_bitmap, avail_bitmap)))
continue;
if ((avail_nodes < min_nodes) ||
- (avail_cpus < req_cpus) ||
((max_nodes > min_nodes) &&
(avail_nodes < max_nodes)))
continue; /* Keep accumulating nodes */
+ if (slurmctld_conf.fast_schedule
+ && (avail_cpus < req_cpus))
+ continue; /* Keep accumulating CPUs */
if (shared)
pick_code = _pick_best_load(avail_bitmap,
@@ -755,7 +757,7 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size,
/* determine if job could possibly run (if all configured
* nodes available) */
if ((!runable_ever || !runable_avail) &&
- (total_nodes >= min_nodes) && (total_cpus >= req_cpus) &&
+ (total_nodes >= min_nodes) &&
((*req_bitmap == NULL) ||
(bit_super_set(*req_bitmap, total_bitmap)))) {
if (!runable_avail) {
View
26 src/slurmctld/partition_mgr.c
@@ -776,18 +776,28 @@ int update_part(update_part_msg_t * part_desc)
if (part_desc->allow_groups != NULL) {
xfree(part_ptr->allow_groups);
- part_ptr->allow_groups = xstrdup(part_desc->allow_groups);
- info("update_part: setting allow_groups to %s for partition %s",
- part_desc->allow_groups, part_desc->name);
xfree(part_ptr->allow_uids);
- part_ptr->allow_uids =
- _get_groups_members(part_desc->allow_groups);
+ if ((strcasecmp(part_desc->allow_groups, "ALL") == 0) ||
+ (part_desc->allow_groups[0] == '\0')) {
+ info("update_part: setting allow_groups to ALL for partition %s",
+ part_desc->name);
+ } else {
+ part_ptr->allow_groups = part_desc->allow_groups;
+ part_desc->allow_groups = NULL;
+ info("update_part: setting allow_groups to %s for partition %s",
+ part_ptr->allow_groups, part_desc->name);
+ part_ptr->allow_uids =
+ _get_groups_members(part_ptr->allow_groups);
+ }
}
if (part_desc->nodes != NULL) {
- char *backup_node_list;
- backup_node_list = part_ptr->nodes;
- part_ptr->nodes = xstrdup(part_desc->nodes);
+ char *backup_node_list = part_ptr->nodes;
+
+ if (part_desc->nodes[0] == '\0')
+ part_ptr->nodes = NULL; /* avoid empty string */
+ else
+ part_ptr->nodes = xstrdup(part_desc->nodes);
error_code = _build_part_bitmap(part_ptr);
if (error_code) {
View
5 src/slurmctld/step_mgr.c
@@ -377,11 +377,6 @@ _pick_step_nodes (struct job_record *job_ptr, step_specs *step_spec ) {
step_spec->node_list, job_ptr->job_id);
goto cleanup;
}
- if (bit_super_set (nodes_picked, avail_node_bitmap) == 0) {
- info ("_pick_step_nodes: some requested node %s down",
- step_spec->node_list);
- goto cleanup;
- }
}
else if (step_spec->relative) {
/* Remove first (step_spec->relative) nodes from
View
28 src/slurmd/fname.c
@@ -56,6 +56,10 @@ fname_create(slurmd_job_t *job, const char *format, int taskid)
char *name = NULL;
char *orig = xstrdup(format);
char *p, *q;
+ int id;
+
+ if (((id = fname_single_task_io (format)) >= 0) && (taskid != id))
+ return (xstrdup ("/dev/null"));
/* If format doesn't specify an absolute pathname,
* use cwd
@@ -158,14 +162,36 @@ fname_free(void *name)
{
xfree(name);
}
+
+/*
+ * Return >= 0 if fmt specifies "single task only" IO
+ * i.e. if it specifies a single integer only
+ */
+int fname_single_task_io (const char *fmt)
+{
+ unsigned long taskid;
+ char *p;
+
+ taskid = strtoul (fmt, &p, 10);
+
+ if (*p == '\0')
+ return ((int) taskid);
+
+ return (-1);
+}
+
int
fname_trunc_all(slurmd_job_t *job, const char *fmt)
{
int i, rc = SLURM_SUCCESS;
char *fname;
ListIterator filei;
- List files = list_create((ListDelF)fname_free);
+ List files = NULL;
+
+ if (fname_single_task_io (fmt) >= 0)
+ return (0);
+ files = list_create((ListDelF)fname_free);
for (i = 0; i < job->ntasks; i++) {
fname = fname_create(job, fmt, job->task[i]->gid);
if (!list_find_first(files, (ListFindF) find_fname, fname))
View
1  src/slurmd/fname.h
@@ -31,5 +31,6 @@
char *fname_create(slurmd_job_t *job, const char *fmt, int taskid);
int fname_trunc_all(slurmd_job_t *job, const char *fmt);
+int fname_single_task_io (const char *fmt);
#endif /* !_SLURMD_FNAME_H */
View
41 src/slurmd/io.c
@@ -455,6 +455,23 @@ _obj_set_unwritable(io_obj_t *obj)
obj->ops->writable = NULL;
}
+static char *
+_local_filename (char *fname, int taskid)
+{
+ int id;
+
+ if (fname == NULL)
+ return (NULL);
+
+ if ((id = fname_single_task_io (fname)) < 0)
+ return (fname);
+
+ if (id != taskid)
+ return ("/dev/null");
+
+ return (NULL);
+}
+
static int
_io_add_connecting(slurmd_job_t *job, task_info_t *t, srun_info_t *srun,
slurmd_io_type_t type)
@@ -462,7 +479,7 @@ _io_add_connecting(slurmd_job_t *job, task_info_t *t, srun_info_t *srun,
io_obj_t *obj = NULL;
int sock = -1;
- debug3("in io_add_connecting");
+ debug2 ("adding connecting %s for task %d", _io_str[type], t->gid);
if ((sock = (int) slurm_open_stream(&srun->ioaddr)) < 0) {
error("connect io: %m");
@@ -479,7 +496,7 @@ _io_add_connecting(slurmd_job_t *job, task_info_t *t, srun_info_t *srun,
obj->ops = _ops_copy(&connecting_client_ops);
_io_write_header(obj->arg, srun);
- if ((type == CLIENT_STDOUT) && !srun->ifname) {
+ if ((type == CLIENT_STDOUT) && !_local_filename(srun->ifname, t->gid)) {
struct io_info *io = obj->arg;
/* This is the only read-write capable client
* at this time: a connected CLIENT_STDOUT
@@ -502,23 +519,25 @@ static int
_io_prepare_one(slurmd_job_t *j, task_info_t *t, srun_info_t *s)
{
int retval = SLURM_SUCCESS;
+ char *fname = NULL;
+
/* Try hard to get stderr connected to something
*/
if ( (_open_output_file(j, t, s->efname, CLIENT_STDERR) < 0)
&& (_io_add_connecting(j, t, s, CLIENT_STDERR) < 0) )
retval = SLURM_FAILURE;
- if (s->ofname) {
- if (_open_output_file(j, t, s->ofname, CLIENT_STDOUT) < 0)
+ if ((fname = _local_filename (s->ofname, t->gid))) {
+ if (_open_output_file(j, t, fname, CLIENT_STDOUT) < 0)
retval = SLURM_FAILURE;
} else {
_io_add_connecting(j, t, s, CLIENT_STDOUT);
}
- if (s->ifname) {
+ if ((fname = _local_filename (s->ifname, t->gid))) {
if (_open_stdin_file(j, t, s) < 0)
retval = SLURM_FAILURE;
- } else if (s->ofname) {
+ } else if (_local_filename (s->ofname, t->gid)) {
_io_add_connecting(j, t, s, CLIENT_STDIN);
}
@@ -610,16 +629,20 @@ _open_output_file(slurmd_job_t *job, task_info_t *t, char *fmt,
int fd = -1;
io_obj_t *obj = NULL;
int flags = O_APPEND|O_WRONLY;
- char *fname ;
+ char *fname = NULL;
+
+ xassert((type == CLIENT_STDOUT) || (type == CLIENT_STDERR));
if (fmt == NULL)
return SLURM_ERROR;
- xassert((type == CLIENT_STDOUT) || (type == CLIENT_STDERR));
+ if (!_local_filename (fmt, t->gid))
+ return SLURM_ERROR;
fname = fname_create(job, fmt, t->gid);
if ((fd = _open_task_file(fname, flags)) > 0) {
- debug("opened `%s' for %s fd %d", fname, _io_str[type], fd);
+ debug2 ("opened `%s' for task %d %s fd %d",
+ fname, t->gid, _io_str[type], fd);
obj = _io_obj(job, t, fd, type);
_obj_set_unreadable(obj);
xassert(obj->ops->writable != NULL);
View
3  src/slurmd/job.c
@@ -332,7 +332,8 @@ job_signal_tasks(slurmd_job_t *job, int signal)
{
int n = job->ntasks;
while (--n >= 0) {
- if (kill(job->task[n]->pid, signal) < 0) {
+ if ((job->task[n]->pid > (pid_t) 0)
+ && (kill(job->task[n]->pid, signal) < 0)) {
if (errno != EEXIST) {
error("job %d.%d: kill task %d: %m",
job->jobid, job->stepid, n);
View
14 src/slurmd/req.c
@@ -617,9 +617,14 @@ _rpc_kill_tasks(slurm_msg_t *msg, slurm_addr *cli_addr)
goto done;
}
- if (kill(-step->sid, req->signal) < 0)
+ if ((step->sid > (pid_t) 0)
+ && (kill(-step->sid, req->signal) < 0))
rc = errno;
+ if ((step->task_list->pid > (pid_t) 0)
+ && (kill (-step->task_list->pid, req->signal) < 0))
+ rc = errno;
+
if (rc == SLURM_SUCCESS)
verbose("Sent signal %d to %u.%u",
req->signal, req->job_id, req->job_step_id);
@@ -873,8 +878,13 @@ _kill_all_active_steps(uint32_t jobid, int sig)
step_cnt++;
debug2("signal %d to job %u (pg:%d)", sig, jobid, s->sid);
- if (kill(-s->sid, sig) < 0)
+ if ((s->sid > (pid_t) 0)
+ && (kill(-s->sid, sig) < 0))
error("kill jid %d sid %d: %m", s->jobid, s->sid);
+ if ((s->task_list->pid > (pid_t) 0)
+ && (kill(-s->task_list->pid, sig) < 0))
+ error("kill jid %d pgrp %d: %m", s->jobid,
+ s->task_list->pid);
}
list_destroy(steps);
if (step_cnt == 0)
View
15 src/slurmd/shm.c
@@ -484,7 +484,8 @@ shm_signal_step(uint32_t jobid, uint32_t stepid, uint32_t signal)
continue;
}
- if (kill(t->pid, signo) < 0) {
+ if ((t->pid > (pid_t) 0)
+ && (kill(t->pid, signo) < 0)) {
error ("kill %u.%u task %d pid %ld: %m",
jobid, stepid, t->id, (long)t->pid);
retval = errno;
@@ -666,7 +667,8 @@ shm_update_step_addrs(uint32_t jobid, uint32_t stepid,
debug3("Going to send shm update signal to %ld",
(long) s->mpid);
- if ((s->mpid > 0) && (kill(s->mpid, SIGHUP) < 0)) {
+ if ((s->mpid > (pid_t) 0)
+ && (kill(s->mpid, SIGHUP) < 0)) {
slurm_seterrno(EPERM);
retval = SLURM_FAILURE;
}
@@ -905,10 +907,17 @@ _shm_clear_stale_entries(void)
int count = 0;
for (i = 0; i < MAX_JOB_STEPS; i++) {
job_step_t *s = &slurmd_shm->step[i];
+ task_t *t = s->task_list;
+
if (s->state == SLURMD_JOB_UNUSED)
continue;
+
+ while (t->next && t->id != 0)
+ t = t->next;
- if ((s->sid > (pid_t) 0) && (kill(-s->sid, 0) != 0)) {
+ if ( (s->sid > (pid_t) 0)
+ && (kill(-s->sid, 0) != 0)
+ && (kill(-t->pid, 0) != 0)) {
debug ("Clearing stale job %u.%u from shm",
s->jobid, s->stepid);
_shm_clear_step(s);
View
38 src/slurmd/smgr.c
@@ -271,6 +271,7 @@ _become_user(slurmd_job_t *job)
static int
_exec_all_tasks(slurmd_job_t *job)
{
+ char c;
int i;
int fd = job->fdpair[1];
@@ -286,13 +287,26 @@ _exec_all_tasks(slurmd_job_t *job)
return error ("Unable to block signals");
for (i = 0; i < job->ntasks; i++) {
- pid_t pid = fork();
+ int fdpair[2];
+ pid_t pid;
- if (pid < 0) {
+ if (pipe (fdpair) < 0)
+ error ("exec_all_tasks: pipe: %m");
+
+ if ((pid = fork ()) < 0) {
error("fork: %m");
return SLURM_ERROR;
- } else if (pid == 0) /* child */
+ } else if (pid == 0) { /* child */
+ /*
+ * Stall exec until pgid is set by parent
+ */
+ if (read (fdpair[0], &c, sizeof (c)) != 1)
+ error ("pgrp child read failed: %m");
+ close (fdpair[0]);
+ close (fdpair[1]);
+
_exec_task(job, i);
+ }
/* Parent continues:
*/
@@ -311,6 +325,22 @@ _exec_all_tasks(slurmd_job_t *job)
job->task[i]->pid = pid;
/*
+ * Set this child's pgid to pid of first task
+ */
+ if (setpgid (pid, job->task[0]->pid) < 0)
+ error ("Unable to put task %d (pid %ld) into pgrp %ld",
+ i, pid, job->task[0]->pid);
+
+ /*
+ * Now it's ok to unblock this child, so it may call exec
+ */
+ if (write (fdpair[1], &c, sizeof (c)) != 1)
+ error ("write to unblock task %d failed", i);
+
+ close (fdpair[0]);
+ close (fdpair[1]);
+
+ /*
* Prepare process for attach by parallel debugger
* (if specified and able)
*/
@@ -594,7 +624,7 @@ _pdebug_trace_process(slurmd_job_t *job, pid_t pid)
if (job->task_flags & TASK_PARALLEL_DEBUG) {
int status;
waitpid(pid, &status, WUNTRACED);
- if (kill(pid, SIGSTOP) < 0)
+ if ((pid > (pid_t) 0) && (kill(pid, SIGSTOP) < 0))
error("kill(%lu): %m", (unsigned long) pid);
if (_PTRACE(PTRACE_DETACH, pid, NULL, 0))
error("ptrace(%lu): %m", (unsigned long) pid);
View
2  src/srun/allocate.c
@@ -132,6 +132,8 @@ existing_allocation(void)
job.uid = getuid();
if (slurm_confirm_allocation(&job, &resp) < 0) {
+ if (opt.parallel_debug)
+ return NULL; /* create new allocation as needed */
if (errno == ESLURM_ALREADY_DONE)
error ("SLURM job %u has expired.", job.job_id);
else
View
28 src/srun/fname.c
@@ -41,14 +41,17 @@ fname_create(job_t *job, char *format)
if ((format == NULL)
|| (strncasecmp(format, "all", (size_t) 3) == 0)
|| (strncmp(format, "-", (size_t) 1) == 0) ) {
- fname->type = IO_ALL;
- fname->name = NULL;
- return fname;
+ /* "all" explicitly sets IO_ALL and is the default */
+ return (fname);
}
if (strncasecmp(format, "none", (size_t) 4) == 0) {
- fname->type = IO_NONE;
- fname->name = "/dev/null";
+ /*
+ * Set type to IO_PER_TASK so that /dev/null is opened
+ * on every node, which should be more efficient
+ */
+ fname->type = IO_PER_TASK;
+ fname->name = xstrdup ("/dev/null");
return fname;
}
@@ -56,7 +59,11 @@ fname_create(job_t *job, char *format)
if ((*p == '\0') && ((int) taskid < opt.nprocs)) {
fname->type = IO_ONE;
fname->taskid = (uint32_t) taskid;
- fname->name = NULL;
+ /* Set the name string to pass to slurmd
+ * to the taskid requested, so that tasks with
+ * no IO can open /dev/null.
+ */
+ fname->name = xstrdup (format);
return fname;
}
@@ -123,3 +130,12 @@ fname_destroy(io_filename_t *f)
xfree(f->name);
xfree(f);
}
+
+char *
+fname_remote_string (io_filename_t *f)
+{
+ if ((f->type == IO_PER_TASK) || (f->type == IO_ONE))
+ return (xstrdup (f->name));
+
+ return (NULL);
+}
View
2  src/srun/fname.h
@@ -52,5 +52,7 @@ typedef struct srun_job * srun_job_t;
io_filename_t * fname_create(srun_job_t job, char *format);
void fname_destroy(io_filename_t *fname);
+char * fname_remote_string (io_filename_t *fname);
+
#endif /* !_FNAME_H */
View
59 src/srun/io.c
@@ -227,41 +227,34 @@ _flush_io(job_t *job)
debug3("Read %dB from tasks, wrote %dB", nbytes, nwritten);
}
+static int
+_initial_fd_state (io_filename_t *f, int task)
+{
+ if (f->type == IO_ALL)
+ return (WAITING_FOR_IO);
+ if (f->type == IO_ONE && f->taskid == task)
+ return (WAITING_FOR_IO);
+
+ return (IO_DONE);
+}
+
static void
_io_thr_init(job_t *job, struct pollfd *fds)
{
- int out_fd_state = WAITING_FOR_IO;
- int err_fd_state = WAITING_FOR_IO;
int i;
xassert(job != NULL);
- /*
- * XXX: Handle job->ofname/efname == IO_ONE
- */
-
_set_iofds_nonblocking(job);
- if (job->ofname->type == IO_ALL)
- out_fd_state = WAITING_FOR_IO;
- else {
- if (job->ifname->type != IO_ALL)
- out_fd_state = IO_DONE;
- else
- out_fd_state = WAITING_FOR_IO;
-
- if (!opt.efname)
- err_fd_state = IO_DONE;
- }
-
- if ((job->efname->type == IO_ALL) && (err_fd_state != IO_DONE)) {
- err_fd_state = WAITING_FOR_IO;
- } else
- err_fd_state = IO_DONE;
-
for (i = 0; i < opt.nprocs; i++) {
- job->out[i] = out_fd_state;
- job->err[i] = err_fd_state;
+ int instate = _initial_fd_state (job->ifname, i);
+ job->out[i] = _initial_fd_state (job->ofname, i);
+ job->err[i] = _initial_fd_state (job->efname, i);
+
+ if (job->out[i] != WAITING_FOR_IO)
+ job->out[i] = instate;
+
}
for (i = 0; i < job->niofds; i++)
@@ -467,20 +460,30 @@ _fopen(char *filename)
return fp;
}
+static int
+_is_local_file (io_filename_t *fname)
+{
+ if (fname->name == NULL)
+ return (0);
+
+ return ((fname->type != IO_PER_TASK) && (fname->type != IO_ONE));
+}
+
+
int
open_streams(job_t *job)
{
- if ((job->ifname->type != IO_PER_TASK) && job->ifname->name)
+ if (_is_local_file (job->ifname))
job->stdinfd = _stdin_open(job->ifname->name);
else
job->stdinfd = STDIN_FILENO;
- if ((job->ofname->type != IO_PER_TASK) && job->ofname->name)
+ if (_is_local_file (job->ofname))
job->outstream = _fopen(job->ofname->name);
else
job->outstream = stdout;
- if ((job->efname->type != IO_PER_TASK) && job->efname->name)
+ if (_is_local_file (job->efname))
job->errstream = _fopen(job->efname->name);
else
job->errstream = stderr;
View
18 src/srun/launch.c
@@ -130,12 +130,9 @@ launch(void *arg)
r->slurmd_debug = opt.slurmd_debug;
r->switch_job = job->switch_job;
- if (job->ofname->type == IO_PER_TASK)
- r->ofname = job->ofname->name;
- if (job->efname->type == IO_PER_TASK)
- r->efname = job->efname->name;
- if (job->ifname->type == IO_PER_TASK)
- r->ifname = job->ifname->name;
+ r->ofname = fname_remote_string (job->ofname);
+ r->efname = fname_remote_string (job->efname);
+ r->ifname = fname_remote_string (job->ifname);
if (opt.parallel_debug)
r->task_flags |= TASK_PARALLEL_DEBUG;
@@ -222,8 +219,12 @@ static void _join_attached_threads (int nthreads, thd_t *th)
{
int i;
void *retval;
- for (i = 0; i < nthreads; i++)
- pthread_join (th[i].thread, &retval);
+ if (!opt.parallel_debug)
+ return;
+ for (i = 0; i < nthreads; i++) {
+ if (th[i].thread != (pthread_t) NULL)
+ pthread_join (th[i].thread, &retval);
+ }
return;
}
@@ -294,6 +295,7 @@ static void _p_launch(slurm_msg_t *req, job_t *job)
if (job->ntask[i] == 0) { /* No tasks for this node */
debug("Node %s is unused",job->host[i]);
job->host_state[i] = SRUN_HOST_REPLIED;
+ thd[i].thread = (pthread_t) NULL;
continue;
}
View
13 src/srun/opt.c
@@ -401,6 +401,7 @@ static void _opt_default()
opt.max_wait = slurm_get_wait_time();
opt.quit_on_intr = false;
+ opt.disable_status = false;
opt.quiet = 0;
_verbose = 0;
@@ -416,7 +417,7 @@ static void _opt_default()
opt.contiguous = false;
opt.nodelist = NULL;
opt.exc_nodes = NULL;
- opt.max_launch_time = 60; /* 60 seconds to launch job */
+ opt.max_launch_time = 120;/* 120 seconds to launch job */
opt.max_exit_timeout= 60; /* Warn user 60 seconds after task exit */
opt.msg_timeout = 5; /* Default launch msg timeout */
@@ -473,6 +474,7 @@ env_vars_t env_vars[] = {
{"SLURM_STDOUTMODE", OPT_STRING, &opt.ofname, NULL },
{"SLURM_TIMELIMIT", OPT_INT, &opt.time_limit, NULL },
{"SLURM_WAIT", OPT_INT, &opt.max_wait, NULL },
+ {"SLURM_DISABLE_STATUS",OPT_INT, &opt.disable_status,NULL },
{NULL, 0, NULL, NULL}
};
@@ -623,6 +625,7 @@ static void _opt_args(int argc, char **argv)
{"nodelist", required_argument, 0, 'w'},
{"wait", required_argument, 0, 'W'},
{"exclude", required_argument, 0, 'x'},
+ {"disable-status", no_argument, 0,'X'},
{"no-allocate", no_argument, 0, 'Z'},
{"quit-on-interrupt", no_argument, 0, 'q'},
{"quiet", no_argument, 0, 'Q'},
@@ -645,7 +648,7 @@ static void _opt_args(int argc, char **argv)
{NULL, 0, 0, 0}
};
char *opt_string = "+a:Abc:C:d:D:e:Hi:IjJ:klm:n:N:"
- "o:Op:Qr:st:T:uvVw:W:x:Zq";
+ "o:Op:Qr:st:T:uvVw:W:x:XZq";
char **rest = NULL;
opt.progname = xbasename(argv[0]);
@@ -809,6 +812,9 @@ static void _opt_args(int argc, char **argv)
if (!_valid_node_list(&opt.exc_nodes))
exit(1);
break;
+ case (int)'X':
+ opt.disable_status = true;
+ break;
case (int)'Z':
opt.no_alloc = true;
break;
@@ -1246,7 +1252,7 @@ static bool _under_parallel_debugger (void)
static void _usage(void)
{
printf("\
-Usage: srun [-N nnodes] [-n ntasks] [-i in] [-i in] [-e err] [-e err]\n\
+Usage: srun [-N nnodes] [-n ntasks] [-i in] [-o out] [-e err]\n\
[-c ncpus] [-r n] [-p partition] [--hold] [-t minutes]\n\
[-D path] [--immediate] [--overcommit] [--no-kill]\n\
[--share] [--label] [--unbuffered] [-m dist] [-J jobname]\n\
@@ -1290,6 +1296,7 @@ Parallel run options:\n\
-W, --wait=sec seconds to wait after first task exits\n\
before killing job\n\
-q, --quit-on-interrupt quit on single Ctrl-C\n\
+ -X, --disable-status Disable Ctrl-C status feature\n\
-v, --verbose verbose mode (multiple -v's increase verbosity)\n\
-Q, --quiet quiet mode (suppress informational messages)\n\
-d, --slurmd-debug=level slurmd debug level\n\
View
1  src/srun/opt.h
@@ -133,6 +133,7 @@ typedef struct srun_options {
bool share; /* --share, -s */
int max_wait; /* --wait, -W */
bool quit_on_intr; /* --quit-on-interrupt, -q */
+ bool disable_status; /* --disable-status, -X */
int quiet;
bool parallel_debug; /* srun controlled by debugger */
bool debugger_test; /* --debugger-test */
View
10 src/srun/signals.c
@@ -195,7 +195,7 @@ _handle_intr(job_t *job, time_t *last_intr, time_t *last_intr_sent)
pthread_exit (0);
}
- if ((time(NULL) - *last_intr) > 1) {
+ if (((time(NULL) - *last_intr) > 1) && !opt.disable_status) {
info("interrupt (one more within 1 sec to abort)");
if (mode != MODE_ATTACH)
report_task_status(job);
@@ -228,15 +228,15 @@ _sig_thr(void *arg)
sigset_t set;
time_t last_intr = 0;
time_t last_intr_sent = 0;
- int signo;
+ int signo, err;
while (!_sig_thr_done(job)) {
xsignal_sigset_create(srun_sigarray, &set);
- if (sigwait(&set, &signo) < 0) {
- if (errno != EINTR)
- error ("sigwait: %m");
+ if ((err = sigwait(&set, &signo)) != 0) {
+ if (err != EINTR)
+ error ("sigwait: %s", slurm_strerror (err));
continue;
}
View
84 src/srun/srun.c
@@ -96,7 +96,7 @@ static int _run_batch_job (void);
static void _run_job_script(job_t *job);
static int _set_batch_script_env(job_t *job);
static int _set_rlimit_env(void);
-static char *_sprint_task_cnt(job_t *job);
+static char *_task_count_string(job_t *job);
static void _switch_standalone(job_t *job);
static int _become_user (void);
@@ -210,7 +210,7 @@ int srun(int ac, char **av)
setenvf("SLURM_JOBID=%u", job->jobid);
setenvf("SLURM_NPROCS=%d", opt.nprocs);
setenvf("SLURM_NNODES=%d", job->nhosts);
- setenvf("SLURM_TASKS_PER_NODE=%s", (task_cnt = _sprint_task_cnt(job)));
+ setenvf("SLURM_TASKS_PER_NODE=%s", task_cnt = _task_count_string (job));
setenvf("SLURM_DISTRIBUTION=%s",
format_distribution_t (opt.distribution));
@@ -274,12 +274,12 @@ int srun(int ac, char **av)
}
static char *
-_sprint_task_cnt(job_t *job)
+_task_count_string (job_t *job)
{
int i, last_val, last_cnt;
- char *task_str = xstrdup("");
char tmp[16];
-
+ char *str = xstrdup ("");
+
last_val = job->ntask[0];
last_cnt = 1;
for (i=1; i<job->nhosts; i++) {
@@ -290,7 +290,7 @@ _sprint_task_cnt(job_t *job)
sprintf(tmp, "%d(x%d),", last_val, last_cnt);
else
sprintf(tmp, "%d,", last_val);
- xstrcat(task_str, tmp);
+ xstrcat(str, tmp);
last_val = job->ntask[i];
last_cnt = 1;
}
@@ -299,8 +299,8 @@ _sprint_task_cnt(job_t *job)
sprintf(tmp, "%d(x%d)", last_val, last_cnt);
else
sprintf(tmp, "%d", last_val);
- xstrcat(task_str, tmp);
- return task_str;
+ xstrcat(str, tmp);
+ return (str);
}
static void
@@ -367,6 +367,8 @@ _run_batch_job(void)
return SLURM_ERROR;
}
+ _set_batch_script_env (NULL);
+
if (!(req = job_desc_msg_create_from_opts (script)))
fatal ("Unable to create job request");
@@ -560,44 +562,22 @@ static int
_set_batch_script_env(job_t *job)
{
int rc = SLURM_SUCCESS;
- char *dist = NULL, *task_cnt;
+ char *dist = NULL;
+ char *p;
struct utsname name;
- if (job->jobid > 0) {
- if (setenvf("SLURM_JOBID=%u", job->jobid)) {
- error("Unable to set SLURM_JOBID environment");
- rc = SLURM_FAILURE;
- }
- }
-
- if (job->nhosts > 0) {
- if (setenvf("SLURM_NNODES=%u", job->nhosts)) {
- error("Unable to set SLURM_NNODES environment var");
- rc = SLURM_FAILURE;
- }
- }
-
- if (job->nodelist) {
- if (setenvf("SLURM_NODELIST=%s", job->nodelist)) {
- error("Unable to set SLURM_NODELIST environment var.");
- rc = SLURM_FAILURE;
- }
- }
-
if (opt.nprocs_set && setenvf("SLURM_NPROCS=%u", opt.nprocs)) {
error("Unable to set SLURM_NPROCS environment variable");
rc = SLURM_FAILURE;
}
-
if ( opt.cpus_set
&& setenvf("SLURM_CPUS_PER_TASK=%u", opt.cpus_per_task) ) {
error("Unable to set SLURM_CPUS_PER_TASK");
rc = SLURM_FAILURE;
}
-
- if (opt.distribution != SRUN_DIST_UNKNOWN) {
+ if (job && opt.distribution != SRUN_DIST_UNKNOWN) {
dist = (opt.distribution == SRUN_DIST_BLOCK) ?
"block" : "cyclic";
@@ -625,11 +605,41 @@ _set_batch_script_env(job_t *job)
rc = SLURM_FAILURE;
}
- if (setenvf("SLURM_TASKS_PER_NODE=%s", (task_cnt = _sprint_task_cnt(job)))) {
- error("Unable to set SLURM_TASKS_PER_NODE environment variable");
- rc = SLURM_FAILURE;
+ /*
+ * If no job has been allocated yet, just return. We are
+ * submitting a batch job.
+ */
+ if (job == NULL)
+ return (rc);
+
+
+ if (job->jobid > 0) {
+ if (setenvf("SLURM_JOBID=%u", job->jobid)) {
+ error("Unable to set SLURM_JOBID environment");
+ rc = SLURM_FAILURE;
+ }
+ }
+
+ if (job->nhosts > 0) {
+ if (setenvf("SLURM_NNODES=%u", job->nhosts)) {
+ error("Unable to set SLURM_NNODES environment var");
+ rc = SLURM_FAILURE;
+ }
+ }
+
+ if (job->nodelist) {
+ if (setenvf("SLURM_NODELIST=%s", job->nodelist)) {
+ error("Unable to set SLURM_NODELIST environment var.");
+ rc = SLURM_FAILURE;
+ }
+ }
+ if ((p = _task_count_string (job))) {
+ if (setenvf ("SLURM_TASKS_PER_NODE=%s", p)) {
+ error ("Can't set SLURM_TASKS_PER_NODE env variable");
+ rc = SLURM_FAILURE;
+ }
+ xfree (p);
}
- xfree(task_cnt);
uname(&name);
if (strcasecmp(name.sysname, "AIX") == 0) {
View
1  testsuite/slurm_unit/slurmctld/Makefile.am
@@ -7,7 +7,6 @@ TESTS =
# even if they can't be run stand-alone.
check_PROGRAMS = \
security_2_2 \
- security_2_4 \
$(TESTS)
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common
View
12 testsuite/slurm_unit/slurmctld/security_2_1.csh
@@ -2,7 +2,7 @@
setenv CONFIG /etc/slurm/slurm.conf
setenv DEPLOY /usr
-echo "Insure that files are not user writable"
+echo "Insure that executable files are not user writable"
ls -ld $DEPLOY/bin/srun
ls -ld $DEPLOY/bin/sinfo
ls -ld $DEPLOY/bin/squeue
@@ -21,18 +21,18 @@ grep JobCredential $CONFIG
ls -ld /etc/slurm/slurm.key
ls -ld /etc/slurm/slurm.cert
+echo "Plugin directory and its contents must be non-writable"
grep PluginDir $CONFIG
ls -ld /usr/lib/slurm
ls -l /usr/lib/slurm
-grep Prioritize $CONFIG
-#echo "Prioritize will move to a plugin"
-
grep Prolog $CONFIG
#ls -ld /admin/sbin/slurm.prolog
+echo "Spool and log files must be non-writeable"
grep SlurmdSpoolDir $CONFIG
-ls -ld /tmp/slurmd
-
+ls -ld /var/spool/slurm
grep StateSaveLocation $CONFIG
ls -ld /usr/local/tmp/slurm/adev
+grep SlurmctldLogFile $CONFIG
+ls -ld /var/log/slurm/slurmctld.log
View
40 testsuite/slurm_unit/slurmctld/security_2_4.c
@@ -1,40 +0,0 @@
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include <slurm/slurm.h>
-#include <slurm/slurm_errno.h>
-
-/* Attempt to run a job with the incorrect user id and confirm an error */
-int
-main (int argc, char *argv[])
-{
- int error_code;
- job_desc_msg_t job_mesg;
- resource_allocation_and_run_response_msg_t* run_resp_msg ;
-
- slurm_init_job_desc_msg( &job_mesg );
- job_mesg. user_id = getuid() + 1;
- job_mesg. min_nodes = 1;
- job_mesg. task_dist = SLURM_DIST_CYCLIC;
-
- error_code = slurm_allocate_resources_and_run ( &job_mesg ,
- &run_resp_msg );
- if (error_code == SLURM_SUCCESS) {
- fprintf (stderr, "ERROR: The allocate succeeded\n");
- exit(1);
- } else if ((error_code = slurm_get_errno()) != ESLURM_USER_ID_MISSING) {
- fprintf (stderr,
- "ERROR: Wrong error code received: %s instead of %s\n",
- slurm_strerror(error_code), "ESLURM_USER_ID_MISSING");
- exit(1);
- } else {
- printf ("SUCCESS!\n");
- printf ("The allocate request was rejected as expected.\n");
- printf ("Check SlurmctldLog for an error message.\n");
- exit(0);
- }
-}
-
View
6 testsuite/slurm_unit/slurmd/security_3_1.c
@@ -23,7 +23,7 @@ int main(int argc, char *argv[])
{
batch_job_launch_msg_t launch_msg;
int uid;
- uint32_t jid;
+ uint32_t jid, cpu_arr[1];
if (argc != 2) {
_usage(argv[0]);
@@ -40,6 +40,10 @@ int main(int argc, char *argv[])
launch_msg.job_id = jid;
launch_msg.uid = uid;
launch_msg.nodes = argv[1];
+ launch_msg.num_cpu_groups = 1;
+ cpu_arr[0] = 1;
+ launch_msg.cpus_per_node = cpu_arr;
+ launch_msg.cpu_count_reps = cpu_arr;
launch_msg.err = "/dev/null";
launch_msg.in = "/dev/null";
launch_msg.out = "/dev/null";
Please sign in to comment.
Something went wrong with that request. Please try again.