From b371864c12c669e16272d1cceaf85a798f4d0a46 Mon Sep 17 00:00:00 2001 From: no author Date: Sat, 28 Feb 2004 00:43:38 +0000 Subject: [PATCH] This commit was manufactured by cvs2svn to create tag 'slurm-0-2-24-1'. --- META | 4 +- NEWS | 37 ++- doc/man/man1/sinfo.1 | 48 +++- doc/man/man1/squeue.1 | 22 +- doc/man/man5/slurm.conf.5 | 15 +- slurm/slurm.h.in | 3 +- src/api/Makefile.am | 19 +- src/api/allocate.c | 6 +- src/api/cancel.c | 3 +- src/api/complete.c | 3 +- src/api/config_info.c | 2 +- src/api/job_info.c | 3 +- src/api/job_step_info.c | 2 +- src/api/node_info.c | 2 +- src/api/partition_info.c | 2 +- src/api/reconfigure.c | 4 +- src/api/submit.c | 1 + src/common/elanhosts.c | 387 ------------------------------- src/common/elanhosts.h | 121 ---------- src/common/log.c | 13 +- src/common/slurm_auth.c | 11 +- src/common/slurm_errno.c | 2 +- src/common/slurm_protocol_defs.c | 2 +- src/common/slurm_protocol_pack.c | 4 - src/common/xsignal.c | 1 + src/plugins/auth/auth_munge.c | 1 + src/scontrol/scontrol.c | 2 +- src/slurmctld/agent.c | 7 +- src/slurmctld/controller.c | 37 ++- src/slurmctld/job_mgr.c | 42 +--- src/slurmctld/node_mgr.c | 31 ++- src/slurmctld/node_scheduler.c | 5 +- src/slurmctld/partition_mgr.c | 3 + src/slurmctld/ping_nodes.c | 29 ++- src/slurmd/interconnect.h | 1 - src/slurmd/mgr.c | 10 +- src/slurmd/req.c | 5 + src/slurmd/shm.c | 43 ++-- src/slurmd/slurmd.c | 93 ++++---- src/slurmd/ulimits.c | 3 +- src/srun/allocate.c | 12 +- src/srun/io.c | 2 + src/srun/job.c | 20 +- src/srun/job.h | 3 +- src/srun/opt.c | 10 +- src/srun/opt.h | 1 + src/srun/signals.c | 4 + 47 files changed, 394 insertions(+), 687 deletions(-) delete mode 100644 src/common/elanhosts.c delete mode 100644 src/common/elanhosts.h diff --git a/META b/META index b20848cc17..cab30c0175 100644 --- a/META +++ b/META @@ -7,6 +7,6 @@ Name: slurm Major: 0 Minor: 2 - Micro: 20 - Version: 0.2.20 + Micro: 24 + Version: 0.2.24 Release: 1 diff --git a/NEWS b/NEWS index 5b6e560599..fb597596e9 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,38 @@ This file describes changes in recent versions of SLURM. It primarily documents those changes that are of interest to users and admins. +* Changes in SLURM 0.2.24 +========================= + -- Fixes for reported problems: + - slurm/387: Jobs lost and nodes DOWN on slurmctld restart + -- Nodes no longer transition from COMPLETING to DOWN when not responding. + Instead, slurmctld either waits for positive verification of job + completion, or an administrator must explicitly set the node DOWN. + -- New srun option `-q, --quit-on-interrupt' enables single Ctrl-C to + terminate running job. + -- Improved error message from srun when jobid set in environment is + no longer running. + -- Added job and node state descriptions to the squeue(1) and sinfo(1) + man pages. + +* Changes in SLURM 0.2.23 +======================== + -- Fixes for reported problems: + - slurm/381: Hold jobs requesting more resources than partition limit. + +* Changes in SLURM 0.2.22 +========================= + -- Fixes for reported problems: + - slurm/326: Node stays in completing state indefinitely. + - slurm/328: slurmd uses different shared memory key on restart. + - slurm/329: Job step processes may be left running when one task dies. + - slurm/334: slurmd segv with multiple simultaneous job steps. + -- Allow more digits for priority values in scontrol. + -- Applied various fixes for memory leaks. + -- Remove logic preventing DPCS from allocating jobs with more than + eight node segments. 
Fix for DPCS should now be in production. + -- Changed compact string for DRAINING state to "drng" from "drain." + * Changes in SLURM 0.2.21 ========================= -- Fixes for reported problems: @@ -9,14 +9,17 @@ documents those changes that are of interest to users and admins. - slurm/300: Possibly killing wrong job on slurmd restart - slurm/312: Freeing non-allocated memory and killing slurmd -- Assorted changes to support RedHat Enterprise Linux 3.0 and IA64 + -- Initial Elan4 and libelanctrl support (--with-elan). -- Slurmctld was sometimes inappropriately setting a job's priority to 1 when a node was down (even if up nodes could be used for the job when a running job completes) -- Convert all user commands from use of popt library to getopt_long() -- If TotalView support is requested, srun exports "totalview_jobid" variable for `%J' expansion in TV bulk launch string. + -- Fix several locking bugs in slurmd IO layer. -- Throttle back repetitious error messages in slurmd to avoid filling - slurm logfiles. + log files. + * Changes in SLURM 0.2.20 ========================= diff --git a/doc/man/man1/sinfo.1 b/doc/man/man1/sinfo.1 index 5bc8eb194d..a0eb2790a0 100644 --- a/doc/man/man1/sinfo.1 +++ b/doc/man/man1/sinfo.1 @@ -1,4 +1,4 @@ -.TH SINFO "1" "October 2003" "sinfo 0.2" "Slurm components" +.TH SINFO "1" "February 2004" "sinfo 0.2" "Slurm components" .SH "NAME" sinfo \- Used to view information about Slurm nodes and partitions. @@ -209,6 +209,52 @@ that are presently not responding. \fBTMP_DISK\fR Size of temporary disk space in megabytes on these nodes. +.SH "NODE STATE CODES" +.PP +Node state codes are shortened as required for the field size. +If the node state code is followed by "*", this indicates the node +is presently not responding and will not be allocated any new work. +If the node remains non-responsive, it will be placed in the \fBDOWN\fR +state. +.TP 12 +ALLOCATED +The node has been allocated to one or more jobs. +.TP +COMPLETING +One or more jobs have been allocated this node and are in the process +of COMPLETING. This node state will be left when all of the job's +processes have terminated and the SLURM epilog program (if any) has +terminated. See the \fBEpilog\fR parameter description in the +\fBslurm.conf\fR man page for more information. +.TP +DOWN +The node is unavailable for use. SLURM can automatically place nodes +in this state if some failure occurs. System administrators may also +explicitly place nodes in this state. If a node resumes normal operation, +SLURM can automatically return it to service. See the \fBReturnToService\fR +and \fBSlurmdTimeout\fR parameter descriptions in the \fBslurm.conf\fR(5) +man page for more information. +.TP +DRAINED +The node is unavailable for use per system administrator request. +See the \fBupdate node\fR command in the \fBscontrol\fR(1) man page +or the \fBslurm.conf\fR(5) man page for more information. +.TP +DRAINING +The node is currently executing a job, but will not be allocated to +additional jobs. The node state will be changed to state \fBDRAINED\fR +when the last job on it completes. Nodes enter this state per system +administrator request. See the \fBupdate node\fR command in the +\fBscontrol\fR(1) man page or the \fBslurm.conf\fR(5) man page for +more information. +.TP +IDLE +The node is not allocated to any jobs and is available for use. +.TP +UNKNOWN +The SLURM controller has just started and the node's state has not +yet been determined.
+ .SH "ENVIRONMENT VARIABLES" .PP Some \fBsinfo\fR options may be set via environment variables. These diff --git a/doc/man/man1/squeue.1 b/doc/man/man1/squeue.1 index 72b62f6c4c..a0e5d300e0 100644 --- a/doc/man/man1/squeue.1 +++ b/doc/man/man1/squeue.1 @@ -1,4 +1,4 @@ -.TH SQUEUE "1" "October 2003" "squeue 0.2" "Slurm components" +.TH SQUEUE "1" "February 2004" "squeue 0.2" "Slurm components" .SH "NAME" squeue \- Used to view information of jobs located in the scheduling queue. @@ -148,6 +148,26 @@ Report details of squeues actions. \fB\-V\fR , \fB\-\-version\fR Print version information and exit. +.SH "JOB STATE CODES" +.TP 17 +CD COMPLETED +Job has terminated all processes on all nodes. +.TP +CG COMPLETING +Job is in the process of completing. Some processes on some nodes may still be active. +.TP +F FAILED +Job terminated with non-zero exit code or other failure condition. +.TP +NF NODE_FAIL +Job terminated due to failure of one or more allocated nodes. +.TP +PD PENDING +Job is awaiting resource allocation. +.TP +TO TIMEOUT +Job terminated upon reaching its time limit. + .SH "ENVIRONMENT VARIABLES" .PP Some \fBsqueue\fR options may be set via environment variables. These diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index a0e8772145..b382aa50e9 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -205,10 +205,15 @@ on the same nodes or the values of \fBSlurmctldPort\fR and \fBSlurmdPort\fR must be different. .TP \fBSlurmdSpoolDir\fR -Fully qualified pathname of a file into which the \fBslurmd\fR daemon's state -information is written. This must be a common pathname for all nodes, but -should represent a file which is local to each node (reference a local file -system). The default value is "/tmp/slurmd". +Fully qualified pathname of a directory into which the \fBslurmd\fR +daemon's state information and batch job script information are written. This +must be a common pathname for all nodes, but should represent a directory which +is local to each node (reference a local file system). The default value +is "/var/spool/slurmd". \fBNOTE\fR: This directory is also used to store \fBslurmd\fR's +shared memory lockfile, and \fBshould not be changed\fR unless the system +is being cleanly restarted. If the location of \fBSlurmdSpoolDir\fR is +changed and \fBslurmd\fR is restarted, the new daemon will attach to a +different shared memory region and lose track of any running jobs. .TP \fBSlurmdTimeout\fR The interval, in seconds, that the SLURM controller waits for \fBslurmd\fR @@ -228,7 +233,7 @@ Fully qualified pathname of the file system available to user jobs for temporary storage. This parameter is used in establishing a node's \fBTmpDisk\fR space. The default value is "/tmp". .TP -\fBWaitTimefR +\fBWaitTime\fR Specifies how many seconds the srun command should by default wait after the first task terminates before terminating all remaining tasks. The "--wait" option on the srun command line overrides this value.
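Illustrative sketch, not part of the commit: the SlurmdSpoolDir NOTE above holds because slurmd derives its lockfile path from the configured spool directory at startup. Below is a minimal C rendering of that derivation, mirroring the slash handling of _create_ipc_name() in the src/slurmd/shm.c hunk later in this patch; the helper name build_lock_path and the lock name "slurmd.lock" are hypothetical.

    #include <stdio.h>
    #include <string.h>

    /* Append `name' to the configured spool directory, avoiding a
     * doubled '/' when the directory already ends with one -- the
     * same slash handling _create_ipc_name() uses below. */
    static int build_lock_path(char *dst, size_t dstlen,
                               const char *spooldir, const char *name)
    {
            const char *slash =
                    (spooldir[strlen(spooldir) - 1] == '/') ? "" : "/";
            return snprintf(dst, dstlen, "%s%s%s", spooldir, slash, name);
    }

    /* e.g. build_lock_path(buf, sizeof(buf), "/var/spool/slurmd",
     * "slurmd.lock") yields "/var/spool/slurmd/slurmd.lock". */

Because the path is recomputed from the live configuration on each start, relocating SlurmdSpoolDir and restarting slurmd yields a different lockfile, and hence a different shared memory key, which is exactly the failure mode the NOTE warns about.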
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index c28cedfc4b..4f376fb72e 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -796,7 +796,8 @@ extern int slurm_update_node PARAMS(( update_node_msg_t * node_msg )); * default values * OUT job_desc_msg - user defined partition descriptor */ -void slurm_init_part_desc_msg PARAMS((update_part_msg_t * update_part_msg )); +extern void slurm_init_part_desc_msg PARAMS(( + update_part_msg_t * update_part_msg )); /* * slurm_load_partitions - issue RPC to get slurm all partition configuration diff --git a/src/api/Makefile.am b/src/api/Makefile.am index 34bfa77aac..a0f19ee154 100644 --- a/src/api/Makefile.am +++ b/src/api/Makefile.am @@ -43,19 +43,22 @@ libslurm_la_SOURCES = \ common_dir = $(top_builddir)/src/common -libslurm_la_LIBADD = $(common_dir)/libcommon.la -lpthread +libslurm_la_LIBADD = \ + $(common_dir)/libcommon.la -lpthread -libslurm_la_LDFLAGS = -export-symbols libslurm.sym \ - -version-info $(current):$(rev):$(age) +libslurm_la_LDFLAGS = \ + -export-symbols libslurm.sym \ + -version-info $(current):$(rev):$(age) -libslurm_la_DEPENDENCIES = libslurm.sym $(common_dir)/libcommon.la +libslurm_la_DEPENDENCIES = \ + libslurm.sym \ + $(common_dir)/libcommon.la - -libslurm.sym : $(top_builddir)/slurm/slurm.h - sed -n 's/^extern.* \(slurm[^ ]*\).*$$/\1/p' $< >libslurm.sym +libslurm.sym : $(top_builddir)/slurm/slurm.h + -sed -n 's/^extern .* \([a-zA-Z0-9_]*\) PARAMS.*$$/\1/p' $< $* >libslurm.sym distclean-local: - -rm libslurm.sym + -rm -rf libslurm.map libslurm.sym force: $(libslurm_la_LIBADD) : force diff --git a/src/api/allocate.c b/src/api/allocate.c index 0895109f3f..cecf35dcf1 100644 --- a/src/api/allocate.c +++ b/src/api/allocate.c @@ -89,6 +89,7 @@ slurm_allocate_resources (job_desc_msg_t *req, if (rc == SLURM_SOCKET_ERROR) return SLURM_SOCKET_ERROR; + slurm_free_cred(resp_msg.cred); switch (resp_msg.msg_type) { case RESPONSE_SLURM_RC: if (_handle_rc_msg(&resp_msg) < 0) @@ -127,6 +128,7 @@ int slurm_job_will_run (job_desc_msg_t *req, if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0) return SLURM_SOCKET_ERROR; + slurm_free_cred(resp_msg.cred); switch (resp_msg.msg_type) { case RESPONSE_SLURM_RC: if (_handle_rc_msg(&resp_msg) < 0) @@ -183,7 +185,7 @@ slurm_allocate_resources_and_run (job_desc_msg_t *req, if (rc == SLURM_SOCKET_ERROR) return SLURM_SOCKET_ERROR; - + slurm_free_cred(resp_msg.cred); switch (resp_msg.msg_type) { case RESPONSE_SLURM_RC: if (_handle_rc_msg(&resp_msg) < 0) @@ -222,6 +224,7 @@ slurm_job_step_create (job_step_create_request_msg_t *req, if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0) return SLURM_ERROR; + slurm_free_cred(resp_msg.cred); switch (resp_msg.msg_type) { case RESPONSE_SLURM_RC: if (_handle_rc_msg(&resp_msg) < 0) @@ -259,6 +262,7 @@ slurm_confirm_allocation (old_job_alloc_msg_t *req, if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0) return SLURM_ERROR; + slurm_free_cred(resp_msg.cred); switch(resp_msg.msg_type) { case RESPONSE_SLURM_RC: if (_handle_rc_msg(&resp_msg) < 0) diff --git a/src/api/cancel.c b/src/api/cancel.c index fb364e2888..79258b7019 100644 --- a/src/api/cancel.c +++ b/src/api/cancel.c @@ -73,7 +73,8 @@ slurm_kill_job_step (uint32_t job_id, uint32_t step_id, uint16_t signal) if (slurm_send_recv_controller_rc_msg(&msg, &rc) < 0) return SLURM_FAILURE; - if (rc) slurm_seterrno_ret(rc); + if (rc) + slurm_seterrno_ret(rc); return SLURM_SUCCESS; } diff --git a/src/api/complete.c b/src/api/complete.c index 3c529677c4..de75377c9d 100644 --- a/src/api/complete.c 
+++ b/src/api/complete.c @@ -85,7 +85,8 @@ slurm_complete_job_step ( uint32_t job_id, uint32_t step_id, if (slurm_send_recv_controller_rc_msg(&req_msg, &rc) < 0) return SLURM_ERROR; - if (rc) slurm_seterrno_ret(rc); + if (rc) + slurm_seterrno_ret(rc); return SLURM_PROTOCOL_SUCCESS; } diff --git a/src/api/config_info.c b/src/api/config_info.c index e43f5b928d..58020ab7d7 100644 --- a/src/api/config_info.c +++ b/src/api/config_info.c @@ -151,10 +151,10 @@ slurm_load_ctl_conf (time_t update_time, slurm_ctl_conf_t **confp) if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0) return SLURM_ERROR; + slurm_free_cred(resp_msg.cred); switch (resp_msg.msg_type) { case RESPONSE_BUILD_INFO: *confp = (slurm_ctl_conf_info_msg_t *) resp_msg.data; - slurm_free_cred(resp_msg.cred); break; case RESPONSE_SLURM_RC: rc = ((return_code_msg_t *) resp_msg.data)->return_code; diff --git a/src/api/job_info.c b/src/api/job_info.c index 0e46334481..3dfd658b4f 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -219,10 +219,10 @@ slurm_load_jobs (time_t update_time, job_info_msg_t **resp) if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0) return SLURM_ERROR; + slurm_free_cred(resp_msg.cred); switch (resp_msg.msg_type) { case RESPONSE_JOB_INFO: *resp = (job_info_msg_t *)resp_msg.data; - slurm_free_cred(resp_msg.cred); break; case RESPONSE_SLURM_RC: rc = ((return_code_msg_t *) resp_msg.data)->return_code; @@ -267,6 +267,7 @@ slurm_pid2jobid (pid_t job_pid, uint32_t *jobid) if (slurm_send_recv_node_msg(&req_msg, &resp_msg, 0) < 0) return SLURM_ERROR; + slurm_free_cred(resp_msg.cred); switch (resp_msg.msg_type) { case RESPONSE_JOB_ID: *jobid = ((job_id_response_msg_t *) resp_msg.data)->job_id; diff --git a/src/api/job_step_info.c b/src/api/job_step_info.c index f07cea90b6..9f517b0a6c 100644 --- a/src/api/job_step_info.c +++ b/src/api/job_step_info.c @@ -125,10 +125,10 @@ slurm_get_job_steps (time_t update_time, uint32_t job_id, uint32_t step_id, if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0) return SLURM_ERROR; + slurm_free_cred(resp_msg.cred); switch (resp_msg.msg_type) { case RESPONSE_JOB_STEP_INFO: *resp = (job_step_info_response_msg_t *) resp_msg.data; - slurm_free_cred(resp_msg.cred); break; case RESPONSE_SLURM_RC: rc = ((return_code_msg_t *) resp_msg.data)->return_code; diff --git a/src/api/node_info.c b/src/api/node_info.c index f220ff62a6..91ce0d846f 100644 --- a/src/api/node_info.c +++ b/src/api/node_info.c @@ -129,10 +129,10 @@ slurm_load_node (time_t update_time, node_info_msg_t **resp) if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0) return SLURM_ERROR; + slurm_free_cred(resp_msg.cred); switch (resp_msg.msg_type) { case RESPONSE_NODE_INFO: *resp = (node_info_msg_t *) resp_msg.data; - slurm_free_cred(resp_msg.cred); break; case RESPONSE_SLURM_RC: rc = ((return_code_msg_t *) resp_msg.data)->return_code; diff --git a/src/api/partition_info.c b/src/api/partition_info.c index d9c01cbbb7..0d66f643c6 100644 --- a/src/api/partition_info.c +++ b/src/api/partition_info.c @@ -161,10 +161,10 @@ slurm_load_partitions (time_t update_time, partition_info_msg_t **resp) if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0) return SLURM_ERROR; + slurm_free_cred(resp_msg.cred); switch (resp_msg.msg_type) { case RESPONSE_PARTITION_INFO: *resp = (partition_info_msg_t *) resp_msg.data; - slurm_free_cred(resp_msg.cred); break; case RESPONSE_SLURM_RC: rc = ((return_code_msg_t *) resp_msg.data)->return_code; diff --git a/src/api/reconfigure.c b/src/api/reconfigure.c index 
37f027563c..b9109b428a 100644 --- a/src/api/reconfigure.c +++ b/src/api/reconfigure.c @@ -58,7 +58,8 @@ slurm_reconfigure ( void ) if (slurm_send_recv_controller_rc_msg(&req, &rc) < 0) return SLURM_ERROR; - if (rc) slurm_seterrno_ret(rc); + if (rc) + slurm_seterrno_ret(rc); return SLURM_PROTOCOL_SUCCESS; } @@ -129,6 +130,7 @@ _send_message_controller (enum controller_id dest, slurm_msg_t *req) if ((rc = slurm_receive_msg(fd, &resp_msg, 0)) < 0) slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR); + slurm_free_cred(resp_msg.cred); if (slurm_shutdown_msg_conn(fd) != SLURM_SUCCESS) slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_SHUTDOWN_ERROR); diff --git a/src/api/submit.c b/src/api/submit.c index c413cff506..f49926b20f 100644 --- a/src/api/submit.c +++ b/src/api/submit.c @@ -87,6 +87,7 @@ slurm_submit_batch_job (job_desc_msg_t *req, if (rc == SLURM_SOCKET_ERROR) return SLURM_ERROR; + slurm_free_cred(resp_msg.cred); switch (resp_msg.msg_type) { case RESPONSE_SLURM_RC: rc = ((return_code_msg_t *) resp_msg.data)->return_code; diff --git a/src/common/elanhosts.c b/src/common/elanhosts.c deleted file mode 100644 index 47fcda1c21..0000000000 --- a/src/common/elanhosts.c +++ /dev/null @@ -1,387 +0,0 @@ -/*****************************************************************************\ - * $Id$ - ***************************************************************************** - * Copyright (C) 2001-2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Mark Grondona . - * UCRL-CODE-2003-005. - * - * This file is part of Pdsh, a parallel remote shell program. - * For details, see . - * - * Pdsh is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * Pdsh is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Pdsh; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
-\*****************************************************************************/ - -#if HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include -#include -#include -#include - -#include "src/common/list.h" -#include "src/common/hostlist.h" -#include "elanhosts.h" - -/* Default ElanId config file */ -#define ELANID_CONFIG_FILE "/etc/elanhosts" - -/* - * Error strings for error codes returned by parse_elanid_config() - */ -static char *errstr[] = -{ "No error", - "Out of memory!", - "Parse error", - "Number of ElanIds specified != number of hosts", - "Type must be \"eip\" \"eth\" or \"other\"", - NULL -}; - -/* - * Container for converting hostnames to ElanIDs - */ -struct elan_info { - elanhost_type_t type; /* type of entry */ - int elanid; /* ElanID corresponding to this hostname */ - char *hostname; /* Resolveable hostname */ -}; - -struct elanhost_config { -#ifndef NDEBUG - int magic; -# define ELANHOST_CONFIG_MAGIC 0xe100e100 -#endif - int maxid; /* Storage for max ElanID in config */ - List elanid_list; /* List of elan_info objects describing configuration */ - char errstr[1024]; /* String describing last error from this object */ -}; - - -/* - * Static Prototypes: - */ -static elanhost_config_t _elanhost_config_alloc(void); -static void _elanhost_err(elanhost_config_t ec, const char *fmt, ...); -static int _find_host(struct elan_info *ei, char *key); -static int _parse_elanid_config(elanhost_config_t ec, const char *path); -static int _parse_elanid_line(elanhost_config_t ec, char *buf); -static struct elan_info * _elan_info_create(elanhost_type_t type, - int elanid, char *hostname); -static void _elan_info_destroy(struct elan_info *ei); - - -elanhost_config_t elanhost_config_create() -{ - return _elanhost_config_alloc(); -} - - -int elanhost_config_read(elanhost_config_t ec, const char *filename) -{ - assert(ec != NULL); - assert(ec->magic == ELANHOST_CONFIG_MAGIC); - assert(ec->elanid_list != NULL); - - if (filename == NULL) - filename = ELANID_CONFIG_FILE; - - if (_parse_elanid_config(ec, filename) < 0) - return(-1); - - return(0); -} - -void elanhost_config_destroy(elanhost_config_t ec) -{ - assert(ec != NULL); - assert(ec->magic == ELANHOST_CONFIG_MAGIC); - list_destroy(ec->elanid_list); - assert(ec->magic = ~ELANHOST_CONFIG_MAGIC); - free(ec); -} - -int elanhost_config_maxid(elanhost_config_t ec) -{ - assert(ec != NULL); - assert(ec->magic == ELANHOST_CONFIG_MAGIC); - - return ec->maxid; -} - -int elanhost_host2elanid(elanhost_config_t ec, char *host) -{ - struct elan_info *ei; - - assert(ec != NULL); - assert(host != NULL); - assert(ec->magic == ELANHOST_CONFIG_MAGIC); - - ei = list_find_first(ec->elanid_list, (ListFindF) _find_host, host); - - if (!ei) { - _elanhost_err(ec, "Unable to find host \"%s\" in configuration", host); - return -1; - } - - return ei->elanid; -} - -const char *elanhost_config_err(elanhost_config_t ec) -{ - return ec->errstr; -} - - -struct elanid_find_arg { - elanhost_type_t type; - int elanid; -}; - -static int _find_elanid(struct elan_info *ei, struct elanid_find_arg *arg) -{ - if (ei->type != arg->type) - return 0; - - if (ei->elanid != arg->elanid) - return 0; - - return 1; -} - -char *elanhost_elanid2host(elanhost_config_t ec, elanhost_type_t type, int eid) -{ - struct elan_info *ei; - struct elanid_find_arg arg; - - assert(ec != NULL); - assert(eid >= 0); - assert(ec->magic == ELANHOST_CONFIG_MAGIC); - - arg.type = type; - arg.elanid = eid; - - ei = list_find_first(ec->elanid_list, (ListFindF) _find_elanid, &arg); - - if (!ei) { 
- _elanhost_err(ec, "Unable to find host with type=%d elanid=%d", - type, eid); - return(NULL); - } - - return ei->hostname; -} - -static elanhost_config_t _elanhost_config_alloc(void) -{ - elanhost_config_t new = malloc(sizeof(*new)); - - new->maxid = -1; - new->elanid_list = list_create((ListDelF) _elan_info_destroy); - - assert(new->magic = ELANHOST_CONFIG_MAGIC); - - return new; -} - -static void _elanhost_err(elanhost_config_t ec, const char *fmt, ...) -{ - va_list ap; - - assert(ec != NULL); - assert(fmt != NULL); - - va_start(ap, fmt); - vsnprintf(ec->errstr, 1024, fmt, ap); - va_end(ap); - - return; -} - -/* - * Parse the "elanhosts" config file which has the form - * - * ElanIds Hostnames - * [n-m] host_n,...,host_m - * [n-m] host[n-m] - * etc. - * - * and which maps ElanIds to hostnames on the cluster. - * The results are stored in the config object's elanid_list member. - * - * Returns 0 on Success, and an error code < 0 on failure. - */ -static int _parse_elanid_config(elanhost_config_t ec, const char *path) -{ - char buf[4096]; - int line; - FILE *fp; - - if (!(fp = fopen(path, "r"))) { - _elanhost_err(ec, "failed to open %s\n", path); - return -1; - } - - line = 1; - while (fgets(buf, 4096, fp)) { - int rc; - if ((rc = _parse_elanid_line(ec, buf)) < 0) { - _elanhost_err(ec, "%s: line %d: %s", path, line, errstr[-rc]); - return -1; - } - line++; - } - - if (fclose(fp) < 0) - _elanhost_err(ec, "close(%s): %m", path); - - return 0; -} - - -/* - * Translate type strings "eip," "eth," or "other" into their - * corresponding elanhost_type_t number - */ -static elanhost_type_t _get_type_num(char *type) -{ - if (strcasecmp(type, "eip") == 0) - return ELANHOST_EIP; - else if (strcasecmp(type, "eth") == 0) - return ELANHOST_ETH; - else if (strcasecmp(type, "other") == 0) - return ELANHOST_OTHER; - else - return -1; -} - -/* - * Parse one line of elanId list appending results to list "eil" - * - * Returns -1 for parse error, -2 if the number of elanids specified - * doesn't equal the number of hosts. 
- * - * Returns 0 on success - */ -static int -_parse_elanid_line(elanhost_config_t ec, char *buf) -{ - hostlist_t el, hl; - const char *separators = " \t\n"; - char *type; - char *elanids; - char *hosts; - char *sp, *s; - int rc = 0; - int typenum; - - /* - * Nullify any comments - */ - if ((s = strchr(buf, '#'))) - *s = '\0'; - - if (!(type = strtok_r(buf, separators, &sp))) - return 0; - - if (!(elanids = strtok_r(NULL, separators, &sp))) - return -1; - - if (!(hosts = strtok_r(NULL, separators, &sp))) - return -2; - - el = hostlist_create(NULL); - hl = hostlist_create(NULL); - - if (!el || !hl) { - rc = -1; - goto done; - } - - if (hostlist_push(el, elanids) != hostlist_push(hl, hosts)) { - rc = -3; - goto done; - } - - if ((typenum = _get_type_num(type)) < 0) - return -4; - - while ((s = hostlist_shift(el))) { - char *eptr; - int elanid = (int) strtoul(s, &eptr, 10); - - if (*eptr != '\0') { - rc = -2; - goto done; - } - - free(s); - if (!(s = hostlist_shift(hl))) { - rc = -1; - goto done; - } - - if (elanid > ec->maxid) - ec->maxid = elanid; - - list_append(ec->elanid_list, _elan_info_create(typenum, elanid, s)); - } - - done: - hostlist_destroy(el); - hostlist_destroy(hl); - - return rc; -} - -static struct elan_info * -_elan_info_create(elanhost_type_t type, int elanid, char *hostname) -{ - struct elan_info *ei = (struct elan_info *) malloc(sizeof(*ei)); - ei->type = type; - ei->elanid = elanid; - ei->hostname = hostname; - return ei; -} - -static void -_elan_info_destroy(struct elan_info *ei) -{ - if (ei->hostname) - free(ei->hostname); - free(ei); -} - - -/* - * List Find function for mapping hostname to an ElanId - */ -static int _find_host(struct elan_info *ei, char *key) -{ - if (strcmp(ei->hostname, key) != 0) - return 0; - else - return 1; -} - - -/* - * vi:tabstop=4 shiftwidth=4 expandtab - */ - diff --git a/src/common/elanhosts.h b/src/common/elanhosts.h deleted file mode 100644 index d5cb0bb652..0000000000 --- a/src/common/elanhosts.h +++ /dev/null @@ -1,121 +0,0 @@ -/*****************************************************************************\ - * $Id$ - ***************************************************************************** - * Copyright (C) 2001-2002 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Mark Grondona . - * UCRL-CODE-2003-005. - * - * This file is part of Pdsh, a parallel remote shell program. - * For details, see . - * - * Pdsh is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * Pdsh is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Pdsh; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
-\*****************************************************************************/ - -#ifndef _ELANHOSTS_H -#define _ELANHOSTS_H - -/* - * Type of Elan "hostname" - * Hostname corresponds to the eip adapter, an ethernet adapter, or "other" - */ -typedef enum { - ELANHOST_EIP, - ELANHOST_ETH, - ELANHOST_OTHER -} elanhost_type_t; - -/* Opaque type which holds the elanhost configuration - */ -typedef struct elanhost_config * elanhost_config_t; - - -/* - * Functions - */ - -/* - * Create an empty Elanhost config object - */ -elanhost_config_t elanhost_config_create(void); - -/* - * Read elanhosts configuration from `file' - * (Default /etc/elanhosts) - * - * Config file format is as follows: - * - * Type ElanIDs Hostnames - * - * The "type" field may be "eip" for eip interface, "eth" for an - * ethernet interface, or "other" for anything else. ("eth" and - * "other" are equivalent at this time) - * - * The "ElanIDs" field consists of a list of one or more ElanIDs in - * the form "[i-j,n-m,..]" or just "N" for a single ElanID. - * - * The "Hostname" field consists of the hostnames which correspond - * to the ElanIDs. If the hostnames have a numeric suffix a bracketed - * hostlist is allowed (see hostlist.[ch]) - * - * For Example: - * - * Type ElanIDs Hostnames - * eip [0-10] host[0-10] - * eth [0-10] ehost[0-10] - * eth [0,1] host0-eth1,host1-eth1 - * - * Returns 0 on succes, -1 for failure. - * - */ -int elanhost_config_read(elanhost_config_t ec, const char *filename); - - -/* - * Destroy an elanhost configuration object. - */ -void elanhost_config_destroy(elanhost_config_t conf); - - -/* - * Given a hostname, return the corresponding ElanID - * - * Returns the ElanId on success, -1 if no host matching "hostname" - * was found in the configuration. - * - */ -int elanhost_host2elanid(elanhost_config_t ec, char *host); - - -/* - * Given an ElanId and adapter type, return the first matching hostname - * from the configuration. - */ -char *elanhost_elanid2host(elanhost_config_t ec, - elanhost_type_t type, int elanid); - - -/* - * Returns the max ElanID from the configuration - */ -int elanhost_config_maxid(elanhost_config_t ec); - - -/* - * Returns the last error string generated for the elan config obj `ec' - */ -const char *elanhost_config_err(elanhost_config_t ec); - -#endif diff --git a/src/common/log.c b/src/common/log.c index 4d57f99a58..3c66406fb6 100644 --- a/src/common/log.c +++ b/src/common/log.c @@ -227,13 +227,13 @@ int log_init(char *prog, log_options_t opt, log_facility_t fac, char *logfile) void log_fini() { - if (!log) return; + if (!log) + return; + log_flush(); slurm_mutex_lock(&log_lock); - if (log->argv0) - xfree(log->argv0); - if (log->fpfx) - xfree(log->fpfx); + xfree(log->argv0); + xfree(log->fpfx); if (log->buf) cbuf_destroy(log->buf); if (log->fbuf) @@ -254,8 +254,7 @@ void log_reinit() void log_set_fpfx(char *prefix) { slurm_mutex_lock(&log_lock); - if (log->fpfx) - xfree(log->fpfx); + xfree(log->fpfx); if (!prefix) log->fpfx = xstrdup(""); else { diff --git a/src/common/slurm_auth.c b/src/common/slurm_auth.c index 5495030fa1..54b09da0c5 100644 --- a/src/common/slurm_auth.c +++ b/src/common/slurm_auth.c @@ -243,7 +243,7 @@ slurm_auth_context_create( const char *auth_type ) c->auth_errno = SLURM_SUCCESS; /* Copy the authentication type. 
*/ - c->auth_type = strdup( auth_type ); + c->auth_type = xstrdup( auth_type ); if ( c->auth_type == NULL ) { debug3( "can't make local copy of authentication type" ); xfree( c ); @@ -299,7 +299,7 @@ slurm_auth_context_destroy( slurm_auth_context_t c ) } } - free( c->auth_type ); + xfree( c->auth_type ); xfree( c ); return SLURM_SUCCESS; @@ -340,7 +340,12 @@ slurm_auth_fini( void ) if ( g_context ) slurm_auth_context_destroy( g_context ); - free_slurm_conf( &conf ); + slurm_mutex_lock( &config_lock ); + if ( conf.slurmd_port ) { + free_slurm_conf( &conf ); + conf.slurmd_port = 0; + } + slurm_mutex_unlock( &config_lock ); } /* diff --git a/src/common/slurm_errno.c b/src/common/slurm_errno.c index 92c63dcb1a..57076ecb35 100644 --- a/src/common/slurm_errno.c +++ b/src/common/slurm_errno.c @@ -92,7 +92,7 @@ static slurm_errtab_t slurm_errtab[] = { { ESLURM_INVALID_PARTITION_NAME, "Invalid partition name specified" }, { ESLURM_DEFAULT_PARTITION_NOT_SET, - "System default partition not set" }, + "No partition specified or system default partition" }, { ESLURM_ACCESS_DENIED, "Access denied" }, { ESLURM_JOB_MISSING_REQUIRED_PARTITION_GROUP, diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index 19f4523265..94a399d56b 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -372,7 +372,7 @@ char *node_state_string_compact(enum node_states inx) "IDLE", "ALLOC", "DRAIN", - "DRAIN", + "DRNG", "COMP", "END" }; diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index bf369be975..bc20895676 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -796,10 +796,6 @@ _unpack_resource_allocation_response_msg(resource_allocation_response_msg_t safe_unpack16(&tmp_ptr->num_cpu_groups, buffer); if (tmp_ptr->num_cpu_groups > 0) { - tmp_ptr->cpus_per_node = (uint32_t *) - xmalloc(sizeof(uint32_t) * tmp_ptr->num_cpu_groups); - tmp_ptr->cpu_count_reps = (uint32_t *) - xmalloc(sizeof(uint32_t) * tmp_ptr->num_cpu_groups); safe_unpack32_array((uint32_t **) & (tmp_ptr->cpus_per_node), &uint32_tmp, buffer); diff --git a/src/common/xsignal.c b/src/common/xsignal.c index 28d05470d3..8eb09aa21e 100644 --- a/src/common/xsignal.c +++ b/src/common/xsignal.c @@ -91,6 +91,7 @@ xsignal_sigset_create(int sigarray[], sigset_t *setp) int xsignal_save_mask(sigset_t *set) { + sigemptyset(set); return _sigmask(SIG_SETMASK, NULL, set); } diff --git a/src/plugins/auth/auth_munge.c b/src/plugins/auth/auth_munge.c index 11cefc3089..a46713716e 100644 --- a/src/plugins/auth/auth_munge.c +++ b/src/plugins/auth/auth_munge.c @@ -462,6 +462,7 @@ _decode_cred(char *m, slurm_auth_credential_t *c) * Block all signals to allow munge_decode() to proceed * uninterrupted. 
(Testing for gnats slurm/223) */ + sigemptyset(&oset); sigfillset(&set); sigdelset(&set, SIGABRT); sigdelset(&set, SIGSEGV); diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c index c2d14173be..8177d76180 100644 --- a/src/scontrol/scontrol.c +++ b/src/scontrol/scontrol.c @@ -1268,7 +1268,7 @@ _update_job (int argc, char *argv[]) } else if (strncasecmp(argv[i], "Priority=", 9) == 0) job_msg.priority = - (uint32_t) strtol(&argv[i][9], + (uint32_t) strtoll(&argv[i][9], (char **) NULL, 10); else if (strncasecmp(argv[i], "ReqProcs=", 9) == 0) job_msg.num_procs = diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c index 3713eb6142..0c39a39901 100644 --- a/src/slurmctld/agent.c +++ b/src/slurmctld/agent.c @@ -364,7 +364,7 @@ static void *_wdog(void *args) (unsigned long) thread_ptr[i].thread); if (pthread_kill(thread_ptr[i].thread, SIGALRM) == ESRCH) - thread_ptr[i].state = DSH_FAILED; + thread_ptr[i].state = DSH_NO_RESP; } break; case DSH_NEW: @@ -522,7 +522,7 @@ static void *_thread_per_node_rpc(void *args) msg.msg_type = msg_type; msg.data = task_ptr->msg_args_ptr; - thread_ptr->end_time = thread_ptr->start_time + COMMAND_TIMEOUT; + thread_ptr->end_time = thread_ptr->start_time + COMMAND_TIMEOUT; if (task_ptr->get_reply) { if (slurm_send_recv_rc_msg(&msg, &rc, timeout) < 0) { _comm_err(thread_ptr->node_name); @@ -613,7 +613,7 @@ static void *_thread_per_node_rpc(void *args) } /* - * SIGALRM handler. We are really interested in interrupting hung communications + * SIGALRM handler. We are really interested in interrupting hung communications * and causing them to return EINTR. Multiple interrupts might be required. */ static void _alarm_handler(int dummy) @@ -621,7 +621,6 @@ { xsignal(SIGALRM, _alarm_handler); } - /* * _queue_agent_retry - Queue any failed RPCs for later replay * IN agent_info_ptr - pointer to info on completed agent requests diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index db7380312e..f7f0dada47 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -75,7 +75,7 @@ #define MIN_CHECKIN_TIME 3 /* Nodes have this number of seconds to * check-in before we ping them */ #define MEM_LEAK_TEST 0 /* Running memory leak test if set */ - +#define SHUTDOWN_WAIT 2 /* Time to wait for backup server shutdown */ /* Log to stderr and syslog until becomes a daemon */ log_options_t log_opts = LOG_OPTS_INITIALIZER; @@ -90,6 +90,7 @@ static int debug_level = 0; static char *debug_logfile = NULL; static bool dump_core = false; static int recover = DEFAULT_RECOVER; +static char node_name[MAX_NAME_LEN]; static pthread_cond_t server_thread_cond = PTHREAD_COND_INITIALIZER; static pid_t slurmctld_pid; /* @@ -111,7 +112,7 @@ static void _parse_commandline(int argc, char *argv[], inline static int _report_locks_set(void); static void * _service_connection(void *arg); static int _set_slurmctld_state_loc(void); -static int _shutdown_backup_controller(void); +static int _shutdown_backup_controller(int wait_time); static void * _slurmctld_background(void *no_data); static void * _slurmctld_rpc_mgr(void *no_data); static void * _slurmctld_signal_hand(void *no_data); @@ -127,7 +128,6 @@ typedef struct connection_arg { int main(int argc, char *argv[]) { int error_code; - char node_name[MAX_NAME_LEN]; pthread_attr_t thread_attr_sig, thread_attr_rpc; /* @@ -209,14 +209,13 @@ int main(int argc, char *argv[]) } else if (slurmctld_conf.control_machine && (strcmp(node_name, slurmctld_conf.control_machine) == 0)) { - (void)
_shutdown_backup_controller(); + (void) _shutdown_backup_controller(SHUTDOWN_WAIT); /* Now recover the remaining state information */ if ((error_code = read_slurm_conf(recover))) { error("read_slurm_conf reading %s: %m", SLURM_CONFIG_FILE); abort(); } - info("Running primary controller"); } else { error ("this host (%s) not valid controller (%s or %s)", @@ -224,6 +223,7 @@ int main(int argc, char *argv[]) slurmctld_conf.backup_controller); exit(0); } + info("Running primary controller"); if (switch_state_begin(recover)) { error("switch_state_begin: %m"); @@ -550,7 +550,9 @@ static void *_slurmctld_background(void *no_data) static time_t last_group_time; static time_t last_ping_time; static time_t last_timelimit_time; + static time_t last_assert_primary_time; time_t now; + /* Locks: Write job, write node, read partition */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK @@ -566,7 +568,7 @@ static void *_slurmctld_background(void *no_data) /* Let the dust settle before doing work */ now = time(NULL); last_sched_time = last_checkpoint_time = last_group_time = now; - last_timelimit_time = now; + last_timelimit_time = last_assert_primary_time = now; last_ping_time = now + (time_t)MIN_CHECKIN_TIME - (time_t)slurmctld_conf.heartbeat_interval; (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); @@ -640,6 +642,21 @@ static void *_slurmctld_background(void *no_data) save_all_state(); } + /* Reassert this machine as the primary controller. + * A network or security problem could result in + * the backup controller assuming control even + * while the real primary controller is running */ + if (slurmctld_conf.slurmctld_timeout && + slurmctld_conf.backup_addr && + slurmctld_conf.backup_addr[0] && + (difftime(now, last_assert_primary_time) >= + slurmctld_conf.slurmctld_timeout) && + node_name && slurmctld_conf.backup_controller && + strcmp(node_name, slurmctld_conf.backup_controller)) { + last_assert_primary_time = now; + (void) _shutdown_backup_controller(0); + } + } debug3("_slurmctld_background shutting down"); @@ -834,9 +851,10 @@ static void _usage(char *prog_name) /* * Tell the backup_controller to relinquish control, primary control_machine * has resumed operation + * wait_time - How long to wait for backup controller to write state, seconds * RET 0 or an error code */ -static int _shutdown_backup_controller(void) +static int _shutdown_backup_controller(int wait_time) { int rc; slurm_msg_t req; @@ -856,7 +874,7 @@ static int _shutdown_backup_controller(void) if (slurm_send_recv_rc_msg(&req, &rc, CONTROL_TIMEOUT) < 0) { error("shutdown_backup:send/recv: %m"); - return SLURM_SOCKET_ERROR; + return SLURM_ERROR; } if (rc) { @@ -870,7 +888,8 @@ static int _shutdown_backup_controller(void) * not presently the case (it returns when no other work is pending, * so the state save should occur right away). 
We sleep for a while * here and give the backup controller time to shutdown */ - sleep(2); + if (wait_time) + sleep(wait_time); return SLURM_PROTOCOL_SUCCESS; } diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 7781a48682..7baab5965e 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -122,7 +122,6 @@ static void _reset_step_bitmaps(struct job_record *job_ptr); static void _set_job_id(struct job_record *job_ptr); static void _set_job_prio(struct job_record *job_ptr); static bool _slurm_picks_nodes(job_desc_msg_t * job_specs); -static bool _too_many_fragments(bitstr_t *req_bitmap); static bool _top_priority(struct job_record *job_ptr); static int _validate_job_create_req(job_desc_msg_t * job_desc); static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate, @@ -1339,7 +1338,7 @@ job_complete(uint32_t job_id, uid_t uid, bool requeue, * IN allocate - resource allocation request if set rather than job submit * IN will_run - job is not to be created, test of validity only * OUT new_job_id - the job's ID - * OUT job_rec_ptr - pointer to the job (NULL on error) + * OUT job_pptr - pointer to the job (NULL on error) * RET 0 on success, otherwise ESLURM error code * globals: job_list - pointer to global job list * list_part - global list of partition info @@ -1349,13 +1348,13 @@ job_complete(uint32_t job_id, uid_t uid, bool requeue, static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, int allocate, int will_run, - struct job_record **job_rec_ptr, uid_t submit_uid) + struct job_record **job_pptr, uid_t submit_uid) { int error_code = SLURM_SUCCESS, i; struct part_record *part_ptr; bitstr_t *req_bitmap = NULL, *exc_bitmap = NULL; - *job_rec_ptr = (struct job_record *) NULL; + *job_pptr = (struct job_record *) NULL; if ((error_code = _validate_job_desc(job_desc, allocate, submit_uid))) return error_code; @@ -1415,10 +1414,6 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, error_code = ESLURM_REQUESTED_NODES_NOT_IN_PARTITION; goto cleanup; } - if (_too_many_fragments(req_bitmap)) { - error_code = ESLURM_TOO_MANY_REQUESTED_NODES; - goto cleanup; - } i = count_cpus(req_bitmap); if (i > job_desc->num_procs) job_desc->num_procs = i; @@ -1483,7 +1478,7 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, } if ((error_code = _copy_job_desc_to_job_record(job_desc, - job_rec_ptr, + job_pptr, part_ptr, &req_bitmap, &exc_bitmap))) { @@ -1493,16 +1488,16 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, if (job_desc->script) { if ((error_code = _copy_job_desc_to_file(job_desc, - (*job_rec_ptr)-> + (*job_pptr)-> job_id))) { - (*job_rec_ptr)->job_state = JOB_FAILED; + (*job_pptr)->job_state = JOB_FAILED; error_code = ESLURM_WRITING_TO_FILE; goto cleanup; } - (*job_rec_ptr)->batch_flag = 1; + (*job_pptr)->batch_flag = 1; } else - (*job_rec_ptr)->batch_flag = 0; - *new_job_id = (*job_rec_ptr)->job_id; + (*job_pptr)->batch_flag = 0; + *new_job_id = (*job_pptr)->job_id; /* Insure that requested partition is valid right now, * otherwise leave job queued and provide warning code */ @@ -1522,6 +1517,8 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id, *new_job_id, part_ptr->name); error_code = ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE; } + if (error_code == ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE) + (*job_pptr)->priority = 1; /* Move to end of queue */ cleanup: FREE_NULL_BITMAP(req_bitmap); @@ -3196,20 +3193,3 @@ void job_fini (void) 
xfree(job_hash_over); } -static bool _too_many_fragments(bitstr_t *req_bitmap) -{ -#ifdef MAX_NODE_FRAGMENTS - int i, frags=0; - int last_bit = 0, next_bit; - - for (i = 0; i < node_record_count; i++) { - next_bit = bit_test(req_bitmap, i); - if (next_bit == last_bit) - continue; - last_bit = next_bit; - if (next_bit && (++frags > MAX_NODE_FRAGMENTS)) - return true; - } -#endif - return false; -} diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 36f8ff6ea2..b46d705454 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -332,16 +332,21 @@ int dump_all_node_state ( void ) error ("Can't save state, error creating file %s %m", new_file); error_code = errno; - } - else { - if (write (log_fd, get_buf_data(buffer), - get_buf_offset(buffer)) != - get_buf_offset(buffer)) { - error ("Can't save state, error writing file %s %m", - new_file); - error_code = errno; + } else { + int pos = 0, nwrite = get_buf_offset(buffer), amount; + char *data = (char *)get_buf_data(buffer); + + while (nwrite > 0) { + amount = write(log_fd, &data[pos], nwrite); + if ((amount < 0) && (errno != EINTR)) { + error("Error writing file %s, %m", new_file); + error_code = errno; + break; + } + nwrite -= amount; + pos += amount; } - close (log_fd); + close(log_fd); } if (error_code) (void) unlink (new_file); @@ -1153,7 +1158,7 @@ validate_node_specs (char *node_name, uint32_t cpus, #endif if (node_ptr->node_state == NODE_STATE_UNKNOWN) { reset_job_priority(); - info ("validate_node_specs: node %s has registered", + debug("validate_node_specs: node %s has registered", node_name); if (job_count) node_ptr->node_state = NODE_STATE_ALLOCATED; @@ -1491,9 +1496,13 @@ void make_node_comp(struct node_record *node_ptr) node_ptr->name, node_state_string((enum node_states) node_ptr->node_state)); - } else { + } else if (!no_resp_flag) { node_ptr->node_state = NODE_STATE_COMPLETING | no_resp_flag; xfree(node_ptr->reason); + } else if ( (base_state == NODE_STATE_ALLOCATED) && + (node_ptr->run_job_cnt == 0) ) { + bit_set(idle_node_bitmap, inx); + node_ptr->node_state = NODE_STATE_IDLE | no_resp_flag; } } diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 17e9591cf0..0f8d1bfbbd 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -751,8 +751,11 @@ int select_nodes(struct job_record *job_ptr, bool test_only) (job_ptr->time_limit > part_ptr->max_time)) || ((job_ptr->details->max_nodes != 0) && /* no node limit */ (job_ptr->details->max_nodes < part_ptr->min_nodes)) || - (job_ptr->details->min_nodes > part_ptr->max_nodes)) + (job_ptr->details->min_nodes > part_ptr->max_nodes)) { + job_ptr->priority = 1; /* move to end of queue */ + last_job_update = time(NULL); return ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE; + } /* build sets of usable nodes based upon their configuration */ error_code = _build_node_list(job_ptr, &node_set_ptr, &node_set_size); diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c index 6e8cf3dc2c..a1a66ba295 100644 --- a/src/slurmctld/partition_mgr.c +++ b/src/slurmctld/partition_mgr.c @@ -731,6 +731,9 @@ int update_part(update_part_msg_t * part_desc) } } + if (error_code == SLURM_SUCCESS) + reset_job_priority(); /* free jobs */ + return error_code; } diff --git a/src/slurmctld/ping_nodes.c b/src/slurmctld/ping_nodes.c index 46212742d2..729d7f291c 100644 --- a/src/slurmctld/ping_nodes.c +++ b/src/slurmctld/ping_nodes.c @@ -107,8 +107,9 @@ void ping_end (void) void ping_nodes (void) { static int offset = 
0; /* mutex via node table write lock on entry */ - int i, pos, age, retries = 0; - time_t now; + int i, pos, retries = 0; + time_t now, still_live_time, node_dead_time; + static time_t last_ping_time = (time_t) 0; uint16_t base_state, no_resp_flag; hostlist_t ping_hostlist = hostlist_create(""); hostlist_t reg_hostlist = hostlist_create(""); @@ -130,7 +131,24 @@ void ping_nodes (void) reg_agent_args = xmalloc (sizeof (agent_arg_t)); reg_agent_args->msg_type = REQUEST_NODE_REGISTRATION_STATUS; reg_agent_args->retry = 0; + + /* + * If there are a large number of down nodes, the node ping + * can take a long time to complete: + * ping_time = down_nodes * agent_timeout / agent_parallelism + * ping_time = down_nodes * 10_seconds / 10 + * ping_time = down_nodes (seconds) + * Because of this, we extend the SlurmdTimeout by the + * time needed to complete a ping of all nodes. + */ now = time (NULL); + if ( (slurmctld_conf.slurmd_timeout == 0) || + (last_ping_time == (time_t) 0) ) + node_dead_time = (time_t) 0; + else + node_dead_time = last_ping_time - slurmctld_conf.slurmd_timeout; + still_live_time = now - slurmctld_conf.heartbeat_interval; + last_ping_time = now; offset += MAX_REG_THREADS; if ((offset > node_record_count) && @@ -140,16 +158,15 @@ for (i = 0; i < node_record_count; i++) { struct node_record *node_ptr = &node_record_table_ptr[i]; - age = difftime (now, node_ptr->last_response); - if (age < slurmctld_conf.heartbeat_interval) + if (node_ptr->last_response >= still_live_time) continue; base_state = node_ptr->node_state & (~NODE_STATE_NO_RESPOND); no_resp_flag = node_ptr->node_state & NODE_STATE_NO_RESPOND; if ((node_ptr->last_response != (time_t)0) && - (slurmctld_conf.slurmd_timeout != 0) && - (age >= slurmctld_conf.slurmd_timeout) && + (node_ptr->last_response <= node_dead_time) && ((base_state != NODE_STATE_DOWN) && + (base_state != NODE_STATE_COMPLETING) && (base_state != NODE_STATE_DRAINED))) { error ("Node %s not responding, setting DOWN", node_ptr->name); diff --git a/src/slurmd/interconnect.h b/src/slurmd/interconnect.h index d700b78834..c5461df757 100644 --- a/src/slurmd/interconnect.h +++ b/src/slurmd/interconnect.h @@ -29,7 +29,6 @@ #ifndef _INTERCONNECT_H_ #define _INTERCONNECT_H_ -#include "src/common/slurm_protocol_api.h" #include "src/slurmd/job.h" /* diff --git a/src/slurmd/mgr.c b/src/slurmd/mgr.c index aa9f918d12..d62153cbcf 100644 --- a/src/slurmd/mgr.c +++ b/src/slurmd/mgr.c @@ -555,13 +555,19 @@ _create_job_session(slurmd_job_t *job) return ESLURMD_FORK_FAILED; } + /* + * If the created job terminates immediately, the shared memory + * record can be purged before we can set the mpid and sid below. + * This does not truly indicate an error condition, but a rare + * timing anomaly.
Thus we log the event using debug() + */ job->jmgr_pid = getpid(); if (shm_update_step_mpid(job->jobid, job->stepid, getpid()) < 0) - error("shm_update_step_mpid: %m"); + debug("shm_update_step_mpid: %m"); job->smgr_pid = spid; if (shm_update_step_sid(job->jobid, job->stepid, spid) < 0) - error("shm_update_step_sid: %m"); + debug("shm_update_step_sid: %m"); /* * Read information from session manager slurmd diff --git a/src/slurmd/req.c b/src/slurmd/req.c index 6490dcb70c..7b09998321 100644 --- a/src/slurmd/req.c +++ b/src/slurmd/req.c @@ -78,6 +78,7 @@ static int _run_prolog(uint32_t jobid, uid_t uid); static int _run_epilog(uint32_t jobid, uid_t uid); static int _wait_for_procs(uint32_t job_id); +static pthread_mutex_t launch_mutex = PTHREAD_MUTEX_INITIALIZER; void slurmd_req(slurm_msg_t *msg, slurm_addr *cli) @@ -86,12 +87,16 @@ slurmd_req(slurm_msg_t *msg, slurm_addr *cli) switch(msg->msg_type) { case REQUEST_BATCH_JOB_LAUNCH: + slurm_mutex_lock(&launch_mutex); _rpc_batch_job(msg, cli); slurm_free_job_launch_msg(msg->data); + slurm_mutex_unlock(&launch_mutex); break; case REQUEST_LAUNCH_TASKS: + slurm_mutex_lock(&launch_mutex); _rpc_launch_tasks(msg, cli); slurm_free_launch_tasks_request_msg(msg->data); + slurm_mutex_unlock(&launch_mutex); break; case REQUEST_KILL_TASKS: _rpc_kill_tasks(msg, cli); diff --git a/src/slurmd/shm.c b/src/slurmd/shm.c index 5ab547f3cb..af4ac0f9fa 100644 --- a/src/slurmd/shm.c +++ b/src/slurmd/shm.c @@ -117,7 +117,8 @@ typedef struct shmem_struct { * static variables: * */ static sem_t *shm_lock; -static char *lockname; +static char *lockname; +static char *lockdir; static slurmd_shm_t *slurmd_shm; static int shmid; static pid_t attach_pid = (pid_t) 0; @@ -193,22 +194,25 @@ shm_fini(void) /* detach segment from local memory */ if (shmdt(slurmd_shm) < 0) { error("shmdt: %m"); - return -1; + goto error; } slurmd_shm = NULL; if (destroy && (shmctl(shmid, IPC_RMID, NULL) < 0)) { error("shmctl: %m"); - return -1; + goto error; } _shm_unlock(); if (destroy && (_shm_unlink_lock() < 0)) { error("_shm_unlink_lock: %m"); - return -1; + goto error; } return 0; + + error: + return -1; } void @@ -218,8 +222,13 @@ shm_cleanup(void) key_t key; int id = -1; - info("request to destroy shm lock [%s]", SHM_LOCKNAME); + if (!lockdir) + lockdir = xstrdup(conf->spooldir); + if ((s = _create_ipc_name(SHM_LOCKNAME))) { + + info("request to destroy shm lock [%s]", s); + key = ftok(s, 1); if (sem_unlink(s) < 0) error("sem_unlink: %m"); @@ -304,25 +313,24 @@ _is_valid_ipc_name(const char *name) return(1); } +/* + * Create IPC name by appending `name' to slurmd spooldir + * setting. + */ static char * _create_ipc_name(const char *name) { char *dst = NULL, *dir = NULL, *slash = NULL; int rc; + xassert (lockdir != NULL); + if ((rc = _is_valid_ipc_name(name)) != 1) fatal("invalid ipc name: `%s' %d", name, rc); else if (!(dst = xmalloc(PATH_MAX))) fatal("memory allocation failure"); -#if defined(POSIX_IPC_PREFIX) && defined(HAVE_POSIX_SEMS) - dir = POSIX_IPC_PREFIX; -#else - if ( !(dir = conf->spooldir) - && !(strlen(dir)) - && !(dir = getenv("TMPDIR"))) - dir = "/tmp"; -#endif /* POSIX_IPC_PREFIX */ + dir = lockdir; slash = (dir[strlen(dir) - 1] == '/') ? "" : "/"; @@ -1086,6 +1094,15 @@ _shm_lock_and_initialize() /* * Create locked semaphore (initial value == 0) */ + + /* + * Init lockdir to slurmd spooldir. + * Make sure it does not change for this instance of slurmd, + * even if spooldir does. 
+ */ + if (!lockdir) + lockdir = xstrdup(conf->spooldir); + shm_lock = _sem_open(SHM_LOCKNAME, O_CREAT|O_EXCL, 0600, 0); debug3("slurmd lockfile is \"%s\"", lockname); diff --git a/src/slurmd/slurmd.c b/src/slurmd/slurmd.c index cc490b40e4..f72fe83282 100644 --- a/src/slurmd/slurmd.c +++ b/src/slurmd/slurmd.c @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -140,6 +141,10 @@ main (int argc, char *argv[]) log_init(argv[0], conf->log_opts, LOG_DAEMON, conf->logfile); + xsignal(SIGTERM, &_term_handler); + xsignal(SIGINT, &_term_handler); + xsignal(SIGHUP, &_hup_handler ); + /* * Run slurmd_init() here in order to report early errors * (with shared memory and public keyfile) @@ -163,6 +168,12 @@ main (int argc, char *argv[]) _kill_old_slurmd(); + /* + * Restore any saved revoked credential information + */ + if (_restore_cred_state(conf->vctx)) + return SLURM_FAILURE; + if (interconnect_node_init() < 0) fatal("Unable to initialize interconnect."); @@ -176,10 +187,6 @@ main (int argc, char *argv[]) if (send_registration_msg(SLURM_SUCCESS) < 0) error("Unable to register with slurm controller"); - xsignal(SIGTERM, &_term_handler); - xsignal(SIGINT, &_term_handler); - xsignal(SIGHUP, &_hup_handler ); - _install_fork_handlers(); list_install_fork_handlers(); @@ -203,34 +210,32 @@ main (int argc, char *argv[]) return 0; } + static void _msg_engine() { slurm_fd sock; - slurm_addr cli; - while (1) { - if (_shutdown) - break; - again: - if ((sock = slurm_accept_msg_conn(conf->lfd, &cli)) < 0) { - if (errno == EINTR) { - if (_shutdown) { - verbose("got shutdown request"); - break; - } - if (_reconfig) { - _reconfigure(); - verbose("got reconfigure request"); - } - goto again; - } - error("accept: %m"); + while (!_shutdown) { + slurm_addr *cli = xmalloc (sizeof (*cli)); + if ((sock = slurm_accept_msg_conn(conf->lfd, cli)) >= 0) { + _handle_connection(sock, cli); continue; } - if (sock > 0) - _handle_connection(sock, &cli); + /* + * Otherwise, accept() failed. + */ + xfree (cli); + if (errno == EINTR) { + if (_reconfig) { + verbose("got reconfigure request"); + _reconfigure(); + } + continue; + } + error("accept: %m"); } + verbose("got shutdown request"); slurm_shutdown_msg_engine(conf->lfd); return; } @@ -336,6 +341,7 @@ _service_connection(void *arg) error ("close(%d): %m", con->fd); done: + xfree(con->cli_addr); xfree(con); slurm_free_msg(msg); _decrement_thd_count(); @@ -345,21 +351,24 @@ _service_connection(void *arg) int send_registration_msg(uint32_t status) { + int retval = SLURM_SUCCESS; slurm_msg_t req; slurm_msg_t resp; - slurm_node_registration_status_msg_t msg; + slurm_node_registration_status_msg_t *msg = xmalloc (sizeof (*msg)); - _fill_registration_msg(&msg); - msg.status = status; + _fill_registration_msg(msg); + msg->status = status; req.msg_type = MESSAGE_NODE_REGISTRATION_STATUS; - req.data = &msg; + req.data = msg; if (slurm_send_recv_controller_msg(&req, &resp) < 0) { error("Unable to register: %m"); - return SLURM_FAILURE; + retval = SLURM_FAILURE; } + slurm_free_node_registration_status_msg (msg); + /* XXX look at response msg */ @@ -374,7 +383,7 @@ _fill_registration_msg(slurm_node_registration_status_msg_t *msg) job_step_t *s; int n; - msg->node_name = conf->hostname; + msg->node_name = xstrdup (conf->hostname); get_procs(&msg->cpus); get_memory(&msg->real_memory_size); @@ -642,10 +651,12 @@ _slurmd_init() return SLURM_FAILURE; /* - * Restore any saved revoked credential information + * Create slurmd spool directory if necessary. 
diff --git a/src/slurmd/slurmd.c b/src/slurmd/slurmd.c
index cc490b40e4..f72fe83282 100644
--- a/src/slurmd/slurmd.c
+++ b/src/slurmd/slurmd.c
@@ -31,6 +31,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -140,6 +141,10 @@ main (int argc, char *argv[])
 
 	log_init(argv[0], conf->log_opts, LOG_DAEMON, conf->logfile);
 
+	xsignal(SIGTERM, &_term_handler);
+	xsignal(SIGINT,  &_term_handler);
+	xsignal(SIGHUP,  &_hup_handler );
+
 	/*
 	 * Run slurmd_init() here in order to report early errors
 	 * (with shared memory and public keyfile)
@@ -163,6 +168,12 @@ main (int argc, char *argv[])
 
 	_kill_old_slurmd();
 
+	/*
+	 * Restore any saved revoked credential information
+	 */
+	if (_restore_cred_state(conf->vctx))
+		return SLURM_FAILURE;
+
 	if (interconnect_node_init() < 0)
 		fatal("Unable to initialize interconnect.");
 
@@ -176,10 +187,6 @@ main (int argc, char *argv[])
 	if (send_registration_msg(SLURM_SUCCESS) < 0)
 		error("Unable to register with slurm controller");
 
-	xsignal(SIGTERM, &_term_handler);
-	xsignal(SIGINT, &_term_handler);
-	xsignal(SIGHUP, &_hup_handler );
-
 	_install_fork_handlers();
 	list_install_fork_handlers();
 
@@ -203,34 +210,32 @@ main (int argc, char *argv[])
 	return 0;
 }
 
+
 static void
 _msg_engine()
 {
 	slurm_fd sock;
-	slurm_addr cli;
-	while (1) {
-		if (_shutdown)
-			break;
-	  again:
-		if ((sock = slurm_accept_msg_conn(conf->lfd, &cli)) < 0) {
-			if (errno == EINTR) {
-				if (_shutdown) {
-					verbose("got shutdown request");
-					break;
-				}
-				if (_reconfig) {
-					_reconfigure();
-					verbose("got reconfigure request");
-				}
-				goto again;
-			}
-			error("accept: %m");
+
+	while (!_shutdown) {
+		slurm_addr *cli = xmalloc (sizeof (*cli));
+		if ((sock = slurm_accept_msg_conn(conf->lfd, cli)) >= 0) {
+			_handle_connection(sock, cli);
 			continue;
 		}
-		if (sock > 0)
-			_handle_connection(sock, &cli);
+		/*
+		 *  Otherwise, accept() failed.
+		 */
+		xfree (cli);
+		if (errno == EINTR) {
+			if (_reconfig) {
+				verbose("got reconfigure request");
+				_reconfigure();
+			}
+			continue;
+		}
+		error("accept: %m");
 	}
+	verbose("got shutdown request");
 	slurm_shutdown_msg_engine(conf->lfd);
 	return;
 }
@@ -336,6 +341,7 @@ _service_connection(void *arg)
 		error ("close(%d): %m", con->fd);
 
     done:
+	xfree(con->cli_addr);
 	xfree(con);
 	slurm_free_msg(msg);
 	_decrement_thd_count();
@@ -345,21 +351,24 @@ _service_connection(void *arg)
 int
 send_registration_msg(uint32_t status)
 {
+	int retval = SLURM_SUCCESS;
 	slurm_msg_t req;
 	slurm_msg_t resp;
-	slurm_node_registration_status_msg_t msg;
+	slurm_node_registration_status_msg_t *msg = xmalloc (sizeof (*msg));
 
-	_fill_registration_msg(&msg);
-	msg.status = status;
+	_fill_registration_msg(msg);
+	msg->status = status;
 
 	req.msg_type = MESSAGE_NODE_REGISTRATION_STATUS;
-	req.data = &msg;
+	req.data = msg;
 
 	if (slurm_send_recv_controller_msg(&req, &resp) < 0) {
 		error("Unable to register: %m");
-		return SLURM_FAILURE;
+		retval = SLURM_FAILURE;
 	}
 
+	slurm_free_node_registration_status_msg (msg);
+
 	/* XXX look at response msg
 	 */
 
@@ -374,7 +383,7 @@ _fill_registration_msg(slurm_node_registration_status_msg_t *msg)
 	job_step_t *s;
 	int n;
 
-	msg->node_name = conf->hostname;
+	msg->node_name = xstrdup (conf->hostname);
 	get_procs(&msg->cpus);
 	get_memory(&msg->real_memory_size);
@@ -642,10 +651,12 @@ _slurmd_init()
 		return SLURM_FAILURE;
 
 	/*
-	 * Restore any saved revoked credential information
+	 * Create slurmd spool directory if necessary.
 	 */
-	if (_restore_cred_state(conf->vctx))
+	if (_set_slurmd_spooldir() < 0) {
+		error("Unable to initialize slurmd spooldir");
 		return SLURM_FAILURE;
+	}
 
 	/*
 	 * Cleanup shared memory if so configured
@@ -662,18 +673,14 @@ _slurmd_init()
 
 	/*
 	 * Initialize slurmd shared memory
+	 * This *must* be called after _set_slurmd_spooldir()
+	 * since the default location of the slurmd lockfile is
+	 * _in_ the spooldir.
+	 *
 	 */
 	if (shm_init(true) < 0)
 		return SLURM_FAILURE;
 
-	/*
-	 * Create slurmd spool directory if necessary.
-	 */
-	if (_set_slurmd_spooldir() < 0) {
-		error("Unable to initialize slurmd spooldir");
-		return SLURM_FAILURE;
-	}
-
 	if (conf->daemonize && (chdir("/tmp") < 0)) {
 		error("Unable to chdir to /tmp");
 		return SLURM_FAILURE;
@@ -691,8 +698,8 @@ _restore_cred_state(slurm_cred_ctx_t ctx)
 	int cred_fd, data_allocated, data_read = 0;
 	Buf buffer = NULL;
 
-	if ((mkdir(conf->spooldir, 0755) < 0) &&
-	    (errno != EEXIST)) {
+	if (  (mkdir(conf->spooldir, 0755) < 0)
+	   && (errno != EEXIST) ) {
 		error("mkdir(%s): %m", conf->spooldir);
 		return SLURM_ERROR;
 	}
@@ -727,7 +734,7 @@ static int
 _slurmd_fini()
 {
 	save_cred_state(conf->vctx);
-	shm_fini();
+	shm_fini();
 	return SLURM_SUCCESS;
 }
 
diff --git a/src/slurmd/ulimits.c b/src/slurmd/ulimits.c
index 92f4967d26..ca3155f99c 100644
--- a/src/slurmd/ulimits.c
+++ b/src/slurmd/ulimits.c
@@ -110,7 +110,8 @@ _set_limit(char **env, struct userlim *u)
 		r.rlim_cur = (val == -1L) ? RLIM_INFINITY : (rlim_t) val;
 
 		if (setrlimit(u->resource, &r) < 0)
-			error("setrlimit(%s,%ld): %m", name, (long)r.rlim_cur);
+			error("Can't propagate %s of %ld from submit host: %m",
+			      name, (long)r.rlim_cur);
 	}
 
 	unsetenvp(env, u->var);
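
A key detail in the rewritten _msg_engine() above is ownership of the
per-connection address: it is heap-allocated for each accept(), handed to the
service thread on success (which now frees it via xfree(con->cli_addr)), and
freed by the loop itself when accept() fails. A reduced sketch of that rule,
using plain malloc/free in place of xmalloc/xfree and hypothetical wrappers
(accept_connection, spawn_service_thread) for the SLURM calls.

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/socket.h>

    struct conn { int fd; struct sockaddr_storage *addr; };

    extern int  accept_connection(struct sockaddr_storage *addr); /* hypothetical */
    extern void spawn_service_thread(struct conn *c);  /* hypothetical: the thread
                                                        * frees c->addr, then c   */

    void msg_engine(volatile int *shutdown)
    {
            while (!*shutdown) {
                    struct sockaddr_storage *addr = malloc(sizeof(*addr));
                    int fd = accept_connection(addr);

                    if (fd >= 0) {
                            struct conn *c = malloc(sizeof(*c));
                            c->fd = fd;
                            c->addr = addr;
                            spawn_service_thread(c); /* ownership moves to thread */
                            continue;
                    }
                    free(addr);          /* accept failed: the loop still owns it */
                    if (errno == EINTR)  /* interrupted, e.g. by a reconfigure    */
                            continue;
                    perror("accept");    /* real error: log it and keep serving   */
            }
    }
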
"stderr" : "stdout"), host, hdr.taskid, fd ); + cbuf_destroy(cb); return SLURM_SUCCESS; fail: + cbuf_destroy(cb); close(fd); return SLURM_ERROR; } diff --git a/src/srun/job.c b/src/srun/job.c index cf27a2107c..d3b901a5d3 100644 --- a/src/srun/job.c +++ b/src/srun/job.c @@ -219,12 +219,15 @@ int job_rc(job_t *job) { int i; - int rc; + int rc = 0; - if (job->rc) return(job->rc); + if (job->rc >= 0) return(job->rc); - for (i = 0; i < opt.nprocs; i++) - job->rc |= job->tstatus[i]; + + for (i = 0; i < opt.nprocs; i++) { + if (job->rc < job->tstatus[i]) + job->rc = job->tstatus[i]; + } if ((rc = WEXITSTATUS(job->rc))) job->rc = rc; @@ -248,8 +251,12 @@ void job_fatal(job_t *job, const char *msg) void job_destroy(job_t *job, int error) { + if (job->removed) + return; + if (job->old_job) { debug("cancelling job step %u.%u", job->jobid, job->stepid); + slurm_kill_job_step(job->jobid, job->stepid, SIGKILL); slurm_complete_job_step(job->jobid, job->stepid, 0, error); } else if (!opt.no_alloc) { debug("cancelling job %u", job->jobid); @@ -263,6 +270,8 @@ job_destroy(job_t *job, int error) #ifdef HAVE_TOTALVIEW if (error) tv_launch_failure(); #endif + + job->removed = true; } @@ -389,7 +398,7 @@ _job_create_internal(allocation_info_t *info) job->state = SRUN_JOB_INIT; job->signaled = false; - job->rc = 0; + job->rc = -1; job->nodelist = xstrdup(info->nodelist); hl = hostlist_create(job->nodelist); @@ -398,6 +407,7 @@ _job_create_internal(allocation_info_t *info) job->jobid = info->jobid; job->stepid = info->stepid; job->old_job = false; + job->removed = false; /* * Initialize Launch and Exit timeout values diff --git a/src/srun/job.h b/src/srun/job.h index 6c254d331e..177f6c2b6b 100644 --- a/src/srun/job.h +++ b/src/srun/job.h @@ -75,6 +75,7 @@ typedef struct srun_job { uint32_t jobid; /* assigned job id */ uint32_t stepid; /* assigned step id */ bool old_job; /* run job step under previous allocation */ + bool removed; /* job has been removed from SLURM */ job_state_t state; /* job state */ pthread_mutex_t state_mutex; @@ -92,7 +93,7 @@ typedef struct srun_job { uint32_t **tids; /* host id => task ids mapping */ uint32_t *hostid; /* task id => host id mapping */ - slurm_addr *slurmd_addr;/* slurm_addr vector to slurmd's */ + slurm_addr *slurmd_addr;/* slurm_addr vector to slurmd's */ pthread_t sigid; /* signals thread tid */ diff --git a/src/srun/opt.c b/src/srun/opt.c index ca6b9fc401..460b220936 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -403,6 +403,8 @@ static void _opt_default() opt.join = false; opt.max_wait = slurm_get_wait_time(); + opt.quit_on_intr = false; + _verbose = 0; opt.slurmd_debug = LOG_LEVEL_QUIET; @@ -418,7 +420,7 @@ static void _opt_default() opt.exc_nodes = NULL; opt.max_launch_time = 60; /* 60 seconds to launch job */ opt.max_exit_timeout= 60; /* Warn user 60 seconds after task exit */ - opt.msg_timeout = 2; /* Default launch msg timeout */ + opt.msg_timeout = 5; /* Default launch msg timeout */ mode = MODE_NORMAL; @@ -618,6 +620,7 @@ static void _opt_args(int argc, char **argv) {"wait", required_argument, 0, 'W'}, {"exclude", required_argument, 0, 'x'}, {"no-allocate", no_argument, 0, 'Z'}, + {"quit-on-interrupt", no_argument, 0, 'q'}, {"contiguous", no_argument, 0, LONG_OPT_CONT}, {"mincpus", required_argument, 0, LONG_OPT_MINCPU}, @@ -631,7 +634,7 @@ static void _opt_args(int argc, char **argv) {"usage", no_argument, 0, LONG_OPT_USAGE} }; char *opt_string = "+a:Abc:C:d:D:e:Hi:IjJ:klm:n:N:" - "o:Op:r:st:T:uvVw:W:x:Z"; + "o:Op:r:st:T:uvVw:W:x:Zq"; char **rest = NULL; 
diff --git a/src/srun/opt.c b/src/srun/opt.c
index ca6b9fc401..460b220936 100644
--- a/src/srun/opt.c
+++ b/src/srun/opt.c
@@ -403,6 +403,8 @@ static void _opt_default()
 	opt.join	= false;
 	opt.max_wait	= slurm_get_wait_time();
 
+	opt.quit_on_intr = false;
+
 	_verbose = 0;
 	opt.slurmd_debug = LOG_LEVEL_QUIET;
@@ -418,7 +420,7 @@ static void _opt_default()
 	opt.exc_nodes	    = NULL;
 	opt.max_launch_time = 60; /* 60 seconds to launch job             */
 	opt.max_exit_timeout= 60; /* Warn user 60 seconds after task exit */
-	opt.msg_timeout     = 2;  /* Default launch msg timeout           */
+	opt.msg_timeout     = 5;  /* Default launch msg timeout           */
 
 	mode	= MODE_NORMAL;
@@ -618,6 +620,7 @@ static void _opt_args(int argc, char **argv)
 		{"wait",          required_argument, 0, 'W'},
 		{"exclude",       required_argument, 0, 'x'},
 		{"no-allocate",   no_argument,       0, 'Z'},
+		{"quit-on-interrupt", no_argument,   0, 'q'},
 		{"contiguous",    no_argument,       0, LONG_OPT_CONT},
 		{"mincpus",       required_argument, 0, LONG_OPT_MINCPU},
@@ -631,7 +634,7 @@ static void _opt_args(int argc, char **argv)
 		{"usage",         no_argument,       0, LONG_OPT_USAGE}
 	};
 	char *opt_string = "+a:Abc:C:d:D:e:Hi:IjJ:klm:n:N:"
-		"o:Op:r:st:T:uvVw:W:x:Z";
+		"o:Op:r:st:T:uvVw:W:x:Zq";
 	char **rest = NULL;
 
 	opt.progname = xbasename(argv[0]);
@@ -792,6 +795,9 @@ static void _opt_args(int argc, char **argv)
 		case (int)'Z':
 			opt.no_alloc = true;
 			break;
+		case (int)'q':
+			opt.quit_on_intr = true;
+			break;
 		case LONG_OPT_CONT:
 			opt.contiguous = true;
 			break;
diff --git a/src/srun/opt.h b/src/srun/opt.h
index f141e2fb37..e01f39bcab 100644
--- a/src/srun/opt.h
+++ b/src/srun/opt.h
@@ -122,6 +122,7 @@ typedef struct srun_options {
 	bool no_kill;		/* --no-kill, -k	*/
 	bool share;		/* --share,   -s	*/
 	int  max_wait;		/* --wait,    -W	*/
+	bool quit_on_intr;	/* --quit-on-interrupt, -q */
 #ifdef HAVE_TOTALVIEW
 	bool totalview;		/* srun controlled by TotalView */
 #endif
diff --git a/src/srun/signals.c b/src/srun/signals.c
index bd8c66dc74..cb5af5cca1 100644
--- a/src/srun/signals.c
+++ b/src/srun/signals.c
@@ -189,6 +189,10 @@ _sigterm_handler(int signum)
 static void
 _handle_intr(job_t *job, time_t *last_intr, time_t *last_intr_sent)
 {
+	if (opt.quit_on_intr) {
+		job_force_termination(job);
+		pthread_exit (0);
+	}
 	if ((time(NULL) - *last_intr) > 1) {
 		info("interrupt (one more within 1 sec to abort)");
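
These last hunks wire the new -q, --quit-on-interrupt flag end to end: a
default in _opt_default(), a getopt_long entry and case label in _opt_args(),
a field in srun_options, and an early exit in the interrupt handler. A reduced
sketch of the handler's behavior under the flag; the types and
force_termination() are simplified stand-ins for srun's
job_force_termination().

    #include <pthread.h>
    #include <stdbool.h>
    #include <time.h>

    extern bool quit_on_intr;            /* set by -q / --quit-on-interrupt    */
    extern void force_termination(void); /* stand-in for job_force_termination */

    static void handle_intr(time_t *last_intr)
    {
            if (quit_on_intr) {          /* -q: a single Ctrl-C kills the job  */
                    force_termination();
                    pthread_exit(0);     /* the signal thread is finished      */
            }
            if ((time(NULL) - *last_intr) > 1)
                    *last_intr = time(NULL); /* arm: 2nd ^C within 1s aborts   */
    }

With the flag set (e.g. srun --quit-on-interrupt ...), one Ctrl-C terminates
the running job instead of requiring a second interrupt within one second.
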