Permalink
Browse files

This commit was manufactured by cvs2svn to create tag

'slurm-0-2-24-1'.
  • Loading branch information...
1 parent e5cea5e commit b371864c12c669e16272d1cceaf85a798f4d0a46 no author committed Feb 28, 2004
View
4 META
@@ -7,6 +7,6 @@
Name: slurm
Major: 0
Minor: 2
- Micro: 20
- Version: 0.2.20
+ Micro: 24
+ Version: 0.2.24
Release: 1
View
37 NEWS
@@ -1,6 +1,38 @@
This file describes changes in recent versions of SLURM. It primarily
documents those changes that are of interest to users and admins.
+* Changes in SLURM 0.2.24
+=========================
+ -- Fixes for reported problems:
+ - slurm/387: Jobs lost and nodes DOWN on slurmctld restart
+ -- Nodes no longer transition from COMPLETING to DOWN when not responding.
+ Instead, slurmctld either waits for positive verification of job
+ completion, or an administrator must explicitly set the node DOWN.
+ -- New srun option `-q, --quit-on-interrupt' enables single Ctrl-C to
+ terminate running job.
+ -- Improved error message from srun when jobid set in environment is
+ no longer running.
+ -- Added job and node state descriptions to the squeue(1) and sinfo(1)
+ man pages.
+
+* Changes in SLURM 0.2.23
+=========================
+ -- Fixes for reported problems:
+ - slurm/381: Hold jobs requesting more resources than partition limit.
+
+* Changes in SLURM 0.2.22
+=========================
+ -- Fixes for reported problems:
+ - slurm/326: Node stays in completing state indefinitely.
+ - slurm/328: slurmd uses different shared memory key on restart.
+ - slurm/329: Job step processes may be left running when one task dies.
+ - slurm/334: slurmd segv with multiple simultaneous job steps.
+ -- Allow more digits for priority values in scontrol.
+ -- Applied various fixes for memory leaks.
+ -- Remove logic preventing DPCS from allocating jobs with more than
+ eight node segments. Fix for DPCS should now be in production.
+ -- Changed compact string for DRAINING state to "drng" from "drain."
+
* Changes in SLURM 0.2.21
=========================
-- Fixes for reported problems:
@@ -9,14 +41,17 @@ documents those changes that are of interest to users and admins.
- slurm/300: Possibly killing wrong job on slurmd restart
- slurm/312: Freeing non-allocated memory and killing slurmd
-- Assorted changes to support RedHat Enterprise Linux 3.0 and IA64
+ -- Initial Elan4 and libelanctrl support (--with-elan).
-- Slurmctld was sometimes inappropriately setting a job's priority
to 1 when a node was down (even if up nodes could be used for the
job when a running job completes)
-- Convert all user commands from use of popt library to getopt_long()
-- If TotalView support is requested, srun exports "totalview_jobid"
variable for `%J' expansion in TV bulk launch string.
+ -- Fix several locking bugs in slurmd IO layer.
-- Throttle back repetitious error messages in slurmd to avoid filling
- slurm logfiles.
+ log files.
+
* Changes in SLURM 0.2.20
=========================
View
@@ -1,4 +1,4 @@
-.TH SINFO "1" "October 2003" "sinfo 0.2" "Slurm components"
+.TH SINFO "1" "February 2004" "sinfo 0.2" "Slurm components"
.SH "NAME"
sinfo \- Used to view information about Slurm nodes and partitions.
@@ -209,6 +209,52 @@ that are presently not responding.
\fBTMP_DISK\fR
Size of temporary disk space in megabytes on these nodes.
+.SH "NODE STATE CODES"
+.PP
+Node state codes are shortened as required for the field size.
+If the node state code is followed by "*", this indicates the node
+is presently not responding and will not be allocated any new work.
+If the node remains non-responsive, it will be placed in the \fBDOWN\fR
+state.
+.TP 12
+ALLOCATED
+The node has been allocated to one or more jobs.
+.TP
+COMPLETING
+One or more jobs have been allocated this node and are in the process
+of COMPLETING. This node state will be left when all of the job's
+processes have terminated and the SLURM epilog program (if any) has
+terminated. See the \fBEpilog\fR parameter description in the
+\fBslurm.conf\fR man page for more information.
+.TP
+DOWN
+The node is unavailable for use. SLURM can automatically place nodes
+in this state if some failure occurs. System administrators may also
+explicitly place nodes in this state. If a node resumes normal operation,
+SLURM can automatically return it to service. See the \fBReturnToService\fR
+and \fBSlurmdTimeout\fR parameter descriptions in the \fBslurm.conf\fR(5)
+man page for more information.
+.TP
+DRAINED
+The node is unavailable for use per system administrator request.
+See the \fBupdate node\fR command in the \fBscontrol\fR(1) man page
+or the \fBslurm.conf\fR(5) man page for more information.
+.TP
+DRAINING
+The node is currently executing a job, but will not be allocated to
+additional jobs. The node state will be changed to state \fBDRAINED\fR
+when the last job on it completes. Nodes enter this state per system
+administrator request. See the \fBupdate node\fR command in the
+\fBscontrol\fR(1) man page or the \fBslurm.conf\fR(5) man page for
+more information.
+.TP
+IDLE
+The node is not allocated to any jobs and is available for use.
+.TP
+UNKNOWN
+The SLURM controller has just started and the node's state has not
+yet been determined.
+
.SH "ENVIRONMENT VARIABLES"
.PP
Some \fBsinfo\fR options may be set via environment variables. These
View
@@ -1,4 +1,4 @@
-.TH SQUEUE "1" "October 2003" "squeue 0.2" "Slurm components"
+.TH SQUEUE "1" "February 2004" "squeue 0.2" "Slurm components"
.SH "NAME"
squeue \- Used to view information of jobs located in the scheduling queue.
@@ -148,6 +148,26 @@ Report details of squeue's actions.
\fB\-V\fR , \fB\-\-version\fR
Print version information and exit.
+.SH "JOB STATE CODES"
+.TP 17
+CD COMPLETED
+Job has terminated all processes on all nodes.
+.TP
+CG COMPLETING
+Job is in the process of completing. Some processes on some nodes may still be active.
+.TP
+F FAILED
+Job terminated with non-zero exit code or other failure condition.
+.TP
+NF NODE_FAIL
+Job terminated due to failure of one or more allocated nodes.
+.TP
+PD PENDING
+Job is awaiting resource allocation.
+.TP
+TO TIMEOUT
+Job terminated upon reaching its time limit.
+
.SH "ENVIRONMENT VARIABLES"
.PP
Some \fBsqueue\fR options may be set via environment variables. These
View
@@ -205,10 +205,15 @@ on the same nodes or the values of \fBSlurmctldPort\fR and \fBSlurmdPort\fR
must be different.
.TP
\fBSlurmdSpoolDir\fR
-Fully qualified pathname of a file into which the \fBslurmd\fR daemon's state
-information is written. This must be a common pathname for all nodes, but
-should represent a file which is local to each node (reference a local file
-system). The default value is "/tmp/slurmd".
+Fully qualified pathname of a directory into which the \fBslurmd\fR
+daemon's state information and batch job script information are written. This
+must be a common pathname for all nodes, but should represent a directory which
+is local to each node (reference a local file system). The default value
+is "/var/spool/slurmd". \fBNOTE\fR: This directory is also used to store \fBslurmd\fR's
+shared memory lockfile, and \fBshould not be changed\fR unless the system
+is being cleanly restarted. If the location of \fBSlurmdSpoolDir\fR is
+changed and \fBslurmd\fR is restarted, the new daemon will attach to a
+different shared memory region and lose track of any running jobs.
.TP
\fBSlurmdTimeout\fR
The interval, in seconds, that the SLURM controller waits for \fBslurmd\fR
@@ -228,7 +233,7 @@ Fully qualified pathname of the file system available to user jobs for
temporary storage. This parameter is used in establishing a node's \fBTmpDisk\fR space.
The default value is "/tmp".
.TP
-\fBWaitTimefR
+\fBWaitTime\fR
Specifies how many seconds the srun command should by default wait after
the first task terminates before terminating all remaining tasks. The
"--wait" option on the srun command line overrides this value.
View
@@ -796,7 +796,8 @@ extern int slurm_update_node PARAMS(( update_node_msg_t * node_msg ));
* default values
* OUT job_desc_msg - user defined partition descriptor
*/
-void slurm_init_part_desc_msg PARAMS((update_part_msg_t * update_part_msg ));
+extern void slurm_init_part_desc_msg PARAMS((
+ update_part_msg_t * update_part_msg ));
/*
* slurm_load_partitions - issue RPC to get slurm all partition configuration
View
@@ -43,19 +43,22 @@ libslurm_la_SOURCES = \
common_dir = $(top_builddir)/src/common
-libslurm_la_LIBADD = $(common_dir)/libcommon.la -lpthread
+libslurm_la_LIBADD = \
+ $(common_dir)/libcommon.la -lpthread
-libslurm_la_LDFLAGS = -export-symbols libslurm.sym \
- -version-info $(current):$(rev):$(age)
+libslurm_la_LDFLAGS = \
+ -export-symbols libslurm.sym \
+ -version-info $(current):$(rev):$(age)
-libslurm_la_DEPENDENCIES = libslurm.sym $(common_dir)/libcommon.la
+libslurm_la_DEPENDENCIES = \
+ libslurm.sym \
+ $(common_dir)/libcommon.la
-
-libslurm.sym : $(top_builddir)/slurm/slurm.h
- sed -n 's/^extern.* \(slurm[^ ]*\).*$$/\1/p' $< >libslurm.sym
+libslurm.sym : $(top_builddir)/slurm/slurm.h
+ -sed -n 's/^extern .* \([a-zA-Z0-9_]*\) PARAMS.*$$/\1/p' $< $* >libslurm.sym
distclean-local:
- -rm libslurm.sym
+ -rm -rf libslurm.map libslurm.sym
force:
$(libslurm_la_LIBADD) : force
View
@@ -89,6 +89,7 @@ slurm_allocate_resources (job_desc_msg_t *req,
if (rc == SLURM_SOCKET_ERROR)
return SLURM_SOCKET_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_SLURM_RC:
if (_handle_rc_msg(&resp_msg) < 0)
@@ -127,6 +128,7 @@ int slurm_job_will_run (job_desc_msg_t *req,
if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
return SLURM_SOCKET_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_SLURM_RC:
if (_handle_rc_msg(&resp_msg) < 0)
@@ -183,7 +185,7 @@ slurm_allocate_resources_and_run (job_desc_msg_t *req,
if (rc == SLURM_SOCKET_ERROR)
return SLURM_SOCKET_ERROR;
-
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_SLURM_RC:
if (_handle_rc_msg(&resp_msg) < 0)
@@ -222,6 +224,7 @@ slurm_job_step_create (job_step_create_request_msg_t *req,
if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_SLURM_RC:
if (_handle_rc_msg(&resp_msg) < 0)
@@ -259,6 +262,7 @@ slurm_confirm_allocation (old_job_alloc_msg_t *req,
if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch(resp_msg.msg_type) {
case RESPONSE_SLURM_RC:
if (_handle_rc_msg(&resp_msg) < 0)
View
@@ -73,7 +73,8 @@ slurm_kill_job_step (uint32_t job_id, uint32_t step_id, uint16_t signal)
if (slurm_send_recv_controller_rc_msg(&msg, &rc) < 0)
return SLURM_FAILURE;
- if (rc) slurm_seterrno_ret(rc);
+ if (rc)
+ slurm_seterrno_ret(rc);
return SLURM_SUCCESS;
}
View
@@ -85,7 +85,8 @@ slurm_complete_job_step ( uint32_t job_id, uint32_t step_id,
if (slurm_send_recv_controller_rc_msg(&req_msg, &rc) < 0)
return SLURM_ERROR;
- if (rc) slurm_seterrno_ret(rc);
+ if (rc)
+ slurm_seterrno_ret(rc);
return SLURM_PROTOCOL_SUCCESS;
}
View
@@ -151,10 +151,10 @@ slurm_load_ctl_conf (time_t update_time, slurm_ctl_conf_t **confp)
if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_BUILD_INFO:
*confp = (slurm_ctl_conf_info_msg_t *) resp_msg.data;
- slurm_free_cred(resp_msg.cred);
break;
case RESPONSE_SLURM_RC:
rc = ((return_code_msg_t *) resp_msg.data)->return_code;
View
@@ -219,10 +219,10 @@ slurm_load_jobs (time_t update_time, job_info_msg_t **resp)
if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_JOB_INFO:
*resp = (job_info_msg_t *)resp_msg.data;
- slurm_free_cred(resp_msg.cred);
break;
case RESPONSE_SLURM_RC:
rc = ((return_code_msg_t *) resp_msg.data)->return_code;
@@ -267,6 +267,7 @@ slurm_pid2jobid (pid_t job_pid, uint32_t *jobid)
if (slurm_send_recv_node_msg(&req_msg, &resp_msg, 0) < 0)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_JOB_ID:
*jobid = ((job_id_response_msg_t *) resp_msg.data)->job_id;
View
@@ -125,10 +125,10 @@ slurm_get_job_steps (time_t update_time, uint32_t job_id, uint32_t step_id,
if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_JOB_STEP_INFO:
*resp = (job_step_info_response_msg_t *) resp_msg.data;
- slurm_free_cred(resp_msg.cred);
break;
case RESPONSE_SLURM_RC:
rc = ((return_code_msg_t *) resp_msg.data)->return_code;
View
@@ -129,10 +129,10 @@ slurm_load_node (time_t update_time, node_info_msg_t **resp)
if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_NODE_INFO:
*resp = (node_info_msg_t *) resp_msg.data;
- slurm_free_cred(resp_msg.cred);
break;
case RESPONSE_SLURM_RC:
rc = ((return_code_msg_t *) resp_msg.data)->return_code;
View
@@ -161,10 +161,10 @@ slurm_load_partitions (time_t update_time, partition_info_msg_t **resp)
if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_PARTITION_INFO:
*resp = (partition_info_msg_t *) resp_msg.data;
- slurm_free_cred(resp_msg.cred);
break;
case RESPONSE_SLURM_RC:
rc = ((return_code_msg_t *) resp_msg.data)->return_code;
View
@@ -58,7 +58,8 @@ slurm_reconfigure ( void )
if (slurm_send_recv_controller_rc_msg(&req, &rc) < 0)
return SLURM_ERROR;
- if (rc) slurm_seterrno_ret(rc);
+ if (rc)
+ slurm_seterrno_ret(rc);
return SLURM_PROTOCOL_SUCCESS;
}
@@ -129,6 +130,7 @@ _send_message_controller (enum controller_id dest, slurm_msg_t *req)
if ((rc = slurm_receive_msg(fd, &resp_msg, 0)) < 0)
slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR);
+ slurm_free_cred(resp_msg.cred);
if (slurm_shutdown_msg_conn(fd) != SLURM_SUCCESS)
slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_SHUTDOWN_ERROR);
View
@@ -87,6 +87,7 @@ slurm_submit_batch_job (job_desc_msg_t *req,
if (rc == SLURM_SOCKET_ERROR)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_SLURM_RC:
rc = ((return_code_msg_t *) resp_msg.data)->return_code;
Oops, something went wrong.

0 comments on commit b371864

Please sign in to comment.