This commit was manufactured by cvs2svn to create tag 'slurm-0-2-23-0'.
commit ed247e5a7f085dfd49bf84b290de788b2b04387f 1 parent 447bb06
no author authored
Showing with 216 additions and 136 deletions.
  1. +3 −3 META
  2. +21 −1 NEWS
  3. +10 −5 doc/man/man5/slurm.conf.5
  4. +5 −1 src/api/allocate.c
  5. +2 −1  src/api/cancel.c
  6. +2 −1  src/api/complete.c
  7. +1 −1  src/api/config_info.c
  8. +2 −1  src/api/job_info.c
  9. +1 −1  src/api/job_step_info.c
  10. +1 −1  src/api/node_info.c
  11. +1 −1  src/api/partition_info.c
  12. +3 −1 src/api/reconfigure.c
  13. +1 −0  src/api/submit.c
  14. +6 −7 src/common/log.c
  15. +8 −3 src/common/slurm_auth.c
  16. +1 −1  src/common/slurm_errno.c
  17. +1 −1  src/common/slurm_protocol_defs.c
  18. +0 −4 src/common/slurm_protocol_pack.c
  19. +1 −0  src/common/xsignal.c
  20. +1 −0  src/plugins/auth/auth_munge.c
  21. +1 −1  src/scontrol/scontrol.c
  22. +2 −22 src/slurmctld/job_mgr.c
  23. +20 −11 src/slurmctld/node_mgr.c
  24. +4 −1 src/slurmctld/node_scheduler.c
  25. +3 −0  src/slurmctld/partition_mgr.c
  26. +0 −1  src/slurmd/interconnect.h
  27. +8 −2 src/slurmd/mgr.c
  28. +5 −0 src/slurmd/req.c
  29. +30 −13 src/slurmd/shm.c
  30. +50 −43 src/slurmd/slurmd.c
  31. +2 −1  src/slurmd/ulimits.c
  32. +2 −0  src/srun/io.c
  33. +15 −5 src/srun/job.c
  34. +2 −1  src/srun/job.h
  35. +1 −1  src/srun/opt.c
6 META
@@ -7,6 +7,6 @@
Name: slurm
Major: 0
Minor: 2
- Micro: 21
- Version: 0.2.21
- Release: 1
+ Micro: 23
+ Version: 0.2.23
+ Release: 0
22 NEWS
@@ -1,6 +1,24 @@
This file describes changes in recent versions of SLURM. It primarily
documents those changes that are of interest to users and admins.
+* Changes in SLURM 0.2.23
+=========================
+ -- Fixes for reported problems:
+ - slurm/381: Hold jobs requesting more resources than partition limit
+
+* Changes in SLURM 0.2.22
+=========================
+ -- Fixes for reported problems:
+ - slurm/326: Node stays in completing state indefinitely.
+ - slurm/328: slurmd uses different shared memory key on restart.
+ - slurm/329: Job step processes may be left running when one task dies.
+ - slurm/334: slurmd segv with multiple simultaneous job steps.
+ -- Allow more digits for priority values in scontrol.
+ -- Applied various fixes for memory leaks.
+ -- Remove logic preventing DPCS from allocating jobs with more than
+ eight node segments. Fix for DPCS should now be in production.
+ -- Changed compact string for DRAINING state to "drng" from "drain."
+
* Changes in SLURM 0.2.21
=========================
-- Fixes for reported problems:
@@ -9,6 +27,7 @@ documents those changes that are of interest to users and admins.
- slurm/300: Possibly killing wrong job on slurmd restart
- slurm/312: Freeing non-allocated memory and killing slurmd
-- Assorted changes to support RedHat Enterprise Linux 3.0 and IA64
+ -- Initial Elan4 and libelanctrl support (--with-elan).
-- Slurmctld was sometimes inappropriately setting a job's priority
to 1 when a node was down (even if up nodes could be used for the
job when a running job completes)
@@ -17,7 +36,8 @@ documents those changes that are of interest to users and admins.
variable for `%J' expansion in TV bulk launch string.
-- Fix several locking bugs in slurmd IO layer.
-- Throttle back repetitious error messages in slurmd to avoid filling
- slurm logfiles.
+ log files.
+
* Changes in SLURM 0.2.20
=========================
15 doc/man/man5/slurm.conf.5
@@ -205,10 +205,15 @@ on the same nodes or the values of \fBSlurmctldPort\fR and \fBSlurmdPort\fR
must be different.
.TP
\fBSlurmdSpoolDir\fR
-Fully qualified pathname of a file into which the \fBslurmd\fR daemon's state
-information is written. This must be a common pathname for all nodes, but
-should represent a file which is local to each node (reference a local file
-system). The default value is "/tmp/slurmd".
+Fully qualified pathname of a directory into which the \fBslurmd\fR
+daemon's state information and batch job script information are written. This
+must be a common pathname for all nodes, but should represent a directory which
+is local to each node (reference a local file system). The default value
+is "/var/spool/slurmd." \fBNOTE\fR: This directory is also used to store \fBslurmd\fR's
+shared memory lockfile, and \fBshould not be changed\fR unless the system
+is being cleanly restarted. If the location of \fBSlurmdSpoolDir\fR is
+changed and \fBslurmd\fR is restarted, the new daemon will attach to a
+different shared memory region and lose track of any running jobs.
.TP
\fBSlurmdTimeout\fR
The interval, in seconds, that the SLURM controller waits for \fBslurmd\fR
@@ -228,7 +233,7 @@ Fully qualified pathname of the file system available to user jobs for
temporary storage. This parameter is used in establishing a node's \fBTmpDisk\fR space.
The default value is "/tmp".
.TP
-\fBWaitTimefR
+\fBWaitTime\fR
Specifies how many seconds the srun command should by default wait after
the first task terminates before terminating all remaining tasks. The
"--wait" option on the srun command line overrides this value.
6 src/api/allocate.c
@@ -89,6 +89,7 @@ slurm_allocate_resources (job_desc_msg_t *req,
if (rc == SLURM_SOCKET_ERROR)
return SLURM_SOCKET_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_SLURM_RC:
if (_handle_rc_msg(&resp_msg) < 0)
@@ -127,6 +128,7 @@ int slurm_job_will_run (job_desc_msg_t *req,
if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
return SLURM_SOCKET_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_SLURM_RC:
if (_handle_rc_msg(&resp_msg) < 0)
@@ -183,7 +185,7 @@ slurm_allocate_resources_and_run (job_desc_msg_t *req,
if (rc == SLURM_SOCKET_ERROR)
return SLURM_SOCKET_ERROR;
-
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_SLURM_RC:
if (_handle_rc_msg(&resp_msg) < 0)
@@ -222,6 +224,7 @@ slurm_job_step_create (job_step_create_request_msg_t *req,
if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_SLURM_RC:
if (_handle_rc_msg(&resp_msg) < 0)
@@ -259,6 +262,7 @@ slurm_confirm_allocation (old_job_alloc_msg_t *req,
if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch(resp_msg.msg_type) {
case RESPONSE_SLURM_RC:
if (_handle_rc_msg(&resp_msg) < 0)
3  src/api/cancel.c
@@ -73,7 +73,8 @@ slurm_kill_job_step (uint32_t job_id, uint32_t step_id, uint16_t signal)
if (slurm_send_recv_controller_rc_msg(&msg, &rc) < 0)
return SLURM_FAILURE;
- if (rc) slurm_seterrno_ret(rc);
+ if (rc)
+ slurm_seterrno_ret(rc);
return SLURM_SUCCESS;
}
3  src/api/complete.c
@@ -85,7 +85,8 @@ slurm_complete_job_step ( uint32_t job_id, uint32_t step_id,
if (slurm_send_recv_controller_rc_msg(&req_msg, &rc) < 0)
return SLURM_ERROR;
- if (rc) slurm_seterrno_ret(rc);
+ if (rc)
+ slurm_seterrno_ret(rc);
return SLURM_PROTOCOL_SUCCESS;
}
2  src/api/config_info.c
@@ -151,10 +151,10 @@ slurm_load_ctl_conf (time_t update_time, slurm_ctl_conf_t **confp)
if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_BUILD_INFO:
*confp = (slurm_ctl_conf_info_msg_t *) resp_msg.data;
- slurm_free_cred(resp_msg.cred);
break;
case RESPONSE_SLURM_RC:
rc = ((return_code_msg_t *) resp_msg.data)->return_code;
3  src/api/job_info.c
@@ -219,10 +219,10 @@ slurm_load_jobs (time_t update_time, job_info_msg_t **resp)
if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_JOB_INFO:
*resp = (job_info_msg_t *)resp_msg.data;
- slurm_free_cred(resp_msg.cred);
break;
case RESPONSE_SLURM_RC:
rc = ((return_code_msg_t *) resp_msg.data)->return_code;
@@ -267,6 +267,7 @@ slurm_pid2jobid (pid_t job_pid, uint32_t *jobid)
if (slurm_send_recv_node_msg(&req_msg, &resp_msg, 0) < 0)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_JOB_ID:
*jobid = ((job_id_response_msg_t *) resp_msg.data)->job_id;
2  src/api/job_step_info.c
@@ -125,10 +125,10 @@ slurm_get_job_steps (time_t update_time, uint32_t job_id, uint32_t step_id,
if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_JOB_STEP_INFO:
*resp = (job_step_info_response_msg_t *) resp_msg.data;
- slurm_free_cred(resp_msg.cred);
break;
case RESPONSE_SLURM_RC:
rc = ((return_code_msg_t *) resp_msg.data)->return_code;
2  src/api/node_info.c
@@ -129,10 +129,10 @@ slurm_load_node (time_t update_time, node_info_msg_t **resp)
if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_NODE_INFO:
*resp = (node_info_msg_t *) resp_msg.data;
- slurm_free_cred(resp_msg.cred);
break;
case RESPONSE_SLURM_RC:
rc = ((return_code_msg_t *) resp_msg.data)->return_code;
2  src/api/partition_info.c
@@ -161,10 +161,10 @@ slurm_load_partitions (time_t update_time, partition_info_msg_t **resp)
if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_PARTITION_INFO:
*resp = (partition_info_msg_t *) resp_msg.data;
- slurm_free_cred(resp_msg.cred);
break;
case RESPONSE_SLURM_RC:
rc = ((return_code_msg_t *) resp_msg.data)->return_code;
4 src/api/reconfigure.c
@@ -58,7 +58,8 @@ slurm_reconfigure ( void )
if (slurm_send_recv_controller_rc_msg(&req, &rc) < 0)
return SLURM_ERROR;
- if (rc) slurm_seterrno_ret(rc);
+ if (rc)
+ slurm_seterrno_ret(rc);
return SLURM_PROTOCOL_SUCCESS;
}
@@ -129,6 +130,7 @@ _send_message_controller (enum controller_id dest, slurm_msg_t *req)
if ((rc = slurm_receive_msg(fd, &resp_msg, 0)) < 0)
slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR);
+ slurm_free_cred(resp_msg.cred);
if (slurm_shutdown_msg_conn(fd) != SLURM_SUCCESS)
slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_SHUTDOWN_ERROR);
1  src/api/submit.c
@@ -87,6 +87,7 @@ slurm_submit_batch_job (job_desc_msg_t *req,
if (rc == SLURM_SOCKET_ERROR)
return SLURM_ERROR;
+ slurm_free_cred(resp_msg.cred);
switch (resp_msg.msg_type) {
case RESPONSE_SLURM_RC:
rc = ((return_code_msg_t *) resp_msg.data)->return_code;
13 src/common/log.c
@@ -227,13 +227,13 @@ int log_init(char *prog, log_options_t opt, log_facility_t fac, char *logfile)
void log_fini()
{
- if (!log) return;
+ if (!log)
+ return;
+
log_flush();
slurm_mutex_lock(&log_lock);
- if (log->argv0)
- xfree(log->argv0);
- if (log->fpfx)
- xfree(log->fpfx);
+ xfree(log->argv0);
+ xfree(log->fpfx);
if (log->buf)
cbuf_destroy(log->buf);
if (log->fbuf)
@@ -254,8 +254,7 @@ void log_reinit()
void log_set_fpfx(char *prefix)
{
slurm_mutex_lock(&log_lock);
- if (log->fpfx)
- xfree(log->fpfx);
+ xfree(log->fpfx);
if (!prefix)
log->fpfx = xstrdup("");
else {
11 src/common/slurm_auth.c
@@ -243,7 +243,7 @@ slurm_auth_context_create( const char *auth_type )
c->auth_errno = SLURM_SUCCESS;
/* Copy the authentication type. */
- c->auth_type = strdup( auth_type );
+ c->auth_type = xstrdup( auth_type );
if ( c->auth_type == NULL ) {
debug3( "can't make local copy of authentication type" );
xfree( c );
@@ -299,7 +299,7 @@ slurm_auth_context_destroy( slurm_auth_context_t c )
}
}
- free( c->auth_type );
+ xfree( c->auth_type );
xfree( c );
return SLURM_SUCCESS;
@@ -340,7 +340,12 @@ slurm_auth_fini( void )
if ( g_context )
slurm_auth_context_destroy( g_context );
- free_slurm_conf( &conf );
+ slurm_mutex_lock( &config_lock );
+ if ( conf.slurmd_port ) {
+ free_slurm_conf( &conf );
+ conf.slurmd_port = 0;
+ }
+ slurm_mutex_unlock( &config_lock );
}
/*
2  src/common/slurm_errno.c
@@ -92,7 +92,7 @@ static slurm_errtab_t slurm_errtab[] = {
{ ESLURM_INVALID_PARTITION_NAME,
"Invalid partition name specified" },
{ ESLURM_DEFAULT_PARTITION_NOT_SET,
- "System default partition not set" },
+ "No partition specified or system default partition" },
{ ESLURM_ACCESS_DENIED,
"Access denied" },
{ ESLURM_JOB_MISSING_REQUIRED_PARTITION_GROUP,
2  src/common/slurm_protocol_defs.c
@@ -372,7 +372,7 @@ char *node_state_string_compact(enum node_states inx)
"IDLE",
"ALLOC",
"DRAIN",
- "DRAIN",
+ "DRNG",
"COMP",
"END"
};
4 src/common/slurm_protocol_pack.c
@@ -796,10 +796,6 @@ _unpack_resource_allocation_response_msg(resource_allocation_response_msg_t
safe_unpack16(&tmp_ptr->num_cpu_groups, buffer);
if (tmp_ptr->num_cpu_groups > 0) {
- tmp_ptr->cpus_per_node = (uint32_t *)
- xmalloc(sizeof(uint32_t) * tmp_ptr->num_cpu_groups);
- tmp_ptr->cpu_count_reps = (uint32_t *)
- xmalloc(sizeof(uint32_t) * tmp_ptr->num_cpu_groups);
safe_unpack32_array((uint32_t **) &
(tmp_ptr->cpus_per_node), &uint32_tmp,
buffer);
1  src/common/xsignal.c
@@ -91,6 +91,7 @@ xsignal_sigset_create(int sigarray[], sigset_t *setp)
int
xsignal_save_mask(sigset_t *set)
{
+ sigemptyset(set);
return _sigmask(SIG_SETMASK, NULL, set);
}
1  src/plugins/auth/auth_munge.c
@@ -462,6 +462,7 @@ _decode_cred(char *m, slurm_auth_credential_t *c)
* Block all signals to allow munge_decode() to proceed
* uninterrupted. (Testing for gnats slurm/223)
*/
+ sigemptyset(&oset);
sigfillset(&set);
sigdelset(&set, SIGABRT);
sigdelset(&set, SIGSEGV);
2  src/scontrol/scontrol.c
@@ -1268,7 +1268,7 @@ _update_job (int argc, char *argv[])
}
else if (strncasecmp(argv[i], "Priority=", 9) == 0)
job_msg.priority =
- (uint32_t) strtol(&argv[i][9],
+ (uint32_t) strtoll(&argv[i][9],
(char **) NULL, 10);
else if (strncasecmp(argv[i], "ReqProcs=", 9) == 0)
job_msg.num_procs =
24 src/slurmctld/job_mgr.c
@@ -122,7 +122,6 @@ static void _reset_step_bitmaps(struct job_record *job_ptr);
static void _set_job_id(struct job_record *job_ptr);
static void _set_job_prio(struct job_record *job_ptr);
static bool _slurm_picks_nodes(job_desc_msg_t * job_specs);
-static bool _too_many_fragments(bitstr_t *req_bitmap);
static bool _top_priority(struct job_record *job_ptr);
static int _validate_job_create_req(job_desc_msg_t * job_desc);
static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate,
@@ -1415,10 +1414,6 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id,
error_code = ESLURM_REQUESTED_NODES_NOT_IN_PARTITION;
goto cleanup;
}
- if (_too_many_fragments(req_bitmap)) {
- error_code = ESLURM_TOO_MANY_REQUESTED_NODES;
- goto cleanup;
- }
i = count_cpus(req_bitmap);
if (i > job_desc->num_procs)
job_desc->num_procs = i;
@@ -1522,6 +1517,8 @@ static int _job_create(job_desc_msg_t * job_desc, uint32_t * new_job_id,
*new_job_id, part_ptr->name);
error_code = ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
}
+ if (error_code == ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE)
+ (*job_rec_ptr)->priority = 1; /* Move to end of queue */
cleanup:
FREE_NULL_BITMAP(req_bitmap);
@@ -3196,20 +3193,3 @@ void job_fini (void)
xfree(job_hash_over);
}
-static bool _too_many_fragments(bitstr_t *req_bitmap)
-{
-#ifdef MAX_NODE_FRAGMENTS
- int i, frags=0;
- int last_bit = 0, next_bit;
-
- for (i = 0; i < node_record_count; i++) {
- next_bit = bit_test(req_bitmap, i);
- if (next_bit == last_bit)
- continue;
- last_bit = next_bit;
- if (next_bit && (++frags > MAX_NODE_FRAGMENTS))
- return true;
- }
-#endif
- return false;
-}
31 src/slurmctld/node_mgr.c
@@ -332,16 +332,21 @@ int dump_all_node_state ( void )
error ("Can't save state, error creating file %s %m",
new_file);
error_code = errno;
- }
- else {
- if (write (log_fd, get_buf_data(buffer),
- get_buf_offset(buffer)) !=
- get_buf_offset(buffer)) {
- error ("Can't save state, error writing file %s %m",
- new_file);
- error_code = errno;
+ } else {
+ int pos = 0, nwrite = get_buf_offset(buffer), amount;
+ char *data = (char *)get_buf_data(buffer);
+
+ while (nwrite > 0) {
+ amount = write(log_fd, &data[pos], nwrite);
+ if ((amount < 0) && (errno != EINTR)) {
+ error("Error writing file %s, %m", new_file);
+ error_code = errno;
+ break;
+ }
+ nwrite -= amount;
+ pos += amount;
}
- close (log_fd);
+ close(log_fd);
}
if (error_code)
(void) unlink (new_file);
@@ -1153,7 +1158,7 @@ validate_node_specs (char *node_name, uint32_t cpus,
#endif
if (node_ptr->node_state == NODE_STATE_UNKNOWN) {
reset_job_priority();
- info ("validate_node_specs: node %s has registered",
+ debug("validate_node_specs: node %s has registered",
node_name);
if (job_count)
node_ptr->node_state = NODE_STATE_ALLOCATED;
@@ -1491,9 +1496,13 @@ void make_node_comp(struct node_record *node_ptr)
node_ptr->name,
node_state_string((enum node_states)
node_ptr->node_state));
- } else {
+ } else if (!no_resp_flag) {
node_ptr->node_state = NODE_STATE_COMPLETING | no_resp_flag;
xfree(node_ptr->reason);
+ } else if ( (base_state == NODE_STATE_ALLOCATED) &&
+ (node_ptr->run_job_cnt == 0) ) {
+ bit_set(idle_node_bitmap, inx);
+ node_ptr->node_state = NODE_STATE_IDLE | no_resp_flag;
}
}
5 src/slurmctld/node_scheduler.c
@@ -751,8 +751,11 @@ int select_nodes(struct job_record *job_ptr, bool test_only)
(job_ptr->time_limit > part_ptr->max_time)) ||
((job_ptr->details->max_nodes != 0) && /* no node limit */
(job_ptr->details->max_nodes < part_ptr->min_nodes)) ||
- (job_ptr->details->min_nodes > part_ptr->max_nodes))
+ (job_ptr->details->min_nodes > part_ptr->max_nodes)) {
+ job_ptr->priority = 1; /* move to end of queue */
+ last_job_update = time(NULL);
return ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
+ }
/* build sets of usable nodes based upon their configuration */
error_code = _build_node_list(job_ptr, &node_set_ptr, &node_set_size);
3  src/slurmctld/partition_mgr.c
@@ -731,6 +731,9 @@ int update_part(update_part_msg_t * part_desc)
}
}
+ if (error_code == SLURM_SUCCESS)
+ reset_job_priority(); /* free jobs */
+
return error_code;
}
1  src/slurmd/interconnect.h
@@ -29,7 +29,6 @@
#ifndef _INTERCONNECT_H_
#define _INTERCONNECT_H_
-#include "src/common/slurm_protocol_api.h"
#include "src/slurmd/job.h"
/*
10 src/slurmd/mgr.c
@@ -555,13 +555,19 @@ _create_job_session(slurmd_job_t *job)
return ESLURMD_FORK_FAILED;
}
+ /*
+ * If the created job terminates immediately, the shared memory
+ * record can be purged before we can set the mpid and sid below.
+ * This does not truly indicate an error condition, but a rare
+ * timing anomaly. Thus we log the event using debug().
+ */
job->jmgr_pid = getpid();
if (shm_update_step_mpid(job->jobid, job->stepid, getpid()) < 0)
- error("shm_update_step_mpid: %m");
+ debug("shm_update_step_mpid: %m");
job->smgr_pid = spid;
if (shm_update_step_sid(job->jobid, job->stepid, spid) < 0)
- error("shm_update_step_sid: %m");
+ debug("shm_update_step_sid: %m");
/*
* Read information from session manager slurmd
5 src/slurmd/req.c
@@ -78,6 +78,7 @@ static int _run_prolog(uint32_t jobid, uid_t uid);
static int _run_epilog(uint32_t jobid, uid_t uid);
static int _wait_for_procs(uint32_t job_id);
+static pthread_mutex_t launch_mutex = PTHREAD_MUTEX_INITIALIZER;
void
slurmd_req(slurm_msg_t *msg, slurm_addr *cli)
@@ -86,12 +87,16 @@ slurmd_req(slurm_msg_t *msg, slurm_addr *cli)
switch(msg->msg_type) {
case REQUEST_BATCH_JOB_LAUNCH:
+ slurm_mutex_lock(&launch_mutex);
_rpc_batch_job(msg, cli);
slurm_free_job_launch_msg(msg->data);
+ slurm_mutex_unlock(&launch_mutex);
break;
case REQUEST_LAUNCH_TASKS:
+ slurm_mutex_lock(&launch_mutex);
_rpc_launch_tasks(msg, cli);
slurm_free_launch_tasks_request_msg(msg->data);
+ slurm_mutex_unlock(&launch_mutex);
break;
case REQUEST_KILL_TASKS:
_rpc_kill_tasks(msg, cli);
43 src/slurmd/shm.c
@@ -117,7 +117,8 @@ typedef struct shmem_struct {
* static variables:
* */
static sem_t *shm_lock;
-static char *lockname;
+static char *lockname;
+static char *lockdir;
static slurmd_shm_t *slurmd_shm;
static int shmid;
static pid_t attach_pid = (pid_t) 0;
@@ -193,22 +194,25 @@ shm_fini(void)
/* detach segment from local memory */
if (shmdt(slurmd_shm) < 0) {
error("shmdt: %m");
- return -1;
+ goto error;
}
slurmd_shm = NULL;
if (destroy && (shmctl(shmid, IPC_RMID, NULL) < 0)) {
error("shmctl: %m");
- return -1;
+ goto error;
}
_shm_unlock();
if (destroy && (_shm_unlink_lock() < 0)) {
error("_shm_unlink_lock: %m");
- return -1;
+ goto error;
}
return 0;
+
+ error:
+ return -1;
}
void
@@ -218,8 +222,13 @@ shm_cleanup(void)
key_t key;
int id = -1;
- info("request to destroy shm lock [%s]", SHM_LOCKNAME);
+ if (!lockdir)
+ lockdir = xstrdup(conf->spooldir);
+
if ((s = _create_ipc_name(SHM_LOCKNAME))) {
+
+ info("request to destroy shm lock [%s]", s);
+
key = ftok(s, 1);
if (sem_unlink(s) < 0)
error("sem_unlink: %m");
@@ -304,25 +313,24 @@ _is_valid_ipc_name(const char *name)
return(1);
}
+/*
+ * Create IPC name by appending `name' to slurmd spooldir
+ * setting.
+ */
static char *
_create_ipc_name(const char *name)
{
char *dst = NULL, *dir = NULL, *slash = NULL;
int rc;
+ xassert (lockdir != NULL);
+
if ((rc = _is_valid_ipc_name(name)) != 1)
fatal("invalid ipc name: `%s' %d", name, rc);
else if (!(dst = xmalloc(PATH_MAX)))
fatal("memory allocation failure");
-#if defined(POSIX_IPC_PREFIX) && defined(HAVE_POSIX_SEMS)
- dir = POSIX_IPC_PREFIX;
-#else
- if ( !(dir = conf->spooldir)
- && !(strlen(dir))
- && !(dir = getenv("TMPDIR")))
- dir = "/tmp";
-#endif /* POSIX_IPC_PREFIX */
+ dir = lockdir;
slash = (dir[strlen(dir) - 1] == '/') ? "" : "/";
@@ -1086,6 +1094,15 @@ _shm_lock_and_initialize()
/*
* Create locked semaphore (initial value == 0)
*/
+
+ /*
+ * Init lockdir to slurmd spooldir.
+ * Make sure it does not change for this instance of slurmd,
+ * even if spooldir does.
+ */
+ if (!lockdir)
+ lockdir = xstrdup(conf->spooldir);
+
shm_lock = _sem_open(SHM_LOCKNAME, O_CREAT|O_EXCL, 0600, 0);
debug3("slurmd lockfile is \"%s\"", lockname);
93 src/slurmd/slurmd.c
@@ -31,6 +31,7 @@
#include <fcntl.h>
#include <string.h>
+#include <stdlib.h>
#include <pthread.h>
#include <sys/stat.h>
#include <sys/time.h>
@@ -140,6 +141,10 @@ main (int argc, char *argv[])
log_init(argv[0], conf->log_opts, LOG_DAEMON, conf->logfile);
+ xsignal(SIGTERM, &_term_handler);
+ xsignal(SIGINT, &_term_handler);
+ xsignal(SIGHUP, &_hup_handler );
+
/*
* Run slurmd_init() here in order to report early errors
* (with shared memory and public keyfile)
@@ -163,6 +168,12 @@ main (int argc, char *argv[])
_kill_old_slurmd();
+ /*
+ * Restore any saved revoked credential information
+ */
+ if (_restore_cred_state(conf->vctx))
+ return SLURM_FAILURE;
+
if (interconnect_node_init() < 0)
fatal("Unable to initialize interconnect.");
@@ -176,10 +187,6 @@ main (int argc, char *argv[])
if (send_registration_msg(SLURM_SUCCESS) < 0)
error("Unable to register with slurm controller");
- xsignal(SIGTERM, &_term_handler);
- xsignal(SIGINT, &_term_handler);
- xsignal(SIGHUP, &_hup_handler );
-
_install_fork_handlers();
list_install_fork_handlers();
@@ -203,34 +210,32 @@ main (int argc, char *argv[])
return 0;
}
+
static void
_msg_engine()
{
slurm_fd sock;
- slurm_addr cli;
- while (1) {
- if (_shutdown)
- break;
- again:
- if ((sock = slurm_accept_msg_conn(conf->lfd, &cli)) < 0) {
- if (errno == EINTR) {
- if (_shutdown) {
- verbose("got shutdown request");
- break;
- }
- if (_reconfig) {
- _reconfigure();
- verbose("got reconfigure request");
- }
- goto again;
- }
- error("accept: %m");
+ while (!_shutdown) {
+ slurm_addr *cli = xmalloc (sizeof (*cli));
+ if ((sock = slurm_accept_msg_conn(conf->lfd, cli)) >= 0) {
+ _handle_connection(sock, cli);
continue;
}
- if (sock > 0)
- _handle_connection(sock, &cli);
+ /*
+ * Otherwise, accept() failed.
+ */
+ xfree (cli);
+ if (errno == EINTR) {
+ if (_reconfig) {
+ verbose("got reconfigure request");
+ _reconfigure();
+ }
+ continue;
+ }
+ error("accept: %m");
}
+ verbose("got shutdown request");
slurm_shutdown_msg_engine(conf->lfd);
return;
}
@@ -336,6 +341,7 @@ _service_connection(void *arg)
error ("close(%d): %m", con->fd);
done:
+ xfree(con->cli_addr);
xfree(con);
slurm_free_msg(msg);
_decrement_thd_count();
@@ -345,21 +351,24 @@ _service_connection(void *arg)
int
send_registration_msg(uint32_t status)
{
+ int retval = SLURM_SUCCESS;
slurm_msg_t req;
slurm_msg_t resp;
- slurm_node_registration_status_msg_t msg;
+ slurm_node_registration_status_msg_t *msg = xmalloc (sizeof (*msg));
- _fill_registration_msg(&msg);
- msg.status = status;
+ _fill_registration_msg(msg);
+ msg->status = status;
req.msg_type = MESSAGE_NODE_REGISTRATION_STATUS;
- req.data = &msg;
+ req.data = msg;
if (slurm_send_recv_controller_msg(&req, &resp) < 0) {
error("Unable to register: %m");
- return SLURM_FAILURE;
+ retval = SLURM_FAILURE;
}
+ slurm_free_node_registration_status_msg (msg);
+
/* XXX look at response msg
*/
@@ -374,7 +383,7 @@ _fill_registration_msg(slurm_node_registration_status_msg_t *msg)
job_step_t *s;
int n;
- msg->node_name = conf->hostname;
+ msg->node_name = xstrdup (conf->hostname);
get_procs(&msg->cpus);
get_memory(&msg->real_memory_size);
@@ -642,10 +651,12 @@ _slurmd_init()
return SLURM_FAILURE;
/*
- * Restore any saved revoked credential information
+ * Create slurmd spool directory if necessary.
*/
- if (_restore_cred_state(conf->vctx))
+ if (_set_slurmd_spooldir() < 0) {
+ error("Unable to initialize slurmd spooldir");
return SLURM_FAILURE;
+ }
/*
* Cleanup shared memory if so configured
@@ -662,18 +673,14 @@ _slurmd_init()
/*
* Initialize slurmd shared memory
+ * This *must* be called after _set_slurmd_spooldir()
+ * since the default location of the slurmd lockfile is
+ * _in_ the spooldir.
+ *
*/
if (shm_init(true) < 0)
return SLURM_FAILURE;
- /*
- * Create slurmd spool directory if necessary.
- */
- if (_set_slurmd_spooldir() < 0) {
- error("Unable to initialize slurmd spooldir");
- return SLURM_FAILURE;
- }
-
if (conf->daemonize && (chdir("/tmp") < 0)) {
error("Unable to chdir to /tmp");
return SLURM_FAILURE;
@@ -691,8 +698,8 @@ _restore_cred_state(slurm_cred_ctx_t ctx)
int cred_fd, data_allocated, data_read = 0;
Buf buffer = NULL;
- if ((mkdir(conf->spooldir, 0755) < 0) &&
- (errno != EEXIST)) {
+ if ( (mkdir(conf->spooldir, 0755) < 0)
+ && (errno != EEXIST) ) {
error("mkdir(%s): %m", conf->spooldir);
return SLURM_ERROR;
}
@@ -727,7 +734,7 @@ static int
_slurmd_fini()
{
save_cred_state(conf->vctx);
- shm_fini();
+ shm_fini();
return SLURM_SUCCESS;
}
3  src/slurmd/ulimits.c
@@ -110,7 +110,8 @@ _set_limit(char **env, struct userlim *u)
r.rlim_cur = (val == -1L) ? RLIM_INFINITY : (rlim_t) val;
if (setrlimit(u->resource, &r) < 0)
- error("setrlimit(%s,%ld): %m", name, (long)r.rlim_cur);
+ error("Can't propagate %s of %ld from submit host: %m",
+ name, (long)r.rlim_cur);
}
unsetenvp(env, u->var);
2  src/srun/io.c
@@ -593,9 +593,11 @@ _read_io_header(int fd, job_t *job, char *host)
(hdr.type == SLURM_IO_STDERR ? "stderr" : "stdout"),
host, hdr.taskid, fd );
+ cbuf_destroy(cb);
return SLURM_SUCCESS;
fail:
+ cbuf_destroy(cb);
close(fd);
return SLURM_ERROR;
}
20 src/srun/job.c
@@ -219,12 +219,15 @@ int
job_rc(job_t *job)
{
int i;
- int rc;
+ int rc = 0;
- if (job->rc) return(job->rc);
+ if (job->rc >= 0) return(job->rc);
- for (i = 0; i < opt.nprocs; i++)
- job->rc |= job->tstatus[i];
+
+ for (i = 0; i < opt.nprocs; i++) {
+ if (job->rc < job->tstatus[i])
+ job->rc = job->tstatus[i];
+ }
if ((rc = WEXITSTATUS(job->rc)))
job->rc = rc;
@@ -248,8 +251,12 @@ void job_fatal(job_t *job, const char *msg)
void
job_destroy(job_t *job, int error)
{
+ if (job->removed)
+ return;
+
if (job->old_job) {
debug("cancelling job step %u.%u", job->jobid, job->stepid);
+ slurm_kill_job_step(job->jobid, job->stepid, SIGKILL);
slurm_complete_job_step(job->jobid, job->stepid, 0, error);
} else if (!opt.no_alloc) {
debug("cancelling job %u", job->jobid);
@@ -263,6 +270,8 @@ job_destroy(job_t *job, int error)
#ifdef HAVE_TOTALVIEW
if (error) tv_launch_failure();
#endif
+
+ job->removed = true;
}
@@ -389,7 +398,7 @@ _job_create_internal(allocation_info_t *info)
job->state = SRUN_JOB_INIT;
job->signaled = false;
- job->rc = 0;
+ job->rc = -1;
job->nodelist = xstrdup(info->nodelist);
hl = hostlist_create(job->nodelist);
@@ -398,6 +407,7 @@ _job_create_internal(allocation_info_t *info)
job->jobid = info->jobid;
job->stepid = info->stepid;
job->old_job = false;
+ job->removed = false;
/*
* Initialize Launch and Exit timeout values
3  src/srun/job.h
@@ -75,6 +75,7 @@ typedef struct srun_job {
uint32_t jobid; /* assigned job id */
uint32_t stepid; /* assigned step id */
bool old_job; /* run job step under previous allocation */
+ bool removed; /* job has been removed from SLURM */
job_state_t state; /* job state */
pthread_mutex_t state_mutex;
@@ -92,7 +93,7 @@ typedef struct srun_job {
uint32_t **tids; /* host id => task ids mapping */
uint32_t *hostid; /* task id => host id mapping */
- slurm_addr *slurmd_addr;/* slurm_addr vector to slurmd's */
+ slurm_addr *slurmd_addr;/* slurm_addr vector to slurmd's */
pthread_t sigid; /* signals thread tid */
2  src/srun/opt.c
@@ -418,7 +418,7 @@ static void _opt_default()
opt.exc_nodes = NULL;
opt.max_launch_time = 60; /* 60 seconds to launch job */
opt.max_exit_timeout= 60; /* Warn user 60 seconds after task exit */
- opt.msg_timeout = 2; /* Default launch msg timeout */
+ opt.msg_timeout = 5; /* Default launch msg timeout */
mode = MODE_NORMAL;