Permalink
Browse files

create v2.2.0-pre3 tag

  • Loading branch information...
2 parents 6e516ae + 66f44a3 commit 7faf6201f0abeb353eea70887a3bb0b7d9c1995b @jette jette committed Mar 18, 2010
View
1 NEWS
@@ -29,6 +29,7 @@ documents those changes that are of interest to users and admins.
upon job 12).
-- Add BootTime and SlurmdStartTime to available node information.
-- Fixed moab_2_slurmdb to work correctly under new database schema.
+ -- Slurmd will drain a compute node when the SlurmdSpoolDir is full.
* Changes in SLURM 2.2.0.pre2
=============================
@@ -2,24 +2,28 @@ AUTOMAKE_OPTIONS = foreign
# copied from pidgin
#
perl_dir = perl
-perl_src_dir = ${abs_srcdir}/${perl_dir}
perlpath = /usr/bin/perl
perl_sources = \
- $(perl_src_dir)/Makefile.PL.in \
- $(perl_src_dir)/ppport.h \
- $(perl_src_dir)/Slurm.pm \
- $(perl_src_dir)/Slurm.xs \
- $(perl_src_dir)/typemap \
- $(perl_src_dir)/msg.h \
- $(perl_src_dir)/alloc.c \
- $(perl_src_dir)/conf.c \
- $(perl_src_dir)/job.c \
- $(perl_src_dir)/launch.c \
- $(perl_src_dir)/node.c \
- $(perl_src_dir)/partition.c \
- $(perl_src_dir)/trigger.c
+ $(perl_dir)/Makefile.PL.in \
+ $(perl_dir)/ppport.h \
+ $(perl_dir)/Slurm.pm \
+ $(perl_dir)/Slurm.xs \
+ $(perl_dir)/typemap \
+ $(perl_dir)/msg.h \
+ $(perl_dir)/alloc.c \
+ $(perl_dir)/conf.c \
+ $(perl_dir)/job.c \
+ $(perl_dir)/launch.c \
+ $(perl_dir)/node.c \
+ $(perl_dir)/partition.c \
+ $(perl_dir)/trigger.c
$(perl_dir)/Makefile: $(perl_dir)/Makefile.PL
+ @if test "x${top_srcdir}" != "x${top_builddir}"; then \
+ for f in ${perl_sources}; do \
+ ${LN_S} -f ${abs_srcdir}/$$f $$f; \
+ done; \
+ fi
@cd $(perl_dir) && $(perlpath) Makefile.PL $(PERL_MM_PARAMS) prefix=${prefix}
#
@@ -268,22 +268,21 @@ AUTOMAKE_OPTIONS = foreign
# copied from pidgin
#
perl_dir = perl
-perl_src_dir = ${abs_srcdir}/${perl_dir}
perlpath = /usr/bin/perl
perl_sources = \
- $(perl_src_dir)/Makefile.PL.in \
- $(perl_src_dir)/ppport.h \
- $(perl_src_dir)/Slurm.pm \
- $(perl_src_dir)/Slurm.xs \
- $(perl_src_dir)/typemap \
- $(perl_src_dir)/msg.h \
- $(perl_src_dir)/alloc.c \
- $(perl_src_dir)/conf.c \
- $(perl_src_dir)/job.c \
- $(perl_src_dir)/launch.c \
- $(perl_src_dir)/node.c \
- $(perl_src_dir)/partition.c \
- $(perl_src_dir)/trigger.c
+ $(perl_dir)/Makefile.PL.in \
+ $(perl_dir)/ppport.h \
+ $(perl_dir)/Slurm.pm \
+ $(perl_dir)/Slurm.xs \
+ $(perl_dir)/typemap \
+ $(perl_dir)/msg.h \
+ $(perl_dir)/alloc.c \
+ $(perl_dir)/conf.c \
+ $(perl_dir)/job.c \
+ $(perl_dir)/launch.c \
+ $(perl_dir)/node.c \
+ $(perl_dir)/partition.c \
+ $(perl_dir)/trigger.c
AM_CPPFLAGS = \
-DVERSION=\"$(VERSION)\" \
@@ -473,6 +472,11 @@ uninstall-am: uninstall-local
$(perl_dir)/Makefile: $(perl_dir)/Makefile.PL
+ @if test "x${top_srcdir}" != "x${top_builddir}"; then \
+ for f in ${perl_sources}; do \
+ ${LN_S} -f ${abs_srcdir}/$$f $$f; \
+ done; \
+ fi
@cd $(perl_dir) && $(perlpath) Makefile.PL $(PERL_MM_PARAMS) prefix=${prefix}
#
@@ -83,18 +83,18 @@ $other_ld_flags = " -brtl -G -bnoentry -bgcbypass:1000 -bexpfull"
WriteMakefile(
NAME => 'Slurm',
- VERSION_FROM => '@abs_srcdir@/Slurm.pm', # finds $VERSION
+ VERSION_FROM => 'Slurm.pm', # finds $VERSION
PREREQ_PM => {}, # e.g., Module::Name => 1.1
($] >= 5.005 ? ## Add these new keywords supported since 5.005
- (ABSTRACT_FROM => '@abs_srcdir@/Slurm.pm', # retrieve abstract from module
+ (ABSTRACT_FROM => 'Slurm.pm', # retrieve abstract from module
AUTHOR => 'Hongjia Cao <hjcao@nudt.edu.cn>') : ()),
LIBS => ["-L@top_builddir@/src/api/.libs -L@prefix@/lib -lslurm"], # e.g., '-lm'
DEFINE => '', # e.g., '-DHAVE_SOMETHING'
INC => "-I. -I@top_srcdir@ -I@top_builddir@", # e.g., '-I. -I/usr/include/other'
# Un-comment this if you add C files to link with later:
OBJECT => '$(O_FILES)', # link all the C files too
CCFLAGS => '-g',
- PM => {'@abs_srcdir@/Slurm.pm' => '$(INST_LIBDIR)/Slurm.pm'},
+ PM => {'Slurm.pm' => '$(INST_LIBDIR)/Slurm.pm'},
dynamic_lib => {'OTHERLDFLAGS' => $other_ld_flags},
);
View
@@ -134,6 +134,7 @@ static void _atfork_prepare(void);
static void _create_msg_socket(void);
static void _decrement_thd_count(void);
static void _destroy_conf(void);
+static int _drain_node(char *reason);
static void _fill_registration_msg(slurm_node_registration_status_msg_t *);
static void _handle_connection(slurm_fd fd, slurm_addr *client);
static void _hup_handler(int);
@@ -1296,7 +1297,7 @@ _slurmd_fini(void)
int save_cred_state(slurm_cred_ctx_t ctx)
{
char *old_file, *new_file, *reg_file;
- int cred_fd = -1, error_code = SLURM_SUCCESS;
+ int cred_fd = -1, error_code = SLURM_SUCCESS, rc;
Buf buffer = NULL;
static pthread_mutex_t state_mutex = PTHREAD_MUTEX_INITIALIZER;
@@ -1310,15 +1311,19 @@ int save_cred_state(slurm_cred_ctx_t ctx)
slurm_mutex_lock(&state_mutex);
if ((cred_fd = creat(new_file, 0600)) < 0) {
error("creat(%s): %m", new_file);
+ if (errno == ENOSPC)
+ _drain_node("SlurmdSpoolDir is full");
error_code = errno;
goto cleanup;
}
buffer = init_buf(1024);
slurm_cred_ctx_pack(ctx, buffer);
- if (write(cred_fd, get_buf_data(buffer),
- get_buf_offset(buffer)) != get_buf_offset(buffer)) {
+ rc = write(cred_fd, get_buf_data(buffer), get_buf_offset(buffer));
+ if (rc != get_buf_offset(buffer)) {
error("write %s error %m", new_file);
(void) unlink(new_file);
+ if ((rc < 0) && (errno == ENOSPC))
+ _drain_node("SlurmdSpoolDir is full");
error_code = errno;
goto cleanup;
}
@@ -1344,6 +1349,27 @@ int save_cred_state(slurm_cred_ctx_t ctx)
return error_code;
}
+static int _drain_node(char *reason) /* Send the controller a REQUEST_UPDATE_NODE message asking for the node named by conf->node_name to be set to NODE_STATE_DRAIN. */
+{ /* reason: text recorded in the request (pointer stored, not copied). Returns SLURM_SUCCESS, or SLURM_ERROR if the send call reports failure. */
+ slurm_msg_t req_msg;
+ update_node_msg_t update_node_msg;
+
+ memset(&update_node_msg, 0, sizeof(update_node_msg_t)); /* start from an all-zero request so fields not set below are 0/NULL */
+ update_node_msg.node_names = conf->node_name; /* presumably this slurmd's own node name -- confirm against where conf is populated */
+ update_node_msg.node_state = NODE_STATE_DRAIN;
+ update_node_msg.reason = reason; /* borrowed pointer; only needs to stay valid until the send below returns */
+ update_node_msg.reason_uid = getuid(); /* real UID of the calling process is recorded as the requester */
+ update_node_msg.weight = NO_VAL; /* NOTE(review): NO_VAL presumably means "leave weight unchanged" -- confirm */
+ slurm_msg_t_init(&req_msg);
+ req_msg.msg_type = REQUEST_UPDATE_NODE;
+ req_msg.data = &update_node_msg; /* points at a stack object; must not be used after this function returns */
+
+ if (slurm_send_only_controller_msg(&req_msg) < 0) /* NOTE(review): name suggests no reply is awaited, so success would only mean "sent" -- confirm */
+ return SLURM_ERROR;
+
+ return SLURM_SUCCESS; /* the save_cred_state call sites visible in this change ignore the return value */
+}
+
static void
_term_handler(int signum)
{
@@ -165,6 +165,7 @@ typedef struct kill_thread {
static int _access(const char *path, int modes, uid_t uid, gid_t gid);
static void _send_launch_failure(launch_tasks_request_msg_t *,
slurm_addr *, int);
+static int _drain_node(char *reason);
static int _fork_all_tasks(slurmd_job_t *job);
static int _become_user(slurmd_job_t *job, struct priv_state *ps);
static void _set_prio_process (slurmd_job_t *job);
@@ -1517,6 +1518,8 @@ _make_batch_dir(slurmd_job_t *job)
if ((mkdir(path, 0750) < 0) && (errno != EEXIST)) {
error("mkdir(%s): %m", path);
+ if (errno == ENOSPC)
+ _drain_node("SlurmdSpoolDir is full");
goto error;
}
@@ -1556,6 +1559,8 @@ _make_batch_script(batch_job_launch_msg_t *msg, char *path)
if (fputs(msg->script, fp) < 0) {
(void) fclose(fp);
error("fputs: %m");
+ if (errno == ENOSPC)
+ _drain_node("SlurmdSpoolDir is full");
goto error;
}
@@ -1580,6 +1585,27 @@ _make_batch_script(batch_job_launch_msg_t *msg, char *path)
}
+static int _drain_node(char *reason) /* Send the controller a REQUEST_UPDATE_NODE message asking for the node named by conf->node_name to be set to NODE_STATE_DRAIN. */
+{ /* reason: text recorded in the request (pointer stored, not copied). Returns SLURM_SUCCESS, or SLURM_ERROR if the send call reports failure. */
+ slurm_msg_t req_msg;
+ update_node_msg_t update_node_msg;
+
+ memset(&update_node_msg, 0, sizeof(update_node_msg_t)); /* start from an all-zero request so fields not set below are 0/NULL */
+ update_node_msg.node_names = conf->node_name; /* presumably this slurmd's own node name -- confirm against where conf is populated */
+ update_node_msg.node_state = NODE_STATE_DRAIN;
+ update_node_msg.reason = reason; /* borrowed pointer; only needs to stay valid until the send below returns */
+ update_node_msg.reason_uid = getuid(); /* real UID of the calling process is recorded as the requester */
+ update_node_msg.weight = NO_VAL; /* NOTE(review): NO_VAL presumably means "leave weight unchanged" -- confirm */
+ slurm_msg_t_init(&req_msg);
+ req_msg.msg_type = REQUEST_UPDATE_NODE;
+ req_msg.data = &update_node_msg; /* points at a stack object; must not be used after this function returns */
+
+ if (slurm_send_only_controller_msg(&req_msg) < 0) /* NOTE(review): name suggests no reply is awaited, so success would only mean "sent" -- confirm */
+ return SLURM_ERROR;
+
+ return SLURM_SUCCESS; /* the _make_batch_dir / _make_batch_script call sites visible in this change ignore the return value */
+}
+
static void
_send_launch_failure (launch_tasks_request_msg_t *msg, slurm_addr *cli, int rc)
{

0 comments on commit 7faf620

Please sign in to comment.