Permalink
Browse files

This commit was manufactured by cvs2svn to create tag

'slurm-0-4-19-1'.
  • Loading branch information...
1 parent 5ab7088 commit 9d1b3735adb5bd80477bff8e906cc870d8f6ab99 no author committed Apr 12, 2005
Showing with 2,953 additions and 1,743 deletions.
  1. +2 −2 META
  2. +68 −1 NEWS
  3. +1 −1 auxdir/x_ac_bgl.m4
  4. +3 −3 doc/html/bluegene.html
  5. +14 −11 doc/man/man1/sinfo.1
  6. +4 −0 doc/man/man1/smap.1
  7. +17 −4 etc/bluegene.conf.example
  8. +7 −14 slurm.spec.in
  9. +3 −3 src/api/Makefile.am
  10. +31 −0 src/api/job_info.c
  11. +16 −1 src/api/job_info.h
  12. +108 −0 src/api/node_select_info.c
  13. +76 −0 src/api/node_select_info.h
  14. +135 −3 src/common/node_select.c
  15. +32 −2 src/common/node_select.h
  16. +0 −2 src/common/slurm_protocol_api.c
  17. +7 −0 src/common/slurm_protocol_defs.c
  18. +12 −1 src/common/slurm_protocol_defs.h
  19. +97 −8 src/common/slurm_protocol_pack.c
  20. +1 −1 src/partition_allocator/Makefile.am
  21. +215 −197 src/partition_allocator/partition_allocator.c
  22. +32 −18 src/partition_allocator/partition_allocator.h
  23. +7 −2 src/plugins/select/bluegene/Makefile.am
  24. +1 −5 src/plugins/select/bluegene/bgl_job_place.c
  25. +276 −160 src/plugins/select/bluegene/bgl_job_run.c
  26. +287 −0 src/plugins/select/bluegene/bgl_part_info.c
  27. +36 −0 src/plugins/select/bluegene/bgl_part_info.h
  28. +156 −62 src/plugins/select/bluegene/bgl_switch_connections.c
  29. +352 −302 src/plugins/select/bluegene/bluegene.c
  30. +22 −13 src/plugins/select/bluegene/bluegene.h
  31. +165 −282 src/plugins/select/bluegene/partition_sys.c
  32. +54 −3 src/plugins/select/bluegene/select_bluegene.c
  33. +82 −76 src/plugins/select/bluegene/slurm_epilog.c
  34. +64 −168 src/plugins/select/bluegene/slurm_prolog.c
  35. +8 −0 src/plugins/select/bluegene/state_test.c
  36. +3 −1 src/plugins/select/bluegene/wrap_rm_api.h
  37. +16 −0 src/plugins/select/linear/select_linear.c
  38. +1 −1 src/sinfo/Makefile.am
  39. +10 −4 src/sinfo/opts.c
  40. +117 −10 src/sinfo/sinfo.c
  41. +4 −1 src/sinfo/sinfo.h
  42. +3 −0 src/slurmctld/controller.c
  43. +35 −4 src/slurmctld/job_mgr.c
  44. +3 −1 src/slurmctld/node_mgr.c
  45. +74 −0 src/slurmctld/proc_req.c
  46. +14 −3 src/slurmctld/slurmctld.h
  47. +7 −5 src/slurmd/mgr.c
  48. +1 −1 src/slurmd/mgr.h
  49. +1 −23 src/smap/Makefile.am
  50. +41 −17 src/smap/configure_functions.c
  51. +16 −22 src/smap/grid_functions.c
  52. +40 −34 src/smap/job_functions.c
  53. +7 −2 src/smap/opts.c
  54. +96 −213 src/smap/partition_functions.c
  55. +47 −52 src/smap/smap.c
  56. +23 −1 src/smap/smap.h
  57. +3 −3 src/srun/srun.c
View
4 META
@@ -9,8 +9,8 @@
Name: slurm
Major: 0
Minor: 4
- Micro: 6
- Version: 0.4.6
+ Micro: 19
+ Version: 0.4.19
Release: 1
API_CURRENT: 6
API_AGE: 4
View
69 NEWS
@@ -1,9 +1,76 @@
This file describes changes in recent versions of SLURM. It primarily
documents those changes that are of interest to users and admins.
+* Changes in SLURM 0.4.19
+=========================
+ -- Added new RPCs for getting bglblock state info remotely and cache data
+ within the plugin (permits removal of DB2 access from BGL FEN and
+ dramatically increases smap responsiveness, also changed prolog/epilog
+ operation)
+ -- Move smap executable to main slurm RPM (from separate RPM).
+ -- smap uses RPC instead of DB2 to get info about bgl partitions.
+ -- Status function added to bluegene_agent thread. Keeps current state
+ of BGL partitions updating every second. Will handle multiple attempts
+ at booting if booting a partition fails.
+
+* Changes in SLURM 0.4.18
+=========================
+ -- Added error checking of rm_remove_partition calls.
+ -- job_term() was terminating a job in real time rather than
+ queueing the request. This would result in slurmctld hanging
+ for many seconds when a job termination was required.
+
+* Changes in SLURM 0.4.17
+========================
+ -- Bug fixes from testing .16.
+
+* Changes in SLURM 0.4.16
+========================
+ -- Added error checking to a bunch of Bridge API calls and more
+ gracefully handle failure modes.
+ -- Made smap more robust for more jobs.
+
+* Changes in SLURM 0.4.15
+========================
+ -- Added error checking to a bunch of Bridge API calls and more
+ gracefully handle failure modes.
+
+* Changes in SLURM 0.4.14
+========================
+ -- job state is kept on warm start of slurm
+
+* Changes in SLURM 0.4.13
+========================
+ -- epilog fix for bgl plugin
+
+* Changes in SLURM 0.4.12
+========================
+ -- bug shot for new api calls.
+ -- added BridgeAPILogFile as an option for bluegene.conf file
+
+* Changes in SLURM 0.4.11
+========================
+ -- changed as many rm_get_partition() to rm_get_partitions_info as we could
+ for time saving.
+
+* Changes in SLURM 0.4.10
+========================
+ -- redesign for BGL external wiring.
+ -- smap display bug fix for smaller systems.
+
+* Changes in SLURM 0.4.9
+========================
+ -- setpnum works now, have to include this in bluegene.conf
+
+* Changes in SLURM 0.4.8
+========================
+ -- Changed the prolog and the epilog to use the env var MPIRUN_PARTITION
+ instead of BGL_PARTITION_ID
+
* Changes in SLURM 0.4.7
========================
- -- Remove some BGL specific headers that IBM now distributes.
+ -- Remove some BGL specific headers that IBM now distributes, NOTE
+ BGL driver 080 or greater required.
-- Change autogen.sh to deal with problems running autoconf on one
system and configure on another with different software versions.
View
@@ -33,7 +33,7 @@ AC_DEFUN([X_AC_BGL],
fi
have_bgl_ar=yes
- bgl_ldflags="$bgl_ldflags -Wl,-rpath $bgl_dir/lib -Wl,-L$bgl_dir/lib -Wl,-whole-archive -Wl,-lbglbridge -Wl,-no-whole-archive $bgl_dir/lib/bglbootload.a $bgl_dir/lib/bglsp440supt.a -lbgldb -lbglmachine -ltableapi -lexpat -lbglsp"
+ bgl_ldflags="$bgl_ldflags -Wl,-rpath $bgl_dir/lib -Wl,-L$bgl_dir/lib -Wl,-whole-archive -Wl,-lbglbridge -Wl,-no-whole-archive $bgl_dir/lib/bglbootload.a $bgl_dir/lib/bglsp440supt.a -lsaymessage -lbgldb -lbglmachine -ltableapi -lexpat -lbglsp"
fi
# Search for required DB2 library in the directory
View
@@ -99,7 +99,7 @@ <h3>User Tools</h3>
The script that you submit to SLURM can contain multiple invocations of mpirun as
well as any desired commands for pre- and post-processing.
The mpirun command will get its <i>bglblock</i> or BGL partition information from the
-<i>BGL_PARTITION_ID</i> as set by SLURM. A sample script is shown below.
+<i>MPIRUN_PARTITION</i> as set by SLURM. A sample script is shown below.
<pre>
#!/bin/bash
# pre-processing
@@ -185,11 +185,11 @@ <h3>System Administration</h3>
and interfaces.
The value of <i>SchedulerType</i> should be set to "sched/builtin".
The value of <i>Prolog</i> should be set to a program that will delay
-execution until the bglblock identified by the BGL_PARTITION_ID environment
+execution until the bglblock identified by the MPIRUN_PARTITION environment
variable is ready for use. It is recommended that you construct a script
that serves this function and calls the supplied program <i>slurm_prolog</i>.
The value of <i>Epilog</i> should be set to a program that will wait
-until the bglblock identified by the BGL_PARTITION_ID environment
+until the bglblock identified by the MPIRUN_PARTITION environment
variable has been freed. It is recommended that you construct a script
that serves this function and calls the supplied program <i>slurm_epilog</i>.
The prolog and epilog programs are used to insure proper synchronization
View
@@ -1,4 +1,4 @@
-.TH SINFO "1" "February 2005" "sinfo 0.4" "Slurm components"
+.TH SINFO "1" "April 2005" "sinfo 0.4" "Slurm components"
.SH "NAME"
sinfo \- view information about SLURM nodes and partitions.
@@ -11,12 +11,15 @@ system running SLURM.
.SH "OPTIONS"
.TP
-\fB\-\-all\fR,
+\fB\-a\fR, \fB\-\-all\fR
Display information about all partitions. This causes information to be
displayed about partitions that are configured as hidden and partitions that
are unavailable to user's group.
.TP
-\fB\-\-help\fR,
+\fB\-b\fR, \fB\-\-bgl\fR
+Display information about bglblocks (on Blue Gene systems only).
+.TP
+\fB\-\-help\fR
Print a message describing all \fBsinfo\fR options.
.TP
\fB\-\-hide\fR
@@ -40,21 +43,21 @@ same partition and state (e.g., "250+").
\fB\-h\fR, \fB\-\-noheader\fR
Do not print a header on the output.
.TP
-\fB\-i <seconds>\fR , \fB\-\-iterate=<seconds>\fR
+\fB\-i <seconds>\fR, \fB\-\-iterate=<seconds>\fR
Print the state on a periodic basis.
Sleep for the indicated number of seconds between reports.
.TP
-\fB\-l\fR , \fB\-\-long\fR
+\fB\-l\fR, \fB\-\-long\fR
Print more detailed information.
This is ignored if the \fB\-\-format\fR option is specified.
.TP
-\fB\-n <nodes>\fR , \fB\-\-nodes=<nodes>\fR
+\fB\-n <nodes>\fR, \fB\-\-nodes=<nodes>\fR
Print information only about the specified node(s).
Multiple nodes may be comma separated or expressed using a
node range expression. For example "linux[00-07]" would
indicate eight nodes, "linux00" through "linux07."
.TP
-\fB\-N\fR , \fB\-\-Node\fR
+\fB\-N\fR, \fB\-\-Node\fR
Print information in a node-oriented format.
The default is to print information in a partition-oriented format.
This is ignored if the \fB\-\-format\fR option is specified.
@@ -176,11 +179,11 @@ nodes that are not down or drained will not produce any output.
When used with \fB\-l\fR the output additionally includes
the current node state.
.TP
-\fB\-s\fR , \fB\-\-summarize\fR
+\fB\-s\fR, \fB\-\-summarize\fR
List only a partition state summary with no node state details.
This is ignored if the \fB\-\-format\fR option is specified.
.TP
-\fB\-S <sort_list>\fR , \fB\-\-sort=<sort_list>\fR
+\fB\-S <sort_list>\fR, \fB\-\-sort=<sort_list>\fR
Specification of the order in which records should be reported.
This uses the same field specification as the <output_format>.
Multiple sorts may be performed by listing multiple sort fields
@@ -209,10 +212,10 @@ the responding flag.
\fB\-p <partition>\fR, \fB\-\-partition=<partition>\fR
Print information only about the specified partition.
.TP
-\fB\-v\fR , \fB\-\-verbose\fR
+\fB\-v\fR, \fB\-\-verbose\fR
Provide detailed event logging through program execution.
.TP
-\fB\-V\fR , \fB\-\-version\fR
+\fB\-V\fR, \fB\-\-version\fR
Print version information and exit.
.SH "OUTPUT FIELD DESCRIPTIONS"
View
@@ -55,6 +55,10 @@ Do not print a header on the output.
\fB\-c\fR, \fB\-\-commandline\fR
Print output to the commandline, no curses.
.TP
+\fB\-p\fR, \fB\-\-parse\fR
+Used with the -c commandline option. Don't format output; send only single
+tab-delimited output to stdout.
+.TP
\fB\-i <seconds>\fR , \fB\-\-iterate=<seconds>\fR
Print the state on a periodic basis.
Sleep for the indicated number of seconds between reports.
View
@@ -5,17 +5,30 @@
# LinuxImage: LinuxImage used for creation of all bglblocks.
# MloaderImage: MloaderImage used for creation of all bglblocks.
# RamDiskImage: RamDiskImage used for creation of all bglblocks.
-# ChangeNumpsets: Script to reset a created bglblock's Numpsets value.
-# Will be removed when an API is available for this.
-# Default value uses half of available I/O nodes.
+# Numpsets: The Numpsets used for creation of all bglblocks
+# equals this value multiplied by the number of
+# base partitions in the bglblock.
+#
+# BridgeAPILogFile : Pathname of file in which to write the BGL
+# Bridge API logs.
+# BridgeAPIVerbose: How verbose the BGL Bridge API logs should be
+# 0: Log only error and warning messages
+# 1: Log level 0 and information messages
+# 2: Log level 1 and basic debug messages
+# 3: Log level 2 and more debug messages
+# 4: Log all messages
+#
# NOTE: The bgl_serial value is set at configuration time using the
# "--with-bgl-serial=" option. Its default value is "BGL".
#
BlrtsImage=/bgl/BlueLight/ppcfloor/bglsys/bin/rts_hw.rts
LinuxImage=/bgl/BlueLight/ppcfloor/bglsys/bin/zImage.elf
MloaderImage=/bgl/BlueLight/ppcfloor/bglsys/bin/mmcs-mloader.rts
RamDiskImage=/bgl/BlueLight/ppcfloor/bglsys/bin/ramdisk.elf
-#ChangeNumpsets=/etc/slurm/change_numpsets
+Numpsets=8
+#
+BridgeAPILogFile=/var/log/slurm/bridgeapi.log
+BridgeAPIVerbose=0
#
# Define the static partitions (bglblocks)
View
@@ -68,11 +68,6 @@ Summary: SLURM scheduling plugin for the Maui scheduler.
Group: System Environment/Base
Requires: slurm
-%package smap
-Summary: SLURM GUI.
-Group: System Environment/Base
-Requires: slurm
-
%package switch-elan
Summary: SLURM switch plugin for Quadrics Elan3 or Elan4.
Group: System Environment/Base
@@ -103,9 +98,6 @@ SLURM plugin interfaces to IBM Blue Gene system
%description sched-wiki
SLURM scheduling plugin for the Maui scheduler.
-%description smap
-SLURM GUI.
-
%description switch-elan
SLURM switch plugin for Quadrics Elan3 or Elan4.
@@ -167,9 +159,11 @@ rm -rf $RPM_BUILD_ROOT
%{_bindir}/scancel
%{_bindir}/scontrol
%{_bindir}/sinfo
+%{_bindir}/smap
%{_bindir}/squeue
%{_bindir}/srun
-%{_sbindir}/*
+%{_sbindir}/slurmctld
+%{_sbindir}/slurmd
%{_libdir}/*.so*
%{_libdir}/slurm/src/*
%{_mandir}/man1/*
@@ -226,11 +220,6 @@ rm -rf $RPM_BUILD_ROOT
%{_libdir}/slurm/sched_wiki.so
#############################################################################
-%files smap
-%defattr(-,root,root,0755)
-%{_bindir}/smap
-#############################################################################
-
%files -f switch_elan.files switch-elan
%defattr(-,root,root)
#############################################################################
@@ -267,6 +256,10 @@ fi
%changelog
+* Mon Apr 11 2005 Morris Jette <jette1@llnl.gov>
+- move smap executable within main slurm rpm
+* Thu Apr 07 2005 Morris Jette <jette1@llnl.gov>
+- remove duplicate prolog and epilog from primary slurm rpm
* Thu Feb 24 2005 Morris Jette <jette1@llnl.gov>
- added bluegene.conf.example to distribution
- added slurm_epilog and slurm_prolog to bluegene package
View
@@ -51,12 +51,12 @@ libslurm_la_SOURCES = \
complete.c \
config_info.c \
init_msg.c \
- job_info.c \
- job_info.h \
+ job_info.c job_info.h \
job_step_info.c \
node_info.c \
+ node_select_info.c node_select_info.h \
partition_info.c \
- spawn.c \
+ spawn.c \
submit.c \
reconfigure.c \
update_config.c
View
@@ -390,3 +390,34 @@ extern int slurm_get_select_jobinfo (select_jobinfo_t jobinfo,
{
return select_g_get_jobinfo (jobinfo, data_type, data);
}
+
+/*
+ * slurm_job_node_ready - report if nodes are ready for job to execute now
+ *	Sends a REQUEST_JOB_READY message carrying job_id to the controller
+ *	and decodes the reply.
+ * IN job_id - slurm job id
+ * RET: READY_* values as defined in api/job_info.h: the return code carried
+ *	by a RESPONSE_JOB_READY reply, READY_JOB_ERROR for any other reply
+ *	type, or -1 if the message exchange with the controller fails
+ */
+extern int slurm_job_node_ready(uint32_t job_id)
+{
+ slurm_msg_t req, resp;
+ job_id_msg_t msg;
+ int rc;
+
+ req.msg_type = REQUEST_JOB_READY;
+ req.data = &msg; /* request payload is the local job_id_msg_t */
+ msg.job_id = job_id;
+
+ if (slurm_send_recv_controller_msg(&req, &resp) < 0)
+ return -1; /* send/receive failed, no reply to decode */
+
+ if (resp.msg_type == RESPONSE_JOB_READY) {
+ rc = ((return_code_msg_t *) resp.data)->return_code;
+ slurm_free_return_code_msg(resp.data);
+ } else if (resp.msg_type == RESPONSE_SLURM_RC) {
+ rc = READY_JOB_ERROR; /* the reply's own return code is discarded */
+ slurm_free_return_code_msg(resp.data);
+ } else
+ rc = READY_JOB_ERROR; /* NOTE(review): resp.data is not freed on this path -- confirm nothing leaks */
+
+ return rc;
+}
+
View
@@ -1,9 +1,11 @@
/*****************************************************************************\
* job_info.h - get/print the job state information of slurm
+ *
+ * $Id$
*****************************************************************************
* Copyright (C) 2004 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Moe Jette <jette1@llnl.gov> et. al.
+ * Written by Morris Jette <jette1@llnl.gov> et. al.
* UCRL-CODE-2002-040.
*
* This file is part of SLURM, a resource management program.
@@ -27,11 +29,24 @@
#ifndef _JOB_INFO_H
#define _JOB_INFO_H
+#include <stdint.h>
+
/*
* slurm_make_time_str - convert time_t to string "month/date hour:min:sec"
* IN time - a time stamp
* OUT string - pointer user defined buffer
*/
extern void slurm_make_time_str (time_t *time, char *string);
+#define READY_JOB_ERROR -1
+#define READY_NODE_STATE 0x01
+#define READY_JOB_STATE 0x02
+
+/*
+ * slurm_job_node_ready - report if nodes are ready for job to execute now
+ * IN job_id - slurm job id
+ * RET: READY_* values as defined in api/job_info.h
+ */
+extern int slurm_job_node_ready(uint32_t job_id);
+
#endif
Oops, something went wrong.

0 comments on commit 9d1b373

Please sign in to comment.