Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

This commit was manufactured by cvs2svn to create tag

'slurm-0-4-25-1'.
  • Loading branch information...
commit 0a82f2d4c0ab2202ca22c12fa5817d7a05160ed7 1 parent 5ab7088
no author authored
Showing with 4,212 additions and 2,321 deletions.
  1. +2 −2 META
  2. +105 −1 NEWS
  3. +2 −2 auxdir/x_ac_bgl.m4
  4. +3 −3 doc/html/bluegene.html
  5. +14 −11 doc/man/man1/sinfo.1
  6. +4 −0 doc/man/man1/smap.1
  7. +17 −4 etc/bluegene.conf.example
  8. +11 −18 slurm.spec.in
  9. +3 −3 src/api/Makefile.am
  10. +31 −0 src/api/job_info.c
  11. +16 −1 src/api/job_info.h
  12. +108 −0 src/api/node_select_info.c
  13. +76 −0 src/api/node_select_info.h
  14. +136 −4 src/common/node_select.c
  15. +32 −2 src/common/node_select.h
  16. +0 −2  src/common/slurm_protocol_api.c
  17. +7 −0 src/common/slurm_protocol_defs.c
  18. +12 −1 src/common/slurm_protocol_defs.h
  19. +97 −8 src/common/slurm_protocol_pack.c
  20. +1 −1  src/partition_allocator/Makefile.am
  21. +289 −247 src/partition_allocator/partition_allocator.c
  22. +32 −18 src/partition_allocator/partition_allocator.h
  23. +1 −1  src/plugins/jobcomp/filetxt/jobcomp_filetxt.c
  24. +7 −2 src/plugins/select/bluegene/Makefile.am
  25. +22 −24 src/plugins/select/bluegene/bgl_job_place.c
  26. +465 −372 src/plugins/select/bluegene/bgl_job_run.c
  27. +7 −0 src/plugins/select/bluegene/bgl_job_run.h
  28. +300 −0 src/plugins/select/bluegene/bgl_part_info.c
  29. +36 −0 src/plugins/select/bluegene/bgl_part_info.h
  30. +223 −70 src/plugins/select/bluegene/bgl_switch_connections.c
  31. +859 −430 src/plugins/select/bluegene/bluegene.c
  32. +52 −25 src/plugins/select/bluegene/bluegene.h
  33. +255 −304 src/plugins/select/bluegene/partition_sys.c
  34. +61 −3 src/plugins/select/bluegene/select_bluegene.c
  35. +82 −76 src/plugins/select/bluegene/slurm_epilog.c
  36. +114 −166 src/plugins/select/bluegene/slurm_prolog.c
  37. +88 −46 src/plugins/select/bluegene/state_test.c
  38. +3 −1 src/plugins/select/bluegene/wrap_rm_api.h
  39. +16 −0 src/plugins/select/linear/select_linear.c
  40. +1 −1  src/sinfo/Makefile.am
  41. +10 −4 src/sinfo/opts.c
  42. +117 −11 src/sinfo/sinfo.c
  43. +4 −1 src/sinfo/sinfo.h
  44. +3 −0  src/slurmctld/controller.c
  45. +35 −4 src/slurmctld/job_mgr.c
  46. +7 −3 src/slurmctld/node_mgr.c
  47. +74 −0 src/slurmctld/proc_req.c
  48. +14 −3 src/slurmctld/slurmctld.h
  49. +7 −5 src/slurmd/mgr.c
  50. +1 −1  src/slurmd/mgr.h
  51. +1 −23 src/smap/Makefile.am
  52. +59 −32 src/smap/configure_functions.c
  53. +16 −22 src/smap/grid_functions.c
  54. +48 −48 src/smap/job_functions.c
  55. +16 −6 src/smap/opts.c
  56. +111 −245 src/smap/partition_functions.c
  57. +73 −60 src/smap/smap.c
  58. +23 −1 src/smap/smap.h
  59. +3 −3 src/srun/srun.c
View
4 META
@@ -9,8 +9,8 @@
Name: slurm
Major: 0
Minor: 4
- Micro: 6
- Version: 0.4.6
+ Micro: 25
+ Version: 0.4.25
Release: 1
API_CURRENT: 6
API_AGE: 4
View
106 NEWS
@@ -1,9 +1,113 @@
This file describes changes in recent versions of SLURM. It primarily
documents those changes that are of interest to users and admins.
+* Changes in SLURM 0.4.25
+=========================
+ -- Added another directory to search for DB2 on BGL system.
+
+* Changes in SLURM 0.4.24
+=========================
+ -- DRAIN nodes with switches on base partitions are in ERROR, MISSING,
+ or DOWN states.
+
+* Changes in SLURM 0.4.23
+=========================
+ -- Modified bluegene plugin to only sync bglblocks to jobs on initial
+ startup, not on reconfig. Fixes race condition.
+ -- Modified bluegene plugin to work with 141 driver. Enabling it to
+ only have to reboot when switching from coproc -> virtual and back.
+ -- added support for a full system partition to make sure every other
+ partition is free and vice versa.
+ -- smap resizing issue fixed.
+ -- change prolog not to add time when a partition is in deallocating
+ state.
+ -- NOTE: This version of SLURM requires BGL driver 141/2005.
+
+* Changes in SLURM 0.4.22
+=========================
+ -- Modified bluegene plugin to not do anything if the bluegene.conf file
+ is altered.
+ -- added checking for lists before trying to create iterator on the list.
+
+* Changes in SLURM 0.4.21
+=========================
+ -- Fix in race condition with time in Status Thread of BGL
+ -- Fix no leading zeros in smap output.
+
+* Changes in SLURM 0.4.20
+=========================
+ -- Smap output is more user friendly with -c option
+
+* Changes in SLURM 0.4.19
+=========================
+ -- Added new RPCs for getting bglblock state info remotely and cache data
+ within the plugin (permits removal of DB2 access from BGL FEN and
+ dramatically increases smap responsiveness, also changed prolog/epilog
+ operation)
+ -- Move smap executable to main slurm RPM (from separate RPM).
+ -- smap uses RPC instead of DB2 to get info about bgl partitions.
+ -- Status function added to bluegene_agent thread. Keeps current state
+ of BGL partitions updating every second. will handle multiple attempts
+ at booting if booting a partition fails.
+
+* Changes in SLURM 0.4.18
+=========================
+ -- Added error checking of rm_remove_partition calls.
+ -- job_term() was terminating a job in real time rather than
+ queueing the request. This would result in slurmctld hanging
+ for many seconds when a job termination was required.
+
+* Changes in SLURM 0.4.17
+========================
+ -- Bug fixes from testing .16.
+
+* Changes in SLURM 0.4.16
+========================
+ -- Added error checking to a bunch of Bridge API calls and more
+ gracefully handle failure modes.
+ -- Made smap more robust for more jobs.
+
+* Changes in SLURM 0.4.15
+========================
+ -- Added error checking to a bunch of Bridge API calls and more
+ gracefully handle failure modes.
+
+* Changes in SLURM 0.4.14
+========================
+ -- job state is kept on warm start of slurm
+
+* Changes in SLURM 0.4.13
+========================
+ -- epilog fix for bgl plugin
+
+* Changes in SLURM 0.4.12
+========================
+ -- bug shot for new api calls.
+ -- added BridgeAPILogFile as an option for bluegene.conf file
+
+* Changes in SLURM 0.4.11
+========================
+ -- changed as many rm_get_partition() to rm_get_partitions_info as we could
+ for time saving.
+
+* Changes in SLURM 0.4.10
+========================
+ -- redesign for BGL external wiring.
+ -- smap display bug fix for smaller systems.
+
+* Changes in SLURM 0.4.9
+========================
+ -- setpnum works now, have to include this in bluegene.conf
+
+* Changes in SLURM 0.4.8
+========================
+ -- Changed the prolog and the epilog to use the env var MPIRUN_PARTITION
+ instead of BGL_PARTITION_ID
+
* Changes in SLURM 0.4.7
========================
- -- Remove some BGL specific headers that IBM now distributes.
+ -- Remove some BGL specific headers that IBM now distributes, NOTE
+ BGL driver 080 or greater required.
-- Change autogen.sh to deal with problems running autoconf on one
system and configure on another with different software versions.
View
4 auxdir/x_ac_bgl.m4
@@ -14,7 +14,7 @@
AC_DEFUN([X_AC_BGL],
[
- bgl_default_dirs="/bgl/BlueLight/ppcfloor/bglsys /home/bgdb2cli/sqllib"
+ bgl_default_dirs="/bgl/BlueLight/ppcfloor/bglsys /home/bgdb2cli/sqllib /u/bgdb2cli/sqllib"
for bgl_dir in $bgl_default_dirs; do
# Skip directories that don't exist
@@ -33,7 +33,7 @@ AC_DEFUN([X_AC_BGL],
fi
have_bgl_ar=yes
- bgl_ldflags="$bgl_ldflags -Wl,-rpath $bgl_dir/lib -Wl,-L$bgl_dir/lib -Wl,-whole-archive -Wl,-lbglbridge -Wl,-no-whole-archive $bgl_dir/lib/bglbootload.a $bgl_dir/lib/bglsp440supt.a -lbgldb -lbglmachine -ltableapi -lexpat -lbglsp"
+ bgl_ldflags="$bgl_ldflags -Wl,-rpath $bgl_dir/lib -Wl,-L$bgl_dir/lib -Wl,-whole-archive -Wl,-lbglbridge -Wl,-no-whole-archive $bgl_dir/lib/bglbootload.a $bgl_dir/lib/bglsp440supt.a -lsaymessage -lbgldb -lbglmachine -ltableapi -lexpat -lbglsp"
fi
# Search for required DB2 library in the directory
View
6 doc/html/bluegene.html
@@ -99,7 +99,7 @@
The script that you submit to SLURM can contain multiple invocations of mpirun as
well as any desired commands for pre- and post-processing.
The mpirun command will get its <i>bglblock</i> or BGL partition information from the
-<i>BGL_PARTITION_ID</i> as set by SLURM. A sample script is shown below.
+<i>MPIRUN_PARTITION</i> as set by SLURM. A sample script is shown below.
<pre>
#!/bin/bash
# pre-processing
@@ -185,11 +185,11 @@
and interfaces.
The value of <i>SchedulerType</i> should be set to "sched/builtin".
The value of <i>Prolog</i> should be set to a program that will delay
-execution until the bglblock identified by the BGL_PARTITION_ID environment
+execution until the bglblock identified by the MPIRUN_PARTITION environment
variable is ready for use. It is recommended that you construct a script
that serves this function and calls the supplied program <i>slurm_prolog</i>.
The value of <i>Epilog</i> should be set to a program that will wait
-until the bglblock identified by the BGL_PARTITION_ID environment
+until the bglblock identified by the MPIRUN_PARTITION environment
variable has been freed. It is recommended that you construct a script
that serves this function and calls the supplied program <i>slurm_epilog</i>.
The prolog and epilog programs are used to ensure proper synchronization
View
25 doc/man/man1/sinfo.1
@@ -1,4 +1,4 @@
-.TH SINFO "1" "February 2005" "sinfo 0.4" "Slurm components"
+.TH SINFO "1" "April 2005" "sinfo 0.4" "Slurm components"
.SH "NAME"
sinfo \- view information about SLURM nodes and partitions.
@@ -11,12 +11,15 @@ system running SLURM.
.SH "OPTIONS"
.TP
-\fB\-\-all\fR,
+\fB\-a\fR, \fB\-\-all\fR
Display information about all partitions. This causes information to be
displayed about partitions that are configured as hidden and partitions that
are unavailable to user's group.
.TP
-\fB\-\-help\fR,
+\fB\-b\fR, \fB\-\-bgl\fR
+Display information about bglblocks (on Blue Gene systems only).
+.TP
+\fB\-\-help\fR
Print a message describing all \fBsinfo\fR options.
.TP
\fB\-\-hide\fR
@@ -40,21 +43,21 @@ same partition and state (e.g., "250+").
\fB\-h\fR, \fB\-\-noheader\fR
Do not print a header on the output.
.TP
-\fB\-i <seconds>\fR , \fB\-\-iterate=<seconds>\fR
+\fB\-i <seconds>\fR, \fB\-\-iterate=<seconds>\fR
Print the state on a periodic basis.
Sleep for the indicated number of seconds between reports.
.TP
-\fB\-l\fR , \fB\-\-long\fR
+\fB\-l\fR, \fB\-\-long\fR
Print more detailed information.
This is ignored if the \fB\-\-format\fR option is specified.
.TP
-\fB\-n <nodes>\fR , \fB\-\-nodes=<nodes>\fR
+\fB\-n <nodes>\fR, \fB\-\-nodes=<nodes>\fR
Print information only about the specified node(s).
Multiple nodes may be comma separated or expressed using a
node range expression. For example "linux[00-07]" would
indicate eight nodes, "linux00" through "linux07."
.TP
-\fB\-N\fR , \fB\-\-Node\fR
+\fB\-N\fR, \fB\-\-Node\fR
Print information in a node-oriented format.
The default is to print information in a partition-oriented format.
This is ignored if the \fB\-\-format\fR option is specified.
@@ -176,11 +179,11 @@ nodes that are not down or drained will not produce any output.
When used with \fB\-l\fR the output additionally includes
the current node state.
.TP
-\fB\-s\fR , \fB\-\-summarize\fR
+\fB\-s\fR, \fB\-\-summarize\fR
List only a partition state summary with no node state details.
This is ignored if the \fB\-\-format\fR option is specified.
.TP
-\fB\-S <sort_list>\fR , \fB\-\-sort=<sort_list>\fR
+\fB\-S <sort_list>\fR, \fB\-\-sort=<sort_list>\fR
Specification of the order in which records should be reported.
This uses the same field specification as the <output_format>.
Multiple sorts may be performed by listing multiple sort fields
@@ -209,10 +212,10 @@ the responding flag.
\fB\-p <partition>\fR, \fB\-\-partition=<partition>\fR
Print information only about the specified partition.
.TP
-\fB\-v\fR , \fB\-\-verbose\fR
+\fB\-v\fR, \fB\-\-verbose\fR
Provide detailed event logging through program execution.
.TP
-\fB\-V\fR , \fB\-\-version\fR
+\fB\-V\fR, \fB\-\-version\fR
Print version information and exit.
.SH "OUTPUT FIELD DESCRIPTIONS"
View
4 doc/man/man1/smap.1
@@ -55,6 +55,10 @@ Do not print a header on the output.
\fB\-c\fR, \fB\-\-commandline\fR
Print output to the commandline, no curses.
.TP
+\fB\-p\fR, \fB\-\-parse\fR
+Used with the -c commandline option. Don't format the output; send only
+single tab-delimited output to stdout.
+.TP
\fB\-i <seconds>\fR , \fB\-\-iterate=<seconds>\fR
Print the state on a periodic basis.
Sleep for the indicated number of seconds between reports.
View
21 etc/bluegene.conf.example
@@ -5,9 +5,19 @@
# LinuxImage: LinuxImage used for creation of all bglblocks.
# MloaderImage: MloaderImage used for creation of all bglblocks.
# RamDiskImage: RamDiskImage used for creation of all bglblocks.
-# ChangeNumpsets: Script to reset a created bglblock's Numpsets value.
-# Will be removed when an API is available for this.
-# Default value uses half of available I/O nodes.
+# Numpsets: The Numpsets used for creation of all bglblocks
+# equals this value multiplied by the number of
+# base partitions in the bglblock.
+#
+# BridgeAPILogFile : Pathname of file in which to write the BGL
+# Bridge API logs.
+# BridgeAPIVerbose: How verbose the BGL Bridge API logs should be
+# 0: Log only error and warning messages
+# 1: Log level 0 and information messages
+# 2: Log level 1 and basic debug messages
+# 3: Log level 2 and more debug messages
+# 4: Log all messages
+#
# NOTE: The bgl_serial value is set at configuration time using the
# "--with-bgl-serial=" option. Its default value is "BGL".
#
@@ -15,7 +25,10 @@ BlrtsImage=/bgl/BlueLight/ppcfloor/bglsys/bin/rts_hw.rts
LinuxImage=/bgl/BlueLight/ppcfloor/bglsys/bin/zImage.elf
MloaderImage=/bgl/BlueLight/ppcfloor/bglsys/bin/mmcs-mloader.rts
RamDiskImage=/bgl/BlueLight/ppcfloor/bglsys/bin/ramdisk.elf
-#ChangeNumpsets=/etc/slurm/change_numpsets
+Numpsets=8
+#
+BridgeAPILogFile=/var/log/slurm/bridgeapi.log
+BridgeAPIVerbose=0
#
# Define the static partitions (bglblocks)
View
29 slurm.spec.in
@@ -68,11 +68,6 @@ Summary: SLURM scheduling plugin for the Maui scheduler.
Group: System Environment/Base
Requires: slurm
-%package smap
-Summary: SLURM GUI.
-Group: System Environment/Base
-Requires: slurm
-
%package switch-elan
Summary: SLURM switch plugin for Quadrics Elan3 or Elan4.
Group: System Environment/Base
@@ -103,9 +98,6 @@ SLURM plugin interfaces to IBM Blue Gene system
%description sched-wiki
SLURM scheduling plugin for the Maui scheduler.
-%description smap
-SLURM GUI.
-
%description switch-elan
SLURM switch plugin for Quadrics Elan3 or Elan4.
@@ -135,8 +127,8 @@ mkdir -p "$RPM_BUILD_ROOT"
DESTDIR="$RPM_BUILD_ROOT" make install
install -D -m755 etc/init.d.slurm $RPM_BUILD_ROOT/etc/init.d/slurm
-install -D -m644 etc/slurm.conf.example $RPM_BUILD_ROOT/etc/slurm/slurm.conf
-install -D -m644 etc/bluegene.conf.example $RPM_BUILD_ROOT/etc/slurm/bluegene.conf
+install -D -m644 etc/slurm.conf.example $RPM_BUILD_ROOT/etc/slurm/slurm.conf.example
+install -D -m644 etc/bluegene.conf.example $RPM_BUILD_ROOT/etc/slurm/bluegene.conf.example
# Delete unpackaged files:
rm -f $RPM_BUILD_ROOT/%{_libdir}/slurm/*.{a,la}
@@ -167,9 +159,11 @@ rm -rf $RPM_BUILD_ROOT
%{_bindir}/scancel
%{_bindir}/scontrol
%{_bindir}/sinfo
+%{_bindir}/smap
%{_bindir}/squeue
%{_bindir}/srun
-%{_sbindir}/*
+%{_sbindir}/slurmctld
+%{_sbindir}/slurmd
%{_libdir}/*.so*
%{_libdir}/slurm/src/*
%{_mandir}/man1/*
@@ -188,7 +182,7 @@ rm -rf $RPM_BUILD_ROOT
%{_libdir}/slurm/switch_none.so
%dir %{_libdir}/slurm/src
%config(noreplace) /etc/init.d/slurm
-%config(noreplace) /etc/slurm/slurm.conf
+%config(noreplace) /etc/slurm/slurm.conf.example
#############################################################################
%files devel
@@ -218,7 +212,7 @@ rm -rf $RPM_BUILD_ROOT
%{_sbindir}/slurm_epilog
%{_sbindir}/slurm_prolog
%{_libdir}/slurm/select_bluegene.so
-%config(noreplace) /etc/slurm/bluegene.conf
+%config(noreplace) /etc/slurm/bluegene.conf.example
#############################################################################
%files sched-wiki
@@ -226,11 +220,6 @@ rm -rf $RPM_BUILD_ROOT
%{_libdir}/slurm/sched_wiki.so
#############################################################################
-%files smap
-%defattr(-,root,root,0755)
-%{_bindir}/smap
-#############################################################################
-
%files -f switch_elan.files switch-elan
%defattr(-,root,root)
#############################################################################
@@ -267,6 +256,10 @@ fi
%changelog
+* Mon Apr 11 2005 Morris Jette <jette1@llnl.gov>
+- move smap executable within main slurm rpm
+* Thu Apr 07 2005 Morris Jette <jette1@llnl.gov>
+- remove duplicate prolog and epilog from primary slurm rpm
* Thu Feb 24 2005 Morris Jette <jette1@llnl.gov>
- added bluegene.conf.example to distribution
- added slurm_epilog and slurm_prolog to bluegene package
View
6 src/api/Makefile.am
@@ -51,12 +51,12 @@ libslurm_la_SOURCES = \
complete.c \
config_info.c \
init_msg.c \
- job_info.c \
- job_info.h \
+ job_info.c job_info.h \
job_step_info.c \
node_info.c \
+ node_select_info.c node_select_info.h \
partition_info.c \
- spawn.c \
+ spawn.c \
submit.c \
reconfigure.c \
update_config.c
View
31 src/api/job_info.c
@@ -390,3 +390,34 @@ extern int slurm_get_select_jobinfo (select_jobinfo_t jobinfo,
{
return select_g_get_jobinfo (jobinfo, data_type, data);
}
+
+/*
+ * slurm_job_node_ready - report if nodes are ready for job to execute now
+ * IN job_id - slurm job id
+ * RET: READY_* values as defined in api/job_info.h; READY_JOB_ERROR if the
+ *	request could not be sent or the controller answered with anything
+ *	other than RESPONSE_JOB_READY
+ */
+extern int slurm_job_node_ready(uint32_t job_id)
+{
+	slurm_msg_t req, resp;
+	job_id_msg_t msg;
+	int rc = READY_JOB_ERROR;
+
+	msg.job_id   = job_id;
+	req.msg_type = REQUEST_JOB_READY;
+	req.data     = &msg;
+
+	if (slurm_send_recv_controller_msg(&req, &resp) < 0)
+		return READY_JOB_ERROR;
+
+	/* The credential attached to the reply is not used here; release it
+	 * (as slurm_load_node_select does) so it is not leaked on every call. */
+	slurm_free_cred(resp.cred);
+
+	switch (resp.msg_type) {
+	case RESPONSE_JOB_READY:
+		rc = ((return_code_msg_t *) resp.data)->return_code;
+		slurm_free_return_code_msg(resp.data);
+		break;
+	case RESPONSE_SLURM_RC:
+		/* the controller refused the request: rc stays
+		 * READY_JOB_ERROR, but the reply body must still be freed */
+		slurm_free_return_code_msg(resp.data);
+		break;
+	default:
+		/* unexpected message type: report an error */
+		break;
+	}
+
+	return rc;
+}
+
View
17 src/api/job_info.h
@@ -1,9 +1,11 @@
/*****************************************************************************\
* job_info.h - get/print the job state information of slurm
+ *
+ * $Id$
*****************************************************************************
* Copyright (C) 2004 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
- * Written by Moe Jette <jette1@llnl.gov> et. al.
+ * Written by Morris Jette <jette1@llnl.gov> et. al.
* UCRL-CODE-2002-040.
*
* This file is part of SLURM, a resource management program.
@@ -27,6 +29,8 @@
#ifndef _JOB_INFO_H
#define _JOB_INFO_H
+#include <stdint.h>
+
/*
* slurm_make_time_str - convert time_t to string "month/date hour:min:sec"
* IN time - a time stamp
@@ -34,4 +38,15 @@
*/
extern void slurm_make_time_str (time_t *time, char *string);
+#define READY_JOB_ERROR -1
+#define READY_NODE_STATE 0x01
+#define READY_JOB_STATE 0x02
+
+/*
+ * slurm_job_node_ready - report if nodes are ready for job to execute now
+ * IN job_id - slurm job id
+ * RET: READY_* values as defined in api/job_info.h
+ */
+extern int slurm_job_node_ready(uint32_t job_id);
+
#endif
View
108 src/api/node_select_info.c
@@ -0,0 +1,108 @@
+/*****************************************************************************\
+ * node_select_info.c - get the node select plugin state information of slurm
+ *
+ * $Id$
+ *****************************************************************************
+ * Copyright (C) 2005 The Regents of the University of California.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Morris Jette <jette1@llnl.gov>
+ * UCRL-CODE-2002-040.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://www.llnl.gov/linux/slurm/>.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+\*****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+
+#include <slurm/slurm.h>
+
+#include "src/api/node_select_info.h"
+#include "src/common/slurm_protocol_api.h"
+
+/*
+ * slurm_load_node_select - ask the controller for the node select plugin
+ *	information, if it changed since update_time
+ * IN update_time - time stamp of the data the caller already holds
+ * OUT node_select_info_msg_pptr - receives the response message, or NULL
+ *	when the controller answers with a zero return code or with an
+ *	unexpected message type
+ * RET 0 or a slurm error code
+ * NOTE: free the response using slurm_free_node_select_info_msg
+ */
+extern int slurm_load_node_select (time_t update_time,
+		node_select_info_msg_t **node_select_info_msg_pptr)
+{
+	node_info_select_request_msg_t request;
+	slurm_msg_t query, reply;
+	int err_code;
+
+	request.last_update = update_time;
+	query.msg_type = REQUEST_NODE_SELECT_INFO;
+	query.data = &request;
+
+	if (slurm_send_recv_controller_msg(&query, &reply) < 0)
+		return SLURM_ERROR;
+
+	slurm_free_cred(reply.cred);
+
+	if (reply.msg_type == RESPONSE_NODE_SELECT_INFO) {
+		/* hand the unpacked message straight to the caller */
+		*node_select_info_msg_pptr =
+			(node_select_info_msg_t *) reply.data;
+	} else if (reply.msg_type == RESPONSE_SLURM_RC) {
+		err_code = ((return_code_msg_t *) reply.data)->return_code;
+		slurm_free_return_code_msg(reply.data);
+		if (err_code)
+			slurm_seterrno_ret(err_code);
+		*node_select_info_msg_pptr = NULL;
+	} else {
+		*node_select_info_msg_pptr = NULL;
+		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
+	}
+
+	return SLURM_SUCCESS;
+}
+
+/*
+ * slurm_free_node_select_info_msg - free buffer returned by
+ * slurm_load_node_select
+ * IN node_select_info_msg_pptr - pointer is set to NULL
+ * RET 0 (SLURM_SUCCESS), or EINVAL if node_select_info_msg_pptr is NULL
+ * NOTE(review): as written this function releases nothing; it only clears
+ * the caller's pointer, so the buffer handed out by slurm_load_node_select
+ * is leaked. The actual free still has to be implemented.
+ */
+extern int slurm_free_node_select_info_msg (node_select_info_msg_t **
+ node_select_info_msg_pptr)
+{
+ if (node_select_info_msg_pptr == NULL)
+ return EINVAL;
+
+ /* TODO: free the message here; for now only the pointer is reset */
+ *node_select_info_msg_pptr = NULL;
+ return SLURM_SUCCESS;
+}
View
76 src/api/node_select_info.h
@@ -0,0 +1,76 @@
+/*****************************************************************************\
+ * node_select_info.h - get/free node select plugin state information from
+ * slurm
+ * NOTE: This header file is not currently exported
+ * NOTE: This software specifically supports only BlueGene/L for now. It
+ * will be made more general in the future
+ *
+ * $Id$
+ *****************************************************************************
+ * Copyright (C) 2005 The Regents of the University of California.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Morris Jette <jette1@llnl.gov>
+ * UCRL-CODE-2002-040.
+ *
+ * This file is part of SLURM, a resource management program.
+ * For details, see <http://www.llnl.gov/linux/slurm/>.
+ *
+ * SLURM is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with SLURM; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+\*****************************************************************************/
+
+#ifndef _NODE_SELECT_INFO_H
+#define _NODE_SELECT_INFO_H
+
+#include <stdint.h>
+#include <time.h>
+
+typedef struct {
+ char *nodes; /* string; presumably the nodes making up the bglblock - confirm */
+ char *owner_name; /* string; presumably the bglblock's current owner - confirm */
+ char *bgl_part_id; /* string; presumably the bglblock identifier - confirm */
+ int state; /* unpacked from a 16-bit value by select_g_unpack_node_info */
+ int conn_type; /* unpacked from a 16-bit value by select_g_unpack_node_info */
+ int node_use; /* unpacked from a 16-bit value by select_g_unpack_node_info */
+} bgl_info_record_t;
+
+typedef struct {
+ time_t last_update; /* time stamp carried in the response message */
+ uint32_t record_count; /* number of entries in bgl_info_array */
+ bgl_info_record_t *bgl_info_array; /* array of record_count records */
+} node_select_info_msg_t;
+
+/*
+ * slurm_load_node_select - issue RPC to get all slurm node select plugin
+ * information if changed since update_time
+ * IN update_time - time of current configuration data
+ * OUT node_select_info_msg_pptr - place to store a node select configuration
+ * pointer
+ * RET 0 or a slurm error code
+ * NOTE: free the response using slurm_free_node_select_info_msg
+ */
+extern int slurm_load_node_select (time_t update_time,
+ node_select_info_msg_t **node_select_info_msg_pptr);
+
+/*
+ * slurm_free_node_select_info_msg - free buffer returned by
+ * slurm_load_node_select
+ * IN node_select_info_msg_pptr - data is freed and pointer is set to NULL
+ * RET 0 or a slurm error code
+ */
+extern int slurm_free_node_select_info_msg (node_select_info_msg_t **
+ node_select_info_msg_pptr);
+
+#endif
+
View
140 src/common/node_select.c
@@ -1,5 +1,5 @@
/*****************************************************************************\
- * select_plugin.c - node selection plugin wrapper.
+ * node_select.c - node selection plugin wrapper.
*
* NOTE: The node selection plugin itself is intimately tied to
* slurmctld functions and data structures. Some related
@@ -7,6 +7,8 @@
* variable setting) are required by most SLURM commands.
* Rather than creating a new plugin with these commonly
* used functions, they are included within this module.
+ *
+ * $Id$
*****************************************************************************
* Copyright (C) 2002 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -66,7 +68,10 @@ typedef struct slurm_select_ops {
bitstr_t *bitmap, int min_nodes,
int max_nodes );
int (*job_begin) ( struct job_record *job_ptr );
+ int (*job_ready) ( struct job_record *job_ptr );
int (*job_fini) ( struct job_record *job_ptr );
+ int (*pack_node_info) ( time_t last_query_time,
+ Buf *buffer_ptr);
} slurm_select_ops_t;
typedef struct slurm_select_context {
@@ -117,7 +122,9 @@ static slurm_select_ops_t * _select_get_ops(slurm_select_context_t *c)
"select_p_part_init",
"select_p_job_test",
"select_p_job_begin",
- "select_p_job_fini"
+ "select_p_job_ready",
+ "select_p_job_fini",
+ "select_p_pack_node_info"
};
int n_syms = sizeof( syms ) / sizeof( char * );
@@ -341,6 +348,19 @@ extern int select_g_job_begin(struct job_record *job_ptr)
}
/*
+ * determine if job is ready to execute per the node select plugin
+ * IN job_ptr - pointer to job being tested
+ * RET -1 on error, 1 if ready to execute, 0 otherwise
+ */
+extern int select_g_job_ready(struct job_record *job_ptr)
+{
+	/* the plugin must be loaded before its job_ready hook can be used */
+	int init_rc = slurm_select_init();
+
+	if (init_rc >= 0)
+		return (*(g_select_context->ops.job_ready))(job_ptr);
+
+	return -1;
+}
+
+/*
* Note termination of job is starting. Executed from slurmctld.
* IN job_ptr - pointer to job being terminated
*/
@@ -352,6 +372,15 @@ extern int select_g_job_fini(struct job_record *job_ptr)
return (*(g_select_context->ops.job_fini))(job_ptr);
}
+extern int select_g_pack_node_info(time_t last_query_time, Buf *buffer)
+{
+	/* Forward the request to the loaded node select plugin; fail with
+	 * SLURM_ERROR when the plugin cannot be initialized. */
+	int init_rc = slurm_select_init();
+
+	if (init_rc >= 0)
+		return (*(g_select_context->ops.pack_node_info))
+			(last_query_time, buffer);
+
+	return SLURM_ERROR;
+}
+
#ifdef HAVE_BGL /* node selection specific logic */
static char *_job_conn_type_string(uint16_t inx)
{
@@ -433,7 +462,8 @@ extern int select_g_set_jobinfo (select_jobinfo_t jobinfo,
jobinfo->bgl_part_id = xstrdup(tmp_char);
break;
default:
- debug("select_g_set_jobinfo data_type %d invalid", data_type);
+ debug("select_g_set_jobinfo data_type %d invalid",
+ data_type);
}
return rc;
@@ -627,7 +657,7 @@ extern char *select_g_sprint_jobinfo(select_jobinfo_t jobinfo,
break;
case SELECT_PRINT_DATA:
snprintf(buf, size,
- "%7.7s %6.6s %8.8s %ux%ux%u %7s",
+ "%7.7s %6.6s %8.8s %ux%ux%u %16s",
_job_conn_type_string(jobinfo->conn_type),
_job_rotate_string(jobinfo->rotate),
_job_node_use_string(jobinfo->node_use),
@@ -656,6 +686,96 @@ extern char *select_g_sprint_jobinfo(select_jobinfo_t jobinfo,
return buf;
}
+/* Release the three strings owned by one bglblock record. xfree is relied
+ * upon to ignore pointers that are already NULL. */
+static void _free_node_info(bgl_info_record_t *bgl_info_record)
+{
+	xfree(bgl_info_record->nodes);
+	xfree(bgl_info_record->owner_name);
+	xfree(bgl_info_record->bgl_part_id);
+}
+
+/* NOTE: The matching pack functions are directly in the select/bluegene
+ * plugin. The unpack functions can not be there since the plugin is
+ * dependent upon libraries which do not exist on the BlueGene front-end
+ * nodes.
+ *
+ * Unpack one bglblock record: three strings followed by three 16-bit
+ * values. Any strings already unpacked are released if a later field
+ * fails (the safe_unpack* macros jump to unpack_error).
+ * RET SLURM_SUCCESS or SLURM_ERROR */
+static int _unpack_node_info(bgl_info_record_t *bgl_info_record, Buf buffer)
+{
+	uint16_t tmp16;
+
+	/* string fields */
+	safe_unpackstr_xmalloc(&(bgl_info_record->nodes), &tmp16, buffer);
+	safe_unpackstr_xmalloc(&bgl_info_record->owner_name, &tmp16, buffer);
+	safe_unpackstr_xmalloc(&bgl_info_record->bgl_part_id, &tmp16, buffer);
+
+	/* 16-bit fields, widened to int for the in-memory record */
+	safe_unpack16(&tmp16, buffer);
+	bgl_info_record->state = (int) tmp16;
+	safe_unpack16(&tmp16, buffer);
+	bgl_info_record->conn_type = (int) tmp16;
+	safe_unpack16(&tmp16, buffer);
+	bgl_info_record->node_use = (int) tmp16;
+
+	return SLURM_SUCCESS;
+
+unpack_error:
+	_free_node_info(bgl_info_record);
+	return SLURM_ERROR;
+}
+
+/* Unpack node select info from a buffer
+ * OUT node_select_info_msg_pptr - set to the newly allocated message on
+ *	success; not modified on failure
+ * IN buffer - buffer to read the message from
+ * RET SLURM_SUCCESS or SLURM_ERROR
+ * NOTE: release the message with select_g_free_node_info */
+extern int select_g_unpack_node_info(node_select_info_msg_t **
+		node_select_info_msg_pptr, Buf buffer)
+{
+	int i, record_count = 0;
+	node_select_info_msg_t *buf;
+
+	/* Size the allocation from the object being allocated: the message
+	 * header is a node_select_info_msg_t, not a bgl_info_record_t. */
+	buf = xmalloc(sizeof(*buf));
+	safe_unpack32(&(buf->record_count), buffer);
+	safe_unpack_time(&(buf->last_update), buffer);
+	buf->bgl_info_array = xmalloc(sizeof(bgl_info_record_t) *
+				      buf->record_count);
+	/* record_count stays 0 until the array exists, so the cleanup loop
+	 * below never walks an unallocated array */
+	record_count = buf->record_count;
+
+	for (i = 0; i < record_count; i++) {
+		if (_unpack_node_info(&(buf->bgl_info_array[i]), buffer))
+			goto unpack_error;
+	}
+	*node_select_info_msg_pptr = buf;
+	return SLURM_SUCCESS;
+
+unpack_error:
+	for (i = 0; i < record_count; i++)
+		_free_node_info(&(buf->bgl_info_array[i]));
+	xfree(buf->bgl_info_array);
+	xfree(buf);
+	return SLURM_ERROR;
+}
+
+/* Free a node select information buffer
+ * IN/OUT node_select_info_msg_pptr - message to release; the caller's
+ *	pointer is set to NULL once the message has been freed
+ * RET SLURM_SUCCESS, or EINVAL if node_select_info_msg_pptr is NULL */
+extern int select_g_free_node_info(node_select_info_msg_t **
+		node_select_info_msg_pptr)
+{
+	uint32_t i;
+	node_select_info_msg_t *buf;
+
+	if (node_select_info_msg_pptr == NULL)
+		return EINVAL;
+	buf = *node_select_info_msg_pptr;
+	if (buf == NULL)	/* nothing was allocated, nothing to free */
+		return SLURM_SUCCESS;
+
+	/* never trust a count that has no array behind it */
+	if (buf->bgl_info_array == NULL)
+		buf->record_count = 0;
+	for (i = 0; i < buf->record_count; i++)
+		_free_node_info(&(buf->bgl_info_array[i]));
+	/* the record array itself is a separate allocation */
+	xfree(buf->bgl_info_array);
+	xfree(buf);
+	/* do not leave the caller holding a dangling pointer */
+	*node_select_info_msg_pptr = NULL;
+	return SLURM_SUCCESS;
+}
+
#else /* !HAVE_BGL */
/* allocate storage for a select job credential
@@ -746,4 +866,16 @@ extern char *select_g_sprint_jobinfo(select_jobinfo_t jobinfo,
return NULL;
}
+extern int select_g_unpack_node_info(node_select_info_msg_t **
+ node_select_info_msg_pptr, Buf buffer)
+{
+ return SLURM_ERROR; /* !HAVE_BGL build: stub, nothing is unpacked */
+}
+
+extern int select_g_free_node_info(node_select_info_msg_t **
+ node_select_info_msg_pptr)
+{
+ return SLURM_ERROR; /* !HAVE_BGL build: stub, nothing is freed */
+}
+
#endif
View
34 src/common/node_select.h
@@ -1,5 +1,7 @@
/*****************************************************************************\
* node_select.h - Define node selection plugin functions.
+ *
+ * $Id$
*****************************************************************************
* Copyright (C) 2004 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -24,9 +26,10 @@
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
\*****************************************************************************/
-#ifndef __SELECT_PLUGIN_API_H__
-#define __SELECT_PLUGIN_API_H__
+#ifndef _NODE_SELECT_H
+#define _NODE_SELECT_H
+#include "src/api/node_select_info.h"
#include "src/common/list.h"
#include "src/slurmctld/slurmctld.h"
@@ -99,6 +102,13 @@ extern int select_g_job_test(struct job_record *job_ptr, bitstr_t *bitmap,
extern int select_g_job_begin(struct job_record *job_ptr);
/*
+ * determine if job is ready to execute per the node select plugin
+ * IN job_ptr - pointer to job being tested
+ * RET -1 on error, 1 if ready to execute, 0 otherwise
+ */
+extern int select_g_job_ready(struct job_record *job_ptr);
+
+/*
* Note termination of job is starting. Executed from slurmctld.
* IN job_ptr - pointer to job being terminated
*/
@@ -165,4 +175,24 @@ extern int select_g_unpack_jobinfo(select_jobinfo_t jobinfo, Buf buffer);
extern char *select_g_sprint_jobinfo(select_jobinfo_t jobinfo,
char *buf, size_t size, int mode);
+/******************************************************\
+ * NODE-SELECT PLUGIN SPECIFIC INFORMATION FUNCTIONS *
+\******************************************************/
+
+/* pack node-select plugin specific information into a buffer in
+ * machine independent form
+ * IN last_query_time - time of latest information consumer has
+ * OUT buffer - location to hold the data, consumer must free
+ * RET - slurm error code
+ */
+extern int select_g_pack_node_info(time_t last_query_time, Buf *buffer);
+
+/* Unpack node select info from a buffer */
+extern int select_g_unpack_node_info(node_select_info_msg_t **
+ node_select_info_msg_pptr, Buf buffer);
+
+/* Free a node select information buffer */
+extern int select_g_free_node_info(node_select_info_msg_t **
+ node_select_info_msg_pptr);
+
#endif /* _NODE_SELECT_H */
View
2  src/common/slurm_protocol_api.c
@@ -665,8 +665,6 @@ int slurm_send_node_msg(slurm_fd fd, slurm_msg_t * msg)
free_buf(buffer);
slurm_seterrno_ret(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
}
-
-
/*
* Pack message into buffer
View
7 src/common/slurm_protocol_defs.c
@@ -2,6 +2,8 @@
* slurm_protocol_defs.c - functions for initializing and releasing
* storage for RPC data structures. these are the functions used by
* the slurm daemons directly, not for user client use.
+ *
+ * $Id$
*****************************************************************************
* Copyright (C) 2002 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -76,6 +78,11 @@ void slurm_free_return_code_msg(return_code_msg_t * msg)
xfree(msg);
}
+/* Release a job_id_msg_t by handing the message pointer to xfree(). */
+void slurm_free_job_id_msg(job_id_msg_t * msg)
+{
+ xfree(msg);
+}
+
void slurm_free_job_id_request_msg(job_id_request_msg_t * msg)
{
xfree(msg);
View
13 src/common/slurm_protocol_defs.h
@@ -1,5 +1,7 @@
/****************************************************************************\
* slurm_protocol_defs.h - definitions used for RPCs
+ *
+ * $Id$
*****************************************************************************
* Copyright (C) 2002 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -88,6 +90,8 @@ typedef enum {
RESPONSE_ACCOUNTING_INFO,
REQUEST_JOB_ID,
RESPONSE_JOB_ID,
+ REQUEST_NODE_SELECT_INFO,
+ RESPONSE_NODE_SELECT_INFO,
REQUEST_UPDATE_JOB = 3001,
REQUEST_UPDATE_NODE,
@@ -113,6 +117,8 @@ typedef enum {
RESPONSE_ALLOCATION_AND_RUN_JOB_STEP,
REQUEST_OLD_JOB_RESOURCE_ALLOCATION,
REQUEST_UPDATE_JOB_TIME,
+ REQUEST_JOB_READY,
+ RESPONSE_JOB_READY,
REQUEST_JOB_STEP_CREATE = 5001,
RESPONSE_JOB_STEP_CREATE,
@@ -210,6 +216,10 @@ typedef struct node_info_request_msg {
uint16_t show_flags;
} node_info_request_msg_t;
+typedef struct node_info_select_request_msg {
+ time_t last_update;
+} node_info_select_request_msg_t;
+
typedef struct part_info_request_msg {
time_t last_update;
uint16_t show_flags;
@@ -319,7 +329,7 @@ typedef struct return_code_msg {
/* Note: We include select_jobinfo here in addition to the job launch
* RPC in order to insure reliable clean-up of a BlueGene partition in
* the event of some launch failure or race condition preventing slurmd
- * from getting the BGL_PARTITION_ID at that time. It is needed for
+ * from getting the MPIRUN_PARTITION at that time. It is needed for
* the job epilog. */
typedef struct kill_job_msg {
uint32_t job_id;
@@ -457,6 +467,7 @@ slurm_free_node_registration_status_msg(slurm_node_registration_status_msg_t *
void inline slurm_free_job_info(job_info_t * job);
void inline slurm_free_job_info_members(job_info_t * job);
+void inline slurm_free_job_id_msg(job_id_msg_t * msg);
void inline slurm_free_job_id_request_msg(job_id_request_msg_t * msg);
void inline slurm_free_job_id_response_msg(job_id_response_msg_t * msg);
View
105 src/common/slurm_protocol_pack.c
@@ -1,5 +1,7 @@
/****************************************************************************\
* slurm_protocol_pack.c - functions to pack and unpack structures for RPCs
+ *
+ * $Id$
*****************************************************************************
* Copyright (C) 2002 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -48,6 +50,8 @@
#define _pack_job_info_msg(msg,buf) _pack_buffer_msg(msg,buf)
#define _pack_job_step_info_msg(msg,buf) _pack_buffer_msg(msg,buf)
+#define _pack_node_select_info_msg(msg,buf) _pack_buffer_msg(msg,buf)
+#define _pack_node_info_msg(msg,buf) _pack_buffer_msg(msg,buf)
static void _pack_update_node_msg(update_node_msg_t * msg, Buf buffer);
static int _unpack_update_node_msg(update_node_msg_t ** msg, Buf buffer);
@@ -59,6 +63,9 @@ static int
_unpack_node_registration_status_msg(slurm_node_registration_status_msg_t
** msg, Buf buffer);
+static void _pack_job_ready_msg(job_id_msg_t * msg, Buf buffer);
+static int _unpack_job_ready_msg(job_id_msg_t ** msg_ptr, Buf buffer);
+
static void
_pack_resource_allocation_response_msg(resource_allocation_response_msg_t *
msg, Buf buffer);
@@ -84,9 +91,10 @@ static void _pack_node_info_request_msg(
static int _unpack_node_info_request_msg(
node_info_request_msg_t ** msg, Buf bufer);
-static void _pack_node_info_msg(slurm_msg_t * msg, Buf buffer);
static int _unpack_node_info_msg(node_info_msg_t ** msg, Buf buffer);
static int _unpack_node_info_members(node_info_t * node, Buf buffer);
+static int _unpack_node_select_info_msg(node_select_info_msg_t ** msg,
+ Buf buffer);
static void _pack_update_partition_msg(update_part_msg_t * msg, Buf buffer);
static int _unpack_update_partition_msg(update_part_msg_t ** msg, Buf buffer);
@@ -178,6 +186,11 @@ static void _pack_job_info_request_msg(job_info_request_msg_t *
static int _unpack_job_info_request_msg(job_info_request_msg_t**
msg, Buf buffer);
+static void _pack_node_select_info_req_msg(node_info_select_request_msg_t *
+ msg, Buf buffer);
+static int _unpack_node_select_info_req_msg(node_info_select_request_msg_t **
+ msg, Buf buffer);
+
static void _pack_job_step_info_req_msg(job_step_info_request_msg_t * msg,
Buf buffer);
static int _unpack_job_step_info_req_msg(job_step_info_request_msg_t **
@@ -457,6 +470,7 @@ pack_msg(slurm_msg_t const *msg, Buf buffer)
break;
case MESSAGE_UPLOAD_ACCOUNTING_INFO:
break;
+ case RESPONSE_JOB_READY:
case RESPONSE_SLURM_RC:
_pack_return_code_msg((return_code_msg_t *) msg->data,
buffer);
@@ -497,6 +511,16 @@ pack_msg(slurm_msg_t const *msg, Buf buffer)
case RESPONSE_CHECKPOINT:
_pack_checkpoint_resp_msg((checkpoint_resp_msg_t *)msg->data, buffer);
break;
+ case REQUEST_JOB_READY:
+ _pack_job_ready_msg((job_id_msg_t *)msg->data, buffer);
+ break;
+ case REQUEST_NODE_SELECT_INFO:
+ _pack_node_select_info_req_msg(
+ (node_info_select_request_msg_t *) msg->data, buffer);
+ break;
+ case RESPONSE_NODE_SELECT_INFO:
+ _pack_node_select_info_msg((slurm_msg_t *) msg, buffer);
+ break;
default:
debug("No pack method for msg type %i", msg->msg_type);
return EINVAL;
@@ -705,6 +729,7 @@ unpack_msg(slurm_msg_t * msg, Buf buffer)
break;
case MESSAGE_UPLOAD_ACCOUNTING_INFO:
break;
+ case RESPONSE_JOB_READY:
case RESPONSE_SLURM_RC:
rc = _unpack_return_code_msg((return_code_msg_t **)
& (msg->data), buffer);
@@ -749,6 +774,19 @@ unpack_msg(slurm_msg_t * msg, Buf buffer)
rc = _unpack_checkpoint_resp_msg((checkpoint_resp_msg_t **)
& msg->data, buffer);
break;
+ case REQUEST_JOB_READY:
+ rc = _unpack_job_ready_msg((job_id_msg_t **)
+ & msg->data, buffer);
+ break;
+ case REQUEST_NODE_SELECT_INFO:
+ rc = _unpack_node_select_info_req_msg(
+ (node_info_select_request_msg_t **) &msg->data,
+ buffer);
+ break;
+ case RESPONSE_NODE_SELECT_INFO:
+ rc = _unpack_node_select_info_msg((node_select_info_msg_t **) &
+ (msg->data), buffer);
+ break;
default:
debug("No unpack method for msg type %i", msg->msg_type);
return EINVAL;
@@ -1059,11 +1097,6 @@ _unpack_submit_response_msg(submit_response_msg_t ** msg, Buf buffer)
*msg = NULL;
return SLURM_ERROR;
}
-static void
-_pack_node_info_msg(slurm_msg_t * msg, Buf buffer)
-{
- packmem_array(msg->data, msg->data_size, buffer);
-}
static int
_unpack_node_info_msg(node_info_msg_t ** msg, Buf buffer)
@@ -1123,6 +1156,13 @@ _unpack_node_info_members(node_info_t * node, Buf buffer)
return SLURM_ERROR;
}
+/* Unpack a RESPONSE_NODE_SELECT_INFO body by delegating to
+ * select_g_unpack_node_info(); msg must not be NULL.
+ * RET whatever select_g_unpack_node_info() returns */
+static int _unpack_node_select_info_msg(node_select_info_msg_t ** msg,
+ Buf buffer)
+{
+ xassert(msg != NULL);
+
+ return select_g_unpack_node_info(msg, buffer);
+}
static void
_pack_update_partition_msg(update_part_msg_t * msg, Buf buffer)
@@ -2526,8 +2566,7 @@ _unpack_complete_job_step_msg(complete_job_step_msg_t ** msg_ptr, Buf buffer)
}
static void
-_pack_job_info_request_msg(job_info_request_msg_t * msg,
- Buf buffer)
+_pack_job_info_request_msg(job_info_request_msg_t * msg, Buf buffer)
{
pack_time(msg->last_update, buffer);
pack16(msg->show_flags, buffer);
@@ -2553,6 +2592,30 @@ _unpack_job_info_request_msg(job_info_request_msg_t** msg,
}
static void
+_pack_node_select_info_req_msg(node_info_select_request_msg_t *msg, Buf buffer)
+{
+ /* the request carries a single field: the last_update timestamp */
+ pack_time(msg->last_update, buffer);
+}
+
+/*
+ * Unpack a REQUEST_NODE_SELECT_INFO request body.
+ * OUT msg - set to a newly allocated request on success, NULL on failure
+ * IN buffer - buffer holding the packed request
+ * RET SLURM_SUCCESS or SLURM_ERROR
+ */
+static int
+_unpack_node_select_info_req_msg(node_info_select_request_msg_t **msg,
+		Buf buffer)
+{
+	node_info_select_request_msg_t *node_sel_info;
+
+	/* same precondition the sibling unpack functions assert */
+	xassert(msg != NULL);
+
+	node_sel_info = xmalloc(sizeof(node_info_select_request_msg_t));
+	*msg = node_sel_info;
+
+	safe_unpack_time(&node_sel_info->last_update, buffer);
+	return SLURM_SUCCESS;
+
+      unpack_error:
+	xfree(node_sel_info);
+	*msg = NULL;
+	return SLURM_ERROR;
+}
+
+static void
_pack_job_step_info_req_msg(job_step_info_request_msg_t * msg, Buf buffer)
{
pack_time(msg->last_update, buffer);
@@ -2889,6 +2952,32 @@ _unpack_srun_node_fail_msg(srun_node_fail_msg_t ** msg_ptr, Buf buffer)
}
static void
+_pack_job_ready_msg(job_id_msg_t * msg, Buf buffer)
+{
+ /* REQUEST_JOB_READY body: only the 32-bit job_id is packed */
+ xassert ( msg != NULL );
+
+ pack32 ( msg -> job_id , buffer ) ;
+}
+
+/* Unpack a REQUEST_JOB_READY body (a single 32-bit job_id).
+ * OUT msg_ptr - set to a newly allocated job_id_msg_t on success,
+ *	NULL on failure
+ * RET SLURM_SUCCESS or SLURM_ERROR */
+static int
+_unpack_job_ready_msg(job_id_msg_t ** msg_ptr, Buf buffer)
+{
+ job_id_msg_t * msg;
+ xassert ( msg_ptr != NULL );
+
+ msg = xmalloc ( sizeof (job_id_msg_t) );
+ *msg_ptr = msg ;
+
+ safe_unpack32 ( & msg -> job_id , buffer ) ;
+ return SLURM_SUCCESS;
+
+ unpack_error:
+ *msg_ptr = NULL;
+ xfree(msg);
+ return SLURM_ERROR;
+}
+
+static void
_pack_srun_timeout_msg(srun_timeout_msg_t * msg, Buf buffer)
{
xassert ( msg != NULL );
View
2  src/partition_allocator/Makefile.am
@@ -16,7 +16,7 @@ INCLUDES = -I$(top_srcdir) $(BGL_INCLUDES)
# $(top_builddir)/src/api/libslurm.la
# partition_allocator_LDFLAGS = -export-dynamic -lm $(CMD_LDFLAGS) $(BGL_LDFLAGS)
-# CPPFLAGS = -DBUILD_EXE
+# CPPFLAGS = -DBUILD_EXE
# making a .la
noinst_LTLIBRARIES = libpartition_allocator.la
View
536 src/partition_allocator/partition_allocator.c
@@ -39,7 +39,7 @@
#define DEBUG_PA
#define BEST_COUNT_INIT 10;
-#if HAVE_BGL
+#ifdef HAVE_BGL
int DIM_SIZE[PA_SYSTEM_DIMENSIONS] = {0,0,0};
#else
int DIM_SIZE[PA_SYSTEM_DIMENSIONS] = {0};
@@ -49,16 +49,20 @@ bool _initialized = false;
/* _pa_system is the "current" system that the structures will work
* on */
-pa_system_t *pa_system_ptr;
-List path;
-List best_path;
+pa_system_t *pa_system_ptr = NULL;
+List path = NULL;
+List best_path = NULL;
int best_count;
int color_count = 0;
+char letters[36];
+char colors[6];
/** internal helper functions */
-#if HAVE_BGL
+#ifdef HAVE_BGL_FILES
/** */
static void _bp_map_list_del(void *object);
+#endif
+#ifdef HAVE_BGL
/* */
static int _check_for_options(pa_request_t* pa_request);
/* */
@@ -72,8 +76,6 @@ static int _create_config_even(pa_node_t *grid);
#endif
/** */
-static void _set_bp_map(void);
-/** */
static void _new_pa_node(pa_node_t *pa_node,
int *coord);
/** */
@@ -92,8 +94,7 @@ static void _switch_config(pa_node_t* source, pa_node_t* target, int dim,
int port_src, int port_tar);
/* */
static void _set_external_wires(int dim, int count, pa_node_t* source,
- pa_node_t* target_1, pa_node_t* target_2,
- pa_node_t* target_first, pa_node_t* target_second);
+ pa_node_t* target_1, pa_node_t* target_2);
/* */
static char *_set_internal_wires(List nodes, int size, int conn_type);
@@ -112,6 +113,23 @@ static int _set_one_dim(int *start, int *end, int *coord);
/* Global */
List bp_map_list;
+List bgl_info_list;
+
+/*
+ * Destroy a bgl_info_record_t together with the strings it owns.
+ * Takes a void pointer, presumably so it can serve as a List delete
+ * callback - confirm against the list_create() call that uses it.
+ * IN object - record to destroy, may be NULL
+ */
+extern void destroy_bgl_info_record(void* object)
+{
+	bgl_info_record_t *bgl_info_record = (bgl_info_record_t *) object;
+
+	if (bgl_info_record == NULL)
+		return;
+
+	/* xfree() ignores NULL pointers, so the members need no guards */
+	xfree(bgl_info_record->nodes);
+	xfree(bgl_info_record->owner_name);
+	xfree(bgl_info_record->bgl_part_id);
+	xfree(bgl_info_record);
+}
/**
* create a partition request. Note that if the geometry is given,
@@ -128,10 +146,10 @@ List bp_map_list;
*
* return SUCCESS of operation.
*/
-int new_pa_request(pa_request_t* pa_request)
+extern int new_pa_request(pa_request_t* pa_request)
{
int i=0;
-#if HAVE_BGL
+#ifdef HAVE_BGL
float sz=1;
int geo[PA_SYSTEM_DIMENSIONS] = {0,0,0};
int i2, i3, picked, total_sz=1 , size2, size3;
@@ -150,7 +168,7 @@ int new_pa_request(pa_request_t* pa_request)
for (i=0; i<PA_SYSTEM_DIMENSIONS; i++){
if ((geo[i] < 1)
|| (geo[i] > DIM_SIZE[i])){
- error("new_pa_request Error, request geometry is invalid %d\n", geo[i]);
+ error("new_pa_request Error, request geometry is invalid %d", geo[i]);
return 0;
}
}
@@ -187,7 +205,7 @@ int new_pa_request(pa_request_t* pa_request)
}
if(pa_request->size>total_sz || pa_request->size<1) {
- error("new_pa_request ERROR, requested size must be\ngreater than 0 and less than %d.\n",total_sz);
+ //error("new_pa_request ERROR, requested size must be\ngreater than 0 and less than %d.",total_sz);
return 0;
}
@@ -317,7 +335,7 @@ int new_pa_request(pa_request_t* pa_request)
for (i=0; i<PA_SYSTEM_DIMENSIONS; i++){
if ((geo[i] < 1)
|| (geo[i] > DIM_SIZE[i])){
- error("new_pa_request Error, request geometry is invalid %d\n",
+ error("new_pa_request Error, request geometry is invalid %d",
geo[i]);
return 0;
}
@@ -337,7 +355,7 @@ int new_pa_request(pa_request_t* pa_request)
/**
* delete a partition request
*/
-void delete_pa_request(pa_request_t *pa_request)
+extern void delete_pa_request(pa_request_t *pa_request)
{
int *geo_ptr;
@@ -353,51 +371,54 @@ void delete_pa_request(pa_request_t *pa_request)
/**
* print a partition request
*/
-void print_pa_request(pa_request_t* pa_request)
+extern void print_pa_request(pa_request_t* pa_request)
{
int i;
if (pa_request == NULL){
- error("print_pa_request Error, request is NULL\n");
+ error("print_pa_request Error, request is NULL");
return;
}
- debug(" pa_request:\n");
+ debug(" pa_request:");
debug(" geometry:\t");
for (i=0; i<PA_SYSTEM_DIMENSIONS; i++){
debug("%d", pa_request->geometry[i]);
}
- debug("\n");
- debug(" size:\t%d\n", pa_request->size);
- debug(" conn_type:\t%d\n", pa_request->conn_type);
- debug(" rotate:\t%d\n", pa_request->rotate);
- debug(" elongate:\t%d\n", pa_request->elongate);
- debug("force contig:\t%d\n", pa_request->force_contig);
- debug(" node_use:\t%d\n", pa_request->node_use);
+ debug("");
+ debug(" size:\t%d", pa_request->size);
+ debug(" conn_type:\t%d", pa_request->conn_type);
+ debug(" rotate:\t%d", pa_request->rotate);
+ debug(" elongate:\t%d", pa_request->elongate);
+ debug("force contig:\t%d", pa_request->force_contig);
+ debug(" node_use:\t%d", pa_request->node_use);
}
/**
* Initialize internal structures by either reading previous partition
* configurations from a file or by running the graph solver.
*
- * IN: dunno yet, probably some stuff denoting downed nodes, etc.
+ * IN: node_info_msg_t * can be null,
+ * should be from slurm_load_node().
*
- * return: success or error of the intialization.
+ * return: void.
*/
-void pa_init(node_info_msg_t *node_info_ptr)
+extern void pa_init(node_info_msg_t *node_info_ptr)
{
- node_info_t *node_ptr;
+ node_info_t *node_ptr = NULL;
int i;
int start, temp;
- char *numeric;
-
- /* if we've initialized, just pop off all the old crusty
- * pa_systems */
+ char *numeric = NULL;
+#ifdef HAVE_BGL_FILES
+ rm_BGL_t *bgl = NULL;
+ rm_size3D_t bp_size;
+ int rc = 0;
+#endif
+ /* We only need to initialize once, so return if already done so. */
+
if (_initialized){
return;
}
- _set_bp_map();
-
best_count=BEST_COUNT_INIT;
pa_system_ptr = (pa_system_t *) xmalloc(sizeof(pa_system_t));
@@ -447,25 +468,41 @@ void pa_init(node_info_msg_t *node_info_ptr)
#ifdef HAVE_BGL_FILES
if ((DIM_SIZE[X]==0) && (DIM_SIZE[X]==0) && (DIM_SIZE[X]==0)) {
- rm_BGL_t *bgl = NULL;
- rm_size3D_t bp_size;
- rm_set_serial(BGL_SERIAL);
- rm_get_BGL(&bgl);
+ if ((rc = rm_set_serial(BGL_SERIAL)) != STATUS_OK) {
+ error("rm_set_serial(%s): %d", BGL_SERIAL, rc);
+ return;
+ }
+ if ((rc = rm_get_BGL(&bgl)) != STATUS_OK) {
+ error("rm_get_BGL(): %d", rc);
+ return;
+ }
+
if ((bgl != NULL)
- && (rm_get_data(bgl, RM_Msize, &bp_size) == STATUS_OK)) {
+ && ((rc = rm_get_data(bgl, RM_Msize, &bp_size)) == STATUS_OK)) {
DIM_SIZE[X]=bp_size.X;
DIM_SIZE[Y]=bp_size.Y;
DIM_SIZE[Z]=bp_size.Z;
+ } else {
+ error("rm_get_data(RM_Msize): %d", rc);
}
- rm_free_BGL(bgl);
+ if ((rc = rm_free_BGL(bgl)) != STATUS_OK)
+ error("rm_free_BGL(): %d", rc);
}
#endif
+
+#ifdef HAVE_BGL
if ((DIM_SIZE[X]==0) && (DIM_SIZE[X]==0) && (DIM_SIZE[X]==0)) {
- debug("Setting default system dimensions\n");
+ debug("Setting default system dimensions");
DIM_SIZE[X]=8;
DIM_SIZE[Y]=4;
DIM_SIZE[Z]=4;
}
+#else
+ if ((DIM_SIZE[X]==0) && (DIM_SIZE[X]==0) && (DIM_SIZE[X]==0)) {
+ debug("Setting default system dimensions");
+ DIM_SIZE[X]=100;
+ }
+#endif
if(!pa_system_ptr->num_of_proc)
pa_system_ptr->num_of_proc = DIM_SIZE[X] * DIM_SIZE[Y] * DIM_SIZE[Z];
@@ -473,9 +510,6 @@ void pa_init(node_info_msg_t *node_info_ptr)
_create_pa_system();
- pa_system_ptr->fill_in_value = (pa_node_t *)
- xmalloc(sizeof(pa_node_t) * pa_system_ptr->num_of_proc);
-
init_grid(node_info_ptr);
_create_config_even(pa_system_ptr->grid);
@@ -489,7 +523,7 @@ void pa_init(node_info_msg_t *node_info_ptr)
/**
* destroy all the internal (global) data structs.
*/
-void pa_fini()
+extern void pa_fini()
{
if (!_initialized){
return;
@@ -499,13 +533,13 @@ void pa_fini()
list_destroy(path);
if (best_path)
list_destroy(best_path);
-#if HAVE_BGL
+#ifdef HAVE_BGL_FILES
if (bp_map_list)
list_destroy(bp_map_list);
#endif
_delete_pa_system();
-// printf("pa system destroyed\n");
+// debug2("pa system destroyed");
}
@@ -514,19 +548,19 @@ void pa_fini()
*
* IN c: coordinate of the node to put down
*/
-void pa_set_node_down(pa_node_t *pa_node)
+extern void pa_set_node_down(pa_node_t *pa_node)
{
if (!_initialized){
error("Error, configuration not initialized, "
- "call init_configuration first\n");
+ "call init_configuration first");
return;
}
#ifdef DEBUG_PA
-#if HAVE_BGL
- debug("pa_set_node_down: node to set down: [%d%d%d]\n", pa_node->coord[X], pa_node->coord[Y], pa_node->coord[Z]);
+#ifdef HAVE_BGL
+ debug("pa_set_node_down: node to set down: [%d%d%d]", pa_node->coord[X], pa_node->coord[Y], pa_node->coord[Z]);
#else
- debug("pa_set_node_down: node to set down: [%d]\n", pa_node->coord[X]);
+ debug("pa_set_node_down: node to set down: [%d]", pa_node->coord[X]);
#endif
#endif
@@ -544,16 +578,16 @@ void pa_set_node_down(pa_node_t *pa_node)
*
* return: success or error of request
*/
-int allocate_part(pa_request_t* pa_request, List results)
+extern int allocate_part(pa_request_t* pa_request, List results)
{
if (!_initialized){
- error("allocate_part Error, configuration not initialized, call init_configuration first\n");
+ error("allocate_part Error, configuration not initialized, call init_configuration first");
return 0;
}
if (!pa_request){
- error("allocate_part Error, request not initialized\n");
+ error("allocate_part Error, request not initialized");
return 0;
}
@@ -565,11 +599,11 @@ int allocate_part(pa_request_t* pa_request, List results)
}
}
-int _reset_the_path(pa_switch_t *curr_switch, int source, int target, int dim)
+extern int _reset_the_path(pa_switch_t *curr_switch, int source, int target, int dim)
{
int *node_tar;
int port_tar;
- pa_switch_t *next_switch;
+ pa_switch_t *next_switch = NULL;
/*set the switch to not be used */
curr_switch->int_wire[source].used = 0;
port_tar = curr_switch->int_wire[source].port_tar;
@@ -582,7 +616,7 @@ int _reset_the_path(pa_switch_t *curr_switch, int source, int target, int dim)
node_tar = curr_switch->ext_wire[port_tar].node_tar;
port_tar = curr_switch->ext_wire[port_tar].port_tar;
-#if HAVE_BGL
+#ifdef HAVE_BGL
next_switch = &pa_system_ptr->
grid[node_tar[X]][node_tar[Y]][node_tar[Z]].axis_switch[dim];
#else
@@ -600,11 +634,11 @@ int _reset_the_path(pa_switch_t *curr_switch, int source, int target, int dim)
*
* returns SLURM_SUCCESS if undo was successful.
*/
-int remove_part(List nodes, int new_count)
+extern int remove_part(List nodes, int new_count)
{
int dim;
- pa_node_t* pa_node;
- pa_switch_t *curr_switch;
+ pa_node_t* pa_node = NULL;
+ pa_switch_t *curr_switch = NULL;
while((pa_node = (pa_node_t*) list_pop(nodes)) != NULL) {
pa_node->used = false;
@@ -632,12 +666,13 @@ int remove_part(List nodes, int new_count)
*
* returns SLURM_SUCCESS if undo was successful.
*/
-int alter_part(List nodes, int conn_type)
+extern int alter_part(List nodes, int conn_type)
{
int dim;
- pa_node_t* pa_node;
- pa_switch_t *curr_switch;
+ pa_node_t* pa_node = NULL;
+ pa_switch_t *curr_switch = NULL;
int size=0;
+ char *name = NULL;
ListIterator results_i;
results_i = list_iterator_create(nodes);
@@ -657,9 +692,12 @@ int alter_part(List nodes, int conn_type)
size++;
}
list_iterator_destroy(results_i);
- _set_internal_wires(nodes, size, conn_type);
-
- return 1;
+ if((name = _set_internal_wires(nodes, size, conn_type)) == NULL)
+ return SLURM_ERROR;
+ else {
+ xfree(name);
+ return SLURM_SUCCESS;
+ }
}
/**
@@ -667,24 +705,22 @@ int alter_part(List nodes, int conn_type)
* be redone to make sure correct path will be used in the real system
*
*/
-int redo_part(List nodes, int conn_type, int new_count)
+extern int redo_part(List nodes, int conn_type, int new_count)
{
int dim;
pa_node_t* pa_node;
pa_switch_t *curr_switch;
int size=0;
- char *name;
+ char *name = NULL;
ListIterator results_i;
results_i = list_iterator_create(nodes);
while ((pa_node = list_next(results_i)) != NULL) {
pa_node->used = false;
- pa_node->letter =
- pa_system_ptr->fill_in_value[new_count].letter;
-
- pa_node->color =
- pa_system_ptr->fill_in_value[new_count].color;
-
+
+ /* wrap on the real table size: letters[] is declared with 36 entries,
+  * so a fixed modulus of 62 could index past its end */
+ pa_node->letter = letters[new_count % (int) sizeof(letters)];
+ pa_node->color = colors[new_count%6];
+
for(dim=0;dim<PA_SYSTEM_DIMENSIONS;dim++) {
curr_switch = &pa_node->axis_switch[dim];
if(curr_switch->int_wire[0].used) {
@@ -699,29 +735,40 @@ int redo_part(List nodes, int conn_type, int new_count)
}
color_count++;
list_iterator_destroy(results_i);
- name = _set_internal_wires(nodes, size, conn_type);
- xfree(name);
- return 1;
+ if((name = _set_internal_wires(nodes, size, conn_type)) == NULL)
+ return SLURM_ERROR;
+ else {
+ xfree(name);
+ return SLURM_SUCCESS;
+ }
}
-int set_bgl_part(List nodes, int size, int conn_type)
+extern int set_bgl_part(List nodes, int size, int conn_type)
{
- _set_internal_wires(nodes, size, conn_type);
- return 1;
+ char *name;
+ if((name = _set_internal_wires(nodes, size, conn_type)) == NULL)
+ return SLURM_ERROR;
+ else {
+ xfree(name);
+ return SLURM_SUCCESS;
+ }
}
-int reset_pa_system()
+extern int reset_pa_system()
{
int x, y, z;
+ int coord[PA_SYSTEM_DIMENSIONS];
for (x = 0; x < DIM_SIZE[X]; x++)
for (y = 0; y < DIM_SIZE[Y]; y++)
for (z = 0; z < DIM_SIZE[Z]; z++) {
-#if HAVE_BGL
- int coord[PA_SYSTEM_DIMENSIONS] = {x,y,z};
+#ifdef HAVE_BGL
+ coord[X] = x;
+ coord[Y] = y;
+ coord[Z] = z;
_new_pa_node(&pa_system_ptr->grid[x][y][z], coord);
#else
- int coord[PA_SYSTEM_DIMENSIONS] = {x};
+ coord[X] = x;
_new_pa_node(&pa_system_ptr->grid[x], coord);
#endif
@@ -730,7 +777,7 @@ int reset_pa_system()
return 1;
}
/* init_grid - set values of every grid point */
-void init_grid(node_info_msg_t * node_info_ptr)
+extern void init_grid(node_info_msg_t * node_info_ptr)
{
node_info_t *node_ptr;
int x, i = 0;
@@ -738,7 +785,7 @@ void init_grid(node_info_msg_t * node_info_ptr)
/* For systems with more than 62 active jobs or BGL blocks,
* we just repeat letters */
-#if HAVE_BGL
+#ifdef HAVE_BGL
int y,z;
for (x = 0; x < DIM_SIZE[X]; x++)
for (y = 0; y < DIM_SIZE[Y]; y++)
@@ -803,12 +850,15 @@ void init_grid(node_info_msg_t * node_info_ptr)
return;
}
-int *find_bp_loc(char* bp_id)
+extern int *find_bp_loc(char* bp_id)
{
-#if HAVE_BGL
- pa_bp_map_t *bp_map;
+#ifdef HAVE_BGL_FILES
+ pa_bp_map_t *bp_map = NULL;
ListIterator itr;
+ if(!bp_map_list)
+ set_bp_map();
+
itr = list_iterator_create(bp_map_list);
while ((bp_map = list_next(itr)) != NULL)
if (!strcmp(bp_map->bp_id, bp_id))
@@ -819,15 +869,16 @@ int *find_bp_loc(char* bp_id)
return bp_map->coord;
else
return NULL;
+
#else
return NULL;
#endif
}
-char *find_bp_rack_mid(char* xyz)
+extern char *find_bp_rack_mid(char* xyz)
{
-#if HAVE_BGL
- pa_bp_map_t *bp_map;
+#ifdef HAVE_BGL_FILES
+ pa_bp_map_t *bp_map = NULL;
ListIterator itr;
int number;
int coord[PA_SYSTEM_DIMENSIONS];
@@ -836,18 +887,22 @@ char *find_bp_rack_mid(char* xyz)
coord[X] = number / 100;
coord[Y] = (number % 100) / 10;
coord[Z] = (number % 10);
+ if(!bp_map_list)
+ set_bp_map();
+
itr = list_iterator_create(bp_map_list);
while ((bp_map = list_next(itr)) != NULL)
if (bp_map->coord[X] == coord[X] &&
bp_map->coord[Y] == coord[Y] &&
bp_map->coord[Z] == coord[Z])
break; /* we found it */
-
+
list_iterator_destroy(itr);
if(bp_map != NULL)
return bp_map->bp_id;
else
return NULL;
+
#else
return NULL;
#endif
@@ -855,7 +910,7 @@ char *find_bp_rack_mid(char* xyz)
/********************* Local Functions *********************/
-#if HAVE_BGL
+#ifdef HAVE_BGL_FILES
static void _bp_map_list_del(void *object)
{
pa_bp_map_t *bp_map = (pa_bp_map_t *)object;
@@ -864,7 +919,8 @@ static void _bp_map_list_del(void *object)
xfree(bp_map);
}
}
-
+#endif
+#ifdef HAVE_BGL
static int _check_for_options(pa_request_t* pa_request)
{
@@ -875,7 +931,7 @@ static int _check_for_options(pa_request_t* pa_request)
if(pa_request->rotate) {
rotate_again:
- //printf("Rotating! %d\n",pa_request->rotate_count);
+ //debug("Rotating! %d",pa_request->rotate_count);
if (pa_request->rotate_count==(PA_SYSTEM_DIMENSIONS-1)) {
temp=pa_request->geometry[X];
@@ -906,7 +962,7 @@ static int _check_for_options(pa_request_t* pa_request)
}
if(pa_request->elongate) {
elongate_again:
- //printf("Elongating! %d\n",pa_request->elongate_count);
+ //debug("Elongating! %d",pa_request->elongate_count);
pa_request->rotate_count=0;
pa_request->rotate = true;
@@ -935,8 +991,8 @@ static int _check_for_options(pa_request_t* pa_request)
static int _append_geo(int *geometry, List geos, int rotate)
{
ListIterator itr;
- int *geo_ptr;
- int *geo;
+ int *geo_ptr = NULL;
+ int *geo = NULL;
int temp_geo;
int i, j;
geo = xmalloc(sizeof(int)*3);
@@ -974,18 +1030,18 @@ static int _append_geo(int *geometry, List geos, int rotate)
#endif
/** */
-#if HAVE_BGL
+#ifdef HAVE_BGL
static int _create_config_even(pa_node_t ***grid)
#else
static int _create_config_even(pa_node_t *grid)
#endif
{
int x;
- pa_node_t *source, *target_1;
+ pa_node_t *source = NULL, *target_1 = NULL;
-#if HAVE_BGL
+#ifdef HAVE_BGL
int y,z;
- pa_node_t *target_2, *target_first, *target_second;
+ pa_node_t *target_2 = NULL;
for(x=0;x<DIM_SIZE[X];x++) {
for(y=0;y<DIM_SIZE[Y];y++) {
for(z=0;z<DIM_SIZE[Z];z++) {
@@ -999,14 +1055,8 @@ static int _create_config_even(pa_node_t *grid)
target_2 = &grid[x+2][y][z];
else
target_2 = target_1;
- target_first = &grid[0][y][z];
- if (DIM_SIZE[X] > 1)
- target_second = &grid[1][y][z];
- else
- target_second = target_first;
_set_external_wires(X, x, source,
- target_1, target_2,
- target_first, target_second);
+ target_1, target_2);
if(y<(DIM_SIZE[Y]-1))
target_1 = &grid[x][y+1][z];
@@ -1014,16 +1064,14 @@ static int _create_config_even(pa_node_t *grid)
target_1 = &grid[x][0][z];
_set_external_wires(Y, y, source,
- target_1, NULL,
- NULL, NULL);
+ target_1, NULL);
if(z<(DIM_SIZE[Z]-1))
target_1 = &grid[x][y][z+1];
else
target_1 = &grid[x][y][0];
_set_external_wires(Z, z, source,
- target_1, NULL,
- NULL, NULL);
+ target_1, NULL);
}
}
}
@@ -1051,43 +1099,43 @@ static int _create_config_even(pa_node_t *grid)
target_1 = &grid[x+1];
_set_external_wires(X, x, source,
- target_1, NULL,
- NULL, NULL);
+ target_1, NULL);
}
#endif
return 1;
}
/** */
-static void _set_bp_map(void)
+extern int set_bp_map(void)
{
#ifdef HAVE_BGL_FILES
static rm_BGL_t *bgl = NULL;
int rc;
- rm_BP_t *my_bp;
- pa_bp_map_t *bp_map;
+ rm_BP_t *my_bp = NULL;
+ pa_bp_map_t *bp_map = NULL;
int bp_num, i;
- char *bp_id;
+ char *bp_id = NULL;
rm_location_t bp_loc;
bp_map_list = list_create(_bp_map_list_del);
if (!getenv("DB2INSTANCE") || !getenv("VWSPATH")) {
- error("Missing DB2INSTANCE or VWSPATH env var.\n"
- "Execute 'db2profile'\n");
- return;
+ error("Missing DB2INSTANCE or VWSPATH env var."
+ "Execute 'db2profile'");
+ return -1;
}
if ((rc = rm_set_serial(BGL_SERIAL)) != STATUS_OK) {
- error("rm_set_serial(): %d\n", rc);
- return;
+ error("rm_set_serial(): %d", rc);
+ return -1;
}
if ((rc = rm_get_BGL(&bgl)) != STATUS_OK) {
- error("rm_get_BGL(): %d\n", rc);
- return;
+ error("rm_get_BGL(): %d", rc);
+ return -1;
}
if ((rc = rm_get_data(bgl, RM_BPNum, &bp_num)) != STATUS_OK) {
+ error("rm_get_data(RM_BPNum): %d", rc);
bp_num = 0;
}
@@ -1096,11 +1144,13 @@ static void _set_bp_map(void)
if (i) {
if ((rc = rm_get_data(bgl, RM_NextBP, &my_bp))
!= STATUS_OK) {
+ error("rm_get_data(RM_NextBP): %d", rc);
break;
}
} else {
if ((rc = rm_get_data(bgl, RM_FirstBP, &my_bp))
!= STATUS_OK) {
+ error("rm_get_data(RM_FirstBP): %d", rc);
break;
}
}
@@ -1109,12 +1159,15 @@ static void _set_bp_map(void)
if ((rc = rm_get_data(my_bp, RM_BPID, &bp_id))
!= STATUS_OK) {
+ xfree(bp_map);
+ error("rm_get_data(RM_BPID): %d", rc);
continue;
}
if ((rc = rm_get_data(my_bp, RM_BPLoc, &bp_loc))
!= STATUS_OK) {
xfree(bp_map);
+ error("rm_get_data(RM_BPLoc): %d", rc);
continue;
}
@@ -1126,10 +1179,11 @@ static void _set_bp_map(void)
list_push(bp_map_list, bp_map);
}
- rm_free_BGL(bgl);
-#else
- return;
+ /* rc is an int status code, so it must be printed with %d, not %s */
+ if ((rc = rm_free_BGL(bgl)) != STATUS_OK)
+ error("rm_free_BGL(): %d", rc);
+
#endif
+ return 1;
}
@@ -1155,8 +1209,9 @@ static void _new_pa_node(pa_node_t *pa_node, int *coord)
static void _create_pa_system(void)
{
int x;
-
-#if HAVE_BGL
+ int coord[PA_SYSTEM_DIMENSIONS];
+
+#ifdef HAVE_BGL
int y,z;
pa_system_ptr->grid = (pa_node_t***)
xmalloc(sizeof(pa_node_t**) * DIM_SIZE[X]);
@@ -1165,48 +1220,50 @@ static void _create_pa_system(void)
xmalloc(sizeof(pa_node_t) * DIM_SIZE[X]);
#endif
for (x=0; x<DIM_SIZE[X]; x++) {
-#if HAVE_BGL
+#ifdef HAVE_BGL
pa_system_ptr->grid[x] = (pa_node_t**)
xmalloc(sizeof(pa_node_t*) * DIM_SIZE[Y]);
for (y=0; y<DIM_SIZE[Y]; y++) {
pa_system_ptr->grid[x][y] = (pa_node_t*)
xmalloc(sizeof(pa_node_t) * DIM_SIZE[Z]);
for (z=0; z<DIM_SIZE[Z]; z++){
- int coord[PA_SYSTEM_DIMENSIONS] = {x,y,z};
+ coord[X] = x;
+ coord[Y] = y;
+ coord[Z] = z;
_new_pa_node(&pa_system_ptr->grid[x][y][z], coord);
}
}
#else
- int coord[PA_SYSTEM_DIMENSIONS] = {x};
+ coord[X] = x;
_new_pa_node(&pa_system_ptr->grid[x], coord);
#endif
}
- pa_system_ptr->fill_in_value = (pa_node_t *)
- xmalloc(sizeof(pa_node_t) * pa_system_ptr->num_of_proc);
-
}
/**
 * Release the structure referenced by pa_system_ptr; returns immediately
 * when pa_system_ptr is NULL.
 *
 * As shown in this diff, the revised (+) lines touch grid only when it is
 * non-NULL: under HAVE_BGL each grid[x][y] and then each grid[x] is passed
 * to xfree(), after which grid itself and finally pa_system_ptr are passed
 * to xfree(). The removed (-) lines did the same without the grid NULL
 * check and additionally passed fill_in_value to xfree().
 */
static void _delete_pa_system(void)
{
-
+#ifdef HAVE_BGL
+ int x=0;
+ int y;
+#endif
if (!pa_system_ptr){
return;
}
-#if HAVE_BGL
- int x;
- for (x=0; x<DIM_SIZE[X]; x++) {
- int y;
- for (y=0; y<DIM_SIZE[Y]; y++)
- xfree(pa_system_ptr->grid[x][y]);
+ if(pa_system_ptr->grid) {
+#ifdef HAVE_BGL
+ for (x=0; x<DIM_SIZE[X]; x++) {
+ for (y=0; y<DIM_SIZE[Y]; y++)
+ xfree(pa_system_ptr->grid[x][y]);
+
+ xfree(pa_system_ptr->grid[x]);
+ }
+#endif
- xfree(pa_system_ptr->grid[x]);
+
+ xfree(pa_system_ptr->grid);
}
-#endif
-
- xfree(pa_system_ptr->grid);
- xfree(pa_system_ptr->fill_in_value);
xfree(pa_system_ptr);
}
@@ -1217,18 +1274,18 @@ static int _find_match(pa_request_t *pa_request, List results)
{
int x=0;
int *geometry = pa_request->geometry;
-#if HAVE_BGL
+#ifdef HAVE_BGL
int y=0, z=0;
int start[PA_SYSTEM_DIMENSIONS] = {0,0,0};
int find[PA_SYSTEM_DIMENSIONS] = {0,0,0};
#else
int find[PA_SYSTEM_DIMENSIONS] = {0};
#endif
- pa_node_t* pa_node;
+ pa_node_t* pa_node = NULL;
int found_one=0;
char *name=NULL;
-#if HAVE_BGL
+#ifdef HAVE_BGL
if(geometry[X]>DIM_SIZE[X]
|| geometry[Y]>DIM_SIZE[Y]
|| geometry[Z]>DIM_SIZE[Z])
@@ -1334,7 +1391,7 @@ static int _find_match(pa_request_t *pa_request, List results)
else
return 0;
} else {
- debug("couldn't find it 2\n");
+ debug("couldn't find it 2");
return 0;
}
@@ -1373,7 +1430,7 @@ static int _find_match(pa_request_t *pa_request, List results)
else
return 0;
} else {
- debug("couldn't find it 2\n");
+ debug("couldn't find it 2");
return 0;
}
@@ -1387,7 +1444,7 @@ static int _find_match(pa_request_t *pa_request, List results)