Tag of v1.1.18

commit 97cb396aa102e76e838b78a7daca2b921f928099 (2 parents: 20ac2ec + dbcfe18)
Morris Jette authored November 07, 2006
2  META
@@ -9,7 +9,7 @@
   Name:		slurm
   Major:	1
   Minor:	1
-  Micro:        18
+  Micro:	18
   Version:	1.1.18
   Release:	1
   API_CURRENT:	10	
9  NEWS
@@ -12,8 +12,13 @@ documents those changes that are of interest to users and admins.
    passthrough nodes to the allocation when creating a block. 
  - BLUEGENE - Fix deadlock issue with starting and failing jobs at the same
    time
- - Make connect() non-blocking and poll to avoid possibly very long default 
-   timeout.
+ - Make connect() non-blocking and poll() with timeout to avoid huge 
+   waits under some conditions.
+ - Set "ENVIRONMENT=BATCH" environment variable for "srun --batch" jobs only.
+ - Add logic to save/restore select/cons_res state information.
+ - BLUEGENE - make all sprintf's into snprintf's
+ - Fix timeout calculation to work correctly for fan out.
+ - Fix for "srun -A" segfault on a node failure.
 
 * Changes in SLURM 1.1.17
 =========================
38  doc/html/faq.shtml
@@ -41,21 +41,29 @@ are not responding even if they are not in any partition?</a></li>
 
 <h2>For Users</h2>
 <p><a name="comp"><b>1. Why is my job/node in COMPLETING state?</b></a><br>
-When a job is terminating, both the job and its nodes enter the state &quot;completing.&quot; 
+When a job is terminating, both the job and its nodes enter the COMPLETING state. 
 As the SLURM daemon on each node determines that all processes associated with 
-the job have terminated, that node changes state to &quot;idle&quot; or some other 
-appropriate state. When every node allocated to a job has determined that all 
-processes associated with it have terminated, the job changes state to &quot;completed&quot; 
-or some other appropriate state. Normally, this happens within a fraction of one 
-second. However, if the job has processes that cannot be terminated with a SIGKILL, 
-the job and one or more nodes can remain in the completing state for an extended 
-period of time. This may be indicative of processes hung waiting for a core file 
-to complete I/O or operating system failure. If this state persists, the system 
-administrator should use the <span class="commandline">scontrol</span> command 
-to change the node's state to DOWN (e.g. &quot;scontrol update 
-NodeName=<i>name</i> State=DOWN Reason=hung_completing&quot;), reboot the node, 
-then reset the node's state to IDLE (e.g. &quot;scontrol update 
-NodeName=<i>name</i> State=RESUME&quot;).</p>
+the job have terminated, that node changes state to IDLE or some other appropriate 
+state. 
+When every node allocated to a job has determined that all processes associated 
+with it have terminated, the job changes state to COMPLETED or some other 
+appropriate state (e.g. FAILED). 
+Normally, this happens within a second. 
+However, if the job has processes that cannot be terminated with a SIGKILL
+signal, the job and one or more nodes can remain in the COMPLETING state 
+for an extended period of time. 
+This may be indicative of processes hung waiting for a core file 
+to complete I/O or operating system failure. 
+If this state persists, the system administrator should check for processes 
+associated with the job that can not be terminated then use the 
+<span class="commandline">scontrol</span> command to change the node's 
+state to DOWN (e.g. &quot;scontrol update NodeName=<i>name</i> State=DOWN Reason=hung_completing&quot;), 
+reboot the node, then reset the node's state to IDLE 
+(e.g. &quot;scontrol update NodeName=<i>name</i> State=RESUME&quot;).
+Note that setting the node DOWN will terminate all running or suspended 
+jobs associated with that node. 
+An alternative is to set the node's state to DRAIN until all jobs 
+associated with it terminate before setting it DOWN and re-booting.</p>
 
 <p><a name="rlimit"><b>2. Why do I see the error &quot;Can't propagate RLIMIT_...&quot;?</b></a><br>
 When the <span class="commandline">srun</span> command executes, it captures the 
@@ -342,6 +350,6 @@ partition. You can control the frequency of this ping with the
 
 <p class="footer"><a href="#top">top</a></p>
 
-<p style="text-align:center;">Last modified 24 August 2006</p>
+<p style="text-align:center;">Last modified 31 October 2006</p>
 
 <!--#include virtual="footer.txt"-->
13  src/common/forward.c
@@ -83,11 +83,12 @@ void *_forward_thread(void *arg)
 	/* figure out where we are in the tree and set the timeout for
 	   to wait for our childern correctly (timeout+1 sec per step)
 	   to let the child timeout */
-	steps = (fwd_msg->header.forward.cnt+1)/slurm_get_tree_width();
-	fwd_msg->timeout = (1000*steps);
-	steps++;
-	fwd_msg->timeout += (start_timeout*steps);
-	
+	if(fwd_msg->header.forward.cnt>0) {
+		steps = (fwd_msg->header.forward.cnt+1)/slurm_get_tree_width();
+		fwd_msg->timeout = (5000*steps);
+		steps++;
+		fwd_msg->timeout += (start_timeout*steps);
+	}
 /* 	info("sending to %s with %d forwards", */
 /* 	     fwd_msg->node_name, fwd_msg->header.forward.cnt); */
 	if ((fd = slurm_open_msg_conn(&fwd_msg->addr)) < 0) {
@@ -594,6 +595,7 @@ extern int forward_set(forward_t *forward,
 /* 	info("forwarding to %s",name); */
 	
 	if(span > 0) {
+		span++;
 		forward->addr = xmalloc(sizeof(slurm_addr) * span);
 		forward->name = xmalloc(sizeof(char) 
 					* (MAX_SLURM_NAME * span));
@@ -666,6 +668,7 @@ extern int forward_set_launch(forward_t *forward,
 /* 	info("forwarding to %s",name); */
 	
 	if(span > 0) {
+		span++;
 		forward->addr = xmalloc(sizeof(slurm_addr) * span);
 		forward->name = 
 			xmalloc(sizeof(char) * (MAX_SLURM_NAME * span));
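The hunks above change the fan-out timeout from 1 to 5 seconds of slack per tree level and only compute it when there are messages left to forward. As a rough illustration of the arithmetic, here is a standalone sketch; the constants below are assumptions for the example, not values taken from this commit.

#include <stdio.h>

/* Standalone sketch of the per-level timeout computed in _forward_thread().
 * TREE_WIDTH and START_TIMEOUT are hypothetical example values. */
#define TREE_WIDTH    50       /* fan-out width per tree level        */
#define START_TIMEOUT 10000    /* base message timeout, milliseconds  */

int main(void)
{
	int forward_cnt = 120;                       /* messages still to forward   */
	int steps = (forward_cnt + 1) / TREE_WIDTH;  /* levels below this node = 2  */
	int timeout = 5000 * steps;                  /* 5 sec of slack per level    */

	steps++;
	timeout += START_TIMEOUT * steps;            /* plus base timeout per level */
	printf("timeout = %d msec\n", timeout);      /* 5000*2 + 10000*3 = 40000    */
	return 0;
}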
2  src/common/slurm_errno.c
@@ -132,7 +132,7 @@ static slurm_errtab_t slurm_errtab[] = {
 	{ ESLURM_PATHNAME_TOO_LONG,
 	  "Pathname of a file or directory too long"   		},
 	{ ESLURM_NOT_TOP_PRIORITY,
-	  "Immediate execution impossible, higher priority jobs pending" },
+	  "Immediate execution impossible, insufficient priority" },
 	{ ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE,
 	  "Requested node configuration is not available"	},
 	{ ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE,
16  src/common/slurm_protocol_api.c
@@ -748,7 +748,7 @@ List slurm_receive_msg(slurm_fd fd, slurm_msg_t *msg, int timeout)
 		      timeout);
 	} 
 	
-
+	//info("The timeout is %d", timeout);
 	/*
 	 * Receive a msg. slurm_msg_recvfrom() will read the message
 	 *  length and allocate space on the heap for a buffer containing
@@ -1363,6 +1363,12 @@ _send_and_recv_msg(slurm_fd fd, slurm_msg_t *req,
 	List ret_list = NULL;
 	int steps = 0;
 	resp->auth_cred = NULL;
+	if (!req->forward.timeout) {
+		if(!timeout)
+			timeout = SLURM_MESSAGE_TIMEOUT_MSEC_STATIC;
+		req->forward.timeout = timeout;
+	}
+
 	if(slurm_send_node_msg(fd, req) >= 0) {
 		
 		if(req->forward.cnt>0) {
@@ -1370,9 +1376,9 @@ _send_and_recv_msg(slurm_fd fd, slurm_msg_t *req,
 			   the timeout for to wait for our childern
 			   correctly (timeout+1 sec per step)
 			   to let the child timeout */
-	
+			
 			steps = req->forward.cnt/slurm_get_tree_width();
-			timeout = (1000*steps);
+			timeout = (5000*steps);
 			steps++;
 			timeout += (req->forward.timeout*steps);
 		}
@@ -1660,11 +1666,11 @@ List slurm_send_recv_rc_packed_msg(slurm_msg_t *msg, int timeout)
 		if(msg->forward.cnt>0) {
 			/* figure out where we are in the tree and set
 			   the timeout for to wait for our childern
-			   correctly (timeout+1 sec per step)
+			   correctly (timeout+5 sec per step)
 			   to let the child timeout */
 			
 			steps = msg->forward.cnt/slurm_get_tree_width();
-			timeout = (1000*steps);
+			timeout = (5000*steps);
 			steps++;
 			timeout += (msg->forward.timeout*steps);
 		}
21  src/common/slurm_protocol_socket_implementation.c
@@ -564,6 +564,9 @@ extern int _slurm_getsockname (int __fd, struct sockaddr * __addr,
 extern int _slurm_connect (int __fd, struct sockaddr const * __addr, 
                                 socklen_t __len)
 {
+#if 0
+	return connect ( __fd , __addr , __len ) ;
+#else
 	/* From "man connect": Note that for IP sockets the timeout
 	 * may be very long when syncookies are enabled on the server.
 	 *
@@ -574,18 +577,22 @@ extern int _slurm_connect (int __fd, struct sockaddr const * __addr,
 
 	flags = fcntl(__fd, F_GETFL);
 	fcntl(__fd, F_SETFL, flags | O_NONBLOCK);
-	rc = connect ( __fd , __addr , __len ) ;
-	if ((rc == -1)
-	&&  ((errno == EINPROGRESS) || (errno == EALREADY))) {
+	rc = connect(__fd , __addr , __len);
+	if ((rc == -1) && (errno == EINPROGRESS)) {
+		int poll_rc;
 		struct pollfd ufds;
 		ufds.fd = __fd;
-		ufds.events = POLLOUT;
-		ufds. revents = 0;
-		poll(&ufds, 1, 5000);   /* 5 sec max wait */
-		rc = connect ( __fd , __addr , __len ) ;
+		ufds.events = POLLIN | POLLOUT;
+		ufds.revents = 0;
+		poll_rc = poll(&ufds, 1, 5000);
+		if (poll_rc == 0)
+                        errno = ETIMEDOUT;
+		else if (poll_rc == 1)
+			rc = 0;
 	}
 	fcntl(__fd, F_SETFL, flags);
 	return rc;
+#endif
 }
 
 /* Put the address of the peer connected to socket FD into *ADDR
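The new _slurm_connect() follows the usual non-blocking connect pattern: switch the socket to O_NONBLOCK, start the connect, poll() with a bounded wait, then restore the original flags. Below is a self-contained sketch of the same technique, independent of the SLURM wrappers; a production version would typically also check SO_ERROR via getsockopt() after poll() to distinguish a completed connection from a refused one.

#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <sys/socket.h>

/* Connect with an upper bound on the wait.  Returns 0 on success,
 * -1 with errno set otherwise; timeout_msec caps the handshake time. */
static int connect_with_timeout(int fd, const struct sockaddr *addr,
				socklen_t len, int timeout_msec)
{
	int flags = fcntl(fd, F_GETFL);
	int rc;

	fcntl(fd, F_SETFL, flags | O_NONBLOCK);
	rc = connect(fd, addr, len);
	if ((rc == -1) && (errno == EINPROGRESS)) {
		struct pollfd ufds = { .fd = fd, .events = POLLIN | POLLOUT };
		int poll_rc = poll(&ufds, 1, timeout_msec);

		if (poll_rc == 0)
			errno = ETIMEDOUT;  /* nothing happened in time  */
		else if (poll_rc == 1)
			rc = 0;             /* socket became ready       */
	}
	fcntl(fd, F_SETFL, flags);          /* restore blocking behavior */
	return rc;
}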
189  src/plugins/mpi/mvapich/mvapich.c
@@ -15,7 +15,7 @@
  *  any later version.
  *
  *  In addition, as a special exception, the copyright holders give permission 
- *  to link the code of portions of this program with the OpenSSL library under 
+ *  to link the code of portions of this program with the OpenSSL library under
  *  certain conditions as described in each individual source file, and 
  *  distribute linked combinations including the two. You must obey the GNU 
  *  General Public License in all respects for all of the code used other than 
@@ -43,11 +43,14 @@
 #  include <pthread.h>
 #endif
 
+#include <stdlib.h>
 #include <signal.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include <strings.h>
+#include <sys/poll.h>
+#include <sys/time.h>
 
 #include "src/common/xmalloc.h"
 #include "src/common/xstring.h"
@@ -82,6 +85,7 @@ struct mvapich_args {
  */
 struct mvapich_info
 {
+	int do_poll;          
 	int fd;             /* fd for socket connection to MPI task  */
 	int rank;           /* This process' MPI rank                */
 	int pidlen;         /* length of pid buffer                  */
@@ -109,8 +113,23 @@ static int  mvapich_fd       = -1;
 static int  nprocs           = -1;
 static int  protocol_version = -1;
 static int  v5_phase         = 0;
+static int  connect_once     = 1;
+static int  mvapich_verbose  = 0;
+static int  do_timing        = 0;
 
 
+#define mvapich_debug(args...) \
+	do { \
+		if (mvapich_verbose) \
+			info ("mvapich: " args); \
+	} while (0);
+
+#define mvapich_debug2(args...) \
+	do { \
+		if (mvapich_verbose > 1) \
+			info ("mvapich: " args); \
+	} while (0);
+
 static struct mvapich_info * mvapich_info_create (void)
 {
 	struct mvapich_info *mvi = xmalloc (sizeof (*mvi));
@@ -151,28 +170,32 @@ static int mvapich_get_task_info (struct mvapich_info *mvi)
 {
 	int fd = mvi->fd;
 
-	if (fd_read_n (fd, &mvi->addrlen, sizeof (int)) < 0)
+	if (fd_read_n (fd, &mvi->addrlen, sizeof (int)) <= 0)
 		return error ("mvapich: Unable to read addrlen for rank %d: %m", 
 				mvi->rank);
 
 	mvi->addr = xmalloc (mvi->addrlen);
 
-	if (fd_read_n (fd, mvi->addr, mvi->addrlen) < 0)
+	if (fd_read_n (fd, mvi->addr, mvi->addrlen) <= 0)
 		return error ("mvapich: Unable to read addr info for rank %d: %m", 
 				mvi->rank);
 
 	if (!mvapich_requires_pids ())
 		return (0);
 
-	if (fd_read_n (fd, &mvi->pidlen, sizeof (int)) < 0)
+	if (fd_read_n (fd, &mvi->pidlen, sizeof (int)) <= 0)
 		return error ("mvapich: Unable to read pidlen for rank %d: %m", 
 				mvi->rank);
 
+
 	mvi->pid = xmalloc (mvi->pidlen);
 
-	if (fd_read_n (fd, mvi->pid, mvi->pidlen) < 0)
+	if (fd_read_n (fd, mvi->pid, mvi->pidlen) <= 0)
 		return error ("mvapich: Unable to read pid for rank %d: %m", mvi->rank);
 
+
+	mvi->do_poll = 0;
+
 	return (0);
 }
 
@@ -311,9 +334,15 @@ static void mvapich_bcast_hostids (void)
 
 	for (i = 0; i < nprocs; i++) {
 		struct mvapich_info *mvi = mvarray [i];
+		int co, rc;
 		if (fd_write_n (mvi->fd, hostids, len) < 0)
 			error ("mvapich: write hostid rank %d: %m", mvi->rank);
-		close (mvi->fd);
+
+		if ((rc = fd_read_n (mvi->fd, &co, sizeof (int))) <= 0) {
+			close (mvi->fd);
+			connect_once = 0;
+		} else
+			mvi->do_poll = 1;
 	}
 
 	xfree (hostids);
@@ -351,7 +380,7 @@ static void mvapich_barrier (void)
 	for (i = 0; i < nprocs; i++) {
 		m = mvarray[i];
 		if (fd_write_n (m->fd, &i, sizeof (i)) == -1)
-			error("mvapich write on barrier");
+			error("mvapich: write on barrier: %m");
 		close (m->fd);
 		m->fd = -1;
 	}
@@ -368,7 +397,7 @@ static void mvapich_print_abort_message (slurm_step_layout_t *sl, int rank)
 		return;
 	}
 
-	host = step_layout_host_name (sl, step_layout_host_id (sl, rank));
+	host = step_layout_host_name (sl, rank);
 
 	info ("mvapich: Received ABORT message from MPI rank %d [on %s]", 
 	      rank, host);
@@ -431,26 +460,120 @@ static void mvapich_mvarray_destroy (void)
 	xfree (mvarray);
 }
 
+static int mvapich_rank_from_fd (int fd)
+{
+	int rank = 0;
+	while (mvarray[rank]->fd != fd)
+		rank++;
+	return (rank);
+}
+
 static int mvapich_handle_connection (int fd)
 {
 	int version, rank;
 
-	if (mvapich_get_task_header (fd, &version, &rank) < 0)
-		return (-1);
+	if (v5_phase == 0 || !connect_once) {
+		if (mvapich_get_task_header (fd, &version, &rank) < 0)
+			return (-1);
 
-	if (rank > nprocs - 1) 
-		return (error ("mvapich: task reported invalid rank (%d)", rank));
+		if (rank > nprocs - 1) 
+			return (error ("mvapich: task reported invalid rank (%d)", rank));
+	}
+	else {
+		rank = mvapich_rank_from_fd (fd);
+	}
 
-	if (mvapich_handle_task (fd, mvarray [rank]) < 0)
+	if (mvapich_handle_task (fd, mvarray [rank]) < 0) 
 		return (-1);
 
 	return (0);
 }
 
+static int poll_mvapich_fds (void)
+{
+	int i = 0;
+	int j = 0;
+	int rc;
+	int fd;
+	int nfds = 0;
+	struct pollfd *fds = xmalloc (nprocs * sizeof (struct pollfd));
+
+	for (i = 0; i < nprocs; i++) {
+		if (mvarray[i]->do_poll) {
+			fds[j].fd = mvarray[i]->fd;
+			fds[j].events = POLLIN;
+			j++;
+			nfds++;
+		}
+	}
+
+	mvapich_debug2 ("Going to poll %d fds", nfds);
+	if ((rc = poll (fds, nfds, -1)) < 0) 
+		return (error ("mvapich: poll: %m"));
+
+	i = 0;
+	while (fds[i].revents != POLLIN)
+		i++;
+
+	fd = fds[i].fd;
+	xfree (fds);
+
+	return (fd);
+}
+
+static int mvapich_get_next_connection (int listenfd)
+{
+	slurm_addr addr;
+	int fd;
+
+	if (connect_once && v5_phase > 0) {
+		return (poll_mvapich_fds ());
+	} 
+		
+	if ((fd = slurm_accept_msg_conn (mvapich_fd, &addr)) < 0) {
+		error ("mvapich: accept: %m");
+		return (-1);
+	}
+	mvapich_debug2 ("accept() = %d", fd);
+
+	return (fd);
+}
+
+static void do_timings (void)
+{
+	static int initialized = 0;
+	static struct timeval initv = { 0, 0 };
+	struct timeval tv;
+	struct timeval result;
+
+	if (!do_timing)
+		return;
+
+	if (!initialized) {
+		if (gettimeofday (&initv, NULL) < 0)
+			error ("mvapich: do_timings(): gettimeofday(): %m\n");
+		initialized = 1;
+		return;
+	}
+
+	if (gettimeofday (&tv, NULL) < 0) {
+		error ("mvapich: do_timings(): gettimeofday(): %m\n");
+		return;
+	}
+
+	timersub (&tv, &initv, &result);
+
+	info ("mvapich: Intialization took %d.%03d seconds", result.tv_sec,
+			result.tv_usec/1000);
+
+	return;
+}
+
 static void *mvapich_thr(void *arg)
 {
 	srun_job_t *job = arg;
 	int i = 0;
+	int first = 1;
 
 	debug ("mvapich-0.9.x/gen2: thread started: %ld", pthread_self ());
 
@@ -459,20 +582,26 @@ static void *mvapich_thr(void *arg)
 again:
 	i = 0;
 	while (i < nprocs) {
-		slurm_addr addr;
 		int fd;
 		
-		if ((fd = slurm_accept_msg_conn (mvapich_fd, &addr)) < 0) {
+		if ((fd = mvapich_get_next_connection (mvapich_fd)) < 0) {
 			error ("mvapich: accept: %m");
 			goto fail;
 		}
 
+		if (first) {
+			mvapich_debug ("first task checked in");
+			do_timings ();
+			first = 0;
+		}
+
 		if (mvapich_handle_connection (fd) < 0) 
 			goto fail;
 
 		i++;
 	}
 
+	mvapich_debug ("bcasting mvapich info to %d tasks", nprocs);
 	mvapich_bcast ();
 
 	if (protocol_version == 5 && v5_phase == 0) {
@@ -480,7 +609,11 @@ static void *mvapich_thr(void *arg)
 		goto again;
 	}
 
+	mvapich_debug ("calling mvapich_barrier");
 	mvapich_barrier ();
+	mvapich_debug ("all tasks have checked in");
+
+	do_timings ();
 
 	mvapich_wait_for_abort (job);
 
@@ -494,16 +627,38 @@ static void *mvapich_thr(void *arg)
 	return (void *)0;
 }
 
+static int process_environment (void)
+{
+	char *val;
+
+	if (getenv ("MVAPICH_CONNECT_TWICE"))
+		connect_once = 0;
+
+	if ((val = getenv ("SLURM_MVAPICH_DEBUG"))) {
+		int level = atoi (val);
+		if (level > 0)
+			mvapich_verbose = level;
+	}
+
+	if (getenv ("SLURM_MVAPICH_TIMING"))
+		do_timing = 1;
+
+	return (0);
+}
+
 extern int mvapich_thr_create(srun_job_t *job)
 {
 	int port;
 	pthread_attr_t attr;
 	pthread_t tid;
 
+	if (process_environment () < 0)
+		return error ("mvapich: Failed to read environment settings\n");
+
 	nprocs = opt.nprocs;
 
 	if (net_stream_listen(&mvapich_fd, &port) < 0)
-		error ("Unable to create ib listen port: %m");
+		return error ("Unable to create ib listen port: %m");
 
 	/*
 	 * Accept in a separate thread.
@@ -520,6 +675,8 @@ extern int mvapich_thr_create(srun_job_t *job)
 	setenvf (NULL, "MPIRUN_PORT",   "%d", ntohs (port));
 	setenvf (NULL, "MPIRUN_NPROCS", "%d", nprocs);
 	setenvf (NULL, "MPIRUN_ID",     "%d", job->jobid);
+	if (connect_once)
+		setenvf (NULL, "MPIRUN_CONNECT_ONCE", "1");
 
 	verbose ("mvapich-0.9.[45] master listening on port %d", ntohs (port));
 
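The new process_environment() above adds three user-visible knobs: MVAPICH_CONNECT_TWICE, SLURM_MVAPICH_DEBUG, and SLURM_MVAPICH_TIMING. A hypothetical wrapper showing how they could be set before launching srun (the wrapper, the --mpi option value, and the program name are assumptions for the sketch; the variable names and meanings come from the code above):

#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	setenv("SLURM_MVAPICH_DEBUG", "2", 1);    /* level 2 enables mvapich_debug2() output */
	setenv("SLURM_MVAPICH_TIMING", "1", 1);   /* do_timings() reports initialization time */
	setenv("MVAPICH_CONNECT_TWICE", "1", 1);  /* opt out of the new connect-once path     */
	execlp("srun", "srun", "--mpi=mvapich", "./a.out", (char *)NULL);
	return 1;                                 /* only reached if exec fails               */
}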
2  src/plugins/sched/wiki2/get_jobs.c
@@ -299,7 +299,7 @@ static uint32_t	_get_job_time_limit(struct job_record *job_ptr)
 	uint32_t limit = job_ptr->time_limit;
 
 	if ((limit == NO_VAL) || (limit == INFINITE))
-		return (uint32_t) 0;
+		return (uint32_t) (365 * 24 * 60 * 60);	/* one year */
 	else
 		return (limit * 60);	/* seconds, not minutes */
 }
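For reference, the value now reported for an unlimited job is one year, 365 * 24 * 60 * 60 = 31,536,000 seconds, rather than 0, presumably so an external scheduler does not misread 0 as an already-exhausted limit. A minimal standalone sketch of the conversion; the sentinel values are assumptions based on the usual SLURM definitions:

#include <stdint.h>
#include <stdio.h>

#define NO_VAL   0xfffffffe    /* assumed SLURM "not set" sentinel   */
#define INFINITE 0xffffffff    /* assumed SLURM "unlimited" sentinel */

static uint32_t job_time_limit_secs(uint32_t limit_minutes)
{
	if ((limit_minutes == NO_VAL) || (limit_minutes == INFINITE))
		return 365 * 24 * 60 * 60;      /* one year = 31536000 sec */
	return limit_minutes * 60;              /* minutes -> seconds      */
}

int main(void)
{
	printf("%u\n", job_time_limit_secs(INFINITE));  /* 31536000 */
	printf("%u\n", job_time_limit_secs(30));        /* 1800     */
	return 0;
}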
32  src/plugins/select/bluegene/block_allocator/block_allocator.c
@@ -2444,6 +2444,15 @@ static int _reset_the_path(ba_switch_t *curr_switch, int source,
 	int *node_curr;
 	int port_tar, port_tar1;
 	ba_switch_t *next_switch = NULL; 
+
+	if(source < 0 || source > NUM_PORTS_PER_NODE) {
+		fatal("source port was %d can only be 0->%d",
+		      source, NUM_PORTS_PER_NODE);
+	}
+	if(target < 0 || target > NUM_PORTS_PER_NODE) {
+		fatal("target port was %d can only be 0->%d",
+		      target, NUM_PORTS_PER_NODE);
+	}
 	/*set the switch to not be used */
 	if(!curr_switch->int_wire[source].used) {
 		debug("I reached the end, the source isn't used");
@@ -2451,6 +2460,11 @@ static int _reset_the_path(ba_switch_t *curr_switch, int source,
 	}
 	curr_switch->int_wire[source].used = 0;
 	port_tar = curr_switch->int_wire[source].port_tar;
+	if(port_tar < 0 || port_tar > NUM_PORTS_PER_NODE) {
+		fatal("port_tar port was %d can only be 0->%d",
+		      source, NUM_PORTS_PER_NODE);
+	}
+	
 	port_tar1 = port_tar;
 	curr_switch->int_wire[source].port_tar = source;
 	curr_switch->int_wire[port_tar].used = 0;
@@ -3182,22 +3196,22 @@ static char *_set_internal_wires(List nodes, int size, int conn_type)
 	int count=0, i, set=0;
 	int *start = NULL;
 	int *end = NULL;
-	char *name = xmalloc(sizeof(char)*BUFSIZE);
+	char *name = xmalloc(BUFSIZE);
 	ListIterator itr;
 	hostlist_t hostlist = hostlist_create(NULL);
-	
+	char temp_name[4];
+
 	if(!nodes)
 		return NULL;
 	itr = list_iterator_create(nodes);
 	while((ba_node[count] = (ba_node_t*) list_next(itr))) {
-		snprintf(name, BUFSIZE, "%d%d%d", 
-			ba_node[count]->coord[X],
-			ba_node[count]->coord[Y],
-			ba_node[count]->coord[Z]);
-		debug3("name = %s",name);
+		snprintf(temp_name, sizeof(temp_name), "%d%d%d", 
+			 ba_node[count]->coord[X],
+			 ba_node[count]->coord[Y],
+			 ba_node[count]->coord[Z]);
+		debug3("name = %s", temp_name);
 		count++;
-		hostlist_push(hostlist, name);
-				
+		hostlist_push(hostlist, temp_name);
 	}
 	list_iterator_destroy(itr);
 		
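This hunk is part of the commit-wide sprintf-to-snprintf conversion noted in NEWS. The three-digit coordinate name needs exactly 4 bytes including the trailing NUL; a quick standalone illustration of why the bounded call is safer (sketch only, not SLURM code):

#include <stdio.h>

int main(void)
{
	char temp_name[4];
	int x = 1, y = 2, z = 3;

	/* Fits exactly: three digits plus the NUL terminator. */
	snprintf(temp_name, sizeof(temp_name), "%d%d%d", x, y, z);
	printf("%s\n", temp_name);            /* prints "123" */

	/* A two-digit coordinate would overrun the buffer with sprintf();
	 * snprintf() truncates and still NUL-terminates instead. */
	x = 12;
	snprintf(temp_name, sizeof(temp_name), "%d%d%d", x, y, z);
	printf("%s\n", temp_name);            /* prints "122" (truncated) */
	return 0;
}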
23  src/plugins/select/bluegene/plugin/bg_job_place.c
@@ -234,14 +234,14 @@ static int _find_best_block_match(struct job_record* job_ptr,
 				tmp_record->bg_block_list =
 					list_create(destroy_ba_node);
 				slurm_conf_lock();
-				tmp_record->nodes = 
-					xmalloc(sizeof(char)*
-						(len + strlen(slurmctld_conf.
-							      node_prefix)+1));
+				len += strlen(slurmctld_conf.node_prefix)+1;
+				tmp_record->nodes = xmalloc(len);
 				
-				sprintf(tmp_record->nodes, "%s%s", 
-					slurmctld_conf.node_prefix, 
-					tmp_nodes+i);
+				snprintf(tmp_record->nodes,
+					 len,
+					 "%s%s", 
+					 slurmctld_conf.node_prefix, 
+					 tmp_nodes+i);
 				slurm_conf_unlock();
 			
 				process_nodes(tmp_record);
@@ -472,7 +472,7 @@ static int _find_best_block_match(struct job_record* job_ptr,
 				goto try_again;
 			}
 		}
-		format_node_name(*found_bg_record, tmp_char);
+		format_node_name(*found_bg_record, tmp_char, sizeof(tmp_char));
 	
 		debug("_find_best_block_match %s <%s>", 
 			(*found_bg_record)->bg_block_id, 
@@ -540,14 +540,15 @@ static int _find_best_block_match(struct job_record* job_ptr,
 			slurm_mutex_unlock(&request_list_mutex);
 		
 			slurm_conf_lock();
-			sprintf(tmp_char, "%s%s", 
-				slurmctld_conf.node_prefix, request.save_name);
+			snprintf(tmp_char, sizeof(tmp_char), "%s%s", 
+				 slurmctld_conf.node_prefix,
+				 request.save_name);
 			slurm_conf_unlock();
 			if (node_name2bitmap(tmp_char, 
 					     false, 
 					     &tmp_bitmap)) {
 				fatal("Unable to convert nodes %s to bitmap", 
-				      request.save_name);
+				      tmp_char);
 			}
 			
 			bit_and(slurm_block_bitmap, tmp_bitmap);
9  src/plugins/select/bluegene/plugin/block_sys.c
@@ -573,10 +573,11 @@ int read_bg_blocks()
 			free(bpid);
 
 			slurm_conf_lock();
-			sprintf(node_name_tmp, 
-				"%s%d%d%d", 
-				slurmctld_conf.node_prefix,
-				coord[X], coord[Y], coord[Z]);
+			snprintf(node_name_tmp, 
+				 sizeof(node_name_tmp),
+				 "%s%d%d%d", 
+				 slurmctld_conf.node_prefix,
+				 coord[X], coord[Y], coord[Z]);
 			slurm_conf_unlock();
 			
 			hostlist_push(hostlist, node_name_tmp);
139  src/plugins/select/bluegene/plugin/bluegene.c
@@ -221,7 +221,7 @@ extern void print_bg_record(bg_record_t* bg_record)
 		info("\tbitmap: %s", bitstring);
 	}
 #else
-	format_node_name(bg_record, tmp_char);
+	format_node_name(bg_record, tmp_char, sizeof(tmp_char));
 	info("Record: BlockID:%s Nodes:%s Conn:%s",
 	     bg_record->bg_block_id, tmp_char,
 	     convert_conn_type(bg_record->conn_type));
@@ -405,7 +405,7 @@ extern void process_nodes(bg_record_t *bg_record)
 	if (node_name2bitmap(bg_record->nodes, 
 			     false, 
 			     &bg_record->bitmap)) {
-		fatal("Unable to convert nodes %s to bitmap", 
+		fatal("1 Unable to convert nodes %s to bitmap", 
 		      bg_record->nodes);
 	}
 #endif
@@ -614,21 +614,23 @@ extern void drain_as_needed(bg_record_t *bg_record, char *reason)
 	return;
 }
 
-extern int format_node_name(bg_record_t *bg_record, char tmp_char[])
+extern int format_node_name(bg_record_t *bg_record, char *buf, int buf_size)
 {
 	if(bg_record->quarter != (uint16_t)NO_VAL) {
 		if(bg_record->nodecard != (uint16_t)NO_VAL) {
-			sprintf(tmp_char,"%s.%d.%d",
-				bg_record->nodes,
-				bg_record->quarter,
-				bg_record->nodecard);
+			snprintf(buf, buf_size,
+				 "%s.%d.%d",
+				 bg_record->nodes,
+				 bg_record->quarter,
+				 bg_record->nodecard);
 		} else {
-			sprintf(tmp_char,"%s.%d",
-				bg_record->nodes,
-				bg_record->quarter);
+			snprintf(buf, buf_size,
+				 "%s.%d",
+				 bg_record->nodes,
+				 bg_record->quarter);
 		}
 	} else {
-		sprintf(tmp_char,"%s",bg_record->nodes);
+		snprintf(buf, buf_size, "%s", bg_record->nodes);
 	}
 	return SLURM_SUCCESS;
 }
@@ -1039,9 +1041,9 @@ extern int create_defined_blocks(bg_layout_t overlapped)
 				}
 #else
 				if (!bg_record->bg_block_id) { 
-					bg_record->bg_block_id = 
-						xmalloc(sizeof(char)*8);
-					snprintf(bg_record->bg_block_id, 8, 
+					bg_record->bg_block_id = xmalloc(8);
+					snprintf(bg_record->bg_block_id,
+						 8,
 						 "RMP%d", 
 						 block_inx++);
 				}
@@ -1300,13 +1302,15 @@ extern int create_full_system_block(int *block_inx)
 	geo[Z] = max_dim[Z];
 #endif
 	slurm_conf_lock();
-	name = xmalloc(sizeof(char)*(10+strlen(slurmctld_conf.node_prefix)));
+	i = (10+strlen(slurmctld_conf.node_prefix));
+	name = xmalloc(i);
 	if((geo[X] == 0) && (geo[Y] == 0) && (geo[Z] == 0))
-		sprintf(name, "%s000", slurmctld_conf.node_prefix);
+		snprintf(name, i, "%s000",
+			 slurmctld_conf.node_prefix);
 	else
-		sprintf(name, "%s[000x%d%d%d]",
-			slurmctld_conf.node_prefix,
-			geo[X], geo[Y], geo[Z]);
+		snprintf(name, i, "%s[000x%d%d%d]",
+			 slurmctld_conf.node_prefix,
+			 geo[X], geo[Y], geo[Z]);
 	slurm_conf_unlock();
 			
 	if(bg_found_block_list) {
@@ -1397,9 +1401,9 @@ extern int create_full_system_block(int *block_inx)
 	}
 #else
 	if (!bg_record->bg_block_id) { 
-		bg_record->bg_block_id = 
-			xmalloc(sizeof(char)*8);
-		snprintf(bg_record->bg_block_id, 8, "RMP%d", (*block_inx)++);
+		bg_record->bg_block_id = xmalloc(8);
+		snprintf(bg_record->bg_block_id, 8,
+			 "RMP%d", (*block_inx)++);
 	}
 #endif	/* HAVE_BG_FILES */
 	print_bg_record(bg_record);
@@ -1927,9 +1931,10 @@ static int _addto_node_list(bg_record_t *bg_record, int *start, int *end)
 		for (y = start[Y]; y <= end[Y]; y++) {
 			for (z = start[Z]; z <= end[Z]; z++) {
 				slurm_conf_lock();
-				sprintf(node_name_tmp, "%s%d%d%d", 
-					slurmctld_conf.node_prefix,
-					x, y, z);		
+				snprintf(node_name_tmp, sizeof(node_name_tmp),
+					 "%s%d%d%d", 
+					 slurmctld_conf.node_prefix,
+					 x, y, z);		
 				slurm_conf_unlock();
 				ba_node = ba_copy_node(
 					&ba_system_ptr->grid[x][y][z]);
@@ -2058,7 +2063,8 @@ static int _validate_config_nodes(void)
 		}
 			
 		if (!bg_record->bg_block_id) {
-			format_node_name(bg_record, tmp_char);	
+			format_node_name(bg_record, tmp_char,
+					 sizeof(tmp_char));
 			info("Block found in bluegene.conf to be "
 			     "created: Nodes:%s", 
 			     tmp_char);
@@ -2068,7 +2074,8 @@ static int _validate_config_nodes(void)
 				full_created = 1;
 
 			list_push(bg_found_block_list, bg_record);
-			format_node_name(bg_record, tmp_char);
+			format_node_name(bg_record, tmp_char,
+					 sizeof(tmp_char));
 			info("Existing: BlockID:%s Nodes:%s Conn:%s",
 			     bg_record->bg_block_id, 
 			     tmp_char,
@@ -2090,7 +2097,7 @@ static int _validate_config_nodes(void)
 		copy_bg_record(full_system_bg_record, bg_record);
 		list_push(bg_list, bg_record);
 		list_push(bg_found_block_list, bg_record);
-		format_node_name(bg_record, tmp_char);
+		format_node_name(bg_record, tmp_char, sizeof(tmp_char));
 		info("Existing: BlockID:%s Nodes:%s Conn:%s",
 		     bg_record->bg_block_id, 
 		     tmp_char,
@@ -2305,8 +2312,9 @@ static int _add_block_db(bg_record_t *bg_record, int *block_inx)
 		return SLURM_ERROR;
 	}
 #else
-	bg_record->bg_block_id = xmalloc(sizeof(char)*8);
-	snprintf(bg_record->bg_block_id, 8, "RMP%d", 
+	bg_record->bg_block_id = xmalloc(8);
+	snprintf(bg_record->bg_block_id, 8,
+		 "RMP%d", 
 		 (*block_inx)++);
 #endif
 	return SLURM_SUCCESS;
@@ -2428,11 +2436,13 @@ static int _breakup_blocks(ba_request_t *request, List my_block_list,
 			debug2("found it here %s, %s",
 			       bg_record->bg_block_id,
 			       bg_record->nodes);
-			request->save_name = xmalloc(sizeof(char) * 4);
-			sprintf(request->save_name, "%d%d%d",
-				bg_record->start[X],
-				bg_record->start[Y],
-				bg_record->start[Z]);
+			request->save_name = xmalloc(4);
+			snprintf(request->save_name,
+				 4,
+				 "%d%d%d",
+				 bg_record->start[X],
+				 bg_record->start[Y],
+				 bg_record->start[Z]);
 			rc = SLURM_SUCCESS;
 			goto finished;
 		}
@@ -2448,11 +2458,13 @@ static int _breakup_blocks(ba_request_t *request, List my_block_list,
 			debug2("1 got %d on quarter %d",
 			       total_proc_cnt, last_quarter);
 			if(total_proc_cnt == request->procs) {
-				request->save_name = xmalloc(sizeof(char) * 4);
-				sprintf(request->save_name, "%d%d%d",
-					bg_record->start[X],
-					bg_record->start[Y],
-					bg_record->start[Z]);
+				request->save_name = xmalloc(4);
+				snprintf(request->save_name, 
+					 4,
+					 "%d%d%d",
+					 bg_record->start[X],
+					 bg_record->start[Y],
+					 bg_record->start[Z]);
 				if(!my_block_list) {
 					rc = SLURM_SUCCESS;
 					goto finished;	
@@ -2515,11 +2527,13 @@ static int _breakup_blocks(ba_request_t *request, List my_block_list,
 			debug2("found it here %s, %s",
 			       bg_record->bg_block_id,
 			       bg_record->nodes);
-			request->save_name = xmalloc(sizeof(char) * 4);
-			sprintf(request->save_name, "%d%d%d",
-				bg_record->start[X],
-				bg_record->start[Y],
-				bg_record->start[Z]);
+			request->save_name = xmalloc(4);
+			snprintf(request->save_name,
+				 4,
+				 "%d%d%d",
+				 bg_record->start[X],
+				 bg_record->start[Y],
+				 bg_record->start[Z]);
 			rc = SLURM_SUCCESS;
 			goto finished;
 		} 
@@ -2536,11 +2550,13 @@ static int _breakup_blocks(ba_request_t *request, List my_block_list,
 			debug2("got %d on quarter %d",
 			       total_proc_cnt, last_quarter);
 			if(total_proc_cnt == request->procs) {
-				request->save_name = xmalloc(sizeof(char) * 4);
-				sprintf(request->save_name, "%d%d%d",
-					bg_record->start[X],
-					bg_record->start[Y],
-					bg_record->start[Z]);
+				request->save_name = xmalloc(4);
+				snprintf(request->save_name,
+					 4,
+					 "%d%d%d",
+					 bg_record->start[X],
+					 bg_record->start[Y],
+					 bg_record->start[Z]);
 				if(!my_block_list) {
 					rc = SLURM_SUCCESS;
 					goto finished;	
@@ -2567,16 +2583,18 @@ static int _breakup_blocks(ba_request_t *request, List my_block_list,
 	}
 found_one:
 	if(bg_record) {
-		format_node_name(bg_record, tmp_char);
+		format_node_name(bg_record, tmp_char, sizeof(tmp_char));
 			
 		debug2("going to split %s, %s",
 		       bg_record->bg_block_id,
 		       tmp_char);
-		request->save_name = xmalloc(sizeof(char) * 4);
-		sprintf(request->save_name, "%d%d%d",
-			bg_record->start[X],
-			bg_record->start[Y],
-			bg_record->start[Z]);
+		request->save_name = xmalloc(4);
+		snprintf(request->save_name, 
+			 4,
+			 "%d%d%d",
+			 bg_record->start[X],
+			 bg_record->start[Y],
+			 bg_record->start[Z]);
 		if(!my_block_list) {
 			rc = SLURM_SUCCESS;
 			goto finished;	
@@ -2673,12 +2691,9 @@ static int _add_bg_record(List records, List used_nodes, blockreq_t *blockreq)
 	if(i<len) {
 		len -= i;
 		slurm_conf_lock();
-		bg_record->nodes = xmalloc(sizeof(char)*
-					   (len
-					    +strlen(slurmctld_conf.node_prefix)
-					    +1));
-		
-		sprintf(bg_record->nodes, "%s%s", 
+		len += strlen(slurmctld_conf.node_prefix)+1;
+		bg_record->nodes = xmalloc(len);
+		snprintf(bg_record->nodes, len, "%s%s", 
 			slurmctld_conf.node_prefix, blockreq->block+i);
 		slurm_conf_unlock();
 			
2  src/plugins/select/bluegene/plugin/bluegene.h
@@ -188,7 +188,7 @@ extern bg_record_t *find_bg_record_in_list(List my_list, char *bg_block_id);
 */
 extern int update_block_user(bg_record_t *bg_block_id, int set); 
 extern void drain_as_needed(bg_record_t *bg_record, char *reason);
-extern int format_node_name(bg_record_t *bg_record, char tmp_char[]);
+extern int format_node_name(bg_record_t *bg_record, char *buf, int buf_size);
 extern bool blocks_overlap(bg_record_t *rec_a, bg_record_t *rec_b);
 
 
2  src/plugins/select/bluegene/plugin/opts.c
@@ -16,7 +16,7 @@
  *  any later version.
  *
  *  In addition, as a special exception, the copyright holders give permission 
- *  to link the code of portions of this program with the OpenSSL library under 
+ *  to link the code of portions of this program with the OpenSSL library under
  *  certain conditions as described in each individual source file, and 
  *  distribute linked combinations including the two. You must obey the GNU 
  *  General Public License in all respects for all of the code used other than 
6  src/plugins/select/bluegene/plugin/select_bluegene.c
@@ -468,7 +468,8 @@ extern int select_p_alter_node_cnt(enum select_node_cnt type, void *data)
 	
 	switch (type) {
 	case SELECT_GET_NODE_SCALING:
-		(*nodes) = bluegene_bp_node_cnt;
+		if((*nodes) != INFINITE)
+			(*nodes) = bluegene_bp_node_cnt;
 		break;
 	case SELECT_APPLY_NODE_MIN_OFFSET:
 		if((*nodes) == 1) {
@@ -480,7 +481,8 @@ extern int select_p_alter_node_cnt(enum select_node_cnt type, void *data)
 		(*nodes) *= bluegene_bp_node_cnt;
 		break;
 	case SELECT_APPLY_NODE_MAX_OFFSET:
-		(*nodes) *= bluegene_bp_node_cnt;
+		if((*nodes) != INFINITE)
+			(*nodes) *= bluegene_bp_node_cnt;
 		break;
 	case SELECT_SET_NODE_CNT:
 		select_g_get_jobinfo(job_desc->select_jobinfo,
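The guards above keep the INFINITE sentinel from being scaled by the base-partition size. A small standalone illustration of what would happen without them; the 512-node base partition and the sentinel value are assumptions for the sketch:

#include <stdint.h>
#include <stdio.h>

#define INFINITE 0xffffffff    /* assumed SLURM "no limit" sentinel */

int main(void)
{
	uint32_t max_nodes = INFINITE;
	uint32_t bp_node_cnt = 512;     /* c-nodes per BlueGene base partition */

	/* Unguarded scaling wraps around and destroys the sentinel: */
	printf("%u\n", max_nodes * bp_node_cnt);     /* 4294966784, no longer INFINITE */

	/* With the guard added in this commit the sentinel survives: */
	if (max_nodes != INFINITE)
		max_nodes *= bp_node_cnt;
	printf("%u\n", max_nodes);                   /* 4294967295 (INFINITE) */
	return 0;
}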
386  src/plugins/select/cons_res/select_cons_res.c
@@ -102,6 +102,7 @@
 #  endif
 #endif
 
+#include <fcntl.h>
 #include <stdio.h>
 #include <slurm/slurm.h>
 #include <slurm/slurm_errno.h>
@@ -149,12 +150,14 @@ const char plugin_name[] =
     "Consumable Resources (CR) Node Selection plugin";
 const char plugin_type[] = "select/cons_res";
 const uint32_t plugin_version = 90;
+const uint32_t pstate_version = 1;	/* version control on saved state */
 
 /* node_cr_record keeps track of the resources within a node which 
  * have been reserved by already scheduled jobs. 
  */
 struct node_cr_record {
 	struct node_record *node_ptr;	/* ptr to the node that own these resources */
+	char *name;		/* reference copy of node_ptr name */
 	uint32_t used_cpus;	/* cpu count reserved by already scheduled jobs */
 	struct node_cr_record *node_next;/* next entry with same hash index */
 };
@@ -164,18 +167,22 @@ struct node_cr_record {
 struct select_cr_job {
 	uint32_t job_id;	/* job ID, default set by SLURM        */
 	uint16_t state;		/* job state information               */
-	int nprocs;		/* --nprocs=n,      -n n               */
-	int nhosts;		/* number of hosts allocated to job    */
+	int32_t nprocs;		/* --nprocs=n,      -n n               */
+	int32_t nhosts;		/* number of hosts allocated to job    */
 	char **host;		/* hostname vector                     */
-	int *cpus;		/* number of processors on each host   */
-	int *ntask;		/* number of tasks to run on each host */
+	int32_t *cpus;		/* number of processors on each host   */
+	int32_t *ntask;		/* number of tasks to run on each host */
 	bitstr_t *node_bitmap;	/* bitmap of nodes allocated to job    */
 };
 
-static struct node_cr_record *select_node_ptr; /* Array of node_cr_record. One
-						  entry for each node in the cluster */
-static int select_node_cnt;
-static struct node_cr_record **cr_node_hash_table = NULL; /* node_cr_record hash table */
+/* Array of node_cr_record. One entry for each node in the cluster */
+static struct node_cr_record *select_node_ptr = NULL;
+static int select_node_cnt = 0;
+static struct node_cr_record **cr_node_hash_table = NULL;
+
+/* Restored node_cr_records - used by select_p_state_restore/node_init */
+static struct node_cr_record *prev_select_node_ptr = NULL;
+static int prev_select_node_cnt = 0;
 
 static uint16_t select_fast_schedule;
 
@@ -362,9 +369,18 @@ static void _cr_exclusive_dist(struct select_cr_job *job)
 		job->ntask[i] = job->cpus[i];
 }
 
+/* xfree an array of node_cr_record */
+static void _xfree_select_nodes(struct node_cr_record *ptr, int select_node_cnt)
+{
+        xfree(ptr);
+}
+
 /* xfree a select_cr_job job */
 static void _xfree_select_cr_job(struct select_cr_job *job)
 {
+	if (job == NULL)
+		return;
+
 	xfree(job->host);
 	xfree(job->cpus);
 	xfree(job->ntask);
@@ -380,6 +396,10 @@ static void _clear_job_list(void)