Skip to content

Commit

Permalink
merged revision 5773 from 4.0-fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
knielson committed Feb 17, 2012
1 parent 7eaa2e0 commit 90fe3d5
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 11 deletions.
12 changes: 8 additions & 4 deletions README.new_in_4.0
Expand Up @@ -102,11 +102,15 @@ default it is located in the /var/spool/torque/server_priv directory. The file u
syntax. By the time TORQUE 4.0 is released, this will be an XML-compatible syntax. For now,
the syntax is as follows:

<path>
<level> comma separated node list </level>
<level> comma separated node list </level>
<path attr=val>
<level attr=val> comma separated node list </level>
<level attr=val> comma separated node list </level>
...
</path>
</path attr=val>
<path attr=val>
<level attr=val> comma separated node list </level>
...
</path attr=val>
...

The <path> </path> tag pair identifies a group of compute nodes. The <level></level> tag pair
Expand Down
52 changes: 45 additions & 7 deletions src/resmom/mom_comm.c
Expand Up @@ -8775,6 +8775,7 @@ void fork_demux(
struct sigaction act;
struct routefd *routem;
int open_sockets = 0;
int amt_read = 0;

maxfd = sysconf(_SC_OPEN_MAX);

Expand Down Expand Up @@ -8802,6 +8803,7 @@ void fork_demux(
if (im_mom_stdout == -1)
{
fprintf(stderr, "could not dup stdout in fork_demux");
free(routem);
return;
}
close(pjob->ji_im_stdout);
Expand All @@ -8810,6 +8812,7 @@ void fork_demux(
if (im_mom_stdout == -1)
{
fprintf(stderr, "could not dup stdout in fork_demux");
free(routem);
return;
}
close(pjob->ji_im_stderr);
Expand All @@ -8835,15 +8838,17 @@ void fork_demux(
pjob->ji_qs.ji_jobid);

log_err(-1, id, log_buffer);

close(im_mom_stdout);
close(im_mom_stderr);
_exit(5);
}

ret = getaddrinfo(momhost, NULL, NULL, &res);
if (ret)
{
fprintf(stderr,"get addrinfo failed in im_demux_thread: %d\n", ret);

close(im_mom_stdout);
close(im_mom_stderr);
_exit(5);
}

Expand All @@ -8868,14 +8873,16 @@ void fork_demux(
if (listen(im_mom_stdout, TORQUE_LISTENQUEUE) < 0)
{
perror("listen on out");

close(im_mom_stdout);
close(im_mom_stderr);
_exit(5);
}

if (listen(im_mom_stderr, TORQUE_LISTENQUEUE) < 0)
{
perror("listen on err");

close(im_mom_stdout);
close(im_mom_stderr);
_exit(5);
}

Expand All @@ -8888,13 +8895,26 @@ void fork_demux(
if (fd1 >= 0)
break;

usleep(500000);
retries++;
} while(retries < 10);

if(retries >= 10)
{
perror("could not open demux to parent");
close(im_mom_stdout);
close(im_mom_stderr);
_exit(5);
}


fd2 = open_demux(htonl(ipaddr), pjob->ji_porterr);
if (fd2 < 0)
{
perror("cannot open mux stderr port");
close(im_mom_stdout);
close(im_mom_stderr);
close(fd1);
_exit(5);
}

Expand All @@ -8915,7 +8935,10 @@ void fork_demux(
else
{
perror("fork_demux: select failed\n");

close(im_mom_stdout);
close(im_mom_stderr);
close(fd1);
close(fd2);
_exit(1);
}
}
Expand Down Expand Up @@ -8952,6 +8975,10 @@ void fork_demux(
if (newsock < 0)
{
perror("accept failed");
close(fd1);
close(fd2);
close(im_mom_stdout);
close(im_mom_stderr);
_exit(5);
}

Expand All @@ -8965,11 +8992,22 @@ void fork_demux(

case new_out:

readit(i, fd1);
amt_read = readit(i, fd1);
if(amt_read <= 0)
{
routem[i].r_fd = -1;
routem[i].r_which = invalid;
}
break;

case new_err:

readit(i, fd2);
amt_read = readit(i, fd2);
if(amt_read <= 0)
{
routem[i].r_fd = -1;
routem[i].r_which = invalid;
}
break;

default:
Expand Down

0 comments on commit 90fe3d5

Please sign in to comment.