Skip to content

Commit

Permalink
PHP.wasm: yield 0 bytes read on fd_read failure to improve PHP's frea…
Browse files Browse the repository at this point in the history
…d() and feof() behavior (#1053)

Ensures that PHP.wasm indicates that `feof($fp) === true` when reading
from pipes exposed by `proc_open()` after the process have exited and
all the data have been read.

[Internally in
stdiop_read](https://github.com/php/php-src/blob/718a8b4278cf811919906045d3bea4092e8a0844/main/streams/plain_wrapper.c#L426-L453),
PHP expects the read() syscall to return 0 while indicating that the
number of bytes read was also 0. This PR makes the PHP.wasm polling
logic comply with those expectations.

## Remaining work

Rebuild all the PHP versions.

## Testing instructions

Confirm the CI checks for PHP 8.2 are green.
  • Loading branch information
adamziel committed Feb 26, 2024
1 parent e2734f5 commit 97ca132
Show file tree
Hide file tree
Showing 56 changed files with 696 additions and 460 deletions.
141 changes: 79 additions & 62 deletions packages/php-wasm/compile/php/phpwasm-emscripten-library.js
Original file line number Diff line number Diff line change
Expand Up @@ -722,74 +722,91 @@ const LibraryExample = {
* @see https://github.com/emscripten-core/emscripten/issues/13214
*/
js_fd_read: function (fd, iov, iovcnt, pnum) {
var returnCode;
var stream;
try {
stream = SYSCALLS.getStreamFromFD(fd);
var num = doReadv(stream, iov, iovcnt);
HEAPU32[pnum >> 2] = num;
returnCode = 0;
} catch (e) {
if (typeof FS == 'undefined' || !(e.name === 'ErrnoError')) throw e;
returnCode = e.errno;
// Only run the read operation on a regular call,
// never when rewinding the stack.
if (Asyncify.state === Asyncify.State.Normal) {
var returnCode;
var stream;
let num = 0;
try {
stream = SYSCALLS.getStreamFromFD(fd);
const num = doReadv(stream, iov, iovcnt);
HEAPU32[pnum >> 2] = num;
return 0;
} catch (e) {
// Rethrow any unexpected non-filesystem errors.
if (typeof FS == "undefined" || !(e.name === "ErrnoError")) {
throw e;
}
// Only return synchronously if this isn't an asynchronous pipe.
// Error code 6 indicates EWOULDBLOCK – this is our signal to wait.
// We also need to distinguish between a process pipe and a file pipe, otherwise
// reading from an empty file would block until the timeout.
if (e.errno !== 6 || !(stream?.fd in PHPWASM.child_proc_by_fd)) {
// On failure, yield 0 bytes read to indicate EOF.
HEAPU32[pnum >> 2] = 0;
return returnCode
}
}
}

// If it's a blocking process pipe, wait for it to become readable.
// We need to distinguish between a process pipe and a file pipe, otherwise
// reading from an empty file would block until the timeout.
if (
returnCode === 6 /*EWOULDBLOCK*/ &&
stream?.fd in PHPWASM.child_proc_by_fd
) {
// You might wonder why we duplicate the code here instead of always using
// Asyncify.handleSleep(). The reason is performance. Most of the time,
// the read operation will work synchronously and won't require yielding
// back to JS. In these cases we don't want to pay the Asyncify overhead,
// save the stack, yield back to JS, restore the stack etc.
return Asyncify.handleSleep(function (wakeUp) {
var retries = 0;
var interval = 50;
var timeout = 5000;
// We poll for data and give up after a timeout.
// We can't simply rely on PHP timeout here because we don't want
// to, say, block the entire PHPUnit test suite without any visible
// feedback.
var maxRetries = timeout / interval;
function poll() {
var returnCode;
var stream;
try {
stream = SYSCALLS.getStreamFromFD(fd);
var num = doReadv(stream, iov, iovcnt);
HEAPU32[pnum >> 2] = num;
returnCode = 0;
} catch (e) {
if (
typeof FS == 'undefined' ||
!(e.name === 'ErrnoError')
) {
console.error(e);
throw e;
}
returnCode = e.errno;
}

// At this point we know we have to poll.
// You might wonder why we duplicate the code here instead of always using
// Asyncify.handleSleep(). The reason is performance. Most of the time,
// the read operation will work synchronously and won't require yielding
// back to JS. In these cases we don't want to pay the Asyncify overhead,
// save the stack, yield back to JS, restore the stack etc.
return Asyncify.handleSleep(function (wakeUp) {
var retries = 0;
var interval = 50;
var timeout = 5000;
// We poll for data and give up after a timeout.
// We can't simply rely on PHP timeout here because we don't want
// to, say, block the entire PHPUnit test suite without any visible
// feedback.
var maxRetries = timeout / interval;
function poll() {
var returnCode;
var stream;
let num;
try {
stream = SYSCALLS.getStreamFromFD(fd);
num = doReadv(stream, iov, iovcnt);
returnCode = 0;
} catch (e) {
if (
returnCode !== 6 ||
++retries > maxRetries ||
!(fd in PHPWASM.child_proc_by_fd) ||
PHPWASM.child_proc_by_fd[fd]?.exited ||
FS.isClosed(stream)
typeof FS == 'undefined' ||
!(e.name === 'ErrnoError')
) {
wakeUp(returnCode);
} else {
setTimeout(poll, interval);
console.error(e);
throw e;
}
returnCode = e.errno;
}
poll();
});
}
return returnCode;

const success = returnCode === 0;
const failure = (
++retries > maxRetries ||
!(fd in PHPWASM.child_proc_by_fd) ||
PHPWASM.child_proc_by_fd[fd]?.exited ||
FS.isClosed(stream)
);

if (success) {
HEAPU32[pnum >> 2] = num;
wakeUp(0);
} else if (failure) {
// On failure, yield 0 bytes read to indicate EOF.
HEAPU32[pnum >> 2] = 0;
// If the failure is due to a timeout, return 0 to indicate that we
// reached EOF. Otherwise, propagate the error code.
wakeUp(returnCode === 6 ? 0 : returnCode);
} else {
setTimeout(poll, interval);
}
}
poll();
});
},

/**
Expand Down

0 comments on commit 97ca132

Please sign in to comment.