diff --git a/ChangeLog.md b/ChangeLog.md index 001d770cd71d1..ed064b7895ea7 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -24,6 +24,8 @@ See docs/process.md for more on how version tagging works. `--proxy-to-worker` flag) was removed due to lack of usage. If you were depending on this feature but missed the PSA, please let us know about your use case. (#25645, #25440) +- The fetch library now supports streaming data requests when + `-sFETCH_STREAMING` is enabled. 4.0.20 - 11/18/25 ----------------- diff --git a/site/source/docs/tools_reference/settings_reference.rst b/site/source/docs/tools_reference/settings_reference.rst index 5524c30e2741f..79d6f7501093e 100644 --- a/site/source/docs/tools_reference/settings_reference.rst +++ b/site/source/docs/tools_reference/settings_reference.rst @@ -2785,6 +2785,26 @@ If nonzero, enables emscripten_fetch API. Default value: false +.. _fetch_streaming: + +FETCH_STREAMING +=============== + +Enables streaming fetched data when the fetch attribute +EMSCRIPTEN_FETCH_STREAM_DATA is used. For streaming requests, the DOM Fetch +API is used otherwise XMLHttpRequest is used. +Both modes generally support the same API, but there are some key +differences: + + - XHR supports synchronous requests + - XHR supports overriding mime types + - Fetch supports streaming data using the 'onprogress' callback + +If set to a value of 2, only the DOM Fetch backend will be used. This should +only be used in testing. + +Default value: 0 + .. _wasmfs: WASMFS diff --git a/src/Fetch.js b/src/Fetch.js index 03203d0f53af5..75555c6ffada8 100644 --- a/src/Fetch.js +++ b/src/Fetch.js @@ -4,6 +4,243 @@ * SPDX-License-Identifier: MIT */ +#if FETCH_STREAMING +/** + * A class that mimics the XMLHttpRequest API using the modern Fetch API. + * This implementation is specifically tailored to only handle 'arraybuffer' + * responses. + */ +// TODO Use a regular class name when #5840 is fixed. +var FetchXHR = class { + // --- Public XHR Properties --- + + // Event Handlers + onload = null; + onerror = null; + onprogress = null; + onreadystatechange = null; + ontimeout = null; + + // Request Configuration + responseType = 'arraybuffer'; + withCredentials = false; + timeout = 0; // Standard XHR timeout property + + // Response / State Properties + readyState = 0; // 0: UNSENT + response = null; + responseURL = ''; + status = 0; + statusText = ''; + + // --- Internal Properties --- + _method = ''; + _url = ''; + _headers = {}; + _abortController = null; + _aborted = false; + _responseHeaders = null; + + // --- Private state management --- + _changeReadyState(state) { + this.readyState = state; + this.onreadystatechange?.(); + } + + // --- Public XHR Methods --- + + /** + * Initializes a request. + * @param {string} method The HTTP request method (e.g., 'GET', 'POST'). + * @param {string} url The URL to send the request to. + * @param {boolean} [async=true] This parameter is ignored as Fetch is always async. + * @param {string|null} [user=null] The username for basic authentication. + * @param {string|null} [password=null] The password for basic authentication. + */ + open(method, url, async = true, user = null, password = null) { + if (this.readyState !== 0 && this.readyState !== 4) { + console.warn("FetchXHR.open() called while a request is in progress."); + this.abort(); + } + + // Reset internal state for the new request + this._method = method; + this._url = url; + this._headers = {}; + this._responseHeaders = null; + + // The async parameter is part of the XHR API but is an error here because + // the Fetch API is inherently asynchronous and does not support synchronous requests. + if (!async) { + throw new Error("FetchXHR does not support synchronous requests."); + } + + // Handle Basic Authentication if user/password are provided. + // This creates a base64-encoded string and sets the Authorization header. + if (user) { + const credentials = btoa(`${user}:${password || ''}`); + this._headers['Authorization'] = `Basic ${credentials}`; + } + + this._changeReadyState(1); // 1: OPENED + } + + /** + * Sets the value of an HTTP request header. + * @param {string} header The name of the header. + * @param {string} value The value of the header. + */ + setRequestHeader(header, value) { + if (this.readyState !== 1) { + throw new Error('setRequestHeader can only be called when state is OPENED.'); + } + this._headers[header] = value; + } + + /** + * This method is not effectively implemented because Fetch API relies on the + * server's Content-Type header and does not support overriding the MIME type + * on the client side in the same way as XHR. + * @param {string} mimetype The MIME type to use. + */ + overrideMimeType(mimetype) { + throw new Error("overrideMimeType is not supported by the Fetch API and has no effect."); + } + + /** + * Returns a string containing all the response headers, separated by CRLF. + * @returns {string} The response headers. + */ + getAllResponseHeaders() { + if (!this._responseHeaders) { + return ''; + } + + let headersString = ''; + // The Headers object is iterable. + for (const [key, value] of this._responseHeaders.entries()) { + headersString += `${key}: ${value}\r\n`; + } + return headersString; + } + + /** + * Sends the request. + * @param body The body of the request. + */ + async send(body = null) { + if (this.readyState !== 1) { + throw new Error('send() can only be called when state is OPENED.'); + } + + this._abortController = new AbortController(); + const signal = this._abortController.signal; + + // Handle timeout + let timeoutID; + if (this.timeout > 0) { + timeoutID = setTimeout( + () => this._abortController.abort(new DOMException('The user aborted a request.', 'TimeoutError')), + this.timeout + ); + } + + const fetchOptions = { + method: this._method, + headers: this._headers, + body: body, + signal: signal, + credentials: this.withCredentials ? 'include' : 'same-origin', + }; + + try { + const response = await fetch(this._url, fetchOptions); + + // Populate response properties once headers are received + this.status = response.status; + this.statusText = response.statusText; + this.responseURL = response.url; + this._responseHeaders = response.headers; + this._changeReadyState(2); // 2: HEADERS_RECEIVED + + // Start processing the body + this._changeReadyState(3); // 3: LOADING + + if (!response.body) { + throw new Error("Response has no body to read."); + } + + const reader = response.body.getReader(); + const contentLength = +response.headers.get('Content-Length'); + + let receivedLength = 0; + const chunks = []; + + while (true) { + const { done, value } = await reader.read(); + if (done) { + break; + } + + chunks.push(value); + receivedLength += value.length; + + if (this.onprogress) { + // Convert to ArrayBuffer as requested by responseType. + this.response = value.buffer; + const progressEvent = { + lengthComputable: contentLength > 0, + loaded: receivedLength, + total: contentLength + }; + this.onprogress(progressEvent); + } + } + + // Combine chunks into a single Uint8Array. + const allChunks = new Uint8Array(receivedLength); + let position = 0; + for (const chunk of chunks) { + allChunks.set(chunk, position); + position += chunk.length; + } + + // Convert to ArrayBuffer as requested by responseType + this.response = allChunks.buffer; + } catch (error) { + this.statusText = error.message; + + if (error.name === 'AbortError') { + // Do nothing. + } else if (error.name === 'TimeoutError') { + this.ontimeout?.(); + } else { + // This is a network error + this.onerror?.(); + } + } finally { + clearTimeout(timeoutID); + if (!this._aborted) { + this._changeReadyState(4); // 4: DONE + // The XHR 'load' event fires for successful HTTP statuses (2xx) as well as + // unsuccessful ones (4xx, 5xx). The 'error' event is for network failures. + this.onload?.(); + } + } + } + + /** + * Aborts the request if it has already been sent. + */ + abort() { + this._aborted = true; + this.status = 0; + this._changeReadyState(4); // 4: DONE + this._abortController?.abort(); + } +} +#endif + var Fetch = { // HandleAllocator for XHR request object // xhrs: undefined, @@ -267,7 +504,18 @@ function fetchXHR(fetch, onsuccess, onerror, onprogress, onreadystatechange) { var userNameStr = userName ? UTF8ToString(userName) : undefined; var passwordStr = password ? UTF8ToString(password) : undefined; +#if FETCH_STREAMING == 1 + if (fetchAttrStreamData) { + var xhr = new FetchXHR(); + } else { + var xhr = new XMLHttpRequest(); + } +#elif FETCH_STREAMING == 2 + // This setting forces using FetchXHR for all requests. Used only in testing. + var xhr = new FetchXHR(); +#else var xhr = new XMLHttpRequest(); +#endif xhr.withCredentials = !!{{{ makeGetValue('fetch_attr', C_STRUCTS.emscripten_fetch_attr_t.withCredentials, 'u8') }}};; #if FETCH_DEBUG dbg(`fetch: xhr.timeout: ${xhr.timeout}, xhr.withCredentials: ${xhr.withCredentials}`); @@ -276,8 +524,8 @@ function fetchXHR(fetch, onsuccess, onerror, onprogress, onreadystatechange) { xhr.open(requestMethod, url_, !fetchAttrSynchronous, userNameStr, passwordStr); if (!fetchAttrSynchronous) xhr.timeout = timeoutMsecs; // XHR timeout field is only accessible in async XHRs, and must be set after .open() but before .send(). xhr.url_ = url_; // Save the url for debugging purposes (and for comparing to the responseURL that server side advertised) -#if ASSERTIONS - assert(!fetchAttrStreamData, 'streaming uses moz-chunked-arraybuffer which is no longer supported; TODO: rewrite using fetch()'); +#if ASSERTIONS && !FETCH_STREAMING + assert(!fetchAttrStreamData, 'Streaming is only supported when FETCH_STREAMING is enabled.'); #endif xhr.responseType = 'arraybuffer'; diff --git a/src/lib/libfetch.js b/src/lib/libfetch.js index 539ddc0f02fca..2a7668a4e3e7e 100644 --- a/src/lib/libfetch.js +++ b/src/lib/libfetch.js @@ -22,6 +22,9 @@ var LibraryFetch = { $fetchCacheData: fetchCacheData, #endif $fetchXHR: fetchXHR, +#if FETCH_STREAMING + $FetchXHR: FetchXHR, +#endif emscripten_start_fetch: startFetch, emscripten_start_fetch__deps: [ @@ -38,7 +41,10 @@ var LibraryFetch = { '$fetchLoadCachedData', '$fetchDeleteCachedData', #endif - ] +#if FETCH_STREAMING + '$FetchXHR', +#endif + ], }; addToLibrary(LibraryFetch); diff --git a/src/settings.js b/src/settings.js index 6afb1c5614a5e..39b5d637b5dd1 100644 --- a/src/settings.js +++ b/src/settings.js @@ -1823,6 +1823,21 @@ var FETCH_DEBUG = false; // [link] var FETCH = false; +// Enables streaming fetched data when the fetch attribute +// EMSCRIPTEN_FETCH_STREAM_DATA is used. For streaming requests, the DOM Fetch +// API is used otherwise XMLHttpRequest is used. +// Both modes generally support the same API, but there are some key +// differences: +// +// - XHR supports synchronous requests +// - XHR supports overriding mime types +// - Fetch supports streaming data using the 'onprogress' callback +// +// If set to a value of 2, only the DOM Fetch backend will be used. This should +// only be used in testing. +// [link] +var FETCH_STREAMING = 0; + // ATTENTION [WIP]: Experimental feature. Please use at your own risk. // This will eventually replace the current JS file system implementation. // If set to 1, uses new filesystem implementation. diff --git a/test/decorators.py b/test/decorators.py index 51548221dd0eb..ae1184f52a9df 100644 --- a/test/decorators.py +++ b/test/decorators.py @@ -404,6 +404,21 @@ def metafunc(self, with_wasm64, *args, **kwargs): return metafunc +def also_with_fetch_streaming(f): + assert callable(f) + + @wraps(f) + def metafunc(self, with_fetch, *args, **kwargs): + if with_fetch: + self.set_setting('FETCH_STREAMING', '2') + self.cflags += ['-DSKIP_SYNC_FETCH_TESTS'] + f(self, *args, **kwargs) + + parameterize(metafunc, {'': (False,), + 'fetch_backend': (True,)}) + return metafunc + + def also_with_wasm2js(func): assert callable(func) diff --git a/test/fetch/test_fetch_redirect.c b/test/fetch/test_fetch_redirect.c index 0066cfc2bdd51..d2f35dcbc5e3e 100644 --- a/test/fetch/test_fetch_redirect.c +++ b/test/fetch/test_fetch_redirect.c @@ -69,11 +69,13 @@ void start_next_async_fetch() { async_method_idx++; if (async_method_idx >= num_methods) { // All async tests done, now run sync tests +#ifndef SKIP_SYNC_FETCH_TESTS for (int m = 0; m < num_methods; ++m) { for (int i = 0; i < num_codes; ++i) { fetchSyncTest(redirect_codes[i], methods[m]); } } +#endif exit(0); } } diff --git a/test/fetch/test_fetch_response_headers.cpp b/test/fetch/test_fetch_response_headers.cpp index ec5178622dbfe..0a1c579b64812 100644 --- a/test/fetch/test_fetch_response_headers.cpp +++ b/test/fetch/test_fetch_response_headers.cpp @@ -21,7 +21,10 @@ int main() { emscripten_fetch_attr_t attr; emscripten_fetch_attr_init(&attr); strcpy(attr.requestMethod, "GET"); - attr.attributes = EMSCRIPTEN_FETCH_REPLACE | EMSCRIPTEN_FETCH_LOAD_TO_MEMORY | EMSCRIPTEN_FETCH_SYNCHRONOUS; + attr.attributes = EMSCRIPTEN_FETCH_REPLACE | EMSCRIPTEN_FETCH_LOAD_TO_MEMORY; +#ifndef SKIP_SYNC_FETCH_TESTS + attr.attributes |= EMSCRIPTEN_FETCH_SYNCHRONOUS; +#endif attr.requestHeaders = headers; attr.onsuccess = [] (emscripten_fetch_t *fetch) { @@ -44,6 +47,9 @@ int main() { printf("Data checksum: %02X\n", checksum); assert(checksum == 0x08); emscripten_fetch_close(fetch); +#ifdef SKIP_SYNC_FETCH_TESTS + exit(0); +#endif if (result == 1) result = 0; }; @@ -63,9 +69,11 @@ int main() { }; emscripten_fetch_t *fetch = emscripten_fetch(&attr, "gears.png"); +#ifndef SKIP_SYNC_FETCH_TESTS if (result != 0) { result = 2; printf("emscripten_fetch() failed to run synchronously!\n"); } +#endif return result; } diff --git a/test/test_browser.py b/test/test_browser.py index 52fb3031587b5..e4e1191770d4c 100644 --- a/test/test_browser.py +++ b/test/test_browser.py @@ -50,6 +50,7 @@ ) from decorators import ( also_with_asan, + also_with_fetch_streaming, also_with_minimal_runtime, also_with_wasm2js, also_with_wasmfs, @@ -4408,6 +4409,7 @@ def test_preallocated_heap(self): # Tests emscripten_fetch() usage to XHR data directly to memory without persisting results to IndexedDB. @also_with_wasm2js + @also_with_fetch_streaming def test_fetch_to_memory(self): # Test error reporting in the negative case when the file URL doesn't exist. (http 404) self.btest_exit('fetch/test_fetch_to_memory.cpp', @@ -4420,6 +4422,7 @@ def test_fetch_to_memory(self): cflags=['-sFETCH_DEBUG', '-sFETCH'] + arg) @also_with_wasm2js + @also_with_fetch_streaming @parameterized({ '': ([],), 'pthread_exit': (['-DDO_PTHREAD_EXIT'],), @@ -4431,12 +4434,14 @@ def test_fetch_from_thread(self, args): cflags=args + ['-pthread', '-sPROXY_TO_PTHREAD', '-sFETCH_DEBUG', '-sFETCH', '-DFILE_DOES_NOT_EXIST']) @also_with_wasm2js + @also_with_fetch_streaming def test_fetch_to_indexdb(self): shutil.copy(test_file('gears.png'), '.') self.btest_exit('fetch/test_fetch_to_indexeddb.cpp', cflags=['-sFETCH_DEBUG', '-sFETCH']) # Tests emscripten_fetch() usage to persist an XHR into IndexedDB and subsequently load up from there. @also_with_wasm2js + @also_with_fetch_streaming def test_fetch_cached_xhr(self): shutil.copy(test_file('gears.png'), '.') self.btest_exit('fetch/test_fetch_cached_xhr.cpp', cflags=['-sFETCH_DEBUG', '-sFETCH']) @@ -4444,13 +4449,20 @@ def test_fetch_cached_xhr(self): # Tests that response headers get set on emscripten_fetch_t values. @no_firefox('https://github.com/emscripten-core/emscripten/issues/16868') @also_with_wasm2js - def test_fetch_response_headers(self): + @also_with_fetch_streaming + @parameterized({ + '': ([],), + 'sync': (['-DSYNC'],), + }) + def test_fetch_response_headers(self, args): + if self.get_setting('FETCH_STREAMING') and '-DSYNC' in args: + self.skipTest('Fetch backend does not support sync fetch.') shutil.copy(test_file('gears.png'), '.') - self.btest_exit('fetch/test_fetch_response_headers.cpp', cflags=['-sFETCH_DEBUG', '-sFETCH', '-pthread', '-sPROXY_TO_PTHREAD']) + self.btest_exit('fetch/test_fetch_response_headers.cpp', cflags=['-sFETCH_DEBUG', '-sFETCH', '-pthread', '-sPROXY_TO_PTHREAD'] + args) - # Test emscripten_fetch() usage to stream a XHR in to memory without storing the full file in memory + # Test emscripten_fetch() usage to stream a fetch in to memory without storing the full file in memory + # Streaming only works the fetch backend. @also_with_wasm2js - @disabled('moz-chunked-arraybuffer was firefox-only and has been removed') def test_fetch_stream_file(self): # Strategy: create a large 128MB file, and compile with a small 16MB Emscripten heap, so that the tested file # won't fully fit in the heap. This verifies that streaming works properly. @@ -4460,12 +4472,14 @@ def test_fetch_stream_file(self): with open('largefile.txt', 'w') as f: for _ in range(1024): f.write(s) - self.btest_exit('fetch/test_fetch_stream_file.cpp', cflags=['-sFETCH_DEBUG', '-sFETCH']) + self.btest_exit('fetch/test_fetch_stream_file.cpp', cflags=['-sFETCH_DEBUG', '-sFETCH', '-sFETCH_STREAMING']) + @also_with_fetch_streaming def test_fetch_headers_received(self): create_file('myfile.dat', 'hello world\n') self.btest_exit('fetch/test_fetch_headers_received.c', cflags=['-sFETCH_DEBUG', '-sFETCH']) + @also_with_fetch_streaming def test_fetch_xhr_abort(self): shutil.copy(test_file('gears.png'), '.') self.btest_exit('fetch/test_fetch_xhr_abort.cpp', cflags=['-sFETCH_DEBUG', '-sFETCH']) @@ -4500,13 +4514,16 @@ def test_fetch_idb_delete(self): shutil.copy(test_file('gears.png'), '.') self.btest_exit('fetch/test_fetch_idb_delete.cpp', cflags=['-pthread', '-sFETCH_DEBUG', '-sFETCH', '-sWASM=0', '-sPROXY_TO_PTHREAD']) + @also_with_fetch_streaming def test_fetch_post(self): self.btest_exit('fetch/test_fetch_post.c', cflags=['-sFETCH']) + @also_with_fetch_streaming def test_fetch_progress(self): create_file('myfile.dat', 'hello world\n' * 1000) self.btest_exit('fetch/test_fetch_progress.c', cflags=['-sFETCH']) + @also_with_fetch_streaming def test_fetch_to_memory_async(self): create_file('myfile.dat', 'hello world\n' * 1000) self.btest_exit('fetch/test_fetch_to_memory_async.c', cflags=['-sFETCH']) @@ -4516,16 +4533,18 @@ def test_fetch_to_memory_sync(self): create_file('myfile.dat', 'hello world\n' * 1000) self.btest_exit('fetch/test_fetch_to_memory_sync.c', cflags=['-sFETCH', '-pthread', '-sPROXY_TO_PTHREAD']) - @disabled('moz-chunked-arraybuffer was firefox-only and has been removed') + # Streaming only works the fetch backend. def test_fetch_stream_async(self): create_file('myfile.dat', 'hello world\n' * 1000) - self.btest_exit('fetch/test_fetch_stream_async.c', cflags=['-sFETCH']) + self.btest_exit('fetch/test_fetch_stream_async.c', cflags=['-sFETCH', '-sFETCH_STREAMING']) + @also_with_fetch_streaming def test_fetch_persist(self): create_file('myfile.dat', 'hello world\n') self.btest_exit('fetch/test_fetch_persist.c', cflags=['-sFETCH']) @no_firefox('https://github.com/emscripten-core/emscripten/issues/16868') + @also_with_fetch_streaming def test_fetch_redirect(self): self.btest_exit('fetch/test_fetch_redirect.c', cflags=['-sFETCH', '-pthread', '-sPROXY_TO_PTHREAD', f'-DSERVER="{self.SERVER_URL}"'])