From 68c3e44421c966ed81ad1bd834951220cd7e9fdd Mon Sep 17 00:00:00 2001
From: Sebastian Markbage
Date: Wed, 17 May 2023 16:53:09 -0400
Subject: [PATCH] Optimize large strings

---
 .../react-client/src/ReactFlightClient.js     | 104 ++++++++++++------
 .../ReactDOMLegacyServerStreamConfig.js       |   4 +
 .../src/ReactServerStreamConfigFB.js          |   4 +
 .../src/__tests__/ReactFlightDOMEdge-test.js  |  21 ++++
 .../src/__tests__/ReactFlightDOMNode-test.js  |  27 +++++
 .../react-server/src/ReactFlightServer.js     |  50 +++++++--
 .../src/ReactServerStreamConfigBrowser.js     |   4 +
 .../src/ReactServerStreamConfigBun.js         |   4 +
 .../src/ReactServerStreamConfigEdge.js        |   4 +
 .../src/ReactServerStreamConfigNode.js        |   6 +
 .../forks/ReactServerStreamConfig.custom.js   |   1 +
 11 files changed, 184 insertions(+), 45 deletions(-)

diff --git a/packages/react-client/src/ReactFlightClient.js b/packages/react-client/src/ReactFlightClient.js
index a7fca07462753..c4e80af15edc7 100644
--- a/packages/react-client/src/ReactFlightClient.js
+++ b/packages/react-client/src/ReactFlightClient.js
@@ -179,7 +179,7 @@ export type Response = {
   _rowID: number, // parts of a row ID parsed so far
   _rowTag: number, // 0 indicates that we're currently parsing the row ID
   _rowLength: number, // remaining bytes in the row. 0 indicates that we're looking for a newline.
-  _buffer: Array<string | Uint8Array>, // chunks received so far as part of this row
+  _buffer: Array<Uint8Array>, // chunks received so far as part of this row
 };

 function readChunk<T>(chunk: SomeChunk<T>): T {
@@ -288,6 +288,14 @@ function createResolvedModuleChunk<T>(
   return new Chunk(RESOLVED_MODULE, value, null, response);
 }

+function createInitializedTextChunk(
+  response: Response,
+  value: string,
+): InitializedChunk<string> {
+  // $FlowFixMe[invalid-constructor] Flow doesn't support functions as constructors
+  return new Chunk(INITIALIZED, value, null, response);
+}
+
 function resolveModelChunk<T>(
   chunk: SomeChunk<T>,
   value: UninitializedModel,
@@ -704,6 +712,13 @@ function resolveModel(
   }
 }

+function resolveText(response: Response, id: number, text: string): void {
+  const chunks = response._chunks;
+  // We assume that we always reference large strings after they've been
+  // emitted.
+  chunks.set(id, createInitializedTextChunk(response, text));
+}
+
 function resolveModule(
   response: Response,
   id: number,
@@ -818,7 +833,7 @@ function resolveHint(
   code: string,
   model: UninitializedModel,
 ): void {
-  const hintModel = parseModel(response, model);
+  const hintModel: HintModel = parseModel(response, model);
   dispatchHint(code, hintModel);
 }

@@ -826,18 +841,14 @@ function processFullRow(
   response: Response,
   id: number,
   tag: number,
-  buffer: Array<string | Uint8Array>,
+  buffer: Array<Uint8Array>,
   lastChunk: string | Uint8Array,
 ): void {
   let row = '';
   const stringDecoder = response._stringDecoder;
   for (let i = 0; i < buffer.length; i++) {
     const chunk = buffer[i];
-    if (typeof chunk === 'string') {
-      row += chunk;
-    } else {
-      row += readPartialStringChunk(stringDecoder, chunk);
-    }
+    row += readPartialStringChunk(stringDecoder, chunk);
   }
   if (typeof lastChunk === 'string') {
     row += lastChunk;
@@ -869,6 +880,10 @@
       }
       return;
     }
+    case 84 /* "T" */: {
+      resolveText(response, id, row);
+      return;
+    }
     default: {
       // We assume anything else is JSON.
resolveModel(response, id, row); @@ -882,33 +897,50 @@ export function processBinaryChunk( chunk: Uint8Array, ): void { let i = 0; - while (i < chunk.length) { + let rowState = response._rowState; + let rowID = response._rowID; + let rowTag = response._rowTag; + let rowLength = response._rowLength; + const buffer = response._buffer; + const chunkLength = chunk.length; + while (i < chunkLength) { let lastIdx = -1; - switch (response._rowState) { + switch (rowState) { case ROW_ID: { const byte = chunk[i++]; if (byte === 58 /* ":" */) { // Finished the rowID, next we'll parse the tag. - response._rowState = ROW_TAG; + rowState = ROW_TAG; } else { - response._rowID = - (response._rowID << 4) | (byte > 96 ? byte - 87 : byte - 48); + rowID = (rowID << 4) | (byte > 96 ? byte - 87 : byte - 48); } continue; } case ROW_TAG: { const resolvedRowTag = chunk[i]; - if (resolvedRowTag > 64 && resolvedRowTag < 91) { - response._rowTag = resolvedRowTag; + if (resolvedRowTag === 84 /* "T" */) { + rowTag = resolvedRowTag; + rowState = ROW_LENGTH; + i++; + } else if (resolvedRowTag > 64 && resolvedRowTag < 91 /* "A"-"Z" */) { + rowTag = resolvedRowTag; + rowState = ROW_CHUNK_BY_NEWLINE; i++; } else { + rowTag = 0; + rowState = ROW_CHUNK_BY_NEWLINE; // This was an unknown tag so it was probably part of the data. } - response._rowState = ROW_CHUNK_BY_NEWLINE; continue; } case ROW_LENGTH: { - // TODO + const byte = chunk[i++]; + if (byte === 44 /* "," */) { + // Finished the rowLength, next we'll buffer up to that length. + rowState = ROW_CHUNK_BY_LENGTH; + } else { + rowLength = (rowLength << 4) | (byte > 96 ? byte - 87 : byte - 48); + } continue; } case ROW_CHUNK_BY_NEWLINE: { @@ -918,7 +950,6 @@ export function processBinaryChunk( } case ROW_CHUNK_BY_LENGTH: { // We're looking for the remaining byte length - const rowLength = response._rowLength; if (i + rowLength <= chunk.length) { lastIdx = i + rowLength; } @@ -927,31 +958,34 @@ export function processBinaryChunk( } if (lastIdx > -1) { // We found the last chunk of the row - const lastChunk = chunk.slice(i, lastIdx); - processFullRow( - response, - response._rowID, - response._rowTag, - response._buffer, - lastChunk, - ); + const offset = chunk.byteOffset + i; + const length = lastIdx - i; + const lastChunk = new Uint8Array(chunk.buffer, offset, length); + processFullRow(response, rowID, rowTag, buffer, lastChunk); // Reset state machine for a new row - response._rowState = ROW_ID; - response._rowTag = 0; - response._rowID = 0; - response._rowLength = 0; - response._buffer.length = 0; + rowState = ROW_ID; + rowTag = 0; + rowID = 0; + rowLength = 0; + buffer.length = 0; i = lastIdx + 1; } else { // The rest of this row is in a future chunk. We stash the rest of the // current chunk until we can process the full row. - const remainingSlice = chunk.slice(i); - response._buffer.push(remainingSlice); - // Update how many bytes we're still waiting for. - response._rowLength -= remainingSlice.length; + const offset = chunk.byteOffset + i; + const length = chunk.byteLength - i; + const remainingSlice = new Uint8Array(chunk.buffer, offset, length); + buffer.push(remainingSlice); + // Update how many bytes we're still waiting for. If we're looking for + // a newline, this doesn't hurt since we'll just ignore it. 
+      rowLength -= remainingSlice.byteLength;
       break;
     }
   }
+  response._rowState = rowState;
+  response._rowID = rowID;
+  response._rowTag = rowTag;
+  response._rowLength = rowLength;
 }

 function parseModel<T>(response: Response, json: UninitializedModel): T {
diff --git a/packages/react-dom-bindings/src/server/ReactDOMLegacyServerStreamConfig.js b/packages/react-dom-bindings/src/server/ReactDOMLegacyServerStreamConfig.js
index 242c3a79f382c..5d055026492f3 100644
--- a/packages/react-dom-bindings/src/server/ReactDOMLegacyServerStreamConfig.js
+++ b/packages/react-dom-bindings/src/server/ReactDOMLegacyServerStreamConfig.js
@@ -57,6 +57,10 @@ export function clonePrecomputedChunk(
   return chunk;
 }

+export function byteLengthOfChunk(chunk: Chunk | PrecomputedChunk): number {
+  throw new Error('Not implemented.');
+}
+
 export function closeWithError(destination: Destination, error: mixed): void {
   // $FlowFixMe[incompatible-call]: This is an Error object or the destination accepts other types.
   destination.destroy(error);
diff --git a/packages/react-server-dom-fb/src/ReactServerStreamConfigFB.js b/packages/react-server-dom-fb/src/ReactServerStreamConfigFB.js
index 88d7d3c52ae40..71f1949d2a17b 100644
--- a/packages/react-server-dom-fb/src/ReactServerStreamConfigFB.js
+++ b/packages/react-server-dom-fb/src/ReactServerStreamConfigFB.js
@@ -63,6 +63,10 @@ export function clonePrecomputedChunk(
   return chunk;
 }

+export function byteLengthOfChunk(chunk: Chunk | PrecomputedChunk): number {
+  throw new Error('Not implemented.');
+}
+
 export function closeWithError(destination: Destination, error: mixed): void {
   destination.done = true;
   destination.fatal = true;
diff --git a/packages/react-server-dom-webpack/src/__tests__/ReactFlightDOMEdge-test.js b/packages/react-server-dom-webpack/src/__tests__/ReactFlightDOMEdge-test.js
index ee110d483d711..a94911643e165 100644
--- a/packages/react-server-dom-webpack/src/__tests__/ReactFlightDOMEdge-test.js
+++ b/packages/react-server-dom-webpack/src/__tests__/ReactFlightDOMEdge-test.js
@@ -98,4 +98,25 @@ describe('ReactFlightDOMEdge', () => {
     const result = await readResult(ssrStream);
     expect(result).toEqual('Client Component');
   });
+
+  it('should encode long string in a compact format', async () => {
+    const testString = '"\n\t'.repeat(500) + '🙃';
+
+    const stream = ReactServerDOMServer.renderToReadableStream({
+      text: testString,
+    });
+    const [stream1, stream2] = stream.tee();
+
+    const serializedContent = await readResult(stream1);
+    // The content should be compact and unescaped
+    expect(serializedContent.length).toBeLessThan(2000);
+    expect(serializedContent).not.toContain('\\n');
+    expect(serializedContent).not.toContain('\\t');
+    expect(serializedContent).not.toContain('\\"');
+    expect(serializedContent).toContain('\t');
+
+    const result = await ReactServerDOMClient.createFromReadableStream(stream2);
+    // Should still match the result when parsed
+    expect(result.text).toBe(testString);
+  });
 });
diff --git a/packages/react-server-dom-webpack/src/__tests__/ReactFlightDOMNode-test.js b/packages/react-server-dom-webpack/src/__tests__/ReactFlightDOMNode-test.js
index c720b4434cdf9..4eed4c562152c 100644
--- a/packages/react-server-dom-webpack/src/__tests__/ReactFlightDOMNode-test.js
+++ b/packages/react-server-dom-webpack/src/__tests__/ReactFlightDOMNode-test.js
@@ -104,4 +104,31 @@ describe('ReactFlightDOMNode', () => {
     const result = await readResult(ssrStream);
     expect(result).toEqual('Client Component');
   });
+
+  it('should encode long string in a compact format', async () => {
+    const testString = '"\n\t'.repeat(500) + '🙃';
+
+    const stream = ReactServerDOMServer.renderToPipeableStream({
+      text: testString,
+    });
+
+    const readable = new Stream.PassThrough();
+
+    const stringResult = readResult(readable);
+    const parsedResult = ReactServerDOMClient.createFromNodeStream(readable);
+
+    stream.pipe(readable);
+
+    const serializedContent = await stringResult;
+    // The content should be compact and unescaped
+    expect(serializedContent.length).toBeLessThan(2000);
+    expect(serializedContent).not.toContain('\\n');
+    expect(serializedContent).not.toContain('\\t');
+    expect(serializedContent).not.toContain('\\"');
+    expect(serializedContent).toContain('\t');
+
+    const result = await parsedResult;
+    // Should still match the result when parsed
+    expect(result.text).toBe(testString);
+  });
 });
diff --git a/packages/react-server/src/ReactFlightServer.js b/packages/react-server/src/ReactFlightServer.js
index 40cdeab9c2af9..3aab4f6111772 100644
--- a/packages/react-server/src/ReactFlightServer.js
+++ b/packages/react-server/src/ReactFlightServer.js
@@ -15,6 +15,7 @@ import {
   beginWriting,
   writeChunkAndReturn,
   stringToChunk,
+  byteLengthOfChunk,
   completeWriting,
   close,
   closeWithError,
@@ -171,7 +172,7 @@ export type Request = {
   pingedTasks: Array<Task>,
   completedImportChunks: Array<Chunk>,
   completedHintChunks: Array<Chunk>,
-  completedJSONChunks: Array<Chunk>,
+  completedRegularChunks: Array<Chunk>,
   completedErrorChunks: Array<Chunk>,
   writtenSymbols: Map<symbol, number>,
   writtenClientReferences: Map<ClientReferenceKey, number>,
@@ -230,7 +231,7 @@ export function createRequest(
     pingedTasks: pingedTasks,
     completedImportChunks: ([]: Array<Chunk>),
     completedHintChunks: ([]: Array<Chunk>),
-    completedJSONChunks: ([]: Array<Chunk>),
+    completedRegularChunks: ([]: Array<Chunk>),
     completedErrorChunks: ([]: Array<Chunk>),
     writtenSymbols: new Map(),
     writtenClientReferences: new Map(),
@@ -715,11 +716,25 @@ function serializeServerReference(
     metadataId,
     serverReferenceMetadata,
   );
-  request.completedJSONChunks.push(processedChunk);
+  request.completedRegularChunks.push(processedChunk);
   writtenServerReferences.set(serverReference, metadataId);
   return serializeServerReferenceID(metadataId);
 }

+function serializeLargeTextString(request: Request, text: string): string {
+  request.pendingChunks += 2;
+  const textId = request.nextChunkId++;
+  const textChunk = stringToChunk(text);
+  const headerChunk = processTextHeader(
+    request,
+    textId,
+    text,
+    byteLengthOfChunk(textChunk),
+  );
+  request.completedRegularChunks.push(headerChunk, textChunk);
+  return serializeByValueID(textId);
+}
+
 function escapeStringValue(value: string): string {
   if (value[0] === '$') {
     // We need to escape $ prefixed strings since we use those to encode
@@ -960,7 +975,12 @@ function resolveModelToJSON(
         return serializeDateFromDateJSON(value);
       }
     }
-
+    if (value.length >= 1024) {
+      // For large strings, we encode them outside the JSON payload so that we
+      // don't have to double encode and double parse the strings. This can also
+      // be more compact in case the string has a lot of escaped characters.
+ return serializeLargeTextString(request, value); + } return escapeStringValue(value); } @@ -1152,7 +1172,7 @@ function emitProviderChunk( ): void { const contextReference = serializeProviderReference(contextName); const processedChunk = processReferenceChunk(request, id, contextReference); - request.completedJSONChunks.push(processedChunk); + request.completedRegularChunks.push(processedChunk); } function retryTask(request: Request, task: Task): void { @@ -1216,7 +1236,7 @@ function retryTask(request: Request, task: Task): void { } const processedChunk = processModelChunk(request, task.id, value); - request.completedJSONChunks.push(processedChunk); + request.completedRegularChunks.push(processedChunk); request.abortableTasks.delete(task); task.status = COMPLETED; } catch (thrownValue) { @@ -1323,11 +1343,11 @@ function flushCompletedChunks( hintChunks.splice(0, i); // Next comes model data. - const jsonChunks = request.completedJSONChunks; + const regularChunks = request.completedRegularChunks; i = 0; - for (; i < jsonChunks.length; i++) { + for (; i < regularChunks.length; i++) { request.pendingChunks--; - const chunk = jsonChunks[i]; + const chunk = regularChunks[i]; const keepWriting: boolean = writeChunkAndReturn(destination, chunk); if (!keepWriting) { request.destination = null; @@ -1335,7 +1355,7 @@ function flushCompletedChunks( break; } } - jsonChunks.splice(0, i); + regularChunks.splice(0, i); // Finally, errors are sent. The idea is that it's ok to delay // any error messages and prioritize display of other parts of @@ -1545,3 +1565,13 @@ function processHintChunk( const row = serializeRowHeader('H' + code, id) + json + '\n'; return stringToChunk(row); } + +function processTextHeader( + request: Request, + id: number, + text: string, + binaryLength: number, +): Chunk { + const row = id.toString(16) + ':T' + binaryLength.toString(16) + ','; + return stringToChunk(row); +} diff --git a/packages/react-server/src/ReactServerStreamConfigBrowser.js b/packages/react-server/src/ReactServerStreamConfigBrowser.js index 28c16a92ede25..44b5e5af839b8 100644 --- a/packages/react-server/src/ReactServerStreamConfigBrowser.js +++ b/packages/react-server/src/ReactServerStreamConfigBrowser.js @@ -139,6 +139,10 @@ export function clonePrecomputedChunk( : precomputedChunk; } +export function byteLengthOfChunk(chunk: Chunk | PrecomputedChunk): number { + return chunk.byteLength; +} + export function closeWithError(destination: Destination, error: mixed): void { // $FlowFixMe[method-unbinding] if (typeof destination.error === 'function') { diff --git a/packages/react-server/src/ReactServerStreamConfigBun.js b/packages/react-server/src/ReactServerStreamConfigBun.js index b71b6542f36eb..ac245209d53d0 100644 --- a/packages/react-server/src/ReactServerStreamConfigBun.js +++ b/packages/react-server/src/ReactServerStreamConfigBun.js @@ -66,6 +66,10 @@ export function clonePrecomputedChunk( return chunk; } +export function byteLengthOfChunk(chunk: Chunk | PrecomputedChunk): number { + return Buffer.byteLength(chunk, 'utf8'); +} + export function closeWithError(destination: Destination, error: mixed): void { if (typeof destination.error === 'function') { // $FlowFixMe[incompatible-call]: This is an Error object or the destination accepts other types. 
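To make the new framing concrete, here is a sketch of what a response row stream looks like after this change, with made-up row IDs and byte counts (the real values depend on the request). A string of 1024 or more characters is no longer JSON-escaped into the model row; the server emits a separate text row whose header is "<id in hex>:T<byte length in hex>," followed by the raw UTF-8 bytes, and the model row references it by value ID:

  2:T5e0,<1504 raw bytes; quotes, newlines, and tabs stay unescaped>
  0:{"text":"$2"}

Because the T header declares an explicit byte length, the client's ROW_LENGTH state consumes exactly that many bytes rather than scanning for a terminating newline, so the payload may itself contain newlines, and the string is neither escaped on the server nor re-parsed as JSON on the client.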
diff --git a/packages/react-server/src/ReactServerStreamConfigEdge.js b/packages/react-server/src/ReactServerStreamConfigEdge.js index e41bf7940134b..00b0f4077d5b9 100644 --- a/packages/react-server/src/ReactServerStreamConfigEdge.js +++ b/packages/react-server/src/ReactServerStreamConfigEdge.js @@ -139,6 +139,10 @@ export function clonePrecomputedChunk( : precomputedChunk; } +export function byteLengthOfChunk(chunk: Chunk | PrecomputedChunk): number { + return chunk.byteLength; +} + export function closeWithError(destination: Destination, error: mixed): void { // $FlowFixMe[method-unbinding] if (typeof destination.error === 'function') { diff --git a/packages/react-server/src/ReactServerStreamConfigNode.js b/packages/react-server/src/ReactServerStreamConfigNode.js index 0313daf307a12..12814e36e0b47 100644 --- a/packages/react-server/src/ReactServerStreamConfigNode.js +++ b/packages/react-server/src/ReactServerStreamConfigNode.js @@ -215,6 +215,12 @@ export function clonePrecomputedChunk( : precomputedChunk; } +export function byteLengthOfChunk(chunk: Chunk | PrecomputedChunk): number { + return typeof chunk === 'string' + ? Buffer.byteLength(chunk, 'utf8') + : chunk.byteLength; +} + export function closeWithError(destination: Destination, error: mixed): void { // $FlowFixMe[incompatible-call]: This is an Error object or the destination accepts other types. destination.destroy(error); diff --git a/packages/react-server/src/forks/ReactServerStreamConfig.custom.js b/packages/react-server/src/forks/ReactServerStreamConfig.custom.js index f62c2a54035ba..913bb56d67e64 100644 --- a/packages/react-server/src/forks/ReactServerStreamConfig.custom.js +++ b/packages/react-server/src/forks/ReactServerStreamConfig.custom.js @@ -40,3 +40,4 @@ export const closeWithError = $$$config.closeWithError; export const stringToChunk = $$$config.stringToChunk; export const stringToPrecomputedChunk = $$$config.stringToPrecomputedChunk; export const clonePrecomputedChunk = $$$config.clonePrecomputedChunk; +export const byteLengthOfChunk = $$$config.byteLengthOfChunk;
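To see the round trip end to end, below is a minimal standalone sketch in plain JavaScript. The helpers encodeTextRow and parseTextRow are illustrations invented for this note, not functions from the patch; the framing they implement mirrors processTextHeader on the server and the ROW_LENGTH state of processBinaryChunk on the client. It also shows why byteLengthOfChunk must report the UTF-8 byte length rather than the JavaScript string length: the client counts raw bytes off the stream, and a character like the tests' emoji occupies four bytes but only two UTF-16 code units.

  const encoder = new TextEncoder();
  const decoder = new TextDecoder();

  // Frame a string as a text row: "<id in hex>:T<byte length in hex>,"
  // followed by the raw UTF-8 bytes, with no JSON escaping.
  function encodeTextRow(id, text) {
    const body = encoder.encode(text);
    // body.byteLength, not text.length: '🙃' is 2 code units but 4 bytes.
    const header = encoder.encode(
      id.toString(16) + ':T' + body.byteLength.toString(16) + ',',
    );
    const row = new Uint8Array(header.byteLength + body.byteLength);
    row.set(header, 0);
    row.set(body, header.byteLength);
    return row;
  }

  // Mirror the client's length-based state: read the declared byte count
  // and slice exactly that many bytes instead of scanning for a newline.
  function parseTextRow(row) {
    const comma = row.indexOf(44 /* "," */);
    const header = decoder.decode(row.subarray(0, comma));
    const [idHex, lengthHex] = header.split(':T');
    const length = parseInt(lengthHex, 16);
    const body = row.subarray(comma + 1, comma + 1 + length);
    return {id: parseInt(idHex, 16), text: decoder.decode(body)};
  }

  const testString = '"\n\t'.repeat(500) + '🙃';
  const parsed = parseTextRow(encodeTextRow(2, testString));
  console.log(parsed.text === testString); // true

The explicit byte-length header is also what makes the binary slicing in processBinaryChunk safe: the row body never has to be inspected for delimiters, so it can be passed along as a zero-copy Uint8Array view of the incoming chunk.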