Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TEX-537] Refactor compression #555

Merged
merged 12 commits into from
Dec 10, 2023
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
},
"dependencies": {
"assert-never": "^1.2.1",
"brotli": "^1.3.3",
Copy link
Contributor Author

@timdawborn timdawborn Dec 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replacing this third-party library with the brotli implementation provided by the NodeJS builtin zlib module.

"chalk": "^4.1.0",
"commander": "^10.0.1",
"content-type": "^1.0.5",
Expand Down
16 changes: 0 additions & 16 deletions src/brotli.d.ts

This file was deleted.

362 changes: 362 additions & 0 deletions src/compression.spec.ts

Large diffs are not rendered by default.

50 changes: 50 additions & 0 deletions src/compression.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import zlib from "zlib";

/**
 * Compression schemes accepted by `compressBuffer` and `decompressBuffer`:
 * Brotli (`"br"`), gzip (`"gzip"`), or no compression at all (`"none"`).
 */
export type CompressionAlgorithm = "br" | "gzip" | "none";

/**
 * Compresses a buffer with the requested algorithm.
 *
 * @param algorithm - Which compression scheme to apply.
 * @param buffer - The uncompressed bytes.
 * @returns The compressed bytes. For `"none"` the very same buffer instance
 *   is handed back untouched.
 * @throws Error if `algorithm` is not one of the supported values.
 */
export function compressBuffer(
  algorithm: CompressionAlgorithm,
  buffer: Buffer,
): Buffer {
  if (algorithm === "none") {
    // Nothing to do: pass the input straight through.
    return buffer;
  }
  if (algorithm === "br") {
    return zlib.brotliCompressSync(buffer);
  }
  if (algorithm === "gzip") {
    return zlib.gzipSync(buffer);
  }
  // Only reachable when a caller bypasses the type system.
  throw new Error(`Unhandled compression algorithm value "${algorithm}"`);
}

/**
 * Reverses `compressBuffer`: inflates a buffer that was compressed with the
 * given algorithm.
 *
 * @param algorithm - The scheme the bytes were compressed with.
 * @param buffer - The compressed bytes.
 * @returns The decompressed bytes. For `"none"` the very same buffer instance
 *   is handed back untouched.
 * @throws Error if `algorithm` is not one of the supported values.
 */
export function decompressBuffer(
  algorithm: CompressionAlgorithm,
  buffer: Buffer,
): Buffer {
  // A Map (rather than a plain object) so that lookups never hit inherited
  // prototype properties.
  const decoders = new Map<string, (data: Buffer) => Buffer>([
    ["none", (data) => data],
    ["br", (data) => zlib.brotliDecompressSync(data)],
    ["gzip", (data) => zlib.gunzipSync(data)],
  ]);

  const decode = decoders.get(algorithm);
  if (decode === undefined) {
    throw new Error(`Unhandled compression algorithm value "${algorithm}"`);
  }
  return decode(buffer);
}

/**
 * Maps the value of an HTTP `content-encoding` header onto the compression
 * algorithm used for the body.
 *
 * HTTP content codings are case-insensitive, so the value is normalised
 * (trimmed and lower-cased) before matching. `identity` is treated as "no
 * encoding" and `x-gzip` as an alias of `gzip`.
 *
 * @param contentEncoding - The raw header value; an empty string means the
 *   header was absent.
 * @returns The matching compression algorithm.
 * @throws Error if the value names a content coding that is not supported
 *   (the original, un-normalised value is included in the message).
 */
export function convertHttpContentEncodingToCompressionAlgorithm(
  contentEncoding: string,
): CompressionAlgorithm {
  switch (contentEncoding.trim().toLowerCase()) {
    case "":
    case "identity":
      return "none";
    case "br":
      return "br";
    case "gzip":
    case "x-gzip":
      return "gzip";
    default:
      throw new Error(`Unhandled content-encoding value "${contentEncoding}"`);
  }
}
43 changes: 19 additions & 24 deletions src/http.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import brotli from "brotli";
import zlib from "zlib";
import { ParsedMediaType as ParsedContentType } from "content-type";
import {
convertHttpContentEncodingToCompressionAlgorithm,
decompressBuffer,
} from "./compression";

/**
* Headers of a request or response.
Expand Down Expand Up @@ -35,7 +37,7 @@ export interface HttpResponse {
body: Buffer;
}

export function getHeaderAsString(
export function getHttpHeaderAsString(
headers: HttpHeaders,
headerName: string,
): string {
Expand All @@ -49,35 +51,28 @@ export function getHeaderAsString(
}
}

export function getHttpRequestContentType(request: HttpRequest): string {
/**
 * Reads the `content-encoding` header of a request or response as a string.
 *
 * @param r - The HTTP request or response to inspect.
 * @returns Whatever `getHttpHeaderAsString` yields for that header.
 */
export function getHttpContentEncoding(r: HttpRequest | HttpResponse): string {
  const { headers } = r;
  return getHttpHeaderAsString(headers, "content-encoding");
}

export function getHttpContentType(r: HttpRequest | HttpResponse): string {
return (
getHeaderAsString(request.headers, "content-type") ||
getHttpHeaderAsString(r.headers, "content-type") ||
"application/octet-stream"
);
}

export function getHttpRequestBodyDecoded(request: HttpRequest): Buffer {
// Process the content-encoding before looking at the content-type.
const contentEncoding = getHeaderAsString(
request.headers,
"content-encoding",
);
switch (contentEncoding) {
case "":
return request.body;
case "br":
return Buffer.from(brotli.decompress(request.body));
case "gzip":
return zlib.gunzipSync(request.body);
default:
throw Error(`Unhandled content-encoding value "${contentEncoding}"`);
Comment on lines -65 to -73
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved this common logic into compression.ts.

}
/**
 * Returns the body of a request or response with its `content-encoding`
 * undone, i.e. the decompressed bytes.
 *
 * @param r - The HTTP request or response whose body should be decoded.
 * @returns The decompressed body.
 */
export function getHttpBodyDecoded(r: HttpRequest | HttpResponse): Buffer {
  // Resolve the header to a compression algorithm first, then inflate.
  const algorithm = convertHttpContentEncodingToCompressionAlgorithm(
    getHttpContentEncoding(r),
  );
  return decompressBuffer(algorithm, r.body);
}

export function decodeHttpRequestBodyToString(
request: HttpRequest,
export function decodeHttpBodyToString(
r: HttpRequest | HttpResponse,
contentType: ParsedContentType,
): string {
const encoding = contentType.parameters.charset as BufferEncoding | undefined;
return getHttpRequestBodyDecoded(request).toString(encoding || "utf-8");
return getHttpBodyDecoded(r).toString(encoding || "utf-8");
}
17 changes: 9 additions & 8 deletions src/persistence.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import brotli from "brotli";
import { gzipSync } from "zlib";
import { brotliCompressSync, gzipSync } from "zlib";
import { persistTape, reviveTape, redactRequestHeaders } from "./persistence";

// Note the repetition. This is necessary otherwise Brotli compression
Expand All @@ -20,13 +19,15 @@ const BINARY_RESPONSE = Buffer.from([
]);

const UTF8_REQUEST_BROTLI = Buffer.from(
brotli.compress(Buffer.from(UTF8_REQUEST, "utf8"))!,
brotliCompressSync(Buffer.from(UTF8_REQUEST, "utf8"))!,
);
const UTF8_RESPONSE_BROTLI = Buffer.from(
brotli.compress(Buffer.from(UTF8_RESPONSE, "utf8"))!,
brotliCompressSync(Buffer.from(UTF8_RESPONSE, "utf8"))!,
);
const BINARY_REQUEST_BROTLI = Buffer.from(brotliCompressSync(BINARY_REQUEST)!);
const BINARY_RESPONSE_BROTLI = Buffer.from(
brotliCompressSync(BINARY_RESPONSE)!,
);
const BINARY_REQUEST_BROTLI = Buffer.from(brotli.compress(BINARY_REQUEST)!);
const BINARY_RESPONSE_BROTLI = Buffer.from(brotli.compress(BINARY_RESPONSE)!);

const UTF8_REQUEST_GZIP = gzipSync(Buffer.from(UTF8_REQUEST, "utf8"));
const UTF8_RESPONSE_GZIP = gzipSync(Buffer.from(UTF8_RESPONSE, "utf8"));
Expand Down Expand Up @@ -221,7 +222,7 @@ describe("Persistence", () => {
},
body: {
encoding: "base64",
data: "GxcAAI6UrMm1WkAERl0HoDFuCn3CIekc",
data: "GxcA+I+UrMm1WkAERl0HoDFuCn3CAZLOAQ==",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default Brotli compression configuration differs between the old library and the new Node.js built-in implementation. This is why the hard-coded expected encoding here differs.

},
},
response: {
Expand All @@ -233,7 +234,7 @@ describe("Persistence", () => {
},
body: {
encoding: "base64",
data: "GxcAAI6UrPmFmgFmOV+HoM3+C33CIe4U",
data: "GxcA+I+UrPmFmgFmOV+HoM3+C33CAeJOAQ==",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default Brotli compression configuration differs between the old library and the new Node.js built-in implementation. This is why the hard-coded expected encoding here differs.

},
},
});
Expand Down
65 changes: 22 additions & 43 deletions src/persistence.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
import brotli from "brotli";
import fs from "fs-extra";
import yaml from "js-yaml";
import path from "path";
import { gunzipSync, gzipSync } from "zlib";
import { HttpHeaders } from "./http";
import {
CompressionAlgorithm,
PersistedBuffer,
PersistedTapeRecord,
TapeRecord,
} from "./tape";
compressBuffer,
convertHttpContentEncodingToCompressionAlgorithm,
} from "./compression";
import {
getHttpBodyDecoded,
getHttpContentEncoding,
HttpRequest,
HttpResponse,
} from "./http";
import { PersistedBuffer, PersistedTapeRecord, TapeRecord } from "./tape";

/**
* Persistence layer to save tapes to disk and read them from disk.
Expand Down Expand Up @@ -94,12 +96,12 @@ export function persistTape(record: TapeRecord): PersistedTapeRecord {
method: record.request.method,
path: record.request.path,
headers: record.request.headers,
body: serialiseBuffer(record.request.body, record.request.headers),
body: serialiseForTape(record.request),
},
response: {
status: record.response.status,
headers: record.response.headers,
body: serialiseBuffer(record.response.body, record.response.headers),
body: serialiseForTape(record.response),
},
};
}
Expand All @@ -120,22 +122,12 @@ export function reviveTape(persistedRecord: PersistedTapeRecord): TapeRecord {
};
}

export function serialiseBuffer(
buffer: Buffer,
headers: HttpHeaders,
): PersistedBuffer {
const header = headers["content-encoding"];
const contentEncoding = typeof header === "string" ? header : undefined;
const originalBuffer = buffer;
let compression: CompressionAlgorithm = "none";
if (contentEncoding === "br") {
buffer = Buffer.from(brotli.decompress(buffer));
compression = "br";
}
if (contentEncoding === "gzip") {
buffer = gunzipSync(buffer);
compression = "gzip";
}
Comment on lines -130 to -138
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved this common logic into compression.ts.

function serialiseForTape(r: HttpRequest | HttpResponse): PersistedBuffer {
const buffer = getHttpBodyDecoded(r);
const contentEncoding = getHttpContentEncoding(r);
const compressionAlgorithm =
convertHttpContentEncodingToCompressionAlgorithm(contentEncoding);

const utf8Representation = buffer.toString("utf8");
try {
// Can it be safely stored and recreated in YAML?
Expand All @@ -148,17 +140,18 @@ export function serialiseBuffer(
return {
encoding: "utf8",
data: utf8Representation,
compression,
compression: compressionAlgorithm,
};
}
} catch {
// Fall through.
}

// No luck. Fall back to Base64, persisting the original buffer
// since we might as well store it in its compressed state.
return {
encoding: "base64",
data: originalBuffer.toString("base64"),
data: r.body.toString("base64"),
};
}

Expand All @@ -170,21 +163,7 @@ function unserialiseBuffer(persisted: PersistedBuffer): Buffer {
break;
case "utf8":
buffer = Buffer.from(persisted.data, "utf8");
if (persisted.compression === "br") {
// TODO: Find a workaround for the new compressed message not necessarily
// being identical to what was originally sent (update Content-Length?).
const compressed = brotli.compress(buffer);
if (compressed) {
buffer = Buffer.from(compressed);
} else {
throw new Error(`Brotli compression failed!`);
}
}
if (persisted.compression === "gzip") {
// TODO: Find a workaround for the new compressed message not necessarily
// being identical to what was originally sent (update Content-Length?).
buffer = gzipSync(buffer);
}
Comment on lines -173 to -187
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved this common logic into compression.ts.

buffer = compressBuffer(persisted.compression || "none", buffer);
break;
default:
throw new Error(`Unsupported encoding!`);
Expand Down
16 changes: 8 additions & 8 deletions src/similarity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ import { compareTwoStrings } from "string-similarity";
import { RewriteRules } from "./rewrite";
import { TapeRecord } from "./tape";
import {
decodeHttpRequestBodyToString,
getHttpRequestContentType,
decodeHttpBodyToString,
getHttpContentType,
HttpHeaders,
HttpRequest,
} from "./http";
Expand Down Expand Up @@ -79,8 +79,8 @@ function countBodyDifferences(
request2: HttpRequest,
rewriteBeforeDiffRules: RewriteRules,
): number {
const contentType1 = parseContentType(getHttpRequestContentType(request1));
const contentType2 = parseContentType(getHttpRequestContentType(request1));
const contentType1 = parseContentType(getHttpContentType(request1));
const contentType2 = parseContentType(getHttpContentType(request1));

// If the content types are not the same, we cannot compare.
if (contentType1.type !== contentType2.type) {
Expand Down Expand Up @@ -118,8 +118,8 @@ function countBodyDifferencesApplicationJson(
rewriteBeforeDiffRules: RewriteRules,
): number {
// Decode the bodies to strings.
const body1 = decodeHttpRequestBodyToString(request1, contentType1);
const body2 = decodeHttpRequestBodyToString(request2, contentType2);
const body1 = decodeHttpBodyToString(request1, contentType1);
const body2 = decodeHttpBodyToString(request2, contentType2);

// Early bail if bodies are empty.
if (body1.length === 0 && body1.length === body2.length) {
Expand Down Expand Up @@ -149,8 +149,8 @@ function countBodyDifferencesText(
rewriteBeforeDiffRules: RewriteRules,
): number {
// Decode the bodies to strings.
const body1 = decodeHttpRequestBodyToString(request1, contentType1);
const body2 = decodeHttpRequestBodyToString(request2, contentType2);
const body1 = decodeHttpBodyToString(request1, contentType1);
const body2 = decodeHttpBodyToString(request2, contentType2);

// Early bail if bodies are empty.
if (body1.length === 0 && body1.length === body2.length) {
Expand Down
5 changes: 2 additions & 3 deletions src/tape.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { CompressionAlgorithm } from "./compression";
import { HttpHeaders, HttpRequest, HttpResponse } from "./http";

/**
Expand Down Expand Up @@ -37,8 +38,6 @@ export type PersistedBuffer =
}
| {
encoding: "utf8";
compression: CompressionAlgorithm;
compression: CompressionAlgorithm | undefined;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Older tapes don't have the compression attribute serialized.

data: string;
};

export type CompressionAlgorithm = "br" | "gzip" | "none";
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved into compression.ts.

1 change: 1 addition & 0 deletions src/tests/setup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ export function setupServers({
defaultTapeName,
host: TEST_SERVER_HOST,
timeout: 100,
enableLogging: true,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated change: enable logging on the proxay instance running in the integration tests.

unframeGrpcWebJsonRequestsHostnames,
});
await Promise.all([
Expand Down
6 changes: 2 additions & 4 deletions tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,7 @@
/* Module Resolution Options */
// "moduleResolution": "node", /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */
"baseUrl": ".", /* Base directory to resolve non-absolute module names. */
"paths": {
"brotli": ["src/brotli.d.ts"]
}, /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */
"paths": {}, /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */
"rootDirs": ["src"], /* List of root folders whose combined content represents the structure of the project at runtime. */
// "typeRoots": ["], /* List of folders to include type definitions from. */
// "types": [], /* Type declaration files to be included in compilation. */
Expand All @@ -58,4 +56,4 @@
// "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. */
// "emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */
}
}
}
Loading