Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TEX-537] Refactor compression #555

Merged
merged 12 commits into from
Dec 10, 2023
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,16 @@
},
"dependencies": {
"assert-never": "^1.2.1",
"brotli": "^1.3.3",
Copy link
Contributor Author

@timdawborn timdawborn Dec 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replacing this third-party library with the brotli implementation provided by the NodeJS builtin zlib module.

"chalk": "^4.1.0",
"commander": "^10.0.1",
"content-type": "^1.0.5",
"deep-diff": "^1.0.2",
"fs-extra": "^11.1.1",
"js-yaml": "^4.1.0",
"query-string": "^6.13.6",
"string-similarity": "^4.0.4"
},
"devDependencies": {
"@types/content-type": "^1.1.8",
"@types/deep-diff": "^1.0.5",
"@types/express": "^4.17.21",
"@types/fs-extra": "^11.0.4",
Expand Down
16 changes: 0 additions & 16 deletions src/brotli.d.ts

This file was deleted.

362 changes: 362 additions & 0 deletions src/compression.spec.ts

Large diffs are not rendered by default.

50 changes: 50 additions & 0 deletions src/compression.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import zlib from "zlib";

/**
 * Compression schemes understood by `compressBuffer` and `decompressBuffer`:
 * `"br"` (Brotli), `"gzip"`, or `"none"` (the buffer is passed through as-is).
 */
export type CompressionAlgorithm = "br" | "gzip" | "none";

/**
 * Compresses `buffer` using the requested algorithm.
 *
 * @param algorithm - `"none"` hands back the input buffer itself (no copy);
 *   `"br"` and `"gzip"` run the matching synchronous `zlib` compressor with
 *   its default options.
 * @param buffer - The uncompressed bytes.
 * @returns The compressed bytes, or `buffer` itself for `"none"`.
 * @throws Error when `algorithm` is not a known value, which is only
 *   reachable if the static types were bypassed.
 */
export function compressBuffer(
  algorithm: CompressionAlgorithm,
  buffer: Buffer,
): Buffer {
  if (algorithm === "none") {
    return buffer;
  }
  if (algorithm === "br") {
    return zlib.brotliCompressSync(buffer);
  }
  if (algorithm === "gzip") {
    return zlib.gzipSync(buffer);
  }
  throw new Error(`Unhandled compression algorithm value "${algorithm}"`);
}

/**
 * Reverses `compressBuffer`: inflates `buffer` according to `algorithm`.
 *
 * @param algorithm - The scheme `buffer` was compressed with. `"none"` means
 *   the bytes are already plain and the same buffer instance is returned.
 * @param buffer - The (possibly compressed) bytes.
 * @returns The decompressed bytes.
 * @throws Error when `algorithm` is not a known value. Errors raised by
 *   `zlib` for malformed input propagate unchanged.
 */
export function decompressBuffer(
  algorithm: CompressionAlgorithm,
  buffer: Buffer,
): Buffer {
  let inflated: Buffer;
  if (algorithm === "br") {
    inflated = zlib.brotliDecompressSync(buffer);
  } else if (algorithm === "gzip") {
    inflated = zlib.gunzipSync(buffer);
  } else if (algorithm === "none") {
    inflated = buffer;
  } else {
    throw new Error(`Unhandled compression algorithm value "${algorithm}"`);
  }
  return inflated;
}

/**
 * Maps the value of an HTTP `Content-Encoding` header to the compression
 * algorithm used by `compressBuffer` / `decompressBuffer`.
 *
 * Content codings are case-insensitive, so the value is trimmed and
 * lower-cased before matching. `"x-gzip"` is treated as an alias of `"gzip"`,
 * and both the empty string and `"identity"` mean "no compression".
 *
 * @param contentEncoding - Raw header value, or `""` when the header is absent.
 * @returns The matching compression algorithm.
 * @throws Error for any other coding (for example `deflate`, or a
 *   comma-separated list of several codings); the message quotes the
 *   original, un-normalised value.
 */
export function convertHttpContentEncodingToCompressionAlgorithm(
  contentEncoding: string,
): CompressionAlgorithm {
  const normalised = contentEncoding.trim().toLowerCase();
  switch (normalised) {
    case "":
    case "identity":
      return "none";
    case "br":
      return "br";
    case "gzip":
    case "x-gzip":
      return "gzip";
    default:
      throw new Error(`Unhandled content-encoding value "${contentEncoding}"`);
  }
}
78 changes: 78 additions & 0 deletions src/http.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import { ParsedMediaType as ParsedContentType } from "content-type";
import {
convertHttpContentEncodingToCompressionAlgorithm,
decompressBuffer,
} from "./compression";

/**
 * Headers of a request or response, keyed by header name.
 *
 * A value is a single string, a list of strings, or `undefined`.
 * NOTE(review): the helpers in this file look headers up by lower-case name
 * (e.g. "content-encoding"), so keys are presumably lower-cased — confirm
 * against the code that builds these objects.
 */
export interface HttpHeaders {
[headerName: string]: string | string[] | undefined;
}

/**
 * The common fields of a HTTP request.
 */
export interface HttpRequest {
// Optional here; `HttpRequestWithHost` makes it mandatory.
host?: string;
method: string;
path: string;
headers: HttpHeaders;
// Body bytes as carried by the message; `getHttpBodyDecoded` decompresses
// them according to the `content-encoding` header.
body: Buffer;
}

/**
 * A `HttpRequest` whose `host` is required rather than optional.
 */
export interface HttpRequestWithHost extends HttpRequest {
host: string;
}

/**
 * The common fields of a HTTP response.
 */
export interface HttpResponse {
status: {
// Numeric HTTP status code.
code: number;
};
headers: HttpHeaders;
// Body bytes as carried by the message; `getHttpBodyDecoded` decompresses
// them according to the `content-encoding` header.
body: Buffer;
}

/**
 * Reads a header as a single string.
 *
 * @param headers - The header collection to read from.
 * @param headerName - The key to look up; the lookup is an exact
 *   (case-sensitive) property access.
 * @returns The header value; the first element when the header holds a list
 *   of values; or `""` when the header is missing or holds an empty list.
 */
export function getHttpHeaderAsString(
  headers: HttpHeaders,
  headerName: string,
): string {
  const rawValue = headers[headerName];
  if (rawValue === undefined) {
    return "";
  }
  if (typeof rawValue === "string") {
    return rawValue;
  }
  // An empty list has no first element; report it like a missing header
  // instead of leaking `undefined` through the `string` return type.
  return rawValue[0] ?? "";
}

/**
 * Returns the `content-encoding` header of a request or response as a string,
 * using `getHttpHeaderAsString` for the lookup.
 */
export function getHttpContentEncoding(r: HttpRequest | HttpResponse): string {
  const { headers } = r;
  return getHttpHeaderAsString(headers, "content-encoding");
}

/**
 * Returns the `content-type` header of a request or response, falling back to
 * `"application/octet-stream"` when the lookup yields an empty value.
 */
export function getHttpContentType(r: HttpRequest | HttpResponse): string {
  const declaredType = getHttpHeaderAsString(r.headers, "content-type");
  if (declaredType) {
    return declaredType;
  }
  return "application/octet-stream";
}

/**
 * Returns the body of a request or response with its `content-encoding`
 * compression removed.
 *
 * @throws Error when the `content-encoding` value is not one that
 *   `convertHttpContentEncodingToCompressionAlgorithm` recognises.
 */
export function getHttpBodyDecoded(r: HttpRequest | HttpResponse): Buffer {
  return decompressBuffer(
    convertHttpContentEncodingToCompressionAlgorithm(
      getHttpHeaderAsString(r.headers, "content-encoding"),
    ),
    r.body,
  );
}

/**
 * Decompresses the body of a request or response and decodes it to a string.
 *
 * The character set comes from the `charset` parameter of `contentType`.
 * That value is free-form text from the wire, so it is validated before being
 * handed to `Buffer.toString`, which throws on encodings Node does not know:
 * - a charset Node supports (matched case-insensitively) is used as-is;
 * - `iso-8859-1` is mapped to Node's name for it, `latin1`;
 * - a missing or unsupported charset falls back to UTF-8.
 *
 * @param r - The request or response whose body should be decoded.
 * @param contentType - The parsed `content-type` of `r`.
 * @returns The decoded body text.
 */
export function decodeHttpBodyToString(
  r: HttpRequest | HttpResponse,
  contentType: ParsedContentType,
): string {
  const declaredCharset = contentType.parameters.charset;
  let encoding: BufferEncoding = "utf-8";
  if (declaredCharset) {
    const candidate = declaredCharset.trim().toLowerCase();
    if (candidate === "iso-8859-1") {
      encoding = "latin1";
    } else if (Buffer.isEncoding(candidate)) {
      encoding = candidate;
    }
  }
  return getHttpBodyDecoded(r).toString(encoding);
}
13 changes: 4 additions & 9 deletions src/matcher.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { HttpRequest } from "./http";
import { RewriteRules } from "./rewrite";
import { computeSimilarity } from "./similarity";
import { Headers, TapeRecord } from "./tape";
import { TapeRecord } from "./tape";

/**
* Returns the first of a list of records that hasn't been replayed before.
Expand Down Expand Up @@ -42,21 +43,15 @@ export function findNextRecordToReplay(
* against different paths).
*/
export function findRecordMatches(
request: HttpRequest,
tapeRecords: TapeRecord[],
requestMethod: string,
requestPath: string,
requestHeaders: Headers,
requestBody: Buffer,
rewriteBeforeDiffRules: RewriteRules,
): TapeRecord[] {
let bestSimilarityScore = +Infinity;
let bestMatches: TapeRecord[] = [];
for (const potentialMatch of tapeRecords) {
const similarityScore = computeSimilarity(
requestMethod,
requestPath,
requestHeaders,
requestBody,
request,
potentialMatch,
rewriteBeforeDiffRules,
);
Expand Down
17 changes: 9 additions & 8 deletions src/persistence.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import brotli from "brotli";
import { gzipSync } from "zlib";
import { brotliCompressSync, gzipSync } from "zlib";
import { persistTape, reviveTape, redactRequestHeaders } from "./persistence";

// Note the repetition. This is necessary otherwise Brotli compression
Expand All @@ -20,13 +19,15 @@ const BINARY_RESPONSE = Buffer.from([
]);

const UTF8_REQUEST_BROTLI = Buffer.from(
brotli.compress(Buffer.from(UTF8_REQUEST, "utf8"))!,
brotliCompressSync(Buffer.from(UTF8_REQUEST, "utf8"))!,
);
const UTF8_RESPONSE_BROTLI = Buffer.from(
brotli.compress(Buffer.from(UTF8_RESPONSE, "utf8"))!,
brotliCompressSync(Buffer.from(UTF8_RESPONSE, "utf8"))!,
);
const BINARY_REQUEST_BROTLI = Buffer.from(brotliCompressSync(BINARY_REQUEST)!);
const BINARY_RESPONSE_BROTLI = Buffer.from(
brotliCompressSync(BINARY_RESPONSE)!,
);
const BINARY_REQUEST_BROTLI = Buffer.from(brotli.compress(BINARY_REQUEST)!);
const BINARY_RESPONSE_BROTLI = Buffer.from(brotli.compress(BINARY_RESPONSE)!);

const UTF8_REQUEST_GZIP = gzipSync(Buffer.from(UTF8_REQUEST, "utf8"));
const UTF8_RESPONSE_GZIP = gzipSync(Buffer.from(UTF8_RESPONSE, "utf8"));
Expand Down Expand Up @@ -221,7 +222,7 @@ describe("Persistence", () => {
},
body: {
encoding: "base64",
data: "GxcAAI6UrMm1WkAERl0HoDFuCn3CIekc",
data: "GxcA+I+UrMm1WkAERl0HoDFuCn3CAZLOAQ==",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default Brotli compression configuration differs between the old library and the new Node.js built-in implementation, which is why the hard-coded expected encoding here differs.

},
},
response: {
Expand All @@ -233,7 +234,7 @@ describe("Persistence", () => {
},
body: {
encoding: "base64",
data: "GxcAAI6UrPmFmgFmOV+HoM3+C33CIe4U",
data: "GxcA+I+UrPmFmgFmOV+HoM3+C33CAeJOAQ==",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default Brotli compression configuration differs between the old library and the new Node.js built-in implementation, which is why the hard-coded expected encoding here differs.

},
},
});
Expand Down
65 changes: 22 additions & 43 deletions src/persistence.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
import brotli from "brotli";
import fs from "fs-extra";
import yaml from "js-yaml";
import path from "path";
import { gunzipSync, gzipSync } from "zlib";
import {
CompressionAlgorithm,
Headers,
PersistedBuffer,
PersistedTapeRecord,
TapeRecord,
} from "./tape";
compressBuffer,
convertHttpContentEncodingToCompressionAlgorithm,
} from "./compression";
import {
getHttpBodyDecoded,
getHttpContentEncoding,
HttpRequest,
HttpResponse,
} from "./http";
import { PersistedBuffer, PersistedTapeRecord, TapeRecord } from "./tape";

/**
* Persistence layer to save tapes to disk and read them from disk.
Expand Down Expand Up @@ -94,12 +96,12 @@ export function persistTape(record: TapeRecord): PersistedTapeRecord {
method: record.request.method,
path: record.request.path,
headers: record.request.headers,
body: serialiseBuffer(record.request.body, record.request.headers),
body: serialiseForTape(record.request),
},
response: {
status: record.response.status,
headers: record.response.headers,
body: serialiseBuffer(record.response.body, record.response.headers),
body: serialiseForTape(record.response),
},
};
}
Expand All @@ -120,22 +122,12 @@ export function reviveTape(persistedRecord: PersistedTapeRecord): TapeRecord {
};
}

export function serialiseBuffer(
buffer: Buffer,
headers: Headers,
): PersistedBuffer {
const header = headers["content-encoding"];
const contentEncoding = typeof header === "string" ? header : undefined;
const originalBuffer = buffer;
let compression: CompressionAlgorithm = "none";
if (contentEncoding === "br") {
buffer = Buffer.from(brotli.decompress(buffer));
compression = "br";
}
if (contentEncoding === "gzip") {
buffer = gunzipSync(buffer);
compression = "gzip";
}
Comment on lines -130 to -138
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved this common logic into compression.ts.

function serialiseForTape(r: HttpRequest | HttpResponse): PersistedBuffer {
const buffer = getHttpBodyDecoded(r);
const contentEncoding = getHttpContentEncoding(r);
const compressionAlgorithm =
convertHttpContentEncodingToCompressionAlgorithm(contentEncoding);

const utf8Representation = buffer.toString("utf8");
try {
// Can it be safely stored and recreated in YAML?
Expand All @@ -148,17 +140,18 @@ export function serialiseBuffer(
return {
encoding: "utf8",
data: utf8Representation,
compression,
compression: compressionAlgorithm,
};
}
} catch {
// Fall through.
}

// No luck. Fall back to Base64, persisting the original buffer
// since we might as well store it in its compressed state.
return {
encoding: "base64",
data: originalBuffer.toString("base64"),
data: r.body.toString("base64"),
};
}

Expand All @@ -170,21 +163,7 @@ function unserialiseBuffer(persisted: PersistedBuffer): Buffer {
break;
case "utf8":
buffer = Buffer.from(persisted.data, "utf8");
if (persisted.compression === "br") {
// TODO: Find a workaround for the new compressed message not necessarily
// being identical to what was originally sent (update Content-Length?).
const compressed = brotli.compress(buffer);
if (compressed) {
buffer = Buffer.from(compressed);
} else {
throw new Error(`Brotli compression failed!`);
}
}
if (persisted.compression === "gzip") {
// TODO: Find a workaround for the new compressed message not necessarily
// being identical to what was originally sent (update Content-Length?).
buffer = gzipSync(buffer);
}
Comment on lines -173 to -187
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved this common logic into compression.ts.

buffer = compressBuffer(persisted.compression || "none", buffer);
break;
default:
throw new Error(`Unsupported encoding!`);
Expand Down
17 changes: 3 additions & 14 deletions src/sender.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@ import chalk from "chalk";
import http from "http";
import https from "https";
import { ensureBuffer } from "./buffer";
import { Headers, TapeRecord } from "./tape";
import { HttpRequestWithHost } from "./http";
import { TapeRecord } from "./tape";

/**
* Sends a network request and returns the recorded tape.
*/
export async function send(
request: RequestWithHost,
request: HttpRequestWithHost,
options: {
loggingEnabled?: boolean;
timeout?: number;
Expand Down Expand Up @@ -76,15 +77,3 @@ export async function send(
throw e;
}
}

export interface Request {
host?: string;
method: string;
path: string;
headers: Headers;
body: Buffer;
}

export interface RequestWithHost extends Request {
host: string;
}
Loading