From 082c3e1c24be9f5b2947672bdbfaba03c1a15f5a Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 3 May 2026 15:36:19 +0000 Subject: [PATCH] fix(tripwire): native tar.gz extraction in ASN cron Vercel's serverless runtime has no `tar` binary, so spawnSync("tar", ...) was failing the daily ASN refresh. Replace it with an in-memory gunzip + ustar parser. Drops the temp directory and disk I/O. --- src/lib/tripwire/sync-geoip.test.ts | 60 +++++++++++++++++ src/lib/tripwire/sync-geoip.ts | 101 ++++++++++++++++------------ 2 files changed, 118 insertions(+), 43 deletions(-) create mode 100644 src/lib/tripwire/sync-geoip.test.ts diff --git a/src/lib/tripwire/sync-geoip.test.ts b/src/lib/tripwire/sync-geoip.test.ts new file mode 100644 index 0000000..1e263fd --- /dev/null +++ b/src/lib/tripwire/sync-geoip.test.ts @@ -0,0 +1,60 @@ +// src/lib/tripwire/sync-geoip.test.ts +import { describe, test, expect } from "bun:test" +import { gzipSync } from "node:zlib" +import { extractFileFromTarGz } from "./sync-geoip" + +const BLOCK = 512 + +// Build a minimal POSIX ustar tarball containing a single file. +// Mirrors the shape MaxMind ships: "/" inside a tarball. 
+function makeTarGz(entries: Array<{ name: string; body: Buffer }>): Buffer { + const blocks: Buffer[] = [] + for (const { name, body } of entries) { + const header = Buffer.alloc(BLOCK, 0) + header.write(name, 0, 100, "utf8") + header.write("0000644\0", 100, 8, "ascii") // mode + header.write("0000000\0", 108, 8, "ascii") // uid + header.write("0000000\0", 116, 8, "ascii") // gid + header.write(body.length.toString(8).padStart(11, "0") + "\0", 124, 12, "ascii") + header.write("00000000000\0", 136, 12, "ascii") // mtime + header.write(" ", 148, 8, "ascii") // chksum placeholder (spaces) + header.write("0", 156, 1, "ascii") // typeflag: regular file + header.write("ustar\0", 257, 6, "ascii") + header.write("00", 263, 2, "ascii") + + let sum = 0 + for (let i = 0; i < BLOCK; i++) sum += header[i] + header.write(sum.toString(8).padStart(6, "0") + "\0 ", 148, 8, "ascii") + + blocks.push(header) + const padded = Buffer.alloc(Math.ceil(body.length / BLOCK) * BLOCK, 0) + body.copy(padded) + blocks.push(padded) + } + blocks.push(Buffer.alloc(BLOCK, 0)) + blocks.push(Buffer.alloc(BLOCK, 0)) + return gzipSync(Buffer.concat(blocks)) +} + +describe("extractFileFromTarGz", () => { + test("finds a file by basename inside a directory", () => { + const body = Buffer.from("fake mmdb payload, not actually a database") + const tar = makeTarGz([ + { name: "GeoLite2-ASN_20260503/COPYRIGHT.txt", body: Buffer.from("c") }, + { name: "GeoLite2-ASN_20260503/GeoLite2-ASN.mmdb", body }, + ]) + const out = extractFileFromTarGz(tar, "GeoLite2-ASN.mmdb") + expect(out.equals(body)).toBe(true) + }) + + test("handles bodies that are not block-aligned", () => { + const body = Buffer.alloc(1000, 7) // 1000 bytes, spans 2 blocks + const tar = makeTarGz([{ name: "dir/file.bin", body }]) + expect(extractFileFromTarGz(tar, "file.bin").equals(body)).toBe(true) + }) + + test("throws when the file is missing", () => { + const tar = makeTarGz([{ name: "dir/other.txt", body: Buffer.from("x") }]) + expect(() => 
extractFileFromTarGz(tar, "missing.mmdb")).toThrow(/not found/) + }) +}) diff --git a/src/lib/tripwire/sync-geoip.ts b/src/lib/tripwire/sync-geoip.ts index b187727..e20c0dc 100644 --- a/src/lib/tripwire/sync-geoip.ts +++ b/src/lib/tripwire/sync-geoip.ts @@ -5,17 +5,17 @@ // Refreshing the db never requires a redeploy. // // Pure library: no console.log, no process.exit. Both the CLI script -// and the cron route call this. +// and the cron route call this. Runs on Vercel's serverless runtime, +// so the tarball is decompressed and parsed in memory: no shelling out +// to tar, no temp files. import { put } from "@vercel/blob" -import { spawnSync } from "node:child_process" -import { mkdtemp, readdir, readFile, rm, stat, writeFile } from "node:fs/promises" -import { join } from "node:path" -import { tmpdir } from "node:os" +import { gunzipSync } from "node:zlib" const DOWNLOAD_URL = "https://download.maxmind.com/geoip/databases/GeoLite2-ASN/download?suffix=tar.gz" export const ASN_BLOB_KEY = "geoip/GeoLite2-ASN.mmdb" +const MMDB_NAME = "GeoLite2-ASN.mmdb" export interface SyncGeoipResult { tarballBytes: number @@ -40,26 +40,50 @@ async function downloadTarball( return Buffer.from(await res.arrayBuffer()) } -// Tarball structure: GeoLite2-ASN_/GeoLite2-ASN.mmdb. Extract -// to a fresh tmp dir and walk one directory deep to find the .mmdb. -async function extractMmdb(tarballPath: string, extractDir: string): Promise { - const result = spawnSync("tar", ["-xzf", tarballPath, "-C", extractDir], { - encoding: "utf8", - }) - if (result.status !== 0) { - throw new Error(`tar extraction failed (status=${result.status}): ${result.stderr}`) - } - const entries = await readdir(extractDir) - for (const entry of entries) { - const candidate = join(extractDir, entry, "GeoLite2-ASN.mmdb") - try { - await stat(candidate) - return candidate - } catch { - // keep looking +// POSIX ustar header. We only need name, size, typeflag, prefix. 
// https://www.gnu.org/software/tar/manual/html_node/Standard.html
//
// Header fields read below, as (byte offset, length) within a 512-byte block:
//   name (0, 100) | size (124, 12) | typeflag (156, 1) | prefix (345, 155)
const BLOCK = 512

// A numeric header field, once padding is trimmed, must be octal digits only.
const OCTAL_DIGITS = /^[0-7]+$/

// Read a fixed-width string field: bytes up to the first NUL, or the whole
// field when it contains no NUL, decoded as UTF-8.
function readCString(buf: Buffer, offset: number, len: number): string {
  const max = offset + len
  let end = offset
  while (end < max && buf[end] !== 0) end++
  return buf.toString("utf8", offset, end)
}

// Parse a numeric header field stored as ASCII octal. Trailing NULs/spaces and
// leading spaces are padding and are ignored; an all-padding field reads as 0.
// Throws if anything other than octal digits remains, instead of letting
// parseInt produce NaN (or a silently shortened number) that would corrupt the
// caller's offset arithmetic.
function readOctal(buf: Buffer, offset: number, len: number): number {
  let start = offset
  let end = offset + len
  while (end > start && (buf[end - 1] === 0 || buf[end - 1] === 0x20)) end--
  while (start < end && buf[start] === 0x20) start++
  if (start === end) return 0

  // latin1 rather than ascii: Node's "ascii" decoding clears the top bit of
  // every byte, which would let a byte such as 0xB0 pass as the digit "0".
  const digits = buf.toString("latin1", start, end)
  if (!OCTAL_DIGITS.test(digits)) {
    throw new Error(
      `invalid octal field in tar header at offset ${offset}: ${JSON.stringify(digits)}`,
    )
  }
  return parseInt(digits, 8)
}

// Find a regular file in a gzipped tar by basename and return its contents.
//
// tarball:  the gzip-compressed archive bytes.
// basename: file name to look for; matches an entry whose full path equals it
//           or ends with "/<basename>".
//
// Returns a copy of the first matching entry's bytes.
// Throws if gunzip fails, if a header's size field is not valid octal, if the
// matching entry's declared size runs past the end of the archive, or if no
// entry matches.
export function extractFileFromTarGz(tarball: Buffer, basename: string): Buffer {
  const data = gunzipSync(tarball)
  const suffix = `/${basename}`
  let offset = 0
  while (offset + BLOCK <= data.length) {
    // End-of-archive marker is a zero block. The name field is at the
    // start of the header, so a zero first byte means no more entries.
    if (data[offset] === 0) break

    const name = readCString(data, offset, 100)
    const size = readOctal(data, offset + 124, 12)
    const typeflag = data[offset + 156]
    const prefix = readCString(data, offset + 345, 155)
    const fullName = prefix ? `${prefix}/${name}` : name
    // typeflag '0' (0x30) or NUL = regular file
    const isFile = typeflag === 0 || typeflag === 0x30

    // Step past the header; offset now points at the entry's data.
    offset += BLOCK
    if (isFile && (fullName === basename || fullName.endsWith(suffix))) {
      // subarray() clamps to the buffer end, so without this check a cut-off
      // archive would hand back a short payload that looks like a success.
      if (offset + size > data.length) {
        throw new Error(
          `tar entry ${JSON.stringify(fullName)} is truncated: ` +
            `header declares ${size} bytes, only ${data.length - offset} remain`,
        )
      }
      // Buffer.from copies, so the result does not share memory with `data`.
      return Buffer.from(data.subarray(offset, offset + size))
    }
    // Entry data is padded to a whole number of blocks.
    offset += Math.ceil(size / BLOCK) * BLOCK
  }
  throw new Error(`${basename} not found in tarball`)
}

// ---------------------------------------------------------------------------
// Remainder of the patch that shared these lines, kept as a record. It is the
// caller-side hunk; syncGeoipToBlob is only partially visible in the diff, so
// it is reproduced here rather than rewritten.
//
//    export async function syncGeoipToBlob(): Promise {
//   @@ -70,27 +94,18 @@ export async function syncGeoipToBlob(): Promise {
//      }
//
//      const tarball = await downloadTarball(accountId, licenseKey)
//   +  const mmdb = extractFileFromTarGz(tarball, MMDB_NAME)
//
//   -  const tmp = await mkdtemp(join(tmpdir(), "geolite2-asn-"))
//   -  try {
//   -    const tarballPath = join(tmp, "GeoLite2-ASN.tar.gz")
//   -    await writeFile(tarballPath, tarball)
//   -    const mmdbPath = await extractMmdb(tarballPath, tmp)
//   -    const mmdbBytes = await readFile(mmdbPath)
//   -
//   -    await put(ASN_BLOB_KEY, mmdbBytes, {
//   -      access: "private",
//   -      contentType: "application/octet-stream",
//   -      addRandomSuffix: false,
//   -      allowOverwrite: true,
//   -    })
//   +  await put(ASN_BLOB_KEY, mmdb, {
//   +    access: "private",
//   +    contentType: "application/octet-stream",
//   +    addRandomSuffix: false,
//   +    allowOverwrite: true,
//   +  })
//
//   -    return {
//   -      tarballBytes: tarball.length,
//   -      mmdbBytes: mmdbBytes.length,
//   -      blobKey: ASN_BLOB_KEY,
//   -    }
//   -  } finally {
//   -    await rm(tmp, { recursive: true, force: true })
//   +  return {
//   +    tarballBytes: tarball.length,
//   +    mmdbBytes: mmdb.length,
//   +    blobKey: ASN_BLOB_KEY,
//      }
//    }
// ---------------------------------------------------------------------------