Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions src/lib/tripwire/sync-geoip.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// src/lib/tripwire/sync-geoip.test.ts
import { describe, test, expect } from "bun:test"
import { gzipSync } from "node:zlib"
import { extractFileFromTarGz } from "./sync-geoip"

// Size in bytes of one tar record; headers and padded bodies are multiples of it.
const BLOCK = 512

/**
 * Build a gzipped POSIX ustar archive holding the given entries, in order.
 * Mirrors the shape MaxMind ships: "<dir>/<basename>" inside a tarball.
 *
 * @param entries Files to store. `name` is written to the 100-byte name field,
 *   `body` is stored verbatim and zero-padded to a whole number of blocks.
 * @returns The gzip-compressed archive, terminated by two zero blocks.
 * @throws Error if a name does not fit the 100-byte name field.
 */
function makeTarGz(entries: Array<{ name: string; body: Buffer }>): Buffer {
  const blocks: Buffer[] = []
  for (const { name, body } of entries) {
    // Fail loudly instead of silently storing a truncated name.
    if (Buffer.byteLength(name, "utf8") > 100) {
      throw new Error(`tar entry name exceeds 100 bytes: ${name}`)
    }

    const header = Buffer.alloc(BLOCK, 0)
    header.write(name, 0, 100, "utf8")
    header.write("0000644\0", 100, 8, "ascii") // mode
    header.write("0000000\0", 108, 8, "ascii") // uid
    header.write("0000000\0", 116, 8, "ascii") // gid
    header.write(body.length.toString(8).padStart(11, "0") + "\0", 124, 12, "ascii") // size
    header.write("00000000000\0", 136, 12, "ascii") // mtime
    // The checksum is defined over the header with the chksum field read as
    // eight ASCII spaces, so the placeholder must fill all eight bytes.
    header.write(" ".repeat(8), 148, 8, "ascii")
    header.write("0", 156, 1, "ascii") // typeflag: regular file
    header.write("ustar\0", 257, 6, "ascii") // magic
    header.write("00", 263, 2, "ascii") // version

    let sum = 0
    for (const byte of header) sum += byte
    // Stored form: six octal digits, a NUL, then a space.
    header.write(sum.toString(8).padStart(6, "0") + "\0 ", 148, 8, "ascii")

    blocks.push(header)
    const padded = Buffer.alloc(Math.ceil(body.length / BLOCK) * BLOCK, 0)
    body.copy(padded)
    blocks.push(padded)
  }
  // End-of-archive marker: two all-zero blocks.
  blocks.push(Buffer.alloc(BLOCK, 0))
  blocks.push(Buffer.alloc(BLOCK, 0))
  return gzipSync(Buffer.concat(blocks))
}

// Behavioural tests for the in-memory tar.gz reader, driven by archives
// synthesised with makeTarGz.
describe("extractFileFromTarGz", () => {
  // The target sits in a dated directory next to an unrelated file;
  // lookup is by basename only.
  test("finds a file by basename inside a directory", () => {
    const payload = Buffer.from("fake mmdb payload, not actually a database")
    const copyright = { name: "GeoLite2-ASN_20260503/COPYRIGHT.txt", body: Buffer.from("c") }
    const database = { name: "GeoLite2-ASN_20260503/GeoLite2-ASN.mmdb", body: payload }
    const archive = makeTarGz([copyright, database])

    const extracted = extractFileFromTarGz(archive, "GeoLite2-ASN.mmdb")

    expect(extracted.equals(payload)).toBe(true)
  })

  // A 1000-byte body occupies two 512-byte blocks; the padding must not
  // leak into the result.
  test("handles bodies that are not block-aligned", () => {
    const payload = Buffer.alloc(1000, 7)
    const archive = makeTarGz([{ name: "dir/file.bin", body: payload }])
    const extracted = extractFileFromTarGz(archive, "file.bin")
    expect(extracted.equals(payload)).toBe(true)
  })

  // An archive without the requested basename must raise, not return empty.
  test("throws when the file is missing", () => {
    const archive = makeTarGz([{ name: "dir/other.txt", body: Buffer.from("x") }])
    const lookup = () => extractFileFromTarGz(archive, "missing.mmdb")
    expect(lookup).toThrow(/not found/)
  })
})
101 changes: 58 additions & 43 deletions src/lib/tripwire/sync-geoip.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@
// Refreshing the db never requires a redeploy.
//
// Pure library: no console.log, no process.exit. Both the CLI script
// and the cron route call this. Runs on Vercel's serverless runtime,
// so the tarball is decompressed and parsed in memory: no shelling out
// to tar, no temp files.

import { put } from "@vercel/blob"
import { spawnSync } from "node:child_process"
import { mkdtemp, readdir, readFile, rm, stat, writeFile } from "node:fs/promises"
import { join } from "node:path"
import { tmpdir } from "node:os"
import { gunzipSync } from "node:zlib"

// MaxMind download endpoint for the GeoLite2-ASN database, requested as a
// tar.gz archive via the `suffix` query parameter.
// NOTE(review): presumably fetched by downloadTarball — its body is not
// visible here; confirm.
const DOWNLOAD_URL =
"https://download.maxmind.com/geoip/databases/GeoLite2-ASN/download?suffix=tar.gz"
// Blob key the extracted database is stored under; also returned to callers
// as `blobKey` in the sync result.
export const ASN_BLOB_KEY = "geoip/GeoLite2-ASN.mmdb"
// Basename of the database file to look up inside the downloaded tarball.
const MMDB_NAME = "GeoLite2-ASN.mmdb"

export interface SyncGeoipResult {
tarballBytes: number
Expand All @@ -40,26 +40,50 @@ async function downloadTarball(
return Buffer.from(await res.arrayBuffer())
}

// Tarball structure: GeoLite2-ASN_<YYYYMMDD>/GeoLite2-ASN.mmdb. Extract
// to a fresh tmp dir and walk one directory deep to find the .mmdb.
async function extractMmdb(tarballPath: string, extractDir: string): Promise<string> {
const result = spawnSync("tar", ["-xzf", tarballPath, "-C", extractDir], {
encoding: "utf8",
})
if (result.status !== 0) {
throw new Error(`tar extraction failed (status=${result.status}): ${result.stderr}`)
}
const entries = await readdir(extractDir)
for (const entry of entries) {
const candidate = join(extractDir, entry, "GeoLite2-ASN.mmdb")
try {
await stat(candidate)
return candidate
} catch {
// keep looking
// POSIX ustar header. We only need name, size, typeflag, prefix.
// https://www.gnu.org/software/tar/manual/html_node/Standard.html
const BLOCK = 512

/**
 * Read a header string field: bytes from `offset` up to the first NUL, or up
 * to `len` bytes when the field is completely filled, decoded as UTF-8.
 */
function readCString(buf: Buffer, offset: number, len: number): string {
  let end = offset
  const max = offset + len
  while (end < max && buf[end] !== 0) end++
  return buf.toString("utf8", offset, end)
}

/**
 * Read a numeric header field stored as ASCII octal digits.
 * Trailing NULs/spaces and leading spaces are ignored; an empty field is 0.
 * Returns NaN when the remaining text does not start with an octal digit.
 */
function readOctal(buf: Buffer, offset: number, len: number): number {
  let start = offset
  let end = offset + len
  while (end > start && (buf[end - 1] === 0 || buf[end - 1] === 0x20)) end--
  while (start < end && buf[start] === 0x20) start++
  if (start === end) return 0
  return parseInt(buf.toString("ascii", start, end), 8)
}

/**
 * Find a regular file in a gzipped tar archive by basename and return its bytes.
 *
 * The archive is decompressed and walked entirely in memory. An entry matches
 * when its full path (ustar `prefix` + "/" + `name`) equals `basename` or ends
 * with "/" + `basename`; the first match wins.
 *
 * @param tarball Gzip-compressed tar archive.
 * @param basename File name to look for, without any directory part.
 * @returns A copy of the file's contents, detached from the decompressed archive.
 * @throws Error if a header carries an invalid size field, if the archive ends
 *   before the matched file's declared size, or if no entry matches.
 */
export function extractFileFromTarGz(tarball: Buffer, basename: string): Buffer {
  const data = gunzipSync(tarball)
  let offset = 0
  while (offset + BLOCK <= data.length) {
    // End-of-archive marker is a zero block. The name field is at the
    // start of the header, so a zero first byte means no more entries.
    if (data[offset] === 0) break

    const name = readCString(data, offset, 100)
    const size = readOctal(data, offset + 124, 12)
    // A garbage size would otherwise poison `offset` (NaN) or yield an
    // empty result for a matching entry.
    if (!Number.isSafeInteger(size) || size < 0) {
      throw new Error(`malformed tar header at offset ${offset}: invalid size field`)
    }
    const typeflag = data[offset + 156]
    const prefix = readCString(data, offset + 345, 155)
    const fullName = prefix ? `${prefix}/${name}` : name
    // typeflag '0' (0x30) or NUL = regular file
    const isFile = typeflag === 0 || typeflag === 0x30

    offset += BLOCK // step past the header to the entry's body
    if (isFile && (fullName === basename || fullName.endsWith(`/${basename}`))) {
      // subarray() clamps silently; refuse to hand back a short file.
      if (offset + size > data.length) {
        throw new Error(`tarball is truncated: ${fullName} declares ${size} bytes`)
      }
      // Copy so the result does not pin the whole decompressed archive.
      return Buffer.from(data.subarray(offset, offset + size))
    }
    // Bodies are padded to a whole number of blocks.
    offset += Math.ceil(size / BLOCK) * BLOCK
  }
  throw new Error(`${basename} not found in tarball`)
}

export async function syncGeoipToBlob(): Promise<SyncGeoipResult> {
Expand All @@ -70,27 +94,18 @@ export async function syncGeoipToBlob(): Promise<SyncGeoipResult> {
}

const tarball = await downloadTarball(accountId, licenseKey)
const mmdb = extractFileFromTarGz(tarball, MMDB_NAME)

const tmp = await mkdtemp(join(tmpdir(), "geolite2-asn-"))
try {
const tarballPath = join(tmp, "GeoLite2-ASN.tar.gz")
await writeFile(tarballPath, tarball)
const mmdbPath = await extractMmdb(tarballPath, tmp)
const mmdbBytes = await readFile(mmdbPath)

await put(ASN_BLOB_KEY, mmdbBytes, {
access: "private",
contentType: "application/octet-stream",
addRandomSuffix: false,
allowOverwrite: true,
})
await put(ASN_BLOB_KEY, mmdb, {
access: "private",
contentType: "application/octet-stream",
addRandomSuffix: false,
allowOverwrite: true,
})

return {
tarballBytes: tarball.length,
mmdbBytes: mmdbBytes.length,
blobKey: ASN_BLOB_KEY,
}
} finally {
await rm(tmp, { recursive: true, force: true })
return {
tarballBytes: tarball.length,
mmdbBytes: mmdb.length,
blobKey: ASN_BLOB_KEY,
}
}
Loading