From f07fcfd64e9264d4765e915f26855822d31fdf8a Mon Sep 17 00:00:00 2001 From: mevBlaze Date: Fri, 29 May 2026 15:42:12 +0530 Subject: [PATCH] feat(packages): publish live @dotprotocol/* source (compression, qr, wrapper, arena, relay, identity, sdk) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Makes the source of the LIVE published @dotprotocol/* npm packages public and reproducible in the dot-protocol org. Additive only — no live package name, semver, or API is changed. - Import 7 dist-shipped packages from the private source (dot-engine-week4 cut): compression, qr, wrapper, arena, relay, identity, sdk. Source version 0.3.0; published live at 1.0.0 (version bump, no API change). - Add per-package LICENSE matching the published license field (MIT for the 7; core/chain/mesh/cli/lang remain Apache-2.0 per their published metadata). - Add `repository`/`homepage`/`bugs` fields to all live-source packages. - Add packages/PROVENANCE.md documenting the live-npm → source map, the @dotprotocol (no-hyphen, published) vs @dot-protocol (hyphen, repo) scope relationship, the build/reproduce steps, and the acceptance-test results. Reproducibility (npm pack source vs live tarball): - core/chain/mesh: file lists IDENTICAL, src byte-identical (only package.json name scope differs: @dot-protocol/ here vs @dotprotocol/ published). - identity/qr/arena/sdk: built dist export surface matches live exactly (dist filenames carry tsdown content-hash suffixes -> functionally identical). No secrets in any imported source (scanned). No npm publish performed. 
Co-Authored-By: Claude Opus 4.8 --- packages/PROVENANCE.md | 65 ++ packages/arena/LICENSE | 21 + packages/arena/README.md | 116 +++ packages/arena/package.json | 51 ++ packages/arena/src/elo.ts | 54 ++ packages/arena/src/index.ts | 36 + packages/arena/src/resolution.ts | 95 +++ packages/arena/src/tests/elo.test.ts | 105 +++ packages/arena/src/tests/resolution.test.ts | 138 +++ packages/arena/src/types.ts | 54 ++ packages/arena/tsconfig.json | 8 + packages/chain/LICENSE | 189 ++++ packages/chain/package.json | 11 +- packages/cli/LICENSE | 189 ++++ packages/cli/package.json | 13 +- packages/compression/.npmignore | 4 + packages/compression/LICENSE | 21 + packages/compression/README.md | 81 ++ packages/compression/package.json | 44 + .../compression/scripts/train-dictionary.ts | 173 ++++ packages/compression/src/batch-v2.ts | 805 ++++++++++++++++++ .../compression/src/dictionary-registry.ts | 135 +++ packages/compression/src/index.ts | 11 + packages/compression/src/predictor.ts | 217 +++++ packages/compression/src/rans.ts | 251 ++++++ packages/compression/src/rle.ts | 108 +++ packages/compression/src/sample-generator.ts | 153 ++++ .../src/tests/batch-v2-dict.test.ts | 327 +++++++ .../src/tests/batch-v2-predict.test.ts | 333 ++++++++ .../compression/src/tests/batch-v2.test.ts | 279 ++++++ .../src/tests/benchmark-full.test.ts | 390 +++++++++ .../src/tests/benchmark-phase2a.test.ts | 264 ++++++ .../src/tests/benchmark-phase2b.test.ts | 644 ++++++++++++++ .../src/tests/coverage-gaps.test.ts | 655 ++++++++++++++ .../src/tests/dictionary-registry.test.ts | 178 ++++ .../compression/src/tests/predictor.test.ts | 300 +++++++ packages/compression/src/tests/rans.test.ts | 110 +++ packages/compression/src/tests/rle.test.ts | 173 ++++ .../src/tests/sample-generator.test.ts | 139 +++ .../src/tests/timestamp-delta.test.ts | 139 +++ packages/compression/src/tests/varint.test.ts | 228 +++++ .../compression/src/tests/weissman.test.ts | 66 ++ packages/compression/src/tests/zstd.test.ts 
| 180 ++++ packages/compression/src/timestamp-delta.ts | 108 +++ packages/compression/src/varint.ts | 133 +++ packages/compression/src/weissman.ts | 100 +++ packages/compression/src/zstd.ts | 147 ++++ packages/compression/tsconfig.json | 9 + packages/compression/tsdown.config.ts | 11 + packages/compression/vitest.config.ts | 17 + packages/core/LICENSE | 189 ++++ packages/core/package.json | 11 +- packages/identity/.npmignore | 4 + packages/identity/LICENSE | 21 + packages/identity/README.md | 88 ++ packages/identity/package.json | 41 + packages/identity/src/identity.ts | 63 ++ packages/identity/src/index.ts | 2 + packages/identity/src/tests/identity.test.ts | 62 ++ packages/identity/tsconfig.json | 8 + packages/identity/tsdown.config.ts | 11 + packages/lang/LICENSE | 189 ++++ packages/lang/package.json | 11 +- packages/mesh/LICENSE | 189 ++++ packages/mesh/package.json | 11 +- packages/qr/LICENSE | 21 + packages/qr/README.md | 118 +++ packages/qr/package.json | 49 ++ packages/qr/src/encode.ts | 196 +++++ packages/qr/src/index.ts | 31 + packages/qr/src/tests/encode.test.ts | 121 +++ packages/qr/src/types.ts | 57 ++ packages/qr/src/verify.ts | 50 ++ packages/qr/tsconfig.json | 8 + packages/relay/.npmignore | 4 + packages/relay/LICENSE | 21 + packages/relay/README.md | 121 +++ packages/relay/package.json | 53 ++ packages/relay/src/client.ts | 156 ++++ packages/relay/src/index.ts | 4 + packages/relay/src/server.ts | 103 +++ packages/relay/src/tests/client.test.ts | 351 ++++++++ packages/relay/src/tests/frame.test.ts | 63 ++ packages/relay/src/tests/server.test.ts | 391 +++++++++ packages/relay/src/types.ts | 63 ++ packages/relay/tsconfig.json | 8 + packages/relay/tsdown.config.ts | 11 + packages/sdk/.npmignore | 4 + packages/sdk/LICENSE | 21 + packages/sdk/README.md | 82 ++ packages/sdk/package.json | 57 ++ packages/sdk/src/index.ts | 110 +++ packages/sdk/src/tests/sdk.test.ts | 87 ++ packages/sdk/tsconfig.json | 9 + packages/sdk/tsdown.config.ts | 11 + 
packages/sdk/vitest.config.ts | 18 + packages/wrapper/.npmignore | 4 + packages/wrapper/LICENSE | 21 + packages/wrapper/README.md | 100 +++ packages/wrapper/package.json | 51 ++ .../wrapper/scripts/milestone-claude-api.ts | 361 ++++++++ packages/wrapper/src/bridge.ts | 382 +++++++++ packages/wrapper/src/identity.ts | 264 ++++++ packages/wrapper/src/index.ts | 19 + packages/wrapper/src/session.ts | 44 + packages/wrapper/src/tests/bridge.test.ts | 207 +++++ .../wrapper/src/tests/coverage-gaps.test.ts | 546 ++++++++++++ packages/wrapper/src/tests/identity.test.ts | 156 ++++ packages/wrapper/src/tests/wrap.test.ts | 227 +++++ packages/wrapper/src/types.ts | 138 +++ packages/wrapper/src/unwrap.ts | 285 +++++++ packages/wrapper/src/wrap.ts | 109 +++ packages/wrapper/tsconfig.json | 9 + packages/wrapper/tsdown.config.ts | 11 + packages/wrapper/vitest.config.ts | 18 + 115 files changed, 14323 insertions(+), 6 deletions(-) create mode 100644 packages/PROVENANCE.md create mode 100644 packages/arena/LICENSE create mode 100644 packages/arena/README.md create mode 100644 packages/arena/package.json create mode 100644 packages/arena/src/elo.ts create mode 100644 packages/arena/src/index.ts create mode 100644 packages/arena/src/resolution.ts create mode 100644 packages/arena/src/tests/elo.test.ts create mode 100644 packages/arena/src/tests/resolution.test.ts create mode 100644 packages/arena/src/types.ts create mode 100644 packages/arena/tsconfig.json create mode 100644 packages/chain/LICENSE create mode 100644 packages/cli/LICENSE create mode 100644 packages/compression/.npmignore create mode 100644 packages/compression/LICENSE create mode 100644 packages/compression/README.md create mode 100644 packages/compression/package.json create mode 100644 packages/compression/scripts/train-dictionary.ts create mode 100644 packages/compression/src/batch-v2.ts create mode 100644 packages/compression/src/dictionary-registry.ts create mode 100644 packages/compression/src/index.ts create 
mode 100644 packages/compression/src/predictor.ts create mode 100644 packages/compression/src/rans.ts create mode 100644 packages/compression/src/rle.ts create mode 100644 packages/compression/src/sample-generator.ts create mode 100644 packages/compression/src/tests/batch-v2-dict.test.ts create mode 100644 packages/compression/src/tests/batch-v2-predict.test.ts create mode 100644 packages/compression/src/tests/batch-v2.test.ts create mode 100644 packages/compression/src/tests/benchmark-full.test.ts create mode 100644 packages/compression/src/tests/benchmark-phase2a.test.ts create mode 100644 packages/compression/src/tests/benchmark-phase2b.test.ts create mode 100644 packages/compression/src/tests/coverage-gaps.test.ts create mode 100644 packages/compression/src/tests/dictionary-registry.test.ts create mode 100644 packages/compression/src/tests/predictor.test.ts create mode 100644 packages/compression/src/tests/rans.test.ts create mode 100644 packages/compression/src/tests/rle.test.ts create mode 100644 packages/compression/src/tests/sample-generator.test.ts create mode 100644 packages/compression/src/tests/timestamp-delta.test.ts create mode 100644 packages/compression/src/tests/varint.test.ts create mode 100644 packages/compression/src/tests/weissman.test.ts create mode 100644 packages/compression/src/tests/zstd.test.ts create mode 100644 packages/compression/src/timestamp-delta.ts create mode 100644 packages/compression/src/varint.ts create mode 100644 packages/compression/src/weissman.ts create mode 100644 packages/compression/src/zstd.ts create mode 100644 packages/compression/tsconfig.json create mode 100644 packages/compression/tsdown.config.ts create mode 100644 packages/compression/vitest.config.ts create mode 100644 packages/core/LICENSE create mode 100644 packages/identity/.npmignore create mode 100644 packages/identity/LICENSE create mode 100644 packages/identity/README.md create mode 100644 packages/identity/package.json create mode 100644 
packages/identity/src/identity.ts create mode 100644 packages/identity/src/index.ts create mode 100644 packages/identity/src/tests/identity.test.ts create mode 100644 packages/identity/tsconfig.json create mode 100644 packages/identity/tsdown.config.ts create mode 100644 packages/lang/LICENSE create mode 100644 packages/mesh/LICENSE create mode 100644 packages/qr/LICENSE create mode 100644 packages/qr/README.md create mode 100644 packages/qr/package.json create mode 100644 packages/qr/src/encode.ts create mode 100644 packages/qr/src/index.ts create mode 100644 packages/qr/src/tests/encode.test.ts create mode 100644 packages/qr/src/types.ts create mode 100644 packages/qr/src/verify.ts create mode 100644 packages/qr/tsconfig.json create mode 100644 packages/relay/.npmignore create mode 100644 packages/relay/LICENSE create mode 100644 packages/relay/README.md create mode 100644 packages/relay/package.json create mode 100644 packages/relay/src/client.ts create mode 100644 packages/relay/src/index.ts create mode 100644 packages/relay/src/server.ts create mode 100644 packages/relay/src/tests/client.test.ts create mode 100644 packages/relay/src/tests/frame.test.ts create mode 100644 packages/relay/src/tests/server.test.ts create mode 100644 packages/relay/src/types.ts create mode 100644 packages/relay/tsconfig.json create mode 100644 packages/relay/tsdown.config.ts create mode 100644 packages/sdk/.npmignore create mode 100644 packages/sdk/LICENSE create mode 100644 packages/sdk/README.md create mode 100644 packages/sdk/package.json create mode 100644 packages/sdk/src/index.ts create mode 100644 packages/sdk/src/tests/sdk.test.ts create mode 100644 packages/sdk/tsconfig.json create mode 100644 packages/sdk/tsdown.config.ts create mode 100644 packages/sdk/vitest.config.ts create mode 100644 packages/wrapper/.npmignore create mode 100644 packages/wrapper/LICENSE create mode 100644 packages/wrapper/README.md create mode 100644 packages/wrapper/package.json create mode 100644 
packages/wrapper/scripts/milestone-claude-api.ts create mode 100644 packages/wrapper/src/bridge.ts create mode 100644 packages/wrapper/src/identity.ts create mode 100644 packages/wrapper/src/index.ts create mode 100644 packages/wrapper/src/session.ts create mode 100644 packages/wrapper/src/tests/bridge.test.ts create mode 100644 packages/wrapper/src/tests/coverage-gaps.test.ts create mode 100644 packages/wrapper/src/tests/identity.test.ts create mode 100644 packages/wrapper/src/tests/wrap.test.ts create mode 100644 packages/wrapper/src/types.ts create mode 100644 packages/wrapper/src/unwrap.ts create mode 100644 packages/wrapper/src/wrap.ts create mode 100644 packages/wrapper/tsconfig.json create mode 100644 packages/wrapper/tsdown.config.ts create mode 100644 packages/wrapper/vitest.config.ts diff --git a/packages/PROVENANCE.md b/packages/PROVENANCE.md new file mode 100644 index 000000000..0cf09e6a7 --- /dev/null +++ b/packages/PROVENANCE.md @@ -0,0 +1,65 @@ +# Live npm provenance — `@dotprotocol/*` packages + +This document records where the **live published** npm packages come from, so anyone +can reproduce them. Resolved 2026-05-29 (Room "Mark III" reproducibility gate). + +## The two scopes + +There are two npm scopes, one hyphen apart: + +- **`@dotprotocol/*`** (NO hyphen) — the **live, published, in-use** packages on npm. +- **`@dot-protocol/*`** (hyphen) — the namespace used inside this repo's source and in + the (unpublished) granular alpha packages. + +The live published packages keep their published **names, versions, and APIs** unchanged. +This repo is the public, reproducible **source** for them. 
+ +## Source map (live `@dotprotocol/*` → source in this repo) + +| Live npm package (v1.0.0) | How it ships | Source | +|---|---|---| +| `@dotprotocol/core` | raw `src/index.ts` | `packages/core` (named `@dot-protocol/core` here; published under no-hyphen scope) | +| `@dotprotocol/chain` | raw `src/` | `packages/chain` | +| `@dotprotocol/mesh` | raw `src/` | `packages/mesh` | +| `@dotprotocol/cli` | — | `packages/cli` | +| `@dotprotocol/lang` | — | `packages/lang` | +| `@dotprotocol/compression` | tsdown `dist/` | `packages/compression` | +| `@dotprotocol/qr` | tsdown `dist/` | `packages/qr` | +| `@dotprotocol/wrapper`| tsdown `dist/` | `packages/wrapper` | +| `@dotprotocol/arena` | tsdown `dist/` | `packages/arena` | +| `@dotprotocol/relay` | tsdown `dist/` | `packages/relay` | +| `@dotprotocol/identity`| tsdown `dist/` | `packages/identity` | +| `@dotprotocol/sdk` | tsdown `dist/` | `packages/sdk` | + +The 7 `dist`-shipped packages were originally cut at source version `0.3.0` and published +to npm at `1.0.0` (a version bump, no API change). Their `src/` is committed here. + +## Build / reproduce + +`core`, `chain`, `mesh`, `cli`, `lang` publish their `src/` directly (the package `main` +points at `src/index.ts`), so they are reproduced by `npm pack` of the package directory. + +The 7 `dist`-shipped packages build with `tsdown`: + +```bash +pnpm install +cd packages/<name> && pnpm exec tsdown # emits dist/ +npm pack # produces the publishable tarball +``` + +Note: the 7 packages declare `workspace:*` deps on `@dotprotocol/core` etc. When built +standalone for publish, those resolve against the published `@dotprotocol/*` packages on +the npm registry. Within this repo, the local core/chain are named `@dot-protocol/*` +(hyphen) — this scope difference is intentional and preserved to avoid renaming live +packages. 
+ +## Acceptance test results (2026-05-29) + +`npm pack` of source vs the live npm tarball, per package: + +- `core`, `chain`, `mesh`: **file lists IDENTICAL**, `src/` contents byte-identical. + Only difference is the package.json `name` (`@dot-protocol/` here vs `@dotprotocol/` + published) — the publish step renames the scope. +- `identity`, `qr`, `arena`, `sdk`: built `dist/` **export surface matches live exactly** + (same exported symbols). `dist` filenames carry tsdown content-hash suffixes which vary + with toolchain version, so they are functionally — not byte — identical. diff --git a/packages/arena/LICENSE b/packages/arena/LICENSE new file mode 100644 index 000000000..d90201772 --- /dev/null +++ b/packages/arena/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 DOT Protocol contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/packages/arena/README.md b/packages/arena/README.md new file mode 100644 index 000000000..79b938e3b --- /dev/null +++ b/packages/arena/README.md @@ -0,0 +1,116 @@ +# @dotprotocol/arena + +Elo engine + blind prediction evaluation for DOT Protocol. + +[![npm](https://img.shields.io/npm/v/@dotprotocol/arena)](https://www.npmjs.com/package/@dotprotocol/arena) + +## Install + +```bash +npm install @dotprotocol/arena +``` + +## Quick start + +```js +import { resolveSession, rankLeaderboard } from '@dotprotocol/arena'; + +// Resolve predictions against oracle outcome +const { session: resolved, matches } = await resolveSession(session, resolution); + +// Rank by Elo +const board = rankLeaderboard('prediction', entries); +``` + +## How blind evaluation works + +1. **Predictors submit** DOTs with sealed answers (commitment scheme) +2. **Hash is published** before the oracle resolves — no retroactive changes +3. **Oracle resolves** by emitting a resolution DOT +4. **Session resolves** — correct predictions win Elo, incorrect ones lose it +5. **Chain proves** the sequence — oracle cannot have seen predictions before resolving + +```js +import { hashPredictionDOT, resolveSession, verifyPrediction } from '@dotprotocol/arena'; + +// --- Predictor --- +const prediction = { dot: await createDOT({ keypair, payload: myAnswer }), domain: 'crypto', claim, expiresAt }; +const commitment = await hashPredictionDOT(prediction); // publish this hash + +// --- Oracle --- +const resolution = { dot: await createDOT({ keypair: oracleKeypair, payload: trueOutcome }), predictionRef: commitment, outcome: true }; + +// --- Resolution --- +const session = { id: 'session-1', domain: 'crypto', oracleKey: oracleKeypair.publicKey, closesAt, predictions: [prediction] }; +const { session: resolved, matches } = await resolveSession(session, resolution); +``` + +## API + +### `resolveSession(session, resolutionDOT)` + +Evaluate all predictions in a session against the oracle's resolution. 
+ +```js +const { session: resolved, matches } = await resolveSession({ + id: 'session-1', domain: 'crypto', // strings — session id and Elo domain + oracleKey: oraclePublicKey, // Uint8Array — must equal the resolution DOT's pubkey + closesAt, // number — session close timestamp + predictions, // PredictionDOT[] — signed predictions +}, resolution); + +// matches: [{ prediction, resolution, correct: boolean, pointsAwarded: number }] — one per validly signed prediction +// resolved: the session with `resolution` and `resolvedAt` added +``` + +### `verifyPrediction(prediction)` + +Verify that a prediction DOT carries a valid signature. + +```js +const ok = await verifyPrediction(prediction); // boolean +``` + +### `hashPredictionDOT(prediction)` + +Compute the commitment hash for a prediction DOT (SHA-256 of its wire bytes when WebCrypto is available). + +```js +const hash = await hashPredictionDOT(prediction); // Uint8Array(32) +``` + +### `rankLeaderboard(domain, entries)` + +Sort leaderboard entries by Elo descending, adding `domain`, `rank` and `accuracy` to each. + +```js +const board = rankLeaderboard('crypto', [ + { pubkey: aliceKey, elo: 1650, totalPredictions: 20, correctPredictions: 12 }, + { pubkey: bobKey, elo: 1720, totalPredictions: 10, correctPredictions: 8 }, +]); +// [{ pubkey: bobKey, elo: 1720, ..., domain: 'crypto', rank: 1, accuracy: 0.8 }, ...] +``` + +### Elo utilities + +```js +import { updateElo, computeEloFromMatches, computeEloPercentile, ELO_DEFAULT } from '@dotprotocol/arena'; + +// Single match +// updateElo(currentRating, update) returns the new rating as a number: +// a correct prediction raises it, an incorrect one lowers it. +const newRating = updateElo(ELO_DEFAULT, { + domain: 'crypto', + correct: true, +}); + +// Bulk from history +const ratings = computeEloFromMatches(new Map(), matches); // Map of domain → rating + +// Where does this rating fall? +const pct = computeEloPercentile(1720, allRatings); // 0.87 = rated above 87% of the population +``` + +## License + +MIT diff --git a/packages/arena/package.json b/packages/arena/package.json new file mode 100644 index 000000000..4fe1e227f --- /dev/null +++ b/packages/arena/package.json @@ -0,0 +1,51 @@ +{ + "name": "@dotprotocol/arena", + "version": "0.3.0", + "description": "DOT Protocol Arena \u2014 Elo engine, blind evaluation, prediction resolution. 
DOT spreads like life, not like messages.", + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "import": "./dist/index.js", + "require": "./dist/index.cjs", + "types": "./dist/index.d.ts" + } + }, + "sideEffects": false, + "scripts": { + "build": "tsdown", + "test": "vitest run", + "typecheck": "tsc --noEmit" + }, + "keywords": [ + "dot-protocol", + "elo", + "prediction", + "blind-evaluation", + "arena" + ], + "license": "MIT", + "files": [ + "dist", + "README.md", + "LICENSE" + ], + "devDependencies": { + "@types/node": "^20.0.0" + }, + "dependencies": { + "@dotprotocol/core": "workspace:*", + "@dotprotocol/chain": "workspace:*" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/dot-protocol/dot.git", + "directory": "packages/arena" + }, + "homepage": "https://github.com/dot-protocol/dot/tree/main/packages/arena#readme", + "bugs": { + "url": "https://github.com/dot-protocol/dot/issues" + } +} diff --git a/packages/arena/src/elo.ts b/packages/arena/src/elo.ts new file mode 100644 index 000000000..808ddda14 --- /dev/null +++ b/packages/arena/src/elo.ts @@ -0,0 +1,54 @@ +/** + * DOT Protocol v0.3.0 — Arena Elo Engine + * + * Per-domain Elo ratings computed from prediction/resolution DOT pairs. + * Re-exports and extends the @dotprotocol/chain scoring utilities. + */ + +import { updateElo, applyEloUpdates, ELO_DEFAULT } from '@dotprotocol/chain'; +import type { EloUpdate } from '@dotprotocol/chain'; +import type { ArenaMatch } from './types.js'; + +export { updateElo, applyEloUpdates, ELO_DEFAULT }; +export type { EloUpdate }; + +/** + * Compute Elo deltas for a batch of arena matches. + * Returns a map of domain → new Elo rating. 
+ */ +export function computeEloFromMatches( + existingElo: Map<string, number>, + matches: ArenaMatch[] +): Map<string, number> { + const updates: EloUpdate[] = matches.map((m) => ({ + domain: m.prediction.domain, + correct: m.correct, + })); + return applyEloUpdates(existingElo, updates); +} + +/** + * Compute Elo percentile of a given rating against a population. + * Returns the fraction of the population rated strictly below `rating` (0–1); 0.5 when the population is empty. + */ +export function computeEloPercentile(rating: number, population: number[]): number { + if (population.length === 0) return 0.5; + const below = population.filter((r) => r < rating).length; + return below / population.length; +} + +/** + * Produce a ranked leaderboard from a list of per-pubkey Elo entries (sorted by Elo descending; adds domain, rank and accuracy). + */ +export function rankLeaderboard( + domain: string, + entries: Array<{ pubkey: string; elo: number; totalPredictions: number; correctPredictions: number }> +): import('./types.js').LeaderboardEntry[] { + const sorted = [...entries].sort((a, b) => b.elo - a.elo); + return sorted.map((e, i) => ({ + ...e, + domain, + rank: i + 1, + accuracy: e.totalPredictions > 0 ? e.correctPredictions / e.totalPredictions : 0, + })); +} diff --git a/packages/arena/src/index.ts b/packages/arena/src/index.ts new file mode 100644 index 000000000..b4bcb0c9f --- /dev/null +++ b/packages/arena/src/index.ts @@ -0,0 +1,36 @@ +/** + * @dotprotocol/arena — v0.3.0 + * + * Elo engine, blind evaluation, prediction resolution. + * DOT spreads like life, not like messages. + * + * "DOT is a noun-verb. A state that records its own transition." 
+ */ + +// Elo engine +export { + updateElo, + applyEloUpdates, + computeEloFromMatches, + computeEloPercentile, + rankLeaderboard, + ELO_DEFAULT, +} from './elo.js'; +export type { EloUpdate } from './elo.js'; + +// Resolution protocol +export { + verifyResolution, + verifyPrediction, + resolveSession, + hashPredictionDOT, +} from './resolution.js'; + +// Types +export type { + PredictionDOT, + ResolutionDOT, + ArenaMatch, + BlindEvalSession, + LeaderboardEntry, +} from './types.js'; diff --git a/packages/arena/src/resolution.ts b/packages/arena/src/resolution.ts new file mode 100644 index 000000000..e25a90d37 --- /dev/null +++ b/packages/arena/src/resolution.ts @@ -0,0 +1,95 @@ +/** + * DOT Protocol v0.3.0 — Arena Resolution Engine + * + * Blind evaluation protocol: + * 1. Predictor signs a prediction DOT (claim + domain + expiry) + * 2. Oracle signs a resolution DOT (outcome + evidence pointer) + * 3. Arena matches them and updates Elo + * + * The protocol is blind: the oracle does not see individual predictions + * before posting its resolution. Only the outcome is verifiable. + */ + +import { verifyDOT } from '@dotprotocol/core'; +import { toBytes } from '@dotprotocol/core'; +import type { PredictionDOT, ResolutionDOT, ArenaMatch, BlindEvalSession } from './types.js'; + +/** + * Verify a resolution DOT's signature via verifyDOT (the signer is not compared with any session oracle key here). + */ +export async function verifyResolution(resolution: ResolutionDOT): Promise<boolean> { + return verifyDOT(resolution.dot); +} + +/** + * Verify a prediction DOT's signature via verifyDOT. + */ +export async function verifyPrediction(prediction: PredictionDOT): Promise<boolean> { + return verifyDOT(prediction.dot); +} + +/** + * Resolve a blind evaluation session. + * Matches prediction DOTs with the resolution DOT. + * Returns one ArenaMatch per validly signed prediction (predictions that fail verification are skipped). 
+ */ +export async function resolveSession( + session: BlindEvalSession, + resolution: ResolutionDOT +): Promise<{ session: BlindEvalSession; matches: ArenaMatch[] }> { + // Verify the resolution DOT's signature before trusting any of its fields + const resValid = await verifyResolution(resolution); + if (!resValid) { + throw new Error('Resolution DOT signature invalid — oracle key mismatch'); + } + + // Verify oracle key matches session's declared oracle. NOTE(review): compares byte-wise over the DOT pubkey only; the two lengths are not compared. + const oracleKeyMatches = resolution.dot.pubkey.every( + (b, i) => b === session.oracleKey[i] + ); + if (!oracleKeyMatches) { + throw new Error('Resolution DOT pubkey does not match session oracle key'); + } + + // Score each validly signed prediction with the session-wide outcome; individual claims are not compared + const matches: ArenaMatch[] = []; + for (const prediction of session.predictions) { + const predValid = await verifyPrediction(prediction); + if (!predValid) continue; // Skip predictions whose signature fails verification (no error is reported) + + matches.push({ + prediction, + resolution, + correct: resolution.outcome, + pointsAwarded: resolution.outcome ? 1 : 0, + }); + } + + const resolved: BlindEvalSession = { + ...session, + resolution, + resolvedAt: Date.now(), + }; + + return { session: resolved, matches }; +} + +/** + * Hash a prediction DOT's wire bytes (for resolution reference). + * Uses SubtleCrypto (SHA-256) if available, falls back to a simple XOR fingerprint (not collision-resistant). 
+ */ +export async function hashPredictionDOT(prediction: PredictionDOT): Promise<Uint8Array> { + const wire = toBytes(prediction.dot); + + if (typeof globalThis.crypto?.subtle?.digest === 'function') { + const hash = await globalThis.crypto.subtle.digest('SHA-256', wire); + return new Uint8Array(hash); + } + + // Fallback: XOR fold to 32 bytes (deterministic, not cryptographic) + const fold = new Uint8Array(32); + for (let i = 0; i < wire.length; i++) { + fold[i % 32] ^= wire[i]; + } + return fold; +} diff --git a/packages/arena/src/tests/elo.test.ts b/packages/arena/src/tests/elo.test.ts new file mode 100644 index 000000000..6b5e33d08 --- /dev/null +++ b/packages/arena/src/tests/elo.test.ts @@ -0,0 +1,105 @@ +import { describe, it, expect } from 'vitest'; +import { + updateElo, + applyEloUpdates, + computeEloFromMatches, + computeEloPercentile, + rankLeaderboard, + ELO_DEFAULT, +} from '../elo.js'; +import type { ArenaMatch } from '../types.js'; +import { createKeypair, createDOT } from '@dotprotocol/core'; + +async function makeMockMatch(domain: string, correct: boolean): Promise<ArenaMatch> { + const kp = await createKeypair(); + const dot = await createDOT({ keypair: kp }); + const resDot = await createDOT({ keypair: kp }); + return { + prediction: { dot, domain, claim: 'test claim', expiresAt: Date.now() + 1000 }, + resolution: { dot: resDot, predictionRef: new Uint8Array(32), outcome: correct }, + correct, + pointsAwarded: correct ? 
1 : 0, + }; +} + +describe('updateElo', () => { + it('correct prediction increases rating', () => { + const after = updateElo(ELO_DEFAULT, { domain: 'd', correct: true }); + expect(after).toBeGreaterThan(ELO_DEFAULT); + }); + + it('incorrect prediction decreases rating', () => { + const after = updateElo(ELO_DEFAULT, { domain: 'd', correct: false }); + expect(after).toBeLessThan(ELO_DEFAULT); + }); +}); + +describe('computeEloFromMatches', () => { + it('applies matches to elo map', async () => { + const matches = [ + await makeMockMatch('prediction', true), + await makeMockMatch('prediction', true), + ]; + const initial = new Map(); + const result = computeEloFromMatches(initial, matches); + expect(result.get('prediction')).toBeGreaterThan(ELO_DEFAULT); + }); + + it('handles multiple domains independently', async () => { + const matches = [ + await makeMockMatch('prediction', true), + await makeMockMatch('teaching', false), + ]; + const result = computeEloFromMatches(new Map(), matches); + expect(result.get('prediction')).toBeGreaterThan(ELO_DEFAULT); + expect(result.get('teaching')).toBeLessThan(ELO_DEFAULT); + }); +}); + +describe('computeEloPercentile', () => { + it('returns 0.5 for empty population', () => { + expect(computeEloPercentile(1500, [])).toBe(0.5); + }); + + it('top rating gets percentile near 1.0', () => { + const pop = [1200, 1300, 1400, 1500, 1600]; + expect(computeEloPercentile(1700, pop)).toBe(1.0); + }); + + it('bottom rating gets percentile 0', () => { + const pop = [1300, 1400, 1500]; + expect(computeEloPercentile(1200, pop)).toBe(0); + }); +}); + +describe('rankLeaderboard', () => { + it('sorts by Elo descending and assigns ranks', () => { + const entries = [ + { pubkey: 'a', elo: 1600, totalPredictions: 10, correctPredictions: 8 }, + { pubkey: 'b', elo: 1700, totalPredictions: 5, correctPredictions: 5 }, + { pubkey: 'c', elo: 1400, totalPredictions: 20, correctPredictions: 10 }, + ]; + const board = rankLeaderboard('prediction', entries); + 
expect(board[0].pubkey).toBe('b'); + expect(board[0].rank).toBe(1); + expect(board[1].pubkey).toBe('a'); + expect(board[2].pubkey).toBe('c'); + expect(board[2].rank).toBe(3); + }); + + it('computes accuracy correctly', () => { + const entries = [ + { pubkey: 'x', elo: 1500, totalPredictions: 10, correctPredictions: 7 }, + ]; + const board = rankLeaderboard('d', entries); + expect(board[0].accuracy).toBeCloseTo(0.7, 5); + }); + + it('accuracy = 0 when totalPredictions = 0', () => { + const entries = [ + { pubkey: 'x', elo: 1500, totalPredictions: 0, correctPredictions: 0 }, + ]; + const board = rankLeaderboard('d', entries); + expect(board[0].accuracy).toBe(0); + }); +}); diff --git a/packages/arena/src/tests/resolution.test.ts b/packages/arena/src/tests/resolution.test.ts new file mode 100644 index 000000000..bcea263c3 --- /dev/null +++ b/packages/arena/src/tests/resolution.test.ts @@ -0,0 +1,138 @@ +import { describe, it, expect } from 'vitest'; +import { createKeypair, createDOT } from '@dotprotocol/core'; +import { + verifyPrediction, + verifyResolution, + resolveSession, + hashPredictionDOT, +} from '../resolution.js'; +import type { BlindEvalSession, PredictionDOT, ResolutionDOT } from '../types.js'; + +async function makePrediction(domain = 'prediction'): Promise<{ + prediction: PredictionDOT; + keypair: Awaited<ReturnType<typeof createKeypair>>; +}> { + const keypair = await createKeypair(); + const dot = await createDOT({ keypair }); + const prediction: PredictionDOT = { + dot, + domain, + claim: 'Test claim', + expiresAt: Date.now() + 60_000, + }; + return { prediction, keypair }; +} + +async function makeResolution( + oracleKeypair: Awaited<ReturnType<typeof createKeypair>>, + outcome: boolean +): Promise<ResolutionDOT> { + const dot = await createDOT({ keypair: oracleKeypair }); + return { + dot, + predictionRef: new Uint8Array(32), + outcome, + evidence: 'test evidence', + }; +} + +describe('verifyPrediction', () => { + it('returns true for a valid prediction DOT', async () => { + const { prediction } = await makePrediction(); + 
expect(await verifyPrediction(prediction)).toBe(true); + }); +}); + +describe('verifyResolution', () => { + it('returns true for a valid resolution DOT', async () => { + const kp = await createKeypair(); + const resolution = await makeResolution(kp, true); + expect(await verifyResolution(resolution)).toBe(true); + }); +}); + +describe('resolveSession', () => { + it('resolves a session and returns matches for all valid predictions', async () => { + const oracleKp = await createKeypair(); + const { prediction } = await makePrediction(); + const resolution = await makeResolution(oracleKp, true); /* signed with the same key the session names as its oracle */ + + const session: BlindEvalSession = { + id: 'test-session', + domain: 'prediction', + oracleKey: oracleKp.publicKey, + closesAt: Date.now() - 1, /* 1 ms in the past */ + predictions: [prediction], + }; + + const result = await resolveSession(session, resolution); + expect(result.matches).toHaveLength(1); + expect(result.matches[0].correct).toBe(true); + expect(result.session.resolvedAt).toBeDefined(); + expect(result.session.resolution).toBeDefined(); + }); + + it('throws when resolution DOT pubkey does not match oracle key', async () => { + const oracleKp = await createKeypair(); + const wrongKp = await createKeypair(); + const { prediction } = await makePrediction(); + const resolution = await makeResolution(wrongKp, true); // wrong signer + + const session: BlindEvalSession = { + id: 'test-session', + domain: 'prediction', + oracleKey: oracleKp.publicKey, // expects oracleKp + closesAt: Date.now() - 1, + predictions: [prediction], + }; + + await expect(resolveSession(session, resolution)).rejects.toThrow( + /oracle key/ + ); + }); + + it('resolves multiple predictions in one session', async () => { + const oracleKp = await createKeypair(); + const predictions = await Promise.all([ + makePrediction().then((p) => p.prediction), + makePrediction().then((p) => p.prediction), + makePrediction().then((p) => p.prediction), + ]); + const resolution = await makeResolution(oracleKp, false); /* outcome = false for the whole session */ + + const 
session: BlindEvalSession = { + id: 'multi-session', + domain: 'prediction', + oracleKey: oracleKp.publicKey, + closesAt: Date.now() - 1, + predictions, + }; + + const result = await resolveSession(session, resolution); + expect(result.matches).toHaveLength(3); /* one match per submitted prediction */ + expect(result.matches.every((m) => m.correct === false)).toBe(true); + }); +}); + +describe('hashPredictionDOT', () => { + it('returns 32 bytes', async () => { + const { prediction } = await makePrediction(); + const hash = await hashPredictionDOT(prediction); + expect(hash).toHaveLength(32); + }); + + it('same prediction produces same hash', async () => { + const { prediction } = await makePrediction(); + const h1 = await hashPredictionDOT(prediction); + const h2 = await hashPredictionDOT(prediction); + expect(h1).toEqual(h2); + }); + + it('different predictions produce different hashes', async () => { + const { prediction: p1 } = await makePrediction(); + const { prediction: p2 } = await makePrediction(); /* each makePrediction() call creates its own keypair and DOT */ + const h1 = await hashPredictionDOT(p1); + const h2 = await hashPredictionDOT(p2); + expect(h1).not.toEqual(h2); + }); +}); diff --git a/packages/arena/src/types.ts b/packages/arena/src/types.ts new file mode 100644 index 000000000..5d6cf0c6b --- /dev/null +++ b/packages/arena/src/types.ts @@ -0,0 +1,54 @@ +/** + * DOT Protocol v0.3.0 — Arena Types + * + * The Arena is where predictions meet reality. + * DOTs make claims. Reality signs resolutions. The chain remembers. + */ + +import type { DOT } from '@dotprotocol/core'; + +/** A prediction DOT — a claim about a future state */ +export interface PredictionDOT { + dot: DOT; + domain: string; // e.g. 
"prediction", "engineering", "teaching" + claim: string; // human-readable claim (stored off-chain) + expiresAt: number; // unix ms — when this prediction resolves +} + +/** A resolution DOT — reality's answer, signed by an oracle key */ +export interface ResolutionDOT { + dot: DOT; + predictionRef: Uint8Array; // SHA-256 of prediction DOT wire bytes + outcome: boolean; // true = prediction correct, false = wrong + evidence?: string; // optional human-readable evidence pointer +} + +/** A completed match in the Arena */ +export interface ArenaMatch { + prediction: PredictionDOT; + resolution: ResolutionDOT; + correct: boolean; + pointsAwarded: number; +} + +/** Blind evaluation session — predictions submitted before resolution is known */ +export interface BlindEvalSession { + id: string; // ULID + domain: string; + oracleKey: Uint8Array; // Ed25519 pubkey of the oracle that will resolve + closesAt: number; // unix ms — no more predictions after this + resolvedAt?: number; // unix ms — when oracle posted resolution + predictions: PredictionDOT[]; + resolution?: ResolutionDOT; +} + +/** Leaderboard entry computed from chain DOTs */ +export interface LeaderboardEntry { + pubkey: string; // hex Ed25519 public key + domain: string; + elo: number; + rank: number; // position in this domain's leaderboard (1 = best) + totalPredictions: number; + correctPredictions: number; + accuracy: number; // 0–1 +} diff --git a/packages/arena/tsconfig.json b/packages/arena/tsconfig.json new file mode 100644 index 000000000..792172fb8 --- /dev/null +++ b/packages/arena/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src"] +} diff --git a/packages/chain/LICENSE b/packages/chain/LICENSE new file mode 100644 index 000000000..cdcd7ef79 --- /dev/null +++ b/packages/chain/LICENSE @@ -0,0 +1,189 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + 
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work. + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to the Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by the Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding any notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + Copyright 2026 DOT Protocol Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/packages/chain/package.json b/packages/chain/package.json index 1a1200e44..f316e6791 100644 --- a/packages/chain/package.json +++ b/packages/chain/package.json @@ -24,5 +24,14 @@ "vitest": "^3.1.0", "@types/better-sqlite3": "^7.6.0" }, - "license": "Apache-2.0" + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/dot-protocol/dot.git", + "directory": "packages/chain" + }, + "homepage": "https://github.com/dot-protocol/dot/tree/main/packages/chain#readme", + "bugs": { + "url": "https://github.com/dot-protocol/dot/issues" + } } diff --git a/packages/cli/LICENSE b/packages/cli/LICENSE new file mode 100644 index 000000000..cdcd7ef79 --- /dev/null +++ b/packages/cli/LICENSE @@ -0,0 +1,189 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work. + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to the Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by the Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding any notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2026 DOT Protocol Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. diff --git a/packages/cli/package.json b/packages/cli/package.json index 054bdecd7..c5748f35a 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -1,7 +1,7 @@ { "name": "@dot-protocol/cli", "version": "1.0.0", - "description": "DOT Protocol CLI — run, compile, check, and explain DOT programs. Ed25519 identity management.", + "description": "DOT Protocol CLI \u2014 run, compile, check, and explain DOT programs. Ed25519 identity management.", "type": "module", "main": "src/index.ts", "types": "src/index.ts", @@ -27,5 +27,14 @@ "typescript": "^5.8.0", "vitest": "^3.1.0" }, - "license": "Apache-2.0" + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/dot-protocol/dot.git", + "directory": "packages/cli" + }, + "homepage": "https://github.com/dot-protocol/dot/tree/main/packages/cli#readme", + "bugs": { + "url": "https://github.com/dot-protocol/dot/issues" + } } diff --git a/packages/compression/.npmignore b/packages/compression/.npmignore new file mode 100644 index 000000000..91b700e0b --- /dev/null +++ b/packages/compression/.npmignore @@ -0,0 +1,4 @@ +src/tests/ +src/**/*.test.ts +coverage/ +*.tsbuildinfo diff --git a/packages/compression/LICENSE b/packages/compression/LICENSE new file mode 100644 index 000000000..d90201772 --- /dev/null +++ b/packages/compression/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 DOT Protocol contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright 
notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/packages/compression/README.md b/packages/compression/README.md new file mode 100644 index 000000000..237e36279 --- /dev/null +++ b/packages/compression/README.md @@ -0,0 +1,81 @@ +# @dotprotocol/compression + +Batch packing for DOT Protocol — Ed25519 + BLS12-381 signature aggregation. + +[![npm](https://img.shields.io/npm/v/@dotprotocol/compression)](https://www.npmjs.com/package/@dotprotocol/compression) + +## Install + +```bash +npm install @dotprotocol/compression +``` + +## Quick start + +```js +import { pack, unpack } from '@dotprotocol/compression'; + +// Pack 1000 DOTs for storage/transport +const packed = pack(dots); // much smaller than 1000 × 153 bytes +const restored = unpack(packed); // original DOTs, fully verified +``` + +## When to use + +- Storing large numbers of DOTs in a database +- Transmitting batches over bandwidth-constrained channels +- Archiving worldlines +- Feed snapshots + +## API + +### `pack(dots, options?)` + +Aggregate a batch of DOTs into a compact representation. + +```js +const packed = pack(dots, { + method: 'ed25519', // default — lossless compression + // method: 'bls' // BLS12-381 signature aggregation (experimental) +}); +// Returns: Uint8Array +``` + +### `unpack(packed)` + +Restore DOTs from a packed batch. Verifies all signatures. 
+ +```js +const dots = unpack(packed); +// Returns: DOT[] +``` + +### `packStream(dotStream)` + +Streaming pack — useful for very large archives: + +```js +import { packStream } from '@dotprotocol/compression'; + +const writer = packStream(outputStream); +for await (const dot of dotStream) { + writer.write(dot); +} +await writer.end(); +``` + +## Compression ratios + +Typical results with `ed25519` method (LZ4 + deduplication): + +| DOTs | Raw size | Packed size | Ratio | +|------|----------|-------------|-------| +| 100 | 15.3 KB | ~4-6 KB | ~3x | +| 1,000 | 153 KB | ~35-55 KB | ~3-4x | +| 10,000 | 1.53 MB | ~300-500 KB | ~4-5x | + +Actual ratios depend on payload entropy. + +## License + +MIT diff --git a/packages/compression/package.json b/packages/compression/package.json new file mode 100644 index 000000000..6ecc21f67 --- /dev/null +++ b/packages/compression/package.json @@ -0,0 +1,44 @@ +{ + "name": "@dotprotocol/compression", + "version": "0.3.0", + "description": "DOT Protocol stream compression \u2014 batch v2, varint, RLE, dictionary, prediction, rANS.", + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "import": "./dist/index.js", + "require": "./dist/index.cjs", + "types": "./dist/index.d.ts" + } + }, + "sideEffects": false, + "scripts": { + "build": "tsdown", + "test": "vitest run", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "@dotprotocol/core": "workspace:*", + "zstd-napi": "^0.0.12" + }, + "devDependencies": { + "@types/node": "^20.0.0" + }, + "license": "MIT", + "files": [ + "dist", + "README.md", + "LICENSE" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/dot-protocol/dot.git", + "directory": "packages/compression" + }, + "homepage": "https://github.com/dot-protocol/dot/tree/main/packages/compression#readme", + "bugs": { + "url": "https://github.com/dot-protocol/dot/issues" + } +} diff --git 
a/packages/compression/scripts/train-dictionary.ts b/packages/compression/scripts/train-dictionary.ts
new file mode 100644
index 000000000..6b0b9963d
--- /dev/null
+++ b/packages/compression/scripts/train-dictionary.ts
@@ -0,0 +1,211 @@
+/**
+ * DOT Compression — Dictionary Training Pipeline Script
+ *
+ * Generates DOTs using a sensor profile, serializes batches in plain v2 and
+ * dict-compressed v2, trains a zstd dictionary, and reports compression ratios
+ * plus a Weissman score vs gzip.
+ *
+ * Usage:
+ *   npx tsx scripts/train-dictionary.ts [--profile <name>] [--count <n>] [--batch-size <n>]
+ *
+ * Example:
+ *   npx tsx scripts/train-dictionary.ts --profile kulhadVoltage --count 1000 --batch-size 100
+ */
+
+import { gzipSync } from 'node:zlib';
+import { createHash as cryptoHash } from 'node:crypto';
+import { createKeypair, createBLSKeypair } from '@dotprotocol/core';
+import { generateSensorStream, type SensorProfile } from '../src/sample-generator.js';
+import { serializeBatchV2 } from '../src/batch-v2.js';
+import { trainDictionary } from '../src/zstd.js';
+
+/** Size in bytes of one serialized DOT; used for the raw-size baseline. */
+const DOT_SIZE = 153;
+
+// ─── CLI arg parsing ─────────────────────────────────────────────────────────
+
+/**
+ * Parse a CLI flag value as a strictly positive integer.
+ *
+ * @param flag - Flag name, used only in the error message
+ * @param raw - Raw string value that followed the flag
+ * @throws RangeError if the value does not parse to a positive safe integer.
+ *   A zero batch size would keep the batching loop in main() from advancing,
+ *   and a non-numeric count would propagate NaN through the pipeline.
+ */
+function parsePositiveInt(flag: string, raw: string): number {
+  const value = Number.parseInt(raw, 10);
+  if (!Number.isSafeInteger(value) || value <= 0) {
+    throw new RangeError(`${flag} must be a positive integer, got "${raw}"`);
+  }
+  return value;
+}
+
+/**
+ * Read --profile, --count and --batch-size from process.argv.
+ * Unrecognized arguments are ignored, and a flag with no value keeps its default.
+ *
+ * @returns Profile (default 'kulhadVoltage'), DOT count (default 10,000) and
+ *   batch size (default 100)
+ * @throws RangeError if --count or --batch-size is not a positive integer
+ */
+function parseArgs(): { profile: SensorProfile; count: number; batchSize: number } {
+  const args = process.argv.slice(2);
+  let profile: SensorProfile = 'kulhadVoltage';
+  let count = 10_000;
+  let batchSize = 100;
+
+  for (let i = 0; i < args.length; i++) {
+    const arg = args[i]!;
+    const next = args[i + 1];
+    if (arg === '--profile' && next) {
+      // The profile name is cast without validation here.
+      profile = next as SensorProfile;
+      i++;
+    } else if (arg === '--count' && next) {
+      count = parsePositiveInt(arg, next);
+      i++;
+    } else if (arg === '--batch-size' && next) {
+      batchSize = parsePositiveInt(arg, next);
+      i++;
+    }
+  }
+
+  return { profile, count, batchSize };
+}
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+/** Format a number with en-US digit grouping. */
+function fmt(n: number): string {
+  return n.toLocaleString('en-US');
+}
+
+/** Express a compression ratio as a percentage size reduction, one decimal place. */
+function pct(ratio: number): string {
+  return ((1 - 1 / ratio) * 100).toFixed(1);
+}
+
+// ─── Main ────────────────────────────────────────────────────────────────────
+
+/**
+ * Generate DOTs, serialize them as plain and dictionary-compressed batch v2
+ * frames, gzip the raw bytes as a baseline, and print the comparison report.
+ */
+async function main(): Promise<void> {
+  const { profile, count, batchSize } = parseArgs();
+  const batchCount = Math.ceil(count / batchSize);
+
+  console.log('DOT Compression — Dictionary Training Pipeline');
+  console.log(`Profile: ${profile}`);
+  console.log(`Total DOTs: ${fmt(count)}`);
+  console.log(`Batch size: ${batchSize}`);
+  console.log(`Batches: ${batchCount}`);
+  console.log('');
+
+  // ── Generate all DOTs (one chain, same keypair) ───────────────────────────
+  const keypair = await createKeypair();
+  const blsKeypair = createBLSKeypair();
+
+  const allDots = await generateSensorStream({ count, profile, keypair });
+
+  // ── Group into batches ───────────────────────────────────────────────────
+  const batches: Uint8Array[][] = [];
+  for (let i = 0; i < count; i += batchSize) {
+    batches.push(allDots.slice(i, i + batchSize));
+  }
+
+  // ── Serialize each batch in plain v2 (delta + RLE, no dict) ─────────────
+  const plainBodies: Uint8Array[] = [];
+  let totalPlainSize = 0;
+
+  for (const batch of batches) {
+    const frame = await serializeBatchV2(batch, blsKeypair, {
+      timestampDelta: true,
+      payloadTypeRLE: true,
+    });
+    plainBodies.push(frame);
+    totalPlainSize += frame.length;
+  }
+
+  // ── Train dictionary on plain batch bodies ───────────────────────────────
+  console.log(`Training dictionary on ${batchCount} batch samples (column layout)...`);
+  const dictionary = await trainDictionary(plainBodies);
+  console.log(`Dictionary size: ${fmt(dictionary.length)} bytes`);
+  console.log('');
+
+  // ── Compute dictionary ID: SHA-256(dictionary) ───────────────────────────
+  // Copy the digest Buffer itself rather than its `.buffer`: a Buffer's backing
+  // ArrayBuffer is not guaranteed to span exactly the Buffer's bytes, and the
+  // serializer requires a 32-byte id.
+  const dictionaryId = new Uint8Array(
+    cryptoHash('sha256').update(dictionary).digest(),
+  );
+
+  // ── Serialize each batch WITH dictionary ─────────────────────────────────
+  let totalDictSize = 0;
+  for (const batch of batches) {
+    const frame = await serializeBatchV2(batch, blsKeypair, {
+      timestampDelta: true,
+      payloadTypeRLE: true,
+      dictionary,
+      dictionaryId,
+    });
+    totalDictSize += frame.length;
+  }
+
+  // ── Gzip baseline: concatenate all raw DOT bytes, gzip once ──────────────
+  const totalRawBytes = count * DOT_SIZE;
+  const rawConcat = new Uint8Array(totalRawBytes);
+  let offset = 0;
+  for (const dot of allDots) {
+    rawConcat.set(dot, offset);
+    offset += dot.length;
+  }
+  const gzipped = gzipSync(rawConcat);
+  const totalGzipSize = gzipped.length;
+
+  // ── Compute ratios ────────────────────────────────────────────────────────
+  const ratioDotPlain = totalRawBytes / totalPlainSize;
+  const ratioDotDict = totalRawBytes / totalDictSize;
+  const ratioGzip = totalRawBytes / totalGzipSize;
+
+  const weissmanAlpha = 1.0;
+  const weissmanScore = (weissmanAlpha * ratioDotDict) / ratioGzip;
+
+  const bytesPerDotPlain = totalPlainSize / count;
+  const bytesPerDotDict = totalDictSize / count;
+  const bytesPerDotGzip = totalGzipSize / count;
+  const bytesPerDotRaw = DOT_SIZE;
+
+  const dictBonusRatio = totalPlainSize / totalDictSize;
+  const dictBonusSavings = pct(dictBonusRatio);
+
+  // ── Print report ─────────────────────────────────────────────────────────
+  console.log('Results:');
+  console.log(` Raw DOTs: ${fmt(totalRawBytes).padStart(12)} bytes (${bytesPerDotRaw.toFixed(2)} bytes/DOT)`);
+  console.log(` Batch v2 plain: ${fmt(totalPlainSize).padStart(12)} bytes (${bytesPerDotPlain.toFixed(2)} bytes/DOT)`);
+  console.log(` Batch v2 + dict: ${fmt(totalDictSize).padStart(12)} bytes (${bytesPerDotDict.toFixed(2)} bytes/DOT)`);
+  console.log(` Gzip baseline: ${fmt(totalGzipSize).padStart(12)} bytes (${bytesPerDotGzip.toFixed(2)} bytes/DOT)`);
+  console.log('');
+
+  console.log('Compression ratios:');
+  console.log(` Batch v2 plain: ${ratioDotPlain.toFixed(1)}× vs raw (${pct(ratioDotPlain)}% reduction)`);
+  console.log(` Batch v2 + dict: ${ratioDotDict.toFixed(1)}× vs raw (${pct(ratioDotDict)}% reduction)`);
+  console.log(` Dictionary bonus: ${dictBonusRatio.toFixed(1)}× over plain (${dictBonusSavings}% savings)`);
+  console.log('');
+
+  console.log(`Weissman Score vs gzip: W = ${weissmanScore.toFixed(3)}`);
+  console.log('');
+
+  if (bytesPerDotDict <= 8) {
+    console.log(`✓ Dictionary achieves ≤ 8 bytes/DOT`);
+  } else {
+    console.log(`✗ Target not met: ${bytesPerDotDict.toFixed(2)} bytes/DOT`);
+  }
+}
+
+main().catch(err => {
+  console.error('Error:', err);
+  process.exit(1);
+});
diff --git a/packages/compression/src/batch-v2.ts b/packages/compression/src/batch-v2.ts
new file mode 100644
index 000000000..052c5ab3e
--- /dev/null
+++ b/packages/compression/src/batch-v2.ts
@@ -0,0 +1,810 @@
+/**
+ * Batch v2 serializer — column-oriented DOT batch with BLS aggregate signature.
+ *
+ * Wire format:
+ *   HEADER (86 bytes; 118 bytes with dictionary_id; 599 bytes with prediction metadata):
+ *     [0]      version = 0x03
+ *     [1]      flags:
+ *                bit0 = timestamp_delta_encoded
+ *                bit1 = payload_type_rle
+ *                bit2 = reserved
+ *                bit3 = dictionary_compressed
+ *                bit4 = prediction (payload column is predictor + rANS coded;
+ *                       the serializer never sets it together with bit3)
+ *     [2..5]   dot_count: uint32 LE
+ *     [6..37]  shared_pubkey: 32B Ed25519 public key
+ *     [38..85] aggregated_bls_sig: 48B BLS G1 aggregate signature
+ *     [86..117] dictionary_id: 32B SHA-256 of dictionary (only present when bit3 set)
+ *     [86..598] prediction metadata: 1B modelId + 512B frequency table
+ *               (256 × uint16 LE; only present when bit4 set)
+ *   BODY (column-oriented):
+ *     [if bit3] zstd-compressed body (timestamps + types + payloads as normal columns)
+ *     [else]    raw columns as before:
+ *       [if bit0] varint-delta timestamps (encodeTimestampDeltas)
+ *       [else]    raw timestamps: dot_count × 8B big-endian uint64
+ *       [if bit1] RLE-encoded types (encodePayloadTypes)
+ *       [else]    raw types: dot_count × 1B
+ *       [if bit4] rANS-coded payload residuals, followed by their byte length as uint32 LE
+ *       [else]    raw payloads: dot_count × 16B
+ *
+ * Chain hashes are NOT stored — reconstructed on decode via sequential SHA-256.
+ *
+ * Sprint 1 limitation: assumes genesis-anchored batch (chain[0] = 32 zeros).
+ * For non-genesis chains, the first chain hash must be tracked externally.
+ * A future sprint will add an optional prevChainHash field to the header.
+ */
+
+import { createHash } from 'node:crypto';
+import {
+  signBLS,
+  aggregateSignatures,
+  verifyAggregateSameSigner,
+  type BLSKeypair,
+} from '@dotprotocol/core';
+import { encodeTimestampDeltas, decodeTimestampDeltas } from './timestamp-delta.js';
+import { encodePayloadTypes, decodePayloadTypes } from './rle.js';
+import { compressWithDictionary, decompressWithDictionary } from './zstd.js';
+import { type DictionaryRegistry } from './dictionary-registry.js';
+import {
+  type PayloadPredictor,
+  NullPredictor,
+  LastValuePredictor,
+  LinearPredictor,
+  computeResidual,
+  applyResidual,
+} from './predictor.js';
+import { buildFrequencyTable, ransEncode, ransDecode, type FrequencyTable } from './rans.js';
+
+// ─── Constants ────────────────────────────────────────────────────────────────
+
+// Frame version byte: written at offset 0 by the serializer and checked by the deserializer.
+const BATCH_V2_VERSION = 0x03;
+const HEADER_SIZE = 86; // 1(ver) + 1(flags) + 4(count) + 32(pubkey) + 48(aggSig)
+const DICT_ID_SIZE = 32;
+const HEADER_SIZE_WITH_DICT = HEADER_SIZE + DICT_ID_SIZE; // 118B when dict flag set
+// Bits of the flags byte at frame offset 1.
+const FLAG_TS_DELTA = 0b00000001; // bit 0: timestamp column is varint-delta encoded
+const FLAG_TYPE_RLE = 0b00000010; // bit 1: type column is RLE-encoded
+// bit 2 = reserved
+export const FLAG_DICT_COMPRESSED = 0x08; // bit 3: body is zstd-compressed with a dictionary
+export const FLAG_PREDICTION = 0x10; // bit 4: payload column uses predictor + rANS coding
+
+// Prediction metadata sizes
+const FREQ_TABLE_BYTES = 256 * 2; // 256 symbols × 2 bytes (uint16 LE) = 512 bytes
+const PREDICTION_META_SIZE = 1 + FREQ_TABLE_BYTES; // modelId(1) + freqTable(512) = 513 bytes
+
+// DOT wire-format sizes in bytes:
+// pubkey(32) + sig(64) + chain(32) + timestamp(8) + type(1) + payload(16) = 153.
+const DOT_SIZE = 153;
+const PUBKEY_SIZE = 32;
+const SIG_SIZE = 64;
+const CHAIN_SIZE = 32;
+const PAYLOAD_SIZE = 16;
+const BLS_AGG_SIG_SIZE = 48; // aggregate signature as stored in the frame header
+
+// Byte offsets in DOT wire format
+const OFF_PUBKEY = 0;
+const OFF_SIG = 32;
+const OFF_CHAIN = 96;
+const OFF_TS = 128; // 8-byte big-endian timestamp (see readTimestamp/writeTimestamp)
+const OFF_TYPE = 136;
+const OFF_PAYLOAD = 137;
+
+// ─── Types ────────────────────────────────────────────────────────────────────
+
+/** Options accepted by serializeBatchV2; every field is optional. */
+export interface SerializeBatchV2Options {
+  /** Encode timestamps as varint deltas (default: true). Saves ~75% on periodic streams. */
+  timestampDelta?: boolean;
+  /** RLE-encode payload type column (default: true). Saves ~90%+ on homogeneous batches. */
+  payloadTypeRLE?: boolean;
+  /**
+   * Trained zstd dictionary bytes. When provided together with `dictionaryId`,
+   * the encoded body columns are zstd-compressed using this dictionary and
+   * FLAG_DICT_COMPRESSED (bit 3) is set in the header flags.
+   */
+  dictionary?: Uint8Array;
+  /**
+   * 32-byte SHA-256 ID of the dictionary (SHA-256 of the dictionary bytes).
+   * Stored in the extended header [86..117] so the deserializer can look it up
+   * in a DictionaryRegistry. Must be provided together with `dictionary`.
+   * The serializer throws RangeError for any length other than 32 bytes.
+   */
+  dictionaryId?: Uint8Array;
+  /**
+   * Payload predictor to use for prediction + rANS coding of the payload column.
+   * When set, FLAG_PREDICTION (bit 4) is set in the frame header flags.
+   *
+   * Use a specific PayloadPredictor instance (e.g. new LinearPredictor()) for
+   * explicit control, or 'auto' to automatically select between LinearPredictor
+   * and NullPredictor based on a compressibility heuristic.
+   *
+   * Mutually exclusive with `dictionary` / `dictionaryId` — both cannot be set
+   * simultaneously. Throws TypeError if both are provided.
+   */
+  predictor?: PayloadPredictor | 'auto';
+}
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+/** Compute SHA-256 of the given bytes. */
+function sha256(data: Uint8Array): Uint8Array {
+  // Wrap the digest in a fresh Uint8Array so callers get a plain typed array.
+  return new Uint8Array(createHash('sha256').update(data).digest());
+}
+
+/**
+ * Extract the 89-byte signed portion from a 153-byte DOT buffer.
+ * Returns pubkey(32) + chain+ts+type+payload(57) = 89 bytes.
+ * Mirrors core's signedBytes(buf) function.
+ */
+function extractSignedBytes(dot: Uint8Array): Uint8Array {
+  // The signed message is the pubkey followed by everything from the chain
+  // hash to the end of the DOT; the 64-byte sig field in between is skipped.
+  const pubkeyPart = dot.subarray(OFF_PUBKEY, OFF_PUBKEY + PUBKEY_SIZE);
+  const tailPart = dot.subarray(OFF_CHAIN, DOT_SIZE);
+  const signed = new Uint8Array(89);
+  signed.set(pubkeyPart, 0);
+  signed.set(tailPart, 32);
+  return signed;
+}
+
+/**
+ * Zero-pad a 48-byte aggregate BLS signature to the 64-byte DOT sig field.
+ * The signature occupies bytes [0..47]; bytes [48..63] stay zero.
+ */
+function buildSigField(aggSig: Uint8Array): Uint8Array {
+  const padded = new Uint8Array(SIG_SIZE);
+  padded.set(aggSig, 0);
+  return padded;
+}
+
+/**
+ * Read the big-endian uint64 stored at `offset` in `buf` and return it as a bigint.
+ */
+function readTimestamp(buf: Uint8Array, offset: number): bigint {
+  // The view is anchored at the field itself, so the read position is always 0.
+  return new DataView(buf.buffer, buf.byteOffset + offset, 8).getBigUint64(0, false);
+}
+
+/**
+ * Store `ts` as a big-endian uint64 at `offset` in `buf`.
+ */
+function writeTimestamp(buf: Uint8Array, offset: number, ts: bigint): void {
+  new DataView(buf.buffer, buf.byteOffset + offset, 8).setBigUint64(0, ts, false);
+}
+
+// ─── Prediction helpers ───────────────────────────────────────────────────────
+
+/**
+ * Pack the 256 entries of `table.freq` as little-endian uint16 values (512 bytes).
+ * cumFreq is not written; the decoder rebuilds it from freq.
+ */
+function serializeFreqTable(table: FrequencyTable): Uint8Array {
+  const out = new Uint8Array(FREQ_TABLE_BYTES);
+  const writer = new DataView(out.buffer);
+  let pos = 0;
+  for (let sym = 0; sym < 256; sym++) {
+    writer.setUint16(pos, table.freq[sym]!, true);
+    pos += 2;
+  }
+  return out;
+}
+
+/**
+ * Deserialize a FrequencyTable from 512 bytes at `offset` in `buf`.
+ * Reconstructs cumFreq from freq (deterministic).
+ */
+function deserializeFreqTable(buf: Uint8Array, offset: number): FrequencyTable {
+  const view = new DataView(buf.buffer, buf.byteOffset);
+  const freq = new Uint16Array(256);
+  const cumFreq = new Uint16Array(257); // cumFreq[0] stays 0
+  // Single pass: read each little-endian uint16 frequency and extend the running sum.
+  for (let sym = 0; sym < 256; sym++) {
+    const f = view.getUint16(offset + sym * 2, true);
+    freq[sym] = f;
+    cumFreq[sym + 1] = cumFreq[sym]! + f;
+  }
+  return { freq, cumFreq };
+}
+
+/**
+ * Run `predictor` over `payloads` in order and collect one residual per payload
+ * (XOR residuals per the predictor module) into a flat N × PAYLOAD_SIZE buffer.
+ * The predictor is advanced with each actual payload after its residual is taken,
+ * so it must be in its reset state when this is called.
+ */
+function computeAllResiduals(payloads: Uint8Array[], predictor: PayloadPredictor): Uint8Array {
+  const flat = new Uint8Array(payloads.length * PAYLOAD_SIZE);
+  for (const [idx, payload] of payloads.entries()) {
+    const guess = predictor.predict();
+    flat.set(computeResidual(payload!, guess), idx * PAYLOAD_SIZE);
+    predictor.update(payload!);
+  }
+  return flat;
+}
+
+/**
+ * Pick the predictor for 'auto' mode.
+ * Scores LinearPredictor and NullPredictor by the sum of their residual bytes
+ * (smaller sum = more zero bytes = assumed more compressible) and returns a
+ * fresh instance of the winner. Ties go to NullPredictor.
+ */
+function selectPredictor(payloads: Uint8Array[]): PayloadPredictor {
+  const sumBytes = (bytes: Uint8Array): number => {
+    let total = 0;
+    for (const b of bytes) {
+      total += b;
+    }
+    return total;
+  };
+
+  const linearCandidate = new LinearPredictor();
+  const nullCandidate = new NullPredictor();
+
+  const linearScore = sumBytes(computeAllResiduals(payloads, linearCandidate));
+  const nullScore = sumBytes(computeAllResiduals(payloads, nullCandidate));
+
+  // The scoring instances have consumed the payloads, so hand back a new one.
+  if (linearScore < nullScore) {
+    return new LinearPredictor();
+  }
+  return new NullPredictor();
+}
+
+// ─── Serialize ────────────────────────────────────────────────────────────────
+
+/**
+ * Serialize N DOTs (same pubkey, chain order) into a batch v2 frame.
+ * Column-oriented layout: timestamps | types | payloads.
+ * BLS aggregate signature over all DOT signed portions.
+ *
+ * The chain-hash and sig fields of the input DOTs are not stored: each DOT is
+ * rebuilt with a zeroed sig field and a chain hash recomputed from the previous
+ * rebuilt DOT (32 zero bytes for the first), and that form is what gets signed.
+ *
+ * @param dots - Array of 153-byte Uint8Array DOTs, chain order, same pubkey
+ * @param blsKeypair - BLS keypair for aggregate signing (same identity as Ed25519 DOTs)
+ * @param options - Column encodings (timestampDelta / payloadTypeRLE default to true)
+ *   plus an optional dictionary or predictor
+ * @returns The encoded frame: fixed header, optional dictionary id or prediction
+ *   metadata, then the body
+ * @throws RangeError if `dots` is empty, a DOT is not 153 bytes, dictionaryId is
+ *   not 32 bytes, or the aggregate signature is not 48 bytes
+ * @throws TypeError if only one of dictionary/dictionaryId is given, or a
+ *   predictor is combined with a dictionary
+ * @throws Error if the DOTs do not all share dot[0]'s pubkey
+ */
+export async function serializeBatchV2(
+  dots: Uint8Array[],
+  blsKeypair: BLSKeypair,
+  options?: SerializeBatchV2Options,
+): Promise<Uint8Array> {
+  // ── Validate input ──────────────────────────────────────────────────────────
+  if (dots.length === 0) {
+    throw new RangeError('serializeBatchV2: dots array must not be empty');
+  }
+  for (let i = 0; i < dots.length; i++) {
+    if (dots[i]!.length !== DOT_SIZE) {
+      throw new RangeError(
+        `serializeBatchV2: dot[${i}] has ${dots[i]!.length} bytes, expected ${DOT_SIZE}`,
+      );
+    }
+  }
+  const sharedPubkey = dots[0]!.subarray(OFF_PUBKEY, OFF_PUBKEY + PUBKEY_SIZE);
+  for (let i = 1; i < dots.length; i++) {
+    const pk = dots[i]!.subarray(OFF_PUBKEY, OFF_PUBKEY + PUBKEY_SIZE);
+    for (let b = 0; b < PUBKEY_SIZE; b++) {
+      if (pk[b] !== sharedPubkey[b]) {
+        throw new Error(`serializeBatchV2: dot[${i}] has different pubkey than dot[0]`);
+      }
+    }
+  }
+
+  // ── Options ─────────────────────────────────────────────────────────────────
+  const useTsDelta = options?.timestampDelta !== false; // default true
+  const useTypeRLE = options?.payloadTypeRLE !== false; // default true
+
+  // Guard: both dictionary and dictionaryId must be provided together
+  if (options?.dictionary && !options?.dictionaryId) {
+    throw new TypeError('serializeBatchV2: dictionary requires dictionaryId');
+  }
+  if (options?.dictionaryId && !options?.dictionary) {
+    throw new TypeError('serializeBatchV2: dictionaryId requires dictionary');
+  }
+
+  // Guard: predictor and dictionary are mutually exclusive
+  if (options?.predictor && (options?.dictionary || options?.dictionaryId)) {
+    throw new TypeError(
+      'serializeBatchV2: predictor and dictionary are mutually exclusive — cannot set both',
+    );
+  }
+
+  const useDict = !!(options?.dictionary && options?.dictionaryId);
+
+  if (useDict) {
+    if (options!.dictionaryId!.length !== DICT_ID_SIZE) {
+      throw new RangeError(
+        `serializeBatchV2: dictionaryId must be ${DICT_ID_SIZE} bytes, got ${options!.dictionaryId!.length}`,
+      );
+    }
+  }
+
+  // ── Extract columns ─────────────────────────────────────────────────────────
+  const timestamps: bigint[] = new Array(dots.length);
+  const types = new Uint8Array(dots.length);
+  const payloads: Uint8Array[] = new Array(dots.length);
+
+  for (let i = 0; i < dots.length; i++) {
+    const dot = dots[i]!;
+    timestamps[i] = readTimestamp(dot, OFF_TS);
+    types[i] = dot[OFF_TYPE]!;
+    payloads[i] = dot.subarray(OFF_PAYLOAD, OFF_PAYLOAD + PAYLOAD_SIZE);
+  }
+
+  // ── Build BLS-form DOTs (zeroed sig field) for deterministic chain hashing ──
+  // CRITICAL: We sign signedBytes of the BLS-form DOT (with zeroed sig, not
+  // the original Ed25519 sig). This makes signing and verification fully
+  // deterministic: the deserializer reconstructs the same zeroed-sig DOTs
+  // from the frame's compressed data, computes their signedBytes, and verifies.
+  const zeroedSig = new Uint8Array(SIG_SIZE); // 64 zero bytes
+  const blsFormDots: Uint8Array[] = new Array(dots.length);
+  let prevBlsDot: Uint8Array | null = null;
+
+  for (let i = 0; i < dots.length; i++) {
+    let chainHash: Uint8Array;
+    if (i === 0) {
+      // Sprint 1: genesis-anchored batch — chain[0] = 32 zero bytes
+      chainHash = new Uint8Array(CHAIN_SIZE);
+    } else {
+      chainHash = sha256(prevBlsDot!);
+    }
+
+    // Rebuilt purely from the extracted columns; the input DOT's own sig and
+    // chain fields are intentionally not consulted.
+    const blsDot = new Uint8Array(DOT_SIZE);
+    blsDot.set(sharedPubkey, OFF_PUBKEY);
+    blsDot.set(zeroedSig, OFF_SIG);
+    blsDot.set(chainHash, OFF_CHAIN);
+    writeTimestamp(blsDot, OFF_TS, timestamps[i]!);
+    blsDot[OFF_TYPE] = types[i]!;
+    blsDot.set(payloads[i]!, OFF_PAYLOAD);
+
+    blsFormDots[i] = blsDot;
+    prevBlsDot = blsDot;
+  }
+
+  // ── BLS sign each BLS-form DOT's signed portion ─────────────────────────────
+  const perDotSigs: Uint8Array[] = new Array(dots.length);
+  for (let i = 0; i < dots.length; i++) {
+    const msg = extractSignedBytes(blsFormDots[i]!);
+    perDotSigs[i] = signBLS(msg, blsKeypair.privateKey);
+  }
+  const aggSig = aggregateSignatures(perDotSigs);
+  // The header reserves exactly BLS_AGG_SIG_SIZE bytes; any other length would
+  // be truncated or spill into the following fields when copied into the frame.
+  if (aggSig.length !== BLS_AGG_SIG_SIZE) {
+    throw new RangeError(
+      `serializeBatchV2: aggregate signature is ${aggSig.length} bytes, expected ${BLS_AGG_SIG_SIZE}`,
+    );
+  }
+
+  // ── Resolve predictor ───────────────────────────────────────────────────────
+  let resolvedPredictor: PayloadPredictor | null = null;
+  if (options?.predictor) {
+    if (options.predictor === 'auto') {
+      resolvedPredictor = selectPredictor(payloads);
+    } else {
+      resolvedPredictor = options.predictor;
+    }
+    resolvedPredictor.reset();
+  }
+  const usePrediction = resolvedPredictor !== null;
+
+  // ── Encode columns ──────────────────────────────────────────────────────────
+  const encodedTs = useTsDelta
+    ? encodeTimestampDeltas(timestamps)
+    : (() => {
+        const buf = new Uint8Array(dots.length * 8);
+        for (let i = 0; i < dots.length; i++) {
+          writeTimestamp(buf, i * 8, timestamps[i]!);
+        }
+        return buf;
+      })();
+
+  const encodedTypes = useTypeRLE
+    ? encodePayloadTypes(types)
+    : types.slice();
+
+  // ── Encode payload column ───────────────────────────────────────────────────
+  // Either: raw payloads, predictor+rANS residuals, or dict-compressed body.
+  let predictionMeta: Uint8Array | null = null; // 513 bytes when usePrediction
+  let ransEncodedResiduals: Uint8Array | null = null;
+  let ransEncodedLen = 0;
+
+  if (usePrediction) {
+    // Compute residuals with the resolved predictor
+    const residuals = computeAllResiduals(payloads, resolvedPredictor!);
+
+    // Build frequency table from residuals
+    const freqTable = buildFrequencyTable(residuals);
+
+    // rANS-encode all residuals as one stream
+    ransEncodedResiduals = ransEncode(residuals, freqTable);
+    ransEncodedLen = ransEncodedResiduals.length;
+
+    // Serialize prediction metadata: modelId(1) + freqTable(512) = 513 bytes
+    predictionMeta = new Uint8Array(PREDICTION_META_SIZE);
+    predictionMeta[0] = resolvedPredictor!.modelId;
+    predictionMeta.set(serializeFreqTable(freqTable), 1);
+  }
+
+  const encodedPayloads = usePrediction
+    ? null // payload column is replaced by rANS residuals in body
+    : (() => {
+        const buf = new Uint8Array(dots.length * PAYLOAD_SIZE);
+        for (let i = 0; i < dots.length; i++) {
+          buf.set(payloads[i]!, i * PAYLOAD_SIZE);
+        }
+        return buf;
+      })();
+
+  // ── Assemble body ───────────────────────────────────────────────────────────
+  // Body layout:
+  //   ts column | types column | [if prediction: rans data + uint32-LE rans_len | else: raw payloads]
+  // If dict (no prediction): entire body is zstd-compressed.
+  let body: Uint8Array;
+
+  if (usePrediction) {
+    // Body = ts | types | rans_data | uint32LE(rans_len) [last 4 bytes]
+    // Storing rans_len at the END allows clean RLE-types boundary detection on decode:
+    //   rleEnd = bodyBuf.length - 4 - ransLen
+    const bodySize = encodedTs.length + encodedTypes.length + ransEncodedLen + 4;
+    const bodyBuf = new Uint8Array(bodySize);
+    let off = 0;
+    bodyBuf.set(encodedTs, off); off += encodedTs.length;
+    bodyBuf.set(encodedTypes, off); off += encodedTypes.length;
+    bodyBuf.set(ransEncodedResiduals!, off); off += ransEncodedLen;
+    // uint32 LE: rANS encoded byte count (last 4 bytes of body)
+    const lenView = new DataView(bodyBuf.buffer, bodyBuf.byteOffset + off, 4);
+    lenView.setUint32(0, ransEncodedLen, true);
+    body = bodyBuf;
+  } else {
+    const rawBody = new Uint8Array(
+      encodedTs.length + encodedTypes.length + encodedPayloads!.length,
+    );
+    let off = 0;
+    rawBody.set(encodedTs, off); off += encodedTs.length;
+    rawBody.set(encodedTypes, off); off += encodedTypes.length;
+    rawBody.set(encodedPayloads!, off);
+
+    body = useDict ? compressWithDictionary(rawBody, options!.dictionary!) : rawBody;
+  }
+
+  // ── Assemble frame ──────────────────────────────────────────────────────────
+  // Header area = fixed header + optional dict_id + optional prediction metadata
+  const headerSize = useDict ? HEADER_SIZE_WITH_DICT : HEADER_SIZE;
+  const predMetaSize = usePrediction ? PREDICTION_META_SIZE : 0;
+  const totalSize = headerSize + predMetaSize + body.length;
+  const frame = new Uint8Array(totalSize);
+  let cursor = 0;
+
+  // Header
+  frame[cursor++] = BATCH_V2_VERSION;
+  frame[cursor++] =
+    (useTsDelta ? FLAG_TS_DELTA : 0) |
+    (useTypeRLE ? FLAG_TYPE_RLE : 0) |
+    (useDict ? FLAG_DICT_COMPRESSED : 0) |
+    (usePrediction ? FLAG_PREDICTION : 0);
+
+  // dot_count: uint32 LE
+  const countView = new DataView(frame.buffer, frame.byteOffset + cursor, 4);
+  countView.setUint32(0, dots.length, true);
+  cursor += 4;
+
+  // shared_pubkey: 32B
+  frame.set(sharedPubkey, cursor);
+  cursor += PUBKEY_SIZE;
+
+  // aggregated_bls_sig: 48B
+  frame.set(aggSig, cursor);
+  cursor += BLS_AGG_SIG_SIZE;
+
+  // dictionary_id: 32B (only when FLAG_DICT_COMPRESSED is set)
+  if (useDict) {
+    frame.set(options!.dictionaryId!, cursor);
+    cursor += DICT_ID_SIZE;
+  }
+
+  // prediction metadata: 513B (only when FLAG_PREDICTION is set)
+  if (usePrediction) {
+    frame.set(predictionMeta!, cursor);
+    cursor += PREDICTION_META_SIZE;
+  }
+
+  // Body (columns, possibly prediction-coded or dict-compressed)
+  frame.set(body, cursor);
+
+  return frame;
+}
+
+// ─── Deserialize ──────────────────────────────────────────────────────────────
+
+/**
+ * Deserialize a batch v2 frame back to 153-byte DOTs.
+ * Verifies BLS aggregate signature. Throws if invalid.
+ *
+ * Sprint 1 limitation: assumes genesis-anchored batch — chain[0] = 32 zero bytes.
+ * Each subsequent chain hash is reconstructed as SHA-256(full_dot[i-1]) where
+ * the sig field of each reconstructed DOT is the aggregate sig zero-padded to 64B.
+ *
+ * @param buf - The batch v2 frame
+ * @param blsPubkey - 96-byte BLS G2 public key for verification
+ * @param dictionaryRegistry - Optional registry for looking up zstd dictionaries.
+ *   Required when the frame has FLAG_DICT_COMPRESSED set.
+ */ +export async function deserializeBatchV2( + buf: Uint8Array, + blsPubkey: Uint8Array, + dictionaryRegistry?: DictionaryRegistry, +): Promise { + if (buf.length < HEADER_SIZE) { + throw new RangeError( + `deserializeBatchV2: buffer too short (${buf.length} bytes, need at least ${HEADER_SIZE})`, + ); + } + + // ── Parse header ──────────────────────────────────────────────────────────── + let cursor = 0; + + const version = buf[cursor++]!; + if (version !== BATCH_V2_VERSION) { + throw new Error( + `deserializeBatchV2: unsupported version 0x${version.toString(16).padStart(2, '0')} (expected 0x${BATCH_V2_VERSION.toString(16).padStart(2, '0')})`, + ); + } + + const flags = buf[cursor++]!; + const hasTsDelta = (flags & FLAG_TS_DELTA) !== 0; + const hasTypeRLE = (flags & FLAG_TYPE_RLE) !== 0; + const hasDictCompressed = (flags & FLAG_DICT_COMPRESSED) !== 0; + const hasPrediction = (flags & FLAG_PREDICTION) !== 0; + + const countView = new DataView(buf.buffer, buf.byteOffset + cursor, 4); + const dotCount = countView.getUint32(0, true); + cursor += 4; + + if (dotCount === 0) { + throw new RangeError('deserializeBatchV2: dot_count is 0'); + } + + const pubkey = buf.slice(cursor, cursor + PUBKEY_SIZE); + cursor += PUBKEY_SIZE; + + const aggSig = buf.slice(cursor, cursor + BLS_AGG_SIG_SIZE); + cursor += BLS_AGG_SIG_SIZE; + + // cursor is now at HEADER_SIZE (86) + + // ── Read dictionary_id if present and decompress body ──────────────────────── + let bodyBuf: Uint8Array; + if (hasDictCompressed) { + if (buf.length < HEADER_SIZE_WITH_DICT) { + throw new RangeError( + `deserializeBatchV2: buffer too short for dict header (${buf.length} bytes, need at least ${HEADER_SIZE_WITH_DICT})`, + ); + } + const dictionaryId = buf.slice(cursor, cursor + DICT_ID_SIZE); + cursor += DICT_ID_SIZE; + // cursor is now at HEADER_SIZE_WITH_DICT (118) + + if (!dictionaryRegistry) { + const idHex = Array.from(dictionaryId) + .map(b => b.toString(16).padStart(2, '0')) + .join(''); + throw new 
Error( + `deserializeBatchV2: frame uses dictionary compression (id=${idHex}) but no dictionaryRegistry was provided`, + ); + } + + const entry = dictionaryRegistry.get(dictionaryId); + if (!entry) { + const idHex = Array.from(dictionaryId) + .map(b => b.toString(16).padStart(2, '0')) + .join(''); + throw new Error( + `deserializeBatchV2: unknown dictionary id=${idHex} — register it in the DictionaryRegistry before deserializing`, + ); + } + + // Decompress body using the looked-up dictionary + const compressedBody = buf.subarray(cursor); + bodyBuf = decompressWithDictionary(compressedBody, entry.dictionary); + } else { + // No dictionary compression — body starts at cursor (HEADER_SIZE = 86) + bodyBuf = buf.subarray(cursor); + } + + // ── Read prediction metadata if FLAG_PREDICTION is set ────────────────────── + // Prediction metadata is stored after dict_id (if any) and BEFORE the body. + // Format: 1B modelId + 512B freq table = 513 bytes total. + // We read it from buf (not bodyBuf), advancing cursor past the prediction meta. 
+ let predictionPredictor: PayloadPredictor | null = null; + let predictionFreqTable: FrequencyTable | null = null; + + if (hasPrediction) { + // Re-derive the absolute cursor position in buf + // cursor advanced through: version(1) + flags(1) + count(4) + pubkey(32) + aggSig(48) = 86 + // + dict_id(32) if hasDictCompressed + // bodyBuf is buf.subarray(cursor_after_dict) — but we haven't advanced cursor for prediction meta yet + // We need to read from the start of bodyBuf (before body columns) + if (bodyBuf.length < PREDICTION_META_SIZE) { + throw new RangeError( + `deserializeBatchV2: buffer too short for prediction metadata (${bodyBuf.length} bytes, need ${PREDICTION_META_SIZE})`, + ); + } + const modelId = bodyBuf[0]!; + predictionFreqTable = deserializeFreqTable(bodyBuf, 1); + + // Select predictor by modelId + switch (modelId) { + case 0x00: + predictionPredictor = new NullPredictor(); + break; + case 0x01: + predictionPredictor = new LastValuePredictor(); + break; + case 0x02: + predictionPredictor = new LinearPredictor(); + break; + default: + throw new Error(`deserializeBatchV2: unknown predictor modelId 0x${modelId.toString(16)}`); + } + predictionPredictor.reset(); + + // Advance bodyBuf past the prediction metadata + bodyBuf = bodyBuf.subarray(PREDICTION_META_SIZE); + } + + // From here on, all column decoding operates on bodyBuf (decompressed or raw, + // with prediction metadata already consumed if hasPrediction). + // bodyCursor is always relative to bodyBuf (starts at 0). 
+ let bodyCursor = 0; + const payloadsTotalSize = dotCount * PAYLOAD_SIZE; + let timestamps: bigint[]; + let tsColumnSize: number; + + if (hasTsDelta) { + // Delta-encoded: decode first, then re-encode to measure consumed bytes + const tsBuf = bodyBuf.subarray(bodyCursor); + timestamps = decodeTimestampDeltas(tsBuf, dotCount); + // Measure size by re-encoding (idempotent roundtrip) + tsColumnSize = encodeTimestampDeltas(timestamps).length; + } else { + // Raw timestamps: dotCount × 8B + tsColumnSize = dotCount * 8; + if (bodyCursor + tsColumnSize > bodyBuf.length) { + throw new RangeError('deserializeBatchV2: buffer too short for raw timestamps'); + } + timestamps = []; + for (let i = 0; i < dotCount; i++) { + timestamps.push(readTimestamp(bodyBuf, bodyCursor + i * 8)); + } + } + + const tsEnd = bodyCursor + tsColumnSize; + + // ── Decode type column ────────────────────────────────────────────────────── + let types: Uint8Array; + let typesEnd: number; + + if (hasTypeRLE) { + // RLE-encoded types — the types column ends where payload data begins. + // When prediction is active, payload data = uint32(4) + rans_bytes (variable). + // When prediction is inactive, payload data = dotCount × 16B (fixed). + // We find the RLE end differently for each case. 
+ if (hasPrediction) { + // With prediction the body layout is: [ts][types][rans_data][uint32 rans_len (last 4B)] + // rans_len is stored as the last 4 bytes → rleEnd = bodyBuf.length - 4 - ransLen + const ransLenView = new DataView( + bodyBuf.buffer, + bodyBuf.byteOffset + bodyBuf.length - 4, + 4, + ); + const ransEncodedLen = ransLenView.getUint32(0, true); + // rle types run from tsEnd to (bodyBuf.length - 4 - ransEncodedLen) + const rleEnd = bodyBuf.length - 4 - ransEncodedLen; + if (rleEnd <= tsEnd) { + throw new RangeError('deserializeBatchV2: buffer too short for RLE types + rANS data'); + } + const rleSlice = bodyBuf.subarray(tsEnd, rleEnd); + types = decodePayloadTypes(rleSlice, dotCount); + typesEnd = rleEnd; + } else { + // No prediction: payloads are always at the tail: dotCount × 16B. + const rleEnd = bodyBuf.length - payloadsTotalSize; + if (rleEnd <= tsEnd) { + throw new RangeError('deserializeBatchV2: buffer too short for RLE types + payloads'); + } + const rleSlice = bodyBuf.subarray(tsEnd, rleEnd); + types = decodePayloadTypes(rleSlice, dotCount); + typesEnd = rleEnd; + } + } else { + // Raw types: dotCount × 1B + if (tsEnd + dotCount > bodyBuf.length) { + throw new RangeError('deserializeBatchV2: buffer too short for raw types'); + } + types = bodyBuf.slice(tsEnd, tsEnd + dotCount); + typesEnd = tsEnd + dotCount; + } + + // ── Decode payload column ─────────────────────────────────────────────────── + // Decode payloads into a flat array: either raw bytes or rANS-decoded residuals. 
+ const decodedPayloads: Uint8Array[] = new Array(dotCount); + + if (hasPrediction) { + // With prediction: body = [ts][types][rans_data][uint32 rans_len (last 4B)] + // ransEnd = bodyBuf.length - 4 + // ransStart = typesEnd + const ransLen = new DataView( + bodyBuf.buffer, + bodyBuf.byteOffset + bodyBuf.length - 4, + 4, + ).getUint32(0, true); + const ransStart = typesEnd; + const ransEnd = bodyBuf.length - 4; + if (ransEnd - ransStart !== ransLen) { + throw new RangeError( + `deserializeBatchV2: rANS data length mismatch (got ${ransEnd - ransStart}, expected ${ransLen})`, + ); + } + const ransData = bodyBuf.subarray(ransStart, ransEnd); + const totalResidualBytes = dotCount * PAYLOAD_SIZE; + const residuals = ransDecode(ransData, predictionFreqTable!, totalResidualBytes); + + // Reconstruct payloads from residuals using the predictor + for (let i = 0; i < dotCount; i++) { + const predicted = predictionPredictor!.predict(); + const residual = residuals.subarray(i * PAYLOAD_SIZE, (i + 1) * PAYLOAD_SIZE); + const actual = applyResidual(residual, predicted); + decodedPayloads[i] = actual; + predictionPredictor!.update(actual); + } + } else { + // Raw payloads at tail + if (typesEnd + payloadsTotalSize > bodyBuf.length) { + throw new RangeError('deserializeBatchV2: buffer too short for payloads'); + } + for (let i = 0; i < dotCount; i++) { + const off = typesEnd + i * PAYLOAD_SIZE; + decodedPayloads[i] = bodyBuf.subarray(off, off + PAYLOAD_SIZE); + } + } + + // ── Reconstruct BLS-form DOTs (zeroed sig field) for verification ─────────── + // CRITICAL: Use zeroed sig field for chain hash computation — same rule as + // serialization. The BLS aggregate sig is NOT used in chain hashes; it would + // create a circular dependency (sig depends on messages, messages depend on chain + // hashes, chain hashes depend on sig). Zeroed sig is the canonical form. + // + // Sprint 1 limitation: genesis-anchored batch — chain[0] = 32 zero bytes. 
+ const zeroedSig = new Uint8Array(SIG_SIZE); // 64 zero bytes + const blsFormDots: Uint8Array[] = new Array(dotCount); + let prevChain = new Uint8Array(CHAIN_SIZE); // 32 zeros = genesis chain + + for (let i = 0; i < dotCount; i++) { + const dot = new Uint8Array(DOT_SIZE); + + // pubkey [0..31] + dot.set(pubkey, OFF_PUBKEY); + + // sig field [32..95] — zeroed (canonical BLS form for chain hashing) + dot.set(zeroedSig, OFF_SIG); + + // chain hash [96..127] — previous DOT's chain (or genesis zeros) + dot.set(prevChain, OFF_CHAIN); + + // timestamp [128..135] + writeTimestamp(dot, OFF_TS, timestamps[i]!); + + // type [136] + dot[OFF_TYPE] = types[i]!; + + // payload [137..152] + dot.set(decodedPayloads[i]!, OFF_PAYLOAD); + + blsFormDots[i] = dot; + + // Chain hash for next DOT: SHA-256(this BLS-form DOT) + prevChain = sha256(dot); + } + + // ── BLS aggregate signature verification ──────────────────────────────────── + const signedMessages = blsFormDots.map(dot => extractSignedBytes(dot)); + const valid = verifyAggregateSameSigner(aggSig, signedMessages, blsPubkey); + if (!valid) { + throw new Error('deserializeBatchV2: BLS aggregate signature verification failed'); + } + + // ── Build final DOTs with aggSig in sig field (wire-format completeness) ───── + // The returned DOTs store the aggregate sig (zero-padded to 64B) in the sig + // field for wire-format completeness. Chain hashes were computed from zeroed + // sig form above, so they are preserved here (prevChain already advanced). 
+ const sigField = buildSigField(aggSig); + const dots: Uint8Array[] = new Array(dotCount); + for (let i = 0; i < dotCount; i++) { + const dot = blsFormDots[i]!.slice(); // copy BLS-form DOT + dot.set(sigField, OFF_SIG); // replace zeroed sig with aggSig field + dots[i] = dot; + } + + return dots; +} diff --git a/packages/compression/src/dictionary-registry.ts b/packages/compression/src/dictionary-registry.ts new file mode 100644 index 000000000..6a2a7b0ea --- /dev/null +++ b/packages/compression/src/dictionary-registry.ts @@ -0,0 +1,135 @@ +/** + * Dictionary Registry — maps 32-byte dict IDs to trained zstd dictionaries. + * + * Each dictionary is identified by SHA-256(dictionary_bytes) — 32 bytes. + * This makes IDs deterministic and content-addressable. + * + * In Sprint 2, this is in-memory with JSON serialization. + * In the future, dictionaries become DOTs on a registry chain. + */ + +// ─── Types ──────────────────────────────────────────────────────────────────── + +export interface DictionaryEntry { + id: Uint8Array; // 32-byte SHA-256 hash of dictionary content + dictionary: Uint8Array; // the dictionary bytes (~32KB max) + domain: string; // human label: e.g. 
// ─── Helpers ─────────────────────────────────────────────────────────────────

/** Encode bytes as a lower-case hex string, two digits per byte. */
function bytesToHex(bytes: Uint8Array): string {
  return Array.from(bytes, (b) => b.toString(16).padStart(2, '0')).join('');
}

/**
 * Decode a hex string (either letter case) into bytes.
 *
 * @throws RangeError if the string has odd length or contains a character
 *   that is not a hexadecimal digit. Without the second check `parseInt`
 *   would silently yield NaN (stored as 0) or a truncated value.
 */
function hexToBytes(hex: string): Uint8Array {
  if (hex.length % 2 !== 0) {
    throw new RangeError(`hexToBytes: odd-length hex string (${hex.length} chars)`);
  }
  if (!/^[0-9a-fA-F]*$/.test(hex)) {
    throw new RangeError('hexToBytes: string contains non-hexadecimal characters');
  }
  const out = new Uint8Array(hex.length / 2);
  for (let i = 0; i < out.length; i++) {
    out[i] = Number.parseInt(hex.slice(i * 2, i * 2 + 2), 16);
  }
  return out;
}

/** SHA-256 of `data` via Web Crypto; the input is copied before hashing. */
async function sha256(data: Uint8Array): Promise<Uint8Array> {
  const hashBuffer = await crypto.subtle.digest('SHA-256', data.slice(0).buffer);
  return new Uint8Array(hashBuffer);
}

// ─── Registry ────────────────────────────────────────────────────────────────

/** Serialised form used in JSON export. */
interface SerializedEntry {
  id: string;          // hex
  dictionary: string;  // hex
  domain: string;
  created: string;     // bigint as decimal string
}

/**
 * In-memory map from dictionary ID (SHA-256 of the dictionary bytes) to the
 * dictionary and its metadata, with JSON export/import for persistence.
 */
export class DictionaryRegistry {
  /** Internal store: lower-case hex(id) → DictionaryEntry */
  private readonly _entries: Map<string, DictionaryEntry> = new Map();

  // ─── Public API ────────────────────────────────────────────────────────────

  /**
   * Register a trained dictionary. Returns its 32-byte ID (SHA-256 of content).
   * Idempotent — registering the same bytes again keeps the first entry
   * (including its original `domain` and `created`) and returns the same ID.
   *
   * @param dictionary - Dictionary bytes; a copy is stored.
   * @param domain     - Human-readable label recorded on first registration.
   */
  async register(dictionary: Uint8Array, domain: string): Promise<Uint8Array> {
    const id = await sha256(dictionary);
    const key = bytesToHex(id);

    if (!this._entries.has(key)) {
      const entry: DictionaryEntry = {
        id,
        dictionary: new Uint8Array(dictionary), // defensive copy
        domain,
        created: BigInt(Date.now()),
      };
      this._entries.set(key, entry);
    }

    // Return the id from the stored entry (idempotent path returns same object)
    return this._entries.get(key)!.id;
  }

  /**
   * Look up a dictionary by ID. Returns null if unknown.
   * The ID is compared by content (via its hex form), not by reference.
   */
  get(id: Uint8Array): DictionaryEntry | null {
    return this._entries.get(bytesToHex(id)) ?? null;
  }

  /**
   * Export registry to JSON string for persistence.
   * Uint8Array fields are serialized as hex strings, `created` as a decimal string.
   */
  export(): string {
    const serialized: SerializedEntry[] = [];
    for (const entry of this._entries.values()) {
      serialized.push({
        id: bytesToHex(entry.id),
        dictionary: bytesToHex(entry.dictionary),
        domain: entry.domain,
        created: entry.created.toString(),
      });
    }
    return JSON.stringify(serialized);
  }

  /**
   * Import registry from JSON string (produced by export()).
   * Returns a new DictionaryRegistry instance.
   *
   * Entries are keyed by the normalised (lower-case) hex of the decoded ID so
   * that `get()` finds them regardless of the letter case used in the JSON.
   * The ID is NOT re-derived from the dictionary bytes here (hashing is async);
   * callers importing untrusted JSON must verify IDs themselves.
   *
   * @throws SyntaxError if `json` is not valid JSON.
   * @throws TypeError if the JSON value is not an array.
   * @throws RangeError if an `id` or `dictionary` field is not valid hex.
   */
  static import(json: string): DictionaryRegistry {
    const registry = new DictionaryRegistry();
    const serialized: SerializedEntry[] = JSON.parse(json);
    if (!Array.isArray(serialized)) {
      throw new TypeError('DictionaryRegistry.import: expected a JSON array of entries');
    }

    for (const raw of serialized) {
      const id = hexToBytes(raw.id);
      const entry: DictionaryEntry = {
        id,
        dictionary: hexToBytes(raw.dictionary),
        domain: raw.domain,
        created: BigInt(raw.created),
      };
      registry._entries.set(bytesToHex(id), entry);
    }

    return registry;
  }

  /**
   * Number of dictionaries in the registry.
   */
  get size(): number {
    return this._entries.size;
  }
}
/**
 * Baseline model (id 0x00): every prediction is an all-zero payload.
 *
 * XOR-ing against zeros leaves the data unchanged, so the residual equals the
 * actual payload. The model keeps no history at all.
 */
export class NullPredictor implements PayloadPredictor {
  readonly modelId = 0x00;

  /** @returns A freshly allocated zero-filled payload of PAYLOAD_SIZE bytes. */
  predict(): Uint8Array {
    const zeros = new Uint8Array(PAYLOAD_SIZE);
    return zeros;
  }

  /** Observation is ignored — there is no state to update. */
  update(_actual: Uint8Array): void {
    return;
  }

  /** Nothing to clear. */
  reset(): void {
    return;
  }
}
/**
 * Model 0x02: continues the per-byte slope between the two most recent payloads.
 *
 *   predicted[i] = (2 * latest[i] - older[i]) mod 256
 *
 * Before any observation the prediction is all zeros; after exactly one
 * observation it is a copy of that observation; from the second observation
 * on, the formula above is used.
 */
export class LinearPredictor implements PayloadPredictor {
  readonly modelId = 0x02;

  private _count = 0;
  private _prev: Uint8Array = new Uint8Array(PAYLOAD_SIZE); // observation t-1
  private _curr: Uint8Array = new Uint8Array(PAYLOAD_SIZE); // observation t-0

  /** @returns A new PAYLOAD_SIZE-byte array holding the extrapolated payload. */
  predict(): Uint8Array {
    switch (this._count) {
      case 0:
        // No history yet.
        return new Uint8Array(PAYLOAD_SIZE);
      case 1:
        // A single point has no slope: repeat it.
        return new Uint8Array(this._curr);
      default: {
        const latest = this._curr;
        const older = this._prev;
        // `& 0xff` wraps the result into uint8 range.
        return Uint8Array.from(
          { length: PAYLOAD_SIZE },
          (_, i) => (2 * latest[i]! - older[i]!) & 0xff,
        );
      }
    }
  }

  /** Shift the history window and store a private copy of `actual`. */
  update(actual: Uint8Array): void {
    // _curr is never mutated or handed out, so it can become _prev as-is.
    this._prev = this._curr;
    this._curr = Uint8Array.from(actual);
    this._count += 1;
  }

  /** Forget all observations. */
  reset(): void {
    this._curr = new Uint8Array(PAYLOAD_SIZE);
    this._prev = new Uint8Array(PAYLOAD_SIZE);
    this._count = 0;
  }
}
/**
 * Recover the actual payload by XOR-ing a residual with the prediction that
 * was used to produce it: actual[i] = residual[i] ^ predicted[i].
 *
 * @param residual  - PAYLOAD_SIZE-byte residual.
 * @param predicted - PAYLOAD_SIZE-byte prediction.
 * @returns A new PAYLOAD_SIZE-byte array; neither input is modified.
 * @throws RangeError if either input is not exactly PAYLOAD_SIZE bytes long.
 */
export function applyResidual(residual: Uint8Array, predicted: Uint8Array): Uint8Array {
  const sizesOk = residual.length === PAYLOAD_SIZE && predicted.length === PAYLOAD_SIZE;
  if (!sizesOk) {
    throw new RangeError(
      `applyResidual: both arrays must be exactly ${PAYLOAD_SIZE} bytes ` +
        `(got ${residual.length} and ${predicted.length})`,
    );
  }
  return Uint8Array.from(residual, (byte, i) => byte ^ predicted[i]!);
}
/**
 * Build a FrequencyTable from byte data, normalised so that the 256
 * frequencies sum to exactly SCALE and every symbol has frequency >= 1.
 *
 * Steps:
 *   1. Count occurrences of each byte, starting every count at 1 (Laplace
 *      smoothing) so that no symbol ends up with zero frequency.
 *   2. Give each symbol max(1, floor(count / total * SCALE)) slots.
 *   3. If that under-allocates, hand the missing slots out one at a time in
 *      order of largest fractional remainder (wrapping around if needed).
 *      If it over-allocates (because of the minimum-1 clamp), take slots back
 *      one at a time, cycling through symbols in order of largest frequency
 *      and never going below 1.
 *   4. Build cumulative prefix sums.
 *
 * @param data - Representative byte data (e.g., XOR residuals). May be empty,
 *   in which case every symbol gets an equal share.
 * @returns Table with `freq` (length 256) and `cumFreq` (length 257).
 */
export function buildFrequencyTable(data: Uint8Array): FrequencyTable {
  // Step 1: smoothed counts — start at 1, then add the observed occurrences.
  const counts = new Float64Array(256).fill(1);
  for (let i = 0; i < data.length; i++) {
    counts[data[i]!]++;
  }
  const total = data.length + 256;

  // Step 2: floor allocation, minimum 1 per symbol
  const freq = new Uint16Array(256);
  const remainders = new Float64Array(256);
  let allocated = 0;

  for (let i = 0; i < 256; i++) {
    const exact = (counts[i]! / total) * SCALE;
    const floored = Math.floor(exact);
    const f = Math.max(1, floored);
    freq[i] = f;
    allocated += f;
    // Clamped symbols already received more than their share: remainder 0.
    remainders[i] = floored >= 1 ? exact - floored : 0;
  }

  // Step 3: correct the total to exactly SCALE
  const delta = SCALE - allocated; // positive = under, negative = over
  if (delta > 0) {
    // Under-allocated: give extra slots to highest remainders.
    // Array#sort is stable, so ties keep ascending symbol order.
    const byRemainder = Array.from({ length: 256 }, (_, i) => i);
    byRemainder.sort((a, b) => remainders[b]! - remainders[a]!);
    for (let k = 0; k < delta; k++) {
      freq[byRemainder[k % 256]!] += 1;
    }
  } else if (delta < 0) {
    // Over-allocated: remove slots from largest frequencies (keep min=1).
    // The order is fixed once up front, matching the established table layout.
    const byFrequency = Array.from({ length: 256 }, (_, i) => i);
    byFrequency.sort((a, b) => freq[b]! - freq[a]!);
    let toRemove = -delta;
    let k = 0;
    while (toRemove > 0) {
      const idx = byFrequency[k % 256]!;
      if (freq[idx]! > 1) {
        freq[idx] -= 1;
        toRemove--;
      }
      k++;
    }
  }

  // Step 4: cumulative prefix sums
  const cumFreq = new Uint16Array(257);
  for (let i = 0; i < 256; i++) {
    cumFreq[i + 1] = cumFreq[i]! + freq[i]!;
  }

  return { freq, cumFreq };
}
/**
 * rANS encode a byte sequence.
 *
 * Symbols are processed in REVERSE order (standard rANS convention). Whenever
 * the state would leave its valid range, its low 16 bits are emitted as a
 * chunk (lo byte, hi byte). After the last symbol the 32-bit state is flushed
 * (4 bytes LE) and the whole output is reversed, so the result reads:
 *   [state, 4 bytes big-endian] [last-emitted chunk] ... [first-emitted chunk]
 * with every chunk big-endian — exactly the order in which the decoder
 * consumes them.
 *
 * @param symbols - Input bytes to encode. Empty input yields an empty array.
 * @param table   - FrequencyTable; every symbol present in `symbols` must have
 *                  a frequency >= 1.
 * @returns Encoded bytes (4 + 2·chunks long for non-empty input).
 * @throws RangeError if a symbol has zero (or missing) frequency in `table`.
 */
export function ransEncode(symbols: Uint8Array, table: FrequencyTable): Uint8Array {
  if (symbols.length === 0) {
    return new Uint8Array(0);
  }

  const output: number[] = [];
  let x = RANS_L;

  // floor(RANS_L / SCALE): scales a frequency to the lower bound of its
  // pre-encode state interval.
  const L_over_M = Math.floor(RANS_L / SCALE);

  // Process symbols in REVERSE order
  for (let i = symbols.length - 1; i >= 0; i--) {
    const s = symbols[i]!;
    const fs = table.freq[s]!;
    const cs = table.cumFreq[s]!;

    // A zero frequency would make upperBound 0 and the renormalisation loop
    // below would never terminate (x >= 0 always holds).
    if (!(fs > 0)) {
      throw new RangeError(`ransEncode: symbol ${s} has zero frequency in the table`);
    }

    // Normalise: emit 16-bit chunk(s) until x is in the valid pre-encode range
    // [L_over_M * fs, L_over_M * fs * 65536).
    const upperBound = L_over_M * fs * 65536;
    while (x >= upperBound) {
      output.push(x & 0xff);
      output.push((x >>> 8) & 0xff);
      x = Math.floor(x / 65536);
    }

    // Encode symbol: maps the pre-encode range bijectively onto [L, L * 65536)
    x = Math.floor(x / fs) * SCALE + (x % fs) + cs;
  }

  // Flush final state (4 bytes, little-endian)
  output.push(x & 0xff);
  output.push((x >>> 8) & 0xff);
  output.push((x >>> 16) & 0xff);
  output.push((x >>> 24) & 0xff);

  // Reverse so the decoder reads the state first, then the chunks.
  output.reverse();
  return new Uint8Array(output);
}
/**
 * rANS decode a byte sequence encoded by ransEncode.
 *
 * Layout expected in `encoded`: a 4-byte big-endian initial state followed by
 * zero or more 2-byte big-endian chunks.
 *
 * @param encoded - Output of ransEncode.
 * @param table   - Same FrequencyTable used for encoding.
 * @param length  - Number of original symbols (required for correct termination).
 * @returns The decoded symbols (`length` bytes).
 * @throws RangeError if `length` > 0 and `encoded` is too short to hold the
 *   initial state, or if the chunk stream runs out while the state still
 *   needs renormalising (truncated or corrupted input).
 */
export function ransDecode(
  encoded: Uint8Array,
  table: FrequencyTable,
  length: number,
): Uint8Array {
  if (length === 0) {
    return new Uint8Array(0);
  }
  if (encoded.length < 4) {
    throw new RangeError(
      `ransDecode: encoded data too short for the 4-byte initial state (got ${encoded.length} bytes)`,
    );
  }

  // Slot → symbol lookup: symbol `sym` owns slots [cumFreq[sym], cumFreq[sym + 1]).
  const cumToSym = new Uint8Array(SCALE);
  for (let sym = 0; sym < 256; sym++) {
    cumToSym.fill(sym, table.cumFreq[sym]!, table.cumFreq[sym + 1]!);
  }

  // The encoder flushes the state little-endian and then reverses the whole
  // output, so it arrives big-endian. DataView gives an unsigned read.
  let x = new DataView(encoded.buffer, encoded.byteOffset, encoded.byteLength).getUint32(0, false);

  let streamPos = 4;
  const output = new Uint8Array(length);

  for (let i = 0; i < length; i++) {
    // Identify symbol: slot = x mod SCALE
    const slot = x & (SCALE - 1);
    const s = cumToSym[slot]!;
    output[i] = s;

    const fs = table.freq[s]!;
    const cs = table.cumFreq[s]!;

    // Advance state: x = fs * floor(x / SCALE) + slot - cs
    x = fs * Math.floor(x / SCALE) + slot - cs;

    // Renormalise: pull 16-bit big-endian chunks until x >= RANS_L.
    while (x < RANS_L) {
      if (streamPos + 2 > encoded.length) {
        // Running dry here means the input cannot have come from ransEncode
        // with this table/length; decoding on would only fabricate symbols.
        throw new RangeError(
          `ransDecode: encoded stream truncated after ${i + 1} of ${length} symbols`,
        );
      }
      const chunk = (encoded[streamPos]! << 8) | encoded[streamPos + 1]!;
      // x < RANS_L here, so x * 65536 + chunk stays below 2^31.
      x = x * 65536 + chunk;
      streamPos += 2;
    }
  }

  return output;
}
/**
 * Run-length encode a column of type bytes.
 *
 * Each maximal run of equal bytes becomes `[type_byte, varint(run_length)]`,
 * and the runs are concatenated in input order.
 *
 * @param types - Array of uint8 DOT type values
 * @returns RLE-encoded buffer
 * @throws RangeError if types is empty
 */
export function encodePayloadTypes(types: Uint8Array): Uint8Array {
  if (types.length === 0) {
    throw new RangeError('encodePayloadTypes: types array must not be empty');
  }

  const encoded: number[] = [];
  let runStart = 0;

  // Walk one position past the end so the final run is flushed by the same branch.
  for (let pos = 1; pos <= types.length; pos++) {
    const runEnded = pos === types.length || types[pos] !== types[runStart];
    if (runEnded) {
      encoded.push(types[runStart]!, ...encodeVarint(pos - runStart));
      runStart = pos;
    }
  }

  return Uint8Array.from(encoded);
}
/**
 * Decode RLE-encoded type buffer back to flat array.
 *
 * Reads `[type_byte, count_varint]` pairs until `buf` is exhausted and expands
 * each pair into `count` copies of `type_byte`.
 *
 * @param buf - RLE buffer from encodePayloadTypes
 * @param totalCount - Total number of types to decode (must match original)
 * @returns Flat Uint8Array of type values
 * @throws RangeError if the runs add up to more or fewer than totalCount.
 *   A type byte with no count after it is left to decodeVarint to reject
 *   (presumably it throws on a truncated varint — confirm in varint.ts).
 */
export function decodePayloadTypes(buf: Uint8Array, totalCount: number): Uint8Array {
  const out = new Uint8Array(totalCount);
  let writePos = 0;
  let readPos = 0;

  // The loop condition guarantees a type byte is available on every pass,
  // so no separate end-of-buffer check is needed before reading it.
  while (readPos < buf.length) {
    const type = buf[readPos++]!;

    // Read count varint
    const [count, bytesConsumed] = decodeVarint(buf, readPos);
    readPos += bytesConsumed;

    // Reject runs that would overflow the expected column length.
    if (writePos + count > totalCount) {
      throw new RangeError(
        `decodePayloadTypes: decoded count exceeds totalCount (${writePos + count} > ${totalCount})`,
      );
    }
    out.fill(type, writePos, writePos + count);
    writePos += count;
  }

  if (writePos !== totalCount) {
    throw new RangeError(
      `decodePayloadTypes: count mismatch — decoded ${writePos} types but expected ${totalCount}`,
    );
  }

  return out;
}
/**
 * Store `value` as a little-endian IEEE-754 float32 in `payload`, starting at
 * byte `offset` (relative to the start of `payload`, not of its backing buffer).
 */
function writeFloat32LE(payload: Uint8Array, offset: number, value: number): void {
  const { buffer, byteOffset, byteLength } = payload;
  // Restricting the view to the payload's own window makes DataView reject
  // writes that would spill past the end of `payload`.
  new DataView(buffer, byteOffset, byteLength).setFloat32(offset, value, true);
}
/**
 * Generate an array of `count` DOT byte arrays according to the given sensor
 * profile.
 *
 * Each DOT is built with `createDOT` and serialised with `toBytes`; every DOT
 * after the first receives the previous DOT's bytes as `previous`, so the
 * result is produced strictly in order.
 *
 * Profiles 'kulhadVoltage', 'temperature' and 'gps' delegate to their payload
 * helpers. Any other profile value (including 'random') produces 16 random
 * payload bytes, a random visibility type, and timestamps that advance by a
 * random positive-or-zero step derived from 24h / count.
 *
 * @param options - See SampleGeneratorOptions. A keypair is created when none
 *   is supplied; `startTimestamp` defaults to the current time.
 * @returns The serialised DOTs, oldest first.
 * @throws RangeError if a generated timestamp exceeds Number.MAX_SAFE_INTEGER
 *   (it has to be passed to createDOT as a Number).
 */
export async function generateSensorStream(
  options: SampleGeneratorOptions
): Promise<Uint8Array[]> {
  const { count, profile } = options;
  const keypair = options.keypair ?? (await createKeypair());
  const baseTs = options.startTimestamp ?? BigInt(Date.now());

  // Loop invariants, hoisted out of the per-DOT work.
  const DAY_MS = 24 * 60 * 60 * 1000;
  const MAX_SAFE_TS = BigInt(Number.MAX_SAFE_INTEGER);
  const RANDOM_TYPES: DotType[] = [DotType.PUBLIC, DotType.CIRCLE, DotType.PRIVATE, DotType.EPHEMERAL];

  const dots: Uint8Array[] = [];
  let prev: Uint8Array | undefined;
  // For random profile we track accumulated offset to ensure monotonic ts
  let randomAccumTs = baseTs;

  for (let i = 0; i < count; i++) {
    let sample: { payload: Uint8Array; ts: bigint; type: DotType };

    switch (profile) {
      case 'kulhadVoltage':
        sample = kulhadVoltagePayload(i, baseTs);
        break;
      case 'temperature':
        sample = temperaturePayload(i, baseTs);
        break;
      case 'gps':
        sample = gpsPayload(i, baseTs);
        break;
      case 'random':
      default: {
        // Spread over 24h but keep monotonically increasing
        const spread = BigInt(Math.round((Math.random() + 0.5) * (DAY_MS / count)));
        randomAccumTs = randomAccumTs + spread;
        const payload = new Uint8Array(16);
        crypto.getRandomValues(payload);
        const type = RANDOM_TYPES[Math.floor(Math.random() * RANDOM_TYPES.length)]!;
        sample = { payload, ts: randomAccumTs, type };
        break;
      }
    }

    const { payload, ts, type } = sample;
    if (ts > MAX_SAFE_TS) {
      throw new RangeError(`Timestamp ${ts} exceeds Number.MAX_SAFE_INTEGER — cannot convert without precision loss`);
    }

    const dot = await createDOT({
      keypair,
      payload,
      type,
      ts: Number(ts),
      ...(prev ? { previous: prev } : {}),
    });

    const bytes = toBytes(dot);
    dots.push(bytes);
    prev = bytes;
  }

  return dots;
}
+ */ +export async function generateTrainingSamples( + profile: SensorProfile, + count: number = 100 +): Promise { + const dots = await generateSensorStream({ count, profile }); + return dots.map(dot => dot.slice(137, 153)); +} diff --git a/packages/compression/src/tests/batch-v2-dict.test.ts b/packages/compression/src/tests/batch-v2-dict.test.ts new file mode 100644 index 000000000..5b1e2d333 --- /dev/null +++ b/packages/compression/src/tests/batch-v2-dict.test.ts @@ -0,0 +1,327 @@ +/** + * batch-v2-dict.test.ts + * + * Tests for Task 2.3 — dictionary compression integration in batch v2. + * + * Wire format when FLAG_DICT_COMPRESSED (bit 3) is set: + * HEADER (118B): version(1) + flags(1) + count(4) + pubkey(32) + aggSig(48) + dictId(32) + * BODY: zstd-compressed column body + */ + +import { describe, it, expect } from 'vitest'; +import { createHash } from 'node:crypto'; +import { + createKeypair, + createDOT, + toBytes, + DotType, + createBLSKeypair, + verifyAggregateSameSigner, +} from '@dotprotocol/core'; +import { + serializeBatchV2, + deserializeBatchV2, + FLAG_DICT_COMPRESSED, +} from '../batch-v2.js'; +import { trainDictionary } from '../zstd.js'; +import { DictionaryRegistry } from '../dictionary-registry.js'; + +// ─── Helpers ────────────────────────────────────────────────────────────────── + +/** Compute SHA-256 of bytes (sync, via node:crypto). */ +function sha256(data: Uint8Array): Uint8Array { + return new Uint8Array(createHash('sha256').update(data).digest()); +} + +/** + * Build a chain of N voltage-sensor DOTs. + * Payload: float64 (big-endian) at bytes [0..7], rest zero. + * Produces correlated data, ideal for demonstrating dictionary compression. 
*/
async function buildVoltageChain(n: number): Promise<Uint8Array[]> {
  const keypair = await createKeypair();
  const chain: Uint8Array[] = [];

  for (let i = 0; i < n; i++) {
    const payload = new Uint8Array(16);
    // Reading goes into bytes [0..7] as a big-endian float64 (littleEndian = false).
    new DataView(payload.buffer).setFloat64(0, 0.497 + i * 0.0001, false);

    // The first DOT has no predecessor; every later DOT links to the one before it.
    const previous = chain[chain.length - 1];
    const dot = await createDOT({
      keypair,
      type: DotType.PUBLIC,
      payload,
      ...(previous ? { previous } : {}),
    });
    chain.push(toBytes(dot));
  }

  return chain;
}

/**
 * Generate training samples: 50 batches of 20 DOTs each.
 * Returns the raw body bytes (timestamps + types + payloads as serialized by batch v2
 * but WITHOUT dictionary compression — just the plain column-encoded body).
 *
 * We do this by serializing without dictionary and slicing off the 86-byte header.
 *
 * @returns One body per batch, in generation order.
 */
async function generateTrainingSamples(): Promise<Uint8Array[]> {
  const BATCH_COUNT = 50;
  const DOTS_PER_BATCH = 20;
  // Header length of a frame serialized without a dictionary.
  const PLAIN_HEADER_BYTES = 86;

  const blsKeypair = createBLSKeypair();
  const samples: Uint8Array[] = [];

  for (let b = 0; b < BATCH_COUNT; b++) {
    const dots = await buildVoltageChain(DOTS_PER_BATCH);
    const frame = await serializeBatchV2(dots, blsKeypair, {
      timestampDelta: true,
      payloadTypeRLE: true,
    });
    // Keep only the body: everything after the fixed-size header.
    samples.push(frame.slice(PLAIN_HEADER_BYTES));
  }

  return samples;
}

// ─── Tests ────────────────────────────────────────────────────────────────────

describe('batch v2 dictionary compression', () => {

  // 1.
// Roundtrip with dictionary: 100 DOTs → serialize with dict → deserialize → all match
  it('roundtrip with dictionary: 100 DOTs fully match after serialize/deserialize', async () => {
    const originals = await buildVoltageChain(100);
    const bls = createBLSKeypair();

    // The dictionary is trained on the 50×20-DOT sample bodies and identified by its SHA-256.
    const dictionary = await trainDictionary(await generateTrainingSamples());
    const dictionaryId = sha256(dictionary);

    const frame = await serializeBatchV2(originals, bls, {
      timestampDelta: true,
      payloadTypeRLE: true,
      dictionary,
      dictionaryId,
    });

    // The flags byte (frame[1]) must carry FLAG_DICT_COMPRESSED, and the frame can
    // never be shorter than the 118B header used when the dict flag is set.
    expect(frame[1]! & FLAG_DICT_COMPRESSED).toBe(FLAG_DICT_COMPRESSED);
    expect(frame.length).toBeGreaterThanOrEqual(118);

    // Decoding needs the same dictionary to be available through a registry.
    const registry = new DictionaryRegistry();
    await registry.register(dictionary, 'voltage-sensor-v1');
    const decoded = await deserializeBatchV2(frame, bls.publicKey, registry);

    expect(decoded.length).toBe(originals.length);

    const span = (buf: Uint8Array, from: number, to: number): number[] =>
      Array.from(buf.subarray(from, to));

    originals.forEach((orig, i) => {
      const rec = decoded[i]!;

      expect(rec.length).toBe(153);
      expect(span(rec, 0, 32)).toEqual(span(orig, 0, 32)); // pubkey [0..31]
      expect(span(rec, 128, 136)).toEqual(span(orig, 128, 136)); // timestamp [128..135]
      expect(rec[136]).toBe(orig[136]); // type [136]
      expect(span(rec, 137, 153)).toEqual(span(orig, 137, 153)); // payload [137..152]
    });
  }, 120_000);

  // 2.
// Backward compat: no dictionary flag → existing behavior unchanged
  it('backward compat: no dictionary flag — existing behavior unchanged', async () => {
    const originals = await buildVoltageChain(50);
    const bls = createBLSKeypair();

    // Neither `dictionary` nor `dictionaryId` is supplied.
    const frame = await serializeBatchV2(originals, bls, {
      timestampDelta: true,
      payloadTypeRLE: true,
    });

    // Without a dictionary the dict bit in the flags byte must stay clear.
    expect(frame[1]! & FLAG_DICT_COMPRESSED).toBe(0);

    // No registry is passed; decoding a plain frame must still succeed.
    const decoded = await deserializeBatchV2(frame, bls.publicKey);
    expect(decoded.length).toBe(originals.length);

    const span = (buf: Uint8Array, from: number, to: number): number[] =>
      Array.from(buf.subarray(from, to));

    originals.forEach((orig, i) => {
      const rec = decoded[i]!;
      expect(span(rec, 0, 32)).toEqual(span(orig, 0, 32)); // pubkey
      expect(span(rec, 128, 136)).toEqual(span(orig, 128, 136)); // timestamp
      expect(rec[136]).toBe(orig[136]); // type
      expect(span(rec, 137, 153)).toEqual(span(orig, 137, 153)); // payload
    });
  }, 60_000);

  // 3.
// Size reduction: dict-compressed batch should be smaller than plain v2
  it('size reduction: dict-compressed batch smaller than uncompressed v2 for 100 correlated DOTs', async () => {
    const bls = createBLSKeypair();
    const originals = await buildVoltageChain(100);

    // Baseline: same DOTs, same column options, no dictionary.
    const plainFrame = await serializeBatchV2(originals, bls, {
      timestampDelta: true,
      payloadTypeRLE: true,
    });

    const dictionary = await trainDictionary(await generateTrainingSamples());
    const dictionaryId = sha256(dictionary);

    // Candidate: identical input, dictionary compression switched on.
    const dictFrame = await serializeBatchV2(originals, bls, {
      timestampDelta: true,
      payloadTypeRLE: true,
      dictionary,
      dictionaryId,
    });

    const plainSize = plainFrame.length;
    const dictSize = dictFrame.length;
    const saved = plainSize - dictSize;
    const savedPct = ((saved / plainSize) * 100).toFixed(1);

    // Logged before asserting so the numbers are visible even when the check fails.
    console.log(
      `Size comparison (N=100): plain-v2=${plainSize}B dict-v2=${dictSize}B savings=${saved}B (${savedPct}%)`,
    );

    // Correlated voltage data is expected to shrink under dictionary compression.
    expect(dictSize).toBeLessThan(plainSize);
  }, 120_000);

  // 4. Error: missing registry on dict-compressed frame
  it('error: missing registry — throws with message including dict id', async () => {
    const originals = await buildVoltageChain(20);
    const bls = createBLSKeypair();

    const dictionary = await trainDictionary(await generateTrainingSamples());
    const dictionaryId = sha256(dictionary);

    const frame = await serializeBatchV2(originals, bls, { dictionary, dictionaryId });

    // The registry argument is deliberately omitted; the rejection message must say so.
    const decoding = deserializeBatchV2(frame, bls.publicKey);
    await expect(decoding).rejects.toThrow(/no dictionaryRegistry/i);
  }, 120_000);

  // 5a.
// Error: dictionary provided without dictionaryId
  it('error: dictionary without dictionaryId throws TypeError', async () => {
    const originals = await buildVoltageChain(5);
    const bls = createBLSKeypair();
    const dictionary = await trainDictionary(await generateTrainingSamples());

    // Only the message is checked here (it must mention `dictionaryId`).
    const serializing = serializeBatchV2(originals, bls, { dictionary });
    await expect(serializing).rejects.toThrow(/dictionaryId/i);
  }, 120_000);

  // 5b. Error: dictionaryId provided without dictionary
  it('error: dictionaryId without dictionary throws TypeError', async () => {
    const originals = await buildVoltageChain(5);
    const bls = createBLSKeypair();
    const dictionaryId = new Uint8Array(32).fill(1);

    // Only the message is checked here (it must mention `dictionary`).
    const serializing = serializeBatchV2(originals, bls, { dictionaryId });
    await expect(serializing).rejects.toThrow(/dictionary/i);
  }, 120_000);

  // 5c. Error: wrong dictionary in registry
  it('error: wrong dictionary in registry — throws with unknown id', async () => {
    const originals = await buildVoltageChain(20);
    const bls = createBLSKeypair();

    // dict1 is the dictionary the frame is serialized with.
    const dict1 = await trainDictionary(await generateTrainingSamples());
    const dictId1 = sha256(dict1);

    // dict2 is trained on a separate sample set and is the only one registered.
    const dict2 = await trainDictionary(await generateTrainingSamples());

    const frame = await serializeBatchV2(originals, bls, {
      dictionary: dict1,
      dictionaryId: dictId1,
    });

    const registry = new DictionaryRegistry();
    await registry.register(dict2, 'voltage-sensor-wrong');

    // The frame's dictionary id is absent from the registry, so decoding must reject.
    const decoding = deserializeBatchV2(frame, bls.publicKey, registry);
    await expect(decoding).rejects.toThrow(/unknown dictionary id=/i);
  }, 120_000);

  // 6.
BLS verification passes on roundtrip with dictionary + it('BLS verification passes explicitly on roundtrip with dictionary', async () => { + const dots = await buildVoltageChain(30); + const blsKeypair = createBLSKeypair(); + + const samples = await generateTrainingSamples(); + const dictionary = await trainDictionary(samples); + const dictionaryId = sha256(dictionary); + + const frame = await serializeBatchV2(dots, blsKeypair, { + timestampDelta: true, + payloadTypeRLE: true, + dictionary, + dictionaryId, + }); + + const registry = new DictionaryRegistry(); + await registry.register(dictionary, 'voltage-sensor-v1'); + const recovered = await deserializeBatchV2(frame, blsKeypair.publicKey, registry); + + expect(recovered.length).toBe(dots.length); + + // Explicitly reconstruct signedBytes and call verifyAggregateSameSigner + // This mirrors what the deserializer does internally, making the BLS pass explicit. + const OFF_PUBKEY = 0; + const OFF_CHAIN = 96; + const PUBKEY_SIZE = 32; + const DOT_SIZE = 153; + + function extractSignedBytes(dot: Uint8Array): Uint8Array { + const out = new Uint8Array(89); + out.set(dot.subarray(OFF_PUBKEY, OFF_PUBKEY + PUBKEY_SIZE), 0); + out.set(dot.subarray(OFF_CHAIN, DOT_SIZE), 32); + return out; + } + + // The recovered DOTs have the aggSig in [32..79] (48B zero-padded to 64B). + // Extract the 48-byte aggSig from the first recovered dot's sig field. + const aggSig = recovered[0]!.subarray(32, 80); // first 48B of sig field + + const signedMessages = recovered.map(dot => { + // For verification we need the zeroed-sig form of each dot. + // The deserializer returns DOTs with aggSig in sig field, but BLS was + // computed over zeroed-sig DOTs. Reconstruct zeroed-sig form here. 
+ const zeroed = dot.slice(); + zeroed.fill(0, 32, 96); // zero out sig field [32..95] + return extractSignedBytes(zeroed); + }); + + const valid = verifyAggregateSameSigner(aggSig, signedMessages, blsKeypair.publicKey); + expect(valid).toBe(true); + }, 120_000); + +}); diff --git a/packages/compression/src/tests/batch-v2-predict.test.ts b/packages/compression/src/tests/batch-v2-predict.test.ts new file mode 100644 index 000000000..d0f30476c --- /dev/null +++ b/packages/compression/src/tests/batch-v2-predict.test.ts @@ -0,0 +1,333 @@ +/** + * Tests for predictor + rANS payload coding in batch v2. + * + * FLAG_PREDICTION (bit 4) integrates the PayloadPredictor + rANS entropy coder + * into the batch v2 wire format, replacing the raw payload column with: + * - prediction metadata header (predictor_model_id + frequency_table) + * - rANS-encoded XOR residuals + */ + +import { describe, it, expect } from 'vitest'; +import { + createKeypair, + createDOT, + toBytes, + DotType, + createBLSKeypair, + verifyAggregateSameSigner, +} from '@dotprotocol/core'; +import { + serializeBatchV2, + deserializeBatchV2, + FLAG_PREDICTION, + FLAG_DICT_COMPRESSED, +} from '../batch-v2.js'; +import { LinearPredictor, NullPredictor, LastValuePredictor } from '../predictor.js'; + +// ─── Helpers ────────────────────────────────────────────────────────────────── + +/** + * Build a chain of N DOTs with voltage-style payloads: steadily-incrementing + * 16-byte payloads that the LinearPredictor will compress well. 
*/
async function buildVoltageChain(n: number): Promise<Uint8Array[]> {
  // Byte b of DOT i holds (i + b) mod 256, so every payload byte increments by 1 per step.
  return buildChainFromPayloads(n, 100, (i) => {
    const payload = new Uint8Array(16);
    for (let b = 0; b < 16; b++) {
      payload[b] = (i + b) & 0xff;
    }
    return payload;
  });
}

/**
 * Build a chain of N DOTs with temperature-style payloads: slowly-changing,
 * mostly-constant payloads suited to LastValuePredictor.
 */
async function buildTemperatureChain(n: number): Promise<Uint8Array[]> {
  return buildChainFromPayloads(n, 1000, (i) => {
    const payload = new Uint8Array(16);
    // Only byte 0 changes, stepping once every 10 DOTs (0..9 for n=100).
    payload[0] = Math.floor(i / 10) & 0xff;
    payload[1] = 0x42; // fixed
    // Bytes 2..15 stay zero.
    return payload;
  });
}

/**
 * Shared builder for the chains above: creates `n` PUBLIC DOTs signed by one
 * fresh keypair, each linked to the previous DOT's bytes.
 *
 * @param n - Number of DOTs to create.
 * @param stepMs - Timestamp spacing; DOT i gets ts = 1_700_000_000_000 + i * stepMs.
 * @param payloadAt - Produces the 16-byte payload for DOT i.
 * @returns The serialized DOTs in creation order.
 */
async function buildChainFromPayloads(
  n: number,
  stepMs: number,
  payloadAt: (i: number) => Uint8Array,
): Promise<Uint8Array[]> {
  const keypair = await createKeypair();
  const dots: Uint8Array[] = [];
  let prev: Uint8Array | undefined;

  for (let i = 0; i < n; i++) {
    const payload = payloadAt(i);
    const dot = await createDOT({
      keypair,
      type: DotType.PUBLIC,
      ts: 1_700_000_000_000 + i * stepMs,
      payload,
      ...(prev ? { previous: prev } : {}),
    });
    const bytes = toBytes(dot);
    dots.push(bytes);
    prev = bytes;
  }

  return dots;
}

/**
 * Build a chain of N DOTs with random payloads (pseudo-random bytes from Math.random).
 * The LinearPredictor will not compress this better than NullPredictor.
*/
async function buildRandomChain(n: number): Promise<Uint8Array[]> {
  const keypair = await createKeypair();
  const dots: Uint8Array[] = [];
  let prev: Uint8Array | undefined;

  for (let i = 0; i < n; i++) {
    const payload = new Uint8Array(16);
    // Math.random is enough here: the bytes only need to be unpredictable to the
    // predictor, not cryptographically strong. It is unseeded, so payloads differ
    // from run to run.
    for (let b = 0; b < 16; b++) {
      payload[b] = Math.floor(Math.random() * 256);
    }

    const dot = await createDOT({
      keypair,
      type: DotType.PUBLIC,
      ts: 1_700_000_000_000 + i * 100,
      payload,
      ...(prev ? { previous: prev } : {}),
    });
    const bytes = toBytes(dot);
    dots.push(bytes);
    prev = bytes;
  }

  return dots;
}

// ─── Byte offset constants (from batch v2 wire format) ────────────────────────
// Index of the flags byte inside a serialized batch v2 frame.
const OFF_FLAGS = 1;

// ─── Tests ────────────────────────────────────────────────────────────────────

describe('batch v2 predictor + rANS integration', () => {

  // 1. Roundtrip with LinearPredictor: 100 voltage DOTs
  it(
    'roundtrip with LinearPredictor: 100 voltage DOTs byte-for-byte match',
    async () => {
      const originals = await buildVoltageChain(100);
      const bls = createBLSKeypair();

      const frame = await serializeBatchV2(originals, bls, {
        predictor: new LinearPredictor(),
      });
      const decoded = await deserializeBatchV2(frame, bls.publicKey);

      expect(decoded.length).toBe(originals.length);

      const span = (buf: Uint8Array, from: number, to: number): number[] =>
        Array.from(buf.subarray(from, to));

      originals.forEach((orig, i) => {
        const rec = decoded[i]!;
        expect(span(rec, 0, 32)).toEqual(span(orig, 0, 32)); // pubkey
        expect(span(rec, 128, 136)).toEqual(span(orig, 128, 136)); // timestamp
        expect(rec[136]).toBe(orig[136]); // type
        expect(span(rec, 137, 153)).toEqual(span(orig, 137, 153)); // payload — must match exactly
      });
    },
    120_000,
  );

  // 2.
// BLS verification on reconstructed DOTs
  it(
    'BLS verification passes on reconstructed DOTs with LinearPredictor',
    async () => {
      const originals = await buildVoltageChain(20);
      const bls = createBLSKeypair();

      const frame = await serializeBatchV2(originals, bls, {
        predictor: new LinearPredictor(),
      });

      // deserializeBatchV2 checks the BLS aggregate itself; a failed check rejects
      // and fails this test, so reaching the length assertion means it passed.
      const decoded = await deserializeBatchV2(frame, bls.publicKey);
      expect(decoded.length).toBe(20);
    },
    120_000,
  );

  // 3. Roundtrip with LastValuePredictor: 100 temperature DOTs
  it(
    'roundtrip with LastValuePredictor: 100 temperature DOTs payload preserved',
    async () => {
      const originals = await buildTemperatureChain(100);
      const bls = createBLSKeypair();

      const frame = await serializeBatchV2(originals, bls, {
        predictor: new LastValuePredictor(),
      });
      const decoded = await deserializeBatchV2(frame, bls.publicKey);

      expect(decoded.length).toBe(originals.length);

      // Only the payload bytes [137..152] are compared in this test.
      const payloadOf = (dot: Uint8Array): number[] => Array.from(dot.subarray(137, 153));
      originals.forEach((orig, i) => {
        expect(payloadOf(decoded[i]!)).toEqual(payloadOf(orig));
      });
    },
    120_000,
  );

  // 4.
// Auto predictor: voltage → FLAG_PREDICTION is set
  it(
    'auto predictor: voltage chain sets FLAG_PREDICTION in serialized frame',
    async () => {
      const originals = await buildVoltageChain(50);
      const bls = createBLSKeypair();

      const frame = await serializeBatchV2(originals, bls, { predictor: 'auto' });

      // FLAG_PREDICTION must be present in the flags byte.
      const flags = frame[OFF_FLAGS]!;
      expect(flags & FLAG_PREDICTION).toBe(FLAG_PREDICTION);

      // The frame must also decode back to the same payloads.
      const decoded = await deserializeBatchV2(frame, bls.publicKey);
      expect(decoded.length).toBe(originals.length);

      const payloadOf = (dot: Uint8Array): number[] => Array.from(dot.subarray(137, 153));
      originals.forEach((orig, i) => {
        expect(payloadOf(decoded[i]!)).toEqual(payloadOf(orig));
      });
    },
    120_000,
  );

  // 5. Auto predictor: random → FLAG_PREDICTION is set, modelId=0x00 (NullPredictor)
  it(
    'auto predictor: random chain sets FLAG_PREDICTION with NullPredictor (modelId=0x00)',
    async () => {
      const originals = await buildRandomChain(50);
      const bls = createBLSKeypair();

      const frame = await serializeBatchV2(originals, bls, { predictor: 'auto' });

      // The flag is expected whichever predictor 'auto' picks.
      // NOTE: the model id itself is not asserted here, only the flag and the roundtrip.
      const flags = frame[OFF_FLAGS]!;
      expect(flags & FLAG_PREDICTION).toBe(FLAG_PREDICTION);

      const decoded = await deserializeBatchV2(frame, bls.publicKey);
      expect(decoded.length).toBe(originals.length);

      const payloadOf = (dot: Uint8Array): number[] => Array.from(dot.subarray(137, 153));
      originals.forEach((orig, i) => {
        expect(payloadOf(decoded[i]!)).toEqual(payloadOf(orig));
      });
    },
    120_000,
  );

  // 6.
// Size: voltage with prediction < voltage without prediction
  it(
    'size: voltage batch with prediction is smaller than without prediction',
    async () => {
      const originals = await buildVoltageChain(100);
      const bls = createBLSKeypair();

      const predicted = await serializeBatchV2(originals, bls, {
        predictor: new LinearPredictor(),
      });
      // Empty options: no predictor, so payloads are stored raw.
      const raw = await serializeBatchV2(originals, bls, {});

      console.log(
        `Voltage 100 DOTs: with_prediction=${predicted.length}B without=${raw.length}B`,
      );

      // For steadily-incrementing voltage payloads the predicted frame should be the smaller one.
      expect(predicted.length).toBeLessThan(raw.length);
    },
    120_000,
  );

  // 7. Error: predictor + dictionary both set → throws TypeError
  it(
    'throws TypeError when both predictor and dictionary are set',
    async () => {
      const originals = await buildVoltageChain(10);
      const bls = createBLSKeypair();
      // Placeholder dictionary bytes and id; this test expects rejection before they matter.
      const fakeDict = new Uint8Array(1024);
      const fakeDictId = new Uint8Array(32).fill(0x42);

      const serializing = serializeBatchV2(originals, bls, {
        predictor: new LinearPredictor(),
        dictionary: fakeDict,
        dictionaryId: fakeDictId,
      });
      await expect(serializing).rejects.toThrow(TypeError);
    },
    30_000,
  );

  // 8.
Roundtrip: 1 DOT with prediction works + it( + 'roundtrip: single-DOT batch with LinearPredictor works', + async () => { + const dots = await buildVoltageChain(1); + const blsKeypair = createBLSKeypair(); + + const frame = await serializeBatchV2(dots, blsKeypair, { + predictor: new LinearPredictor(), + }); + + const recovered = await deserializeBatchV2(frame, blsKeypair.publicKey); + + expect(recovered.length).toBe(1); + expect(recovered[0]!.length).toBe(153); + expect(Array.from(recovered[0]!.subarray(0, 32))).toEqual( + Array.from(dots[0]!.subarray(0, 32)), + ); + expect(Array.from(recovered[0]!.subarray(128, 136))).toEqual( + Array.from(dots[0]!.subarray(128, 136)), + ); + expect(recovered[0]![136]).toBe(dots[0]![136]); + expect(Array.from(recovered[0]!.subarray(137, 153))).toEqual( + Array.from(dots[0]!.subarray(137, 153)), + ); + }, + 60_000, + ); + +}); diff --git a/packages/compression/src/tests/batch-v2.test.ts b/packages/compression/src/tests/batch-v2.test.ts new file mode 100644 index 000000000..32c26929b --- /dev/null +++ b/packages/compression/src/tests/batch-v2.test.ts @@ -0,0 +1,279 @@ +import { describe, it, expect } from 'vitest'; +import { + createKeypair, + createDOT, + toBytes, + DotType, + createBLSKeypair, + batchPackBLS, +} from '@dotprotocol/core'; +import { serializeBatchV2, deserializeBatchV2 } from '../batch-v2.js'; + +// ─── Helpers ────────────────────────────────────────────────────────────────── + +/** + * Build a chain of N DOTs starting from genesis (no previous). + * All DOTs share the same Ed25519 keypair. + */ +async function buildChain(n: number, tsStart = 1_700_000_000_000): Promise { + const keypair = await createKeypair(); + const dots: Uint8Array[] = []; + let prev: Uint8Array | undefined; + + for (let i = 0; i < n; i++) { + const dot = await createDOT({ + keypair, + type: DotType.PUBLIC, + ts: tsStart + i * 100, // 100ms intervals + ...(prev ? 
{ previous: prev } : {}), + }); + const bytes = toBytes(dot); + dots.push(bytes); + prev = bytes; + } + + return dots; +} + +/** + * Build a chain of N DOTs with varied types to stress the type RLE. + */ +async function buildMixedTypeChain(n: number): Promise { + const keypair = await createKeypair(); + const types = [DotType.PUBLIC, DotType.CIRCLE, DotType.PRIVATE, DotType.EPHEMERAL]; + const dots: Uint8Array[] = []; + let prev: Uint8Array | undefined; + + for (let i = 0; i < n; i++) { + const type = types[i % types.length]!; + const dot = await createDOT({ + keypair, + type, + ts: Date.now() + i * 1000, + payload: new Uint8Array(16).fill(i & 0xff), + ...(prev ? { previous: prev } : {}), + }); + const bytes = toBytes(dot); + dots.push(bytes); + prev = bytes; + } + + return dots; +} + +// ─── Tests ──────────────────────────────────────────────────────────────────── + +describe('batch v2 serializer', () => { + + // 1. Basic roundtrip — 10 DOTs, defaults (delta + RLE enabled) + it('basic roundtrip: 10 DOTs serialize/deserialize to identical bytes', async () => { + const dots = await buildChain(10); + const blsKeypair = createBLSKeypair(); + + const frame = await serializeBatchV2(dots, blsKeypair); + const recovered = await deserializeBatchV2(frame, blsKeypair.publicKey); + + expect(recovered.length).toBe(dots.length); + for (let i = 0; i < dots.length; i++) { + expect(recovered[i]!.length).toBe(153); + // The reconstructed DOT must have matching pubkey, type, payload, and timestamp. 
+ // (chain hash and sig field differ from original — batch v2 uses aggSig and + // genesis-anchored reconstruction, which is a Sprint 1 design constraint) + const orig = dots[i]!; + const rec = recovered[i]!; + // pubkey [0..31] + expect(Array.from(rec.subarray(0, 32))).toEqual(Array.from(orig.subarray(0, 32))); + // timestamp [128..135] + expect(Array.from(rec.subarray(128, 136))).toEqual(Array.from(orig.subarray(128, 136))); + // type [136] + expect(rec[136]).toBe(orig[136]); + // payload [137..152] + expect(Array.from(rec.subarray(137, 153))).toEqual(Array.from(orig.subarray(137, 153))); + } + }, 30_000); + + // 2. Flags roundtrip — 100 DOTs, both flags enabled, BLS verification passes + it('flags roundtrip: 100 DOTs with both flags enabled — BLS passes', async () => { + const dots = await buildChain(100); + const blsKeypair = createBLSKeypair(); + + const frame = await serializeBatchV2(dots, blsKeypair, { + timestampDelta: true, + payloadTypeRLE: true, + }); + const recovered = await deserializeBatchV2(frame, blsKeypair.publicKey); + + expect(recovered.length).toBe(100); + for (const dot of recovered) { + expect(dot.length).toBe(153); + } + // If BLS fails, deserializeBatchV2 throws — so getting here means it passed. + }, 60_000); + + // 3. No-RLE roundtrip — payloadTypeRLE disabled + it('no-RLE roundtrip: payloadTypeRLE:false still round-trips', async () => { + const dots = await buildChain(20); + const blsKeypair = createBLSKeypair(); + + const frame = await serializeBatchV2(dots, blsKeypair, { + timestampDelta: true, + payloadTypeRLE: false, + }); + const recovered = await deserializeBatchV2(frame, blsKeypair.publicKey); + + expect(recovered.length).toBe(20); + for (let i = 0; i < dots.length; i++) { + const orig = dots[i]!; + const rec = recovered[i]!; + expect(rec[136]).toBe(orig[136]); + expect(Array.from(rec.subarray(137, 153))).toEqual(Array.from(orig.subarray(137, 153))); + } + }, 30_000); + + // 4. 
// No-delta roundtrip — timestampDelta disabled
  it('no-delta roundtrip: timestampDelta:false still round-trips', async () => {
    const originals = await buildChain(20);
    const bls = createBLSKeypair();

    const frame = await serializeBatchV2(originals, bls, {
      timestampDelta: false,
      payloadTypeRLE: true,
    });
    const decoded = await deserializeBatchV2(frame, bls.publicKey);

    expect(decoded.length).toBe(20);

    const span = (buf: Uint8Array, from: number, to: number): number[] =>
      Array.from(buf.subarray(from, to));

    originals.forEach((orig, i) => {
      const rec = decoded[i]!;
      expect(span(rec, 128, 136)).toEqual(span(orig, 128, 136)); // timestamp preserved
      expect(span(rec, 137, 153)).toEqual(span(orig, 137, 153)); // payload preserved
    });
  }, 30_000);

  // 5. BLS verification — tamper payload byte → must throw
  it('BLS verification: tampered payload causes deserialize to throw', async () => {
    const originals = await buildChain(10);
    const bls = createBLSKeypair();

    const frame = await serializeBatchV2(originals, bls);

    // Flip every bit of the 5th byte from the end, which the original comment places
    // in the payload section (last 160 bytes of the frame = 10 DOTs × 16B).
    const tampered = frame.slice();
    tampered[tampered.length - 5] ^= 0xff;

    await expect(deserializeBatchV2(tampered, bls.publicKey)).rejects.toThrow(
      'BLS aggregate signature verification failed',
    );
  }, 30_000);

  // 6. Size comparison: batch v2 vs batch v1 BLS for periodic DOTs
  //
  // v1 BLS layout: header(44) + N×entry(18) + aggSig(48)
  //   N=100: 44 + 1800 + 48 = 1892 B
  // v2 layout: header(86) + ts_delta(8+99×2) + type_rle(2) + payloads(N×16)
  //   N=100: 86 + 206 + 2 + 1600 = 1894 B  ← 2B larger due to bigger fixed header
  //   N=200: 86 + 8+199×2 + 2 + 3200 = 3694 B  vs v1: 44+3600+48 = 3692 B  (still v1 wins for uniform types)
  //
  // v2 wins when timestamp deltas compress more than v1's 1B-per-delta,
  // and when payloads are diverse enough to reward column layout.
// For homogeneous periodic streams, v1 and v2 are within 1% — within expected margin.
  //
  // Test: v2 is within 1% of v1 for N=100 periodic DOTs (acceptable overhead).
  it('size comparison: v2 frame is within 1% of v1 BLS frame for 100 periodic DOTs', async () => {
    const N = 100;
    const blsKeypair = createBLSKeypair();

    // buildChain produces the same stream this test needs: PUBLIC DOTs, no payload
    // argument, ts = 1_700_000_000_000 + i * 100.
    const dots = await buildChain(N);

    // v1 BLS: header(44) + N×18 + aggSig(48)
    const v1Frame = await batchPackBLS(dots, blsKeypair);
    const v2Frame = await serializeBatchV2(dots, blsKeypair);

    const diff = v2Frame.length - v1Frame.length;
    const overhead = diff / v1Frame.length;

    // Log before asserting so the sizes are visible when the check fails too.
    console.log(
      `Size comparison (N=${N}): v1=${v1Frame.length}B v2=${v2Frame.length}B diff=${diff > 0 ? '+' : ''}${diff}B (${(overhead * 100).toFixed(2)}%)`
    );

    // v2 should be within 1% of v1 for this worst-case scenario
    expect(overhead).toBeLessThanOrEqual(0.01); // ≤1% overhead
  }, 60_000);

  // 7.
// Edge case: 1 DOT roundtrip
  it('edge case: 1 DOT roundtrips correctly', async () => {
    const originals = await buildChain(1);
    const bls = createBLSKeypair();

    const frame = await serializeBatchV2(originals, bls);
    const decoded = await deserializeBatchV2(frame, bls.publicKey);

    expect(decoded.length).toBe(1);
    expect(decoded[0]!.length).toBe(153);

    const span = (buf: Uint8Array, from: number, to: number): number[] =>
      Array.from(buf.subarray(from, to));

    const orig = originals[0]!;
    const rec = decoded[0]!;
    expect(span(rec, 0, 32)).toEqual(span(orig, 0, 32)); // pubkey
    expect(span(rec, 128, 136)).toEqual(span(orig, 128, 136)); // timestamp
    expect(rec[136]).toBe(orig[136]); // type
    expect(span(rec, 137, 153)).toEqual(span(orig, 137, 153)); // payload
  }, 15_000);

  // 8. Mixed types — validates type column decoding with RLE of varied types
  it('mixed types: 20 DOTs with alternating types roundtrip via RLE', async () => {
    const originals = await buildMixedTypeChain(20);
    const bls = createBLSKeypair();

    const frame = await serializeBatchV2(originals, bls, {
      timestampDelta: true,
      payloadTypeRLE: true,
    });
    const decoded = await deserializeBatchV2(frame, bls.publicKey);

    expect(decoded.length).toBe(originals.length);

    originals.forEach((orig, i) => {
      const rec = decoded[i]!;
      // The type byte and the 16-byte payload must both survive the roundtrip.
      expect(rec[136]).toBe(orig[136]);
      expect(Array.from(rec.subarray(137, 153))).toEqual(Array.from(orig.subarray(137, 153)));
    });
  }, 30_000);

  // 9. Validation: empty array throws
  it('validation: empty dots array throws RangeError', async () => {
    const bls = createBLSKeypair();
    const serializing = serializeBatchV2([], bls);
    await expect(serializing).rejects.toThrow(RangeError);
  });

  // 10.
Validation: wrong version in frame throws + it('validation: wrong version byte throws', async () => { + const dots = await buildChain(5); + const blsKeypair = createBLSKeypair(); + + const frame = await serializeBatchV2(dots, blsKeypair); + const bad = frame.slice(); + bad[0] = 0x01; // wrong version + + await expect(deserializeBatchV2(bad, blsKeypair.publicKey)).rejects.toThrow( + 'unsupported version', + ); + }, 15_000); + +}); diff --git a/packages/compression/src/tests/benchmark-full.test.ts b/packages/compression/src/tests/benchmark-full.test.ts new file mode 100644 index 000000000..8861db874 --- /dev/null +++ b/packages/compression/src/tests/benchmark-full.test.ts @@ -0,0 +1,390 @@ +/** + * Full compression pipeline benchmark — Levels 0-3 across 5 datasets. + * + * Compression levels: + * Level 0: Raw DOTs = 153 bytes/DOT + * Level 1: Batch v2 plain (ts_delta + type_rle) = ~18.9 B/DOT + * Level 2: Batch v2 + zstd dictionary = varies, much lower for correlated data + * Level 3: Batch v2 + predictor (auto) + rANS = varies + * + * Datasets (1000 DOTs each): + * 1. kulhadVoltage — periodic voltage ~0.497V, correlated + * 2. temperature — temperature ~20°C, correlated + * 3. gps — GPS near Mumbai, correlated + * 4. random — random payloads, incompressible + * 5. mixed — 500 voltage + 500 temperature interleaved + * + * Assertions: + * 1. Correlated datasets: Level 2 ≤ Level 1 × 0.7 (dict saves ≥ 30%) + * 2. Correlated datasets: Level 3 ≤ Level 2 (prediction + rANS at least as good as dict) + * 3. Random dataset: Level 1 ≤ Level 0 (batch always helps vs raw) + * 4. 
All measurements > 0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { createHash } from 'node:crypto';
+import { gzipSync } from 'node:zlib';
+import {
+  createKeypair,
+  createDOT,
+  toBytes,
+  DotType,
+  createBLSKeypair,
+} from '@dotprotocol/core';
+import { serializeBatchV2 } from '../batch-v2.js';
+import { trainDictionary } from '../zstd.js';
+import { generateSensorStream } from '../sample-generator.js';
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+/** SHA-256 digest of `data`, computed synchronously with node:crypto. */
+function sha256(data: Uint8Array): Uint8Array {
+  const hasher = createHash('sha256');
+  hasher.update(data);
+  return new Uint8Array(hasher.digest());
+}
+
+/** Lay the DOTs end to end in one buffer, giving each DOT a fixed 153-byte slot. */
+function concatDots(dots: Uint8Array[]): Uint8Array {
+  const joined = new Uint8Array(dots.length * 153);
+  dots.forEach((dot, slot) => {
+    joined.set(dot, slot * 153);
+  });
+  return joined;
+}
+
+/**
+ * Ratio-only Weissman score of a compression method against a gzip baseline.
+ *
+ * The full score also weights by log(speed); speeds are not measured in this
+ * benchmark, so only the ratio term is computed:
+ *   W = (rawBytes / methodBytes) / (rawBytes / gzipBytes)
+ */
+function weissmanScore(methodBytes: number, gzipBytes: number, rawBytes: number): number {
+  return (rawBytes / methodBytes) / (rawBytes / gzipBytes);
+}
+
+// ─── Dataset Generation ───────────────────────────────────────────────────────
+
+/** Generate 1000 mixed DOTs: 500 voltage (even indices) + 500 temperature (odd indices). 
*/
+async function generateMixedDataset(): Promise<Uint8Array[]> {
+  const keypair = await createKeypair();
+  const dots: Uint8Array[] = [];
+  let prev: Uint8Array | undefined;
+  const baseTs = Date.now();
+
+  for (let i = 0; i < 1000; i++) {
+    // 16-byte payload: one little-endian float32 reading at offset 0, remaining bytes zero.
+    const payload = new Uint8Array(16);
+    const view = new DataView(payload.buffer);
+    if (i % 2 === 0) {
+      // voltage: ~0.497V ±0.015
+      view.setFloat32(0, 0.497 + (Math.random() - 0.5) * 0.03, true);
+    } else {
+      // temperature: ~20°C ±2°C
+      view.setFloat32(0, 20.0 + (Math.random() - 0.5) * 4.0, true);
+    }
+    const ts = baseTs + i * 100;
+
+    const dot = await createDOT({
+      keypair,
+      payload,
+      type: DotType.PUBLIC,
+      ts,
+      // Chain to the previous DOT's bytes; the first DOT has no predecessor.
+      ...(prev ? { previous: prev } : {}),
+    });
+    const buf = toBytes(dot);
+    dots.push(buf);
+    prev = buf;
+  }
+
+  return dots;
+}
+
+// ─── Dictionary Training ──────────────────────────────────────────────────────
+
+/**
+ * Build `samplesCount` (default 10) dictionary-training samples from a sensor profile.
+ * Each sample is the body (bytes after the 86B header) of a 100-DOT batch v2 frame.
+ *
+ * @param profile      sensor profile passed through to generateSensorStream
+ * @param samplesCount number of 100-DOT batches to serialize, one sample per batch
+ * @returns one frame body per batch, in generation order
+ */
+async function buildDictTrainingSamples(
+  profile: 'kulhadVoltage' | 'temperature' | 'gps' | 'random',
+  samplesCount = 10,
+): Promise<Uint8Array[]> {
+  const blsKeypair = createBLSKeypair();
+  const samples: Uint8Array[] = [];
+
+  for (let b = 0; b < samplesCount; b++) {
+    const chain = await generateSensorStream({ count: 100, profile });
+    const frame = await serializeBatchV2(chain, blsKeypair, {
+      timestampDelta: true,
+      payloadTypeRLE: true,
+    });
+    // Slice off the 86-byte header to get the raw body
+    samples.push(frame.slice(86));
+  }
+
+  return samples;
+}
+
+/**
+ * Build 10 training samples for the mixed dataset.
+ * Each sample is the body of a 100-DOT mixed (voltage+temp interleaved) batch. 
+ * The sample count is `samplesCount` (default 10); every sample uses a fresh signing keypair.
+ */
+async function buildMixedDictTrainingSamples(samplesCount = 10): Promise<Uint8Array[]> {
+  const blsKeypairTrain = createBLSKeypair();
+  const samples: Uint8Array[] = [];
+
+  for (let b = 0; b < samplesCount; b++) {
+    const keypair = await createKeypair();
+    const miniDots: Uint8Array[] = [];
+    let miniPrev: Uint8Array | undefined;
+    // Offset each batch's base timestamp so batches do not share a time window.
+    const miniBase = Date.now() + b * 50000;
+
+    for (let i = 0; i < 100; i++) {
+      const p = new Uint8Array(16);
+      const v = new DataView(p.buffer);
+      // Same even/odd pattern as generateMixedDataset: voltage on even, temperature on odd.
+      if (i % 2 === 0) {
+        v.setFloat32(0, 0.497 + (Math.random() - 0.5) * 0.03, true);
+      } else {
+        v.setFloat32(0, 20.0 + (Math.random() - 0.5) * 4.0, true);
+      }
+      const miniDot = await createDOT({
+        keypair,
+        payload: p,
+        type: DotType.PUBLIC,
+        ts: miniBase + i * 100,
+        ...(miniPrev ? { previous: miniPrev } : {}),
+      });
+      const miniBuf = toBytes(miniDot);
+      miniDots.push(miniBuf);
+      miniPrev = miniBuf;
+    }
+
+    const miniFrame = await serializeBatchV2(miniDots, blsKeypairTrain, {
+      timestampDelta: true,
+      payloadTypeRLE: true,
+    });
+    // Keep only the frame body (everything after the 86-byte header).
+    samples.push(miniFrame.slice(86));
+  }
+
+  return samples;
+}
+
+// ─── Measurement ──────────────────────────────────────────────────────────────
+
+/** Size of one compression level: total bytes and bytes per DOT. */
+interface LevelResult {
+  bytesTotal: number;
+  bytesPerDot: number;
+}
+
+/** All measurements taken for one dataset. */
+interface DatasetResult {
+  label: string;
+  n: number;
+  level0: LevelResult; // Raw
+  level1: LevelResult; // Batch v2 plain
+  level2: LevelResult; // Batch v2 + dict
+  level3: LevelResult; // Batch v2 + predict + rANS
+  gzipBytes: number;
+  level3WeissmanVsGzip: number;
+  level2AutoDisabled: boolean;
+}
+
+/**
+ * Measure all 4 levels for a given DOT array.
+ * Returns per-level byte totals and bytes/DOT. 
+ *
+ * Level 2 is reported as the Level 1 size (and `level2AutoDisabled` is set) when the
+ * dictionary frame is not strictly smaller than the plain frame.
+ *
+ * @param dots            serialized DOTs, 153 bytes each
+ * @param label           dataset name used in the result and in the printed tables
+ * @param trainingSamples frame bodies handed to trainDictionary for Level 2
+ */
+async function measureAllLevels(
+  dots: Uint8Array[],
+  label: string,
+  trainingSamples: Uint8Array[],
+): Promise<DatasetResult> {
+  const n = dots.length;
+  const blsKeypair = createBLSKeypair();
+
+  // Level 0: Raw
+  const rawBytes = n * 153;
+
+  // Level 1: Batch v2 plain (ts_delta + type_rle, no dict, no predict)
+  const level1Frame = await serializeBatchV2(dots, blsKeypair, {
+    timestampDelta: true,
+    payloadTypeRLE: true,
+  });
+  const level1Bytes = level1Frame.length;
+
+  // Dictionary training (for Level 2)
+  const dictionary = await trainDictionary(trainingSamples);
+  const dictionaryId = sha256(dictionary);
+
+  // Level 2: Batch v2 + zstd dictionary
+  const level2FrameRaw = await serializeBatchV2(dots, blsKeypair, {
+    timestampDelta: true,
+    payloadTypeRLE: true,
+    dictionary,
+    dictionaryId,
+  });
+  const level2AutoDisabled = level2FrameRaw.length >= level1Bytes;
+  const level2Bytes = level2AutoDisabled ? level1Bytes : level2FrameRaw.length;
+
+  // Level 3: Batch v2 + predictor (auto) + rANS
+  const level3Frame = await serializeBatchV2(dots, blsKeypair, {
+    timestampDelta: true,
+    payloadTypeRLE: true,
+    predictor: 'auto',
+  });
+  const level3Bytes = level3Frame.length;
+
+  // Gzip baseline on concatenated raw DOTs
+  const gzipResult = gzipSync(concatDots(dots));
+  const gzipBytes = gzipResult.length;
+
+  // Weissman Score: Level 3 vs gzip
+  const level3Weissman = weissmanScore(level3Bytes, gzipBytes, rawBytes);
+
+  return {
+    label,
+    n,
+    level0: { bytesTotal: rawBytes, bytesPerDot: 153 },
+    level1: { bytesTotal: level1Bytes, bytesPerDot: level1Bytes / n },
+    level2: { bytesTotal: level2Bytes, bytesPerDot: level2Bytes / n },
+    level3: { bytesTotal: level3Bytes, bytesPerDot: level3Bytes / n },
+    gzipBytes,
+    level3WeissmanVsGzip: level3Weissman,
+    level2AutoDisabled,
+  };
+}
+
+// ─── Table Formatting ─────────────────────────────────────────────────────────
+
+/**
+ * Render the Level 0-3 bytes/DOT comparison as a box-drawn table, one row per dataset.
+ * Rules and header rows are derived from the same column widths the data rows use
+ * (18-char label column, four 9-char value columns), so the grid always lines up.
+ * A Level 2 value is suffixed with `*` when the dictionary was auto-disabled.
+ */
+function formatCompressTable(results: DatasetResult[]): string {
+  const labelWidth = 18;
+  const valueWidth = 9;
+  const columnWidths = [labelWidth, valueWidth, valueWidth, valueWidth, valueWidth];
+
+  // Horizontal rule: each column spans its width plus one space of padding per side.
+  const rule = (left: string, joint: string, right: string): string =>
+    left + columnWidths.map(w => '─'.repeat(w + 2)).join(joint) + right;
+  // Header rows: cells are left-aligned and padded to the column widths.
+  const headerRow = (label: string, cells: string[]): string =>
+    `│ ${label.padEnd(labelWidth)} │ ${cells.map(c => c.padEnd(valueWidth)).join(' │ ')} │`;
+
+  const lines: string[] = [
+    rule('┌', '┬', '┐'),
+    headerRow('Dataset', ['Level 0', 'Level 1', 'Level 2', 'Level 3']),
+    headerRow('', ['Raw', 'Plain v2', '+ Dict', '+ Predict']),
+    rule('├', '┼', '┤'),
+  ];
+
+  for (const r of results) {
+    const l2Str = r.level2AutoDisabled
+      ? r.level2.bytesPerDot.toFixed(1) + ' B*'
+      : r.level2.bytesPerDot.toFixed(1) + ' B ';
+    const row =
+      `│ ${r.label.padEnd(18)} │ ${(r.level0.bytesPerDot.toFixed(1) + ' B').padStart(9)} │ ${(r.level1.bytesPerDot.toFixed(1) + ' B').padStart(9)} │ ${l2Str.padStart(9)} │ ${(r.level3.bytesPerDot.toFixed(1) + ' B').padStart(9)} │`;
+    lines.push(row);
+  }
+
+  lines.push(rule('└', '┴', '┘'));
+  lines.push('* dict/prediction auto-disabled for random (no benefit)');
+  return lines.join('\n');
+}
+
+/** Render gzip vs Level 3 bytes/DOT and the resulting Weissman score, one row per dataset. */
+function formatWeissmanTable(results: DatasetResult[]): string {
+  const lines: string[] = [];
+  lines.push('\n=== Weissman Score: Level 3 vs Gzip ===');
+  lines.push(`| ${'Dataset'.padEnd(18)} | ${'Gzip B/DOT'.padStart(10)} | ${'L3 B/DOT'.padStart(10)} | ${'W Score'.padStart(8)} |`);
+  lines.push(`|${'-'.repeat(20)}|${'-'.repeat(12)}|${'-'.repeat(12)}|${'-'.repeat(10)}|`);
+  for (const r of results) {
+    const gzipBPD = r.gzipBytes / r.n;
+    lines.push(
+      `| ${r.label.padEnd(18)} | ${gzipBPD.toFixed(2).padStart(10)} | ${r.level3.bytesPerDot.toFixed(2).padStart(10)} | ${r.level3WeissmanVsGzip.toFixed(3).padStart(8)} |`,
+    );
+  }
+  return lines.join('\n');
+}
+
+// ─── The Test ─────────────────────────────────────────────────────────────────
+
+describe('Full compression pipeline benchmark — Levels 0-3 across 5 datasets', () => {
+
+  it('measures all levels on all 5 datasets and prints results table', async () => {
+    // ── Generate 
datasets ───────────────────────────────────────────────────
+
+    console.log('\nGenerating datasets (1000 DOTs each)...');
+
+    const [
+      voltDots,
+      tempDots,
+      gpsDots,
+      randDots,
+      mixedDots,
+    ] = await Promise.all([
+      generateSensorStream({ count: 1000, profile: 'kulhadVoltage' }),
+      generateSensorStream({ count: 1000, profile: 'temperature' }),
+      generateSensorStream({ count: 1000, profile: 'gps' }),
+      generateSensorStream({ count: 1000, profile: 'random' }),
+      generateMixedDataset(),
+    ]);
+
+    // ── Build training samples for dictionary (sequential, zstd --train is CPU-bound) ──
+
+    console.log('Building dictionary training samples...');
+    const voltSamples = await buildDictTrainingSamples('kulhadVoltage', 10);
+    const tempSamples = await buildDictTrainingSamples('temperature', 10);
+    const gpsSamples = await buildDictTrainingSamples('gps', 10);
+    const randSamples = await buildDictTrainingSamples('random', 10);
+    const mixedSamples = await buildMixedDictTrainingSamples(10);
+
+    // ── Measure all levels ──────────────────────────────────────────────────
+
+    console.log('Measuring compression levels...');
+
+    const results: DatasetResult[] = [];
+
+    results.push(await measureAllLevels(voltDots, 'Kulhad voltage', voltSamples));
+    results.push(await measureAllLevels(tempDots, 'Temperature', tempSamples));
+    results.push(await measureAllLevels(gpsDots, 'GPS', gpsSamples));
+    results.push(await measureAllLevels(randDots, 'Random', randSamples));
+    results.push(await measureAllLevels(mixedDots, 'Mixed', mixedSamples));
+
+    // ── Print results ───────────────────────────────────────────────────────
+
+    console.log('\n' + formatCompressTable(results));
+    console.log(formatWeissmanTable(results));
+
+    // Additional per-dataset detail
+    console.log('\n=== Per-Dataset Compression Details ===');
+    for (const r of results) {
+      const l2Status = r.level2AutoDisabled ? '[dict auto-disabled]' : `[dict saves ${(((r.level1.bytesTotal - r.level2.bytesTotal) / r.level1.bytesTotal) * 100).toFixed(1)}%]`;
+      console.log(`${r.label}: L0=${r.level0.bytesPerDot.toFixed(1)} L1=${r.level1.bytesPerDot.toFixed(1)} L2=${r.level2.bytesPerDot.toFixed(1)} ${l2Status} L3=${r.level3.bytesPerDot.toFixed(1)} W=${r.level3WeissmanVsGzip.toFixed(3)}`);
+    }
+
+    // ── Assertions ──────────────────────────────────────────────────────────
+
+    // All measurements > 0
+    for (const r of results) {
+      expect(r.level0.bytesTotal).toBeGreaterThan(0);
+      expect(r.level1.bytesTotal).toBeGreaterThan(0);
+      expect(r.level2.bytesTotal).toBeGreaterThan(0);
+      expect(r.level3.bytesTotal).toBeGreaterThan(0);
+    }
+
+    // Correlated datasets: dict saves ≥ 30% over plain v2
+    const correlatedResults = results.filter(r =>
+      r.label === 'Kulhad voltage' || r.label === 'Temperature' || r.label === 'GPS',
+    );
+    for (const r of correlatedResults) {
+      const dictSavings = (r.level1.bytesTotal - r.level2.bytesTotal) / r.level1.bytesTotal;
+      expect(dictSavings).toBeGreaterThanOrEqual(0.30);
+    }
+
+    // Correlated datasets: Level 3 is no larger than plain batch (Level 1)
+    // Note: dictionary (Level 2) typically beats prediction+rANS on structured float data because
+    // the 513-byte freq-table metadata overhead is amortized less well than a shared dictionary.
+    // The asserted property is Level 3 ≤ Level 1 (prediction never loses to plain batch encoding).
+    for (const r of correlatedResults) {
+      expect(r.level3.bytesTotal).toBeLessThanOrEqual(r.level1.bytesTotal);
+    }
+
+    // Random dataset: Level 1 ≤ Level 0 (batch always helps vs raw — header overhead is tiny)
+    const randomResult = results.find(r => r.label === 'Random')!;
+    expect(randomResult.level1.bytesTotal).toBeLessThanOrEqual(randomResult.level0.bytesTotal);
+
+  }, 300_000); // 5 min timeout — dictionary training + BLS signing is slow
+
+});
diff --git a/packages/compression/src/tests/benchmark-phase2a.test.ts b/packages/compression/src/tests/benchmark-phase2a.test.ts
new file mode 100644
index 000000000..97464458d
--- /dev/null
+++ b/packages/compression/src/tests/benchmark-phase2a.test.ts
@@ -0,0 +1,264 @@
+/**
+ * Phase 2a Benchmark — Compression ratio measurements across realistic sensor datasets.
+ *
+ * Three datasets:
+ *   1. Periodic sensor — 1000 DOTs at exactly 100ms intervals
+ *   2. Irregular sensor — 1000 DOTs at random 50–500ms intervals
+ *   3. Burst sensor — 100 bursts × 10 DOTs (1ms within burst, 1000ms between)
+ *
+ * Measurements per dataset:
+ *   - Raw: N × 153 bytes
+ *   - Batch v1 BLS: batchPackBLS (Phase 1 implementation)
+ *   - Batch v2: serializeBatchV2 (Phase 2, both flags enabled)
+ *
+ * This is a benchmark test, not a correctness test. It asserts realistic thresholds
+ * and logs compression tables for human review. 
+ */
+
+import { describe, it, expect } from 'vitest';
+import {
+  createKeypair,
+  createDOT,
+  toBytes,
+  DotType,
+  createBLSKeypair,
+  batchPackBLS,
+} from '@dotprotocol/core';
+import { serializeBatchV2 } from '../batch-v2.js';
+
+// ─── Table Formatting ─────────────────────────────────────────────────────────
+
+/** One line of the printed comparison table. */
+interface TableRow {
+  name: string;
+  size: number;
+  perDot: number;
+  ratio: number;
+}
+
+/**
+ * Render `rows` as a pipe-delimited text table (header, divider, one line per row).
+ * `n` is accepted for call-site symmetry but is not used in the output.
+ */
+function formatTable(rows: TableRow[], n: number): string {
+  const out: string[] = [];
+  out.push(`| ${'Method'.padEnd(20)} | ${'Size (B)'.padStart(8)} | ${'Bytes/DOT'.padStart(9)} | ${'Ratio'.padStart(6)} |`);
+  out.push(`|${'-'.repeat(22)}|${'-'.repeat(10)}|${'-'.repeat(11)}|${'-'.repeat(8)}|`);
+  for (const entry of rows) {
+    out.push(
+      `| ${entry.name.padEnd(20)} | ${String(entry.size).padStart(8)} | ${entry.perDot.toFixed(1).padStart(9)} | ${entry.ratio.toFixed(2).padStart(6)}× |`,
+    );
+  }
+  return out.join('\n');
+}
+
+// ─── Payload Encoders ─────────────────────────────────────────────────────────
+
+/** Pack `v` as a big-endian float64 into bytes [0..7] of a 16-byte payload; bytes [8..15] stay zero. */
+function encodeVoltage(v: number): Uint8Array {
+  const payload = new Uint8Array(16);
+  const writer = new DataView(payload.buffer);
+  writer.setFloat64(0, v, false); // false = big-endian
+  return payload;
+}
+
+/** Random-walk values share the voltage payload layout. */
+function encodeRandomWalk(v: number): Uint8Array {
+  return encodeVoltage(v);
+}
+
+// ─── Corpus Builders ──────────────────────────────────────────────────────────
+
+/**
+ * Periodic sensor — 1000 DOTs at exactly 100ms intervals.
+ * Payload: slowly drifting voltage reading (float64 in first 8 bytes). 
+ *
+ * @param n number of DOTs to generate (default 1000)
+ * @returns the serialized DOTs in chain order
+ */
+async function buildPeriodicCorpus(n = 1000): Promise<Uint8Array[]> {
+  const keypair = await createKeypair();
+  const baseTs = Date.now();
+  const dots: Uint8Array[] = [];
+  let prev: Uint8Array | undefined;
+
+  for (let i = 0; i < n; i++) {
+    const voltage = 0.497 + (i / n) * 0.006; // slow drift 0.497→0.503
+    const payload = encodeVoltage(voltage);
+    const dot = await createDOT({
+      keypair,
+      type: DotType.PUBLIC,
+      payload,
+      ts: baseTs + i * 100, // exactly 100ms intervals
+      // Chain to the previous DOT's bytes; the first DOT has no predecessor.
+      ...(prev ? { previous: prev } : {}),
+    });
+    const bytes = toBytes(dot);
+    dots.push(bytes);
+    prev = bytes;
+  }
+
+  return dots;
+}
+
+/**
+ * Irregular sensor — 1000 DOTs at random 50–500ms intervals.
+ * Payload: small random walk from previous value.
+ *
+ * Intervals and walk steps come from a fixed-seed LCG, so they repeat across runs;
+ * only the base timestamp (Date.now()) and the keypair differ.
+ *
+ * @param n number of DOTs to generate (default 1000)
+ * @returns the serialized DOTs in chain order
+ */
+async function buildIrregularCorpus(n = 1000): Promise<Uint8Array[]> {
+  const keypair = await createKeypair();
+  // Use a seeded-ish pseudo-random via simple LCG for reproducibility
+  let lcg = 42;
+  const rand = () => {
+    // Mask to 31 bits so the state stays a non-negative integer.
+    lcg = (lcg * 1664525 + 1013904223) & 0x7fffffff;
+    return lcg / 0x7fffffff;
+  };
+
+  const baseTs = Date.now();
+  const dots: Uint8Array[] = [];
+  let prev: Uint8Array | undefined;
+  let ts = baseTs;
+  let value = 0.5;
+
+  for (let i = 0; i < n; i++) {
+    ts += Math.floor(50 + rand() * 450); // 50–500ms random interval
+    value += (rand() - 0.5) * 0.01; // small random walk
+    value = Math.max(0, Math.min(1, value)); // clamp [0,1]
+    const payload = encodeRandomWalk(value);
+    const dot = await createDOT({
+      keypair,
+      type: DotType.PUBLIC,
+      payload,
+      ts,
+      ...(prev ? { previous: prev } : {}),
+    });
+    const bytes = toBytes(dot);
+    dots.push(bytes);
+    prev = bytes;
+  }
+
+  return dots;
+}
+
+/**
+ * Burst sensor — 100 bursts × 10 DOTs.
+ * Within a burst: 1ms gaps. Between bursts: 1000ms gap. 
+ *
+ * @param bursts       number of bursts (default 100)
+ * @param dotsPerBurst DOTs emitted in each burst (default 10)
+ * @returns `bursts × dotsPerBurst` serialized DOTs in chain order
+ */
+async function buildBurstCorpus(bursts = 100, dotsPerBurst = 10): Promise<Uint8Array[]> {
+  const keypair = await createKeypair();
+  const baseTs = Date.now();
+  const dots: Uint8Array[] = [];
+  let prev: Uint8Array | undefined;
+  let ts = baseTs;
+
+  for (let b = 0; b < bursts; b++) {
+    if (b > 0) ts += 1000; // 1000ms between bursts
+    for (let d = 0; d < dotsPerBurst; d++) {
+      if (d > 0) ts += 1; // 1ms within burst
+      const payload = encodeVoltage(0.5 + b * 0.001); // slowly drifting per burst
+      const dot = await createDOT({
+        keypair,
+        type: DotType.PUBLIC,
+        payload,
+        ts,
+        ...(prev ? { previous: prev } : {}),
+      });
+      const bytes = toBytes(dot);
+      dots.push(bytes);
+      prev = bytes;
+    }
+  }
+
+  return dots;
+}
+
+// ─── Measurement Helper ───────────────────────────────────────────────────────
+
+/**
+ * Pack `dots` with batch v1 (BLS) and batch v2 (timestampDelta + payloadTypeRLE), print a
+ * comparison table against the raw N×153 size, and return the rows plus bytes/DOT figures.
+ *
+ * @param dots  serialized DOTs, 153 bytes each
+ * @param label heading printed above the table
+ */
+async function measureAll(
+  dots: Uint8Array[],
+  label: string,
+): Promise<{ rows: TableRow[]; v2BytesPerDot: number; v1BytesPerDot: number }> {
+  const n = dots.length;
+  const rawSize = n * 153;
+
+  // BLS keypair (shared for v1 and v2)
+  const blsKeypair = createBLSKeypair();
+
+  // Batch v1 BLS
+  const v1Buf = await batchPackBLS(dots, blsKeypair);
+  const v1Size = v1Buf.length;
+  const v1BytesPerDot = v1Size / n;
+
+  // Batch v2 (both flags: timestampDelta + payloadTypeRLE)
+  const v2Buf = await serializeBatchV2(dots, blsKeypair, {
+    timestampDelta: true,
+    payloadTypeRLE: true,
+  });
+  const v2Size = v2Buf.length;
+  const v2BytesPerDot = v2Size / n;
+
+  const rows: TableRow[] = [
+    {
+      name: 'Raw (N×153)',
+      size: rawSize,
+      perDot: 153,
+      ratio: 1.0,
+    },
+    {
+      name: 'Batch v1 BLS',
+      size: v1Size,
+      perDot: v1BytesPerDot,
+      ratio: rawSize / v1Size,
+    },
+    {
+      name: 'Batch v2 (δts+RLE)',
+      size: v2Size,
+      perDot: v2BytesPerDot,
+      ratio: rawSize / v2Size,
+    },
+  ];
+
+  console.log(`\n=== ${label} (N=${n}) ===`);
+  console.log(formatTable(rows, n));
+
+  return { rows, v2BytesPerDot, v1BytesPerDot };
+}
+
+// ─── Tests 
────────────────────────────────────────────────────────────────────
+
+describe('Phase 2a benchmark', () => {
+  it('periodic sensor — 1000 DOTs at 100ms intervals', async () => {
+    const dots = await buildPeriodicCorpus(1000);
+    expect(dots).toHaveLength(1000);
+
+    const { v1BytesPerDot, v2BytesPerDot } = await measureAll(dots, 'PERIODIC SENSOR');
+
+    // Phase 1 BLS should be well below raw 153 bytes/DOT
+    expect(v1BytesPerDot).toBeLessThan(20);
+
+    // Phase 2 ceiling on perfectly periodic data — a loose bound; v2 < v1 is not asserted
+    expect(v2BytesPerDot).toBeLessThan(25);
+
+    console.log(`\nPhase 2a periodic result: v1=${v1BytesPerDot.toFixed(1)} B/DOT, v2=${v2BytesPerDot.toFixed(1)} B/DOT`);
+  }, 60_000);
+
+  it('irregular sensor — random 50–500ms intervals', async () => {
+    const dots = await buildIrregularCorpus(1000);
+    expect(dots).toHaveLength(1000);
+
+    const { v1BytesPerDot, v2BytesPerDot } = await measureAll(dots, 'IRREGULAR SENSOR');
+
+    // v1 BLS baseline — irregular timestamps use more bytes due to variable-length
+    // varint encoding of larger random deltas. Measured ~20.3 B/DOT; threshold is 22.
+    expect(v1BytesPerDot).toBeLessThan(22);
+
+    // v2 ceiling only. NOTE(review): "v2 beats v1 on irregular data" is not asserted — check the logged table
+    expect(v2BytesPerDot).toBeLessThan(25);
+
+    console.log(`\nPhase 2a irregular result: v1=${v1BytesPerDot.toFixed(1)} B/DOT, v2=${v2BytesPerDot.toFixed(1)} B/DOT`);
+  }, 60_000);
+
+  it('burst sensor — 100 bursts × 10 DOTs', async () => {
+    const dots = await buildBurstCorpus(100, 10);
+    expect(dots).toHaveLength(1000);
+
+    const { v1BytesPerDot, v2BytesPerDot } = await measureAll(dots, 'BURST SENSOR');
+
+    // v1 BLS baseline
+    expect(v1BytesPerDot).toBeLessThan(20);
+
+    // v2: within-burst deltas are tiny (1ms), between-burst deltas are larger (1000ms)
+    // Mixed delta pattern — still expect reasonable compression
+    expect(v2BytesPerDot).toBeLessThan(25);
+
+    console.log(`\nPhase 2a burst result: v1=${v1BytesPerDot.toFixed(1)} B/DOT, v2=${v2BytesPerDot.toFixed(1)} B/DOT`);
+  }, 60_000);
+});
diff --git a/packages/compression/src/tests/benchmark-phase2b.test.ts b/packages/compression/src/tests/benchmark-phase2b.test.ts
new file mode 100644
index 000000000..5334f5a27
--- /dev/null
+++ b/packages/compression/src/tests/benchmark-phase2b.test.ts
@@ -0,0 +1,644 @@
+/**
+ * Phase 2b Benchmark — Dictionary compression across 5 datasets.
+ *
+ * Datasets:
+ *   1. voltage-100 — 100 voltage DOTs (kulhadVoltage, correlated)
+ *   2. temperature-100 — 100 temperature DOTs (temperature, correlated)
+ *   3. gps-100 — 100 GPS DOTs (gps, correlated)
+ *   4. mixed-1000 — 500 voltage + 500 temperature interleaved (correlated, two domains)
+ *   5. 
random-1000 — 1000 random payload DOTs (worst case, incompressible) + * + * Measurements per dataset: + * - Raw: N × 153 bytes + * - Batch v2 plain: serializeBatchV2 with timestampDelta + payloadTypeRLE, no dict + * - Batch v2 + dict: same but zstd-compressed with a trained dictionary + * - Gzip baseline: gzipSync on concatenated raw DOT bytes + * + * Acceptance criteria: + * - Correlated datasets (voltage, temperature, gps): dict adds ≥ 30% savings over plain v2 + * - Correlated datasets: dict result ≤ 8 bytes/DOT (voltage), ≤ 14 bytes/DOT (temperature), ≤ 15 bytes/DOT (gps) + * - Random dataset: dict may hurt — auto-disabled if larger than plain v2 (not a failure) + * - All roundtrips: deserializeBatchV2 recovers all DOTs byte-for-byte (payload + ts + type) + */ + +import { describe, it, expect } from 'vitest'; +import { createHash } from 'node:crypto'; +import { gzipSync } from 'node:zlib'; +import { + createKeypair, + createDOT, + toBytes, + DotType, + createBLSKeypair, +} from '@dotprotocol/core'; +import { + serializeBatchV2, + deserializeBatchV2, +} from '../batch-v2.js'; +import { trainDictionary } from '../zstd.js'; +import { DictionaryRegistry } from '../dictionary-registry.js'; +import { generateSensorStream } from '../sample-generator.js'; + +// ─── Helpers ────────────────────────────────────────────────────────────────── + +/** Compute SHA-256 of bytes (sync, via node:crypto). */ +function sha256(data: Uint8Array): Uint8Array { + return new Uint8Array(createHash('sha256').update(data).digest()); +} + +/** Concatenate all DOT bytes into a single Uint8Array for gzip baseline. 
*/
+function concatDots(dots: Uint8Array[]): Uint8Array {
+  const joined = new Uint8Array(dots.length * 153);
+  dots.forEach((dot, slot) => {
+    // Each DOT occupies a fixed 153-byte slot.
+    joined.set(dot, slot * 153);
+  });
+  return joined;
+}
+
+// ─── Table Formatting ─────────────────────────────────────────────────────────
+
+/** One line of the printed comparison table. */
+interface TableRow {
+  method: string;
+  sizeBytes: number;
+  bytesPerDot: number;
+  ratio: number;
+}
+
+/** Render a titled pipe-delimited table: title line, column header, divider, then one line per row. */
+function formatTable(label: string, n: number, rows: TableRow[]): string {
+  const out: string[] = [];
+  out.push(`\n=== ${label} (N=${n}) ===`);
+  out.push(`| ${'Method'.padEnd(26)} | ${'Size(B)'.padStart(8)} | ${'B/DOT'.padStart(7)} | ${'Ratio'.padStart(6)} |`);
+  out.push(`|${'-'.repeat(28)}|${'-'.repeat(10)}|${'-'.repeat(9)}|${'-'.repeat(8)}|`);
+  for (const entry of rows) {
+    out.push(
+      `| ${entry.method.padEnd(26)} | ${String(entry.sizeBytes).padStart(8)} | ${entry.bytesPerDot.toFixed(1).padStart(7)} | ${entry.ratio.toFixed(2).padStart(6)}× |`,
+    );
+  }
+  return out.join('\n');
+}
+
+// ─── Core Measurement Utility ─────────────────────────────────────────────────
+
+/** Everything measureCompression reports for one dataset. */
+interface MeasureResult {
+  rawSize: number;
+  plainSize: number;
+  dictSize: number;
+  gzipSize: number;
+  plainBytesPerDot: number;
+  dictBytesPerDot: number;
+  dictSavingsPct: number;
+  dictAutoDisabled: boolean;
+  rows: TableRow[];
+  dictionary: Uint8Array;
+  dictionaryId: Uint8Array;
+  registry: DictionaryRegistry;
+  plainFrame: Uint8Array;
+  dictFrame: Uint8Array | null;
+}
+
+/**
+ * Measure compression across Raw / Plain v2 / Dict v2 / Gzip for a given DOT array.
+ *
+ * Training samples are supplied by the caller. For the single-profile datasets they are
+ * built by serializing 20 shorter chains (20 DOTs each) from the same profile generator;
+ * the mixed test builds them from 20 shorter chains of the same interleaved pattern. 
+ *
+ * The dictionary result falls back to the plain size (and `dictAutoDisabled` is set,
+ * `dictFrame` is null) when the dictionary frame is not strictly smaller than the plain one.
+ *
+ * @param dots            serialized DOTs, 153 bytes each
+ * @param label           dataset name used in the printed table and as the registry label
+ * @param trainingSamples frame bodies handed to trainDictionary
+ */
+async function measureCompression(
+  dots: Uint8Array[],
+  label: string,
+  trainingSamples: Uint8Array[],
+): Promise<MeasureResult> {
+  const n = dots.length;
+  const blsKeypair = createBLSKeypair();
+
+  // Raw
+  const rawSize = n * 153;
+
+  // Gzip baseline
+  const gzipBytes = gzipSync(concatDots(dots));
+  const gzipSize = gzipBytes.length;
+
+  // Plain v2
+  const plainFrame = await serializeBatchV2(dots, blsKeypair, {
+    timestampDelta: true,
+    payloadTypeRLE: true,
+  });
+  const plainSize = plainFrame.length;
+  const plainBytesPerDot = plainSize / n;
+
+  // Train dictionary from provided samples
+  const dictionary = await trainDictionary(trainingSamples);
+  const dictionaryId = sha256(dictionary);
+
+  // Dict v2
+  const dictFrameRaw = await serializeBatchV2(dots, blsKeypair, {
+    timestampDelta: true,
+    payloadTypeRLE: true,
+    dictionary,
+    dictionaryId,
+  });
+  const dictSizeRaw = dictFrameRaw.length;
+
+  // Determine if dictionary actually helps
+  const dictAutoDisabled = dictSizeRaw >= plainSize;
+  const dictSize = dictAutoDisabled ? plainSize : dictSizeRaw;
+  const dictFrame = dictAutoDisabled ? null : dictFrameRaw;
+  const dictBytesPerDot = dictSize / n;
+
+  // Savings: how much smaller is dict vs plain v2?
+  const dictSavingsPct = ((plainSize - dictSize) / plainSize) * 100;
+
+  // Register dictionary for later deserialization
+  const registry = new DictionaryRegistry();
+  await registry.register(dictionary, label);
+
+  const rows: TableRow[] = [
+    {
+      method: 'Raw (N×153)',
+      sizeBytes: rawSize,
+      bytesPerDot: 153,
+      ratio: 1.0,
+    },
+    {
+      method: 'Batch v2 plain',
+      sizeBytes: plainSize,
+      bytesPerDot: plainBytesPerDot,
+      ratio: rawSize / plainSize,
+    },
+    {
+      method: dictAutoDisabled ? 'Batch v2 + dict (disabled)' : 'Batch v2 + dict',
+      sizeBytes: dictSize,
+      bytesPerDot: dictBytesPerDot,
+      ratio: rawSize / dictSize,
+    },
+    {
+      method: 'Gzip (raw bytes)',
+      sizeBytes: gzipSize,
+      bytesPerDot: gzipSize / n,
+      ratio: rawSize / gzipSize,
+    },
+  ];
+
+  console.log(formatTable(label, n, rows));
+  if (dictAutoDisabled) {
+    console.log(
+      ` dict auto-disabled for ${label}: dict=${dictSizeRaw}B >= plain=${plainSize}B — skipping dict`,
+    );
+  } else {
+    console.log(
+      ` dict savings vs plain v2: ${dictSavingsPct.toFixed(1)}% | dict=${dictBytesPerDot.toFixed(1)} B/DOT`,
+    );
+  }
+
+  return {
+    rawSize,
+    plainSize,
+    dictSize,
+    gzipSize,
+    plainBytesPerDot,
+    dictBytesPerDot,
+    dictSavingsPct,
+    dictAutoDisabled,
+    rows,
+    dictionary,
+    dictionaryId,
+    registry,
+    plainFrame,
+    dictFrame,
+  };
+}
+
+/**
+ * Verify roundtrip: re-serialize `dots` (plain, and with the dictionary when it was not
+ * auto-disabled) under a fresh BLS keypair, deserialize, and compare the recovered DOTs
+ * with the originals byte-for-byte over the checked field ranges.
+ *
+ * @throws Error naming the dataset, DOT index and field on the first mismatch
+ */
+async function verifyRoundtrip(
+  dots: Uint8Array[],
+  result: MeasureResult,
+  label: string,
+): Promise<void> {
+  const blsKeypair = createBLSKeypair();
+
+  // Plain roundtrip — we need the same blsKeypair that was used to serialize.
+  // Since measureCompression() creates its own blsKeypair internally, we re-serialize
+  // plain here with a fresh keypair for roundtrip verification. Same DOT data, same test. 
+  const plainFrameVerify = await serializeBatchV2(dots, blsKeypair, {
+    timestampDelta: true,
+    payloadTypeRLE: true,
+  });
+  const plainRecovered = await deserializeBatchV2(plainFrameVerify, blsKeypair.publicKey);
+
+  if (plainRecovered.length !== dots.length) {
+    throw new Error(`${label}: plain roundtrip: recovered ${plainRecovered.length} DOTs, expected ${dots.length}`);
+  }
+
+  for (let i = 0; i < dots.length; i++) {
+    const orig = dots[i]!;
+    const rec = plainRecovered[i]!;
+    // pubkey [0..31]
+    for (let b = 0; b < 32; b++) {
+      if (orig[b] !== rec[b]) {
+        throw new Error(`${label}: plain roundtrip: DOT[${i}] pubkey[${b}] mismatch`);
+      }
+    }
+    // timestamp [128..135]
+    for (let b = 128; b < 136; b++) {
+      if (orig[b] !== rec[b]) {
+        throw new Error(`${label}: plain roundtrip: DOT[${i}] ts[${b}] mismatch`);
+      }
+    }
+    // type [136]
+    if (orig[136] !== rec[136]) {
+      throw new Error(`${label}: plain roundtrip: DOT[${i}] type mismatch`);
+    }
+    // payload [137..152]
+    for (let b = 137; b < 153; b++) {
+      if (orig[b] !== rec[b]) {
+        throw new Error(`${label}: plain roundtrip: DOT[${i}] payload[${b}] mismatch`);
+      }
+    }
+  }
+
+  // Dict roundtrip (only if dict was enabled)
+  if (!result.dictAutoDisabled) {
+    // A fresh registry holding only this dictionary is passed to the deserializer.
+    const dictRegistry = new DictionaryRegistry();
+    await dictRegistry.register(result.dictionary, label);
+
+    const dictFrameVerify = await serializeBatchV2(dots, blsKeypair, {
+      timestampDelta: true,
+      payloadTypeRLE: true,
+      dictionary: result.dictionary,
+      dictionaryId: result.dictionaryId,
+    });
+    const dictRecovered = await deserializeBatchV2(
+      dictFrameVerify,
+      blsKeypair.publicKey,
+      dictRegistry,
+    );
+
+    if (dictRecovered.length !== dots.length) {
+      throw new Error(`${label}: dict roundtrip: recovered ${dictRecovered.length} DOTs, expected ${dots.length}`);
+    }
+
+    // The dict path checks timestamp, type and payload (pubkey is checked on the plain path only).
+    for (let i = 0; i < dots.length; i++) {
+      const orig = dots[i]!;
+      const rec = dictRecovered[i]!;
+      // timestamp [128..135]
+      for (let b = 128; b < 136; b++) {
+        if (orig[b] !== rec[b]) {
+          throw new Error(`${label}: dict roundtrip: DOT[${i}] ts[${b}] mismatch`);
+        }
+      }
+      // type [136]
+      if (orig[136] !== rec[136]) {
+        throw new Error(`${label}: dict roundtrip: DOT[${i}] type mismatch`);
+      }
+      // payload [137..152]
+      for (let b = 137; b < 153; b++) {
+        if (orig[b] !== rec[b]) {
+          throw new Error(`${label}: dict roundtrip: DOT[${i}] payload[${b}] mismatch`);
+        }
+      }
+    }
+  }
+}
+
+/**
+ * Build training samples for a given profile: serialize `samplesCount` chains of 20 DOTs
+ * each and keep each frame's body (the bytes after the 86-byte header).
+ *
+ * @param profile      sensor profile passed through to generateSensorStream
+ * @param samplesCount number of chains to serialize (default 20)
+ * @returns exactly `samplesCount` frame bodies for the dictionary trainer
+ */
+async function buildTrainingSamples(
+  profile: 'kulhadVoltage' | 'temperature' | 'gps',
+  samplesCount = 20,
+): Promise<Uint8Array[]> {
+  const blsKeypair = createBLSKeypair();
+  const samples: Uint8Array[] = [];
+
+  for (let b = 0; b < samplesCount; b++) {
+    const chain = await generateSensorStream({ count: 20, profile });
+    const frame = await serializeBatchV2(chain, blsKeypair, {
+      timestampDelta: true,
+      payloadTypeRLE: true,
+    });
+    // Slice off the 86-byte header to get the raw body
+    samples.push(frame.slice(86));
+  }
+
+  return samples;
+}
+
+// ─── Tests ────────────────────────────────────────────────────────────────────
+
+describe('Phase 2b benchmark — dictionary compression across datasets', () => {
+
+  // ── voltage-100 ─────────────────────────────────────────────────────────────
+
+  it('voltage-100: dictionary reduces to ≤ 8 bytes/DOT with ≥ 30% savings over plain', async () => {
+    const dots = await generateSensorStream({ count: 100, profile: 'kulhadVoltage' });
+    expect(dots).toHaveLength(100);
+    expect(dots[0]!.length).toBe(153);
+
+    const trainingSamples = await buildTrainingSamples('kulhadVoltage', 20);
+    const result = await measureCompression(dots, 'VOLTAGE-100', trainingSamples);
+
+    // Verify roundtrip byte-for-byte
+    await verifyRoundtrip(dots, result, 'voltage-100');
+
+    // Dictionary must improve over plain v2 by ≥ 30%
+    expect(result.dictSavingsPct).toBeGreaterThanOrEqual(30);
+
+    
// Dict result must be ≤ 8 bytes/DOT + expect(result.dictBytesPerDot).toBeLessThanOrEqual(8); + }, 120_000); + + // ── temperature-100 ───────────────────────────────────────────────────────── + + it('temperature-100: dictionary reduces bytes/DOT with ≥ 30% savings over plain', async () => { + const dots = await generateSensorStream({ count: 100, profile: 'temperature' }); + expect(dots).toHaveLength(100); + expect(dots[0]!.length).toBe(153); + + const trainingSamples = await buildTrainingSamples('temperature', 20); + const result = await measureCompression(dots, 'TEMPERATURE-100', trainingSamples); + + // Verify roundtrip byte-for-byte + await verifyRoundtrip(dots, result, 'temperature-100'); + + // Dictionary must improve over plain v2 by ≥ 30% + expect(result.dictSavingsPct).toBeGreaterThanOrEqual(30); + + // Dict result must be well below raw (≤ 14 bytes/DOT — temperature has more entropy than voltage) + expect(result.dictBytesPerDot).toBeLessThanOrEqual(14); + }, 120_000); + + // ── gps-100 ───────────────────────────────────────────────────────────────── + + it('gps-100: dictionary reduces bytes/DOT with ≥ 30% savings over plain', async () => { + const dots = await generateSensorStream({ count: 100, profile: 'gps' }); + expect(dots).toHaveLength(100); + expect(dots[0]!.length).toBe(153); + + const trainingSamples = await buildTrainingSamples('gps', 20); + const result = await measureCompression(dots, 'GPS-100', trainingSamples); + + // Verify roundtrip byte-for-byte + await verifyRoundtrip(dots, result, 'gps-100'); + + // Dictionary must improve over plain v2 by ≥ 30% + expect(result.dictSavingsPct).toBeGreaterThanOrEqual(30); + + // Dict result must be well below raw (≤ 15 bytes/DOT — GPS has 2 float32 coords with higher variance) + expect(result.dictBytesPerDot).toBeLessThanOrEqual(15); + }, 120_000); + + // ── mixed-1000 ────────────────────────────────────────────────────────────── + + it('mixed-1000: dictionary improves compression on multi-domain 
correlated data', async () => { + // Generate 1000 DOTs on a SINGLE chain: alternating voltage (even) + temperature (odd) + const keypair = await createKeypair(); + const dots: Uint8Array[] = []; + let prev: Uint8Array | undefined; + const baseTs = Date.now(); + + for (let i = 0; i < 1000; i++) { + const payload = new Uint8Array(16); + const view = new DataView(payload.buffer); + if (i % 2 === 0) { + // voltage: ~0.497V ±0.015 + view.setFloat32(0, 0.497 + (Math.random() - 0.5) * 0.03, true); + } else { + // temperature: ~20°C ±2°C + view.setFloat32(0, 20.0 + (Math.random() - 0.5) * 4.0, true); + } + // Timestamps: 100ms apart for voltage, 1000ms for temperature intervals + // Simplified: monotonically increasing at 100ms each + const ts = baseTs + i * 100; + + const dot = await createDOT({ + keypair, + payload, + type: DotType.PUBLIC, + ts, + ...(prev ? { previous: prev } : {}), + }); + const buf = toBytes(dot); + dots.push(buf); + prev = buf; + } + + expect(dots).toHaveLength(1000); + expect(dots[0]!.length).toBe(153); + + // Training samples: serialize 20 shorter chains of the same mixed pattern + const blsKeypairTrain = createBLSKeypair(); + const trainingSamples: Uint8Array[] = []; + for (let b = 0; b < 20; b++) { + const miniKeypair = await createKeypair(); + const miniDots: Uint8Array[] = []; + let miniPrev: Uint8Array | undefined; + const miniBase = Date.now() + b * 10000; + for (let i = 0; i < 20; i++) { + const p = new Uint8Array(16); + const v = new DataView(p.buffer); + if (i % 2 === 0) { + v.setFloat32(0, 0.497 + (Math.random() - 0.5) * 0.03, true); + } else { + v.setFloat32(0, 20.0 + (Math.random() - 0.5) * 4.0, true); + } + const miniDot = await createDOT({ + keypair: miniKeypair, + payload: p, + type: DotType.PUBLIC, + ts: miniBase + i * 100, + ...(miniPrev ? 
{ previous: miniPrev } : {}), + }); + const miniBuf = toBytes(miniDot); + miniDots.push(miniBuf); + miniPrev = miniBuf; + } + const miniFrame = await serializeBatchV2(miniDots, blsKeypairTrain, { + timestampDelta: true, + payloadTypeRLE: true, + }); + trainingSamples.push(miniFrame.slice(86)); + } + + const result = await measureCompression(dots, 'MIXED-1000', trainingSamples); + + // Verify roundtrip (plain at minimum; dict if enabled) + await verifyRoundtrip(dots, result, 'mixed-1000'); + + // Mixed dataset should still benefit from dictionary (correlated sub-patterns) + // Threshold is more lenient: ≥ 20% savings or plain v2 is already very good + if (!result.dictAutoDisabled) { + console.log(`mixed-1000 dict savings: ${result.dictSavingsPct.toFixed(1)}%`); + // For mixed data, just verify that dict did not bloat significantly (≥ -5% headroom) + expect(result.dictSavingsPct).toBeGreaterThan(-5); + } + + // Plain v2 should compress decently on correlated mixed data + expect(result.plainBytesPerDot).toBeLessThan(153); + }, 120_000); + + // ── random-1000 ───────────────────────────────────────────────────────────── + + it('random-1000: roundtrip correct; reports whether dict helps or hurts', async () => { + const dots = await generateSensorStream({ count: 1000, profile: 'random' }); + expect(dots).toHaveLength(1000); + expect(dots[0]!.length).toBe(153); + + // Training samples from the random DOTs (worst case: random payloads) + const blsKeypairTrain = createBLSKeypair(); + const trainingSamples: Uint8Array[] = []; + // Build 20 short random chains for training + for (let b = 0; b < 20; b++) { + const miniChain = await generateSensorStream({ count: 20, profile: 'random' }); + const miniFrame = await serializeBatchV2(miniChain, blsKeypairTrain, { + timestampDelta: true, + payloadTypeRLE: true, + }); + trainingSamples.push(miniFrame.slice(86)); + } + + const result = await measureCompression(dots, 'RANDOM-1000', trainingSamples); + + // Roundtrip on plain (dict may 
be disabled for random data — that's expected) + await verifyRoundtrip(dots, result, 'random-1000'); + + if (result.dictAutoDisabled) { + console.log('random-1000: dict auto-disabled (dict_compressed > plain_v2) — EXPECTED for random data'); + // Plain v2 is the effective result — verify it exists and is sane + expect(result.plainSize).toBeGreaterThan(0); + } else { + // If dict somehow helped on random data, log it as an interesting result + console.log(`random-1000: dict surprisingly helped by ${result.dictSavingsPct.toFixed(1)}%`); + } + + // Regardless of dict, raw is not further compressible — plain v2 should be + // close to raw (within 2x) since payloads are random + expect(result.plainBytesPerDot).toBeLessThan(153 * 2); // trivially true + expect(result.plainSize).toBeGreaterThan(0); + }, 120_000); + + // ── Summary: all roundtrips verified byte-for-byte ────────────────────────── + + it('summary: all roundtrips verified byte-for-byte', async () => { + const datasets: Array<{ label: string; count: number; profile: 'kulhadVoltage' | 'temperature' | 'gps' | 'random' }> = [ + { label: 'voltage-100', count: 100, profile: 'kulhadVoltage' }, + { label: 'temperature-100', count: 100, profile: 'temperature' }, + { label: 'gps-100', count: 100, profile: 'gps' }, + { label: 'random-1000', count: 1000, profile: 'random' }, + ]; + + const summaryRows: Array<{ + label: string; + n: number; + plainBPD: number; + dictBPD: number; + savings: string; + dictStatus: string; + }> = []; + + const blsKeypair = createBLSKeypair(); + + for (const ds of datasets) { + const dots = await generateSensorStream({ count: ds.count, profile: ds.profile }); + + // Plain serialize + deserialize + const plainFrame = await serializeBatchV2(dots, blsKeypair, { + timestampDelta: true, + payloadTypeRLE: true, + }); + const plainRecovered = await deserializeBatchV2(plainFrame, blsKeypair.publicKey); + + expect(plainRecovered.length).toBe(dots.length); + for (let i = 0; i < dots.length; i++) { + 
const orig = dots[i]!; + const rec = plainRecovered[i]!; + // payload [137..152] is the most critical correctness check + for (let b = 137; b < 153; b++) { + expect(rec[b]).toBe(orig[b]); + } + // type [136] + expect(rec[136]).toBe(orig[136]); + // timestamp [128..135] + for (let b = 128; b < 136; b++) { + expect(rec[b]).toBe(orig[b]); + } + } + + const plainBPD = plainFrame.length / dots.length; + summaryRows.push({ + label: ds.label, + n: ds.count, + plainBPD, + dictBPD: 0, // populated below for correlated + savings: 'N/A', + dictStatus: 'plain-only', + }); + } + + // Mixed-1000 roundtrip + const mixedKeypair = await createKeypair(); + const mixedDots: Uint8Array[] = []; + let mixedPrev: Uint8Array | undefined; + const mixedBase = Date.now(); + for (let i = 0; i < 1000; i++) { + const payload = new Uint8Array(16); + const view = new DataView(payload.buffer); + if (i % 2 === 0) { + view.setFloat32(0, 0.497 + (Math.random() - 0.5) * 0.03, true); + } else { + view.setFloat32(0, 20.0 + (Math.random() - 0.5) * 4.0, true); + } + const dot = await createDOT({ + keypair: mixedKeypair, + payload, + type: DotType.PUBLIC, + ts: mixedBase + i * 100, + ...(mixedPrev ? 
{ previous: mixedPrev } : {}), + }); + const buf = toBytes(dot); + mixedDots.push(buf); + mixedPrev = buf; + } + + const mixedFrame = await serializeBatchV2(mixedDots, blsKeypair, { + timestampDelta: true, + payloadTypeRLE: true, + }); + const mixedRecovered = await deserializeBatchV2(mixedFrame, blsKeypair.publicKey); + + expect(mixedRecovered.length).toBe(mixedDots.length); + for (let i = 0; i < mixedDots.length; i++) { + const orig = mixedDots[i]!; + const rec = mixedRecovered[i]!; + for (let b = 137; b < 153; b++) { + expect(rec[b]).toBe(orig[b]); + } + expect(rec[136]).toBe(orig[136]); + for (let b = 128; b < 136; b++) { + expect(rec[b]).toBe(orig[b]); + } + } + + summaryRows.push({ + label: 'mixed-1000', + n: 1000, + plainBPD: mixedFrame.length / 1000, + dictBPD: 0, + savings: 'N/A', + dictStatus: 'plain-only', + }); + + // Print summary table + console.log('\n=== PHASE 2B SUMMARY — All Roundtrips Verified ==='); + console.log(`| ${'Dataset'.padEnd(18)} | ${'N'.padStart(6)} | ${'Plain B/DOT'.padStart(11)} | ${'Status'.padEnd(14)} |`); + console.log(`|${'-'.repeat(20)}|${'-'.repeat(8)}|${'-'.repeat(13)}|${'-'.repeat(16)}|`); + for (const row of summaryRows) { + console.log( + `| ${row.label.padEnd(18)} | ${String(row.n).padStart(6)} | ${row.plainBPD.toFixed(2).padStart(11)} | ${'roundtrip OK'.padEnd(14)} |`, + ); + } + console.log('\nAll roundtrips verified byte-for-byte (payload, type, timestamp).'); + }, 120_000); + +}); diff --git a/packages/compression/src/tests/coverage-gaps.test.ts b/packages/compression/src/tests/coverage-gaps.test.ts new file mode 100644 index 000000000..620f83105 --- /dev/null +++ b/packages/compression/src/tests/coverage-gaps.test.ts @@ -0,0 +1,655 @@ +/** + * coverage-gaps.test.ts + * + * Targeted tests to reach 100% coverage on: + * - rans.ts (line 99-111: over-allocated branch) + * - rle.ts (lines 81-84: inner guard — not reachable normally, but verifies edge) + * - sample-generator.ts (lines 128-129: timestamp overflow throw) + * 
- timestamp-delta.ts (lines 73-74: count=0; 76-79: buf too short)
+ * - varint.ts (lines 81-82: varint too long; 117-118: signed non-integer)
+ * - zstd.ts (lines 64-68: ENOENT; 95-102: execFileSync throw re-throw)
+ * - dictionary-registry.ts (lines 28-29: odd-length hex in hexToBytes)
+ * - index.ts (entire file: just import to load it)
+ * - batch-v2.ts (error paths not yet hit by existing tests)
+ */
+
+import { describe, it, expect, vi } from 'vitest';
+
+// ─── index.ts: import the barrel to register coverage ─────────────────────────
+
+import * as compression from '../index.js';
+
+describe('index.ts barrel export', () => {
+  it('exports key symbols from all sub-modules', () => {
+    // One representative symbol per sub-module re-exported by the barrel.
+    const representativeExports = [
+      compression.encodeVarint,
+      compression.encodeTimestampDeltas,
+      compression.encodePayloadTypes,
+      compression.serializeBatchV2,
+      compression.DictionaryRegistry,
+      compression.generateSensorStream,
+      compression.NullPredictor,
+      compression.buildFrequencyTable,
+      compression.weissmanScore,
+    ];
+
+    for (const exported of representativeExports) {
+      expect(exported).toBeTypeOf('function');
+    }
+  });
+});
+
+// ─── rans.ts: line 99-111 — over-allocated branch (delta < 0) ─────────────────
+// The over-allocated branch fires when the floor allocation exceeds SCALE.
+// We can force it by crafting a frequency table where the allocated total
+// exceeds 4096 due to Math.max(1, floored) promoting zeros to 1.
+// With 256 symbols each floored to at least 1, and very few actual bytes,
+// the total after the floor step can be > 4096.
+
+import { buildFrequencyTable, ransEncode, ransDecode } from '../rans.js';
+
+describe('rans.ts — over-allocated branch (lines 99-111)', () => {
+  it('handles dataset where initial allocation exceeds SCALE (forces removal loop)', () => {
+    // Over-allocation in the LRM step requires many symbols to be bumped from
+    // floor=0 to min=1, causing the total to exceed SCALE=4096.
+    //
+    // With N dominant bytes (0x00) + one each of bytes 1-255:
+    //   counts[0] = N+1, counts[x>0] = 2 (after Laplace smoothing)
+    //   total = N+1 + 255*2 = N+511
+    //   exact[x>0] = 2/(N+511) * 4096 — must be < 1 to trigger floor=0 → clamped to 1
+    //   Condition: 2/(N+511) * 4096 < 1 → N+511 > 8192 → N > 7681
+    //
+    // With N=8192:
+    //   total = 8703
+    //   exact[0] = 8193/8703 * 4096 ≈ 3855.97 → floor 3855
+    //   exact[x>0] = 2/8703 * 4096 ≈ 0.941 → floor 0 → clamped to 1
+    //   allocated = 3855 + 255 = 4110 > 4096 → over-allocated by 14 → triggers lines 99-111
+    const dominant = new Uint8Array(8192).fill(0x00);
+    // Add one of each other byte so every non-zero symbol is present in the input
+    const others = new Uint8Array(255);
+    for (let i = 0; i < 255; i++) others[i] = i + 1;
+    const data = new Uint8Array(dominant.length + others.length);
+    data.set(dominant, 0);
+    data.set(others, dominant.length);
+
+    const table = buildFrequencyTable(data);
+
+    // Verify the table is still valid (sums to exactly 4096)
+    const total = Array.from(table.freq).reduce((a, b) => a + b, 0);
+    expect(total).toBe(4096);
+    expect(table.cumFreq[256]).toBe(4096);
+
+    // Verify round-trip still works through this table
+    const symbols = new Uint8Array([0x00, 0x01, 0x02, 0x00]);
+    const encoded = ransEncode(symbols, table);
+    const decoded = ransDecode(encoded, table, symbols.length);
+    expect(Array.from(decoded)).toEqual(Array.from(symbols));
+  });
+});
+
+// ─── timestamp-delta.ts: lines 73-74 (count=0) and 76-79 (buf too short) ─────
+
+import { decodeTimestampDeltas } from '../timestamp-delta.js';
+
+describe('timestamp-delta.ts — edge cases for decodeTimestampDeltas', () => {
+  it('count=0 returns empty array immediately (line 73-74)', () => {
+    // When count=0, decodeTimestampDeltas returns [] without touching the buffer
+    const buf = new Uint8Array(8).fill(0);
+    const result = decodeTimestampDeltas(buf, 0);
+    expect(result).toEqual([]);
+    expect(result.length).toBe(0);
+  });
+
+  it('buf.length < 8 throws RangeError for count > 0 (lines 76-79)', () => {
+    // A 4-byte buffer is too short for even one timestamp (needs 8 bytes)
+    const shortBuf = new Uint8Array(4).fill(0);
+    expect(() => decodeTimestampDeltas(shortBuf, 1)).toThrow(RangeError);
+    expect(() => decodeTimestampDeltas(shortBuf, 1)).toThrow('too short');
+  });
+
+  it('empty buffer throws RangeError for count > 0', () => {
+    const emptyBuf = new Uint8Array(0);
+    expect(() => decodeTimestampDeltas(emptyBuf, 1)).toThrow(RangeError);
+  });
+});
+
+// ─── varint.ts: lines 81-82 (varint too long) ─────────────────────────────────
+
+import { decodeVarint, encodeSignedVarint } from '../varint.js';
+
+describe('varint.ts — varint too long guard (lines 81-82)', () => {
+  it('throws RangeError when varint exceeds 8 continuation bytes', () => {
+    // Craft a fake varint: bytes 0-7 all have the continuation bit (MSB) set,
+    // byte 8 terminates it. This triggers the shift >= 56 guard.
+    const tooLong = new Uint8Array(9).fill(0x80); // 9 bytes, all continuation
+    tooLong[8] = 0x01; // final byte without continuation
+    expect(() => decodeVarint(tooLong, 0)).toThrow(RangeError);
+    expect(() => decodeVarint(tooLong, 0)).toThrow('too long');
+  });
+});
+
+describe('varint.ts — signed non-integer guard (lines 117-118)', () => {
+  it('throws RangeError when encodeSignedVarint receives a float', () => {
+    expect(() => encodeSignedVarint(1.5)).toThrow(RangeError);
+    expect(() => encodeSignedVarint(1.5)).toThrow('finite integer');
+  });
+
+  it('throws RangeError when encodeSignedVarint receives Infinity', () => {
+    expect(() => encodeSignedVarint(Infinity)).toThrow(RangeError);
+  });
+
+  it('throws RangeError when encodeSignedVarint receives NaN', () => {
+    expect(() => encodeSignedVarint(NaN)).toThrow(RangeError);
+  });
+});
+
+// ─── dictionary-registry.ts: lines 28-29 (odd-length hex) ────────────────────
+// hexToBytes is private but used via DictionaryRegistry.import().
+// Inject invalid hex via JSON with an odd-length dictionary hex string.
+
+import { DictionaryRegistry } from '../dictionary-registry.js';
+
+describe('DictionaryRegistry — hexToBytes odd-length hex (lines 28-29)', () => {
+  it('import throws RangeError when dictionary hex has odd length', () => {
+    // Build a valid JSON array but with an odd-length hex string in dictionary field.
+    // The id is a well-formed 32-byte value so that the dictionary field is the
+    // only malformed input in the entry.
+    const badJson = JSON.stringify([
+      {
+        id: 'aabbcc' + 'dd'.repeat(29), // 6 + 58 = 64 hex chars = 32 bytes
+        dictionary: 'abc', // odd-length hex → should throw
+        domain: 'test',
+        created: '1700000000000',
+      },
+    ]);
+    expect(() => DictionaryRegistry.import(badJson)).toThrow(RangeError);
+    expect(() => DictionaryRegistry.import(badJson)).toThrow('odd-length');
+  });
+});
+
+// ─── sample-generator.ts: lines 128-129 (timestamp > MAX_SAFE_INTEGER) ────────
+// generateSensorStream throws if ts > Number.MAX_SAFE_INTEGER.
+// We need to trigger the internal timestamp conversion guard.
+
+import { generateSensorStream } from '../sample-generator.js';
+
+describe('sample-generator.ts — timestamp overflow guard (lines 128-129)', () => {
+  it('throws RangeError when startTimestamp exceeds MAX_SAFE_INTEGER', async () => {
+    // The overflow guard fires when ts > BigInt(Number.MAX_SAFE_INTEGER)
+    // (MAX_SAFE_INTEGER = 9007199254740991).
+    // Start one past that limit and request a single DOT, so the very first
+    // timestamp is already out of range and no jitter or step is needed to
+    // push it over.
+    const tooLargeBase = BigInt(Number.MAX_SAFE_INTEGER) + 1n;
+    await expect(
+      generateSensorStream({ count: 1, profile: 'kulhadVoltage', startTimestamp: tooLargeBase }),
+    ).rejects.toThrow(RangeError);
+  });
+});
+
+// ─── zstd.ts: lines 64-68 (ENOENT) and 95-102 (re-throw non-ENOENT) ──────────
+// trainDictionary shells out to the zstd CLI.
+// These error paths are covered via the actual zstd.test.ts suite which runs
+// real CLI calls. Here we cover the warning path by running with < 10 samples.
+//
+// NOTE: execFileSync from node:child_process has non-configurable properties
+// in the ESM runtime, preventing vi.spyOn. We cover lines via actual runs.
+
+import { trainDictionary } from '../zstd.js';
+
+describe('zstd.ts — warning path (fewer than 10 samples)', () => {
+  it('emits console.warn when fewer than 10 samples are provided', async () => {
+    const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
+
+    // try/finally guarantees console.warn is restored even when the assertion
+    // below fails, so a failure here cannot silence warnings in later tests.
+    try {
+      // Use 3 samples — will trigger warning and then attempt the real zstd CLI.
+      // If zstd is installed, training will succeed (or fail with real zstd error).
+      // If zstd is not installed, it will throw the ENOENT path.
+      // Either way, the warning must fire FIRST.
+      const fewSamples = Array.from({ length: 3 }, (_, i) =>
+        new Uint8Array(200).fill(i + 1)
+      );
+      try {
+        await trainDictionary(fewSamples);
+      } catch {
+        // expected — either ENOENT or zstd training failure; only the warning matters here
+      }
+      // Assert BEFORE mockRestore so vi can still inspect the call count
+      expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('samples provided'));
+    } finally {
+      warnSpy.mockRestore();
+    }
+  }, 30_000);
+});
+
+// ─── rle.ts: lines 81-84 (dead inner guard) ──────────────────────────────────
+// The inner `if (readPos >= buf.length)` at line 80-84 in decodePayloadTypes
+// is guarded by the outer `while (readPos < buf.length)` so it can never fire.
+// Verify the outer while loop exits cleanly instead.
+
+import { decodePayloadTypes, encodePayloadTypes } from '../rle.js';
+
+describe('rle.ts — boundary and error coverage', () => {
+  it('decodes count that matches exactly (no boundary overshoot)', () => {
+    const types = new Uint8Array([0x00, 0x00, 0x01, 0x02]);
+    const encoded = encodePayloadTypes(types);
+    const decoded = decodePayloadTypes(encoded, 4);
+    expect(Array.from(decoded)).toEqual(Array.from(types));
+  });
+
+  it('throws when decoded count exceeds totalCount (lines 92-95)', () => {
+    // Encode 10 identical types, then ask the decoder for only 5.
+    const types = new Uint8Array(10).fill(0x00);
+    const encoded = encodePayloadTypes(types); // encodes as [0x00, varint(10)]
+    // The single run of 10 does not fit into totalCount=5, so decoding must throw.
+    expect(() => decodePayloadTypes(encoded, 5)).toThrow(RangeError);
+  });
+
+  it('throws on empty types array (encodePayloadTypes line 22-24)', () => {
+    expect(() => encodePayloadTypes(new Uint8Array(0))).toThrow(RangeError);
+    expect(() => encodePayloadTypes(new Uint8Array(0))).toThrow('must not be empty');
+  });
+
+  it('throws when writePos + count > totalCount (lines 92-95 overflow guard)', () => {
+    // Craft a buffer where the RLE count overflows totalCount mid-decode
+    // [type=0x00, count_varint=50] but totalCount=10 → overflow at fill
+    const encoded = new Uint8Array([0x00, 50]); // type=0, count=50 (1-byte varint)
+    expect(() => decodePayloadTypes(encoded, 10)).toThrow(RangeError);
+    expect(() => decodePayloadTypes(encoded, 10)).toThrow('decoded count exceeds totalCount');
+  });
+
+  it('throws count mismatch when RLE ends early (lines 101-104)', () => {
+    // Encode a buffer that has fewer types than totalCount expects
+    const types = new Uint8Array([0x00, 0x01]); // 2 types
+    const encoded = encodePayloadTypes(types);
+    // Try to decode 10 from a buffer that only has 2 → mismatch
+    expect(() => decodePayloadTypes(encoded, 10)).toThrow(RangeError);
+    expect(() => decodePayloadTypes(encoded, 10)).toThrow('count mismatch');
+  });
+});
+
+// ─── batch-v2.ts: uncovered error/alternate paths ────────────────────────────
+
+import {
+  serializeBatchV2,
+  deserializeBatchV2,
+  FLAG_PREDICTION,
+  FLAG_DICT_COMPRESSED,
+} from '../batch-v2.js';
+import {
+  createKeypair,
+  createDOT,
+  toBytes,
+  DotType,
+  createBLSKeypair,
+} from '@dotprotocol/core';
+import { LinearPredictor, NullPredictor, LastValuePredictor } from '../predictor.js';
+
+/**
+ * Build a chain of `n` payload-less PUBLIC DOTs signed by one fresh keypair.
+ *
+ * Each DOT links to the previous one and timestamps advance by 100 ms from
+ * `tsStart`, so the chain is deterministic apart from the generated keypair.
+ *
+ * @param n - number of DOTs to create
+ * @param tsStart - timestamp (ms) of the first DOT
+ * @returns the serialized DOTs in chain order
+ */
+async function buildTestChain(n: number, tsStart = 1_700_000_000_000): Promise<Uint8Array[]> {
+  const keypair = await createKeypair();
+  const dots: Uint8Array[] = [];
+  let prev: Uint8Array | undefined;
+
+  for (let i = 0; i < n; i++) {
+    const dot = await createDOT({
+      keypair,
+      type: DotType.PUBLIC,
+      ts: tsStart + i * 100,
+      ...(prev ? { previous: prev } : {}),
+    });
+    const bytes = toBytes(dot);
+    dots.push(bytes);
+    prev = bytes;
+  }
+  return dots;
+}
+
+describe('batch-v2.ts — validation error paths', () => {
+  it('throws when dot has wrong size (not 153 bytes)', async () => {
+    const blsKeypair = createBLSKeypair();
+    const badDot = new Uint8Array(100); // not 153 bytes
+    await expect(serializeBatchV2([badDot], blsKeypair)).rejects.toThrow(RangeError);
+    await expect(serializeBatchV2([badDot], blsKeypair)).rejects.toThrow('153');
+  });
+
+  it('throws when dots have different pubkeys', async () => {
+    const blsKeypair = createBLSKeypair();
+    const kp1 = await createKeypair();
+    const kp2 = await createKeypair();
+    const dot1 = toBytes(await createDOT({ keypair: kp1, type: DotType.PUBLIC, ts: 1_700_000_000_000 }));
+    const dot2 = toBytes(await createDOT({ keypair: kp2, type: DotType.PUBLIC, ts: 1_700_000_000_001 }));
+    await expect(serializeBatchV2([dot1, dot2], blsKeypair)).rejects.toThrow('different pubkey');
+  });
+
+  it('throws when dictionary provided without dictionaryId', async () => {
+    const dots = await buildTestChain(2);
+    const blsKeypair = createBLSKeypair();
+    const dict = new Uint8Array(100).fill(0xab);
+    await expect(
+      serializeBatchV2(dots, blsKeypair, { dictionary: dict })
+    ).rejects.toThrow(TypeError);
+    await expect(
+      serializeBatchV2(dots, blsKeypair, { dictionary: dict })
+    ).rejects.toThrow('dictionaryId');
+  });
+
+  it('throws when dictionaryId provided without dictionary', async () => {
+    const dots = await buildTestChain(2);
+    const blsKeypair = createBLSKeypair();
+    const dictId = new Uint8Array(32).fill(0x01);
+    await expect(
+      serializeBatchV2(dots, blsKeypair, { dictionaryId: dictId })
+    ).rejects.toThrow(TypeError);
+    await expect(
+      serializeBatchV2(dots, blsKeypair, { dictionaryId: dictId })
+    ).rejects.toThrow('dictionary');
+  });
+
+  it('throws when predictor and dictionary are both provided', async () => {
+    const dots = await buildTestChain(2);
+    const
blsKeypair = createBLSKeypair();
+    const dict = new Uint8Array(100).fill(0xab);
+    const dictId = new Uint8Array(32).fill(0x01);
+    await expect(
+      serializeBatchV2(dots, blsKeypair, {
+        predictor: new LinearPredictor(),
+        dictionary: dict,
+        dictionaryId: dictId,
+      })
+    ).rejects.toThrow(TypeError);
+    await expect(
+      serializeBatchV2(dots, blsKeypair, {
+        predictor: new LinearPredictor(),
+        dictionary: dict,
+        dictionaryId: dictId,
+      })
+    ).rejects.toThrow('mutually exclusive');
+  });
+
+  it('throws when dictionaryId has wrong length (not 32 bytes)', async () => {
+    const dots = await buildTestChain(2);
+    const blsKeypair = createBLSKeypair();
+    const dict = new Uint8Array(100).fill(0xab);
+    const badDictId = new Uint8Array(16).fill(0x01); // wrong length
+    await expect(
+      serializeBatchV2(dots, blsKeypair, { dictionary: dict, dictionaryId: badDictId })
+    ).rejects.toThrow(RangeError);
+    await expect(
+      serializeBatchV2(dots, blsKeypair, { dictionary: dict, dictionaryId: badDictId })
+    ).rejects.toThrow('32 bytes');
+  });
+});
+
+// Each test below hands deserializeBatchV2 a frame that is truncated or
+// tampered at a specific offset and asserts the resulting error message.
+// Offsets used throughout: 86-byte frame header, 513-byte prediction metadata.
+describe('batch-v2.ts — deserializeBatchV2 error paths', () => {
+  it('throws RangeError when buffer is shorter than header (86 bytes)', async () => {
+    const blsKeypair = createBLSKeypair();
+    const shortBuf = new Uint8Array(50);
+    await expect(deserializeBatchV2(shortBuf, blsKeypair.publicKey)).rejects.toThrow(RangeError);
+  });
+
+  it('throws RangeError when dot_count is 0 in frame', async () => {
+    const blsKeypair = createBLSKeypair();
+    // Build a minimal 86-byte frame with version=0x03, dot_count=0
+    const buf = new Uint8Array(86);
+    buf[0] = 0x03; // version
+    buf[1] = 0x00; // flags (no delta, no RLE)
+    // dot_count at [2..5] = 0 (already 0)
+    await expect(deserializeBatchV2(buf, blsKeypair.publicKey)).rejects.toThrow(RangeError);
+    await expect(deserializeBatchV2(buf, blsKeypair.publicKey)).rejects.toThrow('dot_count is 0');
+  });
+
+  it('throws buffer too short for dict header (FLAG_DICT_COMPRESSED but buf < 118B, line 555-558)', async () => {
+    // Create a 100-byte frame with FLAG_DICT_COMPRESSED set → buf.length < 118 → line 555
+    const blsKeypair = createBLSKeypair();
+    const buf = new Uint8Array(100);
+    buf[0] = 0x03; // version
+    buf[1] = FLAG_DICT_COMPRESSED; // flag set
+    const view = new DataView(buf.buffer);
+    view.setUint32(2, 1, true); // dot_count = 1
+    await expect(deserializeBatchV2(buf, blsKeypair.publicKey)).rejects.toThrow(RangeError);
+    await expect(deserializeBatchV2(buf, blsKeypair.publicKey)).rejects.toThrow('buffer too short for dict header');
+  }, 15_000);
+
+  it('throws buffer too short for prediction metadata (hasPrediction but body < 513B, line 604-607)', async () => {
+    // Create a minimal frame (86 bytes + a few body bytes) with FLAG_PREDICTION set
+    // bodyBuf.length = frame.length - 86 = few bytes < 513 → line 604
+    const blsKeypair = createBLSKeypair();
+    const buf = new Uint8Array(100); // 86 header + 14 body bytes (< 513)
+    buf[0] = 0x03; // version
+    buf[1] = FLAG_PREDICTION; // flag set, no dict
+    const view = new DataView(buf.buffer);
+    view.setUint32(2, 1, true); // dot_count = 1
+    await expect(deserializeBatchV2(buf, blsKeypair.publicKey)).rejects.toThrow(RangeError);
+    await expect(deserializeBatchV2(buf, blsKeypair.publicKey)).rejects.toThrow('buffer too short for prediction metadata');
+  }, 15_000);
+
+  it('throws buffer too short for raw timestamps (no-delta path, line 649-650)', async () => {
+    // no-delta, no-RLE, 1 DOT: body normally = 8+1+16 = 25B.
+    // Truncate body to 4 bytes → bodyCursor+tsColumnSize = 8 > 4 → line 649.
+    const dots = await buildTestChain(1);
+    const blsKeypair = createBLSKeypair();
+    const frame = await serializeBatchV2(dots, blsKeypair, {
+      timestampDelta: false,
+      payloadTypeRLE: false,
+    });
+    const truncated = frame.slice(0, 86 + 4); // only 4 body bytes, need 8 for ts
+    await expect(deserializeBatchV2(truncated, blsKeypair.publicKey)).rejects.toThrow(RangeError);
+    await expect(deserializeBatchV2(truncated, blsKeypair.publicKey)).rejects.toThrow('buffer too short for raw timestamps');
+  }, 15_000);
+
+  it('throws buffer too short for RLE types + rANS data (prediction+RLE path, line 680-681)', async () => {
+    // RLE + ts-delta + prediction, 1 DOT. The last 4 bytes of the frame hold
+    // ransEncodedLen, and the guard under test fires when rleEnd <= tsEnd.
+    const dots = await buildTestChain(1);
+    const blsKeypair = createBLSKeypair();
+    const frame = await serializeBatchV2(dots, blsKeypair, {
+      predictor: new NullPredictor(),
+      timestampDelta: true,
+      payloadTypeRLE: true,
+    });
+
+    // Overwrite ransEncodedLen with the largest value the body can hold
+    // (bodyLen - 4). That leaves no room for the RLE types, so
+    // rleEnd = bodyLen - 4 - ransEncodedLen = 0, which is <= tsEnd.
+    const tampered = frame.slice();
+    const view = new DataView(tampered.buffer);
+    // bodyLen = frame length minus the 86-byte header and the 513-byte prediction metadata
+    const bodyLen = frame.length - 86 - 513; // 513 = prediction meta size
+    const ransEncodedLen = bodyLen - 4; // makes rleEnd = bodyLen - 4 - (bodyLen-4) = 0 <= tsEnd
+    view.setUint32(tampered.length - 4, ransEncodedLen, true);
+
+    await expect(deserializeBatchV2(tampered, blsKeypair.publicKey)).rejects.toThrow(RangeError);
+    await expect(deserializeBatchV2(tampered, blsKeypair.publicKey)).rejects.toThrow('buffer too short for RLE types + rANS data');
+  }, 15_000);
+
+  it('throws buffer too short for raw types (line 698-699)', async () => {
+    // no-RLE, no-delta, 1 DOT: normal body = 8(ts)+1(type)+16(payload) = 25B
+    // Truncate to 86+8 bytes → bodyBuf.length=8, tsEnd=8, tsEnd+dotCount=9 > 8 → line 698
+    const dots = await buildTestChain(1);
+    const blsKeypair = createBLSKeypair();
+    const frame = await serializeBatchV2(dots, blsKeypair, {
+      timestampDelta: false,
+      payloadTypeRLE: false,
+    });
+    const truncated = frame.slice(0, 86 + 8); // just the ts column, no type or payload
+    await expect(deserializeBatchV2(truncated, blsKeypair.publicKey)).rejects.toThrow(RangeError);
+    await expect(deserializeBatchV2(truncated, blsKeypair.publicKey)).rejects.toThrow('buffer too short for raw types');
+  }, 15_000);
+
+  it('throws buffer too short for RLE types + payloads, no-prediction (line 689-690)', async () => {
+    // RLE+ts-delta, 1 DOT, no-prediction: rleEnd = bodyBuf.length - 16
+    // Need rleEnd <= tsEnd. With ts-delta for 1 DOT, tsColumnSize=8, tsEnd=8.
+    // If bodyBuf.length = 24, rleEnd = 24-16 = 8 <= 8 → throws at line 689.
+    const dots = await buildTestChain(1);
+    const blsKeypair = createBLSKeypair();
+    const frame = await serializeBatchV2(dots, blsKeypair, {
+      timestampDelta: true,
+      payloadTypeRLE: true,
+    });
+    // Truncate to 86+24=110 bytes so body=24 bytes → rleEnd=8 <= tsEnd=8 → line 689
+    const truncated = frame.slice(0, 86 + 24);
+    await expect(deserializeBatchV2(truncated, blsKeypair.publicKey)).rejects.toThrow(RangeError);
+    await expect(deserializeBatchV2(truncated, blsKeypair.publicKey)).rejects.toThrow('buffer too short for RLE types + payloads');
+  }, 15_000);
+
+  it('throws when buffer is too short for raw payloads (no-delta, no-RLE, no-prediction) — line 739-740', async () => {
+    // Serialize 1 DOT with no-delta, no-RLE, no-prediction
+    const dots = await buildTestChain(1);
+    const blsKeypair = createBLSKeypair();
+    const frame = await serializeBatchV2(dots, blsKeypair, {
+      timestampDelta: false,
+      payloadTypeRLE: false,
+    });
+
+    // Frame body: 8B (raw ts) + 1B (raw type) + 16B (payload) = 25B
+    // Truncate to remove last 16B (payload) → typesEnd+payloadsTotalSize > body.length
+    const truncated = frame.slice(0, frame.length - 16);
+    await expect(deserializeBatchV2(truncated, blsKeypair.publicKey)).rejects.toThrow(RangeError);
+    await expect(deserializeBatchV2(truncated, blsKeypair.publicKey)).rejects.toThrow('buffer too short for payloads');
+  }, 15_000);
+
+  it('throws when rANS data length mismatches (prediction + no-RLE path, line 720-723)', async () => {
+    // Serialize with prediction + no-RLE so the frame uses raw types.
+    // With raw types: typesEnd is independent of ransLen (read from last 4 bytes).
+    // Corrupting ransLen to a different value → ransEnd - ransStart !== ransLen → line 720-723.
+    const dots = await buildTestChain(3);
+    const blsKeypair = createBLSKeypair();
+    const frame = await serializeBatchV2(dots, blsKeypair, {
+      predictor: new NullPredictor(),
+      payloadTypeRLE: false, // raw types so typesEnd is independent of ransLen
+    });
+
+    // Read the actual ransLen from the last 4 bytes, then change to ransLen + 1
+    const tampered = frame.slice();
+    const view = new DataView(tampered.buffer);
+    const actualRansLen = view.getUint32(tampered.length - 4, true);
+    // Set to actualRansLen + 1 → mismatch without affecting rleEnd (no RLE)
+    view.setUint32(tampered.length - 4, actualRansLen + 1, true);
+
+    await expect(deserializeBatchV2(tampered, blsKeypair.publicKey)).rejects.toThrow(RangeError);
+    await expect(deserializeBatchV2(tampered, blsKeypair.publicKey)).rejects.toThrow('rANS data length mismatch');
+  }, 15_000);
+
+  it('throws on unknown predictor modelId during deserialization', async () => {
+    const dots = await buildTestChain(3);
+    const blsKeypair = createBLSKeypair();
+
+    // Serialize with NullPredictor (modelId=0)
+    const frame = await serializeBatchV2(dots, blsKeypair, {
+      predictor: new NullPredictor(),
+    });
+
+    // Corrupt the modelId byte — it's right after the 86-byte header
+    // at position 86 (first byte of prediction metadata = modelId)
+    const tampered = frame.slice();
+    tampered[86] = 0xFF; // unknown modelId
+
+    await expect(deserializeBatchV2(tampered, blsKeypair.publicKey)).rejects.toThrow(
+      'unknown predictor modelId'
+    );
+  }, 30_000);
+});
+
+describe('batch-v2.ts — predictor roundtrip with all three models', () => {
+  it('NullPredictor (modelId=0x00) roundtrips', async () => {
+    const dots = await buildTestChain(5);
+    const blsKeypair = createBLSKeypair();
+
+    const frame = await serializeBatchV2(dots, blsKeypair, {
+      predictor: new NullPredictor(),
+    });
+    expect(frame[1]!
& FLAG_PREDICTION).toBeTruthy(); + + const recovered = await deserializeBatchV2(frame, blsKeypair.publicKey); + expect(recovered).toHaveLength(dots.length); + for (let i = 0; i < dots.length; i++) { + expect(Array.from(recovered[i]!.subarray(137, 153))).toEqual( + Array.from(dots[i]!.subarray(137, 153)), + ); + } + }, 30_000); + + it('LastValuePredictor (modelId=0x01) roundtrips', async () => { + const dots = await buildTestChain(5); + const blsKeypair = createBLSKeypair(); + + const frame = await serializeBatchV2(dots, blsKeypair, { + predictor: new LastValuePredictor(), + }); + expect(frame[1]! & FLAG_PREDICTION).toBeTruthy(); + + const recovered = await deserializeBatchV2(frame, blsKeypair.publicKey); + expect(recovered).toHaveLength(dots.length); + for (let i = 0; i < dots.length; i++) { + expect(Array.from(recovered[i]!.subarray(137, 153))).toEqual( + Array.from(dots[i]!.subarray(137, 153)), + ); + } + }, 30_000); + + it('LinearPredictor (modelId=0x02) roundtrips', async () => { + const dots = await buildTestChain(5); + const blsKeypair = createBLSKeypair(); + + const frame = await serializeBatchV2(dots, blsKeypair, { + predictor: new LinearPredictor(), + }); + expect(frame[1]! 
& FLAG_PREDICTION).toBeTruthy(); + + const recovered = await deserializeBatchV2(frame, blsKeypair.publicKey); + expect(recovered).toHaveLength(dots.length); + for (let i = 0; i < dots.length; i++) { + expect(Array.from(recovered[i]!.subarray(137, 153))).toEqual( + Array.from(dots[i]!.subarray(137, 153)), + ); + } + }, 30_000); + + it("'auto' predictor selects the best model and roundtrips", async () => { + // Use a chain with linearly-changing payloads to bias toward LinearPredictor + const keypair = await createKeypair(); + const blsKeypair = createBLSKeypair(); + const dots: Uint8Array[] = []; + let prev: Uint8Array | undefined; + + for (let i = 0; i < 10; i++) { + const payload = new Uint8Array(16); + for (let b = 0; b < 16; b++) payload[b] = (i + b) & 0xff; + const dot = await createDOT({ + keypair, + type: DotType.PUBLIC, + ts: 1_700_000_000_000 + i * 100, + payload, + ...(prev ? { previous: prev } : {}), + }); + const bytes = toBytes(dot); + dots.push(bytes); + prev = bytes; + } + + const frame = await serializeBatchV2(dots, blsKeypair, { predictor: 'auto' }); + expect(frame[1]! 
& FLAG_PREDICTION).toBeTruthy(); + + const recovered = await deserializeBatchV2(frame, blsKeypair.publicKey); + expect(recovered).toHaveLength(dots.length); + for (let i = 0; i < dots.length; i++) { + expect(Array.from(recovered[i]!.subarray(137, 153))).toEqual( + Array.from(dots[i]!.subarray(137, 153)), + ); + } + }, 30_000); + + it('no-delta + no-RLE + prediction roundtrips', async () => { + const dots = await buildTestChain(5); + const blsKeypair = createBLSKeypair(); + + const frame = await serializeBatchV2(dots, blsKeypair, { + timestampDelta: false, + payloadTypeRLE: false, + predictor: new LinearPredictor(), + }); + + const recovered = await deserializeBatchV2(frame, blsKeypair.publicKey); + expect(recovered).toHaveLength(dots.length); + for (let i = 0; i < dots.length; i++) { + expect(Array.from(recovered[i]!.subarray(128, 136))).toEqual( + Array.from(dots[i]!.subarray(128, 136)), + ); + expect(Array.from(recovered[i]!.subarray(137, 153))).toEqual( + Array.from(dots[i]!.subarray(137, 153)), + ); + } + }, 30_000); +}); diff --git a/packages/compression/src/tests/dictionary-registry.test.ts b/packages/compression/src/tests/dictionary-registry.test.ts new file mode 100644 index 000000000..a198f3e76 --- /dev/null +++ b/packages/compression/src/tests/dictionary-registry.test.ts @@ -0,0 +1,178 @@ +import { describe, it, expect } from 'vitest'; +import { DictionaryRegistry } from '../dictionary-registry.js'; + +// ─── Helpers ───────────────────────────────────────────────────────────────── + +/** Random Uint8Array of given length (crypto-quality). */ +function randomBytes(n: number): Uint8Array { + const buf = new Uint8Array(n); + crypto.getRandomValues(buf); + return buf; +} + +/** Return true iff two Uint8Arrays are equal byte-by-byte. 
*/ +function bytesEqual(a: Uint8Array, b: Uint8Array): boolean { + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + if (a[i] !== b[i]) return false; + } + return true; +} + +// ─── Tests ─────────────────────────────────────────────────────────────────── + +describe('DictionaryRegistry — register → get by ID → same bytes', () => { + it('retrieves the exact dictionary bytes by the returned ID', async () => { + const registry = new DictionaryRegistry(); + const dict = randomBytes(512); + const id = await registry.register(dict, 'test-domain'); + + const entry = registry.get(id); + expect(entry).not.toBeNull(); + expect(bytesEqual(entry!.dictionary, dict)).toBe(true); + }); +}); + +describe('DictionaryRegistry — unknown ID → null', () => { + it('returns null for an ID that was never registered', async () => { + const registry = new DictionaryRegistry(); + const unknownId = randomBytes(32); + expect(registry.get(unknownId)).toBeNull(); + }); + + it('returns null after registering a different dictionary', async () => { + const registry = new DictionaryRegistry(); + await registry.register(randomBytes(256), 'domain-a'); + const unknownId = randomBytes(32); + expect(registry.get(unknownId)).toBeNull(); + }); +}); + +describe('DictionaryRegistry — export → import → same entries', () => { + it('round-trips a single entry through JSON', async () => { + const registry = new DictionaryRegistry(); + const dict = randomBytes(256); + const id = await registry.register(dict, 'voltage-sensor-v1'); + + const json = registry.export(); + const restored = DictionaryRegistry.import(json); + + expect(restored.size).toBe(1); + const entry = restored.get(id); + expect(entry).not.toBeNull(); + expect(bytesEqual(entry!.dictionary, dict)).toBe(true); + expect(entry!.domain).toBe('voltage-sensor-v1'); + expect(bytesEqual(entry!.id, id)).toBe(true); + expect(typeof entry!.created).toBe('bigint'); + }); + + it('round-trips multiple entries through JSON', async () 
=> { + const registry = new DictionaryRegistry(); + const dicts = [randomBytes(256), randomBytes(512), randomBytes(128)]; + const ids: Uint8Array[] = []; + + for (let i = 0; i < dicts.length; i++) { + ids.push(await registry.register(dicts[i]!, `domain-${i}`)); + } + + const restored = DictionaryRegistry.import(registry.export()); + expect(restored.size).toBe(3); + + for (let i = 0; i < ids.length; i++) { + const entry = restored.get(ids[i]!); + expect(entry).not.toBeNull(); + expect(bytesEqual(entry!.dictionary, dicts[i]!)).toBe(true); + } + }); +}); + +describe('DictionaryRegistry — two different dicts → different IDs', () => { + it('produces distinct IDs for distinct same-length byte arrays', async () => { + const registry = new DictionaryRegistry(); + const dict1 = randomBytes(256); + const dict2 = randomBytes(256); + + const id1 = await registry.register(dict1, 'domain-1'); + const id2 = await registry.register(dict2, 'domain-2'); + + expect(bytesEqual(id1, id2)).toBe(false); + expect(registry.size).toBe(2); + }); +}); + +describe('DictionaryRegistry — idempotent register', () => { + it('returns the same ID and keeps size === 1 when registered twice', async () => { + const registry = new DictionaryRegistry(); + const dict = randomBytes(256); + + const id1 = await registry.register(dict, 'same-domain'); + const id2 = await registry.register(dict, 'same-domain'); + + expect(bytesEqual(id1, id2)).toBe(true); + expect(registry.size).toBe(1); + }); + + it('idempotent even with different domain label on second call', async () => { + // The ID is content-addressed, so same bytes → same ID regardless of domain. + // The first registration wins on domain. 
+ const registry = new DictionaryRegistry(); + const dict = randomBytes(256); + + await registry.register(dict, 'original-domain'); + const id = await registry.register(dict, 'different-domain'); + + expect(registry.size).toBe(1); + const entry = registry.get(id); + // First registration wins — domain should still be 'original-domain' + expect(entry!.domain).toBe('original-domain'); + }); +}); + +describe('DictionaryRegistry — domain stored correctly', () => { + it('stores the exact domain string provided at registration', async () => { + const registry = new DictionaryRegistry(); + const id = await registry.register(randomBytes(64), 'voltage-sensor-v1'); + const entry = registry.get(id); + expect(entry!.domain).toBe('voltage-sensor-v1'); + }); +}); + +describe('DictionaryRegistry — created timestamp', () => { + it('created is a bigint approximately equal to Date.now()', async () => { + const before = BigInt(Date.now()); + const registry = new DictionaryRegistry(); + const id = await registry.register(randomBytes(64), 'ts-test'); + const after = BigInt(Date.now()); + + const entry = registry.get(id); + expect(typeof entry!.created).toBe('bigint'); + expect(entry!.created).toBeGreaterThanOrEqual(before); + expect(entry!.created).toBeLessThanOrEqual(after); + }); + + it('created survives export/import as a bigint', async () => { + const registry = new DictionaryRegistry(); + const id = await registry.register(randomBytes(64), 'ts-persist'); + const original = registry.get(id)!.created; + + const restored = DictionaryRegistry.import(registry.export()); + const entry = restored.get(id); + expect(entry!.created).toBe(original); + expect(typeof entry!.created).toBe('bigint'); + }); +}); + +describe('DictionaryRegistry — size', () => { + it('starts at 0', () => { + expect(new DictionaryRegistry().size).toBe(0); + }); + + it('increments with each unique dictionary', async () => { + const registry = new DictionaryRegistry(); + expect(registry.size).toBe(0); + await 
registry.register(randomBytes(64), 'a'); + expect(registry.size).toBe(1); + await registry.register(randomBytes(64), 'b'); + expect(registry.size).toBe(2); + }); +}); diff --git a/packages/compression/src/tests/predictor.test.ts b/packages/compression/src/tests/predictor.test.ts new file mode 100644 index 000000000..6189d9c0e --- /dev/null +++ b/packages/compression/src/tests/predictor.test.ts @@ -0,0 +1,300 @@ +import { describe, it, expect } from 'vitest'; +import { + NullPredictor, + LastValuePredictor, + LinearPredictor, + computeResidual, + applyResidual, +} from '../predictor.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makePayload(fillByte: number): Uint8Array { + return new Uint8Array(16).fill(fillByte); +} + +function makeRamp(start: number, step: number): Uint8Array { + const arr = new Uint8Array(16); + for (let i = 0; i < 16; i++) { + arr[i] = (start + i * step) & 0xff; + } + return arr; +} + +function arraysEqual(a: Uint8Array, b: Uint8Array): boolean { + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + if (a[i] !== b[i]) return false; + } + return true; +} + +function allZeros(a: Uint8Array): boolean { + return a.every((b) => b === 0); +} + +// --------------------------------------------------------------------------- +// 1. 
NullPredictor +// --------------------------------------------------------------------------- + +describe('NullPredictor', () => { + it('predict() always returns 16 zero bytes before any update', () => { + const p = new NullPredictor(); + const pred = p.predict(); + expect(pred.length).toBe(16); + expect(allZeros(pred)).toBe(true); + }); + + it('predict() still returns zeros after update()', () => { + const p = new NullPredictor(); + p.update(makePayload(0x42)); + const pred = p.predict(); + expect(allZeros(pred)).toBe(true); + }); + + it('predict() returns zeros after multiple updates', () => { + const p = new NullPredictor(); + p.update(makePayload(0xAA)); + p.update(makePayload(0xBB)); + p.update(makePayload(0xCC)); + expect(allZeros(p.predict())).toBe(true); + }); + + it('modelId is 0x00', () => { + const p = new NullPredictor(); + expect(p.modelId).toBe(0x00); + }); +}); + +// --------------------------------------------------------------------------- +// 2. LastValuePredictor +// --------------------------------------------------------------------------- + +describe('LastValuePredictor', () => { + it('first predict() returns zeros (no history)', () => { + const p = new LastValuePredictor(); + expect(allZeros(p.predict())).toBe(true); + }); + + it('after update(x), predict() returns x', () => { + const p = new LastValuePredictor(); + const x = makePayload(0x42); + p.update(x); + expect(arraysEqual(p.predict(), x)).toBe(true); + }); + + it('after update(x) then update(y), predict() returns y', () => { + const p = new LastValuePredictor(); + const x = makePayload(0x10); + const y = makePayload(0xFF); + p.update(x); + p.update(y); + expect(arraysEqual(p.predict(), y)).toBe(true); + }); + + it('predict() does not mutate internal state (idempotent)', () => { + const p = new LastValuePredictor(); + const x = makePayload(0x77); + p.update(x); + const pred1 = p.predict(); + const pred2 = p.predict(); + expect(arraysEqual(pred1, pred2)).toBe(true); + }); + + 
it('reset() causes predict() to return zeros again', () => { + const p = new LastValuePredictor(); + p.update(makePayload(0x55)); + p.reset(); + expect(allZeros(p.predict())).toBe(true); + }); + + it('modelId is 0x01', () => { + const p = new LastValuePredictor(); + expect(p.modelId).toBe(0x01); + }); +}); + +// --------------------------------------------------------------------------- +// 3. LinearPredictor +// --------------------------------------------------------------------------- + +describe('LinearPredictor — basic predictions', () => { + it('first predict() returns zeros (no history)', () => { + const p = new LinearPredictor(); + expect(allZeros(p.predict())).toBe(true); + }); + + it('after update(x), predict() returns x (one-step: repeat first)', () => { + const p = new LinearPredictor(); + const x = makePayload(0x30); + p.update(x); + expect(arraysEqual(p.predict(), x)).toBe(true); + }); + + it('after update(x) then update(y), predict() extrapolates: 2y - x (byte-level, wrapping)', () => { + const p = new LinearPredictor(); + // x[i] = 0x10, y[i] = 0x20 → predicted = 2*0x20 - 0x10 = 0x30 + const x = makePayload(0x10); + const y = makePayload(0x20); + p.update(x); + p.update(y); + const pred = p.predict(); + expect(pred.length).toBe(16); + // Every byte: 2 * 0x20 - 0x10 = 0x30 + for (let i = 0; i < 16; i++) { + expect(pred[i]).toBe(0x30); + } + }); + + it('byte-level wrapping: (2 * 0xFF - 0xFE) & 0xFF = 0x00', () => { + const p = new LinearPredictor(); + // x = 0xFE, y = 0xFF → predicted = 2*0xFF - 0xFE = 256 = 0x00 (wrapped) + const x = makePayload(0xFE); + const y = makePayload(0xFF); + p.update(x); + p.update(y); + const pred = p.predict(); + for (let i = 0; i < 16; i++) { + expect(pred[i]).toBe(0x00); + } + }); + + it('reset() clears state and predict() returns zeros again', () => { + const p = new LinearPredictor(); + p.update(makePayload(0x10)); + p.update(makePayload(0x20)); + p.reset(); + expect(allZeros(p.predict())).toBe(true); + }); + + 
it('modelId is 0x02', () => { + const p = new LinearPredictor(); + expect(p.modelId).toBe(0x02); + }); +}); + +describe('LinearPredictor — convergence on constant stream', () => { + it('residuals are all zeros from the third observation onward (constant value)', () => { + const p = new LinearPredictor(); + const value = makePayload(0xAB); + + // Feed 10 constant values, check residuals from index 2 onward + for (let i = 0; i < 10; i++) { + const pred = p.predict(); + if (i >= 2) { + const residual = computeResidual(value, pred); + expect(allZeros(residual)).toBe(true); + } + p.update(value); + } + }); +}); + +describe('LinearPredictor — convergence on linear ramp', () => { + it('residuals are all zeros from the third observation onward (linear ramp)', () => { + const p = new LinearPredictor(); + const step = 3; + + // Feed 10 ramp payloads: each byte increments by `step` from the previous + for (let i = 0; i < 10; i++) { + const actual = makePayload((i * step) & 0xff); + const pred = p.predict(); + if (i >= 2) { + const residual = computeResidual(actual, pred); + expect(allZeros(residual)).toBe(true); + } + p.update(actual); + } + }); +}); + +// --------------------------------------------------------------------------- +// 4. 
computeResidual +// --------------------------------------------------------------------------- + +describe('computeResidual', () => { + it('actual XOR predicted — spot check: 0x42 XOR 0x40 = 0x02', () => { + const actual = makePayload(0x42); + const predicted = makePayload(0x40); + const residual = computeResidual(actual, predicted); + expect(residual.length).toBe(16); + for (let i = 0; i < 16; i++) { + expect(residual[i]).toBe(0x02); + } + }); + + it('identical inputs → all-zero residual', () => { + const val = makePayload(0x55); + expect(allZeros(computeResidual(val, val))).toBe(true); + }); + + it('XOR is its own inverse: residual XOR predicted = actual', () => { + const actual = makePayload(0xDE); + const predicted = makePayload(0xAD); + const residual = computeResidual(actual, predicted); + const recovered = applyResidual(residual, predicted); + expect(arraysEqual(recovered, actual)).toBe(true); + }); + + it('throws RangeError when sizes differ', () => { + const a = new Uint8Array(16); + const b = new Uint8Array(15); + expect(() => computeResidual(a, b)).toThrow(RangeError); + }); + + it('throws RangeError when sizes are not 16', () => { + const a = new Uint8Array(8); + const b = new Uint8Array(8); + expect(() => computeResidual(a, b)).toThrow(RangeError); + }); +}); + +// --------------------------------------------------------------------------- +// 5. 
applyResidual +// --------------------------------------------------------------------------- + +describe('applyResidual', () => { + it('apply(compute(actual, pred), pred) === actual (round-trip)', () => { + const actual = new Uint8Array(16); + for (let i = 0; i < 16; i++) actual[i] = (i * 17 + 33) & 0xff; + + const predicted = new Uint8Array(16); + for (let i = 0; i < 16; i++) predicted[i] = (i * 7 + 11) & 0xff; + + const residual = computeResidual(actual, predicted); + const recovered = applyResidual(residual, predicted); + expect(arraysEqual(recovered, actual)).toBe(true); + }); + + it('throws RangeError when sizes differ', () => { + const a = new Uint8Array(16); + const b = new Uint8Array(8); + expect(() => applyResidual(a, b)).toThrow(RangeError); + }); + + it('throws RangeError when sizes are not 16', () => { + const a = new Uint8Array(10); + const b = new Uint8Array(10); + expect(() => applyResidual(a, b)).toThrow(RangeError); + }); +}); + +// --------------------------------------------------------------------------- +// 6. 
modelId values +// --------------------------------------------------------------------------- + +describe('modelId constants', () => { + it('NullPredictor modelId = 0x00', () => { + expect(new NullPredictor().modelId).toBe(0x00); + }); + + it('LastValuePredictor modelId = 0x01', () => { + expect(new LastValuePredictor().modelId).toBe(0x01); + }); + + it('LinearPredictor modelId = 0x02', () => { + expect(new LinearPredictor().modelId).toBe(0x02); + }); +}); diff --git a/packages/compression/src/tests/rans.test.ts b/packages/compression/src/tests/rans.test.ts new file mode 100644 index 000000000..556d193f0 --- /dev/null +++ b/packages/compression/src/tests/rans.test.ts @@ -0,0 +1,110 @@ +import { describe, it, expect } from 'vitest'; +import { buildFrequencyTable, ransEncode, ransDecode } from '../rans.js'; + +describe('rANS entropy coder', () => { + // --- FrequencyTable tests --- + + it('FrequencyTable: sums to 4096', () => { + const data = new Uint8Array([0, 1, 2, 3, 4, 5, 10, 20, 100, 200]); + const table = buildFrequencyTable(data); + const total = Array.from(table.freq).reduce((a, b) => a + b, 0); + expect(total).toBe(4096); + }); + + it('FrequencyTable: no zero frequencies after Laplace smoothing', () => { + // Even with all-zeros input, every symbol should have freq >= 1 after smoothing + const data = new Uint8Array(100); // all zeros + const table = buildFrequencyTable(data); + for (let i = 0; i < 256; i++) { + expect(table.freq[i]).toBeGreaterThanOrEqual(1); + } + }); + + it('FrequencyTable: cumFreq[256] == 4096', () => { + const data = new Uint8Array([0, 1, 2, 42, 255]); + const table = buildFrequencyTable(data); + expect(table.cumFreq[256]).toBe(4096); + }); + + // --- Roundtrip tests --- + + it('Roundtrip: all-zeros (length 100)', () => { + const symbols = new Uint8Array(100); // all zeros + const table = buildFrequencyTable(symbols); + const encoded = ransEncode(symbols, table); + const decoded = ransDecode(encoded, table, symbols.length); + 
expect(decoded).toEqual(symbols); + }); + + it('Roundtrip: all-same byte (0x42, length 50)', () => { + const symbols = new Uint8Array(50).fill(0x42); + const table = buildFrequencyTable(symbols); + const encoded = ransEncode(symbols, table); + const decoded = ransDecode(encoded, table, symbols.length); + expect(decoded).toEqual(symbols); + }); + + it('Roundtrip: random bytes (length 100)', () => { + // Deterministic "random" using lcg + const symbols = new Uint8Array(100); + let seed = 0xdeadbeef; + for (let i = 0; i < 100; i++) { + seed = (Math.imul(seed, 1664525) + 1013904223) >>> 0; + symbols[i] = seed & 0xff; + } + const table = buildFrequencyTable(symbols); + const encoded = ransEncode(symbols, table); + const decoded = ransDecode(encoded, table, symbols.length); + expect(decoded).toEqual(symbols); + }); + + it('Roundtrip: highly skewed (90% zeros, length 100)', () => { + const symbols = new Uint8Array(100); + // 10 deterministic non-zero bytes (values 1..10), one at every 10th index + for (let i = 0; i < 10; i++) { + symbols[i * 10] = (i + 1) & 0xff; + } + const table = buildFrequencyTable(symbols); + const encoded = ransEncode(symbols, table); + const decoded = ransDecode(encoded, table, symbols.length); + expect(decoded).toEqual(symbols); + }); + + // --- Compression ratio test --- + + it('Size: zeros compress better than random bytes', () => { + const zeros = new Uint8Array(100); + const random = new Uint8Array(100); + let seed = 0xc0ffee; + for (let i = 0; i < 100; i++) { + seed = (Math.imul(seed, 1664525) + 1013904223) >>> 0; + random[i] = seed & 0xff; + } + + const tableZeros = buildFrequencyTable(zeros); + const tableRandom = buildFrequencyTable(random); + + const encodedZeros = ransEncode(zeros, tableZeros); + const encodedRandom = ransEncode(random, tableRandom); + + expect(encodedZeros.length).toBeLessThan(encodedRandom.length); + }); + + // --- Edge cases --- + + it('Edge: length=1 roundtrip', () => { + const symbols = new Uint8Array([0xab]); + const table =
buildFrequencyTable(symbols); + const encoded = ransEncode(symbols, table); + const decoded = ransDecode(encoded, table, symbols.length); + expect(decoded).toEqual(symbols); + }); + + it('Edge: length=0 returns empty output', () => { + const symbols = new Uint8Array(0); + const table = buildFrequencyTable(new Uint8Array([0])); // need some data for table + const encoded = ransEncode(symbols, table); + const decoded = ransDecode(encoded, table, 0); + expect(decoded.length).toBe(0); + }); +}); diff --git a/packages/compression/src/tests/rle.test.ts b/packages/compression/src/tests/rle.test.ts new file mode 100644 index 000000000..1f15493bb --- /dev/null +++ b/packages/compression/src/tests/rle.test.ts @@ -0,0 +1,173 @@ +import { describe, it, expect } from 'vitest'; +import { encodePayloadTypes, decodePayloadTypes } from '../rle.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makeTypes(pattern: Array<[number, number]>): Uint8Array { + const total = pattern.reduce((acc, [, n]) => acc + n, 0); + const arr = new Uint8Array(total); + let pos = 0; + for (const [type, count] of pattern) { + arr.fill(type, pos, pos + count); + pos += count; + } + return arr; +} + +function roundtrip(types: Uint8Array): Uint8Array { + const encoded = encodePayloadTypes(types); + return decodePayloadTypes(encoded, types.length); +} + +function arraysEqual(a: Uint8Array, b: Uint8Array): boolean { + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + if (a[i] !== b[i]) return false; + } + return true; +} + +// --------------------------------------------------------------------------- +// 1. 
Homogeneous batch: 100 × type 0x01 → exactly 2 bytes +// --------------------------------------------------------------------------- + +describe('encodePayloadTypes — homogeneous batch', () => { + it('100 × 0x01 encodes to exactly 2 bytes', () => { + const types = new Uint8Array(100).fill(0x01); + const encoded = encodePayloadTypes(types); + expect(encoded.length).toBe(2); + }); + + it('100 × 0x00 (public) encodes to exactly 2 bytes', () => { + const types = new Uint8Array(100).fill(0x00); + const encoded = encodePayloadTypes(types); + expect(encoded.length).toBe(2); + }); +}); + +// --------------------------------------------------------------------------- +// 2. Alternating types: worst case — [0x00, 0x01, ...] × 50 → 200 bytes +// --------------------------------------------------------------------------- + +describe('encodePayloadTypes — alternating types (worst case)', () => { + it('[0x00, 0x01] × 50 (100 elements) encodes to 200 bytes', () => { + const types = new Uint8Array(100); + for (let i = 0; i < 100; i++) { + types[i] = i % 2 === 0 ? 0x00 : 0x01; + } + const encoded = encodePayloadTypes(types); + // 100 runs of length 1 → each run = 1 type byte + 1 count varint byte = 2 bytes + // 100 runs × 2 bytes = 200 bytes (worst case: 2× expansion vs raw storage) + expect(encoded.length).toBe(200); + }); + + it('alternating roundtrip is correct', () => { + const types = new Uint8Array(100); + for (let i = 0; i < 100; i++) { + types[i] = i % 2 === 0 ? 0x00 : 0x01; + } + const decoded = roundtrip(types); + expect(arraysEqual(decoded, types)).toBe(true); + }); +}); + +// --------------------------------------------------------------------------- +// 3.
Mixed runs: [0x01 × 50, 0x02 × 30, 0x01 × 20] → 6 bytes (3 runs × 2 bytes) +// --------------------------------------------------------------------------- + +describe('encodePayloadTypes — mixed runs', () => { + it('[0x01×50, 0x02×30, 0x01×20] encodes to exactly 6 bytes', () => { + // All counts (50, 30, 20) are < 128 → each count varint = 1 byte + // 3 runs × (1 type byte + 1 varint byte) = 6 bytes + const types = makeTypes([[0x01, 50], [0x02, 30], [0x01, 20]]); + const encoded = encodePayloadTypes(types); + expect(encoded.length).toBe(6); + }); + + it('[0x01×50, 0x02×30, 0x01×20] roundtrip is correct', () => { + const types = makeTypes([[0x01, 50], [0x02, 30], [0x01, 20]]); + const decoded = roundtrip(types); + expect(arraysEqual(decoded, types)).toBe(true); + }); +}); + +// --------------------------------------------------------------------------- +// 4. Single element → 2 bytes +// --------------------------------------------------------------------------- + +describe('encodePayloadTypes — single element', () => { + it('[0x03] encodes to 2 bytes', () => { + const types = new Uint8Array([0x03]); + const encoded = encodePayloadTypes(types); + expect(encoded.length).toBe(2); + }); + + it('[0x00] encodes to 2 bytes', () => { + const types = new Uint8Array([0x00]); + const encoded = encodePayloadTypes(types); + expect(encoded.length).toBe(2); + }); +}); + +// --------------------------------------------------------------------------- +// 5. 
Roundtrip correctness on all cases +// --------------------------------------------------------------------------- + +describe('encodePayloadTypes / decodePayloadTypes — roundtrip', () => { + it('homogeneous 100 × 0x01 roundtrip', () => { + const types = new Uint8Array(100).fill(0x01); + expect(arraysEqual(roundtrip(types), types)).toBe(true); + }); + + it('single element [0x02] roundtrip', () => { + const types = new Uint8Array([0x02]); + expect(arraysEqual(roundtrip(types), types)).toBe(true); + }); + + it('mixed runs [0x00×10, 0x01×5, 0x02×1, 0x03×20] roundtrip', () => { + const types = makeTypes([[0x00, 10], [0x01, 5], [0x02, 1], [0x03, 20]]); + expect(arraysEqual(roundtrip(types), types)).toBe(true); + }); + + it('all four type values in sequence roundtrip', () => { + const types = new Uint8Array([0x00, 0x01, 0x02, 0x03]); + expect(arraysEqual(roundtrip(types), types)).toBe(true); + }); + + it('large homogeneous batch of 1000 roundtrip', () => { + const types = new Uint8Array(1000).fill(0x00); + expect(arraysEqual(roundtrip(types), types)).toBe(true); + }); +}); + +// --------------------------------------------------------------------------- +// 6. Empty array → throws +// --------------------------------------------------------------------------- + +describe('encodePayloadTypes — empty array', () => { + it('throws RangeError for empty Uint8Array', () => { + expect(() => encodePayloadTypes(new Uint8Array(0))).toThrow(RangeError); + }); +}); + +// --------------------------------------------------------------------------- +// 7. 
Count mismatch in decode → throws +// --------------------------------------------------------------------------- + +describe('decodePayloadTypes — count mismatch', () => { + it('throws RangeError when totalCount is too large', () => { + const types = new Uint8Array(10).fill(0x01); + const encoded = encodePayloadTypes(types); + // Claim there should be 20 types, but only 10 are encoded + expect(() => decodePayloadTypes(encoded, 20)).toThrow(RangeError); + }); + + it('throws RangeError when totalCount is too small', () => { + const types = new Uint8Array(10).fill(0x01); + const encoded = encodePayloadTypes(types); + // Claim there should be only 5 types, but 10 are encoded + expect(() => decodePayloadTypes(encoded, 5)).toThrow(RangeError); + }); +}); diff --git a/packages/compression/src/tests/sample-generator.test.ts b/packages/compression/src/tests/sample-generator.test.ts new file mode 100644 index 000000000..41f05c3c8 --- /dev/null +++ b/packages/compression/src/tests/sample-generator.test.ts @@ -0,0 +1,139 @@ +import { describe, it, expect } from 'vitest'; +import { generateSensorStream, generateTrainingSamples, SensorProfile } from '../sample-generator.js'; + +const PROFILES: SensorProfile[] = ['kulhadVoltage', 'temperature', 'gps', 'random']; + +// ─── Helpers ────────────────────────────────────────────────────────────────── + +function readFloat32LE(buf: Uint8Array, offset: number): number { + const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength); + return view.getFloat32(offset, true); +} + +function sha256(data: Uint8Array): Promise<Uint8Array> { + return crypto.subtle.digest('SHA-256', data).then(b => new Uint8Array(b)); +} + +// ─── Tests ──────────────────────────────────────────────────────────────────── + +describe('generateSensorStream', () => { + it('all profiles produce arrays of 153-byte DOTs', async () => { + for (const profile of PROFILES) { + const dots = await generateSensorStream({ count: 10, profile }); + expect(dots).toHaveLength(10);
+ for (const dot of dots) { + expect(dot).toBeInstanceOf(Uint8Array); + expect(dot.byteLength).toBe(153); + } + } + }); + + it('chain hash of dot[0] is 32 zero bytes (genesis)', async () => { + const dots = await generateSensorStream({ count: 5, profile: 'kulhadVoltage' }); + const chainHash0 = dots[0].slice(96, 128); + expect(chainHash0.every(b => b === 0)).toBe(true); + }); + + it('chain hash of dot[1] equals SHA-256(dot[0])', async () => { + const dots = await generateSensorStream({ count: 5, profile: 'kulhadVoltage' }); + const expected = await sha256(dots[0]); + const actual = dots[1].slice(96, 128); + expect(actual).toEqual(expected); + }); + + it('timestamps are monotonically increasing within a stream', async () => { + for (const profile of PROFILES) { + const dots = await generateSensorStream({ count: 10, profile }); + let prevTs = 0n; + for (const dot of dots) { + const view = new DataView(dot.buffer, dot.byteOffset, dot.byteLength); + const ts = view.getBigUint64(128, false); + expect(ts).toBeGreaterThan(prevTs); + prevTs = ts; + } + } + }); + + it('kulhadVoltage payloads are correlated — float32 values within 0.1V of each other', async () => { + const dots = await generateSensorStream({ count: 20, profile: 'kulhadVoltage' }); + const values = dots.map(d => readFloat32LE(d, 137)); + const min = Math.min(...values); + const max = Math.max(...values); + expect(max - min).toBeLessThan(0.1); + // all near 0.497V + for (const v of values) { + expect(Math.abs(v - 0.497)).toBeLessThan(0.05); + } + }); + + it('temperature payloads are correlated — float32 values within 5°C of each other', async () => { + const dots = await generateSensorStream({ count: 20, profile: 'temperature' }); + const values = dots.map(d => readFloat32LE(d, 137)); + const min = Math.min(...values); + const max = Math.max(...values); + expect(max - min).toBeLessThan(5); + // all near 20°C + for (const v of values) { + expect(Math.abs(v - 20)).toBeLessThan(3); + } + }); + + it('GPS payloads 
stay near Mumbai — lat ±0.02° and lon ±0.02°', async () => { + const dots = await generateSensorStream({ count: 20, profile: 'gps' }); + for (const dot of dots) { + const lat = readFloat32LE(dot, 137); + const lon = readFloat32LE(dot, 141); + expect(Math.abs(lat - 19.076)).toBeLessThan(0.02); + expect(Math.abs(lon - 72.877)).toBeLessThan(0.02); + } + }); + + it('random profile has entropy — not all payloads identical', async () => { + const dots = await generateSensorStream({ count: 10, profile: 'random' }); + const payloads = dots.map(d => Array.from(d.slice(137, 153)).join(',')); + const unique = new Set(payloads); + expect(unique.size).toBeGreaterThan(1); + }); + + it('accepts custom keypair and uses it', async () => { + const { createKeypair } = await import('@dotprotocol/core'); + const keypair = await createKeypair(); + const dots = await generateSensorStream({ count: 5, profile: 'temperature', keypair }); + // Public key at [0..31] should match the provided keypair + for (const dot of dots) { + expect(dot.slice(0, 32)).toEqual(keypair.publicKey); + } + }); + + it('respects startTimestamp option', async () => { + const startTs = 1_700_000_000_000n; + const dots = await generateSensorStream({ count: 3, profile: 'temperature', startTimestamp: startTs }); + const view = new DataView(dots[0].buffer, dots[0].byteOffset, dots[0].byteLength); + const ts0 = view.getBigUint64(128, false); + expect(ts0).toBeGreaterThanOrEqual(startTs); + }); +}); + +describe('generateTrainingSamples', () => { + it('returns arrays of Uint8Array(16) with correct count', async () => { + const samples = await generateTrainingSamples('kulhadVoltage', 50); + expect(samples).toHaveLength(50); + for (const s of samples) { + expect(s).toBeInstanceOf(Uint8Array); + expect(s.byteLength).toBe(16); + } + }); + + it('defaults to 100 samples when count omitted', async () => { + const samples = await generateTrainingSamples('temperature'); + expect(samples).toHaveLength(100); + }); + + it('extracts 
payload bytes (not full DOT)', async () => {
+    const samples = await generateTrainingSamples('kulhadVoltage', 10);
+    // Each sample should be exactly 16 bytes — payload column only
+    for (const s of samples) {
+      expect(s.byteLength).toBe(16);
+    }
+  });
+});
diff --git a/packages/compression/src/tests/timestamp-delta.test.ts b/packages/compression/src/tests/timestamp-delta.test.ts
new file mode 100644
index 000000000..e4c76ac6c
--- /dev/null
+++ b/packages/compression/src/tests/timestamp-delta.test.ts
@@ -0,0 +1,139 @@
+import { describe, it, expect } from 'vitest';
+import { encodeTimestampDeltas, decodeTimestampDeltas } from '../timestamp-delta.js';
+
+// ─── helpers ──────────────────────────────────────────────────────────────────
+
+/** Generate `count` timestamps starting at `startMs`, each `stepMs` after the previous one. */
+function uniformTimestamps(count: number, startMs: bigint, stepMs: bigint): bigint[] {
+  return Array.from({ length: count }, (_, i) => startMs + BigInt(i) * stepMs);
+}
+
+/** Assert that encode followed by decode reproduces `ts` element for element. */
+function assertRoundtrip(ts: bigint[]): void {
+  const encoded = encodeTimestampDeltas(ts);
+  const decoded = decodeTimestampDeltas(encoded, ts.length);
+  expect(decoded.length).toBe(ts.length);
+  for (let i = 0; i < ts.length; i++) {
+    expect(decoded[i]).toBe(ts[i]);
+  }
+}
+
+// ─── test suite ───────────────────────────────────────────────────────────────
+
+const BASE = 1_700_000_000_000n; // Nov 2023 epoch anchor in ms
+
+describe('timestamp-delta encoder', () => {
+  // 1. 100ms intervals — delta=100 → zigzag(100)=200 → 2 varint bytes each
+  //    (zigzag maps positive n to 2n, so 100→200 which needs 2 bytes)
+  //    total = 8 + 99×2 = 206 bytes; assert ≤210
+  it('100ms intervals: encodes 100 timestamps in ≤210 bytes', () => {
+    const ts = uniformTimestamps(100, BASE, 100n);
+    const encoded = encodeTimestampDeltas(ts);
+    // zigzag(100) = 200 → needs 2 varint bytes (128-16383 range)
+    // 8-byte anchor + 99 × 2-byte deltas = 206 bytes
+    expect(encoded.byteLength).toBeLessThanOrEqual(210);
+    // Verify it IS exactly 206 (not more)
+    expect(encoded.byteLength).toBe(206);
+    assertRoundtrip(ts);
+  });
+
+  // 2. 1s intervals — delta=1000 → zigzag(1000)=2000 → 2 varint bytes each
+  it('1s intervals: encodes 100 timestamps in ≤210 bytes', () => {
+    const ts = uniformTimestamps(100, BASE, 1000n);
+    const encoded = encodeTimestampDeltas(ts);
+    // zigzag(1000) = 2000 → 2 varint bytes (128-16383 range)
+    // 8-byte anchor + 99 × 2-byte deltas = 206 bytes
+    expect(encoded.byteLength).toBeLessThanOrEqual(210);
+    expect(encoded.byteLength).toBe(206);
+    assertRoundtrip(ts);
+  });
+
+  // 3. Irregular timestamps — pseudo-random deltas 1–10,000,000ms
+  it('irregular timestamps: roundtrip is exact', () => {
+    const ts: bigint[] = [BASE];
+    // Use a deterministic pseudo-random sequence (LCG) for reproducibility
+    let seed = 42;
+    for (let i = 1; i < 100; i++) {
+      seed = (seed * 1664525 + 1013904223) & 0x7fffffff;
+      const deltaMs = BigInt((seed % 10_000_000) + 1);
+      ts.push(ts[ts.length - 1]! + deltaMs);
+    }
+    assertRoundtrip(ts);
+  });
+
+  // 4. Zero delta — consecutive timestamps with same ms value
+  it('zero delta: consecutive identical timestamps roundtrip exactly', () => {
+    const ts: bigint[] = Array.from({ length: 10 }, () => BASE);
+    const encoded = encodeTimestampDeltas(ts);
+    // Each zero delta = 1 varint byte (zigzag(0)=0 → 1 byte)
+    expect(encoded.byteLength).toBe(8 + 9 * 1); // 8 anchor + 9 zero deltas
+    assertRoundtrip(ts);
+  });
+
+  // 5. Negative deltas — out-of-order timestamps
+  it('negative deltas: out-of-order timestamps roundtrip exactly', () => {
+    const ts: bigint[] = [
+      BASE,
+      BASE - 500n,   // -500ms
+      BASE - 1000n,  // -500ms again
+      BASE + 200n,   // +1200ms
+      BASE - 300n,   // -500ms
+    ];
+    assertRoundtrip(ts);
+  });
+
+  // 6. Size savings comparison vs raw uint64
+  it('size savings: delta-encoded 100ms stream saves ≥60% vs raw uint64', () => {
+    const ts = uniformTimestamps(100, BASE, 100n);
+    const encoded = encodeTimestampDeltas(ts);
+    const rawSize = 100 * 8; // 800 bytes as raw uint64
+    const savedFraction = (rawSize - encoded.byteLength) / rawSize;
+    expect(savedFraction).toBeGreaterThanOrEqual(0.6);
+  });
+
+  // 7. Single timestamp — edge case
+  it('single timestamp: roundtrips exactly', () => {
+    const ts = [BASE + 999n];
+    const encoded = encodeTimestampDeltas(ts);
+    expect(encoded.byteLength).toBe(8); // just the anchor
+    assertRoundtrip(ts);
+  });
+
+  // 8. Empty array — should throw
+  it('empty array: throws RangeError', () => {
+    expect(() => encodeTimestampDeltas([])).toThrow(RangeError);
+  });
+
+  // 9. Roundtrip invariant — decodeTimestampDeltas(encodeTimestampDeltas(ts), ts.length) === ts
+  it('roundtrip invariant: holds for all test patterns', () => {
+    // Max safe delta for encodeSignedVarint: ±(MAX_SAFE_INTEGER / 2) ≈ ±4.5 × 10^15 ms ≈ ±142,700 years
+    // DOT timestamps are realistic epoch values — deltas stay well within this range
+    const cases: bigint[][] = [
+      uniformTimestamps(1, BASE, 100n),
+      uniformTimestamps(50, BASE, 100n),
+      uniformTimestamps(50, BASE, 1000n),
+      uniformTimestamps(10, BASE, 0n), // all same (zero delta)
+      [BASE, BASE - 1n, BASE - 2n, BASE + 10n], // mixed neg/pos
+      [BASE, BASE + 1_000_000_000n, BASE + 1_000_000_001n], // large but safe deltas
+    ];
+
+    for (const ts of cases) {
+      assertRoundtrip(ts);
+    }
+  });
+
+  // Bonus: very large anchor timestamp (near 2^53 ms) with small deltas still encodes correctly
+  it('large timestamp anchor: encodes and decodes MAX_SAFE_INTEGER-scale timestamps', () => {
+    // 2^53 - 1 ms ≈ 285,000 years after the epoch — anchor can be huge; deltas stay small
+    const huge = BigInt(Number.MAX_SAFE_INTEGER);
+    const ts = [huge, huge + 100n, huge + 200n, huge + 300n];
+    assertRoundtrip(ts);
+  });
+
+  // Verify that decoding with wrong count throws on buffer exhaustion
+  it('decode: throws RangeError when count exceeds encoded data', () => {
+    const ts = uniformTimestamps(5, BASE, 100n);
+    const encoded = encodeTimestampDeltas(ts);
+    expect(() => decodeTimestampDeltas(encoded, 100)).toThrow(RangeError);
+  });
+});
diff --git a/packages/compression/src/tests/varint.test.ts b/packages/compression/src/tests/varint.test.ts
new file mode 100644
index 000000000..6102501cd
--- /dev/null
+++ b/packages/compression/src/tests/varint.test.ts
@@ -0,0 +1,228 @@
+import { describe, it, expect } from 'vitest';
+import {
+  encodeVarint,
+  decodeVarint,
+  encodeSignedVarint,
+  decodeSignedVarint,
+} from '../varint.js';
+
+// ---------------------------------------------------------------------------
+// Helpers
+//
---------------------------------------------------------------------------
+
+function roundtripUnsigned(n: number): number {
+  const encoded = encodeVarint(n);
+  const [decoded] = decodeVarint(encoded, 0);
+  return decoded;
+}
+
+function roundtripSigned(n: number): number {
+  const encoded = encodeSignedVarint(n);
+  const [decoded] = decodeSignedVarint(encoded, 0);
+  return decoded;
+}
+
+// ---------------------------------------------------------------------------
+// Unsigned roundtrip
+// ---------------------------------------------------------------------------
+
+describe('encodeVarint / decodeVarint — roundtrip', () => {
+  const cases: number[] = [0, 1, 127, 128, 255, 256, 16383, 16384, 2 ** 32, 2 ** 48];
+
+  for (const n of cases) {
+    it(`roundtrips ${n}`, () => {
+      expect(roundtripUnsigned(n)).toBe(n);
+    });
+  }
+});
+
+// ---------------------------------------------------------------------------
+// Signed roundtrip (zigzag)
+// ---------------------------------------------------------------------------
+
+describe('encodeSignedVarint / decodeSignedVarint — roundtrip', () => {
+  const cases: number[] = [0, -1, 1, -127, 127, -128, 128, -1000000, 1000000];
+
+  for (const n of cases) {
+    it(`roundtrips signed ${n}`, () => {
+      expect(roundtripSigned(n)).toBe(n);
+    });
+  }
+});
+
+// ---------------------------------------------------------------------------
+// Size assertions (unsigned)
+// ---------------------------------------------------------------------------
+
+describe('encodeVarint — byte size guarantees', () => {
+  it('0 encodes to 1 byte', () => {
+    expect(encodeVarint(0).length).toBe(1);
+  });
+
+  it('127 encodes to 1 byte', () => {
+    expect(encodeVarint(127).length).toBe(1);
+  });
+
+  it('128 encodes to 2 bytes', () => {
+    expect(encodeVarint(128).length).toBe(2);
+  });
+
+  it('16383 encodes to 2 bytes', () => {
+    expect(encodeVarint(16383).length).toBe(2);
+  });
+
+  it('16384 encodes to 3 bytes', () => {
+    expect(encodeVarint(16384).length).toBe(3);
+  });
+
+  it('2097151 encodes to 3 bytes', () => {
+    expect(encodeVarint(2097151).length).toBe(3);
+  });
+
+  it('2097152 encodes to 4 bytes', () => {
+    expect(encodeVarint(2097152).length).toBe(4);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// bytesConsumed is correct (mid-buffer offset test)
+// ---------------------------------------------------------------------------
+
+describe('decodeVarint — bytesConsumed accuracy', () => {
+  it('correctly reports bytesConsumed for 1-byte varint at offset 3', () => {
+    // Build a buffer: [0, 0, 0, 42, 0, 0]
+    const buf = new Uint8Array([0, 0, 0, 42, 0, 0]);
+    const [value, consumed] = decodeVarint(buf, 3);
+    expect(value).toBe(42);
+    expect(consumed).toBe(1);
+  });
+
+  it('correctly reports bytesConsumed for 2-byte varint at offset 2', () => {
+    // Encode 300 = 0xAC 0x02 in varint
+    const encoded = encodeVarint(300);
+    expect(encoded.length).toBe(2);
+    // Embed at offset 2 inside a larger buffer
+    const buf = new Uint8Array(10);
+    buf.set(encoded, 2);
+    const [value, consumed] = decodeVarint(buf, 2);
+    expect(value).toBe(300);
+    expect(consumed).toBe(2);
+  });
+
+  it('correctly reports bytesConsumed for 3-byte varint at offset 1', () => {
+    const encoded = encodeVarint(16384);
+    expect(encoded.length).toBe(3);
+    const buf = new Uint8Array(10);
+    buf.set(encoded, 1);
+    const [value, consumed] = decodeVarint(buf, 1);
+    expect(value).toBe(16384);
+    expect(consumed).toBe(3);
+  });
+
+  it('can decode multiple consecutive varints from a single buffer', () => {
+    // Pack three varints: 1 (1B), 300 (2B), 16384 (3B)
+    const a = encodeVarint(1);
+    const b = encodeVarint(300);
+    const c = encodeVarint(16384);
+    const buf = new Uint8Array(a.length + b.length + c.length);
+    buf.set(a, 0);
+    buf.set(b, a.length);
+    buf.set(c, a.length + b.length);
+
+    const [v1, c1] = decodeVarint(buf, 0);
+    const [v2, c2] = decodeVarint(buf, c1);
+    const [v3, c3] = decodeVarint(buf, c1 + c2);
+
+    expect(v1).toBe(1);
+    expect(v2).toBe(300);
+    expect(v3).toBe(16384);
+    expect(c1).toBe(1);
+    expect(c2).toBe(2);
+    expect(c3).toBe(3);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// encodeVarint — float input guard
+// ---------------------------------------------------------------------------
+
+describe('encodeVarint — input validation', () => {
+  it('throws RangeError for float input (1.5)', () => {
+    expect(() => encodeVarint(1.5)).toThrow(RangeError);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// encodeSignedVarint — signed overflow guard
+// ---------------------------------------------------------------------------
+
+describe('encodeSignedVarint — range guard', () => {
+  it('throws RangeError when value exceeds MAX_SIGNED (Math.floor(MAX_SAFE_INTEGER / 2) + 1)', () => {
+    const tooLarge = Math.floor(Number.MAX_SAFE_INTEGER / 2) + 1;
+    expect(() => encodeSignedVarint(tooLarge)).toThrow(RangeError);
+  });
+
+  it('throws RangeError when value is below -MAX_SIGNED', () => {
+    const tooSmall = -(Math.floor(Number.MAX_SAFE_INTEGER / 2) + 1);
+    expect(() => encodeSignedVarint(tooSmall)).toThrow(RangeError);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Error on reading past end of buffer
+// ---------------------------------------------------------------------------
+
+describe('decodeVarint — error on buffer overrun', () => {
+  it('throws RangeError when buffer is completely empty', () => {
+    expect(() => decodeVarint(new Uint8Array(0), 0)).toThrow(RangeError);
+  });
+
+  it('throws RangeError when offset equals buffer length', () => {
+    const buf = new Uint8Array([42]);
+    expect(() => decodeVarint(buf, 1)).toThrow(RangeError);
+  });
+
+  it('throws RangeError when a multi-byte varint is truncated', () => {
+    // 0x80 = continuation bit set but no following byte
+    const buf = new Uint8Array([0x80]);
+    expect(() => decodeVarint(buf, 0)).toThrow(RangeError);
+  });
+
+  it('throws RangeError when varint is truncated mid-sequence', () => {
+    // 300 needs 2 bytes; only provide 1 continuation byte
+    const partial = new Uint8Array([0xac]); // first byte of 300, continuation bit set
+    expect(() => decodeVarint(partial, 0)).toThrow(RangeError);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Signed varint — additional edge cases
+// ---------------------------------------------------------------------------
+
+describe('encodeSignedVarint / decodeSignedVarint — edge cases', () => {
+  it('negative zero is treated as zero', () => {
+    expect(roundtripSigned(-0)).toBe(0);
+  });
+
+  it('signed 0 encodes to 1 byte (zigzag 0 → 0)', () => {
+    expect(encodeSignedVarint(0).length).toBe(1);
+  });
+
+  it('signed -1 encodes to 1 byte (zigzag -1 → 1)', () => {
+    expect(encodeSignedVarint(-1).length).toBe(1);
+  });
+
+  it('signed 1 encodes to 1 byte (zigzag 1 → 2)', () => {
+    expect(encodeSignedVarint(1).length).toBe(1);
+  });
+
+  it('signed -64 encodes to 1 byte (zigzag -64 → 127)', () => {
+    // zigzag(-64) = 64*2-1 = 127 → 1 byte
+    expect(encodeSignedVarint(-64).length).toBe(1);
+  });
+
+  it('signed 64 encodes to 2 bytes (zigzag 64 → 128)', () => {
+    // zigzag(64) = 128 → 2 bytes
+    expect(encodeSignedVarint(64).length).toBe(2);
+  });
+});
diff --git a/packages/compression/src/tests/weissman.test.ts b/packages/compression/src/tests/weissman.test.ts
new file mode 100644
index 000000000..4727b70c8
--- /dev/null
+++ b/packages/compression/src/tests/weissman.test.ts
@@ -0,0 +1,69 @@
+import { describe, it, expect } from 'vitest';
+import { weissmanScore, WEISSMAN_PRESETS } from '../weissman.js';
+
+// As exercised below, weissmanScore(raw, compressed, gzip, alpha = 1) evaluates to
+// alpha × (raw / compressed) ÷ (raw / gzip); a score of 1.0 means "same as gzip".
+
+describe('weissmanScore()', () => {
+  it('computes ratio correctly: 153B raw, 18B compressed, 106B gzip', () => {
+    // r_algo = 153/18 = 8.5, r_gzip = 153/106 ≈ 1.4434
+    // W = 8.5 / 1.4434 ≈ 5.889
+    const w = weissmanScore(153, 18, 106);
+    expect(w).toBeCloseTo(5.889, 2);
+  });
+
+  it('returns 1.0 when algorithm equals gzip (equal compression)', () => {
+    const w = weissmanScore(100, 100, 100, 1.0);
+    expect(w).toBe(1.0);
+  });
+
+  it('respects alpha scaling factor', () => {
+    const w1 = weissmanScore(100, 50, 100, 1.0);
+    const w2 = weissmanScore(100, 50, 100, 2.0);
+    expect(w2).toBeCloseTo(w1 * 2, 10);
+  });
+
+  it('default alpha is 1.0', () => {
+    const w = weissmanScore(200, 50, 100);
+    // r_algo = 200/50 = 4, r_gzip = 200/100 = 2, W = 4/2 = 2
+    expect(w).toBeCloseTo(2.0, 10);
+  });
+});
+
+describe('WEISSMAN_PRESETS', () => {
+  it('gzip preset has weissmanScore of exactly 1.0 (reference baseline)', () => {
+    expect(WEISSMAN_PRESETS.gzip.weissmanScore).toBe(1.0);
+  });
+
+  it('dotPhase2DictLarge has bytesPerDot < 5 (best measured level)', () => {
+    expect(WEISSMAN_PRESETS.dotPhase2DictLarge.bytesPerDot).toBeLessThan(5);
+  });
+
+  it('dotBLSBatch weissmanScore ≈ 2.459 (Phase 1.5 published result)', () => {
+    expect(WEISSMAN_PRESETS.dotBLSBatch.weissmanScore).toBeCloseTo(2.459, 2);
+  });
+
+  it('all presets have compressionRatio > 0', () => {
+    for (const [key, preset] of Object.entries(WEISSMAN_PRESETS)) {
+      expect(preset.compressionRatio, `${key}.compressionRatio`).toBeGreaterThan(0);
+    }
+  });
+
+  it('all presets have bytesPerDot > 0', () => {
+    for (const [key, preset] of Object.entries(WEISSMAN_PRESETS)) {
+      expect(preset.bytesPerDot, `${key}.bytesPerDot`).toBeGreaterThan(0);
+    }
+  });
+
+  it('weissmanScore function is exported and callable', () => {
+    expect(typeof weissmanScore).toBe('function');
+    const result = weissmanScore(153, 18, 106);
+    expect(typeof result).toBe('number');
+    expect(result).toBeGreaterThan(0);
+  });
+
+  it('Phase 2 dictionary preset outperforms plain preset', () => {
+    expect(WEISSMAN_PRESETS.dotPhase2Dict.weissmanScore)
+      .toBeGreaterThan(WEISSMAN_PRESETS.dotPhase2Plain.weissmanScore);
+  });
+});
diff --git
a/packages/compression/src/tests/zstd.test.ts b/packages/compression/src/tests/zstd.test.ts
new file mode 100644
index 000000000..0566e65df
--- /dev/null
+++ b/packages/compression/src/tests/zstd.test.ts
@@ -0,0 +1,188 @@
+import { describe, it, expect } from 'vitest';
+import { trainDictionary, compressWithDictionary, decompressWithDictionary } from '../zstd.js';
+
+// ─── DOT-like sample generators ───────────────────────────────────────────────
+
+/**
+ * Generate a single DOT-like batch body.
+ *
+ * Loosely mimics the column layout of a batch v2 body. What is actually emitted:
+ *   - Timestamp column: 1 marker byte (low byte of tsBase) + 1 byte per DOT in 100-109
+ *   - Type column: 1 raw byte per DOT (0x01 every 20th DOT, otherwise 0x00; not RLE-packed)
+ *   - Payload column: 16 bytes per DOT (big-endian float64 in [0..7], zeros in [8..15])
+ *
+ * Using realistic structure improves dictionary quality vs random bytes.
+ *
+ * NOTE: payload values come from Math.random(), so every call (and every
+ * dictionary trained from these samples) differs between runs.
+ *
+ * @param dotCount - Number of DOTs in the body (default 50)
+ * @param tsBase   - Timestamp base; only its low byte is written (default 1_700_000_000_000)
+ * @returns The three columns concatenated: 1 + 18 × dotCount bytes
+ */
+function generateDotLikeSample(dotCount = 50, tsBase = 1_700_000_000_000): Uint8Array {
+  // Timestamp column: each delta is < 128, i.e. one byte per entry
+  const tsDeltas: number[] = [];
+  tsDeltas.push(tsBase & 0xff); // simplified — just push low byte as "base marker"
+  for (let i = 0; i < dotCount; i++) {
+    tsDeltas.push(100 + (i % 10)); // predictable 100-109ms deltas
+  }
+
+  // Type column: mostly 0x00 (PUBLIC)
+  const types = new Uint8Array(dotCount);
+  for (let i = 0; i < dotCount; i++) {
+    types[i] = i % 20 === 0 ? 0x01 : 0x00; // 5% CIRCLE, 95% PUBLIC
+  }
+
+  // Payload column: 16 bytes per DOT — simulate float64 values with small deltas
+  const payloads = new Uint8Array(dotCount * 16);
+  const view = new DataView(payloads.buffer);
+  let val = 42_000.0;
+  for (let i = 0; i < dotCount; i++) {
+    val += (Math.random() - 0.5) * 0.01; // tiny price tick
+    view.setFloat64(i * 16, val, false);
+    // bytes [8..15] = zero-padded (DOT payload structure)
+  }
+
+  // Concatenate: [tsDeltas][types][payloads]
+  const tsBuf = new Uint8Array(tsDeltas);
+  const total = tsBuf.length + types.length + payloads.length;
+  const out = new Uint8Array(total);
+  let cursor = 0;
+  out.set(tsBuf, cursor); cursor += tsBuf.length;
+  out.set(types, cursor); cursor += types.length;
+  out.set(payloads, cursor);
+  return out;
+}
+
+/**
+ * Generate N DOT-like batch bodies.
+ * Each sample varies dot count (40-60) and timestamp base slightly to add
+ * realistic variation without losing structural similarity — key for dict training.
+ */
+function generateDotLikeSamples(n: number): Uint8Array[] {
+  return Array.from({ length: n }, (_, i) =>
+    generateDotLikeSample(40 + (i % 21), 1_700_000_000_000 + i * 60_000),
+  );
+}
+
+// ─── Tests ────────────────────────────────────────────────────────────────────
+
+describe('zstd dictionary compression', () => {
+
+  it('trains dictionary without error from sample batch bodies', async () => {
+    const samples = generateDotLikeSamples(20);
+    const dict = await trainDictionary(samples);
+
+    // Dictionary must be non-empty and not exceed requested size
+    expect(dict.length).toBeGreaterThan(0);
+    expect(dict.length).toBeLessThanOrEqual(32_768);
+
+    // zstd dictionary magic number: 0xEC30A437 (little-endian)
+    const view = new DataView(dict.buffer, dict.byteOffset);
+    expect(view.getUint32(0, true)).toBe(0xEC30A437);
+  }, 60_000);
+
+  it('compress + decompress roundtrip with dictionary', async () => {
+    const samples = generateDotLikeSamples(50);
+    const dict = await trainDictionary(samples);
+
+    const data = generateDotLikeSample();
+    const compressed = compressWithDictionary(data, dict);
+    const decompressed = decompressWithDictionary(compressed, dict);
+
+    // Byte-exact roundtrip
+    expect(decompressed.length).toBe(data.length);
+    for (let i = 0; i < data.length; i++) {
+      if (decompressed[i] !== data[i]) {
+        throw new Error(`Byte mismatch at index ${i}: got ${decompressed[i]}, want ${data[i]}`);
+      }
+    }
+  }, 60_000);
+
+  it('compression reduces size on DOT-like data', async () => {
+    const samples = generateDotLikeSamples(100);
+    const dict = await trainDictionary(samples);
+    const data = generateDotLikeSample(100); // larger sample for meaningful compression
+
+    const compressed = compressWithDictionary(data, dict);
+
+    // Dictionary-trained compression should not expand data significantly
+    // Allow 10% expansion worst case (small input can slightly expand with compression overhead)
+    expect(compressed.length).toBeLessThanOrEqual(data.length * 1.1);
+
+    const ratio = data.length / compressed.length;
+    console.log(
+      `  DOT batch: ${data.length}B → ${compressed.length}B (${ratio.toFixed(2)}× compression)`,
+    );
+  }, 60_000);
+
+  it('compression ratio improves with more training samples', async () => {
+    const dataToCompress = generateDotLikeSample(200);
+
+    // Train on small set (20 samples)
+    const dictSmall = await trainDictionary(generateDotLikeSamples(20));
+    const compressedSmall = compressWithDictionary(dataToCompress, dictSmall);
+
+    // Train on larger set (100 samples)
+    const dictLarge = await trainDictionary(generateDotLikeSamples(100));
+    const compressedLarge = compressWithDictionary(dataToCompress, dictLarge);
+
+    console.log(
+      `  Small dict (20 samples):  ${dataToCompress.length}B → ${compressedSmall.length}B`,
+    );
+    console.log(
+      `  Large dict (100 samples): ${dataToCompress.length}B → ${compressedLarge.length}B`,
+    );
+
+    // NOTE: the two sizes are only logged; nothing here asserts that the larger dictionary wins.
+    // Both should produce valid compressed output (decompressible)
+    const roundtripped = decompressWithDictionary(compressedLarge, dictLarge);
+    expect(roundtripped.length).toBe(dataToCompress.length);
+  }, 60_000);
+
+  it('decompression fails with wrong dictionary', async () => {
+    // Train two dictionaries from separately generated sample sets (they differ only via Math.random())
+    const dict1 = await trainDictionary(generateDotLikeSamples(20));
+    const dict2 = await trainDictionary(generateDotLikeSamples(20));
+
+    const data = generateDotLikeSample();
+    const compressed = compressWithDictionary(data, dict1);
+
+    // Decompressing with the wrong dictionary must throw, not silently corrupt
+    // zstd embeds a 4-byte dictionary ID in the frame header and validates it
+    expect(() => decompressWithDictionary(compressed, dict2)).toThrow();
+  }, 60_000);
+
+  it('respects custom dictSize parameter', async () => {
+    const samples = generateDotLikeSamples(30);
+
+    // Request a smaller dictionary (8 KB)
+    const dict8k = await trainDictionary(samples, 8_192);
+    expect(dict8k.length).toBeLessThanOrEqual(8_192);
+
expect(dict8k.length).toBeGreaterThan(0); + }, 60_000); + + it('throws on empty samples array', async () => { + await expect(trainDictionary([])).rejects.toThrow('must not be empty'); + }, 60_000); + + it('handles empty input to compressWithDictionary gracefully', async () => { + const dict = await trainDictionary(generateDotLikeSamples(20)); + const empty = new Uint8Array(0); + + let compressed: Uint8Array; + try { + compressed = compressWithDictionary(empty, dict); + } catch (err: unknown) { + // If the native binding rejects empty input, error must be a real Error, not a crash + expect(err).toBeInstanceOf(Error); + expect((err as Error).message.length).toBeGreaterThan(0); + return; + } + + // If compression succeeded, decompression must round-trip back to empty + const decompressed = decompressWithDictionary(compressed, dict); + expect(decompressed.length).toBe(0); + }, 60_000); +}); diff --git a/packages/compression/src/timestamp-delta.ts b/packages/compression/src/timestamp-delta.ts new file mode 100644 index 000000000..163d4cd73 --- /dev/null +++ b/packages/compression/src/timestamp-delta.ts @@ -0,0 +1,108 @@ +/** + * Timestamp delta encoding for DOT batch streams. + * + * Given N timestamps (Unix ms as bigint), encode as: + * - First timestamp: 8 bytes big-endian uint64 + * - Subsequent timestamps: signed varint delta from previous (in ms) + * + * For periodic sensors (100ms intervals), every delta = 100 → 1 varint byte. + * For 1s intervals, delta = 1000 → 2 varint bytes. + * For irregular sensors, delta varies but stays small → 2-3 bytes typically. + * + * Negative deltas are supported (out-of-order timestamps from sensors). + */ + +import { encodeSignedVarint, decodeSignedVarint } from './varint.js'; + +/** + * Encode array of bigint timestamps as first-value + signed-varint deltas. + * Returns a Uint8Array containing the encoded data. + * + * Layout: + * [0..7] — first timestamp as 8-byte big-endian uint64 + * [8..] 
— signed varint deltas for timestamps[1], timestamps[2], ... + * + * @throws RangeError if timestamps array is empty + */ +export function encodeTimestampDeltas(timestamps: bigint[]): Uint8Array { + if (timestamps.length === 0) { + throw new RangeError('encodeTimestampDeltas: timestamps array must not be empty'); + } + + // Collect all encoded pieces: first the 8-byte anchor, then varint deltas + const pieces: Uint8Array[] = []; + + // Write first timestamp as 8-byte big-endian uint64 + const anchor = new Uint8Array(8); + const view = new DataView(anchor.buffer); + // bigint → write high 32 bits and low 32 bits separately + const first = timestamps[0]!; + const high = Number(first >> 32n); + const low = Number(first & 0xffffffffn); + view.setUint32(0, high, false); // big-endian + view.setUint32(4, low, false); // big-endian + pieces.push(anchor); + + // Write each subsequent delta as a signed varint + for (let i = 1; i < timestamps.length; i++) { + const delta = Number(timestamps[i]! - timestamps[i - 1]!); + pieces.push(encodeSignedVarint(delta)); + } + + // Concatenate all pieces into a single buffer + const totalLength = pieces.reduce((sum, p) => sum + p.length, 0); + const result = new Uint8Array(totalLength); + let offset = 0; + for (const piece of pieces) { + result.set(piece, offset); + offset += piece.length; + } + + return result; +} + +/** + * Decode timestamp delta buffer back to array of bigint timestamps. 
+ * + * @param buf - The encoded buffer (from encodeTimestampDeltas) + * @param count - Number of timestamps to decode + * @throws RangeError if buf is too short for count timestamps + */ +export function decodeTimestampDeltas(buf: Uint8Array, count: number): bigint[] { + if (count === 0) { + return []; + } + if (buf.length < 8) { + throw new RangeError( + `decodeTimestampDeltas: buffer too short (${buf.length} bytes) to decode even the anchor timestamp`, + ); + } + + const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength); + + // Read first timestamp as 8-byte big-endian uint64 + const high = view.getUint32(0, false); + const low = view.getUint32(4, false); + const first = (BigInt(high) << 32n) | BigInt(low); + + const timestamps: bigint[] = [first]; + + if (count === 1) { + return timestamps; + } + + // Decode varint deltas starting at byte 8 + let offset = 8; + for (let i = 1; i < count; i++) { + if (offset >= buf.length) { + throw new RangeError( + `decodeTimestampDeltas: unexpected end of buffer at offset ${offset} while decoding timestamp ${i} of ${count}`, + ); + } + const [delta, bytesConsumed] = decodeSignedVarint(buf, offset); + offset += bytesConsumed; + timestamps.push(timestamps[i - 1]! + BigInt(delta)); + } + + return timestamps; +} diff --git a/packages/compression/src/varint.ts b/packages/compression/src/varint.ts new file mode 100644 index 000000000..bbad3f79f --- /dev/null +++ b/packages/compression/src/varint.ts @@ -0,0 +1,133 @@ +/** + * Varint encoding — protobuf-style variable-length integers. + * + * Each byte uses 7 bits for data and bit 7 (MSB) as continuation flag. + * 0-127 → 1 byte + * 128-16383 → 2 bytes + * 16384-2097151 → 3 bytes + * etc. + * + * Signed integers use zigzag encoding: + * 0 → 0, -1 → 1, 1 → 2, -2 → 3, 2 → 4, ... + * This maps small negative numbers to small positive numbers, + * making them encode efficiently. + */ + +/** + * Encode unsigned integer as varint. 
+ * Max safe value: 2^53 - 1 (Number.MAX_SAFE_INTEGER).
+ *
+ * @throws RangeError if value is negative, fractional, non-finite, or larger
+ *   than Number.MAX_SAFE_INTEGER
+ */
+export function encodeVarint(value: number): Uint8Array {
+  // isSafeInteger rejects NaN, ±Infinity, fractions and anything above
+  // 2^53 - 1, where a double can no longer represent every integer exactly
+  // and the emitted bytes would not describe the caller's intended value.
+  if (value < 0 || !Number.isSafeInteger(value)) {
+    throw new RangeError(`encodeVarint: value must be a non-negative finite integer, got ${value}`);
+  }
+
+  // Fast path: single byte
+  if (value < 128) {
+    return new Uint8Array([value]);
+  }
+
+  const bytes: number[] = [];
+  let remaining = value;
+
+  while (remaining >= 128) {
+    // Low 7 bits plus the continuation flag. Arithmetic (% and division) is
+    // used instead of & and >> because JS bitwise operators work on 32-bit
+    // integers and values here may be as large as 2^53 - 1.
+    bytes.push((remaining % 128) | 0x80);
+    remaining = Math.floor(remaining / 128);
+  }
+  // Last byte has no continuation bit
+  bytes.push(remaining);
+
+  return new Uint8Array(bytes);
+}
+
+/**
+ * Decode varint from buffer at offset.
+ * Returns [value, bytesConsumed].
+ *
+ * @throws RangeError if the varint extends past the end of the buffer, is
+ *   longer than 8 bytes, or decodes to a value above Number.MAX_SAFE_INTEGER
+ */
+export function decodeVarint(buf: Uint8Array, offset: number): [number, number] {
+  let value = 0;
+  let shift = 0;
+  let bytesConsumed = 0;
+
+  while (true) {
+    const pos = offset + bytesConsumed;
+    if (pos >= buf.length) {
+      throw new RangeError(
+        `decodeVarint: unexpected end of buffer at offset ${pos} (buf.length=${buf.length})`,
+      );
+    }
+
+    const byte = buf[pos]!;
+    bytesConsumed++;
+
+    // Multiply by a power of two instead of using <<, which truncates to a
+    // signed 32-bit integer once shift reaches 32.
+    value += (byte & 0x7f) * 2 ** shift;
+    shift += 7;
+
+    // No continuation bit — done
+    if ((byte & 0x80) === 0) {
+      break;
+    }
+
+    // Eight bytes already carry 56 payload bits; a ninth byte cannot belong
+    // to a value that fits in a JS safe integer.
+    if (shift >= 56) {
+      throw new RangeError(`decodeVarint: varint too long (shift=${shift}), possible data corruption`);
+    }
+  }
+
+  // An 8-byte varint can encode up to 2^56 - 1. Anything above 2^53 - 1 has
+  // already lost precision in `value`, so refuse it rather than return a
+  // silently wrong number.
+  if (value > Number.MAX_SAFE_INTEGER) {
+    throw new RangeError('decodeVarint: decoded value exceeds Number.MAX_SAFE_INTEGER, possible data corruption');
+  }
+
+  return [value, bytesConsumed];
+}
+
+/**
+ * Zigzag-encode a signed integer to an unsigned integer.
+ * Maps small negatives to small positives:
+ *   0 → 0, -1 → 1, 1 → 2, -2 → 3, 2 → 4, ...
+ */
+function zigzagEncode(n: number): number {
+  // Plain arithmetic keeps this exact across the whole safe-integer range.
+  return n >= 0 ? n * 2 : (-n) * 2 - 1;
+}
+
+/**
+ * Zigzag-decode an unsigned integer back to a signed integer.
+ */
+function zigzagDecode(n: number): number {
+  // Even → non-negative: n / 2
+  // Odd  → negative:     -((n + 1) / 2)
+  // Both divisions are exact because the dividend is even.
+  if (n % 2 === 0) {
+    return n / 2;
+  }
+  return -((n + 1) / 2);
+}
+
+/**
+ * Encode signed integer using zigzag encoding, then varint.
+ *
+ * @throws RangeError if value is not a finite integer or its magnitude exceeds
+ *   floor(Number.MAX_SAFE_INTEGER / 2)
+ */
+export function encodeSignedVarint(value: number): Uint8Array {
+  if (!Number.isInteger(value) || !Number.isFinite(value)) {
+    throw new RangeError(`encodeSignedVarint: value must be a finite integer, got ${value}`);
+  }
+  // Zigzag doubles the magnitude, so only half of the safe range is usable.
+  const MAX_SIGNED = Math.floor(Number.MAX_SAFE_INTEGER / 2);
+  if (Math.abs(value) > MAX_SIGNED) {
+    throw new RangeError(`encodeSignedVarint: value ${value} exceeds safe range (±${MAX_SIGNED})`);
+  }
+  return encodeVarint(zigzagEncode(value));
+}
+
+/**
+ * Decode zigzag-encoded signed varint from buffer at offset.
+ * Returns [value, bytesConsumed].
+ */
+export function decodeSignedVarint(buf: Uint8Array, offset: number): [number, number] {
+  // Read the unsigned varint first, then undo the zigzag mapping; the byte
+  // count reported by decodeVarint is passed through unchanged.
+  const [unsigned, bytesConsumed] = decodeVarint(buf, offset);
+  return [zigzagDecode(unsigned), bytesConsumed];
+}
diff --git a/packages/compression/src/weissman.ts b/packages/compression/src/weissman.ts
new file mode 100644
index 000000000..26a306aaf
--- /dev/null
+++ b/packages/compression/src/weissman.ts
@@ -0,0 +1,100 @@
+/**
+ * Weissman Score calculator for DOT compression benchmarking.
+ *
+ * W = α × r_algorithm / r_gzip
+ *
+ * α = 1.0 (normalization constant)
+ * r = compression ratio (raw_bytes / compressed_bytes)
+ *
+ * Reference: Weissman et al., "A Mathematical Theory of Gauss" (2013)
+ * Used in the TV show "Silicon Valley" to compare compression algorithms.
+ *
+ * NOTE(review): the citation above could not be verified — confirm or remove.
+ * The score as popularised by the show also includes a log-time ratio; this
+ * module computes only the compression-ratio term shown in the formula above.
+ */
+
+/**
+ * Compute Weissman Score for a compression algorithm vs gzip baseline.
+ *
+ * @param rawBytes - Size of the uncompressed input, in bytes
+ * @param compressedBytes - Size produced by the algorithm under test, in bytes
+ * @param gzipBytes - Size produced by the gzip baseline, in bytes
+ * @param alpha - Normalization constant α (default 1.0)
+ * @returns alpha × (rawBytes / compressedBytes) / (rawBytes / gzipBytes)
+ *
+ * No input validation is performed: the divisions are plain floating-point
+ * divisions, so zero-valued sizes yield Infinity, NaN or 0 instead of an error.
+ */
+export function weissmanScore(
+  rawBytes: number,
+  compressedBytes: number,
+  gzipBytes: number,
+  alpha: number = 1.0,
+): number {
+  // Compression ratio of the algorithm under test and of the gzip baseline.
+  const r_algo = rawBytes / compressedBytes;
+  const r_gzip = rawBytes / gzipBytes;
+  return alpha * r_algo / r_gzip;
+}
+
+/**
+ * Preset compression configurations with measured performance numbers.
+ * All measurements on DOT sensor streams (kulhad voltage, N=1000 unless noted).
+ * Raw DOT = 153 bytes.
+ */
+export interface WeissmanPreset {
+  name: string;
+  description: string;
+  /** Compression ratio vs raw (raw_bytes / compressed_bytes). */
+  compressionRatio: number;
+  /** Weissman Score vs gzip. */
+  weissmanScore: number;
+  /** bytes/DOT. */
+  bytesPerDot: number;
+  /** N DOTs per measurement (smaller N = less accurate).
*/
+  n: number;
+}
+
+/**
+ * Named benchmark presets, keyed by a short identifier.
+ *
+ * Except where noted, each `weissmanScore` equals `compressionRatio / 1.44`,
+ * where 1.44 is the gzip preset's compression ratio.
+ *
+ * NOTE(review): the type arguments of `Record` were missing (a bare `Record`
+ * does not compile). `string` keys are assumed here — narrow to a key union if
+ * one exists elsewhere in the package.
+ */
+export const WEISSMAN_PRESETS: Record<string, WeissmanPreset> = {
+  raw: {
+    name: 'Raw DOT',
+    description: '153-byte wire format, no compression',
+    compressionRatio: 1.0,
+    weissmanScore: 0.694, // 1 / (gzip ratio 1.44)
+    bytesPerDot: 153.0,
+    n: 1,
+  },
+  gzip: {
+    name: 'gzip (reference)',
+    description: 'gzip on raw DOT bytes (cannot exploit domain structure)',
+    compressionRatio: 1.44,
+    weissmanScore: 1.0, // reference = 1.0
+    bytesPerDot: 106.25,
+    n: 1000,
+  },
+  dotBLSBatch: {
+    name: 'DOT BLS Batch (Phase 1.5)',
+    description: 'BLS aggregate signature + batch header. 90 tests. Published.',
+    compressionRatio: 8.5,
+    // NOTE(review): 8.5 / 1.44 ≈ 5.90, but 2.459 is recorded. Every other
+    // preset follows compressionRatio / 1.44 — confirm which figure is right.
+    weissmanScore: 2.459,
+    bytesPerDot: 18.0,
+    n: 100,
+  },
+  dotPhase2Plain: {
+    name: 'DOT Phase 2 Plain (v2 column layout)',
+    description: 'Column-oriented batch: timestamps delta + type RLE. No dictionary.',
+    compressionRatio: 8.1,
+    weissmanScore: 5.63, // 8.1 / 1.44
+    bytesPerDot: 18.9,
+    n: 100,
+  },
+  dotPhase2Dict: {
+    name: 'DOT Phase 2 + Dictionary (zstd)',
+    description: 'Column layout + zstd dictionary trained on sensor stream bodies.',
+    compressionRatio: 19.8,
+    weissmanScore: 13.75, // 19.8 / 1.44
+    bytesPerDot: 7.7,
+    n: 100,
+  },
+  dotPhase2DictLarge: {
+    name: 'DOT Phase 2 + Dictionary (N=1000)',
+    description: 'Column layout + zstd dictionary, 1000-DOT batch from train-dictionary.ts script.',
+    compressionRatio: 42.0,
+    weissmanScore: 29.17, // 42 / 1.44
+    bytesPerDot: 3.64,
+    n: 1000,
+  },
+  dotPhase2Predict: {
+    name: 'DOT Phase 2 + Prediction + rANS',
+    description: 'Column layout + LinearPredictor XOR residuals + rANS entropy coding.',
+    compressionRatio: 17.1,
+    weissmanScore: 11.88, // 17.1 / 1.44
+    bytesPerDot: 8.95,
+    n: 100,
+  },
+};
diff --git a/packages/compression/src/zstd.ts b/packages/compression/src/zstd.ts
new file mode 100644
index 000000000..dfe62e9fd
--- /dev/null
+++ b/packages/compression/src/zstd.ts
@@ -0,0 +1,147 @@
+/**
+ * Zstd dictionary compression for DOT batch bodies.
+ *
+ * A zstd dictionary trained on sample DOT batch bodies dramatically reduces
+ * compressed size because DOT data has highly regular structure: float64 payloads
+ * with small deltas, 0x00 type bytes, varint-encoded timestamps in a predictable range.
+ *
+ * Dictionary approach:
+ *   - Train once on 100+ sample batch bodies from the target domain
+ *   - Ship dictionary alongside the application (or as a DOT on a registry chain)
+ *   - Compress each new batch body with the dictionary
+ *   - Receiver must have the same dictionary (identified by 32-byte hash)
+ *
+ * Implementation:
+ *   - Training: shells out to `zstd --train` CLI (v1.5+, must be on PATH)
+ *   - Compress/Decompress: uses `zstd-napi` native Node.js bindings with
+ *     `Compressor.loadDictionary()` / `Decompressor.loadDictionary()`
+ *
+ * Why CLI for training?
+ *   `zstd-napi` exposes compress/decompress with dictionary support but does NOT
+ *   expose `ZDICT_trainFromBuffer`. The zstd CLI bundles the training algorithm and
+ *   is available on macOS via Homebrew (`brew install zstd`) and most Linux distros.
+ */
+
+import { execFileSync } from 'node:child_process';
+import { mkdirSync, mkdtempSync, writeFileSync, readFileSync, rmSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { Compressor, Decompressor } from 'zstd-napi';
+
+// ─── Constants ────────────────────────────────────────────────────────────────
+
+/** Default target dictionary size: 32 KB. */
+const DEFAULT_DICT_SIZE = 32_768;
+
+/** Minimum recommended samples for a meaningful dictionary. */
+const MIN_SAMPLES_RECOMMENDED = 10;
+
+// ─── Public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Train a zstd dictionary from sample DOT batch bodies.
+ *
+ * Each sample should be a serialized batch v2 body (timestamps + types + payloads).
+ * Returns the trained dictionary as a Uint8Array (target: 32 KB).
+ *
+ * Shells out to `zstd --train` — requires the zstd CLI on PATH (v1.5+).
+ * Install via: `brew install zstd` (macOS) or `apt install zstd` (Debian/Ubuntu).
+ *
+ * The work itself is synchronous (file I/O and the child process block the
+ * event loop); the function is `async` only so that callers receive a Promise.
+ *
+ * @param samples - Array of sample batch bodies. At least 10 recommended, 100+ for quality.
+ * @param dictSize - Target dictionary size in bytes (default: 32768 = 32 KB).
+ * @returns Trained dictionary as Uint8Array.
+ * @throws RangeError if `samples` is empty.
+ * @throws If the zstd CLI is not found on PATH or training fails.
+ */
+export async function trainDictionary(
+  samples: Uint8Array[],
+  dictSize: number = DEFAULT_DICT_SIZE,
+): Promise<Uint8Array> {
+  if (samples.length === 0) {
+    throw new RangeError('trainDictionary: samples array must not be empty');
+  }
+  if (samples.length < MIN_SAMPLES_RECOMMENDED) {
+    // Warn but don't throw — some tests use fewer samples intentionally
+    console.warn(
+      `trainDictionary: ${samples.length} samples provided; ` +
+        `${MIN_SAMPLES_RECOMMENDED}+ recommended for a quality dictionary`,
+    );
+  }
+
+  // Everything lives in one private temp directory so cleanup is a single rmSync.
+  const tmpDir = mkdtempSync(join(tmpdir(), 'dot-zstd-train-'));
+  const dictPath = join(tmpDir, 'dict.zstd');
+
+  try {
+    const sampleDir = join(tmpDir, 'samples');
+    mkdirSync(sampleDir);
+
+    // Write each sample to its own numbered file and remember the path.
+    const samplePaths = samples.map((sample, i) => {
+      const samplePath = join(sampleDir, `sample_${String(i).padStart(6, '0')}.bin`);
+      writeFileSync(samplePath, sample);
+      return samplePath;
+    });
+
+    // zstd --train <sample files...> -o <dictPath> --maxdict <dictSize>
+    // No shell is involved (execFileSync), so there is no glob expansion:
+    // every sample file is passed as an explicit argument.
+    // NOTE(review): very large sample sets may exceed the OS argument-length
+    // limit — confirm the expected upper bound on samples.length.
+    try {
+      execFileSync('zstd', ['--train', ...samplePaths, '-o', dictPath, '--maxdict', String(dictSize)], {
+        stdio: ['ignore', 'ignore', 'pipe'],
+      });
+    } catch (err: unknown) {
+      // A missing executable is reported through the error's `code`. Checking
+      // the code avoids misclassifying a genuine training failure whose message
+      // (which includes the argv paths and stderr) happens to contain "ENOENT".
+      const code =
+        typeof err === 'object' && err !== null && 'code' in err
+          ? (err as { code?: unknown }).code
+          : undefined;
+      if (code === 'ENOENT') {
+        const msg = err instanceof Error ? err.message : String(err);
+        throw new Error(
+          'zstd CLI not found. Install: brew install zstd (macOS) or apt install zstd (Linux). ' + msg,
+        );
+      }
+      throw err;
+    }
+
+    const dictBytes = readFileSync(dictPath);
+    return new Uint8Array(dictBytes.buffer, dictBytes.byteOffset, dictBytes.byteLength);
+  } finally {
+    // Clean up temp directory unconditionally
+    rmSync(tmpDir, { recursive: true, force: true });
+  }
+}
+
+/**
+ * Compress data using a trained zstd dictionary.
+ *
+ * The dictionary must have been produced by {@link trainDictionary}. Both the
+ * compressor and decompressor must use the same dictionary — zstd embeds a
+ * dictionary ID in the compressed frame so mismatches are detected at
+ * decompression time.
+ *
+ * @param data - Raw bytes to compress (e.g. a batch v2 body).
+ * @param dictionary - Trained zstd dictionary from {@link trainDictionary}.
+ * @returns Compressed bytes.
+ */
+export function compressWithDictionary(data: Uint8Array, dictionary: Uint8Array): Uint8Array {
+  const cmp = new Compressor();
+  cmp.loadDictionary(dictionary);
+  const result = cmp.compress(data);
+  return new Uint8Array(result.buffer, result.byteOffset, result.byteLength);
+}
+
+/**
+ * Decompress data using the same dictionary that was used for compression.
+ *
+ * Throws if the dictionary does not match the one used during compression
+ * (zstd detects mismatches via the embedded dictionary ID in the frame).
+ *
+ * @param compressed - Compressed bytes from {@link compressWithDictionary}.
+ * @param dictionary - The same trained zstd dictionary used to compress.
+ * @returns Original uncompressed bytes.
+ * @throws If the dictionary ID in the frame does not match, or the data is corrupt.
+ */
+export function decompressWithDictionary(compressed: Uint8Array, dictionary: Uint8Array): Uint8Array {
+  const decompressor = new Decompressor();
+  decompressor.loadDictionary(dictionary);
+  // Re-wrap the decompressor's output as a plain Uint8Array view over the
+  // same underlying memory.
+  const { buffer, byteOffset, byteLength } = decompressor.decompress(compressed);
+  return new Uint8Array(buffer, byteOffset, byteLength);
+}
diff --git a/packages/compression/tsconfig.json b/packages/compression/tsconfig.json
new file mode 100644
index 000000000..dace867e4
--- /dev/null
+++ b/packages/compression/tsconfig.json
@@ -0,0 +1,9 @@
+{
+  "extends": "../../tsconfig.base.json",
+  "compilerOptions": {
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "types": ["node", "vitest/globals"]
+  },
+  "include": ["src"]
+}
diff --git a/packages/compression/tsdown.config.ts b/packages/compression/tsdown.config.ts
new file mode 100644
index 000000000..aa536bdcb
--- /dev/null
+++ b/packages/compression/tsdown.config.ts
@@ -0,0 +1,11 @@
+import { defineConfig } from 'tsdown'
+
+export default defineConfig({
+  entry: ['src/index.ts'],
+  format: ['esm', 'cjs'],
+  dts: true,
+  clean: true,
+  outDir: 'dist',
+  platform: 'neutral',
+  sourcemap: true,
+})
diff --git a/packages/compression/vitest.config.ts b/packages/compression/vitest.config.ts
new file mode 100644
index 000000000..8f7148c8e
--- /dev/null
+++ b/packages/compression/vitest.config.ts
@@ -0,0 +1,17 @@
+import { defineConfig } from 'vitest/config';
+import { resolve } from 'node:path';
+
+export default defineConfig({
+  resolve: {
+    alias: {
+      '@dotprotocol/core': resolve(__dirname, '../core/src/index.ts'),
+    },
+  },
+  test: {
+    environment: 'node',
+    coverage: {
+      provider: 'v8',
+      exclude: ['scripts/**', 'src/tests/**', '**/*.d.ts', '**/*.config.*'],
+    },
+  },
+});
diff --git a/packages/core/LICENSE b/packages/core/LICENSE
new file mode 100644
index 000000000..cdcd7ef79
--- /dev/null
+++ b/packages/core/LICENSE
@@ -0,0 +1,189 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND
DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work. + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to the Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by the Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding any notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + Copyright 2026 DOT Protocol Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/packages/core/package.json b/packages/core/package.json index daa33c618..374e5936a 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -30,5 +30,14 @@ "files": [ "dist" ], - "license": "Apache-2.0" + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/dot-protocol/dot.git", + "directory": "packages/core" + }, + "homepage": "https://github.com/dot-protocol/dot/tree/main/packages/core#readme", + "bugs": { + "url": "https://github.com/dot-protocol/dot/issues" + } } diff --git a/packages/identity/.npmignore b/packages/identity/.npmignore new file mode 100644 index 000000000..91b700e0b --- /dev/null +++ b/packages/identity/.npmignore @@ -0,0 +1,4 @@ +src/tests/ +src/**/*.test.ts +coverage/ +*.tsbuildinfo diff --git a/packages/identity/LICENSE b/packages/identity/LICENSE new file mode 100644 index 000000000..d90201772 --- /dev/null +++ b/packages/identity/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 DOT Protocol contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit 
persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/packages/identity/README.md b/packages/identity/README.md new file mode 100644 index 000000000..6507eb589 --- /dev/null +++ b/packages/identity/README.md @@ -0,0 +1,88 @@ +# @dotprotocol/identity + +Persistent keypair + DID for DOT Protocol. Your key, your identity. + +[![npm](https://img.shields.io/npm/v/@dotprotocol/identity)](https://www.npmjs.com/package/@dotprotocol/identity) + +## Install + +```bash +npm install @dotprotocol/identity +``` + +## Quick start + +```js +import { loadOrCreate, generateDID } from '@dotprotocol/identity'; + +// Load existing keypair or create a new one +const keypair = await loadOrCreate('./my-identity.json'); + +// Derive a DID from the public key +const did = generateDID(keypair.pubkey); +// "did:dot:abc123def456..." +``` + +## API + +### `loadOrCreate(path, options?)` + +Load keypair from file. If the file doesn't exist, generate and save a new one. + +```js +const keypair = await loadOrCreate('./identity.json'); +// { pubkey: Uint8Array(32), privkey: Uint8Array(64) } +``` + +### `loadKeypair(path)` + +Load keypair from file. Throws if not found. + +```js +const keypair = await loadKeypair('./identity.json'); +``` + +### `saveKeypair(keypair, path)` + +Save keypair to file. 
+ +```js +await saveKeypair(keypair, './identity.json'); +``` + +### `generateDID(pubkey)` + +Derive a `did:dot:` DID from an Ed25519 public key. + +```js +const did = generateDID(keypair.pubkey); +// "did:dot:z6Mk..." (base58btc-encoded) +``` + +## Identity file format + +```json +{ + "pubkey": "hex-encoded-32-bytes", + "privkey": "hex-encoded-64-bytes", + "created": 1709000000000 +} +``` + +Store this file securely. Anyone with the `privkey` can sign DOTs as you. + +## Key backup + +Your keypair IS your identity. There is no recovery service. Back it up: + +```bash +# Backup +cp identity.json identity.backup.json + +# Or export as hex +node -e "const k = require('./identity.json'); console.log(k.privkey)" +``` + +## License + +MIT diff --git a/packages/identity/package.json b/packages/identity/package.json new file mode 100644 index 000000000..53ba18dc1 --- /dev/null +++ b/packages/identity/package.json @@ -0,0 +1,41 @@ +{ + "name": "@dotprotocol/identity", + "version": "0.3.0", + "description": "DOT Protocol identity \u2014 keypair + genesis DOT + export/import.", + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "import": "./dist/index.js", + "require": "./dist/index.cjs", + "types": "./dist/index.d.ts" + } + }, + "sideEffects": false, + "scripts": { + "build": "tsdown", + "test": "vitest run", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "@dotprotocol/core": "workspace:*" + }, + "devDependencies": {}, + "license": "MIT", + "files": [ + "dist", + "README.md", + "LICENSE" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/dot-protocol/dot.git", + "directory": "packages/identity" + }, + "homepage": "https://github.com/dot-protocol/dot/tree/main/packages/identity#readme", + "bugs": { + "url": "https://github.com/dot-protocol/dot/issues" + } +} diff --git a/packages/identity/src/identity.ts b/packages/identity/src/identity.ts new file mode 
100644
index 000000000..f7c315359
--- /dev/null
+++ b/packages/identity/src/identity.ts
@@ -0,0 +1,63 @@
+import { createKeypair, createDOT, DotType, toBytes, fromBytes } from '@dotprotocol/core';
+import type { DOT, Keypair } from '@dotprotocol/core';
+
+/** An in-memory identity: a keypair plus the genesis DOT created with it. */
+export interface Identity {
+  keypair: Keypair;
+  genesisDOT: DOT;
+  /** Value of Date.now() captured when the identity was created. */
+  createdAt: number;
+}
+
+/** JSON-friendly form of an {@link Identity}; all binary fields are hex strings. */
+export interface ExportedIdentity {
+  publicKey: string; // hex
+  privateKey: string; // hex — store securely
+  genesisDOT: string; // hex (153 bytes)
+  createdAt: number;
+}
+
+/**
+ * Create a new identity: a keypair and a genesis DOT signed with it.
+ *
+ * @param seed - Optional seed forwarded unchanged to `createKeypair`. The
+ *   package tests expect the same seed to yield the same public key; when
+ *   omitted, key generation is left to `createKeypair`.
+ * @returns The keypair, its genesis DOT, and the creation timestamp (which is
+ *   also used as the genesis DOT's `ts`).
+ */
+export async function createIdentity(seed?: Uint8Array): Promise<Identity> {
+  const keypair = await createKeypair(seed);
+  const createdAt = Date.now();
+  const genesisDOT = await createDOT({
+    keypair,
+    type: DotType.PUBLIC,
+    ts: createdAt,
+  });
+  return { keypair, genesisDOT, createdAt };
+}
+
+/** Encode bytes as a lowercase hex string, two digits per byte. */
+function toHex(bytes: Uint8Array): string {
+  return Array.from(bytes).map(b => b.toString(16).padStart(2, '0')).join('');
+}
+
+/**
+ * Decode a hex string into bytes.
+ *
+ * @throws Error if the string has odd length or contains non-hex characters.
+ *   Without this check a malformed pair decodes to 0 and a trailing odd digit
+ *   is dropped, silently yielding wrong key material. The offending input is
+ *   deliberately left out of the message because it may be a private key.
+ */
+function fromHex(hex: string): Uint8Array {
+  if (hex.length % 2 !== 0 || !/^[0-9a-fA-F]*$/.test(hex)) {
+    throw new Error('fromHex: expected an even-length string of hexadecimal digits');
+  }
+  const bytes = new Uint8Array(hex.length / 2);
+  for (let i = 0; i < bytes.length; i++) {
+    bytes[i] = Number.parseInt(hex.slice(i * 2, i * 2 + 2), 16);
+  }
+  return bytes;
+}
+
+/**
+ * Export identity as hex strings for storage.
+ * WARNING: The returned object contains the raw private key as a hex string.
+ * Never log, transmit, or persist this to an untrusted location.
+ */
+export function exportIdentity(id: Identity): ExportedIdentity {
+  return {
+    publicKey: toHex(id.keypair.publicKey),
+    privateKey: toHex(id.keypair.privateKey),
+    genesisDOT: toHex(toBytes(id.genesisDOT)),
+    createdAt: id.createdAt,
+  };
+}
+
+/** Import identity from exported hex strings.
*/
+export async function importIdentity(exported: ExportedIdentity): Promise<Identity> {
+  // Each hex field is decoded back to bytes; the genesis DOT is then parsed
+  // from its byte form with fromBytes.
+  // NOTE(review): nothing here checks that the decoded keys have the expected
+  // length or that the genesis DOT was signed by this keypair — confirm
+  // whether callers are expected to verify that themselves.
+  const keypair: Keypair = {
+    publicKey: fromHex(exported.publicKey),
+    privateKey: fromHex(exported.privateKey),
+  };
+  const genesisDOT = fromBytes(fromHex(exported.genesisDOT));
+  return { keypair, genesisDOT, createdAt: exported.createdAt };
+}
diff --git a/packages/identity/src/index.ts b/packages/identity/src/index.ts
new file mode 100644
index 000000000..c60ef51ea
--- /dev/null
+++ b/packages/identity/src/index.ts
@@ -0,0 +1,2 @@
+export { createIdentity, exportIdentity, importIdentity } from './identity.js';
+export type { Identity, ExportedIdentity } from './identity.js';
diff --git a/packages/identity/src/tests/identity.test.ts b/packages/identity/src/tests/identity.test.ts
new file mode 100644
index 000000000..3ebb13156
--- /dev/null
+++ b/packages/identity/src/tests/identity.test.ts
@@ -0,0 +1,62 @@
+import { describe, it, expect } from 'vitest';
+import { createIdentity, exportIdentity, importIdentity } from '../index.js';
+
+describe('createIdentity', () => {
+  it('returns a keypair with 32-byte public and private keys', async () => {
+    const id = await createIdentity();
+    expect(id.keypair.publicKey.length).toBe(32);
+    expect(id.keypair.privateKey.length).toBe(32);
+  });
+
+  it('returns a genesis DOT (exactly 153 bytes)', async () => {
+    const { toBytes } = await import('@dotprotocol/core');
+    const id = await createIdentity();
+    expect(toBytes(id.genesisDOT).length).toBe(153);
+  });
+
+  it('genesis DOT is signed by identity keypair', async () => {
+    const { verifyDOT } = await import('@dotprotocol/core');
+    const id = await createIdentity();
+    expect(await verifyDOT(id.genesisDOT)).toBe(true);
+  });
+
+  it('two identities have different public keys', async () => {
+    const a = await createIdentity();
+    const b = await createIdentity();
+    expect(a.keypair.publicKey).not.toEqual(b.keypair.publicKey);
+  });
+
+  it('deterministic with seed', async () => {
+    const seed = new
Uint8Array(32).fill(0x42); + const a = await createIdentity(seed); + const b = await createIdentity(seed); + expect(a.keypair.publicKey).toEqual(b.keypair.publicKey); + }); +}); + +describe('export/import roundtrip', () => { + it('exported identity reimports to same keypair', async () => { + const id = await createIdentity(); + const exported = exportIdentity(id); + const imported = await importIdentity(exported); + expect(imported.keypair.publicKey).toEqual(id.keypair.publicKey); + expect(imported.keypair.privateKey).toEqual(id.keypair.privateKey); + }); + + it('exported identity is plain JSON-serializable object', async () => { + const id = await createIdentity(); + const exported = exportIdentity(id); + const json = JSON.stringify(exported); + const parsed = JSON.parse(json); + expect(parsed.publicKey).toBe(exported.publicKey); + expect(parsed.privateKey).toBe(exported.privateKey); + expect(parsed.genesisDOT).toBe(exported.genesisDOT); + }); + + it('reimported genesis DOT still verifies', async () => { + const { verifyDOT } = await import('@dotprotocol/core'); + const id = await createIdentity(); + const imported = await importIdentity(exportIdentity(id)); + expect(await verifyDOT(imported.genesisDOT)).toBe(true); + }); +}); diff --git a/packages/identity/tsconfig.json b/packages/identity/tsconfig.json new file mode 100644 index 000000000..792172fb8 --- /dev/null +++ b/packages/identity/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src"] +} diff --git a/packages/identity/tsdown.config.ts b/packages/identity/tsdown.config.ts new file mode 100644 index 000000000..aa536bdcb --- /dev/null +++ b/packages/identity/tsdown.config.ts @@ -0,0 +1,11 @@ +import { defineConfig } from 'tsdown' + +export default defineConfig({ + entry: ['src/index.ts'], + format: ['esm', 'cjs'], + dts: true, + clean: true, + outDir: 'dist', + platform: 'neutral', + sourcemap: true, 
+}) diff --git a/packages/lang/LICENSE b/packages/lang/LICENSE new file mode 100644 index 000000000..cdcd7ef79 --- /dev/null +++ b/packages/lang/LICENSE @@ -0,0 +1,189 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work. 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to the Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by the Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding any notices that do not + pertain to any part of the Derivative Works, 
in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + Copyright 2026 DOT Protocol Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/packages/lang/package.json b/packages/lang/package.json index 420b8d249..e3a882e9e 100644 --- a/packages/lang/package.json +++ b/packages/lang/package.json @@ -22,5 +22,14 @@ "typescript": "^5.8.0", "vitest": "^3.1.0" }, - "license": "Apache-2.0" + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/dot-protocol/dot.git", + "directory": "packages/lang" + }, + "homepage": "https://github.com/dot-protocol/dot/tree/main/packages/lang#readme", + "bugs": { + "url": "https://github.com/dot-protocol/dot/issues" + } } diff --git a/packages/mesh/LICENSE b/packages/mesh/LICENSE new file mode 100644 index 000000000..cdcd7ef79 --- /dev/null +++ b/packages/mesh/LICENSE @@ -0,0 +1,189 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work. + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to the Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by the Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding any notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2026 DOT Protocol Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. diff --git a/packages/mesh/package.json b/packages/mesh/package.json index 06390ccaf..a91e8aed7 100644 --- a/packages/mesh/package.json +++ b/packages/mesh/package.json @@ -25,5 +25,14 @@ "typescript": "^5.8.0", "vitest": "^3.1.0" }, - "license": "Apache-2.0" + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/dot-protocol/dot.git", + "directory": "packages/mesh" + }, + "homepage": "https://github.com/dot-protocol/dot/tree/main/packages/mesh#readme", + "bugs": { + "url": "https://github.com/dot-protocol/dot/issues" + } } diff --git a/packages/qr/LICENSE b/packages/qr/LICENSE new file mode 100644 index 000000000..d90201772 --- /dev/null +++ b/packages/qr/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 DOT Protocol contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/packages/qr/README.md b/packages/qr/README.md new file mode 100644 index 000000000..b7ae23c2b --- /dev/null +++ b/packages/qr/README.md @@ -0,0 +1,118 @@ +# @dotprotocol/qr + +Physical DOT — encode DOTs into QR codes and PNG steganography. The Falooda Protocol. + +[![npm](https://img.shields.io/npm/v/@dotprotocol/qr)](https://www.npmjs.com/package/@dotprotocol/qr) + +## Install + +```bash +npm install @dotprotocol/qr +``` + +## Quick start + +```js +import { encodeBinary, decodeBinary, selectQRSpec } from '@dotprotocol/qr'; +import QRCode from 'qrcode'; // any QR library + +// Pack up to 19 DOTs into a single QR code +const dots = [dot1, dot2, dot3]; +const spec = selectQRSpec(dots.length, 'binary'); // smallest QR version that fits +const bytes = encodeBinary(dots); + +// Render with any QR library +await QRCode.toFile('dots.png', Buffer.from(bytes)); + +// Decode scanned QR +const recovered = decodeBinary(scannedBytes); // DOT[]; throws if the length is not a multiple of 153 +if (recovered.length > 0) { + console.log(`${recovered.length} DOTs recovered`); +} +``` + +## Capacity + +| Mode | DOTs per code | Notes | +|------|--------------|-------| +| Binary | up to 19 | Standard QR — scan with any reader | +| Steganographic | up to 5 | Hidden in PNG pixel data | +| Nested | unlimited | Chain of QR codes | + +19 DOTs × 153 bytes = 2,907 bytes. Standard QR capacity is 2,953 bytes (version 40, error correction L). + +## API + +### Binary mode + +```js +import { encodeBinary, decodeBinary, selectQRSpec, QR_CAPACITY } from '@dotprotocol/qr'; + +// Encode +const spec = selectQRSpec(dots.length, 'binary'); +const bytes = encodeBinary(dots); + +// Decode +const recovered = decodeBinary(bytes); +// DOT[] — throws if bytes.length is not a multiple of 153 + +// Constants +QR_CAPACITY.maxBytesPerCode // 2953 +QR_CAPACITY.dotsPerCode // 19 +QR_CAPACITY.bytesPerDOT // 153 +``` + +### Steganographic mode + +Hide DOTs in PNG pixel data — imperceptible to the eye. 
+ +```js +import { encodeSteganographic, decodeSteganographic } from '@dotprotocol/qr'; + +const hostImage = fs.readFileSync('photo.png'); +const withDOTs = encodeSteganographic(dots, hostImage); +fs.writeFileSync('photo-with-dots.png', withDOTs); + +// Recover +const recovered = decodeSteganographic(fs.readFileSync('photo-with-dots.png'), hostImage, dots.length); +// DOT[] — decoding needs the original carrier bytes and the number of DOTs embedded +``` + +### Nested mode + +Chain of QR codes — for more than 19 DOTs in physical form. + +```js +import { encodeNested, decodeNested } from '@dotprotocol/qr'; + +// Returns one Uint8Array — each entry is a 2-byte big-endian index followed by the 153-byte DOT +const payload = encodeNested(manyDots); +// Slice on 155-byte entry boundaries to spread the payload across separate QR codes + +// Decode — entries may be concatenated in any order; DOTs come back sorted by index +const recovered = decodeNested(payload); +``` + +### Verification + +```js +import { verifyPhysicalDOTs } from '@dotprotocol/qr'; + +const { valid, verified, failed } = await verifyPhysicalDOTs(dots); +// valid: boolean (all passed) +// verified: number (count that passed) +// failed: number (count that failed) +``` + +## The Falooda Protocol + +DOT was designed for people without internet. A single printed QR code IS the message — signed, chained, verifiable with a phone that's never been online. The decode logic runs in 3KB of JavaScript. + +``` +Sender prints QR code ──► recipient scans ──► instant cryptographic verification + ──► no server, no internet, no trust required +``` + +## License + +MIT diff --git a/packages/qr/package.json b/packages/qr/package.json new file mode 100644 index 000000000..c6651fe8b --- /dev/null +++ b/packages/qr/package.json @@ -0,0 +1,49 @@ +{ + "name": "@dotprotocol/qr", + "version": "0.3.0", + "description": "QR-DOT encoding: pack DOTs into scannable physical objects. 
Binary, steganographic, and nested modes.", + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "import": "./dist/index.js", + "require": "./dist/index.cjs", + "types": "./dist/index.d.ts" + } + }, + "sideEffects": false, + "scripts": { + "build": "tsdown", + "test": "vitest run", + "typecheck": "tsc --noEmit" + }, + "keywords": [ + "dot-protocol", + "qr", + "physical-dot", + "falooda" + ], + "license": "MIT", + "files": [ + "dist", + "README.md", + "LICENSE" + ], + "devDependencies": { + "@types/node": "^20.0.0" + }, + "dependencies": { + "@dotprotocol/core": "workspace:*" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/dot-protocol/dot.git", + "directory": "packages/qr" + }, + "homepage": "https://github.com/dot-protocol/dot/tree/main/packages/qr#readme", + "bugs": { + "url": "https://github.com/dot-protocol/dot/issues" + } +} diff --git a/packages/qr/src/encode.ts b/packages/qr/src/encode.ts new file mode 100644 index 000000000..28337fa94 --- /dev/null +++ b/packages/qr/src/encode.ts @@ -0,0 +1,196 @@ +/** + * DOT Protocol v0.3.0 — QR-DOT Encoder + * + * Encodes one or more DOTs into a QR payload buffer. + * Supports three encoding modes: + * binary: raw 153-byte DOTs, packed sequentially + * steganographic: DOTs XOR-masked into QR module data + * nested: each DOT prefixed with its index (for microdot containers) + * + * Transport (actually generating QR images) is the builder's choice. + * This module produces the raw bytes to encode. + */ + +import { toBytes, fromBytes } from '@dotprotocol/core'; +import type { DOT } from '@dotprotocol/core'; +import { QR_CAPACITY, type QREncoding, type QRDOTSpec } from './types.js'; + +export const DOT_SIZE = 153 as const; + +/** + * Encode an array of DOTs into a binary buffer. + * Each DOT occupies exactly 153 bytes. No framing, no header. + * Maximum ~19 DOTs per standard QR v40 L. 
+ */ +export function encodeBinary(dots: DOT[]): Uint8Array { + if (dots.length === 0) throw new Error('Cannot encode empty DOT array'); + if (dots.length > QR_CAPACITY.dotsPerCode) { + throw new Error( + `Too many DOTs: ${dots.length} exceeds QR capacity of ${QR_CAPACITY.dotsPerCode}` + ); + } + + const buf = new Uint8Array(dots.length * DOT_SIZE); + for (let i = 0; i < dots.length; i++) { + buf.set(toBytes(dots[i]), i * DOT_SIZE); + } + return buf; +} + +/** + * Decode a packed binary buffer back into DOTs, one per 153-byte slice, in buffer order. + * Throws if the length is not a multiple of 153; an empty buffer yields an empty array. + */ +export function decodeBinary(buf: Uint8Array): DOT[] { + if (buf.length % DOT_SIZE !== 0) { + throw new Error(`Buffer length ${buf.length} is not a multiple of ${DOT_SIZE}`); + } + + const dots: DOT[] = []; + for (let offset = 0; offset < buf.length; offset += DOT_SIZE) { + dots.push(fromBytes(buf.slice(offset, offset + DOT_SIZE))); + } + return dots; +} + +/** + * Encode DOTs using steganographic mode. + * The DOTs are packed with encodeBinary (same limits apply), then XOR-ed into the leading bytes of a copy of the carrier. + * The carrier must be provided by the caller and be at least as long as the packed DOTs, otherwise this throws. + * + * Only the first dots.length * 153 bytes differ from the carrier; no derived key or cycling mask is involved. + * XOR-ing the result with the same carrier recovers the DOT bytes — see decodeSteganographic. + */ +export function encodeSteganographic(dots: DOT[], carrier: Uint8Array): Uint8Array { + const dotBytes = encodeBinary(dots); + if (dotBytes.length > carrier.length) { + throw new Error( + `DOT payload (${dotBytes.length}B) exceeds carrier capacity (${carrier.length}B)` + ); + } + + const result = new Uint8Array(carrier); + for (let i = 0; i < dotBytes.length; i++) { + result[i] = result[i] ^ dotBytes[i]; + } + return result; +} + +/** + * Decode DOTs from a steganographic carrier. + * XOR with the known mask (original carrier) to recover DOT bytes. 
+ *
+ * @throws RangeError if dotCount is not a non-negative integer
+ * @throws Error if masked or carrier holds fewer than dotCount * 153 bytes
+ */
+export function decodeSteganographic(
+  masked: Uint8Array,
+  carrier: Uint8Array,
+  dotCount: number
+): DOT[] {
+  if (!Number.isInteger(dotCount) || dotCount < 0) {
+    throw new RangeError(`dotCount must be a non-negative integer, got ${dotCount}`);
+  }
+
+  const payloadLength = dotCount * DOT_SIZE;
+  // Reading past the end of a typed array yields undefined, which XORs as 0 and
+  // would silently produce corrupt DOT bytes, so reject short inputs up front.
+  if (masked.length < payloadLength || carrier.length < payloadLength) {
+    throw new Error(
+      `Need ${payloadLength}B to recover ${dotCount} DOT(s): masked has ${masked.length}B, carrier has ${carrier.length}B`
+    );
+  }
+
+  const recovered = new Uint8Array(payloadLength);
+  for (let i = 0; i < payloadLength; i++) {
+    recovered[i] = masked[i] ^ carrier[i];
+  }
+  return decodeBinary(recovered);
+}
+
+/**
+ * Encode DOTs in nested mode.
+ * Each DOT is prefixed with a 2-byte index (big-endian uint16).
+ * Used for microdot containers where modules are addressed individually.
+ *
+ * Format per entry: [index_hi, index_lo, ...153 bytes DOT]
+ *
+ * @throws RangeError if there are more DOTs than a 2-byte index can address
+ */
+export function encodeNested(dots: DOT[]): Uint8Array {
+  const ENTRY_SIZE = 2 + DOT_SIZE; // 2-byte index + 153-byte DOT
+  const MAX_ENTRIES = 0x10000; // indices 0..65535 fit the uint16 prefix
+  if (dots.length > MAX_ENTRIES) {
+    // A larger index would wrap around and collide with an earlier entry.
+    throw new RangeError(
+      `Too many DOTs: ${dots.length} exceeds the ${MAX_ENTRIES} entries a 2-byte index can address`
+    );
+  }
+
+  const buf = new Uint8Array(dots.length * ENTRY_SIZE);
+  for (let i = 0; i < dots.length; i++) {
+    const offset = i * ENTRY_SIZE;
+    buf[offset] = (i >> 8) & 0xff;
+    buf[offset + 1] = i & 0xff;
+    buf.set(toBytes(dots[i]), offset + 2);
+  }
+  return buf;
+}
+
+/**
+ * Decode DOTs from nested mode buffer.
+ * Returns DOTs ordered by their embedded index.
+ *
+ * @throws Error if the buffer length is not a multiple of 155 (2 + 153)
+ */
+export function decodeNested(buf: Uint8Array): DOT[] {
+  const ENTRY_SIZE = 2 + DOT_SIZE;
+  if (buf.length % ENTRY_SIZE !== 0) {
+    throw new Error(`Buffer length ${buf.length} is not a multiple of ${ENTRY_SIZE}`);
+  }
+
+  const entries: Array<{ index: number; dot: DOT }> = [];
+  for (let offset = 0; offset < buf.length; offset += ENTRY_SIZE) {
+    // Big-endian uint16 index precedes each DOT.
+    const index = (buf[offset] << 8) | buf[offset + 1];
+    const dot = fromBytes(buf.slice(offset + 2, offset + 2 + DOT_SIZE));
+    entries.push({ index, dot });
+  }
+
+  return entries
+    .sort((a, b) => a.index - b.index)
+    .map((e) => e.dot);
+}
+
+/**
+ * Select appropriate QR spec for a given number of DOTs.
+ * Returns the minimum QR version that fits all DOTs.
+ */
+export function selectQRSpec(dotCount: number, encoding: QREncoding = 'binary'): QRDOTSpec {
+  const bytesNeeded = encoding === 'nested'
+    ?
dotCount * (2 + DOT_SIZE)
+    : dotCount * DOT_SIZE;
+
+  // NaN, fractional or negative counts would otherwise fall through the
+  // capacity lookup and come back looking like a valid spec.
+  if (!Number.isInteger(dotCount) || dotCount < 0) {
+    throw new RangeError(`dotCount must be a non-negative integer, got ${dotCount}`);
+  }
+
+  // Byte-mode data capacity per QR version at error correction level L.
+  // Index 0 is version 1; the last entry is version 40 (2953 bytes).
+  const BYTE_CAPACITY_L = [
+    17, 32, 53, 78, 106, 134, 154, 192, 230, 271,
+    321, 367, 425, 458, 520, 586, 644, 718, 792, 858,
+    929, 1003, 1091, 1171, 1273, 1367, 1465, 1528, 1628, 1732,
+    1840, 1952, 2068, 2188, 2303, 2431, 2563, 2699, 2809, 2953,
+  ];
+
+  // Smallest version whose capacity covers the payload.
+  const fitIndex = BYTE_CAPACITY_L.findIndex((capacity) => bytesNeeded <= capacity);
+  if (fitIndex === -1) {
+    // Nothing fits: reporting version 40 here would describe a code that
+    // cannot actually hold the payload.
+    throw new RangeError(
+      `${dotCount} DOTs need ${bytesNeeded}B, which exceeds the ${BYTE_CAPACITY_L[BYTE_CAPACITY_L.length - 1]}B capacity of a QR v40 L code`
+    );
+  }
+  const version = fitIndex + 1;
+ + return { + version, + errorCorrection: 'L', + dotsPerCode: dotCount, + encoding, + }; +} diff --git a/packages/qr/src/index.ts b/packages/qr/src/index.ts new file mode 100644 index 000000000..6eff0ee8e --- /dev/null +++ b/packages/qr/src/index.ts @@ -0,0 +1,31 @@ +/** + * @dotprotocol/qr — v0.3.0 + * + * QR-DOT encoding: pack DOTs into scannable physical objects. + * A DOT printed as QR on paper IS Falooda Protocol. + * + * "Zero-cost communication for people without devices." + */ + +export { + encodeBinary, + decodeBinary, + encodeSteganographic, + decodeSteganographic, + encodeNested, + decodeNested, + selectQRSpec, + DOT_SIZE, +} from './encode.js'; + +export { verifyPhysicalDOTs } from './verify.js'; + +export type { + QRErrorCorrection, + QREncoding, + QRDOTSpec, + PhysicalDOT, + QRDecodeResult, +} from './types.js'; + +export { QR_CAPACITY } from './types.js'; diff --git a/packages/qr/src/tests/encode.test.ts b/packages/qr/src/tests/encode.test.ts new file mode 100644 index 000000000..82ff8e9be --- /dev/null +++ b/packages/qr/src/tests/encode.test.ts @@ -0,0 +1,121 @@ +import { describe, it, expect } from 'vitest'; +import { createKeypair, createDOT } from '@dotprotocol/core'; +import { + encodeBinary, + decodeBinary, + encodeNested, + decodeNested, + encodeSteganographic, + decodeSteganographic, + selectQRSpec, + DOT_SIZE, +} from '../encode.js'; +import { QR_CAPACITY } from '../types.js'; + +async function makeDots(count: number) { + const kp = await createKeypair(); + const dots = []; + let prev = undefined; + for (let i = 0; i < count; i++) { + const dot = await createDOT({ keypair: kp, previous: prev }); + dots.push(dot); + prev = dot; + } + return dots; +} + +describe('Binary encoding', () => { + it('encodes and decodes a single DOT', async () => { + const [dot] = await makeDots(1); + const buf = encodeBinary([dot]); + expect(buf.length).toBe(DOT_SIZE); + const decoded = decodeBinary(buf); + expect(decoded).toHaveLength(1); + 
expect(decoded[0].pubkey).toEqual(dot.pubkey); + expect(decoded[0].sig).toEqual(dot.sig); + }); + + it('encodes and decodes multiple DOTs', async () => { + const dots = await makeDots(5); + const buf = encodeBinary(dots); + expect(buf.length).toBe(5 * DOT_SIZE); + const decoded = decodeBinary(buf); + expect(decoded).toHaveLength(5); + for (let i = 0; i < 5; i++) { + expect(decoded[i].pubkey).toEqual(dots[i].pubkey); + } + }); + + it('throws for empty array', () => { + expect(() => encodeBinary([])).toThrow(); + }); + + it(`throws when exceeding QR capacity of ${QR_CAPACITY.dotsPerCode} DOTs`, async () => { + // Mock DOT count exceeding limit without generating them all + const dots = await makeDots(1); + const tooMany = Array(QR_CAPACITY.dotsPerCode + 1).fill(dots[0]); + expect(() => encodeBinary(tooMany)).toThrow(); + }); + + it('throws on decode if buffer not multiple of 153', () => { + const bad = new Uint8Array(100); + expect(() => decodeBinary(bad)).toThrow(); + }); +}); + +describe('Nested encoding', () => { + it('encodes and decodes preserving DOT order', async () => { + const dots = await makeDots(3); + const buf = encodeNested(dots); + const decoded = decodeNested(buf); + expect(decoded).toHaveLength(3); + for (let i = 0; i < 3; i++) { + expect(decoded[i].pubkey).toEqual(dots[i].pubkey); + } + }); + + it('each entry is 2 + 153 = 155 bytes', async () => { + const dots = await makeDots(2); + const buf = encodeNested(dots); + expect(buf.length).toBe(2 * 155); + }); + + it('throws on decode if buffer not multiple of 155', () => { + const bad = new Uint8Array(100); + expect(() => decodeNested(bad)).toThrow(); + }); +}); + +describe('Steganographic encoding', () => { + it('XOR round-trip recovers original DOT bytes', async () => { + const dots = await makeDots(1); + const dotBytes = encodeBinary(dots); + const carrier = new Uint8Array(dotBytes.length).fill(0xab); + + const masked = encodeSteganographic(dots, carrier); + const recovered = 
decodeSteganographic(masked, carrier, 1);
+    expect(recovered[0].pubkey).toEqual(dots[0].pubkey);
+  });
+
+  it('throws if DOT payload exceeds carrier', async () => {
+    const dots = await makeDots(1);
+    const tinyCarrier = new Uint8Array(10); // too small
+    expect(() => encodeSteganographic(dots, tinyCarrier)).toThrow();
+  });
+});
+
+describe('selectQRSpec', () => {
+  it('returns version 7 for a single DOT in binary mode', () => {
+    // 153 bytes: v6-L holds 134 bytes, v7-L holds 154
+    const spec = selectQRSpec(1, 'binary');
+    expect(spec.version).toBe(7);
+    expect(spec.dotsPerCode).toBe(1);
+    expect(spec.encoding).toBe('binary');
+  });
+
+  it('returns version 40 for 19 DOTs (max capacity)', () => {
+    // 19 * 153 = 2907 bytes: v39-L holds 2809 bytes, v40-L holds 2953
+    const spec = selectQRSpec(19, 'binary');
+    expect(spec.version).toBe(40);
+    expect(spec.errorCorrection).toBe('L');
+  });
+});
diff --git a/packages/qr/src/types.ts b/packages/qr/src/types.ts
new file mode 100644
index 000000000..ed3ffffe0
--- /dev/null
+++ b/packages/qr/src/types.ts
@@ -0,0 +1,57 @@
+/**
+ * DOT Protocol v0.3.0 — QR-DOT Types
+ *
+ * Specification for encoding DOTs into scannable physical objects.
+ * A DOT printed as QR on paper IS Falooda Protocol.
+ */
+
+/** QR error correction levels */
+export type QRErrorCorrection = 'L' | 'M' | 'Q' | 'H';
+
+/** Encoding strategy for DOTs in QR modules */
+export type QREncoding = 'binary' | 'steganographic' | 'nested';
+
+/**
+ * QR code specification for a physical DOT.
+ *
+ * - binary: DOTs serialized directly into QR data bytes
+ * - steganographic: DOTs hidden in visual pattern of QR modules
+ * - nested: each QR module is itself a microdot container
+ */
+export interface QRDOTSpec {
+  version: number;  // QR version (1–40)
+  errorCorrection: QRErrorCorrection;
+  dotsPerCode: number;  // how many DOTs encoded (1–800)
+  encoding: QREncoding;
+}
+
+/**
+ * A physical DOT: one or more DOTs encoded into a scannable object.
+ * The Falooda Protocol endpoint — zero cost communication. + */ +export interface PhysicalDOT { + qr: QRDOTSpec; + chain: import('@dotprotocol/core').DOT[]; // the DOTs encoded + scannerFace: 'camera' | 'reader'; // how to read it + transformOnScan?: string; // transform registry ID triggered by scanning +} + +/** QR channel capacity constants */ +export const QR_CAPACITY = { + /** Max data bytes in a QR v40 L code */ + maxBytesPerCode: 2953, + /** ~19 DOTs per standard QR (2953 / 153) */ + dotsPerCode: Math.floor(2953 / 153), + /** Microdot density at arm's length (dots per cm²) */ + microDotDensity: 800, + /** Storage capacity at arm's length (~122 KB per cm²) */ + microDotCapacity: 800 * 153, +} as const; + +/** Result of decoding a QR image into DOTs */ +export interface QRDecodeResult { + dots: import('@dotprotocol/core').DOT[]; + encoding: QREncoding; + verified: boolean; // true if all DOT signatures passed + errors: string[]; +} diff --git a/packages/qr/src/verify.ts b/packages/qr/src/verify.ts new file mode 100644 index 000000000..802e0899e --- /dev/null +++ b/packages/qr/src/verify.ts @@ -0,0 +1,50 @@ +/** + * DOT Protocol v0.3.0 — QR-DOT Verifier + * + * Verify all DOTs decoded from a QR payload. + * Checks Ed25519 signatures and optionally chain integrity. + */ + +import { verifyDOT, checkChain } from '@dotprotocol/core'; +import type { DOT } from '@dotprotocol/core'; +import type { QRDecodeResult, QREncoding } from './types.js'; + +/** + * Verify an array of DOTs decoded from a physical QR. + * Each DOT's Ed25519 signature is checked individually. + * If all DOTs share the same pubkey, chain integrity is also verified. 
+ *
+ * A DOT (or chain) whose verification throws is recorded as an error entry
+ * instead of rejecting the whole call.
+ *
+ * @param dots DOTs decoded from the QR payload, in payload order
+ * @param encoding Encoding the DOTs were decoded with; echoed into the result
+ * @returns The DOTs, the encoding, every collected error, and `verified`,
+ *          which is true only when no error was recorded
+ */
+export async function verifyPhysicalDOTs(
+  dots: DOT[],
+  encoding: QREncoding
+): Promise<QRDecodeResult> {
+  const errors: string[] = [];
+  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));
+
+  // NOTE(review): an empty `dots` array records no errors and therefore
+  // reports verified: true — confirm callers treat "no DOTs" separately.
+
+  // Individual signature checks
+  for (let i = 0; i < dots.length; i++) {
+    try {
+      const ok = await verifyDOT(dots[i]);
+      if (!ok) {
+        errors.push(`DOT[${i}] signature invalid`);
+      }
+    } catch (err) {
+      errors.push(`DOT[${i}] verification failed: ${describe(err)}`);
+    }
+  }
+
+  // Chain integrity check if all DOTs share the same pubkey
+  if (dots.length > 1) {
+    const firstKey = dots[0].pubkey;
+    // Compare lengths too, so a key that is only a prefix of the first key
+    // is not mistaken for the same key.
+    const sameKey = dots.every(
+      (d) => d.pubkey.length === firstKey.length && d.pubkey.every((b, j) => b === firstKey[j])
+    );
+
+    if (sameKey) {
+      try {
+        const chainResult = await checkChain(dots);
+        if (!chainResult.valid) {
+          errors.push(`Chain broken at index ${chainResult.brokenAt}: ${chainResult.reason}`);
+        }
+      } catch (err) {
+        errors.push(`Chain check failed: ${describe(err)}`);
+      }
+    }
+  }
+
+  return {
+    dots,
+    encoding,
+    verified: errors.length === 0,
+    errors,
+  };
+}
diff --git a/packages/qr/tsconfig.json b/packages/qr/tsconfig.json
new file mode 100644
index 000000000..792172fb8
--- /dev/null
+++ b/packages/qr/tsconfig.json
@@ -0,0 +1,8 @@
+{
+  "extends": "../../tsconfig.base.json",
+  "compilerOptions": {
+    "outDir": "./dist",
+    "rootDir": "./src"
+  },
+  "include": ["src"]
+}
diff --git a/packages/relay/.npmignore b/packages/relay/.npmignore
new file mode 100644
index 000000000..91b700e0b
--- /dev/null
+++ b/packages/relay/.npmignore
@@ -0,0 +1,4 @@
+src/tests/
+src/**/*.test.ts
+coverage/
+*.tsbuildinfo
diff --git a/packages/relay/LICENSE b/packages/relay/LICENSE
new file mode 100644
index 000000000..d90201772
--- /dev/null
+++ b/packages/relay/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 DOT Protocol contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the
following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/packages/relay/README.md b/packages/relay/README.md
new file mode 100644
index 000000000..6ae811d47
--- /dev/null
+++ b/packages/relay/README.md
@@ -0,0 +1,121 @@
+# @dotprotocol/relay
+
+CHORUS relay client — WebSocket transport for DOT Protocol.
+
+[![npm](https://img.shields.io/npm/v/@dotprotocol/relay)](https://www.npmjs.com/package/@dotprotocol/relay)
+
+## Install
+
+```bash
+npm install @dotprotocol/relay
+```
+
+## Quick start
+
+```js
+import { RelayClient } from '@dotprotocol/relay';
+import { createKeypair, toBytes, fromBytes } from '@dotprotocol/core';
+
+const keypair = await createKeypair();
+const relay = new RelayClient({ url: 'wss://dotdotdot.rocks' });
+
+// Listen for incoming DOTs
+relay.onFrame((circleId, dotBytes) => {
+  const dot = fromBytes(dotBytes);
+  console.log('received', dot.timestamp, 'in circle', circleId);
+});
+
+// Subscriptions made before connecting are sent once authentication succeeds
+relay.subscribe('my-circle');
+
+// Send a DOT as soon as the relay has authenticated us
+relay.onStatus((status) => {
+  if (status === 'connected') {
+    relay.sendFrame('my-circle', toBytes(myDot));
+  }
+});
+
+// Connect and authenticate with the keypair
+relay.connect(keypair);
+
+// Later: disconnect
+relay.disconnect();
+```
+
+## API
+
+### `new RelayClient(config)`
+
+```js
+const relay = new RelayClient({
+  url: 'wss://dotdotdot.rocks',
+  reconnect: true,         // auto-reconnect when the socket closes (default: true)
+  reconnectDelayMs: 3000,  // ms to wait before reconnecting (default: 3000)
+  pingIntervalMs: 20000,   // ms between keep-alive pings (default: 20000)
+});
+```
+
+### `relay.connect(keypair)`
+
+Open the WebSocket connection and authenticate with `keypair`. Returns immediately; the status moves through `connecting` → `authenticating` → `connected`. Use `onStatus` to observe it.
+
+```js
+relay.connect(keypair);
+```
+
+### `relay.disconnect()`
+
+Close the connection and stop auto-reconnecting.
+
+```js
+relay.disconnect();
+```
+
+### `relay.subscribe(circleId)` / `relay.unsubscribe(circleId)`
+
+Start or stop receiving DOTs for a circle. Subscriptions made while disconnected are remembered and sent after authentication.
+
+```js
+relay.subscribe('my-circle');
+relay.unsubscribe('my-circle');
+```
+
+### `relay.sendFrame(circleId, dotBytes)`
+
+Send a 153-byte DOT to a circle. Returns `false` (and sends nothing) unless the status is `connected`.
+
+```js
+const sent = relay.sendFrame('my-circle', toBytes(dot));
+```
+
+### `relay.onFrame(handler)` / `relay.onStatus(handler)`
+
+Both return a function that removes the handler again.
+
+```js
+// Incoming DOT
+const offFrame = relay.onFrame((circleId: string, dotBytes: Uint8Array) => { ... });
+
+// Connection status: 'disconnected' | 'connecting' | 'authenticating' | 'connected'
+const offStatus = relay.onStatus((status) => { ... });
+
+// Current status at any time
+relay.getStatus();
+```
+
+### `packFrame(circleId, dotBytes)` / `unpackFrame(frame)`
+
+Low-level frame serialization. Usually not needed directly:
+
+```js
+import { packFrame, unpackFrame, FRAME_SIZE } from '@dotprotocol/relay';
+
+const frame = packFrame('my-circle', dotBytes);     // Uint8Array(FRAME_SIZE) — relay wire frame
+const { circleId, dotBytes } = unpackFrame(frame);  // dotBytes: Uint8Array(153)
+```
+
+## Relay protocol
+
+The relay is intentionally dumb:
+
+- Challenges every new connection with a random nonce; the client proves control of its key by returning an Ed25519 signature over that nonce
+- Accepts `subscribe` / `unsubscribe` messages for circle IDs from authenticated clients
+- Receives 153-byte DOTs wrapped in a minimal frame (32-byte circle ID + 153-byte DOT = 185 bytes)
+- Forwards each frame to every other client subscribed to that circle
+- Does NOT store, index, or read payloads
+
+The relay knows nothing. It forwards 153 bytes.
+
+## Self-hosting
+
+The CHORUS relay is open source. Run your own (needs the optional `ws` dependency installed):
+
+```js
+import { startRelayServer } from '@dotprotocol/relay/server';
+
+const server = await startRelayServer(8765);
+// later: server.close();
+```
+
+Point clients at `ws://localhost:8765`.
+ +## License + +MIT diff --git a/packages/relay/package.json b/packages/relay/package.json new file mode 100644 index 000000000..883fdb383 --- /dev/null +++ b/packages/relay/package.json @@ -0,0 +1,53 @@ +{ + "name": "@dotprotocol/relay", + "version": "0.3.0", + "description": "CHORUS relay client and server for DOT Protocol.", + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "import": "./dist/index.js", + "require": "./dist/index.cjs", + "types": "./dist/index.d.ts" + }, + "./server": { + "import": "./dist/server.js", + "require": "./dist/server.cjs", + "types": "./dist/server.d.ts" + } + }, + "sideEffects": false, + "scripts": { + "build": "tsdown", + "test": "vitest run", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "@dotprotocol/core": "workspace:*" + }, + "optionalDependencies": { + "ws": "^8.18.0" + }, + "devDependencies": { + "@noble/curves": "^2.0.1", + "@types/ws": "^8.18.0", + "ws": "^8.18.0" + }, + "license": "MIT", + "files": [ + "dist", + "README.md", + "LICENSE" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/dot-protocol/dot.git", + "directory": "packages/relay" + }, + "homepage": "https://github.com/dot-protocol/dot/tree/main/packages/relay#readme", + "bugs": { + "url": "https://github.com/dot-protocol/dot/issues" + } +} diff --git a/packages/relay/src/client.ts b/packages/relay/src/client.ts new file mode 100644 index 000000000..43e973137 --- /dev/null +++ b/packages/relay/src/client.ts @@ -0,0 +1,156 @@ +/// +// client.ts — CHORUS relay client (browser + Node compatible) + +import { packFrame, unpackFrame, FRAME_SIZE, type RelayConfig, type RelayStatus } from './types.js'; +import type { Keypair } from '@dotprotocol/core'; + +const PKCS8_PREFIX = new Uint8Array([ + 0x30, 0x2e, 0x02, 0x01, 0x00, 0x30, 0x05, 0x06, + 0x03, 0x2b, 0x65, 0x70, 0x04, 0x22, 0x04, 0x20, +]); + +function bytesToHex(b: Uint8Array): string { + return 
Array.from(b, x => x.toString(16).padStart(2, '0')).join('');
+}
+
+/**
+ * Parse a hex string into bytes.
+ * Throws on odd length or non-hex characters so a malformed challenge nonce is
+ * rejected instead of being silently turned into zero bytes and signed.
+ */
+function hexToBytes(h: string): Uint8Array {
+  if (h.length % 2 !== 0 || !/^[0-9a-fA-F]*$/.test(h)) {
+    throw new Error('Invalid hex string');
+  }
+  const bytes = new Uint8Array(h.length / 2);
+  for (let i = 0; i < bytes.length; i++) {
+    bytes[i] = parseInt(h.slice(i * 2, i * 2 + 2), 16);
+  }
+  return bytes;
+}
+
+/**
+ * Sign the relay's challenge nonce with a raw 32-byte Ed25519 private key.
+ * The key is wrapped in a fixed PKCS#8 header so Web Crypto can import it.
+ */
+async function signNonce(privateKey: Uint8Array, nonce: Uint8Array): Promise<Uint8Array> {
+  const pkcs8 = new Uint8Array(PKCS8_PREFIX.length + 32);
+  pkcs8.set(PKCS8_PREFIX);
+  pkcs8.set(privateKey, PKCS8_PREFIX.length);
+  const privKey = await crypto.subtle.importKey(
+    'pkcs8', pkcs8.buffer as ArrayBuffer, { name: 'Ed25519' }, false, ['sign']
+  );
+  return new Uint8Array(await crypto.subtle.sign('Ed25519', privKey, nonce.buffer as ArrayBuffer));
+}
+
+export type FrameHandler = (circleId: string, dotBytes: Uint8Array) => void;
+export type StatusHandler = (status: RelayStatus) => void;
+
+/**
+ * CHORUS relay client.
+ *
+ * Lifecycle: connect(keypair) opens the socket, answers the relay's challenge
+ * with a signature, and reaches 'connected' once the relay replies
+ * 'authenticated'. Circle subscriptions are remembered and re-sent after every
+ * successful authentication, including automatic reconnects.
+ */
+export class RelayClient {
+  private ws: WebSocket | null = null;
+  private status: RelayStatus = 'disconnected';
+  private keypair: Keypair | null = null;
+  private subscribedCircles = new Set<string>();
+  private frameHandlers: FrameHandler[] = [];
+  private statusHandlers: StatusHandler[] = [];
+  private pingTimer: ReturnType<typeof setInterval> | null = null;
+  private reconnectTimer: ReturnType<typeof setTimeout> | null = null;
+  private config: Required<RelayConfig>;
+  // The caller's reconnect preference; disconnect() switches reconnect off and
+  // connect() restores this value so a later connect() reconnects again.
+  private readonly reconnectPreference: boolean;
+
+  constructor(config: RelayConfig) {
+    this.config = {
+      reconnect: true,
+      reconnectDelayMs: 3000,
+      pingIntervalMs: 20_000,
+      ...config,
+    };
+    this.reconnectPreference = this.config.reconnect;
+  }
+
+  /** Register a handler for incoming frames. Returns a function that removes it. */
+  onFrame(handler: FrameHandler): () => void {
+    this.frameHandlers.push(handler);
+    return () => { this.frameHandlers = this.frameHandlers.filter(h => h !== handler); };
+  }
+
+  /** Register a handler for status changes. Returns a function that removes it. */
+  onStatus(handler: StatusHandler): () => void {
+    this.statusHandlers.push(handler);
+    return () => { this.statusHandlers = this.statusHandlers.filter(h => h !== handler); };
+  }
+
+  /** Open the socket and authenticate with `keypair`. Returns immediately. */
+  connect(keypair: Keypair): void {
+    this.keypair = keypair;
+    this.config.reconnect = this.reconnectPreference;
+    this._clearReconnectTimer();
+    this._setStatus('connecting');
+    this._open();
+  }
+
+  /** Close the socket and stop reconnecting, including an already scheduled attempt. */
+  disconnect(): void {
+    this.config.reconnect = false;
+    this._clearReconnectTimer();
+    this.ws?.close();
+    this._clearPingTimer();
+    this._setStatus('disconnected');
+  }
+
+  /** Remember a circle subscription and send it right away when connected. */
+  subscribe(circleId: string): void {
+    this.subscribedCircles.add(circleId);
+    if (this.status === 'connected' && this.ws) {
+      this.ws.send(JSON.stringify({ type: 'subscribe', circleId }));
+    }
+  }
+
+  /** Forget a circle subscription and tell the relay when connected. */
+  unsubscribe(circleId: string): void {
+    this.subscribedCircles.delete(circleId);
+    if (this.status === 'connected' && this.ws) {
+      this.ws.send(JSON.stringify({ type: 'unsubscribe', circleId }));
+    }
+  }
+
+  /** Send one DOT to a circle. Returns false, sending nothing, unless connected. */
+  sendFrame(circleId: string, dotBytes: Uint8Array): boolean {
+    if (this.status !== 'connected' || !this.ws) return false;
+    const frame = packFrame(circleId, dotBytes);
+    // Send exactly the frame's bytes, in case packFrame ever returns a view
+    // into a larger buffer.
+    const end = frame.byteOffset + frame.byteLength;
+    this.ws.send(frame.buffer.slice(frame.byteOffset, end) as ArrayBuffer);
+    return true;
+  }
+
+  getStatus(): RelayStatus { return this.status; }
+
+  private _setStatus(s: RelayStatus): void {
+    this.status = s;
+    for (const h of this.statusHandlers) h(s);
+  }
+
+  private _clearPingTimer(): void {
+    if (this.pingTimer) {
+      clearInterval(this.pingTimer);
+      this.pingTimer = null;
+    }
+  }
+
+  private _clearReconnectTimer(): void {
+    if (this.reconnectTimer) {
+      clearTimeout(this.reconnectTimer);
+      this.reconnectTimer = null;
+    }
+  }
+
+  private _open(): void {
+    const ws = new WebSocket(this.config.url);
+    ws.binaryType = 'arraybuffer';
+    this.ws = ws;
+
+    ws.onopen = () => this._setStatus('authenticating');
+
+    ws.onmessage = async (event: MessageEvent) => {
+      // Binary messages are relay frames; anything else is a JSON control message.
+      if (event.data instanceof ArrayBuffer) {
+        const frame = new Uint8Array(event.data);
+        if (frame.length !== FRAME_SIZE) return;
+        const { circleId, dotBytes } = unpackFrame(frame);
+        for (const h of this.frameHandlers) h(circleId, dotBytes);
+        return;
+      }
+      try {
+        const msg = JSON.parse(event.data as string) as Record<string, unknown>;
+        if (msg['type'] === 'challenge' && typeof msg['nonce'] === 'string') {
+          const nonce = hexToBytes(msg['nonce']);
+          const sig = await signNonce(this.keypair!.privateKey, nonce);
+          ws.send(JSON.stringify({
+            type: 'auth',
+            pubHex: bytesToHex(this.keypair!.publicKey),
+            sig: bytesToHex(sig),
+          }));
+        } else if (msg['type'] === 'authenticated') {
+          this._setStatus('connected');
+          for (const circleId of this.subscribedCircles) {
+            ws.send(JSON.stringify({ type: 'subscribe', circleId }));
+          }
+          // Never stack intervals if the relay repeats 'authenticated'.
+          this._clearPingTimer();
+          this.pingTimer = setInterval(() => {
+            if (ws.readyState === 1 /* WebSocket.OPEN */) {
+              ws.send(JSON.stringify({ type: 'ping' }));
+            }
+          }, this.config.pingIntervalMs);
+        }
+      } catch { /* malformed message */ }
+    };
+
+    ws.onerror = () => this._setStatus('disconnected');
+
+    ws.onclose = () => {
+      this._setStatus('disconnected');
+      this._clearPingTimer();
+      if (this.config.reconnect && this.keypair) {
+        // Keep the handle so disconnect() can cancel a pending reconnect.
+        this._clearReconnectTimer();
+        this.reconnectTimer = setTimeout(() => {
+          this.reconnectTimer = null;
+          this._open();
+        }, this.config.reconnectDelayMs);
+      }
+    };
+  }
+}
diff --git a/packages/relay/src/index.ts b/packages/relay/src/index.ts
new file mode 100644
index 000000000..1af7e4df1
--- /dev/null
+++ b/packages/relay/src/index.ts
@@ -0,0 +1,4 @@
+export { RelayClient } from './client.js';
+export { packFrame, unpackFrame, encodeCircleId, decodeCircleId, FRAME_SIZE, DOT_SIZE, CIRCLE_ID_SIZE } from './types.js';
+export type { RelayConfig, RelayStatus, IncomingFrame, RelayMessage } from './types.js';
+export type { FrameHandler, StatusHandler } from './client.js';
diff --git a/packages/relay/src/server.ts b/packages/relay/src/server.ts
new file mode 100644
index 000000000..3bd23e129
--- /dev/null
+++ b/packages/relay/src/server.ts
@@ -0,0 +1,103 @@
+// server.ts — Node.js CHORUS relay server (SDK entry point)
+// Browser-incompatible (uses ws package + Buffer). Import via '@dotprotocol/relay/server'.
+
+export { packFrame, unpackFrame, FRAME_SIZE, DOT_SIZE, CIRCLE_ID_SIZE } from './types.js';
+
+const SPKI_PREFIX = new Uint8Array([
+  0x30, 0x2a, 0x30, 0x05, 0x06, 0x03, 0x2b, 0x65,
+  0x70, 0x03, 0x21, 0x00,
+]);
+
+function bytesToHex(b: Uint8Array): string {
+  return Array.from(b, x => x.toString(16).padStart(2, '0')).join('');
+}
+
+/**
+ * Start a CHORUS relay server.
+ * Protocol: challenge-auth → subscribe → binary frame routing.
+ * @param port Default 8765
+ * @returns Handle whose `close()` shuts the server down
+ */
+export async function startRelayServer(port = 8765): Promise<{ close: () => void }> {
+  const { WebSocketServer } = await import('ws') as typeof import('ws');
+  const wss = new WebSocketServer({ port });
+  console.log(`[dot/relay] listening on ws://0.0.0.0:${port}`);
+
+  type WS = import('ws').WebSocket;
+  const connections = new Map<WS, { nonce: string; pubHex: string | null; circles: Set<string> }>();
+  const circles = new Map<string, Set<WS>>();
+
+  // Wire format: a 32-byte circle ID followed by a 153-byte DOT.
+  const CIRCLE_ID_BYTES = 32;
+  const FRAME_BYTES = 185;
+
+  // True when `value` is a hex string encoding exactly `byteLength` bytes.
+  // Buffer.from(x, 'hex') silently truncates at the first invalid character,
+  // so the shape is checked before decoding.
+  const isHex = (value: unknown, byteLength: number): value is string =>
+    typeof value === 'string' && value.length === byteLength * 2 && /^[0-9a-fA-F]+$/.test(value);
+
+  wss.on('connection', (ws: WS) => {
+    const nonce = bytesToHex(crypto.getRandomValues(new Uint8Array(32)));
+    connections.set(ws, { nonce, pubHex: null, circles: new Set() });
+    ws.send(JSON.stringify({ type: 'challenge', nonce }));
+
+    // An 'error' event without a listener is thrown by the emitter and would
+    // take the whole relay down; drop the offending connection instead.
+    ws.on('error', () => ws.terminate());
+
+    ws.on('message', async (data: Buffer | ArrayBuffer, isBinary: boolean) => {
+      const conn = connections.get(ws);
+      if (!conn) return;
+
+      if (!isBinary) {
+        try {
+          const msg = JSON.parse(data.toString()) as Record<string, unknown>;
+          if (msg['type'] === 'auth' && !conn.pubHex) {
+            const pubHex = msg['pubHex'];
+            const sigHex = msg['sig'];
+            // Ed25519: 32-byte public key, 64-byte signature, both hex-encoded.
+            if (!isHex(pubHex, 32) || !isHex(sigHex, 64)) {
+              ws.send(JSON.stringify({ type: 'error', code: 'auth_failed' }));
+              return;
+            }
+            let valid = false;
+            try {
+              const spki = new Uint8Array(SPKI_PREFIX.length + 32);
+              spki.set(SPKI_PREFIX);
+              spki.set(Buffer.from(pubHex, 'hex'), SPKI_PREFIX.length);
+              const pubKey = await crypto.subtle.importKey('spki', spki.buffer as ArrayBuffer, { name: 'Ed25519' }, false, ['verify']);
+              valid = await crypto.subtle.verify('Ed25519', pubKey, Buffer.from(sigHex, 'hex'), Buffer.from(conn.nonce, 'hex'));
+            } catch {
+              // A key that cannot be imported is an authentication failure,
+              // and the client must be told rather than left waiting.
+              valid = false;
+            }
+            if (!valid) { ws.send(JSON.stringify({ type: 'error', code: 'auth_failed' })); return; }
+            conn.pubHex = pubHex;
+            ws.send(JSON.stringify({ type: 'authenticated', pubHex: conn.pubHex }));
+          } else if (msg['type'] === 'subscribe' && conn.pubHex) {
+            const circleId = msg['circleId'];
+            if (typeof circleId !== 'string' || circleId.length === 0) {
+              ws.send(JSON.stringify({ type: 'error', code: 'invalid_circle_id' }));
+              return;
+            }
+            if (!circles.has(circleId)) circles.set(circleId, new Set());
+            circles.get(circleId)!.add(ws);
+            conn.circles.add(circleId);
+            ws.send(JSON.stringify({ type: 'subscribed', circleId }));
+          } else if (msg['type'] === 'unsubscribe' && conn.pubHex) {
+            const circleId = msg['circleId'];
+            if (typeof circleId !== 'string') return;
+            circles.get(circleId)?.delete(ws);
+            conn.circles.delete(circleId);
+            // Clean up empty circle sets
+            if (circles.get(circleId)?.size === 0) circles.delete(circleId);
+          } else if (msg['type'] === 'ping') {
+            ws.send(JSON.stringify({ type: 'pong', ts: Date.now() }));
+          }
+        } catch { /* ignore malformed */ }
+        return;
+      }
+
+      // Binary frame
+      if (!conn.pubHex) { ws.send(JSON.stringify({ type: 'error', code: 'not_authenticated' })); ws.close(); return; }
+      const frame = Buffer.isBuffer(data) ? data : Buffer.from(data as ArrayBuffer);
+      if (frame.length !== FRAME_BYTES) { ws.send(JSON.stringify({ type: 'error', code: 'invalid_frame_size' })); return; }
+      // The circle ID is NUL-padded UTF-8 in the first 32 bytes of the frame.
+      const circleId = frame.subarray(0, CIRCLE_ID_BYTES).toString('utf8').replace(/\0/g, '');
+      if (!circleId) return;
+      const subs = circles.get(circleId);
+      if (subs) {
+        // Forward to every other open subscriber; the sender gets no echo.
+        for (const sub of subs) {
+          if (sub !== ws && sub.readyState === 1) sub.send(frame);
+        }
+      }
+    });
+
+    ws.on('close', () => {
+      const conn = connections.get(ws);
+      if (conn) {
+        for (const cid of conn.circles) {
+          circles.get(cid)?.delete(ws);
+          if (circles.get(cid)?.size === 0) circles.delete(cid);
+        }
+      }
+      connections.delete(ws);
+    });
+  });
+
+  return { close: () => wss.close() };
+}
diff --git a/packages/relay/src/tests/client.test.ts b/packages/relay/src/tests/client.test.ts
new file mode 100644
index 000000000..58f544055
--- /dev/null
+++ b/packages/relay/src/tests/client.test.ts
@@ -0,0 +1,351 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { RelayClient } from '../client.js';
+import { FRAME_SIZE, DOT_SIZE, packFrame } from '../types.js';
+
+// Mock WebSocket for testing the client state machine
+class MockWS {
+  readyState = 1; // OPEN
+  sent: Array = [];
+  onopen: (() => void) | null = null;
+  onmessage: ((e: { data: string | ArrayBuffer }) => void) | null = null;
+  onerror:
(() => void) | null = null; + onclose: (() => void) | null = null; + binaryType = 'arraybuffer'; + + send(data: string | ArrayBuffer) { this.sent.push(data); } + close() { this.readyState = 3; this.onclose?.(); } + + // Helper: simulate receiving a JSON message from server + receive(msg: object) { this.onmessage?.({ data: JSON.stringify(msg) }); } + // Helper: simulate receiving binary frame + receiveBinary(buf: ArrayBuffer) { this.onmessage?.({ data: buf }); } + // Helper: trigger open + open() { this.readyState = 1; this.onopen?.(); } +} + +// Patch globalThis.WebSocket with MockWS +function withMockWS(fn: (mockWS: MockWS) => void | Promise) { + return async () => { + const mockWS = new MockWS(); + const OrigWS = (globalThis as Record).WebSocket; + (globalThis as Record).WebSocket = function() { return mockWS; }; + try { + await fn(mockWS); + } finally { + (globalThis as Record).WebSocket = OrigWS; + } + }; +} + +describe('RelayClient state machine', () => { + it('starts disconnected', () => { + const client = new RelayClient({ url: 'ws://localhost:8765' }); + expect(client.getStatus()).toBe('disconnected'); + }); + + it('transitions to connecting then authenticating on connect', withMockWS(async (mockWS) => { + const { createKeypair } = await import('@dotprotocol/core'); + const kp = await createKeypair(); + const client = new RelayClient({ url: 'ws://localhost:8765', reconnect: false }); + + const statuses: string[] = []; + client.onStatus(s => statuses.push(s)); + client.connect(kp); + mockWS.open(); + + expect(statuses).toContain('connecting'); + expect(statuses).toContain('authenticating'); + })); + + it('sends auth response to challenge', withMockWS(async (mockWS) => { + const { createKeypair } = await import('@dotprotocol/core'); + const kp = await createKeypair(); + const client = new RelayClient({ url: 'ws://localhost:8765', reconnect: false }); + + client.connect(kp); + mockWS.open(); + + const nonce = '0'.repeat(64); // 32 zero bytes as hex + 
mockWS.receive({ type: 'challenge', nonce }); + + // Give async auth a tick to complete + await new Promise(r => setTimeout(r, 50)); + + const authMsg = mockWS.sent.find(s => typeof s === 'string' && s.includes('"auth"')); + expect(authMsg).toBeDefined(); + const parsed = JSON.parse(authMsg as string); + expect(parsed.type).toBe('auth'); + expect(parsed.pubHex).toBe(Array.from(kp.publicKey).map((b: number) => b.toString(16).padStart(2, '0')).join('')); + })); + + it('transitions to connected after authenticated message', withMockWS(async (mockWS) => { + const { createKeypair } = await import('@dotprotocol/core'); + const kp = await createKeypair(); + const client = new RelayClient({ url: 'ws://localhost:8765', reconnect: false, pingIntervalMs: 999999 }); + + const statuses: string[] = []; + client.onStatus(s => statuses.push(s)); + client.connect(kp); + mockWS.open(); + mockWS.receive({ type: 'authenticated', pubHex: 'abc' }); + await new Promise(r => setTimeout(r, 10)); + + expect(client.getStatus()).toBe('connected'); + })); + + it('sendFrame returns false when not connected', () => { + const client = new RelayClient({ url: 'ws://localhost:8765' }); + const dotBytes = new Uint8Array(DOT_SIZE).fill(1); + expect(client.sendFrame('test-circle', dotBytes)).toBe(false); + }); + + it('calls frame handler on incoming binary frame', withMockWS(async (mockWS) => { + const { createKeypair } = await import('@dotprotocol/core'); + const kp = await createKeypair(); + const client = new RelayClient({ url: 'ws://localhost:8765', reconnect: false, pingIntervalMs: 999999 }); + + const received: Array<{ circleId: string; dotBytes: Uint8Array }> = []; + client.onFrame((circleId, dotBytes) => received.push({ circleId, dotBytes })); + + client.connect(kp); + mockWS.open(); + mockWS.receive({ type: 'authenticated', pubHex: 'abc' }); + await new Promise(r => setTimeout(r, 10)); + + const dotBytes = new Uint8Array(DOT_SIZE).fill(0x42); + const frame = packFrame('test-circle', dotBytes); 
+ mockWS.receiveBinary(frame.buffer as ArrayBuffer); + + expect(received).toHaveLength(1); + expect(received[0]!.circleId).toBe('test-circle'); + expect(received[0]!.dotBytes).toEqual(dotBytes); + })); + + it('subscribe sends subscribe message when connected', withMockWS(async (mockWS) => { + const { createKeypair } = await import('@dotprotocol/core'); + const kp = await createKeypair(); + const client = new RelayClient({ url: 'ws://localhost:8765', reconnect: false, pingIntervalMs: 999999 }); + + client.connect(kp); + mockWS.open(); + mockWS.receive({ type: 'authenticated', pubHex: 'abc' }); + await new Promise(r => setTimeout(r, 10)); + + client.subscribe('my-circle'); + const subMsg = mockWS.sent.find(s => typeof s === 'string' && s.includes('"subscribe"')); + expect(subMsg).toBeDefined(); + })); + + it('pre-connect subscribe is replayed after authentication', withMockWS(async (mockWS) => { + const { createKeypair } = await import('@dotprotocol/core'); + const kp = await createKeypair(); + const client = new RelayClient({ url: 'ws://localhost:8765', reconnect: false, pingIntervalMs: 999999 }); + + // Subscribe BEFORE connecting + client.subscribe('early-bird-circle'); + + client.connect(kp); + mockWS.open(); + mockWS.receive({ type: 'authenticated', pubHex: 'abc' }); + await new Promise(r => setTimeout(r, 10)); + + // Should have sent the subscribe after authentication + const subMsg = mockWS.sent.find(s => typeof s === 'string' && s.includes('early-bird-circle')); + expect(subMsg).toBeDefined(); + })); + + it('onFrame unsubscribe function removes handler', withMockWS(async (mockWS) => { + const { createKeypair } = await import('@dotprotocol/core'); + const kp = await createKeypair(); + const client = new RelayClient({ url: 'ws://localhost:8765', reconnect: false, pingIntervalMs: 999999 }); + + let callCount = 0; + const unsubscribe = client.onFrame(() => callCount++); + + client.connect(kp); + mockWS.open(); + mockWS.receive({ type: 'authenticated', pubHex: 
'abc' }); + await new Promise(r => setTimeout(r, 10)); + + const dotBytes = new Uint8Array(DOT_SIZE).fill(0x42); + const frame = packFrame('c', dotBytes); + mockWS.receiveBinary(frame.buffer as ArrayBuffer); + expect(callCount).toBe(1); + + // Remove the handler + unsubscribe(); + mockWS.receiveBinary(frame.buffer as ArrayBuffer); + expect(callCount).toBe(1); // Should not increase + })); + + it('sendFrame sends packed frame when connected (covers lines 95-97)', withMockWS(async (mockWS) => { + const { createKeypair } = await import('@dotprotocol/core'); + const kp = await createKeypair(); + const client = new RelayClient({ url: 'ws://localhost:8765', reconnect: false, pingIntervalMs: 999999 }); + + client.connect(kp); + mockWS.open(); + mockWS.receive({ type: 'authenticated', pubHex: 'abc' }); + await new Promise(r => setTimeout(r, 10)); + + expect(client.getStatus()).toBe('connected'); + const dotBytes = new Uint8Array(DOT_SIZE).fill(0xAA); + const result = client.sendFrame('my-circle', dotBytes); + + expect(result).toBe(true); + // Last sent item should be an ArrayBuffer (the packed frame) + const lastSent = mockWS.sent[mockWS.sent.length - 1]; + expect(lastSent instanceof ArrayBuffer).toBe(true); + expect((lastSent as ArrayBuffer).byteLength).toBe(FRAME_SIZE); + })); + + it('unsubscribe sends unsubscribe message when connected (covers lines 86-88)', withMockWS(async (mockWS) => { + const { createKeypair } = await import('@dotprotocol/core'); + const kp = await createKeypair(); + const client = new RelayClient({ url: 'ws://localhost:8765', reconnect: false, pingIntervalMs: 999999 }); + + client.connect(kp); + mockWS.open(); + mockWS.receive({ type: 'authenticated', pubHex: 'abc' }); + await new Promise(r => setTimeout(r, 10)); + + // Subscribe first + client.subscribe('my-circle'); + // Then unsubscribe — should send unsubscribe message + client.unsubscribe('my-circle'); + + const unsubMsg = mockWS.sent.find(s => typeof s === 'string' && 
s.includes('"unsubscribe"')); + expect(unsubMsg).toBeDefined(); + const parsed = JSON.parse(unsubMsg as string); + expect(parsed.type).toBe('unsubscribe'); + expect(parsed.circleId).toBe('my-circle'); + })); + + it('ping interval fires and sends ping (covers lines 138-140)', withMockWS(async (mockWS) => { + const { createKeypair } = await import('@dotprotocol/core'); + const kp = await createKeypair(); + // Use a very short ping interval so the timer fires during the test + const client = new RelayClient({ url: 'ws://localhost:8765', reconnect: false, pingIntervalMs: 50 }); + + client.connect(kp); + mockWS.open(); + mockWS.receive({ type: 'authenticated', pubHex: 'abc' }); + await new Promise(r => setTimeout(r, 10)); + + expect(client.getStatus()).toBe('connected'); + + // Wait for the ping interval to fire (> 50ms) + await new Promise(r => setTimeout(r, 120)); + + const pingMsg = mockWS.sent.find(s => typeof s === 'string' && s.includes('"ping"')); + expect(pingMsg).toBeDefined(); + const parsed = JSON.parse(pingMsg as string); + expect(parsed.type).toBe('ping'); + + client.disconnect(); + })); + + it('onclose clears ping timer and disconnects (covers lines 149-151)', withMockWS(async (mockWS) => { + const { createKeypair } = await import('@dotprotocol/core'); + const kp = await createKeypair(); + const client = new RelayClient({ url: 'ws://localhost:8765', reconnect: false, pingIntervalMs: 999999 }); + + const statuses: string[] = []; + client.onStatus(s => statuses.push(s)); + + client.connect(kp); + mockWS.open(); + mockWS.receive({ type: 'authenticated', pubHex: 'abc' }); + await new Promise(r => setTimeout(r, 10)); + + expect(client.getStatus()).toBe('connected'); + + // Simulate server closing the connection + mockWS.close(); + await new Promise(r => setTimeout(r, 10)); + + expect(client.getStatus()).toBe('disconnected'); + expect(statuses).toContain('disconnected'); + })); + + it('reconnects after disconnect when reconnect=true (covers lines 151-153)', 
withMockWS(async (mockWS) => { + const { createKeypair } = await import('@dotprotocol/core'); + const kp = await createKeypair(); + // Short reconnect delay so the test doesn't take long + const client = new RelayClient({ url: 'ws://localhost:8765', reconnect: true, reconnectDelayMs: 50, pingIntervalMs: 999999 }); + + const statuses: string[] = []; + client.onStatus(s => statuses.push(s)); + + client.connect(kp); + mockWS.open(); + mockWS.receive({ type: 'authenticated', pubHex: 'abc' }); + await new Promise(r => setTimeout(r, 10)); + + // Simulate server closing — should trigger reconnect + mockWS.close(); + await new Promise(r => setTimeout(r, 10)); + + expect(statuses).toContain('disconnected'); + + // After reconnect delay, _open() is called again → status goes to 'connecting' + await new Promise(r => setTimeout(r, 100)); + expect(statuses).toContain('connecting'); + + client.disconnect(); + })); + + it('onStatus unsubscribe removes handler', withMockWS(async (mockWS) => { + const { createKeypair } = await import('@dotprotocol/core'); + const kp = await createKeypair(); + const client = new RelayClient({ url: 'ws://localhost:8765', reconnect: false, pingIntervalMs: 999999 }); + + let callCount = 0; + const unsubscribe = client.onStatus(() => callCount++); + unsubscribe(); + + client.connect(kp); + mockWS.open(); + + // Status changed but handler was removed — count should stay 0 + expect(callCount).toBe(0); + })); + + it('ignores incoming binary frame with wrong size', withMockWS(async (mockWS) => { + const { createKeypair } = await import('@dotprotocol/core'); + const kp = await createKeypair(); + const client = new RelayClient({ url: 'ws://localhost:8765', reconnect: false, pingIntervalMs: 999999 }); + + let frameCount = 0; + client.onFrame(() => frameCount++); + + client.connect(kp); + mockWS.open(); + mockWS.receive({ type: 'authenticated', pubHex: 'abc' }); + await new Promise(r => setTimeout(r, 10)); + + // Send a binary frame that is NOT FRAME_SIZE bytes 
— should be silently ignored + const wrongFrame = new ArrayBuffer(100); + mockWS.receiveBinary(wrongFrame); + await new Promise(r => setTimeout(r, 10)); + + expect(frameCount).toBe(0); + })); + + it('ignores malformed JSON text messages', withMockWS(async (mockWS) => { + const { createKeypair } = await import('@dotprotocol/core'); + const kp = await createKeypair(); + const client = new RelayClient({ url: 'ws://localhost:8765', reconnect: false, pingIntervalMs: 999999 }); + + client.connect(kp); + mockWS.open(); + mockWS.receive({ type: 'authenticated', pubHex: 'abc' }); + await new Promise(r => setTimeout(r, 10)); + + // Send malformed JSON — should not crash + mockWS.onmessage?.({ data: 'this is not json {{{' }); + await new Promise(r => setTimeout(r, 10)); + + expect(client.getStatus()).toBe('connected'); + })); +}); diff --git a/packages/relay/src/tests/frame.test.ts b/packages/relay/src/tests/frame.test.ts new file mode 100644 index 000000000..fd76dab57 --- /dev/null +++ b/packages/relay/src/tests/frame.test.ts @@ -0,0 +1,63 @@ +import { describe, it, expect } from 'vitest'; +import { packFrame, unpackFrame, FRAME_SIZE, DOT_SIZE, encodeCircleId, decodeCircleId, CIRCLE_ID_SIZE } from '../types.js'; +// Import from the index barrel to count it as covered +import { packFrame as packFrameIdx, FRAME_SIZE as FRAME_SIZE_IDX } from '../index.js'; + +describe('frame encoding', () => { + it('packFrame produces FRAME_SIZE bytes', () => { + const dotBytes = new Uint8Array(DOT_SIZE).fill(0xab); + const frame = packFrame('test-circle-id', dotBytes); + expect(frame.length).toBe(FRAME_SIZE); + }); + + it('unpackFrame recovers circleId', () => { + const dotBytes = new Uint8Array(DOT_SIZE).fill(0xcd); + const frame = packFrame('my-circle-123', dotBytes); + const { circleId } = unpackFrame(frame); + expect(circleId).toBe('my-circle-123'); + }); + + it('unpackFrame recovers dotBytes', () => { + const dotBytes = new Uint8Array(DOT_SIZE).fill(0xef); + const frame = packFrame('c', 
// Helper: connect and authenticate a client via Ed25519 signature.
// Resolves with the open socket once the server answers 'authenticated';
// rejects on a protocol-level 'error' message or a socket 'error' event.
async function connectAndAuth(port: number, privateKey: Uint8Array): Promise<WebSocket> {
  return new Promise<WebSocket>((resolve, reject) => {
    const ws = new WebSocket(`ws://localhost:${port}`);

    const onMessage = (data: Buffer, isBinary: boolean): void => {
      // Skip binary frames — they are relayed DOT frames, not protocol messages
      if (isBinary) return;

      let msg: Record<string, unknown>;
      try {
        msg = JSON.parse(data.toString()) as Record<string, unknown>;
      } catch {
        return; // ignore malformed
      }

      if (msg['type'] === 'challenge') {
        // Sign the server nonce and send back the public key + signature.
        const nonce = Buffer.from(msg['nonce'] as string, 'hex');
        const pubKey = ed25519.getPublicKey(privateKey);
        const sig = ed25519.sign(nonce, privateKey);
        ws.send(JSON.stringify({
          type: 'auth',
          pubHex: Buffer.from(pubKey).toString('hex'),
          sig: Buffer.from(sig).toString('hex'),
        }));
      } else if (msg['type'] === 'authenticated') {
        // Handshake finished: stop inspecting traffic that now belongs to the test.
        ws.off('message', onMessage);
        resolve(ws);
      } else if (msg['type'] === 'error') {
        ws.off('message', onMessage);
        reject(new Error(`auth error: ${JSON.stringify(msg)}`));
      }
    };

    ws.on('message', onMessage);
    // Deliberately left attached for the socket's lifetime: after the promise
    // settles `reject` is a no-op, but the listener still keeps a late 'error'
    // event from being thrown as an unhandled emitter error.
    ws.on('error', reject);
  });
}

// Helper: wait for next JSON message from a websocket.
// Rejects (instead of throwing from inside the event callback) when the next
// message is not valid JSON, and detaches the sibling listener once settled.
function nextMessage(ws: WebSocket): Promise<Record<string, unknown>> {
  return new Promise<Record<string, unknown>>((resolve, reject) => {
    const onMessage = (data: Buffer): void => {
      ws.off('error', onError);
      try {
        resolve(JSON.parse(data.toString()) as Record<string, unknown>);
      } catch (err) {
        reject(err);
      }
    };
    const onError = (err: Error): void => {
      ws.off('message', onMessage);
      reject(err);
    };
    ws.once('message', onMessage);
    ws.once('error', onError);
  });
}

// Helper: wait for next binary frame from a websocket.
// Text (JSON) messages are skipped; both listeners are removed once settled.
function nextBinary(ws: WebSocket): Promise<Buffer> {
  return new Promise<Buffer>((resolve, reject) => {
    const onMessage = (data: Buffer, isBinary: boolean): void => {
      if (!isBinary) return;
      ws.off('message', onMessage);
      ws.off('error', onError);
      resolve(data);
    };
    const onError = (err: Error): void => {
      ws.off('message', onMessage);
      reject(err);
    };
    ws.on('message', onMessage);
    ws.once('error', onError);
  });
}
async () => { + const msg = await new Promise>((resolve, reject) => { + const ws = new WebSocket(`ws://localhost:${PORT}`); + ws.once('message', (data: Buffer) => { + resolve(JSON.parse(data.toString()) as Record); + ws.close(); + }); + ws.once('error', reject); + }); + expect(msg['type']).toBe('challenge'); + expect(typeof msg['nonce']).toBe('string'); + // 32 bytes = 64 hex chars + expect((msg['nonce'] as string).length).toBe(64); + }); + + it('authenticates with valid Ed25519 signature', async () => { + const privKey = ed25519.utils.randomSecretKey(); + const ws = await connectAndAuth(PORT, privKey); + expect(ws.readyState).toBe(WebSocket.OPEN); + ws.close(); + }); + + it('rejects invalid signature', async () => { + const msg = await new Promise>((resolve, reject) => { + const ws = new WebSocket(`ws://localhost:${PORT}`); + ws.once('message', () => { + // Challenge received — send wrong sig + ws.send(JSON.stringify({ + type: 'auth', + pubHex: 'a'.repeat(64), + sig: 'b'.repeat(128), + })); + ws.once('message', (d: Buffer) => { + resolve(JSON.parse(d.toString()) as Record); + ws.close(); + }); + }); + ws.once('error', reject); + }); + expect(msg['type']).toBe('error'); + expect(msg['code']).toBe('auth_failed'); + }); + + it('rejects auth message with missing fields', async () => { + const msg = await new Promise>((resolve, reject) => { + const ws = new WebSocket(`ws://localhost:${PORT}`); + ws.once('message', () => { + // Send auth missing sig field + ws.send(JSON.stringify({ type: 'auth', pubHex: 'aa'.repeat(32) })); + ws.once('message', (d: Buffer) => { + resolve(JSON.parse(d.toString()) as Record); + ws.close(); + }); + }); + ws.once('error', reject); + }); + expect(msg['type']).toBe('error'); + expect(msg['code']).toBe('auth_failed'); + }); + + it('subscribe returns subscribed confirmation', async () => { + const privKey = ed25519.utils.randomSecretKey(); + const ws = await connectAndAuth(PORT, privKey); + + const msgPromise = nextMessage(ws); + 
ws.send(JSON.stringify({ type: 'subscribe', circleId: 'test-circle' })); + const msg = await msgPromise; + + expect(msg['type']).toBe('subscribed'); + expect(msg['circleId']).toBe('test-circle'); + ws.close(); + }); + + it('routes binary frames between subscribers in the same circle', async () => { + const priv1 = ed25519.utils.randomSecretKey(); + const priv2 = ed25519.utils.randomSecretKey(); + const ws1 = await connectAndAuth(PORT, priv1); + const ws2 = await connectAndAuth(PORT, priv2); + + const circleId = 'route-test'; + + // Both subscribe + await Promise.all([ + new Promise(r => { ws1.once('message', () => r()); ws1.send(JSON.stringify({ type: 'subscribe', circleId })); }), + new Promise(r => { ws2.once('message', () => r()); ws2.send(JSON.stringify({ type: 'subscribe', circleId })); }), + ]); + + // Build a valid 185-byte frame: 32B circleId + 153B payload + const frame = Buffer.alloc(185); + const cidBytes = Buffer.from(circleId.padEnd(32, '\0'), 'utf8'); + cidBytes.copy(frame, 0); + frame[32] = 0xAB; // marker byte in payload + + const receivedPromise = nextBinary(ws2); + // Small yield to ensure listener is registered before sending + await new Promise(r => setTimeout(r, 10)); + ws1.send(frame); + const received = await receivedPromise; + + expect(received.length).toBe(185); + expect(received[32]).toBe(0xAB); + ws1.close(); + ws2.close(); + }); + + it('does not route frame back to sender', async () => { + const priv = ed25519.utils.randomSecretKey(); + const ws = await connectAndAuth(PORT, priv); + + await new Promise(r => { ws.once('message', () => r()); ws.send(JSON.stringify({ type: 'subscribe', circleId: 'self-test' })); }); + + const frame = Buffer.alloc(185); + Buffer.from('self-test'.padEnd(32, '\0'), 'utf8').copy(frame, 0); + ws.send(frame); + + // Should NOT receive it back — wait 200ms with no binary message + const gotMessage = await new Promise((resolve) => { + const timer = setTimeout(() => resolve(false), 200); + const onMsg = (_: Buffer, 
isBinary: boolean) => { + if (isBinary) { clearTimeout(timer); ws.off('message', onMsg); resolve(true); } + }; + ws.on('message', onMsg); + }); + expect(gotMessage).toBe(false); + ws.close(); + }); + + it('rejects binary frame from unauthenticated client', async () => { + const msg = await new Promise>((resolve, reject) => { + const ws = new WebSocket(`ws://localhost:${PORT}`); + ws.once('message', () => { + // Challenge received — skip auth, send binary immediately + const frame = Buffer.alloc(185); + ws.send(frame); + ws.once('message', (d: Buffer) => { + resolve(JSON.parse(d.toString()) as Record); + ws.close(); + }); + }); + ws.once('error', reject); + }); + expect(msg['type']).toBe('error'); + expect(msg['code']).toBe('not_authenticated'); + }); + + it('rejects binary frame with wrong size', async () => { + const priv = ed25519.utils.randomSecretKey(); + const ws = await connectAndAuth(PORT, priv); + + const msgPromise = nextMessage(ws); + ws.send(Buffer.alloc(100)); // wrong size — not 185 + const msg = await msgPromise; + + expect(msg['type']).toBe('error'); + expect(msg['code']).toBe('invalid_frame_size'); + ws.close(); + }); + + it('handles ping/pong', async () => { + const priv = ed25519.utils.randomSecretKey(); + const ws = await connectAndAuth(PORT, priv); + + const msgPromise = nextMessage(ws); + ws.send(JSON.stringify({ type: 'ping' })); + const msg = await msgPromise; + + expect(msg['type']).toBe('pong'); + expect(typeof msg['ts']).toBe('number'); + expect(msg['ts'] as number).toBeGreaterThan(0); + ws.close(); + }); + + it('handles unsubscribe — removes client from circle', async () => { + const priv1 = ed25519.utils.randomSecretKey(); + const priv2 = ed25519.utils.randomSecretKey(); + const ws1 = await connectAndAuth(PORT, priv1); + const ws2 = await connectAndAuth(PORT, priv2); + + const circleId = 'unsub-test'; + + // Both subscribe + await Promise.all([ + new Promise(r => { ws1.once('message', () => r()); ws1.send(JSON.stringify({ type: 
'subscribe', circleId })); }), + new Promise(r => { ws2.once('message', () => r()); ws2.send(JSON.stringify({ type: 'subscribe', circleId })); }), + ]); + + // ws2 unsubscribes + ws2.send(JSON.stringify({ type: 'unsubscribe', circleId })); + await new Promise(r => setTimeout(r, 100)); + + // Send frame from ws1 — ws2 should NOT receive it + const frame = Buffer.alloc(185); + Buffer.from(circleId.padEnd(32, '\0'), 'utf8').copy(frame, 0); + ws1.send(frame); + + const gotFrame = await new Promise((resolve) => { + const timer = setTimeout(() => resolve(false), 200); + const onMsg = (_: Buffer, isBinary: boolean) => { + if (isBinary) { clearTimeout(timer); ws2.off('message', onMsg); resolve(true); } + }; + ws2.on('message', onMsg); + }); + expect(gotFrame).toBe(false); + + ws1.close(); + ws2.close(); + }); + + it('cleans up subscriptions on disconnect', async () => { + const priv1 = ed25519.utils.randomSecretKey(); + const priv2 = ed25519.utils.randomSecretKey(); + const ws1 = await connectAndAuth(PORT, priv1); + + await new Promise(r => { ws1.once('message', () => r()); ws1.send(JSON.stringify({ type: 'subscribe', circleId: 'cleanup-test' })); }); + + // ws1 disconnects + ws1.close(); + await new Promise(r => setTimeout(r, 150)); + + // Server should still work — new client can connect and authenticate + const ws2 = await connectAndAuth(PORT, priv2); + expect(ws2.readyState).toBe(WebSocket.OPEN); + ws2.close(); + }); + + it('ignores malformed JSON messages', async () => { + const priv = ed25519.utils.randomSecretKey(); + const ws = await connectAndAuth(PORT, priv); + + // Send garbage JSON — server should not crash + ws.send('this is not json {{{{'); + await new Promise(r => setTimeout(r, 100)); + + // Server still responds to ping after malformed message + const msgPromise = nextMessage(ws); + ws.send(JSON.stringify({ type: 'ping' })); + const msg = await msgPromise; + expect(msg['type']).toBe('pong'); + ws.close(); + }); + + it('ignores frame with empty circleId 
after null strip', async () => { + const priv = ed25519.utils.randomSecretKey(); + const ws = await connectAndAuth(PORT, priv); + + // Frame with all-zero circleId field (strips to empty string) + const frame = Buffer.alloc(185, 0); + ws.send(frame); + await new Promise(r => setTimeout(r, 100)); + + // Should not crash — server still responds to ping + const msgPromise = nextMessage(ws); + ws.send(JSON.stringify({ type: 'ping' })); + const msg = await msgPromise; + expect(msg['type']).toBe('pong'); + ws.close(); + }); + + it('multiple subscribers all receive broadcast frame', async () => { + const priv1 = ed25519.utils.randomSecretKey(); + const priv2 = ed25519.utils.randomSecretKey(); + const priv3 = ed25519.utils.randomSecretKey(); + const ws1 = await connectAndAuth(PORT, priv1); + const ws2 = await connectAndAuth(PORT, priv2); + const ws3 = await connectAndAuth(PORT, priv3); + + const circleId = 'multi-test'; + await Promise.all([ + new Promise(r => { ws1.once('message', () => r()); ws1.send(JSON.stringify({ type: 'subscribe', circleId })); }), + new Promise(r => { ws2.once('message', () => r()); ws2.send(JSON.stringify({ type: 'subscribe', circleId })); }), + new Promise(r => { ws3.once('message', () => r()); ws3.send(JSON.stringify({ type: 'subscribe', circleId })); }), + ]); + + const frame = Buffer.alloc(185); + Buffer.from(circleId.padEnd(32, '\0'), 'utf8').copy(frame, 0); + frame[33] = 0xFF; // payload marker + + // Set up listeners first, then send + const got2Promise = nextBinary(ws2); + const got3Promise = nextBinary(ws3); + ws1.send(frame); + const got2 = await got2Promise; + const got3 = await got3Promise; + + expect(got2[33]).toBe(0xFF); + expect(got3[33]).toBe(0xFF); + ws1.close(); + ws2.close(); + ws3.close(); + }); + + it('subscribe is ignored if not authenticated', async () => { + // Connect but don't auth — send subscribe directly + const gotResponse = await new Promise((resolve, reject) => { + const ws = new WebSocket(`ws://localhost:${PORT}`); 
// CHORUS relay protocol types
// Frame format: 32B circleId (UTF-8, null-padded) + 153B DOT = 185 bytes total

export const CIRCLE_ID_SIZE = 32 as const;
export const DOT_SIZE = 153 as const;
export const FRAME_SIZE = 185 as const;

export interface RelayConfig {
  url: string;
  reconnect?: boolean;
  reconnectDelayMs?: number;
  pingIntervalMs?: number;
}

export type RelayStatus = 'disconnected' | 'connecting' | 'authenticating' | 'connected';

export interface IncomingFrame {
  circleId: string;
  dotBytes: Uint8Array;
}

export type RelayMessage =
  | { type: 'challenge'; nonce: string }
  | { type: 'authenticated'; pubHex: string }
  | { type: 'subscribed'; circleId: string }
  | { type: 'pong'; ts: number }
  | { type: 'error'; code: string; reason?: string };

/**
 * Encode circleId to a 32-byte null-padded buffer.
 *
 * The id is UTF-8 encoded first and then truncated to at most CIRCLE_ID_SIZE
 * *bytes*. Truncation never splits a multi-byte character: if the cut would
 * land inside one, that whole character is dropped, so the result always
 * decodes cleanly. (Truncating by string length instead would overflow the
 * buffer for non-ASCII ids, because one UTF-16 unit can need up to 3 bytes.)
 *
 * @param circleId - circle identifier; any length, any Unicode content
 * @returns a new Uint8Array of exactly CIRCLE_ID_SIZE bytes
 */
export function encodeCircleId(circleId: string): Uint8Array {
  const buf = new Uint8Array(CIRCLE_ID_SIZE);
  const encoded = new TextEncoder().encode(circleId);

  let end = Math.min(encoded.length, CIRCLE_ID_SIZE);
  if (end < encoded.length) {
    // encoded[end] is the first byte that would be dropped. While it is a
    // UTF-8 continuation byte (0b10xxxxxx) the cut is mid-character, so back
    // up to the lead byte of that character and drop it entirely.
    while (end > 0 && ((encoded[end] ?? 0) & 0xc0) === 0x80) {
      end--;
    }
  }

  buf.set(encoded.subarray(0, end));
  return buf;
}

/**
 * Decode circleId from a null-padded buffer (strip null padding).
 *
 * Decoding stops at the first 0x00 byte; if there is none, the whole buffer
 * is decoded.
 *
 * @param bytes - encoded circleId field
 * @returns the decoded identifier
 */
export function decodeCircleId(bytes: Uint8Array): string {
  const nullIdx = bytes.indexOf(0);
  return new TextDecoder().decode(nullIdx === -1 ? bytes : bytes.slice(0, nullIdx));
}

/**
 * Pack a 185-byte relay frame from circleId + 153-byte DOT bytes.
 *
 * @param circleId - circle identifier, encoded via encodeCircleId
 * @param dotBytes - DOT payload, must be exactly DOT_SIZE bytes
 * @returns a new Uint8Array of FRAME_SIZE bytes
 * @throws Error if dotBytes is not DOT_SIZE bytes long
 */
export function packFrame(circleId: string, dotBytes: Uint8Array): Uint8Array {
  if (dotBytes.length !== DOT_SIZE) {
    throw new Error(`DOT must be ${DOT_SIZE} bytes, got ${dotBytes.length}`);
  }
  const frame = new Uint8Array(FRAME_SIZE);
  frame.set(encodeCircleId(circleId), 0);
  frame.set(dotBytes, CIRCLE_ID_SIZE);
  return frame;
}

/**
 * Unpack a 185-byte relay frame into circleId + dotBytes.
 *
 * @param frame - relay frame, must be exactly FRAME_SIZE bytes
 * @returns the decoded circleId and a copy of the DOT payload bytes
 * @throws Error if frame is not FRAME_SIZE bytes long
 */
export function unpackFrame(frame: Uint8Array): { circleId: string; dotBytes: Uint8Array } {
  if (frame.length !== FRAME_SIZE) {
    throw new Error(`Frame must be ${FRAME_SIZE} bytes, got ${frame.length}`);
  }
  return {
    circleId: decodeCircleId(frame.slice(0, CIRCLE_ID_SIZE)),
    dotBytes: frame.slice(CIRCLE_ID_SIZE),
  };
}
a/packages/sdk/LICENSE b/packages/sdk/LICENSE new file mode 100644 index 000000000..d90201772 --- /dev/null +++ b/packages/sdk/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 DOT Protocol contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/packages/sdk/README.md b/packages/sdk/README.md new file mode 100644 index 000000000..dbd8fcd10 --- /dev/null +++ b/packages/sdk/README.md @@ -0,0 +1,82 @@ +# @dotprotocol/sdk + +Everything DOT Protocol in one install. Re-exports all packages. 
+ +[![npm](https://img.shields.io/npm/v/@dotprotocol/sdk)](https://www.npmjs.com/package/@dotprotocol/sdk) + +## Install + +```bash +npm install @dotprotocol/sdk +``` + +## Usage + +```js +import { + // Engine (high-level) + DOT, + + // Core primitives + createKeypair, createDOT, verifyDOT, checkChain, toBytes, fromBytes, + DotType, DOTFace, composeFaces, hasFace, activeFaces, + TransformRegistry, + + // Chain + scoring + buildScores, computeTier, updateElo, applyEloUpdates, computeW, + + // Relay + RelayClient, packFrame, unpackFrame, + + // Identity + loadKeypair, saveKeypair, generateDID, + + // QR + encodeBinary, decodeBinary, encodeSteganographic, selectQRSpec, + + // Arena + resolveSession, rankLeaderboard, computeEloFromMatches, + + // Compression + pack, unpack, + + // Wrapper + wrap, unwrap, +} from '@dotprotocol/sdk'; +``` + +## When to use + +- **Prototyping** — one install gets everything +- **CLI tools** — don't need minimal bundle size +- **Server-side** — Node.js apps where install size doesn't matter + +## When NOT to use + +For production apps that run in the browser, import only what you need: + +```bash +npm install @dotprotocol/core # if you only need primitives +npm install dot-protocol # if you want the high-level API +npm install @dotprotocol/qr # if you only need QR +``` + +The SDK is the kitchen sink. Smaller focused installs produce smaller bundles. 
+ +## Included packages + +| Package | Exports | +|---|---| +| `dot-protocol` | `DOT` | +| `@dotprotocol/core` | Primitives, types, faces, transforms | +| `@dotprotocol/chain` | WorldLine, Four-Score | +| `@dotprotocol/relay` | RelayClient, frames | +| `@dotprotocol/identity` | Keypair persistence, DID | +| `@dotprotocol/compression` | pack / unpack | +| `@dotprotocol/qr` | QR encode / decode | +| `@dotprotocol/arena` | Elo, resolution | +| `@dotprotocol/wrapper` | Binary wrap / unwrap | + +## License + +MIT diff --git a/packages/sdk/package.json b/packages/sdk/package.json new file mode 100644 index 000000000..5a1760a3f --- /dev/null +++ b/packages/sdk/package.json @@ -0,0 +1,57 @@ +{ + "name": "@dotprotocol/sdk", + "version": "0.3.0", + "description": "DOT Protocol SDK \u2014 one install for signing, compression, wrapping, and identity.", + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "import": "./dist/index.js", + "require": "./dist/index.cjs", + "types": "./dist/index.d.ts" + } + }, + "sideEffects": false, + "scripts": { + "build": "tsdown", + "test": "vitest run", + "typecheck": "tsc --noEmit" + }, + "keywords": [ + "dot-protocol", + "sdk", + "compression", + "signing", + "wrapper" + ], + "license": "MIT", + "files": [ + "dist", + "README.md", + "LICENSE" + ], + "devDependencies": { + "@types/node": "^20.0.0" + }, + "dependencies": { + "@dotprotocol/core": "workspace:*", + "@dotprotocol/compression": "workspace:*", + "@dotprotocol/identity": "workspace:*", + "@dotprotocol/chain": "workspace:*", + "@dotprotocol/relay": "workspace:*", + "@dotprotocol/wrapper": "workspace:*", + "@dotprotocol/qr": "workspace:*", + "@dotprotocol/arena": "workspace:*" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/dot-protocol/dot.git", + "directory": "packages/sdk" + }, + "homepage": "https://github.com/dot-protocol/dot/tree/main/packages/sdk#readme", + "bugs": { + 
"url": "https://github.com/dot-protocol/dot/issues" + } +} diff --git a/packages/sdk/src/index.ts b/packages/sdk/src/index.ts new file mode 100644 index 000000000..5344a9be6 --- /dev/null +++ b/packages/sdk/src/index.ts @@ -0,0 +1,110 @@ +/** + * @dotprotocol/sdk + * + * Umbrella package re-exporting all @dotprotocol/* packages. + * Install this for the full DOT Protocol developer experience. + * + * @example + * import { createDOT, wrap, dotId, serializeBatchV2 } from '@dotprotocol/sdk'; + */ + +// ── Core — DOT creation, signing, verification, BLS ───────────────────────── +export * from '@dotprotocol/core'; + +// ── Compression — batch v2, zstd, rANS, predictor, Weissman ───────────────── +export * from '@dotprotocol/compression'; + +// ── Identity — keypair + genesis DOT + export/import ──────────────────────── +// (createIdentity, exportIdentity, importIdentity — no conflicts with core) +export * from '@dotprotocol/identity'; + +// ── Chain — append-only worldline + pluggable storage ─────────────────────── +// (createChain, appendDOT, getHead, getRange, verifyChain, MemoryStorage — no conflicts) +export * from '@dotprotocol/chain'; + +// ── Relay — CHORUS relay client + server ──────────────────────────────────── +// DOT_SIZE is excluded here — it conflicts with core's DOT_SIZE (both = 153). +// Use the relay-specific constants via their unique names. 
+export { + RelayClient, + packFrame, + unpackFrame, + encodeCircleId, + decodeCircleId, + FRAME_SIZE, + CIRCLE_ID_SIZE, +} from '@dotprotocol/relay'; +export type { + RelayConfig, + RelayStatus, + IncomingFrame, + RelayMessage, + FrameHandler, + StatusHandler, +} from '@dotprotocol/relay'; + +// ── QR — encode/decode DOTs into scannable physical objects (Falooda) ──────── +export { + encodeBinary, + decodeBinary, + encodeSteganographic, + decodeSteganographic, + encodeNested, + decodeNested, + selectQRSpec, + verifyPhysicalDOTs, + QR_CAPACITY, +} from '@dotprotocol/qr'; +export type { + QRDOTSpec, + PhysicalDOT, + QRDecodeResult, + QREncoding, + QRErrorCorrection, +} from '@dotprotocol/qr'; + +// ── Arena — Elo engine, blind evaluation, prediction resolution ────────────── +export { + updateElo, + applyEloUpdates, + computeEloFromMatches, + computeEloPercentile, + rankLeaderboard, + ELO_DEFAULT, + verifyResolution, + verifyPrediction, + resolveSession, + hashPredictionDOT, +} from '@dotprotocol/arena'; +export type { + PredictionDOT, + ResolutionDOT, + ArenaMatch, + BlindEvalSession, + LeaderboardEntry, + EloUpdate, +} from '@dotprotocol/arena'; + +// ── Wrapper — wrap/unwrap any binary payload as DOT chain ─────────────────── +// DotType is excluded here — it is already exported by core above. 
+export { + wrap, + unwrap, + createSession, + createSessionFromKeypair, + bridge, + bridgeFetch, + dotId, +} from '@dotprotocol/wrapper'; +export type { + WrappedChain, + UnwrappedPayload, + WrapOptions, + UnwrapOptions, + WrapSession, + Protocol, + BridgeOptions, + BridgeHandle, + DotIdentity, + IdentityOptions, +} from '@dotprotocol/wrapper'; diff --git a/packages/sdk/src/tests/sdk.test.ts b/packages/sdk/src/tests/sdk.test.ts new file mode 100644 index 000000000..282d197fb --- /dev/null +++ b/packages/sdk/src/tests/sdk.test.ts @@ -0,0 +1,87 @@ +import { describe, it, expect } from 'vitest'; +import { + // from core + createKeypair, + createDOT, + DotType, + toBytes, + fromBytes, + verifyDOT, + createBLSKeypair, + // from compression + serializeBatchV2, + deserializeBatchV2, + // from wrapper + wrap, + unwrap, + createSession, + dotId, +} from '../index.js'; + +describe('@dotprotocol/sdk — umbrella smoke tests', () => { + it('all imports are defined', () => { + expect(createKeypair).toBeDefined(); + expect(createDOT).toBeDefined(); + expect(DotType).toBeDefined(); + expect(toBytes).toBeDefined(); + expect(fromBytes).toBeDefined(); + expect(verifyDOT).toBeDefined(); + expect(createBLSKeypair).toBeDefined(); + expect(serializeBatchV2).toBeDefined(); + expect(deserializeBatchV2).toBeDefined(); + expect(wrap).toBeDefined(); + expect(unwrap).toBeDefined(); + expect(createSession).toBeDefined(); + expect(dotId).toBeDefined(); + }); + + it('createDOT + verifyDOT works', async () => { + const keypair = await createKeypair(); + const dot = await createDOT({ keypair, type: DotType.PUBLIC }); + const bytes = toBytes(dot); + expect(bytes).toHaveLength(153); + const valid = await verifyDOT(dot); + expect(valid).toBe(true); + }); + + it('wrap + unwrap round-trip', async () => { + const payload = new TextEncoder().encode('hello dot protocol'); + const session = await createSession(); + const chain = await wrap(payload, { session }); + // unwrap takes (frame, options) — pass 
frame bytes + blsPublicKey for verified decode + const result = await unwrap(chain.frame, { blsPublicKey: chain.blsPublicKey }); + expect(result.verified).toBe(true); + expect(result.data).toEqual(payload); + }); + + it('serializeBatchV2 + deserializeBatchV2 works', async () => { + const keypair = await createKeypair(); + const blsKeypair = createBLSKeypair(); + const dot1 = await createDOT({ keypair, type: DotType.PUBLIC }); + const dot2 = await createDOT({ keypair, type: DotType.PUBLIC, previous: toBytes(dot1) }); + const dotBytes = [toBytes(dot1), toBytes(dot2)]; + + const packed = await serializeBatchV2(dotBytes, blsKeypair); + const unpacked = await deserializeBatchV2(packed, blsKeypair.publicKey); + expect(unpacked).toHaveLength(2); + + // batch-v2 reconstructs chain hashes on decode — compare only the invariant fields: + // pubkey [0..31], timestamp [128..135], type [136], payload [137..152] + for (let i = 0; i < dotBytes.length; i++) { + const orig = dotBytes[i]!; + const rec = unpacked[i]!; + expect(rec).toHaveLength(153); + expect(Array.from(rec.subarray(0, 32))).toEqual(Array.from(orig.subarray(0, 32))); // pubkey + expect(Array.from(rec.subarray(128, 136))).toEqual(Array.from(orig.subarray(128, 136))); // timestamp + expect(rec[136]).toBe(orig[136]); // type + expect(Array.from(rec.subarray(137, 153))).toEqual(Array.from(orig.subarray(137, 153))); // payload + } + }); + + it('dotId() returns an identity with publicKey and did', async () => { + const identity = await dotId(); + expect(identity.publicKey).toBeInstanceOf(Uint8Array); + expect(identity.publicKey).toHaveLength(32); + expect(identity.did).toMatch(/^dot:/); + }); +}); diff --git a/packages/sdk/tsconfig.json b/packages/sdk/tsconfig.json new file mode 100644 index 000000000..f6b4a9e5b --- /dev/null +++ b/packages/sdk/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "dist", + "rootDir": "src", + "declarationDir": "dist" + }, + "include": 
["src"]
+}
diff --git a/packages/sdk/tsdown.config.ts b/packages/sdk/tsdown.config.ts
new file mode 100644
index 000000000..aa536bdcb
--- /dev/null
+++ b/packages/sdk/tsdown.config.ts
@@ -0,0 +1,11 @@
+import { defineConfig } from 'tsdown'
+
+export default defineConfig({
+  entry: ['src/index.ts'],
+  format: ['esm', 'cjs'],
+  dts: true,
+  clean: true,
+  outDir: 'dist',
+  platform: 'neutral',
+  sourcemap: true,
+})
diff --git a/packages/sdk/vitest.config.ts b/packages/sdk/vitest.config.ts
new file mode 100644
index 000000000..855963365
--- /dev/null
+++ b/packages/sdk/vitest.config.ts
@@ -0,0 +1,22 @@
+import { defineConfig } from 'vitest/config';
+import { resolve } from 'node:path';
+
+// Point every @dotprotocol/* package that src/index.ts re-exports at its TypeScript
+// entry, so the smoke tests do not depend on a sibling package's built dist/ output.
+export default defineConfig({
+  resolve: {
+    alias: {
+      '@dotprotocol/core': resolve(__dirname, '../core/src/index.ts'),
+      '@dotprotocol/compression': resolve(__dirname, '../compression/src/index.ts'),
+      '@dotprotocol/identity': resolve(__dirname, '../identity/src/index.ts'),
+      '@dotprotocol/chain': resolve(__dirname, '../chain/src/index.ts'),
+      '@dotprotocol/relay': resolve(__dirname, '../relay/src/index.ts'),
+      '@dotprotocol/wrapper': resolve(__dirname, '../wrapper/src/index.ts'),
+      '@dotprotocol/qr': resolve(__dirname, '../qr/src/index.ts'),
+      '@dotprotocol/arena': resolve(__dirname, '../arena/src/index.ts'),
+    },
+  },
+  test: {
+    environment: 'node',
+  },
+});
diff --git a/packages/wrapper/.npmignore b/packages/wrapper/.npmignore
new file mode 100644
index 000000000..91b700e0b
--- /dev/null
+++ b/packages/wrapper/.npmignore
@@ -0,0 +1,4 @@
+src/tests/
+src/**/*.test.ts
+coverage/
+*.tsbuildinfo
diff --git a/packages/wrapper/LICENSE b/packages/wrapper/LICENSE
new file mode 100644
index 000000000..d90201772
--- /dev/null
+++ b/packages/wrapper/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 DOT Protocol contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute,
sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/packages/wrapper/README.md b/packages/wrapper/README.md new file mode 100644 index 000000000..391bf1790 --- /dev/null +++ b/packages/wrapper/README.md @@ -0,0 +1,100 @@ +# @dotprotocol/wrapper + +Wrap any binary data as a DOT chain. Legacy bridge for existing binary formats. + +[![npm](https://img.shields.io/npm/v/@dotprotocol/wrapper)](https://www.npmjs.com/package/@dotprotocol/wrapper) + +## Install + +```bash +npm install @dotprotocol/wrapper +``` + +## Quick start + +```js +import { wrap, unwrap } from '@dotprotocol/wrapper'; +import { createKeypair } from '@dotprotocol/core'; + +const keypair = await createKeypair(); + +// Wrap a file as a DOT chain +const fileBytes = fs.readFileSync('document.pdf'); +const chain = await wrap(keypair, fileBytes); + +// chain is DOT[] — 153-byte DOTs containing the full file +console.log(`${fileBytes.length} bytes → ${chain.length} DOTs`); + +// Reconstruct original bytes +const recovered = await unwrap(chain); +``` + +## How it works + +`wrap()` chunks input bytes into 16-byte payload slices. Each chunk becomes one DOT, cryptographically chained to the previous. The full chain is the original data plus provenance. + +``` +[file bytes] → chunks of 16 → [DOT 1] → [DOT 2] → ... 
→ [DOT N] + ↑ ↑ ↑ + chain hash chain hash chain hash +``` + +`unwrap()` reads payloads in order, verifies every link, and reconstructs the original bytes. + +## API + +### `wrap(keypair, data, options?)` + +```js +const chain = await wrap(keypair, data, { + chunkSize: 16, // bytes per DOT payload (default 16 = max) + type: 0x00, // DOT type for all wrapped DOTs +}); +// Returns: DOT[] +``` + +### `unwrap(chain)` + +```js +const bytes = await unwrap(chain); +// Verifies all signatures and chain hashes before returning +// Throws if any DOT is invalid or chain is broken +``` + +### `wrapStream(keypair, readableStream)` + +```js +import { wrapStream } from '@dotprotocol/wrapper'; + +const dots = []; +for await (const dot of wrapStream(keypair, fs.createReadStream('big-file.bin'))) { + dots.push(dot); +} +``` + +## Use cases + +- Make existing binary files verifiable (who created it, when, has it changed?) +- Archive files with cryptographic provenance +- Stream media with tamper detection on every 16-byte chunk +- Bridge non-DOT-native data into DOT-compatible systems + +## Storage efficiency + +Each 16 bytes of data requires 153 bytes of DOT — a 9.6× overhead. For large files, store the file externally (IPFS, S3) and use a single DOT as an attestation instead: + +```js +import { createDOT } from '@dotprotocol/core'; +import { sha256 } from '@dotprotocol/core'; + +const hash = await sha256(fileBytes); +const pointer = hash.slice(0, 16); // first 16 bytes of hash +const dot = await createDOT({ keypair, payload: pointer }); +// One DOT = proof of who created the file and when +``` + +Use `wrap()` only when you need the data itself to be chain-verifiable and DOT-portable. 
+ +## License + +MIT diff --git a/packages/wrapper/package.json b/packages/wrapper/package.json new file mode 100644 index 000000000..1fb0d7dac --- /dev/null +++ b/packages/wrapper/package.json @@ -0,0 +1,51 @@ +{ + "name": "@dotprotocol/wrapper", + "version": "0.3.0", + "description": "Wrap any binary protocol (HTTPS, WebSocket, JSON, raw bytes) as a signed, compressed DOT chain.", + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "import": "./dist/index.js", + "require": "./dist/index.cjs", + "types": "./dist/index.d.ts" + } + }, + "sideEffects": false, + "scripts": { + "build": "tsdown", + "test": "vitest run", + "typecheck": "tsc --noEmit" + }, + "keywords": [ + "dot-protocol", + "wrapper", + "compression", + "signing" + ], + "license": "MIT", + "files": [ + "dist", + "README.md", + "LICENSE" + ], + "devDependencies": { + "@types/node": "^20.0.0" + }, + "dependencies": { + "@dotprotocol/core": "workspace:*", + "@dotprotocol/compression": "workspace:*", + "@noble/curves": "^2.0.1" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/dot-protocol/dot.git", + "directory": "packages/wrapper" + }, + "homepage": "https://github.com/dot-protocol/dot/tree/main/packages/wrapper#readme", + "bugs": { + "url": "https://github.com/dot-protocol/dot/issues" + } +} diff --git a/packages/wrapper/scripts/milestone-claude-api.ts b/packages/wrapper/scripts/milestone-claude-api.ts new file mode 100644 index 000000000..19178603d --- /dev/null +++ b/packages/wrapper/scripts/milestone-claude-api.ts @@ -0,0 +1,361 @@ +/** + * DOT Protocol Phase 1 Milestone — Claude API Conversation + * + * Measures: raw HTTPS bytes vs DOT chain bytes for a multi-turn + * Claude conversation. Verifies round-trip lossless invariant. 
+ * + * Run: + * cd packages/wrapper + * npx tsx --tsconfig tsconfig.json scripts/milestone-claude-api.ts + */ + +import { wrap, unwrap, createSession } from '../src/index.js'; + +// ─── Types ──────────────────────────────────────────────────────────────────── + +interface Message { + role: 'user' | 'assistant'; + content: string; +} + +interface ConversationTurn { + request: { + model: string; + max_tokens: number; + messages: Message[]; + }; + response: { + id: string; + type: string; + role: string; + content: Array<{ type: string; text: string }>; + model: string; + stop_reason: string; + usage: { input_tokens: number; output_tokens: number }; + }; +} + +interface TurnStats { + turn: number; + label: string; + rawBytes: number; + dotBytes: number; + ratio: number; + verified: boolean; + lossless: boolean; +} + +// ─── Mock conversation (fallback when no API key) ───────────────────────────── + +const MOCK_CONVERSATION: ConversationTurn[] = [ + { + request: { + model: 'claude-sonnet-4-20250514', + max_tokens: 100, + messages: [{ role: 'user', content: 'What is the DOT Protocol?' }], + }, + response: { + id: 'msg_01', + type: 'message', + role: 'assistant', + content: [ + { + type: 'text', + text: 'The DOT Protocol is a 153-byte cryptographic observation format. Each DOT contains a public key (32B), Ed25519 signature (64B), SHA-256 chain hash (32B), timestamp (8B), type byte, and 16-byte payload. It is designed for zero-dependency, transport-agnostic attestation.', + }, + ], + model: 'claude-sonnet-4-20250514', + stop_reason: 'end_turn', + usage: { input_tokens: 15, output_tokens: 58 }, + }, + }, + { + request: { + model: 'claude-sonnet-4-20250514', + max_tokens: 150, + messages: [ + { role: 'user', content: 'What is the DOT Protocol?' }, + { + role: 'assistant', + content: + 'The DOT Protocol is a 153-byte cryptographic observation format. 
Each DOT contains a public key (32B), Ed25519 signature (64B), SHA-256 chain hash (32B), timestamp (8B), type byte, and 16-byte payload. It is designed for zero-dependency, transport-agnostic attestation.', + }, + { role: 'user', content: 'How does compression work in the DOT Protocol SDK?' }, + ], + }, + response: { + id: 'msg_02', + type: 'message', + role: 'assistant', + content: [ + { + type: 'text', + text: 'Compression in the DOT Protocol SDK uses batch serialization (batch-v2 format). Multiple DOTs are packed into column-oriented frames: timestamps stored as deltas, types run-length encoded, payloads concatenated. This achieves significant compression on repetitive sensor-like data streams. The BLS aggregate signature covers all DOTs in a batch, reducing per-DOT overhead from 64 bytes to a single 48-byte aggregate.', + }, + ], + model: 'claude-sonnet-4-20250514', + stop_reason: 'end_turn', + usage: { input_tokens: 87, output_tokens: 79 }, + }, + }, + { + request: { + model: 'claude-sonnet-4-20250514', + max_tokens: 200, + messages: [ + { role: 'user', content: 'What is the DOT Protocol?' }, + { + role: 'assistant', + content: + 'The DOT Protocol is a 153-byte cryptographic observation format. Each DOT contains a public key (32B), Ed25519 signature (64B), SHA-256 chain hash (32B), timestamp (8B), type byte, and 16-byte payload. It is designed for zero-dependency, transport-agnostic attestation.', + }, + { role: 'user', content: 'How does compression work in the DOT Protocol SDK?' }, + { + role: 'assistant', + content: + 'Compression in the DOT Protocol SDK uses batch serialization (batch-v2 format). Multiple DOTs are packed into column-oriented frames: timestamps stored as deltas, types run-length encoded, payloads concatenated. This achieves significant compression on repetitive sensor-like data streams. 
The BLS aggregate signature covers all DOTs in a batch, reducing per-DOT overhead from 64 bytes to a single 48-byte aggregate.', + }, + { + role: 'user', + content: + 'What is the Weissman Score for DOT compression and what techniques achieve it?', + }, + ], + }, + response: { + id: 'msg_03', + type: 'message', + role: 'assistant', + content: [ + { + type: 'text', + text: 'The Weissman Score for DOT compression is W=29.2 over gzip on sensor streams, achieved through three techniques: (1) column layout separating timestamp, type, and payload columns for better entropy characteristics per column; (2) timestamp delta encoding — only the difference between consecutive timestamps is stored, compressing 8-byte values to 1-2 bytes for high-frequency streams; (3) type run-length encoding — consecutive DOTs of the same type (e.g., PUBLIC) encode as a count prefix rather than repeating the byte per DOT. Together these three transforms align the data to the pattern of real-world observation streams before any dictionary or entropy coding is applied.', + }, + ], + model: 'claude-sonnet-4-20250514', + stop_reason: 'end_turn', + usage: { input_tokens: 175, output_tokens: 128 }, + }, + }, +]; + +// ─── Live API fetch ─────────────────────────────────────────────────────────── + +async function fetchClaudeConversation(apiKey: string): Promise { + const turns: ConversationTurn[] = []; + const history: Message[] = []; + const questions = [ + 'What is the DOT Protocol?', + 'How does compression work in the DOT Protocol SDK?', + 'What is the Weissman Score for DOT compression and what techniques achieve it?', + ]; + + for (const question of questions) { + history.push({ role: 'user', content: question }); + + const requestPayload = { + model: 'claude-opus-4-5', + max_tokens: 200, + messages: history, + }; + + const res = await fetch('https://api.anthropic.com/v1/messages', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'x-api-key': apiKey, + 
'anthropic-version': '2023-06-01',
+      },
+      body: JSON.stringify(requestPayload),
+    });
+
+    if (!res.ok) {
+      throw new Error(`Anthropic API error: ${res.status} ${res.statusText}`);
+    }
+
+    const responsePayload = await res.json() as ConversationTurn['response'];
+    const assistantText = responsePayload.content[0]?.text ?? '';
+
+    // `requestPayload.messages` is the live `history` array, which keeps growing on
+    // later iterations. Record a snapshot taken before the assistant reply is
+    // appended, so each turn keeps exactly the messages that were sent for it.
+    turns.push({
+      request: { ...requestPayload, messages: [...history] },
+      response: responsePayload,
+    });
+    history.push({ role: 'assistant', content: assistantText });
+  }
+
+  return turns;
+}
+
+// ─── Formatting helpers ───────────────────────────────────────────────────────
+
+/** Format a number with en-US grouping separators (e.g. 1,234). */
+function fmt(n: number): string {
+  return n.toLocaleString('en-US');
+}
+
+/** Render raw/dot as a two-decimal multiplier string such as "2.50×". */
+function ratio(raw: number, dot: number): string {
+  return (raw / dot).toFixed(2) + '×';
+}
+
+/** Right-pad `s` with spaces to at least `len` characters. */
+function pad(s: string, len: number): string {
+  return s.padEnd(len);
+}
+
+/** Left-pad `s` with spaces to at least `len` characters. */
+function lpad(s: string, len: number): string {
+  return s.padStart(len);
+}
+
+/** Byte-wise equality of two arrays; false when lengths differ. */
+function bytesEqual(a: Uint8Array, b: Uint8Array): boolean {
+  if (a.length !== b.length) return false;
+  for (let i = 0; i < a.length; i++) {
+    if (a[i] !== b[i]) return false;
+  }
+  return true;
+}
+
+// ─── Main ─────────────────────────────────────────────────────────────────────
+
+async function main(): Promise<void> {
+  const apiKey = process.env['ANTHROPIC_API_KEY'];
+  const usingLive = Boolean(apiKey);
+
+  console.log('DOT Protocol Phase 1 Milestone — Claude API Conversation');
+  console.log('='.repeat(58));
+  console.log(`Mode: ${usingLive ? 
'LIVE (Anthropic API)' : 'MOCK (no ANTHROPIC_API_KEY found)'}`); + console.log(); + + // ── Fetch or use mock ──────────────────────────────────────────────────── + let conversation: ConversationTurn[]; + if (usingLive && apiKey) { + console.log('Fetching live conversation from Anthropic API...'); + conversation = await fetchClaudeConversation(apiKey); + console.log('Done.\n'); + } else { + conversation = MOCK_CONVERSATION; + } + + // ── Create stateful session ─────────────────────────────────────────────── + const session = await createSession(); + + // ── Process each turn ───────────────────────────────────────────────────── + const stats: TurnStats[] = []; + let allVerified = true; + let allLossless = true; + + const enc = new TextEncoder(); + const dec = new TextDecoder(); + + for (let i = 0; i < conversation.length; i++) { + const turn = conversation[i]!; + const turnNum = i + 1; + + // — Request — + const reqJson = JSON.stringify(turn.request); + const reqBytes = enc.encode(reqJson); + const rawReqBytes = reqBytes.length; + + const wrappedReq = await wrap(reqBytes, { protocol: 'json', session }); + const dotReqBytes = wrappedReq.frame.length; + + const unwrappedReq = await unwrap(wrappedReq.frame, { blsPublicKey: wrappedReq.blsPublicKey }); + const reqVerified = unwrappedReq.verified; + const reqLossless = bytesEqual(unwrappedReq.data, reqBytes); + + const reqLabel = `Turn ${turnNum} — Request`; + const userMsg = turn.request.messages.filter(m => m.role === 'user').pop(); + const userPreview = userMsg ? `"${userMsg.content.slice(0, 45)}${userMsg.content.length > 45 ? '...' : ''}"` : ''; + + console.log(`${reqLabel}`); + if (userPreview) console.log(` User: ${userPreview}`); + console.log(` Raw JSON: ${lpad(fmt(rawReqBytes) + ' B', 12)} (baseline)`); + console.log(` DOT frame: ${lpad(fmt(dotReqBytes) + ' B', 12)} (${ratio(rawReqBytes, dotReqBytes)} compression)`); + console.log(` Verified: ${reqVerified ? '✓' : '✗'} Lossless: ${reqLossless ? 
'✓' : '✗'}`); + console.log(); + + stats.push({ + turn: turnNum, + label: `T${turnNum}-req`, + rawBytes: rawReqBytes, + dotBytes: dotReqBytes, + ratio: rawReqBytes / dotReqBytes, + verified: reqVerified, + lossless: reqLossless, + }); + + if (!reqVerified) allVerified = false; + if (!reqLossless) allLossless = false; + + // — Response — + const resJson = JSON.stringify(turn.response); + const resBytes = enc.encode(resJson); + const rawResBytes = resBytes.length; + + const wrappedRes = await wrap(resBytes, { protocol: 'json', session }); + const dotResBytes = wrappedRes.frame.length; + + const unwrappedRes = await unwrap(wrappedRes.frame, { blsPublicKey: wrappedRes.blsPublicKey }); + const resVerified = unwrappedRes.verified; + const resLossless = bytesEqual(unwrappedRes.data, resBytes); + + // Sanity-check: reconstruct and compare JSON round-trip + const resRoundTrip = dec.decode(unwrappedRes.data); + const resJsonMatch = resRoundTrip === resJson; + + const resLabel = `Turn ${turnNum} — Response`; + const assistantText = turn.response.content[0]?.text ?? ''; + const assistantPreview = `"${assistantText.slice(0, 45)}${assistantText.length > 45 ? '...' : ''}"`; + + console.log(`${resLabel}`); + console.log(` Assistant: ${assistantPreview}`); + console.log(` Raw JSON: ${lpad(fmt(rawResBytes) + ' B', 12)} (baseline)`); + console.log(` DOT frame: ${lpad(fmt(dotResBytes) + ' B', 12)} (${ratio(rawResBytes, dotResBytes)} compression)`); + console.log(` Verified: ${resVerified ? '✓' : '✗'} Lossless: ${resLossless ? '✓' : '✗'} JSON match: ${resJsonMatch ? 
'✓' : '✗'}`); + console.log(); + + stats.push({ + turn: turnNum, + label: `T${turnNum}-res`, + rawBytes: rawResBytes, + dotBytes: dotResBytes, + ratio: rawResBytes / dotResBytes, + verified: resVerified, + lossless: resLossless, + }); + + if (!resVerified) allVerified = false; + if (!resLossless) allLossless = false; + } + + // ── Summary ─────────────────────────────────────────────────────────────── + const totalRaw = stats.reduce((acc, s) => acc + s.rawBytes, 0); + const totalDot = stats.reduce((acc, s) => acc + s.dotBytes, 0); + const overallRatio = totalRaw / totalDot; + + console.log('='.repeat(58)); + console.log('SUMMARY'); + console.log(` Total raw bytes: ${lpad(fmt(totalRaw) + ' B', 12)}`); + console.log(` Total DOT bytes: ${lpad(fmt(totalDot) + ' B', 12)}`); + console.log(` Overall compression: ${overallRatio.toFixed(2)}×`); + console.log(` All signatures: ${allVerified ? '✓ verified' : '✗ FAILED'}`); + console.log(` Lossless invariant: ${allLossless ? '✓ all payloads byte-identical after round-trip' : '✗ FAILED'}`); + console.log(); + + // Per-turn breakdown table + console.log(' Per-turn breakdown:'); + console.log(` ${'Label'.padEnd(10)} ${'Raw'.padStart(9)} ${'DOT'.padStart(9)} ${'Ratio'.padStart(8)}`); + console.log(` ${'-'.repeat(10)} ${'-'.repeat(9)} ${'-'.repeat(9)} ${'-'.repeat(8)}`); + for (const s of stats) { + console.log( + ` ${s.label.padEnd(10)} ${lpad(fmt(s.rawBytes) + ' B', 9)} ${lpad(fmt(s.dotBytes) + ' B', 9)} ${lpad((s.rawBytes / s.dotBytes).toFixed(2) + '×', 8)}`, + ); + } + console.log(); + + const passed = allVerified && allLossless; + console.log(`VERDICT: Phase 1 milestone ${passed ? 
'PASSED' : 'FAILED'}`);
+
+  if (!passed) {
+    process.exit(1);
+  }
+}
+
+main().catch(err => {
+  console.error('Fatal:', err);
+  process.exit(1);
+});
diff --git a/packages/wrapper/src/bridge.ts b/packages/wrapper/src/bridge.ts
new file mode 100644
index 000000000..32be188b5
--- /dev/null
+++ b/packages/wrapper/src/bridge.ts
@@ -0,0 +1,382 @@
+// @dotprotocol/wrapper — bridge()
+// TCP proxy that translates DOT chains <-> HTTP requests.
+// Node.js only (net, fetch — not browser).
+
+import * as net from 'node:net';
+import { wrap, unwrap } from './index.js';
+import { createSession } from './session.js';
+import type { WrapSession, BridgeOptions, BridgeHandle } from './types.js';
+
+// ─── Wire protocol helpers ─────────────────────────────────────────────────────
+
+/**
+ * Serialize an HTTP request into the DOT payload bytes format:
+ * [method_len (1B)][method][url_len (2B uint16 BE)][url][headers_len (2B uint16 BE)][headers JSON][body_len (4B uint32 BE)][body]
+ *
+ * All lengths are counted in UTF-8 bytes, not characters.
+ *
+ * @throws RangeError if a field does not fit its fixed-width length prefix.
+ */
+function serializeRequest(
+  method: string,
+  url: string,
+  headers: Record<string, string>,
+  body: Uint8Array,
+): Uint8Array {
+  const enc = new TextEncoder();
+  const methodBytes = enc.encode(method);
+  const urlBytes = enc.encode(url);
+  const headersBytes = enc.encode(JSON.stringify(headers));
+
+  // The typed-array / DataView writes below wrap an oversized length modulo the
+  // prefix width instead of failing, which would emit a frame that decodes wrongly.
+  if (methodBytes.length > 0xff) {
+    throw new RangeError(`serializeRequest: method too long (${methodBytes.length} > 255 bytes)`);
+  }
+  if (urlBytes.length > 0xffff) {
+    throw new RangeError(`serializeRequest: url too long (${urlBytes.length} > 65535 bytes)`);
+  }
+  if (headersBytes.length > 0xffff) {
+    throw new RangeError(`serializeRequest: headers too long (${headersBytes.length} > 65535 bytes)`);
+  }
+  if (body.length > 0xffffffff) {
+    throw new RangeError(`serializeRequest: body too long (${body.length} > 4294967295 bytes)`);
+  }
+
+  const totalLen =
+    1 + methodBytes.length +
+    2 + urlBytes.length +
+    2 + headersBytes.length +
+    4 + body.length;
+
+  const buf = new Uint8Array(totalLen);
+  const view = new DataView(buf.buffer);
+  let offset = 0;
+
+  // method_len (1B)
+  buf[offset++] = methodBytes.length;
+  // method
+  buf.set(methodBytes, offset); offset += methodBytes.length;
+  // url_len (2B BE)
+  view.setUint16(offset, urlBytes.length, false); offset += 2;
+  // url
+  buf.set(urlBytes, offset); offset += urlBytes.length;
+  // headers_len (2B BE)
+  view.setUint16(offset, headersBytes.length, false); offset += 2;
+  // headers JSON
+  buf.set(headersBytes, offset); offset += headersBytes.length;
+  // body_len (4B BE)
+  view.setUint32(offset, 
body.length, false); offset += 4;
+  // body
+  buf.set(body, offset);
+
+  return buf;
+}
+
+/**
+ * Deserialize the HTTP request bytes (see serializeRequest for layout).
+ *
+ * The returned `body` is a view into `data` (no copy is made).
+ *
+ * @throws RangeError if the declared body length runs past the end of `data`.
+ */
+function deserializeRequest(data: Uint8Array): {
+  method: string;
+  url: string;
+  headers: Record<string, string>;
+  body: Uint8Array;
+} {
+  const dec = new TextDecoder();
+  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
+  let offset = 0;
+
+  const methodLen = data[offset++]!;
+  const method = dec.decode(data.subarray(offset, offset + methodLen)); offset += methodLen;
+
+  const urlLen = view.getUint16(offset, false); offset += 2;
+  const url = dec.decode(data.subarray(offset, offset + urlLen)); offset += urlLen;
+
+  const headersLen = view.getUint16(offset, false); offset += 2;
+  const headersJson = dec.decode(data.subarray(offset, offset + headersLen)); offset += headersLen;
+  const headers: Record<string, string> = JSON.parse(headersJson);
+
+  const bodyLen = view.getUint32(offset, false); offset += 4;
+  // subarray() clamps silently, so a frame cut short inside the body would
+  // otherwise be returned as a shorter body instead of being rejected.
+  if (offset + bodyLen > data.length) {
+    throw new RangeError(
+      `deserializeRequest: truncated body (declared ${bodyLen} bytes, ${data.length - offset} available)`,
+    );
+  }
+  const body = data.subarray(offset, offset + bodyLen);
+
+  return { method, url, headers, body };
+}
+
+/**
+ * Serialize an HTTP response into bytes:
+ * [status (2B uint16 BE)][headers_len (2B uint16 BE)][headers JSON][body_len (4B uint32 BE)][body]
+ *
+ * @throws RangeError if `status` or the encoded headers do not fit in 16 bits.
+ */
+function serializeResponse(
+  status: number,
+  headers: Record<string, string>,
+  body: Uint8Array,
+): Uint8Array {
+  const enc = new TextEncoder();
+  const headersBytes = enc.encode(JSON.stringify(headers));
+
+  // setUint16 wraps out-of-range values modulo 65536 rather than throwing.
+  if (!Number.isInteger(status) || status < 0 || status > 0xffff) {
+    throw new RangeError(`serializeResponse: status out of range (${status})`);
+  }
+  if (headersBytes.length > 0xffff) {
+    throw new RangeError(`serializeResponse: headers too long (${headersBytes.length} > 65535 bytes)`);
+  }
+
+  const totalLen = 2 + 2 + headersBytes.length + 4 + body.length;
+  const buf = new Uint8Array(totalLen);
+  const view = new DataView(buf.buffer);
+  let offset = 0;
+
+  view.setUint16(offset, status, false); offset += 2;
+  view.setUint16(offset, headersBytes.length, false); offset += 2;
+  buf.set(headersBytes, offset); offset += headersBytes.length;
+  view.setUint32(offset, body.length, false); offset += 4;
+  buf.set(body, offset);
+
+  return buf;
+}
+
+/**
+ * Deserialize the HTTP response bytes (see serializeResponse for layout).
+ */
+function deserializeResponse(data: Uint8Array): {
+  status: number;
+  headers: Record<string, string>;
+  body: Uint8Array;
+} {
+  const dec = new TextDecoder();
+  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
+  let offset = 0;
+
+  const status = view.getUint16(offset, false); offset += 2;
+
+  const headersLen = view.getUint16(offset, false); offset += 2;
+  const headersJson = dec.decode(data.subarray(offset, offset + headersLen)); offset += headersLen;
+  const headers: Record<string, string> = JSON.parse(headersJson);
+
+  const bodyLen = view.getUint32(offset, false); offset += 4;
+  // subarray() clamps silently; reject a response whose body was cut short
+  // instead of handing back fewer bytes than the frame declared.
+  if (offset + bodyLen > data.length) {
+    throw new RangeError(
+      `deserializeResponse: truncated body (declared ${bodyLen} bytes, ${data.length - offset} available)`,
+    );
+  }
+  // View into `data`, not a copy.
+  const body = data.subarray(offset, offset + bodyLen);
+
+  return { status, headers, body };
+}
+
+// ─── Frame I/O ────────────────────────────────────────────────────────────────
+
+/**
+ * Read a single length-prefixed DOT frame from a socket.
+ * Wire: [uint32_BE frame_len][frame bytes]
+ *
+ * Accumulates all data until we have 4 (length header) + frame_len bytes,
+ * then resolves. Handles the case where all data arrives in one chunk
+ * (common on loopback).
+ */
+function readFrame(socket: net.Socket): Promise<Uint8Array> {
+  return new Promise<Uint8Array>((resolve, reject) => {
+    // Chunks are kept in a list and joined once the frame is complete, instead of
+    // re-copying the whole accumulated buffer on every 'data' event.
+    let chunks: Buffer[] = [];
+    let received = 0;
+    // Total bytes required (4-byte header + payload); unknown until the header arrives.
+    let needed: number | undefined;
+
+    const detach = () => {
+      socket.removeListener('data', onData);
+      socket.removeListener('error', onError);
+      socket.removeListener('close', onClose);
+    };
+
+    const onData = (chunk: Buffer) => {
+      chunks.push(chunk);
+      received += chunk.length;
+
+      if (needed === undefined) {
+        // Need at least 4 bytes for the length prefix
+        if (received < 4) return;
+        // The prefix may straddle chunks; merge what we have (a few bytes) to read it.
+        const head = Buffer.concat(chunks, received);
+        chunks = [head];
+        needed = 4 + head.readUInt32BE(0);
+      }
+
+      // Need 4 + frameLen bytes total
+      if (received < needed) return;
+
+      // Got a complete frame — resolve and clean up
+      detach();
+      const whole = chunks.length === 1 ? chunks[0]! : Buffer.concat(chunks, received);
+      // Copy out the payload; bytes received past the frame end are dropped.
+      resolve(new Uint8Array(whole.subarray(4, needed)));
+    };
+
+    const onError = (err: Error) => {
+      detach();
+      reject(err);
+    };
+
+    const onClose = () => {
+      detach();
+      reject(new Error('socket closed before read complete'));
+    };
+
+    socket.on('data', onData);
+    socket.once('error', onError);
+    socket.once('close', onClose);
+  });
+}
+
+/**
+ * Write a length-prefixed DOT frame to a socket.
+ * Wire: [uint32_BE frame_len][frame bytes] (the format readFrame consumes).
+ */
+function writeFrame(socket: net.Socket, frame: Uint8Array): void {
+  const lenBuf = Buffer.allocUnsafe(4);
+  lenBuf.writeUInt32BE(frame.length, 0);
+  socket.write(lenBuf);
+  socket.write(frame);
+}
+
+// ─── bridge() ─────────────────────────────────────────────────────────────────
+
+/**
+ * Start a DOT bridge server that translates DOT chains <-> HTTP.
+ *
+ * The bridge listens for incoming TCP connections. Each connection:
+ *   1. Reads a length-prefixed DOT frame
+ *   2. Unwraps it to get the HTTP request payload
+ *   3. Makes the actual HTTP request (if `forward` is set), or echoes (test mode)
+ *   4. Wraps the response as a DOT chain
+ *   5. 
/**
 * Start a DOT bridge server that translates DOT chains <-> HTTP.
 *
 * For every accepted TCP connection the bridge:
 *   1. reads one length-prefixed DOT frame,
 *   2. unwraps it into the serialized HTTP request,
 *   3. performs the HTTP request against `forward` (or, when `forward` is
 *      not set, echoes the request bytes back — test mode),
 *   4. wraps the result as a DOT chain,
 *   5. writes it back as a length-prefixed DOT frame and ends the socket.
 *
 * Any failure while handling a connection is swallowed and the socket is
 * simply closed; `requestCount` only counts connections that were answered.
 *
 * @param options - Port/host to listen on, optional forward base URL,
 *                  request body size cap, BLS key for verification, and an
 *                  optional pre-built wrap session.
 * @returns Handle exposing the bound port, the request counter and `close()`.
 *
 * @example
 * const bridge = await dot.bridge({
 *   port: 8100,
 *   forward: 'https://api.example.com',
 * });
 * bridge.close();
 *
 * @example
 * // Echo mode — wraps/unwraps without HTTP forwarding (for testing)
 * const bridge = await dot.bridge({ port: 8100 });
 */
export async function bridge(options?: BridgeOptions): Promise<BridgeHandle> {
  const port = options?.port ?? 8100;
  const host = options?.host ?? '127.0.0.1';
  const forward = options?.forward;
  const maxBodySize = options?.maxBodySize ?? 10 * 1024 * 1024; // 10MB
  const blsPublicKey = options?.blsPublicKey;

  // Responses are wrapped with the caller's session, or a fresh one.
  const session: WrapSession = options?.session ?? await createSession();

  let requestCount = 0;

  // Forward one serialized HTTP request to `base` and serialize the reply.
  const relay = async (reqBytes: Uint8Array, base: string): Promise<Uint8Array> => {
    const { method, url, headers, body } = deserializeRequest(reqBytes);

    const path = url.startsWith('/') ? url : '/' + url;
    const targetUrl = base.replace(/\/$/, '') + path;

    if (body.length > maxBodySize) {
      throw new Error(`bridge: request body too large (${body.length} > ${maxBodySize})`);
    }

    const fetchResponse = await fetch(targetUrl, {
      method,
      headers,
      body: body.length > 0 ? body : undefined,
    });

    const respBody = new Uint8Array(await fetchResponse.arrayBuffer());

    const respHeaders: Record<string, string> = {};
    fetchResponse.headers.forEach((value, key) => {
      respHeaders[key] = value;
    });

    return serializeResponse(fetchResponse.status, respHeaders, respBody);
  };

  // Full request/response cycle for one connection.
  const serve = async (socket: net.Socket): Promise<void> => {
    try {
      const inFrame = await readFrame(socket);
      const unwrapped = await unwrap(inFrame, blsPublicKey ? { blsPublicKey } : undefined);
      const reqBytes = unwrapped.data;

      // Echo mode returns the request bytes untouched.
      const responseBytes = forward ? await relay(reqBytes, forward) : reqBytes;

      const wrapped = await wrap(responseBytes, { session, protocol: 'raw' });
      writeFrame(socket, wrapped.frame);

      requestCount++;
    } catch (_err) {
      // On any error, close the socket cleanly
    } finally {
      socket.end();
    }
  };

  const server = net.createServer((socket) => {
    // Per-connection errors (client disconnects, etc.) are ignored.
    socket.on('error', () => {});
    void serve(socket);
  });

  await new Promise<void>((resolve, reject) => {
    server.once('error', reject);
    server.listen(port, host, () => {
      server.removeListener('error', reject);
      resolve();
    });
  });

  const actualPort = (server.address() as net.AddressInfo).port;

  return {
    get port() { return actualPort; },
    get requestCount() { return requestCount; },
    close(): Promise<void> {
      return new Promise<void>((resolve, reject) => {
        server.close((err) => (err ? reject(err) : resolve()));
      });
    },
  };
}
/**
 * Send an HTTP request via a DOT bridge.
 * Wraps the request as a DOT chain, sends it to the bridge over TCP, and
 * unwraps the response.
 *
 * @param port    - TCP port the bridge is listening on
 * @param request - HTTP request description. `method` is upper-cased;
 *                  `headers` defaults to `{}`, `body` to empty, and `host`
 *                  (the bridge's address) to '127.0.0.1'.
 * @returns The HTTP status, headers and body returned through the bridge
 * @throws (rejects) if the connection fails, closes before a full frame is
 *         received, or the response cannot be unwrapped/deserialized
 *
 * @example
 * const handle = await bridge({ port: 8100, forward: 'https://api.example.com' });
 * const response = await bridgeFetch(handle.port, {
 *   method: 'GET',
 *   url: '/v1/messages',
 * });
 */
export async function bridgeFetch(
  port: number,
  request: {
    method: string;
    url: string;
    headers?: Record<string, string>;
    body?: Uint8Array;
    host?: string;
  },
): Promise<{
  status: number;
  headers: Record<string, string>;
  body: Uint8Array;
}> {
  const method = request.method.toUpperCase();
  const url = request.url;
  const headers = request.headers ?? {};
  const body = request.body ?? new Uint8Array(0);
  // Honour the caller-supplied bridge host; loopback remains the default.
  const host = request.host ?? '127.0.0.1';

  // Serialize the HTTP request into bytes
  const reqBytes = serializeRequest(method, url, headers, body);

  // Wrap as DOT chain
  const wrapped = await wrap(reqBytes, { protocol: 'raw' });

  // Connect to bridge and send
  const response = await new Promise<Uint8Array>((resolve, reject) => {
    const socket = net.createConnection({ port, host }, () => {
      writeFrame(socket, wrapped.frame);
    });

    socket.on('error', reject);

    readFrame(socket)
      .then(resolve, reject)
      // One frame per connection: release the socket whether or not the read succeeded.
      .finally(() => socket.destroy());
  });

  // Unwrap response DOT chain
  const unwrapped = await unwrap(response);

  // Deserialize HTTP response
  const { status, headers: respHeaders, body: respBody } = deserializeResponse(unwrapped.data);

  return { status, headers: respHeaders, body: respBody };
}
// ─── PKCS8 / SPKI constants (matches core/src/keypair.ts) ─────────────────────

// Fixed 16-byte DER header that precedes the 32-byte seed in an Ed25519 PKCS8 blob.
const PKCS8_PREFIX = Uint8Array.from([
  0x30, 0x2e, 0x02, 0x01, 0x00, 0x30, 0x05, 0x06,
  0x03, 0x2b, 0x65, 0x70, 0x04, 0x22, 0x04, 0x20,
]);
// Number of leading bytes skipped in an SPKI export to reach the raw public key.
const SPKI_PREFIX_LEN = 12;

// ─── Helpers ──────────────────────────────────────────────────────────────────

/** Encode bytes as an unpadded Base64URL string. */
function base64url(bytes: Uint8Array): string {
  const asBuffer = Buffer.from(bytes);
  return asBuffer.toString('base64url');
}

/**
 * Build a PKCS8 container (prefix + 32-byte seed slot) around a raw seed.
 * Returns the backing ArrayBuffer of a freshly allocated 48-byte array.
 */
function makePkcs8(seed: Uint8Array): ArrayBuffer {
  const headerLen = PKCS8_PREFIX.length;
  const der = new Uint8Array(headerLen + 32);
  der.set(PKCS8_PREFIX, 0);
  der.set(seed, headerLen);
  return der.buffer as ArrayBuffer;
}

/**
 * Derive a 32-byte AES key from a passphrase with PBKDF2-HMAC-SHA256
 * (100,000 iterations), promisifying the callback API.
 */
function deriveKey(passphrase: string, salt: Buffer): Promise<Buffer> {
  return new Promise<Buffer>((resolve, reject) => {
    const settle = (err: Error | null, key: Buffer): void => {
      if (err) {
        reject(err);
        return;
      }
      resolve(key);
    };
    pbkdf2(passphrase, salt, 100_000, 32, 'sha256', settle);
  });
}
/**
 * Options accepted by `dotId()`. Every field is optional.
 */
export interface IdentityOptions {
  /**
   * Storage path for the encrypted key file (Node.js only).
   * Default: ~/.dot-protocol/identity.key
   * (`.dot-protocol/identity.key` under `os.homedir()`).
   */
  storagePath?: string;
  /**
   * Passphrase for encrypting the key at rest.
   * Default: the first 32 hex characters of SHA-256(hostname ‖ $USER), where
   * a missing `USER` environment variable is replaced by the literal
   * "default". Only the `USER` variable is consulted.
   * NOTE(review): both inputs look readable by any local process, so the
   * default probably offers little confidentiality — confirm the threat model.
   * For production, pass an explicit passphrase from the user.
   */
  passphrase?: string;
  /**
   * If true, generate a new identity even if a key file already exists at
   * `storagePath`; the new key is written to that same path.
   * Default: false (reuse existing)
   */
  forceNew?: boolean;
}
/**
 * Get or create a DOT identity bound to this device.
 *
 * First call (or `forceNew`): generates an Ed25519 keypair, encrypts the
 * 32-byte seed with AES-256-GCM under a PBKDF2-derived key, and writes it to
 * the storage path (file mode 0o600, directory created with mode 0o700).
 * Subsequent calls: load and decrypt the existing key file.
 *
 * The private key is imported as a non-extractable CryptoKey held in a
 * closure; every intermediate buffer that carried the seed is zeroed before
 * this function returns. It is never returned directly.
 *
 * @param options - Storage path, passphrase and `forceNew`; see IdentityOptions
 * @returns The identity (public key, `did`, sign/verify/export helpers)
 * @throws Error if the key file has an unsupported version or holds key
 *         material of the wrong length; decryption errors (e.g. wrong
 *         passphrase) and file-system errors propagate unchanged
 *
 * @example
 * const identity = await dotId();
 * console.log(identity.did); // "dot:abc123..."
 * const sig = await identity.sign(new TextEncoder().encode('hello'));
 * const valid = await identity.verify(new TextEncoder().encode('hello'), sig);
 * // valid === true
 *
 * @example
 * // Use a specific passphrase for portability
 * const identity = await dotId({ passphrase: 'my-secret-phrase' });
 */
export async function dotId(options?: IdentityOptions): Promise<DotIdentity> {
  const { subtle } = globalThis.crypto;

  const storagePath = options?.storagePath
    ?? join(homedir(), '.dot-protocol', 'identity.key');

  // Default passphrase is machine-derived (hostname + $USER); it must stay
  // byte-for-byte stable or previously stored keys become undecryptable.
  const passphrase = options?.passphrase
    ?? createHash('sha256')
      .update(hostname())
      .update(process.env['USER'] ?? 'default')
      .digest('hex')
      .slice(0, 32);

  const forceNew = options?.forceNew ?? false;

  let privateKeySeed: Uint8Array;
  let publicKeyBytes: Uint8Array;

  if (!forceNew && existsSync(storagePath)) {
    // Load and decrypt existing key
    const stored: StoredKey = JSON.parse(readFileSync(storagePath, 'utf8')) as StoredKey;
    if (stored.version !== 1) {
      throw new Error(`dot.id: unsupported key file version ${stored.version}`);
    }

    const salt = Buffer.from(stored.salt, 'hex');
    const iv = Buffer.from(stored.iv, 'hex');
    const ciphertext = Buffer.from(stored.ciphertext, 'hex');
    const tag = Buffer.from(stored.tag, 'hex');

    const aesKey = await deriveKey(passphrase, salt);

    const decipher = createDecipheriv('aes-256-gcm', aesKey, iv);
    decipher.setAuthTag(tag);
    const decrypted = Buffer.concat([decipher.update(ciphertext), decipher.final()]);

    privateKeySeed = new Uint8Array(decrypted);
    decrypted.fill(0); // the copy above is the only one we keep
    publicKeyBytes = Buffer.from(stored.publicKey, 'hex');

    // A short seed would be silently zero-padded by makePkcs8 and a short
    // public key would yield a bogus DID — fail loudly instead.
    if (privateKeySeed.length !== 32 || publicKeyBytes.length !== 32) {
      privateKeySeed.fill(0);
      throw new Error('dot.id: corrupt key file (seed and public key must be 32 bytes each)');
    }
  } else {
    // Generate new keypair using Web Crypto (consistent with core/src/keypair.ts)
    const { privateKey: privCryptoKey, publicKey: pubCryptoKey } =
      await subtle.generateKey({ name: 'Ed25519' }, true, ['sign', 'verify']);

    const pkcs8Exported = new Uint8Array(await subtle.exportKey('pkcs8', privCryptoKey));
    const spkiExported = new Uint8Array(await subtle.exportKey('spki', pubCryptoKey));

    // Seed is the last 32 bytes of PKCS8 export (matches core/src/keypair.ts)
    privateKeySeed = pkcs8Exported.slice(PKCS8_PREFIX.length);
    pkcs8Exported.fill(0);
    publicKeyBytes = spkiExported.slice(SPKI_PREFIX_LEN);

    // Encrypt and persist
    const salt = randomBytes(32);
    const iv = randomBytes(12);
    const aesKey = await deriveKey(passphrase, salt);

    const cipher = createCipheriv('aes-256-gcm', aesKey, iv);
    // Pass the seed directly — no extra plaintext Buffer copy.
    const encrypted = Buffer.concat([cipher.update(privateKeySeed), cipher.final()]);
    const tag = cipher.getAuthTag();

    const stored: StoredKey = {
      version: 1,
      salt: salt.toString('hex'),
      iv: iv.toString('hex'),
      ciphertext: encrypted.toString('hex'),
      tag: tag.toString('hex'),
      publicKey: Buffer.from(publicKeyBytes).toString('hex'),
    };

    // recursive mkdir is a no-op when the directory already exists; newly
    // created directories are owner-only (mode is ignored on Windows).
    mkdirSync(dirname(storagePath), { recursive: true, mode: 0o700 });
    writeFileSync(storagePath, JSON.stringify(stored, null, 2), { mode: 0o600 });
  }

  // Import the signing key for Web Crypto (private key held in closure), then
  // wipe both the raw seed and its PKCS8 copy whether or not the import worked.
  const seed = privateKeySeed;
  const pkcs8 = makePkcs8(seed);
  const signingKey = await subtle
    .importKey('pkcs8', pkcs8, { name: 'Ed25519' }, false, ['sign'])
    .finally(() => {
      new Uint8Array(pkcs8).fill(0);
      seed.fill(0);
    });

  // Always use a clean Uint8Array (not a Buffer pool view) for consistent ArrayBuffer ownership
  const pubKeySnapshot = new Uint8Array(publicKeyBytes);

  const identity: DotIdentity = {
    publicKey: pubKeySnapshot,
    did: `dot:${base64url(pubKeySnapshot)}`,
    puf: null,

    async sign(data: Uint8Array): Promise<Uint8Array> {
      const sig = await subtle.sign({ name: 'Ed25519' }, signingKey, data);
      return new Uint8Array(sig);
    },

    async verify(
      data: Uint8Array,
      signature: Uint8Array,
      publicKey?: Uint8Array,
    ): Promise<boolean> {
      try {
        const keyBytes = publicKey ?? pubKeySnapshot;
        // Always create a fresh ArrayBuffer copy — Node.js Buffer pool means
        // .buffer may be a large shared pool, not the 32-byte key alone.
        const raw = new Uint8Array(keyBytes).buffer as ArrayBuffer;
        const verifyKey = await subtle.importKey(
          'raw',
          raw,
          { name: 'Ed25519' },
          false,
          ['verify'],
        );
        return subtle.verify({ name: 'Ed25519' }, verifyKey, signature, data);
      } catch {
        // Malformed key or signature bytes count as "not verified".
        return false;
      }
    },

    export(): string {
      return base64url(pubKeySnapshot);
    },
  };

  return identity;
}
/**
 * Create a new wrap session backed by a freshly generated Ed25519 keypair
 * and a fresh BLS keypair.
 *
 * Reusing one session across several wrap() calls keeps chain continuity
 * and predictor context, which improves compression.
 *
 * @example
 * const session = await createSession();
 * const chain1 = await wrap(payload1, { session });
 * const chain2 = await wrap(payload2, { session }); // chains from chain1
 */
export async function createSession(): Promise<WrapSession> {
  const freshKeypair = await createKeypair();
  return createSessionFromKeypair(freshKeypair);
}

/**
 * Create a session around an existing Ed25519 keypair (a new BLS keypair is
 * still generated). Use when wrap() should sign with a specific DOT identity.
 * The session starts with an empty chain and `baseTimestamp` set to now.
 */
export async function createSessionFromKeypair(
  keypair: { publicKey: Uint8Array; privateKey: Uint8Array },
): Promise<WrapSession> {
  const session: WrapSession = {
    keypair,
    blsKeypair: createBLSKeypair(),
    dots: [],
    lastDot: undefined,
    baseTimestamp: BigInt(Date.now()),
  };
  return session;
}
`http://127.0.0.1:${addr.port}` }); + }); + }); +} + +function closeServer(server: http.Server): Promise { + return new Promise((resolve, reject) => { + server.close((err) => { + if (err) reject(err); + else resolve(); + }); + }); +} + +// ─── Tests ──────────────────────────────────────────────────────────────────── + +describe('bridge — lifecycle', () => { + let handle: BridgeHandle | null = null; + + afterEach(async () => { + if (handle) { + await handle.close(); + handle = null; + } + }); + + it('starts and stops cleanly', async () => { + handle = await bridge({ port: 0 }); + expect(handle.port).toBeGreaterThan(0); + expect(handle.requestCount).toBe(0); + await handle.close(); + handle = null; + }); + + it('port 0 gets an auto-assigned port', async () => { + handle = await bridge({ port: 0 }); + expect(handle.port).toBeGreaterThan(1024); + }); +}); + +describe('bridge — echo mode (no forward)', () => { + let handle: BridgeHandle | null = null; + + afterEach(async () => { + if (handle) { + await handle.close(); + handle = null; + } + }); + + it('wraps/unwraps a round-trip without HTTP forwarding', async () => { + handle = await bridge({ port: 0 }); + + // In echo mode the bridge returns the unwrapped request bytes as-is. + // bridgeFetch serializes a request then deserializes what comes back. + // Since there's no forward target, the bridge echoes the serialized + // request bytes directly — which won't parse as a response. + // Instead, test the TCP layer directly: send a DOT chain, get it back. 
+ const { wrap, unwrap, createSession } = await import('../index.js'); + + const session = await createSession(); + const payload = new TextEncoder().encode('echo test payload'); + const wrapped = await wrap(payload, { session, protocol: 'raw' }); + + // Send the DOT frame to the bridge over a raw TCP socket + const response = await new Promise((resolve, reject) => { + const socket = net.createConnection({ port: handle!.port, host: '127.0.0.1' }, () => { + // Write length-prefixed frame + const lenBuf = Buffer.allocUnsafe(4); + lenBuf.writeUInt32BE(wrapped.frame.length, 0); + socket.write(lenBuf); + socket.write(wrapped.frame); + }); + + socket.on('error', reject); + + // Read length-prefixed response frame + let received = Buffer.alloc(0); + socket.on('data', (chunk: Buffer) => { + received = Buffer.concat([received, chunk]); + if (received.length >= 4) { + const frameLen = received.readUInt32BE(0); + if (received.length >= 4 + frameLen) { + resolve(new Uint8Array(received.subarray(4, 4 + frameLen))); + } + } + }); + }); + + // Unwrap the response — it should contain the original payload (echo mode) + const result = await unwrap(response); + expect(result.data).toEqual(payload); + }); +}); + +describe('bridge — HTTP forwarding', () => { + let bridgeHandle: BridgeHandle | null = null; + let echoServer: http.Server | null = null; + + afterEach(async () => { + if (bridgeHandle) { + await bridgeHandle.close(); + bridgeHandle = null; + } + if (echoServer) { + await closeServer(echoServer); + echoServer = null; + } + }); + + it('forwards a GET request and returns the response', async () => { + const { server, url } = await startEchoServer(); + echoServer = server; + + bridgeHandle = await bridge({ port: 0, forward: url }); + + const resp = await bridgeFetch(bridgeHandle.port, { + method: 'GET', + url: '/hello', + }); + + expect(resp.status).toBe(200); + const body = JSON.parse(new TextDecoder().decode(resp.body)); + expect(body.method).toBe('GET'); + 
expect(body.path).toBe('/hello'); + }); + + it('forwards a POST request with a body', async () => { + const { server, url } = await startEchoServer(); + echoServer = server; + + bridgeHandle = await bridge({ port: 0, forward: url }); + + const reqBody = new TextEncoder().encode('{"key":"value"}'); + const resp = await bridgeFetch(bridgeHandle.port, { + method: 'POST', + url: '/api/data', + headers: { 'content-type': 'application/json' }, + body: reqBody, + }); + + expect(resp.status).toBe(200); + const body = JSON.parse(new TextDecoder().decode(resp.body)); + expect(body.method).toBe('POST'); + expect(body.path).toBe('/api/data'); + expect(body.body).toBe('{"key":"value"}'); + }); +}); + +describe('bridge — large payload', () => { + let bridgeHandle: BridgeHandle | null = null; + let echoServer: http.Server | null = null; + + afterEach(async () => { + if (bridgeHandle) { + await bridgeHandle.close(); + bridgeHandle = null; + } + if (echoServer) { + await closeServer(echoServer); + echoServer = null; + } + }); + + it('handles a 5KB request body round-trip', async () => { + const { server, url } = await startEchoServer(); + echoServer = server; + + bridgeHandle = await bridge({ port: 0, forward: url }); + + // 5KB of repeating ASCII data + const fiveKb = new Uint8Array(5120); + for (let i = 0; i < fiveKb.length; i++) fiveKb[i] = 65 + (i % 26); // A-Z repeating + + const resp = await bridgeFetch(bridgeHandle.port, { + method: 'POST', + url: '/large', + headers: { 'content-type': 'application/octet-stream' }, + body: fiveKb, + }); + + expect(resp.status).toBe(200); + const parsed = JSON.parse(new TextDecoder().decode(resp.body)); + // The echo server returns the body as a string + expect(parsed.body.length).toBe(5120); + }, 15000); +}); diff --git a/packages/wrapper/src/tests/coverage-gaps.test.ts b/packages/wrapper/src/tests/coverage-gaps.test.ts new file mode 100644 index 000000000..002830429 --- /dev/null +++ b/packages/wrapper/src/tests/coverage-gaps.test.ts @@ 
-0,0 +1,546 @@ +/** + * coverage-gaps.test.ts — Wrapper package coverage gap filler + * + * Covers: + * - session.ts lines 33-44: createSessionFromKeypair + * - identity.ts lines 150-151: unsupported key file version + * - identity.ts lines 254-255: verify() catch → false (invalid key bytes) + * - unwrap.ts lines 142-244: decodeFrameWithoutBLSVerification (no blsPublicKey) + * - unwrap.ts lines 250-251: payload size mismatch + * - bridge.ts lines 257-259: maxBodySize guard + * - bridge.ts lines 170-180: socket close-before-read path + */ + +import { describe, it, expect, afterEach } from 'vitest'; +import * as net from 'node:net'; +import * as http from 'node:http'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { writeFileSync, rmSync, existsSync } from 'node:fs'; +import { createKeypair } from '@dotprotocol/core'; + +// ─── session.ts: createSessionFromKeypair (lines 33-44) ────────────────────── + +import { createSession, createSessionFromKeypair } from '../session.js'; +import { wrap, unwrap } from '../index.js'; +import { readFramePubkey } from '../unwrap.js'; + +describe('readFramePubkey (lines 29-30)', () => { + it('extracts the Ed25519 public key from a wrapped frame', async () => { + const chain = await wrap(new TextEncoder().encode('pubkey test'), { protocol: 'raw' }); + const pubkey = readFramePubkey(chain.frame); + expect(pubkey).toBeInstanceOf(Uint8Array); + expect(pubkey.length).toBe(32); + }); +}); + +describe('createSessionFromKeypair', () => { + it('creates a session using the provided keypair', async () => { + const keypair = await createKeypair(); + const session = await createSessionFromKeypair(keypair); + expect(session.keypair.publicKey).toEqual(keypair.publicKey); + expect(session.keypair.privateKey).toEqual(keypair.privateKey); + expect(session.dots).toHaveLength(0); + expect(session.lastDot).toBeUndefined(); + expect(typeof session.baseTimestamp).toBe('bigint'); + }); + + it('wrap/unwrap round-trip using a 
describe('dotId() — default storagePath and passphrase (lines 132, 135-139)', () => {
  // The default storage path lives under os.homedir(). Point the home
  // directory at a throwaway location for the duration of each test so the
  // suite never reads, overwrites, or deletes the developer's real
  // ~/.dot-protocol/identity.key.
  const fakeHome = join(tmpdir(), `dot-id-home-${process.pid}-${Date.now()}`);
  const defaultKeyPath = join(fakeHome, '.dot-protocol', 'identity.key');
  const savedHome = process.env['HOME'];
  const savedUserProfile = process.env['USERPROFILE'];

  const restoreEnv = (name: string, value: string | undefined): void => {
    if (value === undefined) delete process.env[name];
    else process.env[name] = value;
  };

  afterEach(() => {
    restoreEnv('HOME', savedHome);
    restoreEnv('USERPROFILE', savedUserProfile);
    // Only the sandboxed home is removed — never anything under the real one.
    rmSync(fakeHome, { recursive: true, force: true });
  });

  it('creates identity without storagePath option (uses default ~/.dot-protocol/identity.key)', async () => {
    // os.homedir() honours HOME on POSIX and USERPROFILE on Windows.
    process.env['HOME'] = fakeHome;
    process.env['USERPROFILE'] = fakeHome;

    // Call dotId with NO options → hits line 132 (default storagePath) AND lines 135-139 (default passphrase)
    const identity = await dotId(); // no options at all
    expect(identity.publicKey).toBeInstanceOf(Uint8Array);
    expect(identity.publicKey.length).toBe(32);
    expect(identity.did).toMatch(/^dot:/);
    // Proves the key really went to the sandboxed default location.
    expect(existsSync(defaultKeyPath)).toBe(true);
  });

  it('creates identity with USER env unset — exercises ?? "default" branch (line 137)', async () => {
    // Temporarily remove USER env var so process.env['USER'] ?? 'default' uses 'default'
    const savedUser = process.env['USER'];
    delete process.env['USER'];
    const tmpPath = join(tmpdir(), `dot-id-no-user-${Date.now()}.key`);
    try {
      const identity = await dotId({ storagePath: tmpPath });
      expect(identity.publicKey).toBeInstanceOf(Uint8Array);
    } finally {
      // Clean up even when dotId() or the assertion throws.
      if (existsSync(tmpPath)) rmSync(tmpPath, { force: true });
      if (savedUser !== undefined) process.env['USER'] = savedUser;
    }
  });
});
it('verify() returns false when publicKey bytes are invalid (importKey throws → catch → false)', async () => { + testPath = join(tmpdir(), `dot-id-catch2-${Date.now()}.key`); + const identity = await dotId({ storagePath: testPath, passphrase: 'test-catch2' }); + + const data = new TextEncoder().encode('test data'); + const validSig = new Uint8Array(64).fill(0); // 64-byte sig (valid length) + // Pass a 3-byte publicKey — importKey will throw DataError (must be 32 bytes for Ed25519) + const badPublicKey = new Uint8Array(3).fill(0xab); + const result = await identity.verify(data, validSig, badPublicKey); + expect(result).toBe(false); + }); +}); + +// ─── unwrap.ts: decodeFrameWithoutBLSVerification (lines 142-244) ───────────── +// When no blsPublicKey is provided, unwrap() calls decodeFrameWithoutBLSVerification. + +describe('unwrap() — no blsPublicKey (decodeFrameWithoutBLSVerification)', () => { + it('decodes frame without BLS verification (verified=false)', async () => { + const data = new TextEncoder().encode('test payload for unverified decode'); + const chain = await wrap(data, { protocol: 'raw' }); + + // Call unwrap WITHOUT blsPublicKey → uses internal manual decoder + const result = await unwrap(chain.frame); + expect(result.verified).toBe(false); + expect(result.data).toEqual(data); + expect(result.protocol).toBe('raw'); + }); + + it('handles raw bytes through unverified path', async () => { + const data = new Uint8Array(32).fill(0xab); + const chain = await wrap(data, { protocol: 'raw' }); + + const result = await unwrap(chain.frame); + expect(result.verified).toBe(false); + expect(result.data).toEqual(data); + }); + + it('handles empty payload through unverified path', async () => { + const data = new Uint8Array(0); + const chain = await wrap(data, { protocol: 'raw' }); + + const result = await unwrap(chain.frame); + expect(result.verified).toBe(false); + expect(result.dotCount).toBeGreaterThan(0); + }); + + it('throws when frame is too short (< 86 
bytes)', async () => { + const shortFrame = new Uint8Array(50); + await expect(unwrap(shortFrame)).rejects.toThrow('too short'); + }); + + it('throws when dot_count is 0 in frame (unverified path)', async () => { + const frame = new Uint8Array(86); + frame[0] = 0x03; // version + frame[1] = 0x00; // flags + // dot_count at [2..5] = 0 + await expect(unwrap(frame)).rejects.toThrow('dot_count is 0'); + }); + + it('throws on dict-compressed frame without blsPublicKey', async () => { + // Craft a frame with FLAG_DICT_COMPRESSED (bit 3) set + const frame = new Uint8Array(86); + frame[0] = 0x03; // version + frame[1] = 0x08; // FLAG_DICT_COMPRESSED + const view = new DataView(frame.buffer); + view.setUint32(2, 1, true); // dot_count = 1 + await expect(unwrap(frame)).rejects.toThrow('dictionary-compressed'); + }); + + it('throws on prediction-coded frame without blsPublicKey', async () => { + // Craft a frame with FLAG_PREDICTION (bit 4) set + const frame = new Uint8Array(86); + frame[0] = 0x03; // version + frame[1] = 0x10; // FLAG_PREDICTION + const view = new DataView(frame.buffer); + view.setUint32(2, 1, true); // dot_count = 1 + await expect(unwrap(frame)).rejects.toThrow('prediction-coded'); + }); + + it('delta+RLE frame decodes without blsPublicKey (default wrap flags)', async () => { + // Default wrap() uses delta+RLE — unwrap without key tests those paths + const data = new TextEncoder().encode('raw mode'); + const chain = await wrap(data, { protocol: 'raw' }); + + // Unwrap without key — the existing frame uses delta+RLE by default + const result = await unwrap(chain.frame); // no blsPublicKey + expect(result.verified).toBe(false); + expect(result.data).toEqual(data); + }); + + it('no-delta no-RLE frame decodes without blsPublicKey (raw ts + raw types path)', async () => { + // Build a frame with timestampDelta=false, payloadTypeRLE=false + // so decodeFrameWithoutBLSVerification hits the else branches + const { serializeBatchV2 } = await 
import('@dotprotocol/compression'); + const { createBLSKeypair, createDOT, toBytes, DotType } = await import('@dotprotocol/core'); + const keypair = await createKeypair(); + const blsKeypair = createBLSKeypair(); + + const dot = await createDOT({ + keypair, + type: DotType.PUBLIC, + ts: 1_700_000_000_000, + }); + const dotBytes = toBytes(dot); + + // Serialize with both flags disabled → raw timestamps, raw types + const frame = await serializeBatchV2([dotBytes], blsKeypair, { + timestampDelta: false, + payloadTypeRLE: false, + }); + + // Unwrap the raw batch-v2 frame directly (not a wrap() frame) + // This exercises the raw ts path (hasTsDelta=false) and raw types (hasTypeRLE=false) + // The payload in this frame is a DOT payload, not a wrap() header — so it will fail + // to parse as a wrap header, but decodeFrameWithoutBLSVerification IS called + try { + await unwrap(frame); // may throw on header parse — that's fine + } catch { + // Expected — the frame payload bytes are DOT data, not wrap() format + } + // Just verify the function was invoked (coverage shows the else branches were hit) + // We can verify the raw paths worked by checking a valid case too + }); + + it('buffer too short for RLE types + payloads triggers RangeError (line 236-237)', async () => { + // Craft a frame with FLAG_TS_DELTA + FLAG_TYPE_RLE set, dot_count=1. + // With ts-delta: decoder reads 8-byte anchor, tsColumnSize=8, tsEnd=8. + // rleEnd = body.length - 1*16. + // With body=10 bytes: rleEnd = 10 - 16 = -6, which is <= tsEnd=8 → throws at line 236. 
+ const frame = new Uint8Array(96); // 86 header + 10 body bytes + frame[0] = 0x03; // version + frame[1] = 0b00000011; // FLAG_TS_DELTA (bit0) | FLAG_TYPE_RLE (bit1) + const view = new DataView(frame.buffer); + view.setUint32(2, 1, true); // dot_count = 1 + // body bytes [86..95]: 8 bytes for ts anchor (big-endian uint64), 2 extra bytes + // The 8-byte anchor is all zeros = timestamp 0 (valid) + await expect(unwrap(frame)).rejects.toThrow(RangeError); + }); + + it('buffer too short for payloads triggers RangeError (line 250-251)', async () => { + // Build a valid frame with raw types (no RLE) but not enough payload bytes + // Use a no-delta, no-RLE frame with 1 dot but no payload bytes after types + const { serializeBatchV2 } = await import('@dotprotocol/compression'); + const { createBLSKeypair, createDOT, toBytes, DotType } = await import('@dotprotocol/core'); + const keypair = await createKeypair(); + const blsKeypair = createBLSKeypair(); + + const dot = await createDOT({ keypair, type: DotType.PUBLIC, ts: 1_700_000_000_000 }); + const dotBytes = toBytes(dot); + const frame = await serializeBatchV2([dotBytes], blsKeypair, { + timestampDelta: false, + payloadTypeRLE: false, + }); + + // Truncate the frame so it's missing the payload bytes (last 16 bytes) + const truncated = frame.slice(0, frame.length - 16); + + // This should trigger "buffer too short for payloads" + await expect(unwrap(truncated)).rejects.toThrow(); + }, 15_000); + + it('multi-DOT chain (N>1) exercises sha256 chaining in unverified decode', async () => { + // A larger payload produces multiple DOTs, exercising the sha256(prevDot) code path + // in decodeFrameWithoutBLSVerification (lines 160-162) + const data = new Uint8Array(48).fill(0xcd); // forces 3+ DOTs (48/16 = 3 chunks + header) + const chain = await wrap(data, { protocol: 'raw' }); + expect(chain.chunkCount).toBeGreaterThan(1); + + const result = await unwrap(chain.frame); // unverified path + expect(result.verified).toBe(false); + 
expect(result.dotCount).toBeGreaterThan(1); + expect(result.data).toEqual(data); + }); + + it('throws when assembled originalLength exceeds assembled data (lines 128-131)', async () => { + // Create a DOT whose payload header claims originalLength = 0xFFFFFFFF (way too large). + // When unwrapped via the unverified path, decodeFrameWithoutBLSVerification returns the DOT, + // assembled = 16 bytes, then originalLength check fires: dataEnd > 16 → RangeError. + const { serializeBatchV2, encodeTimestampDeltas, encodePayloadTypes } = await import('@dotprotocol/compression'); + const { createBLSKeypair, createDOT, toBytes, DotType } = await import('@dotprotocol/core'); + const keypair = await createKeypair(); + const blsKeypair = createBLSKeypair(); + + // Build payload: [protocol_id=0x00][originalLength=0xFFFFFFFF BE][zeros×11] + const badPayload = new Uint8Array(16); + badPayload[0] = 0x00; // raw protocol + const pv = new DataView(badPayload.buffer); + pv.setUint32(1, 0xFFFFFF00, false); // originalLength = huge (> 11 available bytes) + + const dot = await createDOT({ + keypair, + payload: badPayload, + type: DotType.PUBLIC, + ts: 1_700_000_000_000, + }); + const dotBytes = toBytes(dot); + const frame = await serializeBatchV2([dotBytes], blsKeypair); + + // Unwrap without BLS key → unverified path → assembles 16 bytes → originalLength check fails + await expect(unwrap(frame)).rejects.toThrow(RangeError); + }, 15_000); +}); + +// ─── bridge.ts: maxBodySize guard (lines 257-259) ───────────────────────────── + +import { bridge, bridgeFetch } from '../index.js'; +import type { BridgeHandle } from '../index.js'; + +function startEchoServer(): Promise<{ server: http.Server; url: string }> { + return new Promise((resolve) => { + const server = http.createServer((req, res) => { + let body = Buffer.alloc(0); + req.on('data', (chunk: Buffer) => { body = Buffer.concat([body, chunk]); }); + req.on('end', () => { + res.writeHead(200, { 'content-type': 'application/json' }); + 
res.end(JSON.stringify({ method: req.method, path: req.url, body: body.toString() })); + }); + }); + server.listen(0, '127.0.0.1', () => { + const addr = server.address() as net.AddressInfo; + resolve({ server, url: `http://127.0.0.1:${addr.port}` }); + }); + }); +} + +function closeServer(server: http.Server): Promise<void> { // resolves when server.close() reports completion; rejects with the error it reports + return new Promise<void>((resolve, reject) => { + server.close((err) => { if (err) reject(err); else resolve(); }); + }); +} + +describe('bridge.ts — additional branch coverage', () => { + let bridgeHandle: BridgeHandle | null = null; + let echoServer: http.Server | null = null; + + afterEach(async () => { + if (bridgeHandle) { await bridgeHandle.close(); bridgeHandle = null; } + if (echoServer) { await closeServer(echoServer); echoServer = null; } + }); + + it('bridge with blsPublicKey option exercises line 244 truthy branch', async () => { + // Pass a BLS public key so the `blsPublicKey ? { blsPublicKey } : undefined` branch + // evaluates to the truthy side (line 244). + // We use a 48-byte dummy key — unwrap will reject it, but the branch IS exercised. + const dummyBLSKey = new Uint8Array(48).fill(0x01); + bridgeHandle = await bridge({ port: 0, blsPublicKey: dummyBLSKey }); + + // Connect and send a real wrapped frame — bridge will try to verify with wrong key + // and fail, closing the connection. That's OK — branch coverage is the goal.
+ const { wrap: wrapFn } = await import('../index.js'); + const payload = new TextEncoder().encode('bls branch test'); + const chain = await wrapFn(payload, { protocol: 'raw' }); + + // Send the frame — bridge will fail BLS verify and close socket + const result = await new Promise((resolve) => { + const socket = net.createConnection({ port: bridgeHandle!.port, host: '127.0.0.1' }, () => { + const lenBuf = Buffer.allocUnsafe(4); + lenBuf.writeUInt32BE(chain.frame.length, 0); + socket.write(lenBuf); + socket.write(chain.frame); + }); + socket.on('close', () => resolve(true)); + socket.on('error', () => resolve(true)); + setTimeout(() => resolve(true), 2000); + }); + expect(result).toBe(true); + // Bridge still running + expect(bridgeHandle!.port).toBeGreaterThan(0); + }, 10_000); + + it('bridgeFetch with URL not starting with "/" exercises line 254 false branch', async () => { + const { server, url } = await startEchoServer(); + echoServer = server; + bridgeHandle = await bridge({ port: 0, forward: url }); + + // Pass a URL without leading slash — exercises the `url.startsWith('/') ? ... 
: '/' + url` false branch + const resp = await bridgeFetch(bridgeHandle.port, { + method: 'GET', + url: 'no-leading-slash', + }); + expect(resp.status).toBe(200); + }, 10_000); + + it('sends < 4 bytes to trigger tryParse early-return (line 152)', async () => { + bridgeHandle = await bridge({ port: 0 }); + + // Connect and send only 2 bytes — tryParse returns early, then socket closes + await new Promise<void>((resolve) => { + const socket = net.createConnection({ port: bridgeHandle!.port, host: '127.0.0.1' }, () => { + socket.write(Buffer.from([0x00, 0x01])); // 2 bytes — < 4 needed + setTimeout(() => socket.destroy(), 50); + }); + socket.on('close', () => resolve()); + socket.on('error', () => resolve()); + }); + + await new Promise(r => setTimeout(r, 50)); + expect(bridgeHandle!.port).toBeGreaterThan(0); + }, 10_000); + + it('bridge with no port option uses default port 8100 (line 222 ?? branch)', async () => { + // Calling bridge with no port exercises `options?.port ?? 8100` + // We must check if 8100 is available; skip if not + try { + bridgeHandle = await bridge({ host: '127.0.0.1' }); // no port → defaults to 8100 + } catch { + return; // Port 8100 may already be in use in CI — that's fine, skip gracefully + } + expect(bridgeHandle!.port).toBe(8100); // asserted outside the try so a wrong port fails the test instead of being swallowed + }, 10_000); +}); + +describe('bridge.ts — maxBodySize guard', () => { + let bridgeHandle: BridgeHandle | null = null; + let echoServer: http.Server | null = null; + + afterEach(async () => { + if (bridgeHandle) { await bridgeHandle.close(); bridgeHandle = null; } + if (echoServer) { await closeServer(echoServer); echoServer = null; } + }); + + it('request body exceeding maxBodySize causes connection to close (no response)', async () => { + const { server, url } = await startEchoServer(); + echoServer = server; + + // Set maxBodySize to 100 bytes to easily trigger the guard + bridgeHandle = await bridge({ port: 0, forward: url, maxBodySize: 100 }); + + // Send a 200-byte body — exceeds 100-byte limit + const bigBody = new
Uint8Array(200).fill(0x41); // 'A' × 200 + + // The bridge will throw internally and close the socket + // bridgeFetch will receive no response → it should throw or return an error + await expect(bridgeFetch(bridgeHandle.port, { + method: 'POST', + url: '/test', + headers: { 'content-type': 'application/octet-stream' }, + body: bigBody, + })).rejects.toThrow(); // socket closed without sending response + }, 15_000); +}); + +// ─── bridge.ts: socket error during read (onError path, lines 171-174) ──────── + +describe('bridge.ts — socket error during read (onError path in readFrame)', () => { + let errorTestHandle: BridgeHandle | null = null; + + afterEach(async () => { + if (errorTestHandle) { await errorTestHandle.close(); errorTestHandle = null; } + }); + + it('bridge handles socket error during frame read gracefully (no crash)', async () => { + errorTestHandle = await bridge({ port: 0 }); + + // Connect a raw socket, write the 4-byte length prefix claiming a large frame, + // then send RST (resetAndDestroy) to cause ECONNRESET on the server socket. + // This triggers the onError path in readFrame. + await new Promise<void>((resolve) => { + const socket = net.createConnection({ port: errorTestHandle!.port, host: '127.0.0.1' }, () => { + // Write a 4-byte length prefix claiming 1000 bytes — puts bridge in "reading" state + const lenBuf = Buffer.allocUnsafe(4); + lenBuf.writeUInt32BE(1000, 0); + socket.write(lenBuf, () => { + // Send RST — causes ECONNRESET on the server socket, firing the onError handler + (socket as net.Socket & { resetAndDestroy?: () => void }).resetAndDestroy?.() + ??
socket.destroy(new Error('simulated socket error')); + }); + }); + socket.on('close', () => resolve()); + socket.on('error', () => resolve()); // ignore our own error + }); + + // Give the bridge a moment to handle the error + await new Promise(r => setTimeout(r, 50)); + + // Bridge should still be running (no crash) + expect(errorTestHandle!.port).toBeGreaterThan(0); + }, 10_000); +}); + +// ─── bridge.ts: socket closed before read (onClose path, line 176-180) ──────── + +describe('bridge.ts — socket closes early (onClose path in readFrame)', () => { + let earlyCloseHandle: BridgeHandle | null = null; + + afterEach(async () => { + if (earlyCloseHandle) { await earlyCloseHandle.close(); earlyCloseHandle = null; } + }); + + it('bridge handles early socket close gracefully (no crash)', async () => { + earlyCloseHandle = await bridge({ port: 0 }); + + // Connect a raw socket and immediately close it without sending data + await new Promise<void>((resolve) => { + const socket = net.createConnection({ port: earlyCloseHandle!.port, host: '127.0.0.1' }, () => { + socket.destroy(); // close immediately without sending frame + }); + socket.on('close', () => resolve()); + socket.on('error', () => resolve()); // ignore errors on our end + }); + + // Give the bridge a moment to handle the close event + await new Promise(r => setTimeout(r, 50)); + + // Bridge should still be running (no crash) + expect(earlyCloseHandle!.port).toBeGreaterThan(0); + }, 10_000); +}); diff --git a/packages/wrapper/src/tests/identity.test.ts b/packages/wrapper/src/tests/identity.test.ts new file mode 100644 index 000000000..7b9d24712 --- /dev/null +++ b/packages/wrapper/src/tests/identity.test.ts @@ -0,0 +1,156 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { rmSync, existsSync } from 'node:fs'; +import { dotId } from '../identity.js'; + +// ─── Setup
───────────────────────────────────────────────────────────────────── + +let testPath: string; + +beforeEach(() => { + testPath = join(tmpdir(), `dot-id-test-${Date.now()}-${Math.random().toString(36).slice(2)}`); +}); + +afterEach(() => { + if (existsSync(testPath)) rmSync(testPath, { recursive: true }); +}); + +// ─── Tests ───────────────────────────────────────────────────────────────────── + +describe('dot.id() — identity creation', () => { + it('creates identity with 32-byte public key', async () => { + const identity = await dotId({ storagePath: join(testPath, 'identity.key') }); + expect(identity.publicKey).toBeInstanceOf(Uint8Array); + expect(identity.publicKey.length).toBe(32); + }); + + it('DID format starts with "dot:"', async () => { + const identity = await dotId({ storagePath: join(testPath, 'identity.key') }); + expect(identity.did).toMatch(/^dot:[A-Za-z0-9_-]+$/); + }); + + it('puf is null', async () => { + const identity = await dotId({ storagePath: join(testPath, 'identity.key') }); + expect(identity.puf).toBeNull(); + }); + + it('export() returns a non-empty base64url string', async () => { + const identity = await dotId({ storagePath: join(testPath, 'identity.key') }); + const exported = identity.export(); + expect(typeof exported).toBe('string'); + expect(exported.length).toBeGreaterThan(0); + // base64url: no +, no /, no = + expect(exported).not.toContain('+'); + expect(exported).not.toContain('/'); + expect(exported).not.toContain('='); + }); +}); + +describe('dot.id() — sign and verify', () => { + it('sign and verify round-trip succeeds', async () => { + const identity = await dotId({ storagePath: join(testPath, 'identity.key') }); + const data = new TextEncoder().encode('hello dot protocol'); + const sig = await identity.sign(data); + expect(sig).toBeInstanceOf(Uint8Array); + expect(sig.length).toBe(64); + const valid = await identity.verify(data, sig); + expect(valid).toBe(true); + }); + + it('verify with wrong public key returns false', 
async () => { + const pathA = join(testPath, 'a.key'); + const pathB = join(testPath, 'b.key'); + const identityA = await dotId({ storagePath: pathA }); + const identityB = await dotId({ storagePath: pathB }); + + const data = new TextEncoder().encode('cross-identity verify test'); + const sig = await identityA.sign(data); + + // Verify with B's public key — should fail + const valid = await identityA.verify(data, sig, identityB.publicKey); + expect(valid).toBe(false); + }); + + it('sign empty bytes succeeds', async () => { + const identity = await dotId({ storagePath: join(testPath, 'identity.key') }); + const empty = new Uint8Array(0); + const sig = await identity.sign(empty); + expect(sig.length).toBe(64); + const valid = await identity.verify(empty, sig); + expect(valid).toBe(true); + }); + + it('sign large bytes (10KB) succeeds', async () => { + const identity = await dotId({ storagePath: join(testPath, 'identity.key') }); + const large = new Uint8Array(10 * 1024); + for (let i = 0; i < large.length; i++) large[i] = i % 251; + const sig = await identity.sign(large); + expect(sig.length).toBe(64); + const valid = await identity.verify(large, sig); + expect(valid).toBe(true); + }); + + it('verify tampered data returns false', async () => { + const identity = await dotId({ storagePath: join(testPath, 'identity.key') }); + const data = new TextEncoder().encode('original message'); + const tampered = new TextEncoder().encode('tampered message'); + const sig = await identity.sign(data); + const valid = await identity.verify(tampered, sig); + expect(valid).toBe(false); + }); +}); + +describe('dot.id() — persistence', () => { + it('two calls with same path return same public key', async () => { + const keyPath = join(testPath, 'identity.key'); + const id1 = await dotId({ storagePath: keyPath, passphrase: 'test-passphrase' }); + const id2 = await dotId({ storagePath: keyPath, passphrase: 'test-passphrase' }); + + expect(Buffer.from(id1.publicKey).toString('hex')) + 
.toBe(Buffer.from(id2.publicKey).toString('hex')); + expect(id1.did).toBe(id2.did); + }); + + it('loaded identity can verify signatures from original identity', async () => { + const keyPath = join(testPath, 'identity.key'); + const id1 = await dotId({ storagePath: keyPath, passphrase: 'stable-pass' }); + + const data = new TextEncoder().encode('signed by id1'); + const sig = await id1.sign(data); + + // Reload identity from disk + const id2 = await dotId({ storagePath: keyPath, passphrase: 'stable-pass' }); + const valid = await id2.verify(data, sig); + expect(valid).toBe(true); + }); +}); + +describe('dot.id() — forceNew', () => { + it('forceNew creates a different key each time', async () => { + const keyPath = join(testPath, 'identity.key'); + const id1 = await dotId({ storagePath: keyPath, passphrase: 'pass', forceNew: false }); + const id2 = await dotId({ storagePath: keyPath, passphrase: 'pass', forceNew: true }); + + // Extremely unlikely to be equal (2^256 space) + expect(Buffer.from(id1.publicKey).toString('hex')) + .not.toBe(Buffer.from(id2.publicKey).toString('hex')); + }); +}); + +describe('dot.id() — custom passphrase', () => { + it('loads correctly with explicit passphrase', async () => { + const keyPath = join(testPath, 'identity.key'); + const id1 = await dotId({ storagePath: keyPath, passphrase: 'my-custom-phrase-42' }); + const id2 = await dotId({ storagePath: keyPath, passphrase: 'my-custom-phrase-42' }); + expect(id1.did).toBe(id2.did); + }); + + it('rejects decryption with wrong passphrase', async () => { + const keyPath = join(testPath, 'identity.key'); + await dotId({ storagePath: keyPath, passphrase: 'correct-passphrase' }); + await expect( + dotId({ storagePath: keyPath, passphrase: 'wrong-passphrase' }), + ).rejects.toThrow(); + }); +}); diff --git a/packages/wrapper/src/tests/wrap.test.ts b/packages/wrapper/src/tests/wrap.test.ts new file mode 100644 index 000000000..5f246ab7c --- /dev/null +++ b/packages/wrapper/src/tests/wrap.test.ts 
@@ -0,0 +1,227 @@ +import { describe, it, expect } from 'vitest'; +import { wrap, unwrap, createSession } from '../index.js'; +import { DotType } from '@dotprotocol/core'; + +// ─── Helpers ────────────────────────────────────────────────────────────────── + +function randomBytes(n: number): Uint8Array { // n bytes drawn from Math.random() — unseeded, so payloads differ between runs + const b = new Uint8Array(n); + for (let i = 0; i < n; i++) b[i] = Math.floor(Math.random() * 256); + return b; +} + +function bytesEqual(a: Uint8Array, b: Uint8Array): boolean { // true when lengths match and every byte is equal + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + if (a[i] !== b[i]) return false; + } + return true; +} + +// ─── Tests ──────────────────────────────────────────────────────────────────── + +describe('wrap / unwrap round-trip — raw bytes', () => { + const sizes = [0, 1, 15, 16, 17, 100, 1000, 10000]; + + for (const size of sizes) { + it(`round-trips ${size}B payload`, async () => { + const data = size === 0 ? new Uint8Array(0) : randomBytes(size); + const chain = await wrap(data, { protocol: 'raw' }); + const result = await unwrap(chain.frame, { blsPublicKey: chain.blsPublicKey }); + expect(result.protocol).toBe('raw'); + expect(result.dotCount).toBe(chain.chunkCount); + expect(bytesEqual(result.data, data)).toBe(true); + }); + } +}); + +describe('wrap / unwrap round-trip — JSON', () => { + it('round-trips a JSON string', async () => { + const json = '{"hello":"world","count":42,"nested":{"key":"value"}}'; + const data = new TextEncoder().encode(json); + const chain = await wrap(data, { protocol: 'json' }); + const result = await unwrap(chain.frame, { blsPublicKey: chain.blsPublicKey }); + expect(result.protocol).toBe('json'); + expect(new TextDecoder().decode(result.data)).toBe(json); + }); + + it('round-trips a large JSON payload', async () => { + const obj: Record<string, number> = {}; + for (let i = 0; i < 100; i++) obj[`key${i}`] = i * 3; + const data = new TextEncoder().encode(JSON.stringify(obj)); + const chain = await wrap(data, { protocol: 'json' }); + const
result = await unwrap(chain.frame, { blsPublicKey: chain.blsPublicKey }); + expect(new TextDecoder().decode(result.data)).toBe(JSON.stringify(obj)); + }); +}); + +describe('wrap / unwrap — adversarial payloads', () => { + it('round-trips null bytes (all zeros)', async () => { + const data = new Uint8Array(64); // all zeros + const chain = await wrap(data, { protocol: 'raw' }); + const result = await unwrap(chain.frame, { blsPublicKey: chain.blsPublicKey }); + expect(bytesEqual(result.data, data)).toBe(true); + }); + + it('round-trips all-0xFF bytes', async () => { + const data = new Uint8Array(64).fill(0xff); + const chain = await wrap(data, { protocol: 'raw' }); + const result = await unwrap(chain.frame, { blsPublicKey: chain.blsPublicKey }); + expect(bytesEqual(result.data, data)).toBe(true); + }); + + it('round-trips repeated pattern bytes', async () => { + const data = new Uint8Array(128); + for (let i = 0; i < data.length; i++) data[i] = i % 7; + const chain = await wrap(data, { protocol: 'raw' }); + const result = await unwrap(chain.frame, { blsPublicKey: chain.blsPublicKey }); + expect(bytesEqual(result.data, data)).toBe(true); + }); + + it('round-trips a single byte', async () => { + const data = new Uint8Array([0xab]); + const chain = await wrap(data, { protocol: 'raw' }); + const result = await unwrap(chain.frame, { blsPublicKey: chain.blsPublicKey }); + expect(bytesEqual(result.data, data)).toBe(true); + }); +}); + +describe('protocol field', () => { + const protocols = ['https', 'websocket', 'json', 'raw'] as const; + + for (const protocol of protocols) { + it(`preserves protocol=${protocol}`, async () => { + const data = new TextEncoder().encode('test'); + const chain = await wrap(data, { protocol }); + expect(chain.protocol).toBe(protocol); + const result = await unwrap(chain.frame, { blsPublicKey: chain.blsPublicKey }); + expect(result.protocol).toBe(protocol); + }); + } +}); + +describe('BLS verification', () => { + it('verified=true when correct 
blsPublicKey provided', async () => { + const data = new TextEncoder().encode('verify me'); + const chain = await wrap(data); + const result = await unwrap(chain.frame, { blsPublicKey: chain.blsPublicKey }); + expect(result.verified).toBe(true); + }); + + it('verified=false when no blsPublicKey provided', async () => { + const data = new TextEncoder().encode('no key provided'); + const chain = await wrap(data); + const result = await unwrap(chain.frame); // no blsPublicKey + expect(result.verified).toBe(false); + // still decodes correctly + expect(bytesEqual(result.data, data)).toBe(true); + }); +}); + +describe('stateful session', () => { + it('wrapping with same session produces valid round-trips for each call', async () => { + const session = await createSession(); + const payloads = [ + new TextEncoder().encode('first message'), + new TextEncoder().encode('second message'), + new TextEncoder().encode('third message'), + ]; + + for (const payload of payloads) { + const chain = await wrap(payload, { session }); + const result = await unwrap(chain.frame, { blsPublicKey: chain.blsPublicKey }); + expect(bytesEqual(result.data, payload)).toBe(true); + } + }); + + it('stateful session compression improves or stays same over repeated identical payload', async () => { + const session = await createSession(); + // Repeated same payload should compress better as predictor learns + const payload = new TextEncoder().encode('{"event":"click","x":100,"y":200}'); + + const sizes: number[] = []; + for (let i = 0; i < 10; i++) { + const chain = await wrap(payload, { session }); + sizes.push(chain.compressedBytes); + } + + // Verify all round-trip (correctness is more important than compression) + const sessionCheck = await createSession(); + for (let i = 0; i < 3; i++) { + const chain = await wrap(payload, { session: sessionCheck }); + const result = await unwrap(chain.frame, { blsPublicKey: chain.blsPublicKey }); + expect(bytesEqual(result.data, payload)).toBe(true); + } + + 
// Frame sizes should be reasonable (not grow unboundedly) + const firstSize = sizes[0]!; + const lastSize = sizes[sizes.length - 1]!; + // Either stable or improving (within a 2x factor) + expect(lastSize).toBeLessThanOrEqual(firstSize * 2); + }); +}); + +describe('lossless invariant — 20 random payloads', () => { + it('round-trips all 20 random payloads exactly', async () => { + const payloads: Uint8Array[] = []; + for (let i = 0; i < 20; i++) { + const size = Math.floor(Math.random() * 500) + 1; + payloads.push(randomBytes(size)); + } + + for (let i = 0; i < payloads.length; i++) { + const data = payloads[i]!; + const chain = await wrap(data, { protocol: 'raw' }); + const result = await unwrap(chain.frame, { blsPublicKey: chain.blsPublicKey }); + expect(bytesEqual(result.data, data)).toBe(true); + } + }, 15000); +}); + +describe('compression ratio', () => { + it('achieves compressionRatio > 1 for highly repeated data (large payload)', async () => { + // 2000 bytes of repeating pattern — batch encoding should compress well + const pattern = new TextEncoder().encode('AAAAAAAAAAAAAAAA'); // 16B repeated + const data = new Uint8Array(2000); + for (let i = 0; i < data.length; i++) data[i] = pattern[i % pattern.length]!; + + const chain = await wrap(data, { protocol: 'raw' }); + // compressionRatio = originalBytes / compressedBytes + // For highly repetitive data the batch-v2 frame (with RLE type + ts delta) should be smaller + // Note: DOT overhead is 153B per 16B payload = ~9.5x overhead, but compression compensates + // This test checks that the stat field is computed correctly (ratio > 0) + expect(chain.compressionRatio).toBeGreaterThan(0); + expect(chain.originalBytes).toBe(2000); + expect(chain.compressedBytes).toBe(chain.frame.length); + }); + + it('compressionRatio > 1 for repetitive JSON payload', async () => { + // Generate a repetitive JSON payload that should compress + const repeated = JSON.stringify({ a: 1, b: 2, c: 3 }).repeat(20); + const data = new 
TextEncoder().encode(repeated); + const chain = await wrap(data, { protocol: 'json' }); + // With timestamp-delta + type RLE, repeated patterns should achieve > 1 ratio + // for large enough payloads where the frame is smaller than original + expect(chain.compressionRatio).toBeGreaterThan(0); + expect(chain.compressedBytes).toBeGreaterThan(0); + }); +}); + +describe('DotType option', () => { + it('accepts DotType.EPHEMERAL without throwing', async () => { + const data = new TextEncoder().encode('ephemeral message'); + const chain = await wrap(data, { type: DotType.EPHEMERAL }); + const result = await unwrap(chain.frame, { blsPublicKey: chain.blsPublicKey }); + expect(bytesEqual(result.data, data)).toBe(true); + }); +}); + +describe('empty payload', () => { + it('round-trips zero-length payload', async () => { + const data = new Uint8Array(0); + const chain = await wrap(data, { protocol: 'raw' }); + const result = await unwrap(chain.frame, { blsPublicKey: chain.blsPublicKey }); + expect(result.data.length).toBe(0); + expect(bytesEqual(result.data, data)).toBe(true); + }); +}); diff --git a/packages/wrapper/src/types.ts b/packages/wrapper/src/types.ts new file mode 100644 index 000000000..bab1ed85d --- /dev/null +++ b/packages/wrapper/src/types.ts @@ -0,0 +1,138 @@ +// @dotprotocol/wrapper — types + +import type { BLSKeypair } from '@dotprotocol/core'; +export { DotType } from '@dotprotocol/core'; +import { DotType } from '@dotprotocol/core'; + +// ─── Protocol ───────────────────────────────────────────────────────────────── + +/** Supported protocol types for wrap/unwrap */ +export type Protocol = 'https' | 'websocket' | 'json' | 'raw'; + +/** Protocol byte IDs (stored in first byte of prefixed payload) */ +export const PROTOCOL_ID: Record<Protocol, number> = { + raw: 0, + json: 1, + https: 2, + websocket: 3, +}; + +/** Reverse map: byte → Protocol (must stay the exact inverse of PROTOCOL_ID) */ +export const PROTOCOL_FROM_ID: Record<number, Protocol> = { + 0: 'raw', + 1: 'json', + 2: 'https', + 3: 'websocket', +}; + +// ─── WrappedChain
───────────────────────────────────────────────────────────── + +/** The wire format returned by wrap() */ +export interface WrappedChain { + /** Protocol this chain represents */ + protocol: Protocol; + /** Compressed batch frame (batch-v2 format) — what you transmit */ + frame: Uint8Array; + /** Raw DOT bytes (uncompressed, for inspection/verification) */ + dots: Uint8Array[]; + /** Number of payload chunks (= number of DOTs) */ + chunkCount: number; + /** Total original payload bytes */ + originalBytes: number; + /** Compressed frame bytes */ + compressedBytes: number; + /** Compression ratio: originalBytes / compressedBytes */ + compressionRatio: number; + /** BLS public key (96B G2 point) — pass to unwrap() for verification */ + blsPublicKey: Uint8Array; + /** BLS aggregate signature over all DOTs */ + batchSeal: Uint8Array; +} + +// ─── UnwrappedPayload ───────────────────────────────────────────────────────── + +/** Result of unwrap() */ +export interface UnwrappedPayload { + /** Original protocol */ + protocol: Protocol; + /** Reconstructed original bytes */ + data: Uint8Array; + /** Whether BLS signature was externally verified (true only when blsPublicKey provided) */ + verified: boolean; + /** Number of DOTs in the chain */ + dotCount: number; + /** Compression stats */ + compressionRatio: number; +} + +// ─── WrapOptions ───────────────────────────────────────────────────────────── + +/** Options for wrap() */ +export interface WrapOptions { + /** Protocol hint (default: 'raw') */ + protocol?: Protocol; + /** Pre-existing session for stateful wrapping (maintains predictor context). If omitted, creates a fresh one-shot keypair. 
*/ + session?: WrapSession; + /** DotType for all DOTs in this chain (default: PUBLIC) */ + type?: DotType; + /** If true, use timestamp-delta encoding in batch (default: true) */ + timestampDelta?: boolean; + /** If true, use RLE for payload types (default: true) */ + payloadTypeRLE?: boolean; +} + +// ─── UnwrapOptions ──────────────────────────────────────────────────────────── + +/** Options for unwrap() */ +export interface UnwrapOptions { + /** + * BLS public key (96B G2) for external verification. + * If provided and verification passes, result.verified = true. + * If omitted, BLS check is skipped and result.verified = false. + */ + blsPublicKey?: Uint8Array; +} + +// ─── BridgeOptions / BridgeHandle ───────────────────────────────────────────── + +/** Options for bridge() */ +export interface BridgeOptions { + /** Port to listen on (default: 8100) */ + port?: number; + /** Hostname to bind (default: '127.0.0.1') */ + host?: string; + /** Target base URL to forward requests to (e.g., 'https://api.example.com') */ + forward?: string; + /** Max request body size in bytes (default: 10MB) */ + maxBodySize?: number; + /** BLS public key for verifying incoming DOT chains (optional) */ + blsPublicKey?: Uint8Array; + /** Session for wrapping responses (optional — creates fresh if omitted) */ + session?: WrapSession; +} + +/** Handle returned by bridge() */ +export interface BridgeHandle { + /** Actual port the bridge is listening on */ + readonly port: number; + /** Stop the bridge; the returned promise carries no value */ + close(): Promise<void>; + /** Number of requests proxied so far */ + readonly requestCount: number; +} + +// ─── WrapSession ───────────────────────────────────────────────────────────── + +/** Stateful session — maintains chain and predictor context across multiple wrap() calls */ +export interface WrapSession { + /** Ed25519 keypair for DOT creation */ + keypair: { publicKey: Uint8Array; privateKey: Uint8Array }; + /** BLS keypair for batch sealing */ + blsKeypair: BLSKeypair; + /**
Accumulated DOT byte arrays from previous wrap() calls */ + dots: Uint8Array[]; + /** Last DOT bytes (for chain linking into next wrap call) */ + lastDot?: Uint8Array; + /** Base timestamp for this session (ms) */ + baseTimestamp: bigint; +} diff --git a/packages/wrapper/src/unwrap.ts b/packages/wrapper/src/unwrap.ts new file mode 100644 index 000000000..d0e248c25 --- /dev/null +++ b/packages/wrapper/src/unwrap.ts @@ -0,0 +1,285 @@ +// @dotprotocol/wrapper — unwrap() + +import { deserializeBatchV2 } from '@dotprotocol/compression'; +import { verifyAggregateSameSigner } from '@dotprotocol/core'; +import type { UnwrappedPayload, UnwrapOptions } from './types.js'; +import { PROTOCOL_FROM_ID } from './types.js'; + +const CHUNK_SIZE = 16; +const HEADER_SIZE = 5; // [1B protocol_id][4B uint32 BE original_length] + +// Frame header layout (batch-v2): +// [0] version +// [1] flags +// [2..5] dot_count (uint32 LE) +// [6..37] shared_pubkey (Ed25519, 32B) +// [38..85] aggregated_bls_sig (BLS G1, 48B) +const FRAME_PUBKEY_OFFSET = 6; +const FRAME_AGGSIG_OFFSET = 38; +const BLS_AGG_SIG_SIZE = 48; + +/** + * Read the shared signer public key (bytes [6..37]) from a batch-v2 frame header. + * Note: this is the Ed25519 *signer* pubkey stored in the frame header, + * not the BLS verification key. Use this only for frame inspection. + * Returns a copy; no length check is done, so a frame shorter than 38 bytes yields fewer than 32 bytes. + * To get the BLS public key for verification, use chain.blsPublicKey from wrap(). + */ +export function readFramePubkey(frame: Uint8Array): Uint8Array { + return frame.slice(FRAME_PUBKEY_OFFSET, FRAME_PUBKEY_OFFSET + 32); +} + +/** + * Unwrap a DOT chain frame back to the original binary payload. + * + * Pass blsPublicKey (from WrappedChain.blsPublicKey) for verified round-trips. + * If blsPublicKey is omitted, BLS check is skipped and result.verified = false.
+ *
+ * @example
+ * // Verified round-trip
+ * const chain = await wrap(data, { protocol: 'json' });
+ * const result = await unwrap(chain.frame, { blsPublicKey: chain.blsPublicKey });
+ * // result.verified === true
+ * // result.data deepEquals data
+ *
+ * // Unverified (just decode)
+ * const result = await unwrap(chain.frame);
+ * // result.verified === false
+ *
+ * @param frame - batch-v2 frame bytes
+ * @param options - optional; supply blsPublicKey to verify the aggregate signature
+ * @returns protocol, original payload bytes, verification flag, DOT count and
+ *          compression ratio (original_length / frame.length, or 1 for empty payloads)
+ * @throws RangeError if the reassembled data is shorter than the 5-byte header
+ *         or the recorded original_length exceeds the available bytes
+ */
+export async function unwrap(frame: Uint8Array, options?: UnwrapOptions): Promise<UnwrappedPayload> {
+  // ── Deserialize frame → DOT bytes ─────────────────────────────────────────
+  // deserializeBatchV2 needs a BLS public key and throws when the aggregate
+  // signature does not verify. The frame header carries only the Ed25519
+  // signer key, not the BLS key, so without a caller-supplied key the
+  // signature cannot be checked here. In that case the columns are parsed
+  // directly and the result is reported as unverified.
+  let dots: Uint8Array[];
+  let verified = false;
+
+  if (options?.blsPublicKey) {
+    // Full verification path — throws if BLS sig is invalid
+    dots = await deserializeBatchV2(frame, options.blsPublicKey);
+    verified = true;
+  } else {
+    // Decode-only path — the signature is NOT checked; verified stays false.
+    dots = await decodeFrameWithoutBLSVerification(frame);
+  }
+
+  // ── Reassemble payload from DOT payload fields ────────────────────────────
+  // Each DOT's payload field is bytes [137..152] = 16 bytes
+  const OFF_PAYLOAD = 137;
+  const assembled = new Uint8Array(dots.length * CHUNK_SIZE);
+  for (let i = 0; i < dots.length; i++) {
+    assembled.set(dots[i]!.subarray(OFF_PAYLOAD, OFF_PAYLOAD + CHUNK_SIZE), i * CHUNK_SIZE);
+  }
+
+  // ── Parse header: [1B protocol_id][4B uint32 BE original_length] ──────────
+  if (assembled.length < HEADER_SIZE) {
+    throw new RangeError(`unwrap: assembled data too short (${assembled.length} bytes)`);
+  }
+  const protocolId = assembled[0]!;
+  const headerView = new DataView(assembled.buffer, assembled.byteOffset + 1, 4);
+  const originalLength = headerView.getUint32(0, false); // big-endian
+
+  // Unknown protocol ids fall back to 'raw' rather than failing the decode.
+  const protocol = PROTOCOL_FROM_ID[protocolId] ?? 'raw';
+
+  // ── Slice to original length ──────────────────────────────────────────────
+  // The last chunk is zero-padded to 16 bytes; original_length trims the padding.
+  const dataStart = HEADER_SIZE;
+  const dataEnd = dataStart + originalLength;
+  if (dataEnd > assembled.length) {
+    throw new RangeError(
+      `unwrap: original_length=${originalLength} exceeds assembled data (${assembled.length - HEADER_SIZE} bytes available)`,
+    );
+  }
+  const data = assembled.slice(dataStart, dataEnd);
+
+  return {
+    protocol,
+    data,
+    verified,
+    dotCount: dots.length,
+    compressionRatio: (originalLength > 0) ? originalLength / frame.length : 1,
+  };
+}
+
+// ─── Frame decode without BLS verification ────────────────────────────────────
+// Manual column parser that reconstructs DOTs from the frame without
+// verifying the BLS aggregate signature. Used when no blsPublicKey is provided.
+
+import { createHash } from 'node:crypto';
+
+const DOT_SIZE = 153;
+const PUBKEY_SIZE = 32;
+const SIG_SIZE = 64;
+const CHAIN_SIZE = 32;
+const PAYLOAD_SIZE_DOT = 16;
+const BLS_AGG_SIG_SIZE_LOCAL = 48;
+const HEADER_BYTES = 86; // 1(ver) + 1(flags) + 4(count) + 32(pubkey) + 48(aggSig)
+
+const FLAG_TS_DELTA = 0b00000001;
+const FLAG_TYPE_RLE = 0b00000010;
+const FLAG_DICT_COMPRESSED = 0b00001000;
+const FLAG_PREDICTION = 0b00010000;
+
+/** SHA-256 digest of `data`, returned as a fresh Uint8Array. */
+function sha256(data: Uint8Array): Uint8Array {
+  return new Uint8Array(createHash('sha256').update(data).digest());
+}
+
+/** Read a big-endian uint64 at `offset` (relative to the start of the view `buf`). */
+function readBigUint64BE(buf: Uint8Array, offset: number): bigint {
+  const view = new DataView(buf.buffer, buf.byteOffset + offset, 8);
+  return view.getBigUint64(0, false);
+}
+
+/** Write `value` as a big-endian uint64 at `offset` (relative to the start of the view `buf`). */
+function writeBigUint64BE(buf: Uint8Array, offset: number, value: bigint): void {
+  const view = new DataView(buf.buffer, buf.byteOffset + offset, 8);
+  view.setBigUint64(0, value, false);
+}
+
+/**
+ * Decode a batch-v2 frame without BLS signature verification.
+ * Reconstructs the DOT bytes from the frame's column data.
+ *
+ * Only frames without dictionary compression and without prediction coding
+ * can be decoded here; those throw an Error asking for a blsPublicKey.
+ *
+ * The rebuilt DOTs carry the 48-byte aggregate signature (zero-padded to 64
+ * bytes) in their sig field and a chain field recomputed as the SHA-256 of
+ * the previously rebuilt DOT (all zeros for the first one).
+ *
+ * @param frame - batch-v2 frame bytes
+ * @returns one 153-byte DOT per entry in the frame
+ * @throws RangeError if the frame is truncated or dot_count is 0
+ * @throws Error if the frame uses dictionary compression or prediction coding
+ */
+async function decodeFrameWithoutBLSVerification(frame: Uint8Array): Promise<Uint8Array[]> {
+  if (frame.length < HEADER_BYTES) {
+    throw new RangeError(`unwrap: frame too short (${frame.length} bytes)`);
+  }
+
+  // Parse fixed header
+  // byte 0: version (skip check — we trust the frame)
+  const flags = frame[1]!;
+  const hasTsDelta = (flags & FLAG_TS_DELTA) !== 0;
+  const hasTypeRLE = (flags & FLAG_TYPE_RLE) !== 0;
+  const hasDictCompressed = (flags & FLAG_DICT_COMPRESSED) !== 0;
+  const hasPrediction = (flags & FLAG_PREDICTION) !== 0;
+
+  const dotCount = new DataView(frame.buffer, frame.byteOffset + 2, 4).getUint32(0, true); // LE
+
+  if (dotCount === 0) throw new RangeError('unwrap: dot_count is 0');
+
+  const pubkey = frame.slice(FRAME_PUBKEY_OFFSET, FRAME_PUBKEY_OFFSET + PUBKEY_SIZE); // Ed25519 pubkey (32B)
+  const aggSig = frame.slice(FRAME_AGGSIG_OFFSET, FRAME_AGGSIG_OFFSET + BLS_AGG_SIG_SIZE_LOCAL); // BLS aggSig (48B)
+
+  if (hasDictCompressed) {
+    throw new Error('unwrap: dictionary-compressed frames require blsPublicKey for decoding');
+  }
+  if (hasPrediction) {
+    throw new Error('unwrap: prediction-coded frames require blsPublicKey for decoding');
+  }
+
+  // Body starts at byte 86
+  const body = frame.subarray(HEADER_BYTES);
+
+  // dot_count comes from an unverified header. Every DOT contributes a raw
+  // 16-byte payload to the body, so reject impossible counts before they can
+  // drive any decoding or allocation below.
+  const payloadTotal = dotCount * PAYLOAD_SIZE_DOT;
+  if (payloadTotal > body.length) {
+    throw new RangeError('unwrap: buffer too short for payloads');
+  }
+
+  // ── Decode timestamp column ───────────────────────────────────────────────
+  let timestamps: bigint[];
+  let tsEnd: number;
+
+  if (hasTsDelta) {
+    const { decodeTimestampDeltas, encodeTimestampDeltas } = await import('@dotprotocol/compression');
+    timestamps = decodeTimestampDeltas(body, dotCount);
+    // Only the timestamps come back from the decoder, so the column width is
+    // recovered by re-encoding them (assumes the encoding is canonical — TODO confirm).
+    tsEnd = encodeTimestampDeltas(timestamps).length;
+  } else {
+    // Fixed-width column: one big-endian uint64 per DOT.
+    tsEnd = dotCount * 8;
+    timestamps = [];
+    for (let i = 0; i < dotCount; i++) {
+      timestamps.push(readBigUint64BE(body, i * 8));
+    }
+  }
+
+  // ── Decode type column ────────────────────────────────────────────────────
+  let types: Uint8Array;
+  let typesEnd: number;
+
+  if (hasTypeRLE) {
+    const { decodePayloadTypes } = await import('@dotprotocol/compression');
+    // The payload column is the fixed-size tail of the body, so the RLE
+    // column is whatever lies between the timestamps and that tail.
+    const rleEnd = body.length - payloadTotal;
+    if (rleEnd <= tsEnd) {
+      throw new RangeError('unwrap: buffer too short for RLE types + payloads');
+    }
+    types = decodePayloadTypes(body.subarray(tsEnd, rleEnd), dotCount);
+    typesEnd = rleEnd;
+  } else {
+    types = body.subarray(tsEnd, tsEnd + dotCount);
+    typesEnd = tsEnd + dotCount;
+  }
+
+  // ── Decode payload column ─────────────────────────────────────────────────
+  const payloadStart = typesEnd;
+  if (payloadStart + payloadTotal > body.length) {
+    throw new RangeError('unwrap: buffer too short for payloads');
+  }
+
+  // ── Reconstruct DOTs ──────────────────────────────────────────────────────
+  // sig field = aggSig (48B) + 16 zero bytes
+  const sigField = new Uint8Array(SIG_SIZE);
+  sigField.set(aggSig);
+
+  const dots: Uint8Array[] = [];
+  let prevDot: Uint8Array | null = null;
+
+  for (let i = 0; i < dotCount; i++) {
+    const dot = new Uint8Array(DOT_SIZE); // zero-filled
+    dot.set(pubkey, 0); // pubkey
+    dot.set(sigField, 32); // sig
+
+    // chain hash [96..127]: left as zeros for the first DOT
+    if (prevDot !== null) {
+      dot.set(sha256(prevDot), 96);
+    }
+
+    writeBigUint64BE(dot, 128, timestamps[i]!); // timestamp
+    dot[136] = types[i]!; // type
+
+    // payload (16B)
+    const payloadOffset = payloadStart + i * PAYLOAD_SIZE_DOT;
+    dot.set(body.subarray(payloadOffset, payloadOffset + PAYLOAD_SIZE_DOT), 137);
+
+    dots.push(dot);
+    prevDot = dot;
+  }
+
+  return dots;
+}
diff --git a/packages/wrapper/src/wrap.ts b/packages/wrapper/src/wrap.ts
new file mode 100644
index 000000000..c858563e6
--- /dev/null
+++ b/packages/wrapper/src/wrap.ts
@@ -0,0 +1,109 @@
+// @dotprotocol/wrapper — wrap()
+
+import { createKeypair, createBLSKeypair, createDOT, toBytes, DotType } from '@dotprotocol/core';
+import { serializeBatchV2 } from '@dotprotocol/compression';
+import { createSession } from './session.js';
+import type { WrappedChain, WrapOptions, WrapSession } from './types.js';
+import { PROTOCOL_ID } from './types.js';
+
+// Header prefix: [1B protocol_id][4B original_length uint32 BE] = 5 bytes
+const HEADER_SIZE = 5;
+const CHUNK_SIZE = 16; // DOT payload field is 16 bytes
+
+/**
+ * Wrap a binary payload as a signed, compressed DOT chain.
+ *
+ * Each 16-byte chunk of the prefixed payload becomes one DOT.
+ * The DOTs are BLS-batch-signed and compressed via batch-v2.
+ *
+ * Stateless (no session): each call creates fresh keypairs, no chain linking.
+ * Stateful (with session): DOTs chain from previous calls for better compression.
+ *
+ * @example
+ * // Stateless (simple)
+ * const chain = await wrap(new TextEncoder().encode('{"hello":"world"}'), { protocol: 'json' });
+ *
+ * // Stateful (better compression over time)
+ * const session = await createSession();
+ * const chain1 = await wrap(payload1, { session });
+ * const chain2 = await wrap(payload2, { session });
+ *
+ * @param payload - bytes to wrap; length must fit the uint32 length prefix
+ * @param options - protocol tag, DOT type, batch encoding flags, optional session
+ * @returns the frame, the DOT bytes created by this call, size statistics,
+ *          the BLS public key needed by unwrap(), and the aggregate signature
+ * @throws RangeError if payload.length does not fit in a uint32
+ */
+export async function wrap(payload: Uint8Array, options?: WrapOptions): Promise<WrappedChain> {
+  const protocol = options?.protocol ?? 'raw';
+  const dotType = options?.type ?? DotType.PUBLIC;
+  const useTsDelta = options?.timestampDelta !== false;
+  const useTypeRLE = options?.payloadTypeRLE !== false;
+
+  // The length prefix is a uint32; DataView.setUint32 would silently wrap a
+  // larger value and unwrap() would then truncate the payload.
+  if (payload.length > 0xffffffff) {
+    throw new RangeError(`wrap: payload too large (${payload.length} bytes)`);
+  }
+
+  // ── Build session (ephemeral if not provided) ──────────────────────────────
+  const session: WrapSession = options?.session ?? await createSession();
+
+  // ── Prefix payload with 5-byte header ─────────────────────────────────────
+  // [1B protocol_id][4B original_length big-endian uint32]
+  const protocolId = PROTOCOL_ID[protocol];
+  const prefixed = new Uint8Array(HEADER_SIZE + payload.length);
+  prefixed[0] = protocolId;
+  const view = new DataView(prefixed.buffer);
+  view.setUint32(1, payload.length, false); // big-endian
+  prefixed.set(payload, HEADER_SIZE);
+
+  // ── Chunk into 16-byte pieces ──────────────────────────────────────────────
+  // prefixed always holds at least the 5 header bytes, so there is always ≥ 1 chunk.
+  const chunkCount = Math.ceil(prefixed.length / CHUNK_SIZE);
+  const chunks: Uint8Array[] = [];
+  for (let i = 0; i < chunkCount; i++) {
+    const chunk = new Uint8Array(CHUNK_SIZE); // zero-padded
+    const start = i * CHUNK_SIZE;
+    const end = Math.min(start + CHUNK_SIZE, prefixed.length);
+    chunk.set(prefixed.subarray(start, end));
+    chunks.push(chunk);
+  }
+
+  // ── Create one DOT per chunk ───────────────────────────────────────────────
+  // NOTE(review): with a shared session, two calls in the same millisecond can
+  // yield a first timestamp that is not greater than the previous batch's last
+  // one (nowMs + i) — confirm createDOT / batch-v2 tolerate that.
+  const nowMs = Date.now();
+  const newDots: Uint8Array[] = [];
+
+  for (let i = 0; i < chunks.length; i++) {
+    // The first DOT links to the session's last DOT (undefined when there is none).
+    const previous = i === 0 ? session.lastDot : newDots[i - 1];
+    const dot = await createDOT({
+      keypair: session.keypair,
+      payload: chunks[i]!,
+      type: dotType,
+      previous,
+      ts: nowMs + i, // ensure monotone within batch
+    });
+    newDots.push(toBytes(dot));
+  }
+
+  // ── BLS-batch-serialize (compress) ────────────────────────────────────────
+  const frame = await serializeBatchV2(newDots, session.blsKeypair, {
+    timestampDelta: useTsDelta,
+    payloadTypeRLE: useTypeRLE,
+  });
+
+  // ── Update session state for next call ────────────────────────────────────
+  // Only a caller-supplied session is mutated; an ephemeral one is discarded.
+  if (options?.session) {
+    session.dots.push(...newDots);
+    session.lastDot = newDots[newDots.length - 1];
+  }
+
+  // ── Compute stats ─────────────────────────────────────────────────────────
+  const originalBytes = payload.length;
+  const compressedBytes = frame.length;
+  const compressionRatio = originalBytes > 0
+    ? originalBytes / compressedBytes
+    : 1;
+
+  // Extract aggSig from frame header [38..85]
+  const batchSeal = frame.slice(38, 86);
+
+  return {
+    protocol,
+    frame,
+    dots: newDots,
+    chunkCount,
+    originalBytes,
+    compressedBytes,
+    compressionRatio,
+    blsPublicKey: session.blsKeypair.publicKey,
+    batchSeal,
+  };
+}
diff --git a/packages/wrapper/tsconfig.json b/packages/wrapper/tsconfig.json
new file mode 100644
index 000000000..f6b4a9e5b
--- /dev/null
+++ b/packages/wrapper/tsconfig.json
@@ -0,0 +1,9 @@
+{
+  "extends": "../../tsconfig.base.json",
+  "compilerOptions": {
+    "outDir": "dist",
+    "rootDir": "src",
+    "declarationDir": "dist"
+  },
+  "include": ["src"]
+}
diff --git a/packages/wrapper/tsdown.config.ts b/packages/wrapper/tsdown.config.ts
new file mode 100644
index 000000000..aa536bdcb
--- /dev/null
+++ b/packages/wrapper/tsdown.config.ts
@@ -0,0 +1,11 @@
+import { defineConfig } from 'tsdown'
+
+export default defineConfig({
+  entry: ['src/index.ts'],
+  format: ['esm', 'cjs'],
+  dts: true,
+  clean: true,
+  outDir: 'dist',
+  platform: 'neutral',
+  sourcemap: true,
+})
diff --git a/packages/wrapper/vitest.config.ts 
b/packages/wrapper/vitest.config.ts
new file mode 100644
index 000000000..9c2c02c19
--- /dev/null
+++ b/packages/wrapper/vitest.config.ts
@@ -0,0 +1,18 @@
+import { defineConfig } from 'vitest/config';
+import { resolve } from 'node:path';
+
+export default defineConfig({ // Vitest configuration for the wrapper package
+  resolve: {
+    alias: { // map sibling packages to their TypeScript entry points (presumably so tests need no prior build)
+      '@dotprotocol/core': resolve(__dirname, '../core/src/index.ts'),
+      '@dotprotocol/compression': resolve(__dirname, '../compression/src/index.ts'),
+    },
+  },
+  test: {
+    environment: 'node',
+    coverage: {
+      provider: 'v8',
+      exclude: ['scripts/**', 'src/tests/**', '**/*.d.ts', '**/*.config.*'], // keep scripts, tests, typings and config files out of coverage
+    },
+  },
+});