diff --git a/CHANGELOG.md b/CHANGELOG.md
index f827bc6..6c6fd60 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,26 @@ documented with migration notes.
 - Additional docs for suite-specific adoption.
 - Better compatibility tests for Promptfoo variable contracts.
 
+## [0.1.8] - 2026-07-04
+
+### Added
+
+- Added generic pointwise summary helpers for advisory verdict counts and calibration notes.
+- Documented the shared pointwise report summary pattern for curated manual evidence.
+
+### Fixed
+
+- Hardened pointwise judge result handling so provider, prompt version, rubric version, and run
+  manifest metadata must match the configured run before the run manifest is written.
+- Added regression tests for malformed or missing pointwise run metadata.
+
+### Notes
+
+- Deterministic `run-case` and manual `report` compatibility are preserved.
+- Consumer repos still own judge semantics, prompts, fixtures, and calibration policy.
+- No npm package is published.
+- Consumers may pin `github:agentic-workflow-kit/eval-kit#v0.1.8`.
+
 ## [0.1.7] - 2026-07-04
 
 ### Fixed
@@ -135,7 +155,8 @@ documented with migration notes.
 - Suite-specific presets remain deferred.
 - Consumer repos own their own semantics, prompts, cases, and pass/fail policies.
 
-[Unreleased]: https://github.com/agentic-workflow-kit/eval-kit/compare/v0.1.7...main
+[Unreleased]: https://github.com/agentic-workflow-kit/eval-kit/compare/v0.1.8...main
+[0.1.8]: https://github.com/agentic-workflow-kit/eval-kit/compare/v0.1.7...v0.1.8
 [0.1.7]: https://github.com/agentic-workflow-kit/eval-kit/compare/v0.1.6...v0.1.7
 [0.1.6]: https://github.com/agentic-workflow-kit/eval-kit/compare/v0.1.5...v0.1.6
 [0.1.5]: https://github.com/agentic-workflow-kit/eval-kit/compare/v0.1.4...v0.1.5
diff --git a/README.md b/README.md
index b24c8c8..41b0410 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ Shared evaluation infrastructure for `agentic-workflow-kit` repositories.
 ```json
 {
   "devDependencies": {
-    "@agentic-workflow-kit/eval-kit": "github:agentic-workflow-kit/eval-kit#v0.1.7"
+    "@agentic-workflow-kit/eval-kit": "github:agentic-workflow-kit/eval-kit#v0.1.8"
   }
 }
 ```
@@ -70,7 +70,7 @@ Install from a Git tag in a consumer repo:
 ```json
 {
   "devDependencies": {
-    "@agentic-workflow-kit/eval-kit": "github:agentic-workflow-kit/eval-kit#v0.1.7"
+    "@agentic-workflow-kit/eval-kit": "github:agentic-workflow-kit/eval-kit#v0.1.8"
   },
   "scripts": {
     "eval:doctor": "eval-kit doctor --config evals/eval-kit.config.json",
@@ -196,6 +196,7 @@ v0.1.4
 v0.1.5
 v0.1.6
 v0.1.7
+v0.1.8
 v0.2.0
 ```
 
diff --git a/docs/design/consumer-integration.md b/docs/design/consumer-integration.md
index 4916ee8..dee0719 100644
--- a/docs/design/consumer-integration.md
+++ b/docs/design/consumer-integration.md
@@ -9,7 +9,7 @@ Consumer repos should adopt eval-kit through a pinned Git tag and keep their eva
 ```json
 {
   "devDependencies": {
-    "@agentic-workflow-kit/eval-kit": "github:agentic-workflow-kit/eval-kit#v0.1.7"
+    "@agentic-workflow-kit/eval-kit": "github:agentic-workflow-kit/eval-kit#v0.1.8"
   }
 }
 ```
diff --git a/docs/guides/consumer-integration.md b/docs/guides/consumer-integration.md
index 5ad79c6..c1e653a 100644
--- a/docs/guides/consumer-integration.md
+++ b/docs/guides/consumer-integration.md
@@ -18,7 +18,7 @@ If you cannot state the eval goal, do not bootstrap a suite yet. Empty harnesses
 ```json
 {
   "devDependencies": {
-    "@agentic-workflow-kit/eval-kit": "github:agentic-workflow-kit/eval-kit#v0.1.7"
+    "@agentic-workflow-kit/eval-kit": "github:agentic-workflow-kit/eval-kit#v0.1.8"
   }
 }
 ```
diff --git a/docs/guides/model-judge-calibration-reporting.md b/docs/guides/model-judge-calibration-reporting.md
index 4c5cf80..a2358ed 100644
--- a/docs/guides/model-judge-calibration-reporting.md
+++ b/docs/guides/model-judge-calibration-reporting.md
@@ -43,5 +43,15 @@ Manual reports should be written for reviewer handoff, not CI:
   risks;
 - state that model-judge evidence cannot upgrade deterministic red or yellow results.
 
+Eval-kit exposes `countPointwiseVerdicts` and `formatPointwiseCalibrationSummary` as a shared
+summary pattern. Consumers may use these helpers when writing curated notes or report hooks, but the
+consumer still owns expected-good/expected-bad labels, critical-item policy, and false-pass or
+false-fail interpretation.
+
+For pointwise result bundles, eval-kit fails closed when required run metadata is absent or
+mismatched. A valid pointwise run records run id, one case id, model, provider, reasoning effort when
+present, prompt version, rubric version, runner version, and the artifact/output paths for the
+pointwise result bundle.
+
 Keep raw provider bundles under ignored `evals/results/` paths unless a human curates and commits a
 summary.
diff --git a/docs/guides/quickstart.md b/docs/guides/quickstart.md
index 1578331..f6b9463 100644
--- a/docs/guides/quickstart.md
+++ b/docs/guides/quickstart.md
@@ -7,7 +7,7 @@ This guide adds a generic deterministic eval suite to a consumer repo.
 ```json
 {
   "devDependencies": {
-    "@agentic-workflow-kit/eval-kit": "github:agentic-workflow-kit/eval-kit#v0.1.7"
+    "@agentic-workflow-kit/eval-kit": "github:agentic-workflow-kit/eval-kit#v0.1.8"
   }
 }
 ```
diff --git a/docs/reference/adapter-contract.md b/docs/reference/adapter-contract.md
index 2033284..10537b0 100644
--- a/docs/reference/adapter-contract.md
+++ b/docs/reference/adapter-contract.md
@@ -136,6 +136,19 @@ export const canonicalizeExpectedItemMetadata = (actualItems, expectedItems) =>
   }));
 ```
 
+Eval-kit exports generic pointwise helpers for consumers that curate summaries:
+
+```js
+import {
+  countPointwiseVerdicts,
+  formatPointwiseCalibrationSummary,
+} from "@agentic-workflow-kit/eval-kit";
+```
+
+Use these helpers to report advisory counts for `covered`, `partial`, `missing`, `contradicted`, and
+`unknown`, plus expected-good/expected-bad calibration labels and false-pass/false-fail notes. The
+helpers do not define consumer semantics.
+
 ## Pairwise judge hook
 
 Required for `judge-pairwise`:
diff --git a/docs/reference/release-process.md b/docs/reference/release-process.md
index 80a6693..01fc9b6 100644
--- a/docs/reference/release-process.md
+++ b/docs/reference/release-process.md
@@ -34,7 +34,7 @@ Consumers depend on tags like:
 Title:
 
 ```text
-chore(release): v0.1.7
+chore(release): v0.1.8
 ```
 
 Required changes:
@@ -63,18 +63,18 @@ git checkout main
 git pull --ff-only
 git rev-parse HEAD
 
-git tag -a v0.1.7 -m "v0.1.7"
-git push origin v0.1.7
+git tag -a v0.1.8 -m "v0.1.8"
+git push origin v0.1.8
 ```
 
 Verify:
 
 ```bash
-git rev-parse v0.1.7^{}
-git show --no-patch --decorate v0.1.7
+git rev-parse v0.1.8^{}
+git show --no-patch --decorate v0.1.8
 ```
 
-`v0.1.7^{}` must point to the release commit. With an annotated tag, `git rev-parse v0.1.7`
+`v0.1.8^{}` must point to the release commit. With an annotated tag, `git rev-parse v0.1.8`
 returns the tag object; `^{}` dereferences to the commit.
 
 ## GitHub Release
@@ -93,7 +93,7 @@ For each consumer repo:
 
 ```json
 {
-  "@agentic-workflow-kit/eval-kit": "github:agentic-workflow-kit/eval-kit#v0.1.7"
+  "@agentic-workflow-kit/eval-kit": "github:agentic-workflow-kit/eval-kit#v0.1.8"
 }
 ```
 
@@ -108,7 +108,7 @@ pnpm check
 3. Run consumer smoke commands, for example in `technical-design`:
 
 ```bash
-pnpm eval:case -- --case case-tiny-laundry-pickup-v1 --candidate evals/cases/case-tiny-laundry-pickup-v1/reference-design.md --run-id verify-eval-kit-v0.1.7
+pnpm eval:case -- --case case-tiny-laundry-pickup-v1 --candidate evals/cases/case-tiny-laundry-pickup-v1/reference-design.md --run-id verify-eval-kit-v0.1.8
 ```
 
 4. Open a PR with dependency, lockfile, and any compatibility fixes.
@@ -120,7 +120,7 @@ Do not move the tag.
 Create a new patch release:
 
 ```text
-v0.1.7 -> v0.1.8
+v0.1.8 -> v0.1.9
 ```
 
 Then open consumer bump PRs.
diff --git a/docs/reference/results.md b/docs/reference/results.md
index 1675541..a017f8e 100644
--- a/docs/reference/results.md
+++ b/docs/reference/results.md
@@ -39,7 +39,7 @@ Current schema:
   "run_type": "deterministic",
   "runner": {
     "id": "generic-eval-case",
-    "version": "0.1.7"
+    "version": "0.1.8"
   },
   "case_ids": ["case-example-v1"],
   "started_at": "2026-07-03T00:00:00.000Z",
@@ -113,3 +113,9 @@ CLI candidate labels. Its `randomization.original_order` field records the origi
 candidate keys were displayed as Candidate A/B for the model judge.
 
 Treat these as potentially sensitive.
+
+Pointwise `judge-coverage` runs fail closed: eval-kit does not write the manifest if required run
+metadata is missing or mismatched. Required pointwise metadata includes the run id, exactly one
+case id, model, provider, reasoning effort when supplied, prompt version, rubric version, runner
+version, and artifact/output paths for the pointwise report, structured pointwise result,
+Promptfoo config, raw Promptfoo results, and Promptfoo HTML report.
diff --git a/docs/schemas.md b/docs/schemas.md
index b754470..b33eca7 100644
--- a/docs/schemas.md
+++ b/docs/schemas.md
@@ -117,6 +117,20 @@ Optional model-run fields:
 - `randomization`
 - `provenance.parent_run_ids`
 
+For `judge-coverage` pointwise runs, eval-kit additionally validates the run metadata before
+writing the manifest. Required pointwise metadata is:
+
+- `run_id`;
+- exactly one `case_ids` entry matching the judged case;
+- `model`;
+- `provider`;
+- `reasoning_effort` when supplied by the run command;
+- `prompt_version`;
+- `rubric_version`;
+- `runner.version`;
+- artifact and output paths for the pointwise report, structured pointwise result, Promptfoo config,
+  raw Promptfoo results, and Promptfoo HTML report.
+
 ### `finding.schema.json`
 
 Generic minimal finding shape:
diff --git a/package.json b/package.json
index 4ce0780..1547e9c 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@agentic-workflow-kit/eval-kit",
-  "version": "0.1.7",
+  "version": "0.1.8",
   "description": "Portable eval runner primitives for local eval suites.",
   "private": true,
   "type": "module",
diff --git a/skills/bootstrap-eval-suite/SKILL.md b/skills/bootstrap-eval-suite/SKILL.md
index cc0db07..07b44f6 100644
--- a/skills/bootstrap-eval-suite/SKILL.md
+++ b/skills/bootstrap-eval-suite/SKILL.md
@@ -29,6 +29,8 @@ standard two-config pattern:
 - Document the local calibration policy before treating pointwise results as more than raw advisory
   evidence. The policy should define expected-good and expected-bad fixture labels, `partial` and
   `unknown` handling, and where curated summaries live.
+- For curated summaries, use the shared count shape for `covered`, `partial`, `missing`,
+  `contradicted`, and `unknown`, then add consumer-owned false-pass and false-fail notes.
 
 ## Boundaries
 
diff --git a/skills/review-eval-suite/SKILL.md b/skills/review-eval-suite/SKILL.md
index b72d69e..bfe0966 100644
--- a/skills/review-eval-suite/SKILL.md
+++ b/skills/review-eval-suite/SKILL.md
@@ -29,6 +29,9 @@ Use this skill when auditing or reviewing an eval-kit suite.
   passes.
 - Treat `partial` as non-covered unless the consumer explicitly documents why a non-critical partial
   is acceptable. Repeated `unknown` verdicts are calibration or prompt-quality risks.
+- Verify pointwise run metadata before trusting manual judge evidence: run id, one case id, model,
+  provider, reasoning effort when present, prompt version, rubric version, runner version, and
+  artifact/output paths must be present and coherent.
 - Treat run-producing semantic portfolios as local on-demand evidence before significant changes, not default CI.
 - Do not claim suite readiness without command evidence.
 
diff --git a/skills/run-eval-suite/SKILL.md b/skills/run-eval-suite/SKILL.md
index 56da791..1f0f472 100644
--- a/skills/run-eval-suite/SKILL.md
+++ b/skills/run-eval-suite/SKILL.md
@@ -29,6 +29,9 @@ Use this skill when executing a local eval-kit suite.
   scripts before any manual `eval:judge:coverage` run.
 - For pointwise model-judge summaries, treat `partial`, `missing`, `contradicted`, and `unknown` as
   non-covered unless the consumer policy explicitly accepts the item.
+- Prefer the eval-kit pointwise summary helpers for curated report counts, and record
+  expected-good/expected-bad labels plus false-pass/false-fail notes when summarizing manual judge
+  evidence.
 - Expected-bad fixtures should remain adverse on their intended defect. Do not describe an adverse
   bad-fixture result as a failed eval when it matches the calibration label.
 - Preserve raw outputs according to the consumer repo's artifact policy.
@@ -38,4 +41,6 @@ Use this skill when executing a local eval-kit suite.
 Report the config path, cases run, result directories, verdicts, report paths, and any skipped or
 advisory-only checks. For model-assisted runs, state that provider calls were explicitly requested.
 Report deterministic evidence first, then model-judge counts for `covered`, `partial`, `missing`,
-`contradicted`, and `unknown`.
+`contradicted`, and `unknown`. If a pointwise result manifest is missing run id, case id, model,
+provider, prompt version, rubric version, runner version, or artifact paths, treat that run as
+invalid evidence.
diff --git a/src/index.mjs b/src/index.mjs
index 44c828d..3c14479 100644
--- a/src/index.mjs
+++ b/src/index.mjs
@@ -20,6 +20,13 @@ export {
   runPromptfooRaw,
 } from "./promptfoo.mjs";
 export { aggregateVerdict, criticalBlockerCount } from "./verdict.mjs";
+export {
+  POINTWISE_VERDICTS,
+  countPointwiseVerdicts,
+  formatPointwiseCalibrationSummary,
+  formatPointwiseVerdictCounts,
+  validatePointwiseRunMetadata,
+} from "./pointwise.mjs";
 
 export { loadConfig } from "./config.mjs";
 export {
diff --git a/src/pointwise.mjs b/src/pointwise.mjs
new file mode 100644
index 0000000..c400614
--- /dev/null
+++ b/src/pointwise.mjs
@@ -0,0 +1,299 @@
+/**
+ * Generic pointwise model-judge helpers: verdict counting, advisory summary
+ * formatting, and fail-closed validation of `judge-coverage` run manifests.
+ */
+
+/**
+ * Verdict labels in canonical reporting order. Frozen so a consumer cannot
+ * mutate the exported array out of sync with the membership set below.
+ */
+export const POINTWISE_VERDICTS = Object.freeze([
+  "covered",
+  "partial",
+  "missing",
+  "contradicted",
+  "unknown",
+]);
+
+const pointwiseVerdictSet = new Set(POINTWISE_VERDICTS);
+
+/** Artifact roles required when the caller does not supply its own list. */
+const DEFAULT_REQUIRED_ARTIFACT_ROLES = Object.freeze([
+  "report",
+  "pointwise_result",
+  "promptfoo_config",
+  "raw_promptfoo_results",
+  "promptfoo_html_report",
+]);
+
+/** Windows drive prefix (`C:`); such a path is not relative to the run dir. */
+const WINDOWS_DRIVE_PREFIX = /^[A-Za-z]:/;
+
+/**
+ * Returns `value` when it is a non-blank string.
+ *
+ * @param {unknown} value - Candidate metadata value.
+ * @param {string} label - Field name used in the error message.
+ * @returns {string} The validated value, unchanged.
+ * @throws {Error} When `value` is not a string or is blank.
+ */
+const requiredString = (value, label) => {
+  if (typeof value !== "string" || value.trim().length === 0) {
+    throw new Error(`pointwise run metadata missing ${label}`);
+  }
+  return value;
+};
+
+/**
+ * Requires `actual` to be a non-blank string and, when `expected` is defined,
+ * to equal it exactly.
+ *
+ * @param {unknown} actual - Value read from the manifest.
+ * @param {string | undefined} expected - Configured value; `undefined` skips
+ *   the equality check but presence is still required.
+ * @param {string} label - Field name used in the error message.
+ * @throws {Error} When `actual` is missing or differs from `expected`.
+ */
+const requireEqual = (actual, expected, label) => {
+  requiredString(actual, label);
+  if (expected !== undefined && actual !== expected) {
+    throw new Error(
+      `pointwise run metadata ${label} mismatch: expected ${expected}, got ${actual}`,
+    );
+  }
+};
+
+/**
+ * Tallies pointwise verdicts.
+ *
+ * @param {Array<{ verdict: string }>} items - Judged items.
+ * @returns {Record<string, number>} One count per `POINTWISE_VERDICTS` entry,
+ *   zeros included.
+ * @throws {Error} When `items` is not an array or an item's verdict is not in
+ *   `POINTWISE_VERDICTS`.
+ */
+export const countPointwiseVerdicts = (items) => {
+  if (!Array.isArray(items)) {
+    throw new Error("pointwise items must be an array");
+  }
+  const counts = Object.fromEntries(
+    POINTWISE_VERDICTS.map((verdict) => [verdict, 0]),
+  );
+  for (const item of items) {
+    if (!pointwiseVerdictSet.has(item?.verdict)) {
+      throw new Error(`unknown pointwise verdict: ${item?.verdict}`);
+    }
+    counts[item.verdict] += 1;
+  }
+  return counts;
+};
+
+/**
+ * Renders one Markdown bullet per verdict, in canonical order.
+ *
+ * @param {Record<string, number> | undefined} counts - Verdict counts; absent
+ *   entries render as 0.
+ * @returns {string[]} Markdown list lines.
+ */
+export const formatPointwiseVerdictCounts = (counts) =>
+  POINTWISE_VERDICTS.map(
+    (verdict) => `- ${verdict}: ${counts?.[verdict] ?? 0}`,
+  );
+
+/**
+ * Builds the advisory Markdown summary section for a pointwise run.
+ *
+ * @param {object} [options] - Summary fields; each has a placeholder default.
+ * @param {string} [options.title] - Section heading text.
+ * @param {Record<string, number>} [options.counts] - Verdict counts.
+ * @param {string} [options.fixtureLabel] - Consumer-owned fixture label.
+ * @param {string} [options.expectedOutcome] - Consumer-owned expected outcome.
+ * @param {string} [options.falsePass] - False-pass note.
+ * @param {string} [options.falseFail] - False-fail note.
+ * @param {string} [options.notes] - Free-form calibration notes.
+ * @returns {string} Markdown text joined with newlines.
+ */
+export const formatPointwiseCalibrationSummary = ({
+  title = "Advisory Pointwise Model-Judge Summary",
+  counts,
+  fixtureLabel = "not recorded",
+  expectedOutcome = "not recorded",
+  falsePass = "not reviewed",
+  falseFail = "not reviewed",
+  notes = "not reviewed",
+} = {}) =>
+  [
+    `## ${title}`,
+    "",
+    "Model-judge evidence is manual and advisory. It cannot upgrade deterministic red or yellow results.",
+    "",
+    "### Verdict Counts",
+    "",
+    ...formatPointwiseVerdictCounts(counts),
+    "",
+    "### Calibration Record",
+    "",
+    `- fixture label: ${fixtureLabel}`,
+    `- expected outcome: ${expectedOutcome}`,
+    `- false pass: ${falsePass}`,
+    `- false fail: ${falseFail}`,
+    `- notes: ${notes}`,
+  ].join("\n");
+
+/**
+ * Validates that a manifest path is a normalized, POSIX-style path relative
+ * to the run directory.
+ *
+ * @param {unknown} relativePath - Path read from the manifest.
+ * @param {string} label - Field name used in error messages.
+ * @returns {string} The validated path, unchanged.
+ * @throws {Error} When the path is blank, padded with whitespace, absolute,
+ *   drive-qualified, uses backslashes, or has empty, `.`, or `..` segments.
+ */
+const validateRelativeArtifactPath = (relativePath, label) => {
+  requiredString(relativePath, label);
+  if (relativePath.trim() !== relativePath) {
+    throw new Error(
+      `pointwise run metadata ${label} must not contain surrounding whitespace`,
+    );
+  }
+  if (relativePath.includes("\\")) {
+    throw new Error(
+      `pointwise run metadata ${label} must use POSIX separators`,
+    );
+  }
+  if (
+    relativePath.startsWith("/") ||
+    relativePath.startsWith("../") ||
+    WINDOWS_DRIVE_PREFIX.test(relativePath)
+  ) {
+    throw new Error(
+      `pointwise run metadata ${label} must be a relative contained path`,
+    );
+  }
+  const segments = relativePath.split("/");
+  if (segments.some((segment) => segment === "." || segment === "..")) {
+    throw new Error(
+      `pointwise run metadata ${label} must not contain . or .. path segments`,
+    );
+  }
+  // An empty segment means a doubled or trailing slash. This single check
+  // replaces the substring tests for "//", "/../", and a leading "./"; the
+  // latter two could never fire once the segment check above had passed.
+  if (segments.includes("")) {
+    throw new Error(`pointwise run metadata ${label} must be normalized`);
+  }
+  return relativePath;
+};
+
+/**
+ * Checks the manifest's artifact records and output file list.
+ *
+ * @param {object} manifest - Manifest under validation.
+ * @param {Iterable<string>} requiredRoles - Roles that must appear in
+ *   `manifest.artifacts`.
+ * @throws {Error} When artifacts or output files are absent, a path is not a
+ *   normalized relative path, a required role is missing, `manifest.json` is
+ *   not listed, or an artifact path is not listed in `output_files`.
+ */
+const requireArtifactPaths = (manifest, requiredRoles) => {
+  if (!Array.isArray(manifest.artifacts) || manifest.artifacts.length === 0) {
+    throw new Error("pointwise run metadata missing artifacts");
+  }
+  const artifactPaths = new Set();
+  const roles = new Set();
+  for (const artifact of manifest.artifacts) {
+    requiredString(artifact?.role, "artifact role");
+    validateRelativeArtifactPath(
+      artifact?.path,
+      `artifact path for ${artifact.role}`,
+    );
+    roles.add(artifact.role);
+    artifactPaths.add(artifact.path);
+  }
+  for (const role of requiredRoles) {
+    if (!roles.has(role)) {
+      throw new Error(`pointwise run metadata missing artifact role ${role}`);
+    }
+  }
+  if (!Array.isArray(manifest.output_files)) {
+    throw new Error("pointwise run metadata missing output_files");
+  }
+  const outputFiles = new Set(
+    manifest.output_files.map((outputPath, index) =>
+      validateRelativeArtifactPath(outputPath, `output_files[${index}]`),
+    ),
+  );
+  if (!outputFiles.has("manifest.json")) {
+    throw new Error("pointwise run metadata missing manifest.json output file");
+  }
+  for (const artifactPath of artifactPaths) {
+    if (!outputFiles.has(artifactPath)) {
+      throw new Error(
+        `pointwise run metadata output_files missing artifact path ${artifactPath}`,
+      );
+    }
+  }
+};
+
+/**
+ * Fails closed unless a `judge-coverage` manifest carries the required run
+ * metadata and, where `expected` supplies a value, matches it.
+ *
+ * @param {object} [options]
+ * @param {object} options.manifest - Manifest to validate.
+ * @param {object} [options.expected] - Configured values (`runId`, `caseId`,
+ *   `model`, `provider`, `effort`, `promptVersion`, `rubricVersion`,
+ *   `runnerVersion`, `requiredArtifactRoles`). An omitted value skips the
+ *   equality check, but the manifest field must still be a non-blank string.
+ * @returns {object} The same `manifest` object.
+ * @throws {Error} On the first missing or mismatched field.
+ */
+export const validatePointwiseRunMetadata = ({
+  manifest,
+  expected = {},
+} = {}) => {
+  if (!manifest || typeof manifest !== "object") {
+    throw new Error("pointwise run metadata manifest is required");
+  }
+  requireEqual(manifest.run_id, expected.runId, "run_id");
+  requireEqual(manifest.run_type, "judge-coverage", "run_type");
+  if (!Array.isArray(manifest.case_ids) || manifest.case_ids.length !== 1) {
+    throw new Error("pointwise run metadata must contain exactly one case id");
+  }
+  requireEqual(manifest.case_ids[0], expected.caseId, "case_id");
+  requireEqual(manifest.model, expected.model, "model");
+  requireEqual(manifest.provider, expected.provider, "provider");
+  // reasoning_effort is optional, but once either side records it the
+  // manifest value must be a non-blank string matching any configured value.
+  if (
+    expected.effort !== undefined ||
+    manifest.reasoning_effort !== undefined
+  ) {
+    requireEqual(
+      manifest.reasoning_effort,
+      expected.effort,
+      "reasoning_effort",
+    );
+  }
+  requireEqual(
+    manifest.prompt_version,
+    expected.promptVersion,
+    "prompt_version",
+  );
+  requireEqual(
+    manifest.rubric_version,
+    expected.rubricVersion,
+    "rubric_version",
+  );
+  requireEqual(
+    manifest.runner?.version,
+    expected.runnerVersion,
+    "runner.version",
+  );
+  requireArtifactPaths(
+    manifest,
+    expected.requiredArtifactRoles ?? DEFAULT_REQUIRED_ARTIFACT_ROLES,
+  );
+  return manifest;
+};
diff --git a/src/sdk.mjs b/src/sdk.mjs
index 6e83549..b842c63 100644
--- a/src/sdk.mjs
+++ b/src/sdk.mjs
@@ -11,11 +11,17 @@ import {
 } from "./promptfoo.mjs";
 import { aggregateVerdict, criticalBlockerCount } from "./verdict.mjs";
 import { assertContainedPath, assertSafeId, toPosixPath } from "./paths.mjs";
+import {
+  countPointwiseVerdicts,
+  formatPointwiseCalibrationSummary,
+  formatPointwiseVerdictCounts,
+  validatePointwiseRunMetadata,
+} from "./pointwise.mjs";
 
 const DEFAULT_SANDBOX_MODE = "read-only";
 const DEFAULT_APPROVAL_POLICY = "never";
 const RANDOMIZATION_METHOD = "sha256-seed-parity-v1";
-const EVAL_KIT_VERSION = "0.1.7";
+const EVAL_KIT_VERSION = "0.1.8";
 
 const escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
 
@@ -762,6 +768,14 @@ export const judgeCoverage = async ({
   // Validate properties match run config
   if (result.case_id !== caseId) throw new Error("case_id mismatch in result");
   if (result.model !== model) throw new Error("model mismatch in result");
+  if (result.provider !== provider)
+    throw new Error("provider mismatch in result");
+  if (result.prompt_version !== promptVersion) {
+    throw new Error("prompt_version mismatch in result");
+  }
+  if (result.rubric_version !== rubricVersion) {
+    throw new Error("rubric_version mismatch in result");
+  }
 
   // Post-process pointwise items if the adapter provides custom canonicalization
   let finalResult = result;
@@ -781,16 +795,7 @@ export const judgeCoverage = async ({
     JSON.stringify(finalResult, null, 2) + "\n",
   );
 
-  const counts = {
-    covered: 0,
-    partial: 0,
-    missing: 0,
-    contradicted: 0,
-    unknown: 0,
-  };
-  for (const item of finalResult.items) {
-    counts[item.verdict] = (counts[item.verdict] ?? 0) + 1;
-  }
+  const counts = countPointwiseVerdicts(finalResult.items);
 
   fs.writeFileSync(
     path.join(resultDir, "report.md"),
@@ -806,11 +811,9 @@ export const judgeCoverage = async ({
       "",
       "## Coverage Summary",
       "",
-      `- covered: ${counts.covered}`,
-      `- partial: ${counts.partial}`,
-      `- missing: ${counts.missing}`,
-      `- contradicted: ${counts.contradicted}`,
-      `- unknown: ${counts.unknown}`,
+      ...formatPointwiseVerdictCounts(counts),
+      "",
+      formatPointwiseCalibrationSummary({ counts }),
       "",
       "## Item Results",
       "",
@@ -831,67 +834,73 @@ export const judgeCoverage = async ({
       redactionStatus: role.startsWith("raw_") ? "raw-local" : "public-safe",
     });
 
-  writeManifest({
-    runDir: resultDir,
-    schemaRegistry: config.schemaRegistry,
-    manifest: {
-      schema_version: "eval-kit.result-manifest.v2",
-      run_id: runId,
-      run_type: "judge-coverage",
-      runner: {
-        id: `${config.raw.suite_id}-pointwise-judge`,
-        version: EVAL_KIT_VERSION,
-      },
-      case_ids: [caseId],
-      started_at: startedAt.toISOString(),
-      ended_at: endedAt.toISOString(),
-      duration_ms: endedAt.getTime() - startedAt.getTime(),
-      status: "completed",
-      git: { commit: getGitCommit(config) },
-      command: process.argv.join(" "),
-      tool_versions: getToolVersions(config),
-      model_provider: codexProviderId({ provider, model }),
-      model,
-      provider,
-      reasoning_effort: effort,
-      sandbox_mode: DEFAULT_SANDBOX_MODE,
-      approval_policy: DEFAULT_APPROVAL_POLICY,
-      codex_auth_mode: authMode,
-      prompt_version: promptVersion,
-      rubric_version: rubricVersion,
-      artifacts: [
-        artRecord("report", "report.md", "text/markdown"),
-        artRecord(
-          "pointwise_result",
-          "pointwise-result.json",
-          "application/json",
-        ),
-        artRecord(
-          "promptfoo_config",
-          "promptfooconfig.json",
-          "application/json",
-        ),
-        artRecord(
-          "raw_promptfoo_results",
-          "promptfoo-results.json",
-          "application/json",
-        ),
-        artRecord(
-          "promptfoo_html_report",
-          "promptfoo-report.html",
-          "text/html",
-        ),
-      ],
-      output_files: [
-        "manifest.json",
-        "report.md",
+  const manifest = {
+    schema_version: "eval-kit.result-manifest.v2",
+    run_id: runId,
+    run_type: "judge-coverage",
+    runner: {
+      id: `${config.raw.suite_id}-pointwise-judge`,
+      version: EVAL_KIT_VERSION,
+    },
+    case_ids: [caseId],
+    started_at: startedAt.toISOString(),
+    ended_at: endedAt.toISOString(),
+    duration_ms: endedAt.getTime() - startedAt.getTime(),
+    status: "completed",
+    git: { commit: getGitCommit(config) },
+    command: process.argv.join(" "),
+    tool_versions: getToolVersions(config),
+    model_provider: codexProviderId({ provider, model }),
+    model,
+    provider,
+    reasoning_effort: effort,
+    sandbox_mode: DEFAULT_SANDBOX_MODE,
+    approval_policy: DEFAULT_APPROVAL_POLICY,
+    codex_auth_mode: authMode,
+    prompt_version: promptVersion,
+    rubric_version: rubricVersion,
+    artifacts: [
+      artRecord("report", "report.md", "text/markdown"),
+      artRecord(
+        "pointwise_result",
         "pointwise-result.json",
-        "promptfooconfig.json",
+        "application/json",
+      ),
+      artRecord("promptfoo_config", "promptfooconfig.json", "application/json"),
+      artRecord(
+        "raw_promptfoo_results",
         "promptfoo-results.json",
-        "promptfoo-report.html",
-      ],
+        "application/json",
+      ),
+      artRecord("promptfoo_html_report", "promptfoo-report.html", "text/html"),
+    ],
+    output_files: [
+      "manifest.json",
+      "report.md",
+      "pointwise-result.json",
+      "promptfooconfig.json",
+      "promptfoo-results.json",
+      "promptfoo-report.html",
+    ],
+  };
+  validatePointwiseRunMetadata({
+    manifest,
+    expected: {
+      runId,
+      caseId,
+      model,
+      provider,
+      effort,
+      promptVersion,
+      rubricVersion,
+      runnerVersion: EVAL_KIT_VERSION,
     },
   });
+  writeManifest({
+    runDir: resultDir,
+    schemaRegistry: config.schemaRegistry,
+    manifest,
+  });
 
   return { resultDir, finalResult };
 };
diff --git a/tests/pointwise.test.mjs b/tests/pointwise.test.mjs
new file mode 100644
index 0000000..0e52400
--- /dev/null
+++ b/tests/pointwise.test.mjs
@@ -0,0 +1,217 @@
+import { describe, expect, it } from "vitest";
+
+import {
+  countPointwiseVerdicts,
+  formatPointwiseCalibrationSummary,
+  validatePointwiseRunMetadata,
+} from "../src/index.mjs";
+
+const pointwiseManifest = (overrides = {}) => ({
+  schema_version: "eval-kit.result-manifest.v2",
+  run_id: "provider-20260704-case-good",
+  run_type: "judge-coverage",
+  runner: {
+    id: "suite-pointwise-judge",
+    version: "0.1.8",
+  },
+  case_ids: ["case-alpha"],
+  started_at: "2026-07-04T00:00:00.000Z",
+  ended_at: "2026-07-04T00:00:01.000Z",
+  duration_ms: 1000,
+  status: "completed",
+  git: {
+    commit: "abc123",
+  },
+  command: "pnpm eval:judge:coverage",
+  tool_versions: {
+    node: "v26.4.0",
+  },
+  model: "gpt-5.4",
+  provider: "openai:codex-app-server",
+  model_provider: "openai:codex-app-server:gpt-5.4",
+  reasoning_effort: "medium",
+  prompt_version: "pointwise-v1",
+  rubric_version: "rubric-v1",
+  artifacts: [
+    { role: "report", path: "report.md" },
+    { role: "pointwise_result", path: "pointwise-result.json" },
+    { role: "promptfoo_config", path: "promptfooconfig.json" },
+    { role: "raw_promptfoo_results", path: "promptfoo-results.json" },
+    { role: "promptfoo_html_report", path: "promptfoo-report.html" },
+  ],
+  output_files: [
+    "manifest.json",
+    "report.md",
+    "pointwise-result.json",
+    "promptfooconfig.json",
+    "promptfoo-results.json",
+    "promptfoo-report.html",
+  ],
+  ...overrides,
+});
+
+const expectedMetadata = {
+  runId: "provider-20260704-case-good",
+  caseId: "case-alpha",
+  model: "gpt-5.4",
+  provider: "openai:codex-app-server",
+  effort: "medium",
+  promptVersion: "pointwise-v1",
+  rubricVersion: "rubric-v1",
+  runnerVersion: "0.1.8",
+};
+
+describe("pointwise model-judge helpers", () => {
+  it("counts verdicts and formats a calibration summary with adverse categories", () => {
+    const counts = countPointwiseVerdicts([
+      { verdict: "covered" },
+      { verdict: "partial" },
+      { verdict: "missing" },
+      { verdict: "contradicted" },
+      { verdict: "unknown" },
+      { verdict: "unknown" },
+    ]);
+
+    expect(counts).toEqual({
+      covered: 1,
+      partial: 1,
+      missing: 1,
+      contradicted: 1,
+      unknown: 2,
+    });
+
+    const summary = formatPointwiseCalibrationSummary({
+      counts,
+      fixtureLabel: "expected-bad",
+      expectedOutcome: "adverse on targeted defect",
+      falsePass: "not observed",
+      falseFail: "not applicable",
+      notes: "bad fixture remained adverse",
+    });
+
+    expect(summary).toContain("Model-judge evidence is manual and advisory");
+    expect(summary).toContain("- covered: 1");
+    expect(summary).toContain("- unknown: 2");
+    expect(summary).toContain("- fixture label: expected-bad");
+    expect(summary).toContain("- false pass: not observed");
+  });
+
+  it("fails closed when pointwise run metadata is missing or mismatched", () => {
+    expect(() =>
+      validatePointwiseRunMetadata({
+        manifest: pointwiseManifest(),
+        expected: expectedMetadata,
+      }),
+    ).not.toThrow();
+
+    expect(() =>
+      validatePointwiseRunMetadata({
+        manifest: pointwiseManifest({ run_id: "" }),
+        expected: expectedMetadata,
+      }),
+    ).toThrow(/missing run_id/);
+
+    expect(() =>
+      validatePointwiseRunMetadata({
+        manifest: pointwiseManifest({ model: "gpt-5.5" }),
+        expected: expectedMetadata,
+      }),
+    ).toThrow(/model mismatch/);
+
+    expect(() =>
+      validatePointwiseRunMetadata({
+        manifest: pointwiseManifest({
+          artifacts: [{ role: "report", path: "report.md" }],
+        }),
+        expected: expectedMetadata,
+      }),
+    ).toThrow(/missing artifact role pointwise_result/);
+
+    expect(() =>
+      validatePointwiseRunMetadata({
+        manifest: pointwiseManifest({
+          output_files: ["manifest.json", "report.md"],
+        }),
+        expected: expectedMetadata,
+      }),
+    ).toThrow(/output_files missing artifact path pointwise-result.json/);
+
+    expect(() =>
+      validatePointwiseRunMetadata({
+        manifest: pointwiseManifest({
+          artifacts: [
+            { role: "report", path: "../report.md" },
+            { role: "pointwise_result", path: "/tmp/pointwise-result.json" },
+            { role: "promptfoo_config", path: "promptfooconfig.json" },
+            { role: "raw_promptfoo_results", path: "promptfoo-results.json" },
+            { role: "promptfoo_html_report", path: "promptfoo-report.html" },
+          ],
+          output_files: [
+            "manifest.json",
+            "../report.md",
+            "/tmp/pointwise-result.json",
+            "promptfooconfig.json",
+            "promptfoo-results.json",
+            "promptfoo-report.html",
+          ],
+        }),
+        expected: expectedMetadata,
+      }),
+    ).toThrow(/relative contained path/);
+
+    expect(() =>
+      validatePointwiseRunMetadata({
+        manifest: pointwiseManifest({
+          output_files: [
+            "manifest.json",
+            "report.md",
+            "pointwise-result.json",
+            "./promptfooconfig.json",
+            "promptfoo-results.json",
+            "promptfoo-report.html",
+          ],
+        }),
+        expected: expectedMetadata,
+      }),
+    ).toThrow(/must not contain \. or \.\. path segments/);
+
+    expect(() =>
+      validatePointwiseRunMetadata({
+        manifest: pointwiseManifest({
+          artifacts: [
+            { role: "report", path: "reports/.." },
+            { role: "pointwise_result", path: "pointwise-result.json" },
+            { role: "promptfoo_config", path: "promptfooconfig.json" },
+            { role: "raw_promptfoo_results", path: "promptfoo-results.json" },
+            { role: "promptfoo_html_report", path: "promptfoo-report.html" },
+          ],
+          output_files: [
+            "manifest.json",
+            "reports/..",
+            "pointwise-result.json",
+            "promptfooconfig.json",
+            "promptfoo-results.json",
+            "promptfoo-report.html",
+          ],
+        }),
+        expected: expectedMetadata,
+      }),
+    ).toThrow(/must not contain \. or \.\. path segments/);
+
+    expect(() =>
+      validatePointwiseRunMetadata({
+        manifest: pointwiseManifest({
+          output_files: [
+            "manifest.json",
+            "report.md",
+            "pointwise-result.json",
+            "promptfoo/./config.json",
+            "promptfoo-results.json",
+            "promptfoo-report.html",
+          ],
+        }),
+        expected: expectedMetadata,
+      }),
+    ).toThrow(/must not contain \. or \.\. path segments/);
+  });
+});
diff --git a/tests/schema.test.mjs b/tests/schema.test.mjs
index 6237cd8..639adf4 100644
--- a/tests/schema.test.mjs
+++ b/tests/schema.test.mjs
@@ -137,6 +137,40 @@ describe("eval-kit schema registry", () => {
     ).not.toThrow();
   });
 
+  it("accepts current pointwise model-run metadata fields", () => {
+    const registry = createSchemaRegistry({
+      schemaRoots: [path.resolve(import.meta.dirname, "../schemas")],
+    });
+    expect(() =>
+      registry.validateWithSchema(
+        "result-manifest.v2.schema.json",
+        {
+          schema_version: "eval-kit.result-manifest.v2",
+          run_id: "provider-20260704-case-good",
+          run_type: "judge-coverage",
+          runner: { id: "suite-pointwise-judge", version: "0.1.8" },
+          case_ids: ["case-a"],
+          started_at: "2026-07-04T00:00:00.000Z",
+          ended_at: "2026-07-04T00:00:01.000Z",
+          duration_ms: 1000,
+          status: "completed",
+          git: { commit: "abc123" },
+          command: "pnpm eval:judge:coverage",
+          tool_versions: { node: "v26.4.0" },
+          artifacts: [],
+          output_files: ["manifest.json"],
+          model: "gpt-5.4",
+          provider: "openai:codex-app-server",
+          model_provider: "openai:codex-app-server:gpt-5.4",
+          reasoning_effort: "medium",
+          prompt_version: "pointwise-v1",
+          rubric_version: "rubric-v1",
+        },
+        "manifest",
+      ),
+    ).not.toThrow();
+  });
+
   it("resolves bundled prompt and schema fallbacks for consumer configs", () => {
     const config = loadConfig(
       path.resolve(