diff --git a/adoption-readiness-grades/ADOPTION-READINESS-GRADES.a2ml b/adoption-readiness-grades/ADOPTION-READINESS-GRADES.a2ml new file mode 100644 index 00000000..d0f4d13e --- /dev/null +++ b/adoption-readiness-grades/ADOPTION-READINESS-GRADES.a2ml @@ -0,0 +1,339 @@ +; SPDX-License-Identifier: PMPL-1.0-or-later +; Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +; Adoption Readiness Grades (ARG) — Machine-readable specification +; Format: A2ML (AI-to-Machine Language) +; Standard: ARG v1.0 +; Companion to: ADOPTION-READINESS-GRADES.adoc (the normative spec) + +(standard + (name "Adoption Readiness Grades") + (abbreviation "ARG") + (version "1.0") + (date "2026-05-28") + (author "Jonathan D.A. Jewell ") + (license "PMPL-1.0-or-later") + (license-fallback "MPL-2.0") + (family "RSR") + (companion-of "Toolchain Readiness Grades v1.0" "Component Readiness Grades v2.2") + (relationship "language-whole-sibling-tier") + (tier "language-adoption") + (sibling-tiers + (component "CRG") + (toolchain "TRG") + (language-adoption "ARG") + (foundations "FRG"))) + +(grades + (grade + (code X) + (name "Untested / Not yet a language") + (release-stage #f) + (stability-posture #f) + (ordinal 0) + (description "No release artefact exists, or no Discovery surface item from §3.1 is present.") + (evidence-required "none") + (minimum-for #f)) + (grade + (code F) + (name "Harmful / Wasteful") + (release-stage #f) + (stability-posture #f) + (ordinal 1) + (description "Released but actively user-harmful: misleading docs, silently incorrect semantics, regressive ergonomics, or abandonment-while-promoted.") + (evidence-required "documented harm or misrepresentation; explicit demotion record in READINESS.md; halt/rename/merge recommendation") + (minimum-for #f)) + (grade + (code E) + (name "Bare Minimum") + (release-stage "pre-alpha") + (stability-posture "internal-only") + (ordinal 2) + (description "Undeniable internal minimum: language qualifies, runs own stdlib, no external users required. Nobody outside the maintainer would use it.") + (evidence-required "TRG >= E for F1-F4 + F6 + F7 + B1 + B2 + B6 + T8; D1 (landing page) and D2 (elevator pitch) present; repo RSR-compliant for TRG-E") + (minimum-for "internal experiment")) + (grade + (code D) + (name "Toy-Project Usable") + (release-stage "pre-alpha+") + (stability-posture "toy-project-usable") + (ordinal 3) + (description "Toy-project usable. A motivated person could build a weekend project without papercut-after-papercut.") + (evidence-required "all E + TRG >= D estate-wide + O1 (5-min hello-world) + O2 (tutorial) + O4 (>=3 cookbook examples >=100 LoC) + R1 (reference manual, even if incomplete) + R3 (formal grammar) + Δ1 (signed reproducible release) + >=3 external programs >=100 LoC each parse+typecheck+run + diversity-metrics: external authors >= 2 distinct individuals beyond maintainer") + (minimum-for #f)) + (grade + (code C) + (name "Self-Validated (alpha gate)") + (release-stage "alpha") + (stability-posture "stable-in-home-context") + (ordinal 4) + (description "Self-validated alpha. Non-trivially usable in home context. Dogfood cohort uses it for real work in >=2 substantive domains.") + (evidence-required "all D + TRG >= C estate-wide + CRG >= C for every adoption-surface §3 component + O3 (playground) + R2 (stdlib reference) + R4 (operational/denotational semantics) + R5 (stable error-code index) + C1+C2+C3 + C5 (RFC/ADR used >=3 times) + Δ2 (registry/install path) + Δ3 (versioning policy) + E1 (slide deck) + >=10 dogfood users meeting diversity-metrics + >=2 substantive domains covered by >=3 cookbook examples each + VeriSimDB instance ingesting per-commit ARG-PROFILE attestations + continuous fuzzing >=14 days per TRG FUZZING-CORPUS-FLOOR.adoc") + (minimum-for "alpha")) + (grade + (code B) + (name "Broadly Adopted (beta gate)") + (release-stage "beta") + (stability-posture "broadly-trial-stable") + (ordinal 5) + (description "Broadly adopted beta. Language has escaped its home repo. External people in unrelated organisations and contexts write real programs in it.") + (evidence-required "all C + TRG >= B estate-wide + CRG >= B per §3 component + >=100 distinct external users meeting diversity-metrics + >=6 diverse external projects use the language (TRG diversity-metrics verified) + X1 >=12 third-party libraries + X2 >=3 third-party tools (not founder-written) + X3 ecosystem-plugged (language-specific per profile) + X4 >=2 maintainers beyond founder with >=6 months sustained contribution + Δ4 deprecation policy active + >=1 deprecation cycle completed + Δ5 SECURITY.md with response demonstrated + E2 training materials used externally + continuous fuzzing >=30 days incl structure-aware + external single-party audit + SBOM + Hypatia zero Critical/High + branch protection") + (minimum-for "beta")) + (grade + (code A) + (name "School-Grade Real") + (release-stage "stable") + (stability-posture "stable") + (ordinal 6) + (description "School-grade real — first-gen-Python equivalent. A school could adopt it next term. Founder no longer a single point of failure. ARG-A is the cap; past A is a cultural phenomenon, not a language property.") + (evidence-required "all B + TRG = A estate-wide + FRG >= B + >=1000 distinct external users meeting diversity-metrics + >=1 educational institution taught the language as a multi-week unit with external educator (E3 + E4) + X1 >= 50 libraries + X2 >= 8 tools + X4 >= 5 independent maintainers + founder bus-factor test signed (could step away 6 months) + 5-nines on standing priority axes + continuous fuzzing 90 days incl differential against reference impl + type-theory/PL-research upstream contribution in last 12 months + full hyperpolymath standards conformance per TRG §5.7.5 + unanimous 6+6 humans+LLMs signoff per TRG A-GRADE-LLM-PANEL.adoc") + (minimum-for "stable release"))) + +(release-stage-mapping + (X "pre-alpha") + (F "rejected") + (E "pre-alpha (internal only)") + (D "pre-alpha+ (toy-project usable)") + (C "alpha") + (B "beta") + (A "stable / release")) + +(required-adoption-surfaces + (discovery + (D1 "Landing page or canonical README") + (D2 "One-paragraph elevator pitch for non-specialist") + (D3 "Provenance: maintainers, start date, current version")) + (onboarding + (O1 "Hello-world runnable from clean install in <= 5 minutes") + (O2 "Tutorial covering distinctive features") + (O3 "Playground (browser-runnable or single-command-local)") + (O4 "Cookbook of worked examples in >= 2 substantive domains")) + (reference + (R1 "Reference manual (operational specification)") + (R2 "Standard library reference (one entry per public symbol)") + (R3 "Formal grammar (EBNF or equivalent)") + (R4 "Operational or denotational semantics (also TRG M5)") + (R5 "Stable error-code index (also TRG diagnostics M)")) + (community + (C1 "Issue tracker with response-time policy") + (C2 "Contribution guide (CONTRIBUTING.md)") + (C3 "Code of conduct (CoC)") + (C4 "Public discussion venue (forum / Discord / Zulip / list)") + (C5 "RFC / ADR process documented and used")) + (distribution + (D1-dist "Stable release artefacts (tagged, signed, reproducible)") + (D2-dist "Package registry presence or canonical install path") + (D3-dist "Versioning policy (Semver or Edition)") + (D4-dist "Deprecation policy and visible deprecation log") + (D5-dist "Security disclosure policy")) + (education + (E1 "Slide deck / 1-hour external talk-script") + (E2 "Training materials for 1-day workshop") + (E3 "Curriculum for school-term unit (multi-week)") + (E4 "External educator(s) who have used the materials")) + (ecosystem + (X1 "Third-party library count") + (X2 "Third-party tool count") + (X3 "Integration count (domain-specific, per profile)") + (X4 "Independent maintainer count beyond founder"))) + +(banned-constructs + ; Inherited verbatim from TRG. + (inherited-from "TRG") + (constructs + "believe_me" + "assert_total" + "Admitted" + "sorry" + "postulate" + "unsafeCoerce" + "Obj.magic" + "Obj.repr" + "unsafePerformIO" + "getUnsafe" + "getExn" + "raw transmute" + "raw reinterpret_cast" + "void * casts that defeat the type system" + "@unchecked" + "@SuppressWarnings on safety lints")) + +(standing-priority-order + ; Inherited verbatim from TRG. + (inherited-from "TRG") + (axes + (1 "dependability") + (2 "security") + (3 "interop") + (4 "usability") + (5 "performance") + (6 "versatility") + (7 "functional-extension"))) + +(grade-aggregation + (rule "worst-of-any-required-adoption-surface") + (no-averaging #t) + (no-weighting #t) + (cross-axis-rule "ARG <= TRG always") + (justification "A language with discovery+onboarding+reference all at B but community at D is ARG-D, not 'ARG-B-with-community-gap'. Honest grade tells maintainer where to focus.")) + +(diversity-metrics + (inherited-from "TRG") + (technical-diversity + (language-family-count >=3) + (runtime-environment-count >=4) + (architecture-count >=2) + (os-family-count >=3)) + (organizational-diversity + (independent-org-count >=4) + (geographic-region-count >=3) + (industry-sector-count >=3)) + (user-diversity + (role-count >=5) + (expertise-level-count >=3) + (use-case-count >=4)) + (downstream-target-diversity + (language-family-count >=3) + (runtime-environment-count >=4) + (architecture-count >=2) + (os-family-count >=3) + (independent-org-count >=4) + (geographic-region-count >=3) + (industry-sector-count >=3))) + +(qualifying-provers + (inherited-from "TRG") + (v1 + "Idris2" + "Lean 4" + "Rocq" + "Agda") + (re-baseline "annual")) + +(a-grade-llm-panel + (inherited-from "TRG") + (composition + (reasoning-models 2) + (specialist-coding-models 2) + (instruct-models 1) + (mixture-of-experts-models 1)) + (vendor-distinctness #t) + (unanimous-required #t) + (re-baseline "quarterly")) + +(verisim-integration + (description "ARG-PROFILE.adoc content is DECLARE'd to VeriSimDB on each commit. Drift between claimed grade and underlying evidence is detected by VeriSimDB's cross-modal drift monitor and surfaces as grade-stale alert.") + (required-from-grade C) + (attestation-language "VCL-total") + (proof-types-used + "EXISTENCE" + "INTEGRITY" + "PROVENANCE" + "FRESHNESS")) + +(demotion + (immediate #t) + (no-grace-period #t) + (triggers + "founder bus-factor test reverts" + "educator withdraws (E4 lost)" + "distinct-user count drops below threshold for >30 days" + "FRG regresses below B at A-grade" + "TRG drops (ARG <= TRG enforced immediately)" + "6+6 signoff rescinded" + "host repo ceases to satisfy any §5.7.5 standard at language's current grade" + "RFC/ADR process abandoned (C+ requirement)" + "VeriSimDB attestation stale for >30 days (C+ requirement)")) + +(per-language-extension + (mechanism "spec/ARG-PROFILE.adoc in language repo") + (template "templates/ARG-PROFILE-TEMPLATE.adoc") + (rule "may tighten, may not loosen") + (examples + "AffineScript: Grade B requires playground with affine-borrow-check feedback" + "Ephapax: Grade C requires external tutorial in dyadic-modality discipline by non-maintainer" + "VCL-total: Grade C requires registered Verisim instance addressable from public example" + "KRL: Grade C requires examples covering all four operation families (Construct/Transform/Resolve/Retrieve)")) + +(definitions + (distinct-external-user + (description "Individual who has used the language for their own work, not maintainer-paid contract, not solicited demo, not the founder, identifiable, dated record") + (inherited-from "TRG §6.4")) + (substantive-domain + (description "Domain in which a non-trivial program (>=500 LoC) could solve a recognisable real problem") + (examples "graphics" "networking" "parsing" "simulation" "data-analysis" "web-frontend" "embedded-control") + (non-example "10 different fibonacci implementations are NOT a substantive domain")) + (bus-factor-test + (description "Founder records signed statement: could stop working on the language for 6 calendar months without it stalling") + (gate-for-grade A) + (signed-by "founder") + (credibility-requires "X4 evidence (>=5 independent maintainers) already in place")) + (school-grade + (description "Calibrated against first-generation Python (c. 1995-2002): non-specialist educator can teach it to children with available materials") + (gate-for-grade A))) + +(self-assessment + (current-grade "n/a") + (justification "ARG grades languages, not specifications. ARG specification is graded by standards repo's CRG and by documentation tooling's TRG.") + (path "SELF-ASSESSMENT.adoc") + (note "ARG document set is self-assessed against internal-consistency criteria in SELF-ASSESSMENT.adoc")) + +(conformance + (requirements + "ARG grade recorded in spec/ARG-PROFILE.adoc in language repo" + "Grade supported by evidence per §4 for that grade and every grade below it" + "Profile reviewed >=1 per release cycle, and within 7 days of any demotion" + "Aggregation rule §5 respected (worst-of, no averaging)" + "Demotion recorded immediately on loss of required evidence" + "Per-commit attestations emitted to VeriSimDB at grade C+" + "Language is also TRG-graded; ARG <= TRG holds at all times")) + +(open-questions + (q1 + (question "Is the '1000 distinct users' threshold for ARG-A correctly calibrated?") + (status "plucked from intuition, same caveat as TRG 5M processor-hours") + (re-baseline-trigger "first ARG-A candidate") + (likely-direction "may drift up for general-purpose languages, down for narrow DSLs")) + (q2 + (question "Is '6 diverse external projects' at ARG-B the right floor?") + (status "borrowed from CRG-B") + (re-baseline-trigger "first ARG-B candidate that is a narrow DSL with <12 plausible adopters total") + (likely-direction "lower threshold for inherently small communities")) + (q3 + (question "Should ARG-A require FRG-A, not just FRG-B?") + (status "currently FRG >= B at ARG-A") + (re-baseline-trigger "first language reaching both ARG-A and FRG-A") + (likely-direction "tighten to FRG-A once any language has shown both can co-exist")) + (q4 + (question "Is the founder bus-factor test sufficiently objective?") + (status "novel, subjective, signed-statement-based") + (re-baseline-trigger "first ARG-A candidate") + (likely-direction "may require external verification of stated independence"))) + +(versioning + (document-version "1.0") + (last-updated "2026-05-28") + (status "active") + (compatibility + (backward-compatible #t) + (breaking-changes "none — initial release") + (deprecations "none")) + (maintenance + (review-cycle "quarterly") + (next-review "2026-08-28") + (deprecation-policy "12 months notice for major changes") + (change-control + (minor "Direct commit with maintainer review") + (major "RFC process + unanimous maintainer approval")))) + +(references + (item "CRG v2.2: Component Readiness Grades") + (item "TRG v1.0: Toolchain Readiness Grades") + (item "FRG v1.0: Foundations Readiness Grades (companion sibling)") + (item "RSR v1.0: Rhodium Standard Repository") + (item "K9 v1.0: Self-Validating Components") + (item "TypeLL: 10-level type-safety lineage origin") + (item "PanLL: panel framework type safety") + (item "VCL-total: VeriSim Consonance Language — attestation form") + (item "VeriSimDB: 8-modal octad attestation persistence") + (item "Hypatia: invariant-path doc-claims grounder")) diff --git a/adoption-readiness-grades/ADOPTION-READINESS-GRADES.adoc b/adoption-readiness-grades/ADOPTION-READINESS-GRADES.adoc new file mode 100644 index 00000000..fc4202b0 --- /dev/null +++ b/adoption-readiness-grades/ADOPTION-READINESS-GRADES.adoc @@ -0,0 +1,507 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + += Adoption Readiness Grades (ARG) +:toc: left +:toclevels: 3 + +[cols="1,3", options="header"] +|=== +| Field | Value + +| Standard | Adoption Readiness Grades +| Version | 1.0 +| Date | 2026-05-28 +| Status | Active +| Author | Jonathan D.A. Jewell +| License | PMPL-1.0-or-later (MPL-2.0 automatic legal fallback) +| Family | Rhodium Standard Repositories (RSR) +| Companion to | Toolchain Readiness Grades v1.0, Component Readiness Grades v2.2 +| Relationship | Language-whole sibling tier (CRG = component, TRG = toolchain, ARG = language adoption) +|=== + +== Abstract + +Adoption Readiness Grades (ARG) is a strict-by-construction grading scheme +for the *language-as-an-adoptable-artefact*. It sits at the language-whole +tier of the estate grading hierarchy: + +* *CRG* grades individual *components* (subcommand, module, library). +* *TRG* grades the *toolchain* (parser, type checker, codegen, etc.). +* *ARG* grades the *language as a thing someone can adopt*. + +ARG uses the same letter grades as CRG and TRG (`X | F | E | D | C | B | A`) +and the same worst-of-any-required-element aggregation rule. The evidence +required at each grade differs: ARG asks "would anyone use this?", not +"is the toolchain engineered?" or "is the type checker sound?". Those are +the questions of TRG and FRG. ARG is the third axis. + +A language can be TRG-A on every component and still be ARG-E if nobody +adopts it. A language can be ARG-D (toy-project usable) with TRG-D +toolchain. The three axes are independent and worst-of dominates *only* +within an axis, never across them. + +This standard is designed to be adopted by any language project +regardless of paradigm, domain, or intended runtime. It is part of the +Rhodium Standard Repositories (RSR) family of standards maintained by +hyperpolymath. + +== 1. Scope + +This standard applies to: + +* Any programming language, query language, configuration language, or + domain-specific language for which the question *"would someone adopt + this?"* is meaningful. +* The *language as a public artefact* — what a prospective adopter + encounters when they discover the language: docs, playground, tutorial, + registry, ecosystem, governance. + +This standard does NOT apply to: + +* Internal-only DSLs whose audience is the project itself (CRG and TRG + cover those). +* Component-level quality (use CRG). +* Toolchain quality (use TRG). +* Mathematical foundations / typing soundness (use FRG). +* Repository hygiene (use RSR). + +=== 1.1 Per-language extension + +Each adopting language SHOULD publish a `spec/ARG-PROFILE.adoc` in its +own repo that tightens this baseline with language-specific obligations. +A profile MAY only tighten, never loosen. Examples of valid tightening: + +* AffineScript: "Grade B requires a playground that lets a user write an + affine-typed program and observe borrow-check feedback." +* Ephapax: "Grade C requires at least one external tutorial in the + dyadic-modality discipline written by someone other than the maintainer." +* VCL-total: "Grade C requires a registered Verisim instance the language + can address from a public example." +* KRL: "Grade C requires example programs covering all four operation + families (Construct / Transform / Resolve / Retrieve)." + +The profile shape is defined in `templates/ARG-PROFILE-TEMPLATE.adoc`. + +== 2. Normative References + +* *CRG v2.2* — Component Readiness Grades. ARG inherits CRG's letter + vocabulary and release-stage mapping. +* *TRG v1.0* — Toolchain Readiness Grades. ARG ≤ TRG always (you cannot + adopt a language whose toolchain is below the same grade). +* *RSR* — Rhodium Standard Repositories. ARG-graded languages MUST be + RSR-compliant at the adoption rung's required level. +* *Diversity Metrics* — Adopt the TRG `diversity-metrics` block + (technical, organisational, user, downstream-target) verbatim. +* *TypeLL / PanLL / VCL-total* — The 10-level type-safety lineage. Where + a language claims compliance with one or more of the 10 levels, ARG-C+ + evidence MUST cite the level explicitly. + +== 3. Required Adoption Surfaces + +ARG grades the *language as adoptable*. An "adoption surface" is one of +the public artefacts an adopter encounters. The required surfaces are: + +=== 3.1 Discovery surface +* *D1* Landing page or canonical README in the language's repo +* *D2* One-paragraph elevator pitch a non-specialist can understand +* *D3* Provenance: who maintains it, when started, current version + +=== 3.2 Onboarding surface +* *O1* "Hello world" runnable from a clean install in ≤ 5 minutes +* *O2* Tutorial covering the language's distinctive features +* *O3* Playground (browser-runnable or single-command-local) +* *O4* Cookbook of worked examples in ≥ 2 substantive domains + +=== 3.3 Reference surface +* *R1* Reference manual (operational specification, complete coverage + of the language as released) +* *R2* Standard library reference (one entry per public module/function) +* *R3* Formal grammar (EBNF or equivalent) +* *R4* Operational or denotational semantics document (also TRG M5) +* *R5* Stable error-code index (also TRG diagnostics M-component) + +=== 3.4 Community surface +* *C1* Issue tracker with response-time policy +* *C2* Contribution guide (CONTRIBUTING.md or equivalent) +* *C3* Code of conduct (CoC) +* *C4* Public discussion venue (forum, Discord, Zulip, mailing list) +* *C5* RFC / ADR process documented and used + +=== 3.5 Distribution surface +* *Δ1* Stable release artefacts (tagged, signed, reproducible per RSR) +* *Δ2* Package registry presence (or canonical install path) +* *Δ3* Versioning policy (Semver or estate Edition system) +* *Δ4* Deprecation policy and visible deprecation log +* *Δ5* Security disclosure policy (SECURITY.md, contact, response time) + +=== 3.6 Education surface +* *E1* Slide deck or talk-script suitable for a 1-hour external presentation +* *E2* Training materials suitable for a 1-day workshop +* *E3* Curriculum suitable for a school-term unit (multi-week) +* *E4* External educator(s) who have used the materials + +=== 3.7 Ecosystem surface +* *X1* Library count: third-party libraries published against the language +* *X2* Tool count: third-party tools (formatters, linters, plugins, etc.) +* *X3* Integration count: integrations with surrounding systems (DOM, vite, + C-ABI, etc. — domain-specific) +* *X4* Independent maintainer count beyond the founder + +== 4. Grade Definitions + +=== 4.1 Grade X — Untested / Not yet a language + +*Release stage:* None + +The candidate is not yet a language a person could adopt. No release +artefact exists, or the artefact is so incomplete that *no* discovery +surface item from §3.1 is present. + +*Evidence required:* None. This is the absence of adoption-surface +evidence. + +=== 4.2 Grade F — Harmful / Wasteful + +*Release stage:* Reject, deprecate, or delegate + +Released but actively harmful to its adopters: misleading documentation, +silently incorrect semantics, regressive ergonomics versus established +alternatives, or maintainer absence amounting to abandonment-while-still-promoted. + +ARG-F is rare. Most languages that *would* be ARG-F never crossed the +ARG-E threshold to begin with. The grade exists so that a language can +be honestly de-promoted when it does cause harm. + +*Evidence required:* +* Documented harm or misrepresentation. +* Explicit demotion record in the per-language `READINESS.md`. +* Recommendation: halt, rename-and-restart, or merge into a sibling. + +=== 4.3 Grade E — Bare Minimum *(pre-alpha — internal only, not releasable)* + +*Release stage:* Pre-alpha +*Stability posture:* Internal experiment +*Honest shorthand:* `internal-bare` + +Undeniable internal minimum: the candidate qualifies as a language, +runs its own stdlib end-to-end, and a maintainer can demonstrate this. +*Nobody outside the maintainer would actually use it* — there is no +worth-using surface yet. + +*Smell test:* "Yes, it's a language. So what?" + +*Evidence required (worst-of, all MUST be PASSED):* + +* TRG ≥ E for all front-end components (F1 grammar, F2 lexer, F3 parser, + F4 AST, F6 import resolver, F7 diagnostics) +* TRG ≥ E for at least one back-end (B1 IR, B2 codegen, B6 runtime) +* TRG ≥ E for stdlib (T8) — language can run its own stdlib +* D1 (landing page) and D2 (elevator pitch) present +* Repo is RSR-compliant at the required level for TRG-E + +=== 4.4 Grade D — Toy-Project Usable *(pre-alpha+ — still not releasable broadly)* + +*Release stage:* Pre-alpha+ +*Stability posture:* Toy-project usable +*Honest shorthand:* `pre-alpha-toy` + +Toy-project usable. A motivated person could build a weekend project in +this language without papercut-after-papercut. The language begins to +exist for someone other than the maintainer, but the surface is still +sparse. + +*Smell test:* "I could write a weekend project in this." + +*Evidence required (all E + the following):* + +* TRG ≥ D estate-wide (all required toolchain components) +* O1 (5-minute hello-world) demonstrated +* O2 (tutorial covering distinctive features) +* O4 (cookbook with ≥ 3 worked examples ≥ 100 LoC each) +* R1 (reference manual, even if incomplete) +* R3 (formal grammar) +* Δ1 (stable release artefacts, signed, reproducible) +* At least 3 external programs ≥ 100 LoC each parse, typecheck, and run +* Diversity-metrics check: external program authors are ≥ 2 distinct + individuals beyond the maintainer + +=== 4.5 Grade C — Self-Validated *(alpha gate)* + +*Release stage:* Alpha +*Stability posture:* Stable in home context +*Honest shorthand:* `alpha-stable` + +Self-validated alpha. The language is non-trivially usable in its home +context. Maintainers and a dogfood cohort use it for real work in ≥ 2 +substantive domains. This is the *alpha gate* — below C is not alpha. + +*Smell test:* "I could ship a real internal tool in this." + +*Evidence required (all D + the following):* + +* TRG ≥ C estate-wide +* CRG ≥ C for every adoption-surface §3 component +* O3 (playground — browser-runnable or single-command-local) operational +* R2 (stdlib reference, one entry per public symbol) +* R4 (operational or denotational semantics document — also TRG M5) +* R5 (stable error-code index) +* C1, C2, C3 (issue tracker + CONTRIBUTING + CoC) in place +* C5 (RFC/ADR process used at least 3 times) +* Δ2 (package registry presence or canonical install path) +* Δ3 (versioning policy) +* E1 (slide deck / talk-script — proves the language can be presented externally) +* ≥ 10 dogfood users meeting diversity-metrics (technical + role + use-case) +* ≥ 2 substantive domains covered by ≥ 3 cookbook examples each +* VeriSimDB instance ingesting per-commit ARG-PROFILE attestations +* Continuous fuzzing per TRG `FUZZING-CORPUS-FLOOR.adoc`, 14 days minimum + +=== 4.6 Grade B — Broadly Adopted *(beta gate)* + +*Release stage:* Beta +*Stability posture:* Broadly trial-stable +*Honest shorthand:* `beta-stable` + +Broadly adopted beta. The language has escaped its home repo. External +people, in organisations and contexts that don't share the maintainer's +worldview, write real programs in it. The number 6 (diverse external +projects) is borrowed from CRG-B intentionally — it is enough to reveal +home-context assumptions without making progress impossible. + +*Smell test:* "It fits the workflows people already have." + +*Evidence required (all C + the following):* + +* TRG ≥ B estate-wide +* CRG ≥ B for every adoption-surface §3 component +* ≥ 100 distinct external users meeting diversity-metrics (technical, + organisational, user, downstream-target) +* ≥ 6 diverse external projects use the language, with target diversity + verified against TRG diversity-metrics +* X1 (≥ 12 third-party libraries published against the language) +* X2 (≥ 3 third-party tools — formatter / linter / plugin / etc. — not + written by the founder) +* X3 (ecosystem-plugged: integrations with surrounding systems exist; + the specific set is language-specific per profile) +* X4 (≥ 2 maintainers beyond the founder with sustained contribution + over ≥ 6 months) +* Δ4 (deprecation policy active; ≥ 1 deprecation cycle completed) +* Δ5 (SECURITY.md with disclosure policy and demonstrated response) +* E2 (training materials for 1-day workshop) exists and has been used + externally +* Continuous fuzzing per TRG, 30 days minimum, including structure-aware +* External single-party audit (security or correctness) completed +* SBOM produced; Hypatia zero Critical/High; branch protection enforced + +=== 4.7 Grade A — School-Grade Real *(stable / release)* + +*Release stage:* Stable +*Stability posture:* Stable +*Honest shorthand:* `school-grade` + +School-grade real — first-generation-Python equivalent. A school could +adopt the language as their teaching language next term, and a real +ecosystem of practitioners exists around it. The founder is no longer +the single point of failure. + +*Smell test:* "Teach it to a 12-year-old next term." + +Past ARG-A is not part of this scheme — what lies beyond is a *cultural +phenomenon*, not a property of the language. ARG-A is the cap. + +*Evidence required (all B + the following):* + +* TRG = A estate-wide (the language's toolchain is field-proven Six-Sigma) +* FRG ≥ B (the language has formally-grounded foundations) +* ≥ 1000 distinct external users meeting diversity-metrics +* ≥ 1 educational institution has taught the language as a multi-week + unit, with external educator (E3 + E4) +* X1 ≥ 50 third-party libraries, X2 ≥ 8 third-party tools, X4 ≥ 5 + independent maintainers beyond the founder +* The founder could step away for 6 months without the language stalling + (the *bus-factor test*; recorded as a maintainer's signed statement) +* 5-nines on the standing priority axes (TRG §5.7.5 — adopted verbatim) +* Continuous fuzzing 90 days including differential testing against a + reference implementation +* Type-theory or PL-research upstream contribution in the last 12 months +* Full hyperpolymath standards conformance per TRG §5.7.5 +* Unanimous 6+6 (humans + LLMs) signoff per TRG `templates/A-GRADE-LLM-PANEL.adoc` + +=== 4.8 Demotion on loss *(critical)* + +Loss of any A-grade gate (founder-bus-factor reverts, educator withdraws, +distinct-user count drops below threshold for ≥ 30 days, FRG regresses +below B, signoff rescinded, standards drift) *immediately* demotes the +language. There is no grace period. Same rule as TRG §5.8. + +== 5. Aggregation Rule + +ARG aggregation is *worst-of-any-required-adoption-surface*. The +language's ARG grade is the *lowest* grade achieved across all required +surfaces at the candidate grade's evidence floor. + +*No averaging. No weighting. No partial credit.* + +A language with discovery + onboarding + reference all at B but +community at D is *ARG-D*, not "ARG-B-with-community-gap". The honest +grade tells the maintainer where to focus. + +Crucially, ARG ≤ TRG always. A language cannot be adopted past the +maturity of its toolchain. If TRG drops to D mid-cycle, ARG immediately +drops to D regardless of how strong the community surface is. + +== 6. Definitions + +=== 6.1 "Distinct external user" + +Same definition as TRG §6.4: an individual who has used the language for +their own work (not maintainer-paid contract work, not solicited +demonstration), who is not the founder, who is identifiable (real name +or stable handle), and whose use is dated and recorded. + +=== 6.2 "Substantive domain" + +A domain in which a non-trivial program (≥ 500 LoC) could be written +that solves a recognisable real problem. Examples: graphics, networking, +parsing, simulation, data analysis, web frontend, embedded control. A +"substantive" domain is *not* "we wrote 10 examples of the same +fibonacci function". + +=== 6.3 "Diversity-metrics" + +Adopt TRG's diversity-metrics block verbatim. The four diversity +dimensions (technical / organisational / user / downstream-target) and +their thresholds apply to ARG identically. + +=== 6.4 "Bus-factor test" + +The founder records a signed statement that they could stop working on +the language for 6 calendar months without the language stalling +(releases halt, decisions queue indefinitely, community migrates away). +This is a personal claim, not a proof — but ARG-A requires it to be +*credible* given the X4 (independent maintainer) evidence already in +place. + +=== 6.5 "School-grade" + +The "school" benchmark is calibrated against *first-generation Python* +(c. 1995-2002): a language a non-specialist educator could pick up and +teach to children with available materials, where adoption does not +require the educator to be a domain expert in the language itself. + +== 7. Per-Language Profile Mechanism + +Each adopting language publishes `spec/ARG-PROFILE.adoc` in its repo, +following the template at `templates/ARG-PROFILE-TEMPLATE.adoc`. The +profile: + +* States the current honest grade (X / F / E / D / C / B / A). +* Cites the evidence for each criterion (file paths, commit SHAs, + external URLs, VeriSimDB attestation IDs). +* Records the path to the next grade with concrete next steps. +* Carries an iteration history with dates and previous grades retained. +* MAY tighten the baseline (add language-specific obligations). +* MUST NOT loosen the baseline (no waiver of evidence floors). + +A language's ARG-PROFILE.adoc is itself a *VCL-total proposition* — its +content is `DECLARE`'d to VeriSimDB on each commit. Drift between the +profile's claimed grade and the underlying evidence is detected by +VeriSimDB's cross-modal drift monitor and surfaces as a *grade-stale* +alert. + +== 8. Self-Assessment + +The ARG specification's own ARG grade is *not applicable* — ARG grades +languages, not specifications. The ARG specification is graded by the +CRG of the standards repo and by the TRG of the documentation tooling. + +That said, the ARG document set is self-assessed against its own +internal-consistency criteria in `SELF-ASSESSMENT.adoc`. + +== 9. Conformance + +A language conforms to ARG if: + +1. Its ARG grade is recorded in `spec/ARG-PROFILE.adoc` in its repo. +2. The grade is supported by the evidence described in §4 for that + grade and every grade below it (X is the empty floor). +3. The profile is reviewed at least once per release cycle, and on any + demotion event within 7 days. +4. The aggregation rule of §5 is respected (worst-of, no averaging). +5. Demotion is recorded immediately on loss of any required evidence + (§4.8). +6. Per-commit attestations are emitted to VeriSimDB at grade C and + above. +7. The language is also TRG-graded; ARG ≤ TRG holds at all times. + +== 10. Open Questions / Versioning + +This is ARG v1.0. Likely v2 changes once real adoption data accumulates: + +* The "1000 distinct users" threshold for A is *plucked from intuition*, + same caveat as TRG's "5M processor-hours" threshold. It is the right + *order of magnitude* for "school-grade real", but the exact number + should be re-baselined against telemetry from the first ARG-A + candidate, if and when one emerges. +* The "6 diverse external projects" at B is borrowed from CRG-B. May + need to drift higher for languages whose communities are inherently + small (e.g. domain-specific languages with only ~12 plausible + adopters total). +* The interaction between ARG and FRG at grades A is currently encoded + as "ARG-A requires FRG ≥ B". This may tighten to "ARG-A requires + FRG ≥ A" once at least one language reaches both. The current + formulation allows for languages whose adoption is high but whose + formal foundations lag (most production languages historically). +* The "founder bus-factor test" is novel and subjective. It is + acknowledged as a number-not-derivation. Re-baseline against the + first ARG-A candidate. + +== 11. Honest Comparison to Real-World Software Standards + +To the author's knowledge, ARG-A is not matched by any language standard +currently in use anywhere. The closest comparators: + +* *Python (c. 2002)* — would have been ARG-B-leaning-A by ARG's + criteria. First-generation Python is the calibration point for + ARG-A's "school-grade" rung. Modern Python is ARG-A. +* *Rust (2024)* — ARG-A. Field-proven, school-taught, ecosystem-plugged. +* *Haskell (2024)* — ARG-A in academia, ARG-B-leaning-A in industry. +* *Elm (2024)* — ARG-B. Strong community, narrow domain, governance + friction has prevented A. +* *Idris2 (2024)* — ARG-D. Real users, but not at C-cohort scale. +* *Most "I built a programming language" weekend projects* — ARG-X or + ARG-E. Honest. + +The author expects approximately *zero* hyperpolymath languages to hit +ARG-A in year one of ARG operation. That is the correct order of +magnitude. If ARG-A becomes easy to hit, the bar is wrong and the +standard MUST be raised. + +This standard is open to challenge. If you think the bar is too low, +specify what evidence class, adoption surface, or community-discipline +requirement is missing. + +== 12. Machine-Readable Grade Declaration + +Languages using ARG SHOULD include in their `READINESS.md`: + +[source,markdown] +---- +**Current ARG Grade:** D +**Current TRG Grade:** D +**Current FRG Grade:** E +**Current CRG Grade:** C +**Current RSR Compliance:** FULL +---- + +This line is parsed by `just arg-grade` and `just arg-badge` from +`rsr-template-repo`. + +== Revision History + +[cols="1,1,1,3", options="header"] +|=== +| Version | Date | Author | Changes + +| 1.0 | 2026-05-28 | Jonathan D.A. Jewell | Initial release. ARG fills the language-adoption tier (CRG = component, TRG = toolchain, ARG = language). Grade vocabulary X-A inherited from CRG/TRG. Worst-of aggregation and immediate demotion inherited from TRG. ARG ≤ TRG always. Integration with VeriSimDB via VCL-total propositions at grade C+. +|=== diff --git a/adoption-readiness-grades/AUDIT-FINDINGS-2026-05-28.adoc b/adoption-readiness-grades/AUDIT-FINDINGS-2026-05-28.adoc new file mode 100644 index 00000000..35f3a58e --- /dev/null +++ b/adoption-readiness-grades/AUDIT-FINDINGS-2026-05-28.adoc @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + += ARG/FRG Framework — First Audit Findings (2026-05-28) +:toc: left +:toclevels: 3 + +[cols="1,3"] +|=== +| Field | Value + +| Report | First ARG/FRG audit findings + prover-CI hygiene sweep addendum +| Date | 2026-05-28 +| Scope | ARG + FRG specs v1.0, three audit-cohort languages, dashboard wiring, identity-consonance plan, estate prover-CI hygiene audit + remediation +| Author | Jonathan D.A. Jewell +| Status | Draft — for owner review +| Updated | 2026-05-28 — Section 7 added post-hygiene-sweep (3 PRs landed, audit calibration lesson recorded) +|=== + +== Executive summary + +The ARG (Adoption Readiness Grades) and FRG (Foundations Readiness +Grades) standards have been drafted as language-whole sibling tier +to CRG/TRG, using the same `X | F | E | D | C | B | A` vocabulary +and worst-of aggregation. Three audit-cohort languages have been +graded honestly: + +[cols="<2,^1,^1,<4", options="header"] +|=== +| Language | ARG | FRG | Headline finding + +| AffineScript | D | E (pragmatic) / X (strict) | Strongest adoption surface; weakest foundations evidence relative to claims +| Ephapax | E | D | Inverse pattern — strongest foundations narrative; sparse adoption surface +| My | D-leaning-E | X | Architecturally ambitious (11 crates, 4 dialects); honest retraction of template-ABI scaffolding sets correct FRG baseline +|=== + +*No language is at FRG-C or above in year one* — matching the FRG +spec's expectation (§11: "approximately one language to reach FRG-C +in year one, zero at B or A"). + +*No language is at ARG-C or above* — matching ARG spec's expectation +(§11: "approximately zero hyperpolymath languages to hit ARG-A in +year one"). + +These honest-low grades are *evidence the framework is working*. If +the first audit had produced ARG-B or FRG-A claims, the calibration +would be wrong by ARG/FRG's own benchmarks (Python 2002, Rust 2024, +CompCert/CakeML). + +== Section 1 — Framework gaps discovered + +These are gaps in the *standards themselves* that the audit exposed. + +=== 1.1 affinescript "operational vs mechanised L7" tension + +The ARG/FRG specs say "claiming L7 without a linear-typing soundness +proof is not L7; it is L4 with linear-typing aspirations". AffineScript +*operationally* enforces QTT-affine discipline at the implementation +level (`check`/`compile`/`eval` gate user programs), but has no +mechanised soundness proof. The strict reading collapses to FRG-X; +the pragmatic reading lands at FRG-E. + +*Recommended action:* FRG v1.1 SHOULD distinguish *operational L4 +with linear-typing aspirations* from *mechanised L7 with soundness +proof* by adding a sub-grade qualifier ("L7-operational" vs "L7-proven"). +This avoids the all-or-nothing collapse to X for languages that have +the discipline but not (yet) the proof. + +=== 1.2 my-lang dialect-multiplicity vs single-grade + +My has four dialects (`solo`, `duet`, `ensemble`, `me`) with +progressive type-richness. The ARG/FRG specs grade the *language as +a whole*, but the progressive-disclosure design genuinely means +different dialects could legitimately hold different FRG grades. + +*Recommended action:* ARG/FRG v1.1 SHOULD allow per-dialect grade +sub-records for languages declaring dialect structures, with the +overall grade being the worst-of-any-dialect (matching the +aggregation rule). The per-language profile (§1.1) can express this +via tightening, but the baseline currently doesn't acknowledge the +shape. + +=== 1.3 PROOF-NEEDS.md honest-retraction discipline + +My's 2026-03-29 retraction of template-ABI scaffolding (Types.idr, +Layout.idr, Foreign.idr) is *exactly the discipline FRG should +reward* — the language voluntarily *demoted itself* by removing +scaffolding that was creating a false impression of formal +verification. The FRG spec doesn't currently *celebrate* this +discipline; it just doesn't penalise it. + +*Recommended action:* FRG v1.1 SHOULD add a "good-FRG-discipline" +recognition section, with the My retraction as the canonical example. +This is the kind of community-pattern recognition that incentivises +honesty over score-gaming. + +=== 1.4 Cross-axis coupling ARG ≤ TRG vs ARG-A requires FRG ≥ B + +The ARG spec encodes two cross-axis rules: hard (ARG ≤ TRG always) +and soft (ARG-A requires FRG ≥ B). The audit suggests these are +*qualitatively different* — the first is an obvious independence +constraint, the second is a value claim that "school-grade adoption" +requires "layered foundations". Some maintainers may legitimately +disagree (Python first-gen was ARG-A by ARG's calibration but FRG-X). + +*Recommended action:* Keep the coupling as v1.0; revisit in v1.1 +after community feedback. Document the disagreement-allowed +position more clearly in §10 of ARG. + +=== 1.5 Echidna integration is currently aspirational + +FRG-A requires Echidna in CI as a gate. Echidna itself is not yet +TRG-C (per the per-repo profile assessment). The FRG spec's +§6.6 says "this requires both Echidna ≥ TRG-C and the language's +formalisation to expose its judgments to Echidna's verification +protocol". The first half (Echidna ≥ TRG-C) is currently *not* +satisfied by the verifier itself. + +*Recommended action:* Track this in Echidna's own profile (when +written) and in the open-questions section of FRG v1.0 (§10 — already +mentioned). Aspirational-from-A is acceptable; enforced-from-A +should activate once Echidna graduates. + +=== 1.6 Dashboard tracker REPOS_DIR mismatch + +`nextgen-languages/language-status-tracker.jl` sets +`REPOS_DIR = joinpath(homedir(), "Documents/hyperpolymath-repos")` +but the actual repos live at `/home/hyperpolymath/developer/repos/`. +The grade-extraction code added in Phase 5 will return "—" (repo not +on disk) for every language until this is fixed. + +*Recommended action:* Single-line PR to the tracker fixing +REPOS_DIR. Independent of the ARG/FRG work; surfaced here for +completeness. + +== Section 2 — Per-language honest grades + +=== 2.1 AffineScript + +* *ARG: D* — CLI gates programs; vscode extension published; + conformance corpus exists; ECOSYSTEM.adoc Engineering Spine + defined. Diversity-floor on external authors is the weakest + pinning (lock to D requires ≥ 2 external authors). +* *FRG: E (pragmatic) / X (strict)* — Honest readers should adopt + the strict X until a `formal/` directory with qualifying-prover + encoding lands. The README's "Honest status sync" is exactly the + hygiene FRG rewards even at low grades. + +Profile: `affinescript/spec/ARG-PROFILE.adoc` + +`affinescript/spec/FRG-PROFILE.adoc`. + +=== 2.2 Ephapax + +* *ARG: E* — README + disambiguation + R3+R4 (grammar + semantics + via formalisation) carry the discovery surface, but no playground, + no tutorial, no external users. ARG-E is the bare-minimum honest + grade. +* *FRG: D* — Preservation + progress stated; 4 Admitted + 1 Axiom + outstanding; four-layer redesign committed + (`PRESERVATION-DESIGN.md` canonical); F-grade evacuated on + 2026-05-27 by owner directive lockdown. Path to FRG-C is the + per-layer derivation work in `project_ephapax_post_slice4_plan`. + +Profile: `ephapax/spec/ARG-PROFILE.adoc` + +`ephapax/spec/FRG-PROFILE.adoc`. + +=== 2.3 My + +* *ARG: D-leaning-E* — Architecturally most ambitious of the cohort + (11 crates, 4 dialects, EBNF grammar, playground/ directory, + AI-integration via my-ai). Surfaces exist but are sparsely + populated; "vision exceeds adoption" is the honest summary. +* *FRG: X* — No formalisation in any qualifying prover. The + 2026-03-29 retraction of template-ABI scaffolding is the + correct discipline and *establishes* the honest X baseline. + Path to FRG-E is creation of domain-specific Idris2 proofs + starting with `solo` dialect. + +Profile: `my-lang/spec/ARG-PROFILE.adoc` + +`my-lang/spec/FRG-PROFILE.adoc`. + +== Section 3 — Identity-consonance integration plan + +The ARG/FRG framework specifies (ARG §7, FRG §7) that per-language +profiles are *VCL-total propositions* — their content is `DECLARE`'d +to VeriSimDB on each commit, and drift between the claimed grade and +the underlying evidence surfaces as a `grade-stale` / `foundations- +stale` alert. + +This is currently *specified but not wired*. The integration plan: + +=== 3.1 Phase A — VCL-total proposition shape + +The VCL-total grammar (vcl-ut repo) needs to accommodate the +ARG/FRG profile shape. Specifically, a `DECLARE PROFILE arg` +statement (and `DECLARE PROFILE frg`) should accept a path to a +profile file and a grade letter, producing the canonical attestation. + +*Owner:* vcl-ut repo. *Estimate:* 1-2 weeks. *Dependency:* none. + +=== 3.2 Phase B — VeriSimDB grade-stale drift type + +VeriSimDB has 8 drift types but no `grade-stale` type yet. Adding +a 9th drift type that triggers when a grade attestation's claimed +level is not supported by the underlying evidence is the wiring +step. + +*Owner:* verisimdb repo. *Estimate:* 2-3 weeks. *Dependency:* Phase A +shape settled. + +=== 3.3 Phase C — Per-repo CI step + +Each language repo gets a CI step (workflow in `.github/workflows/` +or estate-equivalent) that, on every commit: + +. Reads `spec/{ARG,FRG}-PROFILE.adoc`. +. Computes a hash of the evidence-citation block. +. `DECLARE`s the proposition to the language's VeriSimDB instance. +. Optionally: locally runs the banned-construct lint and the + evidence-grounder. + +*Owner:* per-language repos (rolled out via standards template). +*Estimate:* 1-2 weeks per language. *Dependency:* Phase B drift type. + +=== 3.4 Phase D — Cross-modal drift monitor + +VeriSimDB's existing cross-modal drift monitor extends to monitor +grade-stale alerts across all language repos. A dashboard surface +(extending `nextgen-languages/TOOLING-STATUS.adoc`) shows real-time +grade-attestation freshness. + +*Owner:* nextgen-languages + verisimdb. *Estimate:* 2-4 weeks. +*Dependency:* Phase C populated. + +== Section 4 — Outstanding work + +Items NOT closed by this audit; surfaced for owner attention: + +* *Standards convergence (high priority):* The ARG/FRG specs are + uncommitted. They MUST be committed before any language repo + cites them as normative. Recommend a single PR per spec to the + standards repo. +* *Per-language profiles uncommitted:* All 6 profile files + (3 ARG + 3 FRG) are uncommitted in their respective repos. + Recommend a small PR per repo (6 PRs total). +* *Dashboard wiring uncommitted:* TOOLING-STATUS.adoc + the + Julia tracker extension are uncommitted in nextgen-languages. + Recommend a single PR. +* *REPOS_DIR fix (low priority, independent):* one-line PR to the + Julia tracker. +* *TRG-PROFILE.adoc absent for all three languages:* This audit + treated TRG profiles as "TBD". A follow-up audit should produce + honest TRG profiles using the existing `standards/toolchain-readiness-grades/` + template. The dashboard's TRG column won't populate until those + exist. +* *CRG-PROFILE.adoc absent:* Similar — CRG grades components, + multiple components per language. A follow-up audit per language + per component is needed. +* *14 other languages in the estate are unprofiled:* eclexia, + betlang, phronesis, wokelang, anvomidav, julia-the-viper, + oblibeny, error-lang, kitchenspeak, tangle, me-dialect, vcl-ut, + krl, quandledb, typell, panll. The dashboard's TBD entries + honestly surface this. + +== Section 5 — Validation that the framework is doing what it should + +Three signals that the framework is well-calibrated: + +. *No language reaches C or above in year-one audit.* This + matches the explicit expectations in ARG §11 and FRG §11. If + a language had hit ARG-C or FRG-C immediately, the bar would be + wrong. +. *Each language has a different shape of strength/weakness.* + AffineScript = strong adoption, weak foundations. Ephapax = + strong foundations narrative, weak adoption. My = strong + architecture, weak content. This is exactly what worst-of + aggregation surfaces — and the right surfacing for honest + improvement focus. +. *FRG-F evacuation discipline.* Ephapax was *genuinely* FRG-F + for a period; the 2026-05-27 owner directive locked in the + four-layer redesign as canonical and *evacuated* the F-grade. + The framework's F → D evacuation path worked as designed. + +== Section 6 — Open questions for the owner + +. Should the ARG/FRG specs be committed *first* (single PR each) to + establish the baseline before the 6 per-language profiles? + Recommend: yes. +. Should TRG profiles be authored *before* the next audit cycle, or + alongside? Recommend: the next pass should produce TRG profiles + for the three audit-cohort languages, alongside reassessing + ARG/FRG (since TRG profiles will inform the TBD slots). +. Should the framework expand to cover the 14 currently-unprofiled + languages as a batch, or incrementally? Recommend: incrementally, + starting with vcl-ut + krl (the database-language variant that + motivates the per-language tightening mechanism) to validate the + §1.1 extension before broader rollout. +. The FRG-A "kernel-soundness contribution within 12 months" gate + may be too narrow. Worth a v1.1 revisit after the first language + approaches FRG-B. +. The cross-axis coupling ARG-A requires FRG ≥ B may need to + loosen (FRG ≥ C?) or tighten (FRG ≥ A?) once any language + approaches the boundary. + +== Section 7 — Prover-CI hygiene sweep (addendum, 2026-05-28 PM) + +A separate concern surfaced and was resolved within the same session +as the ARG/FRG framework rollout: a self-decaying CI assertion in +`hyperpolymath/echo-types` (the `Expected-failure gate (N5Falsifier +is known-broken)` step in `.github/workflows/agda.yml`) failed in CI +because the underlying proof — held out as "known-broken" since the +2026-05-18 foundation audit — had been resolved on 2026-05-27 by +pinning the implicit `r`/`grade` at four `applyRole` / `applyGrade` +call sites in `proofs/agda/characteristic/N5Falsifier.agda`. + +This prompted a broader question: *across the estate, can "CI green" +be relied upon to mean "proofs pass"?* A systematic prover-CI hygiene +audit was performed, identifying anti-patterns that break that +invariant. + +=== 7.1 Anti-patterns audited + +The sub-agent audit looked for nine classes of CI hygiene failure: + +. *Stale xfail gates* — `if prover ... then exit 1` patterns + asserting that a specific module is expected to fail. Self-decaying: + if the module ever passes, the gate fails CI. +. *Soft-fail patterns* — `continue-on-error: true` on prover test + steps; `|| true` after prover invocations; `set +e` around them; + `allow_failure` on matrix entries. +. *Banned-construct escape hatches* — `postulate`, `believe_me`, + `primTrustMe`, `Admitted.`, `Axiom`, `sorry`, `unsafe`, `oops` + in proof code without CI lint. +. *Aggregator exclusions* — modules excluded from `All.agda` / + `All.lean` / `All.v` such that CI never typechecks them. +. *Cache-mask patterns* — interface-file caches keyed such that + source changes don't invalidate. +. *Path-filter skips* — workflow `paths:` filters excluding proof + directories. +. *Conditional steps that never run* — `if:` guards always false. +. *Compile flags that mask errors* — `--ignore-errors`, + `--keep-going`, `--allow-unsolved-metas`, etc. +. *Tip-module-only checks* — CI typechecks one specific file + instead of an aggregator. + +=== 7.2 Findings before re-verification + +The sub-agent reported 248 repos scanned, 16 with active prover-CI, +6 with at least one anti-pattern: + +[cols="^1,^1,<3", options="header"] +|=== +| Severity | Repos | Pattern + +| Critical | 3 +| echo-types xfail self-decay; echidna 10× `continue-on-error: true`; + affinescript `continue-on-error` on Dune lint/typecheck steps + +| High | 1 +| valence-shell job-level `continue-on-error` in `boj-build.yml` + +| Medium | 5 +| Banned constructs in source without CI lint: 007, echidnabot, + protocol-squisher, statistease, valence-shell +|=== + +=== 7.3 Re-verification — false-positive rate + +Each finding was re-verified by reading the cited files directly. The +audit had *substantial* false-positive contamination, traced to its +grep heuristic counting comments declaring "no banned constructs" as +if they were uses of those constructs. + +[cols="^1,<2,<3", options="header"] +|=== +| Severity | Repo | Re-verification result + +| Critical | echo-types | *Real bug.* xfail gate matched description; fixed in echo-types#136. +| Critical | echidna | *Partial.* 2 of 10 `continue-on-error` were on live-test steps (real bug); the other 8 were on best-effort apt-install of 30+ provers + a job-level guix-reproducibility check (intentional resilience). Real bug fixed in echidna#118. +| Critical | affinescript | *Partial.* 1 of 5 `continue-on-error` was on the bisect_ppx test step (real bug); the other 4 were on odoc docs lint + bench targets + coverage-report tooling artefact (intentional). Real bug fixed in affinescript#409. +| High | valence-shell | *False positive.* The `continue-on-error` flagged is on a `curl` call to BoJ Server (trigger relay), not a prover step. The repo's actual prover workflows (`lean-verification.yml`, `compilation_tests.yml`, `echidna-validation.yml`, `validation.yml`) contain no soft-fail patterns on prover steps. +| Medium | 007 | *False positive.* ZERO banned constructs. All matches were in comments like "no believe_me, no sorry, no assert_total". +| Medium | echidnabot | *Intentional dogfood fixture.* The `Admitted.` in `proofs/coq/admitted_stub.v` is the test target for echidnabot's own axiom scanner — the repo IS the security-monitoring tool that detects this exact pattern. +| Medium | protocol-squisher | *False positive.* ZERO banned constructs. All matches were in comments like "Previously this section used three postulates… were never…", "No Admitted.", "No sorry here". +| Medium | statistease | *False positive.* ZERO banned constructs. Only match was a comment "no postulates". +| Medium | valence-shell (banned constructs) | *Standard math axiom, not escape hatch.* The `postulate` in `proofs/agda/FilesystemModel.agda:161` is `Axiom.Extensionality.Propositional` (HoTT Book §2.9.3), the stdlib's canonical funext-postulate, properly declared and bounded. +|=== + +Net real bugs found and fixed: *3 of the 9 audit findings.* +False-positive rate: ~67% by finding-count, concentrated in +medium-severity (5/5 false positives) and high-severity (1/1 false +positive). Critical-severity was 3/3 real (but with internal +narrowing: only 4 of the 15 `continue-on-error` instances flagged +across echidna+affinescript were actual proof-validity bugs). + +=== 7.4 PRs landed + +[cols="<1,<2,<4", options="header"] +|=== +| PR | Repo | Change + +| `echo-types#136` | echo-types +| `docs(kernel-note)` classifies 18 Echo* modules (kernel-guard + Check B classification-drift fix); `ci(agda)` drops the + `N5Falsifier` xfail gate now that the module typechecks. Merged + on the standard path (CI green). + +| `echidna#118` | echidna +| `ci(live-provers)` drops `continue-on-error: true` on the Tier-2 + and Tier-3 "Run live test for ${{ matrix.backend }}" steps. + Comments explicitly disclosed the soft-fail as transitional + ("allow-fail while Wave-X wires up"); removed both. Provision- + step and job-level resilience preserved. Admin-merged (Actions + budget exhausted). + +| `affinescript#409` | affinescript +| `ci` drops `continue-on-error: true` from the "Run tests with + bisect_ppx instrumentation" step. Docs/bench/coverage-report + soft-fails preserved by design. Admin-merged (Actions budget + exhausted). +|=== + +All three merge events committed cleanly without further CI failure +from the underlying typecheck — confirming the fixes were correct +and that prior test/proof state was healthy beneath the soft-fail +masking. + +=== 7.5 Lesson — audit calibration + +The most important takeaway is *not* the bugs found but the +*audit's false-positive shape*. Future prover-CI hygiene audits +should: + +. *Distinguish "X is mentioned in source" from "X is used in source".* + Comments declaring "no postulate used" are themselves evidence of + good discipline, not violations. Greps that match `postulate` as a + bare word in `.agda` files MUST also account for AsciiDoc-style + comments (`-- no postulate`, `// no postulate`, `(* no + Admitted. *)`). + +. *Distinguish "step has continue-on-error" from "step's failure + would mask proof validity".* A `continue-on-error` on `Provision + ${{ matrix.backend }}` (apt-install best-effort) is different from + one on `Run live test for ${{ matrix.backend }}` (actual prover + invocation). The former is intentional environment resilience; + the latter is the anti-pattern. + +. *Distinguish "stub file with Admitted" from "production proof with + Admitted".* Tool repositories (echidnabot, hypatia, etc.) often + contain *test fixtures* that deliberately use banned constructs to + exercise their own scanners. These are dogfood, not debt. + +. *Re-verify by hand before reporting "critical".* The sub-agent's + report listed 6 repos as having anti-patterns; only 3 had real + ones, and within those, only narrower subsets of the cited lines. + A 30-minute hand re-verification pass after the sub-agent's report + saved roughly 3-5 incorrect PRs that would have caused friction. + +These three calibration rules will be propagated into the audit +playbook for the next hygiene sweep. + +=== 7.6 Final estate state + +After the three PRs above: + +* *No active prover-CI repo in the estate carries an xfail self- + decaying gate.* +* *No active prover-CI repo soft-fails on a prover test invocation.* +* *Soft-fail patterns that remain are all on:* apt-install best- + effort, version-check skip-if-binary-absent, docs lint, bench + targets, coverage-report tooling artefacts. None mask proof + validity. + +"CI green means proofs pass" is now substantively true across the +estate's active prover-CI repos, with the caveat that this is true +*as of 2026-05-28 PM* and depends on the discipline not being +re-introduced. A standing CI-hygiene lint (to be designed in a +future session) could automate the check by rejecting +`continue-on-error: true` on any step whose `run:` contains a +recognised prover binary (`agda`, `coqc`, `lean`, `idris2`, `isabelle`). + +=== 7.7 Open follow-ups from the hygiene sweep + +* *Estate CI-hygiene lint:* As above. Single workflow-linter rule + that lives in `standards/`-equivalent and is wrapped into per-repo + CI. Estimated 1 PR + per-repo wrapping (~80 repos with prover CI + in scope). Defer until separate session. + +* *valence-shell echidna-validation.yml + lean-verification.yml + + compilation_tests.yml:* These were inspected for `continue-on-error` + (none found) but not for the other 7 audit anti-pattern classes. + Deeper inspection deferred. + +* *Audit-doc update for ARG/FRG profile re-grading:* None of the + three audit-cohort languages (affinescript, ephapax, my-lang) + had their grades affected by this hygiene sweep, because none + was running affected CI. ARG/FRG profiles remain at the grades + recorded in §2. + +== References + +* `standards/adoption-readiness-grades/ADOPTION-READINESS-GRADES.adoc` +* `standards/adoption-readiness-grades/ADOPTION-READINESS-GRADES.a2ml` +* `standards/foundations-readiness-grades/FOUNDATIONS-READINESS-GRADES.adoc` +* `standards/foundations-readiness-grades/FOUNDATIONS-READINESS-GRADES.a2ml` +* `affinescript/spec/{ARG,FRG}-PROFILE.adoc` +* `ephapax/spec/{ARG,FRG}-PROFILE.adoc` +* `my-lang/spec/{ARG,FRG}-PROFILE.adoc` +* `nextgen-languages/TOOLING-STATUS.adoc` (Grade Matrix section) +* `nextgen-languages/language-status-tracker.jl` (grade extraction) +* `standards/toolchain-readiness-grades/` — sibling standard +* `standards/component-readiness-grades/` — sibling standard +* `standards/rhodium-standard-repositories/` — sibling standard +* `vcl-ut/README.adoc` — VCL-total identity language + 10-level lineage +* `verisimdb/README.adoc` — cross-modal consistency engine + +Hygiene-sweep landed PRs (Section 7): + +* `hyperpolymath/echo-types#136` — kernel-note classification + N5Falsifier xfail-gate removal +* `hyperpolymath/echidna#118` — Tier-2/3 live-test soft-fail removal +* `hyperpolymath/affinescript#409` — bisect_ppx test-run soft-fail removal diff --git a/adoption-readiness-grades/README.adoc b/adoption-readiness-grades/README.adoc new file mode 100644 index 00000000..dc4cc17d --- /dev/null +++ b/adoption-readiness-grades/README.adoc @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + += Adoption Readiness Grades (ARG) +:toc: + +* *Status:* Active, v1.0, 2026-05-28 +* *License:* PMPL-1.0-or-later (MPL-2.0 automatic legal fallback) +* *Companion to:* Toolchain Readiness Grades (TRG) v1.0, Component Readiness Grades (CRG) v2.2, Foundations Readiness Grades (FRG) v1.0 +* *Part of:* Rhodium Standard Repositories (RSR) + +== What this is + +ARG grades the *language-as-an-adoptable-artefact*. It sits at the +language-whole tier of the estate grading hierarchy: + +* *CRG* grades individual *components*. +* *TRG* grades the *toolchain*. +* *ARG* grades the *language as a thing someone can adopt*. +* *FRG* grades the *mathematical foundations of the language*. + +ARG uses the same letter grades as CRG and TRG (`X | F | E | D | C | B | A`) +and the same worst-of-any-required-element aggregation rule. The evidence +required at each grade asks a different question: *"would anyone use this?"* + +== Why it exists + +Three reasons: + +1. CRG grades components, TRG grades the toolchain — neither answers + "is this language something a person can adopt?". That question + belongs at the language-whole tier. +2. The hyperpolymath estate has 17+ languages in various states of + maturity. ARG provides a uniform yardstick for comparing them on + the adoption axis, independent of TRG or FRG progress. +3. The existing `READINESS.md` per-repo convention (KRL is the canonical + example) can express component grades cleanly but has no rubric for + *language adoption as a whole*. ARG provides that rubric. + +== Files in this directory + +[cols="1,3", options="header"] +|=== +| File | Purpose + +| `ADOPTION-READINESS-GRADES.adoc` +| The normative spec. Read this first. + +| `README.adoc` +| This file. + +| `ADOPTION-READINESS-GRADES.a2ml` +| Machine-readable counterpart of the spec. + +| `SELF-ASSESSMENT.adoc` +| ARG applied to itself (insofar as that's meaningful — ARG grades + languages, not specs; the doc set is self-assessed against + internal-consistency). + +| `templates/ARG-PROFILE-TEMPLATE.adoc` +| The per-language profile template referenced from §1.1 of the spec. + Each adopting language publishes `spec/ARG-PROFILE.adoc` in its repo + following this template. +|=== + +== Quick mental model + +[cols="^1,4", options="header"] +|=== +| Grade | Meaning + +| X | Not yet a language +| F | Released but actively user-harmful (rare) +| E | Bare minimum: qualifies as a language, runs own stdlib. Nobody outside the maintainer would actually use it. +| D | Toy-project usable: a weekend project is feasible +| C | Alpha gate: self-validated, ≥10 dogfood users, ≥2 substantive domains +| B | Beta gate: broadly adopted, ≥100 external users, ≥6 diverse projects, ecosystem-plugged +| A | Stable: school-grade real (first-gen-Python equivalent), ≥1000 users, founder bus-factor passed +|=== + +== Demotion is immediate + +Loss of any A-grade gate (founder steps away without replacement, +educator withdraws, distinct-user count drops, FRG regresses, TRG drops, +6+6 signoff rescinded) immediately demotes the language. There is no +grace period. ARG ≤ TRG always — if the toolchain regresses, adoption +regresses with it. See spec §4.8. + +== How strict is this really? + +See spec §11. Honest answer: ARG-A is calibrated against +*first-generation Python* (c. 1995-2002) and modern Rust. The author +expects approximately *zero* hyperpolymath languages to hit ARG-A in +year one. That is the correct order of magnitude. If ARG-A becomes easy +to hit, the bar is wrong and the standard MUST be raised. + +== Per-language profiles + +ARG is the baseline. Each hyperpolymath language is expected to publish +a `spec/ARG-PROFILE.adoc` in its own repo that tightens ARG with +language-specific obligations. A profile may only tighten, never loosen. +See spec §1.1 and `templates/ARG-PROFILE-TEMPLATE.adoc`. + +== Interaction with the other graders + +* *vs. CRG:* CRG grades components inside the language project (a + formatter, an LSP server, a stdlib module). ARG grades the language + as a whole. Where CRG and ARG would both apply, both grades are + published independently. ARG ≤ worst-of(CRG over all required + adoption-surface components). +* *vs. TRG:* TRG grades the toolchain. ARG asks whether anyone would + adopt the language. ARG ≤ TRG always. +* *vs. FRG:* FRG grades the mathematical foundations of the language + (preservation theorems, soundness, the 10-level TypeLL progression). + Independent of ARG, except that ARG-A requires FRG ≥ B. +* *vs. RSR:* RSR grades repository hygiene. ARG-graded language repos + MUST be RSR-compliant at the level required by TRG at the same + grade. + +== Open invitation + +If you think ARG is too lax (or too strict in a way that lets bad work +hide), file an issue with concrete evidence and a proposed tightening. +ARG earns trust by surviving critique. diff --git a/adoption-readiness-grades/SELF-ASSESSMENT.adoc b/adoption-readiness-grades/SELF-ASSESSMENT.adoc new file mode 100644 index 00000000..4d7c958a --- /dev/null +++ b/adoption-readiness-grades/SELF-ASSESSMENT.adoc @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + += Adoption Readiness Grades — Self-Assessment +:toc: auto +:status: Active +:date: 2026-05-28 + +ARG grades *languages*, not *specifications*. The ARG spec cannot +self-grade with an ARG letter, because the ARG spec is not a language. + +What follows is therefore a *document-set self-assessment* — applying +ARG's internal-consistency criteria to the ARG document set itself, in +the same spirit as TRG's `SELF-ASSESSMENT.adoc`. + +== Current document-set status: COMPLETE-MINIMAL + +*Assessed: 2026-05-28* + +ARG v1.0 was just published. The document set is internally consistent +and structurally complete, but no language has yet been audited against +it. + +== Document-set conformance + +[cols="2,1,4", options="header"] +|=== +| Conformance item | Status | Notes + +| Normative spec exists in `.adoc` +| ✅ +| `ADOPTION-READINESS-GRADES.adoc` (~16 KB, 12 sections) + +| README.adoc +| ✅ +| `README.adoc` + +| Self-assessment +| ✅ +| This document + +| Machine-readable counterpart (`.a2ml`) +| ✅ +| `ADOPTION-READINESS-GRADES.a2ml` + +| Templates referenced from spec +| ✅ +| `templates/ARG-PROFILE-TEMPLATE.adoc` + +| SPDX header on every file +| ✅ +| Verified at creation + +| Inherits CRG/TRG vocabulary +| ✅ +| Grades X-A; release-stage mapping aligned; worst-of aggregation + +| Inherits TRG diversity-metrics +| ✅ +| §6.3 cites TRG verbatim + +| Inherits TRG banned-constructs +| ✅ +| `(inherited-from "TRG")` in a2ml + +| ARG ≤ TRG cross-axis rule stated +| ✅ +| §5 aggregation rule; §6 conformance + +| VeriSimDB integration specified +| ✅ +| §7; VCL-total proposition form; required from grade C + +| At least one language audited against ARG +| ❌ +| Phase 4 of the framework rollout — affinescript, ephapax, my-lang + profiles drafted but not yet committed at the time of this self- + assessment + +| External review by language designers +| ❌ +| Not yet — first audit cohort must complete before external review + +| Adoption by any language outside hyperpolymath +| ❌ +| Not yet — standard is brand new +|=== + +== Honest acknowledgments + +* *The "1000 distinct users" threshold at ARG-A is plucked from + intuition.* Same caveat as TRG's "5M processor-hours" threshold. + It is the right order of magnitude for "school-grade real" but the + exact number should be re-baselined against telemetry from the first + ARG-A candidate (if and when one emerges). + +* *The "6 diverse external projects" threshold at ARG-B is borrowed + from CRG-B.* It may need to drift for languages whose communities + are inherently small (narrow DSLs with very few plausible adopters). + +* *The "founder bus-factor test" is novel and subjective.* It is a + signed-statement gate, not a proof. May require external verification + in a future revision. + +* *The "school-grade" calibration against first-generation Python + (c. 1995-2002) is one author's reading.* Others may calibrate + differently; documented openly in §11 so the choice can be debated. + +* *ARG-A requires FRG ≥ B.* This is a deliberate-but-debatable + coupling. It excludes languages whose adoption is high but whose + foundations lag (most historical production languages). The + coupling may tighten to FRG ≥ A once any single language has shown + both can co-exist; or may loosen if it turns out to be unrealistic. + +* *VeriSimDB integration is required from grade C+.* This assumes + VeriSimDB is operational and addressable from the language repo. + At the time of this self-assessment VeriSimDB is at alpha (v0.1.0) + and the per-language ingestion path is not yet wired. The + requirement is *aspirational-from-C* for now, *enforced-from-C* + once VeriSimDB graduates to TRG-C itself. + +== Path forward + +. Land per-language `spec/ARG-PROFILE.adoc` in affinescript, ephapax, + my-lang following `templates/ARG-PROFILE-TEMPLATE.adoc`. Each at + honest current grade. This is Phase 4 of the framework rollout. +. Extend `nextgen-languages/TOOLING-STATUS.adoc` with the ARG column + alongside TRG / FRG / CRG / RSR. Update `language-status-tracker.jl` + to surface ARG grades from each repo's profile. +. Once the first audit cohort lands, invite external review from + language designers outside hyperpolymath. Adjust thresholds based on + feedback before locking v1.0. +. After feedback cycle, ARG v1.0 becomes the load-bearing standard for + language adoption claims in the estate. + +== Standing rule + +This self-assessment is itself a claim and is itself subject to the +invariant-path doc-claims grounder once it ingests the ARG directory. +Re-run after any change to this directory. If any file-path claim in +this document fails to ground, the self-assessment is invalid until +the gap is closed. diff --git a/adoption-readiness-grades/templates/ARG-PROFILE-TEMPLATE.adoc b/adoption-readiness-grades/templates/ARG-PROFILE-TEMPLATE.adoc new file mode 100644 index 00000000..ae7112ff --- /dev/null +++ b/adoption-readiness-grades/templates/ARG-PROFILE-TEMPLATE.adoc @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + += ARG-PROFILE — {LANGUAGE_NAME} +:toc: left +:toclevels: 2 + +[cols="1,3"] +|=== +| Field | Value + +| Language | {LANGUAGE_NAME} +| Repository | {REPO_URL} +| Current ARG Grade | {GRADE} (X | F | E | D | C | B | A) +| Assessed | {YYYY-MM-DD} +| Assessor | {NAME } +| ARG Spec Version | 1.0 +| TRG Profile | spec/TRG-PROFILE.adoc +| FRG Profile | spec/FRG-PROFILE.adoc +|=== + +== About this profile + +This profile applies the *ARG v1.0* baseline to {LANGUAGE_NAME} and +records the current honest grade, the evidence supporting it, the +language-specific tightening obligations, and the path to the next +grade. Per ARG §1.1, profiles may *tighten* the baseline but never +*loosen* it. + +ARG = {GRADE}. The full chain of axes for this language: + +* ARG: {GRADE} +* TRG: {TRG_GRADE} +* FRG: {FRG_GRADE} +* CRG: {CRG_GRADE} (worst-of any released component) +* RSR: {RSR_LEVEL} + +Cross-axis rule: ARG ≤ TRG holds (currently TRG = {TRG_GRADE}, ARG = {GRADE}). + +== Grade rationale (evidence for {GRADE}) + +[cols="2,1,4", options="header"] +|=== +| Criterion | Met? | Evidence + +| *D1 — Landing page / README* +| {YES/NO} +| {Path to README; one-line summary} + +| *D2 — Elevator pitch* +| {YES/NO} +| {Quote the pitch; where it lives} + +| *D3 — Provenance* +| {YES/NO} +| {Maintainer list, start date, current version} + +| *Required TRG grade (≤ ARG-{GRADE} means TRG ≥ {GRADE})* +| {YES/NO} +| {TRG-PROFILE.adoc cite} + +| *Onboarding O1 — 5-minute hello-world* +| {YES/NO/N/A} +| {Path to tutorial step; time-measured-by who, when} + +| *Onboarding O2 — Tutorial covering distinctive features* +| {YES/NO/N/A} +| {Path to tutorial; named distinctive features covered} + +| *Onboarding O3 — Playground* +| {YES/NO/N/A} +| {URL or just-command; last-verified date} + +| *Onboarding O4 — Cookbook in ≥ 2 substantive domains* +| {YES/NO/N/A} +| {Cookbook path; domains enumerated} + +| *Reference R1 — Reference manual* +| {YES/NO/N/A} +| {Path; coverage notes} + +| *Reference R2 — Stdlib reference* +| {YES/NO/N/A} +| {Path; symbol-coverage estimate} + +| *Reference R3 — Formal grammar* +| {YES/NO/N/A} +| {Path to .ebnf or equivalent} + +| *Reference R4 — Operational/denotational semantics* +| {YES/NO/N/A} +| {Path; also referenced from TRG-PROFILE M5} + +| *Reference R5 — Stable error-code index* +| {YES/NO/N/A} +| {Path; index entry count} + +| *Community C1-C5* +| {YES/NO partial per item} +| {Issue tracker URL; CONTRIBUTING path; CoC path; venue URL; RFC list/count} + +| *Distribution Δ1-Δ5* +| {YES/NO partial per item} +| {Release artefact URL; registry/install URL; versioning policy path; deprecation log; SECURITY.md path} + +| *Education E1-E4 (grade-dependent)* +| {YES/NO partial per item} +| {Slide deck path; workshop materials; curriculum; external educator names} + +| *Ecosystem X1-X4 (grade B+)* +| {YES/NO partial per item} +| {Library count + list URL; tool count + list; integrations enumerated; maintainer count beyond founder} + +| *Diversity metrics (grade C+)* +| {YES/NO} +| {Tech / org / user / downstream breakdown} + +| *VeriSimDB attestation (grade C+)* +| {YES/NO} +| {Attestation ID; last-ingested timestamp} + +| *Founder bus-factor test (grade A only)* +| {YES/NO/N/A} +| {Signed-statement reference; X4 supporting evidence} + +| *Unanimous 6+6 signoff (grade A only)* +| {YES/NO/N/A} +| {Signoff log reference} +|=== + +== Language-specific tightening (this profile only) + +ARG §1.1 permits per-language tightening. For {LANGUAGE_NAME}, the +following additional obligations apply at the stated grades: + +* *Grade C tightening:* {Language-specific obligation; e.g. "AffineScript: + cookbook MUST include at least one borrow-check-rich example."} +* *Grade B tightening:* {e.g. "Ephapax: at least one external tutorial + on dyadic-modality discipline by a non-maintainer."} +* *Grade A tightening:* {e.g. "VCL-total: registered Verisim instance + addressable from a public example, with continuous green + health-check."} + +These tightenings are *additional* to the baseline — none of the +baseline evidence is waived. + +== What is NOT yet met (honest gaps) + +* {Bullet list of specific evidence the language does NOT currently + produce, grouped by adoption surface} +* {Each gap should be specific enough that a reader can verify by + inspection of the repo} + +== Path to next grade ({NEXT_GRADE}) + +* {Specific next-step item 1, with estimated work} +* {Specific next-step item 2} +* {...} + +*Realistic timeline estimate:* {N weeks/months} + +== Path to grade beyond that ({GRADE_AFTER_NEXT}) + +* {Larger items that follow the immediate next grade} + +== Demotion risk + +* *Lowest:* {What would have to break for the grade to drop by one} +* *Medium:* {What would have to break for a multi-step demotion} +* *Catastrophic:* {What would force demotion to F} + +== Iteration history + +[cols="1,1,4", options="header"] +|=== +| Date | Grade | Notes + +| {YYYY-MM-DD} | {grade} | Initial assessment. +| {YYYY-MM-DD} | {grade} | {Reason for change.} +|=== + +== Review cycle + +* *Routine:* Reassess on every release cycle (per ARG §6). +* *Immediate:* Reassess within 7 days of any demotion trigger per + ARG-spec §4.8. +* *VeriSimDB drift alert:* Reassess within 24 hours of any drift alert + from VeriSimDB on this language's attestations. + +== Footer + +Run `just arg-badge` in this repo to generate the shields.io badge for +the README. + +This profile is itself a VCL-total proposition. Its content is +`DECLARE`'d to VeriSimDB on each commit. Drift between the claimed +grade above and the underlying evidence surfaces as a grade-stale alert. diff --git a/foundations-readiness-grades/FOUNDATIONS-READINESS-GRADES.a2ml b/foundations-readiness-grades/FOUNDATIONS-READINESS-GRADES.a2ml new file mode 100644 index 00000000..e3b43354 --- /dev/null +++ b/foundations-readiness-grades/FOUNDATIONS-READINESS-GRADES.a2ml @@ -0,0 +1,294 @@ +; SPDX-License-Identifier: PMPL-1.0-or-later +; Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) +; +; FOUNDATIONS-READINESS-GRADES.a2ml — machine-readable counterpart of +; FOUNDATIONS-READINESS-GRADES.adoc. Same normative content; this file +; is the source of truth for tooling. + +(a2ml-document + (schema "rsr/standard/v1") + (standard + (id "foundations-readiness-grades") + (name "Foundations Readiness Grades") + (abbreviation "FRG") + (version "1.0") + (date "2026-05-28") + (status "Active") + (author "Jonathan D.A. Jewell ") + (license "PMPL-1.0-or-later") + (family "Rhodium Standard Repositories") + (relationship + (tier "language-whole") + (siblings ("Component Readiness Grades" "Toolchain Readiness Grades" "Adoption Readiness Grades")) + (question "Is the language's typing discipline mathematically grounded?")) + (cross-axis + (rule "ARG-A requires FRG >= B") + (independence "FRG is otherwise independent of TRG/ARG; worst-of dominates only within an axis"))) + + (grades + (grade + (letter "X") + (name "No formal artefact") + (release-stage "none") + (stability "no-claim") + (shorthand "no-formalisation") + (evidence-required ()) + (note "Most languages live here at birth. Honest.")) + (grade + (letter "F") + (name "Provably wrong, unmarked") + (release-stage "reject-deprecate-delegate") + (stability "actively-misleading") + (shorthand "false-claim") + (evidence-required + (Qed-counterexample-present-in-corpus) + (original-theorem-not-marked-superseded) + (no-redesign-document-committed)) + (evacuation-path "Mark legacy archaeology + commit canonical redesign document -> re-enter D") + (canonical-example "ephapax/formal/ pre-PRESERVATION-DESIGN.md state")) + (grade + (letter "E") + (name "Well-formedness only") + (release-stage "pre-alpha") + (stability "syntactic-ground-only") + (shorthand "well-formed") + (evidence-required + (trg-grade-at-least "E" "for-formalisation-tooling") + (M1 "abstract-syntax-formalised-in-qualifying-prover") + (M2 "typing-judgment-formalised") + (B0 "zero-banned-constructs") + (typell-level-at-least 2 "well-formedness") + (formalisation-in-canonical-formal-directory) + (formalisation-built-by-ci))) + (grade + (letter "D") + (name "Preservation stated") + (release-stage "pre-alpha+") + (stability "foundations-begun") + (shorthand "preservation-stated") + (evidence-required + (all-of-E) + (M3 "operational-or-denotational-semantics-formalised") + (M4 "preservation-theorem-stated") + (M5 "progress-theorem-stated") + (typell-level-at-least 4 "with-evidence") + (proof-needs-md-lists-every-open-goal) + (each-admit-tagged-with-proof-needs-entry) + (no-silent-admits))) + (grade + (letter "C") + (name "Preservation closed") + (release-stage "alpha") + (stability "core-soundness-established") + (shorthand "core-closed") + (evidence-required + (all-of-D) + (P1 "preservation-theorem-closed-zero-admits-zero-axioms-zero-sorries") + (P2 "progress-theorem-closed") + (typell-level-at-least 6 "with-evidence") + (proof-status-a2ml-present-and-current) + (B0-and-B1 "ci-banned-construct-lint-active") + (trg-grade-at-least "C" "for-formalisation-tooling") + (peer-review-by-at-least-one-non-immediate-collaborator))) + (grade + (letter "B") + (name "Layered + cross-prover") + (release-stage "beta") + (stability "robust-foundations") + (shorthand "layered-cross-prover") + (evidence-required + (all-of-C) + (P3 "counterexample-regression-suite-at-least-3-Qed-lemmas") + (P4 "layered-orthogonal-decomposition-with-explicit-cross-layer-invariants") + (V2 "cross-prover-replication-at-least-2-for-central-preservation") + (O1 "operational-checker-tested-against-formal-judgment-at-least-1000-examples") + (typell-level-at-least 8 "with-evidence") + (trg-grade-at-least "B" "for-formalisation-tooling") + (external-single-party-review-completed))) + (grade + (letter "A") + (name "Verified + cross-validated") + (release-stage "stable") + (stability "field-grounded") + (shorthand "kernel-grade") + (evidence-required + (all-of-B) + (V3 "cross-prover-replication-at-least-3-for-central-preservation") + (V4 "echidna-in-ci-as-required-gate") + (O2 "differential-testing-100k-rolling-window") + (O3 "audit-friendly-reference-implementation") + (typell-level 10 "cross-cutting-with-evidence") + (K1 "kernel-soundness-contribution-to-qualifying-prover-within-12-months") + (trg-grade "A" "for-formalisation-tooling") + (five-nines-on-standing-priority-axes) + (unanimous-6-plus-6-llm-panel-signoff)))) + + (release-stage-mapping + (X "none") + (F "reject-deprecate-delegate") + (E "pre-alpha") + (D "pre-alpha+") + (C "alpha") + (B "beta") + (A "stable")) + + (required-foundations-surfaces + (mathematical-core + (M1 "abstract-syntax-formalised") + (M2 "typing-judgment-formalised") + (M3 "operational-or-denotational-semantics-formalised") + (M4 "preservation-theorem-stated") + (M5 "progress-theorem-stated")) + (proof-landings + (P1 "preservation-closed-zero-banned-constructs") + (P2 "progress-closed") + (P3 "counterexample-driven-regression-suite") + (P4 "layered-decomposition-with-cross-layer-invariants")) + (verification-rigor + (V1 "single-qualifying-prover") + (V2 "cross-prover-replication-at-least-2") + (V3 "cross-prover-replication-at-least-3") + (V4 "echidna-neurosymbolic-verifier-in-ci")) + (typell-classification + (L1 "syntactic-parsability") + (L2 "well-formedness") + (L3 "schema-conformance") + (L4 "type-compatibility") + (L5 "nullability-tracking") + (L6 "injection-resistance") + (L7 "linear-types") + (L8 "session-types") + (L9 "effect-systems-modal-types-proof-carrying") + (L10 "quantitative-type-theory-cross-cutting")) + (banned-constructs + (B0 "zero-banned-constructs") + (B1 "ci-banned-construct-lint-active")) + (operational-equivalence + (O1 "operational-checker-tested-against-formal-judgment") + (O2 "differential-testing-at-scale") + (O3 "audit-friendly-reference-implementation")) + (kernel-soundness-contribution + (K1 "upstream-contribution-to-qualifying-prover-within-12-months"))) + + (qualifying-provers + (inherited-from "TRG") + (current-set ("Idris2" "Lean4" "Rocq" "Agda")) + (extension-via "per-language-profile-tightening")) + + (banned-constructs-inherited + (inherited-from "TRG") + (includes + ("believe_me" "Admitted" "sorry" "postulate" "unsafeCoerce" + "Obj.magic" "unsafePerformIO" "assume" "axiom" + "any-compile-time-evidence-erasure-construct")) + (tracked-admits + (permitted-at-grades ("D" "E")) + (forbidden-at-grades ("C" "B" "A")) + (definition "Admitted-whose-statement-is-enumerated-in-PROOF-NEEDS.md-with-discharge-plan"))) + + (diversity-metrics + (inherited-from "TRG") + (applied-to "peer-review-and-external-review-criteria")) + + (a-grade-llm-panel + (inherited-from "TRG") + (template "standards/toolchain-readiness-grades/templates/A-GRADE-LLM-PANEL.adoc")) + + (grade-aggregation + (rule "worst-of-any-required-foundations-surface") + (no-averaging true) + (no-weighting true) + (no-partial-credit true) + (cross-axis-rule "ARG-A requires FRG >= B; otherwise axes are independent within their own evidence floors")) + + (closed-theorem-definition + (criterion-1 "statement-fully-ground-no-remaining-obligations") + (criterion-2 "zero-banned-constructs-in-proof-tree") + (criterion-3 "every-transitively-used-lemma-itself-closed") + (note "Theorem depending on Axiom whose statement is not itself a closed theorem is NOT closed")) + + (layered-decomposition-definition + (criterion-1 "each-layer-preservation-theorem-stateable-independently") + (criterion-2 "cross-layer-composition-explicit-as-named-lemma") + (criterion-3 "strengthening-one-layer-does-not-require-re-proving-others") + (canonical-example "ephapax/formal/PRESERVATION-DESIGN.md four-layer redesign (L1 regions / L2 modality / L3 echo / L4 dyadic)")) + + (counterexample-regression-definition + (form "positive-Qed-lemmas-establishing-what-calculus-does-not-prove") + (canonical-example "ephapax/formal/Counterexample.v three Qed lemmas")) + + (echidna-integration + (required-from-grade "A") + (mechanism "ci-gate-on-PR-touching-formal-directory") + (failure-policy "blocks-merge") + (prerequisite "Echidna >= TRG-C and language exposes judgments to Echidna verification protocol")) + + (differential-testing + (required-from-grade "A") + (mechanism "random-program-generation-fed-to-impl-and-formal-judgment-interpreter") + (window-size 100000) + (window-period "per-release-cycle")) + + (demotion + (policy "immediate-on-loss-of-any-required-evidence") + (no-grace-period true) + (named-triggers + ("any-single-Admitted-or-sorry-or-banned-construct-introduced -> demotion-to-highest-grade-still-supported") + ("counterexample-lemma-broken-or-removed -> demotion-proportional-to-witnessed-property") + ("cross-prover-replication-falls-behind-main-calculus-by-more-than-one-release -> A->B or B->C as applicable") + ("kernel-soundness-contribution-lapses-past-12-months -> A->B") + ("echidna-ci-gate-disabled -> A->B"))) + + (per-language-extension + (mechanism "spec/FRG-PROFILE.adoc") + (template "templates/FRG-PROFILE-TEMPLATE.adoc") + (tightening-allowed true) + (loosening-allowed false)) + + (definitions + (qualifying-prover "inherited-from-TRG") + (banned-constructs "inherited-from-TRG") + (closed-theorem "see closed-theorem-definition") + (layered-decomposition "see layered-decomposition-definition") + (counterexample-regression-suite "see counterexample-regression-definition") + (echidna-in-ci "see echidna-integration") + (differential-testing "see differential-testing")) + + (self-assessment + (target "FRG specification's own FRG grade is not applicable; FRG grades languages, not specifications") + (spec-graded-by "standards repo CRG + documentation tooling TRG") + (document-set-self-assessment "SELF-ASSESSMENT.adoc")) + + (conformance + (criteria + ("FRG grade recorded in spec/FRG-PROFILE.adoc in language repo") + ("Grade supported by evidence for that grade AND every grade below") + ("Profile reviewed per release cycle and within 7 days of demotion") + ("Worst-of aggregation respected") + ("Demotion recorded immediately on loss") + ("Per-commit attestations to VeriSimDB at grade C and above") + ("CI runs banned-construct lint and blocks merge from grade D upward"))) + + (open-questions + ("Cross-prover floor of 3 at A may need to drift as qualifying-prover set grows") + ("100k differential tests per release is order-of-magnitude; re-baseline against first FRG-A candidate") + ("Kernel-soundness contribution narrow definition may need broadening") + ("Echidna-as-sole-neurosymbolic-verifier coupling may relax if second emerges") + ("TypeLL=L10 hard gate at A may relax for domain-specific languages with bounded semantic complexity")) + + (versioning + (current-version "1.0") + (review-cycle "per-release-or-major-event") + (semver-pattern "MAJOR.MINOR for incompatible-evidence-floor changes vs compatible-clarifications")) + + (references + (TRG "standards/toolchain-readiness-grades/TOOLCHAIN-READINESS-GRADES.adoc") + (CRG "standards/component-readiness-grades/COMPONENT-READINESS-GRADES.md") + (ARG "standards/adoption-readiness-grades/ADOPTION-READINESS-GRADES.adoc") + (RSR "standards/rhodium-standard-repositories/README.adoc") + (TypeLL-PanLL-VCLtotal "vcl-ut/README.adoc — 10-level type-safety lineage") + (ephapax-four-layer-redesign "ephapax/formal/PRESERVATION-DESIGN.md") + (ephapax-counterexample-canonical "ephapax/formal/Counterexample.v") + (echidna "echidna/README.adoc") + (verisimdb "verisimdb/README.adoc") + (a-grade-llm-panel "standards/toolchain-readiness-grades/templates/A-GRADE-LLM-PANEL.adoc"))) diff --git a/foundations-readiness-grades/FOUNDATIONS-READINESS-GRADES.adoc b/foundations-readiness-grades/FOUNDATIONS-READINESS-GRADES.adoc new file mode 100644 index 00000000..f0de9f15 --- /dev/null +++ b/foundations-readiness-grades/FOUNDATIONS-READINESS-GRADES.adoc @@ -0,0 +1,627 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + += Foundations Readiness Grades (FRG) +:toc: left +:toclevels: 3 + +[cols="1,3", options="header"] +|=== +| Field | Value + +| Standard | Foundations Readiness Grades +| Version | 1.0 +| Date | 2026-05-28 +| Status | Active +| Author | Jonathan D.A. Jewell +| License | PMPL-1.0-or-later (MPL-2.0 automatic legal fallback) +| Family | Rhodium Standard Repositories (RSR) +| Companion to | Toolchain Readiness Grades v1.0, Adoption Readiness Grades v1.0, Component Readiness Grades v2.2 +| Relationship | Language-whole sibling tier (CRG = component, TRG = toolchain, ARG = adoption, FRG = mathematical foundations) +|=== + +== Abstract + +Foundations Readiness Grades (FRG) is a strict-by-construction grading +scheme for the *mathematical foundations of a language*. It sits at +the language-whole tier of the estate grading hierarchy: + +* *CRG* grades individual *components*. +* *TRG* grades the *toolchain*. +* *ARG* grades the language as a *thing someone can adopt*. +* *FRG* grades the language's *mathematical foundations*. + +FRG uses the same letter grades as CRG, TRG, and ARG +(`X | F | E | D | C | B | A`) and the same worst-of-any-required-element +aggregation rule. The evidence required at each grade asks a different +question: *"is the language's typing discipline mathematically +grounded?"*. That is the question of FRG. + +FRG is *independent* of TRG and ARG — a language can be FRG-C +(preservation theorem closed) while remaining TRG-D (toolchain still +incomplete), and vice versa. The single cross-axis coupling is +*ARG-A requires FRG ≥ B*: no language is judged school-grade adoptable +unless its foundations have crossed into layered, cross-prover +territory. + +FRG distils estate practice. The four-layer redesign in ephapax +(L1 regions / L2 modality / L3 echo / L4 dyadic), the 10-level +TypeLL → PanLL → VCL-total type-safety lineage, the Echidna +neurosymbolic verifier, and the banned-constructs discipline are all +folded in as evidence rungs. + +This standard is part of the Rhodium Standard Repositories (RSR) +family of standards maintained by hyperpolymath. + +== 1. Scope + +This standard applies to: + +* Any programming language, query language, or domain-specific + language whose *typing discipline* is intended to carry meaning — + i.e. the language claims that well-typed programs have some + recoverable property (safety, totality, resource discipline, etc.). +* The *language as a mathematical object* — the formal calculus that + the implementation is purported to refine. + +This standard does NOT apply to: + +* Languages whose typing is *advisory* only (no soundness claim + intended). Such languages are FRG-X by definition and should not + attempt to grade themselves. +* Toolchain components (use TRG). +* Adoption surfaces (use ARG). +* Repository hygiene (use RSR). + +=== 1.1 Per-language extension + +Each adopting language SHOULD publish a `spec/FRG-PROFILE.adoc` in +its own repo that tightens this baseline with language-specific +obligations. A profile MAY only tighten, never loosen. Examples of +valid tightening: + +* AffineScript: "Grade C requires the formalised QTT fragment to be + closed under the affine restriction (no q=ω escape via the affine + rule)." +* Ephapax: "Grade C requires preservation closed *per layer* + (preservation_l1, preservation_l2, preservation_l3) with the + cross-layer composition closed under explicit invariants — + not via patched single-layer admits." +* VCL-total: "Grade B requires Cross-modal consistency proofs over + ≥ 3 of the 8 octad modalities." +* My-lang: "Grade C requires the calculus's borrow-checker to be + modelled in the formal calculus, not stubbed out." + +The profile shape is defined in `templates/FRG-PROFILE-TEMPLATE.adoc`. + +== 2. Normative References + +* *CRG v2.2* — Component Readiness Grades. FRG inherits CRG's letter + vocabulary and release-stage mapping. +* *TRG v1.0* — Toolchain Readiness Grades. FRG inherits TRG's + `qualifying-provers` list (Idris2, Lean 4, Rocq, Agda) and TRG's + `banned-constructs` list verbatim. +* *ARG v1.0* — Adoption Readiness Grades. ARG-A requires FRG ≥ B. +* *RSR* — Rhodium Standard Repositories. FRG-graded language repos + MUST be RSR-compliant at the level required by TRG at the same + grade. +* *TypeLL → PanLL → VCL-total* — The 10-level type-safety lineage. + FRG grades cite the achieved level explicitly from grade D upward. +* *Echidna* — The estate's neurosymbolic proof verifier. Required in + CI from FRG-A. + +== 3. Required Foundations Surfaces + +FRG grades the *language as a mathematical artefact*. A "foundations +surface" is one of the public evidence classes that establishes the +language's mathematical grounding. The required surfaces are: + +=== 3.1 Mathematical core (M) +* *M1* Abstract syntax formalised in a qualifying prover +* *M2* Typing judgment formalised in a qualifying prover +* *M3* Operational (or denotational) semantics formalised in a + qualifying prover +* *M4* Preservation theorem *statement* present in the formalisation +* *M5* Progress theorem *statement* present in the formalisation + +=== 3.2 Proof landings (P) +* *P1* Preservation theorem *closed*: 0 admits, 0 axioms, 0 sorries + (zero banned constructs in the proof) +* *P2* Progress theorem *closed* +* *P3* Counterexample-driven regression suite: known unsound + variants are encoded as `Qed`'d counterexample lemmas (ephapax + `Counterexample.v` is the canonical example) +* *P4* Layered / orthogonal decomposition: independent concerns + (structural / region / modality / effect / echo) are separated + into distinct judgments with explicit cross-layer invariants + +=== 3.3 Verification rigor (V) +* *V1* Single qualifying prover used for M1-M5 +* *V2* Cross-prover replication ≥ 2 (the central preservation + theorem is closed in a second qualifying prover) +* *V3* Cross-prover replication ≥ 3 +* *V4* Echidna neurosymbolic verifier integrated in CI as a gate + +=== 3.4 TypeLL classification (L) +* *L1* Syntactic parsability (parser accepts the input) +* *L2* Well-formedness (structure matches the grammar) +* *L3* Schema conformance (references resolve to declared entities) +* *L4* Type compatibility (operator arities and types align) +* *L5* Nullability tracking (`NULL` propagates explicitly) +* *L6* Injection resistance (parameterisation as type-level invariant) +* *L7* Linear types (QTT q=1 on resources) +* *L8* Session types (state-indexed protocols) +* *L9* Effect systems / Modal types / Proof-carrying code +* *L10* Quantitative type theory / cross-cutting + +For each language, the FRG profile cites the highest level achieved +and *with evidence*. Claiming L7 without a linear-typing soundness +proof is not L7; it is L4 with linear-typing aspirations. + +=== 3.5 Banned constructs (B) +* *B0* Zero `believe_me`, `Admitted`, `sorry`, `postulate`, + `unsafeCoerce`, `Obj.magic`, `unsafePerformIO`, `assume`, `axiom`, + or equivalent in proof code (inherited from TRG) +* *B1* Per-prover banned-construct lint runs in CI + +=== 3.6 Operational equivalence (O) +* *O1* Operational checker / type checker / borrow checker tested + against the formal judgment +* *O2* Differential testing: random programs are well-typed by the + implementation if and only if they are well-typed by the formal + judgment (or a documented gap is recorded) +* *O3* Reference implementation: a small, audit-friendly reference + implementation exists alongside the production implementation + +=== 3.7 Kernel-soundness contribution (K) +* *K1* Upstream contribution to one of the qualifying provers + (Idris2, Lean 4, Rocq, Agda) within the last 12 months — proof + library, tactic, soundness fix, or curated counterexample suite + +== 4. Grade Definitions + +=== 4.1 Grade X — No formal artefact + +*Release stage:* None +*Stability posture:* No claim +*Honest shorthand:* `no-formalisation` + +The language has no formalisation in any qualifying prover. The +typing rules exist (if at all) only as informal prose or as the +implementation's source code. No evidence rung from §3.1 is met. + +*Evidence required:* None. This is the absence of foundations +evidence. + +*Common position:* Most languages live here at birth. Honest. + +=== 4.2 Grade F — Provably wrong, unmarked + +*Release stage:* Reject, deprecate, or delegate +*Stability posture:* Actively misleading +*Honest shorthand:* `false-claim` + +A formalisation exists *and* one of the central theorems +(preservation, progress, soundness) is *known to be false* under the +current formulation, *and* the formalisation is not marked as +archaeology / superseded / counterexample-witnessed. The language +is claiming foundations it does not have. + +Two examples make the rung concrete: + +* A `Theorem preservation` closed by an `Axiom` whose statement is + itself the conclusion being proved (circular discharge). +* A calculus whose `formal/` contains a counterexample lemma + (`Counterexample.v` style) closed by `Qed`, demonstrating that + the central theorem is false as stated, *without* the legacy + `preservation` statement being marked superseded by a redesign + document. + +FRG-F is rare and important. The grade exists so that a language +can be honestly de-promoted without having to silently invalidate +historical claims. Ephapax's pre-redesign `formal/Semantics.v` +state (preservation stated, `Counterexample.v` `Qed`'d, no +redesign locked in) was FRG-F by this definition for the days +before PRESERVATION-DESIGN.md was committed; the redesign +*evacuates* the F-grade by reclassifying the legacy preservation +statement as archaeology and committing to a layered redesign as +canonical. + +*Evidence required to assign F:* +* Counterexample present in the proof corpus, `Qed`'d. +* Original theorem not marked superseded, archaeological, or + conditional on a recorded redesign plan. +* No layered/sequenced redesign document committed. + +*Evacuation path from F to D:* Mark legacy as archaeology, commit a +canonical redesign document (e.g. `PRESERVATION-DESIGN.md`), and +begin the work — at which point the language re-enters D. + +=== 4.3 Grade E — Well-formedness only *(pre-alpha)* + +*Release stage:* Pre-alpha +*Stability posture:* Syntactic ground only +*Honest shorthand:* `well-formed` + +The language has a formalisation that establishes well-formedness: +the abstract syntax and typing judgment are encoded in a qualifying +prover, but no preservation or progress claim has been stated +formally yet. No banned constructs in what exists. + +*Smell test:* "We've written down the rules in Coq." + +*Evidence required (worst-of, all MUST be PASSED):* + +* TRG ≥ E for the formalisation tooling (the prover, build, CI) +* M1 (abstract syntax formalised in a qualifying prover) +* M2 (typing judgment formalised in a qualifying prover) +* B0 (zero banned constructs in formalisation) +* TypeLL ≥ L2 (well-formedness) declared in the FRG profile +* Formalisation lives in the canonical repo `formal/` directory and + is built by CI + +=== 4.4 Grade D — Preservation stated *(pre-alpha+)* + +*Release stage:* Pre-alpha+ +*Stability posture:* Foundations begun +*Honest shorthand:* `preservation-stated` + +The language has stated the central preservation/progress theorems +in the formalisation. Operational or denotational semantics is +formalised. Admits are explicitly *tracked* (a PROOF-NEEDS.md +catalog enumerates every open goal). The work is honest about what +is and isn't done; it is *not* yet closed. + +*Smell test:* "We know what we're trying to prove." + +*Evidence required (all E + the following):* + +* M3 (operational or denotational semantics formalised) +* M4 (preservation theorem *stated*) +* M5 (progress theorem *stated*) +* TypeLL ≥ L4 declared with evidence +* PROOF-NEEDS.md (or equivalent) lists every open goal with file, + line, and the conjecture's role +* Each open goal is either: +** Explicitly `Admitted` / `sorry` with a comment citing the + PROOF-NEEDS.md entry, OR +** Closed via `Qed` ++ +*No silent admits.* Banned-construct lint forbids untracked +admits. + +=== 4.5 Grade C — Preservation closed *(alpha gate)* + +*Release stage:* Alpha +*Stability posture:* Core soundness established +*Honest shorthand:* `core-closed` + +The central preservation and progress theorems are *closed* in a +qualifying prover with zero admits, zero axioms, zero sorries, and +zero banned constructs throughout the proof tree. PROOF-STATUS.a2ml +exists and is current. This is the *alpha gate* of FRG — below C +the language's foundations are aspirational. + +*Smell test:* "If the formalisation says it's typeable, it stays +typeable through evaluation." + +*Evidence required (all D + the following):* + +* P1 (preservation theorem closed: 0 admits, 0 axioms, 0 sorries) +* P2 (progress theorem closed) +* TypeLL ≥ L6 declared with evidence +* PROOF-STATUS.a2ml present and current in `formal/` +* B0 + B1 (zero banned constructs *and* CI lint actively enforcing) +* TRG ≥ C for the formalisation tooling +* Diversity-metrics check: the formalisation has been peer-reviewed + by ≥ 1 reviewer outside the founder's immediate collaborators + +=== 4.6 Grade B — Layered + cross-prover *(beta gate)* + +*Release stage:* Beta +*Stability posture:* Robust foundations +*Honest shorthand:* `layered-cross-prover` + +The formalisation has matured beyond a single-layer proof tree. +Distinct concerns (structural discipline / region capability / +modality / echo / dyadic interaction, or the equivalents for the +language) are factored into independent layers with explicit +cross-layer invariants. Counterexample-driven regression catches +known-false variants. The central theorem is replicated in a +second qualifying prover. The language is genuinely *robust* +against the failure modes that motivated formalisation in the first +place. + +*Smell test:* "If we strengthen one concern, the proof of the +others doesn't have to change." + +*Evidence required (all C + the following):* + +* P3 (counterexample regression suite: ≥ 3 `Qed`'d + counterexample lemmas establishing what the calculus does NOT + prove) +* P4 (layered / orthogonal decomposition with documented + cross-layer invariants) +* V2 (cross-prover replication ≥ 2 for the central preservation + theorem) +* O1 (operational checker tested against the formal judgment for + ≥ 1000 worked examples) +* TypeLL ≥ L8 declared with evidence +* TRG ≥ B for the formalisation tooling +* External (non-collaborator) single-party review of the + formalisation completed + +=== 4.7 Grade A — Verified + cross-validated *(stable)* + +*Release stage:* Stable +*Stability posture:* Field-grounded +*Honest shorthand:* `kernel-grade` + +The language's foundations are field-grounded. Cross-prover +validation has reached three qualifying provers. The Echidna +neurosymbolic verifier is integrated as a CI gate. The +implementation has been certified against the formal judgment via +differential testing at scale. The language has contributed +back to a qualifying prover's kernel-soundness ecosystem within +the last 12 months. Past FRG-A is not part of this scheme — +what lies beyond is a *cultural phenomenon* (a research +programme), not a property of the language. FRG-A is the cap. + +*Smell test:* "Three independent prover communities agree it's +sound." + +*Evidence required (all B + the following):* + +* V3 (cross-prover replication ≥ 3 for the central preservation + theorem) +* V4 (Echidna in CI as a required gate; failures block merge) +* O2 (differential testing: random programs agree between the + formal judgment and the implementation; ≥ 100k tests run in CI + rolling window) +* O3 (audit-friendly reference implementation present alongside + the production implementation) +* TypeLL = L10 (cross-cutting) declared with evidence +* K1 (kernel-soundness contribution to a qualifying prover within + the last 12 months) +* TRG = A for the formalisation tooling +* 5-nines on the standing priority axes (TRG §5.7.5 adopted + verbatim) +* Unanimous 6+6 (humans + LLMs) signoff per TRG + `templates/A-GRADE-LLM-PANEL.adoc` + +=== 4.8 Demotion on loss *(critical)* + +Loss of any A-grade gate (kernel-soundness contribution lapses +past 12 months, Echidna CI gate disabled, cross-prover replication +falls below 3, differential testing window collapses, signoff +rescinded, banned construct introduced in any layer) *immediately* +demotes the language. There is no grace period. Same rule as TRG +§5.8 and ARG §4.8. + +Specific FRG demotion triggers worth naming explicitly: + +* *Any single `Admitted`, `sorry`, or banned construct introduced + into the proof tree* — instant demotion to the highest grade + whose evidence still holds. +* *Counterexample lemma broken / removed* — demotion proportional + to which property the counterexample was witnessing. +* *Cross-prover replication falls behind the main calculus by + more than one release* — demotion from A to B (V3 floor lost), + or from B to C (V2 floor lost), as applicable. + +== 5. Aggregation Rule + +FRG aggregation is *worst-of-any-required-foundations-surface*. The +language's FRG grade is the *lowest* grade achieved across all +required foundations surfaces at the candidate grade's evidence floor. + +*No averaging. No weighting. No partial credit.* + +A language with M1-M5 + P1-P2 at B (layered, cross-prover) but +TypeLL only at L5 is *FRG-D*, not "FRG-B-with-typing-gap". The +honest grade tells the maintainer what to focus on next. + +== 6. Definitions + +=== 6.1 "Qualifying prover" + +Inherited verbatim from TRG. Currently: Idris2, Lean 4, Rocq +(formerly Coq), Agda. A formalisation in any other system (Isabelle, +F*, Twelf, etc.) is admissible by per-language profile tightening +but does not satisfy the baseline. + +=== 6.2 "Banned constructs" + +Inherited verbatim from TRG. Includes (non-exhaustive): +`believe_me`, `Admitted`, `sorry`, `postulate`, `unsafeCoerce`, +`Obj.magic`, `unsafePerformIO`, `assume`, `axiom`, equivalents in +other provers, and any compile-time-evidence-erasure construct +acting as a proof escape hatch. + +A *tracked admit* (an `Admitted` whose statement is enumerated in +PROOF-NEEDS.md with a discharge plan) is permitted at grades D and +below. It is forbidden at grades C and above (the C-gate is "0 +admits, 0 axioms, 0 sorries"). + +=== 6.3 "Closed theorem" + +A theorem is *closed* iff: + +1. Its statement is fully ground (no remaining proof obligations). +2. Its proof tree contains zero banned constructs (per §6.2). +3. Every transitively-used lemma is itself closed by this + definition. + +A theorem whose proof depends on an `Axiom` whose statement is +not itself a closed theorem is *not closed*. + +=== 6.4 "Layered decomposition" + +A formalisation is *layered* iff distinct mathematical concerns +are factored into separate judgments such that: + +1. Each layer's preservation theorem can be stated independently + of the others. +2. Cross-layer composition is explicit (a named composition + lemma, not implicit in the term structure). +3. Strengthening one layer does not require re-proving the others. + +The ephapax four-layer redesign (L1 regions / L2 modality / +L3 echo / L4 dyadic) is the canonical example, encoded in +`formal/PRESERVATION-DESIGN.md`. + +=== 6.5 "Counterexample regression suite" + +A collection of *positive* lemmas (closed by `Qed` / equivalent) +that demonstrate what the calculus does *not* prove. The canonical +example is ephapax's `formal/Counterexample.v`, in which three +`Qed`'d lemmas establish that the original preservation statement +is *false*, motivating the four-layer redesign. + +A counterexample lemma is `Qed`'d positive content: it proves the +*existence* of a configuration the calculus must not type. The +calculus is then strengthened to exclude that configuration. + +=== 6.6 "Echidna in CI" + +The hyperpolymath neurosymbolic proof verifier is invoked as a CI +gate on every PR that touches `formal/`. Failures block merge. +This requires both Echidna ≥ TRG-C and the language's formalisation +to expose its judgments to Echidna's verification protocol. + +=== 6.7 "Differential testing" + +Random programs are generated (per a documented generator) and +fed to both the implementation's type checker and the formal +judgment's executable interpretation. Disagreements are recorded +as bugs and resolved before merge. A 100k-test rolling window +implies running ≥ 100k tests per release cycle. + +== 7. Per-Language Profile Mechanism + +Each adopting language publishes `spec/FRG-PROFILE.adoc` in its repo, +following `templates/FRG-PROFILE-TEMPLATE.adoc`. The profile: + +* States the current honest grade (X / F / E / D / C / B / A). +* Cites the evidence for each criterion (file paths, theorem names, + commit SHAs, PROOF-NEEDS.md entries, PROOF-STATUS.a2ml lines). +* Records the path to the next grade with concrete next steps, + including specific open theorems and their location. +* Carries an iteration history with dates and previous grades + retained. +* MAY tighten the baseline (add language-specific obligations). +* MUST NOT loosen the baseline. + +A language's FRG-PROFILE.adoc is itself a *VCL-total proposition* — +its content is `DECLARE`'d to VeriSimDB on each commit. Drift +between the profile's claimed grade and the underlying proof state +(any new admit, removed `Qed`, etc.) is detected by VeriSimDB's +cross-modal drift monitor and surfaces as a *foundations-stale* +alert. + +== 8. Self-Assessment + +The FRG specification's own FRG grade is *not applicable* — FRG +grades languages, not specifications. The FRG specification is +graded by the CRG of the standards repo and by the TRG of the +documentation tooling. + +The FRG document set is self-assessed against its own +internal-consistency criteria in `SELF-ASSESSMENT.adoc`. + +== 9. Conformance + +A language conforms to FRG if: + +1. Its FRG grade is recorded in `spec/FRG-PROFILE.adoc` in its repo. +2. The grade is supported by the evidence described in §4 for that + grade and every grade below it. +3. The profile is reviewed at least once per release cycle, and on + any demotion event within 7 days. +4. The aggregation rule of §5 is respected (worst-of, no averaging). +5. Demotion is recorded immediately on loss of any required + evidence (§4.8). +6. Per-commit attestations are emitted to VeriSimDB at grade C and + above. +7. The language's CI runs the banned-construct lint (§6.2) and + blocks merge on violation from grade D upward. + +== 10. Open Questions / Versioning + +This is FRG v1.0. Likely v2 changes once cross-prover ecosystem +matures: + +* The "≥ 3 qualifying provers" floor at A is *plucked from + intuition*: it is the minimum number that breaks any single-prover + ecosystem capture. May need to drift up if the qualifying-prover + set grows (Twelf, F*, Isabelle adoptions are under discussion). +* The "100k differential tests per release" at A is order-of- + magnitude reasoning; the exact number should be re-baselined + against the first FRG-A candidate's actual coverage. +* The "kernel-soundness contribution within 12 months" at A is + intentionally narrower than mere bug-fix contribution — it + requires soundness-relevant upstream work. May be too narrow; + re-baseline once tested. +* The relationship to Echidna is currently encoded as a hard + CI-gate at A. This may relax to "any production neurosymbolic + verifier" if a second one emerges in the estate or upstream. +* TypeLL = L10 at A is currently a hard gate. May be relaxed for + domain-specific languages whose semantics genuinely require less + (a configuration language may legitimately cap at L6). + +== 11. Honest Comparison to Real-World Software Standards + +To the author's knowledge, FRG-A is not matched by any language +standard currently in use. The closest comparators: + +* *CompCert / CakeML* — FRG-A by FRG's criteria. End-to-end + verified compilers; cross-prover replicated in part; + kernel-soundness contributions upstream. The model. +* *Idris2 (the language, not its formalisation)* — FRG-B. The + language is formally grounded (QTT), preservation is closed for + the core fragment, but cross-prover replication is incomplete. +* *Rust (2024)* — FRG-D leaning C. RustBelt formalised a fragment; + preservation closed for that fragment in Coq; cross-prover + replication absent; the full language exceeds the formalised + fragment. +* *Haskell (2024)* — FRG-D. System F + extensions formalised in + several places; preservation for the *full* GHC dialect is open; + the production compiler is well past what the formalisation + covers. +* *Most "I built a type system" weekend projects* — FRG-X or + FRG-E. Honest. + +The author expects approximately *one* hyperpolymath language to +reach FRG-C in year one (ephapax, post-four-layer-redesign), +approximately *zero* to reach B, and *zero* to reach A. That is the +correct order of magnitude. If FRG-A becomes easy to hit, the bar +is wrong and the standard MUST be raised. + +This standard is open to challenge. If you think the bar is too +low, specify what evidence class, foundations surface, or +verification-rigor requirement is missing. + +== 12. Machine-Readable Grade Declaration + +Languages using FRG SHOULD include in their `READINESS.md`: + +[source,markdown] +---- +**Current FRG Grade:** D +**Current ARG Grade:** D +**Current TRG Grade:** D +**Current CRG Grade:** C +**Current RSR Compliance:** FULL +---- + +This line is parsed by `just frg-grade` and `just frg-badge` from +`rsr-template-repo`. + +== Revision History + +[cols="1,1,1,3", options="header"] +|=== +| Version | Date | Author | Changes + +| 1.0 | 2026-05-28 | Jonathan D.A. Jewell | Initial release. FRG fills the mathematical-foundations tier (CRG = component, TRG = toolchain, ARG = adoption, FRG = foundations). Grade vocabulary X-A inherited from CRG/TRG/ARG. Worst-of aggregation and immediate demotion inherited from TRG. Cross-axis coupling: ARG-A requires FRG ≥ B. TypeLL 10-level lineage folded into the L surface. Banned-constructs and qualifying-provers inherited from TRG verbatim. +|=== diff --git a/foundations-readiness-grades/README.adoc b/foundations-readiness-grades/README.adoc new file mode 100644 index 00000000..29d40bba --- /dev/null +++ b/foundations-readiness-grades/README.adoc @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + += Foundations Readiness Grades (FRG) +:toc: + +* *Status:* Active, v1.0, 2026-05-28 +* *License:* PMPL-1.0-or-later (MPL-2.0 automatic legal fallback) +* *Companion to:* Toolchain Readiness Grades (TRG) v1.0, Adoption Readiness Grades (ARG) v1.0, Component Readiness Grades (CRG) v2.2 +* *Part of:* Rhodium Standard Repositories (RSR) + +== What this is + +FRG grades the *language as a mathematical artefact*. It sits at the +language-whole tier of the estate grading hierarchy: + +* *CRG* grades individual *components*. +* *TRG* grades the *toolchain*. +* *ARG* grades the language as a *thing someone can adopt*. +* *FRG* grades the language's *mathematical foundations*. + +FRG uses the same letter grades as CRG, TRG, and ARG +(`X | F | E | D | C | B | A`) and the same worst-of-any-required-element +aggregation rule. The evidence required at each grade asks a different +question: *"is the typing discipline mathematically grounded?"* + +== Why it exists + +Three reasons: + +1. CRG grades components, TRG grades the toolchain, ARG grades + adoption — none of those answer *"is the language's typing + discipline mathematically sound?"*. That question belongs at the + language-whole tier. +2. The hyperpolymath estate has 17+ languages, several of which carry + strong soundness claims (affinescript's QTT-affine fragment, + ephapax's dyadic modality, vcl-ut's 10-level type-safety lineage, + typed-wasm's verifier-as-spec). FRG provides a uniform yardstick + for comparing them on the foundations axis. +3. The existing `PROOF-NEEDS.md` and `PROOF-STATUS.a2ml` per-repo + conventions are good catalogs of what's missing but have no + *rubric* for grading the overall mathematical maturity. FRG + provides that rubric. + +== Files in this directory + +[cols="1,3", options="header"] +|=== +| File | Purpose + +| `FOUNDATIONS-READINESS-GRADES.adoc` +| The normative spec. Read this first. + +| `README.adoc` +| This file. + +| `FOUNDATIONS-READINESS-GRADES.a2ml` +| Machine-readable counterpart of the spec. + +| `SELF-ASSESSMENT.adoc` +| FRG applied to itself (insofar as that's meaningful — FRG grades + languages, not specs; the doc set is self-assessed against + internal-consistency). + +| `templates/FRG-PROFILE-TEMPLATE.adoc` +| The per-language profile template referenced from §1.1 of the spec. + Each adopting language publishes `spec/FRG-PROFILE.adoc` in its repo + following this template. +|=== + +== Quick mental model + +[cols="^1,4", options="header"] +|=== +| Grade | Meaning + +| X | No formal artifact. Honest position for most new languages. +| F | Provably wrong formalisation that has not been marked as archaeology. Rare and important — see §4.2 of the spec. +| E | Well-formedness only: syntax + typing judgment formalised in a qualifying prover; no preservation claim yet. +| D | Preservation theorem *stated*; admits explicitly tracked; honest about what's open. +| C | Preservation *closed*: 0 admits, 0 axioms, 0 sorries, 0 banned constructs. +| B | Layered decomposition + cross-prover replication (≥ 2 provers); counterexample regression suite. +| A | Cross-prover ≥ 3 + Echidna in CI + 100k-test differential testing + kernel-soundness contribution upstream. +|=== + +== Demotion is immediate + +Loss of any A-grade gate — kernel-soundness contribution lapses past +12 months, Echidna gate disabled, cross-prover falls below 3, banned +construct introduced anywhere in the proof tree — *immediately* +demotes the language. There is no grace period. Same rule as TRG §5.8 +and ARG §4.8. + +A single new `Admitted` / `sorry` introduced into the proof tree is +sufficient to demote from C, B, or A to the next-lower grade whose +evidence still holds. + +== How strict is this really? + +See spec §11. Honest answer: FRG-A is calibrated against CompCert / +CakeML — end-to-end verified compilers with cross-prover replication +and kernel-soundness contributions upstream. The author expects: + +* ~1 hyperpolymath language to reach FRG-C in year one (ephapax, + post-four-layer-redesign). +* ~0 to reach FRG-B in year one. +* ~0 to reach FRG-A in year one. + +That is the correct order of magnitude. If FRG-A becomes easy to hit, +the bar is wrong and the standard MUST be raised. + +== Per-language profiles + +FRG is the baseline. Each hyperpolymath language is expected to +publish a `spec/FRG-PROFILE.adoc` in its own repo that tightens FRG +with language-specific obligations. A profile may only tighten, never +loosen. See spec §1.1 and `templates/FRG-PROFILE-TEMPLATE.adoc`. + +== Interaction with the other graders + +* *vs. CRG:* CRG grades components inside the language project (a + parser, a formatter, a stdlib module). FRG grades the language's + mathematical foundations as a whole. Where CRG and FRG would both + apply (e.g. a formalisation directory is itself a component), both + grades are published independently. FRG ≤ worst-of(CRG over all + required formalisation components). +* *vs. TRG:* TRG grades the toolchain. FRG grades the calculus that + the toolchain implements. The two are independent — a language can + be FRG-C (preservation closed) and TRG-D (toolchain incomplete), + or vice versa. The only coupling is via the `qualifying-provers` + and `banned-constructs` lists, both inherited verbatim from TRG. +* *vs. ARG:* FRG and ARG are independent except for one coupling: + *ARG-A requires FRG ≥ B*. This excludes school-grade adoption + claims for languages whose foundations have not yet entered + layered / cross-prover territory. +* *vs. RSR:* RSR grades repository hygiene. FRG-graded language repos + MUST be RSR-compliant at the level required by TRG at the same + grade. + +== Open invitation + +If you think FRG is too lax (or too strict in a way that lets bad +work hide), file an issue with concrete evidence and a proposed +tightening. FRG earns trust by surviving critique. + +Particular invitations: + +* Specify a *fifth* qualifying prover that should join {Idris2, Lean + 4, Rocq, Agda}, with evidence of soundness pedigree. +* Specify an alternative neurosymbolic verifier that could relax the + Echidna coupling at FRG-A. +* Argue that the "kernel-soundness contribution within 12 months" + gate at A is wrong — too narrow, too broad, wrong cadence. diff --git a/foundations-readiness-grades/SELF-ASSESSMENT.adoc b/foundations-readiness-grades/SELF-ASSESSMENT.adoc new file mode 100644 index 00000000..c08edb98 --- /dev/null +++ b/foundations-readiness-grades/SELF-ASSESSMENT.adoc @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + += Foundations Readiness Grades — Self-Assessment +:toc: auto +:status: Active +:date: 2026-05-28 + +FRG grades *languages*, not *specifications*. The FRG spec cannot +self-grade with an FRG letter, because the FRG spec is not a language. + +What follows is therefore a *document-set self-assessment* — applying +FRG's internal-consistency criteria to the FRG document set itself, in +the same spirit as TRG's and ARG's `SELF-ASSESSMENT.adoc`. + +== Current document-set status: COMPLETE-MINIMAL + +*Assessed: 2026-05-28* + +FRG v1.0 was just published. The document set is internally consistent +and structurally complete, but no language has yet been audited against +it. + +== Document-set conformance + +[cols="2,1,4", options="header"] +|=== +| Conformance item | Status | Notes + +| Normative spec exists in `.adoc` +| [OK] +| `FOUNDATIONS-READINESS-GRADES.adoc` (~22 KB, 12 sections) + +| README.adoc +| [OK] +| `README.adoc` + +| Self-assessment +| [OK] +| This document + +| Machine-readable counterpart (`.a2ml`) +| [OK] +| `FOUNDATIONS-READINESS-GRADES.a2ml` + +| Templates referenced from spec +| [OK] +| `templates/FRG-PROFILE-TEMPLATE.adoc` + +| SPDX header on every file +| [OK] +| Verified at creation + +| Inherits CRG/TRG/ARG vocabulary +| [OK] +| Grades X-A; release-stage mapping aligned; worst-of aggregation + +| Inherits TRG qualifying-provers list +| [OK] +| `(inherited-from "TRG")` in a2ml; Idris2, Lean 4, Rocq, Agda + +| Inherits TRG banned-constructs list +| [OK] +| `(inherited-from "TRG")` in a2ml verbatim + +| Inherits TRG diversity-metrics +| [OK] +| Applied to peer-review and external-review criteria + +| ARG-A requires FRG ≥ B cross-axis rule stated +| [OK] +| Spec §abstract; §1; §10; cross-axis block in a2ml + +| Counterexample-driven regression formalised as a rung +| [OK] +| P3 in §3.2; §6.5 definition; ephapax `Counterexample.v` cited as canonical example + +| Layered decomposition formalised as a rung +| [OK] +| P4 in §3.2; §6.4 definition; ephapax four-layer redesign cited as canonical example + +| TypeLL 10-level lineage folded into evidence rungs +| [OK] +| L1-L10 in §3.4; grade-specific TypeLL floors at E (L2), D (L4), C (L6), B (L8), A (L10) + +| VeriSimDB attestation integration specified +| [OK] +| §7; VCL-total proposition form; required from grade C + +| Echidna integration specified +| [OK] +| §6.6; required from grade A; §4.7 evidence + +| At least one language audited against FRG +| [No] +| Phase 4 of the framework rollout — ephapax (the canonical example + for several rungs) is the most natural first audit target + +| External review by language designers +| [No] +| Not yet — first audit cohort must complete before external review + +| Adoption by any language outside hyperpolymath +| [No] +| Not yet — standard is brand new +|=== + +== Honest acknowledgments + +* *The "≥ 3 qualifying provers" threshold at FRG-A is plucked from + intuition.* It is the minimum number that breaks any single-prover + ecosystem capture, but the exact number should be re-baselined + against the first FRG-A candidate's actual cross-prover work. + +* *The "100k differential tests per release" at A is order-of- + magnitude.* The exact number should be re-baselined against the + first FRG-A candidate's coverage. + +* *The "kernel-soundness contribution within 12 months" at A is + narrow.* It explicitly excludes mere bug-fix contributions and + feature work — only soundness-relevant upstream contributions + count. May be too narrow; re-baseline once tested. + +* *The Echidna hard CI-gate at A* assumes Echidna remains the + estate's neurosymbolic verifier of record. If a second neurosymbolic + verifier emerges in the estate or upstream, this gate may relax to + "any production neurosymbolic verifier with TRG ≥ B". + +* *TypeLL = L10 hard gate at A* may be too strict for domain-specific + languages whose semantics genuinely don't reach cross-cutting + territory. A configuration language legitimately capping at L6 + should not be FRG-disqualified for failing L10; the per-language + profile mechanism (§1.1 tightening) is the current workaround, + but a future revision may relax the baseline. + +* *FRG-F (provably-wrong unmarked) is novel.* No published + language-grading standard the author is aware of has a "you + *claim* you proved this but you didn't" rung. The grade exists + because the failure mode is real (ephapax pre-redesign was in + this position from at least 2026-03-28 until 2026-05-27). The + evacuation path (mark archaeology + commit canonical redesign) + is the discipline that got ephapax out of F. + +* *The relationship to ARG-A is currently encoded as ARG-A requires + FRG ≥ B.* This is a deliberate-but-debatable coupling. It excludes + languages whose adoption is high but whose foundations lag (most + historical production languages: Python, JavaScript, early Ruby). + The coupling may tighten to FRG ≥ A once any single language has + shown both can co-exist; or may loosen if it turns out unrealistic. + +== Path forward + +. Land per-language `spec/FRG-PROFILE.adoc` in affinescript, ephapax, + my-lang following `templates/FRG-PROFILE-TEMPLATE.adoc`. Each at + honest current grade. This is Phase 4 of the framework rollout. +. Extend `nextgen-languages/TOOLING-STATUS.adoc` with the FRG column + alongside ARG / TRG / CRG / RSR. Update `language-status-tracker.jl` + to surface FRG grades from each repo's profile. +. Once the first audit cohort lands, invite external review from + formalisation-discipline practitioners outside hyperpolymath. The + CompCert / CakeML / RustBelt / Idris2 communities are the natural + reviewer pool. +. Adjust thresholds based on feedback before locking v1.0. +. After feedback cycle, FRG v1.0 becomes the load-bearing standard + for language foundations claims in the estate. + +== Standing rule + +This self-assessment is itself a claim and is itself subject to the +invariant-path doc-claims grounder once it ingests the FRG directory. +Re-run after any change to this directory. If any file-path claim in +this document fails to ground, the self-assessment is invalid until +the gap is closed. diff --git a/foundations-readiness-grades/templates/FRG-PROFILE-TEMPLATE.adoc b/foundations-readiness-grades/templates/FRG-PROFILE-TEMPLATE.adoc new file mode 100644 index 00000000..104a15eb --- /dev/null +++ b/foundations-readiness-grades/templates/FRG-PROFILE-TEMPLATE.adoc @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + += FRG-PROFILE — {LANGUAGE_NAME} +:toc: left +:toclevels: 2 + +[cols="1,3"] +|=== +| Field | Value + +| Language | {LANGUAGE_NAME} +| Repository | {REPO_URL} +| Current FRG Grade | {GRADE} (X | F | E | D | C | B | A) +| Assessed | {YYYY-MM-DD} +| Assessor | {NAME } +| FRG Spec Version | 1.0 +| ARG Profile | spec/ARG-PROFILE.adoc +| TRG Profile | spec/TRG-PROFILE.adoc +| Formalisation Directory | {path/to/formal} +| Qualifying Prover(s) | {Rocq | Lean 4 | Idris2 | Agda} +|=== + +== About this profile + +This profile applies the *FRG v1.0* baseline to {LANGUAGE_NAME} and +records the current honest grade, the evidence supporting it, the +language-specific tightening obligations, and the path to the next +grade. Per FRG §1.1, profiles may *tighten* the baseline but never +*loosen* it. + +FRG = {GRADE}. The full chain of axes for this language: + +* FRG: {GRADE} +* ARG: {ARG_GRADE} +* TRG: {TRG_GRADE} +* CRG: {CRG_GRADE} (worst-of any released component) +* RSR: {RSR_LEVEL} + +Cross-axis rule: ARG-A requires FRG ≥ B +(currently FRG = {GRADE}, ARG = {ARG_GRADE}). + +== Declared TypeLL level + +* *TypeLL level claimed:* L{N} +* *Evidence:* {Path to formalisation file + theorem names that establish + this level. Each level above L4 requires a soundness-class claim with + proof.} +* *Levels not yet claimed:* L{N+1}-L10, with reason. + +== Grade rationale (evidence for {GRADE}) + +[cols="2,1,4", options="header"] +|=== +| Criterion | Met? | Evidence + +| *Mathematical core M1 — Abstract syntax formalised* +| {YES/NO} +| {Path to file; e.g. `formal/Syntax.v`} + +| *M2 — Typing judgment formalised* +| {YES/NO} +| {Path to file; e.g. `formal/Typing.v`} + +| *M3 — Operational or denotational semantics formalised* +| {YES/NO/N/A} +| {Path to file; e.g. `formal/Semantics.v` / `formal/Semantics_L1.v`} + +| *M4 — Preservation theorem stated* +| {YES/NO/N/A} +| {Theorem name + file:line citation} + +| *M5 — Progress theorem stated* +| {YES/NO/N/A} +| {Theorem name + file:line citation} + +| *Proof landings P1 — Preservation closed (0 admits, 0 axioms, 0 sorries)* +| {YES/NO/N/A} +| {Citation; PROOF-STATUS.a2ml line; date closed} + +| *P2 — Progress closed* +| {YES/NO/N/A} +| {Citation; PROOF-STATUS.a2ml line} + +| *P3 — Counterexample regression suite (≥ 3 Qed lemmas)* +| {YES/NO/N/A} +| {Path to counterexample file; lemma count} + +| *P4 — Layered orthogonal decomposition* +| {YES/NO/N/A} +| {Path to layered-design document; layers enumerated} + +| *Verification rigor V1 — Single qualifying prover* +| {YES/NO} +| {Which prover; build status link} + +| *V2 — Cross-prover replication ≥ 2* +| {YES/NO/N/A} +| {Second prover repository; replicated theorem name} + +| *V3 — Cross-prover replication ≥ 3* +| {YES/NO/N/A} +| {Third prover repository; replicated theorem name} + +| *V4 — Echidna integrated in CI as required gate* +| {YES/NO/N/A} +| {CI configuration line citing Echidna step} + +| *TypeLL L1-L6* +| {YES/NO partial per level} +| {Per-level evidence; structural for L1-L2, schema for L3, types for L4, NULL/nullable for L5, injection-safety for L6} + +| *TypeLL L7-L10 (grade B+)* +| {YES/NO partial per level} +| {Linear (L7), session (L8), effect/modal/proof-carrying (L9), QTT/cross-cutting (L10)} + +| *Banned constructs B0 — Zero in proof tree* +| {YES/NO} +| {Lint output reference; count of tracked admits if any} + +| *B1 — CI banned-construct lint active* +| {YES/NO} +| {CI workflow file + step name} + +| *Operational equivalence O1 — Operational checker tested against formal judgment (grade B+)* +| {YES/NO/N/A} +| {Test corpus path; example count} + +| *O2 — Differential testing 100k window (grade A)* +| {YES/NO/N/A} +| {Generator path; test-count rolling window report} + +| *O3 — Audit-friendly reference implementation (grade A)* +| {YES/NO/N/A} +| {Path to reference implementation; line count} + +| *Kernel-soundness contribution K1 (grade A only)* +| {YES/NO/N/A} +| {Upstream PR / commit / paper reference within last 12 months} + +| *VeriSimDB attestation (grade C+)* +| {YES/NO} +| {Attestation ID; last-ingested timestamp} + +| *External single-party review (grade B+)* +| {YES/NO/N/A} +| {Reviewer name; review-output reference} + +| *Unanimous 6+6 signoff (grade A only)* +| {YES/NO/N/A} +| {Signoff log reference} +|=== + +== Open proof obligations + +* *Tracked admits / sorries:* {count} (permitted at D and below; + forbidden at C and above) +* *Each open goal is enumerated in:* `{path/to/PROOF-NEEDS.md}` +* *PROOF-STATUS.a2ml:* `{path/to/PROOF-STATUS.a2ml}` — last updated + {YYYY-MM-DD} + +== Language-specific tightening (this profile only) + +FRG §1.1 permits per-language tightening. For {LANGUAGE_NAME}, the +following additional obligations apply at the stated grades: + +* *Grade C tightening:* {Language-specific obligation; e.g. "AffineScript: + the formalised QTT fragment MUST be closed under the affine + restriction — no q=ω escape via the affine rule."} +* *Grade B tightening:* {e.g. "Ephapax: preservation closed *per + layer* (preservation_l1, preservation_l2, preservation_l3) with + cross-layer composition closed under explicit invariants — not + via patched single-layer admits."} +* *Grade A tightening:* {e.g. "VCL-total: cross-modal consistency + proofs over ≥ 3 of the 8 octad modalities."} + +These tightenings are *additional* to the baseline — none of the +baseline evidence is waived. + +== What is NOT yet met (honest gaps) + +* {Bullet list of specific evidence the language does NOT currently + produce, grouped by foundations surface (M / P / V / L / B / O / K).} +* {Each gap should be specific enough that a reader can verify by + inspection of `formal/` + PROOF-NEEDS.md.} + +== Path to next grade ({NEXT_GRADE}) + +* {Specific next-step item 1, with estimated work and the specific + theorem(s) or evidence rung that would be unlocked.} +* {Specific next-step item 2.} +* {...} + +*Realistic timeline estimate:* {N weeks/months} + +== Path to grade beyond that ({GRADE_AFTER_NEXT}) + +* {Larger items that follow the immediate next grade.} + +== Demotion risk + +* *Lowest:* {What single new admit / sorry / axiom would drop the grade + by one.} +* *Medium:* {What multi-step regression would force a multi-grade + demotion.} +* *Catastrophic:* {What would force demotion to F — i.e. introduction + of a Qed'd counterexample to a currently-claimed theorem without + redesign commitment.} + +== Iteration history + +[cols="1,1,4", options="header"] +|=== +| Date | Grade | Notes + +| {YYYY-MM-DD} | {grade} | Initial assessment. +| {YYYY-MM-DD} | {grade} | {Reason for change.} +|=== + +== Review cycle + +* *Routine:* Reassess on every release cycle (per FRG §6). +* *Immediate:* Reassess within 7 days of any demotion trigger per + FRG-spec §4.8. +* *VeriSimDB drift alert:* Reassess within 24 hours of any drift alert + from VeriSimDB on this language's foundations attestations. +* *New admit detection:* CI lint that introduces a new banned-construct + / Admitted triggers immediate reassessment. + +== Footer + +Run `just frg-badge` in this repo to generate the shields.io badge for +the README. + +This profile is itself a VCL-total proposition. Its content is +`DECLARE`'d to VeriSimDB on each commit. Drift between the claimed +grade above and the underlying evidence surfaces as a +*foundations-stale* alert.