|
54 | 54 | { |
55 | 55 | "pattern": "decision-log", |
56 | 56 | "relation": "uses" |
| 57 | + }, |
| 58 | + { |
| 59 | + "pattern": "blind-grader-with-isolated-context", |
| 60 | + "relation": "alternative-to" |
57 | 61 | } |
58 | 62 | ], |
59 | 63 | "references": [ |
|
184 | 188 | { |
185 | 189 | "pattern": "partial-output-salvage", |
186 | 190 | "relation": "complements" |
| 191 | + }, |
| 192 | + { |
| 193 | + "pattern": "initializer-agent", |
| 194 | + "relation": "complements" |
187 | 195 | } |
188 | 196 | ], |
189 | 197 | "references": [ |
|
492 | 500 | { |
493 | 501 | "pattern": "echo-recognition", |
494 | 502 | "relation": "complements" |
| 503 | + }, |
| 504 | + { |
| 505 | + "pattern": "kritis-auditable-decision-artifact", |
| 506 | + "relation": "complements" |
495 | 507 | } |
496 | 508 | ], |
497 | 509 | "references": [ |
|
602 | 614 | { |
603 | 615 | "pattern": "automatic-workflow-search", |
604 | 616 | "relation": "complements" |
| 617 | + }, |
| 618 | + { |
| 619 | + "pattern": "kritis-auditable-decision-artifact", |
| 620 | + "relation": "complements" |
605 | 621 | } |
606 | 622 | ], |
607 | 623 | "references": [ |
|
843 | 859 | "mermaid": "flowchart TD\n Alert[Alert: kill-switch / sandbox-escape / cost spike] --> Sev{Severity}\n Sev --> OnCall[Page on-call]\n OnCall --> Cont[Containment: kill-switch / reroute]\n Cont --> Pres[Pin traces beyond retention]\n Pres --> Comp[Compensating actions]\n Comp --> Comm[Customer + regulator comms]\n Comm --> PM[Post-mortem]" |
844 | 860 | } |
845 | 861 | }, |
| 862 | + { |
| 863 | + "id": "kritis-auditable-decision-artifact", |
| 864 | + "name": "KRITIS Auditable Decision Artifact", |
| 865 | + "aliases": [ |
| 866 | + "Run-ID + Modell-Digest + Policy-Hash Triple", |
| 867 | + "KRITIS Audit Triple", |
| 868 | + "Re-Executable Decision Record", |
| 869 | + "AI-Act Article-12 Triple" |
| 870 | + ], |
| 871 | + "category": "governance-observability", |
| 872 | + "intent": "For every agent action in regulated or critical-infrastructure contexts, emit a WORM-stored triple of run-id, model-digest, and policy-hash so each decision is independently re-verifiable against the exact code, weights, and policy version that produced it.", |
| 873 | + "context": "Agents operating in domains subject to the EU AI Act, NIS2, BSI KRITIS, AIC4, or sectoral equivalents, where regulators or internal auditors must be able to replay any individual decision and prove it was produced by a specific, declared version of the model and the policy stack. Generic action logs and reasoning traces do not satisfy this bar because they do not pin the executable artifacts.", |
| 874 | + "problem": "An action log says what the agent did. A reasoning trace says what it claimed to be thinking. Neither lets an auditor, months later, re-execute the decision against the exact weights and rules that produced it: by then the model has been updated and the policy has been revised many times. Regulators of critical infrastructure and other high-risk regimes increasingly require not just observability but re-verifiability: a decision is auditable only if the artifacts that produced it can be reconstructed from the log.", |
| 875 | + "forces": [ |
| 876 | + "Auditors need re-executability, not just observability.", |
| 877 | + "Models, policies, and agent code drift on independent release cadences; a decision must pin all three.", |
| 878 | + "Storage must be tamper-evident; a mutable log is not an audit artifact.", |
| 879 | + "Cryptographic digests must be stable across re-execution; non-deterministic build inputs break replay.", |
| 880 | + "Hot-path logging cost must remain bounded; the triple must be compact and computable cheaply.", |
| 881 | + "Regulatory references (AI Act Art. 12, NIS2, BSI AIC4) explicitly tie obligations to artifact identity, not narrative summaries." |
| 882 | + ], |
| 883 | + "therefore": "Therefore: for each agent decision, emit and store an immutable triple of (run-id, model-digest, policy-hash) — together with the input fingerprint and the verdict — in a WORM medium, so that the decision can be re-executed against the exact pinned artifacts and an auditor can verify the result.", |
| 884 | + "solution": "Establish a build pipeline that computes a content digest over the model weights (and tokenizer, system prompt, scaffolding) and a content hash over the policy bundle. Both digests are published, signed, and retained. At runtime, every agent decision is wrapped to emit a record containing run-id, model-digest, policy-hash, input fingerprint, decision verdict, and timestamp. Records are written to WORM storage (object-lock S3, immutable ledger, append-only DB) under a retention horizon set by the governing regulation. The model and policy artifacts themselves are retained for the same horizon, addressable by their digests, so any record can be replayed against its pinned versions. The triple is cited in user-facing audit responses and in regulator submissions.", |
| 885 | + "structure": "Agent decision --> Wrapper --> {run-id, model-digest, policy-hash, input-fingerprint, verdict, ts} --> WORM ledger. Model registry: model-digest --> weights+tokenizer+prompt blob. Policy registry: policy-hash --> rules bundle. Auditor: pick a record --> fetch pinned artifacts by digest --> re-execute --> compare verdict.", |
| 886 | + "consequences": { |
| 887 | + "benefits": [ |
| 888 | + "Every decision is re-executable against its pinned artifacts.", |
| 889 | + "Tamper-evident storage gives the log evidentiary weight.", |
| 890 | + "Maps cleanly onto AI Act Article 12 (record-keeping) and Article 14 (human oversight) obligations and onto BSI AIC4 audit criteria.", |
| 891 | + "Cross-version regression analysis becomes possible: compare decisions on the same inputs across model-digest changes." |
| 892 | + ], |
| 893 | + "liabilities": [ |
| 894 | + "Retaining old weights and policy bundles for the full regulatory horizon is a real storage and security cost.", |
| 895 | + "Model non-determinism (sampling, hardware, kernel versions) can break replay unless seeds and inference environments are also pinned.", |
| 896 | + "Hot-path emission of the triple adds latency and a hard dependency on the WORM medium.", |
| 897 | + "Pinning is necessary but not sufficient: a decision can be 'auditable' and still wrong.", |
| 898 | + "Privacy obligations on the input fingerprint require careful design; raw inputs may not be retainable." |
| 899 | + ] |
| 900 | + }, |
| 901 | + "constrains": "The LLM-driven decision path must not emit an action without producing the (run-id, model-digest, policy-hash) triple, must not write decision records to mutable storage, and must not allow model or policy updates to overwrite the artifacts referenced by retained records.", |
| 902 | + "known_uses": [ |
| 903 | + { |
| 904 | + "system": "German KRITIS / regulated-enterprise agent deployments", |
| 905 | + "note": "Emerging reference architectures, as described in heise's Agentic AIOps coverage, couple agent action logs with model and policy version pins under BSI/NIS2 obligations.", |
| 906 | + "status": "planned", |
| 907 | + "url": "https://www.heise.de/hintergrund/Agentic-AIOps-KI-Agenten-in-kritischen-Infrastrukturen-11267508.html" |
| 908 | + } |
| 909 | + ], |
| 910 | + "related": [ |
| 911 | + { |
| 912 | + "pattern": "provenance-ledger", |
| 913 | + "relation": "specialises", |
| 914 | + "note": "Specialises a generic action/audit log by adding the cryptographic pinning of model and policy versions and the WORM retention requirement." |
| 915 | + }, |
| 916 | + { |
| 917 | + "pattern": "decision-log", |
| 918 | + "relation": "complements", |
| 919 | + "note": "A decision log captures reasoning; the KRITIS triple captures the executable identity of model and policy at decision time. Both are needed in regulated contexts." |
| 920 | + }, |
| 921 | + { |
| 922 | + "pattern": "model-card", |
| 923 | + "relation": "complements", |
| 924 | + "note": "A model card documents the model in the abstract; model-digest in the triple identifies the specific deployed instance." |
| 925 | + }, |
| 926 | + { |
| 927 | + "pattern": "eval-as-contract", |
| 928 | + "relation": "complements", |
| 929 | + "note": "Eval-as-contract validates the model against a contract before release; the audit triple proves which validated version actually produced a given decision." |
| 930 | + }, |
| 931 | + { |
| 932 | + "pattern": "policy-as-code-gate", |
| 933 | + "relation": "uses", |
| 934 | + "note": "Policy-as-code naturally produces the policy-hash component of the triple." |
| 935 | + } |
| 936 | + ], |
| 937 | + "references": [ |
| 938 | + { |
| 939 | + "type": "blog", |
| 940 | + "title": "Agentic AIOps: KI-Agenten in kritischen Infrastrukturen", |
| 941 | + "url": "https://www.heise.de/hintergrund/Agentic-AIOps-KI-Agenten-in-kritischen-Infrastrukturen-11267508.html" |
| 942 | + }, |
| 943 | + { |
| 944 | + "type": "spec", |
| 945 | + "title": "Regulation (EU) 2024/1689 (Artificial Intelligence Act), Articles 12 and 14", |
| 946 | + "year": 2024, |
| 947 | + "url": "https://eur-lex.europa.eu/eli/reg/2024/1689/oj/eng" |
| 948 | + }, |
| 949 | + { |
| 950 | + "type": "spec", |
| 951 | + "title": "BSI AIC4 — AI Cloud Service Compliance Criteria Catalogue", |
| 952 | + "authors": "Bundesamt für Sicherheit in der Informationstechnik", |
| 953 | + "url": "https://www.bsi.bund.de/EN/Themen/Unternehmen-und-Organisationen/Informationen-und-Empfehlungen/Kuenstliche-Intelligenz/AIC4/aic4_node.html" |
| 954 | + } |
| 955 | + ], |
| 956 | + "status_in_practice": "emerging", |
| 957 | + "tags": [ |
| 958 | + "governance", |
| 959 | + "audit", |
| 960 | + "ai-act", |
| 961 | + "kritis", |
| 962 | + "nis2", |
| 963 | + "bsi", |
| 964 | + "worm", |
| 965 | + "observability", |
| 966 | + "re-executability" |
| 967 | + ], |
| 968 | + "applicability": { |
| 969 | + "use_when": [ |
| 970 | + "The deployment is in scope of the EU AI Act's high-risk provisions, NIS2, BSI KRITIS, AIC4, or analogous regimes.", |
| 971 | + "Auditors or regulators have or may acquire the right to replay individual decisions.", |
| 972 | + "Model and policy artifacts can be content-addressed and retained for the regulatory horizon.", |
| 973 | + "Decisions have material consequences (financial, safety, legal) and a narrative log alone is not enough." |
| 974 | + ], |
| 975 | + "do_not_use_when": [ |
| 976 | + "The deployment is non-regulated and a standard action log meets the bar.", |
| 977 | + "Model artifacts cannot be retained (e.g. third-party API with no version-pinning guarantee) and no equivalent pinning is achievable.", |
| 978 | + "Inference is intrinsically non-deterministic and replay cannot be approximated, removing the value of pinning." |
| 979 | + ] |
| 980 | + }, |
| 981 | + "example_scenario": "A German energy operator runs an agent that helps dispatch grid-balancing actions. Each decision the agent makes is wrapped: the wrapper writes a record with the run-id, the digest of the deployed model weights, the hash of the active policy bundle, a fingerprint of the input situation, and the chosen action, into an immutable object-lock store retained for the regulatory horizon. Six months later BSI auditors pick a specific record, fetch the pinned model and policy by digest from the registry, replay the decision, and confirm the agent's verdict matches what was logged." |
| 982 | + }, |
846 | 983 | { |
847 | 984 | "id": "lineage-tracking", |
848 | 985 | "name": "Lineage Tracking", |
|
1003 | 1140 | { |
1004 | 1141 | "pattern": "shadow-canary", |
1005 | 1142 | "relation": "used-by" |
| 1143 | + }, |
| 1144 | + { |
| 1145 | + "pattern": "blind-grader-with-isolated-context", |
| 1146 | + "relation": "generalises" |
1006 | 1147 | } |
1007 | 1148 | ], |
1008 | 1149 | "references": [ |
|
1104 | 1245 | { |
1105 | 1246 | "pattern": "attention-manipulation-explainability", |
1106 | 1247 | "relation": "complements" |
| 1248 | + }, |
| 1249 | + { |
| 1250 | + "pattern": "kritis-auditable-decision-artifact", |
| 1251 | + "relation": "complements" |
1107 | 1252 | } |
1108 | 1253 | ], |
1109 | 1254 | "references": [ |
|
1206 | 1351 | { |
1207 | 1352 | "pattern": "prompt-response-optimiser", |
1208 | 1353 | "relation": "complements" |
| 1354 | + }, |
| 1355 | + { |
| 1356 | + "pattern": "agentic-context-engineering-playbook", |
| 1357 | + "relation": "complements" |
1209 | 1358 | } |
1210 | 1359 | ], |
1211 | 1360 | "references": [ |
|
1336 | 1485 | { |
1337 | 1486 | "pattern": "world-model-separation", |
1338 | 1487 | "relation": "complements" |
| 1488 | + }, |
| 1489 | + { |
| 1490 | + "pattern": "kritis-auditable-decision-artifact", |
| 1491 | + "relation": "generalises" |
1339 | 1492 | } |
1340 | 1493 | ], |
1341 | 1494 | "references": [ |
|
0 commit comments