From a1f8e4c708626a03635ef0b84ab19ea581b38289 Mon Sep 17 00:00:00 2001
From: Brian McMahon <brian@nousergon.ai>
Date: Mon, 18 May 2026 14:20:17 -0700
Subject: [PATCH 1/2] =?UTF-8?q?feat(sf):=20shell-run=20keystone=20?=
 =?UTF-8?q?=E2=80=94=20spot=20--preflight-only=20+=20Lambda=20--dry-run=20?=
 =?UTF-8?q?instead=20of=20skip?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Converts #258's pure-skip shell_run into actual boot+dry execution of the
Saturday SF workload. ApplyShellRunDefaults no longer force-sets all 16
skip_* true; it now sets a single preflight_args=" --preflight-only"
suffix var (driving the 7 spot states' States.Format command), Lambda dry
flags for the 4 verified-clean Lambda states, and hard-skips ONLY the 5
documented no-clean-dry-path exceptions. InitializeInput seeds the control
vars at non-dry identity values so the shell_run-absent path is
byte-identical (spots) / behaviourally identical (Lambdas) to today's real
Saturday run.

Invariant preserved + test-proven: shell_run absent/false ⇒ every spot
command string char-for-char unchanged (TestByteIdenticalAbsentPath
resolves the States.Array/States.Format intrinsics with preflight_args=""
and asserts equality against origin/main).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 infrastructure/step_function.json            |  70 +--
 tests/test_sf_friday_shell_run_wiring.py     | 496 ++++++++++++++++---
 tests/test_sf_morning_enrich_split_wiring.py |   7 +-
 tests/test_sf_regime_substrate_wiring.py     |  36 +-
 4 files changed, 485 insertions(+), 124 deletions(-)

diff --git a/infrastructure/step_function.json b/infrastructure/step_function.json
index 2238e7e..a2c7364 100644
--- a/infrastructure/step_function.json
+++ b/infrastructure/step_function.json
@@ -4,16 +4,16 @@
   "States": {
     "InitializeInput": {
       "Type": "Pass",
-      "Comment": "Layer defaults under the execution input so manual invocations don't have to pass every field. Scheduled EventBridge runs pass sns_topic_arn + ec2_instance_id explicitly; manual runs for partial-execution testing often omit sns_topic_arn, which previously caused HandleFailure/NotifyComplete to error on missing JSONPath. States.JsonMerge with user-input as the second arg lets explicit values win. | 2026-05-17: also stamps run_date = date($$.Execution.StartTime) so every spot stage of one execution keys backtest/{date}/ off ONE date (fixes the Backtester=05-17 vs Evaluator=05-18 UTC-midnight split). User-passed run_date still wins.",
+      "Comment": "Layer defaults under the execution input so manual invocations don't have to pass every field. Scheduled EventBridge runs pass sns_topic_arn + ec2_instance_id explicitly; manual runs for partial-execution testing often omit sns_topic_arn, which previously caused HandleFailure/NotifyComplete to error on missing JSONPath. States.JsonMerge with user-input as the second arg lets explicit values win. | 2026-05-17: also stamps run_date = date($$.Execution.StartTime) so every spot stage of one execution keys backtest/{date}/ off ONE date (fixes the Backtester=05-17 vs Evaluator=05-18 UTC-midnight split). User-passed run_date still wins. | 2026-05-18 (shell-run keystone): also seeds the dry-path control vars at their NON-DRY identity values — preflight_args=\"\" (empty spot-command suffix), research_dry=false, data_phase2_dry=false, regime_action=\"produce\". These exist on EVERY run so the spot States.Format / Lambda Payload .$ references always resolve; on the real Saturday run (no shell_run) they hold these identity values, making every spot command string byte-identical to pre-keystone and every Lambda Payload behaviourally identical (handlers default dry_run_llm/dry_run to false; regime action 'produce' is the pre-keystone hardcoded value). ApplyShellRunDefaults (shell_run=true only) overrides them with the dry values. innermost defaults blob is merged UNDER run_date which is merged UNDER sns_topic_arn which is merged UNDER the user input, so user/ApplyShellRunDefaults values always win.",
       "Parameters": {
-        "merged.$": "States.JsonMerge(States.JsonMerge(States.StringToJson('{\"sns_topic_arn\":\"arn:aws:sns:us-east-1:711398986525:alpha-engine-alerts\"}'),States.StringToJson(States.Format('\\{\"run_date\":\"{}\"\\}',States.ArrayGetItem(States.StringSplit($$.Execution.StartTime,'T'),0))),false),$$.Execution.Input,false)"
+        "merged.$": "States.JsonMerge(States.JsonMerge(States.StringToJson('{\"preflight_args\":\"\",\"research_dry\":false,\"data_phase2_dry\":false,\"regime_action\":\"produce\",\"sns_topic_arn\":\"arn:aws:sns:us-east-1:711398986525:alpha-engine-alerts\"}'),States.StringToJson(States.Format('\\{\"run_date\":\"{}\"\\}',States.ArrayGetItem(States.StringSplit($$.Execution.StartTime,'T'),0))),false),$$.Execution.Input,false)"
       },
       "OutputPath": "$.merged",
       "Next": "CheckShellRun"
     },
     "CheckShellRun": {
       "Type": "Choice",
-      "Comment": "Friday-PM 'shell run' gate (ROADMAP 'Scheduled Friday-PM shell run', spine landed feat/sf-friday-shell-run). Input {\"shell_run\": true} (supplied by the DISABLED-by-default Friday EventBridge rule alpha-engine-friday-shell-run) routes to ApplyShellRunDefaults, which layers every skip_* flag = true UNDER the user input so the entire Saturday workload is no-op'd via the EXISTING Choice-gated skip mechanism (zero new dry paths added in the spine). shell_run absent OR false routes straight to CheckSkipMorningEnrich — BYTE-IDENTICAL to the pre-spine Saturday run (this is a strict superset; the real Sat 02:00 PT firing passes no shell_run, so this Choice always takes Default for it). Per-module --preflight-only/--dry-run paths (so spots boot + smoke instead of skip) are scoped follow-on PRs per the ROADMAP owed-work — NOT in this spine.",
+      "Comment": "Friday-PM 'shell run' gate (ROADMAP 'Scheduled Friday-PM shell run'; spine landed #258, KEYSTONE landed feat/sf-shell-run-keystone). Input {\"shell_run\": true} (supplied by the DISABLED-by-default Friday EventBridge rule alpha-engine-friday-shell-run) routes to ApplyShellRunDefaults, which now sets the dry-path control vars (preflight_args=\" --preflight-only\" for the 7 spot states, Lambda dry flags for Research/DataPhase2/RegimeSubstrate/RegimeRetrospectiveEval) and hard-skips ONLY the 5 documented no-clean-dry-path exceptions — so the Friday run BOOTS + DRY-EXECUTES the workload instead of pure-skipping it (the keystone's whole point: exercise the real bootstrap/import/lib-pin/transport paths). shell_run absent OR false routes straight to CheckSkipMorningEnrich — BYTE-IDENTICAL to the pre-spine Saturday run (this is a strict superset; the real Sat 02:00 PT firing passes no shell_run, so this Choice always takes Default for it; with preflight_args defaulting to \"\" via InitializeInput every spot command string is char-for-char unchanged and every Lambda Payload behaviourally identical).",
       "Choices": [
         {
           "And": [
@@ -33,9 +33,9 @@
     },
     "ApplyShellRunDefaults": {
       "Type": "Pass",
-      "Comment": "shell_run=true ONLY. Merges every skip_* flag = true UNDER the current state (States.JsonMerge(shellDefaults, $, false) — second arg wins) so an explicit per-flag override in the execution input still takes effect (e.g. {\"shell_run\": true, \"skip_research\": false} still runs Research). Mirrors InitializeInput's defaults-under-input JsonMerge pattern exactly. Every workload state in the Saturday SF already has a Choice-gated skip_* (verified: morning_enrich, data_phase1, rag_ingestion, regime_substrate, regime_retrospective_eval, research, data_phase2, eval_judge, rationale_clustering, replay_concordance, counterfactual, predictor_training, drift_detection, backtester, parity, evaluator) so with all 16 true the SF traverses InitializeInput → (skip every workload) → SaturdayHealthCheck → WeeklySubstrateHealthCheck → Notify. The two health-check states have NO skip gate by design — they are read-only input-freshness / substrate scans and are exactly the kind of bootstrap/transport check the shell run wants exercised Friday PM (their shell_run-aware missing-Friday-bar tolerance is ROADMAP owed-work item 5, a scoped follow-on).",
+      "Comment": "shell_run=true ONLY (keystone — replaces #258's pure-skip with dry EXECUTION). Merges the dry-path control blob UNDER the current state (States.JsonMerge(shellDefaults, $, false) — second arg wins) so an explicit per-flag override in the execution input still takes effect (e.g. {\"shell_run\": true, \"skip_backtester\": true} still skips Backtester; {\"shell_run\": true, \"preflight_args\": \"\"} would run the spots full-fat). Mirrors InitializeInput's defaults-under-input JsonMerge pattern exactly. SPOT states (MorningEnrich, DataPhase1, RAGIngestion, PredictorTraining, Backtester, Parity, Evaluator) boot + run dry via preflight_args=\" --preflight-only\" (LEADING space inside the var; the spot states' final command is a States.Format whose {} is placed immediately after the mode token with NO literal space, so preflight_args=\"\" on the real run yields a byte-identical command — data #259 + predictor #175 + backtester #224 all expose --preflight-only verbatim, an orthogonal MODIFIER). LAMBDA states with a verified clean no-write dry path are routed dry, NOT skipped: Research (dry_run_llm=true — post-#195 install_dry_run_stubs no-ops archive_writer/email_sender/upload_db/write_signals_json/save_sector_team_run/save_agent_run), DataPhase2 (dry_run=true — alternative.collect returns ok_dry_run BEFORE any fetch/S3 write), RegimeSubstrate + RegimeRetrospectiveEval (action=dry_run — produce_*(write=False) returns payload before any put_object). DOCUMENTED EXCEPTIONS still hard-skipped via the #258 mechanism (no verified clean no-write dry path): skip_drift_detection (spot_drift_detection.sh has NO --preflight-only flag), skip_eval_judge (submit handler always _persist_client_side_skips + Anthropic Batch create — no handler-level dry param), skip_rationale_clustering (_persist_analysis S3 put_object is NOT gated by dry_run — only the CloudWatch metric is), skip_replay_concordance + skip_counterfactual (alpha-engine-replay-concordance/counterfactual handler source not present in any cloned repo — cannot verify a clean dry path; routing to an unverified one is forbidden). The #258 skip_* gates are LEFT INTACT and remain valid for targeted operator skips. The two health-check states have NO skip gate by design and run under shell_run (the bootstrap smoke) — their non-blocking Catch absorbs a stale-data sys.exit(1) so a missing-Friday-bar produces only a clearly-Friday-timestamped alert email, NOT a SF-fatal failure (ROADMAP owed-work item 5: a --shell-run-aware staleness tolerance in alpha-engine-dashboard/health_checker.py is a scoped cross-repo follow-on; not a SF-fatal spurious-fail, so deliberately out of this single-file SF PR).",
       "Parameters": {
-        "merged.$": "States.JsonMerge(States.StringToJson('{\"skip_morning_enrich\":true,\"skip_data_phase1\":true,\"skip_rag_ingestion\":true,\"skip_regime_substrate\":true,\"skip_regime_retrospective_eval\":true,\"skip_research\":true,\"skip_data_phase2\":true,\"skip_eval_judge\":true,\"skip_rationale_clustering\":true,\"skip_replay_concordance\":true,\"skip_counterfactual\":true,\"skip_predictor_training\":true,\"skip_drift_detection\":true,\"skip_backtester\":true,\"skip_parity\":true,\"skip_evaluator\":true}'),$,false)"
+        "merged.$": "States.JsonMerge(States.StringToJson('{\"preflight_args\":\" --preflight-only\",\"research_dry\":true,\"data_phase2_dry\":true,\"regime_action\":\"dry_run\",\"skip_drift_detection\":true,\"skip_eval_judge\":true,\"skip_rationale_clustering\":true,\"skip_replay_concordance\":true,\"skip_counterfactual\":true}'),$,false)"
       },
       "OutputPath": "$.merged",
       "Next": "CheckSkipMorningEnrich"
@@ -68,16 +68,7 @@
         "DocumentName": "AWS-RunShellScript",
         "InstanceIds.$": "$.ec2_instance_id",
         "Parameters": {
-          "commands": [
-            "set -eo pipefail",
-            "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-data pull --ff-only origin main",
-            "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-config pull --ff-only origin main",
-            "cd /home/ec2-user/alpha-engine-data",
-            "export HOME=/home/ec2-user",
-            "set -a && source /home/ec2-user/.alpha-engine.env && set +a",
-            "trap 'aws s3 cp /var/log/morning-enrich.log \"s3://alpha-engine-research/_ssm_logs/morning-enrich/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true' EXIT",
-            "bash infrastructure/spot_data_weekly.sh --morning-enrich-only 2>&1 | tee /var/log/morning-enrich.log"
-          ],
+          "commands.$": "States.Array('set -eo pipefail','sudo -u ec2-user git -C /home/ec2-user/alpha-engine-data pull --ff-only origin main','sudo -u ec2-user git -C /home/ec2-user/alpha-engine-config pull --ff-only origin main','cd /home/ec2-user/alpha-engine-data','export HOME=/home/ec2-user','set -a && source /home/ec2-user/.alpha-engine.env && set +a','trap \\'aws s3 cp /var/log/morning-enrich.log \"s3://alpha-engine-research/_ssm_logs/morning-enrich/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true\\' EXIT',States.Format('bash infrastructure/spot_data_weekly.sh --morning-enrich-only{} 2>&1 | tee /var/log/morning-enrich.log',$.preflight_args))",
           "executionTimeout": [
             "5400"
           ]
@@ -191,16 +182,7 @@
         "DocumentName": "AWS-RunShellScript",
         "InstanceIds.$": "$.ec2_instance_id",
         "Parameters": {
-          "commands": [
-            "set -eo pipefail",
-            "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-data pull --ff-only origin main",
-            "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-config pull --ff-only origin main",
-            "cd /home/ec2-user/alpha-engine-data",
-            "export HOME=/home/ec2-user",
-            "set -a && source /home/ec2-user/.alpha-engine.env && set +a",
-            "trap 'aws s3 cp /var/log/data-weekly.log \"s3://alpha-engine-research/_ssm_logs/data-weekly/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true' EXIT",
-            "bash infrastructure/spot_data_weekly.sh --phase1-only 2>&1 | tee /var/log/data-weekly.log"
-          ],
+          "commands.$": "States.Array('set -eo pipefail','sudo -u ec2-user git -C /home/ec2-user/alpha-engine-data pull --ff-only origin main','sudo -u ec2-user git -C /home/ec2-user/alpha-engine-config pull --ff-only origin main','cd /home/ec2-user/alpha-engine-data','export HOME=/home/ec2-user','set -a && source /home/ec2-user/.alpha-engine.env && set +a','trap \\'aws s3 cp /var/log/data-weekly.log \"s3://alpha-engine-research/_ssm_logs/data-weekly/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true\\' EXIT',States.Format('bash infrastructure/spot_data_weekly.sh --phase1-only{} 2>&1 | tee /var/log/data-weekly.log',$.preflight_args))",
           "executionTimeout": [
             "5400"
           ]
@@ -314,15 +296,7 @@
         "DocumentName": "AWS-RunShellScript",
         "InstanceIds.$": "$.ec2_instance_id",
         "Parameters": {
-          "commands": [
-            "set -eo pipefail",
-            "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-data pull --ff-only origin main",
-            "cd /home/ec2-user/alpha-engine-data",
-            "export HOME=/home/ec2-user",
-            "set -a && source /home/ec2-user/.alpha-engine.env && set +a",
-            "trap 'aws s3 cp /var/log/rag-ingestion.log \"s3://alpha-engine-research/_ssm_logs/rag-ingestion/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true' EXIT",
-            "bash infrastructure/spot_data_weekly.sh --rag-only 2>&1 | tee /var/log/rag-ingestion.log"
-          ],
+          "commands.$": "States.Array('set -eo pipefail','sudo -u ec2-user git -C /home/ec2-user/alpha-engine-data pull --ff-only origin main','cd /home/ec2-user/alpha-engine-data','export HOME=/home/ec2-user','set -a && source /home/ec2-user/.alpha-engine.env && set +a','trap \\'aws s3 cp /var/log/rag-ingestion.log \"s3://alpha-engine-research/_ssm_logs/rag-ingestion/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true\\' EXIT',States.Format('bash infrastructure/spot_data_weekly.sh --rag-only{} 2>&1 | tee /var/log/rag-ingestion.log',$.preflight_args))",
           "executionTimeout": [
             "3600"
           ]
@@ -435,7 +409,7 @@
       "Parameters": {
         "FunctionName": "alpha-engine-predictor-regime-substrate:live",
         "Payload": {
-          "action": "produce"
+          "action.$": "$.regime_action"
         }
       },
       "TimeoutSeconds": 360,
@@ -489,7 +463,7 @@
       "Parameters": {
         "FunctionName": "alpha-engine-predictor-regime-retrospective-eval:live",
         "Payload": {
-          "action": "produce"
+          "action.$": "$.regime_action"
         }
       },
       "TimeoutSeconds": 660,
@@ -551,7 +525,8 @@
                 "FunctionName": "alpha-engine-research-runner:live",
                 "Payload": {
                   "weekly_run": true,
-                  "force": true
+                  "force": true,
+                  "dry_run_llm.$": "$.research_dry"
                 }
               },
               "TimeoutSeconds": 900,
@@ -622,7 +597,8 @@
               "Parameters": {
                 "FunctionName": "alpha-engine-data-collector:live",
                 "Payload": {
-                  "phase": 2
+                  "phase": 2,
+                  "dry_run.$": "$.data_phase2_dry"
                 }
               },
               "TimeoutSeconds": 600,
@@ -1155,17 +1131,7 @@
                 "DocumentName": "AWS-RunShellScript",
                 "InstanceIds.$": "$.ec2_instance_id",
                 "Parameters": {
-                  "commands": [
-                    "set -eo pipefail",
-                    "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-predictor pull --ff-only origin main",
-                    "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-config pull --ff-only origin main",
-                    "sudo -u ec2-user cp /home/ec2-user/alpha-engine-config/predictor/predictor.yaml /home/ec2-user/alpha-engine-predictor/config/predictor.yaml",
-                    "cd /home/ec2-user/alpha-engine-predictor",
-                    "export HOME=/home/ec2-user",
-                    "set -a && source /home/ec2-user/.alpha-engine.env && set +a",
-                    "trap 'aws s3 cp /var/log/predictor-training.log \"s3://alpha-engine-research/_ssm_logs/predictor-training/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true' EXIT",
-                    "bash infrastructure/spot_train.sh --full-only 2>&1 | tee /var/log/predictor-training.log"
-                  ],
+                  "commands.$": "States.Array('set -eo pipefail','sudo -u ec2-user git -C /home/ec2-user/alpha-engine-predictor pull --ff-only origin main','sudo -u ec2-user git -C /home/ec2-user/alpha-engine-config pull --ff-only origin main','sudo -u ec2-user cp /home/ec2-user/alpha-engine-config/predictor/predictor.yaml /home/ec2-user/alpha-engine-predictor/config/predictor.yaml','cd /home/ec2-user/alpha-engine-predictor','export HOME=/home/ec2-user','set -a && source /home/ec2-user/.alpha-engine.env && set +a','trap \\'aws s3 cp /var/log/predictor-training.log \"s3://alpha-engine-research/_ssm_logs/predictor-training/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true\\' EXIT',States.Format('bash infrastructure/spot_train.sh --full-only{} 2>&1 | tee /var/log/predictor-training.log',$.preflight_args))",
                   "executionTimeout": [
                     "5400"
                   ]
@@ -1440,7 +1406,7 @@
           "executionTimeout": [
             "7200"
           ],
-          "commands.$": "States.Array('set -eo pipefail','sudo -u ec2-user git -C /home/ec2-user/alpha-engine-backtester pull --ff-only origin main','cd /home/ec2-user/alpha-engine-backtester','export HOME=/home/ec2-user','set -a && source /home/ec2-user/.alpha-engine.env && set +a','trap \\'aws s3 cp /var/log/backtester.log \"s3://alpha-engine-research/_ssm_logs/backtester/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true\\' EXIT',States.Format('export RUN_DATE=\\'{}\\'',$.run_date),'bash infrastructure/spot_backtest.sh --skip-stages=parity,evaluator 2>&1 | tee /var/log/backtester.log')"
+          "commands.$": "States.Array('set -eo pipefail','sudo -u ec2-user git -C /home/ec2-user/alpha-engine-backtester pull --ff-only origin main','cd /home/ec2-user/alpha-engine-backtester','export HOME=/home/ec2-user','set -a && source /home/ec2-user/.alpha-engine.env && set +a','trap \\'aws s3 cp /var/log/backtester.log \"s3://alpha-engine-research/_ssm_logs/backtester/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true\\' EXIT',States.Format('export RUN_DATE=\\'{}\\'',$.run_date),States.Format('bash infrastructure/spot_backtest.sh --skip-stages=parity,evaluator{} 2>&1 | tee /var/log/backtester.log',$.preflight_args))"
         },
         "TimeoutSeconds": 7200
       },
@@ -1556,7 +1522,7 @@
           "executionTimeout": [
             "7200"
           ],
-          "commands.$": "States.Array('set -eo pipefail','sudo -u ec2-user git -C /home/ec2-user/alpha-engine-backtester pull --ff-only origin main','cd /home/ec2-user/alpha-engine-backtester','export HOME=/home/ec2-user','set -a && source /home/ec2-user/.alpha-engine.env && set +a','trap \\'aws s3 cp /var/log/parity.log \"s3://alpha-engine-research/_ssm_logs/parity/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true\\' EXIT',States.Format('export RUN_DATE=\\'{}\\'',$.run_date),'bash infrastructure/spot_backtest.sh --skip-stages=backtest,evaluator 2>&1 | tee /var/log/parity.log')"
+          "commands.$": "States.Array('set -eo pipefail','sudo -u ec2-user git -C /home/ec2-user/alpha-engine-backtester pull --ff-only origin main','cd /home/ec2-user/alpha-engine-backtester','export HOME=/home/ec2-user','set -a && source /home/ec2-user/.alpha-engine.env && set +a','trap \\'aws s3 cp /var/log/parity.log \"s3://alpha-engine-research/_ssm_logs/parity/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true\\' EXIT',States.Format('export RUN_DATE=\\'{}\\'',$.run_date),States.Format('bash infrastructure/spot_backtest.sh --skip-stages=backtest,evaluator{} 2>&1 | tee /var/log/parity.log',$.preflight_args))"
         },
         "TimeoutSeconds": 7200
       },
@@ -1672,7 +1638,7 @@
           "executionTimeout": [
             "3600"
           ],
-          "commands.$": "States.Array('set -eo pipefail','sudo -u ec2-user git -C /home/ec2-user/alpha-engine-backtester pull --ff-only origin main','cd /home/ec2-user/alpha-engine-backtester','export HOME=/home/ec2-user','set -a && source /home/ec2-user/.alpha-engine.env && set +a','trap \\'aws s3 cp /var/log/evaluator.log \"s3://alpha-engine-research/_ssm_logs/evaluator/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true\\' EXIT',States.Format('export RUN_DATE=\\'{}\\'',$.run_date),'bash infrastructure/spot_backtest.sh --skip-stages=backtest,parity 2>&1 | tee /var/log/evaluator.log')"
+          "commands.$": "States.Array('set -eo pipefail','sudo -u ec2-user git -C /home/ec2-user/alpha-engine-backtester pull --ff-only origin main','cd /home/ec2-user/alpha-engine-backtester','export HOME=/home/ec2-user','set -a && source /home/ec2-user/.alpha-engine.env && set +a','trap \\'aws s3 cp /var/log/evaluator.log \"s3://alpha-engine-research/_ssm_logs/evaluator/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true\\' EXIT',States.Format('export RUN_DATE=\\'{}\\'',$.run_date),States.Format('bash infrastructure/spot_backtest.sh --skip-stages=backtest,parity{} 2>&1 | tee /var/log/evaluator.log',$.preflight_args))"
         },
         "TimeoutSeconds": 3600
       },
diff --git a/tests/test_sf_friday_shell_run_wiring.py b/tests/test_sf_friday_shell_run_wiring.py
index c80a7a1..bbc7a2b 100644
--- a/tests/test_sf_friday_shell_run_wiring.py
+++ b/tests/test_sf_friday_shell_run_wiring.py
@@ -1,4 +1,4 @@
-"""Pins the Friday-PM `shell_run` spine in the Saturday SF.
+"""Pins the Friday-PM `shell_run` spine + KEYSTONE in the Saturday SF.
 
 Origin: ROADMAP "Scheduled Friday-PM 'shell run' — automated full-fidelity
 preflight of the Saturday SF" (P1, added 2026-05-16). The *prevention* half
@@ -8,15 +8,28 @@
 the unattended 02:00 PT Sat run fails, wasting the week's
 research/training/backtest cycle.
 
-The spine is a STRICT SUPERSET of the pre-spine Saturday SF:
+#258 shipped the SPINE (pure-skip every workload via the existing
+Choice-gated skip mechanism). The KEYSTONE (feat/sf-shell-run-keystone)
+replaces pure-skip with dry EXECUTION so the Friday run actually boots +
+exercises the real bootstrap/import/lib-pin/transport paths:
+
+The SF is a STRICT SUPERSET of the pre-spine Saturday SF:
 - A `CheckShellRun` Choice after `InitializeInput`. `shell_run` absent OR
   false → `CheckSkipMorningEnrich` (the pre-spine `InitializeInput.Next`),
-  BYTE-IDENTICAL behaviour to today's real Saturday run.
-- `shell_run=true` → `ApplyShellRunDefaults`, a Pass that merges every
-  `skip_*` flag = true UNDER the execution input (user per-flag overrides
-  still win), then → `CheckSkipMorningEnrich`. Every workload state already
-  has a Choice-gated `skip_*`, so the whole workload no-ops via the EXISTING
-  skip mechanism (no new dry paths added in the spine).
+  BYTE-IDENTICAL behaviour to today's real Saturday run. `InitializeInput`
+  seeds the dry-path control vars at their NON-DRY identity values
+  (`preflight_args=""`, `research_dry=false`, `data_phase2_dry=false`,
+  `regime_action="produce"`) so every spot `States.Format` command string is
+  char-for-char unchanged and every Lambda Payload behaviourally identical.
+- `shell_run=true` → `ApplyShellRunDefaults`, a Pass that merges the
+  dry-path control blob UNDER the execution input (user per-flag overrides
+  still win), then → `CheckSkipMorningEnrich`. The 7 SPOT states boot + run
+  dry via `preflight_args=" --preflight-only"`; the 4 verified-clean LAMBDA
+  states (Research, DataPhase2, RegimeSubstrate, RegimeRetrospectiveEval)
+  run dry via their handler's no-write dry flag; the 5 DOCUMENTED EXCEPTION
+  states with NO verified clean dry path (DriftDetection, EvalJudge,
+  RationaleClustering, ReplayConcordance, Counterfactual) are still hard
+  skipped via the #258 skip_* mechanism (which is LEFT INTACT).
 - A `CheckShellRunNotify` Choice before the success notify. shell_run
   absent/false → the unchanged `NotifyComplete`; shell_run=true →
   `NotifyShellRunComplete` (shell-run-tagged Subject, same SNS substrate).
@@ -26,9 +39,11 @@
   `CheckSkipMorningEnrich`, silently dropping the shell-run gate.
 - Someone makes `CheckShellRun.Default` anything other than
   `CheckSkipMorningEnrich` (breaks the strict-superset / real-Saturday path).
-- The `ApplyShellRunDefaults` merge order flips so user overrides lose, or a
-  `skip_*` flag is dropped from the defaults blob (a workload state would
-  then RUN under shell_run — a side-effecting Saturday workload on a Friday).
+- The `ApplyShellRunDefaults` merge order flips so user overrides lose.
+- A spot `States.Format` command drifts so `preflight_args=""` no longer
+  produces a byte-identical command (the real Saturday run would change).
+- A documented-exception Lambda gets routed to an unverified dry path, or a
+  verified-clean Lambda silently loses its dry flag.
 - `NotifyComplete` is mutated (the real Saturday SUCCESS email changes).
 - The Friday EventBridge rule is shipped ENABLED, or without shell_run=true,
   or pointed at a different SF.
@@ -50,10 +65,10 @@
     / "alpha-engine-orchestration.yaml"
 )
 
-# The complete set of Choice-gated skip flags in the Saturday SF. If a new
-# workload state is added with a new skip_* flag, it MUST be added to
-# ApplyShellRunDefaults too (else that state RUNS under shell_run) — this
-# constant + test_shell_defaults_cover_every_skip_gate enforce that.
+# The complete set of Choice-gated skip flags in the Saturday SF. The #258
+# skip_* gates are LEFT INTACT by the keystone (they remain valid for
+# targeted operator skips) — this constant + test_skip_gates_still_intact
+# enforce that none were deleted.
 _EXPECTED_SKIPS = {
     "skip_morning_enrich",
     "skip_data_phase1",
@@ -73,6 +88,187 @@
     "skip_evaluator",
 }
 
+# KEYSTONE: the 7 SPOT workload states. Under shell_run they BOOT + run dry
+# via preflight_args=" --preflight-only" (a States.Format suffix); with
+# preflight_args="" (the real Saturday run) the command is byte-identical.
+# Maps state name → (mode token the {} immediately follows, log file).
+_SPOT_STATES = {
+    "MorningEnrich": (
+        "bash infrastructure/spot_data_weekly.sh --morning-enrich-only",
+        "/var/log/morning-enrich.log",
+    ),
+    "DataPhase1": (
+        "bash infrastructure/spot_data_weekly.sh --phase1-only",
+        "/var/log/data-weekly.log",
+    ),
+    "RAGIngestion": (
+        "bash infrastructure/spot_data_weekly.sh --rag-only",
+        "/var/log/rag-ingestion.log",
+    ),
+    "PredictorTraining": (
+        "bash infrastructure/spot_train.sh --full-only",
+        "/var/log/predictor-training.log",
+    ),
+    "Backtester": (
+        "bash infrastructure/spot_backtest.sh --skip-stages=parity,evaluator",
+        "/var/log/backtester.log",
+    ),
+    "Parity": (
+        "bash infrastructure/spot_backtest.sh --skip-stages=backtest,evaluator",
+        "/var/log/parity.log",
+    ),
+    "Evaluator": (
+        "bash infrastructure/spot_backtest.sh --skip-stages=backtest,parity",
+        "/var/log/evaluator.log",
+    ),
+}
+
+# KEYSTONE: the 4 LAMBDA states with a VERIFIED clean no-write dry path.
+# state name → (Payload key carrying the dry flag, input var it references,
+# the dry value ApplyShellRunDefaults sets, the non-dry identity default).
+_DRY_LAMBDA_STATES = {
+    "Research": ("dry_run_llm.$", "$.research_dry"),
+    "DataPhase2": ("dry_run.$", "$.data_phase2_dry"),
+    "RegimeSubstrate": ("action.$", "$.regime_action"),
+    "RegimeRetrospectiveEval": ("action.$", "$.regime_action"),
+}
+
+# KEYSTONE: the 5 documented-exception states still HARD-skipped under
+# shell_run (no verified clean no-write dry path — see PR body / the
+# ApplyShellRunDefaults Comment for the per-state reason). These are the
+# ONLY skip_* flags ApplyShellRunDefaults force-sets.
+_KEYSTONE_SKIP_EXCEPTIONS = {
+    "skip_drift_detection",
+    "skip_eval_judge",
+    "skip_rationale_clustering",
+    "skip_replay_concordance",
+    "skip_counterfactual",
+}
+
+# Dry-path control vars + their NON-DRY identity values seeded by
+# InitializeInput (so the absent path is byte-identical / behaviourally
+# identical) and the DRY values ApplyShellRunDefaults overrides them with.
+_CTRL_IDENTITY = {
+    "preflight_args": "",
+    "research_dry": False,
+    "data_phase2_dry": False,
+    "regime_action": "produce",
+}
+_CTRL_DRY = {
+    "preflight_args": " --preflight-only",
+    "research_dry": True,
+    "data_phase2_dry": True,
+    "regime_action": "dry_run",
+}
+
+
+def _eval_intrinsic_args(s: str) -> list[str]:
+    """Split a top-level comma-separated ASL-intrinsic arg list, respecting
+    single-quoted strings, nested parens, and \\' escapes."""
+    args: list[str] = []
+    depth = 0
+    i = 0
+    cur: list[str] = []
+    inq = False
+    while i < len(s):
+        c = s[i]
+        if inq:
+            if c == "\\" and i + 1 < len(s) and s[i + 1] == "'":
+                cur.append("'")
+                i += 2
+                continue
+            if c == "'":
+                inq = False
+                cur.append(c)
+                i += 1
+                continue
+            cur.append(c)
+            i += 1
+            continue
+        if c == "'":
+            inq = True
+            cur.append(c)
+            i += 1
+            continue
+        if c == "(":
+            depth += 1
+            cur.append(c)
+            i += 1
+            continue
+        if c == ")":
+            depth -= 1
+            cur.append(c)
+            i += 1
+            continue
+        if c == "," and depth == 0:
+            args.append("".join(cur).strip())
+            cur = []
+            i += 1
+            continue
+        cur.append(c)
+        i += 1
+    if cur:
+        args.append("".join(cur).strip())
+    return args
+
+
+def _eval_expr(e: str, ctx: dict):
+    """Resolve the subset of ASL intrinsics the spot commands.$ use:
+    string literals, $.var refs, States.Array(...), States.Format(...)."""
+    e = e.strip()
+    if e.startswith("'") and e.endswith("'"):
+        return e[1:-1].replace("\\'", "'")
+    if e.startswith("$."):
+        return ctx[e[2:]]
+    if e.startswith("States.Array("):
+        inner = e[len("States.Array(") : -1]
+        return [_eval_expr(a, ctx) for a in _eval_intrinsic_args(inner)]
+    if e.startswith("States.Format("):
+        inner = e[len("States.Format(") : -1]
+        parts = _eval_intrinsic_args(inner)
+        tmpl = _eval_expr(parts[0], ctx)
+        subs = [_eval_expr(p, ctx) for p in parts[1:]]
+        out: list[str] = []
+        si = 0
+        i = 0
+        while i < len(tmpl):
+            if tmpl[i : i + 2] == "{}":
+                out.append(str(subs[si]))
+                si += 1
+                i += 2
+            else:
+                out.append(tmpl[i])
+                i += 1
+        return "".join(out)
+    raise AssertionError(f"unhandled intrinsic in spot command: {e[:80]!r}")
+
+
+def _resolve_spot_commands(state: dict, preflight_args: str) -> list[str]:
+    """Resolve a spot state's commands.$ States.Array to the literal list,
+    binding $.preflight_args (and $.run_date for the RUN_DATE export)."""
+    p = state["Parameters"]["Parameters"]
+    assert "commands.$" in p, (
+        "spot state must use commands.$ States.Array (so the final entry "
+        "can be a States.Format interpolating $.preflight_args)"
+    )
+    return _eval_expr(
+        p["commands.$"],
+        {"preflight_args": preflight_args, "run_date": "2026-05-18"},
+    )
+
+
+@pytest.fixture(scope="module")
+def orig_sf() -> dict:
+    """The pre-keystone (origin/main #258) SF, for byte-identical proof."""
+    import subprocess
+
+    raw = subprocess.check_output(
+        ["git", "show", "origin/main:infrastructure/step_function.json"],
+        cwd=_REPO_ROOT,
+        text=True,
+    )
+    return json.loads(raw)
+
 
 @pytest.fixture(scope="module")
 def sf() -> dict:
@@ -167,12 +363,19 @@ def test_success_notify_gate_default_is_notify_complete(self, states):
 
 
 class TestApplyShellRunDefaults:
-    """shell_run=true ⇒ every workload state is no-op'd via the EXISTING
-    skip mechanism, and user per-flag overrides still win."""
+    """shell_run=true ⇒ spots boot dry (--preflight-only), verified-clean
+    Lambdas run dry, only the 5 documented-exception states hard-skip, and
+    user per-flag overrides still win."""
 
     def _merge_expr(self, states) -> str:
         return states["ApplyShellRunDefaults"]["Parameters"]["merged.$"]
 
+    def _blob(self, states) -> dict:
+        expr = self._merge_expr(states)
+        m = re.search(r"StringToJson\('(.+?)'\)", expr)
+        assert m, "could not extract the embedded shell-run defaults blob"
+        return json.loads(m.group(1))
+
     def test_pass_state_routes_into_existing_skip_chain(self, states):
         st = states["ApplyShellRunDefaults"]
         assert st["Type"] == "Pass"
@@ -182,32 +385,154 @@ def test_pass_state_routes_into_existing_skip_chain(self, states):
     def test_user_input_wins_over_shell_defaults(self, states):
         # States.JsonMerge(defaults, $, false) — $ (current state, carrying
         # the user input) MUST be the 2nd arg so an explicit
-        # {"shell_run": true, "skip_research": false} still runs Research.
+        # {"shell_run": true, "skip_backtester": true} still skips it.
         expr = self._merge_expr(states)
         assert expr.startswith("States.JsonMerge(States.StringToJson(")
         assert expr.endswith(",$,false)"), (
             "user input ($) must be the 2nd JsonMerge arg so explicit "
-            "per-flag overrides win over the shell-run skip defaults"
+            "per-flag overrides win over the shell-run defaults"
         )
 
-    def test_shell_defaults_blob_is_valid_json_all_true(self, states):
-        expr = self._merge_expr(states)
-        m = re.search(r"StringToJson\('(.+?)'\)", expr)
-        assert m, "could not extract the embedded skip-defaults JSON blob"
-        blob = json.loads(m.group(1))
-        assert set(blob) == _EXPECTED_SKIPS, set(blob) ^ _EXPECTED_SKIPS
-        assert all(v is True for v in blob.values()), blob
+    def test_shell_defaults_set_dry_control_vars(self, states):
+        """ApplyShellRunDefaults must set every dry-path control var to its
+        DRY value (preflight_args=' --preflight-only' with LEADING space;
+        research/data_phase2 dry true; regime_action 'dry_run')."""
+        blob = self._blob(states)
+        for k, v in _CTRL_DRY.items():
+            assert blob.get(k) == v, (
+                f"shell-run blob {k}={blob.get(k)!r}, expected {v!r}"
+            )
+        assert blob["preflight_args"].startswith(" "), (
+            "preflight_args MUST carry its leading space INSIDE the var so "
+            'the absent-path "" yields a byte-identical spot command'
+        )
+
+    def test_shell_defaults_skip_ONLY_documented_exceptions(self, states):
+        """The keystone must NOT force all 16 skip_* true (that would pure-
+        skip the workload, defeating the point). It force-sets ONLY the 5
+        documented-exception skips (states with no verified clean dry
+        path)."""
+        blob = self._blob(states)
+        skip_keys = {k for k in blob if k.startswith("skip_")}
+        assert skip_keys == _KEYSTONE_SKIP_EXCEPTIONS, (
+            "ApplyShellRunDefaults skip_* set drifted from the documented "
+            f"exceptions: {skip_keys ^ _KEYSTONE_SKIP_EXCEPTIONS}"
+        )
+        for k in _KEYSTONE_SKIP_EXCEPTIONS:
+            assert blob[k] is True, f"{k} must be force-true'd"
+        # The verified-clean workload states must NOT be skipped (they run
+        # dry) — a regression re-adding e.g. skip_research would silently
+        # demote Research from dry-execution back to pure-skip.
+        for forbidden in (
+            "skip_morning_enrich",
+            "skip_data_phase1",
+            "skip_rag_ingestion",
+            "skip_predictor_training",
+            "skip_backtester",
+            "skip_parity",
+            "skip_evaluator",
+            "skip_research",
+            "skip_data_phase2",
+            "skip_regime_substrate",
+            "skip_regime_retrospective_eval",
+        ):
+            assert forbidden not in blob, (
+                f"{forbidden} must NOT be force-skipped under the keystone "
+                "— that state runs DRY, not skipped"
+            )
 
-    def test_shell_defaults_cover_every_skip_gate(self, states):
-        """Every Choice-gated skip_* in the SF must be force-true'd by
-        shell_run — otherwise that workload state RUNS on a Friday dry-pass
-        (a side-effecting Saturday workload firing on a Friday)."""
+    def test_skip_gates_still_intact(self, states):
+        """The keystone LEAVES the #258 Choice-gated skip_* mechanism intact
+        (still valid for targeted operator skips). None deleted."""
         gated = _all_skip_gate_flags(states)
         assert gated == _EXPECTED_SKIPS, (
-            "skip-gate flags drifted from the shell-run defaults blob: "
-            f"{gated ^ _EXPECTED_SKIPS}. Add the new flag to "
-            "ApplyShellRunDefaults (and _EXPECTED_SKIPS) so the new "
-            "workload state no-ops under shell_run."
+            "a Choice-gated skip_* was deleted/added; the keystone keeps "
+            f"the #258 skip mechanism intact: {gated ^ _EXPECTED_SKIPS}"
+        )
+
+    def test_initialize_input_seeds_nondry_identity_defaults(self, states):
+        """InitializeInput must seed every dry-control var at its NON-DRY
+        identity value so the absent path is byte/behaviour-identical and
+        the spot States.Format / Lambda Payload .$ refs always resolve."""
+        expr = states["InitializeInput"]["Parameters"]["merged.$"]
+        m = re.search(r"StringToJson\('(\{[^']*?\})'\)", expr)
+        assert m, "could not extract InitializeInput defaults blob"
+        blob = json.loads(m.group(1))
+        for k, v in _CTRL_IDENTITY.items():
+            assert blob.get(k) == v, (
+                f"InitializeInput {k}={blob.get(k)!r}, expected identity "
+                f"{v!r} (absent path must be byte-identical)"
+            )
+        # The pre-keystone run_date / sns_topic_arn defaults must survive.
+        assert "sns_topic_arn" in blob
+        assert expr.endswith(",$$.Execution.Input,false)")
+
+
+class TestByteIdenticalAbsentPath:
+    """The CORE invariant (#258 established it, the keystone must preserve
+    it): shell_run absent/false ⇒ every spot command string is char-for-
+    char identical to the pre-keystone (origin/main) SF, and every Lambda
+    Payload is behaviourally identical."""
+
+    def _state(self, sf: dict, name: str) -> dict:
+        if name in sf["States"]:
+            return sf["States"][name]
+        # Parallel-branch states (Research/DataPhase2/PredictorTraining).
+        par = sf["States"]["ResearchPredictorParallel"]
+        for br in par["Branches"]:
+            if name in br["States"]:
+                return br["States"][name]
+        raise KeyError(name)
+
+    @pytest.mark.parametrize("name", sorted(_SPOT_STATES))
+    def test_spot_command_byte_identical_when_preflight_args_empty(
+        self, sf, orig_sf, name
+    ):
+        new_cmds = _resolve_spot_commands(
+            self._state(sf, name), preflight_args=""
+        )
+        orig_state = self._state(orig_sf, name)
+        op = orig_state["Parameters"]["Parameters"]
+        if "commands" in op:
+            orig_cmds = op["commands"]
+        else:
+            orig_cmds = _resolve_spot_commands(orig_state, preflight_args="")
+        assert new_cmds == orig_cmds, (
+            f"{name}: keystone changed the absent-path command — the real "
+            f"Saturday run would differ.\norig={orig_cmds[-1]!r}\n"
+            f"new ={new_cmds[-1]!r}"
+        )
+
+    @pytest.mark.parametrize("name", sorted(_SPOT_STATES))
+    def test_spot_command_carries_preflight_only_under_shell_run(
+        self, sf, name
+    ):
+        token, log = _SPOT_STATES[name]
+        cmds = _resolve_spot_commands(
+            self._state(sf, name), preflight_args=" --preflight-only"
+        )
+        final = cmds[-1]
+        # {} sits immediately after the mode token (no literal space) so the
+        # leading-space-bearing var produces exactly one separating space.
+        assert final == f"{token} --preflight-only 2>&1 | tee {log}", final
+        assert "  --preflight-only" not in final, "double space — bad join"
+
+    @pytest.mark.parametrize(
+        "name,payload_key,ref", sorted(
+            (n, k, r) for n, (k, r) in _DRY_LAMBDA_STATES.items()
+        )
+    )
+    def test_dry_lambda_payload_references_control_var(
+        self, sf, name, payload_key, ref
+    ):
+        """Verified-clean Lambdas route their dry flag via a $.var ref, so
+        the absent path (control var at non-dry identity) is behaviourally
+        identical and shell_run flips it to the dry value."""
+        st = self._state(sf, name)
+        payload = st["Parameters"]["Payload"]
+        assert payload.get(payload_key) == ref, (
+            f"{name}.Payload[{payload_key}] must be {ref} (so the dry flag "
+            f"follows the control var); got {payload.get(payload_key)!r}"
         )
 
 
@@ -235,14 +560,24 @@ def test_shell_run_notify_gate_fires_on_true(self, states):
 
 
 class TestHappyPathTraversal:
-    """End-to-end: with shell_run=true the SF must reach
-    NotifyShellRunComplete having visited NO workload Task; with shell_run
-    absent it must be the pre-spine path (visits MorningEnrich etc.)."""
-
-    def _trace_main(self, sf, states, shell_run: bool) -> list[str]:
-        inp = {"shell_run": True} if shell_run else {}
-        # ApplyShellRunDefaults force-sets all skips when shell_run=true.
-        skips = set(_EXPECTED_SKIPS) if shell_run else set()
+    """End-to-end traversal of the deterministic gates (CheckShellRun +
+    every CheckSkip<State>). Models a Task/Wait/status-Choice as "the
+    workload RUNS, then control proceeds past it" (status checks resolve to
+    their success edge on a green run). Asserts the keystone semantics:
+    under shell_run the 7 spot + 4 dry-Lambda workload gates do NOT skip
+    (their state RUNS dry), the 5 documented-exception gates DO skip, and
+    the run still reaches NotifyShellRunComplete; absent shell_run it is the
+    pre-keystone path."""
+
+    def _trace_main(self, sf, states, shell_run: bool) -> tuple:
+        """Returns (visited_state_order, skipped_workload_set). A workload
+        that RUNS is VISITED (appears in order); a skipped workload is NOT
+        (its CheckSkip gate jumps past it)."""
+        # Under shell_run ApplyShellRunDefaults force-sets ONLY the 5
+        # documented-exception skips; all other workload gates fall through
+        # to run the (dry) state.
+        skips = set(_KEYSTONE_SKIP_EXCEPTIONS) if shell_run else set()
+        skipped_workloads: set[str] = set()
         order: list[str] = []
         seen: set[str] = set()
         cur = sf["StartAt"]
@@ -253,29 +588,48 @@ def _trace_main(self, sf, states, shell_run: bool) -> list[str]:
             t = st.get("Type")
             if t == "Choice":
                 taken = None
+                success_edge = None
+                failed_guard = False
                 for c in st.get("Choices", []):
-                    conds = c.get("And") or [c]
+                    conds = c.get("And") or c.get("Or") or [c]
                     vars_ = [
                         cc.get("Variable", "").replace("$.", "")
                         for cc in conds
                         if "Variable" in cc
                     ]
-                    # shell_run gate
-                    if vars_ == ["shell_run", "shell_run"] or vars_ == [
-                        "shell_run"
-                    ]:
-                        if inp.get("shell_run") is True:
+                    if vars_ and set(vars_) == {"shell_run"}:
+                        if shell_run:
                             taken = c["Next"]
                             break
                         continue
-                    # skip_* gate
-                    if vars_ and all(v in _EXPECTED_SKIPS for v in vars_):
+                    if vars_ and all(v.startswith("skip_") for v in vars_):
                         if all(v in skips for v in vars_):
+                            # CheckSkip<X>: skip-true → Next (past X);
+                            # Default is the workload X itself, now skipped.
+                            skipped_workloads.add(st.get("Default"))
                             taken = c["Next"]
                             break
                         continue
-                    # status checks (Success edge) — not exercised on the
-                    # all-skip happy path; fall through to Default
+                    # Status-check Choice on a GREEN happy-path trace:
+                    eqs = {
+                        cc.get("StringEquals")
+                        for cc in conds
+                        if "StringEquals" in cc
+                    }
+                    if eqs & {"Success", "OK", "SKIPPED"}:
+                        # success-continuation edge
+                        success_edge = success_edge or c["Next"]
+                    if eqs & {"FAILED", "ERROR"}:
+                        # a FAILED-guard Choice (CheckBranchOutcomes shape):
+                        # on a green run it is NOT taken → Default proceeds.
+                        failed_guard = True
+                    # InProgress/Pending wait-loop edges: ignored (the poll
+                    # resolves to Success on a green run).
+                if taken is None:
+                    if success_edge is not None:
+                        taken = success_edge
+                    elif failed_guard:
+                        taken = st.get("Default")
                 cur = taken if taken else st.get("Default")
             elif t == "Parallel":
                 cur = st.get("Next")
@@ -286,37 +640,53 @@ def _trace_main(self, sf, states, shell_run: bool) -> list[str]:
                     order.append("[END]")
                     break
                 cur = st.get("Next")
-        return order
+        return order, skipped_workloads
 
-    def test_shell_run_true_reaches_shell_notify_no_workload(
+    def test_shell_run_true_runs_workloads_dry_skips_only_exceptions(
         self, sf, states
     ):
-        order = self._trace_main(sf, states, shell_run=True)
+        order, skipped = self._trace_main(sf, states, shell_run=True)
         assert order[-1] == "[END]"
+        assert "ApplyShellRunDefaults" in order
         assert "NotifyShellRunComplete" in order
         assert "NotifyComplete" not in order
-        # No side-effecting workload Task visited on the main thread.
-        for forbidden in (
+        # The 7 spot + 2 dry-Lambda main-thread workload states are NOT
+        # skipped — their CheckSkip gate falls through so the (dry) state is
+        # VISITED. (Research/DataPhase2/PredictorTraining live inside the
+        # Parallel and aren't on this main-thread trace; their dry-routing
+        # is asserted by TestByteIdenticalAbsentPath +
+        # test_shell_defaults_skip_ONLY_documented_exceptions.)
+        for ran_dry in (
             "MorningEnrich",
             "DataPhase1",
             "RAGIngestion",
-            "RegimeSubstrate",
             "Backtester",
             "Parity",
             "Evaluator",
+            "RegimeSubstrate",
+            "RegimeRetrospectiveEval",
         ):
-            assert forbidden not in order, (
-                f"{forbidden} ran under shell_run — must be skipped"
+            assert ran_dry in order, (
+                f"{ran_dry} was NOT visited under shell_run — the keystone "
+                "runs it DRY (visited), not skip (jumped past)"
+            )
+            assert ran_dry not in skipped, (
+                f"{ran_dry} was skipped under shell_run — keystone runs it "
+                "DRY"
             )
+        # The documented-exception workload states ARE skipped (jumped
+        # past) — no verified clean dry path.
+        assert "DriftDetection" in skipped
+        assert "DriftDetection" not in order
         # Health/substrate checks DO still run (the bootstrap smoke).
         assert "SaturdayHealthCheck" in order
         assert "WeeklySubstrateHealthCheck" in order
 
-    def test_shell_run_absent_is_pre_spine_path(self, sf, states):
-        order = self._trace_main(sf, states, shell_run=False)
-        # No shell_run ⇒ Default at CheckShellRun ⇒ run the real workload.
+    def test_shell_run_absent_is_pre_keystone_path(self, sf, states):
+        order, skipped = self._trace_main(sf, states, shell_run=False)
         assert "ApplyShellRunDefaults" not in order
         assert "NotifyShellRunComplete" not in order
+        assert not skipped, "nothing skipped when shell_run absent"
         assert order[: order.index("CheckSkipMorningEnrich") + 2] == [
             "InitializeInput",
             "CheckShellRun",
diff --git a/tests/test_sf_morning_enrich_split_wiring.py b/tests/test_sf_morning_enrich_split_wiring.py
index 31af8ea..fc62a78 100644
--- a/tests/test_sf_morning_enrich_split_wiring.py
+++ b/tests/test_sf_morning_enrich_split_wiring.py
@@ -26,6 +26,8 @@
 
 import pytest
 
+from tests.sf_command_utils import extract_commands
+
 
 _REPO_ROOT = Path(__file__).resolve().parent.parent
 _SF_PATH = _REPO_ROOT / "infrastructure" / "step_function.json"
@@ -166,7 +168,10 @@ class TestSsmCommandShape:
     from --data-only to --phase1-only."""
 
     def _commands(self, states, name):
-        return states[name]["Parameters"]["Parameters"]["commands"]
+        # commands.$ States.Array (keystone routed the final launch through
+        # a States.Format($.preflight_args) suffix) — resolve via the
+        # shared helper, which renders the Format element as its template.
+        return extract_commands(states[name])
 
     def test_morning_enrich_invokes_morning_enrich_only(self, states):
         joined = " ".join(self._commands(states, "MorningEnrich"))
diff --git a/tests/test_sf_regime_substrate_wiring.py b/tests/test_sf_regime_substrate_wiring.py
index 0a14c7a..a6d3876 100644
--- a/tests/test_sf_regime_substrate_wiring.py
+++ b/tests/test_sf_regime_substrate_wiring.py
@@ -56,12 +56,23 @@ def test_regime_substrate_state_exists() -> None:
 
 def test_regime_substrate_payload_is_produce_action() -> None:
     """Handler-side ``dry_run`` mode returns the payload without writing
-    to S3. Production SF must always call ``produce`` so the artifact
-    actually lands; pin to catch a misguided debugging change."""
+    to S3. Post the Friday shell-run KEYSTONE the action is routed via the
+    ``$.regime_action`` control var: InitializeInput seeds it ``"produce"``
+    (the pre-keystone hardcoded value — the real Saturday run is
+    behaviourally identical, the artifact still lands), and
+    ApplyShellRunDefaults flips it to ``"dry_run"`` ONLY under shell_run
+    (verified clean no-write dry path). Pin the routing so a misguided
+    debugging change can't hardcode dry on the production run."""
     sf = _sf()
     payload = sf["States"]["RegimeSubstrate"]["Parameters"]["Payload"]
-    assert payload.get("action") == "produce", (
-        f"RegimeSubstrate payload must be action=produce; got {payload!r}"
+    assert payload.get("action.$") == "$.regime_action", (
+        "RegimeSubstrate payload must route action via $.regime_action "
+        f"(keystone dry-routing); got {payload!r}"
+    )
+    init_expr = sf["States"]["InitializeInput"]["Parameters"]["merged.$"]
+    assert '"regime_action":"produce"' in init_expr, (
+        "InitializeInput must seed regime_action='produce' so the real "
+        "Saturday run (no shell_run) still calls produce"
     )
 
 
@@ -158,12 +169,21 @@ def test_regime_retrospective_eval_state_exists() -> None:
 
 
 def test_regime_retrospective_eval_payload_is_produce_action() -> None:
-    """Handler-side dry_run does NOT write the artifact; production
-    SF must always invoke produce so the eval artifact lands in S3."""
+    """Handler-side dry_run does NOT write the artifact. Post the Friday
+    shell-run KEYSTONE the action is routed via the ``$.regime_action``
+    control var (InitializeInput seeds ``"produce"`` → real Saturday run
+    behaviourally identical; ApplyShellRunDefaults flips to ``"dry_run"``
+    ONLY under shell_run — verified clean no-write dry path)."""
     sf = _sf()
     payload = sf["States"]["RegimeRetrospectiveEval"]["Parameters"]["Payload"]
-    assert payload.get("action") == "produce", (
-        f"RegimeRetrospectiveEval payload must be action=produce; got {payload!r}"
+    assert payload.get("action.$") == "$.regime_action", (
+        "RegimeRetrospectiveEval payload must route action via "
+        f"$.regime_action (keystone dry-routing); got {payload!r}"
+    )
+    init_expr = sf["States"]["InitializeInput"]["Parameters"]["merged.$"]
+    assert '"regime_action":"produce"' in init_expr, (
+        "InitializeInput must seed regime_action='produce' so the real "
+        "Saturday run still calls produce"
     )
 
 

From 3867565d76dd2089aa15bad03e7793fc9268057f Mon Sep 17 00:00:00 2001
From: Brian McMahon <brian@nousergon.ai>
Date: Mon, 18 May 2026 14:27:15 -0700
Subject: [PATCH 2/2] =?UTF-8?q?test(sf):=20hermetic=20byte-identical=20bas?=
 =?UTF-8?q?eline=20=E2=80=94=20fix=20CI=20(origin/main=20not=20in=20shallo?=
 =?UTF-8?q?w=20PR=20checkout)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The keystone byte-identical proof shelled out to
`git show origin/main:infrastructure/step_function.json` at test time.
GitHub Actions' shallow PR checkout has no `origin/main` local ref →
`subprocess.CalledProcessError ... exit status 128` → `test` check failed.

Replace the live-git `orig_sf` fixture with a committed frozen baseline
`tests/fixtures/sf_prekeystone_spot_commands.json` (the RESOLVED
pre-keystone spot command lists captured from origin/main; handles the
states already on commands.$ — Backtester/Parity/Evaluator). The proof
is now hermetic and still a true regression guard against the
strict-superset invariant. Docstring documents deliberate-regeneration.

Suite: 1337 passed, 1 skipped (unchanged). Keystone file 43/43.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../sf_prekeystone_spot_commands.json         | 72 +++++++++++++++++++
 tests/test_sf_friday_shell_run_wiring.py      | 34 ++++-----
 2 files changed, 89 insertions(+), 17 deletions(-)
 create mode 100644 tests/fixtures/sf_prekeystone_spot_commands.json

diff --git a/tests/fixtures/sf_prekeystone_spot_commands.json b/tests/fixtures/sf_prekeystone_spot_commands.json
new file mode 100644
index 0000000..e139ad8
--- /dev/null
+++ b/tests/fixtures/sf_prekeystone_spot_commands.json
@@ -0,0 +1,72 @@
+{
+  "Backtester": [
+    "set -eo pipefail",
+    "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-backtester pull --ff-only origin main",
+    "cd /home/ec2-user/alpha-engine-backtester",
+    "export HOME=/home/ec2-user",
+    "set -a && source /home/ec2-user/.alpha-engine.env && set +a",
+    "trap 'aws s3 cp /var/log/backtester.log \"s3://alpha-engine-research/_ssm_logs/backtester/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true' EXIT",
+    "export RUN_DATE='2026-05-18'",
+    "bash infrastructure/spot_backtest.sh --skip-stages=parity,evaluator 2>&1 | tee /var/log/backtester.log"
+  ],
+  "DataPhase1": [
+    "set -eo pipefail",
+    "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-data pull --ff-only origin main",
+    "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-config pull --ff-only origin main",
+    "cd /home/ec2-user/alpha-engine-data",
+    "export HOME=/home/ec2-user",
+    "set -a && source /home/ec2-user/.alpha-engine.env && set +a",
+    "trap 'aws s3 cp /var/log/data-weekly.log \"s3://alpha-engine-research/_ssm_logs/data-weekly/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true' EXIT",
+    "bash infrastructure/spot_data_weekly.sh --phase1-only 2>&1 | tee /var/log/data-weekly.log"
+  ],
+  "Evaluator": [
+    "set -eo pipefail",
+    "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-backtester pull --ff-only origin main",
+    "cd /home/ec2-user/alpha-engine-backtester",
+    "export HOME=/home/ec2-user",
+    "set -a && source /home/ec2-user/.alpha-engine.env && set +a",
+    "trap 'aws s3 cp /var/log/evaluator.log \"s3://alpha-engine-research/_ssm_logs/evaluator/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true' EXIT",
+    "export RUN_DATE='2026-05-18'",
+    "bash infrastructure/spot_backtest.sh --skip-stages=backtest,parity 2>&1 | tee /var/log/evaluator.log"
+  ],
+  "MorningEnrich": [
+    "set -eo pipefail",
+    "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-data pull --ff-only origin main",
+    "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-config pull --ff-only origin main",
+    "cd /home/ec2-user/alpha-engine-data",
+    "export HOME=/home/ec2-user",
+    "set -a && source /home/ec2-user/.alpha-engine.env && set +a",
+    "trap 'aws s3 cp /var/log/morning-enrich.log \"s3://alpha-engine-research/_ssm_logs/morning-enrich/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true' EXIT",
+    "bash infrastructure/spot_data_weekly.sh --morning-enrich-only 2>&1 | tee /var/log/morning-enrich.log"
+  ],
+  "Parity": [
+    "set -eo pipefail",
+    "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-backtester pull --ff-only origin main",
+    "cd /home/ec2-user/alpha-engine-backtester",
+    "export HOME=/home/ec2-user",
+    "set -a && source /home/ec2-user/.alpha-engine.env && set +a",
+    "trap 'aws s3 cp /var/log/parity.log \"s3://alpha-engine-research/_ssm_logs/parity/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true' EXIT",
+    "export RUN_DATE='2026-05-18'",
+    "bash infrastructure/spot_backtest.sh --skip-stages=backtest,evaluator 2>&1 | tee /var/log/parity.log"
+  ],
+  "PredictorTraining": [
+    "set -eo pipefail",
+    "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-predictor pull --ff-only origin main",
+    "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-config pull --ff-only origin main",
+    "sudo -u ec2-user cp /home/ec2-user/alpha-engine-config/predictor/predictor.yaml /home/ec2-user/alpha-engine-predictor/config/predictor.yaml",
+    "cd /home/ec2-user/alpha-engine-predictor",
+    "export HOME=/home/ec2-user",
+    "set -a && source /home/ec2-user/.alpha-engine.env && set +a",
+    "trap 'aws s3 cp /var/log/predictor-training.log \"s3://alpha-engine-research/_ssm_logs/predictor-training/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true' EXIT",
+    "bash infrastructure/spot_train.sh --full-only 2>&1 | tee /var/log/predictor-training.log"
+  ],
+  "RAGIngestion": [
+    "set -eo pipefail",
+    "sudo -u ec2-user git -C /home/ec2-user/alpha-engine-data pull --ff-only origin main",
+    "cd /home/ec2-user/alpha-engine-data",
+    "export HOME=/home/ec2-user",
+    "set -a && source /home/ec2-user/.alpha-engine.env && set +a",
+    "trap 'aws s3 cp /var/log/rag-ingestion.log \"s3://alpha-engine-research/_ssm_logs/rag-ingestion/$(date -u +%Y-%m-%d)/$(hostname)-$(date -u +%H%M%SZ).log\" --only-show-errors || true' EXIT",
+    "bash infrastructure/spot_data_weekly.sh --rag-only 2>&1 | tee /var/log/rag-ingestion.log"
+  ]
+}
diff --git a/tests/test_sf_friday_shell_run_wiring.py b/tests/test_sf_friday_shell_run_wiring.py
index bbc7a2b..e0b645d 100644
--- a/tests/test_sf_friday_shell_run_wiring.py
+++ b/tests/test_sf_friday_shell_run_wiring.py
@@ -258,16 +258,21 @@ def _resolve_spot_commands(state: dict, preflight_args: str) -> list[str]:
 
 
 @pytest.fixture(scope="module")
-def orig_sf() -> dict:
-    """The pre-keystone (origin/main #258) SF, for byte-identical proof."""
-    import subprocess
-
-    raw = subprocess.check_output(
-        ["git", "show", "origin/main:infrastructure/step_function.json"],
-        cwd=_REPO_ROOT,
-        text=True,
-    )
-    return json.loads(raw)
+def orig_spot_cmds() -> dict:
+    """Frozen pre-keystone (#258 origin/main) RESOLVED spot command lists.
+
+    Captured at commit time into a committed fixture so the byte-identical
+    proof is HERMETIC. The prior implementation shelled out to
+    `git show origin/main:infrastructure/step_function.json` at test time,
+    which fails (exit 128) in CI's shallow PR checkout where `origin/main`
+    is not a local ref — that was the keystone CI failure.
+
+    Regenerate ONLY on a deliberate, reviewed change to a spot state's
+    absent-path (`preflight_args=""`) command, by re-extracting the
+    resolved spot commands from the new `origin/main` SF.
+    """
+    p = _REPO_ROOT / "tests" / "fixtures" / "sf_prekeystone_spot_commands.json"
+    return json.loads(p.read_text())
 
 
 @pytest.fixture(scope="module")
@@ -486,17 +491,12 @@ def _state(self, sf: dict, name: str) -> dict:
 
     @pytest.mark.parametrize("name", sorted(_SPOT_STATES))
     def test_spot_command_byte_identical_when_preflight_args_empty(
-        self, sf, orig_sf, name
+        self, sf, orig_spot_cmds, name
     ):
         new_cmds = _resolve_spot_commands(
             self._state(sf, name), preflight_args=""
         )
-        orig_state = self._state(orig_sf, name)
-        op = orig_state["Parameters"]["Parameters"]
-        if "commands" in op:
-            orig_cmds = op["commands"]
-        else:
-            orig_cmds = _resolve_spot_commands(orig_state, preflight_args="")
+        orig_cmds = orig_spot_cmds[name]
         assert new_cmds == orig_cmds, (
             f"{name}: keystone changed the absent-path command — the real "
             f"Saturday run would differ.\norig={orig_cmds[-1]!r}\n"