From bb24060f2ac3dc946cbd9aa076e552e081079e06 Mon Sep 17 00:00:00 2001 From: Henry Priest Date: Thu, 9 Oct 2025 13:43:31 -0500 Subject: [PATCH 1/7] Create apptainer-sipnet-carb.yml --- .github/workflows/apptainer-sipnet-carb.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/workflows/apptainer-sipnet-carb.yml diff --git a/.github/workflows/apptainer-sipnet-carb.yml b/.github/workflows/apptainer-sipnet-carb.yml new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/.github/workflows/apptainer-sipnet-carb.yml @@ -0,0 +1 @@ + From 83a23da5ee0b30126f9710b9a049f81a8ab763c5 Mon Sep 17 00:00:00 2001 From: Henry Priest Date: Thu, 11 Dec 2025 13:19:21 -0600 Subject: [PATCH 2/7] Create run-workflow-examples.yml --- .github/workflows/run-workflow-examples.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/workflows/run-workflow-examples.yml diff --git a/.github/workflows/run-workflow-examples.yml b/.github/workflows/run-workflow-examples.yml new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/.github/workflows/run-workflow-examples.yml @@ -0,0 +1 @@ + From c0f8255ddff2b56b34815fbb92e74b0ba747b51c Mon Sep 17 00:00:00 2001 From: Henry Priest Date: Fri, 20 Feb 2026 11:52:10 -0600 Subject: [PATCH 3/7] Add first iteration of workflow CLI with config files and data prep shell --- 2a_grass/00_fetch_s3_and_prepare_run_dir.sh | 113 +++++++ 2a_grass/example_user_config.yaml | 29 ++ 2a_grass/workflow_manifest.yaml | 87 +++++ magic-ensemble | 338 ++++++++++++++++++++ 4 files changed, 567 insertions(+) create mode 100755 2a_grass/00_fetch_s3_and_prepare_run_dir.sh create mode 100644 2a_grass/example_user_config.yaml create mode 100644 2a_grass/workflow_manifest.yaml create mode 100755 magic-ensemble diff --git a/2a_grass/00_fetch_s3_and_prepare_run_dir.sh b/2a_grass/00_fetch_s3_and_prepare_run_dir.sh new file mode 100755 index 0000000..16a1172 --- /dev/null +++ b/2a_grass/00_fetch_s3_and_prepare_run_dir.sh @@ -0,0 +1,113 @@ 
+#!/usr/bin/env bash +# 00_fetch_s3_and_prepare_run_dir.sh: fetch demo data from S3 and prepare run directory. +# Invoked by the 'get-demo-data' command (for users who do not have local data). +# All configuration is read from the workflow manifest or from environment variables set by the CLI. +# +# Required env (from CLI): +# RUN_DIR run directory (e.g. 2a_grass/run), relative to REPO_ROOT +# REPO_ROOT repo root (workflows directory) +# MANIFEST path to workflow_manifest.yaml +# COMMAND command name (e.g. get-demo-data) +# STEP_INDEX step index in that command (e.g. 0) +# +# Requires: yq (mikefarah/yq), aws CLI + +set -euo pipefail + +RUN_DIR="${RUN_DIR:?RUN_DIR is required}" +REPO_ROOT="${REPO_ROOT:?REPO_ROOT is required}" +MANIFEST="${MANIFEST:?MANIFEST is required}" +COMMAND="${COMMAND:-prepare}" +STEP_INDEX="${STEP_INDEX:-0}" + +if [[ ! -f "$MANIFEST" ]]; then + echo "00_fetch_s3_and_prepare_run_dir: Manifest not found: $MANIFEST" >&2 + exit 1 +fi + +if ! command -v yq &>/dev/null; then + echo "00_fetch_s3_and_prepare_run_dir: yq is required to read the manifest." >&2 + exit 1 +fi + +cd "$REPO_ROOT" + +# Resolve a path relative to run_dir (RUN_DIR may be absolute or relative to REPO_ROOT). 
+resolve_run_path() { + if [[ "$RUN_DIR" == /* ]]; then + echo "${RUN_DIR}/${1}" + else + echo "${REPO_ROOT}/${RUN_DIR}/${1}" + fi +} + +# --- Read from manifest --- +s3_endpoint=$(yq eval '.s3.endpoint_url' "$MANIFEST") + +# Artifact: url + filename from s3.artifact_02 +artifact_url=$(yq eval '.s3.artifact_02.url' "$MANIFEST") +artifact_filename=$(yq eval '.s3.artifact_02.filename' "$MANIFEST") +artifact_s3_uri="${artifact_url}/${artifact_filename}" + +# LandTrendr TIFs: two S3 resources and two local path segments from paths.landtrendr_raw_files +median_url=$(yq eval '.s3.median_tif.url' "$MANIFEST") +median_filename=$(yq eval '.s3.median_tif.filename' "$MANIFEST") +stdv_url=$(yq eval '.s3.stdv_tif.url' "$MANIFEST") +stdv_filename=$(yq eval '.s3.stdv_tif.filename' "$MANIFEST") +median_s3_uri="${median_url}/${median_filename}" +stdv_s3_uri="${stdv_url}/${stdv_filename}" + +landtrendr_paths_raw=$(yq eval '.paths.landtrendr_raw_files' "$MANIFEST") +# Split comma-separated; first segment = median, second = stdv +landtrendr_segment_1="${landtrendr_paths_raw%%,*}" +landtrendr_segment_2="${landtrendr_paths_raw#*,}" + +# Output path keys for this step: create these dirs (from manifest step.outputs) +output_keys=$(yq eval '.steps["'"$COMMAND"'"] | .['"$STEP_INDEX"'].outputs | .[]' "$MANIFEST" 2>/dev/null || true) + +# --- Create run directory and output dirs from manifest --- +echo "00_fetch_s3_and_prepare_run_dir: Creating run directory and output dirs from manifest" +mkdir -p "$RUN_DIR" + +while IFS= read -r path_key; do + [[ -z "$path_key" ]] && continue + path_value=$(yq eval '.paths["'"$path_key"'"]' "$MANIFEST" 2>/dev/null) + [[ -z "$path_value" || "$path_value" == "null" ]] && continue + resolved=$(resolve_run_path "$path_value") + mkdir -p "$resolved" +done <<< "$output_keys" + +# --- Download and extract artifact --- +if [[ -f "$artifact_filename" ]]; then + echo "00_fetch_s3_and_prepare_run_dir: Artifact tarball already present: $artifact_filename" +else + echo 
"00_fetch_s3_and_prepare_run_dir: Downloading artifact from S3" + aws s3 cp --endpoint-url "$s3_endpoint" "$artifact_s3_uri" "./$artifact_filename" +fi + +RUN_DIR_ABS=$(if [[ "$RUN_DIR" = /* ]]; then echo "$RUN_DIR"; else echo "$REPO_ROOT/$RUN_DIR"; fi) +echo "00_fetch_s3_and_prepare_run_dir: Extracting artifact into run directory" +tar -xzf "$artifact_filename" -C "$RUN_DIR_ABS" + +# --- Download LandTrendr TIFs if not present (paths from manifest: first=median, second=stdv) --- +seg1=$(echo "$landtrendr_segment_1" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') +seg2=$(echo "$landtrendr_segment_2" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') + +download_tif() { + local seg="$1" + local s3_uri="$2" + local label="$3" + [[ -z "$seg" ]] && return 0 + resolved=$(resolve_run_path "$seg") + if [[ -f "$resolved" ]]; then + echo "00_fetch_s3_and_prepare_run_dir: Already present: $resolved" + else + mkdir -p "$(dirname "$resolved")" + echo "00_fetch_s3_and_prepare_run_dir: Downloading $label from S3" + aws s3 cp --endpoint-url "$s3_endpoint" "$s3_uri" "$resolved" + fi +} +download_tif "$seg1" "$median_s3_uri" "median TIF" +download_tif "$seg2" "$stdv_s3_uri" "stdv TIF" + +echo "00_fetch_s3_and_prepare_run_dir: Done." diff --git a/2a_grass/example_user_config.yaml b/2a_grass/example_user_config.yaml new file mode 100644 index 0000000..f1fcbfb --- /dev/null +++ b/2a_grass/example_user_config.yaml @@ -0,0 +1,29 @@ +# Example user-facing config for 2a_grass workflows. +# Pass with: ./magic-ensemble --config workflows/2a_grass/example_user_config.yaml +# +# This file contains only overridable settings. Fixed paths, S3 resources, and +# step I/O are defined in workflow_manifest.yaml (do not put those here). + +# Run directory: where outputs and run-specific data live. +# Relative to the CWD where you invoke the CLI, unless you use an absolute path. +run_dir: "2a_grass/run" + +# Dates used by prepare and run-ensembles. 
+start_date: "2016-01-01" +end_date: "2023-12-31" +run_LAI_date: "2016-07-01" + +# Ensemble sizes. +n_ens: 20 +n_met: 10 +ic_ensemble_size: 100 + +# Parallelism (e.g. for step 01 --n_cores). +n_workers: 1 + +# Optional: distributed compute adapter (for future use with Slurm/Apptainer). +# distributed_compute_adapter: +# name: "localhost" +# qsub: "sbatch -J @NAME@ -o @STDOUT@ -e @STDERR@ apptainer run ./sipnet-carb_develop.sif" +# qsub_jobid: "Submitted batch job ([0-9]+)" +# qstat: 'if test -z "$(squeue -h -j @JOBID@)"; then echo "DONE"; fi' diff --git a/2a_grass/workflow_manifest.yaml b/2a_grass/workflow_manifest.yaml new file mode 100644 index 0000000..ef48585 --- /dev/null +++ b/2a_grass/workflow_manifest.yaml @@ -0,0 +1,87 @@ +# Workflow manifest: fixed paths and step I/O (internal, not user-facing). +# CLI loads this automatically; do not pass via --config. +# +# Paths: All entries under 'paths' are inside the run directory (no paths outside run_dir). +# Keys are referenced by name in steps (inputs/outputs). At runtime the CLI resolves +# each path as run_dir + "/" + value. +# +# Steps: Each command has a list of step objects. Each step has: +# script: R script path (relative to repo root) +# r_libraries: R packages to check before running this script +# inputs: List of path keys (from 'paths') this script reads (local paths only) +# outputs: List of path keys this script creates or writes + +# S3 resources (not in user config). Remote resources are localized before R runs. 
+s3: + endpoint_url: "https://s3.garage.ccmmf.ncsa.cloud" + bucket: "carb" + artifact_02: + url: "s3://carb/data/workflows/phase_2a" + filename: "ccmmf_phase_2a_input_artifacts.tgz" + median_tif: + url: "s3://carb/data_raw" + filename: "ca_biomassfiaald_2016_median.tif" + stdv_tif: + url: "s3://carb/data_raw" + filename: "ca_biomassfiaald_2016_stdv.tif" + +# Apptainer (not in user config) +apptainer: + remote: + url: "docker://hdpriest0uiuc/" + container: + name: "sipnet-carb" + tag: "develop" + sif: "sipnet-carb_develop.sif" + +# Path definitions: all contained within the run directory. +# Values are relative to run_dir; CLI resolves as run_dir + "/" + value. +paths: + site_info_file: "site_info.csv" + site_sipnet_met_path: "data/ERA5_SIPNET" + site_era5_path: "data_raw/ERA5_nc" + field_shape_path: "data_raw/dwr_map/i15_Crop_Mapping_2018.gdb" + data_dir: "data/IC_prep" + ic_outdir: "IC_files" + pft_dir: "pfts" + landtrendr_raw_files: "data_raw/ca_biomassfiaald_2016_median.tif,data_raw/ca_biomassfiaald_2016_stdv.tif" + site_file: "site_info.csv" + template_file: "template.xml" + output_file: "settings.xml" + met_dir: "data/ERA5_SIPNET" + ic_dir: "IC_files" + settings_xml: "settings.xml" + +# Fixed workflow values (not user overrides) +params_from_pft: "SLA,leafC" +additional_params: "varname=wood_carbon_fraction,distn=norm,parama=0.48,paramb=0.005" + +# Steps per command: script path, R libs to check (empty for shell scripts), input/output path keys +steps: + get-demo-data: + - script: "2a_grass/00_fetch_s3_and_prepare_run_dir.sh" + r_libraries: [] + inputs: [] + outputs: [data_dir, ic_outdir, site_sipnet_met_path] + + prepare: + - script: "2a_grass/01_ERA5_nc_to_clim.R" + r_libraries: [future, furrr] + inputs: [site_info_file, site_era5_path] + outputs: [site_sipnet_met_path] + + - script: "2a_grass/02_ic_build.R" + r_libraries: [tidyverse] + inputs: [site_info_file, field_shape_path, pft_dir, data_dir, landtrendr_raw_files] + outputs: [ic_outdir, data_dir] + + - 
script: "2a_grass/03_xml_build.R" + r_libraries: [PEcAn.settings] + inputs: [site_file, template_file, ic_dir, met_dir] + outputs: [output_file] + + run-ensembles: + - script: "2a_grass/04_run_model.R" + r_libraries: [PEcAn.all] + inputs: [settings_xml] + outputs: [] diff --git a/magic-ensemble b/magic-ensemble new file mode 100755 index 0000000..96c05df --- /dev/null +++ b/magic-ensemble @@ -0,0 +1,338 @@ +#!/usr/bin/env bash +# magic-ensemble: minimal CLI for workflows (2a_grass). +# Usage: ./magic-ensemble [--dry-run] [--verbose] [--config ] +# Commands: help | get-demo-data | prepare | run-ensembles + +set -euo pipefail + +# --- Repo root, manifest, and invocation CWD --- +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$SCRIPT_DIR" +MANIFEST="${REPO_ROOT}/2a_grass/workflow_manifest.yaml" +INVOCATION_CWD="${INVOCATION_CWD:-$(pwd)}" + +usage() { + cat <<'EOF' +Usage: ./magic-ensemble [global options] + +Commands: + help Print this usage and help (no scripts run). + get-demo-data Fetch demo data from S3 and create run directory (for users without local data). + prepare Run preparation steps: 01 (ERA5→clim), 02 (IC build), 03 (XML build). + run-ensembles Run step 04 (run model) using existing settings.xml and prepared inputs. + +Global options (after command): + --dry-run Do not run R scripts; print what would be run and run pre-execution checks. + --verbose Echo each Rscript command before running. + --config Path to user YAML config (overridable keys only; fixed paths are in workflow manifest). + +Examples: + ./magic-ensemble help + ./magic-ensemble get-demo-data --config my_config.yaml + ./magic-ensemble prepare --dry-run + ./magic-ensemble prepare --config my_config.yaml --verbose + ./magic-ensemble run-ensembles --config my_config.yaml +EOF +} + +# --- Require yq (mikefarah/yq, jq-style) --- +require_yq() { + if ! command -v yq &>/dev/null; then + echo "magic-ensemble: yq is required to read YAML. 
Install mikefarah/yq: https://github.com/mikefarah/yq" >&2 + exit 1 + fi + if ! yq eval '.' "$MANIFEST" &>/dev/null; then + echo "magic-ensemble: Could not parse manifest with yq. This CLI requires mikefarah/yq (jq-style). Your 'yq' may be a different implementation." >&2 + exit 1 + fi +} + +# --- Parse arguments: command first, then global options --- +COMMAND="" +DRY_RUN=0 +VERBOSE=0 +CONFIG_FILE="" +while [[ $# -gt 0 ]]; do + case "$1" in + help|get-demo-data|prepare|run-ensembles) + if [[ -z "$COMMAND" ]]; then COMMAND="$1"; shift; continue; fi + ;; + --dry-run) DRY_RUN=1; shift; continue ;; + --verbose) VERBOSE=1; shift; continue ;; + --config) + if [[ $# -lt 2 ]]; then echo "magic-ensemble: --config requires ." >&2; usage >&2; exit 1; fi + CONFIG_FILE="$2"; shift 2; continue + ;; + -*) + echo "magic-ensemble: Unknown option: $1" >&2; usage >&2; exit 1 + ;; + *) + if [[ -z "$COMMAND" ]]; then COMMAND="$1"; shift; continue; fi + echo "magic-ensemble: Unexpected argument: $1" >&2; usage >&2; exit 1 + ;; + esac + shift +done + +# --- Help or no command --- +if [[ -z "$COMMAND" || "$COMMAND" == "help" ]]; then + usage + exit 0 +fi + +if [[ "$COMMAND" != "get-demo-data" && "$COMMAND" != "prepare" && "$COMMAND" != "run-ensembles" ]]; then + echo "magic-ensemble: Unknown command: $COMMAND" >&2 + usage >&2 + exit 1 +fi + +require_yq +if [[ ! 
-f "$MANIFEST" ]]; then + echo "magic-ensemble: Workflow manifest not found: $MANIFEST" >&2 + exit 1 +fi + +# --- Load effective config: manifest + optional user overrides --- +# User config may contain: run_dir, start_date, end_date, run_LAI_date, n_ens, n_met, ic_ensemble_size, n_workers +get_val() { + local key="$1" + local from_manifest="$2" + if [[ -n "$CONFIG_FILE" && -f "$CONFIG_FILE" ]]; then + local u + u=$(yq eval ".$key // .paths.$key // .dates.$key // .ensemble.$key // empty" "$CONFIG_FILE" 2>/dev/null) + if [[ -n "$u" && "$u" != "null" ]]; then + echo "$u" + return + fi + fi + echo "$from_manifest" +} + +# Read manifest paths and fixed values +p_site_info_file=$(yq eval '.paths.site_info_file' "$MANIFEST") +p_site_sipnet_met_path=$(yq eval '.paths.site_sipnet_met_path' "$MANIFEST") +p_site_era5_path=$(yq eval '.paths.site_era5_path' "$MANIFEST") +p_field_shape_path=$(yq eval '.paths.field_shape_path' "$MANIFEST") +p_data_dir=$(yq eval '.paths.data_dir' "$MANIFEST") +p_ic_outdir=$(yq eval '.paths.ic_outdir' "$MANIFEST") +p_pft_dir=$(yq eval '.paths.pft_dir' "$MANIFEST") +p_landtrendr_raw_files=$(yq eval '.paths.landtrendr_raw_files' "$MANIFEST") +p_site_file=$(yq eval '.paths.site_file' "$MANIFEST") +p_template_file=$(yq eval '.paths.template_file' "$MANIFEST") +p_output_file=$(yq eval '.paths.output_file' "$MANIFEST") +p_met_dir=$(yq eval '.paths.met_dir' "$MANIFEST") +p_ic_dir=$(yq eval '.paths.ic_dir' "$MANIFEST") +p_settings_xml=$(yq eval '.paths.settings_xml' "$MANIFEST") +params_from_pft=$(yq eval '.params_from_pft' "$MANIFEST") +additional_params=$(yq eval '.additional_params' "$MANIFEST") + +# Overridable defaults (manifest may not have these; use script defaults if not in user config) +run_dir_default="magic-ensemble-run-directory/" +start_date_default="2016-01-01" +end_date_default="2023-12-31" +run_LAI_date_default="2016-07-01" +n_ens_default="20" +n_met_default="10" +ic_ensemble_size_default="100" +n_workers_default="1" + +run_dir=$(get_val 
"run_dir" "$run_dir_default") +# If run_dir is not absolute, resolve relative to CWD where the CLI was invoked +if [[ "$run_dir" != /* ]]; then + run_dir="${INVOCATION_CWD}/${run_dir}" +fi +start_date=$(get_val "start_date" "$start_date_default") +end_date=$(get_val "end_date" "$end_date_default") +run_LAI_date=$(get_val "run_LAI_date" "$run_LAI_date_default") +n_ens=$(get_val "n_ens" "$n_ens_default") +n_met=$(get_val "n_met" "$n_met_default") +ic_ensemble_size=$(get_val "ic_ensemble_size" "$ic_ensemble_size_default") +n_workers=$(get_val "n_workers" "$n_workers_default") + +# Resolve manifest paths relative to run_dir (then relative to repo root). +# Effective path = run_dir / manifest_path so R (CWD=REPO_ROOT) sees the correct file. +resolve_path() { echo "${run_dir}/${1}"; } +site_info_file=$(resolve_path "$p_site_info_file") +site_sipnet_met_path=$(resolve_path "$p_site_sipnet_met_path") +site_era5_path=$(resolve_path "$p_site_era5_path") +field_shape_path=$(resolve_path "$p_field_shape_path") +data_dir=$(resolve_path "$p_data_dir") +ic_outdir=$(resolve_path "$p_ic_outdir") +pft_dir=$(resolve_path "$p_pft_dir") +landtrendr_raw_files=$(resolve_path "$p_landtrendr_raw_files") +site_file=$(resolve_path "$p_site_file") +template_file=$(resolve_path "$p_template_file") +output_file=$(resolve_path "$p_output_file") +met_dir=$(resolve_path "$p_met_dir") +ic_dir=$(resolve_path "$p_ic_dir") +settings_xml=$(resolve_path "$p_settings_xml") +# landtrendr_raw_files is comma-separated; resolve each segment +landtrendr_raw_files="" +while IFS= read -r segment; do + segment=$(echo "$segment" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') + [[ -z "$segment" ]] && continue + [[ -n "$landtrendr_raw_files" ]] && landtrendr_raw_files="${landtrendr_raw_files}," + landtrendr_raw_files="${landtrendr_raw_files}${run_dir}/${segment}" +done < <(yq eval '.paths.landtrendr_raw_files' "$MANIFEST" | tr ',' '\n') + +# --- Pre-execution: AWS S3 tools check --- +check_aws() { + if ! 
command -v aws &>/dev/null; then + echo "magic-ensemble: AWS CLI (aws) not found on PATH; required for S3 access." >&2 + exit 1 + fi +} + +# --- Get list of script paths for current command (from manifest steps) --- +get_steps() { + yq eval '.steps["'"$COMMAND"'"] | .[].script' "$MANIFEST" +} + +# --- Populate STEPS array for current command (from manifest) --- +get_steps_array() { + STEPS=() + while IFS= read -r s; do + [[ -n "$s" ]] && STEPS+=("$s") + done < <(get_steps) +} + +# --- R library check for step at index i (reads r_libraries from manifest step; skip if empty or .sh) --- +check_r_libs_for_step() { + local i="$1" + local script="${STEPS[i]}" + [[ "$script" == *.sh ]] && return 0 + local lib + while IFS= read -r lib; do + [[ -z "$lib" || "$lib" == "null" ]] && continue + if ! (cd "$REPO_ROOT" && Rscript -e "library(\"$lib\")") 2>/dev/null; then + echo "magic-ensemble: R library check failed: library(\"$lib\") not available. Install it or activate the correct environment." >&2 + exit 1 + fi + done < <(yq eval '.steps["'"$COMMAND"'"] | .['"$i"'].r_libraries | .[]?' "$MANIFEST" 2>/dev/null || true) +} + +# --- Dry-run: print scripts and optionally run checks --- +do_dry_run() { + echo "magic-ensemble: dry-run for command: $COMMAND" + echo "Would run the following scripts (CWD = $REPO_ROOT):" + while IFS= read -r script; do + [[ -z "$script" ]] && continue + script_path="${REPO_ROOT}/${script}" + if [[ -f "$script_path" ]]; then + echo " - $script (exists)" + else + echo " - $script (MISSING)" + fi + done < <(get_steps) + echo "" + echo "Pre-execution checks (R libraries, AWS CLI) can be run when not in dry-run." + exit 0 +} + +# --- Run R script with args; CWD = REPO_ROOT --- +run_script() { + local script="$1" + shift + local script_path="${REPO_ROOT}/${script}" + if [[ ! 
-f "$script_path" ]]; then + echo "magic-ensemble: Script not found: $script_path" >&2 + exit 1 + fi + if [[ $VERBOSE -eq 1 ]]; then + echo "Rscript $script_path $*" >&2 + fi + (cd "$REPO_ROOT" && Rscript "$script_path" "$@") +} + +# --- Run shell script; CWD = REPO_ROOT. Pass COMMAND and STEP_INDEX for manifest lookups. --- +run_shell_script() { + local script="$1" + local step_index="${2:-0}" + local script_path="${REPO_ROOT}/${script}" + if [[ ! -f "$script_path" ]]; then + echo "magic-ensemble: Script not found: $script_path" >&2 + exit 1 + fi + if [[ $VERBOSE -eq 1 ]]; then + echo "RUN_DIR=$run_dir REPO_ROOT=$REPO_ROOT MANIFEST=$MANIFEST COMMAND=$COMMAND STEP_INDEX=$step_index bash $script_path" >&2 + fi + (cd "$REPO_ROOT" && RUN_DIR="$run_dir" REPO_ROOT="$REPO_ROOT" MANIFEST="$MANIFEST" COMMAND="$COMMAND" STEP_INDEX="$step_index" bash "$script_path") +} + +# --- Get-demo-data: run steps from manifest (shell script only) --- +run_get_demo_data() { + get_steps_array + check_aws + for i in "${!STEPS[@]}"; do + check_r_libs_for_step "$i" + run_shell_script "${STEPS[i]}" "$i" + done +} + +# --- Prepare: run steps from manifest (01, 02, 03 with R args) --- +run_prepare() { + get_steps_array + check_aws + for i in "${!STEPS[@]}"; do + check_r_libs_for_step "$i" + done + + for i in "${!STEPS[@]}"; do + case "$i" in + 0) run_script "${STEPS[i]}" \ + --site_era5_path "$site_era5_path" \ + --site_sipnet_met_path "$site_sipnet_met_path" \ + --site_info_file "$site_info_file" \ + --start_date "$start_date" \ + --end_date "$end_date" \ + --n_cores "$n_workers" \ + --parallel_strategy "multisession" ;; + 1) run_script "${STEPS[i]}" \ + --site_info_path "$site_info_file" \ + --field_shape_path "$field_shape_path" \ + --ic_ensemble_size "$ic_ensemble_size" \ + --run_start_date "$start_date" \ + --run_LAI_date "$run_LAI_date" \ + --ic_outdir "$ic_outdir" \ + --data_dir "$data_dir" \ + --pft_dir "$pft_dir" \ + --params_read_from_pft "$params_from_pft" \ + --landtrendr_raw_files 
"$landtrendr_raw_files" \ + --additional_params "$additional_params" ;; + 2) run_script "${STEPS[i]}" \ + --n_ens "$n_ens" \ + --n_met "$n_met" \ + --start_date "$start_date" \ + --end_date "$end_date" \ + --ic_dir "$ic_dir" \ + --met_dir "$met_dir" \ + --site_file "$site_file" \ + --template_file "$template_file" \ + --output_file "$output_file" ;; + *) echo "magic-ensemble: No argument mapping for prepare step index $i" >&2; exit 1 ;; + esac + done +} + +# --- Run-ensembles: run single step from manifest (04) --- +run_run_ensembles() { + get_steps_array + check_aws + check_r_libs_for_step 0 + + run_script "${STEPS[0]}" \ + --settings "$settings_xml" \ + --continue "FALSE" +} + +# --- Main --- +if [[ $DRY_RUN -eq 1 ]]; then + do_dry_run +fi + +case "$COMMAND" in + get-demo-data) run_get_demo_data ;; + prepare) run_prepare ;; + run-ensembles) run_run_ensembles ;; + *) echo "magic-ensemble: Unknown command: $COMMAND" >&2; exit 1 ;; +esac From f6f0f7bd5cbb4957ee7c39cbc1ff9d8fc753e463 Mon Sep 17 00:00:00 2001 From: Henry Priest Date: Fri, 20 Feb 2026 16:06:45 -0600 Subject: [PATCH 4/7] Enhance run directory handling in scripts: update magic-ensemble to display run directory during dry-run and modify 00_fetch_s3_and_prepare_run_dir.sh to resolve and use absolute run directory for artifact downloads and extractions. 
--- 2a_grass/00_fetch_s3_and_prepare_run_dir.sh | 20 +++++++++++--------- magic-ensemble | 1 + 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/2a_grass/00_fetch_s3_and_prepare_run_dir.sh b/2a_grass/00_fetch_s3_and_prepare_run_dir.sh index 16a1172..d4a4bb9 100755 --- a/2a_grass/00_fetch_s3_and_prepare_run_dir.sh +++ b/2a_grass/00_fetch_s3_and_prepare_run_dir.sh @@ -65,9 +65,12 @@ landtrendr_segment_2="${landtrendr_paths_raw#*,}" # Output path keys for this step: create these dirs (from manifest step.outputs) output_keys=$(yq eval '.steps["'"$COMMAND"'"] | .['"$STEP_INDEX"'].outputs | .[]' "$MANIFEST" 2>/dev/null || true) +# --- Resolve absolute run directory (for downloads and extract) --- +RUN_DIR_ABS=$(if [[ "$RUN_DIR" = /* ]]; then echo "$RUN_DIR"; else echo "$REPO_ROOT/$RUN_DIR"; fi) + # --- Create run directory and output dirs from manifest --- echo "00_fetch_s3_and_prepare_run_dir: Creating run directory and output dirs from manifest" -mkdir -p "$RUN_DIR" +mkdir -p "$RUN_DIR_ABS" while IFS= read -r path_key; do [[ -z "$path_key" ]] && continue @@ -77,17 +80,16 @@ while IFS= read -r path_key; do mkdir -p "$resolved" done <<< "$output_keys" -# --- Download and extract artifact --- -if [[ -f "$artifact_filename" ]]; then - echo "00_fetch_s3_and_prepare_run_dir: Artifact tarball already present: $artifact_filename" +# --- Download artifact tarball into run directory and extract --- +artifact_local="${RUN_DIR_ABS}/${artifact_filename}" +if [[ -f "$artifact_local" ]]; then + echo "00_fetch_s3_and_prepare_run_dir: Artifact tarball already present in run dir: $artifact_local" else - echo "00_fetch_s3_and_prepare_run_dir: Downloading artifact from S3" - aws s3 cp --endpoint-url "$s3_endpoint" "$artifact_s3_uri" "./$artifact_filename" + echo "00_fetch_s3_and_prepare_run_dir: Downloading artifact from S3 into run directory" + aws s3 cp --endpoint-url "$s3_endpoint" "$artifact_s3_uri" "$artifact_local" fi - -RUN_DIR_ABS=$(if [[ "$RUN_DIR" = /* ]]; then echo 
"$RUN_DIR"; else echo "$REPO_ROOT/$RUN_DIR"; fi) echo "00_fetch_s3_and_prepare_run_dir: Extracting artifact into run directory" -tar -xzf "$artifact_filename" -C "$RUN_DIR_ABS" +tar -xzf "$artifact_local" -C "$RUN_DIR_ABS" # --- Download LandTrendr TIFs if not present (paths from manifest: first=median, second=stdv) --- seg1=$(echo "$landtrendr_segment_1" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') diff --git a/magic-ensemble b/magic-ensemble index 96c05df..bd534e8 100755 --- a/magic-ensemble +++ b/magic-ensemble @@ -214,6 +214,7 @@ check_r_libs_for_step() { # --- Dry-run: print scripts and optionally run checks --- do_dry_run() { echo "magic-ensemble: dry-run for command: $COMMAND" + echo "Run directory (for this execution): $run_dir" echo "Would run the following scripts (CWD = $REPO_ROOT):" while IFS= read -r script; do [[ -z "$script" ]] && continue From 2a4c4a647921e4555cf21fb41d5f2335c14e85fd Mon Sep 17 00:00:00 2001 From: Henry Priest Date: Wed, 18 Mar 2026 19:41:16 +0000 Subject: [PATCH 5/7] Add Apptainer support, dispatch configuration, and external input staging to workflow CLI - magic-ensemble: --config is now required; supports use_apptainer (run prepare steps inside a container) and pecan_dispatch (select how ensemble members are submitted/executed) - workflow_manifest.yaml: defines available dispatch modes (local-gnu-parallel, slurm-dispatch) with appropriate host XML for native and apptainer execution; S3 resources consolidated - Prep scripts: accept CLI flags instead of env vars; stage user-provided external files (e.g. 
template.xml) into the run directory before prepare steps run - tools/patch_xml.py: utility to patch elements in PEcAn XML config files in-place - 01_ERA5_nc_to_clim.R: ERA5 met inputs now looked up by grid cell center rather than site id - example_user_config.yaml: documents new user-facing options (use_apptainer, pecan_dispatch, external_paths) Relates to: https://github.com/orgs/ccmmf/discussions/182 --- .gitignore | 2 + 2a_grass/00_fetch_s3_and_prepare_run_dir.sh | 154 +++++++-- 2a_grass/00_stage_external_inputs.sh | 194 +++++++++++ 2a_grass/01_ERA5_nc_to_clim.R | 21 +- 2a_grass/example_user_config.yaml | 28 +- 2a_grass/template.xml | 14 +- 2a_grass/workflow_manifest.yaml | 70 +++- magic-ensemble | 354 +++++++++++++++----- tools/patch_xml.py | 97 ++++++ 9 files changed, 798 insertions(+), 136 deletions(-) create mode 100644 2a_grass/00_stage_external_inputs.sh create mode 100644 tools/patch_xml.py diff --git a/.gitignore b/.gitignore index 0558174..540bcd8 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ # data files not marked test **/IC_files/** **/data/** +**/data_raw/** **/pfts/** # R-specific files @@ -43,3 +44,4 @@ Thumbs.db # Temporary files *.tmp *.log +**/local_dev_notes/** diff --git a/2a_grass/00_fetch_s3_and_prepare_run_dir.sh b/2a_grass/00_fetch_s3_and_prepare_run_dir.sh index d4a4bb9..13afd3a 100755 --- a/2a_grass/00_fetch_s3_and_prepare_run_dir.sh +++ b/2a_grass/00_fetch_s3_and_prepare_run_dir.sh @@ -1,24 +1,98 @@ #!/usr/bin/env bash # 00_fetch_s3_and_prepare_run_dir.sh: fetch demo data from S3 and prepare run directory. # Invoked by the 'get-demo-data' command (for users who do not have local data). -# All configuration is read from the workflow manifest or from environment variables set by the CLI. -# -# Required env (from CLI): -# RUN_DIR run directory (e.g. 2a_grass/run), relative to REPO_ROOT -# REPO_ROOT repo root (workflows directory) -# MANIFEST path to workflow_manifest.yaml -# COMMAND command name (e.g. 
get-demo-data) -# STEP_INDEX step index in that command (e.g. 0) +# S3 URLs and path keys come from the workflow manifest; run dir and paths are passed as arguments. # # Requires: yq (mikefarah/yq), aws CLI +# +# Options (see --help): --repo-root (required); --manifest optional, defaults to /2a_grass/workflow_manifest.yaml set -euo pipefail -RUN_DIR="${RUN_DIR:?RUN_DIR is required}" -REPO_ROOT="${REPO_ROOT:?REPO_ROOT is required}" -MANIFEST="${MANIFEST:?MANIFEST is required}" -COMMAND="${COMMAND:-prepare}" -STEP_INDEX="${STEP_INDEX:-0}" +usage() { + cat <<'EOF' +Usage: 00_fetch_s3_and_prepare_run_dir.sh [OPTIONS] + +Fetch demo data from S3 and prepare the run directory. S3 URLs and path keys are +read from the workflow manifest. Run directory is either from --run-dir or from +run_dir in the file given by --config (relative paths resolved with --invocation-cwd). + +Required: + --repo-root PATH Repo root (workflows directory). Script changes to this directory. + +Run directory (one of): + --run-dir PATH Run directory (absolute, or relative to --repo-root). + --config PATH User YAML config file; script reads run_dir from it (use with --invocation-cwd). + +Optional: + --manifest PATH Path to workflow_manifest.yaml (default: /2a_grass/workflow_manifest.yaml). + --invocation-cwd PATH Required when using --config with a relative run_dir. Paths reported relative to this. + --command NAME Command name for manifest step lookup (default: get-demo-data). + --step-index N Step index in that command (default: 0). + -h, --help Print this help and exit. +EOF +} + +RUN_DIR="" +CONFIG_FILE="" +REPO_ROOT="" +MANIFEST="" +COMMAND="get-demo-data" +STEP_INDEX="0" +INVOCATION_CWD="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --run-dir) [[ $# -lt 2 ]] && { echo "00_fetch_s3_and_prepare_run_dir: --run-dir requires PATH." >&2; usage >&2; exit 1; }; RUN_DIR="$2"; shift 2 ;; + --config) [[ $# -lt 2 ]] && { echo "00_fetch_s3_and_prepare_run_dir: --config requires PATH." 
>&2; usage >&2; exit 1; }; CONFIG_FILE="$2"; shift 2 ;; + --repo-root) [[ $# -lt 2 ]] && { echo "00_fetch_s3_and_prepare_run_dir: --repo-root requires PATH." >&2; usage >&2; exit 1; }; REPO_ROOT="$2"; shift 2 ;; + --manifest) [[ $# -lt 2 ]] && { echo "00_fetch_s3_and_prepare_run_dir: --manifest requires PATH." >&2; usage >&2; exit 1; }; MANIFEST="$2"; shift 2 ;; + --command) [[ $# -lt 2 ]] && { echo "00_fetch_s3_and_prepare_run_dir: --command requires NAME." >&2; usage >&2; exit 1; }; COMMAND="$2"; shift 2 ;; + --step-index) [[ $# -lt 2 ]] && { echo "00_fetch_s3_and_prepare_run_dir: --step-index requires N." >&2; usage >&2; exit 1; }; STEP_INDEX="$2"; shift 2 ;; + --invocation-cwd) [[ $# -lt 2 ]] && { echo "00_fetch_s3_and_prepare_run_dir: --invocation-cwd requires PATH." >&2; usage >&2; exit 1; }; INVOCATION_CWD="$2"; shift 2 ;; + -h|--help) usage; exit 0 ;; + *) echo "00_fetch_s3_and_prepare_run_dir: Unknown option: $1" >&2; usage >&2; exit 1 ;; + esac +done + +if [[ -z "$REPO_ROOT" ]]; then echo "00_fetch_s3_and_prepare_run_dir: --repo-root is required." >&2; usage >&2; exit 1; fi +if [[ -z "$MANIFEST" ]]; then + MANIFEST="${REPO_ROOT}/2a_grass/workflow_manifest.yaml" +fi + +# Run directory: from --run-dir or from config file +if [[ -n "$CONFIG_FILE" ]]; then + if [[ ! -f "$CONFIG_FILE" ]]; then + echo "00_fetch_s3_and_prepare_run_dir: Config file not found: $CONFIG_FILE" >&2 + exit 1 + fi + RUN_DIR=$(yq eval '.run_dir' "$CONFIG_FILE") || { echo "00_fetch_s3_and_prepare_run_dir: yq failed to read .run_dir from config: $CONFIG_FILE" >&2; exit 1; } + if [[ -z "$RUN_DIR" || "$RUN_DIR" == "null" ]]; then + echo "00_fetch_s3_and_prepare_run_dir: run_dir not found or empty in config (expected .run_dir): $CONFIG_FILE" >&2 + exit 1 + fi + if [[ "$RUN_DIR" != /* ]]; then + if [[ -z "$INVOCATION_CWD" ]]; then + echo "00_fetch_s3_and_prepare_run_dir: --invocation-cwd is required when run_dir in config is relative." 
>&2 + exit 1 + fi + RUN_DIR="${INVOCATION_CWD}/${RUN_DIR}" + fi +elif [[ -z "$RUN_DIR" ]]; then + echo "00_fetch_s3_and_prepare_run_dir: Provide --run-dir or --config (with run_dir in the config file)." >&2 + usage >&2 + exit 1 +fi + +# Show path for user: relative to INVOCATION_CWD if under it, else absolute +report_path() { + local abs_path="$1" + if [[ -n "$INVOCATION_CWD" && "$abs_path" == "$INVOCATION_CWD"/* ]]; then + echo "${abs_path#"$INVOCATION_CWD"/}" + else + echo "$abs_path" + fi +} if [[ ! -f "$MANIFEST" ]]; then echo "00_fetch_s3_and_prepare_run_dir: Manifest not found: $MANIFEST" >&2 @@ -41,21 +115,37 @@ resolve_run_path() { fi } -# --- Read from manifest --- +# --- Read from manifest (endpoint, bucket, and per-resource key_prefix + filename) --- s3_endpoint=$(yq eval '.s3.endpoint_url' "$MANIFEST") +s3_bucket=$(yq eval '.s3.bucket' "$MANIFEST") + +# Build S3 key from key_prefix + filename (key_prefix may be empty or null from yq) +s3_key() { + local prefix="$1" + local name="$2" + [[ "$prefix" == "null" || -z "$prefix" ]] && prefix="" + if [[ -n "$prefix" ]]; then + echo "${prefix}/${name}" + else + echo "$name" + fi +} -# Artifact: url + filename from s3.artifact_02 -artifact_url=$(yq eval '.s3.artifact_02.url' "$MANIFEST") +# Artifact: bucket + key from s3.artifact_02 +artifact_key_prefix=$(yq eval '.s3.artifact_02.key_prefix' "$MANIFEST") artifact_filename=$(yq eval '.s3.artifact_02.filename' "$MANIFEST") -artifact_s3_uri="${artifact_url}/${artifact_filename}" +artifact_s3_key=$(s3_key "$artifact_key_prefix" "$artifact_filename") +artifact_s3_uri="s3://${s3_bucket}/${artifact_s3_key}" -# LandTrendr TIFs: two S3 resources and two local path segments from paths.landtrendr_raw_files -median_url=$(yq eval '.s3.median_tif.url' "$MANIFEST") +# LandTrendr TIFs: bucket + key from s3.median_tif and s3.stdv_tif +median_key_prefix=$(yq eval '.s3.median_tif.key_prefix' "$MANIFEST") median_filename=$(yq eval '.s3.median_tif.filename' "$MANIFEST") 
-stdv_url=$(yq eval '.s3.stdv_tif.url' "$MANIFEST") +stdv_key_prefix=$(yq eval '.s3.stdv_tif.key_prefix' "$MANIFEST") stdv_filename=$(yq eval '.s3.stdv_tif.filename' "$MANIFEST") -median_s3_uri="${median_url}/${median_filename}" -stdv_s3_uri="${stdv_url}/${stdv_filename}" +median_s3_key=$(s3_key "$median_key_prefix" "$median_filename") +stdv_s3_key=$(s3_key "$stdv_key_prefix" "$stdv_filename") +median_s3_uri="s3://${s3_bucket}/${median_s3_key}" +stdv_s3_uri="s3://${s3_bucket}/${stdv_s3_key}" landtrendr_paths_raw=$(yq eval '.paths.landtrendr_raw_files' "$MANIFEST") # Split comma-separated; first segment = median, second = stdv @@ -68,9 +158,11 @@ output_keys=$(yq eval '.steps["'"$COMMAND"'"] | .['"$STEP_INDEX"'].outputs | .[] # --- Resolve absolute run directory (for downloads and extract) --- RUN_DIR_ABS=$(if [[ "$RUN_DIR" = /* ]]; then echo "$RUN_DIR"; else echo "$REPO_ROOT/$RUN_DIR"; fi) -# --- Create run directory and output dirs from manifest --- +# --- Create run directory and canonicalize so paths have no ".." 
(clean aws/tar output) --- echo "00_fetch_s3_and_prepare_run_dir: Creating run directory and output dirs from manifest" mkdir -p "$RUN_DIR_ABS" +RUN_DIR_ABS=$(cd "$RUN_DIR_ABS" && pwd) +RUN_DIR="$RUN_DIR_ABS" while IFS= read -r path_key; do [[ -z "$path_key" ]] && continue @@ -82,11 +174,13 @@ done <<< "$output_keys" # --- Download artifact tarball into run directory and extract --- artifact_local="${RUN_DIR_ABS}/${artifact_filename}" +artifact_report=$(report_path "$artifact_local") if [[ -f "$artifact_local" ]]; then - echo "00_fetch_s3_and_prepare_run_dir: Artifact tarball already present in run dir: $artifact_local" + echo "00_fetch_s3_and_prepare_run_dir: Artifact tarball already present in run dir: $artifact_report" else echo "00_fetch_s3_and_prepare_run_dir: Downloading artifact from S3 into run directory" - aws s3 cp --endpoint-url "$s3_endpoint" "$artifact_s3_uri" "$artifact_local" + echo "00_fetch_s3_and_prepare_run_dir: Saving to: $artifact_report" + (cd "$RUN_DIR_ABS" && aws s3 cp --endpoint-url "$s3_endpoint" "$artifact_s3_uri" "$artifact_filename") fi echo "00_fetch_s3_and_prepare_run_dir: Extracting artifact into run directory" tar -xzf "$artifact_local" -C "$RUN_DIR_ABS" @@ -102,11 +196,15 @@ download_tif() { [[ -z "$seg" ]] && return 0 resolved=$(resolve_run_path "$seg") if [[ -f "$resolved" ]]; then - echo "00_fetch_s3_and_prepare_run_dir: Already present: $resolved" + echo "00_fetch_s3_and_prepare_run_dir: Already present: $(report_path "$resolved")" else - mkdir -p "$(dirname "$resolved")" + local dest_dir dest_name + dest_dir=$(dirname "$resolved") + dest_name=$(basename "$resolved") + mkdir -p "$dest_dir" echo "00_fetch_s3_and_prepare_run_dir: Downloading $label from S3" - aws s3 cp --endpoint-url "$s3_endpoint" "$s3_uri" "$resolved" + echo "00_fetch_s3_and_prepare_run_dir: Saving to: $(report_path "$resolved")" + (cd "$dest_dir" && aws s3 cp --endpoint-url "$s3_endpoint" "$s3_uri" "$dest_name") fi } download_tif "$seg1" "$median_s3_uri" 
"median TIF" diff --git a/2a_grass/00_stage_external_inputs.sh b/2a_grass/00_stage_external_inputs.sh new file mode 100644 index 0000000..51617ef --- /dev/null +++ b/2a_grass/00_stage_external_inputs.sh @@ -0,0 +1,194 @@ +#!/usr/bin/env bash +# 00_stage_external_inputs.sh: create run directory and stage user external inputs. +# Invoked as step 00 of the 'prepare' command. It: +# - Ensures the run directory exists. +# - Copies user-provided external files (from config.external_paths) into +# the run directory so they are available to the workflow. +# +# Requires: yq (mikefarah/yq) +# +# Options (see --help): --repo-root (required); --manifest optional, currently +# unused for staging; defaults to /2a_grass/workflow_manifest.yaml. +# Run directory is either from --run-dir or from run_dir in the file given by +# --config (relative paths resolved with --invocation-cwd). external_paths +# entries are resolved from --invocation-cwd when relative. + +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: 00_stage_external_inputs.sh [OPTIONS] + +Create the run directory (if needed) and copy user-provided external files +from the config's external_paths section into the run directory so they are +available to the workflow. + +Required: + --repo-root PATH Repo root (workflows directory). Script changes to this directory. + +Run directory (one of): + --run-dir PATH Run directory (absolute, or relative to --repo-root). + --config PATH User YAML config file; script reads run_dir from it (use with --invocation-cwd). + +Optional: + --manifest PATH Path to workflow_manifest.yaml (default: /2a_grass/workflow_manifest.yaml). (Currently unused.) + --invocation-cwd PATH Required when using --config with a relative run_dir or relative external_paths. + -h, --help Print this help and exit. 
+EOF +} + +RUN_DIR="" +CONFIG_FILE="" +REPO_ROOT="" +MANIFEST="" +INVOCATION_CWD="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --run-dir) + [[ $# -lt 2 ]] && { echo "00_stage_external_inputs: --run-dir requires PATH." >&2; usage >&2; exit 1; } + RUN_DIR="$2"; shift 2 ;; + --config) + [[ $# -lt 2 ]] && { echo "00_stage_external_inputs: --config requires PATH." >&2; usage >&2; exit 1; } + CONFIG_FILE="$2"; shift 2 ;; + --repo-root) + [[ $# -lt 2 ]] && { echo "00_stage_external_inputs: --repo-root requires PATH." >&2; usage >&2; exit 1; } + REPO_ROOT="$2"; shift 2 ;; + --manifest) + [[ $# -lt 2 ]] && { echo "00_stage_external_inputs: --manifest requires PATH." >&2; usage >&2; exit 1; } + MANIFEST="$2"; shift 2 ;; + --invocation-cwd) + [[ $# -lt 2 ]] && { echo "00_stage_external_inputs: --invocation-cwd requires PATH." >&2; usage >&2; exit 1; } + INVOCATION_CWD="$2"; shift 2 ;; + -h|--help) + usage; exit 0 ;; + *) + echo "00_stage_external_inputs: Unknown option: $1" >&2 + usage >&2 + exit 1 ;; + esac +done + +if [[ -z "$REPO_ROOT" ]]; then + echo "00_stage_external_inputs: --repo-root is required." >&2 + usage >&2 + exit 1 +fi + +if [[ -z "$MANIFEST" ]]; then + MANIFEST="${REPO_ROOT}/2a_grass/workflow_manifest.yaml" +fi + +# Run directory: from --run-dir or from config file +if [[ -n "$CONFIG_FILE" ]]; then + if [[ ! -f "$CONFIG_FILE" ]]; then + echo "00_stage_external_inputs: Config file not found: $CONFIG_FILE" >&2 + exit 1 + fi + RUN_DIR=$(yq eval '.run_dir' "$CONFIG_FILE") || { + echo "00_stage_external_inputs: yq failed to read .run_dir from config: $CONFIG_FILE" >&2 + exit 1 + } + if [[ -z "$RUN_DIR" || "$RUN_DIR" == "null" ]]; then + echo "00_stage_external_inputs: run_dir not found or empty in config (expected .run_dir): $CONFIG_FILE" >&2 + exit 1 + fi + if [[ "$RUN_DIR" != /* ]]; then + if [[ -z "$INVOCATION_CWD" ]]; then + echo "00_stage_external_inputs: --invocation-cwd is required when run_dir in config is relative." 
>&2 + exit 1 + fi + RUN_DIR="${INVOCATION_CWD}/${RUN_DIR}" + fi +elif [[ -z "$RUN_DIR" ]]; then + echo "00_stage_external_inputs: Provide --run-dir or --config (with run_dir in the config file)." >&2 + usage >&2 + exit 1 +fi + +if [[ ! -f "$MANIFEST" ]]; then + echo "00_stage_external_inputs: Manifest not found: $MANIFEST" >&2 + exit 1 +fi + +if ! command -v yq &>/dev/null; then + echo "00_stage_external_inputs: yq is required to read the manifest and config." >&2 + exit 1 +fi + +cd "$REPO_ROOT" + +# Show path for user: relative to INVOCATION_CWD if under it, else absolute +report_path() { + local abs_path="$1" + if [[ -n "$INVOCATION_CWD" && "$abs_path" == "$INVOCATION_CWD"/* ]]; then + echo "${abs_path#"$INVOCATION_CWD"/}" + else + echo "$abs_path" + fi +} + +# Resolve an absolute run directory for staging. +RUN_DIR_ABS=$(if [[ "$RUN_DIR" = /* ]]; then echo "$RUN_DIR"; else echo "$REPO_ROOT/$RUN_DIR"; fi) + +echo "00_stage_external_inputs: Ensuring run directory exists" +mkdir -p "$RUN_DIR_ABS" +RUN_DIR_ABS=$(cd "$RUN_DIR_ABS" && pwd) +RUN_DIR="$RUN_DIR_ABS" +echo "00_stage_external_inputs: Run directory: $(report_path "$RUN_DIR_ABS")" + +# If no config or no external_paths, nothing more to do. +if [[ -z "$CONFIG_FILE" || ! -f "$CONFIG_FILE" ]]; then + echo "00_stage_external_inputs: No config file provided; only run directory was created." + echo "00_stage_external_inputs: Done." + exit 0 +fi + +# external_paths is a mapping from arbitrary keys to source file paths. +# We do not depend on manifest paths here; we simply copy each source file +# into the run directory (flattened by basename). +# Parse the YAML block output of .external_paths line by line (yq v4 outputs plain +# scalars without quotes). Split on first ": " to get key and value. 
+external_block=$(yq eval '.external_paths' "$CONFIG_FILE" 2>/dev/null || echo "null") +if [[ -z "$external_block" || "$external_block" == "null" || "$external_block" == "{}" ]]; then + echo "00_stage_external_inputs: No external_paths configured; nothing to copy." + echo "00_stage_external_inputs: Done." + exit 0 +fi + +echo "00_stage_external_inputs: Staging external inputs into run directory" + +while IFS= read -r line; do + [[ -z "$line" ]] && continue + # Split on first ": " — key is everything before, value everything after. + key="${line%%: *}" + src="${line#*: }" + [[ -z "$key" || "$key" == "$line" ]] && continue # no ": " found + [[ -z "$src" || "$src" == "null" ]] && continue + # Strip surrounding quotes that yq may preserve from the YAML source. + src="${src#\"}" ; src="${src%\"}" + + # Resolve source: absolute as-is, relative to INVOCATION_CWD otherwise. + if [[ "$src" != /* ]]; then + if [[ -z "$INVOCATION_CWD" ]]; then + echo "00_stage_external_inputs: --invocation-cwd is required when external_paths entries are relative." >&2 + exit 1 + fi + src="${INVOCATION_CWD}/${src}" + fi + if [[ ! -f "$src" ]]; then + echo "00_stage_external_inputs: external_paths.${key}: source file not found: ${src}" >&2 + exit 1 + fi + + # Destination: copy into the run directory using the source basename. + dest="${RUN_DIR_ABS}/$(basename "$src")" + dest_dir=$(dirname "$dest") + mkdir -p "$dest_dir" + + echo "00_stage_external_inputs: Copying $(report_path "$src") -> $(report_path "$dest")" + cp -f "$src" "$dest" +done <<< "$external_block" + +echo "00_stage_external_inputs: Done." 
+ diff --git a/2a_grass/01_ERA5_nc_to_clim.R b/2a_grass/01_ERA5_nc_to_clim.R index 0fc5e51..56ebf16 100755 --- a/2a_grass/01_ERA5_nc_to_clim.R +++ b/2a_grass/01_ERA5_nc_to_clim.R @@ -73,21 +73,36 @@ file_info <- site_info |> dplyr::rename(site_id = id) |> dplyr::cross_join(data.frame(ens_id = 1:10)) +# stopifnot( +# length(unique(file_info$id)) == nrow(file_info), +# all(file_info$lat > 0), # just to simplify grid naming below +# all(file_info$lon < 0) +# ) +file_info <- file_info |> + dplyr::mutate( + # match locations to half-degree ERA5 grid cell centers + # CAUTION: Calculation only correct when all lats are N and all lons are W! + ERA5_grid_cell = paste0( + ((lat + 0.25) %/% 0.5) * 0.5, "N_", + ((abs(lon) + 0.25) %/% 0.5) * 0.5, "W" + ) + ) if (!dir.exists(args$site_sipnet_met_path)) { dir.create(args$site_sipnet_met_path, recursive = TRUE) } furrr::future_pwalk( file_info, - function(site_id, start_date, end_date, ens_id, ...) { + function(site_id, start_date, end_date, ens_id, ERA5_grid_cell, ...) { PEcAn.SIPNET::met2model.SIPNET( in.path = file.path( args$site_era5_path, - paste("ERA5", site_id, ens_id, sep = "_") + # paste("ERA5", site_id, ens_id, sep = "_") + paste("ERA5", ERA5_grid_cell, ens_id, sep = "_") ), start_date = args$start_date, end_date = args$end_date, in.prefix = paste0("ERA5.", ens_id), - outfolder = file.path(args$site_sipnet_met_path, site_id) + outfolder = file.path(args$site_sipnet_met_path, ERA5_grid_cell) ) } ) diff --git a/2a_grass/example_user_config.yaml b/2a_grass/example_user_config.yaml index f1fcbfb..05fc1fe 100644 --- a/2a_grass/example_user_config.yaml +++ b/2a_grass/example_user_config.yaml @@ -1,12 +1,13 @@ # Example user-facing config for 2a_grass workflows. # Pass with: ./magic-ensemble --config workflows/2a_grass/example_user_config.yaml # -# This file contains only overridable settings. Fixed paths, S3 resources, and -# step I/O are defined in workflow_manifest.yaml (do not put those here). 
+# This file contains only overridable settings and user-provided external +# resources. Fixed paths, S3 resources, and step I/O are defined in +# workflow_manifest.yaml and are not overridden here. # Run directory: where outputs and run-specific data live. # Relative to the CWD where you invoke the CLI, unless you use an absolute path. -run_dir: "2a_grass/run" +run_dir: "config-based-rundir/" # Dates used by prepare and run-ensembles. start_date: "2016-01-01" @@ -18,12 +19,21 @@ n_ens: 20 n_met: 10 ic_ensemble_size: 100 +# User-provided external resources. + +# Absolute paths are used as-is; relative paths are resolved from the +# directory where you invoke ./magic-ensemble. +# paths in external_paths will be localized to the run_dir before the workflow is run. +external_paths: + template_file: "2a_grass/template.xml" + # Parallelism (e.g. for step 01 --n_cores). n_workers: 1 -# Optional: distributed compute adapter (for future use with Slurm/Apptainer). -# distributed_compute_adapter: -# name: "localhost" -# qsub: "sbatch -J @NAME@ -o @STDOUT@ -e @STDERR@ apptainer run ./sipnet-carb_develop.sif" -# qsub_jobid: "Submitted batch job ([0-9]+)" -# qstat: 'if test -z "$(squeue -h -j @JOBID@)"; then echo "DONE"; fi' +# Run prepare steps inside Apptainer (single image from workflow manifest; pull-if-not-present). +# When true: 'module load apptainer' is attempted, then apptainer must be on PATH; SIF is +# pulled from manifest remote if not present. No user override of remote; local image only. +use_apptainer: true + +# Switch dispatch method for parallel execution (local-gnu-parallel, slurm-dispatch, etc.). 
+pecan_dispatch: local-gnu-parallel diff --git a/2a_grass/template.xml b/2a_grass/template.xml index 6d91f23..75c2c24 100644 --- a/2a_grass/template.xml +++ b/2a_grass/template.xml @@ -45,7 +45,7 @@ SIPNET git TRUE - sipnet.git + /usr/local/bin/sipnet.git cp data/events.in @RUNDIR@ @@ -67,15 +67,11 @@ localhost + sbatch -J @NAME@ -o @STDOUT@ -e @STDERR@ apptainer run ./sipnet-carb_develop.sif + Submitted batch job ([0-9]+) + + if test -z "$(squeue -h -j @JOBID@)"; then echo "DONE"; fi output/out output/run - - - squeue -j @JOBID@ || echo DONE - - parallel -j ${NCPUS:-1} --skip-first-line '{}/job.sh' :::: - - 1000 - diff --git a/2a_grass/workflow_manifest.yaml b/2a_grass/workflow_manifest.yaml index ef48585..e0b859e 100644 --- a/2a_grass/workflow_manifest.yaml +++ b/2a_grass/workflow_manifest.yaml @@ -3,7 +3,9 @@ # # Paths: All entries under 'paths' are inside the run directory (no paths outside run_dir). # Keys are referenced by name in steps (inputs/outputs). At runtime the CLI resolves -# each path as run_dir + "/" + value. +# each path as run_dir + "/" + value. These are fixed, non-overrideable locations; +# user configs may only supply external source files that are copied into these +# paths before 'prepare' runs. # # Steps: Each command has a list of step objects. Each step has: # script: R script path (relative to repo root) @@ -12,19 +14,70 @@ # outputs: List of path keys this script creates or writes # S3 resources (not in user config). Remote resources are localized before R runs. +# Stored as endpoint + bucket + per-resource key_prefix and filename (no full URLs). 
s3: endpoint_url: "https://s3.garage.ccmmf.ncsa.cloud" bucket: "carb" artifact_02: - url: "s3://carb/data/workflows/phase_2a" - filename: "ccmmf_phase_2a_input_artifacts.tgz" + key_prefix: "data_raw" + filename: "ensembles_data_artifact.tar.gz" median_tif: - url: "s3://carb/data_raw" + key_prefix: "data_raw" filename: "ca_biomassfiaald_2016_median.tif" stdv_tif: - url: "s3://carb/data_raw" + key_prefix: "data_raw" filename: "ca_biomassfiaald_2016_stdv.tif" +# Dispatch options for run-ensembles. The user config selects one by name via pecan_dispatch. +# host_xml is the complete ... block to inject into the staged template.xml +# before step 03 (xml_build.R) runs. Valid values for pecan_dispatch in user config are the +# keys listed here. +pecan_dispatch: + local-gnu-parallel: + description: "Run ensemble members locally using GNU parallel (no Slurm required)" + host_xml: | + + localhost + output/out + output/run + squeue -j @JOBID@ || echo DONE + + parallel -j ${NCPUS:-1} --skip-first-line '{}/job.sh' :::: + 1000 + + + host_xml_apptainer: | + + localhost + output/out + output/run + squeue -j @JOBID@ || echo DONE + + parallel -j ${NCPUS:-1} --skip-first-line 'apptainer run @SIF@ {}/job.sh' :::: + 1000 + + + slurm-dispatch: + description: "Submit ensemble members to Slurm via sbatch" + host_xml: | + + localhost + sbatch -J @NAME@ -o @STDOUT@ -e @STDERR@ + Submitted batch job ([0-9]+) + if test -z "$(squeue -h -j @JOBID@)"; then echo "DONE"; fi + output/out + output/run + + host_xml_apptainer: | + + localhost + sbatch -J @NAME@ -o @STDOUT@ -e @STDERR@ apptainer run @SIF@ + Submitted batch job ([0-9]+) + if test -z "$(squeue -h -j @JOBID@)"; then echo "DONE"; fi + output/out + output/run + + # Apptainer (not in user config) apptainer: remote: @@ -35,6 +88,8 @@ apptainer: sif: "sipnet-carb_develop.sif" # Path definitions: all contained within the run directory. +# note that these paths are the internal-workflow expected I/O paths. 
+# Users should not modify these values unless you know what you are doing. # Values are relative to run_dir; CLI resolves as run_dir + "/" + value. paths: site_info_file: "site_info.csv" @@ -65,6 +120,11 @@ steps: outputs: [data_dir, ic_outdir, site_sipnet_met_path] prepare: + - script: "2a_grass/00_stage_external_inputs.sh" + r_libraries: [] + inputs: [] + outputs: [] + - script: "2a_grass/01_ERA5_nc_to_clim.R" r_libraries: [future, furrr] inputs: [site_info_file, site_era5_path] diff --git a/magic-ensemble b/magic-ensemble index bd534e8..63894f4 100755 --- a/magic-ensemble +++ b/magic-ensemble @@ -1,6 +1,6 @@ #!/usr/bin/env bash # magic-ensemble: minimal CLI for workflows (2a_grass). -# Usage: ./magic-ensemble [--dry-run] [--verbose] [--config ] +# Usage: ./magic-ensemble [--verbose] --config # Commands: help | get-demo-data | prepare | run-ensembles set -euo pipefail @@ -18,18 +18,17 @@ Usage: ./magic-ensemble [global options] Commands: help Print this usage and help (no scripts run). get-demo-data Fetch demo data from S3 and create run directory (for users without local data). - prepare Run preparation steps: 01 (ERA5→clim), 02 (IC build), 03 (XML build). + prepare Run preparation steps: 00 (stage external inputs), 01 (ERA5→clim), 02 (IC build), 03 (XML build). run-ensembles Run step 04 (run model) using existing settings.xml and prepared inputs. Global options (after command): - --dry-run Do not run R scripts; print what would be run and run pre-execution checks. - --verbose Echo each Rscript command before running. - --config Path to user YAML config (overridable keys only; fixed paths are in workflow manifest). + --verbose Echo each command before running (including apptainer run when use_apptainer is true). + --config REQUIRED. Path to user YAML config (overridable scalar keys only; fixed paths are in workflow manifest). + Config may set use_apptainer: true to run prepare inside Apptainer (pull-if-not-present; apptainer only). 
Examples: ./magic-ensemble help ./magic-ensemble get-demo-data --config my_config.yaml - ./magic-ensemble prepare --dry-run ./magic-ensemble prepare --config my_config.yaml --verbose ./magic-ensemble run-ensembles --config my_config.yaml EOF @@ -49,7 +48,6 @@ require_yq() { # --- Parse arguments: command first, then global options --- COMMAND="" -DRY_RUN=0 VERBOSE=0 CONFIG_FILE="" while [[ $# -gt 0 ]]; do @@ -57,7 +55,6 @@ while [[ $# -gt 0 ]]; do help|get-demo-data|prepare|run-ensembles) if [[ -z "$COMMAND" ]]; then COMMAND="$1"; shift; continue; fi ;; - --dry-run) DRY_RUN=1; shift; continue ;; --verbose) VERBOSE=1; shift; continue ;; --config) if [[ $# -lt 2 ]]; then echo "magic-ensemble: --config requires ." >&2; usage >&2; exit 1; fi @@ -74,6 +71,11 @@ while [[ $# -gt 0 ]]; do shift done +# Resolve paths passed on the command line relative to CWD (use actual pwd so config is found) +if [[ -n "$CONFIG_FILE" && "$CONFIG_FILE" != /* ]]; then + CONFIG_FILE="$(pwd)/${CONFIG_FILE}" +fi + # --- Help or no command --- if [[ -z "$COMMAND" || "$COMMAND" == "help" ]]; then usage @@ -92,23 +94,37 @@ if [[ ! -f "$MANIFEST" ]]; then exit 1 fi +if [[ -z "$CONFIG_FILE" ]]; then + echo "magic-ensemble: --config is required for command '$COMMAND'." >&2 + usage >&2 + exit 1 +fi + +if [[ ! 
-f "$CONFIG_FILE" ]]; then + echo "magic-ensemble: Config file not found: $CONFIG_FILE" >&2 + exit 1 +fi + # --- Load effective config: manifest + optional user overrides --- -# User config may contain: run_dir, start_date, end_date, run_LAI_date, n_ens, n_met, ic_ensemble_size, n_workers +# User config may contain: run_dir, start_date, end_date, run_LAI_date, n_ens, n_met, ic_ensemble_size, n_workers, pecan_dispatch get_val() { local key="$1" local from_manifest="$2" if [[ -n "$CONFIG_FILE" && -f "$CONFIG_FILE" ]]; then local u - u=$(yq eval ".$key // .paths.$key // .dates.$key // .ensemble.$key // empty" "$CONFIG_FILE" 2>/dev/null) - if [[ -n "$u" && "$u" != "null" ]]; then - echo "$u" - return + u=$(yq eval ".$key" "$CONFIG_FILE" 2>/dev/null) + if [[ -z "$u" || "$u" == "null" ]]; then + echo "magic-ensemble: Config key '$key' is missing or empty in $CONFIG_FILE" >&2 + exit 1 fi + echo "$u" + return fi echo "$from_manifest" } # Read manifest paths and fixed values +# Manifest paths are internal-workflow I/O connections. they should not be altered. p_site_info_file=$(yq eval '.paths.site_info_file' "$MANIFEST") p_site_sipnet_met_path=$(yq eval '.paths.site_sipnet_met_path' "$MANIFEST") p_site_era5_path=$(yq eval '.paths.site_era5_path' "$MANIFEST") @@ -135,12 +151,22 @@ n_ens_default="20" n_met_default="10" ic_ensemble_size_default="100" n_workers_default="1" +use_apptainer_default="false" run_dir=$(get_val "run_dir" "$run_dir_default") + +if [[ "$run_dir" == "/" ]]; then + echo "magic-ensemble: run_dir cannot be the root directory (/)." 
>&2 + exit 1 +fi + # If run_dir is not absolute, resolve relative to CWD where the CLI was invoked if [[ "$run_dir" != /* ]]; then run_dir="${INVOCATION_CWD}/${run_dir}" fi +# Normalize run_dir to avoid trailing slashes so joined paths do not contain "//" +run_dir="${run_dir%/}" + start_date=$(get_val "start_date" "$start_date_default") end_date=$(get_val "end_date" "$end_date_default") run_LAI_date=$(get_val "run_LAI_date" "$run_LAI_date_default") @@ -148,10 +174,20 @@ n_ens=$(get_val "n_ens" "$n_ens_default") n_met=$(get_val "n_met" "$n_met_default") ic_ensemble_size=$(get_val "ic_ensemble_size" "$ic_ensemble_size_default") n_workers=$(get_val "n_workers" "$n_workers_default") +use_apptainer_raw=$(get_val "use_apptainer" "$use_apptainer_default") +# Normalize: true/yes/1 (case-insensitive) => 1; else 0 +use_apptainer=0 +case "$(echo "$use_apptainer_raw" | tr '[:upper:]' '[:lower:]')" in + true|yes|1) use_apptainer=1 ;; +esac + +pecan_dispatch=$(get_val "pecan_dispatch" "") -# Resolve manifest paths relative to run_dir (then relative to repo root). +# Resolve manifest paths relative to run_dir. # Effective path = run_dir / manifest_path so R (CWD=REPO_ROOT) sees the correct file. resolve_path() { echo "${run_dir}/${1}"; } + +# All workflow paths come from the manifest only (no user overrides). 
site_info_file=$(resolve_path "$p_site_info_file") site_sipnet_met_path=$(resolve_path "$p_site_sipnet_met_path") site_era5_path=$(resolve_path "$p_site_era5_path") @@ -159,14 +195,13 @@ field_shape_path=$(resolve_path "$p_field_shape_path") data_dir=$(resolve_path "$p_data_dir") ic_outdir=$(resolve_path "$p_ic_outdir") pft_dir=$(resolve_path "$p_pft_dir") -landtrendr_raw_files=$(resolve_path "$p_landtrendr_raw_files") site_file=$(resolve_path "$p_site_file") template_file=$(resolve_path "$p_template_file") output_file=$(resolve_path "$p_output_file") met_dir=$(resolve_path "$p_met_dir") ic_dir=$(resolve_path "$p_ic_dir") settings_xml=$(resolve_path "$p_settings_xml") -# landtrendr_raw_files is comma-separated; resolve each segment +# landtrendr_raw_files is comma-separated; resolve each segment (manifest only for now) landtrendr_raw_files="" while IFS= read -r segment; do segment=$(echo "$segment" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') @@ -183,6 +218,46 @@ check_aws() { fi } +# --- Apptainer: ensure apptainer is available (module load then PATH); apptainer only, no singularity --- +ensure_apptainer_available() { + if command -v apptainer &>/dev/null; then + return 0 + fi + if command -v module &>/dev/null; then + if module load apptainer 2>/dev/null; then + if command -v apptainer &>/dev/null; then + return 0 + fi + fi + fi + echo "magic-ensemble: use_apptainer is true but apptainer is not available. Run 'module load apptainer' or ensure apptainer is on PATH. 
(Singularity is not supported.)" >&2 + exit 1 +} + +# --- Apptainer: resolve SIF path from manifest (in run_dir); pull from remote if not present (no user override of remote) --- +ensure_sif_present() { + local sif_name sif_path remote_base container_name tag uri + sif_name=$(yq eval '.apptainer.sif' "$MANIFEST") + sif_path="${run_dir}/${sif_name}" + if [[ -f "$sif_path" ]]; then + APPTAINER_SIF="$sif_path" + return 0 + fi + ensure_apptainer_available + remote_base=$(yq eval '.apptainer.remote.url' "$MANIFEST") + remote_base="${remote_base%/}" + container_name=$(yq eval '.apptainer.container.name' "$MANIFEST") + tag=$(yq eval '.apptainer.tag' "$MANIFEST") + uri="${remote_base}/${container_name}:${tag}" + echo "magic-ensemble: SIF not found at $sif_path; pulling from $uri" >&2 + mkdir -p "$run_dir" + if ! apptainer pull "$sif_path" "$uri"; then + echo "magic-ensemble: Failed to pull container to $sif_path" >&2 + exit 1 + fi + APPTAINER_SIF="$sif_path" +} + # --- Get list of script paths for current command (from manifest steps) --- get_steps() { yq eval '.steps["'"$COMMAND"'"] | .[].script' "$MANIFEST" @@ -211,27 +286,39 @@ check_r_libs_for_step() { done < <(yq eval '.steps["'"$COMMAND"'"] | .['"$i"'].r_libraries | .[]?' 
"$MANIFEST" 2>/dev/null || true) } -# --- Dry-run: print scripts and optionally run checks --- -do_dry_run() { - echo "magic-ensemble: dry-run for command: $COMMAND" - echo "Run directory (for this execution): $run_dir" - echo "Would run the following scripts (CWD = $REPO_ROOT):" - while IFS= read -r script; do - [[ -z "$script" ]] && continue - script_path="${REPO_ROOT}/${script}" - if [[ -f "$script_path" ]]; then - echo " - $script (exists)" - else - echo " - $script (MISSING)" +# --- R library check for step at index i inside Apptainer (APPTAINER_SIF must be set) --- +check_r_libs_for_step_in_apptainer() { + local i="$1" + local script="${STEPS[i]}" + [[ "$script" == *.sh ]] && return 0 + local lib + while IFS= read -r lib; do + [[ -z "$lib" || "$lib" == "null" ]] && continue + if ! apptainer run --bind "$REPO_ROOT:$REPO_ROOT" --bind "$run_dir:$run_dir" --pwd "$REPO_ROOT" "$APPTAINER_SIF" Rscript -e "library(\"$lib\")" 2>/dev/null; then + echo "magic-ensemble: R library check failed inside container: library(\"$lib\") not available in image $APPTAINER_SIF" >&2 + exit 1 fi - done < <(get_steps) - echo "" - echo "Pre-execution checks (R libraries, AWS CLI) can be run when not in dry-run." - exit 0 + done < <(yq eval '.steps["'"$COMMAND"'"] | .['"$i"'].r_libraries | .[]?' "$MANIFEST" 2>/dev/null || true) } -# --- Run R script with args; CWD = REPO_ROOT --- +# --- Run R script with args. Optional leading args: --apptainer, --cwd DIR (default DIR = REPO_ROOT). --- run_script() { + local use_apptainer=0 + local script_cwd="$REPO_ROOT" + while [[ $# -gt 0 ]]; do + case "${1:-}" in + --apptainer) use_apptainer=1; shift ;; + --cwd) + if [[ $# -lt 2 ]]; then + echo "magic-ensemble: run_script --cwd requires DIR." 
>&2 + exit 1 + fi + script_cwd="$2" + shift 2 + ;; + *) break ;; + esac + done local script="$1" shift local script_path="${REPO_ROOT}/${script}" @@ -239,13 +326,23 @@ run_script() { echo "magic-ensemble: Script not found: $script_path" >&2 exit 1 fi - if [[ $VERBOSE -eq 1 ]]; then - echo "Rscript $script_path $*" >&2 + if [[ $use_apptainer -eq 1 ]]; then + # APPTAINER_SIF and run_dir must be set (ensure_sif_present and run_dir resolved earlier). + echo "magic-ensemble: Rscript (inside apptainer: $APPTAINER_SIF)" + if [[ $VERBOSE -eq 1 ]]; then + echo "apptainer run --bind \"$REPO_ROOT:$REPO_ROOT\" --bind \"$run_dir:$run_dir\" --cwd \"$script_cwd\" \"$APPTAINER_SIF\" Rscript \"$script_path\" $*" >&2 + fi + apptainer run --bind "$REPO_ROOT:$REPO_ROOT" --bind "$run_dir:$run_dir" --cwd "$script_cwd" "$APPTAINER_SIF" Rscript "$script_path" "$@" + else + echo "magic-ensemble: Rscript: $(command -v Rscript)" + if [[ $VERBOSE -eq 1 ]]; then + echo "(cd \"$script_cwd\" && Rscript \"$script_path\" $*)" >&2 + fi + (cd "$script_cwd" && Rscript "$script_path" "$@") fi - (cd "$REPO_ROOT" && Rscript "$script_path" "$@") } -# --- Run shell script; CWD = REPO_ROOT. Pass COMMAND and STEP_INDEX for manifest lookups. --- +# --- Run shell script; CWD = REPO_ROOT. Step scripts receive documented CLI arguments. --- run_shell_script() { local script="$1" local step_index="${2:-0}" @@ -254,10 +351,74 @@ run_shell_script() { echo "magic-ensemble: Script not found: $script_path" >&2 exit 1 fi + local script_basename="${script##*/}" + if [[ "$script_basename" == "00_fetch_s3_and_prepare_run_dir.sh" || "$script_basename" == "00_stage_external_inputs.sh" ]]; then + # Step 00 helpers: documented arguments (see each script's --help). 
+ local args=(--repo-root "$REPO_ROOT") + if [[ "$script_basename" == "00_fetch_s3_and_prepare_run_dir.sh" ]]; then + args+=(--command "$COMMAND" --step-index "$step_index") + fi + if [[ -n "$CONFIG_FILE" && -f "$CONFIG_FILE" ]]; then + args+=(--config "$CONFIG_FILE" --invocation-cwd "$INVOCATION_CWD") + else + args+=(--run-dir "$run_dir" --invocation-cwd "$INVOCATION_CWD") + fi + if [[ $VERBOSE -eq 1 ]]; then + echo "bash $script_path ${args[*]}" >&2 + fi + (cd "$REPO_ROOT" && bash "$script_path" "${args[@]}") + else + # Other shell steps: pass args if/when they are added; no env vars + if [[ $VERBOSE -eq 1 ]]; then + echo "bash $script_path" >&2 + fi + (cd "$REPO_ROOT" && bash "$script_path") + fi +} + +# --- Validate pecan_dispatch value against manifest options --- +validate_pecan_dispatch() { + if ! yq eval ".pecan_dispatch | has(\"$pecan_dispatch\")" "$MANIFEST" | grep -q '^true$'; then + echo "magic-ensemble: Unknown pecan_dispatch value '$pecan_dispatch'. Valid options:" >&2 + yq eval '.pecan_dispatch | keys | .[]' "$MANIFEST" >&2 + exit 1 + fi +} + +# --- Patch ... block in staged template.xml with chosen dispatch XML --- +# Selects host_xml_apptainer when use_apptainer=1 (with @SIF@ substituted); falls back to host_xml. +patch_dispatch() { + if ! command -v python3 &>/dev/null; then + echo "magic-ensemble: python3 is required to patch dispatch in template.xml." >&2 + exit 1 + fi + local template_path="${run_dir}/$(yq eval '.paths.template_file' "$MANIFEST")" + if [[ ! -f "$template_path" ]]; then + echo "magic-ensemble: staged template.xml not found at $template_path" >&2 + exit 1 + fi + + # Select apptainer variant when available and requested; otherwise plain host_xml. 
+ local host_xml_key="host_xml" + if [[ $use_apptainer -eq 1 ]]; then + local has_apptainer_variant + has_apptainer_variant=$(yq eval ".pecan_dispatch[\"$pecan_dispatch\"] | has(\"host_xml_apptainer\")" "$MANIFEST") + if [[ "$has_apptainer_variant" == "true" ]]; then + host_xml_key="host_xml_apptainer" + fi + fi + if [[ $VERBOSE -eq 1 ]]; then - echo "RUN_DIR=$run_dir REPO_ROOT=$REPO_ROOT MANIFEST=$MANIFEST COMMAND=$COMMAND STEP_INDEX=$step_index bash $script_path" >&2 + echo "magic-ensemble: patching block in $template_path (pecan_dispatch=$pecan_dispatch, xml_key=$host_xml_key)" >&2 fi - (cd "$REPO_ROOT" && RUN_DIR="$run_dir" REPO_ROOT="$REPO_ROOT" MANIFEST="$MANIFEST" COMMAND="$COMMAND" STEP_INDEX="$step_index" bash "$script_path") + + local sif_name host_xml + sif_name=$(yq eval '.apptainer.sif' "$MANIFEST") + # Substitute @SIF@ with the SIF filename (relative to run_dir, as jobs execute there). + host_xml=$(yq eval ".pecan_dispatch[\"$pecan_dispatch\"].$host_xml_key" "$MANIFEST" \ + | sed "s|@SIF@|./${sif_name}|g") + + python3 "${REPO_ROOT}/tools/patch_xml.py" "$template_path" "host" "$host_xml" --block } # --- Get-demo-data: run steps from manifest (shell script only) --- @@ -270,67 +431,96 @@ run_get_demo_data() { done } -# --- Prepare: run steps from manifest (01, 02, 03 with R args) --- +# --- Prepare: run steps from manifest (hard-coded sequence for this workflow); optionally inside Apptainer --- run_prepare() { get_steps_array check_aws - for i in "${!STEPS[@]}"; do - check_r_libs_for_step "$i" - done + validate_pecan_dispatch + + local apptainer_arg="" + if [[ $use_apptainer -eq 1 ]]; then + ensure_apptainer_available + ensure_sif_present + apptainer_arg="--apptainer" + for i in "${!STEPS[@]}"; do + check_r_libs_for_step_in_apptainer "$i" + done + else + for i in "${!STEPS[@]}"; do + check_r_libs_for_step "$i" + done + fi for i in "${!STEPS[@]}"; do - case "$i" in - 0) run_script "${STEPS[i]}" \ - --site_era5_path "$site_era5_path" \ - 
--site_sipnet_met_path "$site_sipnet_met_path" \ - --site_info_file "$site_info_file" \ - --start_date "$start_date" \ - --end_date "$end_date" \ - --n_cores "$n_workers" \ - --parallel_strategy "multisession" ;; - 1) run_script "${STEPS[i]}" \ - --site_info_path "$site_info_file" \ - --field_shape_path "$field_shape_path" \ - --ic_ensemble_size "$ic_ensemble_size" \ - --run_start_date "$start_date" \ - --run_LAI_date "$run_LAI_date" \ - --ic_outdir "$ic_outdir" \ - --data_dir "$data_dir" \ - --pft_dir "$pft_dir" \ - --params_read_from_pft "$params_from_pft" \ - --landtrendr_raw_files "$landtrendr_raw_files" \ - --additional_params "$additional_params" ;; - 2) run_script "${STEPS[i]}" \ - --n_ens "$n_ens" \ - --n_met "$n_met" \ - --start_date "$start_date" \ - --end_date "$end_date" \ - --ic_dir "$ic_dir" \ - --met_dir "$met_dir" \ - --site_file "$site_file" \ - --template_file "$template_file" \ - --output_file "$output_file" ;; - *) echo "magic-ensemble: No argument mapping for prepare step index $i" >&2; exit 1 ;; - esac + step_num=$((i + 1)) + script="${STEPS[i]}" + echo "magic-ensemble: prepare step $step_num of ${#STEPS[@]}: $script" + if [[ "$script" == *.sh ]]; then + run_shell_script "$script" "$i" + if [[ "$i" -eq 0 ]]; then + echo "magic-ensemble: patching template.xml with dispatch: $pecan_dispatch" + patch_dispatch + fi + else + case "$i" in + 1) run_script $apptainer_arg "$script" \ + --site_era5_path "$site_era5_path" \ + --site_sipnet_met_path "$site_sipnet_met_path" \ + --site_info_file "$site_info_file" \ + --start_date "$start_date" \ + --end_date "$end_date" \ + --n_cores "$n_workers" \ + --parallel_strategy "multisession" ;; + 2) run_script $apptainer_arg "$script" \ + --site_info_path "$site_info_file" \ + --field_shape_path "$field_shape_path" \ + --ic_ensemble_size "$ic_ensemble_size" \ + --run_start_date "$start_date" \ + --run_LAI_date "$run_LAI_date" \ + --ic_outdir "$ic_outdir" \ + --data_dir "$data_dir" \ + --pft_dir "$pft_dir" \ + 
--params_read_from_pft "$params_from_pft" \ + --landtrendr_raw_files "$landtrendr_raw_files" \ + --additional_params "$additional_params" ;; + 3) run_script $apptainer_arg "$script" \ + --n_ens "$n_ens" \ + --n_met "$n_met" \ + --start_date "$start_date" \ + --end_date "$end_date" \ + --ic_dir "$ic_dir" \ + --met_dir "$met_dir" \ + --site_file "$site_file" \ + --template_file "$template_file" \ + --output_file "$output_file" ;; + *) echo "magic-ensemble: No argument mapping for prepare step index $i (script $script)" >&2; exit 1 ;; + esac + fi + echo "magic-ensemble: prepare step $step_num completed" done + echo "magic-ensemble: prepare finished (all ${#STEPS[@]} steps)" } -# --- Run-ensembles: run single step from manifest (04) --- +# --- Run-ensembles: run single step from manifest (04); never inside Apptainer. +# When use_apptainer=1, the SIF must be present for dispatched jobs (already patched +# into template.xml via patch_dispatch during prepare); 04_run_model.R itself always +# runs on the host so it can submit further jobs to Slurm. run_run_ensembles() { get_steps_array check_aws + + if [[ $use_apptainer -eq 1 ]]; then + ensure_apptainer_available + ensure_sif_present + fi check_r_libs_for_step 0 - run_script "${STEPS[0]}" \ + run_script --cwd "$run_dir" "${STEPS[0]}" \ --settings "$settings_xml" \ --continue "FALSE" } # --- Main --- -if [[ $DRY_RUN -eq 1 ]]; then - do_dry_run -fi - case "$COMMAND" in get-demo-data) run_get_demo_data ;; prepare) run_prepare ;; diff --git a/tools/patch_xml.py b/tools/patch_xml.py new file mode 100644 index 0000000..9a346ce --- /dev/null +++ b/tools/patch_xml.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +""" +patch_xml.py — in-place XML element patcher. + +Usage: + patch_xml.py [--block] + +Arguments: + template_path Path to the XML file to patch (modified in-place). + xml_path Slash-separated element path, e.g. "host" or "model/binary". 
+ The last segment is the target tag; an optional leading segment + constrains the match to within that parent element. + new_content Replacement value. Without --block, replaces the text node only. + With --block, replaces the entire element including its tags. + --block Replace the full ... element rather than just its text. + +Exit codes: + 0 Success. + 1 Usage error, file I/O error, or element not found. +""" + +import sys +import re + + +def usage(msg=None): + if msg: + print(f"patch_xml: {msg}", file=sys.stderr) + print(__doc__, file=sys.stderr) + sys.exit(1) + + +def patch_element(text, tag, new_content, replace_block): + # Regex-based: assumes tags have no attributes (e.g. , not ). + # This holds for PEcAn template.xml but would need revision for attributed tags. + if replace_block: + patched, n = re.subn( + r'<' + tag + r'>.*?', + new_content, text, count=1, flags=re.DOTALL, + ) + else: + patched, n = re.subn( + r'(<' + tag + r'>)[^<]*()', + r'\g<1>' + new_content + r'\g<2>', + text, count=1, + ) + return patched, n + + +def main(): + args = sys.argv[1:] + replace_block = '--block' in args + args = [a for a in args if a != '--block'] + + if len(args) != 3: + usage(f"expected 3 positional arguments, got {len(args)}") + + template_path, xml_path, new_content = args + parts = xml_path.split('/') + tag = parts[-1] + parent = parts[0] if len(parts) > 1 else None + + try: + content = open(template_path).read() + except OSError as e: + print(f"patch_xml: {e}", file=sys.stderr) + sys.exit(1) + + if parent: + total_replaced = 0 + + def replacer(m): + nonlocal total_replaced + patched, n = patch_element(m.group(0), tag, new_content, replace_block) + total_replaced += n + return patched + + result = re.sub( + r'<' + parent + r'>.*?', + replacer, content, count=1, flags=re.DOTALL, + ) + else: + result, total_replaced = patch_element(content, tag, new_content, replace_block) + + if total_replaced == 0: + print(f"patch_xml: no element matched path '{xml_path}' in 
{template_path}", file=sys.stderr) + sys.exit(1) + + try: + open(template_path, 'w').write(result) + except OSError as e: + print(f"patch_xml: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == '__main__': + main() From caf7eab5ccf1be3e33058cfde01068622ed00bdc Mon Sep 17 00:00:00 2001 From: Henry Priest Date: Fri, 3 Apr 2026 21:24:54 +0000 Subject: [PATCH 6/7] Fix external_paths staging contract and add documentation - Stage external inputs to manifest-defined destinations rather than source basename; enforce that each external_paths key has a matching manifest.paths entry and error if not - Make get_val() fall through to defaults for missing config keys instead of erroring; add explicit post-resolution required check for run_dir only - Remove spurious check_aws calls from prepare and run-ensembles commands - Reorganize workflow_manifest.yaml: move steps block to top, normalize to 4-space indentation throughout - Add magic-ensemble-DEVELOPERS.md (architecture, internals, dispatch) and magic-ensemble-README.md --- 2a_grass/00_stage_external_inputs.sh | 22 ++- 2a_grass/README.md | 2 +- 2a_grass/workflow_manifest.yaml | 241 +++++++++++++------------ magic-ensemble | 15 +- magic-ensemble-DEVELOPERS.md | 257 +++++++++++++++++++++++++++ magic-ensemble-README.md | 173 ++++++++++++++++++ 6 files changed, 577 insertions(+), 133 deletions(-) create mode 100644 magic-ensemble-DEVELOPERS.md create mode 100644 magic-ensemble-README.md diff --git a/2a_grass/00_stage_external_inputs.sh b/2a_grass/00_stage_external_inputs.sh index 51617ef..6413565 100644 --- a/2a_grass/00_stage_external_inputs.sh +++ b/2a_grass/00_stage_external_inputs.sh @@ -7,8 +7,8 @@ # # Requires: yq (mikefarah/yq) # -# Options (see --help): --repo-root (required); --manifest optional, currently -# unused for staging; defaults to /2a_grass/workflow_manifest.yaml. +# Options (see --help): --repo-root (required); --manifest optional, +# defaults to /2a_grass/workflow_manifest.yaml. 
# Run directory is either from --run-dir or from run_dir in the file given by # --config (relative paths resolved with --invocation-cwd). external_paths # entries are resolved from --invocation-cwd when relative. @@ -31,7 +31,7 @@ Run directory (one of): --config PATH User YAML config file; script reads run_dir from it (use with --invocation-cwd). Optional: - --manifest PATH Path to workflow_manifest.yaml (default: /2a_grass/workflow_manifest.yaml). (Currently unused.) + --manifest PATH Path to workflow_manifest.yaml (default: /2a_grass/workflow_manifest.yaml). --invocation-cwd PATH Required when using --config with a relative run_dir or relative external_paths. -h, --help Print this help and exit. EOF @@ -144,9 +144,9 @@ if [[ -z "$CONFIG_FILE" || ! -f "$CONFIG_FILE" ]]; then exit 0 fi -# external_paths is a mapping from arbitrary keys to source file paths. -# We do not depend on manifest paths here; we simply copy each source file -# into the run directory (flattened by basename). +# external_paths is a mapping from manifest path keys to source file paths. +# Each key must match an entry in manifest.paths; the destination filename is +# derived from that manifest path (basename), not from the source filename. # Parse the YAML block output of .external_paths line by line (yq v4 outputs plain # scalars without quotes). Split on first ": " to get key and value. external_block=$(yq eval '.external_paths' "$CONFIG_FILE" 2>/dev/null || echo "null") @@ -181,8 +181,14 @@ while IFS= read -r line; do exit 1 fi - # Destination: copy into the run directory using the source basename. - dest="${RUN_DIR_ABS}/$(basename "$src")" + # Destination: derived from the manifest path for the same key, not the source basename. + # This enforces the manifest contract so downstream scripts always find files where expected. 
+ manifest_path=$(yq eval ".paths.${key}" "$MANIFEST" 2>/dev/null) + if [[ -z "$manifest_path" || "$manifest_path" == "null" ]]; then + echo "00_stage_external_inputs: external_paths key '${key}' has no corresponding entry in manifest.paths" >&2 + exit 1 + fi + dest="${RUN_DIR_ABS}/$(basename "$manifest_path")" dest_dir=$(dirname "$dest") mkdir -p "$dest_dir" diff --git a/2a_grass/README.md b/2a_grass/README.md index 41c2f74..f95273a 100644 --- a/2a_grass/README.md +++ b/2a_grass/README.md @@ -45,7 +45,7 @@ ln -sf ../sipnet ../sipnet.git ### Install or update PEcAn If this is a brand-new installation, expect this step to take a few hours to download and compile more than 300 R packages. If you've installed PEcAn on this machine before, expect it to be just a few minutes of updating only the PEcAn packages and any dependencies whose version requirement has changed. -Defaults to using 4 CPUs to compile packages in parallel. If you have more cores, adjust `sbatch`'s `--cpus-per-task` parameter. +Defaults to using 4 CPUs to compile packages in parallel. If you have more cores, adjust `sbatch`'s `--cpus-per-task` parameter. ``` sbatch -o install_pecan.out ../tools/install_pecan.sh diff --git a/2a_grass/workflow_manifest.yaml b/2a_grass/workflow_manifest.yaml index e0b859e..d703f96 100644 --- a/2a_grass/workflow_manifest.yaml +++ b/2a_grass/workflow_manifest.yaml @@ -13,135 +13,142 @@ # inputs: List of path keys (from 'paths') this script reads (local paths only) # outputs: List of path keys this script creates or writes -# S3 resources (not in user config). Remote resources are localized before R runs. -# Stored as endpoint + bucket + per-resource key_prefix and filename (no full URLs). 
-s3: - endpoint_url: "https://s3.garage.ccmmf.ncsa.cloud" - bucket: "carb" - artifact_02: - key_prefix: "data_raw" - filename: "ensembles_data_artifact.tar.gz" - median_tif: - key_prefix: "data_raw" - filename: "ca_biomassfiaald_2016_median.tif" - stdv_tif: - key_prefix: "data_raw" - filename: "ca_biomassfiaald_2016_stdv.tif" +# Steps per command: script path, R libs to check (empty for shell scripts), input/output path keys +steps: + get-demo-data: + - script: "2a_grass/00_fetch_s3_and_prepare_run_dir.sh" + r_libraries: [] + inputs: [] + outputs: [data_dir, ic_outdir, site_sipnet_met_path] -# Dispatch options for run-ensembles. The user config selects one by name via pecan_dispatch. -# host_xml is the complete ... block to inject into the staged template.xml -# before step 03 (xml_build.R) runs. Valid values for pecan_dispatch in user config are the -# keys listed here. -pecan_dispatch: - local-gnu-parallel: - description: "Run ensemble members locally using GNU parallel (no Slurm required)" - host_xml: | - - localhost - output/out - output/run - squeue -j @JOBID@ || echo DONE - - parallel -j ${NCPUS:-1} --skip-first-line '{}/job.sh' :::: - 1000 - - - host_xml_apptainer: | - - localhost - output/out - output/run - squeue -j @JOBID@ || echo DONE - - parallel -j ${NCPUS:-1} --skip-first-line 'apptainer run @SIF@ {}/job.sh' :::: - 1000 - - - slurm-dispatch: - description: "Submit ensemble members to Slurm via sbatch" - host_xml: | - - localhost - sbatch -J @NAME@ -o @STDOUT@ -e @STDERR@ - Submitted batch job ([0-9]+) - if test -z "$(squeue -h -j @JOBID@)"; then echo "DONE"; fi - output/out - output/run - - host_xml_apptainer: | - - localhost - sbatch -J @NAME@ -o @STDOUT@ -e @STDERR@ apptainer run @SIF@ - Submitted batch job ([0-9]+) - if test -z "$(squeue -h -j @JOBID@)"; then echo "DONE"; fi - output/out - output/run - + prepare: + - script: "2a_grass/00_stage_external_inputs.sh" + r_libraries: [] + inputs: [] + outputs: [] -# Apptainer (not in user config) 
-apptainer: - remote: - url: "docker://hdpriest0uiuc/" - container: - name: "sipnet-carb" - tag: "develop" - sif: "sipnet-carb_develop.sif" + - script: "2a_grass/01_ERA5_nc_to_clim.R" + r_libraries: [future, furrr] + inputs: [site_info_file, site_era5_path] + outputs: [site_sipnet_met_path] + + - script: "2a_grass/02_ic_build.R" + r_libraries: [tidyverse] + inputs: + [ + site_info_file, + field_shape_path, + pft_dir, + data_dir, + landtrendr_raw_files, + ] + outputs: [ic_outdir, data_dir] + + - script: "2a_grass/03_xml_build.R" + r_libraries: [PEcAn.settings] + inputs: [site_file, template_file, ic_dir, met_dir] + outputs: [output_file] + + run-ensembles: + - script: "2a_grass/04_run_model.R" + r_libraries: [PEcAn.all] + inputs: [settings_xml] + outputs: [] # Path definitions: all contained within the run directory. # note that these paths are the internal-workflow expected I/O paths. # Users should not modify these values unless you know what you are doing. # Values are relative to run_dir; CLI resolves as run_dir + "/" + value. 
paths: - site_info_file: "site_info.csv" - site_sipnet_met_path: "data/ERA5_SIPNET" - site_era5_path: "data_raw/ERA5_nc" - field_shape_path: "data_raw/dwr_map/i15_Crop_Mapping_2018.gdb" - data_dir: "data/IC_prep" - ic_outdir: "IC_files" - pft_dir: "pfts" - landtrendr_raw_files: "data_raw/ca_biomassfiaald_2016_median.tif,data_raw/ca_biomassfiaald_2016_stdv.tif" - site_file: "site_info.csv" - template_file: "template.xml" - output_file: "settings.xml" - met_dir: "data/ERA5_SIPNET" - ic_dir: "IC_files" - settings_xml: "settings.xml" + site_info_file: "site_info.csv" + site_sipnet_met_path: "data/ERA5_SIPNET" + site_era5_path: "data_raw/ERA5_nc" + field_shape_path: "data_raw/dwr_map/i15_Crop_Mapping_2018.gdb" + data_dir: "data/IC_prep" + ic_outdir: "IC_files" + pft_dir: "pfts" + landtrendr_raw_files: "data_raw/ca_biomassfiaald_2016_median.tif,data_raw/ca_biomassfiaald_2016_stdv.tif" + site_file: "site_info.csv" + template_file: "template.xml" + output_file: "settings.xml" + met_dir: "data/ERA5_SIPNET" + ic_dir: "IC_files" + settings_xml: "settings.xml" # Fixed workflow values (not user overrides) params_from_pft: "SLA,leafC" additional_params: "varname=wood_carbon_fraction,distn=norm,parama=0.48,paramb=0.005" -# Steps per command: script path, R libs to check (empty for shell scripts), input/output path keys -steps: - get-demo-data: - - script: "2a_grass/00_fetch_s3_and_prepare_run_dir.sh" - r_libraries: [] - inputs: [] - outputs: [data_dir, ic_outdir, site_sipnet_met_path] - - prepare: - - script: "2a_grass/00_stage_external_inputs.sh" - r_libraries: [] - inputs: [] - outputs: [] - - - script: "2a_grass/01_ERA5_nc_to_clim.R" - r_libraries: [future, furrr] - inputs: [site_info_file, site_era5_path] - outputs: [site_sipnet_met_path] - - - script: "2a_grass/02_ic_build.R" - r_libraries: [tidyverse] - inputs: [site_info_file, field_shape_path, pft_dir, data_dir, landtrendr_raw_files] - outputs: [ic_outdir, data_dir] +# S3 resources (not in user config). 
Remote resources are localized before R runs. +# Stored as endpoint + bucket + per-resource key_prefix and filename (no full URLs). +s3: + endpoint_url: "https://s3.garage.ccmmf.ncsa.cloud" + bucket: "carb" + artifact_02: + key_prefix: "data_raw" + filename: "ensembles_data_artifact.tar.gz" + median_tif: + key_prefix: "data_raw" + filename: "ca_biomassfiaald_2016_median.tif" + stdv_tif: + key_prefix: "data_raw" + filename: "ca_biomassfiaald_2016_stdv.tif" - - script: "2a_grass/03_xml_build.R" - r_libraries: [PEcAn.settings] - inputs: [site_file, template_file, ic_dir, met_dir] - outputs: [output_file] +# Dispatch options for run-ensembles. The user config selects one by name via pecan_dispatch. +# host_xml is the complete ... block to inject into the staged template.xml +# before step 03 (xml_build.R) runs. Valid values for pecan_dispatch in user config are the +# keys listed here. +pecan_dispatch: + local-gnu-parallel: + description: "Run ensemble members locally using GNU parallel (no Slurm required)" + host_xml: | + + localhost + output/out + output/run + squeue -j @JOBID@ || echo DONE + + parallel -j ${NCPUS:-1} --skip-first-line '{}/job.sh' :::: + 1000 + + + host_xml_apptainer: | + + localhost + output/out + output/run + squeue -j @JOBID@ || echo DONE + + parallel -j ${NCPUS:-1} --skip-first-line 'apptainer run @SIF@ {}/job.sh' :::: + 1000 + + + slurm-dispatch: + description: "Submit ensemble members to Slurm via sbatch" + host_xml: | + + localhost + sbatch -J @NAME@ -o @STDOUT@ -e @STDERR@ + Submitted batch job ([0-9]+) + if test -z "$(squeue -h -j @JOBID@)"; then echo "DONE"; fi + output/out + output/run + + host_xml_apptainer: | + + localhost + sbatch -J @NAME@ -o @STDOUT@ -e @STDERR@ apptainer run @SIF@ + Submitted batch job ([0-9]+) + if test -z "$(squeue -h -j @JOBID@)"; then echo "DONE"; fi + output/out + output/run + - run-ensembles: - - script: "2a_grass/04_run_model.R" - r_libraries: [PEcAn.all] - inputs: [settings_xml] - outputs: [] +# Apptainer 
(not in user config) +apptainer: + remote: + url: "docker://hdpriest0uiuc/" + container: + name: "sipnet-carb" + tag: "develop" + sif: "sipnet-carb_develop.sif" diff --git a/magic-ensemble b/magic-ensemble index 63894f4..5a20008 100755 --- a/magic-ensemble +++ b/magic-ensemble @@ -113,12 +113,10 @@ get_val() { if [[ -n "$CONFIG_FILE" && -f "$CONFIG_FILE" ]]; then local u u=$(yq eval ".$key" "$CONFIG_FILE" 2>/dev/null) - if [[ -z "$u" || "$u" == "null" ]]; then - echo "magic-ensemble: Config key '$key' is missing or empty in $CONFIG_FILE" >&2 - exit 1 + if [[ -n "$u" && "$u" != "null" ]]; then + echo "$u" + return fi - echo "$u" - return fi echo "$from_manifest" } @@ -155,6 +153,11 @@ use_apptainer_default="false" run_dir=$(get_val "run_dir" "$run_dir_default") +if [[ -z "$run_dir" || "$run_dir" == "null" ]]; then + echo "magic-ensemble: run_dir is required in config $CONFIG_FILE" >&2 + exit 1 +fi + if [[ "$run_dir" == "/" ]]; then echo "magic-ensemble: run_dir cannot be the root directory (/)." >&2 exit 1 @@ -434,7 +437,6 @@ run_get_demo_data() { # --- Prepare: run steps from manifest (hard-coded sequence for this workflow); optionally inside Apptainer --- run_prepare() { get_steps_array - check_aws validate_pecan_dispatch local apptainer_arg="" @@ -507,7 +509,6 @@ run_prepare() { # runs on the host so it can submit further jobs to Slurm. run_run_ensembles() { get_steps_array - check_aws if [[ $use_apptainer -eq 1 ]]; then ensure_apptainer_available diff --git a/magic-ensemble-DEVELOPERS.md b/magic-ensemble-DEVELOPERS.md new file mode 100644 index 0000000..f388150 --- /dev/null +++ b/magic-ensemble-DEVELOPERS.md @@ -0,0 +1,257 @@ +# magic-ensemble Developer Guide + +This document covers the internal design of `magic-ensemble` and the +`2a_grass/` workflow: how the pieces fit together, where the boundaries are, +and what to change when adapting this CLI to a different workflow. 
+ +--- + +## Architecture Overview + +The CLI is built on a three-layer configuration model: + +``` +workflow_manifest.yaml — fixed contract: internal paths, step definitions, + S3 coords, dispatch XML, Apptainer image + + +user_config.yaml — runtime overrides: run_dir, dates, ensemble sizes, + dispatch mode, use_apptainer, external_paths + + +external_paths (staged) — user-provided files copied into run_dir before + prepare runs, mapped to manifest-defined destinations +``` + +The manifest is the source of truth for everything that is fixed per workflow. +The user config contains only the values a user legitimately needs to vary +between runs. External paths are the mechanism for injecting user-owned files +(e.g. a custom `template.xml`) without making manifest paths user-overridable. +As written, a user can only inject files that are expected by the pipeline. + +--- + +## Execution Graph + +### `get-demo-data` + +``` +00_fetch_s3_and_prepare_run_dir.sh + → creates run_dir + → downloads and extracts S3 artifact into run_dir +``` + +### `prepare` + +``` +00_stage_external_inputs.sh + → creates run_dir + → copies external_paths files into run_dir (manifest-defined destinations) + → [patch_dispatch() runs after this step] + → reads pecan_dispatch host_xml from manifest + → substitutes @SIF@ if use_apptainer is set + → patches block in run_dir/template.xml via tools/patch_xml.py + +01_ERA5_nc_to_clim.R + reads: run_dir/data_raw/ERA5_nc, run_dir/site_info.csv + writes: run_dir/data/ERA5_SIPNET/ + +02_ic_build.R + reads: run_dir/site_info.csv, run_dir/data_raw/dwr_map/..., + run_dir/data/IC_prep/, run_dir/pfts/, + run_dir/data_raw/ca_biomassfiaald_*.tif + writes: run_dir/IC_files/, run_dir/data/IC_prep/ + +03_xml_build.R + reads: run_dir/site_info.csv, run_dir/template.xml, + run_dir/IC_files/, run_dir/data/ERA5_SIPNET/ + writes: run_dir/settings.xml +``` + +### `run-ensembles` + +``` +04_run_model.R (CWD = run_dir) + reads: run_dir/settings.xml + writes: 
run_dir/output/ (via PEcAn dispatch)
+```
+
+---
+
+## Configuration Contract
+
+### What belongs in the manifest
+
+- `steps`: ordered list of scripts per command, with declared inputs/outputs and R library checks
+- `paths`: all internal file/directory locations relative to `run_dir`
+- `s3`: S3 endpoint, bucket, and per-resource key prefix and filename
+- `pecan_dispatch`: named dispatch modes, each with a `host_xml` (and optionally `host_xml_apptainer`) block
+- `apptainer`: remote registry URL, container name, tag, and SIF filename
+
+None of these are user-overridable. Adding a new workflow means replacing or
+extending the manifest, not the user config. As the underlying R scripts evolve,
+the manifest must be kept in sync with any I/O changes made in the R scripts.
+
+### What belongs in the user config
+
+Scalar values that vary between runs: `run_dir`, dates, ensemble sizes,
+`n_workers`, `use_apptainer`, `pecan_dispatch`. These all have fallback
+defaults in `magic-ensemble`; only `run_dir` is required.
+
+### What belongs in `external_paths`
+
+File paths for user-owned inputs that must be injected into `run_dir` before
+`prepare` runs. Keys must match entries under `manifest.paths`. The destination
+is `run_dir/$(basename manifest.paths.<key>)` — derived from the manifest, not
+from the source filename, so downstream scripts always find files where they
+expect them.
+
+---
+
+## CLI Internals
+
+### Argument parsing (`magic-ensemble` lines 50–77)
+
+Command is the first positional argument. `--config` and `--verbose` are global
+options that may appear in any order after the command. The config path is
+resolved relative to the actual `pwd` at invocation time and stored as an
+absolute path immediately after parsing.
+
+### `get_val()` resolution order
+
+```
+get_val "key" "default"
+  1. If CONFIG_FILE is set and the key is present and non-null → use config value
+  2. 
Otherwise → use the default passed as the second argument
```

Only `run_dir` has an explicit post-resolution check for empty/null; all other
keys silently fall back to their defaults if absent from the config. This makes
the config contract forward-compatible: adding new keys to the CLI does not
break existing user configs.

### Path normalization

`run_dir` is resolved in two steps:
1. If relative, it is prepended with `INVOCATION_CWD` (the directory where the
   CLI was invoked, not `REPO_ROOT`).
2. The trailing slash is stripped so that `run_dir + "/" + manifest_path` never
   produces double slashes.

All manifest paths are then resolved as `run_dir/manifest_path` and passed as
absolute paths to R scripts.

---

## Dispatch and XML Patching

### How dispatch modes work

Each named mode under `manifest.pecan_dispatch` carries a `host_xml` block —
the complete `<host>...</host>` XML to inject into `template.xml`.

When `use_apptainer` is set to `true` and the mode also defines
`host_xml_apptainer`, that variant is used instead. The `@SIF@` string is
substituted with the SIF filename relative to `run_dir` (since dispatched jobs
execute there).

### `patch_dispatch()` (`magic-ensemble` lines 390–422)

Called immediately after step 00 in `prepare`. Steps:
1. Resolve `template_path` as `run_dir + manifest.paths.template_file`.
2. Select `host_xml` or `host_xml_apptainer` based on `use_apptainer` and
   manifest availability.
3. Substitute `@SIF@` via `sed`.
4. Call `tools/patch_xml.py` with `--block` to replace the entire `<host>` element.

### `tools/patch_xml.py`

Regex-based in-place XML patcher. In `--block` mode it replaces the entire
`<tag>...</tag>` element (tags included). Limitations: assumes tags have no
attributes; single substitution only (first match). The tool is intentionally
minimal and workflow-agnostic.

---

## Apptainer Integration

When `use_apptainer: true`:

1.
`ensure_apptainer_available()` — tries `module load apptainer` if not on PATH.
2. `ensure_sif_present()` — looks for the SIF at `run_dir/<sif filename>`. If absent,
   pulls from `manifest.apptainer.remote.url/container.name:tag`. The SIF always
   lives in `run_dir` so it is co-located with the run for reproducibility.
3. R library pre-checks run inside the container (`check_r_libs_for_step_in_apptainer`).
4. Each R step is wrapped: `apptainer run --bind REPO_ROOT --bind run_dir`.

`run-ensembles` always executes `04_run_model.R` on the host (it submits jobs;
it does not run model code itself). When `use_apptainer: true`, the SIF must
be present because the patched `host_xml_apptainer` references it in the
launch command that PEcAn generates for each ensemble member.

---

## External Inputs Staging (`00_stage_external_inputs.sh`)

The script accepts `--repo-root`, `--config`, `--invocation-cwd`, and
optionally `--manifest`. Manifest defaults to
`<repo-root>/2a_grass/workflow_manifest.yaml`.

For each entry in `config.external_paths`:
1. Key must exist under `manifest.paths`; if not, the script exits with an error.
2. Source path is resolved: absolute as-is, relative paths prepended with
   `INVOCATION_CWD`.
3. Destination is `run_dir/$(basename manifest.paths.<key>)` — manifest-derived,
   not source-derived.
4. Parent directories are created if needed; file is copied with `cp -f`.

This staging runs before `patch_dispatch()`, so `template.xml` is guaranteed
to be present when the XML patching step fires.

---

## Adapting to a New Workflow

The CLI skeleton (`magic-ensemble`) and the staging/dispatch infrastructure are
designed to be reused.
When adapting: + +### Replace in the manifest + +- `steps`: update script paths and input/output path keys for the new workflow +- `paths`: replace with the new workflow's internal file layout +- `params_from_pft`, `additional_params`: workflow-specific fixed values +- `s3`: update bucket, key prefixes, and filenames +- `pecan_dispatch`: keep as-is if PEcAn dispatch is reused; otherwise replace +- `apptainer`: update container name and tag + +### Replace the step scripts + +Each script under `steps` should accept its inputs as named CLI arguments (R +scripts via `optparse`; shell scripts via `--flag value`). The CLI passes all +paths as absolute values so scripts do not need to be CWD-aware. + +### Keep in `magic-ensemble` + +- Argument parsing, `get_val()`, path normalization +- `check_aws` (for any command that fetches from S3) +- `ensure_apptainer_available`, `ensure_sif_present`, `check_r_libs_for_step*` +- `run_script`, `run_shell_script`, `patch_dispatch` + +### Update in `magic-ensemble` + +- The argument mappings in `run_prepare()` (the `case "$i"` block) — these are + the per-step CLI arguments passed to each R script and are workflow-specific. +- `usage()` — update command descriptions and examples. +- The manifest path constant (`MANIFEST=`) if the new workflow lives in a + different subdirectory. + +--- + +## Testing + +_(Placeholder — expand on this.)_ + +Proposed tiers: +- **Unit (bats/shunit2):** `get_val()` fallback behavior, `patch_dispatch()` XML + output, path normalization and `external_paths` destination derivation using + fixture configs and manifests. +- **Integration:** End-to-end `prepare` against a minimal fixture that exercises + the full step sequence without live R script execution (mock scripts that + assert their arguments and touch their expected outputs). 
diff --git a/magic-ensemble-README.md b/magic-ensemble-README.md new file mode 100644 index 0000000..7d9cb19 --- /dev/null +++ b/magic-ensemble-README.md @@ -0,0 +1,173 @@ +# magic-ensemble CLI + +`magic-ensemble` is a command-line interface for running the 2a_grass statewide +grassland carbon flux workflow. It fetches or stages input data, builds initial +conditions and model settings, and dispatches ensemble runs locally or via Slurm. + +--- + +## Prerequisites + +| Tool | Notes | +|---|---| +| `yq` | mikefarah/yq v4 (jq-style). Other `yq` implementations are not supported. | +| `aws` | AWS CLI v2; required only for `get-demo-data`. | +| `Rscript` | With packages listed per step (see *Commands* below). | +| `python3` | Required for `prepare` (patches template.xml). | +| `apptainer` | Required only when `use_apptainer: true` in your config. | + +--- + +## Quick Start + +```bash +# 1. Copy and edit the example config +cp 2a_grass/example_user_config.yaml my_config.yaml +$EDITOR my_config.yaml # at minimum, set run_dir + +# 2. Fetch demo data (skip if you have your own inputs — see "Supplying Your Own Data") +./magic-ensemble get-demo-data --config my_config.yaml + +# 3. Prepare: stage inputs, build climate files, ICs, and settings XML +./magic-ensemble prepare --config my_config.yaml + +# 4. Run the ensemble +./magic-ensemble run-ensembles --config my_config.yaml +``` + +Add `--verbose` to any command to echo the exact shell and Rscript calls as +they execute. + +--- + +## Configuration + +Copy `2a_grass/example_user_config.yaml` as a starting point. All keys except +`run_dir` are optional and fall back to the defaults shown below. + +| Key | Default | Description | +|---|---|---| +| `run_dir` | **required** | Directory for all run outputs. Relative paths are resolved from the directory where you invoke `./magic-ensemble`. | +| `start_date` | `2016-01-01` | Run start date (YYYY-MM-DD). | +| `end_date` | `2023-12-31` | Run end date (YYYY-MM-DD). 
| +| `run_LAI_date` | `2016-07-01` | Date used for LAI lookup during IC build. | +| `n_ens` | `20` | Number of parameter ensemble members. | +| `n_met` | `10` | Number of meteorology ensemble members. | +| `ic_ensemble_size` | `100` | IC ensemble draw size. | +| `n_workers` | `1` | Parallel workers for the ERA5 conversion step. | +| `use_apptainer` | `false` | Run R steps inside the workflow Apptainer container. | +| `pecan_dispatch` | _(none)_ | Dispatch mode for `run-ensembles`. Required for `prepare` and `run-ensembles`. | +| `external_paths` | _(none)_ | User-provided input files to stage into `run_dir` before `prepare` runs (see below). | + +Fixed internal paths, S3 coordinates, dispatch XML, and Apptainer image +details are defined in `2a_grass/workflow_manifest.yaml` and are not set in +user configs. + +--- + +## Commands + +### `get-demo-data` + +Downloads demo input data from S3 and creates the run directory. Use this if +you do not have your own ERA5, IC, or site data. + +**Requires:** `aws` CLI; S3 credentials for the CCMMF bucket. + +**Produces:** ERA5 NetCDF files, IC files, and site info CSV inside `run_dir`. + +### `prepare` + +Runs four steps in sequence: + +| Step | Script | R packages | +|---|---|---| +| 00 | Stage external inputs; create run directory | — | +| 01 | Convert ERA5 NetCDF to SIPNET climate format | `future`, `furrr` | +| 02 | Build initial condition ensemble | `tidyverse` | +| 03 | Build PEcAn settings XML | `PEcAn.settings` | + +After step 00, `template.xml` is patched with the `` dispatch block +selected by `pecan_dispatch` (and the Apptainer SIF path when applicable). + +**Requires:** `pecan_dispatch` set in config; `python3` on PATH. + +**Produces:** `settings.xml` in `run_dir`, ready for `run-ensembles`. + +### `run-ensembles` + +Runs `04_run_model.R` using the `settings.xml` produced by `prepare`. 
The R +script runs on the host and dispatches ensemble members to workers (local or +Slurm) as configured in the patched `settings.xml`. + +**Requires:** `PEcAn.all` R package; `settings.xml` present in `run_dir`. + +--- + +## Dispatch Options + +Set `pecan_dispatch` in your config to one of the following: + +| Value | Description | +|---|---| +| `local-gnu-parallel` | Runs ensemble members locally using GNU parallel. No cluster required. | +| `slurm-dispatch` | Submits ensemble members as Slurm batch jobs via `sbatch`. | + +The corresponding `` XML block is injected into `template.xml` during +`prepare` step 00. + +--- + +## Using Apptainer + +Set `use_apptainer: true` in your config to run the R steps inside the +workflow container. The CLI will: + +1. Attempt `module load apptainer` if `apptainer` is not already on PATH. +2. Look for the SIF file in `run_dir`. If absent, pull it from the registry + defined in `workflow_manifest.yaml`. +3. Bind `run_dir` and the repo root into the container for each R step. + +`run-ensembles` always runs `04_run_model.R` on the host, but when +`use_apptainer: true` the SIF must be present in `run_dir` because dispatched +job scripts reference it directly. + +--- + +## Supplying Your Own Data + +If you have your own ERA5, site, or template files, skip `get-demo-data` and +use `external_paths` in your config to inject them: + +```yaml +external_paths: + template_file: /path/to/my-template.xml +``` + +Each key must match a key under `paths` in `workflow_manifest.yaml`. The file +is copied into `run_dir` at the location the workflow expects, before `prepare` +runs. Paths may be absolute or relative to the directory where you invoke +`./magic-ensemble`. + +--- + +## Troubleshooting + +**`yq` not found or manifest parse fails** +Install mikefarah/yq v4. The `yq` distributed with some Linux package managers +is a different tool and is not compatible. + +**`run_dir is required`** +Your config file is missing `run_dir`. 
This is the only key with no default. + +**`Unknown pecan_dispatch value`** +The value of `pecan_dispatch` in your config does not match any key in +`workflow_manifest.yaml`. Valid options are printed when this error occurs. + +**`staged template.xml not found`** +`prepare` could not find `template.xml` in `run_dir`. Either run +`get-demo-data` first, or supply `external_paths.template_file` in your config. + +**`apptainer` not available** +Run `module load apptainer` before invoking the CLI, or ensure `apptainer` is +on your PATH. Singularity is not supported. From 2af7a276a20b4294bd31deb140c37be7ca08eca4 Mon Sep 17 00:00:00 2001 From: Henry Priest Date: Tue, 7 Apr 2026 17:04:42 +0000 Subject: [PATCH 7/7] Generalize patch_dispatch into patch_xml_block and add SIPNET model XML Replaces the host-only `patch_dispatch()` function with a generic `patch_xml_block()` that accepts an XML tag name and yq paths for both plain and Apptainer variants. Uses this to patch both the `` block (dispatch config) and the new `` block (SIPNET binary path) in a single prepare pass. Adds `sipnet_model.model_xml` and `sipnet_model.model_xml_apptainer` to the workflow manifest, selecting the Apptainer variant (absolute binary path inside the container) when `use_apptainer=true`. Updates developer docs to reflect the new calling convention and extensibility pattern. --- 2a_grass/workflow_manifest.yaml | 23 +++++++++++++++++ magic-ensemble | 45 ++++++++++++++++++++------------- magic-ensemble-DEVELOPERS.md | 38 ++++++++++++++++++---------- 3 files changed, 75 insertions(+), 31 deletions(-) diff --git a/2a_grass/workflow_manifest.yaml b/2a_grass/workflow_manifest.yaml index d703f96..4b46bff 100644 --- a/2a_grass/workflow_manifest.yaml +++ b/2a_grass/workflow_manifest.yaml @@ -144,6 +144,29 @@ pecan_dispatch: output/run +# SIPNET model XML block injected into template.xml during prepare. +# model_xml is used for host-native runs (sipnet.git must be on PATH). 
+# model_xml_apptainer is used when use_apptainer=true (binary path is inside container). +sipnet_model: + model_xml: | + + 99000000003 + SIPNET + git + TRUE + sipnet.git + cp data/events.in @RUNDIR@ + + model_xml_apptainer: | + + 99000000003 + SIPNET + git + TRUE + /usr/local/bin/sipnet.git + cp data/events.in @RUNDIR@ + + # Apptainer (not in user config) apptainer: remote: diff --git a/magic-ensemble b/magic-ensemble index 5a20008..b516e7b 100755 --- a/magic-ensemble +++ b/magic-ensemble @@ -388,40 +388,44 @@ validate_pecan_dispatch() { fi } -# --- Patch ... block in staged template.xml with chosen dispatch XML --- -# Selects host_xml_apptainer when use_apptainer=1 (with @SIF@ substituted); falls back to host_xml. -patch_dispatch() { +# --- Patch a named XML block in the staged template.xml from manifest values --- +# Usage: patch_xml_block +# +# Reads the XML block from the manifest at . If use_apptainer=1 +# and resolves to a non-null value in the manifest, uses +# that instead. Always runs @SIF@ substitution (no-op when @SIF@ is absent). +patch_xml_block() { + local xml_tag="$1" plain_yq_path="$2" apptainer_yq_path="$3" + if ! command -v python3 &>/dev/null; then - echo "magic-ensemble: python3 is required to patch dispatch in template.xml." >&2 + echo "magic-ensemble: python3 is required to patch <${xml_tag}> in template.xml." >&2 exit 1 fi + local template_path="${run_dir}/$(yq eval '.paths.template_file' "$MANIFEST")" if [[ ! -f "$template_path" ]]; then echo "magic-ensemble: staged template.xml not found at $template_path" >&2 exit 1 fi - # Select apptainer variant when available and requested; otherwise plain host_xml. 
- local host_xml_key="host_xml" + local yq_path="$plain_yq_path" if [[ $use_apptainer -eq 1 ]]; then - local has_apptainer_variant - has_apptainer_variant=$(yq eval ".pecan_dispatch[\"$pecan_dispatch\"] | has(\"host_xml_apptainer\")" "$MANIFEST") - if [[ "$has_apptainer_variant" == "true" ]]; then - host_xml_key="host_xml_apptainer" + local apptainer_val + apptainer_val=$(yq eval "$apptainer_yq_path" "$MANIFEST" 2>/dev/null) + if [[ -n "$apptainer_val" && "$apptainer_val" != "null" ]]; then + yq_path="$apptainer_yq_path" fi fi if [[ $VERBOSE -eq 1 ]]; then - echo "magic-ensemble: patching block in $template_path (pecan_dispatch=$pecan_dispatch, xml_key=$host_xml_key)" >&2 + echo "magic-ensemble: patching <${xml_tag}> block in $template_path (yq_path=${yq_path})" >&2 fi - local sif_name host_xml + local sif_name xml_block sif_name=$(yq eval '.apptainer.sif' "$MANIFEST") - # Substitute @SIF@ with the SIF filename (relative to run_dir, as jobs execute there). - host_xml=$(yq eval ".pecan_dispatch[\"$pecan_dispatch\"].$host_xml_key" "$MANIFEST" \ - | sed "s|@SIF@|./${sif_name}|g") + xml_block=$(yq eval "$yq_path" "$MANIFEST" | sed "s|@SIF@|./${sif_name}|g") - python3 "${REPO_ROOT}/tools/patch_xml.py" "$template_path" "host" "$host_xml" --block + python3 "${REPO_ROOT}/tools/patch_xml.py" "$template_path" "$xml_tag" "$xml_block" --block } # --- Get-demo-data: run steps from manifest (shell script only) --- @@ -461,7 +465,12 @@ run_prepare() { run_shell_script "$script" "$i" if [[ "$i" -eq 0 ]]; then echo "magic-ensemble: patching template.xml with dispatch: $pecan_dispatch" - patch_dispatch + patch_xml_block "host" \ + ".pecan_dispatch[\"$pecan_dispatch\"].host_xml" \ + ".pecan_dispatch[\"$pecan_dispatch\"].host_xml_apptainer" + patch_xml_block "model" \ + ".sipnet_model.model_xml" \ + ".sipnet_model.model_xml_apptainer" fi else case "$i" in @@ -505,7 +514,7 @@ run_prepare() { # --- Run-ensembles: run single step from manifest (04); never inside Apptainer. 
# When use_apptainer=1, the SIF must be present for dispatched jobs (already patched -# into template.xml via patch_dispatch during prepare); 04_run_model.R itself always +# into template.xml via patch_xml_block during prepare); 04_run_model.R itself always # runs on the host so it can submit further jobs to Slurm. run_run_ensembles() { get_steps_array diff --git a/magic-ensemble-DEVELOPERS.md b/magic-ensemble-DEVELOPERS.md index f388150..ab7493c 100644 --- a/magic-ensemble-DEVELOPERS.md +++ b/magic-ensemble-DEVELOPERS.md @@ -45,10 +45,12 @@ As written, a user can only inject files that are expected by the pipeline. 00_stage_external_inputs.sh → creates run_dir → copies external_paths files into run_dir (manifest-defined destinations) - → [patch_dispatch() runs after this step] - → reads pecan_dispatch host_xml from manifest - → substitutes @SIF@ if use_apptainer is set - → patches block in run_dir/template.xml via tools/patch_xml.py + → [patch_xml_block() runs twice after this step] + → patches block: reads pecan_dispatch host_xml from manifest, + substitutes @SIF@ if use_apptainer is set + → patches block: reads sipnet_model model_xml from manifest, + selects model_xml_apptainer variant if use_apptainer is set + → both use tools/patch_xml.py --block 01_ERA5_nc_to_clim.R reads: run_dir/data_raw/ERA5_nc, run_dir/site_info.csv @@ -152,14 +154,24 @@ When `use_apptainer` is set to `true` and the mode also defines `host_xml_apptai variant is used instead. The `@SIF@` string substituted with the SIF filename relative to `run_dir` (since dispatched jobs execute there). -### `patch_dispatch()` (`magic-ensemble` lines 390–422) +### `patch_xml_block()` (`magic-ensemble`) -Called immediately after step 00 in `prepare`. Steps: +Generic XML block patcher called immediately after step 00 in `prepare`. + +``` +patch_xml_block +``` + +Steps: 1. Resolve `template_path` as `run_dir + manifest.paths.template_file`. -2. 
Select `host_xml` or `host_xml_apptainer` based on `use_apptainer` and
-   manifest availability.
-3. Substitute `@SIF@` via `sed`.
-4. Call `tools/patch_xml.py` with `--block` to replace the entire `<host>` element.
+2. If `use_apptainer=1` and `<apptainer_yq_path>` resolves to a non-null value
+   in the manifest, use it; otherwise use `<plain_yq_path>`.
+3. Substitute `@SIF@` via `sed` (no-op when `@SIF@` is absent from the block).
+4. Call `tools/patch_xml.py` with `--block` to replace the entire element.
+
+Called twice in `run_prepare()`: once for `<host>` (dispatch XML) and once for
+`<model>` (SIPNET binary path). Adding a new patched block requires only one
+more `patch_xml_block` call with the appropriate manifest yq paths.
 
 ### `tools/patch_xml.py`
 
@@ -202,7 +214,7 @@ For each entry in `config.external_paths`:
    not source-derived.
 4. Parent directories are created if needed; file is copied with `cp -f`.
 
-This staging runs before `patch_dispatch()`, so `template.xml` is guaranteed
+This staging runs before `patch_xml_block()`, so `template.xml` is guaranteed
 to be present when the XML patching step fires.
 
 ---
@@ -232,7 +244,7 @@ paths as absolute values so scripts do not need to be CWD-aware.
 
 - Argument parsing, `get_val()`, path normalization
 - `check_aws` (for any command that fetches from S3)
 - `ensure_apptainer_available`, `ensure_sif_present`, `check_r_libs_for_step*`
-- `run_script`, `run_shell_script`, `patch_dispatch`
+- `run_script`, `run_shell_script`, `patch_xml_block`
 
 ### Update in `magic-ensemble`
 
@@ -249,7 +261,7 @@ paths as absolute values so scripts do not need to be CWD-aware.
 
 _(Placeholder — expand on this.)_
 
 Proposed tiers:
-- **Unit (bats/shunit2):** `get_val()` fallback behavior, `patch_dispatch()` XML
+- **Unit (bats/shunit2):** `get_val()` fallback behavior, `patch_xml_block()` XML
   output, path normalization and `external_paths` destination derivation using
   fixture configs and manifests.
- **Integration:** End-to-end `prepare` against a minimal fixture that exercises