From 51c0adef4e244f8aaae05ff71602410556a290ac Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Thu, 9 Apr 2026 20:50:45 +0200 Subject: [PATCH 01/21] config file --- src/simtools/applications/run_application.py | 5 ++--- src/simtools/runners/simtools_runner.py | 2 +- tests/unit_tests/runners/test_simtools_runner.py | 8 ++++---- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/simtools/applications/run_application.py b/src/simtools/applications/run_application.py index 3023b1d70a..f3706a6455 100644 --- a/src/simtools/applications/run_application.py +++ b/src/simtools/applications/run_application.py @@ -20,8 +20,8 @@ For simplified configuration, a placeholder called ``__SETTING_WORKFLOW__`` can be used in the configuration file. This placeholder will be replaced with the directory below ``input`` -(example: configuration file is in ``input/LSTN-design/num_gains/20250214T134800/config.yml``, -then the placeholder will be replaced with ``LSTN-design/num_gains/20250214T134800``). +(example: configuration file is in ``input/LSTN-design/num_gains/v2.0.0/config.yml``, +then the placeholder will be replaced with ``LSTN-design/num_gains/v2.0.0``). This will also be the directory for any output generated by the application. Run time environments can be defined in the configuration file using the ``runtime_environment`` @@ -70,7 +70,6 @@ def _add_arguments(parser): """Register application-specific command line arguments.""" parser.add_argument( "--config_file", - dest="configuration_file", help="Application configuration.", type=str, required=True, diff --git a/src/simtools/runners/simtools_runner.py b/src/simtools/runners/simtools_runner.py index dead33bf0b..5037c8c886 100644 --- a/src/simtools/runners/simtools_runner.py +++ b/src/simtools/runners/simtools_runner.py @@ -21,7 +21,7 @@ def run_applications(args_dict, logger): Logger for logging application output. """ configurations, runtime_environment, log_file = _read_application_configuration( - args_dict["configuration_file"], args_dict.get("steps"), logger + args_dict["config_file"], args_dict.get("steps"), logger ) run_time = ( read_runtime_environment(runtime_environment) diff --git a/tests/unit_tests/runners/test_simtools_runner.py b/tests/unit_tests/runners/test_simtools_runner.py index 6db5b9d15e..f36cf24c44 100644 --- a/tests/unit_tests/runners/test_simtools_runner.py +++ b/tests/unit_tests/runners/test_simtools_runner.py @@ -175,7 +175,7 @@ def test_run_applications_runs_and_logs(monkeypatch, tmp_test_directory): # Prepare mocks mock_logger = mock.Mock() mock_args_dict = { - "configuration_file": "dummy_config.yml", + "config_file": "dummy_config.yml", "steps": None, "ignore_runtime_environment": False, } @@ -232,7 +232,7 @@ def mock_submit(app, out_file, err_file, configuration=None, runtime_environment def test_run_applications_handles_job_execution_exception(monkeypatch, tmp_test_directory): mock_logger = mock.Mock() mock_args_dict = { - "configuration_file": "dummy_config.yml", + "config_file": "dummy_config.yml", "steps": None, "ignore_runtime_environment": False, } @@ -370,7 +370,7 @@ def test_run_applications_with_runtime_environment_ignored(monkeypatch, tmp_test """Test that runtime environment is ignored when ignore_runtime_environment is True.""" mock_logger = mock.Mock() mock_args_dict = { - "configuration_file": "dummy_config.yml", + "config_file": "dummy_config.yml", "steps": [1], "ignore_runtime_environment": True, } @@ -451,7 +451,7 @@ def test_run_applications_with_empty_configuration_list(monkeypatch, tmp_test_di """Test run_applications with empty configuration list.""" mock_logger = mock.Mock() mock_args_dict = { - "configuration_file": "empty_config.yml", + "config_file": "empty_config.yml", "steps": None, "ignore_runtime_environment": False, } From 4b27929b1e19063ce6058c45435f8d9a74c7390a Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Fri, 10 Apr 2026 11:05:49 +0200 Subject: [PATCH 02/21] introduce uuid7 --- environment.yml | 3 ++- pyproject.toml | 1 + src/simtools/applications/db_add_file_to_db.py | 3 +-- .../applications/db_add_value_from_json_to_db.py | 3 +-- src/simtools/configuration/configurator.py | 3 +-- src/simtools/data_model/metadata_collector.py | 3 +-- src/simtools/utils/general.py | 13 +++++++++++++ 7 files changed, 20 insertions(+), 9 deletions(-) diff --git a/environment.yml b/environment.yml index 0142d833bf..6bba068d35 100644 --- a/environment.yml +++ b/environment.yml @@ -35,6 +35,7 @@ dependencies: - scipy - sphinx - sphinx-design + - uuid6 # temporary dependency; not needed for python >= 3.14 - towncrier - toml - pip: @@ -46,4 +47,4 @@ dependencies: # create: conda env create -f environment.yml # activate: conda activate simtools-dev # update (conda/mamba): conda env update -f environment.yml --prune -# update (micromamba): micromamba update -f environment.yml +# update (micromamba): micromamba env update -f environment.yml -n simtools-dev diff --git a/pyproject.toml b/pyproject.toml index 785d1bbdd5..133bba94fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ dependencies = [ "pyyaml", "scipy", "toml", + "uuid6", # temporary dependency; not needed for python >= 3.14 ] optional-dependencies.dev = [ "pre-commit", diff --git a/src/simtools/applications/db_add_file_to_db.py b/src/simtools/applications/db_add_file_to_db.py index 40f9ab0177..d265c273fe 100644 --- a/src/simtools/applications/db_add_file_to_db.py +++ b/src/simtools/applications/db_add_file_to_db.py @@ -37,7 +37,6 @@ """ -import uuid from pathlib import Path import simtools.utils.general as gen @@ -134,7 +133,7 @@ def confirm_and_insert_files(files_to_insert, args_dict, db, logger): plural = "" if len(files_to_insert) == 1 else "s" if args_dict.get("test_db", False): - args_dict["db"] = args_dict["db"] + str(uuid.uuid4()) + args_dict["db"] = args_dict["db"] + gen.uuid() logger.info(f"Using test database: {args_dict['db']}") print(f"Should the following file{plural} be inserted to the {args_dict['db']} DB?:\n") diff --git a/src/simtools/applications/db_add_value_from_json_to_db.py b/src/simtools/applications/db_add_value_from_json_to_db.py index 5bcb0cdb05..e237bfc992 100644 --- a/src/simtools/applications/db_add_value_from_json_to_db.py +++ b/src/simtools/applications/db_add_value_from_json_to_db.py @@ -25,7 +25,6 @@ """ -import uuid from pathlib import Path import simtools.utils.general as gen @@ -58,7 +57,7 @@ def main(): app_context = build_application(initialization_kwargs={"db_config": True}) if app_context.args.get("test_db", False): - app_context.db_config["db_simulation_model_version"] = str(uuid.uuid4()) + app_context.db_config["db_simulation_model_version"] = gen.uuid() app_context.logger.info( f"Using test database version {app_context.db_config['db_simulation_model_version']}" ) diff --git a/src/simtools/configuration/configurator.py b/src/simtools/configuration/configurator.py index a1cc8a15af..41032301f5 100644 --- a/src/simtools/configuration/configurator.py +++ b/src/simtools/configuration/configurator.py @@ -3,7 +3,6 @@ import argparse import logging import sys -import uuid import astropy.units as u @@ -149,7 +148,7 @@ def initialize( self._fill_from_environmental_variables() if self.config.get("activity_id", None) is None: - self.config["activity_id"] = str(uuid.uuid4()) + self.config["activity_id"] = gen.uuid() if self.config["label"] is None: self.config["label"] = self.label self._initialize_model_versions() diff --git a/src/simtools/data_model/metadata_collector.py b/src/simtools/data_model/metadata_collector.py index 81bddf3c12..cffde0fea0 100644 --- a/src/simtools/data_model/metadata_collector.py +++ b/src/simtools/data_model/metadata_collector.py @@ -8,7 +8,6 @@ import getpass import logging -import uuid from pathlib import Path import simtools.utils.general as gen @@ -420,7 +419,7 @@ def _fill_product_meta(self, product_dict): self.schema_file = self.get_data_model_schema_file_name() self.schema_dict = self.get_data_model_schema_dict() - product_dict["id"] = str(uuid.uuid4()) + product_dict["id"] = gen.uuid() product_dict["creation_time"] = gen.now_date_time_in_isoformat() product_dict["description"] = self.schema_dict.get("description", None) diff --git a/src/simtools/utils/general.py b/src/simtools/utils/general.py index 64f11e002f..318560e027 100644 --- a/src/simtools/utils/general.py +++ b/src/simtools/utils/general.py @@ -13,6 +13,7 @@ import dotenv import numpy as np +import uuid6 _logger = logging.getLogger(__name__) @@ -969,3 +970,15 @@ def load_environment_variables(env_file=".env", env_list=None): cleaned_value = env_value.split("#")[0].strip().replace('"', "").replace("'", "") env_values[key.removeprefix("SIMTOOLS_").lower()] = cleaned_value return env_values + + +def uuid(): + """ + Generate a UUID (7) string. + + Returns + ------- + str + A UUID string. + """ + return str(uuid6.uuid7()) From 91e22e1656f89858acee97abd72eb4511f65023b Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Fri, 10 Apr 2026 13:05:15 +0200 Subject: [PATCH 03/21] add application run id --- src/simtools/application_control.py | 14 +++++++++----- src/simtools/settings.py | 3 ++- tests/unit_tests/test_application_control.py | 5 ++++- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/simtools/application_control.py b/src/simtools/application_control.py index caa0100652..f2582b6cae 100644 --- a/src/simtools/application_control.py +++ b/src/simtools/application_control.py @@ -5,7 +5,6 @@ import os import re from dataclasses import dataclass -from datetime import UTC, datetime from pathlib import Path import simtools.utils.general as gen @@ -59,7 +58,9 @@ def setup_logging(logger_name=None, log_level="INFO", log_file=None): log_file_path = Path(log_file) if log_file_path.parent: log_file_path.parent.mkdir(parents=True, exist_ok=True) - file_format = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + file_format = logging.Formatter( + f"{config.application_id} - %(name)s - %(levelname)s - %(message)s" + ) file_handler = logging.FileHandler(log_file_path) file_handler.setFormatter(file_format) file_handler.addFilter(redact_filter) @@ -73,7 +74,7 @@ def get_log_file(args_dict): """ Get log file path. - Generate log file path if needed from application name and startup time. + Generate log file path if needed from application name and application ID. Returns ------- @@ -85,8 +86,7 @@ def get_log_file(args_dict): if args_dict.get("application_label") is None or args_dict.get("output_path") is None: return None - timestamp = datetime.now(UTC).strftime("%Y%m%dT%H%M%SZ") - log_file = f"{args_dict['application_label']}_{timestamp}.log" + log_file = f"{args_dict['application_label']}_{config.application_id}.log" Path(args_dict["output_path"]).mkdir(parents=True, exist_ok=True) return Path(args_dict["output_path"]) / log_file @@ -290,6 +290,10 @@ def main(): ) logger = setup_logging(logger_name, args_dict["log_level"], log_file=get_log_file(args_dict)) + logger.info( + f"simtools application {args_dict.get('application_label')}" + f" started with application ID {config.application_id}" + ) io_handler_instance = io_handler.IOHandler() if setup_io_handler else None diff --git a/src/simtools/settings.py b/src/simtools/settings.py index 64004c387a..bab5aeba60 100644 --- a/src/simtools/settings.py +++ b/src/simtools/settings.py @@ -5,7 +5,7 @@ from pathlib import Path from types import MappingProxyType -from simtools.utils.general import find_executable_in_dir +from simtools.utils.general import find_executable_in_dir, uuid class _Config: @@ -22,6 +22,7 @@ def __init__(self): self._corsika_exe = None self.user = os.getenv("USER", "unknown") self.hostname = socket.gethostname() + self.application_id = uuid() def load(self, args=None, db_config=None, resolve_sim_software_executables=True): """ diff --git a/tests/unit_tests/test_application_control.py b/tests/unit_tests/test_application_control.py index a3f9b03619..12da83bc79 100644 --- a/tests/unit_tests/test_application_control.py +++ b/tests/unit_tests/test_application_control.py @@ -18,6 +18,7 @@ setup_logging, startup_application, ) +from simtools.settings import config def _reset_stream(handler): @@ -690,7 +691,9 @@ def test_setup_logging_with_file_handler(tmp_path): assert len(file_handlers) > 0 assert file_handlers[0].baseFilename == str(log_file) assert log_file.exists() - assert "Test message" in log_file.read_text() + content = log_file.read_text() + assert "Test message" in content + assert config.application_id in content finally: for handler in list(logger.handlers): handler.close() From 88781ecdc0d4e5eeff1c128f2c6ac7299fbaac3e Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Fri, 10 Apr 2026 20:43:25 +0200 Subject: [PATCH 04/21] add activity ID and metadata --- .../configuration/commandline_parser.py | 6 + src/simtools/data_model/metadata_collector.py | 19 ++- src/simtools/runners/simtools_runner.py | 124 +++++++++++++++--- .../application_workflow.metaschema.yml | 3 + src/simtools/schemas/metadata.metaschema.yml | 70 ++++++++++ .../configuration/test_commandline_parser.py | 9 ++ .../runners/test_simtools_runner.py | 113 +++++++++++++++- 7 files changed, 316 insertions(+), 28 deletions(-) diff --git a/src/simtools/configuration/commandline_parser.py b/src/simtools/configuration/commandline_parser.py index 7f9367c0f5..c96482a35a 100644 --- a/src/simtools/configuration/commandline_parser.py +++ b/src/simtools/configuration/commandline_parser.py @@ -130,6 +130,12 @@ def initialize_output_arguments(self): def initialize_application_execution_arguments(self): """Initialize application execution arguments.""" _job_group = self.add_argument_group("execution") + _job_group.add_argument( + "--activity_id", + help="activity identifier", + type=str, + default=None, + ) _job_group.add_argument( "--test", help="test option for faster execution during development", diff --git a/src/simtools/data_model/metadata_collector.py b/src/simtools/data_model/metadata_collector.py index cffde0fea0..3a0370fdc2 100644 --- a/src/simtools/data_model/metadata_collector.py +++ b/src/simtools/data_model/metadata_collector.py @@ -8,6 +8,7 @@ import getpass import logging +from copy import deepcopy from pathlib import Path import simtools.utils.general as gen @@ -93,8 +94,8 @@ def get_top_level_metadata(self): """ try: - self.top_level_meta[self.observatory]["activity"]["end"] = ( - gen.now_date_time_in_isoformat() + self.top_level_meta[self.observatory]["activity"]["end"] = self.args_dict.get( + "activity_end", gen.now_date_time_in_isoformat() ) except KeyError: pass @@ -309,6 +310,10 @@ def _fill_context_meta(self, context_dict): except (KeyError, TypeError): self._logger.debug("No input product metadata appended to associated data.") + associated_activities = self.args_dict.get("associated_activities") + if associated_activities is not None and "associated_activities" in context_dict: + context_dict["associated_activities"] = deepcopy(associated_activities) + def _read_input_metadata_from_file(self, metadata_file_name_expression=None): """ Read and validate input metadata from file. @@ -501,10 +506,16 @@ def _fill_activity_meta(self, activity_dict): activity_dict["name"] = self.args_dict.get("label", None) activity_dict["type"] = "software" activity_dict["id"] = self.args_dict.get("activity_id", "UNDEFINED_ACTIVITY_ID") - activity_dict["start"] = gen.now_date_time_in_isoformat() - activity_dict["end"] = activity_dict["start"] + activity_dict["start"] = self.args_dict.get( + "activity_start", gen.now_date_time_in_isoformat() + ) + activity_dict["end"] = self.args_dict.get("activity_end", activity_dict["start"]) activity_dict["software"]["name"] = "simtools" activity_dict["software"]["version"] = simtools.version.__version__ + if "runtime_environment" in activity_dict: + activity_dict["runtime_environment"] = deepcopy( + self.args_dict.get("runtime_environment") + ) def _merge_config_dicts(self, dict_high, dict_low, add_new_fields=False): """ diff --git a/src/simtools/runners/simtools_runner.py b/src/simtools/runners/simtools_runner.py index 5037c8c886..1754afa340 100644 --- a/src/simtools/runners/simtools_runner.py +++ b/src/simtools/runners/simtools_runner.py @@ -1,10 +1,13 @@ """Tools for running applications in the simtools framework.""" import shutil +from copy import deepcopy +from datetime import UTC, datetime from pathlib import Path import simtools.utils.general as gen from simtools import dependencies +from simtools.data_model.metadata_collector import MetadataCollector from simtools.io import ascii_handler from simtools.job_execution import job_manager @@ -20,9 +23,25 @@ def run_applications(args_dict, logger): logger : logging.Logger Logger for logging application output. """ - configurations, runtime_environment, log_file = _read_application_configuration( - args_dict["config_file"], args_dict.get("steps"), logger + ( + configurations, + runtime_environment, + log_file, + workflow_activity_id, + ) = _read_application_configuration( + args_dict["config_file"], + args_dict.get("steps"), + logger, + args_dict.get("activity_id"), ) + workflow_start = datetime.now(UTC) + associated_activities = [] + runtime_environment_snapshot = deepcopy(runtime_environment) + workflow_site = _get_workflow_configuration_value(configurations, "site") + workflow_instrument = _get_workflow_configuration_value(configurations, "instrument") + if workflow_instrument is None: + workflow_instrument = _get_workflow_configuration_value(configurations, "telescope") + run_time = ( read_runtime_environment(runtime_environment) if not args_dict["ignore_runtime_environment"] @@ -32,25 +51,48 @@ def run_applications(args_dict, logger): with log_file.open("w", encoding="utf-8") as file: file.write("Running simtools applications\n") file.write(dependencies.get_version_string(run_time, include_software_versions=False)) - - for config in configurations: - app = config.get("application") - if not config.get("run_application"): - logger.info(f"Skipping application: {app}") - continue - logger.info(f"Running application: {app}") - result = job_manager.submit( - app, - out_file=None, - err_file=None, - configuration=config.get("configuration"), - runtime_environment=run_time, + try: + for config in configurations: + app = config.get("application") + if not config.get("run_application"): + logger.info(f"Skipping application: {app}") + continue + + app_configuration = config.get("configuration", {}) + app_activity_id = app_configuration.get("activity_id") or workflow_activity_id + app_configuration["activity_id"] = app_activity_id + associated_activities.append({"name": app, "activity_id": app_activity_id}) + + logger.info(f"Running application: {app}") + result = job_manager.submit( + app, + out_file=None, + err_file=None, + configuration=app_configuration, + runtime_environment=run_time, + ) + file.write("=" * 80 + "\n") + file.write( + f"Application: {app}\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}\n" + ) + finally: + workflow_end = datetime.now(UTC) + workflow_end = max(workflow_end, workflow_start) + _write_workflow_metadata( + args_dict=args_dict, + output_path=Path(log_file).parent, + workflow_activity_id=workflow_activity_id, + workflow_start=workflow_start, + workflow_end=workflow_end, + runtime_environment=runtime_environment_snapshot, + associated_activities=associated_activities, + workflow_site=workflow_site, + workflow_instrument=workflow_instrument, + logger=logger, ) - file.write("=" * 80 + "\n") - file.write(f"Application: {app}\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}\n") -def _read_application_configuration(configuration_file, steps, logger): +def _read_application_configuration(configuration_file, steps, logger, workflow_activity_id=None): """ Read application configuration from file and modify for setting workflows. @@ -78,10 +120,15 @@ def _read_application_configuration(configuration_file, steps, logger): Runtime environment configuration. Path Path to the log file. + str + Workflow activity id. """ job_configuration = ascii_handler.collect_data_from_file(configuration_file) configurations = job_configuration.get("applications") + workflow_activity_id = ( + job_configuration.get("activity_id") or workflow_activity_id or gen.uuid() + ) output_path, setting_workflow = _set_input_output_directories(configuration_file) logger.info(f"Setting workflow output path to {output_path}") for step_count, config in enumerate(configurations, start=1): @@ -92,15 +139,56 @@ def _read_application_configuration(configuration_file, steps, logger): output_path, setting_workflow, ) + if config["configuration"].get("activity_id") is None: + config["configuration"]["activity_id"] = workflow_activity_id configurations[step_count - 1] = config return ( configurations, job_configuration.get("runtime_environment"), output_path / "simtools.log", + workflow_activity_id, ) +def _write_workflow_metadata( + args_dict, + output_path, + workflow_activity_id, + workflow_start, + workflow_end, + runtime_environment, + associated_activities, + workflow_site, + workflow_instrument, + logger, +): + """Write workflow-level metadata with authoritative lifecycle timestamps.""" + metadata_args = dict(args_dict) + metadata_args["label"] = "setting_workflow" + metadata_args["activity_id"] = workflow_activity_id + metadata_args["activity_start"] = workflow_start.isoformat(timespec="seconds") + metadata_args["activity_end"] = workflow_end.isoformat(timespec="seconds") + metadata_args["runtime_environment"] = deepcopy(runtime_environment) + metadata_args["associated_activities"] = deepcopy(associated_activities) + metadata_args["site"] = workflow_site + metadata_args["instrument"] = workflow_instrument + metadata_args["output_file"] = str(Path(output_path) / "workflow_metadata.yml") + + collector = MetadataCollector(metadata_args, clean_meta=False) + metadata_file = collector.write(metadata_args["output_file"], add_activity_name=True) + logger.info(f"Writing workflow metadata to {metadata_file}") + + +def _get_workflow_configuration_value(configurations, key): + """Return first non-empty configuration value for a given key.""" + for config in configurations: + value = config.get("configuration", {}).get(key) + if value is not None: + return value + return None + + def _replace_placeholders_in_configuration( configuration, output_path, setting_workflow, place_holder="__SETTING_WORKFLOW__" ): diff --git a/src/simtools/schemas/application_workflow.metaschema.yml b/src/simtools/schemas/application_workflow.metaschema.yml index 223842e246..0178161106 100644 --- a/src/simtools/schemas/application_workflow.metaschema.yml +++ b/src/simtools/schemas/application_workflow.metaschema.yml @@ -25,6 +25,9 @@ definitions: schema_name: type: string description: "Name of the schema." + activity_id: + type: string + description: "Workflow activity UUID. If omitted, generated automatically." runtime_environment: "$ref": "#/definitions/runtime_environment" required: diff --git a/src/simtools/schemas/metadata.metaschema.yml b/src/simtools/schemas/metadata.metaschema.yml index 713fed2570..29e0252494 100644 --- a/src/simtools/schemas/metadata.metaschema.yml +++ b/src/simtools/schemas/metadata.metaschema.yml @@ -454,6 +454,14 @@ definitions: format: date-time - type: "null" default: null + runtime_environment: + description: |- + Runtime environment configuration used to execute this activity. + anyOf: + - type: "null" + - type: object + additionalProperties: true + default: null ############### context: title: Context @@ -571,6 +579,33 @@ definitions: type: array items: $ref: '#/definitions/cta/properties/product' + associated_activities: + title: Associated activities. + description: |- + Activities associated with this data product. + type: array + items: + type: object + additionalProperties: false + properties: + name: + description: |- + Name of the associated activity. + anyOf: + - type: string + - type: "null" + default: null + activity_id: + description: |- + Identifier of the associated activity. + anyOf: + - type: string + - type: "null" + - type: number + default: null + required: + - name + - activity_id ... --- $schema: http://json-schema.org/draft-06/schema# @@ -1028,6 +1063,14 @@ definitions: format: date-time - type: "null" default: null + RUNTIME_ENVIRONMENT: + description: |- + Runtime environment configuration used to execute this activity. + anyOf: + - type: "null" + - type: object + additionalProperties: true + default: null ############### CONTEXT: title: Context @@ -1145,3 +1188,30 @@ definitions: type: array items: $ref: '#/definitions/CTA/properties/PRODUCT' + ASSOCIATED_ACTIVITIES: + title: Associated activities. + description: |- + Activities associated with this data product. + type: array + items: + type: object + additionalProperties: false + properties: + NAME: + description: |- + Name of the associated activity. + anyOf: + - type: string + - type: "null" + default: null + ACTIVITY_ID: + description: |- + Identifier of the associated activity. + anyOf: + - type: string + - type: "null" + - type: number + default: null + required: + - NAME + - ACTIVITY_ID diff --git a/tests/unit_tests/configuration/test_commandline_parser.py b/tests/unit_tests/configuration/test_commandline_parser.py index adc0ba4bbb..0f55fb2f0a 100644 --- a/tests/unit_tests/configuration/test_commandline_parser.py +++ b/tests/unit_tests/configuration/test_commandline_parser.py @@ -250,6 +250,15 @@ def test_initialize_default_arguments(): assert "output" in [str(group.title) for group in job_groups] +def test_initialize_default_arguments_accepts_activity_id(): + parser_with_defaults = parser.CommandLineParser() + parser_with_defaults.initialize_default_arguments() + + args = parser_with_defaults.parse_args(["--activity_id", "my-test-activity-id"]) + + assert args.activity_id == "my-test-activity-id" + + def test_initialize_application_arguments(): app_parser = parser.CommandLineParser() app_parser.initialize_application_arguments( diff --git a/tests/unit_tests/runners/test_simtools_runner.py b/tests/unit_tests/runners/test_simtools_runner.py index f36cf24c44..575126944d 100644 --- a/tests/unit_tests/runners/test_simtools_runner.py +++ b/tests/unit_tests/runners/test_simtools_runner.py @@ -134,12 +134,16 @@ def test_read_application_configuration_selected_steps( lambda config, output_path, setting_workflow: {**config, "output_path": str(output_path)}, ) - configs, _, _ = simtools_runner._read_application_configuration( + configs, _, _, workflow_activity_id = simtools_runner._read_application_configuration( DUMMY_CONFIG_FILE, [2], mock_logger ) assert configs[0]["run_application"] is False assert configs[1]["run_application"] is True assert configs[2]["run_application"] is False + assert workflow_activity_id is not None + assert configs[0]["configuration"]["activity_id"] == workflow_activity_id + assert configs[1]["configuration"]["activity_id"] == workflow_activity_id + assert configs[2]["configuration"]["activity_id"] == workflow_activity_id def test_read_application_configuration_empty_applications( @@ -164,11 +168,12 @@ def test_read_application_configuration_empty_applications( lambda config, output_path, setting_workflow: config, ) - configs, _, log_file = simtools_runner._read_application_configuration( + configs, _, log_file, workflow_activity_id = simtools_runner._read_application_configuration( DUMMY_CONFIG_FILE, None, mock_logger ) assert configs == [] assert isinstance(log_file, Path) + assert workflow_activity_id is not None def test_run_applications_runs_and_logs(monkeypatch, tmp_test_directory): @@ -191,7 +196,11 @@ def test_run_applications_runs_and_logs(monkeypatch, tmp_test_directory): # Patch _read_application_configuration monkeypatch.setattr( "simtools.runners.simtools_runner._read_application_configuration", - mock.Mock(return_value=(mock_configurations, None, log_file_path)), + mock.Mock(return_value=(mock_configurations, None, log_file_path, "wf-activity-id")), + ) + workflow_meta_mock = mock.Mock() + monkeypatch.setattr( + "simtools.runners.simtools_runner._write_workflow_metadata", workflow_meta_mock ) # Patch dependencies.get_version_string @@ -227,6 +236,54 @@ def mock_submit(app, out_file, err_file, configuration=None, runtime_environment mock_logger.info.assert_any_call("Skipping application: app2") mock_logger.info.assert_any_call("Running application: app3") version_string_mock.assert_called_once_with([], include_software_versions=False) + workflow_meta_mock.assert_called_once() + + associated = workflow_meta_mock.call_args.kwargs["associated_activities"] + assert associated == [ + {"name": "app1", "activity_id": "wf-activity-id"}, + {"name": "app3", "activity_id": "wf-activity-id"}, + ] + assert workflow_meta_mock.call_args.kwargs["workflow_site"] is None + assert workflow_meta_mock.call_args.kwargs["workflow_instrument"] is None + + +def test_run_applications_passes_workflow_instrument_context(monkeypatch, tmp_test_directory): + mock_logger = mock.Mock() + mock_args_dict = { + "config_file": "dummy_config.yml", + "steps": None, + "ignore_runtime_environment": False, + } + mock_configurations = [ + { + "application": "app1", + "run_application": True, + "configuration": {"site": "North", "telescope": "LSTN-design"}, + }, + ] + log_file_path = tmp_test_directory / "simtools.log" + + monkeypatch.setattr( + "simtools.runners.simtools_runner._read_application_configuration", + mock.Mock(return_value=(mock_configurations, None, log_file_path, "wf-activity-id")), + ) + monkeypatch.setattr( + "simtools.dependencies.get_version_string", + mock.Mock(return_value="simtools version: 1.2.3\n"), + ) + monkeypatch.setattr( + "simtools.job_execution.job_manager.submit", + mock.Mock(return_value=mock.Mock(stdout="ok", stderr="")), + ) + workflow_meta_mock = mock.Mock() + monkeypatch.setattr( + "simtools.runners.simtools_runner._write_workflow_metadata", workflow_meta_mock + ) + + simtools_runner.run_applications(mock_args_dict, mock_logger) + + assert workflow_meta_mock.call_args.kwargs["workflow_site"] == "North" + assert workflow_meta_mock.call_args.kwargs["workflow_instrument"] == "LSTN-design" def test_run_applications_handles_job_execution_exception(monkeypatch, tmp_test_directory): @@ -244,7 +301,7 @@ def test_run_applications_handles_job_execution_exception(monkeypatch, tmp_test_ monkeypatch.setattr( "simtools.runners.simtools_runner._read_application_configuration", - mock.Mock(return_value=(mock_configurations, None, log_file_path)), + mock.Mock(return_value=(mock_configurations, None, log_file_path, "wf-activity-id")), ) monkeypatch.setattr( "simtools.dependencies.get_version_string", @@ -255,6 +312,7 @@ def mock_submit_failure(app, out_file, err_file, configuration=None, runtime_env raise JobExecutionError("Job failed") monkeypatch.setattr("simtools.job_execution.job_manager.submit", mock_submit_failure) + monkeypatch.setattr("simtools.runners.simtools_runner._write_workflow_metadata", mock.Mock()) with pytest.raises(JobExecutionError): simtools_runner.run_applications(mock_args_dict, mock_logger) @@ -383,7 +441,9 @@ def test_run_applications_with_runtime_environment_ignored(monkeypatch, tmp_test monkeypatch.setattr( "simtools.runners.simtools_runner._read_application_configuration", - mock.Mock(return_value=(mock_configurations, runtime_environment, log_file_path)), + mock.Mock( + return_value=(mock_configurations, runtime_environment, log_file_path, "wf-activity-id") + ), ) monkeypatch.setattr( "simtools.dependencies.get_version_string", @@ -399,6 +459,7 @@ def mock_submit(app, out_file, err_file, configuration=None, runtime_environment return result_mock monkeypatch.setattr("simtools.job_execution.job_manager.submit", mock_submit) + monkeypatch.setattr("simtools.runners.simtools_runner._write_workflow_metadata", mock.Mock()) simtools_runner.run_applications(mock_args_dict, mock_logger) @@ -460,7 +521,7 @@ def test_run_applications_with_empty_configuration_list(monkeypatch, tmp_test_di monkeypatch.setattr( "simtools.runners.simtools_runner._read_application_configuration", - mock.Mock(return_value=([], None, log_file_path)), + mock.Mock(return_value=([], None, log_file_path, "wf-activity-id")), ) monkeypatch.setattr( "simtools.dependencies.get_version_string", @@ -470,6 +531,7 @@ def test_run_applications_with_empty_configuration_list(monkeypatch, tmp_test_di # Should not call job_manager.submit at all mock_submit = mock.Mock() monkeypatch.setattr("simtools.job_execution.job_manager.submit", mock_submit) + monkeypatch.setattr("simtools.runners.simtools_runner._write_workflow_metadata", mock.Mock()) simtools_runner.run_applications(mock_args_dict, mock_logger) @@ -516,3 +578,42 @@ def test_pull_image_raises_if_pull_fails(monkeypatch): with pytest.raises(RuntimeError, match="Failed to pull image"): simtools_runner._pull_image("podman", image) + + +def test_write_workflow_metadata_uses_uncleaned_metadata(monkeypatch): + mock_logger = mock.Mock() + mock_collector = mock.Mock() + mock_collector.write.return_value = "output/workflow_metadata.setting_workflow.meta.yml" + metadata_collector_cls = mock.Mock(return_value=mock_collector) + monkeypatch.setattr( + "simtools.runners.simtools_runner.MetadataCollector", metadata_collector_cls + ) + + simtools_runner._write_workflow_metadata( + args_dict={"config_file": "dummy.yml"}, + output_path=Path("output/test_workflow"), + workflow_activity_id="wf-id", + workflow_start=mock.Mock(isoformat=mock.Mock(return_value="2026-01-01T00:00:00+00:00")), + workflow_end=mock.Mock(isoformat=mock.Mock(return_value="2026-01-01T00:00:01+00:00")), + runtime_environment={"image": "test-image"}, + associated_activities=[{"name": "app", "activity_id": "wf-id"}], + workflow_site="North", + workflow_instrument="LSTN-design", + logger=mock_logger, + ) + + metadata_collector_cls.assert_called_once() + assert metadata_collector_cls.call_args.args[0]["activity_id"] == "wf-id" + assert metadata_collector_cls.call_args.args[0]["site"] == "North" + assert metadata_collector_cls.call_args.args[0]["instrument"] == "LSTN-design" + assert metadata_collector_cls.call_args.kwargs["clean_meta"] is False + mock_collector.write.assert_called_once() + + +def test_get_workflow_configuration_value(): + configurations = [ + {"configuration": {"site": None}}, + {"configuration": {"site": "North"}}, + ] + assert simtools_runner._get_workflow_configuration_value(configurations, "site") == "North" + assert simtools_runner._get_workflow_configuration_value(configurations, "instrument") is None From ceb8f31cdfa16677ff2fefda3c05c771bdb18c17 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sat, 11 Apr 2026 11:17:40 +0200 Subject: [PATCH 05/21] merge metadata to parameter metadata --- src/simtools/application_control.py | 6 +- src/simtools/runners/simtools_runner.py | 88 +++++++++-- src/simtools/settings.py | 2 +- .../runners/test_simtools_runner.py | 149 ++++++++++++++---- tests/unit_tests/test_application_control.py | 2 +- 5 files changed, 198 insertions(+), 49 deletions(-) diff --git a/src/simtools/application_control.py b/src/simtools/application_control.py index f2582b6cae..09f8c53a07 100644 --- a/src/simtools/application_control.py +++ b/src/simtools/application_control.py @@ -59,7 +59,7 @@ def setup_logging(logger_name=None, log_level="INFO", log_file=None): if log_file_path.parent: log_file_path.parent.mkdir(parents=True, exist_ok=True) file_format = logging.Formatter( - f"{config.application_id} - %(name)s - %(levelname)s - %(message)s" + f"{config.activity_id} - %(name)s - %(levelname)s - %(message)s" ) file_handler = logging.FileHandler(log_file_path) file_handler.setFormatter(file_format) @@ -86,7 +86,7 @@ def get_log_file(args_dict): if args_dict.get("application_label") is None or args_dict.get("output_path") is None: return None - log_file = f"{args_dict['application_label']}_{config.application_id}.log" + log_file = f"{args_dict['application_label']}_{config.activity_id}.log" Path(args_dict["output_path"]).mkdir(parents=True, exist_ok=True) return Path(args_dict["output_path"]) / log_file @@ -292,7 +292,7 @@ def main(): logger = setup_logging(logger_name, args_dict["log_level"], log_file=get_log_file(args_dict)) logger.info( f"simtools application {args_dict.get('application_label')}" - f" started with application ID {config.application_id}" + f" started with application ID {config.activity_id}" ) io_handler_instance = io_handler.IOHandler() if setup_io_handler else None diff --git a/src/simtools/runners/simtools_runner.py b/src/simtools/runners/simtools_runner.py index 1754afa340..bfbbe7a703 100644 --- a/src/simtools/runners/simtools_runner.py +++ b/src/simtools/runners/simtools_runner.py @@ -41,6 +41,7 @@ def run_applications(args_dict, logger): workflow_instrument = _get_workflow_configuration_value(configurations, "instrument") if workflow_instrument is None: workflow_instrument = _get_workflow_configuration_value(configurations, "telescope") + model_parameter_metadata_files = [] run_time = ( read_runtime_environment(runtime_environment) @@ -59,9 +60,12 @@ def run_applications(args_dict, logger): continue app_configuration = config.get("configuration", {}) - app_activity_id = app_configuration.get("activity_id") or workflow_activity_id + app_activity_id = app_configuration.get("activity_id") or gen.uuid() app_configuration["activity_id"] = app_activity_id associated_activities.append({"name": app, "activity_id": app_activity_id}) + metadata_file = _get_model_parameter_metadata_file(app, app_configuration) + if metadata_file is not None: + model_parameter_metadata_files.append(metadata_file) logger.info(f"Running application: {app}") result = job_manager.submit( @@ -78,16 +82,19 @@ def run_applications(args_dict, logger): finally: workflow_end = datetime.now(UTC) workflow_end = max(workflow_end, workflow_start) - _write_workflow_metadata( + workflow_metadata = _build_workflow_metadata( args_dict=args_dict, - output_path=Path(log_file).parent, workflow_activity_id=workflow_activity_id, workflow_start=workflow_start, workflow_end=workflow_end, runtime_environment=runtime_environment_snapshot, - associated_activities=associated_activities, workflow_site=workflow_site, workflow_instrument=workflow_instrument, + ) + _update_model_parameter_metadata_files( + model_parameter_metadata_files=model_parameter_metadata_files, + workflow_metadata=workflow_metadata, + associated_activities=associated_activities, logger=logger, ) @@ -140,7 +147,7 @@ def _read_application_configuration(configuration_file, steps, logger, workflow_ setting_workflow, ) if config["configuration"].get("activity_id") is None: - config["configuration"]["activity_id"] = workflow_activity_id + config["configuration"]["activity_id"] = gen.uuid() configurations[step_count - 1] = config return ( @@ -151,33 +158,86 @@ def _read_application_configuration(configuration_file, steps, logger, workflow_ ) -def _write_workflow_metadata( +def _build_workflow_metadata( args_dict, - output_path, workflow_activity_id, workflow_start, workflow_end, runtime_environment, - associated_activities, workflow_site, workflow_instrument, - logger, ): - """Write workflow-level metadata with authoritative lifecycle timestamps.""" + """Build workflow-level metadata dictionary with authoritative lifecycle timestamps.""" metadata_args = dict(args_dict) metadata_args["label"] = "setting_workflow" metadata_args["activity_id"] = workflow_activity_id metadata_args["activity_start"] = workflow_start.isoformat(timespec="seconds") metadata_args["activity_end"] = workflow_end.isoformat(timespec="seconds") metadata_args["runtime_environment"] = deepcopy(runtime_environment) - metadata_args["associated_activities"] = deepcopy(associated_activities) metadata_args["site"] = workflow_site metadata_args["instrument"] = workflow_instrument - metadata_args["output_file"] = str(Path(output_path) / "workflow_metadata.yml") collector = MetadataCollector(metadata_args, clean_meta=False) - metadata_file = collector.write(metadata_args["output_file"], add_activity_name=True) - logger.info(f"Writing workflow metadata to {metadata_file}") + return collector.get_top_level_metadata().get("cta", {}) + + +def _get_model_parameter_metadata_file(application, app_configuration): + """Return expected metadata file for model-parameter submission applications.""" + if application != "simtools-submit-model-parameter-from-external": + return None + + parameter = app_configuration.get("parameter") + parameter_version = app_configuration.get("parameter_version") + output_path = app_configuration.get("output_path") + if not parameter or not parameter_version or not output_path: + return None + + return Path(output_path) / parameter / f"{parameter}-{parameter_version}.meta.yml" + + +def _update_model_parameter_metadata_files( + model_parameter_metadata_files, + workflow_metadata, + associated_activities, + logger, +): + """Inject workflow metadata into model-parameter metadata files.""" + workflow_activity = deepcopy(workflow_metadata.get("activity", {})) + + for metadata_file in model_parameter_metadata_files: + metadata_path = Path(metadata_file) + if not metadata_path.exists(): + logger.debug(f"Model-parameter metadata file does not exist: {metadata_path}") + continue + + metadata = ascii_handler.collect_data_from_file(metadata_path) + metadata = gen.change_dict_keys_case(metadata, True) + cta_meta = metadata.get("cta", {}) + cta_meta["activity"] = deepcopy(workflow_activity) + + context = cta_meta.setdefault("context", {}) + context_associated = context.get("associated_activities") or [] + context["associated_activities"] = _merge_associated_activities( + context_associated, + associated_activities, + ) + + metadata["cta"] = cta_meta + ascii_handler.write_data_to_file(metadata, metadata_path) + logger.info(f"Updated workflow metadata in {metadata_path}") + + +def _merge_associated_activities(existing_activities, new_activities): + """Merge associated activities preserving order and uniqueness.""" + merged_activities = [] + seen = set() + for activity in [*existing_activities, *new_activities]: + key = (activity.get("name"), activity.get("activity_id")) + if key in seen: + continue + seen.add(key) + merged_activities.append(activity) + return merged_activities def _get_workflow_configuration_value(configurations, key): diff --git a/src/simtools/settings.py b/src/simtools/settings.py index bab5aeba60..40f6bb740d 100644 --- a/src/simtools/settings.py +++ b/src/simtools/settings.py @@ -22,7 +22,7 @@ def __init__(self): self._corsika_exe = None self.user = os.getenv("USER", "unknown") self.hostname = socket.gethostname() - self.application_id = uuid() + self.activity_id = uuid() def load(self, args=None, db_config=None, resolve_sim_software_executables=True): """ diff --git a/tests/unit_tests/runners/test_simtools_runner.py b/tests/unit_tests/runners/test_simtools_runner.py index 575126944d..672ea5d24e 100644 --- a/tests/unit_tests/runners/test_simtools_runner.py +++ b/tests/unit_tests/runners/test_simtools_runner.py @@ -5,6 +5,7 @@ from unittest import mock import pytest +import yaml from simtools.job_execution.job_manager import JobExecutionError from simtools.runners import simtools_runner @@ -141,9 +142,9 @@ def test_read_application_configuration_selected_steps( assert configs[1]["run_application"] is True assert configs[2]["run_application"] is False assert workflow_activity_id is not None - assert configs[0]["configuration"]["activity_id"] == workflow_activity_id - assert configs[1]["configuration"]["activity_id"] == workflow_activity_id - assert configs[2]["configuration"]["activity_id"] == workflow_activity_id + assert configs[0]["configuration"]["activity_id"] is not None + assert configs[1]["configuration"]["activity_id"] is not None + assert configs[2]["configuration"]["activity_id"] is not None def test_read_application_configuration_empty_applications( @@ -187,9 +188,21 @@ def test_run_applications_runs_and_logs(monkeypatch, tmp_test_directory): # Prepare configurations returned by _read_application_configuration mock_configurations = [ - {"application": "app1", "run_application": True, "configuration": {"key": "value1"}}, - {"application": "app2", "run_application": False, "configuration": {"key": "value2"}}, - {"application": "app3", "run_application": True, "configuration": {"key": "value3"}}, + { + "application": "app1", + "run_application": True, + "configuration": {"key": "value1", "activity_id": "cfg-id-1"}, + }, + { + "application": "app2", + "run_application": False, + "configuration": {"key": "value2", "activity_id": "cfg-id-2"}, + }, + { + "application": "app3", + "run_application": True, + "configuration": {"key": "value3", "activity_id": "cfg-id-3"}, + }, ] log_file_path = tmp_test_directory / "simtools.log" @@ -198,9 +211,14 @@ def test_run_applications_runs_and_logs(monkeypatch, tmp_test_directory): "simtools.runners.simtools_runner._read_application_configuration", mock.Mock(return_value=(mock_configurations, None, log_file_path, "wf-activity-id")), ) - workflow_meta_mock = mock.Mock() + workflow_build_mock = mock.Mock(return_value={"activity": {"id": "wf-activity-id"}}) + workflow_update_mock = mock.Mock() monkeypatch.setattr( - "simtools.runners.simtools_runner._write_workflow_metadata", workflow_meta_mock + "simtools.runners.simtools_runner._build_workflow_metadata", workflow_build_mock + ) + monkeypatch.setattr( + "simtools.runners.simtools_runner._update_model_parameter_metadata_files", + workflow_update_mock, ) # Patch dependencies.get_version_string @@ -236,15 +254,15 @@ def mock_submit(app, out_file, err_file, configuration=None, runtime_environment mock_logger.info.assert_any_call("Skipping application: app2") mock_logger.info.assert_any_call("Running application: app3") version_string_mock.assert_called_once_with([], include_software_versions=False) - workflow_meta_mock.assert_called_once() + workflow_build_mock.assert_called_once() + workflow_update_mock.assert_called_once() - associated = workflow_meta_mock.call_args.kwargs["associated_activities"] + associated = workflow_update_mock.call_args.kwargs["associated_activities"] assert associated == [ - {"name": "app1", "activity_id": "wf-activity-id"}, - {"name": "app3", "activity_id": "wf-activity-id"}, + {"name": "app1", "activity_id": "cfg-id-1"}, + {"name": "app3", "activity_id": "cfg-id-3"}, ] - assert workflow_meta_mock.call_args.kwargs["workflow_site"] is None - assert workflow_meta_mock.call_args.kwargs["workflow_instrument"] is None + assert workflow_update_mock.call_args.kwargs["model_parameter_metadata_files"] == [] def test_run_applications_passes_workflow_instrument_context(monkeypatch, tmp_test_directory): @@ -258,7 +276,11 @@ def test_run_applications_passes_workflow_instrument_context(monkeypatch, tmp_te { "application": "app1", "run_application": True, - "configuration": {"site": "North", "telescope": "LSTN-design"}, + "configuration": { + "site": "North", + "telescope": "LSTN-design", + "activity_id": "cfg-id-1", + }, }, ] log_file_path = tmp_test_directory / "simtools.log" @@ -275,15 +297,20 @@ def test_run_applications_passes_workflow_instrument_context(monkeypatch, tmp_te "simtools.job_execution.job_manager.submit", mock.Mock(return_value=mock.Mock(stdout="ok", stderr="")), ) - workflow_meta_mock = mock.Mock() + workflow_build_mock = mock.Mock(return_value={"activity": {"id": "wf-activity-id"}}) + workflow_update_mock = mock.Mock() + monkeypatch.setattr( + "simtools.runners.simtools_runner._build_workflow_metadata", workflow_build_mock + ) monkeypatch.setattr( - "simtools.runners.simtools_runner._write_workflow_metadata", workflow_meta_mock + "simtools.runners.simtools_runner._update_model_parameter_metadata_files", + workflow_update_mock, ) simtools_runner.run_applications(mock_args_dict, mock_logger) - assert workflow_meta_mock.call_args.kwargs["workflow_site"] == "North" - assert workflow_meta_mock.call_args.kwargs["workflow_instrument"] == "LSTN-design" + assert workflow_build_mock.call_args.kwargs["workflow_site"] == "North" + assert workflow_build_mock.call_args.kwargs["workflow_instrument"] == "LSTN-design" def test_run_applications_handles_job_execution_exception(monkeypatch, tmp_test_directory): @@ -312,7 +339,13 @@ def mock_submit_failure(app, out_file, err_file, configuration=None, runtime_env raise JobExecutionError("Job failed") monkeypatch.setattr("simtools.job_execution.job_manager.submit", mock_submit_failure) - monkeypatch.setattr("simtools.runners.simtools_runner._write_workflow_metadata", mock.Mock()) + monkeypatch.setattr( + "simtools.runners.simtools_runner._build_workflow_metadata", + mock.Mock(return_value={"activity": {"id": "wf-activity-id"}}), + ) + monkeypatch.setattr( + "simtools.runners.simtools_runner._update_model_parameter_metadata_files", mock.Mock() + ) with pytest.raises(JobExecutionError): simtools_runner.run_applications(mock_args_dict, mock_logger) @@ -459,7 +492,13 @@ def mock_submit(app, out_file, err_file, configuration=None, runtime_environment return result_mock monkeypatch.setattr("simtools.job_execution.job_manager.submit", mock_submit) - monkeypatch.setattr("simtools.runners.simtools_runner._write_workflow_metadata", mock.Mock()) + monkeypatch.setattr( + "simtools.runners.simtools_runner._build_workflow_metadata", + mock.Mock(return_value={"activity": {"id": "wf-activity-id"}}), + ) + monkeypatch.setattr( + "simtools.runners.simtools_runner._update_model_parameter_metadata_files", mock.Mock() + ) simtools_runner.run_applications(mock_args_dict, mock_logger) @@ -531,7 +570,13 @@ def test_run_applications_with_empty_configuration_list(monkeypatch, tmp_test_di # Should not call job_manager.submit at all mock_submit = mock.Mock() monkeypatch.setattr("simtools.job_execution.job_manager.submit", mock_submit) - monkeypatch.setattr("simtools.runners.simtools_runner._write_workflow_metadata", mock.Mock()) + monkeypatch.setattr( + "simtools.runners.simtools_runner._build_workflow_metadata", + mock.Mock(return_value={"activity": {"id": "wf-activity-id"}}), + ) + monkeypatch.setattr( + "simtools.runners.simtools_runner._update_model_parameter_metadata_files", mock.Mock() + ) simtools_runner.run_applications(mock_args_dict, mock_logger) @@ -580,26 +625,22 @@ def test_pull_image_raises_if_pull_fails(monkeypatch): simtools_runner._pull_image("podman", image) -def test_write_workflow_metadata_uses_uncleaned_metadata(monkeypatch): - mock_logger = mock.Mock() +def test_build_workflow_metadata_uses_uncleaned_metadata(monkeypatch): mock_collector = mock.Mock() - mock_collector.write.return_value = "output/workflow_metadata.setting_workflow.meta.yml" + mock_collector.get_top_level_metadata.return_value = {"cta": {"activity": {"id": "wf-id"}}} metadata_collector_cls = mock.Mock(return_value=mock_collector) monkeypatch.setattr( "simtools.runners.simtools_runner.MetadataCollector", metadata_collector_cls ) - simtools_runner._write_workflow_metadata( + metadata = simtools_runner._build_workflow_metadata( args_dict={"config_file": "dummy.yml"}, - output_path=Path("output/test_workflow"), workflow_activity_id="wf-id", workflow_start=mock.Mock(isoformat=mock.Mock(return_value="2026-01-01T00:00:00+00:00")), workflow_end=mock.Mock(isoformat=mock.Mock(return_value="2026-01-01T00:00:01+00:00")), runtime_environment={"image": "test-image"}, - associated_activities=[{"name": "app", "activity_id": "wf-id"}], workflow_site="North", workflow_instrument="LSTN-design", - logger=mock_logger, ) metadata_collector_cls.assert_called_once() @@ -607,7 +648,55 @@ def test_write_workflow_metadata_uses_uncleaned_metadata(monkeypatch): assert metadata_collector_cls.call_args.args[0]["site"] == "North" assert metadata_collector_cls.call_args.args[0]["instrument"] == "LSTN-design" assert metadata_collector_cls.call_args.kwargs["clean_meta"] is False - mock_collector.write.assert_called_once() + assert metadata == {"activity": {"id": "wf-id"}} + + +def test_get_model_parameter_metadata_file(): + config = { + "output_path": "output/test", + "parameter": "pm_photoelectron_spectrum", + "parameter_version": "2.0.1", + } + metadata_file = simtools_runner._get_model_parameter_metadata_file( + "simtools-submit-model-parameter-from-external", config + ) + assert metadata_file == Path( + "output/test/pm_photoelectron_spectrum/pm_photoelectron_spectrum-2.0.1.meta.yml" + ) + + +def test_update_model_parameter_metadata_files(tmp_test_directory): + metadata_file = tmp_test_directory / "pm.meta.yml" + metadata_dict = { + "cta": { + "product": {"id": "prod-id"}, + "activity": {"id": "old-id"}, + "context": {"associated_activities": [{"name": "old", "activity_id": "old-id"}]}, + } + } + + metadata_file.write_text(yaml.safe_dump(metadata_dict), encoding="utf-8") + workflow_metadata = {"activity": {"id": "workflow-id", "name": "setting_workflow"}} + associated_activities = [ + {"name": "app1", "activity_id": "a1"}, + {"name": "app2", "activity_id": "a2"}, + ] + + simtools_runner._update_model_parameter_metadata_files( + model_parameter_metadata_files=[metadata_file], + workflow_metadata=workflow_metadata, + associated_activities=associated_activities, + logger=mock.Mock(), + ) + + updated = yaml.safe_load(metadata_file.read_text(encoding="utf-8")) + assert updated["cta"]["product"]["id"] == "prod-id" + assert updated["cta"]["activity"]["id"] == "workflow-id" + assert updated["cta"]["context"]["associated_activities"] == [ + {"name": "old", "activity_id": "old-id"}, + {"name": "app1", "activity_id": "a1"}, + {"name": "app2", "activity_id": "a2"}, + ] def test_get_workflow_configuration_value(): diff --git a/tests/unit_tests/test_application_control.py b/tests/unit_tests/test_application_control.py index 12da83bc79..2d73150e40 100644 --- a/tests/unit_tests/test_application_control.py +++ b/tests/unit_tests/test_application_control.py @@ -693,7 +693,7 @@ def test_setup_logging_with_file_handler(tmp_path): assert log_file.exists() content = log_file.read_text() assert "Test message" in content - assert config.application_id in content + assert config.activity_id in content finally: for handler in list(logger.handlers): handler.close() From d809c56777ff5c7e5eeeb28a7a9d6271167cec8b Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sat, 11 Apr 2026 11:34:50 +0200 Subject: [PATCH 06/21] consistent metadata --- src/simtools/runners/simtools_runner.py | 6 ++- src/simtools/settings.py | 3 ++ src/simtools/utils/general.py | 23 ++++++++++ .../runners/test_simtools_runner.py | 43 +++++++++++++++++++ tests/unit_tests/test_settings.py | 14 ++++++ tests/unit_tests/utils/test_general.py | 13 ++++++ 6 files changed, 101 insertions(+), 1 deletion(-) diff --git a/src/simtools/runners/simtools_runner.py b/src/simtools/runners/simtools_runner.py index bfbbe7a703..f43968eb24 100644 --- a/src/simtools/runners/simtools_runner.py +++ b/src/simtools/runners/simtools_runner.py @@ -133,8 +133,12 @@ def _read_application_configuration(configuration_file, steps, logger, workflow_ """ job_configuration = ascii_handler.collect_data_from_file(configuration_file) configurations = job_configuration.get("applications") + path_activity_id = gen.extract_uuid7_from_path(configuration_file) workflow_activity_id = ( - job_configuration.get("activity_id") or workflow_activity_id or gen.uuid() + job_configuration.get("activity_id") + or path_activity_id + or workflow_activity_id + or gen.uuid() ) output_path, setting_workflow = _set_input_output_directories(configuration_file) logger.info(f"Setting workflow output path to {output_path}") diff --git a/src/simtools/settings.py b/src/simtools/settings.py index 40f6bb740d..404746528d 100644 --- a/src/simtools/settings.py +++ b/src/simtools/settings.py @@ -43,6 +43,9 @@ def load(self, args=None, db_config=None, resolve_sim_software_executables=True) """ self._args = MappingProxyType(args) if args is not None else {} self._db_config = MappingProxyType(db_config) if db_config is not None else {} + self.activity_id = args.get("activity_id") if args is not None else None + if self.activity_id is None: + self.activity_id = uuid() self._sim_telarray_path = ( args.get("sim_telarray_path") if args is not None and "sim_telarray_path" in args diff --git a/src/simtools/utils/general.py b/src/simtools/utils/general.py index 318560e027..ecb791700d 100644 --- a/src/simtools/utils/general.py +++ b/src/simtools/utils/general.py @@ -982,3 +982,26 @@ def uuid(): A UUID string. """ return str(uuid6.uuid7()) + + +def extract_uuid7_from_path(path): + """Extract UUID7 from path components if present. + + Parameters + ---------- + path : str or Path + Path potentially containing a UUID7 component. + + Returns + ------- + str or None + UUID7 string if found, otherwise None. + """ + for path_part in reversed(Path(path).parts): + try: + candidate = uuid6.UUID(path_part) + except (ValueError, TypeError): + continue + if candidate.version == 7: + return str(candidate) + return None diff --git a/tests/unit_tests/runners/test_simtools_runner.py b/tests/unit_tests/runners/test_simtools_runner.py index 672ea5d24e..33b616f209 100644 --- a/tests/unit_tests/runners/test_simtools_runner.py +++ b/tests/unit_tests/runners/test_simtools_runner.py @@ -7,6 +7,7 @@ import pytest import yaml +import simtools.utils.general as gen from simtools.job_execution.job_manager import JobExecutionError from simtools.runners import simtools_runner @@ -706,3 +707,45 @@ def test_get_workflow_configuration_value(): ] assert simtools_runner._get_workflow_configuration_value(configurations, "site") == "North" assert simtools_runner._get_workflow_configuration_value(configurations, "instrument") is None + + +def test_extract_uuid7_from_configuration_path(): + config_file = ( + "input/LSTN-design/pm_photoelectron_spectrum/" + "019d776b-e24c-741d-bc05-e3f6f7ec77c7/config.yml" + ) + extracted = gen.extract_uuid7_from_path(config_file) + assert extracted == "019d776b-e24c-741d-bc05-e3f6f7ec77c7" + + +def test_read_application_configuration_prefers_path_uuid7( + monkeypatch, + mock_logger, + mock_set_input_output_directories, + mock_change_dict_keys_case, +): + path_uuid = "019d776b-e24c-741d-bc05-e3f6f7ec77c7" + configuration_file = f"input/test/workflow/{path_uuid}/config.yml" + + monkeypatch.setattr( + "simtools.io.ascii_handler.collect_data_from_file", + mock.Mock(return_value={"applications": [{"application": "app1", "configuration": {}}]}), + ) + monkeypatch.setattr( + "simtools.runners.simtools_runner._set_input_output_directories", + mock_set_input_output_directories, + ) + monkeypatch.setattr("simtools.utils.general.change_dict_keys_case", mock_change_dict_keys_case) + monkeypatch.setattr( + "simtools.runners.simtools_runner._replace_placeholders_in_configuration", + lambda config, output_path, setting_workflow: config, + ) + + _, _, _, workflow_activity_id = simtools_runner._read_application_configuration( + configuration_file, + steps=None, + logger=mock_logger, + workflow_activity_id="generated-by-run-application", + ) + + assert workflow_activity_id == path_uuid diff --git a/tests/unit_tests/test_settings.py b/tests/unit_tests/test_settings.py index 68cd6119e4..b8974e61cb 100644 --- a/tests/unit_tests/test_settings.py +++ b/tests/unit_tests/test_settings.py @@ -66,6 +66,20 @@ def test_load_with_db_config(config_instance): assert config_instance._db_config == db_config +@patch.dict(os.environ, {}, clear=True) +def test_load_sets_activity_id_from_args(config_instance): + args = {"activity_id": "019d7bde-b85e-7bae-8edd-e3d1594f7458"} + config_instance.load(args=args) + assert config_instance.activity_id == args["activity_id"] + + +@patch.dict(os.environ, {}, clear=True) +def test_load_generates_activity_id_when_missing(config_instance): + config_instance.load(args={}) + assert isinstance(config_instance.activity_id, str) + assert config_instance.activity_id is not None + + @patch.dict(os.environ, {"SIMTOOLS_SIM_TELARRAY_PATH": "/env/simtel"}) def test_load_with_env_vars(config_instance): config_instance.load() diff --git a/tests/unit_tests/utils/test_general.py b/tests/unit_tests/utils/test_general.py index f89df95d11..a2af6274fd 100644 --- a/tests/unit_tests/utils/test_general.py +++ b/tests/unit_tests/utils/test_general.py @@ -568,6 +568,19 @@ def test_now_date_time_in_isoformat(): assert datetime.datetime.fromisoformat(now) is not None +def test_extract_uuid7_from_path(): + path = ( + "input/LSTN-design/pm_photoelectron_spectrum/" + "019d776b-e24c-741d-bc05-e3f6f7ec77c7/config.yml" + ) + assert gen.extract_uuid7_from_path(path) == "019d776b-e24c-741d-bc05-e3f6f7ec77c7" + + +def test_extract_uuid7_from_path_with_no_uuid7(): + path = "input/LSTN-design/pm_photoelectron_spectrum/not-a-uuid/config.yml" + assert gen.extract_uuid7_from_path(path) is None + + def test_is_valid_numeric_type(): """Test _is_valid_numeric_type function.""" # Test integer dtypes From 5487ddae875d07dafac0938344ebb9e95a193a70 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sat, 11 Apr 2026 11:52:37 +0200 Subject: [PATCH 07/21] new module --- src/simtools/data_model/workflow_metadata.py | 113 ++++++++++++++++++ src/simtools/runners/simtools_runner.py | 108 ++++------------- .../data_model/test_workflow_metadata.py | 79 ++++++++++++ .../runners/test_simtools_runner.py | 113 +++++------------- 4 files changed, 240 insertions(+), 173 deletions(-) create mode 100644 src/simtools/data_model/workflow_metadata.py create mode 100644 tests/unit_tests/data_model/test_workflow_metadata.py diff --git a/src/simtools/data_model/workflow_metadata.py b/src/simtools/data_model/workflow_metadata.py new file mode 100644 index 0000000000..dbd126bf89 --- /dev/null +++ b/src/simtools/data_model/workflow_metadata.py @@ -0,0 +1,113 @@ +"""Utilities for workflow-level metadata propagation into model-parameter metadata files.""" + +from copy import deepcopy +from pathlib import Path + +import simtools.utils.general as gen +from simtools.data_model.metadata_collector import MetadataCollector +from simtools.io import ascii_handler + + +def build_workflow_activity_metadata( + args_dict, + workflow_activity_id, + workflow_start, + workflow_end, + runtime_environment, + workflow_site, + workflow_instrument, +): + """Build workflow activity metadata from workflow execution context. + + Parameters + ---------- + args_dict : dict + Workflow application arguments. + workflow_activity_id : str + Workflow-level activity identifier. + workflow_start : datetime + Start time of the workflow. + workflow_end : datetime + End time of the workflow. + runtime_environment : dict or None + Runtime environment definition used for the workflow. + workflow_site : str or None + Site associated with the workflow. + workflow_instrument : str or None + Instrument associated with the workflow. + + Returns + ------- + dict + Activity block to be injected into model-parameter metadata files. + """ + metadata_args = dict(args_dict) + metadata_args["label"] = "setting_workflow" + metadata_args["activity_id"] = workflow_activity_id + metadata_args["activity_start"] = workflow_start.isoformat(timespec="seconds") + metadata_args["activity_end"] = workflow_end.isoformat(timespec="seconds") + metadata_args["runtime_environment"] = deepcopy(runtime_environment) + metadata_args["site"] = workflow_site + metadata_args["instrument"] = workflow_instrument + + collector = MetadataCollector(metadata_args, clean_meta=False) + return collector.get_top_level_metadata().get("cta", {}).get("activity", {}) + + +def update_model_parameter_metadata_file( + metadata_file, + workflow_activity, + associated_activities, + logger, +): + """Inject workflow metadata into a model-parameter metadata file. + + Parameters + ---------- + metadata_file : str or Path + Path to the model-parameter metadata file to update. + workflow_activity : dict + Workflow activity metadata block to set as top-level activity metadata. + associated_activities : list + Ordered activities associated with workflow execution. + logger : logging.Logger + Logger for progress and debug messages. + + Returns + ------- + None + Function updates file in place when it exists. + """ + metadata_path = Path(metadata_file) + if not metadata_path.exists(): + logger.debug(f"Model-parameter metadata file does not exist: {metadata_path}") + return + + metadata = ascii_handler.collect_data_from_file(metadata_path) + metadata = gen.change_dict_keys_case(metadata, True) + cta_meta = metadata.get("cta", {}) + cta_meta["activity"] = deepcopy(workflow_activity) + + context = cta_meta.setdefault("context", {}) + context_associated = context.get("associated_activities") or [] + context["associated_activities"] = _merge_associated_activities( + context_associated, + associated_activities, + ) + + metadata["cta"] = cta_meta + ascii_handler.write_data_to_file(metadata, metadata_path) + logger.info(f"Updated workflow metadata in {metadata_path}") + + +def _merge_associated_activities(existing_activities, new_activities): + """Merge associated activities preserving order and uniqueness.""" + merged_activities = [] + seen = set() + for activity in [*existing_activities, *new_activities]: + key = (activity.get("name"), activity.get("activity_id")) + if key in seen: + continue + seen.add(key) + merged_activities.append(activity) + return merged_activities diff --git a/src/simtools/runners/simtools_runner.py b/src/simtools/runners/simtools_runner.py index f43968eb24..4c27a2852f 100644 --- a/src/simtools/runners/simtools_runner.py +++ b/src/simtools/runners/simtools_runner.py @@ -7,7 +7,7 @@ import simtools.utils.general as gen from simtools import dependencies -from simtools.data_model.metadata_collector import MetadataCollector +from simtools.data_model import workflow_metadata from simtools.io import ascii_handler from simtools.job_execution import job_manager @@ -63,9 +63,6 @@ def run_applications(args_dict, logger): app_activity_id = app_configuration.get("activity_id") or gen.uuid() app_configuration["activity_id"] = app_activity_id associated_activities.append({"name": app, "activity_id": app_activity_id}) - metadata_file = _get_model_parameter_metadata_file(app, app_configuration) - if metadata_file is not None: - model_parameter_metadata_files.append(metadata_file) logger.info(f"Running application: {app}") result = job_manager.submit( @@ -75,6 +72,9 @@ def run_applications(args_dict, logger): configuration=app_configuration, runtime_environment=run_time, ) + metadata_file = _get_model_parameter_metadata_file(app, app_configuration) + if metadata_file is not None: + model_parameter_metadata_files.append(metadata_file) file.write("=" * 80 + "\n") file.write( f"Application: {app}\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}\n" @@ -82,21 +82,23 @@ def run_applications(args_dict, logger): finally: workflow_end = datetime.now(UTC) workflow_end = max(workflow_end, workflow_start) - workflow_metadata = _build_workflow_metadata( - args_dict=args_dict, - workflow_activity_id=workflow_activity_id, - workflow_start=workflow_start, - workflow_end=workflow_end, - runtime_environment=runtime_environment_snapshot, - workflow_site=workflow_site, - workflow_instrument=workflow_instrument, - ) - _update_model_parameter_metadata_files( - model_parameter_metadata_files=model_parameter_metadata_files, - workflow_metadata=workflow_metadata, - associated_activities=associated_activities, - logger=logger, - ) + if model_parameter_metadata_files: + workflow_activity = workflow_metadata.build_workflow_activity_metadata( + args_dict=args_dict, + workflow_activity_id=workflow_activity_id, + workflow_start=workflow_start, + workflow_end=workflow_end, + runtime_environment=runtime_environment_snapshot, + workflow_site=workflow_site, + workflow_instrument=workflow_instrument, + ) + for metadata_file in model_parameter_metadata_files: + workflow_metadata.update_model_parameter_metadata_file( + metadata_file=metadata_file, + workflow_activity=workflow_activity, + associated_activities=associated_activities, + logger=logger, + ) def _read_application_configuration(configuration_file, steps, logger, workflow_activity_id=None): @@ -162,29 +164,6 @@ def _read_application_configuration(configuration_file, steps, logger, workflow_ ) -def _build_workflow_metadata( - args_dict, - workflow_activity_id, - workflow_start, - workflow_end, - runtime_environment, - workflow_site, - workflow_instrument, -): - """Build workflow-level metadata dictionary with authoritative lifecycle timestamps.""" - metadata_args = dict(args_dict) - metadata_args["label"] = "setting_workflow" - metadata_args["activity_id"] = workflow_activity_id - metadata_args["activity_start"] = workflow_start.isoformat(timespec="seconds") - metadata_args["activity_end"] = workflow_end.isoformat(timespec="seconds") - metadata_args["runtime_environment"] = deepcopy(runtime_environment) - metadata_args["site"] = workflow_site - metadata_args["instrument"] = workflow_instrument - - collector = MetadataCollector(metadata_args, clean_meta=False) - return collector.get_top_level_metadata().get("cta", {}) - - def _get_model_parameter_metadata_file(application, app_configuration): """Return expected metadata file for model-parameter submission applications.""" if application != "simtools-submit-model-parameter-from-external": @@ -199,51 +178,6 @@ def _get_model_parameter_metadata_file(application, app_configuration): return Path(output_path) / parameter / f"{parameter}-{parameter_version}.meta.yml" -def _update_model_parameter_metadata_files( - model_parameter_metadata_files, - workflow_metadata, - associated_activities, - logger, -): - """Inject workflow metadata into model-parameter metadata files.""" - workflow_activity = deepcopy(workflow_metadata.get("activity", {})) - - for metadata_file in model_parameter_metadata_files: - metadata_path = Path(metadata_file) - if not metadata_path.exists(): - logger.debug(f"Model-parameter metadata file does not exist: {metadata_path}") - continue - - metadata = ascii_handler.collect_data_from_file(metadata_path) - metadata = gen.change_dict_keys_case(metadata, True) - cta_meta = metadata.get("cta", {}) - cta_meta["activity"] = deepcopy(workflow_activity) - - context = cta_meta.setdefault("context", {}) - context_associated = context.get("associated_activities") or [] - context["associated_activities"] = _merge_associated_activities( - context_associated, - associated_activities, - ) - - metadata["cta"] = cta_meta - ascii_handler.write_data_to_file(metadata, metadata_path) - logger.info(f"Updated workflow metadata in {metadata_path}") - - -def _merge_associated_activities(existing_activities, new_activities): - """Merge associated activities preserving order and uniqueness.""" - merged_activities = [] - seen = set() - for activity in [*existing_activities, *new_activities]: - key = (activity.get("name"), activity.get("activity_id")) - if key in seen: - continue - seen.add(key) - merged_activities.append(activity) - return merged_activities - - def _get_workflow_configuration_value(configurations, key): """Return first non-empty configuration value for a given key.""" for config in configurations: diff --git a/tests/unit_tests/data_model/test_workflow_metadata.py b/tests/unit_tests/data_model/test_workflow_metadata.py new file mode 100644 index 0000000000..355b228e4b --- /dev/null +++ b/tests/unit_tests/data_model/test_workflow_metadata.py @@ -0,0 +1,79 @@ +#!/usr/bin/python3 + +from pathlib import Path +from unittest import mock + +import yaml + +from simtools.data_model import workflow_metadata + + +def test_build_workflow_activity_metadata_uses_uncleaned_metadata(monkeypatch): + mock_collector = mock.Mock() + mock_collector.get_top_level_metadata.return_value = {"cta": {"activity": {"id": "wf-id"}}} + metadata_collector_cls = mock.Mock(return_value=mock_collector) + monkeypatch.setattr( + "simtools.data_model.workflow_metadata.MetadataCollector", metadata_collector_cls + ) + + activity = workflow_metadata.build_workflow_activity_metadata( + args_dict={"config_file": "dummy.yml"}, + workflow_activity_id="wf-id", + workflow_start=mock.Mock(isoformat=mock.Mock(return_value="2026-01-01T00:00:00+00:00")), + workflow_end=mock.Mock(isoformat=mock.Mock(return_value="2026-01-01T00:00:01+00:00")), + runtime_environment={"image": "test-image"}, + workflow_site="North", + workflow_instrument="LSTN-design", + ) + + metadata_collector_cls.assert_called_once() + assert metadata_collector_cls.call_args.args[0]["activity_id"] == "wf-id" + assert metadata_collector_cls.call_args.args[0]["site"] == "North" + assert metadata_collector_cls.call_args.args[0]["instrument"] == "LSTN-design" + assert metadata_collector_cls.call_args.kwargs["clean_meta"] is False + assert activity == {"id": "wf-id"} + + +def test_update_model_parameter_metadata_file(tmp_test_directory): + metadata_file = tmp_test_directory / "pm.meta.yml" + metadata_dict = { + "cta": { + "product": {"id": "prod-id"}, + "activity": {"id": "old-id"}, + "context": {"associated_activities": [{"name": "old", "activity_id": "old-id"}]}, + } + } + metadata_file.write_text(yaml.safe_dump(metadata_dict), encoding="utf-8") + + workflow_activity = {"id": "workflow-id", "name": "setting_workflow"} + associated_activities = [ + {"name": "app1", "activity_id": "a1"}, + {"name": "app2", "activity_id": "a2"}, + ] + + workflow_metadata.update_model_parameter_metadata_file( + metadata_file=metadata_file, + workflow_activity=workflow_activity, + associated_activities=associated_activities, + logger=mock.Mock(), + ) + + updated = yaml.safe_load(metadata_file.read_text(encoding="utf-8")) + assert updated["cta"]["product"]["id"] == "prod-id" + assert updated["cta"]["activity"]["id"] == "workflow-id" + assert updated["cta"]["context"]["associated_activities"] == [ + {"name": "old", "activity_id": "old-id"}, + {"name": "app1", "activity_id": "a1"}, + {"name": "app2", "activity_id": "a2"}, + ] + + +def test_update_model_parameter_metadata_file_missing_file(): + logger = mock.Mock() + workflow_metadata.update_model_parameter_metadata_file( + metadata_file=Path("missing.meta.yml"), + workflow_activity={"id": "workflow-id"}, + associated_activities=[], + logger=logger, + ) + logger.debug.assert_called_once() diff --git a/tests/unit_tests/runners/test_simtools_runner.py b/tests/unit_tests/runners/test_simtools_runner.py index 33b616f209..ec33641588 100644 --- a/tests/unit_tests/runners/test_simtools_runner.py +++ b/tests/unit_tests/runners/test_simtools_runner.py @@ -5,7 +5,6 @@ from unittest import mock import pytest -import yaml import simtools.utils.general as gen from simtools.job_execution.job_manager import JobExecutionError @@ -212,13 +211,14 @@ def test_run_applications_runs_and_logs(monkeypatch, tmp_test_directory): "simtools.runners.simtools_runner._read_application_configuration", mock.Mock(return_value=(mock_configurations, None, log_file_path, "wf-activity-id")), ) - workflow_build_mock = mock.Mock(return_value={"activity": {"id": "wf-activity-id"}}) + workflow_build_mock = mock.Mock(return_value={"id": "wf-activity-id"}) workflow_update_mock = mock.Mock() monkeypatch.setattr( - "simtools.runners.simtools_runner._build_workflow_metadata", workflow_build_mock + "simtools.runners.simtools_runner.workflow_metadata.build_workflow_activity_metadata", + workflow_build_mock, ) monkeypatch.setattr( - "simtools.runners.simtools_runner._update_model_parameter_metadata_files", + "simtools.runners.simtools_runner.workflow_metadata.update_model_parameter_metadata_file", workflow_update_mock, ) @@ -255,15 +255,8 @@ def mock_submit(app, out_file, err_file, configuration=None, runtime_environment mock_logger.info.assert_any_call("Skipping application: app2") mock_logger.info.assert_any_call("Running application: app3") version_string_mock.assert_called_once_with([], include_software_versions=False) - workflow_build_mock.assert_called_once() - workflow_update_mock.assert_called_once() - - associated = workflow_update_mock.call_args.kwargs["associated_activities"] - assert associated == [ - {"name": "app1", "activity_id": "cfg-id-1"}, - {"name": "app3", "activity_id": "cfg-id-3"}, - ] - assert workflow_update_mock.call_args.kwargs["model_parameter_metadata_files"] == [] + workflow_build_mock.assert_not_called() + workflow_update_mock.assert_not_called() def test_run_applications_passes_workflow_instrument_context(monkeypatch, tmp_test_directory): @@ -275,9 +268,12 @@ def test_run_applications_passes_workflow_instrument_context(monkeypatch, tmp_te } mock_configurations = [ { - "application": "app1", + "application": "simtools-submit-model-parameter-from-external", "run_application": True, "configuration": { + "parameter": "pm_photoelectron_spectrum", + "parameter_version": "2.0.1", + "output_path": "output/test_workflow", "site": "North", "telescope": "LSTN-design", "activity_id": "cfg-id-1", @@ -298,13 +294,14 @@ def test_run_applications_passes_workflow_instrument_context(monkeypatch, tmp_te "simtools.job_execution.job_manager.submit", mock.Mock(return_value=mock.Mock(stdout="ok", stderr="")), ) - workflow_build_mock = mock.Mock(return_value={"activity": {"id": "wf-activity-id"}}) + workflow_build_mock = mock.Mock(return_value={"id": "wf-activity-id"}) workflow_update_mock = mock.Mock() monkeypatch.setattr( - "simtools.runners.simtools_runner._build_workflow_metadata", workflow_build_mock + "simtools.runners.simtools_runner.workflow_metadata.build_workflow_activity_metadata", + workflow_build_mock, ) monkeypatch.setattr( - "simtools.runners.simtools_runner._update_model_parameter_metadata_files", + "simtools.runners.simtools_runner.workflow_metadata.update_model_parameter_metadata_file", workflow_update_mock, ) @@ -312,6 +309,7 @@ def test_run_applications_passes_workflow_instrument_context(monkeypatch, tmp_te assert workflow_build_mock.call_args.kwargs["workflow_site"] == "North" assert workflow_build_mock.call_args.kwargs["workflow_instrument"] == "LSTN-design" + workflow_update_mock.assert_called_once() def test_run_applications_handles_job_execution_exception(monkeypatch, tmp_test_directory): @@ -341,11 +339,12 @@ def mock_submit_failure(app, out_file, err_file, configuration=None, runtime_env monkeypatch.setattr("simtools.job_execution.job_manager.submit", mock_submit_failure) monkeypatch.setattr( - "simtools.runners.simtools_runner._build_workflow_metadata", - mock.Mock(return_value={"activity": {"id": "wf-activity-id"}}), + "simtools.runners.simtools_runner.workflow_metadata.build_workflow_activity_metadata", + mock.Mock(return_value={"id": "wf-activity-id"}), ) monkeypatch.setattr( - "simtools.runners.simtools_runner._update_model_parameter_metadata_files", mock.Mock() + "simtools.runners.simtools_runner.workflow_metadata.update_model_parameter_metadata_file", + mock.Mock(), ) with pytest.raises(JobExecutionError): @@ -494,11 +493,12 @@ def mock_submit(app, out_file, err_file, configuration=None, runtime_environment monkeypatch.setattr("simtools.job_execution.job_manager.submit", mock_submit) monkeypatch.setattr( - "simtools.runners.simtools_runner._build_workflow_metadata", - mock.Mock(return_value={"activity": {"id": "wf-activity-id"}}), + "simtools.runners.simtools_runner.workflow_metadata.build_workflow_activity_metadata", + mock.Mock(return_value={"id": "wf-activity-id"}), ) monkeypatch.setattr( - "simtools.runners.simtools_runner._update_model_parameter_metadata_files", mock.Mock() + "simtools.runners.simtools_runner.workflow_metadata.update_model_parameter_metadata_file", + mock.Mock(), ) simtools_runner.run_applications(mock_args_dict, mock_logger) @@ -572,11 +572,12 @@ def test_run_applications_with_empty_configuration_list(monkeypatch, tmp_test_di mock_submit = mock.Mock() monkeypatch.setattr("simtools.job_execution.job_manager.submit", mock_submit) monkeypatch.setattr( - "simtools.runners.simtools_runner._build_workflow_metadata", - mock.Mock(return_value={"activity": {"id": "wf-activity-id"}}), + "simtools.runners.simtools_runner.workflow_metadata.build_workflow_activity_metadata", + mock.Mock(return_value={"id": "wf-activity-id"}), ) monkeypatch.setattr( - "simtools.runners.simtools_runner._update_model_parameter_metadata_files", mock.Mock() + "simtools.runners.simtools_runner.workflow_metadata.update_model_parameter_metadata_file", + mock.Mock(), ) simtools_runner.run_applications(mock_args_dict, mock_logger) @@ -626,32 +627,6 @@ def test_pull_image_raises_if_pull_fails(monkeypatch): simtools_runner._pull_image("podman", image) -def test_build_workflow_metadata_uses_uncleaned_metadata(monkeypatch): - mock_collector = mock.Mock() - mock_collector.get_top_level_metadata.return_value = {"cta": {"activity": {"id": "wf-id"}}} - metadata_collector_cls = mock.Mock(return_value=mock_collector) - monkeypatch.setattr( - "simtools.runners.simtools_runner.MetadataCollector", metadata_collector_cls - ) - - metadata = simtools_runner._build_workflow_metadata( - args_dict={"config_file": "dummy.yml"}, - workflow_activity_id="wf-id", - workflow_start=mock.Mock(isoformat=mock.Mock(return_value="2026-01-01T00:00:00+00:00")), - workflow_end=mock.Mock(isoformat=mock.Mock(return_value="2026-01-01T00:00:01+00:00")), - runtime_environment={"image": "test-image"}, - workflow_site="North", - workflow_instrument="LSTN-design", - ) - - metadata_collector_cls.assert_called_once() - assert metadata_collector_cls.call_args.args[0]["activity_id"] == "wf-id" - assert metadata_collector_cls.call_args.args[0]["site"] == "North" - assert metadata_collector_cls.call_args.args[0]["instrument"] == "LSTN-design" - assert metadata_collector_cls.call_args.kwargs["clean_meta"] is False - assert metadata == {"activity": {"id": "wf-id"}} - - def test_get_model_parameter_metadata_file(): config = { "output_path": "output/test", @@ -666,40 +641,6 @@ def test_get_model_parameter_metadata_file(): ) -def test_update_model_parameter_metadata_files(tmp_test_directory): - metadata_file = tmp_test_directory / "pm.meta.yml" - metadata_dict = { - "cta": { - "product": {"id": "prod-id"}, - "activity": {"id": "old-id"}, - "context": {"associated_activities": [{"name": "old", "activity_id": "old-id"}]}, - } - } - - metadata_file.write_text(yaml.safe_dump(metadata_dict), encoding="utf-8") - workflow_metadata = {"activity": {"id": "workflow-id", "name": "setting_workflow"}} - associated_activities = [ - {"name": "app1", "activity_id": "a1"}, - {"name": "app2", "activity_id": "a2"}, - ] - - simtools_runner._update_model_parameter_metadata_files( - model_parameter_metadata_files=[metadata_file], - workflow_metadata=workflow_metadata, - associated_activities=associated_activities, - logger=mock.Mock(), - ) - - updated = yaml.safe_load(metadata_file.read_text(encoding="utf-8")) - assert updated["cta"]["product"]["id"] == "prod-id" - assert updated["cta"]["activity"]["id"] == "workflow-id" - assert updated["cta"]["context"]["associated_activities"] == [ - {"name": "old", "activity_id": "old-id"}, - {"name": "app1", "activity_id": "a1"}, - {"name": "app2", "activity_id": "a2"}, - ] - - def test_get_workflow_configuration_value(): configurations = [ {"configuration": {"site": None}}, From 303f89d354a4746d505d22524bb65a77f85b2002 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sat, 11 Apr 2026 12:48:22 +0200 Subject: [PATCH 08/21] docs --- docs/source/api-reference/data_model.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/source/api-reference/data_model.md b/docs/source/api-reference/data_model.md index 16de69828f..3303649b1f 100644 --- a/docs/source/api-reference/data_model.md +++ b/docs/source/api-reference/data_model.md @@ -50,6 +50,13 @@ Data products ingested or produced by simtools generally follows the CTAO data m :members: ``` +## workflow_metadata + +```{eval-rst} +.. automodule:: data_model.workflow_metadata + :members: +``` + (datamodelschema)= ## schema From c4c728e096a1d04f5c78543b611ace59d93d1895 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sat, 11 Apr 2026 14:50:57 +0200 Subject: [PATCH 09/21] simplifications --- src/simtools/applications/run_application.py | 2 +- src/simtools/data_model/workflow_metadata.py | 19 +++--- src/simtools/runners/simtools_runner.py | 54 ++++++++++------- .../data_model/test_workflow_metadata.py | 7 +-- .../runners/test_simtools_runner.py | 60 +++++++++++++------ 5 files changed, 84 insertions(+), 58 deletions(-) diff --git a/src/simtools/applications/run_application.py b/src/simtools/applications/run_application.py index f3706a6455..4e9f1af81d 100644 --- a/src/simtools/applications/run_application.py +++ b/src/simtools/applications/run_application.py @@ -100,7 +100,7 @@ def main(): }, ) - simtools_runner.run_applications(app_context.args, app_context.logger) + simtools_runner.run_applications(app_context.args) if __name__ == "__main__": diff --git a/src/simtools/data_model/workflow_metadata.py b/src/simtools/data_model/workflow_metadata.py index dbd126bf89..bd62653b12 100644 --- a/src/simtools/data_model/workflow_metadata.py +++ b/src/simtools/data_model/workflow_metadata.py @@ -1,5 +1,6 @@ """Utilities for workflow-level metadata propagation into model-parameter metadata files.""" +import logging from copy import deepcopy from pathlib import Path @@ -7,6 +8,8 @@ from simtools.data_model.metadata_collector import MetadataCollector from simtools.io import ascii_handler +logger = logging.getLogger(__name__) + def build_workflow_activity_metadata( args_dict, @@ -14,8 +17,7 @@ def build_workflow_activity_metadata( workflow_start, workflow_end, runtime_environment, - workflow_site, - workflow_instrument, + workflow_context, ): """Build workflow activity metadata from workflow execution context. @@ -31,10 +33,8 @@ def build_workflow_activity_metadata( End time of the workflow. runtime_environment : dict or None Runtime environment definition used for the workflow. - workflow_site : str or None - Site associated with the workflow. - workflow_instrument : str or None - Instrument associated with the workflow. + workflow_context : dict + Context with keys 'site' and 'instrument' for the workflow. Returns ------- @@ -47,8 +47,8 @@ def build_workflow_activity_metadata( metadata_args["activity_start"] = workflow_start.isoformat(timespec="seconds") metadata_args["activity_end"] = workflow_end.isoformat(timespec="seconds") metadata_args["runtime_environment"] = deepcopy(runtime_environment) - metadata_args["site"] = workflow_site - metadata_args["instrument"] = workflow_instrument + metadata_args["site"] = workflow_context.get("site") + metadata_args["instrument"] = workflow_context.get("instrument") collector = MetadataCollector(metadata_args, clean_meta=False) return collector.get_top_level_metadata().get("cta", {}).get("activity", {}) @@ -58,7 +58,6 @@ def update_model_parameter_metadata_file( metadata_file, workflow_activity, associated_activities, - logger, ): """Inject workflow metadata into a model-parameter metadata file. @@ -70,8 +69,6 @@ def update_model_parameter_metadata_file( Workflow activity metadata block to set as top-level activity metadata. associated_activities : list Ordered activities associated with workflow execution. - logger : logging.Logger - Logger for progress and debug messages. Returns ------- diff --git a/src/simtools/runners/simtools_runner.py b/src/simtools/runners/simtools_runner.py index 4c27a2852f..857e29fdf6 100644 --- a/src/simtools/runners/simtools_runner.py +++ b/src/simtools/runners/simtools_runner.py @@ -1,5 +1,6 @@ """Tools for running applications in the simtools framework.""" +import logging import shutil from copy import deepcopy from datetime import UTC, datetime @@ -11,8 +12,10 @@ from simtools.io import ascii_handler from simtools.job_execution import job_manager +logger = logging.getLogger(__name__) -def run_applications(args_dict, logger): + +def run_applications(args_dict): """ Run simtools applications step-by-step as defined in a configuration file. @@ -20,8 +23,6 @@ def run_applications(args_dict, logger): ---------- args_dict : dict Dictionary containing command line arguments. - logger : logging.Logger - Logger for logging application output. """ ( configurations, @@ -31,16 +32,11 @@ def run_applications(args_dict, logger): ) = _read_application_configuration( args_dict["config_file"], args_dict.get("steps"), - logger, args_dict.get("activity_id"), ) workflow_start = datetime.now(UTC) associated_activities = [] runtime_environment_snapshot = deepcopy(runtime_environment) - workflow_site = _get_workflow_configuration_value(configurations, "site") - workflow_instrument = _get_workflow_configuration_value(configurations, "instrument") - if workflow_instrument is None: - workflow_instrument = _get_workflow_configuration_value(configurations, "telescope") model_parameter_metadata_files = [] run_time = ( @@ -80,28 +76,24 @@ def run_applications(args_dict, logger): f"Application: {app}\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}\n" ) finally: - workflow_end = datetime.now(UTC) - workflow_end = max(workflow_end, workflow_start) if model_parameter_metadata_files: workflow_activity = workflow_metadata.build_workflow_activity_metadata( args_dict=args_dict, workflow_activity_id=workflow_activity_id, workflow_start=workflow_start, - workflow_end=workflow_end, + workflow_end=max(datetime.now(UTC), workflow_start), runtime_environment=runtime_environment_snapshot, - workflow_site=workflow_site, - workflow_instrument=workflow_instrument, + workflow_context=_get_workflow_context(configurations), ) for metadata_file in model_parameter_metadata_files: workflow_metadata.update_model_parameter_metadata_file( metadata_file=metadata_file, workflow_activity=workflow_activity, associated_activities=associated_activities, - logger=logger, ) -def _read_application_configuration(configuration_file, steps, logger, workflow_activity_id=None): +def _read_application_configuration(configuration_file, steps, workflow_activity_id=None): """ Read application configuration from file and modify for setting workflows. @@ -118,8 +110,8 @@ def _read_application_configuration(configuration_file, steps, logger, workflow_ Configuration file name. steps : list List of steps to be executed (None: all steps). - logger : Logger - Logger object. + workflow_activity_id : str + Workflow activity id fallback from command-line context. Returns ------- @@ -134,15 +126,11 @@ def _read_application_configuration(configuration_file, steps, logger, workflow_ """ job_configuration = ascii_handler.collect_data_from_file(configuration_file) - configurations = job_configuration.get("applications") - path_activity_id = gen.extract_uuid7_from_path(configuration_file) workflow_activity_id = ( - job_configuration.get("activity_id") - or path_activity_id - or workflow_activity_id - or gen.uuid() + gen.extract_uuid7_from_path(configuration_file) or workflow_activity_id or gen.uuid() ) output_path, setting_workflow = _set_input_output_directories(configuration_file) + configurations = job_configuration.get("applications") logger.info(f"Setting workflow output path to {output_path}") for step_count, config in enumerate(configurations, start=1): config["run_application"] = step_count in steps if steps else True @@ -187,6 +175,26 @@ def _get_workflow_configuration_value(configurations, key): return None +def _get_workflow_context(configurations): + """Extract workflow context (site, instrument) from configurations. + + Parameters + ---------- + configurations : list + List of application configurations. + + Returns + ------- + dict + Context dict with 'site' and 'instrument' keys. + """ + return { + "site": _get_workflow_configuration_value(configurations, "site"), + "instrument": _get_workflow_configuration_value(configurations, "instrument") + or _get_workflow_configuration_value(configurations, "telescope"), + } + + def _replace_placeholders_in_configuration( configuration, output_path, setting_workflow, place_holder="__SETTING_WORKFLOW__" ): diff --git a/tests/unit_tests/data_model/test_workflow_metadata.py b/tests/unit_tests/data_model/test_workflow_metadata.py index 355b228e4b..1d1d1504f9 100644 --- a/tests/unit_tests/data_model/test_workflow_metadata.py +++ b/tests/unit_tests/data_model/test_workflow_metadata.py @@ -22,8 +22,7 @@ def test_build_workflow_activity_metadata_uses_uncleaned_metadata(monkeypatch): workflow_start=mock.Mock(isoformat=mock.Mock(return_value="2026-01-01T00:00:00+00:00")), workflow_end=mock.Mock(isoformat=mock.Mock(return_value="2026-01-01T00:00:01+00:00")), runtime_environment={"image": "test-image"}, - workflow_site="North", - workflow_instrument="LSTN-design", + workflow_context={"site": "North", "instrument": "LSTN-design"}, ) metadata_collector_cls.assert_called_once() @@ -55,7 +54,6 @@ def test_update_model_parameter_metadata_file(tmp_test_directory): metadata_file=metadata_file, workflow_activity=workflow_activity, associated_activities=associated_activities, - logger=mock.Mock(), ) updated = yaml.safe_load(metadata_file.read_text(encoding="utf-8")) @@ -69,11 +67,8 @@ def test_update_model_parameter_metadata_file(tmp_test_directory): def test_update_model_parameter_metadata_file_missing_file(): - logger = mock.Mock() workflow_metadata.update_model_parameter_metadata_file( metadata_file=Path("missing.meta.yml"), workflow_activity={"id": "workflow-id"}, associated_activities=[], - logger=logger, ) - logger.debug.assert_called_once() diff --git a/tests/unit_tests/runners/test_simtools_runner.py b/tests/unit_tests/runners/test_simtools_runner.py index ec33641588..f866b73056 100644 --- a/tests/unit_tests/runners/test_simtools_runner.py +++ b/tests/unit_tests/runners/test_simtools_runner.py @@ -179,7 +179,6 @@ def test_read_application_configuration_empty_applications( def test_run_applications_runs_and_logs(monkeypatch, tmp_test_directory): # Prepare mocks - mock_logger = mock.Mock() mock_args_dict = { "config_file": "dummy_config.yml", "steps": None, @@ -235,7 +234,7 @@ def mock_submit(app, out_file, err_file, configuration=None, runtime_environment monkeypatch.setattr("simtools.job_execution.job_manager.submit", mock_submit) - simtools_runner.run_applications(mock_args_dict, mock_logger) + simtools_runner.run_applications(mock_args_dict) # Check log file contents with log_file_path.open("r", encoding="utf-8") as f: @@ -250,17 +249,12 @@ def mock_submit(app, out_file, err_file, configuration=None, runtime_environment assert "STDERR:\napp3_stderr" in content assert "Application: app2" not in content # skipped - # Check logger calls - mock_logger.info.assert_any_call("Running application: app1") - mock_logger.info.assert_any_call("Skipping application: app2") - mock_logger.info.assert_any_call("Running application: app3") version_string_mock.assert_called_once_with([], include_software_versions=False) workflow_build_mock.assert_not_called() workflow_update_mock.assert_not_called() def test_run_applications_passes_workflow_instrument_context(monkeypatch, tmp_test_directory): - mock_logger = mock.Mock() mock_args_dict = { "config_file": "dummy_config.yml", "steps": None, @@ -305,15 +299,14 @@ def test_run_applications_passes_workflow_instrument_context(monkeypatch, tmp_te workflow_update_mock, ) - simtools_runner.run_applications(mock_args_dict, mock_logger) + simtools_runner.run_applications(mock_args_dict) - assert workflow_build_mock.call_args.kwargs["workflow_site"] == "North" - assert workflow_build_mock.call_args.kwargs["workflow_instrument"] == "LSTN-design" + assert workflow_build_mock.call_args.kwargs["workflow_context"]["site"] == "North" + assert workflow_build_mock.call_args.kwargs["workflow_context"]["instrument"] == "LSTN-design" workflow_update_mock.assert_called_once() def test_run_applications_handles_job_execution_exception(monkeypatch, tmp_test_directory): - mock_logger = mock.Mock() mock_args_dict = { "config_file": "dummy_config.yml", "steps": None, @@ -348,7 +341,7 @@ def mock_submit_failure(app, out_file, err_file, configuration=None, runtime_env ) with pytest.raises(JobExecutionError): - simtools_runner.run_applications(mock_args_dict, mock_logger) + simtools_runner.run_applications(mock_args_dict) # Note: _convert_dict_to_args is now handled by job_manager module @@ -459,7 +452,6 @@ def test_read_runtime_environment_with_missing_options(monkeypatch): def test_run_applications_with_runtime_environment_ignored(monkeypatch, tmp_test_directory): """Test that runtime environment is ignored when ignore_runtime_environment is True.""" - mock_logger = mock.Mock() mock_args_dict = { "config_file": "dummy_config.yml", "steps": [1], @@ -501,7 +493,7 @@ def mock_submit(app, out_file, err_file, configuration=None, runtime_environment mock.Mock(), ) - simtools_runner.run_applications(mock_args_dict, mock_logger) + simtools_runner.run_applications(mock_args_dict) def test_read_runtime_environment_error_handling(monkeypatch): @@ -550,7 +542,6 @@ def test_read_runtime_environment_with_env_file_and_options(monkeypatch): def test_run_applications_with_empty_configuration_list(monkeypatch, tmp_test_directory): """Test run_applications with empty configuration list.""" - mock_logger = mock.Mock() mock_args_dict = { "config_file": "empty_config.yml", "steps": None, @@ -580,7 +571,7 @@ def test_run_applications_with_empty_configuration_list(monkeypatch, tmp_test_di mock.Mock(), ) - simtools_runner.run_applications(mock_args_dict, mock_logger) + simtools_runner.run_applications(mock_args_dict) # Check log file was created with version info with log_file_path.open("r", encoding="utf-8") as f: @@ -685,7 +676,42 @@ def test_read_application_configuration_prefers_path_uuid7( _, _, _, workflow_activity_id = simtools_runner._read_application_configuration( configuration_file, steps=None, - logger=mock_logger, + workflow_activity_id="generated-by-run-application", + ) + + assert workflow_activity_id == path_uuid + + +def test_read_application_configuration_ignores_top_level_activity_id( + monkeypatch, + mock_set_input_output_directories, + mock_change_dict_keys_case, +): + path_uuid = "019d776b-e24c-741d-bc05-e3f6f7ec77c7" + configuration_file = f"input/test/workflow/{path_uuid}/config.yml" + + monkeypatch.setattr( + "simtools.io.ascii_handler.collect_data_from_file", + mock.Mock( + return_value={ + "activity_id": "workflow-yaml-activity-id", + "applications": [{"application": "app1", "configuration": {}}], + } + ), + ) + monkeypatch.setattr( + "simtools.runners.simtools_runner._set_input_output_directories", + mock_set_input_output_directories, + ) + monkeypatch.setattr("simtools.utils.general.change_dict_keys_case", mock_change_dict_keys_case) + monkeypatch.setattr( + "simtools.runners.simtools_runner._replace_placeholders_in_configuration", + lambda config, output_path, setting_workflow: config, + ) + + _, _, _, workflow_activity_id = simtools_runner._read_application_configuration( + configuration_file, + steps=None, workflow_activity_id="generated-by-run-application", ) From c792b1b80f43d23dd43a80d731bb049b2febef73 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sat, 11 Apr 2026 14:55:21 +0200 Subject: [PATCH 10/21] tests --- tests/unit_tests/configuration/test_configurator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit_tests/configuration/test_configurator.py b/tests/unit_tests/configuration/test_configurator.py index 0996473bb2..50f82525c0 100644 --- a/tests/unit_tests/configuration/test_configurator.py +++ b/tests/unit_tests/configuration/test_configurator.py @@ -242,6 +242,7 @@ def test_initialize_output(configurator): # output is not configured (and not activity_id) configurator.config["test"] = False configurator.config["output_file"] = None + configurator.config.pop("activity_id", None) with pytest.raises(KeyError): configurator._initialize_output() From 25bc032f06f622b33e4c646c1a8250bde0282649 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sat, 11 Apr 2026 14:56:58 +0200 Subject: [PATCH 11/21] changelog --- docs/changes/2112.feature.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/changes/2112.feature.md diff --git a/docs/changes/2112.feature.md b/docs/changes/2112.feature.md new file mode 100644 index 0000000000..202acc0286 --- /dev/null +++ b/docs/changes/2112.feature.md @@ -0,0 +1 @@ +Add UUID7-based activity IDs for each application execution, propagate into metadata, and track associated activities in workflow metadata file. From 897603bef8d50f14673858c6401d8fb8ef857815 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sat, 11 Apr 2026 17:46:33 +0200 Subject: [PATCH 12/21] donnot write runtime in all cases --- src/simtools/runners/simtools_runner.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/simtools/runners/simtools_runner.py b/src/simtools/runners/simtools_runner.py index 857e29fdf6..df9c7b2555 100644 --- a/src/simtools/runners/simtools_runner.py +++ b/src/simtools/runners/simtools_runner.py @@ -82,7 +82,11 @@ def run_applications(args_dict): workflow_activity_id=workflow_activity_id, workflow_start=workflow_start, workflow_end=max(datetime.now(UTC), workflow_start), - runtime_environment=runtime_environment_snapshot, + runtime_environment=( + runtime_environment_snapshot + if not args_dict["ignore_runtime_environment"] + else None + ), workflow_context=_get_workflow_context(configurations), ) for metadata_file in model_parameter_metadata_files: From 3150e87196d12d02a4f5262ce373e83e6d855705 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sat, 11 Apr 2026 18:02:53 +0200 Subject: [PATCH 13/21] docstrings --- src/simtools/application_control.py | 2 +- src/simtools/applications/run_application.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/simtools/application_control.py b/src/simtools/application_control.py index 09f8c53a07..1e43a678ca 100644 --- a/src/simtools/application_control.py +++ b/src/simtools/application_control.py @@ -292,7 +292,7 @@ def main(): logger = setup_logging(logger_name, args_dict["log_level"], log_file=get_log_file(args_dict)) logger.info( f"simtools application {args_dict.get('application_label')}" - f" started with application ID {config.activity_id}" + f" started with activity ID {config.activity_id}" ) io_handler_instance = io_handler.IOHandler() if setup_io_handler else None diff --git a/src/simtools/applications/run_application.py b/src/simtools/applications/run_application.py index 4e9f1af81d..0fff0b475c 100644 --- a/src/simtools/applications/run_application.py +++ b/src/simtools/applications/run_application.py @@ -20,8 +20,10 @@ For simplified configuration, a placeholder called ``__SETTING_WORKFLOW__`` can be used in the configuration file. This placeholder will be replaced with the directory below ``input`` -(example: configuration file is in ``input/LSTN-design/num_gains/v2.0.0/config.yml``, -then the placeholder will be replaced with ``LSTN-design/num_gains/v2.0.0``). +(example: configuration file is in +``input/LSTN-design/num_gains/019d776b-e24c-741d-bc05-e3f6f7ec77c7/config.yml``, +then the placeholder will be replaced with +``LSTN-design/num_gains/019d776b-e24c-741d-bc05-e3f6f7ec77c7``). This will also be the directory for any output generated by the application. Run time environments can be defined in the configuration file using the ``runtime_environment`` From 800df580c6903b299c6af5dcec9a0a8e9faf468a Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sun, 12 Apr 2026 19:35:16 +0200 Subject: [PATCH 14/21] log file naming --- src/simtools/runners/simtools_runner.py | 18 ++++++++ .../runners/test_simtools_runner.py | 45 +++++++++++++++++-- 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/simtools/runners/simtools_runner.py b/src/simtools/runners/simtools_runner.py index df9c7b2555..fcd952e680 100644 --- a/src/simtools/runners/simtools_runner.py +++ b/src/simtools/runners/simtools_runner.py @@ -38,6 +38,7 @@ def run_applications(args_dict): associated_activities = [] runtime_environment_snapshot = deepcopy(runtime_environment) model_parameter_metadata_files = [] + application_counter = 0 run_time = ( read_runtime_environment(runtime_environment) @@ -55,9 +56,16 @@ def run_applications(args_dict): logger.info(f"Skipping application: {app}") continue + application_counter += 1 + app_configuration = config.get("configuration", {}) app_activity_id = app_configuration.get("activity_id") or gen.uuid() app_configuration["activity_id"] = app_activity_id + + app_configuration["log_file"] = _get_application_log_file( + app, app_configuration, application_counter + ) + associated_activities.append({"name": app, "activity_id": app_activity_id}) logger.info(f"Running application: {app}") @@ -156,6 +164,16 @@ def _read_application_configuration(configuration_file, steps, workflow_activity ) +def _get_application_log_file(application, app_configuration, counter): + """Return log file path for an application executed via run_applications.""" + if app_configuration.get("log_file") is not None: + return app_configuration["log_file"] + output_path = app_configuration.get("output_path") + if output_path is None: + return None + return Path(output_path) / f"{application}-{counter:02d}.log" + + def _get_model_parameter_metadata_file(application, app_configuration): """Return expected metadata file for model-parameter submission applications.""" if application != "simtools-submit-model-parameter-from-external": diff --git a/tests/unit_tests/runners/test_simtools_runner.py b/tests/unit_tests/runners/test_simtools_runner.py index f866b73056..972f25b3b3 100644 --- a/tests/unit_tests/runners/test_simtools_runner.py +++ b/tests/unit_tests/runners/test_simtools_runner.py @@ -190,17 +190,29 @@ def test_run_applications_runs_and_logs(monkeypatch, tmp_test_directory): { "application": "app1", "run_application": True, - "configuration": {"key": "value1", "activity_id": "cfg-id-1"}, + "configuration": { + "key": "value1", + "activity_id": "cfg-id-1", + "output_path": str(tmp_test_directory), + }, }, { "application": "app2", "run_application": False, - "configuration": {"key": "value2", "activity_id": "cfg-id-2"}, + "configuration": { + "key": "value2", + "activity_id": "cfg-id-2", + "output_path": str(tmp_test_directory), + }, }, { "application": "app3", "run_application": True, - "configuration": {"key": "value3", "activity_id": "cfg-id-3"}, + "configuration": { + "key": "value3", + "activity_id": "cfg-id-3", + "output_path": str(tmp_test_directory), + }, }, ] log_file_path = tmp_test_directory / "simtools.log" @@ -226,7 +238,10 @@ def test_run_applications_runs_and_logs(monkeypatch, tmp_test_directory): monkeypatch.setattr("simtools.dependencies.get_version_string", version_string_mock) # Patch job_manager.submit + submit_calls = [] + def mock_submit(app, out_file, err_file, configuration=None, runtime_environment=None): + submit_calls.append({"app": app, "configuration": configuration}) result_mock = mock.Mock() result_mock.stdout = f"{app}_stdout" result_mock.stderr = f"{app}_stderr" @@ -249,6 +264,12 @@ def mock_submit(app, out_file, err_file, configuration=None, runtime_environment assert "STDERR:\napp3_stderr" in content assert "Application: app2" not in content # skipped + assert len(submit_calls) == 2 + assert submit_calls[0]["configuration"]["activity_id"] == "cfg-id-1" + assert submit_calls[1]["configuration"]["activity_id"] == "cfg-id-3" + assert submit_calls[0]["configuration"]["log_file"].name == "app1-01.log" + assert submit_calls[1]["configuration"]["log_file"].name == "app3-02.log" + version_string_mock.assert_called_once_with([], include_software_versions=False) workflow_build_mock.assert_not_called() workflow_update_mock.assert_not_called() @@ -618,6 +639,24 @@ def test_pull_image_raises_if_pull_fails(monkeypatch): simtools_runner._pull_image("podman", image) +def test_get_application_log_file_no_existing_log_file(tmp_test_directory): + app_configuration = {"output_path": str(tmp_test_directory)} + result = simtools_runner._get_application_log_file("simtools-derive-psf", app_configuration, 3) + assert result == tmp_test_directory / "simtools-derive-psf-03.log" + + +def test_get_application_log_file_returns_existing_log_file(tmp_test_directory): + existing = tmp_test_directory / "my_custom.log" + app_configuration = {"output_path": str(tmp_test_directory), "log_file": existing} + result = simtools_runner._get_application_log_file("simtools-derive-psf", app_configuration, 1) + assert result == existing + + +def test_get_application_log_file_returns_none_without_output_path(): + result = simtools_runner._get_application_log_file("simtools-derive-psf", {}, 1) + assert result is None + + def test_get_model_parameter_metadata_file(): config = { "output_path": "output/test", From ed0b36cac1a7a82c9d787cc2d704de947e2d1191 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sun, 12 Apr 2026 19:41:11 +0200 Subject: [PATCH 15/21] fix path --- src/simtools/applications/run_application.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simtools/applications/run_application.py b/src/simtools/applications/run_application.py index 0fff0b475c..abc666b3e9 100644 --- a/src/simtools/applications/run_application.py +++ b/src/simtools/applications/run_application.py @@ -95,7 +95,7 @@ def main(): """Run several simtools applications using a configuration file.""" app_context = build_application( usage="simtools-run-application --config_file config_file_name", - initialization_kwargs={"db_config": True}, + initialization_kwargs={"db_config": True, "paths": False}, startup_kwargs={ "setup_io_handler": False, "resolve_sim_software_executables": False, From 6fd9877320b896c9c72823e520239295d5df8adc Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sun, 12 Apr 2026 20:18:04 +0200 Subject: [PATCH 16/21] extract _SETTINGS_WORKFLOW__ --- .../applications/plot_tabular_data.py | 9 +++ src/simtools/runners/simtools_runner.py | 27 +++----- src/simtools/utils/general.py | 61 +++++++++++++++++++ tests/unit_tests/utils/test_general.py | 30 +++++++++ 4 files changed, 107 insertions(+), 20 deletions(-) diff --git a/src/simtools/applications/plot_tabular_data.py b/src/simtools/applications/plot_tabular_data.py index ceaa09775a..ec1702bb8a 100644 --- a/src/simtools/applications/plot_tabular_data.py +++ b/src/simtools/applications/plot_tabular_data.py @@ -69,6 +69,15 @@ def main(): PLOT_CONFIG_SCHEMA, ) ) + if "__SETTING_WORKFLOW__" in str(plot_config): + setting_workflow = gen.extract_subdirectories_from_path( + app_context.args["plot_config"], + anchor="input", + ) + plot_config = gen.replace_placeholders_recursively( + plot_config, + {"__SETTING_WORKFLOW__": setting_workflow}, + ) plot_tables.plot( config=plot_config["plot"], diff --git a/src/simtools/runners/simtools_runner.py b/src/simtools/runners/simtools_runner.py index fcd952e680..dd75a69071 100644 --- a/src/simtools/runners/simtools_runner.py +++ b/src/simtools/runners/simtools_runner.py @@ -239,14 +239,10 @@ def _replace_placeholders_in_configuration( dict Configuration dictionary with placeholders replaced. """ - for key, value in configuration.items(): - if isinstance(value, str): - configuration[key] = value.replace(place_holder, setting_workflow) - if isinstance(value, list): - configuration[key] = [ - item.replace(place_holder, setting_workflow) if isinstance(item, str) else item - for item in value - ] + configuration = gen.replace_placeholders_recursively( + configuration, + {place_holder: setting_workflow}, + ) if output_path: configuration["output_path"] = str(output_path) @@ -269,19 +265,10 @@ def _set_input_output_directories(path): tuple The first part is the 'input' directory, the second part is the subdirectory name """ - path = Path(path).resolve() - try: - input_index = path.parts.index("input") - # Get all parts after 'input', excluding the filename - subdirs = path.parts[input_index + 1 : -1] - setting_workflow = "/".join(subdirs) - workflow_dir = path.parts[input_index] - except (ValueError, IndexError) as exc: - raise ValueError(f"Could not find subdirectory under 'input': {exc}") from exc - - output_path = Path(str(workflow_dir).replace("input", "output")) / Path(setting_workflow) + setting_workflow = gen.extract_subdirectories_from_path(path, anchor="input") + output_path = Path("output") / Path(setting_workflow) output_path.mkdir(parents=True, exist_ok=True) - return output_path, "/".join(subdirs) + return output_path, setting_workflow def read_runtime_environment(runtime_environment, workdir="/workdir/external/"): diff --git a/src/simtools/utils/general.py b/src/simtools/utils/general.py index ecb791700d..ee18eded51 100644 --- a/src/simtools/utils/general.py +++ b/src/simtools/utils/general.py @@ -1005,3 +1005,64 @@ def extract_uuid7_from_path(path): if candidate.version == 7: return str(candidate) return None + + +def replace_placeholders_recursively(data, replacements): + """Replace placeholders recursively in strings nested in dicts/lists. + + Parameters + ---------- + data : dict, list, str, or object + Input structure to process. + replacements : dict + Mapping of placeholder strings to replacement strings. + + Returns + ------- + dict, list, str, or object + Processed copy with placeholders replaced in string values. + """ + if isinstance(data, dict): + return { + key: replace_placeholders_recursively(value, replacements) + for key, value in data.items() + } + if isinstance(data, list): + return [replace_placeholders_recursively(item, replacements) for item in data] + if isinstance(data, str): + for placeholder, replacement in replacements.items(): + data = data.replace(placeholder, replacement) + return data + + +def extract_subdirectories_from_path(path, anchor="input"): + """Extract subdirectories in a path after an anchor directory. + + Parameters + ---------- + path : str or Path + Path containing an anchor directory and a filename. + anchor : str + Directory name used as extraction anchor. + + Returns + ------- + str + Subdirectory path between anchor and file name, joined with '/'. + + Raises + ------ + ValueError + If anchor is not present or no subdirectories are found after the anchor. + """ + path = Path(path).resolve() + try: + anchor_index = path.parts.index(anchor) + subdirs = path.parts[anchor_index + 1 : -1] + except (ValueError, IndexError) as exc: + raise ValueError(f"Could not find subdirectory under '{anchor}': {exc}") from exc + + if len(subdirs) == 0: + raise ValueError(f"Could not find subdirectory under '{anchor}'") + + return "/".join(subdirs) diff --git a/tests/unit_tests/utils/test_general.py b/tests/unit_tests/utils/test_general.py index a2af6274fd..2f9445180b 100644 --- a/tests/unit_tests/utils/test_general.py +++ b/tests/unit_tests/utils/test_general.py @@ -581,6 +581,36 @@ def test_extract_uuid7_from_path_with_no_uuid7(): assert gen.extract_uuid7_from_path(path) is None +def test_replace_placeholders_recursively(): + input_data = { + "file_name": "__SETTING_WORKFLOW__/table.ecsv", + "nested": {"path": "prefix/__SETTING_WORKFLOW__/suffix"}, + "items": ["__SETTING_WORKFLOW__/a", 1, {"name": "__SETTING_WORKFLOW__/b"}], + } + expected_output = { + "file_name": "LSTN-design/workflow/table.ecsv", + "nested": {"path": "prefix/LSTN-design/workflow/suffix"}, + "items": ["LSTN-design/workflow/a", 1, {"name": "LSTN-design/workflow/b"}], + } + result = gen.replace_placeholders_recursively( + input_data, + {"__SETTING_WORKFLOW__": "LSTN-design/workflow"}, + ) + assert result == expected_output + + +def test_extract_subdirectories_from_path(): + path = "input/LSTN-design/pm_photoelectron_spectrum/019d7abc/config.yml" + result = gen.extract_subdirectories_from_path(path, anchor="input") + assert result == "LSTN-design/pm_photoelectron_spectrum/019d7abc" + + +def test_extract_subdirectories_from_path_missing_anchor_raises(): + path = "output/LSTN-design/pm_photoelectron_spectrum/019d7abc/config.yml" + with pytest.raises(ValueError, match=r"^Could not find subdirectory under 'input'"): + gen.extract_subdirectories_from_path(path, anchor="input") + + def test_is_valid_numeric_type(): """Test _is_valid_numeric_type function.""" # Test integer dtypes From e052d4262fd9cab9e428696125e8e0c6f1c6ed40 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Mon, 13 Apr 2026 09:25:22 +0200 Subject: [PATCH 17/21] correct PR [skip ci] --- docs/changes/{2112.feature.md => 2113.feature.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/changes/{2112.feature.md => 2113.feature.md} (100%) diff --git a/docs/changes/2112.feature.md b/docs/changes/2113.feature.md similarity index 100% rename from docs/changes/2112.feature.md rename to docs/changes/2113.feature.md From 39c35727708d1c214cac738b49a5d31cb46b7c3a Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Mon, 13 Apr 2026 14:22:42 +0200 Subject: [PATCH 18/21] consistent use of activity_name --- src/simtools/data_model/metadata_collector.py | 3 ++- src/simtools/data_model/workflow_metadata.py | 2 +- src/simtools/runners/simtools_runner.py | 3 ++- src/simtools/schemas/metadata.metaschema.yml | 22 +++++++++++++++---- src/simtools/settings.py | 2 ++ .../data_model/test_workflow_metadata.py | 14 +++++++----- tests/unit_tests/test_settings.py | 13 +++++++++++ 7 files changed, 46 insertions(+), 13 deletions(-) diff --git a/src/simtools/data_model/metadata_collector.py b/src/simtools/data_model/metadata_collector.py index 3a0370fdc2..5fe9a46536 100644 --- a/src/simtools/data_model/metadata_collector.py +++ b/src/simtools/data_model/metadata_collector.py @@ -16,6 +16,7 @@ from simtools.constants import METADATA_JSON_SCHEMA from simtools.data_model import metadata_model, schema from simtools.io import ascii_handler, io_handler +from simtools.settings import config from simtools.utils import names @@ -503,7 +504,7 @@ def _fill_activity_meta(self, activity_dict): Dictionary for top-level activity metadata. """ - activity_dict["name"] = self.args_dict.get("label", None) + activity_dict["name"] = self.args_dict.get("label") or config.activity_name activity_dict["type"] = "software" activity_dict["id"] = self.args_dict.get("activity_id", "UNDEFINED_ACTIVITY_ID") activity_dict["start"] = self.args_dict.get( diff --git a/src/simtools/data_model/workflow_metadata.py b/src/simtools/data_model/workflow_metadata.py index bd62653b12..fb65d96222 100644 --- a/src/simtools/data_model/workflow_metadata.py +++ b/src/simtools/data_model/workflow_metadata.py @@ -102,7 +102,7 @@ def _merge_associated_activities(existing_activities, new_activities): merged_activities = [] seen = set() for activity in [*existing_activities, *new_activities]: - key = (activity.get("name"), activity.get("activity_id")) + key = (activity.get("activity_name"), activity.get("activity_id")) if key in seen: continue seen.add(key) diff --git a/src/simtools/runners/simtools_runner.py b/src/simtools/runners/simtools_runner.py index dd75a69071..f233d38a52 100644 --- a/src/simtools/runners/simtools_runner.py +++ b/src/simtools/runners/simtools_runner.py @@ -61,12 +61,13 @@ def run_applications(args_dict): app_configuration = config.get("configuration", {}) app_activity_id = app_configuration.get("activity_id") or gen.uuid() app_configuration["activity_id"] = app_activity_id + app_configuration.setdefault("label", app) app_configuration["log_file"] = _get_application_log_file( app, app_configuration, application_counter ) - associated_activities.append({"name": app, "activity_id": app_activity_id}) + associated_activities.append({"activity_name": app, "activity_id": app_activity_id}) logger.info(f"Running application: {app}") result = job_manager.submit( diff --git a/src/simtools/schemas/metadata.metaschema.yml b/src/simtools/schemas/metadata.metaschema.yml index 29e0252494..3077e4be8b 100644 --- a/src/simtools/schemas/metadata.metaschema.yml +++ b/src/simtools/schemas/metadata.metaschema.yml @@ -257,6 +257,13 @@ definitions: - type: string - type: "null" default: null + activity_name: + description: |- + Name of the activity that produced this data product. + anyOf: + - type: string + - type: "null" + default: null ############### instrument: title: Instrument @@ -588,7 +595,7 @@ definitions: type: object additionalProperties: false properties: - name: + activity_name: description: |- Name of the associated activity. anyOf: @@ -604,7 +611,7 @@ definitions: - type: number default: null required: - - name + - activity_name - activity_id ... --- @@ -866,6 +873,13 @@ definitions: - type: string - type: "null" default: null + ACTIVITY_NAME: + description: |- + Name of the activity that produced this data product. + anyOf: + - type: string + - type: "null" + default: null ############### INSTRUMENT: title: Instrument @@ -1197,7 +1211,7 @@ definitions: type: object additionalProperties: false properties: - NAME: + ACTIVITY_NAME: description: |- Name of the associated activity. anyOf: @@ -1213,5 +1227,5 @@ definitions: - type: number default: null required: - - NAME + - ACTIVITY_NAME - ACTIVITY_ID diff --git a/src/simtools/settings.py b/src/simtools/settings.py index 404746528d..f23abe0fe4 100644 --- a/src/simtools/settings.py +++ b/src/simtools/settings.py @@ -23,6 +23,7 @@ def __init__(self): self.user = os.getenv("USER", "unknown") self.hostname = socket.gethostname() self.activity_id = uuid() + self.activity_name = None def load(self, args=None, db_config=None, resolve_sim_software_executables=True): """ @@ -46,6 +47,7 @@ def load(self, args=None, db_config=None, resolve_sim_software_executables=True) self.activity_id = args.get("activity_id") if args is not None else None if self.activity_id is None: self.activity_id = uuid() + self.activity_name = args.get("application_label") if args is not None else None self._sim_telarray_path = ( args.get("sim_telarray_path") if args is not None and "sim_telarray_path" in args diff --git a/tests/unit_tests/data_model/test_workflow_metadata.py b/tests/unit_tests/data_model/test_workflow_metadata.py index 1d1d1504f9..3e640f3711 100644 --- a/tests/unit_tests/data_model/test_workflow_metadata.py +++ b/tests/unit_tests/data_model/test_workflow_metadata.py @@ -39,15 +39,17 @@ def test_update_model_parameter_metadata_file(tmp_test_directory): "cta": { "product": {"id": "prod-id"}, "activity": {"id": "old-id"}, - "context": {"associated_activities": [{"name": "old", "activity_id": "old-id"}]}, + "context": { + "associated_activities": [{"activity_name": "old", "activity_id": "old-id"}] + }, } } metadata_file.write_text(yaml.safe_dump(metadata_dict), encoding="utf-8") workflow_activity = {"id": "workflow-id", "name": "setting_workflow"} associated_activities = [ - {"name": "app1", "activity_id": "a1"}, - {"name": "app2", "activity_id": "a2"}, + {"activity_name": "app1", "activity_id": "a1"}, + {"activity_name": "app2", "activity_id": "a2"}, ] workflow_metadata.update_model_parameter_metadata_file( @@ -60,9 +62,9 @@ def test_update_model_parameter_metadata_file(tmp_test_directory): assert updated["cta"]["product"]["id"] == "prod-id" assert updated["cta"]["activity"]["id"] == "workflow-id" assert updated["cta"]["context"]["associated_activities"] == [ - {"name": "old", "activity_id": "old-id"}, - {"name": "app1", "activity_id": "a1"}, - {"name": "app2", "activity_id": "a2"}, + {"activity_name": "old", "activity_id": "old-id"}, + {"activity_name": "app1", "activity_id": "a1"}, + {"activity_name": "app2", "activity_id": "a2"}, ] diff --git a/tests/unit_tests/test_settings.py b/tests/unit_tests/test_settings.py index b8974e61cb..4625bcdb40 100644 --- a/tests/unit_tests/test_settings.py +++ b/tests/unit_tests/test_settings.py @@ -80,6 +80,19 @@ def test_load_generates_activity_id_when_missing(config_instance): assert config_instance.activity_id is not None +@patch.dict(os.environ, {}, clear=True) +def test_load_sets_activity_name(config_instance): + args = {"application_label": "simtools-my-app"} + config_instance.load(args=args) + assert config_instance.activity_name == "simtools-my-app" + + +@patch.dict(os.environ, {}, clear=True) +def test_load_activity_name_none_when_missing(config_instance): + config_instance.load(args={}) + assert config_instance.activity_name is None + + @patch.dict(os.environ, {"SIMTOOLS_SIM_TELARRAY_PATH": "/env/simtel"}) def test_load_with_env_vars(config_instance): config_instance.load() From dc9d15bcd31d89358563cbf617de98ca22b26051 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Mon, 13 Apr 2026 14:38:47 +0200 Subject: [PATCH 19/21] complexity --- src/simtools/settings.py | 48 ++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/src/simtools/settings.py b/src/simtools/settings.py index f23abe0fe4..8c40774e62 100644 --- a/src/simtools/settings.py +++ b/src/simtools/settings.py @@ -44,39 +44,39 @@ def load(self, args=None, db_config=None, resolve_sim_software_executables=True) """ self._args = MappingProxyType(args) if args is not None else {} self._db_config = MappingProxyType(db_config) if db_config is not None else {} - self.activity_id = args.get("activity_id") if args is not None else None - if self.activity_id is None: - self.activity_id = uuid() + self.activity_id = self._get_activity_id(args) self.activity_name = args.get("application_label") if args is not None else None - self._sim_telarray_path = ( - args.get("sim_telarray_path") - if args is not None and "sim_telarray_path" in args - else os.getenv("SIMTOOLS_SIM_TELARRAY_PATH") + self._sim_telarray_path = self._get_config_value( + args, "sim_telarray_path", "SIMTOOLS_SIM_TELARRAY_PATH" ) - - self._sim_telarray_exe = ( - args.get("sim_telarray_executable") - if args is not None and "sim_telarray_executable" in args - else os.getenv("SIMTOOLS_SIM_TELARRAY_EXECUTABLE", "sim_telarray") - ) - - self._corsika_path = ( - args.get("corsika_path") - if args is not None and "corsika_path" in args - else os.getenv("SIMTOOLS_CORSIKA_PATH") + self._sim_telarray_exe = self._get_config_value( + args, + "sim_telarray_executable", + "SIMTOOLS_SIM_TELARRAY_EXECUTABLE", + default="sim_telarray", ) - - self._corsika_interaction_table_path = ( - args.get("corsika_interaction_table_path") - if args is not None and "corsika_interaction_table_path" in args - else os.getenv("SIMTOOLS_CORSIKA_INTERACTION_TABLE_PATH") + self._corsika_path = self._get_config_value(args, "corsika_path", "SIMTOOLS_CORSIKA_PATH") + self._corsika_interaction_table_path = self._get_config_value( + args, "corsika_interaction_table_path", "SIMTOOLS_CORSIKA_INTERACTION_TABLE_PATH" ) - if resolve_sim_software_executables and self._corsika_path is not None: self._corsika_exe = self._get_corsika_exec() else: self._corsika_exe = None + @staticmethod + def _get_config_value(args, arg_key, env_key, default=None): + """Get configuration value from arguments or environment variable.""" + if args is not None and arg_key in args: + return args.get(arg_key) + return os.getenv(env_key, default) + + @staticmethod + def _get_activity_id(args): + """Get activity ID from arguments or generate a new one.""" + activity_id = args.get("activity_id") if args is not None else None + return activity_id if activity_id is not None else uuid() + def _get_corsika_exec(self): """ Get the CORSIKA executable from environment variable or command line argument. From 0cff1b1afa7856ff2cbb3553439396cc540dcbf8 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Tue, 14 Apr 2026 12:45:38 +0200 Subject: [PATCH 20/21] review comments --- src/simtools/applications/db_add_file_to_db.py | 2 +- .../applications/db_add_value_from_json_to_db.py | 2 +- src/simtools/configuration/configurator.py | 2 +- src/simtools/data_model/metadata_collector.py | 2 +- src/simtools/runners/simtools_runner.py | 13 +++++-------- src/simtools/schemas/metadata.metaschema.yml | 1 - src/simtools/settings.py | 6 +++--- src/simtools/utils/general.py | 6 +++--- 8 files changed, 15 insertions(+), 19 deletions(-) diff --git a/src/simtools/applications/db_add_file_to_db.py b/src/simtools/applications/db_add_file_to_db.py index d265c273fe..698e182c5d 100644 --- a/src/simtools/applications/db_add_file_to_db.py +++ b/src/simtools/applications/db_add_file_to_db.py @@ -133,7 +133,7 @@ def confirm_and_insert_files(files_to_insert, args_dict, db, logger): plural = "" if len(files_to_insert) == 1 else "s" if args_dict.get("test_db", False): - args_dict["db"] = args_dict["db"] + gen.uuid() + args_dict["db"] = args_dict["db"] + gen.get_uuid() logger.info(f"Using test database: {args_dict['db']}") print(f"Should the following file{plural} be inserted to the {args_dict['db']} DB?:\n") diff --git a/src/simtools/applications/db_add_value_from_json_to_db.py b/src/simtools/applications/db_add_value_from_json_to_db.py index e237bfc992..9fa03a406a 100644 --- a/src/simtools/applications/db_add_value_from_json_to_db.py +++ b/src/simtools/applications/db_add_value_from_json_to_db.py @@ -57,7 +57,7 @@ def main(): app_context = build_application(initialization_kwargs={"db_config": True}) if app_context.args.get("test_db", False): - app_context.db_config["db_simulation_model_version"] = gen.uuid() + app_context.db_config["db_simulation_model_version"] = gen.get_uuid() app_context.logger.info( f"Using test database version {app_context.db_config['db_simulation_model_version']}" ) diff --git a/src/simtools/configuration/configurator.py b/src/simtools/configuration/configurator.py index 41032301f5..bf97f566c0 100644 --- a/src/simtools/configuration/configurator.py +++ b/src/simtools/configuration/configurator.py @@ -148,7 +148,7 @@ def initialize( self._fill_from_environmental_variables() if self.config.get("activity_id", None) is None: - self.config["activity_id"] = gen.uuid() + self.config["activity_id"] = gen.get_uuid() if self.config["label"] is None: self.config["label"] = self.label self._initialize_model_versions() diff --git a/src/simtools/data_model/metadata_collector.py b/src/simtools/data_model/metadata_collector.py index 5fe9a46536..0145e195dd 100644 --- a/src/simtools/data_model/metadata_collector.py +++ b/src/simtools/data_model/metadata_collector.py @@ -425,7 +425,7 @@ def _fill_product_meta(self, product_dict): self.schema_file = self.get_data_model_schema_file_name() self.schema_dict = self.get_data_model_schema_dict() - product_dict["id"] = gen.uuid() + product_dict["id"] = gen.get_uuid() product_dict["creation_time"] = gen.now_date_time_in_isoformat() product_dict["description"] = self.schema_dict.get("description", None) diff --git a/src/simtools/runners/simtools_runner.py b/src/simtools/runners/simtools_runner.py index f233d38a52..9a21336210 100644 --- a/src/simtools/runners/simtools_runner.py +++ b/src/simtools/runners/simtools_runner.py @@ -59,7 +59,7 @@ def run_applications(args_dict): application_counter += 1 app_configuration = config.get("configuration", {}) - app_activity_id = app_configuration.get("activity_id") or gen.uuid() + app_activity_id = app_configuration.get("activity_id") or gen.get_uuid() app_configuration["activity_id"] = app_activity_id app_configuration.setdefault("label", app) @@ -77,7 +77,7 @@ def run_applications(args_dict): configuration=app_configuration, runtime_environment=run_time, ) - metadata_file = _get_model_parameter_metadata_file(app, app_configuration) + metadata_file = _get_model_parameter_metadata_file(app_configuration) if metadata_file is not None: model_parameter_metadata_files.append(metadata_file) file.write("=" * 80 + "\n") @@ -140,7 +140,7 @@ def _read_application_configuration(configuration_file, steps, workflow_activity """ job_configuration = ascii_handler.collect_data_from_file(configuration_file) workflow_activity_id = ( - gen.extract_uuid7_from_path(configuration_file) or workflow_activity_id or gen.uuid() + gen.extract_uuid7_from_path(configuration_file) or workflow_activity_id or gen.get_uuid() ) output_path, setting_workflow = _set_input_output_directories(configuration_file) configurations = job_configuration.get("applications") @@ -154,7 +154,7 @@ def _read_application_configuration(configuration_file, steps, workflow_activity setting_workflow, ) if config["configuration"].get("activity_id") is None: - config["configuration"]["activity_id"] = gen.uuid() + config["configuration"]["activity_id"] = gen.get_uuid() configurations[step_count - 1] = config return ( @@ -175,11 +175,8 @@ def _get_application_log_file(application, app_configuration, counter): return Path(output_path) / f"{application}-{counter:02d}.log" -def _get_model_parameter_metadata_file(application, app_configuration): +def _get_model_parameter_metadata_file(app_configuration): """Return expected metadata file for model-parameter submission applications.""" - if application != "simtools-submit-model-parameter-from-external": - return None - parameter = app_configuration.get("parameter") parameter_version = app_configuration.get("parameter_version") output_path = app_configuration.get("output_path") diff --git a/src/simtools/schemas/metadata.metaschema.yml b/src/simtools/schemas/metadata.metaschema.yml index 3077e4be8b..1deb2e9a85 100644 --- a/src/simtools/schemas/metadata.metaschema.yml +++ b/src/simtools/schemas/metadata.metaschema.yml @@ -608,7 +608,6 @@ definitions: anyOf: - type: string - type: "null" - - type: number default: null required: - activity_name diff --git a/src/simtools/settings.py b/src/simtools/settings.py index 8c40774e62..9b7b4e3213 100644 --- a/src/simtools/settings.py +++ b/src/simtools/settings.py @@ -5,7 +5,7 @@ from pathlib import Path from types import MappingProxyType -from simtools.utils.general import find_executable_in_dir, uuid +from simtools.utils.general import find_executable_in_dir, get_uuid class _Config: @@ -22,7 +22,7 @@ def __init__(self): self._corsika_exe = None self.user = os.getenv("USER", "unknown") self.hostname = socket.gethostname() - self.activity_id = uuid() + self.activity_id = get_uuid() self.activity_name = None def load(self, args=None, db_config=None, resolve_sim_software_executables=True): @@ -75,7 +75,7 @@ def _get_config_value(args, arg_key, env_key, default=None): def _get_activity_id(args): """Get activity ID from arguments or generate a new one.""" activity_id = args.get("activity_id") if args is not None else None - return activity_id if activity_id is not None else uuid() + return activity_id if activity_id is not None else get_uuid() def _get_corsika_exec(self): """ diff --git a/src/simtools/utils/general.py b/src/simtools/utils/general.py index ee18eded51..8ae1e58366 100644 --- a/src/simtools/utils/general.py +++ b/src/simtools/utils/general.py @@ -972,7 +972,7 @@ def load_environment_variables(env_file=".env", env_list=None): return env_values -def uuid(): +def get_uuid(): """ Generate a UUID (7) string. @@ -1055,7 +1055,7 @@ def extract_subdirectories_from_path(path, anchor="input"): ValueError If anchor is not present or no subdirectories are found after the anchor. """ - path = Path(path).resolve() + path = Path(path) try: anchor_index = path.parts.index(anchor) subdirs = path.parts[anchor_index + 1 : -1] @@ -1065,4 +1065,4 @@ def extract_subdirectories_from_path(path, anchor="input"): if len(subdirs) == 0: raise ValueError(f"Could not find subdirectory under '{anchor}'") - return "/".join(subdirs) + return str(Path(*subdirs)) From 87aebf3aa0f4c92c8cf176d8bf762c8ce7e5b68e Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Tue, 14 Apr 2026 12:51:19 +0200 Subject: [PATCH 21/21] unit tests --- tests/unit_tests/runners/test_simtools_runner.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/unit_tests/runners/test_simtools_runner.py b/tests/unit_tests/runners/test_simtools_runner.py index 972f25b3b3..3b2b245af2 100644 --- a/tests/unit_tests/runners/test_simtools_runner.py +++ b/tests/unit_tests/runners/test_simtools_runner.py @@ -663,9 +663,7 @@ def test_get_model_parameter_metadata_file(): "parameter": "pm_photoelectron_spectrum", "parameter_version": "2.0.1", } - metadata_file = simtools_runner._get_model_parameter_metadata_file( - "simtools-submit-model-parameter-from-external", config - ) + metadata_file = simtools_runner._get_model_parameter_metadata_file(config) assert metadata_file == Path( "output/test/pm_photoelectron_spectrum/pm_photoelectron_spectrum-2.0.1.meta.yml" )