diff --git a/docs/changes/2113.feature.md b/docs/changes/2113.feature.md new file mode 100644 index 0000000000..202acc0286 --- /dev/null +++ b/docs/changes/2113.feature.md @@ -0,0 +1 @@ +Add UUID7-based activity IDs for each application execution, propagate into metadata, and track associated activities in workflow metadata file. diff --git a/docs/source/api-reference/data_model.md b/docs/source/api-reference/data_model.md index 16de69828f..3303649b1f 100644 --- a/docs/source/api-reference/data_model.md +++ b/docs/source/api-reference/data_model.md @@ -50,6 +50,13 @@ Data products ingested or produced by simtools generally follows the CTAO data m :members: ``` +## workflow_metadata + +```{eval-rst} +.. automodule:: data_model.workflow_metadata + :members: +``` + (datamodelschema)= ## schema diff --git a/environment.yml b/environment.yml index 0142d833bf..6bba068d35 100644 --- a/environment.yml +++ b/environment.yml @@ -35,6 +35,7 @@ dependencies: - scipy - sphinx - sphinx-design + - uuid6 # temporary dependency; not needed for python >= 3.14 - towncrier - toml - pip: @@ -46,4 +47,4 @@ dependencies: # create: conda env create -f environment.yml # activate: conda activate simtools-dev # update (conda/mamba): conda env update -f environment.yml --prune -# update (micromamba): micromamba update -f environment.yml +# update (micromamba): micromamba env update -f environment.yml -n simtools-dev diff --git a/pyproject.toml b/pyproject.toml index 785d1bbdd5..133bba94fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ dependencies = [ "pyyaml", "scipy", "toml", + "uuid6", # temporary dependency; not needed for python >= 3.14 ] optional-dependencies.dev = [ "pre-commit", diff --git a/src/simtools/application_control.py b/src/simtools/application_control.py index caa0100652..1e43a678ca 100644 --- a/src/simtools/application_control.py +++ b/src/simtools/application_control.py @@ -5,7 +5,6 @@ import os import re from dataclasses import dataclass -from datetime import UTC, datetime from pathlib import Path import simtools.utils.general as gen @@ -59,7 +58,9 @@ def setup_logging(logger_name=None, log_level="INFO", log_file=None): log_file_path = Path(log_file) if log_file_path.parent: log_file_path.parent.mkdir(parents=True, exist_ok=True) - file_format = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + file_format = logging.Formatter( + f"{config.activity_id} - %(name)s - %(levelname)s - %(message)s" + ) file_handler = logging.FileHandler(log_file_path) file_handler.setFormatter(file_format) file_handler.addFilter(redact_filter) @@ -73,7 +74,7 @@ def get_log_file(args_dict): """ Get log file path. - Generate log file path if needed from application name and startup time. + Generate log file path if needed from application name and application ID. Returns ------- @@ -85,8 +86,7 @@ def get_log_file(args_dict): if args_dict.get("application_label") is None or args_dict.get("output_path") is None: return None - timestamp = datetime.now(UTC).strftime("%Y%m%dT%H%M%SZ") - log_file = f"{args_dict['application_label']}_{timestamp}.log" + log_file = f"{args_dict['application_label']}_{config.activity_id}.log" Path(args_dict["output_path"]).mkdir(parents=True, exist_ok=True) return Path(args_dict["output_path"]) / log_file @@ -290,6 +290,10 @@ def main(): ) logger = setup_logging(logger_name, args_dict["log_level"], log_file=get_log_file(args_dict)) + logger.info( + f"simtools application {args_dict.get('application_label')}" + f" started with activity ID {config.activity_id}" + ) io_handler_instance = io_handler.IOHandler() if setup_io_handler else None diff --git a/src/simtools/applications/db_add_file_to_db.py b/src/simtools/applications/db_add_file_to_db.py index 40f9ab0177..698e182c5d 100644 --- a/src/simtools/applications/db_add_file_to_db.py +++ b/src/simtools/applications/db_add_file_to_db.py @@ -37,7 +37,6 @@ """ -import uuid from pathlib import Path import simtools.utils.general as gen @@ -134,7 +133,7 @@ def confirm_and_insert_files(files_to_insert, args_dict, db, logger): plural = "" if len(files_to_insert) == 1 else "s" if args_dict.get("test_db", False): - args_dict["db"] = args_dict["db"] + str(uuid.uuid4()) + args_dict["db"] = args_dict["db"] + gen.get_uuid() logger.info(f"Using test database: {args_dict['db']}") print(f"Should the following file{plural} be inserted to the {args_dict['db']} DB?:\n") diff --git a/src/simtools/applications/db_add_value_from_json_to_db.py b/src/simtools/applications/db_add_value_from_json_to_db.py index 5bcb0cdb05..9fa03a406a 100644 --- a/src/simtools/applications/db_add_value_from_json_to_db.py +++ b/src/simtools/applications/db_add_value_from_json_to_db.py @@ -25,7 +25,6 @@ """ -import uuid from pathlib import Path import simtools.utils.general as gen @@ -58,7 +57,7 @@ def main(): app_context = build_application(initialization_kwargs={"db_config": True}) if app_context.args.get("test_db", False): - app_context.db_config["db_simulation_model_version"] = str(uuid.uuid4()) + app_context.db_config["db_simulation_model_version"] = gen.get_uuid() app_context.logger.info( f"Using test database version {app_context.db_config['db_simulation_model_version']}" ) diff --git a/src/simtools/applications/plot_tabular_data.py b/src/simtools/applications/plot_tabular_data.py index ceaa09775a..ec1702bb8a 100644 --- a/src/simtools/applications/plot_tabular_data.py +++ b/src/simtools/applications/plot_tabular_data.py @@ -69,6 +69,15 @@ def main(): PLOT_CONFIG_SCHEMA, ) ) + if "__SETTING_WORKFLOW__" in str(plot_config): + setting_workflow = gen.extract_subdirectories_from_path( + app_context.args["plot_config"], + anchor="input", + ) + plot_config = gen.replace_placeholders_recursively( + plot_config, + {"__SETTING_WORKFLOW__": setting_workflow}, + ) plot_tables.plot( config=plot_config["plot"], diff --git a/src/simtools/applications/run_application.py b/src/simtools/applications/run_application.py index 3023b1d70a..abc666b3e9 100644 --- a/src/simtools/applications/run_application.py +++ b/src/simtools/applications/run_application.py @@ -20,8 +20,10 @@ For simplified configuration, a placeholder called ``__SETTING_WORKFLOW__`` can be used in the configuration file. This placeholder will be replaced with the directory below ``input`` -(example: configuration file is in ``input/LSTN-design/num_gains/20250214T134800/config.yml``, -then the placeholder will be replaced with ``LSTN-design/num_gains/20250214T134800``). +(example: configuration file is in +``input/LSTN-design/num_gains/019d776b-e24c-741d-bc05-e3f6f7ec77c7/config.yml``, +then the placeholder will be replaced with +``LSTN-design/num_gains/019d776b-e24c-741d-bc05-e3f6f7ec77c7``). This will also be the directory for any output generated by the application. Run time environments can be defined in the configuration file using the ``runtime_environment`` @@ -70,7 +72,6 @@ def _add_arguments(parser): """Register application-specific command line arguments.""" parser.add_argument( "--config_file", - dest="configuration_file", help="Application configuration.", type=str, required=True, @@ -94,14 +95,14 @@ def main(): """Run several simtools applications using a configuration file.""" app_context = build_application( usage="simtools-run-application --config_file config_file_name", - initialization_kwargs={"db_config": True}, + initialization_kwargs={"db_config": True, "paths": False}, startup_kwargs={ "setup_io_handler": False, "resolve_sim_software_executables": False, }, ) - simtools_runner.run_applications(app_context.args, app_context.logger) + simtools_runner.run_applications(app_context.args) if __name__ == "__main__": diff --git a/src/simtools/configuration/commandline_parser.py b/src/simtools/configuration/commandline_parser.py index 7f9367c0f5..c96482a35a 100644 --- a/src/simtools/configuration/commandline_parser.py +++ b/src/simtools/configuration/commandline_parser.py @@ -130,6 +130,12 @@ def initialize_output_arguments(self): def initialize_application_execution_arguments(self): """Initialize application execution arguments.""" _job_group = self.add_argument_group("execution") + _job_group.add_argument( + "--activity_id", + help="activity identifier", + type=str, + default=None, + ) _job_group.add_argument( "--test", help="test option for faster execution during development", diff --git a/src/simtools/configuration/configurator.py b/src/simtools/configuration/configurator.py index a1cc8a15af..bf97f566c0 100644 --- a/src/simtools/configuration/configurator.py +++ b/src/simtools/configuration/configurator.py @@ -3,7 +3,6 @@ import argparse import logging import sys -import uuid import astropy.units as u @@ -149,7 +148,7 @@ def initialize( self._fill_from_environmental_variables() if self.config.get("activity_id", None) is None: - self.config["activity_id"] = str(uuid.uuid4()) + self.config["activity_id"] = gen.get_uuid() if self.config["label"] is None: self.config["label"] = self.label self._initialize_model_versions() diff --git a/src/simtools/data_model/metadata_collector.py b/src/simtools/data_model/metadata_collector.py index 81bddf3c12..0145e195dd 100644 --- a/src/simtools/data_model/metadata_collector.py +++ b/src/simtools/data_model/metadata_collector.py @@ -8,7 +8,7 @@ import getpass import logging -import uuid +from copy import deepcopy from pathlib import Path import simtools.utils.general as gen @@ -16,6 +16,7 @@ from simtools.constants import METADATA_JSON_SCHEMA from simtools.data_model import metadata_model, schema from simtools.io import ascii_handler, io_handler +from simtools.settings import config from simtools.utils import names @@ -94,8 +95,8 @@ def get_top_level_metadata(self): """ try: - self.top_level_meta[self.observatory]["activity"]["end"] = ( - gen.now_date_time_in_isoformat() + self.top_level_meta[self.observatory]["activity"]["end"] = self.args_dict.get( + "activity_end", gen.now_date_time_in_isoformat() ) except KeyError: pass @@ -310,6 +311,10 @@ def _fill_context_meta(self, context_dict): except (KeyError, TypeError): self._logger.debug("No input product metadata appended to associated data.") + associated_activities = self.args_dict.get("associated_activities") + if associated_activities is not None and "associated_activities" in context_dict: + context_dict["associated_activities"] = deepcopy(associated_activities) + def _read_input_metadata_from_file(self, metadata_file_name_expression=None): """ Read and validate input metadata from file. @@ -420,7 +425,7 @@ def _fill_product_meta(self, product_dict): self.schema_file = self.get_data_model_schema_file_name() self.schema_dict = self.get_data_model_schema_dict() - product_dict["id"] = str(uuid.uuid4()) + product_dict["id"] = gen.get_uuid() product_dict["creation_time"] = gen.now_date_time_in_isoformat() product_dict["description"] = self.schema_dict.get("description", None) @@ -499,13 +504,19 @@ def _fill_activity_meta(self, activity_dict): Dictionary for top-level activity metadata. """ - activity_dict["name"] = self.args_dict.get("label", None) + activity_dict["name"] = self.args_dict.get("label") or config.activity_name activity_dict["type"] = "software" activity_dict["id"] = self.args_dict.get("activity_id", "UNDEFINED_ACTIVITY_ID") - activity_dict["start"] = gen.now_date_time_in_isoformat() - activity_dict["end"] = activity_dict["start"] + activity_dict["start"] = self.args_dict.get( + "activity_start", gen.now_date_time_in_isoformat() + ) + activity_dict["end"] = self.args_dict.get("activity_end", activity_dict["start"]) activity_dict["software"]["name"] = "simtools" activity_dict["software"]["version"] = simtools.version.__version__ + if "runtime_environment" in activity_dict: + activity_dict["runtime_environment"] = deepcopy( + self.args_dict.get("runtime_environment") + ) def _merge_config_dicts(self, dict_high, dict_low, add_new_fields=False): """ diff --git a/src/simtools/data_model/workflow_metadata.py b/src/simtools/data_model/workflow_metadata.py new file mode 100644 index 0000000000..fb65d96222 --- /dev/null +++ b/src/simtools/data_model/workflow_metadata.py @@ -0,0 +1,110 @@ +"""Utilities for workflow-level metadata propagation into model-parameter metadata files.""" + +import logging +from copy import deepcopy +from pathlib import Path + +import simtools.utils.general as gen +from simtools.data_model.metadata_collector import MetadataCollector +from simtools.io import ascii_handler + +logger = logging.getLogger(__name__) + + +def build_workflow_activity_metadata( + args_dict, + workflow_activity_id, + workflow_start, + workflow_end, + runtime_environment, + workflow_context, +): + """Build workflow activity metadata from workflow execution context. + + Parameters + ---------- + args_dict : dict + Workflow application arguments. + workflow_activity_id : str + Workflow-level activity identifier. + workflow_start : datetime + Start time of the workflow. + workflow_end : datetime + End time of the workflow. + runtime_environment : dict or None + Runtime environment definition used for the workflow. + workflow_context : dict + Context with keys 'site' and 'instrument' for the workflow. + + Returns + ------- + dict + Activity block to be injected into model-parameter metadata files. + """ + metadata_args = dict(args_dict) + metadata_args["label"] = "setting_workflow" + metadata_args["activity_id"] = workflow_activity_id + metadata_args["activity_start"] = workflow_start.isoformat(timespec="seconds") + metadata_args["activity_end"] = workflow_end.isoformat(timespec="seconds") + metadata_args["runtime_environment"] = deepcopy(runtime_environment) + metadata_args["site"] = workflow_context.get("site") + metadata_args["instrument"] = workflow_context.get("instrument") + + collector = MetadataCollector(metadata_args, clean_meta=False) + return collector.get_top_level_metadata().get("cta", {}).get("activity", {}) + + +def update_model_parameter_metadata_file( + metadata_file, + workflow_activity, + associated_activities, +): + """Inject workflow metadata into a model-parameter metadata file. + + Parameters + ---------- + metadata_file : str or Path + Path to the model-parameter metadata file to update. + workflow_activity : dict + Workflow activity metadata block to set as top-level activity metadata. + associated_activities : list + Ordered activities associated with workflow execution. + + Returns + ------- + None + Function updates file in place when it exists. + """ + metadata_path = Path(metadata_file) + if not metadata_path.exists(): + logger.debug(f"Model-parameter metadata file does not exist: {metadata_path}") + return + + metadata = ascii_handler.collect_data_from_file(metadata_path) + metadata = gen.change_dict_keys_case(metadata, True) + cta_meta = metadata.get("cta", {}) + cta_meta["activity"] = deepcopy(workflow_activity) + + context = cta_meta.setdefault("context", {}) + context_associated = context.get("associated_activities") or [] + context["associated_activities"] = _merge_associated_activities( + context_associated, + associated_activities, + ) + + metadata["cta"] = cta_meta + ascii_handler.write_data_to_file(metadata, metadata_path) + logger.info(f"Updated workflow metadata in {metadata_path}") + + +def _merge_associated_activities(existing_activities, new_activities): + """Merge associated activities preserving order and uniqueness.""" + merged_activities = [] + seen = set() + for activity in [*existing_activities, *new_activities]: + key = (activity.get("activity_name"), activity.get("activity_id")) + if key in seen: + continue + seen.add(key) + merged_activities.append(activity) + return merged_activities diff --git a/src/simtools/runners/simtools_runner.py b/src/simtools/runners/simtools_runner.py index dead33bf0b..9a21336210 100644 --- a/src/simtools/runners/simtools_runner.py +++ b/src/simtools/runners/simtools_runner.py @@ -1,15 +1,21 @@ """Tools for running applications in the simtools framework.""" +import logging import shutil +from copy import deepcopy +from datetime import UTC, datetime from pathlib import Path import simtools.utils.general as gen from simtools import dependencies +from simtools.data_model import workflow_metadata from simtools.io import ascii_handler from simtools.job_execution import job_manager +logger = logging.getLogger(__name__) -def run_applications(args_dict, logger): + +def run_applications(args_dict): """ Run simtools applications step-by-step as defined in a configuration file. @@ -17,12 +23,23 @@ def run_applications(args_dict, logger): ---------- args_dict : dict Dictionary containing command line arguments. - logger : logging.Logger - Logger for logging application output. """ - configurations, runtime_environment, log_file = _read_application_configuration( - args_dict["configuration_file"], args_dict.get("steps"), logger + ( + configurations, + runtime_environment, + log_file, + workflow_activity_id, + ) = _read_application_configuration( + args_dict["config_file"], + args_dict.get("steps"), + args_dict.get("activity_id"), ) + workflow_start = datetime.now(UTC) + associated_activities = [] + runtime_environment_snapshot = deepcopy(runtime_environment) + model_parameter_metadata_files = [] + application_counter = 0 + run_time = ( read_runtime_environment(runtime_environment) if not args_dict["ignore_runtime_environment"] @@ -32,25 +49,64 @@ def run_applications(args_dict, logger): with log_file.open("w", encoding="utf-8") as file: file.write("Running simtools applications\n") file.write(dependencies.get_version_string(run_time, include_software_versions=False)) - - for config in configurations: - app = config.get("application") - if not config.get("run_application"): - logger.info(f"Skipping application: {app}") - continue - logger.info(f"Running application: {app}") - result = job_manager.submit( - app, - out_file=None, - err_file=None, - configuration=config.get("configuration"), - runtime_environment=run_time, - ) - file.write("=" * 80 + "\n") - file.write(f"Application: {app}\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}\n") - - -def _read_application_configuration(configuration_file, steps, logger): + try: + for config in configurations: + app = config.get("application") + if not config.get("run_application"): + logger.info(f"Skipping application: {app}") + continue + + application_counter += 1 + + app_configuration = config.get("configuration", {}) + app_activity_id = app_configuration.get("activity_id") or gen.get_uuid() + app_configuration["activity_id"] = app_activity_id + app_configuration.setdefault("label", app) + + app_configuration["log_file"] = _get_application_log_file( + app, app_configuration, application_counter + ) + + associated_activities.append({"activity_name": app, "activity_id": app_activity_id}) + + logger.info(f"Running application: {app}") + result = job_manager.submit( + app, + out_file=None, + err_file=None, + configuration=app_configuration, + runtime_environment=run_time, + ) + metadata_file = _get_model_parameter_metadata_file(app_configuration) + if metadata_file is not None: + model_parameter_metadata_files.append(metadata_file) + file.write("=" * 80 + "\n") + file.write( + f"Application: {app}\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}\n" + ) + finally: + if model_parameter_metadata_files: + workflow_activity = workflow_metadata.build_workflow_activity_metadata( + args_dict=args_dict, + workflow_activity_id=workflow_activity_id, + workflow_start=workflow_start, + workflow_end=max(datetime.now(UTC), workflow_start), + runtime_environment=( + runtime_environment_snapshot + if not args_dict["ignore_runtime_environment"] + else None + ), + workflow_context=_get_workflow_context(configurations), + ) + for metadata_file in model_parameter_metadata_files: + workflow_metadata.update_model_parameter_metadata_file( + metadata_file=metadata_file, + workflow_activity=workflow_activity, + associated_activities=associated_activities, + ) + + +def _read_application_configuration(configuration_file, steps, workflow_activity_id=None): """ Read application configuration from file and modify for setting workflows. @@ -67,8 +123,8 @@ def _read_application_configuration(configuration_file, steps, logger): Configuration file name. steps : list List of steps to be executed (None: all steps). - logger : Logger - Logger object. + workflow_activity_id : str + Workflow activity id fallback from command-line context. Returns ------- @@ -78,11 +134,16 @@ def _read_application_configuration(configuration_file, steps, logger): Runtime environment configuration. Path Path to the log file. + str + Workflow activity id. """ job_configuration = ascii_handler.collect_data_from_file(configuration_file) - configurations = job_configuration.get("applications") + workflow_activity_id = ( + gen.extract_uuid7_from_path(configuration_file) or workflow_activity_id or gen.get_uuid() + ) output_path, setting_workflow = _set_input_output_directories(configuration_file) + configurations = job_configuration.get("applications") logger.info(f"Setting workflow output path to {output_path}") for step_count, config in enumerate(configurations, start=1): config["run_application"] = step_count in steps if steps else True @@ -92,15 +153,68 @@ def _read_application_configuration(configuration_file, steps, logger): output_path, setting_workflow, ) + if config["configuration"].get("activity_id") is None: + config["configuration"]["activity_id"] = gen.get_uuid() configurations[step_count - 1] = config return ( configurations, job_configuration.get("runtime_environment"), output_path / "simtools.log", + workflow_activity_id, ) +def _get_application_log_file(application, app_configuration, counter): + """Return log file path for an application executed via run_applications.""" + if app_configuration.get("log_file") is not None: + return app_configuration["log_file"] + output_path = app_configuration.get("output_path") + if output_path is None: + return None + return Path(output_path) / f"{application}-{counter:02d}.log" + + +def _get_model_parameter_metadata_file(app_configuration): + """Return expected metadata file for model-parameter submission applications.""" + parameter = app_configuration.get("parameter") + parameter_version = app_configuration.get("parameter_version") + output_path = app_configuration.get("output_path") + if not parameter or not parameter_version or not output_path: + return None + + return Path(output_path) / parameter / f"{parameter}-{parameter_version}.meta.yml" + + +def _get_workflow_configuration_value(configurations, key): + """Return first non-empty configuration value for a given key.""" + for config in configurations: + value = config.get("configuration", {}).get(key) + if value is not None: + return value + return None + + +def _get_workflow_context(configurations): + """Extract workflow context (site, instrument) from configurations. + + Parameters + ---------- + configurations : list + List of application configurations. + + Returns + ------- + dict + Context dict with 'site' and 'instrument' keys. + """ + return { + "site": _get_workflow_configuration_value(configurations, "site"), + "instrument": _get_workflow_configuration_value(configurations, "instrument") + or _get_workflow_configuration_value(configurations, "telescope"), + } + + def _replace_placeholders_in_configuration( configuration, output_path, setting_workflow, place_holder="__SETTING_WORKFLOW__" ): @@ -123,14 +237,10 @@ def _replace_placeholders_in_configuration( dict Configuration dictionary with placeholders replaced. """ - for key, value in configuration.items(): - if isinstance(value, str): - configuration[key] = value.replace(place_holder, setting_workflow) - if isinstance(value, list): - configuration[key] = [ - item.replace(place_holder, setting_workflow) if isinstance(item, str) else item - for item in value - ] + configuration = gen.replace_placeholders_recursively( + configuration, + {place_holder: setting_workflow}, + ) if output_path: configuration["output_path"] = str(output_path) @@ -153,19 +263,10 @@ def _set_input_output_directories(path): tuple The first part is the 'input' directory, the second part is the subdirectory name """ - path = Path(path).resolve() - try: - input_index = path.parts.index("input") - # Get all parts after 'input', excluding the filename - subdirs = path.parts[input_index + 1 : -1] - setting_workflow = "/".join(subdirs) - workflow_dir = path.parts[input_index] - except (ValueError, IndexError) as exc: - raise ValueError(f"Could not find subdirectory under 'input': {exc}") from exc - - output_path = Path(str(workflow_dir).replace("input", "output")) / Path(setting_workflow) + setting_workflow = gen.extract_subdirectories_from_path(path, anchor="input") + output_path = Path("output") / Path(setting_workflow) output_path.mkdir(parents=True, exist_ok=True) - return output_path, "/".join(subdirs) + return output_path, setting_workflow def read_runtime_environment(runtime_environment, workdir="/workdir/external/"): diff --git a/src/simtools/schemas/application_workflow.metaschema.yml b/src/simtools/schemas/application_workflow.metaschema.yml index 223842e246..0178161106 100644 --- a/src/simtools/schemas/application_workflow.metaschema.yml +++ b/src/simtools/schemas/application_workflow.metaschema.yml @@ -25,6 +25,9 @@ definitions: schema_name: type: string description: "Name of the schema." + activity_id: + type: string + description: "Workflow activity UUID. If omitted, generated automatically." runtime_environment: "$ref": "#/definitions/runtime_environment" required: diff --git a/src/simtools/schemas/metadata.metaschema.yml b/src/simtools/schemas/metadata.metaschema.yml index 713fed2570..1deb2e9a85 100644 --- a/src/simtools/schemas/metadata.metaschema.yml +++ b/src/simtools/schemas/metadata.metaschema.yml @@ -257,6 +257,13 @@ definitions: - type: string - type: "null" default: null + activity_name: + description: |- + Name of the activity that produced this data product. + anyOf: + - type: string + - type: "null" + default: null ############### instrument: title: Instrument @@ -454,6 +461,14 @@ definitions: format: date-time - type: "null" default: null + runtime_environment: + description: |- + Runtime environment configuration used to execute this activity. + anyOf: + - type: "null" + - type: object + additionalProperties: true + default: null ############### context: title: Context @@ -571,6 +586,32 @@ definitions: type: array items: $ref: '#/definitions/cta/properties/product' + associated_activities: + title: Associated activities. + description: |- + Activities associated with this data product. + type: array + items: + type: object + additionalProperties: false + properties: + activity_name: + description: |- + Name of the associated activity. + anyOf: + - type: string + - type: "null" + default: null + activity_id: + description: |- + Identifier of the associated activity. + anyOf: + - type: string + - type: "null" + default: null + required: + - activity_name + - activity_id ... --- $schema: http://json-schema.org/draft-06/schema# @@ -831,6 +872,13 @@ definitions: - type: string - type: "null" default: null + ACTIVITY_NAME: + description: |- + Name of the activity that produced this data product. + anyOf: + - type: string + - type: "null" + default: null ############### INSTRUMENT: title: Instrument @@ -1028,6 +1076,14 @@ definitions: format: date-time - type: "null" default: null + RUNTIME_ENVIRONMENT: + description: |- + Runtime environment configuration used to execute this activity. + anyOf: + - type: "null" + - type: object + additionalProperties: true + default: null ############### CONTEXT: title: Context @@ -1145,3 +1201,30 @@ definitions: type: array items: $ref: '#/definitions/CTA/properties/PRODUCT' + ASSOCIATED_ACTIVITIES: + title: Associated activities. + description: |- + Activities associated with this data product. + type: array + items: + type: object + additionalProperties: false + properties: + ACTIVITY_NAME: + description: |- + Name of the associated activity. + anyOf: + - type: string + - type: "null" + default: null + ACTIVITY_ID: + description: |- + Identifier of the associated activity. + anyOf: + - type: string + - type: "null" + - type: number + default: null + required: + - ACTIVITY_NAME + - ACTIVITY_ID diff --git a/src/simtools/settings.py b/src/simtools/settings.py index 64004c387a..9b7b4e3213 100644 --- a/src/simtools/settings.py +++ b/src/simtools/settings.py @@ -5,7 +5,7 @@ from pathlib import Path from types import MappingProxyType -from simtools.utils.general import find_executable_in_dir +from simtools.utils.general import find_executable_in_dir, get_uuid class _Config: @@ -22,6 +22,8 @@ def __init__(self): self._corsika_exe = None self.user = os.getenv("USER", "unknown") self.hostname = socket.gethostname() + self.activity_id = get_uuid() + self.activity_name = None def load(self, args=None, db_config=None, resolve_sim_software_executables=True): """ @@ -42,35 +44,39 @@ def load(self, args=None, db_config=None, resolve_sim_software_executables=True) """ self._args = MappingProxyType(args) if args is not None else {} self._db_config = MappingProxyType(db_config) if db_config is not None else {} - self._sim_telarray_path = ( - args.get("sim_telarray_path") - if args is not None and "sim_telarray_path" in args - else os.getenv("SIMTOOLS_SIM_TELARRAY_PATH") + self.activity_id = self._get_activity_id(args) + self.activity_name = args.get("application_label") if args is not None else None + self._sim_telarray_path = self._get_config_value( + args, "sim_telarray_path", "SIMTOOLS_SIM_TELARRAY_PATH" ) - - self._sim_telarray_exe = ( - args.get("sim_telarray_executable") - if args is not None and "sim_telarray_executable" in args - else os.getenv("SIMTOOLS_SIM_TELARRAY_EXECUTABLE", "sim_telarray") - ) - - self._corsika_path = ( - args.get("corsika_path") - if args is not None and "corsika_path" in args - else os.getenv("SIMTOOLS_CORSIKA_PATH") + self._sim_telarray_exe = self._get_config_value( + args, + "sim_telarray_executable", + "SIMTOOLS_SIM_TELARRAY_EXECUTABLE", + default="sim_telarray", ) - - self._corsika_interaction_table_path = ( - args.get("corsika_interaction_table_path") - if args is not None and "corsika_interaction_table_path" in args - else os.getenv("SIMTOOLS_CORSIKA_INTERACTION_TABLE_PATH") + self._corsika_path = self._get_config_value(args, "corsika_path", "SIMTOOLS_CORSIKA_PATH") + self._corsika_interaction_table_path = self._get_config_value( + args, "corsika_interaction_table_path", "SIMTOOLS_CORSIKA_INTERACTION_TABLE_PATH" ) - if resolve_sim_software_executables and self._corsika_path is not None: self._corsika_exe = self._get_corsika_exec() else: self._corsika_exe = None + @staticmethod + def _get_config_value(args, arg_key, env_key, default=None): + """Get configuration value from arguments or environment variable.""" + if args is not None and arg_key in args: + return args.get(arg_key) + return os.getenv(env_key, default) + + @staticmethod + def _get_activity_id(args): + """Get activity ID from arguments or generate a new one.""" + activity_id = args.get("activity_id") if args is not None else None + return activity_id if activity_id is not None else get_uuid() + def _get_corsika_exec(self): """ Get the CORSIKA executable from environment variable or command line argument. diff --git a/src/simtools/utils/general.py b/src/simtools/utils/general.py index 64f11e002f..8ae1e58366 100644 --- a/src/simtools/utils/general.py +++ b/src/simtools/utils/general.py @@ -13,6 +13,7 @@ import dotenv import numpy as np +import uuid6 _logger = logging.getLogger(__name__) @@ -969,3 +970,99 @@ def load_environment_variables(env_file=".env", env_list=None): cleaned_value = env_value.split("#")[0].strip().replace('"', "").replace("'", "") env_values[key.removeprefix("SIMTOOLS_").lower()] = cleaned_value return env_values + + +def get_uuid(): + """ + Generate a UUID (7) string. + + Returns + ------- + str + A UUID string. + """ + return str(uuid6.uuid7()) + + +def extract_uuid7_from_path(path): + """Extract UUID7 from path components if present. + + Parameters + ---------- + path : str or Path + Path potentially containing a UUID7 component. + + Returns + ------- + str or None + UUID7 string if found, otherwise None. + """ + for path_part in reversed(Path(path).parts): + try: + candidate = uuid6.UUID(path_part) + except (ValueError, TypeError): + continue + if candidate.version == 7: + return str(candidate) + return None + + +def replace_placeholders_recursively(data, replacements): + """Replace placeholders recursively in strings nested in dicts/lists. + + Parameters + ---------- + data : dict, list, str, or object + Input structure to process. + replacements : dict + Mapping of placeholder strings to replacement strings. + + Returns + ------- + dict, list, str, or object + Processed copy with placeholders replaced in string values. + """ + if isinstance(data, dict): + return { + key: replace_placeholders_recursively(value, replacements) + for key, value in data.items() + } + if isinstance(data, list): + return [replace_placeholders_recursively(item, replacements) for item in data] + if isinstance(data, str): + for placeholder, replacement in replacements.items(): + data = data.replace(placeholder, replacement) + return data + + +def extract_subdirectories_from_path(path, anchor="input"): + """Extract subdirectories in a path after an anchor directory. + + Parameters + ---------- + path : str or Path + Path containing an anchor directory and a filename. + anchor : str + Directory name used as extraction anchor. + + Returns + ------- + str + Subdirectory path between anchor and file name, joined with '/'. + + Raises + ------ + ValueError + If anchor is not present or no subdirectories are found after the anchor. + """ + path = Path(path) + try: + anchor_index = path.parts.index(anchor) + subdirs = path.parts[anchor_index + 1 : -1] + except (ValueError, IndexError) as exc: + raise ValueError(f"Could not find subdirectory under '{anchor}': {exc}") from exc + + if len(subdirs) == 0: + raise ValueError(f"Could not find subdirectory under '{anchor}'") + + return str(Path(*subdirs)) diff --git a/tests/unit_tests/configuration/test_commandline_parser.py b/tests/unit_tests/configuration/test_commandline_parser.py index adc0ba4bbb..0f55fb2f0a 100644 --- a/tests/unit_tests/configuration/test_commandline_parser.py +++ b/tests/unit_tests/configuration/test_commandline_parser.py @@ -250,6 +250,15 @@ def test_initialize_default_arguments(): assert "output" in [str(group.title) for group in job_groups] +def test_initialize_default_arguments_accepts_activity_id(): + parser_with_defaults = parser.CommandLineParser() + parser_with_defaults.initialize_default_arguments() + + args = parser_with_defaults.parse_args(["--activity_id", "my-test-activity-id"]) + + assert args.activity_id == "my-test-activity-id" + + def test_initialize_application_arguments(): app_parser = parser.CommandLineParser() app_parser.initialize_application_arguments( diff --git a/tests/unit_tests/configuration/test_configurator.py b/tests/unit_tests/configuration/test_configurator.py index 0996473bb2..50f82525c0 100644 --- a/tests/unit_tests/configuration/test_configurator.py +++ b/tests/unit_tests/configuration/test_configurator.py @@ -242,6 +242,7 @@ def test_initialize_output(configurator): # output is not configured (and not activity_id) configurator.config["test"] = False configurator.config["output_file"] = None + configurator.config.pop("activity_id", None) with pytest.raises(KeyError): configurator._initialize_output() diff --git a/tests/unit_tests/data_model/test_workflow_metadata.py b/tests/unit_tests/data_model/test_workflow_metadata.py new file mode 100644 index 0000000000..3e640f3711 --- /dev/null +++ b/tests/unit_tests/data_model/test_workflow_metadata.py @@ -0,0 +1,76 @@ +#!/usr/bin/python3 + +from pathlib import Path +from unittest import mock + +import yaml + +from simtools.data_model import workflow_metadata + + +def test_build_workflow_activity_metadata_uses_uncleaned_metadata(monkeypatch): + mock_collector = mock.Mock() + mock_collector.get_top_level_metadata.return_value = {"cta": {"activity": {"id": "wf-id"}}} + metadata_collector_cls = mock.Mock(return_value=mock_collector) + monkeypatch.setattr( + "simtools.data_model.workflow_metadata.MetadataCollector", metadata_collector_cls + ) + + activity = workflow_metadata.build_workflow_activity_metadata( + args_dict={"config_file": "dummy.yml"}, + workflow_activity_id="wf-id", + workflow_start=mock.Mock(isoformat=mock.Mock(return_value="2026-01-01T00:00:00+00:00")), + workflow_end=mock.Mock(isoformat=mock.Mock(return_value="2026-01-01T00:00:01+00:00")), + runtime_environment={"image": "test-image"}, + workflow_context={"site": "North", "instrument": "LSTN-design"}, + ) + + metadata_collector_cls.assert_called_once() + assert metadata_collector_cls.call_args.args[0]["activity_id"] == "wf-id" + assert metadata_collector_cls.call_args.args[0]["site"] == "North" + assert metadata_collector_cls.call_args.args[0]["instrument"] == "LSTN-design" + assert metadata_collector_cls.call_args.kwargs["clean_meta"] is False + assert activity == {"id": "wf-id"} + + +def test_update_model_parameter_metadata_file(tmp_test_directory): + metadata_file = tmp_test_directory / "pm.meta.yml" + metadata_dict = { + "cta": { + "product": {"id": "prod-id"}, + "activity": {"id": "old-id"}, + "context": { + "associated_activities": [{"activity_name": "old", "activity_id": "old-id"}] + }, + } + } + metadata_file.write_text(yaml.safe_dump(metadata_dict), encoding="utf-8") + + workflow_activity = {"id": "workflow-id", "name": "setting_workflow"} + associated_activities = [ + {"activity_name": "app1", "activity_id": "a1"}, + {"activity_name": "app2", "activity_id": "a2"}, + ] + + workflow_metadata.update_model_parameter_metadata_file( + metadata_file=metadata_file, + workflow_activity=workflow_activity, + associated_activities=associated_activities, + ) + + updated = yaml.safe_load(metadata_file.read_text(encoding="utf-8")) + assert updated["cta"]["product"]["id"] == "prod-id" + assert updated["cta"]["activity"]["id"] == "workflow-id" + assert updated["cta"]["context"]["associated_activities"] == [ + {"activity_name": "old", "activity_id": "old-id"}, + {"activity_name": "app1", "activity_id": "a1"}, + {"activity_name": "app2", "activity_id": "a2"}, + ] + + +def test_update_model_parameter_metadata_file_missing_file(): + workflow_metadata.update_model_parameter_metadata_file( + metadata_file=Path("missing.meta.yml"), + workflow_activity={"id": "workflow-id"}, + associated_activities=[], + ) diff --git a/tests/unit_tests/runners/test_simtools_runner.py b/tests/unit_tests/runners/test_simtools_runner.py index 6db5b9d15e..3b2b245af2 100644 --- a/tests/unit_tests/runners/test_simtools_runner.py +++ b/tests/unit_tests/runners/test_simtools_runner.py @@ -6,6 +6,7 @@ import pytest +import simtools.utils.general as gen from simtools.job_execution.job_manager import JobExecutionError from simtools.runners import simtools_runner @@ -134,12 +135,16 @@ def test_read_application_configuration_selected_steps( lambda config, output_path, setting_workflow: {**config, "output_path": str(output_path)}, ) - configs, _, _ = simtools_runner._read_application_configuration( + configs, _, _, workflow_activity_id = simtools_runner._read_application_configuration( DUMMY_CONFIG_FILE, [2], mock_logger ) assert configs[0]["run_application"] is False assert configs[1]["run_application"] is True assert configs[2]["run_application"] is False + assert workflow_activity_id is not None + assert configs[0]["configuration"]["activity_id"] is not None + assert configs[1]["configuration"]["activity_id"] is not None + assert configs[2]["configuration"]["activity_id"] is not None def test_read_application_configuration_empty_applications( @@ -164,34 +169,68 @@ def test_read_application_configuration_empty_applications( lambda config, output_path, setting_workflow: config, ) - configs, _, log_file = simtools_runner._read_application_configuration( + configs, _, log_file, workflow_activity_id = simtools_runner._read_application_configuration( DUMMY_CONFIG_FILE, None, mock_logger ) assert configs == [] assert isinstance(log_file, Path) + assert workflow_activity_id is not None def test_run_applications_runs_and_logs(monkeypatch, tmp_test_directory): # Prepare mocks - mock_logger = mock.Mock() mock_args_dict = { - "configuration_file": "dummy_config.yml", + "config_file": "dummy_config.yml", "steps": None, "ignore_runtime_environment": False, } # Prepare configurations returned by _read_application_configuration mock_configurations = [ - {"application": "app1", "run_application": True, "configuration": {"key": "value1"}}, - {"application": "app2", "run_application": False, "configuration": {"key": "value2"}}, - {"application": "app3", "run_application": True, "configuration": {"key": "value3"}}, + { + "application": "app1", + "run_application": True, + "configuration": { + "key": "value1", + "activity_id": "cfg-id-1", + "output_path": str(tmp_test_directory), + }, + }, + { + "application": "app2", + "run_application": False, + "configuration": { + "key": "value2", + "activity_id": "cfg-id-2", + "output_path": str(tmp_test_directory), + }, + }, + { + "application": "app3", + "run_application": True, + "configuration": { + "key": "value3", + "activity_id": "cfg-id-3", + "output_path": str(tmp_test_directory), + }, + }, ] log_file_path = tmp_test_directory / "simtools.log" # Patch _read_application_configuration monkeypatch.setattr( "simtools.runners.simtools_runner._read_application_configuration", - mock.Mock(return_value=(mock_configurations, None, log_file_path)), + mock.Mock(return_value=(mock_configurations, None, log_file_path, "wf-activity-id")), + ) + workflow_build_mock = mock.Mock(return_value={"id": "wf-activity-id"}) + workflow_update_mock = mock.Mock() + monkeypatch.setattr( + "simtools.runners.simtools_runner.workflow_metadata.build_workflow_activity_metadata", + workflow_build_mock, + ) + monkeypatch.setattr( + "simtools.runners.simtools_runner.workflow_metadata.update_model_parameter_metadata_file", + workflow_update_mock, ) # Patch dependencies.get_version_string @@ -199,7 +238,10 @@ def test_run_applications_runs_and_logs(monkeypatch, tmp_test_directory): monkeypatch.setattr("simtools.dependencies.get_version_string", version_string_mock) # Patch job_manager.submit + submit_calls = [] + def mock_submit(app, out_file, err_file, configuration=None, runtime_environment=None): + submit_calls.append({"app": app, "configuration": configuration}) result_mock = mock.Mock() result_mock.stdout = f"{app}_stdout" result_mock.stderr = f"{app}_stderr" @@ -207,7 +249,7 @@ def mock_submit(app, out_file, err_file, configuration=None, runtime_environment monkeypatch.setattr("simtools.job_execution.job_manager.submit", mock_submit) - simtools_runner.run_applications(mock_args_dict, mock_logger) + simtools_runner.run_applications(mock_args_dict) # Check log file contents with log_file_path.open("r", encoding="utf-8") as f: @@ -222,17 +264,72 @@ def mock_submit(app, out_file, err_file, configuration=None, runtime_environment assert "STDERR:\napp3_stderr" in content assert "Application: app2" not in content # skipped - # Check logger calls - mock_logger.info.assert_any_call("Running application: app1") - mock_logger.info.assert_any_call("Skipping application: app2") - mock_logger.info.assert_any_call("Running application: app3") + assert len(submit_calls) == 2 + assert submit_calls[0]["configuration"]["activity_id"] == "cfg-id-1" + assert submit_calls[1]["configuration"]["activity_id"] == "cfg-id-3" + assert submit_calls[0]["configuration"]["log_file"].name == "app1-01.log" + assert submit_calls[1]["configuration"]["log_file"].name == "app3-02.log" + version_string_mock.assert_called_once_with([], include_software_versions=False) + workflow_build_mock.assert_not_called() + workflow_update_mock.assert_not_called() + + +def test_run_applications_passes_workflow_instrument_context(monkeypatch, tmp_test_directory): + mock_args_dict = { + "config_file": "dummy_config.yml", + "steps": None, + "ignore_runtime_environment": False, + } + mock_configurations = [ + { + "application": "simtools-submit-model-parameter-from-external", + "run_application": True, + "configuration": { + "parameter": "pm_photoelectron_spectrum", + "parameter_version": "2.0.1", + "output_path": "output/test_workflow", + "site": "North", + "telescope": "LSTN-design", + "activity_id": "cfg-id-1", + }, + }, + ] + log_file_path = tmp_test_directory / "simtools.log" + + monkeypatch.setattr( + "simtools.runners.simtools_runner._read_application_configuration", + mock.Mock(return_value=(mock_configurations, None, log_file_path, "wf-activity-id")), + ) + monkeypatch.setattr( + "simtools.dependencies.get_version_string", + mock.Mock(return_value="simtools version: 1.2.3\n"), + ) + monkeypatch.setattr( + "simtools.job_execution.job_manager.submit", + mock.Mock(return_value=mock.Mock(stdout="ok", stderr="")), + ) + workflow_build_mock = mock.Mock(return_value={"id": "wf-activity-id"}) + workflow_update_mock = mock.Mock() + monkeypatch.setattr( + "simtools.runners.simtools_runner.workflow_metadata.build_workflow_activity_metadata", + workflow_build_mock, + ) + monkeypatch.setattr( + "simtools.runners.simtools_runner.workflow_metadata.update_model_parameter_metadata_file", + workflow_update_mock, + ) + + simtools_runner.run_applications(mock_args_dict) + + assert workflow_build_mock.call_args.kwargs["workflow_context"]["site"] == "North" + assert workflow_build_mock.call_args.kwargs["workflow_context"]["instrument"] == "LSTN-design" + workflow_update_mock.assert_called_once() def test_run_applications_handles_job_execution_exception(monkeypatch, tmp_test_directory): - mock_logger = mock.Mock() mock_args_dict = { - "configuration_file": "dummy_config.yml", + "config_file": "dummy_config.yml", "steps": None, "ignore_runtime_environment": False, } @@ -244,7 +341,7 @@ def test_run_applications_handles_job_execution_exception(monkeypatch, tmp_test_ monkeypatch.setattr( "simtools.runners.simtools_runner._read_application_configuration", - mock.Mock(return_value=(mock_configurations, None, log_file_path)), + mock.Mock(return_value=(mock_configurations, None, log_file_path, "wf-activity-id")), ) monkeypatch.setattr( "simtools.dependencies.get_version_string", @@ -255,9 +352,17 @@ def mock_submit_failure(app, out_file, err_file, configuration=None, runtime_env raise JobExecutionError("Job failed") monkeypatch.setattr("simtools.job_execution.job_manager.submit", mock_submit_failure) + monkeypatch.setattr( + "simtools.runners.simtools_runner.workflow_metadata.build_workflow_activity_metadata", + mock.Mock(return_value={"id": "wf-activity-id"}), + ) + monkeypatch.setattr( + "simtools.runners.simtools_runner.workflow_metadata.update_model_parameter_metadata_file", + mock.Mock(), + ) with pytest.raises(JobExecutionError): - simtools_runner.run_applications(mock_args_dict, mock_logger) + simtools_runner.run_applications(mock_args_dict) # Note: _convert_dict_to_args is now handled by job_manager module @@ -368,9 +473,8 @@ def test_read_runtime_environment_with_missing_options(monkeypatch): def test_run_applications_with_runtime_environment_ignored(monkeypatch, tmp_test_directory): """Test that runtime environment is ignored when ignore_runtime_environment is True.""" - mock_logger = mock.Mock() mock_args_dict = { - "configuration_file": "dummy_config.yml", + "config_file": "dummy_config.yml", "steps": [1], "ignore_runtime_environment": True, } @@ -383,7 +487,9 @@ def test_run_applications_with_runtime_environment_ignored(monkeypatch, tmp_test monkeypatch.setattr( "simtools.runners.simtools_runner._read_application_configuration", - mock.Mock(return_value=(mock_configurations, runtime_environment, log_file_path)), + mock.Mock( + return_value=(mock_configurations, runtime_environment, log_file_path, "wf-activity-id") + ), ) monkeypatch.setattr( "simtools.dependencies.get_version_string", @@ -399,8 +505,16 @@ def mock_submit(app, out_file, err_file, configuration=None, runtime_environment return result_mock monkeypatch.setattr("simtools.job_execution.job_manager.submit", mock_submit) + monkeypatch.setattr( + "simtools.runners.simtools_runner.workflow_metadata.build_workflow_activity_metadata", + mock.Mock(return_value={"id": "wf-activity-id"}), + ) + monkeypatch.setattr( + "simtools.runners.simtools_runner.workflow_metadata.update_model_parameter_metadata_file", + mock.Mock(), + ) - simtools_runner.run_applications(mock_args_dict, mock_logger) + simtools_runner.run_applications(mock_args_dict) def test_read_runtime_environment_error_handling(monkeypatch): @@ -449,9 +563,8 @@ def test_read_runtime_environment_with_env_file_and_options(monkeypatch): def test_run_applications_with_empty_configuration_list(monkeypatch, tmp_test_directory): """Test run_applications with empty configuration list.""" - mock_logger = mock.Mock() mock_args_dict = { - "configuration_file": "empty_config.yml", + "config_file": "empty_config.yml", "steps": None, "ignore_runtime_environment": False, } @@ -460,7 +573,7 @@ def test_run_applications_with_empty_configuration_list(monkeypatch, tmp_test_di monkeypatch.setattr( "simtools.runners.simtools_runner._read_application_configuration", - mock.Mock(return_value=([], None, log_file_path)), + mock.Mock(return_value=([], None, log_file_path, "wf-activity-id")), ) monkeypatch.setattr( "simtools.dependencies.get_version_string", @@ -470,8 +583,16 @@ def test_run_applications_with_empty_configuration_list(monkeypatch, tmp_test_di # Should not call job_manager.submit at all mock_submit = mock.Mock() monkeypatch.setattr("simtools.job_execution.job_manager.submit", mock_submit) + monkeypatch.setattr( + "simtools.runners.simtools_runner.workflow_metadata.build_workflow_activity_metadata", + mock.Mock(return_value={"id": "wf-activity-id"}), + ) + monkeypatch.setattr( + "simtools.runners.simtools_runner.workflow_metadata.update_model_parameter_metadata_file", + mock.Mock(), + ) - simtools_runner.run_applications(mock_args_dict, mock_logger) + simtools_runner.run_applications(mock_args_dict) # Check log file was created with version info with log_file_path.open("r", encoding="utf-8") as f: @@ -516,3 +637,119 @@ def test_pull_image_raises_if_pull_fails(monkeypatch): with pytest.raises(RuntimeError, match="Failed to pull image"): simtools_runner._pull_image("podman", image) + + +def test_get_application_log_file_no_existing_log_file(tmp_test_directory): + app_configuration = {"output_path": str(tmp_test_directory)} + result = simtools_runner._get_application_log_file("simtools-derive-psf", app_configuration, 3) + assert result == tmp_test_directory / "simtools-derive-psf-03.log" + + +def test_get_application_log_file_returns_existing_log_file(tmp_test_directory): + existing = tmp_test_directory / "my_custom.log" + app_configuration = {"output_path": str(tmp_test_directory), "log_file": existing} + result = simtools_runner._get_application_log_file("simtools-derive-psf", app_configuration, 1) + assert result == existing + + +def test_get_application_log_file_returns_none_without_output_path(): + result = simtools_runner._get_application_log_file("simtools-derive-psf", {}, 1) + assert result is None + + +def test_get_model_parameter_metadata_file(): + config = { + "output_path": "output/test", + "parameter": "pm_photoelectron_spectrum", + "parameter_version": "2.0.1", + } + metadata_file = simtools_runner._get_model_parameter_metadata_file(config) + assert metadata_file == Path( + "output/test/pm_photoelectron_spectrum/pm_photoelectron_spectrum-2.0.1.meta.yml" + ) + + +def test_get_workflow_configuration_value(): + configurations = [ + {"configuration": {"site": None}}, + {"configuration": {"site": "North"}}, + ] + assert simtools_runner._get_workflow_configuration_value(configurations, "site") == "North" + assert simtools_runner._get_workflow_configuration_value(configurations, "instrument") is None + + +def test_extract_uuid7_from_configuration_path(): + config_file = ( + "input/LSTN-design/pm_photoelectron_spectrum/" + "019d776b-e24c-741d-bc05-e3f6f7ec77c7/config.yml" + ) + extracted = gen.extract_uuid7_from_path(config_file) + assert extracted == "019d776b-e24c-741d-bc05-e3f6f7ec77c7" + + +def test_read_application_configuration_prefers_path_uuid7( + monkeypatch, + mock_logger, + mock_set_input_output_directories, + mock_change_dict_keys_case, +): + path_uuid = "019d776b-e24c-741d-bc05-e3f6f7ec77c7" + configuration_file = f"input/test/workflow/{path_uuid}/config.yml" + + monkeypatch.setattr( + "simtools.io.ascii_handler.collect_data_from_file", + mock.Mock(return_value={"applications": [{"application": "app1", "configuration": {}}]}), + ) + monkeypatch.setattr( + "simtools.runners.simtools_runner._set_input_output_directories", + mock_set_input_output_directories, + ) + monkeypatch.setattr("simtools.utils.general.change_dict_keys_case", mock_change_dict_keys_case) + monkeypatch.setattr( + "simtools.runners.simtools_runner._replace_placeholders_in_configuration", + lambda config, output_path, setting_workflow: config, + ) + + _, _, _, workflow_activity_id = simtools_runner._read_application_configuration( + configuration_file, + steps=None, + workflow_activity_id="generated-by-run-application", + ) + + assert workflow_activity_id == path_uuid + + +def test_read_application_configuration_ignores_top_level_activity_id( + monkeypatch, + mock_set_input_output_directories, + mock_change_dict_keys_case, +): + path_uuid = "019d776b-e24c-741d-bc05-e3f6f7ec77c7" + configuration_file = f"input/test/workflow/{path_uuid}/config.yml" + + monkeypatch.setattr( + "simtools.io.ascii_handler.collect_data_from_file", + mock.Mock( + return_value={ + "activity_id": "workflow-yaml-activity-id", + "applications": [{"application": "app1", "configuration": {}}], + } + ), + ) + monkeypatch.setattr( + "simtools.runners.simtools_runner._set_input_output_directories", + mock_set_input_output_directories, + ) + monkeypatch.setattr("simtools.utils.general.change_dict_keys_case", mock_change_dict_keys_case) + monkeypatch.setattr( + "simtools.runners.simtools_runner._replace_placeholders_in_configuration", + lambda config, output_path, setting_workflow: config, + ) + + _, _, _, workflow_activity_id = simtools_runner._read_application_configuration( + configuration_file, + steps=None, + workflow_activity_id="generated-by-run-application", + ) + + assert workflow_activity_id == path_uuid diff --git a/tests/unit_tests/test_application_control.py b/tests/unit_tests/test_application_control.py index a3f9b03619..2d73150e40 100644 --- a/tests/unit_tests/test_application_control.py +++ b/tests/unit_tests/test_application_control.py @@ -18,6 +18,7 @@ setup_logging, startup_application, ) +from simtools.settings import config def _reset_stream(handler): @@ -690,7 +691,9 @@ def test_setup_logging_with_file_handler(tmp_path): assert len(file_handlers) > 0 assert file_handlers[0].baseFilename == str(log_file) assert log_file.exists() - assert "Test message" in log_file.read_text() + content = log_file.read_text() + assert "Test message" in content + assert config.activity_id in content finally: for handler in list(logger.handlers): handler.close() diff --git a/tests/unit_tests/test_settings.py b/tests/unit_tests/test_settings.py index 68cd6119e4..4625bcdb40 100644 --- a/tests/unit_tests/test_settings.py +++ b/tests/unit_tests/test_settings.py @@ -66,6 +66,33 @@ def test_load_with_db_config(config_instance): assert config_instance._db_config == db_config +@patch.dict(os.environ, {}, clear=True) +def test_load_sets_activity_id_from_args(config_instance): + args = {"activity_id": "019d7bde-b85e-7bae-8edd-e3d1594f7458"} + config_instance.load(args=args) + assert config_instance.activity_id == args["activity_id"] + + +@patch.dict(os.environ, {}, clear=True) +def test_load_generates_activity_id_when_missing(config_instance): + config_instance.load(args={}) + assert isinstance(config_instance.activity_id, str) + assert config_instance.activity_id is not None + + +@patch.dict(os.environ, {}, clear=True) +def test_load_sets_activity_name(config_instance): + args = {"application_label": "simtools-my-app"} + config_instance.load(args=args) + assert config_instance.activity_name == "simtools-my-app" + + +@patch.dict(os.environ, {}, clear=True) +def test_load_activity_name_none_when_missing(config_instance): + config_instance.load(args={}) + assert config_instance.activity_name is None + + @patch.dict(os.environ, {"SIMTOOLS_SIM_TELARRAY_PATH": "/env/simtel"}) def test_load_with_env_vars(config_instance): config_instance.load() diff --git a/tests/unit_tests/utils/test_general.py b/tests/unit_tests/utils/test_general.py index f89df95d11..2f9445180b 100644 --- a/tests/unit_tests/utils/test_general.py +++ b/tests/unit_tests/utils/test_general.py @@ -568,6 +568,49 @@ def test_now_date_time_in_isoformat(): assert datetime.datetime.fromisoformat(now) is not None +def test_extract_uuid7_from_path(): + path = ( + "input/LSTN-design/pm_photoelectron_spectrum/" + "019d776b-e24c-741d-bc05-e3f6f7ec77c7/config.yml" + ) + assert gen.extract_uuid7_from_path(path) == "019d776b-e24c-741d-bc05-e3f6f7ec77c7" + + +def test_extract_uuid7_from_path_with_no_uuid7(): + path = "input/LSTN-design/pm_photoelectron_spectrum/not-a-uuid/config.yml" + assert gen.extract_uuid7_from_path(path) is None + + +def test_replace_placeholders_recursively(): + input_data = { + "file_name": "__SETTING_WORKFLOW__/table.ecsv", + "nested": {"path": "prefix/__SETTING_WORKFLOW__/suffix"}, + "items": ["__SETTING_WORKFLOW__/a", 1, {"name": "__SETTING_WORKFLOW__/b"}], + } + expected_output = { + "file_name": "LSTN-design/workflow/table.ecsv", + "nested": {"path": "prefix/LSTN-design/workflow/suffix"}, + "items": ["LSTN-design/workflow/a", 1, {"name": "LSTN-design/workflow/b"}], + } + result = gen.replace_placeholders_recursively( + input_data, + {"__SETTING_WORKFLOW__": "LSTN-design/workflow"}, + ) + assert result == expected_output + + +def test_extract_subdirectories_from_path(): + path = "input/LSTN-design/pm_photoelectron_spectrum/019d7abc/config.yml" + result = gen.extract_subdirectories_from_path(path, anchor="input") + assert result == "LSTN-design/pm_photoelectron_spectrum/019d7abc" + + +def test_extract_subdirectories_from_path_missing_anchor_raises(): + path = "output/LSTN-design/pm_photoelectron_spectrum/019d7abc/config.yml" + with pytest.raises(ValueError, match=r"^Could not find subdirectory under 'input'"): + gen.extract_subdirectories_from_path(path, anchor="input") + + def test_is_valid_numeric_type(): """Test _is_valid_numeric_type function.""" # Test integer dtypes