Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/changes/2113.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add UUID7-based activity IDs for each application execution, propagate into metadata, and track associated activities in workflow metadata file.
7 changes: 7 additions & 0 deletions docs/source/api-reference/data_model.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ Data products ingested or produced by simtools generally follows the CTAO data m
:members:
```

## workflow_metadata

```{eval-rst}
.. automodule:: data_model.workflow_metadata
:members:
```

(datamodelschema)=

## schema
Expand Down
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ dependencies:
- scipy
- sphinx
- sphinx-design
- uuid6 # temporary dependency; not needed for python >= 3.14
- towncrier
- toml
- pip:
Expand All @@ -46,4 +47,4 @@ dependencies:
# create: conda env create -f environment.yml
# activate: conda activate simtools-dev
# update (conda/mamba): conda env update -f environment.yml --prune
# update (micromamba): micromamba update -f environment.yml
# update (micromamba): micromamba env update -f environment.yml -n simtools-dev
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ dependencies = [
"pyyaml",
"scipy",
"toml",
"uuid6", # temporary dependency; not needed for python >= 3.14
]
optional-dependencies.dev = [
"pre-commit",
Expand Down
14 changes: 9 additions & 5 deletions src/simtools/application_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import os
import re
from dataclasses import dataclass
from datetime import UTC, datetime
from pathlib import Path

import simtools.utils.general as gen
Expand Down Expand Up @@ -59,7 +58,9 @@ def setup_logging(logger_name=None, log_level="INFO", log_file=None):
log_file_path = Path(log_file)
if log_file_path.parent:
log_file_path.parent.mkdir(parents=True, exist_ok=True)
file_format = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
file_format = logging.Formatter(
f"{config.activity_id} - %(name)s - %(levelname)s - %(message)s"
Comment thread
GernotMaier marked this conversation as resolved.
)
file_handler = logging.FileHandler(log_file_path)
file_handler.setFormatter(file_format)
file_handler.addFilter(redact_filter)
Expand All @@ -73,7 +74,7 @@ def get_log_file(args_dict):
"""
Get log file path.

Generate log file path if needed from application name and startup time.
Generate log file path if needed from application name and application ID.

Returns
-------
Expand All @@ -85,8 +86,7 @@ def get_log_file(args_dict):
if args_dict.get("application_label") is None or args_dict.get("output_path") is None:
return None

timestamp = datetime.now(UTC).strftime("%Y%m%dT%H%M%SZ")
log_file = f"{args_dict['application_label']}_{timestamp}.log"
log_file = f"{args_dict['application_label']}_{config.activity_id}.log"
Path(args_dict["output_path"]).mkdir(parents=True, exist_ok=True)
return Path(args_dict["output_path"]) / log_file

Expand Down Expand Up @@ -290,6 +290,10 @@ def main():
)

logger = setup_logging(logger_name, args_dict["log_level"], log_file=get_log_file(args_dict))
logger.info(
f"simtools application {args_dict.get('application_label')}"
f" started with activity ID {config.activity_id}"
)

io_handler_instance = io_handler.IOHandler() if setup_io_handler else None

Expand Down
3 changes: 1 addition & 2 deletions src/simtools/applications/db_add_file_to_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@

"""

import uuid
from pathlib import Path

import simtools.utils.general as gen
Expand Down Expand Up @@ -134,7 +133,7 @@ def confirm_and_insert_files(files_to_insert, args_dict, db, logger):
plural = "" if len(files_to_insert) == 1 else "s"

if args_dict.get("test_db", False):
args_dict["db"] = args_dict["db"] + str(uuid.uuid4())
args_dict["db"] = args_dict["db"] + gen.get_uuid()
logger.info(f"Using test database: {args_dict['db']}")

print(f"Should the following file{plural} be inserted to the {args_dict['db']} DB?:\n")
Expand Down
3 changes: 1 addition & 2 deletions src/simtools/applications/db_add_value_from_json_to_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

"""

import uuid
from pathlib import Path

import simtools.utils.general as gen
Expand Down Expand Up @@ -58,7 +57,7 @@ def main():
app_context = build_application(initialization_kwargs={"db_config": True})

if app_context.args.get("test_db", False):
app_context.db_config["db_simulation_model_version"] = str(uuid.uuid4())
app_context.db_config["db_simulation_model_version"] = gen.get_uuid()
app_context.logger.info(
f"Using test database version {app_context.db_config['db_simulation_model_version']}"
)
Expand Down
9 changes: 9 additions & 0 deletions src/simtools/applications/plot_tabular_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,15 @@ def main():
PLOT_CONFIG_SCHEMA,
)
)
if "__SETTING_WORKFLOW__" in str(plot_config):
setting_workflow = gen.extract_subdirectories_from_path(
app_context.args["plot_config"],
anchor="input",
)
plot_config = gen.replace_placeholders_recursively(
plot_config,
{"__SETTING_WORKFLOW__": setting_workflow},
)

plot_tables.plot(
config=plot_config["plot"],
Expand Down
11 changes: 6 additions & 5 deletions src/simtools/applications/run_application.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@

For simplified configuration, a placeholder called ``__SETTING_WORKFLOW__`` can be used in the
configuration file. This placeholder will be replaced with the directory below ``input``
(example: configuration file is in ``input/LSTN-design/num_gains/20250214T134800/config.yml``,
then the placeholder will be replaced with ``LSTN-design/num_gains/20250214T134800``).
(example: configuration file is in
``input/LSTN-design/num_gains/019d776b-e24c-741d-bc05-e3f6f7ec77c7/config.yml``,
then the placeholder will be replaced with
``LSTN-design/num_gains/019d776b-e24c-741d-bc05-e3f6f7ec77c7``).
This will also be the directory for any output generated by the application.

Run time environments can be defined in the configuration file using the ``runtime_environment``
Expand Down Expand Up @@ -70,7 +72,6 @@ def _add_arguments(parser):
"""Register application-specific command line arguments."""
parser.add_argument(
"--config_file",
dest="configuration_file",
help="Application configuration.",
type=str,
required=True,
Expand All @@ -94,14 +95,14 @@ def main():
"""Run several simtools applications using a configuration file."""
app_context = build_application(
usage="simtools-run-application --config_file config_file_name",
initialization_kwargs={"db_config": True},
initialization_kwargs={"db_config": True, "paths": False},
startup_kwargs={
"setup_io_handler": False,
"resolve_sim_software_executables": False,
},
)

simtools_runner.run_applications(app_context.args, app_context.logger)
simtools_runner.run_applications(app_context.args)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this intentional to remove the logger injection here?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes - to have the usage of logger in simtools_runner consistent with other modules.



if __name__ == "__main__":
Expand Down
6 changes: 6 additions & 0 deletions src/simtools/configuration/commandline_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ def initialize_output_arguments(self):
def initialize_application_execution_arguments(self):
"""Initialize application execution arguments."""
_job_group = self.add_argument_group("execution")
_job_group.add_argument(
"--activity_id",
help="activity identifier",
type=str,
default=None,
)
_job_group.add_argument(
"--test",
help="test option for faster execution during development",
Expand Down
3 changes: 1 addition & 2 deletions src/simtools/configuration/configurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import argparse
import logging
import sys
import uuid

import astropy.units as u

Expand Down Expand Up @@ -149,7 +148,7 @@ def initialize(
self._fill_from_environmental_variables()

if self.config.get("activity_id", None) is None:
self.config["activity_id"] = str(uuid.uuid4())
self.config["activity_id"] = gen.get_uuid()
if self.config["label"] is None:
self.config["label"] = self.label
self._initialize_model_versions()
Expand Down
25 changes: 18 additions & 7 deletions src/simtools/data_model/metadata_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@

import getpass
import logging
import uuid
from copy import deepcopy
from pathlib import Path

import simtools.utils.general as gen
import simtools.version
from simtools.constants import METADATA_JSON_SCHEMA
from simtools.data_model import metadata_model, schema
from simtools.io import ascii_handler, io_handler
from simtools.settings import config
from simtools.utils import names


Expand Down Expand Up @@ -94,8 +95,8 @@ def get_top_level_metadata(self):

"""
try:
self.top_level_meta[self.observatory]["activity"]["end"] = (
gen.now_date_time_in_isoformat()
self.top_level_meta[self.observatory]["activity"]["end"] = self.args_dict.get(
"activity_end", gen.now_date_time_in_isoformat()
)
except KeyError:
pass
Expand Down Expand Up @@ -310,6 +311,10 @@ def _fill_context_meta(self, context_dict):
except (KeyError, TypeError):
self._logger.debug("No input product metadata appended to associated data.")

associated_activities = self.args_dict.get("associated_activities")
if associated_activities is not None and "associated_activities" in context_dict:
context_dict["associated_activities"] = deepcopy(associated_activities)

def _read_input_metadata_from_file(self, metadata_file_name_expression=None):
"""
Read and validate input metadata from file.
Expand Down Expand Up @@ -420,7 +425,7 @@ def _fill_product_meta(self, product_dict):
self.schema_file = self.get_data_model_schema_file_name()
self.schema_dict = self.get_data_model_schema_dict()

product_dict["id"] = str(uuid.uuid4())
product_dict["id"] = gen.get_uuid()
product_dict["creation_time"] = gen.now_date_time_in_isoformat()
product_dict["description"] = self.schema_dict.get("description", None)

Expand Down Expand Up @@ -499,13 +504,19 @@ def _fill_activity_meta(self, activity_dict):
Dictionary for top-level activity metadata.

"""
activity_dict["name"] = self.args_dict.get("label", None)
activity_dict["name"] = self.args_dict.get("label") or config.activity_name
activity_dict["type"] = "software"
activity_dict["id"] = self.args_dict.get("activity_id", "UNDEFINED_ACTIVITY_ID")
activity_dict["start"] = gen.now_date_time_in_isoformat()
activity_dict["end"] = activity_dict["start"]
activity_dict["start"] = self.args_dict.get(
"activity_start", gen.now_date_time_in_isoformat()
)
activity_dict["end"] = self.args_dict.get("activity_end", activity_dict["start"])
activity_dict["software"]["name"] = "simtools"
activity_dict["software"]["version"] = simtools.version.__version__
if "runtime_environment" in activity_dict:
activity_dict["runtime_environment"] = deepcopy(
self.args_dict.get("runtime_environment")
)

def _merge_config_dicts(self, dict_high, dict_low, add_new_fields=False):
"""
Expand Down
110 changes: 110 additions & 0 deletions src/simtools/data_model/workflow_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""Utilities for workflow-level metadata propagation into model-parameter metadata files."""

import logging
from copy import deepcopy
from pathlib import Path

import simtools.utils.general as gen
from simtools.data_model.metadata_collector import MetadataCollector
from simtools.io import ascii_handler

logger = logging.getLogger(__name__)


def build_workflow_activity_metadata(
args_dict,
workflow_activity_id,
workflow_start,
workflow_end,
runtime_environment,
workflow_context,
):
"""Build workflow activity metadata from workflow execution context.

Parameters
----------
args_dict : dict
Workflow application arguments.
workflow_activity_id : str
Workflow-level activity identifier.
workflow_start : datetime
Start time of the workflow.
workflow_end : datetime
End time of the workflow.
runtime_environment : dict or None
Runtime environment definition used for the workflow.
workflow_context : dict
Context with keys 'site' and 'instrument' for the workflow.

Returns
-------
dict
Activity block to be injected into model-parameter metadata files.
"""
metadata_args = dict(args_dict)
metadata_args["label"] = "setting_workflow"
metadata_args["activity_id"] = workflow_activity_id
metadata_args["activity_start"] = workflow_start.isoformat(timespec="seconds")
metadata_args["activity_end"] = workflow_end.isoformat(timespec="seconds")
metadata_args["runtime_environment"] = deepcopy(runtime_environment)
metadata_args["site"] = workflow_context.get("site")
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

all of these could return None. Should we be more careful here?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, and this is fine. At least for now workflows shouldn't fail due to missing metadata entries on site or telescope name.

metadata_args["instrument"] = workflow_context.get("instrument")

collector = MetadataCollector(metadata_args, clean_meta=False)
return collector.get_top_level_metadata().get("cta", {}).get("activity", {})
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we assume cta is there?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right that in general we do not hardwire the observatory name. This case is a bit different, as we only one metadata schema (SimtoolsOutputMetadata), which is for CTA (and has this key included). Using a different metadata scheme would require a bit of work - and we can do this when this is becoming relevant.



def update_model_parameter_metadata_file(
metadata_file,
workflow_activity,
associated_activities,
):
"""Inject workflow metadata into a model-parameter metadata file.

Parameters
----------
metadata_file : str or Path
Path to the model-parameter metadata file to update.
workflow_activity : dict
Workflow activity metadata block to set as top-level activity metadata.
associated_activities : list
Ordered activities associated with workflow execution.

Returns
-------
None
Function updates file in place when it exists.
"""
metadata_path = Path(metadata_file)
if not metadata_path.exists():
logger.debug(f"Model-parameter metadata file does not exist: {metadata_path}")
return

metadata = ascii_handler.collect_data_from_file(metadata_path)
metadata = gen.change_dict_keys_case(metadata, True)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this here actually? Or in other words why do we allow ambigouity with cases?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have seen metadata definition from CTA with upper, lower, mixed case. simtools used in the past upper case, but moved to lower. I think this is fine to be robust here.

cta_meta = metadata.get("cta", {})
cta_meta["activity"] = deepcopy(workflow_activity)

context = cta_meta.setdefault("context", {})
context_associated = context.get("associated_activities") or []
context["associated_activities"] = _merge_associated_activities(
context_associated,
associated_activities,
)

metadata["cta"] = cta_meta
ascii_handler.write_data_to_file(metadata, metadata_path)
logger.info(f"Updated workflow metadata in {metadata_path}")


def _merge_associated_activities(existing_activities, new_activities):
"""Merge associated activities preserving order and uniqueness."""
merged_activities = []
seen = set()
for activity in [*existing_activities, *new_activities]:
key = (activity.get("activity_name"), activity.get("activity_id"))
if key in seen:
continue
seen.add(key)
merged_activities.append(activity)
return merged_activities
Loading
Loading