-
Notifications
You must be signed in to change notification settings - Fork 3
Improve activity and run_application metadata
#2113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
51c0ade
4b27929
91e22e1
88781ec
ceb8f31
d809c56
5487dda
303f89d
c4c728e
c792b1b
25bc032
897603b
3150e87
800df58
ed0b36c
6fd9877
e052d42
39c3572
dc9d15b
d1385dd
0cff1b1
87aebf3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Add UUID7-based activity IDs for each application execution, propagate into metadata, and track associated activities in workflow metadata file. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,8 +20,10 @@ | |
|
|
||
| For simplified configuration, a placeholder called ``__SETTING_WORKFLOW__`` can be used in the | ||
| configuration file. This placeholder will be replaced with the directory below ``input`` | ||
| (example: configuration file is in ``input/LSTN-design/num_gains/20250214T134800/config.yml``, | ||
| then the placeholder will be replaced with ``LSTN-design/num_gains/20250214T134800``). | ||
| (example: configuration file is in | ||
| ``input/LSTN-design/num_gains/019d776b-e24c-741d-bc05-e3f6f7ec77c7/config.yml``, | ||
| then the placeholder will be replaced with | ||
| ``LSTN-design/num_gains/019d776b-e24c-741d-bc05-e3f6f7ec77c7``). | ||
| This will also be the directory for any output generated by the application. | ||
|
|
||
| Run time environments can be defined in the configuration file using the ``runtime_environment`` | ||
|
|
@@ -70,7 +72,6 @@ def _add_arguments(parser): | |
| """Register application-specific command line arguments.""" | ||
| parser.add_argument( | ||
| "--config_file", | ||
| dest="configuration_file", | ||
| help="Application configuration.", | ||
| type=str, | ||
| required=True, | ||
|
|
@@ -94,14 +95,14 @@ def main(): | |
| """Run several simtools applications using a configuration file.""" | ||
| app_context = build_application( | ||
| usage="simtools-run-application --config_file config_file_name", | ||
| initialization_kwargs={"db_config": True}, | ||
| initialization_kwargs={"db_config": True, "paths": False}, | ||
| startup_kwargs={ | ||
| "setup_io_handler": False, | ||
| "resolve_sim_software_executables": False, | ||
| }, | ||
| ) | ||
|
|
||
| simtools_runner.run_applications(app_context.args, app_context.logger) | ||
| simtools_runner.run_applications(app_context.args) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this intentional to remove the logger injection here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes - to have the usage of logger in simtools_runner consistent with other modules. |
||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,110 @@ | ||
| """Utilities for workflow-level metadata propagation into model-parameter metadata files.""" | ||
|
|
||
| import logging | ||
| from copy import deepcopy | ||
| from pathlib import Path | ||
|
|
||
| import simtools.utils.general as gen | ||
| from simtools.data_model.metadata_collector import MetadataCollector | ||
| from simtools.io import ascii_handler | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| def build_workflow_activity_metadata( | ||
| args_dict, | ||
| workflow_activity_id, | ||
| workflow_start, | ||
| workflow_end, | ||
| runtime_environment, | ||
| workflow_context, | ||
| ): | ||
| """Build workflow activity metadata from workflow execution context. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| args_dict : dict | ||
| Workflow application arguments. | ||
| workflow_activity_id : str | ||
| Workflow-level activity identifier. | ||
| workflow_start : datetime | ||
| Start time of the workflow. | ||
| workflow_end : datetime | ||
| End time of the workflow. | ||
| runtime_environment : dict or None | ||
| Runtime environment definition used for the workflow. | ||
| workflow_context : dict | ||
| Context with keys 'site' and 'instrument' for the workflow. | ||
|
|
||
| Returns | ||
| ------- | ||
| dict | ||
| Activity block to be injected into model-parameter metadata files. | ||
| """ | ||
| metadata_args = dict(args_dict) | ||
| metadata_args["label"] = "setting_workflow" | ||
| metadata_args["activity_id"] = workflow_activity_id | ||
| metadata_args["activity_start"] = workflow_start.isoformat(timespec="seconds") | ||
| metadata_args["activity_end"] = workflow_end.isoformat(timespec="seconds") | ||
| metadata_args["runtime_environment"] = deepcopy(runtime_environment) | ||
| metadata_args["site"] = workflow_context.get("site") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. all of these could return None. Should we be more careful here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, and this is fine. At least for now workflows shouldn't fail due to missing metadata entries on site or telescope name. |
||
| metadata_args["instrument"] = workflow_context.get("instrument") | ||
|
|
||
| collector = MetadataCollector(metadata_args, clean_meta=False) | ||
| return collector.get_top_level_metadata().get("cta", {}).get("activity", {}) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we assume cta is there?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are right that in general we do not hardwire the observatory name. This case is a bit different, as we only one metadata schema (SimtoolsOutputMetadata), which is for CTA (and has this key included). Using a different metadata scheme would require a bit of work - and we can do this when this is becoming relevant. |
||
|
|
||
|
|
||
| def update_model_parameter_metadata_file( | ||
| metadata_file, | ||
| workflow_activity, | ||
| associated_activities, | ||
| ): | ||
| """Inject workflow metadata into a model-parameter metadata file. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| metadata_file : str or Path | ||
| Path to the model-parameter metadata file to update. | ||
| workflow_activity : dict | ||
| Workflow activity metadata block to set as top-level activity metadata. | ||
| associated_activities : list | ||
| Ordered activities associated with workflow execution. | ||
|
|
||
| Returns | ||
| ------- | ||
| None | ||
| Function updates file in place when it exists. | ||
| """ | ||
| metadata_path = Path(metadata_file) | ||
| if not metadata_path.exists(): | ||
| logger.debug(f"Model-parameter metadata file does not exist: {metadata_path}") | ||
| return | ||
|
|
||
| metadata = ascii_handler.collect_data_from_file(metadata_path) | ||
| metadata = gen.change_dict_keys_case(metadata, True) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need this here actually? Or in other words why do we allow ambigouity with cases?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have seen metadata definition from CTA with upper, lower, mixed case. simtools used in the past upper case, but moved to lower. I think this is fine to be robust here. |
||
| cta_meta = metadata.get("cta", {}) | ||
| cta_meta["activity"] = deepcopy(workflow_activity) | ||
|
|
||
| context = cta_meta.setdefault("context", {}) | ||
| context_associated = context.get("associated_activities") or [] | ||
| context["associated_activities"] = _merge_associated_activities( | ||
| context_associated, | ||
| associated_activities, | ||
| ) | ||
|
|
||
| metadata["cta"] = cta_meta | ||
| ascii_handler.write_data_to_file(metadata, metadata_path) | ||
| logger.info(f"Updated workflow metadata in {metadata_path}") | ||
|
|
||
|
|
||
| def _merge_associated_activities(existing_activities, new_activities): | ||
| """Merge associated activities preserving order and uniqueness.""" | ||
| merged_activities = [] | ||
| seen = set() | ||
| for activity in [*existing_activities, *new_activities]: | ||
| key = (activity.get("activity_name"), activity.get("activity_id")) | ||
| if key in seen: | ||
| continue | ||
| seen.add(key) | ||
| merged_activities.append(activity) | ||
| return merged_activities | ||
Uh oh!
There was an error while loading. Please reload this page.