Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion mmif/utils/workflow_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,11 @@ def generate_workflow_identifier(mmif_input: Union[str, Path, Mmif],
Generate a workflow identifier string from a MMIF file or object.

The identifier follows the storage directory structure format:
app_name/version/param_hash/app_name2/version2/param_hash2/...
source_composition/app_name/version/param_hash/app_name2/version2/param_hash2/...

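The leading ``source_composition`` segment encodes the top-level
document mix as ``Type-N`` pairs, where ``Type`` is the document type's
short name and ``N`` is the number of top-level documents of that type.
Pairs are joined by ``-`` and sorted by type name
(e.g. ``TextDocument-1-VideoDocument-1``).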

Uses view.metadata.parameters (raw user-passed values) for hashing
to ensure reproducibility. Views with errors or warnings are excluded
Expand All @@ -128,6 +132,10 @@ def generate_workflow_identifier(mmif_input: Union[str, Path, Mmif],
data = _read_mmif_from_path(mmif_input)
segments = []

# Leading segment: number of top-level documents per document type, sorted by type name
sources = Counter(doc.at_type.shortname for doc in data.documents)
segments.append('-'.join([f'{k}-{sources[k]}' for k in sorted(sources.keys())]))

# Group views into runs
grouped_apps = group_views_by_app(data.views)

Expand Down
6 changes: 3 additions & 3 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,9 +471,9 @@ def test_generate_workflow_identifier_grouped(self):
try:
workflow_id = wfh.generate_workflow_identifier(tmp_file)
segments = workflow_id.split('/')
self.assertEqual(len(segments), 6)
self.assertIn('app1', segments[0])
self.assertIn('app2', segments[3])
self.assertEqual(len(segments), 7)
self.assertIn('app1', segments[1])
self.assertIn('app2', segments[4])
finally:
os.unlink(tmp_file)

Expand Down
Loading