Skip to content

Commit

Permalink
Parser.parse_from_node: Validate outputs against process spec (#6159)
Browse files Browse the repository at this point in the history
The `parse_from_node` clasmethod is a utility function to call the
`parse` method of a `Parser` implementation for a given `CalcJobNode`.
It automatically calls `parse` in the correct manner, passing the
`retrieved` output node, and wrapping it in a calcfunction to optionally
store the provenance.

However, since the `calcfunction` by default has a dynamic output
namespace and so accepts all outputs, it would not perform the same
output validation that the original `CalcJob` would have done since that
most likely will have defined specific outputs. Especially given that
the `parse_from_node` method would often be used in unit tests to test
`Parser` implementations, having the output validation be different
would make it possible to mis bugs. For example, if the parser assigns
an invalid output node, this would go unnoticed.

The `parse_from_node` is updated to patch the output specification of
the `calcfunction` with that of the process class that was used to
created the `CalcJobNode` which is being passed as an argument. As long
as the process can of course be loaded. This ensures that when the
`calcfunction` return the outputs returned by `Parser.parse` they are
validated against the output specification of the original `CalcJob`
class. If it fails, a `ValueError` is raised.
  • Loading branch information
sphuber committed Oct 23, 2023
1 parent d3807d4 commit d16792f
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 2 deletions.
18 changes: 16 additions & 2 deletions aiida/parsers/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple

from aiida.common import exceptions, extendeddicts
from aiida.common import exceptions, extendeddicts, log
from aiida.engine import ExitCode, ExitCodesNamespace, calcfunction
from aiida.engine.processes.ports import CalcJobOutputPort

Expand All @@ -24,6 +24,8 @@

__all__ = ('Parser',)

LOGGER = log.AIIDA_LOGGER.getChild('parser')


class Parser(ABC):
"""Base class for a Parser that can parse the outputs produced by a CalcJob process."""
Expand Down Expand Up @@ -147,6 +149,19 @@ def parse_calcfunction(**kwargs):
"""
from aiida.engine import Process

# Fetch the current process, which should be this calcfunction and update its outputs spec with that of the
# output spec of the process class that was used for the original ``CalcJobNode``. This ensures that when
# the outputs are attached, they are validated against the original output specification.
process = Process.current()

try:
process.spec()._ports['outputs'] = node.process_class.spec().outputs # type: ignore[union-attr] # pylint: disable=protected-access
except ValueError:
LOGGER.warning(
f'Could not load the process class of node `{node}`. This means that the output specification of '
'the original ``CalcJob`` plugin cannot be determined and so the outputs cannot be validated.'
)

if retrieved_temporary_folder is not None:
kwargs['retrieved_temporary_folder'] = retrieved_temporary_folder

Expand All @@ -158,7 +173,6 @@ def parse_calcfunction(**kwargs):
# process as well, which should represent this `CalcFunctionNode`. Otherwise the caller of the
# `parse_from_node` method will get an empty dictionary as a result, despite the `Parser.parse` method
# having registered outputs.
process = Process.current()
process.out_many(outputs) # type: ignore[union-attr]
return exit_code

Expand Down
35 changes: 35 additions & 0 deletions tests/parsers/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,17 @@ def prepare_for_submission(self): # pylint: disable=arguments-differ
pass


class BrokenArithmeticAddParser(Parser):
"""Parser for an ``ArithmeticAddCalculation`` job that is intentionally broken.
It attaches an output that is not defined by the spec of the ``ArithmeticAddCalculation``.
"""

def parse(self, **kwargs):
"""Intentionally attach an output that is not defined in the output spec of the calculation."""
self.out('invalid_output', orm.Str('0'))


class TestParser:
"""Test backend entities and their collections"""

Expand Down Expand Up @@ -124,3 +135,27 @@ def test_parse_from_node(self):

# Verify that the `retrieved_temporary_folder` keyword can be passed, there is no validation though
result, calcfunction = ArithmeticAddParser.parse_from_node(node, retrieved_temporary_folder='/some/path')

@pytest.mark.requires_rmq
def test_parse_from_node_output_validation(self):
"""Test that the ``parse_from_node`` properly validates attached outputs.
The calculation node represents the parsing process.
"""
output_filename = 'aiida.out'

# Mock the `CalcJobNode` which should have the `retrieved` folder containing the sum in the outputfile file
# This is the value that should be parsed into the `sum` output node
node = orm.CalcJobNode(computer=self.computer, process_type=ArithmeticAddCalculation.build_process_type())
node.set_option('resources', {'num_machines': 1, 'num_mpiprocs_per_machine': 1})
node.set_option('max_wallclock_seconds', 1800)
node.set_option('output_filename', output_filename)
node.store()

retrieved = orm.FolderData()
retrieved.base.repository.put_object_from_filelike(io.StringIO('1'), output_filename)
retrieved.store()
retrieved.base.links.add_incoming(node, link_type=LinkType.CREATE, link_label='retrieved')

with pytest.raises(ValueError, match=r'Error validating output .* for port .*: Unexpected ports .*'):
BrokenArithmeticAddParser.parse_from_node(node)

0 comments on commit d16792f

Please sign in to comment.