Skip to content

Commit

Permalink
Add GpawParser and GpawBaseWorkChain (#15)
Browse files Browse the repository at this point in the history
Major changes:

 1. Add `GpawParser`: In case `gpaw` is the code, the GPAW parser will
    parse and store many attributes specific to this code, over and
    above what is expected from `AseParser`. It also provides more
    detailed error options.
 2. Add `GpawBaseWorkChain`: A simple `BaseWorkChain` for restarts.

Minor changes:

 1. Store and create files in `calculations`: Create `gpw` files which
    are helpful for restarts, allow for WriteIntervals (see also 
    https://wiki.fysik.dtu.dk/gpaw/documentation/manual.html#restarting-a-calculation)
  • Loading branch information
sudarshanv01 committed Aug 25, 2022
1 parent 322fb8a commit 3b22bc8
Show file tree
Hide file tree
Showing 27 changed files with 1,657 additions and 55 deletions.
103 changes: 74 additions & 29 deletions aiida_ase/calculations/ase.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,14 @@ class AseCalculation(engine.CalcJob):

_default_parser = 'ase.ase'
_INPUT_FILE_NAME = 'aiida_script.py'
_OUTPUT_FILE_NAME = 'results.json'
_TXT_OUTPUT_FILE_NAME = 'aiida.out'
_input_aseatoms = 'aiida_atoms.json'
_output_aseatoms = 'aiida_out_atoms.json'
_OPTIMIZER_FILE_NAME = 'aiida_optimizer.log'
_OUTPUT_FILE_NAME = 'results.json' # Written at the very end
_TXT_OUTPUT_FILE_NAME = 'aiida.out' # The log file of the calculation
_input_aseatoms = 'aiida_atoms.json' # The input file written for an ASE calc
_output_aseatoms = 'aiida_out_atoms.json' # For a relaxation, equivalent of qn.traj
_OPTIMIZER_FILE_NAME = 'aiida_optimizer.log' # stdout for optimiser
_write_gpw_file = False
_GPW_FILE_NAME = 'aiida_gpw.gpw'
_freq_gpw_write = 0

@classmethod
def define(cls, spec):
Expand All @@ -33,18 +36,35 @@ def define(cls, spec):
help='Filename to which the content of stderr of the code that is to be run will be written.')
spec.input('metadata.options.parser_name', valid_type=str, default=cls._default_parser,
help='Define the parser to be used by setting its entry point name.')
spec.input('metadata.options.optimizer_stdout', valid_type=str, default=cls._OPTIMIZER_FILE_NAME,
help='Optimiser filename for relaxation')
spec.input('metadata.options.gpw_filename', valid_type=str, default=cls._GPW_FILE_NAME,
help='Filename for .gpw file')
spec.input('metadata.options.freq_gpw_write', valid_type=int, default=cls._freq_gpw_write,
help='Frequency to write the GPW file')
spec.input('metadata.options.write_gpw', valid_type=bool, default=cls._write_gpw_file,
help='Write the gpw file, useful for post processing')
spec.input('metadata.options.log_filename', valid_type=str, default=cls._TXT_OUTPUT_FILE_NAME,
help='Filename for the log file written out by the code')
spec.input('structure', valid_type=StructureData, help='The input structure.')
spec.input('kpoints', valid_type=KpointsData, required=False, help='The k-points to use for the calculation.')
spec.input('parameters', valid_type=Dict, help='Input parameters for the namelists.')
spec.input('settings', valid_type=Dict, required=False, help='Optional settings that control the plugin.')
spec.input('metadata.options.optimizer_stdout', valid_type=str, default=cls._OPTIMIZER_FILE_NAME,
help='Optimiser filename for relaxation')

spec.output('structure', valid_type=orm.StructureData, required=False)
spec.output('parameters', valid_type=orm.Dict, required=False)
spec.output('array', valid_type=orm.ArrayData, required=False)
spec.output('trajectory', valid_type=orm.TrajectoryData, required=False)

spec.exit_code(300, 'ERROR_OUTPUT_FILES', message='One of the expected output files was missing.')
spec.exit_code(301, 'ERROR_LOG_FILES', message='The log file from the DFT code was not written out.')
spec.exit_code(302, 'ERROR_RELAX_NOT_COMPLETE', message='Relaxation did not complete.')
spec.exit_code(303, 'ERROR_SCF_NOT_COMPLETE', message='SCF Failed.')
spec.exit_code(305, 'ERROR_UNEXPECTED_EXCEPTION', message='Cannot identify what went wrong.')
spec.exit_code(306, 'ERROR_PAW_NOT_FOUND', message='gpaw could not find the PAW potentials.')
spec.exit_code(307, 'ERROR_ATTRIBUTE_ERROR', message='Attribute Error found in the stderr file.')
spec.exit_code(308, 'ERROR_FERMI_LEVEL_INF', message='Fermi level is infinite.')
spec.exit_code(400, 'ERROR_OUT_OF_WALLTIME', message='The calculation ran out of walltime.')
# yapf: enable

def prepare_for_submission(self, folder):
Expand Down Expand Up @@ -136,13 +156,13 @@ def return_a_function(v):
the_v = v2
args_dict[k2] = the_v

v2 = '{}({})'.format(
v['@function'], ', '.join(['{}={}'.format(k_, v_) for k_, v_ in args_dict.items()])
v2 = '{}({})'.format( # pylint: disable=consider-using-f-string
v['@function'], ', '.join([f'{k_}={v_}' for k_, v_ in args_dict.items()]) # pylint: disable=consider-using-f-string
)
return v2
return v

tmp_list = ['{}={}'.format(k, return_a_function(v)) for k, v in calc_args.items()]
tmp_list = ['{}={}'.format(k, return_a_function(v)) for k, v in calc_args.items()] # pylint: disable=consider-using-f-string

calc_argsstr = ', '.join(tmp_list)

Expand All @@ -153,8 +173,16 @@ def return_a_function(v):
try:
mesh = self.inputs.kpoints.get_kpoints_mesh()[0]
except AttributeError:
raise common.InputValidationError("Coudn't find a mesh of kpoints" ' in the KpointsData')
calc_argsstr = ', '.join([calc_argsstr] + ['kpts=({},{},{})'.format(*mesh)])
raise common.InputValidationError("Coudn't find a mesh of kpoints in the KpointsData")
if 'kpoints_options' in parameters_dict:
kpts_argsstr = "kpts={'size':" + '({}, {}, {})'.format(*mesh) # pylint: disable=consider-using-f-string
for k, v in parameters_dict['kpoints_options'].items():
kpts_argsstr += f", '{k}':{v}"
kpts_argsstr += '}'
parameters_dict.pop('kpoints_options')
calc_argsstr = ', '.join([calc_argsstr] + [kpts_argsstr])
else:
calc_argsstr = ', '.join([calc_argsstr] + ['kpts=({},{},{})'.format(*mesh)]) # pylint: disable=consider-using-f-string

# =============== prepare the methods of atoms.get(), to save results

Expand All @@ -174,7 +202,7 @@ def return_a_function(v):
try:
if 'PW' in calc_args['mode'].values():
all_imports.append('from gpaw import PW')
except KeyError:
except (KeyError, AttributeError):
pass

extra_imports = parameters_dict.pop('extra_imports', [])
Expand All @@ -185,9 +213,9 @@ def return_a_function(v):
if not all([isinstance(j, str) for j in i]):
raise ValueError('extra import must contain strings')
if len(i) == 2:
all_imports.append('from {} import {}'.format(*i))
all_imports.append('from {} import {}'.format(*i)) # pylint: disable=consider-using-f-string
elif len(i) == 3:
all_imports.append('from {} import {} as {}'.format(*i))
all_imports.append('from {} import {} as {}'.format(*i)) # pylint: disable=consider-using-f-string
else:
raise ValueError('format for extra imports not recognized')
else:
Expand Down Expand Up @@ -218,6 +246,24 @@ def return_a_function(v):
input_txt += '\n'

if optimizer is not None:
# check if the gpw file has been requested
if self.inputs.metadata.options.write_gpw:
# attach a class which tells the calculator
# when to write the gpw file
# (this is needed for the restart)
# Similar to https://wiki.fysik.dtu.dk/gpaw/documentation/manual.html#restarting-a-calculation
gpw_filename = self.metadata.options.gpw_filename
occasion = self.inputs.metadata.options.freq_gpw_write
if occasion > 0:
input_txt += 'class WriteIntervals:\n'
input_txt += ' def __init__(self, fname):\n'
input_txt += ' self.fname = fname\n'
input_txt += ' self.iter=0\n'
input_txt += ' def write(self):\n'
input_txt += ' calculator.write(self.fname)\n'
input_txt += f' self.iter += {occasion}\n'
input_txt += f"calculator.attach(WriteIntervals('{gpw_filename}').write, {occasion})\n"

# here block the trajectory file name: trajectory = 'aiida.traj'
input_txt += f'optimizer = custom_optimizer({optimizer_argsstr})\n'
input_txt += f'optimizer.run({optimizer_runargsstr})\n'
Expand All @@ -233,13 +279,6 @@ def return_a_function(v):
input_txt += f"results['{getter}'] = calculator.get_{getter}({getter_args})\n"
input_txt += '\n'

# Convert to lists
input_txt += 'for k,v in results.items():\n'
input_txt += ' if isinstance(results[k],(numpy.matrix,numpy.ndarray)):\n'
input_txt += ' results[k] = results[k].tolist()\n'

input_txt += '\n'

post_lines = parameters_dict.pop('post_lines', None)
if post_lines is not None:
if not isinstance(post_lines, (list, tuple)):
Expand All @@ -248,6 +287,12 @@ def return_a_function(v):
raise ValueError('Postlines must be a list of strings')
input_txt += '\n'.join(post_lines) + '\n\n'

# Convert to lists
input_txt += 'for k,v in results.items():\n'
input_txt += ' if isinstance(results[k],(numpy.matrix,numpy.ndarray)):\n'
input_txt += ' results[k] = results[k].tolist()\n'

input_txt += '\n'
# Dump results to file
right_open = 'paropen' if self.options.withmpi else 'open'
input_txt += f"with {right_open}('{self._OUTPUT_FILE_NAME}', 'w') as f:\n"
Expand All @@ -259,6 +304,11 @@ def return_a_function(v):
input_txt += f"atoms.write('{self._output_aseatoms}')\n"
input_txt += '\n'

# Write out the final gpw file if requested
if self.inputs.metadata.options.write_gpw:
input_txt += f"calculator.write('{self.inputs.metadata.options.gpw_filename}')\n"
input_txt += '\n'

# write all the input script to a file
with folder.open(self._INPUT_FILE_NAME, 'w') as handle:
handle.write(input_txt)
Expand All @@ -276,8 +326,6 @@ def return_a_function(v):
calcinfo = common.CalcInfo()

calcinfo.uuid = self.uuid
# Empty command line by default
# calcinfo.cmdline_params = settings.pop('CMDLINE', [])
calcinfo.local_copy_list = local_copy_list
calcinfo.remote_copy_list = remote_copy_list

Expand All @@ -288,23 +336,20 @@ def return_a_function(v):
cmdline_params.append(self._INPUT_FILE_NAME)
codeinfo.cmdline_params = cmdline_params

#calcinfo.stdin_name = self._INPUT_FILE_NAME
codeinfo.stdout_name = self._TXT_OUTPUT_FILE_NAME
codeinfo.stdout_name = self.inputs.metadata.options.log_filename
codeinfo.code_uuid = self.inputs.code.uuid
calcinfo.codes_info = [codeinfo]

# Retrieve files
calcinfo.retrieve_list = []
calcinfo.retrieve_list.append(self.options.output_filename)
calcinfo.retrieve_list.append(self._output_aseatoms)
calcinfo.retrieve_list.append(self._TXT_OUTPUT_FILE_NAME)
if optimizer is not None:
calcinfo.retrieve_list.append(self._OPTIMIZER_FILE_NAME)

calcinfo.retrieve_list += additional_retrieve_list

# TODO: I should have two ways of running it: with gpaw-python in parallel
# and executing python if in serial

return calcinfo


Expand Down
141 changes: 141 additions & 0 deletions aiida_ase/parsers/ase.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""Parser implementation for the ``AseCalculation``."""
import json
import math
import numpy

from aiida import parsers
Expand All @@ -10,6 +11,7 @@
Dict = plugins.DataFactory('dict')
ArrayData = plugins.DataFactory('array')
StructureData = plugins.DataFactory('structure')
TrajectoryData = plugins.DataFactory('array.trajectory')
AseCalculation = plugins.CalculationFactory('ase.ase')


Expand Down Expand Up @@ -65,3 +67,142 @@ def parse(self, **kwargs): # pylint: disable=inconsistent-return-statements
self.out('parameters', Dict(dict=json_params))

return


class GpawParser(parsers.Parser):
    """Parser implementation for GPAW through an ``AseCalculation``.

    In addition to what ``AseParser`` does, this parser inspects the GPAW
    log file and the scheduler stderr file to distinguish common failure
    modes (missing PAW datasets, ``AttributeError`` tracebacks, incomplete
    SCF or relaxation) and to store extra outputs such as a
    ``TrajectoryData`` of the ionic steps of a relaxation.
    """

    def parse(self, **kwargs):  # pylint: disable=inconsistent-return-statements,too-many-branches,too-many-locals,too-many-return-statements,too-many-statements
        """Parse the retrieved files from a ``AseCalculation`` run with GPAW.

        :returns: ``None`` on success, otherwise one of the exit codes
            declared on the calculation class describing the failure mode.
        """

        # check what is inside the retrieved folder
        list_of_files = self.retrieved.list_object_names()

        # check if it was a relaxation: ``optimizer`` is only present in the
        # input parameters when an ionic relaxation was requested
        optimizer = self.node.inputs.parameters.get_dict().pop('optimizer', None)

        # output json file: written only at the very end of a successful run
        if AseCalculation._OUTPUT_FILE_NAME in list_of_files:  # pylint: disable=protected-access
            # This calculation is likely to have been alright
            pass
        elif AseCalculation._TXT_OUTPUT_FILE_NAME in list_of_files:  # pylint: disable=protected-access
            # An output structure was not found but there is a txt file
            # Probably helpful for restarts
            self.logger.error('Output results was not found, inspecting log file')
            # Checking for possible errors common to all calculations
            with self.retrieved.open('_scheduler-stderr.txt', 'r') as handle:
                lines = handle.readlines()
            if check_paw_missing(lines):
                self.logger.error('Could not find paw potentials')
                return self.exit_codes.ERROR_PAW_NOT_FOUND
            if check_attribute_error(lines):
                self.logger.error('AttributeError in GPAW')
                return self.exit_codes.ERROR_ATTRIBUTE_ERROR

            if optimizer is not None:
                # This is a relaxation calculation that did not complete
                # try to get all the structures that are available
                try:
                    with self.retrieved.open(self.node.get_attribute('log_filename'), 'r') as handle:
                        all_ase_traj = read(handle, index=':', format='gpaw-out')
                    trajectory = store_to_trajectory_data(all_ase_traj)
                    # NOTE(review): the success path and sibling parsers use
                    # ``self.out(...)``; confirm that plain attribute
                    # assignment on ``self.outputs`` registers the node
                    self.outputs.trajectory = trajectory
                    return self.exit_codes.ERROR_RELAX_NOT_COMPLETE
                except Exception:  # pylint: disable=broad-except
                    # If it made it to here then the error is due to the SCF not completing
                    self.logger.error('First relaxation step not completed')
                    return self.exit_codes.ERROR_SCF_NOT_COMPLETE
            else:
                # This is an SCF calculation that did not complete
                self.logger.error('SCF did not complete')
                return self.exit_codes.ERROR_SCF_NOT_COMPLETE
        else:
            # Neither log file nor end result file were produced
            # Likely to be bad news
            return self.exit_codes.ERROR_UNEXPECTED_EXCEPTION

        # Check if output structure is needed
        if optimizer is not None:
            # If we are here the calculation did complete successfully
            with self.retrieved.open(AseCalculation._output_aseatoms, 'r') as handle:  # pylint: disable=protected-access
                atoms = read(handle, format='json')
            self.out('structure', StructureData(ase=atoms))
            # Store the trajectory as well
            with self.retrieved.open(self.node.get_attribute('log_filename'), 'r') as handle:
                all_ase_traj = read(handle, index=':', format='gpaw-out')
            self.outputs.trajectory = store_to_trajectory_data(all_ase_traj)
        # load the results dictionary
        with self.retrieved.open(AseCalculation._OUTPUT_FILE_NAME, 'r') as handle:  # pylint: disable=protected-access
            json_params = json.load(handle)

        # get the relevant data from the log file for the final structure
        with self.retrieved.open(self.node.get_attribute('log_filename'), 'r') as handle:
            atoms_log = read(handle, format='gpaw-out')
        create_output_parameters(atoms_log, json_params)

        # Check that the parameters are not inf or nan; an infinite Fermi
        # level is a known sign of a broken GPAW run
        if math.isnan(json_params['fermi_energy']) or math.isinf(json_params['fermi_energy']):
            self.logger.error('Fermi energy is inf or nan')
            return self.exit_codes.ERROR_FERMI_LEVEL_INF

        # look at warnings: anything printed on stderr is stored verbatim
        with self.retrieved.open('_scheduler-stderr.txt', 'r') as handle:
            errors = handle.read()
        if errors:
            json_params['warnings'] = [errors]

        # extract arrays from json_params so they end up in an ArrayData node
        dictionary_array = {}
        for k, v in list(json_params.items()):
            if isinstance(v, (list, tuple, numpy.ndarray)):
                dictionary_array[k] = json_params.pop(k)

        if dictionary_array:
            array_data = ArrayData()
            for k, v in dictionary_array.items():
                array_data.set_array(k, numpy.array(v))
            self.out('array', array_data)

        if json_params:
            self.out('parameters', Dict(dict=json_params))

        return


def check_paw_missing(lines):
    """Check if paw potentials are missing and that is the source of the error.

    :param lines: lines read from the scheduler stderr file.
    :returns: ``True`` if any line reports a missing PAW dataset file.
    """
    needle = 'Could not find required PAW dataset file'
    return any(needle in line for line in lines)


def check_attribute_error(lines):
    """Check if there is an ``AttributeError`` printed in the stderr file.

    The docstring previously claimed ``AssertionError`` in the "output
    file"; the code actually matches ``AttributeError`` in the scheduler
    stderr lines passed in by the parser.

    :param lines: lines read from the scheduler stderr file.
    :returns: ``True`` if any line mentions ``AttributeError``.
    """
    return any('AttributeError' in line for line in lines)


def create_output_parameters(atoms_log, json_params):
    """Populate ``json_params`` with quantities parsed from the GPAW log file.

    :param atoms_log: ASE ``Atoms`` read from the GPAW log, with an attached
        calculator holding the parsed results.
    :param json_params: dictionary of results, updated in place.
    """
    log_calc = atoms_log.calc
    parsed_results = log_calc.results
    json_params['energy'] = atoms_log.get_potential_energy()
    json_params['energy_contributions'] = log_calc.energy_contributions
    json_params['forces'] = atoms_log.get_forces()
    # Optional quantities: pop them so they are not duplicated in the results,
    # defaulting to ``None`` when the log did not contain them.
    for quantity in ('stress', 'magmoms', 'dipole'):
        json_params[quantity] = parsed_results.pop(quantity, None)
    json_params['pbc'] = atoms_log.get_pbc()
    json_params['fermi_energy'] = log_calc.eFermi
    json_params['eigenvalues'] = log_calc.get_eigenvalues()


def store_to_trajectory_data(all_ase_traj):
    """Store ase atoms object into a TrajectoryFile.

    :param all_ase_traj: iterable of ASE ``Atoms``, one per ionic step.
    :returns: ``TrajectoryData`` built from the corresponding structures.
    """
    structures = [StructureData(ase=atoms) for atoms in all_ase_traj]
    return TrajectoryData(structures)
Empty file added aiida_ase/workflows/__init__.py
Empty file.

0 comments on commit 3b22bc8

Please sign in to comment.