Skip to content

Commit

Permalink
Add GpawParser and GpawBaseWorkChain (#15)
Browse files Browse the repository at this point in the history
Major changes:

 1. Add `GpawParser`: In case `gpaw` is the code, the GPAW parser will
    parse and store many attributes specific to this code, over and
    above what is expected from `AseParser`. It also provides more
    detailed error options.
 2. Add `GpawBaseWorkChain`: A simple `BaseWorkChain` for restarts.

Minor changes:

 1. Store and create files in `calculations`: Create `gpw` files which
    are helpful for restarts, allow for WriteIntervals (see also 
    https://wiki.fysik.dtu.dk/gpaw/documentation/manual.html#restarting-a-calculation)
  • Loading branch information
sudarshanv01 committed Aug 25, 2022
1 parent 322fb8a commit 3b22bc8
Show file tree
Hide file tree
Showing 27 changed files with 1,657 additions and 55 deletions.
103 changes: 74 additions & 29 deletions aiida_ase/calculations/ase.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,14 @@ class AseCalculation(engine.CalcJob):

_default_parser = 'ase.ase'
_INPUT_FILE_NAME = 'aiida_script.py'
_OUTPUT_FILE_NAME = 'results.json'
_TXT_OUTPUT_FILE_NAME = 'aiida.out'
_input_aseatoms = 'aiida_atoms.json'
_output_aseatoms = 'aiida_out_atoms.json'
_OPTIMIZER_FILE_NAME = 'aiida_optimizer.log'
_OUTPUT_FILE_NAME = 'results.json' # Written at the very end
_TXT_OUTPUT_FILE_NAME = 'aiida.out' # The log file of the calculation
_input_aseatoms = 'aiida_atoms.json' # The input file written for an ASE calc
_output_aseatoms = 'aiida_out_atoms.json' # For a relaxation, equivalent of qn.traj
_OPTIMIZER_FILE_NAME = 'aiida_optimizer.log' # stdout for optimiser
_write_gpw_file = False
_GPW_FILE_NAME = 'aiida_gpw.gpw'
_freq_gpw_write = 0

@classmethod
def define(cls, spec):
Expand All @@ -33,18 +36,35 @@ def define(cls, spec):
help='Filename to which the content of stderr of the code that is to be run will be written.')
spec.input('metadata.options.parser_name', valid_type=str, default=cls._default_parser,
help='Define the parser to be used by setting its entry point name.')
spec.input('metadata.options.optimizer_stdout', valid_type=str, default=cls._OPTIMIZER_FILE_NAME,
help='Optimiser filename for relaxation')
spec.input('metadata.options.gpw_filename', valid_type=str, default=cls._GPW_FILE_NAME,
help='Filename for .gpw file')
spec.input('metadata.options.freq_gpw_write', valid_type=int, default=cls._freq_gpw_write,
help='Frequency to write the GPW file')
spec.input('metadata.options.write_gpw', valid_type=bool, default=cls._write_gpw_file,
help='Write the gpw file, useful for post processing')
spec.input('metadata.options.log_filename', valid_type=str, default=cls._TXT_OUTPUT_FILE_NAME,
help='Filename for the log file written out by the code')
spec.input('structure', valid_type=StructureData, help='The input structure.')
spec.input('kpoints', valid_type=KpointsData, required=False, help='The k-points to use for the calculation.')
spec.input('parameters', valid_type=Dict, help='Input parameters for the namelists.')
spec.input('settings', valid_type=Dict, required=False, help='Optional settings that control the plugin.')
spec.input('metadata.options.optimizer_stdout', valid_type=str, default=cls._OPTIMIZER_FILE_NAME,
help='Optimiser filename for relaxation')

spec.output('structure', valid_type=orm.StructureData, required=False)
spec.output('parameters', valid_type=orm.Dict, required=False)
spec.output('array', valid_type=orm.ArrayData, required=False)
spec.output('trajectory', valid_type=orm.TrajectoryData, required=False)

spec.exit_code(300, 'ERROR_OUTPUT_FILES', message='One of the expected output files was missing.')
spec.exit_code(301, 'ERROR_LOG_FILES', message='The log file from the DFT code was not written out.')
spec.exit_code(302, 'ERROR_RELAX_NOT_COMPLETE', message='Relaxation did not complete.')
spec.exit_code(303, 'ERROR_SCF_NOT_COMPLETE', message='SCF Failed.')
spec.exit_code(305, 'ERROR_UNEXPECTED_EXCEPTION', message='Cannot identify what went wrong.')
spec.exit_code(306, 'ERROR_PAW_NOT_FOUND', message='gpaw could not find the PAW potentials.')
spec.exit_code(307, 'ERROR_ATTRIBUTE_ERROR', message='Attribute Error found in the stderr file.')
spec.exit_code(308, 'ERROR_FERMI_LEVEL_INF', message='Fermi level is infinite.')
spec.exit_code(400, 'ERROR_OUT_OF_WALLTIME', message='The calculation ran out of walltime.')
# yapf: enable

def prepare_for_submission(self, folder):
Expand Down Expand Up @@ -136,13 +156,13 @@ def return_a_function(v):
the_v = v2
args_dict[k2] = the_v

v2 = '{}({})'.format(
v['@function'], ', '.join(['{}={}'.format(k_, v_) for k_, v_ in args_dict.items()])
v2 = '{}({})'.format( # pylint: disable=consider-using-f-string
v['@function'], ', '.join([f'{k_}={v_}' for k_, v_ in args_dict.items()]) # pylint: disable=consider-using-f-string
)
return v2
return v

tmp_list = ['{}={}'.format(k, return_a_function(v)) for k, v in calc_args.items()]
tmp_list = ['{}={}'.format(k, return_a_function(v)) for k, v in calc_args.items()] # pylint: disable=consider-using-f-string

calc_argsstr = ', '.join(tmp_list)

Expand All @@ -153,8 +173,16 @@ def return_a_function(v):
try:
mesh = self.inputs.kpoints.get_kpoints_mesh()[0]
except AttributeError:
raise common.InputValidationError("Coudn't find a mesh of kpoints" ' in the KpointsData')
calc_argsstr = ', '.join([calc_argsstr] + ['kpts=({},{},{})'.format(*mesh)])
raise common.InputValidationError("Coudn't find a mesh of kpoints in the KpointsData")
if 'kpoints_options' in parameters_dict:
kpts_argsstr = "kpts={'size':" + '({}, {}, {})'.format(*mesh) # pylint: disable=consider-using-f-string
for k, v in parameters_dict['kpoints_options'].items():
kpts_argsstr += f", '{k}':{v}"
kpts_argsstr += '}'
parameters_dict.pop('kpoints_options')
calc_argsstr = ', '.join([calc_argsstr] + [kpts_argsstr])
else:
calc_argsstr = ', '.join([calc_argsstr] + ['kpts=({},{},{})'.format(*mesh)]) # pylint: disable=consider-using-f-string

# =============== prepare the methods of atoms.get(), to save results

Expand All @@ -174,7 +202,7 @@ def return_a_function(v):
try:
if 'PW' in calc_args['mode'].values():
all_imports.append('from gpaw import PW')
except KeyError:
except (KeyError, AttributeError):
pass

extra_imports = parameters_dict.pop('extra_imports', [])
Expand All @@ -185,9 +213,9 @@ def return_a_function(v):
if not all([isinstance(j, str) for j in i]):
raise ValueError('extra import must contain strings')
if len(i) == 2:
all_imports.append('from {} import {}'.format(*i))
all_imports.append('from {} import {}'.format(*i)) # pylint: disable=consider-using-f-string
elif len(i) == 3:
all_imports.append('from {} import {} as {}'.format(*i))
all_imports.append('from {} import {} as {}'.format(*i)) # pylint: disable=consider-using-f-string
else:
raise ValueError('format for extra imports not recognized')
else:
Expand Down Expand Up @@ -218,6 +246,24 @@ def return_a_function(v):
input_txt += '\n'

if optimizer is not None:
# check if the gpw file has been requested
if self.inputs.metadata.options.write_gpw:
# attach a class which tells the calculator
# when to write the gpw file
# (this is needed for the restart)
# Similar to https://wiki.fysik.dtu.dk/gpaw/documentation/manual.html#restarting-a-calculation
gpw_filename = self.metadata.options.gpw_filename
occasion = self.inputs.metadata.options.freq_gpw_write
if occasion > 0:
input_txt += 'class WriteIntervals:\n'
input_txt += ' def __init__(self, fname):\n'
input_txt += ' self.fname = fname\n'
input_txt += ' self.iter=0\n'
input_txt += ' def write(self):\n'
input_txt += ' calculator.write(self.fname)\n'
input_txt += f' self.iter += {occasion}\n'
input_txt += f"calculator.attach(WriteIntervals('{gpw_filename}').write, {occasion})\n"

# here block the trajectory file name: trajectory = 'aiida.traj'
input_txt += f'optimizer = custom_optimizer({optimizer_argsstr})\n'
input_txt += f'optimizer.run({optimizer_runargsstr})\n'
Expand All @@ -233,13 +279,6 @@ def return_a_function(v):
input_txt += f"results['{getter}'] = calculator.get_{getter}({getter_args})\n"
input_txt += '\n'

# Convert to lists
input_txt += 'for k,v in results.items():\n'
input_txt += ' if isinstance(results[k],(numpy.matrix,numpy.ndarray)):\n'
input_txt += ' results[k] = results[k].tolist()\n'

input_txt += '\n'

post_lines = parameters_dict.pop('post_lines', None)
if post_lines is not None:
if not isinstance(post_lines, (list, tuple)):
Expand All @@ -248,6 +287,12 @@ def return_a_function(v):
raise ValueError('Postlines must be a list of strings')
input_txt += '\n'.join(post_lines) + '\n\n'

# Convert to lists
input_txt += 'for k,v in results.items():\n'
input_txt += ' if isinstance(results[k],(numpy.matrix,numpy.ndarray)):\n'
input_txt += ' results[k] = results[k].tolist()\n'

input_txt += '\n'
# Dump results to file
right_open = 'paropen' if self.options.withmpi else 'open'
input_txt += f"with {right_open}('{self._OUTPUT_FILE_NAME}', 'w') as f:\n"
Expand All @@ -259,6 +304,11 @@ def return_a_function(v):
input_txt += f"atoms.write('{self._output_aseatoms}')\n"
input_txt += '\n'

# Write out the final gpw file if requested
if self.inputs.metadata.options.write_gpw:
input_txt += f"calculator.write('{self.inputs.metadata.options.gpw_filename}')\n"
input_txt += '\n'

# write all the input script to a file
with folder.open(self._INPUT_FILE_NAME, 'w') as handle:
handle.write(input_txt)
Expand All @@ -276,8 +326,6 @@ def return_a_function(v):
calcinfo = common.CalcInfo()

calcinfo.uuid = self.uuid
# Empty command line by default
# calcinfo.cmdline_params = settings.pop('CMDLINE', [])
calcinfo.local_copy_list = local_copy_list
calcinfo.remote_copy_list = remote_copy_list

Expand All @@ -288,23 +336,20 @@ def return_a_function(v):
cmdline_params.append(self._INPUT_FILE_NAME)
codeinfo.cmdline_params = cmdline_params

#calcinfo.stdin_name = self._INPUT_FILE_NAME
codeinfo.stdout_name = self._TXT_OUTPUT_FILE_NAME
codeinfo.stdout_name = self.inputs.metadata.options.log_filename
codeinfo.code_uuid = self.inputs.code.uuid
calcinfo.codes_info = [codeinfo]

# Retrieve files
calcinfo.retrieve_list = []
calcinfo.retrieve_list.append(self.options.output_filename)
calcinfo.retrieve_list.append(self._output_aseatoms)
calcinfo.retrieve_list.append(self._TXT_OUTPUT_FILE_NAME)
if optimizer is not None:
calcinfo.retrieve_list.append(self._OPTIMIZER_FILE_NAME)

calcinfo.retrieve_list += additional_retrieve_list

# TODO: I should have two ways of running it: with gpaw-python in parallel
# and executing python if in serial

return calcinfo


Expand Down
141 changes: 141 additions & 0 deletions aiida_ase/parsers/ase.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""Parser implementation for the ``AseCalculation``."""
import json
import math
import numpy

from aiida import parsers
Expand All @@ -10,6 +11,7 @@
Dict = plugins.DataFactory('dict')
ArrayData = plugins.DataFactory('array')
StructureData = plugins.DataFactory('structure')
TrajectoryData = plugins.DataFactory('array.trajectory')
AseCalculation = plugins.CalculationFactory('ase.ase')


Expand Down Expand Up @@ -65,3 +67,142 @@ def parse(self, **kwargs): # pylint: disable=inconsistent-return-statements
self.out('parameters', Dict(dict=json_params))

return


class GpawParser(parsers.Parser):
    """Parser implementation for GPAW through an ``AseCalculation``.

    In addition to what ``AseParser`` does, this parser inspects the GPAW
    log file and the scheduler stderr file to distinguish common failure
    modes (missing PAW datasets, ``AttributeError`` tracebacks, incomplete
    SCF or relaxation) and to store extra outputs such as a
    ``TrajectoryData`` of the ionic steps of a relaxation.
    """

    def parse(self, **kwargs):  # pylint: disable=inconsistent-return-statements,too-many-branches,too-many-locals,too-many-return-statements,too-many-statements
        """Parse the retrieved files from a ``AseCalculation`` run with GPAW.

        :returns: ``None`` on success, otherwise one of the exit codes
            declared on the calculation class describing the failure mode.
        """

        # check what is inside the retrieved folder
        list_of_files = self.retrieved.list_object_names()

        # check if it was a relaxation: ``optimizer`` is only present in the
        # input parameters when an ionic relaxation was requested
        optimizer = self.node.inputs.parameters.get_dict().pop('optimizer', None)

        # output json file: written only at the very end of a successful run
        if AseCalculation._OUTPUT_FILE_NAME in list_of_files:  # pylint: disable=protected-access
            # This calculation is likely to have been alright
            pass
        elif AseCalculation._TXT_OUTPUT_FILE_NAME in list_of_files:  # pylint: disable=protected-access
            # An output structure was not found but there is a txt file
            # Probably helpful for restarts
            self.logger.error('Output results was not found, inspecting log file')
            # Checking for possible errors common to all calculations
            with self.retrieved.open('_scheduler-stderr.txt', 'r') as handle:
                lines = handle.readlines()
            if check_paw_missing(lines):
                self.logger.error('Could not find paw potentials')
                return self.exit_codes.ERROR_PAW_NOT_FOUND
            if check_attribute_error(lines):
                self.logger.error('AttributeError in GPAW')
                return self.exit_codes.ERROR_ATTRIBUTE_ERROR

            if optimizer is not None:
                # This is a relaxation calculation that did not complete
                # try to get all the structures that are available
                try:
                    with self.retrieved.open(self.node.get_attribute('log_filename'), 'r') as handle:
                        all_ase_traj = read(handle, index=':', format='gpaw-out')
                    trajectory = store_to_trajectory_data(all_ase_traj)
                    # NOTE(review): the success path and sibling parsers use
                    # ``self.out(...)``; confirm that plain attribute
                    # assignment on ``self.outputs`` registers the node
                    self.outputs.trajectory = trajectory
                    return self.exit_codes.ERROR_RELAX_NOT_COMPLETE
                except Exception:  # pylint: disable=broad-except
                    # If it made it to here then the error is due to the SCF not completing
                    self.logger.error('First relaxation step not completed')
                    return self.exit_codes.ERROR_SCF_NOT_COMPLETE
            else:
                # This is an SCF calculation that did not complete
                self.logger.error('SCF did not complete')
                return self.exit_codes.ERROR_SCF_NOT_COMPLETE
        else:
            # Neither log file nor end result file were produced
            # Likely to be bad news
            return self.exit_codes.ERROR_UNEXPECTED_EXCEPTION

        # Check if output structure is needed
        if optimizer is not None:
            # If we are here the calculation did complete successfully
            with self.retrieved.open(AseCalculation._output_aseatoms, 'r') as handle:  # pylint: disable=protected-access
                atoms = read(handle, format='json')
            self.out('structure', StructureData(ase=atoms))
            # Store the trajectory as well
            with self.retrieved.open(self.node.get_attribute('log_filename'), 'r') as handle:
                all_ase_traj = read(handle, index=':', format='gpaw-out')
            self.outputs.trajectory = store_to_trajectory_data(all_ase_traj)
        # load the results dictionary
        with self.retrieved.open(AseCalculation._OUTPUT_FILE_NAME, 'r') as handle:  # pylint: disable=protected-access
            json_params = json.load(handle)

        # get the relevant data from the log file for the final structure
        with self.retrieved.open(self.node.get_attribute('log_filename'), 'r') as handle:
            atoms_log = read(handle, format='gpaw-out')
        create_output_parameters(atoms_log, json_params)

        # Check that the parameters are not inf or nan; an infinite Fermi
        # level is a known sign of a broken GPAW run
        if math.isnan(json_params['fermi_energy']) or math.isinf(json_params['fermi_energy']):
            self.logger.error('Fermi energy is inf or nan')
            return self.exit_codes.ERROR_FERMI_LEVEL_INF

        # look at warnings: anything printed on stderr is stored verbatim
        with self.retrieved.open('_scheduler-stderr.txt', 'r') as handle:
            errors = handle.read()
        if errors:
            json_params['warnings'] = [errors]

        # extract arrays from json_params so they end up in an ArrayData node
        dictionary_array = {}
        for k, v in list(json_params.items()):
            if isinstance(v, (list, tuple, numpy.ndarray)):
                dictionary_array[k] = json_params.pop(k)

        if dictionary_array:
            array_data = ArrayData()
            for k, v in dictionary_array.items():
                array_data.set_array(k, numpy.array(v))
            self.out('array', array_data)

        if json_params:
            self.out('parameters', Dict(dict=json_params))

        return


def check_paw_missing(lines):
    """Check if paw potentials are missing and that is the source of the error.

    :param lines: lines read from the scheduler stderr file.
    :returns: ``True`` if any line reports a missing PAW dataset file.
    """
    needle = 'Could not find required PAW dataset file'
    return any(needle in line for line in lines)


def check_attribute_error(lines):
    """Check if there is an ``AttributeError`` printed in the stderr file.

    The docstring previously claimed ``AssertionError`` in the "output
    file"; the code actually matches ``AttributeError`` in the scheduler
    stderr lines passed in by the parser.

    :param lines: lines read from the scheduler stderr file.
    :returns: ``True`` if any line mentions ``AttributeError``.
    """
    return any('AttributeError' in line for line in lines)


def create_output_parameters(atoms_log, json_params):
    """Populate ``json_params`` with quantities parsed from the GPAW log file.

    :param atoms_log: ASE ``Atoms`` read from the GPAW log, with an attached
        calculator holding the parsed results.
    :param json_params: dictionary of results, updated in place.
    """
    log_calc = atoms_log.calc
    parsed_results = log_calc.results
    json_params['energy'] = atoms_log.get_potential_energy()
    json_params['energy_contributions'] = log_calc.energy_contributions
    json_params['forces'] = atoms_log.get_forces()
    # Optional quantities: pop them so they are not duplicated in the results,
    # defaulting to ``None`` when the log did not contain them.
    for quantity in ('stress', 'magmoms', 'dipole'):
        json_params[quantity] = parsed_results.pop(quantity, None)
    json_params['pbc'] = atoms_log.get_pbc()
    json_params['fermi_energy'] = log_calc.eFermi
    json_params['eigenvalues'] = log_calc.get_eigenvalues()


def store_to_trajectory_data(all_ase_traj):
    """Store ase atoms object into a TrajectoryFile.

    :param all_ase_traj: iterable of ASE ``Atoms``, one per ionic step.
    :returns: ``TrajectoryData`` built from the corresponding structures.
    """
    structures = [StructureData(ase=atoms) for atoms in all_ase_traj]
    return TrajectoryData(structures)
Empty file added aiida_ase/workflows/__init__.py
Empty file.

0 comments on commit 3b22bc8

Please sign in to comment.