Skip to content

Commit

Permalink
PwBaseWorkChain: new handler for BFGS history failure
Browse files Browse the repository at this point in the history
When the BFGS history fails during ionic minimization,
the current handler simply restarts from scratch. This
simplistic solution has limited effectiveness. We improve
on it by first trying to lower the trust radius. If the history
fails again, we then switch to the `damp` algorithm. The
latter solution proved to converge the ionic minimization
after the occurrence of a BFGS history failure on a large
test set.

Co-authored-by: Marnik Bercx <mbercx@gmail.com>
  • Loading branch information
bastonero and mbercx committed Dec 22, 2023
1 parent 740e0be commit 0224f8a
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,7 @@
'press_conv_thr': 0.5,
'smearing': '',
'startmag': 0.,
'wf_collect': False,
'wf_collect': True,
'trust_radius_min': 1.0e-3,
'ion_dynamics': 'bfgs',
})
55 changes: 53 additions & 2 deletions src/aiida_quantumespresso/workflows/pw/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class PwBaseWorkChain(ProtocolMixin, BaseRestartWorkChain):
'delta_factor_max_seconds': 0.95,
'delta_factor_nbnd': 0.05,
'delta_minimum_nbnd': 4,
'delta_factor_trust_radius_min': 0.1,
})

@classmethod
Expand Down Expand Up @@ -249,6 +250,13 @@ def setup(self):
self.ctx.inputs.parameters.setdefault('ELECTRONS', {})
self.ctx.inputs.parameters.setdefault('SYSTEM', {})

calculation_type = self.ctx.inputs.parameters['CONTROL'].get('calculation', None)
if calculation_type in ['relax', 'md']:
self.ctx.inputs.parameters.setdefault('IONS', {})
if calculation_type in ['vc-relax', 'vc-md']:
self.ctx.inputs.parameters.setdefault('IONS', {})
self.ctx.inputs.parameters.setdefault('CELL', {})

self.ctx.inputs.settings = self.ctx.inputs.settings.get_dict() if 'settings' in self.ctx.inputs else {}

def validate_kpoints(self):
Expand Down Expand Up @@ -483,6 +491,49 @@ def handle_vcrelax_converged_except_final_scf(self, calculation):
self.results() # Call the results method to attach the output nodes
return ProcessHandlerReport(True, self.exit_codes.ERROR_IONIC_CONVERGENCE_REACHED_EXCEPT_IN_FINAL_SCF)

@process_handler(
    priority=561,
    exit_codes=[
        PwCalculation.exit_codes.ERROR_IONIC_CYCLE_BFGS_HISTORY_FAILURE,
        PwCalculation.exit_codes.ERROR_IONIC_CYCLE_BFGS_HISTORY_AND_FINAL_SCF_FAILURE,
    ]
)
def handle_relax_recoverable_ionic_convergence_bfgs_history_error(self, calculation):
    """Recover from a BFGS history failure by adjusting the relaxation settings and restarting.

    A BFGS history failure can mean two things: either the structure is close to the global minimum but
    the moves the algorithm wants to make are smaller than ``trust_radius_min``, or the structure is close
    to a local minimum (hard to detect). The first case is addressed by lowering the trust radius, the
    second by switching to a different algorithm, e.g. ``damp`` (and ``damp-w`` for the cell).

    The recovery depends on the ``calculation`` key of the ``CONTROL`` namelist (``relax`` if unset):

    * ``relax``: ``ion_dynamics`` is set to ``damp``.
    * ``vc-relax`` with ``trust_radius_min`` above ``1.0e-4``: the current ``trust_radius_min`` becomes
      ``trust_radius_ini`` and ``trust_radius_min`` is scaled by ``defaults.delta_factor_trust_radius_min``.
    * ``vc-relax`` otherwise: ``ion_dynamics`` is set to ``damp`` and ``cell_dynamics`` to ``damp-w``.
    * any other value: the error is not handled here.

    When handled, the output structure of ``calculation`` becomes the new input structure and the restart
    type is set to ``RestartType.FROM_CHARGE_DENSITY`` using the remote folder of ``calculation``.

    :param calculation: the failed calculation; its ``output_structure`` and ``remote_folder`` outputs are used.
    :return: ``ProcessHandlerReport(True)`` when a restart was prepared, ``ProcessHandlerReport(False)`` otherwise.
    """
    parameters = self.ctx.inputs.parameters
    ions = parameters['IONS']
    current_radius_min = ions.get('trust_radius_min', qe_defaults.trust_radius_min)
    relax_mode = parameters['CONTROL'].get('calculation', 'relax')

    # Only ionic and variable-cell relaxations are dealt with by this handler.
    if relax_mode not in ('relax', 'vc-relax'):
        return ProcessHandlerReport(False)

    if relax_mode == 'relax':
        ions['ion_dynamics'] = 'damp'
        action = 'bfgs history (ionic only) failure: restarting with `damp` dynamics.'
    elif current_radius_min > 1.0e-4:
        # First attempt for `vc-relax`: keep BFGS but allow smaller steps.
        ions['trust_radius_ini'] = current_radius_min  # start close
        lowered_radius_min = current_radius_min * self.defaults.delta_factor_trust_radius_min
        ions['trust_radius_min'] = lowered_radius_min
        action = f'bfgs history (vc-relax) failure: restarting with `trust_radius_min={lowered_radius_min:.5f}`.'
    else:
        # The trust radius is already at or below the threshold: change algorithm for ions and cell.
        ions['ion_dynamics'] = 'damp'
        parameters['CELL']['cell_dynamics'] = 'damp-w'
        action = 'bfgs history (vc-relax) failure: restarting with `damp(-w)` dynamics.'

    self.ctx.inputs.structure = calculation.outputs.output_structure
    self.set_restart_type(RestartType.FROM_CHARGE_DENSITY, calculation.outputs.remote_folder)
    self.report_error_handled(calculation, action)

    return ProcessHandlerReport(True)

@process_handler(
priority=560,
exit_codes=[
Expand All @@ -501,12 +552,12 @@ def handle_relax_recoverable_ionic_convergence_error(self, calculation):
self.ctx.inputs.structure = calculation.outputs.output_structure
action = 'no ionic convergence but clean shutdown: restarting from scratch but using output structure.'

self.set_restart_type(RestartType.FROM_SCRATCH)
self.set_restart_type(RestartType.FROM_CHARGE_DENSITY, calculation.outputs.remote_folder)
self.report_error_handled(calculation, action)
return ProcessHandlerReport(True)

@process_handler(
priority=559, exit_codes=[
priority=555, exit_codes=[
PwCalculation.exit_codes.ERROR_RADIAL_FFT_SIGNIFICANT_VOLUME_CONTRACTION,
]
)
Expand Down
64 changes: 62 additions & 2 deletions tests/workflows/pw/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,12 +205,22 @@ def test_handle_vcrelax_converged_except_final_scf(generate_workchain_pw):
PwCalculation.exit_codes.ERROR_IONIC_CYCLE_BFGS_HISTORY_AND_FINAL_SCF_FAILURE,
)
)
def test_handle_relax_recoverable_ionic_convergence_error(generate_workchain_pw, generate_structure, exit_code):
def test_handle_relax_recoverable_ionic_convergence_error(
generate_workchain_pw, generate_structure, generate_remote_data, fixture_localhost, exit_code
):
"""Test `PwBaseWorkChain.handle_relax_recoverable_ionic_convergence_error`."""
structure = generate_structure()
process = generate_workchain_pw(pw_outputs={'output_structure': structure}, exit_code=exit_code)
remote_data = generate_remote_data(computer=fixture_localhost, remote_path='/path/to/remote')
process = generate_workchain_pw(
pw_outputs={
'output_structure': structure,
'remote_folder': remote_data
}, exit_code=exit_code
)
process.setup()

process.ctx.inputs.parameters['CONTROL']['calculation'] = 'relax'
process.ctx.inputs.parameters.setdefault('IONS', {})['ion_dynamics'] = 'bfgs'
result = process.handle_relax_recoverable_ionic_convergence_error(process.ctx.children[-1])
assert isinstance(result, ProcessHandlerReport)
assert result.do_break
Expand All @@ -221,6 +231,56 @@ def test_handle_relax_recoverable_ionic_convergence_error(generate_workchain_pw,
assert result.status == 0


@pytest.mark.parametrize(
    'exit_code', (
        PwCalculation.exit_codes.ERROR_IONIC_CYCLE_BFGS_HISTORY_FAILURE,
        PwCalculation.exit_codes.ERROR_IONIC_CYCLE_BFGS_HISTORY_AND_FINAL_SCF_FAILURE,
    )
)
def test_handle_relax_recoverable_ionic_convergence_bfgs_history_error(
    generate_workchain_pw, generate_structure, generate_remote_data, fixture_localhost, exit_code
):
    """Test `PwBaseWorkChain.handle_relax_recoverable_ionic_convergence_bfgs_history_error`.

    Three consecutive handler calls are checked: a `relax` run switching to `damp`, a `vc-relax` run
    lowering `trust_radius_min`, and a second `vc-relax` failure switching to `damp`/`damp-w`.
    """
    pw_outputs = {
        'output_structure': generate_structure(),
        'remote_folder': generate_remote_data(computer=fixture_localhost, remote_path='/path/to/remote'),
    }
    process = generate_workchain_pw(pw_outputs=pw_outputs, exit_code=exit_code)
    process.setup()

    def call_handler_and_check_report():
        """Invoke the handler on the last child and check that it reports a handled error."""
        report = process.handle_relax_recoverable_ionic_convergence_bfgs_history_error(process.ctx.children[-1])
        assert isinstance(report, ProcessHandlerReport)
        assert report.do_break
        assert report.exit_code.status == 0

    # `relax`: the handler switches straight to `damp`
    process.ctx.inputs.parameters['CONTROL']['calculation'] = 'relax'
    process.ctx.inputs.parameters.setdefault('IONS', {})['ion_dynamics'] = 'bfgs'
    call_handler_and_check_report()
    assert process.ctx.inputs.parameters['IONS']['ion_dynamics'] == 'damp'

    # `vc-relax`: the first failure lowers `trust_radius_min`
    process.ctx.inputs.parameters['CONTROL']['calculation'] = 'vc-relax'
    process.ctx.inputs.parameters.setdefault('IONS', {})['ion_dynamics'] = 'bfgs'
    process.ctx.inputs.parameters.setdefault('CELL', {})['cell_dynamics'] = 'bfgs'
    call_handler_and_check_report()
    assert process.ctx.inputs.parameters['CONTROL']['restart_mode'] == 'from_scratch'
    assert process.ctx.inputs.parameters['IONS']['trust_radius_ini'] == 1.0e-3
    assert process.ctx.inputs.parameters['IONS']['trust_radius_min'] == 1.0e-4

    # `vc-relax`: a further failure falls back to `damp` dynamics as a last resort
    call_handler_and_check_report()
    assert process.ctx.inputs.parameters['IONS']['ion_dynamics'] == 'damp'
    assert process.ctx.inputs.parameters['CELL']['cell_dynamics'] == 'damp-w'


def test_handle_vcrelax_recoverable_fft_significant_volume_contraction_error(generate_workchain_pw, generate_structure):
"""Test `PwBaseWorkChain.handle_vcrelax_recoverable_fft_significant_volume_contraction_error`."""
exit_code = PwCalculation.exit_codes.ERROR_RADIAL_FFT_SIGNIFICANT_VOLUME_CONTRACTION
Expand Down

0 comments on commit 0224f8a

Please sign in to comment.