Skip to content

Commit

Permalink
New computational resource widget with resource setup (#566)
Browse files Browse the repository at this point in the history
This PR creates a new QEAppComputationalResourcesWidget, built on top of one of the widgets from aiidalab-widgets-base. The new widget supports
- setting the resources for each of the selected codes.
- detailed settings: --ntasks-per-node and --cpus-per-task
- In addition, some settings are specific to a single code, such as the parallelization levels (pools, images, nk, etc.) of pw.x. Thus, this PR creates a new PWscfWidget for pw.x (named PwCodeResourceSetupWidget in the code), which inherits from QEAppComputationalResourcesWidget.
  • Loading branch information
superstar54 committed Apr 24, 2024
1 parent ae79e3e commit 99c059d
Show file tree
Hide file tree
Showing 14 changed files with 440 additions and 230 deletions.
6 changes: 3 additions & 3 deletions docs/source/development/plugin.rst
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ The ``get_builder`` function will return a ``builder`` for the ``EOSWorkChain``,
def get_builder(codes, structure, parameters, **kwargs):
protocol = parameters["workchain"].pop('protocol', "fast")
pw_code = codes.get("pw")
pw_code = codes.get("pw")['code']
overrides = {
"pw": parameters["advanced"],
}
Expand Down Expand Up @@ -334,9 +334,9 @@ Here is the example of the built-in `pdos` plugins with codes `dos.x` and `projw

.. code-block:: python
from aiidalab_widgets_base import ComputationalResourcesWidget
from aiidalab_qe.common.widgets import QEAppComputationalResourcesWidget
dos_code = ComputationalResourcesWidget(
dos_code = QEAppComputationalResourcesWidget(
description="dos.x",
default_calc_job_plugin="quantumespresso.dos",
)
Expand Down
20 changes: 13 additions & 7 deletions src/aiidalab_qe/app/parameters/qeapp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,17 @@ advanced:
tot_charge: 0
vdw_corr: none

## Codes
## Computational resources
codes:
dos: dos-7.2@localhost
projwfc: projwfc-7.2@localhost
pw: pw-7.2@localhost
pp: pp-7.2@localhost
xspectra: xspectra-7.2@localhost
hp: hp-7.2@localhost
dos:
code: dos-7.2@localhost
projwfc:
code: projwfc-7.2@localhost
pw:
code: pw-7.2@localhost
pp:
code: pp-7.2@localhost
xspectra:
code: xspectra-7.2@localhost
hp:
code: hp-7.2@localhost
171 changes: 65 additions & 106 deletions src/aiidalab_qe/app/submission/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,24 @@

from __future__ import annotations

import os

import ipywidgets as ipw
import traitlets as tl
from aiida import orm
from aiida.common import NotExistent
from aiida.engine import ProcessBuilderNamespace, submit
from aiidalab_widgets_base import ComputationalResourcesWidget, WizardAppWidgetStep
from aiidalab_widgets_base import WizardAppWidgetStep
from IPython.display import display

from aiidalab_qe.app.parameters import DEFAULT_PARAMETERS
from aiidalab_qe.app.utils import get_entry_items
from aiidalab_qe.common.setup_codes import QESetupWidget
from aiidalab_qe.common.setup_pseudos import PseudosInstallWidget
from aiidalab_qe.common.widgets import (
PwCodeResourceSetupWidget,
QEAppComputationalResourcesWidget,
)
from aiidalab_qe.workflows import QeAppWorkChain

from .resource import ParallelizationSettings, ResourceSelectionWidget


class SubmitQeAppWorkChainStep(ipw.VBox, WizardAppWidgetStep):
"""Step for submission of a bands workchain."""
Expand Down Expand Up @@ -69,17 +69,11 @@ def __init__(self, qe_auto_setup=True, **kwargs):
self.message_area = ipw.Output()
self._submission_blocker_messages = ipw.HTML()

self.pw_code = ComputationalResourcesWidget(
self.pw_code = PwCodeResourceSetupWidget(
description="pw.x:", default_calc_job_plugin="quantumespresso.pw"
)

self.resources_config = ResourceSelectionWidget()
self.parallelization = ParallelizationSettings()

self.set_resource_defaults()

self.pw_code.observe(self._update_state, "value")
self.pw_code.observe(self._update_resources, "value")

# add plugin's entry points
self.codes = {"pw": self.pw_code}
Expand All @@ -94,8 +88,6 @@ def __init__(self, qe_auto_setup=True, **kwargs):
self.codes[name] = code
code.observe(self._update_state, "value")
self.code_children.append(self.codes[name])
# set default codes
self.set_selected_codes(DEFAULT_PARAMETERS["codes"])
# set process label and description
self.process_label = ipw.Text(
description="Label:", layout=ipw.Layout(width="auto", indent="0px")
Expand Down Expand Up @@ -138,8 +130,6 @@ def __init__(self, qe_auto_setup=True, **kwargs):
super().__init__(
children=[
*self.code_children,
self.resources_config,
self.parallelization,
self.message_area,
self.sssp_installation_status,
self.qe_setup_status,
Expand All @@ -150,6 +140,8 @@ def __init__(self, qe_auto_setup=True, **kwargs):
self.submit_button,
]
)
# set default codes
self.set_selected_codes(DEFAULT_PARAMETERS["codes"])

@tl.observe("internal_submission_blockers", "external_submission_blockers")
def _observe_submission_blockers(self, _change):
Expand Down Expand Up @@ -183,6 +175,16 @@ def _identify_submission_blockers(self):
if not self.sssp_installation_status.installed:
yield "The SSSP library is not installed."

# check if the QEAppComputationalResourcesWidget is used
for name, code in self.codes.items():
# skip if the code is not displayed, convenient for the plugin developer
if code.layout.display == "none":
continue
if not isinstance(code, QEAppComputationalResourcesWidget):
yield (
f"Error: hi, plugin developer, please use the QEAppComputationalResourcesWidget from aiidalab_qe.common.widgets for code {name}."
)

def _update_state(self, _=None):
# If the previous step has failed, this should fail as well.
if self.previous_step_state is self.State.FAIL:
Expand Down Expand Up @@ -215,14 +217,12 @@ def _toggle_install_widgets(self, change):

def _auto_select_code(self, change):
if change["new"] and not change["old"]:
for name, code_widget in self.codes.items():
for name, code in self.codes.items():
if not DEFAULT_PARAMETERS["codes"].get(name):
continue
try:
code_widget.refresh()
code_widget.value = orm.load_code(
DEFAULT_PARAMETERS["codes"][name]
).uuid
code.code_selection.refresh()
code.value = orm.load_code(DEFAULT_PARAMETERS["codes"][name]).uuid
except NotExistent:
pass

Expand All @@ -241,55 +241,6 @@ def _show_alert_message(self, message, alert_class="info"):
)
)

def _update_resources(self, change):
if change["new"] and (
change["old"] is None
or orm.load_code(change["new"]).computer.pk
!= orm.load_code(change["old"]).computer.pk
):
self.set_resource_defaults(orm.load_code(change["new"]).computer)

def get_resources(self):
resources = {
"num_machines": self.resources_config.num_nodes.value,
"num_mpiprocs_per_machine": self.resources_config.num_cpus.value,
"npools": self.parallelization.npools.value,
}
return resources

def set_resources(self, resources):
self.resources_config.num_nodes.value = resources["num_machines"]
self.resources_config.num_cpus.value = resources["num_mpiprocs_per_machine"]
self.parallelization.npools.value = resources["npools"]

def set_resource_defaults(self, computer=None):
if computer is None or computer.hostname == "localhost":
self.resources_config.num_nodes.disabled = True
self.resources_config.num_nodes.value = 1
self.resources_config.num_cpus.max = os.cpu_count()
self.resources_config.num_cpus.value = 1
self.resources_config.num_cpus.description = "CPUs"
self.parallelization.npools.value = 1
else:
default_mpiprocs = computer.get_default_mpiprocs_per_machine()
self.resources_config.num_nodes.disabled = False
self.resources_config.num_cpus.max = default_mpiprocs
self.resources_config.num_cpus.value = default_mpiprocs
self.resources_config.num_cpus.description = "CPUs/node"
self.parallelization.npools.value = self._get_default_parallelization()

self._check_resources()

def _get_default_parallelization(self):
"""A _very_ rudimentary approach for obtaining a minimal npools setting."""
num_mpiprocs = (
self.resources_config.num_nodes.value * self.resources_config.num_cpus.value
)

for i in range(1, num_mpiprocs + 1):
if num_mpiprocs % i == 0 and num_mpiprocs // i < self.MAX_MPI_PER_POOL:
return i

def _check_resources(self):
"""Check whether the currently selected resources will be sufficient and warn if not."""
if not self.pw_code.value:
Expand Down Expand Up @@ -349,10 +300,14 @@ def get_selected_codes(self):
return: A dict with the code names as keys and the code UUIDs as values.
"""
codes = {key: code.value for key, code in self.codes.items()}
codes = {
key: code.parameters
for key, code in self.codes.items()
if code.layout.display != "none"
}
return codes

def set_selected_codes(self, codes):
def set_selected_codes(self, code_data):
"""Set the inputs in the GUI based on a set of codes."""

# Codes
Expand All @@ -365,12 +320,20 @@ def _get_code_uuid(code):

with self.hold_trait_notifications():
for name, code in self.codes.items():
if name not in code_data:
continue
# check if the code is installed and usable
# note: if code is imported from another user, it is not usable and thus will not be
# treated as an option in the ComputationalResourcesWidget.
code_options = [o[1] for o in self.pw_code.code_select_dropdown.options]
if _get_code_uuid(codes.get(name)) in code_options:
code.value = _get_code_uuid(codes.get(name))
code_options = [
o[1] for o in code.code_selection.code_select_dropdown.options
]
if _get_code_uuid(code_data.get(name)["code"]) in code_options:
# get code uuid from code label in case of using DEFAULT_PARAMETERS
code_data.get(name)["code"] = _get_code_uuid(
code_data.get(name)["code"]
)
code.parameters = code_data.get(name)

def update_codes_display(self):
"""Hide code if no related property is selected."""
Expand Down Expand Up @@ -432,46 +395,44 @@ def _create_builder(self) -> ProcessBuilderNamespace:
from copy import deepcopy

self.ui_parameters = deepcopy(self.input_parameters)
self.ui_parameters["resources"] = self.get_resources()
# add codes and resource info into ui_parameters
self.ui_parameters.update(self.get_submission_parameters())
submission_parameters = self.get_submission_parameters()
self.ui_parameters.update(submission_parameters)
builder = QeAppWorkChain.get_builder_from_protocol(
structure=self.input_structure,
parameters=deepcopy(self.ui_parameters),
)

self._update_builder(builder, self.MAX_MPI_PER_POOL)
self._update_builder(builder, submission_parameters["codes"])

return builder

def _update_builder(self, buildy, max_mpi_per_pool):
resources = self.get_resources()
npools = resources.pop("npools", 1)
"""Update the resources and parallelization of the ``QeAppWorkChain`` builder."""
for k, v in buildy.items():
if isinstance(v, (dict, ProcessBuilderNamespace)):
if k == "pw" and v["pseudos"]:
v["parallelization"] = orm.Dict(dict={"npool": npools})
if k == "projwfc":
v["settings"] = orm.Dict(dict={"cmdline": ["-nk", str(npools)]})
if k == "dos":
v["metadata"]["options"]["resources"] = {
"num_machines": 1,
"num_mpiprocs_per_machine": min(
max_mpi_per_pool,
resources["num_mpiprocs_per_machine"],
),
}
# Continue to the next item to avoid overriding the resources in the
# recursive `update_builder` call.
continue
if k == "resources":
buildy["resources"] = resources
else:
self._update_builder(v, max_mpi_per_pool)
def _update_builder(self, builder, codes):
"""Update the resources and parallelization of the ``relax`` builder."""
# update resources
builder.relax.base.pw.metadata.options.resources = {
"num_machines": codes.get("pw")["nodes"],
"num_mpiprocs_per_machine": codes.get("pw")["ntasks_per_node"],
"num_cores_per_mpiproc": codes.get("pw")["cpus_per_task"],
}
builder.relax.base.pw.parallelization = orm.Dict(
dict=codes["pw"]["parallelization"]
)

def set_submission_parameters(self, parameters):
self.set_resources(parameters["resources"])
# backward compatibility for v2023.11
# which have a separate "resources" section for pw code
if "resources" in parameters:
parameters["codes"] = {
key: {"code": value} for key, value in parameters["codes"].items()
}
parameters["codes"]["pw"]["nodes"] = parameters["resources"]["num_machines"]
parameters["codes"]["pw"]["cpus"] = parameters["resources"][
"num_mpiprocs_per_machine"
]
parameters["codes"]["pw"]["parallelization"] = {
"npool": parameters["resources"]["npools"]
}
self.set_selected_codes(parameters["codes"])
# label and description are not stored in the parameters, but in the process directly
if self.process:
Expand All @@ -482,7 +443,6 @@ def get_submission_parameters(self):
"""Get the parameters for the submission step."""
return {
"codes": self.get_selected_codes(),
"resources": self.get_resources(),
}

def reset(self):
Expand All @@ -491,4 +451,3 @@ def reset(self):
self.process = None
self.input_structure = None
self.set_selected_codes(DEFAULT_PARAMETERS["codes"])
self.set_resource_defaults()
Loading

0 comments on commit 99c059d

Please sign in to comment.