diff --git a/src/ansys/fluent/core/launcher/fluent_launcher_options.json b/src/ansys/fluent/core/launcher/fluent_launcher_options.json
index c601d020e24f..28f304aed9b1 100644
--- a/src/ansys/fluent/core/launcher/fluent_launcher_options.json
+++ b/src/ansys/fluent/core/launcher/fluent_launcher_options.json
@@ -23,10 +23,6 @@
     },
     "fluent_format": "{}"
   },
-  "processor_count": {
-    "type": "int",
-    "fluent_format": " -t{}"
-  },
   "journal_filename": {
     "type": "str",
     "fluent_format": " -i {}"
diff --git a/src/ansys/fluent/core/launcher/launcher.py b/src/ansys/fluent/core/launcher/launcher.py
index 185d6a565217..68132d2a80ca 100644
--- a/src/ansys/fluent/core/launcher/launcher.py
+++ b/src/ansys/fluent/core/launcher/launcher.py
@@ -16,6 +16,7 @@
 from ansys.fluent.core.fluent_connection import _FluentConnection
 from ansys.fluent.core.launcher.fluent_container import start_fluent_container
+from ansys.fluent.core.scheduler import build_parallel_options, load_machines
 from ansys.fluent.core.session import Session, _BaseSession, parse_server_info_file
 from ansys.fluent.core.session_meshing import Meshing
 from ansys.fluent.core.session_pure_meshing import PureMeshing
@@ -194,6 +195,10 @@ def _build_fluent_launch_args_string(**kwargs) -> str:
             json_key = json.dumps(argval)
             argval = fluent_map[json_key]
         launch_args_string += v["fluent_format"].replace("{}", str(argval))
+    addArgs = kwargs["additional_arguments"]
+    if "-t" not in addArgs and "-cnf=" not in addArgs:
+        mlist = load_machines(ncores=kwargs["processor_count"])
+        launch_args_string += " " + build_parallel_options(mlist)
     return launch_args_string
 
 
@@ -301,7 +306,8 @@ def launch_fluent(
         is used. Options are ``"double"`` and ``"single"``.
     processor_count : int, optional
         Number of processors. The default is ``None``, in which case ``1``
-        is used.
+        processor is used. In job scheduler environments, the total number of
+        allocated cores is clamped to this value.
     journal_filename : str, optional
         Name of the journal file to read. The default is ``None``.
     start_timeout : int, optional
@@ -365,6 +371,12 @@
     Returns
     -------
     ansys.fluent.session.Session
        Fluent session.
+
+    Notes
+    -----
+    In job scheduler environments such as SLURM, LSF, and PBS, the allocated
+    machines and core counts are queried from the scheduler environment and
+    passed to Fluent.
     """
     argvals = locals()
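The guard added to _build_fluent_launch_args_string only queries the scheduler when the caller has not already supplied -t or -cnf= through additional_arguments, so explicit user settings always win. A minimal sketch of what ends up appended, assuming load_machines accepts pre-parsed machine_info entries shaped like {"machine-name": ..., "core-count": ...} (the key names and hostnames here are assumptions for illustration, not taken from this diff):

    # Sketch only: the machine_info key names and the node0/node1 hostnames
    # are assumptions, not shown in this diff.
    from ansys.fluent.core.scheduler import build_parallel_options, load_machines

    # Pre-parsed stand-in for a two-node allocation a scheduler would report.
    machines = load_machines(
        machine_info=[
            {"machine-name": "node0", "core-count": 8},
            {"machine-name": "node1", "core-count": 8},
        ]
    )
    # With no -t/-cnf= in additional_arguments, the launcher appends this:
    print(build_parallel_options(machines))  # -t16 -cnf=node0:8,node1:8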
""" - parOpt = _fluentOpt.replace("%n%", str(machine_list.number_of_cores)) - cnfList = ( - machine_list[0].host_name + _procSep + str(machine_list[0].number_of_cores) - ) - for m in range(1, len(machine_list)): - cnfList += ( - _machineSep - + machine_list[m].host_name - + _procSep - + str(machine_list[m].number_of_cores) + parOpt = "" + if ( + machine_list.num_machines == 1 + and socket.gethostname() == machine_list[0].host_name + ): + if machine_list.number_of_cores > 1: + parOpt = _ncoresOpt.replace("%n%", str(machine_list.number_of_cores)) + else: + parOpt = _ncoresOpt.replace("%n%", str(machine_list.number_of_cores)) + cnfList = ( + machine_list[0].host_name + _procSep + str(machine_list[0].number_of_cores) ) - parOpt = parOpt.replace("%machineList%", cnfList) + for m in range(1, len(machine_list)): + cnfList += ( + _machineSep + + machine_list[m].host_name + + _procSep + + str(machine_list[m].number_of_cores) + ) + parOpt += _machinesOpt.replace("%machineList%", cnfList) return parOpt diff --git a/src/ansys/fluent/core/scheduler/load_machines.py b/src/ansys/fluent/core/scheduler/load_machines.py index 042fb01f666e..759c2c08085c 100644 --- a/src/ansys/fluent/core/scheduler/load_machines.py +++ b/src/ansys/fluent/core/scheduler/load_machines.py @@ -48,12 +48,12 @@ def load_machines( list for LSF, PBS_NODEFILE for PBS and SLURM_JOB_NODELIST on SLURM. Unsupported job schedulers may provide alternative ways of providing a list of machines, in that case the list must be pre-parsed and provided via the - machineDict parameter. + `machine_info` or `host_info` parameters. - Depending on the SLURM environment, the hostnames contained within the - SLURM_JOB_NODELIST variable may not be valid to ssh to. In that case we - cannot pass these names to the solver. So, in the SLURM branch there is a - test to check if we can ssh to the first host, if not, get 'actual' machine + In some SLURM environments, the hostnames contained within the variable + SLURM_JOB_NODELIST may not be valid to ssh to. In that case those names + cannot be passed to the solver. So, in the SLURM branch there is a test to + check if ssh to the first host is working, and if not, get 'actual' machine names using scontrol. """ @@ -101,7 +101,7 @@ def load_machines( elif ncores is None: machine_list = _get_local_machine(1) - if ncores is not None and machine_list.number_of_cores != ncores: + if ncores is not None and ncores < machine_list.number_of_cores: # If both machine list and number of cores are provided, edit the # machine list to use exactly the number of cores indicated. machine_list = _restrict_machines_to_core_count(machine_list, ncores)