From 01a0e533e2a0c4e53cbca73c87b289f26af6af84 Mon Sep 17 00:00:00 2001 From: Adrian Hurtado Date: Tue, 7 May 2024 14:13:58 -0400 Subject: [PATCH] Runs end to end with energy... need response properties --- qcengine/programs/madness/harvester.py | 379 +------------------------ qcengine/programs/madness/runner.py | 305 ++++++++------------ 2 files changed, 126 insertions(+), 558 deletions(-) diff --git a/qcengine/programs/madness/harvester.py b/qcengine/programs/madness/harvester.py index 42ed85ffd..096e92c53 100644 --- a/qcengine/programs/madness/harvester.py +++ b/qcengine/programs/madness/harvester.py @@ -10,286 +10,22 @@ # import qcelemental as qcel from qcelemental.models import Molecule from qcelemental.models.results import AtomicResultProperties -from qcelemental.molparse import regex from ..util import PreservingDict logger = logging.getLogger(__name__) -def harvest_moldft_output(outtext: str) -> Tuple[PreservingDict, Molecule, list, str, str]: - """Function to read an entire MADNESS output file. - - Read all of the different "line search" segments of a file and returns - values from the last segment for which a geometry was written. - - Args: - outtext (str): Output written to stdout - Returns: - - (PreservingDict) Variables extracted from the output file in the last complete step - - (Molecule): Molecule from the last complete step - - (list): Gradient from the last complete step - - (str): Version string - - (str): Error message, if any - """ - - # Loop over all steps - pass_psivar = [] - pass_coord = [] - pass_grad = [] - # Write now we split at Converge - counter = 1 - - splits = re.split(r"Converged!", outtext, re.MULTILINE)[-2] - final_outpass = re.split(r"Iteration", splits, re.MULTILINE)[-1] - psivar, madcoord, madgrad, version, error = harvest_outfile_moldft_pass(final_outpass) - - return psivar, madcoord, madgrad, version, error - - -def harvest_outfile_moldft_pass(outtext): - """Function to read Madness output file *outtext* and parse important - quantum chemical information from it in - - """ - psivar = PreservingDict() - psivar_coord = None - psivar_grad = None - version = "" - error = "" # TODO (wardlt): The error string is never used. - - NUMBER = r"(?x:" + regex.NUMBER + ")" - # fmt: off - - # Process version - mobj = re.search( - r'^\s+' + r'MADNESS' + r'\s+' + - r'(\d+.\d\d+.\d)' + r'\s' + r'multiresolution suite' + r'\s*$', - outtext, re.MULTILINE) - # fmt: on - if mobj: - logger.debug("matched version") - version = mobj.group(1) - - # Process SCF - # 1)Fail to converge (TODO Robert ask for failed convergence) - # fmt: off - mobj = re.search( - r'^\s+' + r'(?:Calculation failed to converge)' + r'\s*$', outtext, re.MULTILINE) - # fmt: on - if mobj: - logger.debug("failed to converge") - - # 2)Calculation converged - else: - OPTIONS = [r"exchange-correlation", r"nuclear-repulsion", r"total"] - PSIVAR = ["EXCHANGE-CORRELATION", "NUCLEAR REPULSION ENERGY", "TOTAL SCF ENERGY"] - # OPTIONS=[r'kinetic',r'nonlocal psp',r'nuclear attraction',r'coulomb',r'PCM',r'exchange-correlation',r'nuclear-repulsion',r'total'] - # PSIVAR=['KINETIC ENERGY','NONLOCAL PSP','NUCLEAR ATTRACTION ENERGY','COULOMB','PCM','EXCHANGE-CORRELATION','NUCLEAR REPULSION ENERGY','TOTAL SCF ENERGY'] - optDict = dict(zip(OPTIONS, PSIVAR)) - - for var, VAR in optDict.items(): - mobj = re.search(r"^\s+" + var + r"\s*" + NUMBER + r"s*$", outtext, re.MULTILINE) - if mobj: - logger.debug("matched SCF") # not sure what this means - psivar[VAR] = mobj.group(1) - # Other options - - # Process CURRENT energies (TODO: needs better way) - if "TOTAL SCF ENERGY" in psivar: - psivar["CURRENT REFERENCE ENERGY"] = psivar["TOTAL SCF ENERGY"] - psivar["CURRENT ENERGY"] = psivar["TOTAL SCF ENERGY"] - - return psivar, psivar_coord, psivar_grad, version, error - - -def harvest_hessian(hess: str) -> np.ndarray: - pass - - -# """Parses the contents of the NWChem hess file into a hessian array. - -# Args: -# hess (str): Contents of the hess file -# Returns: -# (np.ndarray) Hessian matrix as a 2D array -# """ - -# Change the "D[+-]" notation of Fortran output to "E[+-]" used by Python -# hess_conv = hess.replace("D", "E") - -# # Parse all of the float values -# hess_tri = [float(x) for x in hess_conv.strip().splitlines()] - -# # The value in the Hessian matrix is the lower triangle printed row-wise (e.g., 0,0 -> 1,0 -> 1,1 -> ...) -# n = int(np.sqrt(8 * len(hess_tri) + 1) - 1) // 2 # Size of the 2D matrix - -# # Add the lower diagonal -# hess_arr = np.zeros((n, n)) -# hess_arr[np.tril_indices(n)] = hess_tri - -# # Transpose and then set the lower diagonal again -# hess_arr = np.transpose(hess_arr) # Numpy implementations might only change the ordering to column-major -# hess_arr[np.tril_indices(n)] = hess_tri - -# return hess_arr.T # So that the array is listed in C-order, needed by some alignment routines - - -# gets calc info from psivars preservingDict -# before -def extract_formatted_properties(psivars: PreservingDict) -> AtomicResultProperties: - """Get named properties out of the general variables extracted out of the result file - - Args: - psivars (PreservingDict): Dictionary of the output results - Returns: - (AtomicResultProperties) Properties in a standard format - """ - # TODO (wardlt): Get more of the named variables out of the NWChem file - - # Initialize the output - output = dict() - print("extract_formatted_properties", output) - - # Extract the Calc Info - output.update( - { - # Not a thing in madness - "calcinfo_nbasis": psivars.get("N BASIS", None), - "calcinfo_nmo": psivars.get("N MO", None), # Number of Mo orbitals - # Get madness to print this out - "calcinfo_natom": psivars.get("N ATOMS", None), - # TODO (figure out how to read) - "calcinfo_nalpha": psivars.get("N ALPHA ELECTRONS", None), - "calcinfo_nbeta": psivars.get("N BETA ELECTRONS", None), - } - ) - - # Get the "canonical" properties - # output["return_energy"] = psivars["CURRENT ENERGY"] - # output["nuclear_repulsion_energy"] = psivars["NUCLEAR REPULSION ENERGY"] - - # Get the SCF properties - # output["scf_total_energy"] = psivars.get("TOTAL SCF ENERGY", None) - # output["scf_xc_energy"] = psivars.get("EXCHANGE-CORRELATION", None) - # TODO AdrianH right madness to output these variables - # output["scf_one_electron_energy"] = psivars.get("ONE-ELECTRON ENERGY", None) - # output["scf_two_electron_energy"] = psivars.get("TWO-ELECTRON ENERGY", None) - # output["scf_dispersion_correction_energy"] = psivars.get("DFT DISPERSION ENERGY", None) - - return AtomicResultProperties(**output) - - -def harvest(in_mol: Molecule, outfiles) -> Tuple[PreservingDict, None, None, Molecule, str, str]: - """Parses all the pieces of output from Madness: the stdout in - *nwout* Scratch files are not yet considered at this moment. - - Args: - in_mol (Molecule): Input molecule - madout (str): Madness output molecule - outfiles (dict): Dictionary of outfile files and their contents - Returns: - - (PreservingDict) Variables extracted from the output file in the last complete step - - (None): Hessian from the last complete step (Not yet implemented) - - (None): Gradient from the last complete step (Not yet implemented) - - (Molecule): Molecule from the last complete step - - (str): Version string - - (str): Error message, if any - """ - out_psivar = PreservingDict() - # Parse the Madness output - # This is a weird unpacking but i'm sure i'll find a more elegant way to do this later - moldft_info = outfiles.get("moldft") - moldft_outfiles = moldft_info.get("outfiles") - # At this point scf prints a list of json outputs where each list refers to the scf at givin protocol - # Here I load the scf_info and calc_info as json - # scf_info = json.loads(moldft_outfiles.get("scf_info.json")) - calc_info = json.loads(moldft_outfiles.get("calc_info.json")) - # Write harvest scf_info and harvest calc_info - out_calc_vars = harvest_calc_info(calc_info) - # out_scf_vars = harvest_scf_info(scf_info) - out_psivar.update(out_calc_vars) - # out_psivar.update(out_scf_vars) - - if "molresponse" in outfiles.keys(): - molresponse_info = outfiles.get("moldft") - molresponse_outfiles = molresponse_info.get("outfiles") - # At this point scf prints a list of json outputs where each list refers to the scf at given protocol - # Here I load the scf_info and calc_info as json - response_info = json.loads(molresponse_outfiles.get("response_base.json")) - response_params, response_data_dict = read_molrespone_json(response_info) - - Idontneed_vars, out_mol, out_grad, version, error = harvest_moldft_output(outfiles["moldft"]["stdout"]) - if "molresponse" in outfiles.keys(): - response_psi_var = harvest_response_file(outfiles["molresponse"]["stdout"]) - out_psivar.update(response_psi_var) - # If available, read higher-accuracy gradients - # These were output using a Python Task in Madness to read them out of the database - if outfiles.get("mad.grad") is not None: - logger.debug("Reading higher-accuracy gradients") - out_grad = json.loads(outfiles.get("mad.grad")) - # If available, read the hessian - # TODO read in the geometry outputs from a geometry optimization - # out_mol = None - out_hess = None - if outfiles.get("mad.hess") is not None: - out_hess = harvest_hessian(outfiles.get("mad.hess")) - - # Make sure the input and output molecules are the same - if out_mol: - if in_mol: - if abs(out_mol.nuclear_repulsion_energy() - in_mol.nuclear_repulsion_energy()) > 1.0e-3: - raise ValueError( - """Madness outfile (NRE: %f) inconsistent with MADNESS input (NRE: %f).""" - % (out_mol.nuclear_repulsion_energy(), in_mol.nuclear_repulsion_energy()) - ) - # else: - # raise ValueError("""No coordinate information extracted from Madness output.""") - - # If present, align the gradients and hessian with the original molecular coordinates - # Madness rotates the coordinates of the input molecule. `out_mol` contains the coordinates for the - # rotated molecule, which we can use to determine how to rotate the gradients/hessian - # amol, data = out_mol.align(in_mol, atoms_map=False, mols_align=True, verbose=0) - - # mill = data["mill"] # Retrieve tool with alignment routines - - # if out_grad is not None: - # out_grad = mill.align_gradient(np.array(out_grad).reshape(-1, 3)) - # if out_hess is not None: - # out_hess = mill.align_hessian(np.array(out_hess)) - # TODO create a madness json that outputs basic info like the version and github hash? - - return out_psivar, out_hess, out_grad, out_mol, version, error - - # collect the scf_info json # first iterate through single numbers # Then iteration throught tensor values # this format should work for response values in the future def harvest_scf_info(scf_info): + """Harvest the SCF information from the SCF JSON""" psivar = PreservingDict() # We only need to last set in the list - scf_info = scf_info[-1][0] - print("harvest scf info", scf_info) - - scf_number_vars = [ - "scf_one_electron_energy", - "scf_two_electron_energy", - "nuclear_repulsion_energy", - "scf_vv10_energy", - "scf_xc_energy", - "scf_dispersion_correction_energy", - "scf_total_energy", - "scf_iterations", - ] - - for var in scf_number_vars: - if scf_info.get(var) is not None: - psivar[var.upper()] = scf_info.get(var) - scf_tensor_vars = ["scf_dipole_moment"] - for var in scf_tensor_vars: if scf_info.get(var) is not None: psivar[var.upper()] = tensor_to_numpy(scf_info.get(var)) @@ -298,8 +34,10 @@ def harvest_scf_info(scf_info): def harvest_calc_info(calc_info): + """Harvest the Calc information from the Calc JSON""" psivar = PreservingDict() qcvars = ["calcinfo_nbasis", "calcinfo_nmo", "calcinfo_nalpha", "calcinfo_nbeta", "calcinfo_natom", "return_energy"] + # TODO (ahurta92) I can add more qcvars here from ['scf_e_data'] coulomb kinetic, xc, pcm, nuclear, etc ... for var in qcvars: if calc_info.get(var) is not None: @@ -315,7 +53,7 @@ def tensor_to_numpy(j): return np.reshape(array, tuple(j["dims"])) -def read_frequency_proto_iter_data(my_iter_data, num_states, num_orbitals): +def read_frequency_proto_iter_data(my_iter_data, num_states): num_iters = len(my_iter_data) dres = np.empty((num_iters, num_states)) res_X = np.empty((num_iters, num_states)) @@ -334,7 +72,7 @@ def read_frequency_proto_iter_data(my_iter_data, num_states, num_orbitals): return data -def read_excited_proto_iter_data(my_iter_data, num_states, num_orbitals): +def read_excited_proto_iter_data(my_iter_data, num_states): num_iters = len(my_iter_data) dres = np.empty((num_iters, num_states)) res_X = np.empty((num_iters, num_states)) @@ -367,110 +105,7 @@ def read_molrespone_json(response_info): protos.append(protocol_data[p]["proto"]) iter_data = protocol_data[p]["iter_data"] if response_parameters["excited_state"]: - proto_data.append(read_excited_proto_iter_data(iter_data, n_states, n_orbitals)) + proto_data.append(read_excited_proto_iter_data(iter_data, n_states)) else: - proto_data.append(read_frequency_proto_iter_data(iter_data, n_states, n_orbitals)) + proto_data.append(read_frequency_proto_iter_data(iter_data, n_states)) return response_parameters, proto_data - - -def harvest_response_file(outtext): - psivar = PreservingDict() - psivar_coord = None - psivar_grad = None - version = "" - error = "" # TODO (wardlt): The error string is never used. - pass_psivar = [] - pass_coord = [] - pass_grad = [] - # Write now we split at Converge - counter = 1 - - splits = re.split(r"Converged!", outtext, re.MULTILINE) - print(splits) - splits = splits[-1] - data = re.split(r"Iteration", splits, re.MULTILINE)[-1] - print(data) - - NUMBER = r"(?x:" + regex.NUMBER + ")" # NUMBER - NUMSPACE = NUMBER + r"\s*" # NUMBER + SPACE - - OPTIONS = [ - r"Number of Response States:", - r"Number of Ground States:", - r"k =", - ] - PSIVAR = ["NUM STATES", "NUM ORBITALS", "K"] - optDict = dict(zip(OPTIONS, PSIVAR)) - - for var, VAR in optDict.items(): - mobj = re.search(r"^\s*" + var + r"\s*" + NUMBER + r"\s*$", outtext, re.MULTILINE) - # print(mobj) - if mobj: - psivar[VAR] = mobj.group(1) - # Grab the Orbital Energies There are NUM ORBITALS - num_states = int(psivar["NUM STATES"]) - num_orbitals = int(psivar["NUM ORBITALS"]) - - print(num_states) - print(num_orbitals) - # print(NUMSPACE) - NUMSPACEORB = str() - for i in range(num_orbitals): - NUMSPACEORB += NUMSPACE - # print(NUMSPACEORB) - - var = r"Orbital Energies: \[\*\]" - VAR = "ORBITAL ENERGIES" - mobj = re.search( - r"^\s*" + var + r"\s*" + NUMSPACEORB + r"$", - outtext, - re.MULTILINE, - ) - # print(mobj) - - if mobj: - oe_list = [] - for i in range(num_orbitals): - oe_list.append(mobj.group(i + 1)) - - psivar[VAR] = np.array(oe_list, dtype=float) - - psivar = grab_tensor(r"Ground state overlap:", "OVERLAP", num_orbitals, num_orbitals, psivar, outtext) - psivar = grab_tensor(r"Ground state hamiltonian:", "HAMILTONIAN", num_orbitals, num_orbitals, psivar, outtext) - psivar = grab_tensor(r"Polarizability Final", "POLARIZABILITY", num_states, num_states, psivar, data) - return psivar - - -# Translate a madness tensor defined within json output to a numpy array - - -def grab_tensor(var, VAR, row, col, psivar, data): - first_line = r"^\s*" + var + r"\s+" - NUMBER = r"(?x:" + regex.NUMBER + ")" # NUMBER - NUMSPACE = NUMBER + r"\s*" # NUMBER + SPACE - # print(first_line) - - CAPTURE_LINE = str() - for j in range(col): - CAPTURE_LINE += NUMSPACE - total = first_line - for i in range(row): - front = r"^\[" + str(i) + r",\*\]\s*" - line = front + CAPTURE_LINE - total += line - # print(line) - - mobj = re.search( - total, - data, - re.MULTILINE, - ) - # print(mobj) - if mobj: - oe_list = [] - for i in range(row): - for j in range(col): - oe_list.append(mobj.group(i + 1)) - tensor = np.array(oe_list) - psivar[VAR] = tensor.reshape((row, col)) - return psivar diff --git a/qcengine/programs/madness/runner.py b/qcengine/programs/madness/runner.py index 17b24f5fb..b8decf9ef 100644 --- a/qcengine/programs/madness/runner.py +++ b/qcengine/programs/madness/runner.py @@ -1,6 +1,6 @@ +""" Calls the Madness moldft executable. """ -Calls the Madness moldft executable. -""" + # import re import copy import json @@ -10,7 +10,6 @@ from typing import Any, Dict, Optional, Tuple from pathlib import Path -import numpy as np import qcelemental as qcel from qcelemental.models import AtomicResult, Provenance, AtomicInput from qcelemental.util import safe_version, which @@ -19,12 +18,10 @@ from qcengine.exceptions import UnknownError from ...exceptions import InputError -from ...util import execute, create_mpi_invocation from ..model import ProgramHarness -from .germinate import muster_modelchem -from .harvester import extract_formatted_properties, harvest -from .keywords import format_keywords +from .harvester import harvest_scf_info, harvest_calc_info +from ...util import create_mpi_invocation, execute pp = pprint.PrettyPrinter(width=120, compact=True, indent=1) logger = logging.getLogger(__name__) @@ -78,32 +75,25 @@ def found(raise_error: bool = False) -> bool: ## gotta figure out which input file and from where def get_version(self) -> str: - if self.found(raise_error=True): - # Get the node configuration config = get_config() - - # Run MADNESS - which_prog = which("mad-dft") - which_prog=str(which_prog) - + which_prog = str(which("mad-dft")) if config.use_mpiexec: command = create_mpi_invocation(str(which_prog), config) else: command = [which_prog] - if which_prog not in self.version_cache: success, output = execute( command, infiles=None, scratch_directory=config.scratch_directory, ) - for line in output["stdout"].splitlines(): if "multiresolution suite" in line: version = line.strip().split()[1] + print("version", version) self.version_cache[which_prog] = safe_version(version) - return str(self.version_cache[which_prog]) + return str(self.version_cache[which_prog]) else: raise ModuleNotFoundError("MADNESS executable not found.") @@ -114,35 +104,65 @@ def compute(self, input_model: "AtomicInput", config: "TaskConfig") -> "AtomicRe self.found(raise_error=True) job_inputs = self.build_input(input_model, config) - success, output = self.execute(job_inputs, extra_outfiles=["calc_info.json"]) - if "There is an error in the input file" in output["moldft"]["stdout"]: - raise InputError(output["moldft"]["stdout"]) - if "not compiled" in output["moldft"]["stdout"]: - # recoverable with a different compilation with optional modules - raise InputError(output["moldft"]["stdout"]) + + tmpdir = config.scratch_directory + + success, output = execute( + job_inputs["commands"], + job_inputs["infiles"], + outfiles=["calc_path.json"], + scratch_directory=tmpdir, + scratch_messy=True, + scratch_exist_ok=True, + ) + + def collect_output_jsons(calc_path_json, output_dir): + # first print calc_path_json + print("calc_path_json: ", calc_path_json) + calc_path_json = json.loads(calc_path_json) + # first get calc_info and scf_info + calc_info_path = Path(output_dir) / calc_path_json["moldft"]["outfiles"]["calc_info"] + scf_info_path = Path(output_dir) / calc_path_json["moldft"]["outfiles"]["scf_info"] + + with open(calc_info_path) as file: + calc_info_json = json.load(file) + with open(scf_info_path) as file: + scf_info_json = json.load(file) + + output_json = {} + output_json["moldft"] = {} + output_json["moldft"]["calc_info"] = calc_info_json + output_json["moldft"]["scf_info"] = scf_info_json + + # now get the response + if calc_path_json["response"]["calc_dirs"] is not None: + for response_base, response_calc in zip( + calc_path_json["response"]["outfiles"], calc_path_json["response"]["calc_dirs"] + ): + print(response_base, response_calc) + dir_path = Path(response_calc).stem + print(dir_path) + response_base_path = output_dir / response_base + output_json["response"] = {} + with open(response_base_path) as file: + output_json["response"][dir_path] = json.load(file) + return output_json + if success: - num_commands = len(output) - if num_commands == 2: - stdin = job_inputs["infiles"]["moldft"]["input"] - output["moldft"]["outfiles"]["stdout"] = output["moldft"]["stdout"] - output["moldft"]["outfiles"]["stderr"] = output["moldft"]["stderr"] - output["molresponse"]["outfiles"]["stdout"] = output["molresponse"]["stdout"] - output["molresponse"]["outfiles"]["stderr"] = output["molresponse"]["stderr"] + output_dir = output["scratch_directory"] + output_json = collect_output_jsons(output["outfiles"]["calc_path.json"], output_dir) + output_json["stdout"] = output["stdout"] + output_json["stderr"] = output["stderr"] - else: - stdin = job_inputs["infiles"]["moldft"]["input"] - output["moldft"]["outfiles"]["stdout"] = output["moldft"]["stdout"] - output["moldft"]["outfiles"]["stderr"] = output["moldft"]["stderr"] - output["moldft"]["outfiles"]["input"] = stdin - return self.parse_output(output, input_model) + return self.parse_output(output_json, input_model) else: print(output["stdout"]) - raise UnknownError(output["stderr"]) def build_input( self, input_model: AtomicInput, config: TaskConfig, template: Optional[str] = None ) -> Dict[str, Any]: + # madnessrec = { "infiles": {}, @@ -152,179 +172,92 @@ def build_input( ## These are the madness keywords opts = copy.deepcopy(input_model.keywords) - opts = {k.lower(): v for k, v in opts.items()} - print(opts) + print("opts1: ", opts) + print(input_model.keywords) + + json_input = json.dumps(input_model.keywords, indent=4) + print("json input", json_input) # Handle Molecule molcmd, moldata = input_model.molecule.to_string(dtype="madness", units="bohr", return_data=True) - print(moldata) - print(molcmd) - molData = {} - for k, v in moldata["keywords"].items(): - molData["dft__" + k] = v - opts.update(molData) - - ## Handle Calc Type (ROBERT) - ## now returns respnse options as well - mdccmd, mdcopts = muster_modelchem(input_model.model.method, input_model.driver) - opts.update(mdcopts) - ## Handle the basis set (ROBERT) the question is what value of k - + print("moldata", moldata) + print("molcmd", molcmd) # Log the job settings (LORI) Not sure if i need this logger.debug("JOB_OPTS") logger.debug(pp.pformat(opts)) - # Handle conversion from schema (flat key/value) keywords into local format - optcmd = format_keywords(opts) - # I need to split to geometry keywords and add it to the end of the geometry command in molcommand - # if the geometry keyword exits - if optcmd.find("geometry") != -1: - geo_index = optcmd.find("geometry") # find first occurrence of geometry - end_index = optcmd[geo_index:].find("end") # find first occurrence of end after geometry - geometry_input = optcmd[ - geo_index + 8 : end_index + geo_index - ] # grab everything in between geometry and end - - optcmd = optcmd[0:geo_index] + optcmd[geo_index + end_index + 4 :] # optcmd becomes everything else - molcmd = molcmd.replace( - "end", geometry_input.strip() + "\nend" - ) # replace end with the added geometry input - # print("optcmd\n", optcmd) - # print("molcmd\n", molcmd) - - # print(optcmd.find("geometry")) - # print(optcmd) - madnessrec["commands"] = {} - if mdccmd == "response": - dft_cmds = optcmd.split(mdccmd) - dft_cmds[1] = "response\n" + dft_cmds[1] - - madnessrec["infiles"]["moldft"] = {} - madnessrec["infiles"]["moldft"]["input"] = dft_cmds[0] + molcmd - madnessrec["infiles"]["molresponse"] = {} - madnessrec["infiles"]["molresponse"]["rinput"] = dft_cmds[1] - madnessrec["commands"]["moldft"] = [which("moldft")] - madnessrec["commands"]["molresponse"] = [which("molresponse")] - else: - dft_cmds = optcmd - madnessrec["infiles"]["moldft"] = {} - madnessrec["infiles"]["moldft"]["input"] = dft_cmds + molcmd - madnessrec["commands"]["moldft"] = [which("moldft")] - - # print(madnessrec) + madnessrec["infiles"]["input.json"] = json_input + madnessrec["infiles"]["mol_input"] = molcmd + madnessrec["commands"] = [which("mad-dft"), "input.json", "mol_input"] # optcmd="dft\n xc hf \nend\n" # print(madnessrec["infiles"]["input"]) return madnessrec - def execute( - self, inputs: Dict[str, Any], *, extra_outfiles=None, extra_commands=None, scratch_name=None, timeout=None - ) -> Tuple[bool, Dict]: - num_commands = len(inputs["commands"]) - oexe = {} - if num_commands == 2: - success, dexe = execute( - inputs["commands"]["moldft"], - inputs["infiles"]["moldft"], - ["calc_info.json", "scf_info.json"], - scratch_exist_ok=True, - scratch_name=inputs.get("scratch_name", None), - scratch_directory=inputs["scratch_directory"], - scratch_messy=True, - ) - oexe["moldft"] = dexe - success, dexe_response = execute( - inputs["commands"]["molresponse"], - inputs["infiles"]["molresponse"], - ["response_base.json"], - scratch_messy=True, - scratch_name=Path(dexe["scratch_directory"]).name, - scratch_exist_ok=True, - ) - oexe["molresponse"] = dexe_response - # print(dexe) - # print(dexe_response) - return success, oexe - else: - # print(inputs["commands"]["moldft"]) - success, dexe = execute( - inputs["commands"]["moldft"], - inputs["infiles"]["moldft"], - ["calc_info.json", "scf_info.json"], - scratch_exist_ok=True, - scratch_name=inputs.get("scratch_name", None), - scratch_directory=inputs["scratch_directory"], - scratch_messy=True, - ) - oexe["moldft"] = dexe - return success, oexe - - def parse_output(self, outfiles, input_model: "AtomicInput") -> "AtomicResult": # lgtm: [py/similar-function] - - qcvars, madhess, madgrad, madmol, version, errorTMP = harvest(input_model.molecule, outfiles) - - moldft_out = outfiles.pop("moldft") - - print(moldft_out["outfiles"].keys()) - - m_stdout = moldft_out.pop("stdout") - m_stderr = moldft_out.pop("stderr") - print("after pop", moldft_out["outfiles"].keys()) - - response_out = None - r_stdout = None - r_stderr = None - if "molresponse" in outfiles.keys(): - response_out = outfiles.pop("molresponse") - r_stdout = response_out.pop("stdout") - r_stderr = response_out.pop("stderr") - - stdout = m_stdout - if r_stdout is not None: - stdout.update(r_stdout) - - if madgrad is not None: - qcvars["CURRENT GRADIENT"] = madgrad - - if madhess is not None: - qcvars["CURRENT HESSIAN"] = madhess - # Normalize the output as a float or list of floats - if input_model.driver.upper() == "PROPERTIES": - retres = qcvars[f"RETURN_ENERGY"] - else: - retres = qcvars["RETURN_ENERGY"] + def parse_output(self, output_json: Dict, input_model: "AtomicInput") -> "AtomicResult": + + madmol = input_model.molecule # qcprops = extract_formatted_properties(output_json) + + print("output_json keys", output_json.keys()) + + outfiles = {} + outfiles["moldft"] = output_json["moldft"] + outfiles["response"] = output_json.get("response", None) + + scf_info = harvest_scf_info(output_json["moldft"]["scf_info"]) + print("scf_info", scf_info) + calcinfo = harvest_calc_info(output_json["moldft"]["calc_info"]) + print("calcinfo", calcinfo) + + qcvars = {**scf_info, **calcinfo} + print("qcvars", qcvars) + + properties = {k.lower(): v for k, v in qcvars.items()} + native_files = {} + native_files["calc_info.json"] = json.dumps(outfiles["moldft"]["calc_info"]) + native_files["scf_info.json"] = json.dumps(outfiles["moldft"]["scf_info"]) + if outfiles["response"] is not None: + native_files["response"] = {} + for response_key in outfiles["response"].keys(): + native_files["response"][response_key] = json.dumps(outfiles["response"][response_key]) + print("native_files", native_files.keys()) + print("native files response keys", native_files["response"].keys()) + + provenance = Provenance(creator="madness", version=self.get_version(), routine="madness") + + def get_return_results(input_model, qcvars): + if input_model.driver == "energy": + retres = qcvars["RETURN_ENERGY"] + elif input_model.driver == "gradient": + retres = qcvars[ + "CURRENT GRADIENT" + ] # in madness I need to create a gradient and hession output json file + elif input_model.driver == "hessian": + retres = qcvars["CURRENT HESSIAN"] + else: + raise InputError("Driver not understood") + return retres - if isinstance(retres, Decimal): - retres = float(retres) - elif isinstance(retres, np.ndarray): - retres = retres.tolist() + retres = get_return_results(input_model, qcvars) + stdout = output_json.pop("stdout") + stderr = output_json.pop("stderr") - # Get the formatted properties - qcprops = extract_formatted_properties(qcvars) # Format them inout an output - m_native_files = {k: v for k, v in moldft_out["outfiles"].items() if v is not None} - native_files = { - "input": m_native_files["input"], - "calc_info": m_native_files["calc_info.json"], - } output_data = { - "schema_name": "qcschema_output", "schema_version": 1, - "extras": {"outfiles": outfiles, **input_model.extras}, + "molecule": madmol, # overwrites with outfile Cartesians in case fix_*=F + "extras": {**input_model.extras}, "native_files": native_files, - "properties": qcprops, - "provenance": Provenance(creator="MADNESS", version=self.get_version(), routine="madness"), + "properties": properties, + "provenance": provenance, "return_result": retres, + "stderr": stderr, "stdout": stdout, + "success": True, } # got to even out who needs plump/flat/Decimal/float/ndarray/list # Decimal --> str preserves precision output_data["extras"]["qcvars"] = { k.upper(): str(v) if isinstance(v, Decimal) else v for k, v in qcel.util.unnp(qcvars, flat=True).items() } - output_data["extras"]["outfiles"] = { - "input": native_files["input"], - "calc_info": json.loads(native_files["calc_info"]), - } - output_data["success"] = True return AtomicResult(**{**input_model.dict(), **output_data})