From 01a0e533e2a0c4e53cbca73c87b289f26af6af84 Mon Sep 17 00:00:00 2001
From: Adrian Hurtado <adrian.hurtado@stonybrook.edu>
Date: Tue, 7 May 2024 14:13:58 -0400
Subject: [PATCH] Runs end to end with energy... need response properties

---
 qcengine/programs/madness/harvester.py | 379 +------------------------
 qcengine/programs/madness/runner.py    | 305 ++++++++------------
 2 files changed, 126 insertions(+), 558 deletions(-)

diff --git a/qcengine/programs/madness/harvester.py b/qcengine/programs/madness/harvester.py
index 42ed85ffd..096e92c53 100644
--- a/qcengine/programs/madness/harvester.py
+++ b/qcengine/programs/madness/harvester.py
@@ -10,286 +10,22 @@
 # import qcelemental as qcel
 from qcelemental.models import Molecule
 from qcelemental.models.results import AtomicResultProperties
-from qcelemental.molparse import regex
 
 from ..util import PreservingDict
 
 logger = logging.getLogger(__name__)
 
 
-def harvest_moldft_output(outtext: str) -> Tuple[PreservingDict, Molecule, list, str, str]:
-    """Function to read an entire MADNESS output file.
-
-    Read all of the different "line search" segments of a file and returns
-    values from the last segment for which a geometry was written.
-
-    Args:
-        outtext (str): Output written to stdout
-    Returns:
-        - (PreservingDict) Variables extracted from the output file in the last complete step
-        - (Molecule): Molecule from the last complete step
-        - (list): Gradient from the last complete step
-        - (str): Version string
-        - (str): Error message, if any
-    """
-
-    # Loop over all steps
-    pass_psivar = []
-    pass_coord = []
-    pass_grad = []
-    # Write now we split at Converge
-    counter = 1
-
-    splits = re.split(r"Converged!", outtext, re.MULTILINE)[-2]
-    final_outpass = re.split(r"Iteration", splits, re.MULTILINE)[-1]
-    psivar, madcoord, madgrad, version, error = harvest_outfile_moldft_pass(final_outpass)
-
-    return psivar, madcoord, madgrad, version, error
-
-
-def harvest_outfile_moldft_pass(outtext):
-    """Function to read Madness output file *outtext* and parse important
-    quantum chemical information from it in
-
-    """
-    psivar = PreservingDict()
-    psivar_coord = None
-    psivar_grad = None
-    version = ""
-    error = ""  # TODO (wardlt): The error string is never used.
-
-    NUMBER = r"(?x:" + regex.NUMBER + ")"
-    # fmt: off
-
-    # Process version
-    mobj = re.search(
-        r'^\s+' + r'MADNESS' + r'\s+' +
-        r'(\d+.\d\d+.\d)' + r'\s' + r'multiresolution suite' + r'\s*$',
-        outtext, re.MULTILINE)
-    # fmt: on
-    if mobj:
-        logger.debug("matched version")
-        version = mobj.group(1)
-
-    # Process SCF
-    # 1)Fail to converge (TODO Robert ask for failed convergence)
-    # fmt: off
-    mobj = re.search(
-        r'^\s+' + r'(?:Calculation failed to converge)' + r'\s*$', outtext, re.MULTILINE)
-    # fmt: on
-    if mobj:
-        logger.debug("failed to converge")
-
-    # 2)Calculation converged
-    else:
-        OPTIONS = [r"exchange-correlation", r"nuclear-repulsion", r"total"]
-        PSIVAR = ["EXCHANGE-CORRELATION", "NUCLEAR REPULSION ENERGY", "TOTAL SCF ENERGY"]
-        # OPTIONS=[r'kinetic',r'nonlocal psp',r'nuclear attraction',r'coulomb',r'PCM',r'exchange-correlation',r'nuclear-repulsion',r'total']
-        # PSIVAR=['KINETIC ENERGY','NONLOCAL PSP','NUCLEAR ATTRACTION ENERGY','COULOMB','PCM','EXCHANGE-CORRELATION','NUCLEAR REPULSION ENERGY','TOTAL SCF ENERGY']
-        optDict = dict(zip(OPTIONS, PSIVAR))
-
-        for var, VAR in optDict.items():
-            mobj = re.search(r"^\s+" + var + r"\s*" + NUMBER + r"s*$", outtext, re.MULTILINE)
-            if mobj:
-                logger.debug("matched SCF")  # not sure what this means
-                psivar[VAR] = mobj.group(1)
-    # Other options
-
-    # Process CURRENT energies (TODO: needs better way)
-    if "TOTAL SCF ENERGY" in psivar:
-        psivar["CURRENT REFERENCE ENERGY"] = psivar["TOTAL SCF ENERGY"]
-        psivar["CURRENT ENERGY"] = psivar["TOTAL SCF ENERGY"]
-
-    return psivar, psivar_coord, psivar_grad, version, error
-
-
-def harvest_hessian(hess: str) -> np.ndarray:
-    pass
-
-
-#    """Parses the contents of the NWChem hess file into a hessian array.
-
-#     Args:
-#         hess (str): Contents of the hess file
-#     Returns:
-#         (np.ndarray) Hessian matrix as a 2D array
-#     """
-
-# Change the "D[+-]" notation of Fortran output to "E[+-]" used by Python
-#     hess_conv = hess.replace("D", "E")
-
-#     # Parse all of the float values
-#     hess_tri = [float(x) for x in hess_conv.strip().splitlines()]
-
-#     # The value in the Hessian matrix is the lower triangle printed row-wise (e.g., 0,0 -> 1,0 -> 1,1 -> ...)
-#     n = int(np.sqrt(8 * len(hess_tri) + 1) - 1) // 2  # Size of the 2D matrix
-
-#     # Add the lower diagonal
-#     hess_arr = np.zeros((n, n))
-#     hess_arr[np.tril_indices(n)] = hess_tri
-
-#     # Transpose and then set the lower diagonal again
-#     hess_arr = np.transpose(hess_arr)  # Numpy implementations might only change the ordering to column-major
-#     hess_arr[np.tril_indices(n)] = hess_tri
-
-#     return hess_arr.T  # So that the array is listed in C-order, needed by some alignment routines
-
-
-# gets calc info from psivars preservingDict
-# before
-def extract_formatted_properties(psivars: PreservingDict) -> AtomicResultProperties:
-    """Get named properties out of the general variables extracted out of the result file
-
-    Args:
-        psivars (PreservingDict): Dictionary of the output results
-    Returns:
-        (AtomicResultProperties) Properties in a standard format
-    """
-    # TODO (wardlt): Get more of the named variables out of the NWChem file
-
-    # Initialize the output
-    output = dict()
-    print("extract_formatted_properties", output)
-
-    # Extract the Calc Info
-    output.update(
-        {
-            # Not a thing in madness
-            "calcinfo_nbasis": psivars.get("N BASIS", None),
-            "calcinfo_nmo": psivars.get("N MO", None),  # Number of Mo orbitals
-            # Get madness to print this out
-            "calcinfo_natom": psivars.get("N ATOMS", None),
-            # TODO (figure out how to read)
-            "calcinfo_nalpha": psivars.get("N ALPHA ELECTRONS", None),
-            "calcinfo_nbeta": psivars.get("N BETA ELECTRONS", None),
-        }
-    )
-
-    # Get the "canonical" properties
-    # output["return_energy"] = psivars["CURRENT ENERGY"]
-    # output["nuclear_repulsion_energy"] = psivars["NUCLEAR REPULSION ENERGY"]
-
-    # Get the SCF properties
-    # output["scf_total_energy"] = psivars.get("TOTAL SCF ENERGY", None)
-    # output["scf_xc_energy"] = psivars.get("EXCHANGE-CORRELATION", None)
-    # TODO AdrianH right madness to output these variables
-    # output["scf_one_electron_energy"] = psivars.get("ONE-ELECTRON ENERGY", None)
-    # output["scf_two_electron_energy"] = psivars.get("TWO-ELECTRON ENERGY", None)
-    # output["scf_dispersion_correction_energy"] = psivars.get("DFT DISPERSION ENERGY", None)
-
-    return AtomicResultProperties(**output)
-
-
-def harvest(in_mol: Molecule, outfiles) -> Tuple[PreservingDict, None, None, Molecule, str, str]:
-    """Parses all the pieces of output from Madness: the stdout in
-    *nwout* Scratch files are not yet considered at this moment.
-
-    Args:
-        in_mol (Molecule): Input molecule
-        madout (str): Madness output molecule
-        outfiles (dict): Dictionary of outfile files and their contents
-    Returns:
-        - (PreservingDict) Variables extracted from the output file in the last complete step
-        - (None): Hessian from the last complete step (Not yet implemented)
-        - (None): Gradient from the last complete step (Not yet implemented)
-        - (Molecule): Molecule from the last complete step
-        - (str): Version string
-        - (str): Error message, if any
-    """
-    out_psivar = PreservingDict()
-    # Parse the Madness output
-    # This is a weird unpacking but i'm sure i'll find a more elegant way to do this later
-    moldft_info = outfiles.get("moldft")
-    moldft_outfiles = moldft_info.get("outfiles")
-    # At this point scf prints a list of json outputs where each list refers to the scf at givin protocol
-    # Here I load the scf_info and calc_info as json
-    # scf_info = json.loads(moldft_outfiles.get("scf_info.json"))
-    calc_info = json.loads(moldft_outfiles.get("calc_info.json"))
-    # Write harvest scf_info and harvest calc_info
-    out_calc_vars = harvest_calc_info(calc_info)
-    # out_scf_vars = harvest_scf_info(scf_info)
-    out_psivar.update(out_calc_vars)
-    # out_psivar.update(out_scf_vars)
-
-    if "molresponse" in outfiles.keys():
-        molresponse_info = outfiles.get("moldft")
-        molresponse_outfiles = molresponse_info.get("outfiles")
-        # At this point scf prints a list of json outputs where each list refers to the scf at given protocol
-        # Here I load the scf_info and calc_info as json
-        response_info = json.loads(molresponse_outfiles.get("response_base.json"))
-        response_params, response_data_dict = read_molrespone_json(response_info)
-
-    Idontneed_vars, out_mol, out_grad, version, error = harvest_moldft_output(outfiles["moldft"]["stdout"])
-    if "molresponse" in outfiles.keys():
-        response_psi_var = harvest_response_file(outfiles["molresponse"]["stdout"])
-        out_psivar.update(response_psi_var)
-    # If available, read higher-accuracy gradients
-    #  These were output using a Python Task in Madness to read them out of the database
-    if outfiles.get("mad.grad") is not None:
-        logger.debug("Reading higher-accuracy gradients")
-        out_grad = json.loads(outfiles.get("mad.grad"))
-    # If available, read the hessian
-    # TODO read in the geometry outputs from a geometry optimization
-    # out_mol = None
-    out_hess = None
-    if outfiles.get("mad.hess") is not None:
-        out_hess = harvest_hessian(outfiles.get("mad.hess"))
-
-    # Make sure the input and output molecules are the same
-    if out_mol:
-        if in_mol:
-            if abs(out_mol.nuclear_repulsion_energy() - in_mol.nuclear_repulsion_energy()) > 1.0e-3:
-                raise ValueError(
-                    """Madness outfile (NRE: %f) inconsistent with MADNESS input (NRE: %f)."""
-                    % (out_mol.nuclear_repulsion_energy(), in_mol.nuclear_repulsion_energy())
-                )
-    # else:
-    #    raise ValueError("""No coordinate information extracted from Madness output.""")
-
-    # If present, align the gradients and hessian with the original molecular coordinates
-    #  Madness rotates the coordinates of the input molecule. `out_mol` contains the coordinates for the
-    #  rotated molecule, which we can use to determine how to rotate the gradients/hessian
-    # amol, data = out_mol.align(in_mol, atoms_map=False, mols_align=True, verbose=0)
-
-    # mill = data["mill"]  # Retrieve tool with alignment routines
-
-    # if out_grad is not None:
-    #    out_grad = mill.align_gradient(np.array(out_grad).reshape(-1, 3))
-    # if out_hess is not None:
-    #    out_hess = mill.align_hessian(np.array(out_hess))
-    # TODO create a madness json that outputs basic info like the version and github hash?
-
-    return out_psivar, out_hess, out_grad, out_mol, version, error
-
-
 # collect the scf_info json
 # first iterate through single numbers
 # Then iteration throught tensor values
 # this format should work for response values in the future
 def harvest_scf_info(scf_info):
+    """Harvest the SCF information from the SCF JSON"""
     psivar = PreservingDict()
 
     # We only need to last set in the list
-    scf_info = scf_info[-1][0]
-    print("harvest scf info", scf_info)
-
-    scf_number_vars = [
-        "scf_one_electron_energy",
-        "scf_two_electron_energy",
-        "nuclear_repulsion_energy",
-        "scf_vv10_energy",
-        "scf_xc_energy",
-        "scf_dispersion_correction_energy",
-        "scf_total_energy",
-        "scf_iterations",
-    ]
-
-    for var in scf_number_vars:
-        if scf_info.get(var) is not None:
-            psivar[var.upper()] = scf_info.get(var)
-
     scf_tensor_vars = ["scf_dipole_moment"]
-
     for var in scf_tensor_vars:
         if scf_info.get(var) is not None:
             psivar[var.upper()] = tensor_to_numpy(scf_info.get(var))
@@ -298,8 +34,10 @@ def harvest_scf_info(scf_info):
 
 
 def harvest_calc_info(calc_info):
+    """Harvest the Calc information from the Calc JSON"""
     psivar = PreservingDict()
     qcvars = ["calcinfo_nbasis", "calcinfo_nmo", "calcinfo_nalpha", "calcinfo_nbeta", "calcinfo_natom", "return_energy"]
+    # TODO (ahurta92) I can add more qcvars here from ['scf_e_data']  coulomb kinetic, xc, pcm, nuclear, etc ...
 
     for var in qcvars:
         if calc_info.get(var) is not None:
@@ -315,7 +53,7 @@ def tensor_to_numpy(j):
     return np.reshape(array, tuple(j["dims"]))
 
 
-def read_frequency_proto_iter_data(my_iter_data, num_states, num_orbitals):
+def read_frequency_proto_iter_data(my_iter_data, num_states):
     num_iters = len(my_iter_data)
     dres = np.empty((num_iters, num_states))
     res_X = np.empty((num_iters, num_states))
@@ -334,7 +72,7 @@ def read_frequency_proto_iter_data(my_iter_data, num_states, num_orbitals):
     return data
 
 
-def read_excited_proto_iter_data(my_iter_data, num_states, num_orbitals):
+def read_excited_proto_iter_data(my_iter_data, num_states):
     num_iters = len(my_iter_data)
     dres = np.empty((num_iters, num_states))
     res_X = np.empty((num_iters, num_states))
@@ -367,110 +105,7 @@ def read_molrespone_json(response_info):
         protos.append(protocol_data[p]["proto"])
         iter_data = protocol_data[p]["iter_data"]
         if response_parameters["excited_state"]:
-            proto_data.append(read_excited_proto_iter_data(iter_data, n_states, n_orbitals))
+            proto_data.append(read_excited_proto_iter_data(iter_data, n_states))
         else:
-            proto_data.append(read_frequency_proto_iter_data(iter_data, n_states, n_orbitals))
+            proto_data.append(read_frequency_proto_iter_data(iter_data, n_states))
     return response_parameters, proto_data
-
-
-def harvest_response_file(outtext):
-    psivar = PreservingDict()
-    psivar_coord = None
-    psivar_grad = None
-    version = ""
-    error = ""  # TODO (wardlt): The error string is never used.
-    pass_psivar = []
-    pass_coord = []
-    pass_grad = []
-    # Write now we split at Converge
-    counter = 1
-
-    splits = re.split(r"Converged!", outtext, re.MULTILINE)
-    print(splits)
-    splits = splits[-1]
-    data = re.split(r"Iteration", splits, re.MULTILINE)[-1]
-    print(data)
-
-    NUMBER = r"(?x:" + regex.NUMBER + ")"  # NUMBER
-    NUMSPACE = NUMBER + r"\s*"  # NUMBER + SPACE
-
-    OPTIONS = [
-        r"Number of Response States:",
-        r"Number of Ground States:",
-        r"k =",
-    ]
-    PSIVAR = ["NUM STATES", "NUM ORBITALS", "K"]
-    optDict = dict(zip(OPTIONS, PSIVAR))
-
-    for var, VAR in optDict.items():
-        mobj = re.search(r"^\s*" + var + r"\s*" + NUMBER + r"\s*$", outtext, re.MULTILINE)
-        # print(mobj)
-        if mobj:
-            psivar[VAR] = mobj.group(1)
-    # Grab the Orbital Energies  There are NUM ORBITALS
-    num_states = int(psivar["NUM STATES"])
-    num_orbitals = int(psivar["NUM ORBITALS"])
-
-    print(num_states)
-    print(num_orbitals)
-    # print(NUMSPACE)
-    NUMSPACEORB = str()
-    for i in range(num_orbitals):
-        NUMSPACEORB += NUMSPACE
-    # print(NUMSPACEORB)
-
-    var = r"Orbital Energies: \[\*\]"
-    VAR = "ORBITAL ENERGIES"
-    mobj = re.search(
-        r"^\s*" + var + r"\s*" + NUMSPACEORB + r"$",
-        outtext,
-        re.MULTILINE,
-    )
-    # print(mobj)
-
-    if mobj:
-        oe_list = []
-        for i in range(num_orbitals):
-            oe_list.append(mobj.group(i + 1))
-
-        psivar[VAR] = np.array(oe_list, dtype=float)
-
-    psivar = grab_tensor(r"Ground state overlap:", "OVERLAP", num_orbitals, num_orbitals, psivar, outtext)
-    psivar = grab_tensor(r"Ground state hamiltonian:", "HAMILTONIAN", num_orbitals, num_orbitals, psivar, outtext)
-    psivar = grab_tensor(r"Polarizability Final", "POLARIZABILITY", num_states, num_states, psivar, data)
-    return psivar
-
-
-# Translate a madness tensor defined within json output to a numpy array
-
-
-def grab_tensor(var, VAR, row, col, psivar, data):
-    first_line = r"^\s*" + var + r"\s+"
-    NUMBER = r"(?x:" + regex.NUMBER + ")"  # NUMBER
-    NUMSPACE = NUMBER + r"\s*"  # NUMBER + SPACE
-    # print(first_line)
-
-    CAPTURE_LINE = str()
-    for j in range(col):
-        CAPTURE_LINE += NUMSPACE
-    total = first_line
-    for i in range(row):
-        front = r"^\[" + str(i) + r",\*\]\s*"
-        line = front + CAPTURE_LINE
-        total += line
-    #    print(line)
-
-    mobj = re.search(
-        total,
-        data,
-        re.MULTILINE,
-    )
-    # print(mobj)
-    if mobj:
-        oe_list = []
-        for i in range(row):
-            for j in range(col):
-                oe_list.append(mobj.group(i + 1))
-        tensor = np.array(oe_list)
-        psivar[VAR] = tensor.reshape((row, col))
-    return psivar
diff --git a/qcengine/programs/madness/runner.py b/qcengine/programs/madness/runner.py
index 17b24f5fb..b8decf9ef 100644
--- a/qcengine/programs/madness/runner.py
+++ b/qcengine/programs/madness/runner.py
@@ -1,6 +1,6 @@
+""" Calls the Madness moldft executable.
 """
-Calls the Madness moldft executable.
-"""
+
 # import re
 import copy
 import json
@@ -10,7 +10,6 @@
 from typing import Any, Dict, Optional, Tuple
 from pathlib import Path
 
-import numpy as np
 import qcelemental as qcel
 from qcelemental.models import AtomicResult, Provenance, AtomicInput
 from qcelemental.util import safe_version, which
@@ -19,12 +18,10 @@
 from qcengine.exceptions import UnknownError
 
 from ...exceptions import InputError
-from ...util import execute, create_mpi_invocation
 from ..model import ProgramHarness
 
-from .germinate import muster_modelchem
-from .harvester import extract_formatted_properties, harvest
-from .keywords import format_keywords
+from .harvester import harvest_scf_info, harvest_calc_info
+from ...util import create_mpi_invocation, execute
 
 pp = pprint.PrettyPrinter(width=120, compact=True, indent=1)
 logger = logging.getLogger(__name__)
@@ -78,32 +75,25 @@ def found(raise_error: bool = False) -> bool:
 
     ## gotta figure out which input file and from where
     def get_version(self) -> str:
-
         if self.found(raise_error=True):
-        # Get the node configuration
             config = get_config()
-
-            # Run MADNESS
-            which_prog = which("mad-dft")
-            which_prog=str(which_prog)
-
+            which_prog = str(which("mad-dft"))
             if config.use_mpiexec:
                 command = create_mpi_invocation(str(which_prog), config)
             else:
                 command = [which_prog]
-
             if which_prog not in self.version_cache:
                 success, output = execute(
                     command,
                     infiles=None,
                     scratch_directory=config.scratch_directory,
                 )
-
                 for line in output["stdout"].splitlines():
                     if "multiresolution suite" in line:
                         version = line.strip().split()[1]
+                        print("version", version)
                         self.version_cache[which_prog] = safe_version(version)
-            return str(self.version_cache[which_prog])
+                return str(self.version_cache[which_prog])
         else:
             raise ModuleNotFoundError("MADNESS executable not found.")
 
@@ -114,35 +104,65 @@ def compute(self, input_model: "AtomicInput", config: "TaskConfig") -> "AtomicRe
         self.found(raise_error=True)
 
         job_inputs = self.build_input(input_model, config)
-        success, output = self.execute(job_inputs, extra_outfiles=["calc_info.json"])
-        if "There is an error in the input file" in output["moldft"]["stdout"]:
-            raise InputError(output["moldft"]["stdout"])
-        if "not compiled" in output["moldft"]["stdout"]:
-            # recoverable with a different compilation with optional modules
-            raise InputError(output["moldft"]["stdout"])
+
+        tmpdir = config.scratch_directory
+
+        success, output = execute(
+            job_inputs["commands"],
+            job_inputs["infiles"],
+            outfiles=["calc_path.json"],
+            scratch_directory=tmpdir,
+            scratch_messy=True,
+            scratch_exist_ok=True,
+        )
+
+        def collect_output_jsons(calc_path_json, output_dir):
+            # first print calc_path_json
+            print("calc_path_json: ", calc_path_json)
+            calc_path_json = json.loads(calc_path_json)
+            # first get calc_info and scf_info
+            calc_info_path = Path(output_dir) / calc_path_json["moldft"]["outfiles"]["calc_info"]
+            scf_info_path = Path(output_dir) / calc_path_json["moldft"]["outfiles"]["scf_info"]
+
+            with open(calc_info_path) as file:
+                calc_info_json = json.load(file)
+            with open(scf_info_path) as file:
+                scf_info_json = json.load(file)
+
+            output_json = {}
+            output_json["moldft"] = {}
+            output_json["moldft"]["calc_info"] = calc_info_json
+            output_json["moldft"]["scf_info"] = scf_info_json
+
+            # now get the response
+            if calc_path_json["response"]["calc_dirs"] is not None:
+                for response_base, response_calc in zip(
+                    calc_path_json["response"]["outfiles"], calc_path_json["response"]["calc_dirs"]
+                ):
+                    print(response_base, response_calc)
+                    dir_path = Path(response_calc).stem
+                    print(dir_path)
+                    response_base_path = output_dir / response_base
+                    output_json["response"] = {}
+                    with open(response_base_path) as file:
+                        output_json["response"][dir_path] = json.load(file)
+            return output_json
+
         if success:
-            num_commands = len(output)
-            if num_commands == 2:
-                stdin = job_inputs["infiles"]["moldft"]["input"]
-                output["moldft"]["outfiles"]["stdout"] = output["moldft"]["stdout"]
-                output["moldft"]["outfiles"]["stderr"] = output["moldft"]["stderr"]
-                output["molresponse"]["outfiles"]["stdout"] = output["molresponse"]["stdout"]
-                output["molresponse"]["outfiles"]["stderr"] = output["molresponse"]["stderr"]
+            output_dir = output["scratch_directory"]
+            output_json = collect_output_jsons(output["outfiles"]["calc_path.json"], output_dir)
+            output_json["stdout"] = output["stdout"]
+            output_json["stderr"] = output["stderr"]
 
-            else:
-                stdin = job_inputs["infiles"]["moldft"]["input"]
-                output["moldft"]["outfiles"]["stdout"] = output["moldft"]["stdout"]
-                output["moldft"]["outfiles"]["stderr"] = output["moldft"]["stderr"]
-                output["moldft"]["outfiles"]["input"] = stdin
-            return self.parse_output(output, input_model)
+            return self.parse_output(output_json, input_model)
         else:
             print(output["stdout"])
-
             raise UnknownError(output["stderr"])
 
     def build_input(
         self, input_model: AtomicInput, config: TaskConfig, template: Optional[str] = None
     ) -> Dict[str, Any]:
+
         #
         madnessrec = {
             "infiles": {},
@@ -152,179 +172,92 @@ def build_input(
 
         ## These are the madness keywords
         opts = copy.deepcopy(input_model.keywords)
-        opts = {k.lower(): v for k, v in opts.items()}
-        print(opts)
+        print("opts1: ", opts)
 
+        print(input_model.keywords)
+
+        json_input = json.dumps(input_model.keywords, indent=4)
+        print("json input", json_input)
         # Handle Molecule
         molcmd, moldata = input_model.molecule.to_string(dtype="madness", units="bohr", return_data=True)
-        print(moldata)
-        print(molcmd)
-        molData = {}
-        for k, v in moldata["keywords"].items():
-            molData["dft__" + k] = v
-        opts.update(molData)
-
-        ## Handle Calc Type (ROBERT)
-        ## now returns respnse options as well
-        mdccmd, mdcopts = muster_modelchem(input_model.model.method, input_model.driver)
-        opts.update(mdcopts)
-        ## Handle the basis set (ROBERT) the question is what value of k
-
+        print("moldata", moldata)
+        print("molcmd", molcmd)
         # Log the job settings (LORI)  Not sure if i need this
         logger.debug("JOB_OPTS")
         logger.debug(pp.pformat(opts))
 
-        # Handle conversion from schema (flat key/value) keywords into local format
-        optcmd = format_keywords(opts)
-        # I need to split to geometry keywords and add it to the end of the geometry command in molcommand
-        # if the geometry keyword exits
-        if optcmd.find("geometry") != -1:
-            geo_index = optcmd.find("geometry")  # find first occurrence of geometry
-            end_index = optcmd[geo_index:].find("end")  # find first occurrence of end after geometry
-            geometry_input = optcmd[
-                geo_index + 8 : end_index + geo_index
-            ]  # grab everything in between geometry and end
-
-            optcmd = optcmd[0:geo_index] + optcmd[geo_index + end_index + 4 :]  # optcmd becomes everything else
-            molcmd = molcmd.replace(
-                "end", geometry_input.strip() + "\nend"
-            )  # replace end with the added geometry input
-            # print("optcmd\n", optcmd)
-            # print("molcmd\n", molcmd)
-
-        # print(optcmd.find("geometry"))
-        # print(optcmd)
-        madnessrec["commands"] = {}
-        if mdccmd == "response":
-            dft_cmds = optcmd.split(mdccmd)
-            dft_cmds[1] = "response\n" + dft_cmds[1]
-
-            madnessrec["infiles"]["moldft"] = {}
-            madnessrec["infiles"]["moldft"]["input"] = dft_cmds[0] + molcmd
-            madnessrec["infiles"]["molresponse"] = {}
-            madnessrec["infiles"]["molresponse"]["rinput"] = dft_cmds[1]
-            madnessrec["commands"]["moldft"] = [which("moldft")]
-            madnessrec["commands"]["molresponse"] = [which("molresponse")]
-        else:
-            dft_cmds = optcmd
-            madnessrec["infiles"]["moldft"] = {}
-            madnessrec["infiles"]["moldft"]["input"] = dft_cmds + molcmd
-            madnessrec["commands"]["moldft"] = [which("moldft")]
-
-        # print(madnessrec)
+        madnessrec["infiles"]["input.json"] = json_input
+        madnessrec["infiles"]["mol_input"] = molcmd
+        madnessrec["commands"] = [which("mad-dft"), "input.json", "mol_input"]
         # optcmd="dft\n xc hf \nend\n"
         # print(madnessrec["infiles"]["input"])
         return madnessrec
 
-    def execute(
-        self, inputs: Dict[str, Any], *, extra_outfiles=None, extra_commands=None, scratch_name=None, timeout=None
-    ) -> Tuple[bool, Dict]:
-        num_commands = len(inputs["commands"])
-        oexe = {}
-        if num_commands == 2:
-            success, dexe = execute(
-                inputs["commands"]["moldft"],
-                inputs["infiles"]["moldft"],
-                ["calc_info.json", "scf_info.json"],
-                scratch_exist_ok=True,
-                scratch_name=inputs.get("scratch_name", None),
-                scratch_directory=inputs["scratch_directory"],
-                scratch_messy=True,
-            )
-            oexe["moldft"] = dexe
-            success, dexe_response = execute(
-                inputs["commands"]["molresponse"],
-                inputs["infiles"]["molresponse"],
-                ["response_base.json"],
-                scratch_messy=True,
-                scratch_name=Path(dexe["scratch_directory"]).name,
-                scratch_exist_ok=True,
-            )
-            oexe["molresponse"] = dexe_response
-            # print(dexe)
-            # print(dexe_response)
-            return success, oexe
-        else:
-            # print(inputs["commands"]["moldft"])
-            success, dexe = execute(
-                inputs["commands"]["moldft"],
-                inputs["infiles"]["moldft"],
-                ["calc_info.json", "scf_info.json"],
-                scratch_exist_ok=True,
-                scratch_name=inputs.get("scratch_name", None),
-                scratch_directory=inputs["scratch_directory"],
-                scratch_messy=True,
-            )
-            oexe["moldft"] = dexe
-            return success, oexe
-
-    def parse_output(self, outfiles, input_model: "AtomicInput") -> "AtomicResult":  # lgtm: [py/similar-function]
-
-        qcvars, madhess, madgrad, madmol, version, errorTMP = harvest(input_model.molecule, outfiles)
-
-        moldft_out = outfiles.pop("moldft")
-
-        print(moldft_out["outfiles"].keys())
-
-        m_stdout = moldft_out.pop("stdout")
-        m_stderr = moldft_out.pop("stderr")
-        print("after pop", moldft_out["outfiles"].keys())
-
-        response_out = None
-        r_stdout = None
-        r_stderr = None
-        if "molresponse" in outfiles.keys():
-            response_out = outfiles.pop("molresponse")
-            r_stdout = response_out.pop("stdout")
-            r_stderr = response_out.pop("stderr")
-
-        stdout = m_stdout
-        if r_stdout is not None:
-            stdout.update(r_stdout)
-
-        if madgrad is not None:
-            qcvars["CURRENT GRADIENT"] = madgrad
-
-        if madhess is not None:
-            qcvars["CURRENT HESSIAN"] = madhess
-        # Normalize the output as a float or list of floats
-        if input_model.driver.upper() == "PROPERTIES":
-            retres = qcvars[f"RETURN_ENERGY"]
-        else:
-            retres = qcvars["RETURN_ENERGY"]
+    def parse_output(self, output_json: Dict, input_model: "AtomicInput") -> "AtomicResult":
+
+        madmol = input_model.molecule  # qcprops = extract_formatted_properties(output_json)
+
+        print("output_json keys", output_json.keys())
+
+        outfiles = {}
+        outfiles["moldft"] = output_json["moldft"]
+        outfiles["response"] = output_json.get("response", None)
+
+        scf_info = harvest_scf_info(output_json["moldft"]["scf_info"])
+        print("scf_info", scf_info)
+        calcinfo = harvest_calc_info(output_json["moldft"]["calc_info"])
+        print("calcinfo", calcinfo)
+
+        qcvars = {**scf_info, **calcinfo}
+        print("qcvars", qcvars)
+
+        properties = {k.lower(): v for k, v in qcvars.items()}
+        native_files = {}
+        native_files["calc_info.json"] = json.dumps(outfiles["moldft"]["calc_info"])
+        native_files["scf_info.json"] = json.dumps(outfiles["moldft"]["scf_info"])
+        if outfiles["response"] is not None:
+            native_files["response"] = {}
+            for response_key in outfiles["response"].keys():
+                native_files["response"][response_key] = json.dumps(outfiles["response"][response_key])
+        print("native_files", native_files.keys())
+        print("native files response keys", native_files["response"].keys())
+
+        provenance = Provenance(creator="madness", version=self.get_version(), routine="madness")
+
+        def get_return_results(input_model, qcvars):
+            if input_model.driver == "energy":
+                retres = qcvars["RETURN_ENERGY"]
+            elif input_model.driver == "gradient":
+                retres = qcvars[
+                    "CURRENT GRADIENT"
+                ]  # in madness I need to create a gradient and hession output json file
+            elif input_model.driver == "hessian":
+                retres = qcvars["CURRENT HESSIAN"]
+            else:
+                raise InputError("Driver not understood")
+            return retres
 
-        if isinstance(retres, Decimal):
-            retres = float(retres)
-        elif isinstance(retres, np.ndarray):
-            retres = retres.tolist()
+        retres = get_return_results(input_model, qcvars)
+        stdout = output_json.pop("stdout")
+        stderr = output_json.pop("stderr")
 
-        # Get the formatted properties
-        qcprops = extract_formatted_properties(qcvars)
         # Format them inout an output
-        m_native_files = {k: v for k, v in moldft_out["outfiles"].items() if v is not None}
-        native_files = {
-            "input": m_native_files["input"],
-            "calc_info": m_native_files["calc_info.json"],
-        }
         output_data = {
-            "schema_name": "qcschema_output",
             "schema_version": 1,
-            "extras": {"outfiles": outfiles, **input_model.extras},
+            "molecule": madmol,  # overwrites with outfile Cartesians in case fix_*=F
+            "extras": {**input_model.extras},
             "native_files": native_files,
-            "properties": qcprops,
-            "provenance": Provenance(creator="MADNESS", version=self.get_version(), routine="madness"),
+            "properties": properties,
+            "provenance": provenance,
             "return_result": retres,
+            "stderr": stderr,
             "stdout": stdout,
+            "success": True,
         }
         # got to even out who needs plump/flat/Decimal/float/ndarray/list
         # Decimal --> str preserves precision
         output_data["extras"]["qcvars"] = {
             k.upper(): str(v) if isinstance(v, Decimal) else v for k, v in qcel.util.unnp(qcvars, flat=True).items()
         }
-        output_data["extras"]["outfiles"] = {
-            "input": native_files["input"],
-            "calc_info": json.loads(native_files["calc_info"]),
-        }
 
-        output_data["success"] = True
         return AtomicResult(**{**input_model.dict(), **output_data})