Merge pull request #4 from joegair/main

update scripts; begin parsing multi-job ORCA files
ekwan · Feb 9, 2023 · e00e298 · e00e298
2 parents b364bfb + 10a952b
commit e00e298
Show file tree

Hide file tree

Showing 5 changed files with 44 additions and 8 deletions.
diff --git a/cctk/parse_orca.py b/cctk/parse_orca.py
@@ -48,7 +48,7 @@ def read_energies(lines):
 
 def split_multiple_inputs(filename):
     """
-    Splits ``filename`` into blocks by searching for _________.
+    Splits ``filename`` into blocks by searching for lines matching ``COMPOUND  JOB <n>`` (``<n>`` = one or more digits).
 
     Args:
         filename (str): path to file
@@ -61,12 +61,12 @@ def split_multiple_inputs(filename):
     start_block = 0
     with open(filename, "r") as lines:
         for idx, line in enumerate(lines):
-            if re.search("Entering Link 1", line): # this will never be true for an Orca file -- this is just a stopgap
+            if re.search("COMPOUND  JOB \d{1,}", line):
                 output_blocks.append(LazyLineObject(file=filename, start=start_block, end=idx))
                 start_block = idx
     output_blocks.append(LazyLineObject(file=filename, start=start_block, end=idx))
 
-    return output_blocks
+    return output_blocks[:]
 
 def read_mulliken_charges(lines):
     """

diff --git a/scripts/analyze_orca.py b/scripts/analyze_orca.py
@@ -71,7 +71,10 @@
 
 df.rename(columns={"rms_displacement": "rms_disp", "quasiharmonic_gibbs_free_energy": "GFE (corrected)"}, inplace=True)
 df["filename"] = df["filename"].apply(lambda x: x[-60:])
-df["GFE (corrected)"] = df["GFE (corrected)"].apply(lambda x: f"{x:.5f}")
+# df["GFE (corrected)"] = df["GFE (corrected)"].apply(lambda x: f"{x:.5f}") 
+# The commented-out line above sometimes breaks the script (when a value in the column is a str, not a float)
+# with the error "ValueError: Unknown format code 'f' for object of type 'str'".
+# see /Users/gairj/research/calcs/ORCA/orca_evaluation/testing_space/aws/test01_general_cloud/output
 df["rms_step"] = df["rms_step"].apply(lambda x: f"\033[92m{x}\033[0m" if float(x or 0) < 0.0001 else f"\033[93m{x}\033[0m")
 df["rms_gradient"] = df["rms_gradient"].apply(lambda x: f"\033[92m{x}\033[0m" if float(x or 0) < 0.003 else f"\033[93m{x}\033[0m")
 df["success"] = df["success"].apply(lambda x: f"\033[92m{x}\033[0m" if x else f"\033[93m{x}\033[0m")

diff --git a/scripts/gen_start_g16.py b/scripts/gen_start_g16.py
@@ -0,0 +1,9 @@
+import cctk, sys
+
+# this is a script for generating gjf files from molecule names
+# usage: gen_start_g16.py mol_name file_name.gjf
+# example: gen_start_g16.py FK506 fk506.gjf
+
+mol = cctk.Molecule.new_from_name(sys.argv[1])
+cctk.GaussianFile.write_molecule_to_file(sys.argv[2], mol, route_card="#p opt b3lyp/6-31g(d) empiricaldispersion=gd3bj")
+print("done")
diff --git a/scripts/gen_start_orca.py b/scripts/gen_start_orca.py
@@ -0,0 +1,17 @@
+import cctk, sys
+
+# this is a script for generating ORCA input (inp) files from molecule names
+# usage: gen_start_orca.py mol_name file_name.inp
+# example: gen_start_orca.py FK506 fk506.inp
+
+mol = cctk.Molecule.new_from_name(sys.argv[1])
+cctk.OrcaFile.write_molecule_to_file(sys.argv[2], mol, 
+	header="! opt b3lyp/G 6-31g(d) D3 Normalprint Printbasis PrintMOs #CPCM",
+	variables={"maxcore": 1000},
+	blocks={"pal": ["nproc 4"] 
+            # , "mdci": ["density none"]
+            # , "cpcm": ["smd true", "SMDsolvent \"dichloromethane\""]
+            # , "scf": ["Print[P_SCFMemInfo] 1"]
+            },
+        )
+print("done")
diff --git a/scripts/resubmit.py b/scripts/resubmit.py
@@ -1,8 +1,8 @@
 import sys, argparse, re, glob
 
-from cctk import GaussianFile
+from cctk import GaussianFile, OrcaFile
 
-#### This is a script to resubmit failed Gaussian files.
+#### This is a script to resubmit failed Gaussian and ORCA files.
 #### Parameters:
 #### ``--type, -t``: which jobs to resubmit 
 ####     "failed": will resubmit jobs with no successes
@@ -16,6 +16,7 @@
 #### NOTE: This file will reject any file that contains the string "slurm."
 
 #### Corin Wagen and Eugene Kwan, 2019
+#### Updated to accept ORCA output: Joe Gair, 2023
 
 parser = argparse.ArgumentParser(prog="resubmit.py")
 parser.add_argument("--type", "-t", type=str)
@@ -31,7 +32,13 @@
         continue
 
     try:
-        output_file = GaussianFile.read_file(filename)
+        if isinstance(GaussianFile.read_file(filename), GaussianFile):
+            output_file = GaussianFile.read_file(filename)
+            newfile_tail = "gjf"
+        elif isinstance(OrcaFile.read_file(filename), OrcaFile):
+            output_file = OrcaFile.read_file(filename)
+            newfile_tail = "inp"
+        else: print(f"error: {filename} is not recognized as an OrcaFile or a GaussianFile")
         if isinstance(output_file, list):
             output_file = output_file[-1]
         if args["perturb"]:
@@ -40,7 +47,7 @@
 
         if ((success == 0) and (args["type"] == "failed")) or (args["type"] == "all") or (args["type"] is None):
             newfile = filename.rsplit('/',1)[-1]
-            newfile = re.sub(r"out$", "gjf", newfile)
+            newfile = re.sub(r"out$", newfile_tail, newfile)
 
             if args["output"]:
                 newfile = args["output"]