In [1]:
import re
import csv
from collections import defaultdict

In [2]:
# Regular expressions for the ARC log lines of interest.
# The capture groups of each pattern are listed next to it.

# Submission of a conformer job -> (conformer index, job id, TS label)
conformer_run_pattern = re.compile(r"Running local queue job conformer(\d+) \((a\d+)\) using gaussian for (TS\d+)")

# Warning emitted when a job with status "errored," is troubleshot -> (TS label, job name)
failed_job_pattern = re.compile(r"Warning: Troubleshooting (TS\d+) job (conformer\d+|opt_\w+) which failed with status: \"errored,\"")

# Troubleshooting gave up -> (TS label, text between the square brackets)
methods_attempted_pattern = re.compile(r"Error: Could not troubleshoot geometry optimization for (TS\d+)! Tried troubleshooting with the following methods: \[(.*?)\]")

# "TS guess N for TSx." line -> (guess/conformer index, TS label)
successful_conformer_pattern = re.compile(r"TS guess\s+(\d+) for (TS\d+)\.")

# Submission of an optimization job -> (job name, TS label, conformer index)
optimization_run_pattern = re.compile(r"Running local queue job (opt_\w+) using gaussian for (TS\d+) \(conformer (\d+)\)")

# Troubleshooting of an opt job -> (TS label, remainder of the line)
optimization_failure_methods_pattern = re.compile(r"Troubleshooting opt job in gaussian for (TS\d+) using (.*)")

# Non-converged TS report -> (TS label, "{...}" status text)
status_dict_pattern = re.compile(r"TS (TS\d+) did not converge\. Status is:\s+(\{.*\})")

In [3]:
def _empty_conformer_record():
    """Default value for a TS label: no conformers recorded yet."""
    return {"conformers": [], "failed_conformers": {}}


def _empty_optimization_record():
    """Default value for a single optimization: no jobs and no status yet."""
    return {"jobs": [], "status": None}


def _empty_ts_optimizations():
    """Default value for a TS label: a mapping that creates optimization records on demand."""
    return defaultdict(_empty_optimization_record)


# Conformer information, keyed by TS label.
ts_conformers = defaultdict(_empty_conformer_record)

# Optimization information, keyed by TS label and then by optimization key.
ts_optimizations = defaultdict(_empty_ts_optimizations)

In [4]:
# NOTE(review): parse_arc_log is only defined in a later cell (In [8]), so on a
# fresh kernel this call raises NameError, as the recorded output shows.
parse_arc_log("/home/calvin/Dropbox/PersonalFolders/Calvin/ATLAS_Converged/rmg_rxn_746/arc.log")

NameError: name 'parse_arc_log' is not defined

In [5]:
def write_conformers_to_csv(ts_conformers, csv_file_path):
    """Write successful and failed conformers of every TS to a CSV file.

    Args:
        ts_conformers: Mapping of TS label to a dict with two entries:
            ``"conformers"`` - iterable of dicts holding ``'conformer'`` and
            ``'status'``; ``"failed_conformers"`` - dict mapping a conformer id
            to a dict holding ``'status'`` and ``'methods'``.
        csv_file_path: Path of the CSV file to create (overwritten if present).

    The file has the columns TS, Conformer, Status and Methods. Successful
    conformers get an empty Methods cell. For failed conformers a list/tuple of
    methods is written as a single ", "-separated string instead of the Python
    list repr; any other value is written as given.
    """
    fieldnames = ['TS', 'Conformer', 'Status', 'Methods']
    with open(csv_file_path, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for ts, data in ts_conformers.items():
            # Successful conformers carry no troubleshooting methods.
            for conformer in data["conformers"]:
                writer.writerow({
                    'TS': ts,
                    'Conformer': f"Conformer {conformer['conformer']}",
                    'Status': conformer['status'],
                    'Methods': ""
                })

            # Failed conformers: flatten the methods into one readable cell.
            for conformer, details in data["failed_conformers"].items():
                methods = details['methods']
                if isinstance(methods, (list, tuple)):
                    methods = ', '.join(str(method) for method in methods)
                writer.writerow({
                    'TS': ts,
                    'Conformer': f"Conformer {conformer}",
                    'Status': details['status'],
                    'Methods': methods
                })

def write_optimizations_to_csv(ts_optimizations, csv_file_path):
    """Write the optimization records of every TS to a CSV file.

    Args:
        ts_optimizations: Mapping of TS label to a mapping of optimization key
            to a dict holding ``'jobs'`` and, optionally, ``'status'``.
        csv_file_path: Path of the CSV file to create (overwritten if present).

    The file has the columns TS, Optimization, Jobs and Status. A list/tuple of
    jobs is written as a single ", "-separated string instead of the Python
    list repr; any other value is written as given. A missing status is
    written as an empty cell.
    """
    fieldnames = ['TS', 'Optimization', 'Jobs', 'Status']
    with open(csv_file_path, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for ts, optimizations in ts_optimizations.items():
            for opt_num, details in optimizations.items():
                jobs = details['jobs']
                if isinstance(jobs, (list, tuple)):
                    jobs = ', '.join(str(job) for job in jobs)
                writer.writerow({
                    'TS': ts,
                    'Optimization': f"Optimization {opt_num}",
                    'Jobs': jobs,
                    'Status': details.get('status', "")
                })

In [6]:
# Input log and output file names used by the driver cell that follows.
# NOTE(review): the log path is an absolute, machine-specific location.
log_file_path = "/home/calvin/Dropbox/PersonalFolders/Calvin/ATLAS_Converged/rmg_rxn_746/arc.log"
# Output CSV names are relative paths.
conformers_csv = 'conformers.csv'
optimizations_csv = 'optimizations.csv'

In [7]:
    # Parse the log, then dump conformer and optimization data to CSV.
    # NOTE(review): parse_arc_log is not defined yet at this point in the notebook
    # (the recorded output is a NameError). The definition in the later cell In [8]
    # returns a single mapping, not a (conformers, optimizations) pair, so this
    # unpacking does not match it.
    ts_conformers, ts_optimizations = parse_arc_log(log_file_path)
    
    write_conformers_to_csv(ts_conformers, conformers_csv)
    write_optimizations_to_csv(ts_optimizations, optimizations_csv)

    # Report where the results were written.
    print(f"Conformers data written to {conformers_csv}")
    print(f"Optimizations data written to {optimizations_csv}")

NameError: name 'parse_arc_log' is not defined

In [8]:
import re
import csv
from collections import defaultdict

# Define regex patterns

# Submission of a conformer job -> (conformer index, job id, TS label)
conformer_run_pattern = re.compile(
    r"Running local queue job conformer(\d+) \((a\d+)\) using gaussian for (TS\d+)"
)

# Troubleshooting warning for a job with status "errored," -> (TS label, job name)
failed_job_pattern = re.compile(
    r"Warning: Troubleshooting (TS\d+) job (conformer\d+|opt_\w+) which failed with status: \"errored,\""
)

# Troubleshooting gave up -> (TS label, text between the square brackets)
methods_attempted_pattern = re.compile(
    r"Error: Could not troubleshoot geometry optimization for (TS\d+)! Tried troubleshooting with the following methods: \[(.*?)\]"
)

# "TS guess N for TSx." line -> (conformer index, TS label)
successful_conformer_pattern = re.compile(
    r"TS guess\s+(\d+) for (TS\d+)\."
)

# Initialize data structures
# Module-level name kept for cells that reference it. parse_arc_log does not
# write into it; it builds and returns its own mapping on every call.
ts_conformers = defaultdict(dict)  # {TS: {conformer_num: {status: ..., methods: ...}}}


def _mark_conformer(parsed, ts, conformer_num, status):
    """Set the status of a conformer record, creating the record if it is missing."""
    record = parsed[ts].setdefault(conformer_num, {
        "Conformer": f"Conformer {conformer_num}",
        "Status": status,
        "Methods": ""
    })
    record["Status"] = status
    return record


def parse_arc_log(log_file_path):
    """Parse an ARC log file and summarise the state of every TS conformer job.

    Each line is stripped and matched (from its first character) against the
    module-level patterns:

    * ``conformer_run_pattern`` - the conformer is (re)recorded as "RUNNING".
    * ``failed_job_pattern`` - a ``conformerN`` job is marked "FAILED"; every
      single-quoted string on the line four lines further down is stored as its
      "Methods" list. ``opt_*`` jobs are ignored here.
    * ``successful_conformer_pattern`` - the conformer is marked "SUCCESS".

    Args:
        log_file_path: Path of the log file to read.

    Returns:
        A new ``defaultdict(dict)`` shaped as
        ``{ts_label: {conformer_num: {"Conformer": str, "Status": str,
        "Methods": "" or list of str}}}`` with integer conformer numbers.
        A fresh mapping is built on every call, so parsing several log files
        never mixes their results.
    """
    with open(log_file_path, 'r') as file:
        lines = file.readlines()

    # Per-call result: accumulating into a shared module-level dict would merge
    # the conformers of every log parsed during the session.
    parsed = defaultdict(dict)

    # The methods are read from the line this many lines after the warning.
    # NOTE(review): fixed offset taken from the observed log layout - confirm it
    # holds for other ARC versions.
    methods_line_offset = 4

    for i, raw_line in enumerate(lines):
        line = raw_line.strip()

        # Check for conformer runs
        conformer_run_match = conformer_run_pattern.match(line)
        if conformer_run_match:
            conformer_num, _job_id, ts = conformer_run_match.groups()
            conformer_num = int(conformer_num)
            # A (re)submission resets the record, including any stored methods.
            parsed[ts][conformer_num] = {
                "Conformer": f"Conformer {conformer_num}",
                "Status": "RUNNING",
                "Methods": ""
            }
            continue

        # Check for failed jobs
        failed_job_match = failed_job_pattern.match(line)
        if failed_job_match:
            ts, job = failed_job_match.groups()
            if job.startswith("conformer"):
                conformer_num = int(re.findall(r'\d+', job)[0])
                record = _mark_conformer(parsed, ts, conformer_num, "FAILED")

                methods_index = i + methods_line_offset
                methods_line = lines[methods_index].strip() if methods_index < len(lines) else ""
                methods_found = re.findall(r"'([^']+)'", methods_line)
                if methods_found:
                    record["Methods"] = methods_found
            # Optimization (opt_*) jobs are intentionally not handled here.
            continue

        # Check for successful conformers
        successful_conformer_match = successful_conformer_pattern.match(line)
        if successful_conformer_match:
            conformer_num, ts = successful_conformer_match.groups()
            _mark_conformer(parsed, ts, int(conformer_num), "SUCCESS")

    return parsed

def write_conformers_to_csv(ts_conformers, csv_file_path):
    """Dump the parsed conformer table to ``csv_file_path``.

    ``ts_conformers`` maps a TS label to a mapping of conformer number to a
    record holding "Conformer", "Status" and "Methods". One row is written per
    conformer, ordered by conformer number within each TS. A non-empty
    "Methods" value is joined with ", "; an empty one becomes an empty cell.
    """
    columns = ['TS', 'Conformer', 'Status', 'Methods']

    def _rows():
        # Rows are produced lazily, in TS insertion order.
        for ts_label, by_number in ts_conformers.items():
            for _number, info in sorted(by_number.items()):
                label, status, methods = info["Conformer"], info["Status"], info["Methods"]
                if methods:
                    methods_cell = ', '.join(methods)
                else:
                    methods_cell = ""
                yield {
                    'TS': ts_label,
                    'Conformer': label,
                    'Status': status,
                    'Methods': methods_cell,
                }

    with open(csv_file_path, 'w', newline='') as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(_rows())

def main(
    log_file_path="/home/calvin/Dropbox/PersonalFolders/Calvin/ATLAS_Converged/rmg_rxn_746/arc.log",
    conformers_csv='conformers.csv',
):
    """Parse one ARC log and write its conformer table to a CSV file.

    Args:
        log_file_path: Log file to parse. Defaults to the path that was
            previously hard-coded in this function.
        conformers_csv: Output CSV path. Defaults to 'conformers.csv'.

    Calling ``main()`` without arguments behaves as before.
    """
    ts_conformers = parse_arc_log(log_file_path)
    write_conformers_to_csv(ts_conformers, conformers_csv)

    print(f"Conformers data written to {conformers_csv}")


if __name__ == "__main__":
    main()

Conformers data written to conformers.csv


In [9]:
# Parse the log of a different reaction (rmg_rxn_745) and display the result.
# NOTE(review): absolute, machine-specific path.
log_file_path = "/home/calvin/Dropbox/PersonalFolders/Calvin/ATLAS_Converged/rmg_rxn_745/arc.log"

# Rebinds the module-level name to the mapping returned by the parser.
ts_conformers = parse_arc_log(log_file_path)
ts_conformers

defaultdict(dict,
            {'TS0': {0: {'Conformer': 'Conformer 0',
               'Status': 'SUCCESS',
               'Methods': ''},
              1: {'Conformer': 'Conformer 1',
               'Status': 'RUNNING',
               'Methods': ''},
              2: {'Conformer': 'Conformer 2',
               'Status': 'RUNNING',
               'Methods': ''},
              3: {'Conformer': 'Conformer 3',
               'Status': 'SUCCESS',
               'Methods': ''},
              4: {'Conformer': 'Conformer 4',
               'Status': 'FAILED',
               'Methods': ['cartesian',
                'int=(Acc2E=14)',
                'NoSymm',
                'opt=(maxcycle=200)',
                'all_attempted']}}})

In [10]:
# Display the parsed conformer mapping again (same value as the previous cell's output).
ts_conformers

defaultdict(dict,
            {'TS0': {0: {'Conformer': 'Conformer 0',
               'Status': 'SUCCESS',
               'Methods': ''},
              1: {'Conformer': 'Conformer 1',
               'Status': 'RUNNING',
               'Methods': ''},
              2: {'Conformer': 'Conformer 2',
               'Status': 'RUNNING',
               'Methods': ''},
              3: {'Conformer': 'Conformer 3',
               'Status': 'SUCCESS',
               'Methods': ''},
              4: {'Conformer': 'Conformer 4',
               'Status': 'FAILED',
               'Methods': ['cartesian',
                'int=(Acc2E=14)',
                'NoSymm',
                'opt=(maxcycle=200)',
                'all_attempted']}}})

In [11]:
# Prepare for parsing the optimization jobs of TS0 from the ARC log.

# Uses ts_conformers from the previous step: a defaultdict mapping each TS label
# to its conformer records. Indexing with "TS0" creates an empty entry if that
# label is missing.

# Conformer numbers of TS0 whose recorded Status is "SUCCESS".
successful_conformers = [conformer for conformer, details in ts_conformers["TS0"].items() if details["Status"] == "SUCCESS"]

In [12]:
# Display the conformer numbers selected in the previous cell.
successful_conformers

[0, 3]

In [13]:
# Collect the optimization, fine-optimization and frequency jobs that were run
# for each TS0 conformer, by scanning arc.log line by line.
#
# Reads `successful_conformers` (defined in an earlier cell) and produces
# `ts_optimizations`, shaped as
#   {"TS0": {conformer (str): {"job": [...], "opt_job": ..., "opt_status": ...,
#                              "freq_job": ..., "freq": ...,
#                              "status": ..., "methods": [...]}}}
# Only the keys whose log lines were found are present.

ts_optimizations = {"TS0": {}}
#log_file_path = "/home/calvin/Dropbox/PersonalFolders/Calvin/ATLAS_Converged/rmg_rxn_746/arc.log"
log_file_path = "/home/calvin/Dropbox/PersonalFolders/Calvin/ATLAS_Converged/rmg_rxn_745/arc.log"

with open(log_file_path, 'r') as file:
    lines = file.readlines()

# --- Patterns, compiled once instead of on every loop iteration ---------------

# "TS guess N for TSx." -> (conformer index, TS label)
successful_conformer_pattern = re.compile(
    r"TS guess\s+(\d+) for (TS\d+)\."
)

# Optimization job submission -> (job name, TS label, conformer index)
optimization_run_pattern = re.compile(
    r"Running local queue job (opt_\w+) using gaussian for (TS\d+) \(conformer (\d+)\)"
)
job_number_pattern = optimization_run_pattern

# Troubleshooting gave up -> (TS label, text between the square brackets)
methods_attempted_pattern = re.compile(
    r"Error: Could not troubleshoot geometry optimization for (TS\d+)! Tried troubleshooting with the following methods: \[(.*?)\]"
)

# Any job submission that names a conformer -> (job name, TS label, conformer index)
opt_pattern = re.compile(
    r"Running local queue job (\w+) using gaussian for (TS\d+) .*?\(conformer (\d+)\)"
)

# Any job submission -> (job name, TS label)
freq_pattern = re.compile(
    r"Running local queue job (\w+) using gaussian for (TS\d+)"
)

# --- Sanity check --------------------------------------------------------------
# Every conformer listed as successful must have a "TS guess N for TS0." line in
# this log; anything left over at the end is reported as an error.
successful_conformers_copy = successful_conformers.copy()
for line in lines:
    successful_conformer_match = successful_conformer_pattern.match(line.strip())
    if successful_conformer_match:
        conformer_num, ts = successful_conformer_match.groups()
        conformer_num = int(conformer_num)
        if ts == "TS0" and conformer_num in successful_conformers_copy:
            successful_conformers_copy.remove(conformer_num)

if successful_conformers_copy:
    raise ValueError(f"Failed to find successful conformers for TS0: {successful_conformers_copy}")

# Highest successful conformer index (None when there is none). The scan below
# does not use it.
max_conformer = max(successful_conformers, default=None)

# --- Main scan -----------------------------------------------------------------
# State carried from line to line:
#   conformer_number - digit groups of the latest "Optimizing species TS0" line;
#                      index 1 is used as the conformer key. None when no
#                      conformer is being tracked.
#   job_number       - groups of the latest optimization-job submission line seen.
job_number = []
conformer_number = None
for i, line in enumerate(lines):

    if "Optimizing species TS0" in line:
        line = line.strip()
        digits = re.findall(r'\d+', line)
        print(digits)
        # Two digit groups are needed (TS index, conformer index); with fewer
        # there is no conformer to track.
        conformer_number = digits if len(digits) >= 2 else None

    if conformer_number:
        # Look one line ahead for the optimization job of the tracked conformer, e.g.
        # "Running local queue job opt_a85626 using gaussian for TS0 (conformer 2)".
        if i + 1 < len(lines):
            job_number_match = job_number_pattern.match(lines[i + 1].strip())
        else:
            # Last line of the file: nothing to look ahead to. Reset the match
            # so a stale one from an earlier line is not processed again.
            print(lines[i])
            job_number_match = None

        if job_number_match:
            job_number = job_number_match.groups()
            if job_number[2] == conformer_number[1]:
                print(job_number[0])
                # First job of this conformer: create its record. Later jobs
                # are appended by the troubleshooting branch below.
                if conformer_number[1] not in ts_optimizations["TS0"]:
                    ts_optimizations["TS0"][conformer_number[1]] = {
                        "job": [job_number[0]]
                    }

    if job_number and conformer_number:
        conf_key = conformer_number[1]
        entry = ts_optimizations["TS0"].get(conf_key)
        if entry is None:
            # No job has been recorded for this conformer yet; nothing to update.
            continue

        if job_number[0] in line and 'Warning: Troubleshooting' in line:
            # The current job was troubleshot: find what happened next.
            print(line)
            for j in range(i + 1, len(lines)):
                if 'Running local queue job' in lines[j] and f"conformer {conf_key}" in lines[j]:
                    next_job_match = job_number_pattern.match(lines[j].strip())
                    if next_job_match is None:
                        # Mentions the conformer but is not an opt_ job line.
                        continue
                    job_number = next_job_match.groups()
                    entry["job"].append(job_number[0])
                    break
                # NOTE(review): the original condition ended with the bare string
                # 'Will not troubleshoot again', which is always truthy and so
                # never restricted the match. It is left out to keep the same
                # behavior - confirm against a real log line before adding it
                # as an `in lines[j]` check.
                elif 'Troubleshooting opt job in gaussian' in lines[j] and 'was not successful' in lines[j]:
                    # The next line lists the methods that were tried.
                    line_methods = lines[j + 1].strip() if j + 1 < len(lines) else ""
                    methods_match = methods_attempted_pattern.match(line_methods)
                    if methods_match is None:
                        continue
                    # Group 0 is the TS label; group 1 is the bracketed list text.
                    methods = methods_match.groups()[1].split(', ')
                    entry["status"] = 'Failed'
                    entry["methods"] = methods
                    # Stop here so later lines cannot overwrite this result or
                    # append unrelated jobs.
                    break

        elif "Running local queue" in line and f"conformer {conf_key}" in line and '(fine opt)' in line:
            fine_opt_match = opt_pattern.match(line.strip())
            if fine_opt_match:
                job_opt_number = fine_opt_match.groups()
                entry["opt_job"] = job_opt_number[0]

                # The fine optimization succeeded if the optimized-geometry line
                # appears two lines after the job's "Ending job" line.
                for y in range(i + 1, len(lines)):
                    if 'Ending job' in lines[y] and job_opt_number[0] in lines[y]:
                        if y + 2 < len(lines) and 'Optimized geometry for TS0' in lines[y + 2]:
                            entry["opt_status"] = 'Success'
                            break

        if entry.get("opt_status") == 'Success':
            # Take the first frequency job submitted after this line and record
            # its imaginary frequency.
            for z in range(i + 1, len(lines)):
                if 'Running local queue job freq_' in lines[z]:
                    freq_match = freq_pattern.match(lines[z].strip())
                    if freq_match is None:
                        continue
                    freq_job_number = freq_match.groups()
                    entry["freq_job"] = freq_job_number[0]

                    # Find where that job ends and read the line right after it.
                    for w in range(z + 1, len(lines)):
                        if 'Ending job' in lines[w] and freq_job_number[0] in lines[w]:
                            if w + 1 < len(lines) and 'TS TS0 has exactly one imaginary frequency' in lines[w + 1]:
                                # The value is the text after the last ':'.
                                freq = lines[w + 1].split(':')[-1].strip()
                                entry["freq"] = freq
                                # This conformer is finished; stop tracking it.
                                conformer_number = None
                                break
                    break
                    


['0', '0']
opt_a85721
['0', '3']
opt_a85744


In [14]:
# Display the optimization / frequency summary collected by the previous cell.
ts_optimizations

{'TS0': {'0': {'job': ['opt_a85721'],
   'opt_job': 'opt_a85729',
   'opt_status': 'Success',
   'freq_job': 'freq_a85739',
   'freq': '-335.2912'},
  '3': {'job': ['opt_a85744'],
   'opt_job': 'opt_a85750',
   'opt_status': 'Success',
   'freq_job': 'freq_a85780',
   'freq': '-126.0315'}}}