In [1]:
import pandas as pd

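This notebook processes a set of `*-time.txt` files (one per method), each created by "grep"-ing the timing line out of all of that method's output files.
Since each method has a slightly different output format, we first convert the text into structured CSV (one `<method>-time.csv` per method) and then merge those CSVs into a single table.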

In [2]:
# Convert the grep'd OpenMOPAC PM7 timings (pm7-time.txt) into pm7-time.csv
# with the columns mol, geom, seconds.
# A matching input line looks like:
#   ./CHG_jobs/astex_1gm8/rmsd1-mmff.pm7:          WALL-CLOCK TIME         =          0.055 SECONDS
# i.e. exactly six whitespace-separated tokens; any other line is ignored.
with open("pm7-time.csv","w") as out_file:
    print('mol,geom,seconds', file=out_file)
    with open("pm7-time.txt") as grep_lines:
        for raw in grep_lines:
            tokens = raw.split()
            if len(tokens) == 6:
                # tokens[0] is the path; in the sample above, component 2 is the
                # molecule and component 3 is the file name, whose text before
                # the first '.' names the geometry
                path_parts = tokens[0].split('/')
                molecule = path_parts[2]
                geometry = path_parts[3].split('.')[0]
                # tokens[4] is the wall-clock value, copied through as text
                wall_seconds = tokens[4]
                print(','.join((molecule, geometry, wall_seconds)), file=out_file)

In [3]:
# Convert the grep'd Orca "TOTAL RUN TIME" lines into one <method>-time.csv
# per method, with the columns mol, geom, seconds.
first_methods = ['b3lyp-svp', 'b3lyp-tz', 'b97-3c', 'dlpno', 'mp2', 'pbe-svp', 'pbe', 'pbeh-3c', 'wb97x']
for method in first_methods:
    with open("{}-time.csv".format(method),'w') as out_file:
        print('mol,geom,seconds', file=out_file)
        with open("{}-time.txt".format(method)) as grep_lines:
            for raw in grep_lines:
                # astex_1gm8 rmsd112-opt.out.bz2 TOTAL RUN TIME: 0 days 0 hours 34 minutes 23 seconds 767 msec
                tokens = raw.split()
                # a well-formed line has exactly 15 tokens; skip anything else
                if len(tokens) == 15:
                    molecule = tokens[0]
                    # geometry name = file name up to its first '.'
                    geometry = tokens[1].split('.')[0]
                    # the numeric fields sit at the odd positions 5..13
                    days, hours, minutes, secs, msec = (
                        float(tokens[pos]) for pos in (5, 7, 9, 11, 13)
                    )
                    # fold days -> hours -> minutes -> seconds, then add the milliseconds
                    total_hours = days * 24 + hours
                    total_minutes = total_hours * 60 + minutes
                    whole_seconds = total_minutes * 60 + secs
                    run_seconds = whole_seconds + msec / 1000.0
                    print(molecule, geometry, run_seconds, sep=',', file=out_file)

In [4]:
# Convert the grep'd xtb "cpu time for all" lines into one <method>-time.csv
# per method, with the columns mol, geom, seconds.
xtb_methods = ['gfn', 'gfn2']
for method in xtb_methods:
    with open("{}-time.csv".format(method),'w') as out_file:
        print('mol,geom,seconds', file=out_file)
        with open("{}-time.txt".format(method)) as grep_lines:
            for raw in grep_lines:
                # astex_1gm8 rmsd112-mmff.gfn cpu  time for all    1.48 s
                tokens = raw.split()
                # a well-formed line has exactly 8 tokens; skip anything else
                if len(tokens) != 8:
                    continue
                # positions: 0 = molecule, 1 = file name, 6 = time value (kept as text)
                molecule, file_name, _, _, _, _, cpu_seconds, _ = tokens
                # geometry name = file name up to its first '.'
                geometry = file_name.partition('.')[0]
                print(','.join((molecule, geometry, cpu_seconds)), file=out_file)

In [6]:
# Build one wide table: a row per (mol, geom) pair and one timing column per method.
# Start from the DLPNO timings and rename the generic 'seconds' column to the method name.
df = pd.read_csv("dlpno-time.csv")
df.columns = ['mol', 'geom', 'dlpno']
# Fold in every other method, one merge at a time.
methods = ['b3lyp-svp', 'b3lyp-tz', 'b97-3c', 'mp2', 'pbe-svp', 'pbe', 'pbeh-3c', 'wb97x', 'gfn', 'gfn2', 'pm7']
for name in methods:
    timings = pd.read_csv("{}-time.csv".format(name))
    timings.columns = ['mol', 'geom', name]
    # inner join on the shared key columns: only (mol, geom) pairs that are
    # present in both tables are kept
    df = df.merge(timings, how='inner', on=['mol', 'geom'])

In [7]:
# sanity check: the merged frame should have the two key columns plus one timing column per method
df.columns

Index(['mol', 'geom', 'dlpno', 'b3lyp-svp', 'b3lyp-tz', 'b97-3c', 'mp2',
       'pbe-svp', 'pbe', 'pbeh-3c', 'wb97x', 'gfn', 'gfn2', 'pm7'],
      dtype='object')

In [8]:
# Save the merged timing table to disk.
# index=True is the pandas default: the row index is written as an extra,
# unnamed first column of the CSV.
df.to_csv("total-timing.csv", index=True)