In [1]:
import numpy as np
import pandas as pd

In [2]:
def get_df_err(df, df_sub, ref=None):
    """Summarise absolute and relative deviations of ``df`` from ``df_sub``.

    The deviation is ``df - df_sub`` (aligned on the row index). The relative
    deviation is that difference divided by ``ref`` and multiplied by 100.

    Parameters
    ----------
    df : pd.DataFrame or pd.Series
        Values to be evaluated.
    df_sub : pd.DataFrame or pd.Series
        Values subtracted from ``df``. When it is a DataFrame its columns are
        matched to ``df`` by position: a relabelled copy carrying
        ``df.columns`` is used, so differing column headers do not prevent
        alignment. The caller's object is left untouched.
    ref : pd.DataFrame, pd.Series or None, optional
        Denominator of the relative deviation. Defaults to ``df_sub``.
        A DataFrame is relabelled (on a copy) in the same way as ``df_sub``;
        a Series is used as-is and divides every column row-wise.

    Returns
    -------
    pd.DataFrame or pd.Series
        For DataFrame input: one row per metric ("MaxE/A^3", "MAD/A^3",
        "RMSD/A^3", "RelMaxE/%", "RelMAD/%", "RelRMSD/%") and one column per
        column of ``df``. For Series input: a Series indexed by metric name.
    """
    # Column headers of the reference may differ from those of `df`; align
    # them by position on copies so the caller's objects are never mutated.
    relabel = isinstance(df_sub, pd.DataFrame)
    if relabel:
        df_sub = df_sub.copy()
        df_sub.columns = df.columns
    if ref is None:
        ref = df_sub
    elif relabel and isinstance(ref, pd.DataFrame):
        ref = ref.copy()
        ref.columns = df.columns
    # A Series `ref` has no columns to relabel; it is broadcast along the rows.

    df_err_val = df.sub(df_sub, axis="index")
    df_err_rel = df_err_val.div(ref, axis="index") * 100
    df_err = {
        "MaxE/A^3": df_err_val.abs().max(),
        "MAD/A^3": df_err_val.abs().mean(),
        "RMSD/A^3": np.sqrt((df_err_val**2).mean()),
        "RelMaxE/%": df_err_rel.abs().max(),
        "RelMAD/%": df_err_rel.abs().mean(),
        "RelRMSD/%": np.sqrt((df_err_rel**2).mean()),
    }
    # With Series input every metric is a scalar, and building a DataFrame
    # from all-scalar values raises ValueError; fall back to a Series then.
    try:
        return pd.DataFrame(df_err).T
    except ValueError:
        return pd.Series(df_err)

## Table of Small14 (Isotropic)

In [3]:
# Raw table: first row is the column header, first two columns form the
# two-level row index.
df_iso_data = pd.read_csv(
    filepath_or_buffer="small14-CCSDt-isotropic.csv",
    index_col=[0, 1],
    header=[0],
)

In [4]:
# Basis-set labels: plain sets first (valence, then core-valence), followed by
# the bracketed two-point labels in the same valence / core-valence order.
basis_list = [
    f"aug-cc-p{core}V{level}Z"
    for level_group in (("D", "T", "Q", "5"), ("[DT]", "[TQ]", "[Q5]"))
    for core in ("", "C")
    for level in level_group
]
# Molecule labels used as the second index level.
mol_list = [
    "Cl2", "CO", "CO2", "H2O", "N2", "NH3", "O2",
    "PH3", "SH2", "SiH4", "SO-trip", "SO2", "FCN", "HCHS",
]

In [5]:
# Columns evaluated per basis set.
contrib_list = ["SCF", "Corr2", "CorrD", "CorrD(T)"]
# Reference table columns: the contributions above plus the method totals.
contrib_ref_list = [*contrib_list, "MP2", "CCSD", "CCSD(T)"]

## Generate Data for Validation

In [6]:
# Empty frame with one row per (basis set, molecule) pair and one column per
# contribution; filled in by the following cells.
df_iso = pd.DataFrame(
    columns=contrib_list,
    index=pd.MultiIndex.from_product([basis_list, mol_list]),
)

In [7]:
# Split the raw method totals into contributions:
#   Corr2    = MP2     - SCF
#   CorrD    = CCSD    - MP2
#   CorrD(T) = CCSD(T) - MP2
df_iso["SCF"] = df_iso_data["SCF"]
df_iso["Corr2"] = df_iso_data["MP2"].sub(df_iso_data["SCF"])
df_iso["CorrD"] = df_iso_data["CCSD"].sub(df_iso_data["MP2"])
df_iso["CorrD(T)"] = df_iso_data["CCSD(T)"].sub(df_iso_data["MP2"])

In [8]:
# Two-point combinations for the valence basis sets:
#   target = (w_upper * upper - w_lower * lower) / (w_upper - w_lower)
# The weights 8, 27, 64, 125 are the cubes of 2, 3, 4, 5.
for target, upper, w_upper, lower, w_lower in (
    ("aug-cc-pV[DT]Z", "aug-cc-pVTZ", 27, "aug-cc-pVDZ", 8),
    ("aug-cc-pV[TQ]Z", "aug-cc-pVQZ", 64, "aug-cc-pVTZ", 27),
    ("aug-cc-pV[Q5]Z", "aug-cc-pV5Z", 125, "aug-cc-pVQZ", 64),
):
    # np.asarray drops the index so the values are written positionally.
    df_iso.loc[target] = np.asarray(
        (w_upper * df_iso.loc[upper] - w_lower * df_iso.loc[lower]) / (w_upper - w_lower)
    )

In [9]:
# Two-point combinations for the core-valence basis sets:
#   target = (w_upper * upper - w_lower * lower) / (w_upper - w_lower)
# The weights 8, 27, 64, 125 are the cubes of 2, 3, 4, 5.
for target, upper, w_upper, lower, w_lower in (
    ("aug-cc-pCV[DT]Z", "aug-cc-pCVTZ", 27, "aug-cc-pCVDZ", 8),
    ("aug-cc-pCV[TQ]Z", "aug-cc-pCVQZ", 64, "aug-cc-pCVTZ", 27),
    ("aug-cc-pCV[Q5]Z", "aug-cc-pCV5Z", 125, "aug-cc-pCVQZ", 64),
):
    # np.asarray drops the index so the values are written positionally.
    df_iso.loc[target] = np.asarray(
        (w_upper * df_iso.loc[upper] - w_lower * df_iso.loc[lower]) / (w_upper - w_lower)
    )

In [10]:
# Blank the SCF column for every bracketed (two-point) basis label.
# NOTE(review): presumably the two-point formula is only meant for the
# correlation contributions - confirm.
extrapolated_bases = [
    f"aug-cc-p{core}V[{pair}]Z"
    for core in ("", "C")
    for pair in ("DT", "TQ", "Q5")
]
df_iso.loc[extrapolated_bases, "SCF"] = np.nan

In [11]:
# Reference values per molecule: SCF from aug-cc-pCV5Z, correlation
# contributions from aug-cc-pCV[Q5]Z.
df_iso_ref = pd.DataFrame(columns=contrib_ref_list, index=mol_list)
df_iso_ref["SCF"] = df_iso.loc["aug-cc-pCV5Z", "SCF"]
df_iso_ref[["Corr2", "CorrD", "CorrD(T)"]] = df_iso.loc[
    "aug-cc-pCV[Q5]Z", ["Corr2", "CorrD", "CorrD(T)"]
]
# Rebuild the method totals from the contributions.
df_iso_ref["MP2"] = df_iso_ref["SCF"].add(df_iso_ref["Corr2"])
for method, increment in (("CCSD", "CorrD"), ("CCSD(T)", "CorrD(T)")):
    df_iso_ref[method] = df_iso_ref["MP2"].add(df_iso_ref[increment])

In [12]:
# Persist the contribution table (including the two-point rows).
df_iso.to_csv(path_or_buf="small14-isotropic.csv")

## Validation Results (Table 2 in main text)

In [13]:
# Relative RMSD (in %) of each contribution per basis set, measured against
# the reference contributions and normalised by the reference CCSD(T) column.
df_iso_valid = pd.DataFrame(columns=contrib_list, index=basis_list)
for basis in basis_list:
    basis_errors = get_df_err(
        df=df_iso.loc[basis],
        df_sub=df_iso_ref[contrib_list],
        ref=df_iso_ref["CCSD(T)"],
    )
    df_iso_valid.loc[basis] = basis_errors.loc["RelRMSD/%"]

In [14]:
# Row order for the table: for each level, the valence set followed by its
# core-valence counterpart; each bracketed pair comes right after its upper set.
table2_row_order = [
    f"aug-cc-p{core}V{level}Z"
    for level in ("D", "T", "[DT]", "Q", "[TQ]", "5", "[Q5]")
    for core in ("", "C")
]
df_iso_valid.loc[table2_row_order].style.format("{:.3f}")

Unnamed: 0,SCF,Corr2,CorrD,CorrD(T)
aug-cc-pVDZ,3.385,0.853,1.113,0.787
aug-cc-pCVDZ,3.376,0.799,1.053,0.748
aug-cc-pVTZ,0.703,0.516,0.406,0.297
aug-cc-pCVTZ,0.671,0.385,0.339,0.27
aug-cc-pV[DT]Z,,0.535,0.155,0.144
aug-cc-pCV[DT]Z,,0.373,0.156,0.178
aug-cc-pVQZ,0.097,0.403,0.168,0.124
aug-cc-pCVQZ,0.084,0.244,0.12,0.101
aug-cc-pV[TQ]Z,,0.359,0.111,0.086
aug-cc-pCV[TQ]Z,,0.179,0.131,0.096


## Special Cases (footnotes of Table 2 in main text)

aCV[DT]Z (aug-cc-pCV[DT]Z): excluding O2 and SO (labelled `SO-trip` in the data)

In [15]:
# Molecule labels without "O2" and "SO-trip".
mol_list_exclude_sp = [
    "Cl2", "CO", "CO2", "H2O", "N2", "NH3",
    "PH3", "SH2", "SiH4", "SO2", "FCN", "HCHS",
]

In [16]:
# Relative RMSD (%) for aug-cc-pCV[DT]Z restricted to the reduced molecule set.
get_df_err(
    df=df_iso.loc["aug-cc-pCV[DT]Z"].loc[mol_list_exclude_sp],
    df_sub=df_iso_ref[contrib_list].loc[mol_list_exclude_sp],
    ref=df_iso_ref["CCSD(T)"].loc[mol_list_exclude_sp],
).loc["RelRMSD/%"]

SCF              NaN
Corr2       0.355010
CorrD       0.126222
CorrD(T)    0.135834
Name: RelRMSD/%, dtype: float64

aCV[TQ]Z (aug-cc-pCV[TQ]Z): excluding Cl2

In [17]:
# Molecule labels without "Cl2".
mol_list_exclude_cl2 = [
    "CO", "CO2", "H2O", "N2", "NH3", "O2", "PH3",
    "SH2", "SiH4", "SO-trip", "SO2", "FCN", "HCHS",
]

In [18]:
# Relative RMSD (%) for aug-cc-pCV[TQ]Z restricted to the reduced molecule set.
get_df_err(
    df=df_iso.loc["aug-cc-pCV[TQ]Z"].loc[mol_list_exclude_cl2],
    df_sub=df_iso_ref[contrib_list].loc[mol_list_exclude_cl2],
    ref=df_iso_ref["CCSD(T)"].loc[mol_list_exclude_cl2],
).loc["RelRMSD/%"]

SCF              NaN
Corr2       0.145040
CorrD       0.088966
CorrD(T)    0.069069
Name: RelRMSD/%, dtype: float64