In [1]:
import numpy as np
import pandas as pd

In [2]:
def get_df_err(df, df_sub, ref=None):
    """Summarise how far ``df`` deviates from ``df_sub``.

    The signed error is ``df - df_sub`` (aligned on the row index) and the
    relative error is that difference divided by ``ref``, in percent.

    Parameters
    ----------
    df : pd.DataFrame or pd.Series
        Values being assessed.
    df_sub : pd.DataFrame or pd.Series
        Values subtracted from ``df``. When this is a DataFrame its columns
        are matched to those of ``df`` by position: the column labels of
        ``df`` are applied to a copy of it.
    ref : pd.DataFrame or pd.Series, optional
        Denominator of the relative error. Defaults to ``df_sub``. A
        DataFrame ``ref`` is relabelled like ``df_sub`` (only when ``df_sub``
        is a DataFrame); a Series ``ref`` is used as is and divides every
        column row by row.

    Returns
    -------
    pd.DataFrame or pd.Series
        For column-wise results, a DataFrame with one row per statistic
        (``"MaxE/A^3"``, ``"MAD/A^3"``, ``"RMSD/A^3"``, ``"RelMaxE/%"``,
        ``"RelMAD/%"``, ``"RelRMSD/%"``) and one column per column of ``df``.
        When every statistic is a scalar (e.g. ``df`` is a Series), a Series
        indexed by statistic name.

    Notes
    -----
    The inputs are never modified: relabelling is done on copies, so the
    caller's ``df_sub`` and ``ref`` keep their own column labels.
    """
    if isinstance(df_sub, pd.DataFrame):
        # The reference may carry different column headers than df, so columns
        # are paired by position. Relabel copies, not the caller's objects.
        ref_follows_df_sub = ref is None or ref is df_sub
        df_sub = df_sub.copy()
        df_sub.columns = df.columns
        if ref_follows_df_sub:
            ref = df_sub
        elif isinstance(ref, pd.DataFrame):
            ref = ref.copy()
            ref.columns = df.columns
        # A Series ref has no columns to relabel; it is applied per row below.
    elif ref is None:
        ref = df_sub

    df_err_val = df.sub(df_sub, axis="index")
    df_err_rel = df_err_val.div(ref, axis="index") * 100

    abs_err = df_err_val.abs()
    abs_rel = df_err_rel.abs()
    df_err = {
        "MaxE/A^3": abs_err.max(),
        "MAD/A^3": abs_err.mean(),
        "RMSD/A^3": np.sqrt((df_err_val**2).mean()),
        "RelMaxE/%": abs_rel.max(),
        "RelMAD/%": abs_rel.mean(),
        "RelRMSD/%": np.sqrt((df_err_rel**2).mean()),
    }
    # With Series input every statistic is a scalar, which pd.DataFrame
    # rejects with ValueError; fall back to a Series in that case.
    try:
        return pd.DataFrame(df_err).T
    except ValueError:
        return pd.Series(df_err)

## Table of Small14 (Anisotropic)

In [3]:
# Raw input table: the first CSV line is the header and the first two columns
# form the row MultiIndex (presumably basis set and molecule, matching how the
# table is aligned against df_aniso later; confirm against the CSV).
df_aniso_data = pd.read_csv(
    "small14-CCSDt-anisotropic.csv",
    header=[0],
    index_col=[0, 1],
)

In [4]:
# Basis-set labels, generated in the order: plain pV (D, T, Q, 5), then pCV
# (D, T, Q, 5), then the bracketed two-basis labels for pV and for pCV.
basis_list = [
    f"aug-cc-p{core}V{zeta}Z"
    for zeta_group in (("D", "T", "Q", "5"), ("[DT]", "[TQ]", "[Q5]"))
    for core in ("", "C")
    for zeta in zeta_group
]
# Molecule labels used as the second level of the row index.
mol_list = [
    "Cl2", "CO", "CO2", "H2O", "N2", "NH3", "O2",
    "PH3", "SH2", "SiH4", "SO-trip", "SO2", "FCN", "HCHS",
]

In [5]:
# Columns of the per-basis table: SCF plus three correlation increments.
contrib_list = ["SCF", "Corr2", "CorrD", "CorrD(T)"]
# The reference table carries the same columns plus the summed totals.
contrib_ref_list = [*contrib_list, "MP2", "CCSD", "CCSD(T)"]

## Generate Data for Validation

In [6]:
# Empty table with one row per (basis set, molecule) pair and one column per
# contribution; the cells are filled in by the following cells.
df_aniso = pd.DataFrame(
    index=pd.MultiIndex.from_product([basis_list, mol_list]),
    columns=contrib_list,
)

In [7]:
# Split the raw totals into increments. Assignment aligns on the row index, so
# rows absent from the CSV stay NaN. Both CorrD and CorrD(T) are measured
# relative to MP2.
df_aniso["SCF"] = df_aniso_data["SCF"]
df_aniso["Corr2"] = df_aniso_data["MP2"].sub(df_aniso_data["SCF"])
df_aniso["CorrD"] = df_aniso_data["CCSD"].sub(df_aniso_data["MP2"])
df_aniso["CorrD(T)"] = df_aniso_data["CCSD(T)"].sub(df_aniso_data["MP2"])

In [8]:
# Two-basis combinations for the aug-cc-pVXZ family:
#   (n_large^3 * larger - n_small^3 * smaller) / (n_large^3 - n_small^3)
# with n = 2, 3, 4, 5 (presumably the cardinal numbers of D, T, Q, 5).
# np.asarray drops the labels so the values are written positionally.
for target_basis, large_basis, small_basis, n_large, n_small in [
    ("aug-cc-pV[DT]Z", "aug-cc-pVTZ", "aug-cc-pVDZ", 3, 2),
    ("aug-cc-pV[TQ]Z", "aug-cc-pVQZ", "aug-cc-pVTZ", 4, 3),
    ("aug-cc-pV[Q5]Z", "aug-cc-pV5Z", "aug-cc-pVQZ", 5, 4),
]:
    w_large, w_small = n_large**3, n_small**3
    df_aniso.loc[target_basis] = np.asarray(
        (w_large * df_aniso.loc[large_basis] - w_small * df_aniso.loc[small_basis])
        / (w_large - w_small)
    )

In [9]:
# Two-basis combinations for the aug-cc-pCVXZ family:
#   (n_large^3 * larger - n_small^3 * smaller) / (n_large^3 - n_small^3)
# with n = 2, 3, 4, 5 (presumably the cardinal numbers of D, T, Q, 5).
# np.asarray drops the labels so the values are written positionally.
for target_basis, large_basis, small_basis, n_large, n_small in [
    ("aug-cc-pCV[DT]Z", "aug-cc-pCVTZ", "aug-cc-pCVDZ", 3, 2),
    ("aug-cc-pCV[TQ]Z", "aug-cc-pCVQZ", "aug-cc-pCVTZ", 4, 3),
    ("aug-cc-pCV[Q5]Z", "aug-cc-pCV5Z", "aug-cc-pCVQZ", 5, 4),
]:
    w_large, w_small = n_large**3, n_small**3
    df_aniso.loc[target_basis] = np.asarray(
        (w_large * df_aniso.loc[large_basis] - w_small * df_aniso.loc[small_basis])
        / (w_large - w_small)
    )

In [10]:
# Blank the SCF column for every bracketed (two-basis) label, so only the
# correlation columns keep combined values there.
df_aniso.loc[
    [
        f"aug-cc-p{core}V[{pair}]Z"
        for core in ("", "C")
        for pair in ("DT", "TQ", "Q5")
    ],
    "SCF",
] = np.nan

In [11]:
# Reference table, one row per molecule: SCF from aug-cc-pCV5Z, correlation
# increments from aug-cc-pCV[Q5]Z, and totals rebuilt from those pieces.
df_aniso_ref = pd.DataFrame(index=mol_list, columns=contrib_ref_list)
df_aniso_ref["SCF"] = df_aniso.loc["aug-cc-pCV5Z", "SCF"]
df_aniso_ref[["Corr2", "CorrD", "CorrD(T)"]] = df_aniso.loc[
    "aug-cc-pCV[Q5]Z", ["Corr2", "CorrD", "CorrD(T)"]
]
# Both CCSD and CCSD(T) are rebuilt on top of MP2, matching how the
# increments were defined.
df_aniso_ref["MP2"] = df_aniso_ref["SCF"].add(df_aniso_ref["Corr2"])
df_aniso_ref["CCSD"] = df_aniso_ref["MP2"].add(df_aniso_ref["CorrD"])
df_aniso_ref["CCSD(T)"] = df_aniso_ref["MP2"].add(df_aniso_ref["CorrD(T)"])

In [12]:
# Persist the derived per-basis contribution table next to the notebook.
df_aniso.to_csv(path_or_buf="small14-anisotropic.csv")

## Validation Results (Table 3 in main text)

In [13]:
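# Not executed; kept for reference. Presumably the hard-coded `mask` used below was
# obtained this way, i.e. molecules whose reference CCSD(T) value exceeds 0.5 (to be confirmed):
# mask = list(df_aniso_ref["CCSD(T)"].index[df_aniso_ref["CCSD(T)"] > 0.5])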

In [14]:
# Molecules included in the validation statistics.
mask = [
    "Cl2", "CO", "CO2", "N2",
    "SO2", "FCN", "HCHS",
]

In [15]:
# Relative RMSD (in percent of the reference CCSD(T) value) of every
# contribution, per basis set, over the molecules in `mask`.
df_aniso_valid = pd.DataFrame(index=basis_list, columns=contrib_list)
for basis in basis_list:
    basis_errors = get_df_err(
        df_aniso.loc[basis].loc[mask],
        df_aniso_ref[contrib_list].loc[mask],
        ref=df_aniso_ref["CCSD(T)"].loc[mask],
    )
    df_aniso_valid.loc[basis] = basis_errors.loc["RelRMSD/%"]

In [16]:
# Display order: for each of D, T, [DT], Q, [TQ], 5, [Q5] show the pV row
# followed by the pCV row.
df_aniso_valid.loc[
    [
        f"aug-cc-p{core}V{zeta}Z"
        for zeta in ("D", "T", "[DT]", "Q", "[TQ]", "5", "[Q5]")
        for core in ("", "C")
    ]
].style.format("{:.3f}")

Unnamed: 0,SCF,Corr2,CorrD,CorrD(T)
aug-cc-pVDZ,5.825,1.161,1.515,0.864
aug-cc-pCVDZ,5.939,1.137,1.32,0.842
aug-cc-pVTZ,1.632,0.468,0.536,0.328
aug-cc-pCVTZ,1.127,0.42,0.395,0.313
aug-cc-pV[DT]Z,,0.603,0.2,0.177
aug-cc-pCV[DT]Z,,0.408,0.173,0.177
aug-cc-pVQZ,0.588,0.39,0.276,0.186
aug-cc-pCVQZ,0.203,0.251,0.122,0.101
aug-cc-pV[TQ]Z,,0.417,0.246,0.23
aug-cc-pCV[TQ]Z,,0.229,0.168,0.131


## Special Cases (footnotes of Table 3 in main text)

In [17]:
# Reduced molecule set for the special-case statistics.
mask = [
    "CO", "CO2", "N2",
    "SO2", "FCN",
]

In [18]:
# Same relative-RMSD table as the main validation, recomputed over the
# current (reduced) `mask`.
t = pd.DataFrame(index=basis_list, columns=contrib_list)
for basis in basis_list:
    basis_errors = get_df_err(
        df_aniso.loc[basis].loc[mask],
        df_aniso_ref[contrib_list].loc[mask],
        ref=df_aniso_ref["CCSD(T)"].loc[mask],
    )
    t.loc[basis] = basis_errors.loc["RelRMSD/%"]

In [19]:
# Only the [DT] and [TQ] rows are shown here (pV row followed by pCV row);
# the single-basis and [Q5] rows are left out.
t.loc[
    [
        f"aug-cc-p{core}V{pair}Z"
        for pair in ("[DT]", "[TQ]")
        for core in ("", "C")
    ]
].style.format("{:.3f}")

Unnamed: 0,SCF,Corr2,CorrD,CorrD(T)
aug-cc-pV[DT]Z,,0.372,0.23,0.201
aug-cc-pCV[DT]Z,,0.41,0.139,0.099
aug-cc-pV[TQ]Z,,0.458,0.195,0.134
aug-cc-pCV[TQ]Z,,0.264,0.056,0.075


## Compare RMSD results (Table S7 in Supporting Information)

In [20]:
# Absolute RMSD of every contribution per basis set, computed separately for
# two molecule groups and stored side by side under a two-level column index.
mask1 = ['Cl2', 'CO', 'CO2', 'N2', 'SO2', 'FCN', 'HCHS']
mask2 = ['H2O', 'NH3', 'PH3', 'SH2', 'SiH4']
t = pd.DataFrame(index=basis_list, columns=pd.MultiIndex.from_product([["large aniso", "small aniso"], contrib_list]))
for basis in basis_list:
    for group_label, group_mols in (("large aniso", mask1), ("small aniso", mask2)):
        # `.loc` (not the scalar accessor `.at`) because each assignment fills
        # the whole block of sub-columns under `group_label` for this row.
        # np.asarray drops the labels so the values are written positionally.
        # The third argument only affects the relative metrics; it is kept so
        # the call matches the other validation cells.
        t.loc[basis, group_label] = np.asarray(get_df_err(
            df_aniso.loc[basis].loc[group_mols],
            df_aniso_ref[contrib_list].loc[group_mols],
            df_aniso_ref["CCSD(T)"].loc[group_mols]).loc["RMSD/A^3"])

In [21]:
# Display order: for each of D, T, [DT], Q, [TQ], 5, [Q5] show the pV row
# followed by the pCV row.
t.loc[
    [
        f"aug-cc-p{core}V{zeta}Z"
        for zeta in ("D", "T", "[DT]", "Q", "[TQ]", "5", "[Q5]")
        for core in ("", "C")
    ]
].style.format("{:.4f}")

Unnamed: 0_level_0,large aniso,large aniso,large aniso,large aniso,small aniso,small aniso,small aniso,small aniso
Unnamed: 0_level_1,SCF,Corr2,CorrD,CorrD(T),SCF,Corr2,CorrD,CorrD(T)
aug-cc-pVDZ,0.101,0.0177,0.0284,0.0142,0.0347,0.0175,0.0218,0.0164
aug-cc-pCVDZ,0.0997,0.0162,0.0246,0.0134,0.0339,0.0176,0.0202,0.0158
aug-cc-pVTZ,0.0282,0.0101,0.0105,0.0057,0.0117,0.016,0.0079,0.006
aug-cc-pCVTZ,0.0178,0.0061,0.0082,0.006,0.011,0.0139,0.0072,0.0059
aug-cc-pV[DT]Z,,0.013,0.0036,0.0032,,0.02,0.0044,0.0038
aug-cc-pCV[DT]Z,,0.0077,0.0037,0.0041,,0.0168,0.0052,0.0046
aug-cc-pVQZ,0.0112,0.008,0.0053,0.0037,0.003,0.0125,0.0036,0.0022
aug-cc-pCVQZ,0.0041,0.003,0.0021,0.0018,0.0038,0.0087,0.0036,0.0023
aug-cc-pV[TQ]Z,,0.0076,0.0054,0.0053,,0.0103,0.0032,0.0024
aug-cc-pCV[TQ]Z,,0.0023,0.004,0.0028,,0.0051,0.0035,0.0025
