# Processing of Reference Values for T144

In [1]:
import pandas as pd
import numpy as np

In [2]:
def get_df_err(df, df_sub, ref=None):
    """Summarise the deviation of ``df`` from ``df_sub`` as error statistics.

    Parameters
    ----------
    df : pd.DataFrame or pd.Series
        Values being assessed.
    df_sub : pd.DataFrame or pd.Series
        Values subtracted from ``df``; rows are aligned on the index.
    ref : pd.DataFrame or pd.Series, optional
        Denominator of the relative error. Defaults to ``df_sub``.

    Returns
    -------
    pd.DataFrame or pd.Series
        The six statistics "MaxE/A^3", "MAD/A^3", "RMSD/A^3", "RelMaxE/%",
        "RelMAD/%" and "RelRMSD/%". When the statistics are per-column Series
        the result is a DataFrame with one row per statistic; when they are
        scalars (Series inputs) the result is a Series.

    Notes
    -----
    The inputs are never modified: column relabelling is done on copies.
    """
    if ref is None:
        ref = df_sub
    # The column headers of the subtrahend/reference may differ from those of
    # ``df``; relabel them positionally so that column alignment succeeds.
    # Work on copies so the caller's objects keep their own headers.
    if isinstance(df_sub, pd.DataFrame):
        ref_is_df_sub = ref is df_sub
        df_sub = df_sub.copy()
        df_sub.columns = df.columns
        if ref_is_df_sub:
            ref = df_sub
        elif isinstance(ref, pd.DataFrame):
            ref = ref.copy()
            ref.columns = df.columns
        # A Series ``ref`` has no columns to relabel and is used as is.
    df_err_val = df.sub(df_sub, axis="index")
    df_err_rel = df_err_val.div(ref, axis="index") * 100
    df_err = {
        "MaxE/A^3": df_err_val.abs().max(),
        "MAD/A^3": df_err_val.abs().mean(),
        "RMSD/A^3": np.sqrt((df_err_val**2).mean()),
        "RelMaxE/%": df_err_rel.abs().max(),
        "RelMAD/%": df_err_rel.abs().mean(),
        "RelRMSD/%": np.sqrt((df_err_rel**2).mean()),
    }
    # With Series inputs every statistic is a scalar, and building a DataFrame
    # from all-scalar values raises ValueError; fall back to a Series then.
    try:
        return pd.DataFrame(df_err).T
    except ValueError:
        return pd.Series(df_err)

## Read Data

In [3]:
# Coupled-cluster tables (columns used later: "SCF", "MP2", "CCSD", "CCSD(T)").
# The first two CSV columns form a two-level row index; its first level is
# addressed by basis-set labels such as "aug-cc-pVDZ" further down.
df_cc_iso = pd.read_csv(
    "T144-CCSDt-isotropic.csv",
    index_col=[0, 1],
    header=[0],
)
df_cc_aniso = pd.read_csv(
    "T144-CCSDt-anisotropic.csv",
    index_col=[0, 1],
    header=[0],
)

In [4]:
# RI-MP2 tables (columns used later: "RI-JK", "RI-MP2"), read with the same
# two-level row index layout as the coupled-cluster tables.
df_mp_iso = pd.read_csv(
    "T144-RIMP2-isotropic.csv",
    index_col=[0, 1],
    header=[0],
)
df_mp_aniso = pd.read_csv(
    "T144-RIMP2-anisotropic.csv",
    index_col=[0, 1],
    header=[0],
)

In [5]:
# Row labels found under the "aug-cc-pVDZ" key of the isotropic coupled-cluster
# table; they serve as the index of the reference tables built below.
mol_list = df_cc_iso.loc["aug-cc-pVDZ"].index.tolist()

In [6]:
mol_set1 = ['0097', '0654', '0142', '0103', '0757', '0399', '0393', '0580', '0158', '0599', '1193', '0769', '1032', '1475', '0119', '0886', '0351', '0470', '0213', '0093', '0762', '1197', '0655', '0353', '0818', '1207', '1241', '0223', '1194', '1046', '0909', '0205', '0670', '0767']
mol_set2 = ['0529', '0533', '1047', '1048', '1049', '1563', '1564', '0556', '1074', '1588', '0056', '1085', '1603', '1604', '1607', '1610', '1617', '1619', '1108', '1109', '1622', '1625', '1626', '1115', '1117', '1119', '1630', '1633', '1634', '0617', '1135', '0112', '0117', '1149', '0129', '0159', '1209', '0710', '0722', '1250', '1251', '1256', '0753', '0245', '0761', '0765', '0256', '0257', '0768', '0267', '0276', '0281', '0282', '0287', '1314', '1315', '0306', '0820', '1332', '0318', '1344', '0833', '0841', '0843', '0332', '0846', '0847', '0336', '1361', '0852', '0342', '1367', '0346', '1378', '1382', '0361', '1391', '1395', '0884', '1396', '0890', '0381', '1406', '1408', '1409', '0390', '0397', '1428', '1429', '1438', '1444', '0936', '0425', '1455', '1457', '0438', '0445', '1479', '0969', '0460', '0974', '0976', '0987', '0989', '1502', '0998', '0489', '1003', '1014', '1019']

mol_set1 = [int(i) for i in mol_set1]
mol_set2 = [int(i) for i in mol_set2]

## Correlation Data

In [7]:
# "Corr2": the RI-MP2 value minus the RI-JK value, for every row of the table.
df_corr2_iso = df_mp_iso["RI-MP2"].sub(df_mp_iso["RI-JK"])
df_corr2_aniso = df_mp_aniso["RI-MP2"].sub(df_mp_aniso["RI-JK"])

In [8]:
# "CorrD": the CCSD value minus the MP2 value, for every row of the table.
df_corrd_iso = df_cc_iso["CCSD"].sub(df_cc_iso["MP2"])
df_corrd_aniso = df_cc_aniso["CCSD"].sub(df_cc_aniso["MP2"])

In [9]:
# "CorrD(T)": the CCSD(T) value minus the MP2 value, for every row of the table.
df_corrdt_iso = df_cc_iso["CCSD(T)"].sub(df_cc_iso["MP2"])
df_corrdt_aniso = df_cc_aniso["CCSD(T)"].sub(df_cc_aniso["MP2"])

## Isotropic Reference (Table S3)

In [10]:
# Empty (all-NaN) float table, one row per molecule, filled column by column.
df_ref_iso = pd.DataFrame(
    index=mol_list,
    columns=["SCF", "Corr2", "CorrD", "CorrD(T)", "MP2", "CCSD", "CCSD(T)"],
    dtype=float,
)
# SCF reference: the RI-JK value at aug-cc-pCV5Z.
df_ref_iso.loc[:, "SCF"] = df_mp_iso.loc["aug-cc-pCV5Z", "RI-JK"]
# Corr2 reference: weights 125 = 5**3 and 64 = 4**3, divisor 61 = 125 - 64.
# Presumably a two-point cubic basis-set extrapolation (5Z/QZ) — confirm.
df_ref_iso.loc[:, "Corr2"] = (
    125 * df_corr2_iso.loc["aug-cc-pCV5Z"]
    - 64 * df_corr2_iso.loc["aug-cc-pCVQZ"]
) / 61

In [11]:
# mol_set1: combine aug-cc-pVQZ and aug-cc-pVTZ (weights 64 and 27, divisor 37).
df_ref_iso.loc[mol_set1, "CorrD"] = (
    64 * df_corrd_iso.loc["aug-cc-pVQZ"].loc[mol_set1]
    - 27 * df_corrd_iso.loc["aug-cc-pVTZ"].loc[mol_set1]
) / 37
# mol_set2: combine aug-cc-pVTZ and aug-cc-pVDZ (weights 27 and 8, divisor 19).
df_ref_iso.loc[mol_set2, "CorrD"] = (
    27 * df_corrd_iso.loc["aug-cc-pVTZ"].loc[mol_set2]
    - 8 * df_corrd_iso.loc["aug-cc-pVDZ"].loc[mol_set2]
) / 19

In [12]:
# mol_set1: combine aug-cc-pVQZ and aug-cc-pVTZ (weights 64 and 27, divisor 37).
df_ref_iso.loc[mol_set1, "CorrD(T)"] = (
    64 * df_corrdt_iso.loc["aug-cc-pVQZ"].loc[mol_set1]
    - 27 * df_corrdt_iso.loc["aug-cc-pVTZ"].loc[mol_set1]
) / 37
# mol_set2: combine aug-cc-pVTZ and aug-cc-pVDZ (weights 27 and 8, divisor 19).
df_ref_iso.loc[mol_set2, "CorrD(T)"] = (
    27 * df_corrdt_iso.loc["aug-cc-pVTZ"].loc[mol_set2]
    - 8 * df_corrdt_iso.loc["aug-cc-pVDZ"].loc[mol_set2]
) / 19

In [13]:
# Totals are sums of the components: MP2 = SCF + Corr2, then each
# coupled-cluster total adds its own correction on top of MP2.
df_ref_iso.loc[:, "MP2"] = df_ref_iso["SCF"].add(df_ref_iso["Corr2"])
df_ref_iso.loc[:, "CCSD"] = df_ref_iso["MP2"].add(df_ref_iso["CorrD"])
df_ref_iso.loc[:, "CCSD(T)"] = df_ref_iso["MP2"].add(df_ref_iso["CorrD(T)"])

In [14]:
# Write the assembled isotropic reference table to a CSV file.
df_ref_iso.to_csv("T144-ref-iso.csv")

## Anisotropic Reference (Table S4)

In [15]:
# Empty (all-NaN) float table, one row per molecule, filled column by column.
df_ref_aniso = pd.DataFrame(
    index=mol_list,
    columns=["SCF", "Corr2", "CorrD", "CorrD(T)", "MP2", "CCSD", "CCSD(T)"],
    dtype=float,
)
# SCF reference: the RI-JK value at aug-cc-pCV5Z.
df_ref_aniso.loc[:, "SCF"] = df_mp_aniso.loc["aug-cc-pCV5Z", "RI-JK"]
# Corr2 reference: weights 125 = 5**3 and 64 = 4**3, divisor 61 = 125 - 64.
# Presumably a two-point cubic basis-set extrapolation (5Z/QZ) — confirm.
df_ref_aniso.loc[:, "Corr2"] = (
    125 * df_corr2_aniso.loc["aug-cc-pCV5Z"]
    - 64 * df_corr2_aniso.loc["aug-cc-pCVQZ"]
) / 61

In [16]:
# mol_set1: combine aug-cc-pVQZ and aug-cc-pVTZ (weights 64 and 27, divisor 37).
df_ref_aniso.loc[mol_set1, "CorrD"] = (
    64 * df_corrd_aniso.loc["aug-cc-pVQZ"].loc[mol_set1]
    - 27 * df_corrd_aniso.loc["aug-cc-pVTZ"].loc[mol_set1]
) / 37
# mol_set2: combine aug-cc-pVTZ and aug-cc-pVDZ (weights 27 and 8, divisor 19).
df_ref_aniso.loc[mol_set2, "CorrD"] = (
    27 * df_corrd_aniso.loc["aug-cc-pVTZ"].loc[mol_set2]
    - 8 * df_corrd_aniso.loc["aug-cc-pVDZ"].loc[mol_set2]
) / 19

In [17]:
# mol_set1: combine aug-cc-pVQZ and aug-cc-pVTZ (weights 64 and 27, divisor 37).
df_ref_aniso.loc[mol_set1, "CorrD(T)"] = (
    64 * df_corrdt_aniso.loc["aug-cc-pVQZ"].loc[mol_set1]
    - 27 * df_corrdt_aniso.loc["aug-cc-pVTZ"].loc[mol_set1]
) / 37
# mol_set2: combine aug-cc-pVTZ and aug-cc-pVDZ (weights 27 and 8, divisor 19).
df_ref_aniso.loc[mol_set2, "CorrD(T)"] = (
    27 * df_corrdt_aniso.loc["aug-cc-pVTZ"].loc[mol_set2]
    - 8 * df_corrdt_aniso.loc["aug-cc-pVDZ"].loc[mol_set2]
) / 19

In [18]:
# Totals are sums of the components: MP2 = SCF + Corr2, then each
# coupled-cluster total adds its own correction on top of MP2.
df_ref_aniso.loc[:, "MP2"] = df_ref_aniso["SCF"].add(df_ref_aniso["Corr2"])
df_ref_aniso.loc[:, "CCSD"] = df_ref_aniso["MP2"].add(df_ref_aniso["CorrD"])
df_ref_aniso.loc[:, "CCSD(T)"] = df_ref_aniso["MP2"].add(df_ref_aniso["CorrD(T)"])

In [19]:
# Write the assembled anisotropic reference table to a CSV file.
df_ref_aniso.to_csv("T144-ref-aniso.csv")

## Benchmark

### Error between RI-JK/RI-Corr2 and numerical SCF/Corr2 (sec 3.1)

In [26]:
# Relative RMSD (in %) of the RI-JK values against the SCF values from the
# coupled-cluster table, both taken at aug-cc-pVTZ.
get_df_err(
    df=df_mp_iso.loc["aug-cc-pVTZ"]["RI-JK"],
    df_sub=df_cc_iso.loc["aug-cc-pVTZ"]["SCF"],
)["RelRMSD/%"]

0.021631569473748294

In [27]:
# Relative RMSD (in %) of the RI correlation part (RI-MP2 - RI-JK) against the
# MP2 - SCF difference at aug-cc-pVTZ, expressed relative to the MP2 values.
get_df_err(
    df=df_mp_iso.loc["aug-cc-pVTZ"]["RI-MP2"] - df_mp_iso.loc["aug-cc-pVTZ"]["RI-JK"],
    df_sub=df_cc_iso.loc["aug-cc-pVTZ"]["MP2"] - df_cc_iso.loc["aug-cc-pVTZ"]["SCF"],
    ref=df_cc_iso.loc["aug-cc-pVTZ"]["MP2"],
)["RelRMSD/%"]

0.00849995104528952

### Isotropic (Table 7)

In [20]:
# Error statistics of the SCF, MP2 and CCSD reference columns, each measured
# against the CCSD(T) reference column.
get_df_err(
    df=df_ref_iso[["SCF", "MP2", "CCSD"]],
    df_sub=df_ref_iso["CCSD(T)"],
)

Unnamed: 0,SCF,MP2,CCSD
MaxE/A^3,1.152647,0.272554,0.359205
MAD/A^3,0.332569,0.066439,0.141797
RMSD/A^3,0.401391,0.080643,0.158144
RelMaxE/%,9.344948,2.089501,2.757119
RelMAD/%,3.569487,0.702777,1.497503
RelRMSD/%,4.12604,0.812513,1.567345


### Anisotropic (Table 8)

In [21]:
# Row labels (not a boolean array) of the molecules whose anisotropic CCSD(T)
# reference value is greater than 0.5.
mask = df_ref_aniso.index[df_ref_aniso["CCSD(T)"].gt(0.5)]

In [22]:
# Same comparison as for the isotropic table, restricted to the rows in `mask`.
get_df_err(
    df=df_ref_aniso.loc[mask, ["SCF", "MP2", "CCSD"]],
    df_sub=df_ref_aniso.loc[mask, "CCSD(T)"],
)

Unnamed: 0,SCF,MP2,CCSD
MaxE/A^3,2.181284,0.617696,0.815532
MAD/A^3,0.384829,0.064261,0.128288
RMSD/A^3,0.592648,0.103108,0.193587
RelMaxE/%,21.575682,6.294857,4.837798
RelMAD/%,6.525448,1.20213,1.983073
RelRMSD/%,8.482593,1.625093,2.312709


## Set Err

In [24]:
# Row labels of the error tables built below (currently a single entry).
index_seterr_list = ["Set I, aV[DT]Z, aV[TQ]Z"]

### Isotropic (Table 5 and Table S5)

In [25]:
# Empty result table: one row per comparison label, one column per correction.
df_iso_seterr = pd.DataFrame(
    columns=["CorrD", "CorrD(T)"],
    index=index_seterr_list,
)

In [26]:
# Relative RMSD (in %) between the TZ/DZ and the QZ/TZ combinations of CorrD on
# mol_set1, expressed relative to the isotropic CCSD(T) reference values.
df_iso_seterr.loc["Set I, aV[DT]Z, aV[TQ]Z", "CorrD"] = get_df_err(
    df=(
        27 * df_corrd_iso.loc["aug-cc-pVTZ"].loc[mol_set1]
        - 8 * df_corrd_iso.loc["aug-cc-pVDZ"].loc[mol_set1]
    ) / 19,
    df_sub=(
        64 * df_corrd_iso.loc["aug-cc-pVQZ"].loc[mol_set1]
        - 27 * df_corrd_iso.loc["aug-cc-pVTZ"].loc[mol_set1]
    ) / 37,
    ref=df_ref_iso.loc[mol_set1, "CCSD(T)"],
).loc["RelRMSD/%"]

In [27]:
# Relative RMSD (in %) between the TZ/DZ and the QZ/TZ combinations of CorrD(T)
# on mol_set1, expressed relative to the isotropic CCSD(T) reference values.
df_iso_seterr.loc["Set I, aV[DT]Z, aV[TQ]Z", "CorrD(T)"] = get_df_err(
    df=(
        27 * df_corrdt_iso.loc["aug-cc-pVTZ"].loc[mol_set1]
        - 8 * df_corrdt_iso.loc["aug-cc-pVDZ"].loc[mol_set1]
    ) / 19,
    df_sub=(
        64 * df_corrdt_iso.loc["aug-cc-pVQZ"].loc[mol_set1]
        - 27 * df_corrdt_iso.loc["aug-cc-pVTZ"].loc[mol_set1]
    ) / 37,
    ref=df_ref_iso.loc[mol_set1, "CCSD(T)"],
).loc["RelRMSD/%"]

In [28]:
# Display the isotropic error table filled in by the two preceding cells.
df_iso_seterr

Unnamed: 0,CorrD,CorrD(T)
"Set I, aV[DT]Z, aV[TQ]Z",0.140338,0.127455


### Anisotropic (Table 6 and Table S6)

In [29]:
# Empty result table: one row per comparison label, one column per correction.
df_aniso_seterr = pd.DataFrame(
    columns=["CorrD", "CorrD(T)"],
    index=index_seterr_list,
)

In [30]:
# Members of mol_set1 whose anisotropic CCSD(T) reference value is greater
# than 0.5, kept in the original mol_set1 order.
mol_set1_aniso = [
    mol
    for mol, value in df_ref_aniso.loc[mol_set1, "CCSD(T)"].items()
    if value > 0.5
]
print(mol_set1_aniso)

[97, 654, 142, 103, 757, 399, 393, 580, 158, 599, 1193, 769, 1032, 1475, 119, 886, 470, 213, 93, 762, 1197, 655, 818, 1207, 1241, 223, 1194, 1046, 909, 205, 670, 767]


In [31]:
# Number of mol_set1 molecules that pass the > 0.5 filter.
len(mol_set1_aniso)

32

In [32]:
# Relative RMSD (in %) between the TZ/DZ and the QZ/TZ combinations of CorrD on
# the filtered Set I molecules, relative to the anisotropic CCSD(T) reference.
# The reference is selected with the same labels (mol_set1_aniso) as the two
# value series. Selecting it with the unfiltered mol_set1 only worked because
# the extra rows became NaN after index alignment and were skipped by mean().
df_aniso_seterr.loc["Set I, aV[DT]Z, aV[TQ]Z", "CorrD"] = get_df_err(
    ( 27 * df_corrd_aniso.loc["aug-cc-pVTZ"].loc[mol_set1_aniso] -  8 * df_corrd_aniso.loc["aug-cc-pVDZ"].loc[mol_set1_aniso]) / 19,
    ( 64 * df_corrd_aniso.loc["aug-cc-pVQZ"].loc[mol_set1_aniso] - 27 * df_corrd_aniso.loc["aug-cc-pVTZ"].loc[mol_set1_aniso]) / 37,
    df_ref_aniso.loc[mol_set1_aniso, "CCSD(T)"]
).loc["RelRMSD/%"]

In [33]:
# Relative RMSD (in %) between the TZ/DZ and the QZ/TZ combinations of CorrD(T)
# on the filtered Set I molecules, relative to the anisotropic CCSD(T) reference.
# The reference is selected with the same labels (mol_set1_aniso) as the two
# value series. Selecting it with the unfiltered mol_set1 only worked because
# the extra rows became NaN after index alignment and were skipped by mean().
df_aniso_seterr.loc["Set I, aV[DT]Z, aV[TQ]Z", "CorrD(T)"] = get_df_err(
    ( 27 * df_corrdt_aniso.loc["aug-cc-pVTZ"].loc[mol_set1_aniso] -  8 * df_corrdt_aniso.loc["aug-cc-pVDZ"].loc[mol_set1_aniso]) / 19,
    ( 64 * df_corrdt_aniso.loc["aug-cc-pVQZ"].loc[mol_set1_aniso] - 27 * df_corrdt_aniso.loc["aug-cc-pVTZ"].loc[mol_set1_aniso]) / 37,
    df_ref_aniso.loc[mol_set1_aniso, "CCSD(T)"]
).loc["RelRMSD/%"]

In [34]:
# Display the anisotropic error table filled in by the two preceding cells.
df_aniso_seterr

Unnamed: 0,CorrD,CorrD(T)
"Set I, aV[DT]Z, aV[TQ]Z",0.213987,0.204507
