In [3]:
import numpy as np
import pandas as pd
from util import (BOHR, read_mat, read_comp, get_iso, get_aniso, read_by_prompt, get_df_err, get_rmsre_3comp, get_relrmsd_3comp)
import itertools
import warnings

# Notebook-wide configuration.
# Every warning category is silenced for the rest of the session.
warnings.simplefilter("ignore")

# numpy: 8 printed digits, no scientific notation for small values, 150-character lines.
np.set_printoptions(precision=8, linewidth=150, suppress=True)

# pandas: never truncate rows; show floats with three decimals.
pd.options.display.max_rows = None
pd.options.display.precision = 3
pd.options.display.float_format = '{:.3f}'.format

In [4]:
import matplotlib.pyplot as plt
from matplotlib_inline.backend_inline import set_matplotlib_formats
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline

# Ask the inline backend to render figures as SVG.
set_matplotlib_formats('svg')

In [5]:
def get_df_iso(df):
    """Return the isotropic average of the diagonal tensor components.

    Parameters
    ----------
    df : mapping of column label -> values (e.g. a ``pandas.DataFrame``)
        Must provide the entries ``"xx"``, ``"yy"`` and ``"zz"``.

    Returns
    -------
    One third of ``xx + yy + zz``, evaluated element-wise.
    """
    diagonal_sum = df["xx"] + df["yy"] + df["zz"]
    return 1 / 3 * diagonal_sum

def get_df_aniso(df):
    """Return the anisotropy built from the six tensor components in ``df``.

    Parameters
    ----------
    df : mapping of column label -> values (e.g. a ``pandas.DataFrame``)
        Must provide ``"xx"``, ``"yy"``, ``"zz"``, ``"xy"``, ``"yz"`` and ``"zx"``.

    Returns
    -------
    ``sqrt(1/2) * sqrt((xx-yy)^2 + (yy-zz)^2 + (zz-xx)^2 + 6 (xy^2 + yz^2 + zx^2))``,
    evaluated element-wise.
    """
    xx, yy, zz = (df[label] for label in ("xx", "yy", "zz"))
    xy, yz, zx = (df[label] for label in ("xy", "yz", "zx"))
    # Squared pairwise differences of the diagonal, then the off-diagonal squares.
    diagonal_term = (xx - yy)**2 + (yy - zz)**2 + (zz - xx)**2
    off_diagonal_term = xy**2 + yz**2 + zx**2
    return np.sqrt(0.5) * (diagonal_term + 6 * off_diagonal_term)**0.5

## 读取基本数据

In [6]:
# Labels of the 101 species; the 100-entry list below is the same list without "H".
mol_hh101 = [
    'AlF', 'Ar', 'BF', 'BH2', 'BH2Cl', 'BH2F', 'BH3', 'BHF2', 'BeH', 'BeH2',
    'C2H2', 'C2H4', 'CH2-t', 'CH2BH', 'CH2F', 'CH3', 'CH3BH2', 'CH3Cl', 'CH3F', 'CH3NH2',
    'CH3OH', 'CH3SH', 'CH4', 'CO', 'CO2', 'CS', 'CSO', 'Cl2', 'ClCN', 'ClF',
    'FCN', 'FCO', 'FH-OH', 'FNO', 'H', 'H2', 'H2CN', 'H2O', 'H2O-Li', 'HBO',
    'HBS', 'HCCCl', 'HCCF', 'HCHO', 'HCHS', 'HCN', 'HCO', 'HCONH2', 'HCOOH', 'HCP',
    'HCl', 'HF', 'HNC', 'HO2', 'HOCl', 'HOOH', 'He', 'Li', 'LiBH4', 'LiCN',
    'LiCl', 'LiH', 'Mg', 'Mg2', 'N', 'N2', 'N2H2', 'N2H4', 'NH', 'NH2',
    'NH2Cl', 'NH2F', 'NH2OH', 'NH3', 'NH3O', 'Na', 'NaCN', 'NaCl', 'NaH', 'Ne',
    'OCl2', 'OF2', 'P', 'P2H4', 'PH', 'PH2', 'PH2OH', 'PH3', 'PH3O', 'S2',
    'S2H2', 'SCl2', 'SF2', 'SH2', 'SO-trip', 'SO2', 'SiH3', 'SiH3Cl', 'SiH3F', 'SiH4',
    'SiO',
]
# Independent list with the hydrogen atom left out.
mol_hh100 = [mol for mol in mol_hh101 if mol != "H"]

In [7]:
# B2PLYP convergence table: two index levels and three column-header levels.
# The row labelled ("T145", "1363") is discarded right after loading
# (presumably so that the set lines up with the 144-entry T144 reference files — confirm).
df_conv = (
    pd.read_csv("raw_data/conv_dh_B2PLYP.csv", index_col=[0, 1], header=[0, 1, 2])
    .drop(("T145", "1363"))
)

In [8]:
# Reference data: the "CCSD(T)" column of each reference table.
# The four HR46/T144 files share the same layout (first column is the index, one header row).
_single_header_opts = {"index_col": [0], "header": [0]}
df_ref_hr46_iso = pd.read_csv("raw_data/HR46-ref-iso.csv", **_single_header_opts)["CCSD(T)"]
df_ref_hr46_aniso = pd.read_csv("raw_data/HR46-ref-aniso.csv", **_single_header_opts)["CCSD(T)"]
df_ref_t144_iso = pd.read_csv("raw_data/T144-ref-iso.csv", **_single_header_opts)["CCSD(T)"]
df_ref_t144_aniso = pd.read_csv("raw_data/T144-ref-aniso.csv", **_single_header_opts)["CCSD(T)"]

# The HH132 table has a two-row header; keep only the rows listed in mol_hh100.
_hait_ref_table = pd.read_csv("raw_data/HH132-hait_ref.csv", index_col=[0], header=[0, 1])
df_ref_hh100_comp = _hait_ref_table.loc[mol_hh100]["CCSD(T)"]

In [9]:
# True for the mol_hh100 rows whose "Spin Polarization" entry is "SP".
_hait_spin_table = pd.read_csv("raw_data/HH132-hait_ref.csv", index_col=[0], header=[0, 1])
_spin_labels = _hait_spin_table.loc[mol_hh100][("Spin Polarization", "Spin Polarization")]
mask_hh100_sp = _spin_labels.eq("SP")

# True where the reference anisotropy exceeds 0.5.
mask_hr46_aniso = df_ref_hr46_aniso.gt(0.5)
mask_t144_aniso = df_ref_t144_aniso.gt(0.5)

In [10]:
# One (basis, contribution) key per tensor stored in df_conv; each tensor is
# identified through its "xx" component in the three-level column header.
columns = pd.MultiIndex.from_tuples([t[:2]  for t in df_conv.columns if t[-1] == "xx"])

# Build both frames directly from the computed Series.  Creating empty frames and
# filling them column by column through ``.loc`` leaves the columns object-dtype on
# current pandas; constructing them in one go keeps the data float64.
df_conv_iso = pd.DataFrame({key: get_df_iso(df_conv[key]) for key in columns})
df_conv_aniso = pd.DataFrame({key: get_df_aniso(df_conv[key]) for key in columns})
assert df_conv_iso.columns.equals(columns) and df_conv_aniso.columns.equals(columns)

# The anisotropy is not linear in the tensor, so the "pt2" contribution is defined
# as the difference between the total and the low-rung anisotropy instead of the
# anisotropy of the pt2 tensor itself.
for basis, atr in columns:
    if atr != "pt2":
        continue
    df_conv_aniso[(basis, "pt2")] = df_conv_aniso[(basis, "tot")] - df_conv_aniso[(basis, "low_rung")]

## aCV[Q5]Z CBS 参考值

In [11]:
# CBS estimate for the "HH118" rows: aCV5Z low-rung part plus the pt2 part
# extrapolated from aCVQZ/aCV5Z with weights 125/61 and -64/61.
_hh118_low_rung_5z = df_conv.loc["HH118", ("aCV5Z", "low_rung")]
_hh118_pt2_5z = df_conv.loc["HH118", ("aCV5Z", "pt2")]
_hh118_pt2_qz = df_conv.loc["HH118", ("aCVQZ", "pt2")]
_hh118_cbs = _hh118_low_rung_5z + 1 / 61 * (125 * _hh118_pt2_5z - 64 * _hh118_pt2_qz)
# Keep only the species listed in mol_hh100, in that order.
df_hh100_cbs = _hh118_cbs.loc[mol_hh100]

In [12]:
# Independent per-dataset copies of the isotropic / anisotropic tables.
# The "t144" frames are taken from the rows indexed as "T145".
df_hr46_iso, df_hr46_aniso = (table.loc["HR46"].copy() for table in (df_conv_iso, df_conv_aniso))
df_t144_iso, df_t144_aniso = (table.loc["T145"].copy() for table in (df_conv_iso, df_conv_aniso))

In [15]:
# Two-point extrapolation schemes:
#   (new column label, smaller basis, larger basis, weight of smaller, weight of larger)
# The weights are -27/37, 64/37 for the [TQ]/[23] pairs and -64/61, 125/61 for the
# [Q5]/[34] pairs.  The [DT]/[12] pairs (weights -8/19, 27/19) are intentionally
# not generated here.
cbs_schemes = [
    ("aCV[TQ]Z", "aCVTZ", "aCVQZ", -27 / 37,  64 / 37),
    ("aCV[Q5]Z", "aCVQZ", "aCV5Z", -64 / 61, 125 / 61),
    ("apc[23]" , "apc2" , "apc3" , -27 / 37,  64 / 37),
    ("apc[34]" , "apc3" , "apc4" , -64 / 61, 125 / 61),
]
cbs_target_frames = (df_hr46_iso, df_t144_iso, df_hr46_aniso, df_t144_aniso)

# Each scheme only reads the raw basis-set columns, so frames can be processed one
# after another; the new columns are added to every frame in place.
for df in cbs_target_frames:
    for cbs_scheme, cbs_1, cbs_2, coef_1, coef_2 in cbs_schemes:
        # Low-rung part: taken unchanged from the larger basis.
        df.loc[:, (cbs_scheme, "low_rung")] = df.loc[:, (cbs_2, "low_rung")]
        # pt2 part: weighted combination of the two basis sets.
        df.loc[:, (cbs_scheme, "pt2")] = (
            coef_1 * df.loc[:, (cbs_1, "pt2")] + coef_2 * df.loc[:, (cbs_2, "pt2")]
        )
        # Total: sum of the two columns just written.
        df.loc[:, (cbs_scheme, "tot")] = (
            df.loc[:, (cbs_scheme, "low_rung")] + df.loc[:, (cbs_scheme, "pt2")]
        )

In [18]:
# HR46, isotropic: pt2 part from the aCV[TQ]Z extrapolation vs. the aCV[Q5]Z one.
# NOTE(review): get_df_err is imported from util and not visible here; presumably the
# arguments are (values, reference, normaliser for the relative metrics) — confirm.
# NOTE(review): the third argument is the aCV[TQ]Z total, not the aCV[Q5]Z total — confirm intended.
get_df_err(df_hr46_iso[("aCV[TQ]Z", "pt2")], df_hr46_iso[("aCV[Q5]Z", "pt2")], df_hr46_iso[("aCV[TQ]Z", "tot")])

MaxE/A^3    0.017
MAD/A^3     0.002
RMSD/A^3    0.003
RelMaxE/%   0.254
RelMAD/%    0.031
RelRMSD/%   0.057
dtype: float64

In [20]:
# HR46, isotropic: pt2 part at plain aCV5Z vs. the aCV[Q5]Z extrapolation.
# NOTE(review): get_df_err is imported from util and not visible here; presumably the
# arguments are (values, reference, normaliser for the relative metrics) — confirm.
get_df_err(df_hr46_iso[("aCV5Z", "pt2")], df_hr46_iso[("aCV[Q5]Z", "pt2")], df_hr46_iso[("aCV[TQ]Z", "tot")])

MaxE/A^3    0.008
MAD/A^3     0.002
RMSD/A^3    0.002
RelMaxE/%   0.123
RelMAD/%    0.031
RelRMSD/%   0.036
dtype: float64

In [29]:
# HR46, isotropic: low-rung part at apc2 vs. the aCV[Q5]Z low-rung column
# (which is a copy of the aCV5Z low-rung column).
# NOTE(review): get_df_err is imported from util and not visible here; presumably the
# arguments are (values, reference, normaliser for the relative metrics) — confirm.
get_df_err(df_hr46_iso[("apc2", "low_rung")], df_hr46_iso[("aCV[Q5]Z", "low_rung")], df_hr46_iso[("aCV[TQ]Z", "tot")])

MaxE/A^3    0.027
MAD/A^3     0.011
RMSD/A^3    0.013
RelMaxE/%   0.604
RelMAD/%    0.225
RelRMSD/%   0.262
dtype: float64

In [30]:
# T144, isotropic: low-rung part at apc2 vs. the aCV[Q5]Z low-rung column
# (which is a copy of the aCV5Z low-rung column).
# NOTE(review): get_df_err is imported from util and not visible here; presumably the
# arguments are (values, reference, normaliser for the relative metrics) — confirm.
get_df_err(df_t144_iso[("apc2", "low_rung")], df_t144_iso[("aCV[Q5]Z", "low_rung")], df_t144_iso[("aCV[TQ]Z", "tot")])

MaxE/A^3    0.071
MAD/A^3     0.020
RMSD/A^3    0.023
RelMaxE/%   0.505
RelMAD/%    0.208
RelRMSD/%   0.222
dtype: float64

In [31]:
# HR46, isotropic: pt2 part at apc2 vs. the aCV[Q5]Z extrapolation.
# NOTE(review): get_df_err is imported from util and not visible here; presumably the
# arguments are (values, reference, normaliser for the relative metrics) — confirm.
get_df_err(df_hr46_iso[("apc2", "pt2")], df_hr46_iso[("aCV[Q5]Z", "pt2")], df_hr46_iso[("aCV[TQ]Z", "tot")])

MaxE/A^3    0.024
MAD/A^3     0.006
RMSD/A^3    0.008
RelMaxE/%   0.388
RelMAD/%    0.112
RelRMSD/%   0.144
dtype: float64