In [1]:
import numpy as np
import pandas as pd
from util import (BOHR, read_mat, read_comp, get_iso, get_aniso, read_by_prompt, get_df_err, get_rmsre_3comp, get_relrmsd_3comp)
import itertools
import warnings

# Notebook-wide setup: every warning is silenced for the rest of the session.
warnings.filterwarnings(action="ignore")
# NumPy printing: 8 decimals, fixed-point notation, 150-character lines.
np.set_printoptions(precision=8, suppress=True, linewidth=150)
# pandas display: never truncate rows; floats are rendered with three decimals.
pd.set_option("display.max_rows", None)
pd.set_option("display.precision", 3)
pd.set_option("display.float_format", "{:.3f}".format)

In [2]:
import matplotlib.pyplot as plt
from matplotlib_inline.backend_inline import set_matplotlib_formats
# IPython magic: draw matplotlib figures inline in the cell output.
%matplotlib inline

# Ask the inline backend to emit figures as SVG.
set_matplotlib_formats('svg')

In [3]:
def get_df_iso(df):
    """Return one third of the sum of the diagonal tensor components.

    Parameters
    ----------
    df : object indexable by the keys "xx", "yy" and "zz"
        For example a DataFrame whose columns are the tensor components.

    Returns
    -------
    The element-wise value ``(xx + yy + zz) / 3`` in whatever type the
    indexed components produce under ``+`` and ``*``.
    """
    trace = df["xx"] + df["yy"] + df["zz"]
    return 1 / 3 * trace

def get_df_aniso(df):
    """Return the anisotropy built from six tensor components.

    Computes, element-wise,
    ``sqrt(1/2) * sqrt((xx-yy)^2 + (yy-zz)^2 + (zz-xx)^2 + 6*(xy^2 + yz^2 + zx^2))``.

    Parameters
    ----------
    df : object indexable by the keys "xx", "yy", "zz", "xy", "yz" and "zx"
        For example a DataFrame whose columns are the tensor components.

    Returns
    -------
    The anisotropy, in whatever type the indexed components produce under
    the arithmetic above.
    """
    xx, yy, zz = (df[key] for key in ("xx", "yy", "zz"))
    xy, yz, zx = (df[key] for key in ("xy", "yz", "zx"))
    diagonal_spread = (xx - yy)**2 + (yy - zz)**2 + (zz - xx)**2
    off_diagonal = xy**2 + yz**2 + zx**2
    return np.sqrt(0.5) * (diagonal_spread + 6 * off_diagonal)**0.5

## 读取基本数据

In [4]:
# Species labels of the 101-entry set.
mol_hh101 = [
    'AlF', 'Ar', 'BF', 'BH2', 'BH2Cl', 'BH2F', 'BH3', 'BHF2', 'BeH', 'BeH2',
    'C2H2', 'C2H4', 'CH2-t', 'CH2BH', 'CH2F', 'CH3', 'CH3BH2', 'CH3Cl', 'CH3F', 'CH3NH2',
    'CH3OH', 'CH3SH', 'CH4', 'CO', 'CO2', 'CS', 'CSO', 'Cl2', 'ClCN', 'ClF',
    'FCN', 'FCO', 'FH-OH', 'FNO', 'H', 'H2', 'H2CN', 'H2O', 'H2O-Li', 'HBO',
    'HBS', 'HCCCl', 'HCCF', 'HCHO', 'HCHS', 'HCN', 'HCO', 'HCONH2', 'HCOOH', 'HCP',
    'HCl', 'HF', 'HNC', 'HO2', 'HOCl', 'HOOH', 'He', 'Li', 'LiBH4', 'LiCN',
    'LiCl', 'LiH', 'Mg', 'Mg2', 'N', 'N2', 'N2H2', 'N2H4', 'NH', 'NH2',
    'NH2Cl', 'NH2F', 'NH2OH', 'NH3', 'NH3O', 'Na', 'NaCN', 'NaCl', 'NaH', 'Ne',
    'OCl2', 'OF2', 'P', 'P2H4', 'PH', 'PH2', 'PH2OH', 'PH3', 'PH3O', 'S2',
    'S2H2', 'SCl2', 'SF2', 'SH2', 'SO-trip', 'SO2', 'SiH3', 'SiH3Cl', 'SiH3F', 'SiH4',
    'SiO',
]
# Same labels in the same order with the hydrogen atom ("H") left out.
mol_hh100 = [name for name in mol_hh101 if name != "H"]

In [5]:
# B2PLYP convergence table: rows use a two-level index (first two CSV columns),
# columns use a three-level header (first three CSV rows).
# Row ("T145", "1363") is removed right after loading.
df_conv = (
    pd.read_csv("raw_data/conv_dh_B2PLYP.csv", index_col=[0, 1], header=[0, 1, 2])
    .drop(("T145", "1363"))
)

In [6]:
# reference data: the "CCSD(T)" column of each reference table
ref_csv_paths = (
    "raw_data/HR46-ref-iso.csv",
    "raw_data/HR46-ref-aniso.csv",
    "raw_data/T144-ref-iso.csv",
    "raw_data/T144-ref-aniso.csv",
)
df_ref_hr46_iso, df_ref_hr46_aniso, df_ref_t144_iso, df_ref_t144_aniso = (
    pd.read_csv(path, index_col=[0], header=[0])["CCSD(T)"] for path in ref_csv_paths
)
# Relabel the T144 reference rows with the "T145" row labels of df_conv.
# The replacement is positional, so it assumes both tables list the systems in
# the same order -- TODO confirm against the raw files.
for ref_series in (df_ref_t144_iso, df_ref_t144_aniso):
    ref_series.index = df_conv.loc["T145"].index
# Component-wise reference for the species in mol_hh100 (two-level CSV header).
hait_ref = pd.read_csv("raw_data/HH132-hait_ref.csv", index_col=[0], header=[0, 1])
df_ref_hh100_comp = hait_ref.loc[mol_hh100]["CCSD(T)"]

In [7]:
# True where the reference table flags a mol_hh100 species as "SP".
hait_flags = pd.read_csv("raw_data/HH132-hait_ref.csv", index_col=[0], header=[0, 1]).loc[mol_hh100]
mask_hh100_sp = hait_flags[("Spin Polarization", "Spin Polarization")].eq("SP")
# True where the reference anisotropy exceeds 0.5; used later to restrict the
# anisotropy error statistics to these systems.
mask_hr46_aniso = df_ref_hr46_aniso.gt(0.5)
mask_t144_aniso = df_ref_t144_aniso.gt(0.5)

In [8]:
# One (basis, contribution) key per tensor block of df_conv, found via each
# block's "xx" component column.
columns = pd.MultiIndex.from_tuples([t[:2] for t in df_conv.columns if t[-1] == "xx"])
# Allocate the result tables as float64 up front. An empty DataFrame created
# without a dtype is object-typed, and on pandas >= 2.0 `.loc[:, col] = values`
# writes in place, so the columns would otherwise stay object dtype.
df_conv_iso = pd.DataFrame(index=df_conv.index, columns=columns, dtype=float)
df_conv_aniso = pd.DataFrame(index=df_conv.index, columns=columns, dtype=float)
for basis, atr in columns:
    # df_conv[(basis, atr)] is the sub-table holding that block's tensor components.
    df_conv_iso.loc[:, (basis, atr)] = get_df_iso(df_conv[(basis, atr)])
    df_conv_aniso.loc[:, (basis, atr)] = get_df_aniso(df_conv[(basis, atr)])
# The anisotropy is not linear in the tensor, so the "pt2" anisotropy computed
# above is replaced by the difference tot - low_rung of the anisotropies.
for basis, atr in columns:
    if atr != "pt2":
        continue
    df_conv_aniso.loc[:, (basis, "pt2")] = (
        df_conv_aniso.loc[:, (basis, "tot")] - df_conv_aniso.loc[:, (basis, "low_rung")]
    )

## aCV[Q5]Z CBS 参考值

In [9]:
# aCV[Q5]Z estimate for the HH118 rows: aCV5Z low_rung part plus the pt2 part
# combined from aCV5Z and aCVQZ with weights 125/61 and -64/61.
hh118_low_rung_5z = df_conv.loc["HH118", ("aCV5Z", "low_rung")]
hh118_pt2_5z = df_conv.loc["HH118", ("aCV5Z", "pt2")]
hh118_pt2_qz = df_conv.loc["HH118", ("aCVQZ", "pt2")]
df_hh100_cbs = hh118_low_rung_5z + 1 / 61 * (125 * hh118_pt2_5z - 64 * hh118_pt2_qz)
# Keep only the species listed in mol_hh100, in that order.
df_hh100_cbs = df_hh100_cbs.loc[mol_hh100]

In [10]:
# Independent per-dataset copies, so that adding columns later does not touch
# the df_conv* tables they are taken from.
df_hr46_iso, df_hr46_aniso = (table.loc["HR46"].copy() for table in (df_conv_iso, df_conv_aniso))
df_t144_iso, df_t144_aniso = (table.loc["T145"].copy() for table in (df_conv_iso, df_conv_aniso))
# HH118 rows restricted to (and ordered by) mol_hh100: isotropic values and raw components.
df_hh100_iso, df_hh100_comp = (table.loc["HH118"].loc[mol_hh100].copy() for table in (df_conv_iso, df_conv))

In [11]:
# Two-point extrapolation recipes:
# (scheme label, smaller basis, larger basis, weight on smaller, weight on larger).
cbs_recipes = [
    ("aCV[DT]Z", "aCVDZ", "aCVTZ", - 8 / 19,  27 / 19),
    ("aCV[TQ]Z", "aCVTZ", "aCVQZ", -27 / 37,  64 / 37),
    ("aCV[Q5]Z", "aCVQZ", "aCV5Z", -64 / 61, 125 / 61),
    ("apc[12]" , "apc1" , "apc2" , - 8 / 19,  27 / 19),
    ("apc[23]" , "apc2" , "apc3" , -27 / 37,  64 / 37),
    ("apc[34]" , "apc3" , "apc4" , -64 / 61, 125 / 61),
]
for cbs_label, small_basis, large_basis, w_small, w_large in cbs_recipes:
    # Scalar tables with (basis, contribution) columns.
    for table in (df_hr46_iso, df_t144_iso, df_hr46_aniso, df_t144_aniso, df_hh100_iso):
        # low_rung is taken unchanged from the larger basis; only pt2 is extrapolated.
        low_rung_part = table.loc[:, (large_basis, "low_rung")]
        pt2_part = w_small * table.loc[:, (small_basis, "pt2")] + w_large * table.loc[:, (large_basis, "pt2")]
        table.loc[:, (cbs_label, "low_rung")] = low_rung_part
        table.loc[:, (cbs_label, "pt2")] = pt2_part
        table.loc[:, (cbs_label, "tot")] = table.loc[:, (cbs_label, "low_rung")] + table.loc[:, (cbs_label, "pt2")]
    # Component table with (basis, contribution, component) columns: same recipe per component.
    for component in ("xx", "yy", "zz", "xy", "yz", "zx"):
        df_hh100_comp.loc[:, (cbs_label, "low_rung", component)] = df_hh100_comp.loc[:, (large_basis, "low_rung", component)]
        df_hh100_comp.loc[:, (cbs_label, "pt2", component)] = (
            w_small * df_hh100_comp.loc[:, (small_basis, "pt2", component)]
            + w_large * df_hh100_comp.loc[:, (large_basis, "pt2", component)]
        )
        df_hh100_comp.loc[:, (cbs_label, "tot", component)] = (
            df_hh100_comp.loc[:, (cbs_label, "low_rung", component)]
            + df_hh100_comp.loc[:, (cbs_label, "pt2", component)]
        )

In [12]:
# Row order of the error tables: one tuple per tier, flattened in order.
scheme_list = [
    scheme
    for tier in (
        ("aCVDZ", "apc1"),
        ("aCVTZ", "apc2", "aCV[DT]Z", "apc[12]"),
        ("aCVQZ", "apc3", "aCV[TQ]Z", "apc[23]"),
        ("aCV5Z", "apc4", "aCV[Q5]Z", "apc[34]"),
    )
    for scheme in tier
]
# Column groups of the error tables.
dataset_list = list(("HH101 (NSP)", "HH101 (SP)", "HR46", "T144"))
# Contributions reported for every scheme.
comp_list = list(("low_rung", "pt2", "tot"))

In [13]:
# Isotropic errors: rows are schemes, columns are (dataset, contribution).
df_iso_err = pd.DataFrame(index=scheme_list, columns=pd.MultiIndex.from_product([dataset_list, comp_list]))
# Table behind each dataset column; the HH set is split by the spin-polarization mask.
iso_tables = {
    "HR46": df_hr46_iso,
    "T144": df_t144_iso,
    "HH101 (NSP)": df_hh100_iso.loc[~mask_hh100_sp],
    "HH101 (SP)": df_hh100_iso.loc[mask_hh100_sp],
}
for scheme in scheme_list:
    for comp in comp_list:
        # Bracketed (extrapolated) schemes get no low_rung entry; that cell stays NaN.
        if "[" in scheme and comp == "low_rung":
            continue
        for dataset, table in iso_tables.items():
            # Arguments: scheme values, aCV[Q5]Z values of the same contribution,
            # and the aCV[TQ]Z total.
            # NOTE(review): the third argument uses aCV[TQ]Z rather than aCV[Q5]Z --
            # confirm this is intended against get_df_err in util.
            errors = get_df_err(
                table[(scheme, comp)],
                table[("aCV[Q5]Z", comp)],
                table[("aCV[TQ]Z", "tot")])
            df_iso_err.loc[scheme, (dataset, comp)] = errors["RelRMSD/%"]

In [14]:
# Display the HR46 and T144 column groups of the isotropic error table; missing cells are shown blank.
df_iso_err[["HR46", "T144"]].fillna("")

Unnamed: 0_level_0,HR46,HR46,HR46,T144,T144,T144
Unnamed: 0_level_1,low_rung,pt2,tot,low_rung,pt2,tot
aCVDZ,2.428,0.25,2.528,1.559,0.185,1.447
apc1,1.001,0.188,0.905,0.855,0.225,0.686
aCVTZ,0.542,0.12,0.571,0.241,0.134,0.152
apc2,0.262,0.144,0.187,0.222,0.166,0.099
aCV[DT]Z,,0.111,0.533,,0.124,0.146
apc[12],,0.152,0.224,,0.15,0.114
aCVQZ,0.086,0.069,0.121,0.018,0.082,0.073
apc3,0.067,0.061,0.12,0.029,0.078,0.104
aCV[TQ]Z,,0.057,0.097,,0.049,0.041
apc[23],,0.029,0.073,,0.019,0.044


In [15]:
# Display the two HH101 column groups of the isotropic error table; missing cells are shown blank.
df_iso_err[["HH101 (NSP)", "HH101 (SP)"]].fillna("")

Unnamed: 0_level_0,HH101 (NSP),HH101 (NSP),HH101 (NSP),HH101 (SP),HH101 (SP),HH101 (SP)
Unnamed: 0_level_1,low_rung,pt2,tot,low_rung,pt2,tot
aCVDZ,4.565,0.569,4.939,3.721,0.746,4.03
apc1,1.783,0.882,1.991,1.642,0.944,1.8
aCVTZ,1.358,0.294,1.568,0.866,0.325,1.006
apc2,0.508,0.334,0.45,0.622,0.618,0.693
aCV[DT]Z,,0.218,1.469,,0.213,0.916
apc[12],,0.55,0.578,,0.5,0.605
aCVQZ,0.302,0.15,0.432,0.146,0.205,0.282
apc3,0.196,0.109,0.248,0.167,0.143,0.19
aCV[TQ]Z,,0.07,0.344,,0.161,0.219
apc[23],,0.257,0.329,,0.224,0.344


In [16]:
# Anisotropic errors: rows are schemes, columns are (dataset, contribution).
df_aniso_err = pd.DataFrame(index=scheme_list, columns=pd.MultiIndex.from_product([["HR46", "T144"], comp_list]))
# Only systems selected by the anisotropy masks enter the statistics.
aniso_tables = {
    "HR46": df_hr46_aniso.loc[mask_hr46_aniso],
    "T144": df_t144_aniso.loc[mask_t144_aniso],
}
for scheme in scheme_list:
    for comp in comp_list:
        # Bracketed (extrapolated) schemes get no low_rung entry; that cell stays NaN.
        if "[" in scheme and comp == "low_rung":
            continue
        for dataset, table in aniso_tables.items():
            # Arguments: scheme values, aCV[Q5]Z values of the same contribution,
            # and the aCV[TQ]Z total.
            errors = get_df_err(
                table[(scheme, comp)],
                table[("aCV[Q5]Z", comp)],
                table[("aCV[TQ]Z", "tot")])
            df_aniso_err.loc[scheme, (dataset, comp)] = errors["RelRMSD/%"]

In [17]:
# Display the anisotropic error table; missing cells are shown blank.
df_aniso_err.fillna("")

Unnamed: 0_level_0,HR46,HR46,HR46,T144,T144,T144
Unnamed: 0_level_1,low_rung,pt2,tot,low_rung,pt2,tot
aCVDZ,3.389,0.505,3.596,1.742,0.264,1.86
apc1,2.271,0.892,2.885,1.321,0.446,1.53
aCVTZ,0.777,0.122,0.845,0.313,0.09,0.374
apc2,0.342,0.311,0.503,0.159,0.145,0.201
aCV[DT]Z,,0.23,0.845,,0.137,0.383
apc[12],,0.13,0.351,,0.103,0.17
aCVQZ,0.14,0.073,0.196,0.053,0.059,0.09
apc3,0.102,0.124,0.174,0.061,0.058,0.085
aCV[TQ]Z,,0.058,0.183,,0.043,0.079
apc[23],,0.099,0.115,,0.05,0.055


In [18]:
# Component-wise errors for the HH set: rows are schemes, columns are (subset, contribution).
df_comp_err = pd.DataFrame(index=scheme_list, columns=pd.MultiIndex.from_product([["HH101 (NSP)", "HH101 (SP)"], comp_list]))
# Component table split by the spin-polarization mask.
comp_tables = {
    "HH101 (NSP)": df_hh100_comp.loc[~mask_hh100_sp],
    "HH101 (SP)": df_hh100_comp.loc[mask_hh100_sp],
}
for scheme in scheme_list:
    for comp in comp_list:
        # Bracketed (extrapolated) schemes get no low_rung entry; that cell stays NaN.
        if "[" in scheme and comp == "low_rung":
            continue
        for dataset, table in comp_tables.items():
            # Arguments: scheme components, aCV[Q5]Z components of the same
            # contribution, and the aCV[TQ]Z total components.
            errors = get_df_err(
                table.loc[:, (scheme, comp)],
                table.loc[:, ("aCV[Q5]Z", comp)],
                table.loc[:, ("aCV[TQ]Z", "tot")])
            # Only the diagonal components are passed on to the 3-component summary.
            df_comp_err.loc[scheme, (dataset, comp)] = get_relrmsd_3comp(errors[["xx", "yy", "zz"]])

In [19]:
# Display the component-wise error table; missing cells are shown blank.
df_comp_err.fillna("")

Unnamed: 0_level_0,HH101 (NSP),HH101 (NSP),HH101 (NSP),HH101 (SP),HH101 (SP),HH101 (SP)
Unnamed: 0_level_1,low_rung,pt2,tot,low_rung,pt2,tot
aCVDZ,4.99,0.619,5.39,4.276,0.815,4.673
apc1,2.069,0.9,2.278,1.804,0.962,1.934
aCVTZ,1.455,0.324,1.69,1.019,0.364,1.211
apc2,0.607,0.349,0.561,0.656,0.628,0.712
aCV[DT]Z,,0.24,1.584,,0.24,1.104
apc[12],,0.56,0.674,,0.516,0.632
aCVQZ,0.323,0.166,0.468,0.181,0.218,0.331
apc3,0.214,0.181,0.302,0.182,0.146,0.202
aCV[TQ]Z,,0.077,0.37,,0.163,0.25
apc[23],,0.343,0.41,,0.231,0.354


In [25]:
# Labels of the T144 entries for which the anisotropy mask is False, i.e. those left out of the anisotropic statistics.
mask_t144_aniso.index[~mask_t144_aniso]

Index(['0351', '0353', '0998'], dtype='object')

In [24]:
# Number of T144 entries for which the anisotropy mask is True.
mask_t144_aniso.sum()

141