In [2]:
import numpy as np
import pandas as pd
from util import (BOHR, read_mat, read_comp, get_iso, get_aniso, read_by_prompt, get_df_err, get_rmsre_3comp, get_relrmsd_3comp)
import itertools
import warnings
import basis_set_exchange as bse
from functools import partial

# Silence every warning for the whole session.
# NOTE(review): this also hides deprecation notices raised by pandas/numpy.
warnings.filterwarnings("ignore")

# NumPy printing: precision 8, no scientific notation, lines up to 150 characters.
np.set_printoptions(precision=8, suppress=True, linewidth=150)

# pandas display: never truncate rows, render floats with three decimals.
pd.set_option("display.max_rows", None)
pd.set_option("display.precision", 3)
# Use the fully qualified option name: abbreviated names are resolved by pattern
# matching and can become ambiguous when pandas adds similarly named options.
pd.set_option("display.float_format", '{:.3f}'.format)

In [3]:
# Plotting setup: import matplotlib, enable the inline backend through the
# IPython magic, and request SVG as the inline figure format.
import matplotlib.pyplot as plt
from matplotlib_inline.backend_inline import set_matplotlib_formats
%matplotlib inline

set_matplotlib_formats('svg')

In [4]:
def get_df_iso(df):
    """Return the isotropic value built from the diagonal components.

    Parameters
    ----------
    df : mapping-like (e.g. ``pandas.DataFrame``)
        Must provide the entries ``"xx"``, ``"yy"`` and ``"zz"``.

    Returns
    -------
    One third of ``xx + yy + zz``; element-wise when the entries are
    array-like (a ``pandas.Series`` for a ``DataFrame`` input).
    """
    diagonal_sum = df["xx"] + df["yy"] + df["zz"]
    return 1 / 3 * diagonal_sum

def get_df_aniso(df):
    """Return the anisotropy built from the six tensor components.

    Parameters
    ----------
    df : mapping-like (e.g. ``pandas.DataFrame``)
        Must provide the entries ``"xx"``, ``"yy"``, ``"zz"``, ``"xy"``,
        ``"yz"`` and ``"zx"``.

    Returns
    -------
    ``sqrt(1/2) * sqrt((xx-yy)^2 + (yy-zz)^2 + (zz-xx)^2 + 6*(xy^2 + yz^2 + zx^2))``,
    evaluated element-wise when the entries are array-like.
    """
    xx, yy, zz = (df[key] for key in ("xx", "yy", "zz"))
    xy, yz, zx = (df[key] for key in ("xy", "yz", "zx"))
    # squared differences of the diagonal entries
    diagonal_part = (xx - yy)**2 + (yy - zz)**2 + (zz - xx)**2
    # off-diagonal contribution
    offdiagonal_part = 6 * (xy**2 + yz**2 + zx**2)
    return np.sqrt(0.5) * (diagonal_part + offdiagonal_part)**0.5

## 读取基本数据

In [5]:
# Molecular species list: the labels used elsewhere in this notebook to select
# (and order) the rows taken from the HH132 reference table.
mol_hh101 = ['AlF', 'Ar', 'BF', 'BH2', 'BH2Cl', 'BH2F', 'BH3', 'BHF2', 'BeH', 'BeH2', 'C2H2', 'C2H4', 'CH2-t', 'CH2BH', 'CH2F', 'CH3', 'CH3BH2', 'CH3Cl', 'CH3F', 'CH3NH2', 'CH3OH', 'CH3SH', 'CH4', 'CO', 'CO2', 'CS', 'CSO', 'Cl2', 'ClCN', 'ClF', 'FCN', 'FCO', 'FH-OH', 'FNO', 'H', 'H2', 'H2CN', 'H2O', 'H2O-Li', 'HBO', 'HBS', 'HCCCl', 'HCCF', 'HCHO', 'HCHS', 'HCN', 'HCO', 'HCONH2', 'HCOOH', 'HCP', 'HCl', 'HF', 'HNC', 'HO2', 'HOCl', 'HOOH', 'He', 'Li', 'LiBH4', 'LiCN', 'LiCl', 'LiH', 'Mg', 'Mg2', 'N', 'N2', 'N2H2', 'N2H4', 'NH', 'NH2', 'NH2Cl', 'NH2F', 'NH2OH', 'NH3', 'NH3O', 'Na', 'NaCN', 'NaCl', 'NaH', 'Ne', 'OCl2', 'OF2', 'P', 'P2H4', 'PH', 'PH2', 'PH2OH', 'PH3', 'PH3O', 'S2', 'S2H2', 'SCl2', 'SF2', 'SH2', 'SO-trip', 'SO2', 'SiH3', 'SiH3Cl', 'SiH3F', 'SiH4', 'SiO']

# reference data: isotropic / anisotropic reference tables for HR46 and T144
df_ref_hr46_iso = pd.read_csv("raw_data/HR46-ref-iso.csv", index_col=[0], header=[0])
df_ref_hr46_aniso = pd.read_csv("raw_data/HR46-ref-aniso.csv", index_col=[0], header=[0])
df_ref_t144_iso = pd.read_csv("raw_data/T144-ref-iso.csv", index_col=[0], header=[0])
df_ref_t144_aniso = pd.read_csv("raw_data/T144-ref-aniso.csv", index_col=[0], header=[0])

# Normalise row labels: HR46 labels use "-" instead of "_", T144 integer labels
# become zero-padded four-character strings (presumably to match the labels of
# the benchmark table -- confirm against raw_data/all_benchmark.csv).
df_ref_hr46_iso.index = [f.replace("_", "-") for f in df_ref_hr46_iso.index]
df_ref_hr46_aniso.index = [f.replace("_", "-") for f in df_ref_hr46_aniso.index]
df_ref_t144_iso.index = [f"{f:04d}" for f in df_ref_t144_iso.index]
df_ref_t144_aniso.index = [f"{f:04d}" for f in df_ref_t144_aniso.index]
# All four tables receive the same seven column labels.
df_ref_hr46_iso.columns = df_ref_hr46_aniso.columns = df_ref_t144_iso.columns = df_ref_t144_aniso.columns = ["HF", "Corr2", "CorrD", "CorrD(T)", "MP2", "CCSD", "CCSD(T)"]

# HH101 reference: two-level column header, rows restricted to (and ordered by) mol_hh101.
df_ref_hh101_comp = pd.read_csv("raw_data/HH132-hait_ref.csv", index_col=[0], header=[0, 1]).loc[mol_hh101]
# Isotropic references are derived from the xx/yy/zz components of each method.
df_ref_hh101_iso = pd.DataFrame(columns=["HF", "MP2", "CCSD", "CCSD(T)"], index=df_ref_hh101_comp.index)
for method in df_ref_hh101_iso.columns:
    df_ref_hh101_iso.loc[:, method] = get_df_iso(df_ref_hh101_comp[method])

# masks
# True for species flagged "SP" in the reference table. The table loaded above
# is reused here instead of reading and row-selecting the same CSV a second time.
mask_hh101_sp = df_ref_hh101_comp[("Spin Polarization", "Spin Polarization")] == "SP"
# Keep only systems whose CCSD(T) reference anisotropy exceeds 0.5.
mask_hr46_aniso = df_ref_hr46_aniso["CCSD(T)"] > 0.5
mask_t144_aniso = df_ref_t144_aniso["CCSD(T)"] > 0.5

In [6]:
# read benchmark results
# The file is parsed with a three-level column header and a two-level row index.
# Later cells address columns as (rung, method, component) and rows as
# (dataset, molecule).
df_bench_raw = pd.read_csv("raw_data/all_benchmark.csv", header=[0, 1, 2], index_col=[0, 1])

In [7]:
# parse isotropic

# (rung, method) column keys: every DFT entry of the raw table (found through
# its "xx" column), followed by the four WFT levels.
method_list = [key[:-1] for key in df_bench_raw.columns if key[-1] == "xx"]
method_list += [("WFT", level) for level in ("HF", "MP2", "CCSD", "CCSD(T)")]

# (dataset, molecule) row keys, one group per dataset.
iso_rows = [("HR46", mol) for mol in mask_hr46_aniso.index]
iso_rows += [("T144", mol) for mol in mask_t144_aniso.index]
iso_rows += [("HH101 (NSP)", mol) for mol in mask_hh101_sp[~mask_hh101_sp].index]
iso_rows += [("HH101 (SP)", mol) for mol in mask_hh101_sp[mask_hh101_sp].index]
df_bench_iso = pd.DataFrame(
    index=pd.MultiIndex.from_tuples(iso_rows),
    columns=pd.MultiIndex.from_tuples(method_list))

# dataset label used here -> dataset label used in the raw benchmark table
iso_origin = {"HH101 (NSP)": "HH118", "HH101 (SP)": "HH118", "HR46": "HR46", "T144": "T145"}
# dataset label used here -> table holding the WFT reference values
iso_wft_ref = {
    "HH101 (NSP)": df_ref_hh101_iso,
    "HH101 (SP)": df_ref_hh101_iso,
    "HR46": df_ref_hr46_iso,
    "T144": df_ref_t144_iso,
}

for rung, method in method_list:
    if rung == "WFT":  # WFT data: copy from the reference tables
        for dataset, df in iso_wft_ref.items():
            for mol in df_bench_iso.loc[dataset].index:
                df_bench_iso.loc[(dataset, mol), ("WFT", method)] = df.loc[mol, method]
    else:  # DFT data: isotropic value computed from the raw components
        tdf_iso = get_df_iso(df_bench_raw[rung, method])
        for dataset, dataset_origin in iso_origin.items():
            for mol in df_bench_iso.loc[dataset].index:
                df_bench_iso.loc[(dataset, mol), (rung, method)] = tdf_iso.loc[(dataset_origin, mol)]
df_bench_iso.to_csv("result-iso.csv")

In [8]:
# parse anisotropic

# (rung, method) column keys: DFT entries found in the raw table, then the WFT levels.
method_list = [key[:-1] for key in df_bench_raw.columns if key[-1] == "xx"]
method_list += [("WFT", level) for level in ("HF", "MP2", "CCSD", "CCSD(T)")]

# Only the systems selected by the anisotropy masks become rows.
aniso_rows = [("HR46", mol) for mol in mask_hr46_aniso[mask_hr46_aniso].index]
aniso_rows += [("T144", mol) for mol in mask_t144_aniso[mask_t144_aniso].index]
df_bench_aniso = pd.DataFrame(
    index=pd.MultiIndex.from_tuples(aniso_rows),
    columns=pd.MultiIndex.from_tuples(method_list))

# dataset label used here -> dataset label used in the raw benchmark table
aniso_origin = {"HR46": "HR46", "T144": "T145"}
# dataset label used here -> table holding the WFT reference values
aniso_wft_ref = {"HR46": df_ref_hr46_aniso, "T144": df_ref_t144_aniso}

for rung, method in method_list:
    if rung == "WFT":  # WFT data: copy from the reference tables
        for dataset, ref_table in aniso_wft_ref.items():
            for mol in df_bench_aniso.loc[dataset].index:
                df_bench_aniso.loc[(dataset, mol), ("WFT", method)] = ref_table.loc[mol, method]
    else:  # DFT data: anisotropy computed from the raw components
        tdf_aniso = get_df_aniso(df_bench_raw[rung, method])
        for dataset, dataset_origin in aniso_origin.items():
            for mol in df_bench_aniso.loc[dataset].index:
                df_bench_aniso.loc[(dataset, mol), (rung, method)] = tdf_aniso.loc[(dataset_origin, mol)]
df_bench_aniso.to_csv("result-aniso.csv")

In [9]:
# parse components

# (rung, method) keys: DFT entries found in the raw table, then the WFT levels.
method_list = [key[:-1] for key in df_bench_raw.columns if key[-1] == "xx"]
method_list += [("WFT", level) for level in ("HF", "MP2", "CCSD", "CCSD(T)")]

# One column per (rung, method, diagonal component).
comp_columns = [(*pair, axis) for pair in method_list for axis in ("xx", "yy", "zz")]
# Rows: HH101 species, non-spin-polarized group first, then the spin-polarized one.
comp_rows = [("HH101 (NSP)", mol) for mol in mask_hh101_sp[~mask_hh101_sp].index]
comp_rows += [("HH101 (SP)", mol) for mol in mask_hh101_sp[mask_hh101_sp].index]
df_bench_comp = pd.DataFrame(
    index=pd.MultiIndex.from_tuples(comp_rows),
    columns=pd.MultiIndex.from_tuples(comp_columns))

for rung, method in method_list:
    from_reference = rung == "WFT"
    for dataset, mol in df_bench_comp.index:
        for comp in ["xx", "yy", "zz"]:
            if from_reference:
                # WFT values come from the HH101 reference table
                value = df_ref_hh101_comp.loc[mol, (method, comp)]
            else:
                # DFT values come from the "HH118" rows of the raw benchmark table
                value = df_bench_raw.loc[("HH118", mol), (rung, method, comp)]
            df_bench_comp.loc[(dataset, mol), (rung, method, comp)] = value
df_bench_comp.to_csv("result-comp.csv")

## 测评结果

In [10]:
# Isotropic error summary: one row per (rung, method), one column per dataset.
# Each entry is the "RelRMSD/%" item returned by get_df_err, with the
# ("WFT", "CCSD(T)") column as reference.
iso_datasets = ["HR46", "T144", "HH101 (NSP)", "HH101 (SP)"]
df_res_iso = pd.DataFrame(index=df_bench_iso.columns, columns=iso_datasets)
for (rung, method), dataset in itertools.product(df_res_iso.index, iso_datasets):
    calc_values = df_bench_iso.loc[dataset, (rung, method)]
    ref_values = df_bench_iso.loc[dataset, ("WFT", "CCSD(T)")]
    df_res_iso.loc[(rung, method), dataset] = get_df_err(calc_values, ref_values)["RelRMSD/%"]

In [11]:
# Anisotropic error summary: one row per (rung, method), one column per dataset.
# Each entry is the "RelRMSD/%" item returned by get_df_err, with the
# ("WFT", "CCSD(T)") column as reference.
aniso_datasets = ["HR46", "T144"]
df_res_aniso = pd.DataFrame(index=df_bench_aniso.columns, columns=aniso_datasets)
for (rung, method), dataset in itertools.product(df_res_aniso.index, aniso_datasets):
    calc_values = df_bench_aniso.loc[dataset, (rung, method)]
    ref_values = df_bench_aniso.loc[dataset, ("WFT", "CCSD(T)")]
    df_res_aniso.loc[(rung, method), dataset] = get_df_err(calc_values, ref_values)["RelRMSD/%"]

In [12]:
# Component error summary for the two HH101 groups. Rows reuse the
# (rung, method) keys of df_bench_iso; each entry is get_relrmsd_3comp applied
# to the get_df_err result against the ("WFT", "CCSD(T)") components.
comp_datasets = ["HH101 (NSP)", "HH101 (SP)"]
df_res_comp = pd.DataFrame(index=df_bench_iso.columns, columns=comp_datasets)
for (rung, method), dataset in itertools.product(df_res_comp.index, comp_datasets):
    calc_values = df_bench_comp.loc[dataset, (rung, method)]
    ref_values = df_bench_comp.loc[dataset, ("WFT", "CCSD(T)")]
    df_res_comp.loc[(rung, method), dataset] = get_relrmsd_3comp(get_df_err(calc_values, ref_values))

In [14]:
# xc information: per-functional metadata table, indexed by the first CSV column
tab_xc = pd.read_csv("raw_data/functionals.csv", index_col=[0])
# missing years are replaced by 0 so that the column can be stored as int32
tab_xc.year = tab_xc.year.fillna(0).astype("int32")

# merge results: put the three error tables side by side and relabel the
# eight columns as (category, dataset)
df_res = pd.concat([df_res_iso, df_res_aniso, df_res_comp], axis=1)
df_res.columns = pd.MultiIndex.from_tuples(
      [("isotropic", d) for d in ["HR46", "T144", "HH101 (NSP)", "HH101 (SP)"]]
    + [("anisotropic", d) for d in ["HR46", "T144"]]
    + [("components", d) for d in ["HH101 (NSP)", "HH101 (SP)"]])

# change functional names to convention of chap-04
# The table is written to disk, renamed by plain substring replacement on the
# CSV text (every occurrence of each pattern, in the order listed), and read back.
# NOTE(review): the "B2GPPLYP" replacement runs first and already rewrites
# "ωB2GPPLYP", so the last pair should find nothing left to replace.
df_res.to_csv("benchmark.csv")
with open("benchmark.csv", "r") as f:
    token = f.read()
for xc_1, xc_2 in [
        ("B2GPPLYP", "B2GP-PLYP"),
        ("CAMB3LYP", "CAM-B3LYP"),
        ("PTPSS", "PTPSS-D3Zero"),
        ("SOS-PBE0-DH", "SOS0-PBE0-DH"),
        ("SOS-PBE-QIDH", "SOS0-PBE-QIDH"),
        ("SOS-RSX-0DH", "SOS-RSX-PBE0-DH"),
        ("SOS-RSX-QIDH", "SOS-RSX-PBE-QIDH"),
        ("ωB2GPPLYP", "ωB2GP-PLYP")]:
    token = token.replace(xc_1, xc_2)
with open("benchmark.csv", "w") as f:
    f.write(token)
# re-read the renamed table with a two-level row index and a two-level header
df_res_old = pd.read_csv("benchmark.csv", index_col=[0, 1], header=[0, 1])

# reformulate dataframe: empty frame with the metadata columns, the eight
# result columns and one "wtmad" column; then copy the results over
df_res = pd.DataFrame(
    index=df_res_old.index,
    columns=pd.MultiIndex.from_tuples(
      [("information", d) for d in tab_xc.columns]
    + [("isotropic", d) for d in ["HR46", "T144", "HH101 (NSP)", "HH101 (SP)"]]
    + [("anisotropic", d) for d in ["HR46", "T144"]]
    + [("components", d) for d in ["HH101 (NSP)", "HH101 (SP)"]]
    + [("wtmad", "wtmad")]))
for cls in ["isotropic", "anisotropic", "components"]:
    df_res[cls] = df_res_old[cls]

# write xc information
# Keep only the second index level (the method name) as row label, then copy
# the metadata cell by cell; every row label must exist in tab_xc.
df_res.index = [l[1] for l in df_res.index]
for xc in df_res.index:
    for info in tab_xc.columns:
        df_res.loc[xc, ("information", info)] = tab_xc.loc[xc, info]
# CCSD(T) is the reference of every error column, so its row is removed
df_res = df_res.drop("CCSD(T)", axis=0)
df_res["information", "year"] = df_res["information", "year"].fillna(0).astype("int32")

# compute wtmad
# weight of a column = 0.125 / (mean of that column over all remaining rows);
# with eight result columns, a row whose errors all equal the column means
# gets wtmad = 1, so values near one indicate average behaviour
df_res_wt = df_res[["isotropic", "anisotropic", "components"]].mean()
df_res_wt = 1 / df_res_wt * 0.125
df_res[("wtmad", "wtmad")] = (df_res[["isotropic", "anisotropic", "components"]] * df_res_wt).sum(axis=1)

# metadata is captured before sorting; the table is then ordered by ascending
# wtmad and written over benchmark.csv
xc_info = df_res["information"]
df_res = df_res.sort_values(("wtmad", "wtmad"), axis=0)
df_res.to_csv("benchmark.csv")

In [15]:
# Display the per-column weights with 15 decimals.
df_res_wt.to_frame().style.format("{:.15f}")

Unnamed: 0,Unnamed: 1,0
isotropic,HR46,0.061837091690685
isotropic,T144,0.054878115055414
isotropic,HH101 (NSP),0.038215111754263
isotropic,HH101 (SP),0.032692374803261
anisotropic,HR46,0.024584977413508
anisotropic,T144,0.025091475490632
components,HH101 (NSP),0.037099844927297
components,HH101 (SP),0.029890098744748


In [19]:
# Display the per-column weights with 7 decimals.
df_res_wt.to_frame().style.format("{:.7f}")

Unnamed: 0,Unnamed: 1,0
isotropic,HR46,0.0618371
isotropic,T144,0.0548781
isotropic,HH101 (NSP),0.0382151
isotropic,HH101 (SP),0.0326924
anisotropic,HR46,0.024585
anisotropic,T144,0.0250915
components,HH101 (NSP),0.0370998
components,HH101 (SP),0.0298901


In [20]:
# df_res[(xc_info["type"] != "WFT") & ~xc_info["hybrid ex"].isna() & xc_info["hybrid corr"].isna()]

In [21]:
# df_res[(xc_info["type"] != "WFT") & xc_info["hybrid ex"].isna() & xc_info["hybrid corr"].isna()]

In [22]:
# df_res[(xc_info["type"] != "WFT") & ~xc_info["hybrid corr"].isna()]

In [30]:
# Ranking table of all non-WFT rows. df_res is already sorted by ascending
# wtmad, so rank 1 is the row with the smallest wtmad.
d = df_res[df_res["information", "type"] != "WFT"].copy()
# Derive the rank range from the table itself instead of a hard-coded row count,
# so the cell keeps working when entries are added or removed.
d[("rank", "rank")] = range(1, len(d) + 1)
# Show missing entries as empty cells (display only; d itself is not modified).
d.fillna("")

Unnamed: 0_level_0,information,information,information,information,information,isotropic,isotropic,isotropic,isotropic,anisotropic,anisotropic,components,components,wtmad,rank
Unnamed: 0_level_1,year,type,hybrid ex,hybrid corr,XYG3-type,HR46,T144,HH101 (NSP),HH101 (SP),HR46,T144,HH101 (NSP),HH101 (SP),wtmad,rank
XYGJ-OS,2011,GGA,hybrid,hybrid,xDH,0.911,0.813,1.235,1.592,2.756,1.845,1.381,1.762,0.418,1
XYG6,2021,GGA,hybrid,hybrid,xDH,1.04,1.05,1.285,1.905,2.841,2.112,1.429,2.101,0.472,2
DSD-PBEPBE-D3BJ,2013,GGA,hybrid,hybrid,,1.146,1.274,1.615,1.412,2.234,2.446,1.651,1.578,0.473,3
xDH-PBE0,2012,GGA,hybrid,hybrid,xDH,0.685,0.561,2.014,2.216,2.554,1.648,2.049,2.439,0.476,4
XYG-OS5,2021,GGA,hybrid,hybrid,xDH,0.817,0.625,1.477,2.2,3.678,2.096,1.636,2.368,0.488,5
ωPBEPP86,2021,GGA,range-separate,hybrid,,1.093,1.069,2.068,0.949,3.252,2.542,2.09,1.074,0.49,6
XYG5,2021,GGA,hybrid,hybrid,xDH,1.095,1.138,1.33,1.981,2.739,2.235,1.479,2.197,0.49,7
revXYGJ-OS,2021,GGA,hybrid,hybrid,xDH,0.965,0.68,1.515,2.187,3.624,2.025,1.716,2.384,0.501,8
XYG3,2009,GGA,hybrid,hybrid,xDH,1.083,1.385,1.345,2.047,2.513,3.141,1.456,2.272,0.524,9
revXYG3,2021,GGA,hybrid,hybrid,xDH,0.971,1.062,1.739,2.41,2.493,2.671,1.858,2.645,0.54,10
