In [1]:
import matplotlib as mpl
from matplotlib import pyplot as plt
from matplotlib_inline.backend_inline import set_matplotlib_formats

set_matplotlib_formats("svg")

In [2]:
import numpy as np
import pickle
import pandas as pd
import json

In [3]:
def eng_by_coeff(df, coeff):
    """Linearly combine rows of ``df`` using the weights in ``coeff``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose row labels include every key of ``coeff``.
    coeff : dict
        Maps a row label of ``df`` to the scalar weight applied to that row.

    Returns
    -------
    The sum over ``coeff`` of ``weight * df.loc[label]``, or ``None`` when
    ``coeff`` is empty.
    """
    total = None
    for label, weight in coeff.items():
        term = weight * df.loc[label]
        if total is None:
            # The first term is a fresh object, so accumulating into it
            # in place below never touches ``df`` itself.
            total = term
        else:
            total += term
    return total

In [4]:
def str_arr(arr):
    """Render the numbers in ``arr`` as a bracketed, comma-separated string.

    Each element is written in fixed-point notation with six decimals,
    e.g. ``[1.000000, 2.500000]``; an empty input gives ``[]``.
    """
    formatted = (format(value, ".6f") for value in arr)
    return "[{}]".format(", ".join(formatted))

## Read data

In [5]:
# Load the reaction definitions of both benchmark sets from the parent
# directory. The cells below iterate over each mapping with `.items()`,
# split every key on "/" into a (dataset, index) pair, and read the
# "systems", "stoichiometry" and "ref" fields of each entry (plus "bond"
# for MN15).
with open("../MN15_REF.json") as f:
    MN15_REF = json.load(f)
with open("../GMTKN55_REF.json") as f:
    GMTKN55_REF = json.load(f)

In [6]:
# Per-system energy tables. The two B3LYP files are read with a two-row
# header, giving two-level columns that the cells below index as
# [dataset][system]; the first CSV column becomes the row index.
df_GMTKN55_b3lyp = pd.read_csv("../GMTKN55_b3lyp.csv", header=[0, 1], index_col=[0])
df_MN15_b3lyp = pd.read_csv("../MN15_b3lyp.csv", header=[0, 1], index_col=[0])
# Single-row header here; the table is transposed after loading so that it
# can be indexed by system through its columns (`df_MN15_dh[syst]` below).
df_MN15_dh = pd.read_csv("../MN15_dh.csv", header=[0], index_col=[0]).T

In [7]:
# Build one column per MN15 reaction: every row of the B3LYP table is
# combined over the reaction's systems with their stoichiometric factors,
# and two extra rows hold the reaction's "ref" and "bond" values.
df_react_MN15 = pd.DataFrame(
    index=list(df_MN15_b3lyp.index) + ["ref", "bond"],
    columns=pd.MultiIndex.from_tuples([v.split("/") for v in MN15_REF.keys()]),
    data=0.0)
for entry, info in MN15_REF.items():
    dataset, idx = entry.split("/")
    for syst, stoi in zip(info["systems"], info["stoichiometry"]):
        df_react_MN15[(dataset, idx)] += stoi * df_MN15_b3lyp[dataset][syst]
    # Written once per reaction, after the accumulation: the accumulated
    # series carry no "ref"/"bond" labels, so these two rows only become
    # meaningful here. Doing it outside the inner loop also records them
    # for a reaction that lists no systems.
    df_react_MN15.loc["ref", (dataset, idx)] = info["ref"]
    df_react_MN15.loc["bond", (dataset, idx)] = info["bond"]

In [8]:
# Reaction-level values for the rows of `df_MN15_dh`: one column per MN15
# reaction, accumulated as the stoichiometry-weighted sum of the
# per-system columns.
df_react_MN15_dh = pd.DataFrame(
    0.0,
    index=list(df_MN15_dh.index),
    columns=pd.MultiIndex.from_tuples([key.split("/") for key in MN15_REF]),
)
for entry, info in MN15_REF.items():
    dataset, idx = entry.split("/")
    column = (dataset, idx)
    for syst, stoi in zip(info["systems"], info["stoichiometry"]):
        df_react_MN15_dh[column] += stoi * df_MN15_dh[syst]

In [9]:
# Build one column per GMTKN55 reaction: every row of the B3LYP table is
# combined over the reaction's systems with their stoichiometric factors,
# and an extra "ref" row holds the reaction's reference value.
df_react_GMTKN55 = pd.DataFrame(
    index=list(df_GMTKN55_b3lyp.index) + ["ref"],
    columns=pd.MultiIndex.from_tuples([v.split("/") for v in GMTKN55_REF.keys()]),
    data=0.0)
for entry, info in GMTKN55_REF.items():
    dataset, idx = entry.split("/")
    for syst, stoi in zip(info["systems"], info["stoichiometry"]):
        # System keys are "<dataset>/<system>"; the dataset part selects the
        # first column level of the energy table.
        dat, syst = syst.split("/")
        df_react_GMTKN55[(dataset, idx)] += stoi * df_GMTKN55_b3lyp[dat][syst]
    # Written once per reaction, after the accumulation: the accumulated
    # series carry no "ref" label, so this row only becomes meaningful
    # here. Doing it outside the inner loop also records it for a reaction
    # that lists no systems.
    df_react_GMTKN55.loc["ref", (dataset, idx)] = info["ref"]

## Useful functions

In [10]:
def err_wtmad2(para):
    """Weighted error of a coefficient set over the GMTKN55 reactions.

    ``para`` maps row labels of ``df_react_GMTKN55`` to weights. The combined
    reaction values are scaled by 627.51 (presumably Hartree -> kcal/mol;
    confirm the units of the input tables) and compared with the "ref" row.
    For every top-level dataset the summed absolute deviation is multiplied
    by 56.84 and divided by that dataset's mean absolute reference value;
    the total is then divided by the number of reaction columns.
    """
    ref = df_react_GMTKN55.loc["ref"]
    deviation = eng_by_coeff(df_react_GMTKN55, para) * 627.51 - ref

    weighted_total = 0
    for subset in deviation.index.levels[0]:
        abs_dev_sum = deviation[subset].abs().sum()
        mean_abs_ref = ref[subset].abs().mean()
        weighted_total += 56.84 * abs_dev_sum / mean_abs_ref
    return weighted_total / df_react_GMTKN55.columns.size

In [11]:
def err_our(para, to_list=False):
    """Combined GMTKN55 + MN15 error of a coefficient set.

    Parameters
    ----------
    para : dict
        Row label -> weight mapping, forwarded to ``err_wtmad2`` and
        ``eng_by_coeff``.
    to_list : bool, default False
        When true, return the individual errors as a ``pandas.Series``
        (entry "GMTKN55" followed by one entry per MN15 dataset) instead of
        the weighted total.

    Returns
    -------
    Either the per-dataset Series, or a single number: each entry divided by
    25, the six entries listed below multiplied by their weight, then summed.
    For MN15 each entry is the mean absolute deviation from "ref" after
    division by the "bond" row.
    """
    per_set = {"GMTKN55": err_wtmad2(para)}

    energies = eng_by_coeff(df_react_MN15, para) * 627.51
    per_bond_dev = (energies - df_react_MN15.loc["ref"]) / df_react_MN15.loc["bond"]
    per_set.update(
        (subset, per_bond_dev[subset].abs().mean())
        for subset in per_bond_dev.index.levels[0]
    )
    errors = pd.Series(per_set)

    if to_list:
        return errors

    # The weights below add up to 25, the divisor applied first.
    errors /= 25
    for name, weight in (
        ("GMTKN55", 12),
        ("MR-MGM-BE4", 3),
        ("MR-MGN-BE17", 2),
        ("MR-TM-BE13", 3),
        ("SR-MGM-BE9", 2),
        ("SR-TM-BE17", 3),
    ):
        errors[name] *= weight
    return errors.sum()

In [12]:
# Coefficient sets keyed by functional name. Each inner dict maps an
# energy-component row label to the weight of that component; the dicts are
# passed as `para` to err_our / err_wtmad2, which hand them to eng_by_coeff
# to combine the matching rows of the reaction tables.
# NOTE(review): the numeric values are presumably the published parameters
# of each functional -- verify against the original references.
para_exist = {
    "B3LYP": {
        "eng_noxc": 1,
        "eng_exx_HF": 0.2,
        "eng_purexc_LDA_X": 0.08,
        "eng_purexc_GGA_X_B88": 0.72,
        "eng_purexc_LDA_C_VWN_RPA": 0.19,
        "eng_purexc_GGA_C_LYP": 0.81
    },
    # Same weights as "B3LYP"; only the LDA correlation component differs
    # (VWN instead of VWN_RPA).
    "B3LYP5": {
        "eng_noxc": 1,
        "eng_exx_HF": 0.2,
        "eng_purexc_LDA_X": 0.08,
        "eng_purexc_GGA_X_B88": 0.72,
        "eng_purexc_LDA_C_VWN": 0.19,
        "eng_purexc_GGA_C_LYP": 0.81
    },
    # The entries from here on also include MP2 correlation components
    # (total, or split into OS / SS parts).
    "XYG3": {
        "eng_noxc": 1,
        "eng_exx_HF": 0.8033,
        "eng_purexc_LDA_X": -0.0140,
        "eng_purexc_GGA_X_B88": 0.2107,
        "eng_purexc_GGA_C_LYP": 0.6789,
        "eng_corr_MP2": 0.3211
    },
    "revXYG3": {
        "eng_noxc": 1,
        "eng_exx_HF": 0.9196,
        "eng_purexc_LDA_X": -0.0222,
        "eng_purexc_GGA_X_B88": 0.1026,
        "eng_purexc_GGA_C_LYP": 0.6059,
        "eng_corr_MP2": 0.3941
    },
    "revXYGJ-OS": {
        "eng_noxc": 1,
        "eng_exx_HF": 0.8877,
        "eng_purexc_LDA_X": 0.1123,
        "eng_purexc_LDA_C_VWN_RPA": -0.0697,
        "eng_purexc_GGA_C_LYP": 0.6167,
        "eng_corr_MP2_OS": 0.5485,
    },
    "XYG5": {
        "eng_noxc": 1,
        "eng_exx_HF": 0.9150,
        "eng_purexc_LDA_X": 0.0612,
        "eng_purexc_GGA_X_B88": 0.0238,
        "eng_purexc_LDA_C_VWN_RPA": 0,
        "eng_purexc_GGA_C_LYP": 0.4957,
        "eng_corr_MP2_OS": 0.4548,
        "eng_corr_MP2_SS": 0.2764,
    },
    "XYG6": {
        "eng_noxc": 1,
        "eng_exx_HF": 0.9105,
        "eng_purexc_LDA_X": 0.1576,
        "eng_purexc_GGA_X_B88": -0.0681,
        "eng_purexc_LDA_C_VWN_RPA": 0.1800,
        "eng_purexc_GGA_C_LYP": 0.2244,
        "eng_corr_MP2_OS": 0.4695,
        "eng_corr_MP2_SS": 0.2426,
    },
    "XYG7": {
        "eng_noxc": 1,
        "eng_exx_HF": 0.8971,
        "eng_purexc_LDA_X": 0.2055,
        "eng_purexc_GGA_X_B88": -0.1408,
        "eng_purexc_LDA_C_VWN_RPA": 0.4056,
        "eng_purexc_GGA_C_LYP": 0.1159,
        "eng_corr_MP2_OS": 0.4052,
        "eng_corr_MP2_SS": 0.2589,
    },
}

## Actual computation

In [13]:
# Signed deviation of every row of `df_react_MN15_dh` from the MN15
# reference, per reaction: scale by 627.51, subtract the "ref" row, then
# divide by the "bond" row (all aligned on the reaction columns).
df_err = (
    df_react_MN15_dh
    .mul(627.51)
    .sub(df_react_MN15.loc["ref"])
    .div(df_react_MN15.loc["bond"])
)
df_err

Unnamed: 0_level_0,MR-MGM-BE4,MR-MGM-BE4,MR-MGM-BE4,MR-MGM-BE4,MR-MGN-BE17,MR-MGN-BE17,MR-MGN-BE17,MR-MGN-BE17,MR-MGN-BE17,MR-MGN-BE17,...,SR-TM-BE17,SR-TM-BE17,SR-TM-BE17,SR-TM-BE17,SR-TM-BE17,SR-TM-BE17,SR-TM-BE17,SR-TM-BE17,SR-TM-BE17,SR-TM-BE17
Unnamed: 0_level_1,1,2,3,4,1,10,11,12,13,14,...,16,17,2,3,4,5,6,7,8,9
TPSS0-DH,-3.090571,-10.610778,-10.746301,-7.771788,-6.765444,-11.524186,-14.52845,-15.712749,-9.936564,-25.120092,...,-5.595036,19.461822,19.81096,8.602451,-45.613685,-2.986476,1.077078,13.853293,-0.585027,-7.278396
B2PLYP,8.27188,9.361088,3.894244,-5.552635,0.43713,3.309229,2.790722,0.473307,-5.067302,-2.950737,...,-6.985482,11.142979,23.382036,-0.151272,-28.561783,-0.372766,0.592899,43.069451,4.640829,-2.811511
PBE-QIDH,1.247452,-1.268543,-4.96134,-5.469763,-2.212438,-2.104006,-4.75564,-7.697987,-5.103078,-13.662676,...,1.90397,46.169769,24.564898,22.589944,9.573173,-1.9888,-1.938285,39.603352,21.472973,-5.69334
DSD-BLYP-D3BJ,6.869052,12.112252,4.251984,-2.508836,-0.148524,1.220934,1.30528,-3.130452,-5.502583,-2.637261,...,3.367328,39.509046,26.374736,20.104497,-6.563767,3.266814,0.205871,15.812517,24.724122,-0.841608
DSD-PBEP86-D3BJ,4.321782,13.010001,2.418558,-3.56422,0.342642,-0.229799,1.341787,-4.026882,-4.879121,0.724136,...,3.629734,40.908998,24.569488,-29.183529,-6.727053,2.376873,0.495297,15.782113,23.940635,-1.539917
B2GPPLYP,4.670634,7.485871,2.279724,-5.989945,-1.10893,0.400629,-0.269796,-3.513424,-6.901905,-6.643351,...,-3.666497,29.324789,24.748342,13.387164,-4.594693,-0.722474,-0.264121,21.810762,-17.32252,-3.630377
wB97X-2-TQZ,6.59193,4.673885,2.327563,-5.009334,-0.177309,2.209996,0.268234,-6.1402,-9.120955,-3.756439,...,5.494071,4.468242,26.710067,-57.003874,-0.242924,0.552202,1.165235,17.616392,25.90152,-1.51279
RS-PBE-P86,5.998214,-2.967877,-0.856204,-1.25006,-1.770796,0.074945,-4.02052,-9.220853,-2.586712,-9.582709,...,14.472166,36.713584,27.322075,17.16766,-47.403667,1.957865,-3.2117,10.297628,22.161948,-3.563153
PBE0-DH,0.162879,-9.683924,-9.405566,-6.9068,-2.408911,-2.598264,-5.604104,-7.385271,-4.853313,-18.067453,...,-4.480827,20.531033,20.716362,3.217969,-30.171569,-3.171817,-1.963858,39.102006,0.751377,-7.316891
TPSS-QIDH,-1.015169,-1.779576,-5.637729,-6.126637,-5.089815,-7.751452,-10.667547,-13.346958,-8.363088,-18.312195,...,1.083257,48.102663,24.013028,23.616281,-5.115964,-1.746946,0.458956,14.269938,20.638415,-5.542836


In [14]:
# Summary table: one column per benchmark set, one row per method.
# "GMTKN55" starts empty because `df_err` only covers the MN15 datasets;
# it is filled solely by the coefficient-based methods below.
d = {"GMTKN55": pd.Series(dtype=float)}
for dataset in df_react_MN15.columns.levels[0]:
    # Mean absolute error over the reactions of this dataset, per method.
    d[dataset] = df_err[dataset].abs().T.mean()
for method in ("XYG3", "XYG6", "XYG7"):
    e = err_our(para_exist[method], to_list=True)
    for dataset, per_method in d.items():
        # Assigning a new label appends the method as an extra row.
        per_method[method] = e[dataset]
pd.DataFrame.from_dict(d)

Unnamed: 0,GMTKN55,MR-MGM-BE4,MR-MGN-BE17,MR-TM-BE13,SR-MGM-BE9,SR-TM-BE17
B2GPPLYP,,5.106544,2.943033,5.872512,1.816971,9.497552
B2PLYP,,6.769962,2.347522,4.692672,2.294284,9.294913
DSD-BLYP-D3BJ,,6.435531,2.391904,8.287422,1.987828,11.756556
DSD-PBEP86-D3BJ,,5.82864,2.08941,7.467999,2.120246,11.97431
PBE-QIDH,,3.236775,5.645159,7.387977,3.443151,12.915987
PBE0-DH,,6.539792,7.575506,5.588154,4.07009,10.016575
RS-PBE-P86,,2.768089,4.203283,7.437405,3.297367,15.906874
TPSS-QIDH,,3.639778,9.896691,7.26985,3.658,12.212192
TPSS0-DH,,8.05486,14.076846,6.456035,4.690089,9.921341
XYG3,3.385794,16.929109,1.813253,10.667971,1.594389,5.557607
