In [1]:
import pandas as pd
import json
import numpy as np
import scipy
import scipy.optimize

# Never truncate rows when a DataFrame/Series is rendered in this notebook.
pd.options.display.max_rows = None

In [2]:
def eng_by_coeff(df, coeff):
    """Return the weighted sum of selected rows of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose row labels include every key of ``coeff``.
    coeff : dict
        Maps a row label of ``df`` to the weight applied to that row.

    Returns
    -------
    pandas.Series or None
        ``sum(weight * df.loc[label])`` over all items of ``coeff``, indexed
        like the columns of ``df``. ``None`` when ``coeff`` is empty.
    """
    total = None
    for label, weight in coeff.items():
        term = weight * df.loc[label]
        # The first term seeds the accumulator; later terms are added to it.
        total = term if total is None else total + term
    return total

## Read data

In [3]:
def _read_json(path):
    """Parse and return the JSON document stored at ``path``."""
    with open(path) as handle:
        return json.load(handle)


# Reaction definitions: later cells read "systems", "stoichiometry" and "ref"
# from every entry (plus "bond" for the MN15 entries).
MN15_REF = _read_json("../MN15_REF.json")
GMTKN55_REF = _read_json("../GMTKN55_REF.json")

In [4]:
# Both CSV tables share one layout: a two-row column header (read as a
# two-level column MultiIndex) and the first column as the row index.
_B3LYP_CSV_LAYOUT = {"header": [0, 1], "index_col": [0]}
df_GMTKN55_b3lyp = pd.read_csv("../GMTKN55_b3lyp.csv", **_B3LYP_CSV_LAYOUT)
df_MN15_b3lyp = pd.read_csv("../MN15_b3lyp.csv", **_B3LYP_CSV_LAYOUT)

In [5]:
# Per-reaction table for GMTKN55.
#   rows    : every row of df_GMTKN55_b3lyp, plus a "ref" row holding the
#             reference value of each reaction
#   columns : one (dataset, idx) pair per key of GMTKN55_REF
df_react_GMTKN55 = pd.DataFrame(
    index=list(df_GMTKN55_b3lyp.index) + ["ref"],
    columns=pd.MultiIndex.from_tuples([v.split("/") for v in GMTKN55_REF.keys()]),
    data=0.0)
for entry, info in GMTKN55_REF.items():
    dataset, idx = entry.split("/")
    # Accumulate the stoichiometry-weighted sum of the per-system columns.
    for syst, stoi in zip(info["systems"], info["stoichiometry"]):
        dat, syst = syst.split("/")
        df_react_GMTKN55[(dataset, idx)] += stoi * df_GMTKN55_b3lyp[dat][syst]
    # The per-system columns have no "ref" row, so the aligned additions above
    # leave NaN there. Write the reference value once per reaction, after the
    # accumulation (it is also set when a reaction lists no systems).
    df_react_GMTKN55.loc["ref", (dataset, idx)] = info["ref"]

In [6]:
# Per-reaction table for MN15.
#   rows    : every row of df_MN15_b3lyp, plus "ref" (reference value) and
#             "bond" (the divisor applied to the errors in later cells)
#   columns : one (dataset, idx) pair per key of MN15_REF
df_react_MN15 = pd.DataFrame(
    index=list(df_MN15_b3lyp.index) + ["ref", "bond"],
    columns=pd.MultiIndex.from_tuples([v.split("/") for v in MN15_REF.keys()]),
    data=0.0)
for entry, info in MN15_REF.items():
    dataset, idx = entry.split("/")
    # Accumulate the stoichiometry-weighted sum of the per-system columns.
    for syst, stoi in zip(info["systems"], info["stoichiometry"]):
        df_react_MN15[(dataset, idx)] += stoi * df_MN15_b3lyp[dataset][syst]
    # The per-system columns have no "ref"/"bond" rows, so the aligned
    # additions above leave NaN there. Write both values once per reaction,
    # after the accumulation (they are also set when no systems are listed).
    df_react_MN15.loc["ref", (dataset, idx)] = info["ref"]
    df_react_MN15.loc["bond", (dataset, idx)] = info["bond"]

## Parameters of existing functionals

In [7]:
# Coefficient sets keyed by method name. Each inner dict maps a row label of
# the reaction tables to the weight that row receives when the dict is passed
# to eng_by_coeff (a weighted sum of rows).
# NOTE(review): the numeric values are presumably taken from the publications
# of the respective functionals - confirm against the original sources.
para_exist = {
    "B3LYP": {
        "eng_noxc": 1,
        "eng_exx_HF": 0.2,
        "eng_purexc_LDA_X": 0.08,
        "eng_purexc_GGA_X_B88": 0.72,
        "eng_purexc_LDA_C_VWN_RPA": 0.19,
        "eng_purexc_GGA_C_LYP": 0.81
    },
    # Same weights as "B3LYP"; only the LDA correlation row differs
    # (LDA_C_VWN instead of LDA_C_VWN_RPA).
    "B3LYP5": {
        "eng_noxc": 1,
        "eng_exx_HF": 0.2,
        "eng_purexc_LDA_X": 0.08,
        "eng_purexc_GGA_X_B88": 0.72,
        "eng_purexc_LDA_C_VWN": 0.19,
        "eng_purexc_GGA_C_LYP": 0.81
    },
    "XYG3": {
        "eng_noxc": 1,
        "eng_exx_HF": 0.8033,
        "eng_purexc_LDA_X": -0.0140,
        "eng_purexc_GGA_X_B88": 0.2107,
        "eng_purexc_GGA_C_LYP": 0.6789,
        "eng_corr_MP2": 0.3211
    },
    "revXYG3": {
        "eng_noxc": 1,
        "eng_exx_HF": 0.9196,
        "eng_purexc_LDA_X": -0.0222,
        "eng_purexc_GGA_X_B88": 0.1026,
        "eng_purexc_GGA_C_LYP": 0.6059,
        "eng_corr_MP2": 0.3941
    },
    # Uses only the "eng_corr_MP2_OS" correlation row (no "_SS" entry).
    "revXYGJ-OS": {
        "eng_noxc": 1,
        "eng_exx_HF": 0.8877,
        "eng_purexc_LDA_X": 0.1123,
        "eng_purexc_LDA_C_VWN_RPA": -0.0697,
        "eng_purexc_GGA_C_LYP": 0.6167,
        "eng_corr_MP2_OS": 0.5485,
    },
    "XYG5": {
        "eng_noxc": 1,
        "eng_exx_HF": 0.9150,
        "eng_purexc_LDA_X": 0.0612,
        "eng_purexc_GGA_X_B88": 0.0238,
        "eng_purexc_LDA_C_VWN_RPA": 0,
        "eng_purexc_GGA_C_LYP": 0.4957,
        "eng_corr_MP2_OS": 0.4548,
        "eng_corr_MP2_SS": 0.2764,
    },
    "XYG6": {
        "eng_noxc": 1,
        "eng_exx_HF": 0.9105,
        "eng_purexc_LDA_X": 0.1576,
        "eng_purexc_GGA_X_B88": -0.0681,
        "eng_purexc_LDA_C_VWN_RPA": 0.1800,
        "eng_purexc_GGA_C_LYP": 0.2244,
        "eng_corr_MP2_OS": 0.4695,
        "eng_corr_MP2_SS": 0.2426,
    },
    "XYG7": {
        "eng_noxc": 1,
        "eng_exx_HF": 0.8971,
        "eng_purexc_LDA_X": 0.2055,
        "eng_purexc_GGA_X_B88": -0.1408,
        "eng_purexc_LDA_C_VWN_RPA": 0.4056,
        "eng_purexc_GGA_C_LYP": 0.1159,
        "eng_corr_MP2_OS": 0.4052,
        "eng_corr_MP2_SS": 0.2589,
    },
    # In the three "XYG6+1/..." sets the OS and SS correlation weights are each
    # split between the MP2 row and a second correlation row by a mixing
    # factor x: the MP2 row gets c * (1 - x), the second row gets c * x.
    "XYG6+1/cr": {
        "eng_noxc": 1,
        "eng_exx_HF":                0.851546                 ,
        "eng_purexc_LDA_X":          0.209516                 ,
        "eng_purexc_GGA_X_B88":     -0.061062                 ,
        "eng_purexc_LDA_C_VWN_RPA":  0.168713                 ,
        "eng_purexc_GGA_C_LYP":      0.204008                 ,
        "eng_corr_MP2_OS":           0.460703 * (1 - 0.596938),
        "eng_corr_MP2_SS":           0.214325 * (1 - 0.596938),
        "eng_corr_MP2CR_OS":         0.460703 *      0.596938 ,
        "eng_corr_MP2CR_SS":         0.214325 *      0.596938 ,
    },
    # Mixing factor 1.421052 exceeds 1, so (1 - x) is negative for the MP2 rows.
    "XYG6+1/sIEPA": {
        "eng_noxc": 1,
        "eng_exx_HF":                0.872368                 ,
        "eng_purexc_LDA_X":          0.297015                 ,
        "eng_purexc_GGA_X_B88":     -0.169383                 ,
        "eng_purexc_LDA_C_VWN_RPA":  0.334350                 ,
        "eng_purexc_GGA_C_LYP":      0.088983                 ,
        "eng_corr_MP2_OS":           0.479659 * (1 - 1.421052),
        "eng_corr_MP2_SS":          -0.004645 * (1 - 1.421052),
        "eng_corr_SIEPA_OS":         0.479659 *      1.421052 ,
        "eng_corr_SIEPA_SS":        -0.004645 *      1.421052 ,
    },
    # Mixing factor 1.108974 exceeds 1, so (1 - x) is negative for the MP2 rows.
    "XYG6+1/IEPA": {
        "eng_noxc": 1,
        "eng_exx_HF":                0.716712                 ,
        "eng_purexc_LDA_X":          0.272940                 ,
        "eng_purexc_GGA_X_B88":      0.010348                 ,
        "eng_purexc_LDA_C_VWN_RPA":  0.359041                 ,
        "eng_purexc_GGA_C_LYP":      0.247560                 ,
        "eng_corr_MP2_OS":           0.286591 * (1 - 1.108974),
        "eng_corr_MP2_SS":           0.102393 * (1 - 1.108974),
        "eng_corr_IEPA_OS":          0.286591 *      1.108974 ,
        "eng_corr_IEPA_SS":          0.102393 *      1.108974 ,
    },
}

In [8]:
# Dataset names grouped into five categories.

# Basic properties and reaction energies for small systems
SET1 = [
    "W4-11", "G21EA", "G21IP", "DIPCS10", "PA26", "SIE4x4",
    "ALKBDE10", "YBDE18", "AL2X6", "HEAVYSB11", "NBPRC", "ALK8",
    "RC21", "G2RC", "BH76RC", "FH51", "TAUT15", "DC13",
]

# Reaction energies for large systems and isomerisation reactions
SET2 = [
    "MB16-43", "DARC", "RSE43", "BSR36", "CDIE20", "ISO34",
    "ISOL24", "C60ISO", "PArel",
]

# Reaction barrier heights
SET3 = [
    "BH76", "BHPERI", "BHDIV10", "INV24", "BHROT27", "PX13", "WCPT18",
]

# Intermolecular noncovalent interactions
SET4 = [
    "RG18", "ADIM6", "S22", "S66", "HEAVY28", "WATER27",
    "CARBHB12", "PNICO23", "HAL59", "AHB21", "CHB6", "IL16",
]

# Intramolecular noncovalent interactions
SET5 = [
    "IDISP", "ICONF", "ACONF", "Amino20x4", "PCONF21", "MCONF",
    "SCONF", "UPU23", "BUT14DIOL",
]

# The five groups in order, and their concatenation.
SETS = [SET1, SET2, SET3, SET4, SET5]
SET_ALL = [name for group in SETS for name in group]

## Useful functions

In [9]:
def err_wtmad2(para):
    """Weighted error of a coefficient set over all GMTKN55 reactions.

    For every dataset (first level of the reaction index) the summed absolute
    deviation from the "ref" row is scaled by ``56.84 / mean(|ref|)`` of that
    dataset; the scaled sums are added up and divided by the total number of
    reaction columns.

    NOTE(review): 627.51 is presumably the Hartree -> kcal/mol factor and
    56.84 the WTMAD-2 normalisation constant of GMTKN55 - confirm.

    Parameters
    ----------
    para : dict
        Row label -> weight, as accepted by ``eng_by_coeff``.

    Returns
    -------
    float
        The weighted error.
    """
    deviation = eng_by_coeff(df_react_GMTKN55, para) * 627.51 - df_react_GMTKN55.loc["ref"]
    reference = df_react_GMTKN55.loc["ref"]

    weighted_total = sum(
        56.84 * deviation[name].abs().sum() / reference[name].abs().mean()
        for name in deviation.index.levels[0]
    )
    return weighted_total / df_react_GMTKN55.columns.size

In [10]:
def err_our(para, to_list=False):
    """Combined GMTKN55 + MN15 error of a coefficient set.

    The "GMTKN55" entry is ``err_wtmad2(para)``. Every MN15 dataset
    contributes the mean absolute value of its reaction errors after each
    error has been divided by that reaction's "bond" value.

    Parameters
    ----------
    para : dict
        Row label -> weight, as accepted by ``eng_by_coeff``.
    to_list : bool, default False
        When true, return the individual errors (as a pandas Series, despite
        the parameter name) instead of the weighted total.

    Returns
    -------
    pandas.Series or float
        The per-benchmark errors if ``to_list`` is true; otherwise their
        weighted sum with weights 12, 3, 2, 3, 2, 3 (out of 25) for the six
        benchmarks named below. Any other entry keeps a weight of 1/25.
    """
    errors = {"GMTKN55": err_wtmad2(para)}

    energies = eng_by_coeff(df_react_MN15, para) * 627.51
    per_bond = (energies - df_react_MN15.loc["ref"]) / df_react_MN15.loc["bond"]
    for name in per_bond.index.levels[0]:
        errors[name] = per_bond[name].abs().mean()
    errors = pd.Series(errors)

    if to_list:
        return errors

    # Divide by the weight total first, then apply the individual weights.
    weighted = errors / 25
    for name, factor in (
        ("GMTKN55", 12),
        ("MR-MGM-BE4", 3),
        ("MR-MGN-BE17", 2),
        ("MR-TM-BE13", 3),
        ("SR-MGM-BE9", 2),
        ("SR-TM-BE17", 3),
    ):
        weighted[name] *= factor
    return weighted.sum()

In [11]:
def get_diagnose_gmtkn55(para):
    """Per-dataset and per-category GMTKN55 error table for a coefficient set.

    The category membership comes from the module-level ``SETS`` and
    ``SET_ALL`` lists rather than from a second local copy of the same lists.

    NOTE(review): 627.51 is presumably the Hartree -> kcal/mol factor and
    56.84 the WTMAD-2 normalisation constant of GMTKN55 - confirm.

    Parameters
    ----------
    para : dict
        Row label -> weight, as accepted by ``eng_by_coeff``.

    Returns
    -------
    pandas.DataFrame
        One row per dataset, followed by rows "Sub1" ... "Sub5" and "All";
        columns "MAD" and "Delta".

        * dataset rows: "MAD" is the mean absolute deviation from the "ref"
          row; "Delta" is ``56.84 * sum|dev| / mean|ref| / N_total`` with
          ``N_total`` the number of reaction columns.
        * group rows: "Delta" is the sum of the member datasets' "Delta";
          "MAD" is the sum of ``Delta * N_total`` over the members divided by
          the number of reactions in the group.
    """
    ref = df_react_GMTKN55.loc["ref"]
    n_total = df_react_GMTKN55.columns.size

    r = eng_by_coeff(df_react_GMTKN55, para) * 627.51
    d = r - ref

    err_dict = {}
    for dataset in d.index.levels[0]:
        abs_dev = d[dataset].abs()
        err_dict[dataset] = {
            "MAD": abs_dev.mean(),
            "Delta": 56.84 * abs_dev.sum() / ref[dataset].abs().mean() / n_total,
        }
    for sub_name, sub in zip("Sub1 Sub2 Sub3 Sub4 Sub5 All".split(), SETS + [SET_ALL]):
        err, delta, n = 0, 0, 0
        for subset in sub:
            n += len(d[subset])
            # Undo the division by n_total so the group can be renormalised
            # by its own reaction count below.
            err += err_dict[subset]["Delta"] * n_total
            delta += err_dict[subset]["Delta"]
        err /= n
        err_dict[sub_name] = {"MAD": err, "Delta": delta}
    return pd.DataFrame(err_dict).T

In [12]:
def get_diagnose(para, to_list=False):
    """Collect GMTKN55 and MN15 diagnostics of a coefficient set in one Series.

    Parameters
    ----------
    para : dict
        Row label -> weight, as accepted by ``eng_by_coeff``.
    to_list : bool, default False
        Accepted but never read by this function.

    Returns
    -------
    pandas.Series
        The "MAD" column of ``get_diagnose_gmtkn55(para)``, followed by one
        entry per MN15 dataset (mean absolute error after dividing each
        reaction error by its "bond" value), followed by "Weighted": the
        12/3/2/3/2/3 weighted combination (divided by 25) of "All" and the
        five MN15 datasets named below.
    """
    gmtkn55_part = get_diagnose_gmtkn55(para)["MAD"]

    energies = eng_by_coeff(df_react_MN15, para) * 627.51
    per_bond = (energies - df_react_MN15.loc["ref"]) / df_react_MN15.loc["bond"]
    mn15_part = pd.Series(
        {name: per_bond[name].abs().mean() for name in per_bond.index.levels[0]}
    )

    ret = pd.concat([gmtkn55_part, mn15_part])

    weights = {
        "All": 12,
        "MR-MGM-BE4": 3,
        "MR-MGN-BE17": 2,
        "MR-TM-BE13": 3,
        "SR-MGM-BE9": 2,
        "SR-TM-BE17": 3,
    }
    ret["Weighted"] = sum(factor * ret[name] for name, factor in weights.items()) / 25

    return ret

In [13]:
# One column of diagnostics per method.
diagnosed_methods = ["XYG3", "XYG6", "XYG7", "XYG6+1/cr", "XYG6+1/sIEPA", "XYG6+1/IEPA"]
v = pd.DataFrame({name: get_diagnose(para_exist[name]) for name in diagnosed_methods})

In [14]:
# Show the last twelve rows of the diagnostics table with two decimals.
v.tail(12).style.format("{:.2f}")

Unnamed: 0,XYG3,XYG6,XYG7,XYG6+1/cr,XYG6+1/sIEPA,XYG6+1/IEPA
Sub1,1.74,1.5,1.29,1.53,1.69,2.1
Sub2,4.7,2.45,2.38,2.54,3.97,4.53
Sub3,1.71,2.11,1.82,2.22,3.28,2.57
Sub4,5.15,2.83,2.86,3.38,3.7,6.97
Sub5,4.25,2.43,2.1,2.59,3.11,6.6
All,3.39,2.18,2.01,2.36,2.94,4.41
MR-MGM-BE4,16.93,22.85,17.81,3.24,3.62,2.62
MR-MGN-BE17,1.81,3.97,2.63,1.65,2.42,3.97
MR-TM-BE13,10.67,13.2,12.12,7.4,7.75,5.3
SR-MGM-BE9,1.59,1.78,2.55,1.97,1.87,2.03


In [15]:
# Bond-scaled error of every SR-TM-BE17 reaction, one column per method.
mn15_ref_row = df_react_MN15.loc["ref"]
mn15_bond_row = df_react_MN15.loc["bond"]
mn15_scaled_errors = pd.DataFrame({
    name: (eng_by_coeff(df_react_MN15, para_exist[name]) * 627.51 - mn15_ref_row) / mn15_bond_row
    for name in ["XYG3", "XYG6", "XYG7", "XYG6+1/cr", "XYG6+1/sIEPA", "XYG6+1/IEPA"]
})
mn15_scaled_errors.loc["SR-TM-BE17"].style.format("{:.2f}")


Unnamed: 0,XYG3,XYG6,XYG7,XYG6+1/cr,XYG6+1/sIEPA,XYG6+1/IEPA
1,1.4,3.42,3.43,1.6,5.75,1.47
10,0.1,1.61,0.89,-0.96,-0.84,-1.93
11,0.12,0.81,0.56,0.49,-0.44,0.04
12,25.16,26.43,27.3,23.81,26.57,23.07
13,-1.99,-4.3,-3.33,-4.6,-8.32,-3.79
14,23.14,27.1,19.22,0.34,-0.21,-0.03
15,1.23,4.18,4.35,2.01,0.8,-0.04
16,0.29,4.51,4.62,1.86,0.21,-1.86
17,9.3,12.17,13.86,12.05,13.53,12.69
2,2.27,3.63,0.51,3.28,1.43,1.76
