In [1]:
import pandas as pd
import json
import numpy as np
import scipy
import scipy.optimize
import pickle

In [2]:
import matplotlib as mpl
from matplotlib import pyplot as plt
from matplotlib_inline.backend_inline import set_matplotlib_formats

# Ask the inline backend to render figures as SVG.
set_matplotlib_formats("svg")

In [3]:
def eng_by_coeff(df, coeff):
    """Return the linear combination of rows of ``df`` described by ``coeff``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose row labels include every key of ``coeff``.
    coeff : dict
        Maps a row label to the weight applied to that row.

    Returns
    -------
    The weighted sum of the selected rows (terms are added in the mapping's
    iteration order), or ``None`` when ``coeff`` is empty.
    """
    terms = [weight * df.loc[label] for label, weight in coeff.items()]
    if not terms:
        return None
    total = terms[0]
    for term in terms[1:]:
        total = total + term
    return total

In [4]:
def str_arr(arr):
    """Format an iterable of numbers as ``[a, b, ...]`` with six decimals each."""
    formatted = ("{:.6f}".format(value) for value in arr)
    return "[{}]".format(", ".join(formatted))

## Read data

In [5]:
# Reaction definitions for both benchmark sets, keyed by "dataset/index".
# Later cells read each entry's "systems", "stoichiometry" and "ref" fields
# (MN15 entries additionally provide "bond").
with open("../MN15_REF.json") as f:
    MN15_REF = json.load(f)
with open("../GMTKN55_REF.json") as f:
    GMTKN55_REF = json.load(f)

In [6]:
# Per-system energy tables. The two header rows become a two-level column
# MultiIndex (addressed below as [dataset][system]); the first CSV column
# becomes the row index, whose labels are the energy-component names used as
# keys by the gen_para_* functions.
df_GMTKN55_b3lyp = pd.read_csv("../GMTKN55_b3lyp.csv", header=[0, 1], index_col=[0])
df_MN15_b3lyp = pd.read_csv("../MN15_b3lyp.csv", header=[0, 1], index_col=[0])

In [7]:
# Assemble GMTKN55 reaction energies: one column per reaction (dataset, idx),
# one row per energy component, plus a "ref" row holding the reference value.
df_react_GMTKN55 = pd.DataFrame(
    index=list(df_GMTKN55_b3lyp.index) + ["ref"],
    columns=pd.MultiIndex.from_tuples([v.split("/") for v in GMTKN55_REF.keys()]),
    data=0.0)
for entry, info in GMTKN55_REF.items():
    dataset, idx = entry.split("/")
    # Stoichiometry-weighted sum of the participating systems' component energies.
    for syst, stoi in zip(info["systems"], info["stoichiometry"]):
        dat, syst = syst.split("/")
        df_react_GMTKN55[(dataset, idx)] += stoi * df_GMTKN55_b3lyp[dat][syst]
    # The accumulation aligns on the row index, so the "ref" row (absent from the
    # component table) turns into NaN. Write the reference once per reaction,
    # after all systems are summed, instead of once per system; this also sets
    # it for a reaction that lists no systems.
    df_react_GMTKN55.loc["ref", (dataset, idx)] = info["ref"]

In [8]:
# Assemble MN15 reaction energies: one column per reaction (dataset, idx),
# one row per energy component, plus "ref" (reference value) and "bond"
# (the per-reaction divisor used by err_our) rows.
df_react_MN15 = pd.DataFrame(
    index=list(df_MN15_b3lyp.index) + ["ref", "bond"],
    columns=pd.MultiIndex.from_tuples([v.split("/") for v in MN15_REF.keys()]),
    data=0.0)
for entry, info in MN15_REF.items():
    dataset, idx = entry.split("/")
    # Unlike GMTKN55, system names here carry no "dataset/" prefix: each system
    # is looked up under the reaction's own dataset.
    for syst, stoi in zip(info["systems"], info["stoichiometry"]):
        df_react_MN15[(dataset, idx)] += stoi * df_MN15_b3lyp[dataset][syst]
    # The accumulation aligns on the row index, so the "ref" and "bond" rows
    # (absent from the component table) turn into NaN. Write them once per
    # reaction, after all systems are summed, instead of once per system; this
    # also sets them for a reaction that lists no systems.
    df_react_MN15.loc["ref", (dataset, idx)] = info["ref"]
    df_react_MN15.loc["bond", (dataset, idx)] = info["bond"]

## Useful functions

In [9]:
def err_wtmad2(para):
    """Weighted error of a coefficient set over the GMTKN55 reactions.

    Parameters
    ----------
    para : dict
        Maps energy-component row labels to coefficients (see ``eng_by_coeff``).

    Returns
    -------
    float
        For every dataset, the summed absolute deviation from the reference is
        scaled by ``56.84 / mean(|ref|)`` of that dataset; the scaled sums are
        added up and divided by the total number of reactions.

    Notes
    -----
    Reads the notebook-global ``df_react_GMTKN55``. The factor 627.51 is
    presumably the Hartree -> kcal/mol conversion and 56.84 the WTMAD-2
    normalisation constant -- confirm against the benchmark definition.
    """
    reference = df_react_GMTKN55.loc["ref"]
    deviation = eng_by_coeff(df_react_GMTKN55, para) * 627.51 - reference

    total = 0
    for subset in deviation.index.levels[0]:
        abs_dev_sum = deviation[subset].abs().sum()
        mean_abs_ref = reference[subset].abs().mean()
        total += 56.84 * abs_dev_sum / mean_abs_ref
    return total / df_react_GMTKN55.columns.size

In [10]:
def err_our(para, to_list=False):
    """Combined GMTKN55 + MN15 error of a coefficient set.

    Parameters
    ----------
    para : dict
        Maps energy-component row labels to coefficients (see ``eng_by_coeff``).
    to_list : bool, default False
        When true, return the unweighted per-benchmark errors as a
        ``pandas.Series`` (not a list, despite the name) indexed by
        ``"GMTKN55"`` followed by the MN15 dataset names.

    Returns
    -------
    pandas.Series or float
        With ``to_list=False``: every entry is divided by 25, the entries named
        in ``weights`` below are multiplied by their weight, and the sum is
        returned. Entries not named there keep an implicit weight of 1.

    Notes
    -----
    Reads the notebook-global ``df_react_MN15``. MN15 deviations are divided by
    the per-reaction ``"bond"`` row before the mean absolute value is taken.
    """
    errors = {"GMTKN55": err_wtmad2(para)}

    reference = df_react_MN15.loc["ref"]
    bond = df_react_MN15.loc["bond"]
    per_bond_dev = (eng_by_coeff(df_react_MN15, para) * 627.51 - reference) / bond
    for dataset in per_bond_dev.index.levels[0]:
        errors[dataset] = per_bond_dev[dataset].abs().mean()
    errors = pd.Series(errors)

    if to_list:
        return errors

    # The listed weights add up to 25, the common divisor applied first.
    weights = {
        "GMTKN55": 12,
        "MR-MGM-BE4": 3,
        "MR-MGN-BE17": 2,
        "MR-TM-BE13": 3,
        "SR-MGM-BE9": 2,
        "SR-TM-BE17": 3,
    }
    weighted = errors / 25
    for name, weight in weights.items():
        weighted[name] *= weight
    return weighted.sum()

## Confined Optimization: XYG7

In [11]:
def gen_para_xyg7(c_x):
    """Return a builder of XYG7 coefficient mappings for a fixed ``c_x``.

    ``c_x`` is used as the ``eng_exx_HF`` coefficient. The returned function
    takes the six free parameters ``(c_s, c_b88, c_vwn, c_lyp, c_os, c_ss)``
    and returns the component-label -> coefficient dict consumed by
    ``eng_by_coeff``; ``eng_noxc`` always has coefficient 1.
    """
    labels = (
        "eng_noxc",
        "eng_exx_HF",
        "eng_purexc_LDA_X",
        "eng_purexc_GGA_X_B88",
        "eng_purexc_LDA_C_VWN_RPA",
        "eng_purexc_GGA_C_LYP",
        "eng_corr_MP2_OS",
        "eng_corr_MP2_SS",
    )

    def inner(arr):
        # Unpack explicitly so that a wrong-length vector raises ValueError.
        c_s, c_b88, c_vwn, c_lyp, c_os, c_ss = arr
        values = (1, c_x, c_s, c_b88, c_vwn, c_lyp, c_os, c_ss)
        return dict(zip(labels, values))

    return inner

In [12]:
def opt_para_xyg7(c_x=0.8865, arr=None):
    """Minimise ``err_our`` over the six free XYG7 parameters at fixed ``c_x``.

    Parameters
    ----------
    c_x : float, default 0.8865
        Fixed ``eng_exx_HF`` coefficient.
    arr : sequence of 6 floats, optional
        Starting point ``(c_s, c_b88, c_vwn, c_lyp, c_os, c_ss)``; a built-in
        guess is used when omitted.

    Returns
    -------
    scipy.optimize.OptimizeResult
        Result of an unbounded L-BFGS-B run with ``ftol=1e-5``.
    """
    x0 = [0.2162, -0.1432, 0.4658, 0.0739, 0.3870, 0.2595] if arr is None else arr
    to_coeff = gen_para_xyg7(c_x)

    def objective(free):
        return err_our(to_coeff(free))

    # For a progress trace, additionally pass
    # callback=lambda x: print(str_arr(x), f"{objective(x):.6f}")
    return scipy.optimize.minimize(
        objective, x0, method="L-BFGS-B", options={"ftol": 1e-5})

In [13]:
# Scan c_x downward from 1.0 in steps of 0.025 (the stop value -0.025 is
# exclusive). The first run starts from opt_para_xyg7's built-in guess
# (arr=None); every later run is warm-started from the previous optimum.
conf_para_xyg7 = {}
arr = None
for c_x in np.arange(1.0, -0.025, -0.025):
    res = opt_para_xyg7(c_x, arr)
    arr = list(res.x)
    print(str_arr([c_x] + arr), f"{res.fun:.6f}")
    # Keys are rounded, presumably to keep float noise from np.arange out of them.
    conf_para_xyg7[np.round(c_x, 4)] = res

[1.000000, 0.122237, -0.151589, 0.555825, 0.109334, 0.352026, 0.351220] 5.847635
[0.975000, 0.135955, -0.139300, 0.555839, 0.109532, 0.338101, 0.346475] 5.620618
[0.950000, 0.148575, -0.128877, 0.556020, 0.110043, 0.327160, 0.341942] 5.409501
[0.925000, 0.162179, -0.116822, 0.557886, 0.110923, 0.315875, 0.334667] 5.216930
[0.900000, 0.173576, -0.103339, 0.558688, 0.110486, 0.305956, 0.327599] 5.049259
[0.875000, 0.201073, -0.113033, 0.575431, 0.121706, 0.300825, 0.279868] 4.894492
[0.850000, 0.214969, -0.099586, 0.573818, 0.119523, 0.287424, 0.274823] 4.785523
[0.825000, 0.227997, -0.084729, 0.573160, 0.116847, 0.274062, 0.266654] 4.706016
[0.800000, 0.324820, -0.149762, 0.618915, -0.025686, 0.296575, 0.169365] 4.629377
[0.775000, 0.336439, -0.133946, 0.621553, -0.026465, 0.283670, 0.162919] 4.584243
[0.750000, 0.342393, -0.125022, 0.694291, -0.030397, 0.248939, 0.172543] 4.546588
[0.725000, 0.356244, -0.110194, 0.693691, -0.033452, 0.232672, 0.164674] 4.533629
[0.700000, 0.369191, -0.

In [14]:
# Persist the optimizer results (conf_para_xyg7, keyed by rounded c_x).
# Note that the file is named conf_data_* although it stores conf_para_*.
with open("conf_data_xyg7.dat", "wb") as f:
    pickle.dump(conf_para_xyg7, f)

In [15]:
# Re-evaluate every optimum with to_list=True to get the unweighted
# per-benchmark errors (a pandas Series) instead of the scalar objective.
conf_data_xyg7 = {}
for c_x in conf_para_xyg7:
    conf_data_xyg7[c_x] = err_our(gen_para_xyg7(c_x)(conf_para_xyg7[c_x].x), True)

In [16]:
# Export the error table: one column per c_x, one row per benchmark set.
pd.DataFrame.from_dict(conf_data_xyg7).to_csv("conf_data_xyg7.csv")

## Confined Optimization: XYG6

In [17]:
def gen_para_xyg6(c_x):
    """Return a builder of XYG6 coefficient mappings for a fixed ``c_x``.

    ``c_x`` is used as the ``eng_exx_HF`` coefficient. The returned function
    takes the five free parameters ``(c_b88, c_vwn, c_lyp, c_os, c_ss)``. The
    ``eng_purexc_LDA_X`` coefficient is not free: it is ``1 - c_x - c_b88``,
    so the three exchange coefficients always add up to one.
    """
    def inner(arr):
        c_b88, c_vwn, c_lyp, c_os, c_ss = arr
        coeff = {"eng_noxc": 1, "eng_exx_HF": c_x}
        coeff["eng_purexc_LDA_X"] = 1 - c_x - c_b88
        coeff["eng_purexc_GGA_X_B88"] = c_b88
        coeff["eng_purexc_LDA_C_VWN_RPA"] = c_vwn
        coeff["eng_purexc_GGA_C_LYP"] = c_lyp
        coeff["eng_corr_MP2_OS"] = c_os
        coeff["eng_corr_MP2_SS"] = c_ss
        return coeff

    return inner

In [18]:
def opt_para_xyg6(c_x=0.9105, arr=None):
    """Minimise ``err_our`` over the five free XYG6 parameters at fixed ``c_x``.

    Parameters
    ----------
    c_x : float, default 0.9105
        Fixed ``eng_exx_HF`` coefficient.
    arr : sequence of 5 floats, optional
        Starting point ``(c_b88, c_vwn, c_lyp, c_os, c_ss)``; a built-in guess
        is used when omitted.

    Returns
    -------
    scipy.optimize.OptimizeResult
        Result of an unbounded L-BFGS-B run with ``ftol=1e-5``.
    """
    start = arr
    if start is None:
        start = [-0.0681, 0.1800, 0.2244, 0.4695, 0.2426]
    build = gen_para_xyg6(c_x)

    def objective(free):
        return err_our(build(free))

    # For a progress trace, additionally pass
    # callback=lambda x: print(str_arr(x), f"{objective(x):.6f}")
    settings = {"ftol": 1e-5}
    return scipy.optimize.minimize(objective, start, method="L-BFGS-B", options=settings)

In [19]:
# Scan c_x downward from 1.0 in steps of 0.025 (the stop value -0.025 is
# exclusive). The first run starts from opt_para_xyg6's built-in guess
# (arr=None); every later run is warm-started from the previous optimum.
conf_para_xyg6 = {}
arr = None
for c_x in np.arange(1.0, -0.025, -0.025):
    res = opt_para_xyg6(c_x, arr)
    arr = list(res.x)
    print(str_arr([c_x] + arr), f"{res.fun:.6f}")
    # Keys are rounded, presumably to keep float noise from np.arange out of them.
    conf_para_xyg6[np.round(c_x, 4)] = res

[1.000000, -0.106249, 0.285683, 0.300842, 0.402685, 0.247980] 5.911693
[0.975000, -0.092117, 0.293791, 0.289581, 0.385723, 0.263278] 5.675730
[0.950000, -0.078572, 0.310679, 0.282601, 0.366358, 0.266698] 5.470618
[0.925000, -0.070641, 0.308039, 0.278115, 0.355115, 0.259231] 5.289081
[0.900000, -0.063178, 0.304254, 0.273078, 0.342250, 0.251313] 5.136626
[0.875000, -0.055839, 0.300590, 0.268225, 0.329270, 0.243395] 5.014354
[0.850000, -0.048165, 0.296988, 0.263313, 0.316566, 0.235085] 4.923973
[0.825000, -0.046034, 0.330949, 0.279094, 0.310515, 0.180023] 4.805698
[0.800000, -0.039277, 0.326794, 0.274029, 0.297467, 0.172381] 4.744372
[0.775000, -0.032893, 0.323105, 0.269339, 0.284234, 0.164492] 4.705203
[0.750000, -0.026310, 0.319704, 0.264776, 0.271008, 0.156439] 4.695089
[0.725000, -0.013897, 0.347722, 0.282465, 0.258483, 0.120011] 4.673013
[0.700000, -0.008096, 0.344201, 0.277997, 0.244976, 0.112082] 4.697798
[0.675000, 0.005863, 0.375279, 0.278066, 0.218393, 0.112935] 4.725733
[0.6500

In [20]:
# Persist the optimizer results (conf_para_xyg6, keyed by rounded c_x).
# Note that the file is named conf_data_* although it stores conf_para_*.
with open("conf_data_xyg6.dat", "wb") as f:
    pickle.dump(conf_para_xyg6, f)

In [21]:
# Re-evaluate every optimum with to_list=True to get the unweighted
# per-benchmark errors (a pandas Series) instead of the scalar objective.
conf_data_xyg6 = {}
for c_x in conf_para_xyg6:
    conf_data_xyg6[c_x] = err_our(gen_para_xyg6(c_x)(conf_para_xyg6[c_x].x), True)

In [22]:
# Export the error table: one column per c_x, one row per benchmark set.
pd.DataFrame.from_dict(conf_data_xyg6).to_csv("conf_data_xyg6.csv")

## Confined Optimization: XYG6+1/cr

In [23]:
def gen_para_cr(c_x):
    """Return a builder of XYG6+1/cr coefficient mappings for a fixed ``c_x``.

    ``c_x`` is used as the ``eng_exx_HF`` coefficient. The returned function
    takes ``(c_b88, c_vwn, c_lyp, c_os, c_ss, c_cr)``. ``eng_purexc_LDA_X`` is
    ``1 - c_x - c_b88``. ``c_cr`` splits the opposite-spin and same-spin
    correlation weights between the ``MP2`` components (share ``1 - c_cr``)
    and the ``MP2CR`` components (share ``c_cr``).
    """
    def inner(arr):
        c_b88, c_vwn, c_lyp, c_os, c_ss, c_cr = arr
        plain_share = 1 - c_cr
        exchange_and_dft = {
            "eng_noxc": 1,
            "eng_exx_HF": c_x,
            "eng_purexc_LDA_X": (1 - c_x - c_b88),
            "eng_purexc_GGA_X_B88": c_b88,
            "eng_purexc_LDA_C_VWN_RPA": c_vwn,
            "eng_purexc_GGA_C_LYP": c_lyp,
        }
        correlation = {
            "eng_corr_MP2_OS": c_os * plain_share,
            "eng_corr_MP2_SS": c_ss * plain_share,
            "eng_corr_MP2CR_OS": c_os * c_cr,
            "eng_corr_MP2CR_SS": c_ss * c_cr,
        }
        return {**exchange_and_dft, **correlation}

    return inner

In [24]:
def opt_para_cr(c_x=0.9105, arr=None):
    """Minimise ``err_our`` over the six free XYG6+1/cr parameters at fixed ``c_x``.

    Parameters
    ----------
    c_x : float, default 0.9105
        Fixed ``eng_exx_HF`` coefficient.
    arr : sequence of 6 floats, optional
        Starting point ``(c_b88, c_vwn, c_lyp, c_os, c_ss, c_cr)``; a built-in
        guess is used when omitted.

    Returns
    -------
    scipy.optimize.OptimizeResult
        Result of an unbounded L-BFGS-B run with ``ftol=1e-5``.
    """
    default_start = [-0.0681, 0.1800, 0.2244, 0.4695, 0.2426, 0.7]
    x0 = default_start if arr is None else arr
    coeff_of = gen_para_cr(c_x)

    def objective(free):
        return err_our(coeff_of(free))

    # For a progress trace, additionally pass
    # callback=lambda x: print(str_arr(x), f"{objective(x):.6f}")
    return scipy.optimize.minimize(
        objective,
        x0,
        method="L-BFGS-B",
        options={"ftol": 1e-5},
    )

In [25]:
# Scan c_x downward from 1.0 in steps of 0.025 (the stop value -0.025 is
# exclusive). The first run starts from opt_para_cr's built-in guess
# (arr=None); every later run is warm-started from the previous optimum.
conf_para_cr = {}
arr = None
for c_x in np.arange(1.0, -0.025, -0.025):
    res = opt_para_cr(c_x, arr)
    arr = list(res.x)
    print(str_arr([c_x] + arr), f"{res.fun:.6f}")
    # Keys are rounded, presumably to keep float noise from np.arange out of them.
    conf_para_cr[np.round(c_x, 4)] = res

[1.000000, -0.105054, 0.147894, 0.205046, 0.546377, 0.293383, 0.605749] 3.769109
[0.975000, -0.110421, 0.155148, 0.206402, 0.543845, 0.246439, 0.608580] 3.593461
[0.950000, -0.101939, 0.151767, 0.200794, 0.531640, 0.239918, 0.609755] 3.477849
[0.925000, -0.092276, 0.149658, 0.195624, 0.518154, 0.229873, 0.608552] 3.386036
[0.900000, -0.085227, 0.147562, 0.191582, 0.506099, 0.222763, 0.610488] 3.329912
[0.875000, -0.075870, 0.148293, 0.188383, 0.492097, 0.211537, 0.609088] 3.297631
[0.850000, -0.053983, 0.200118, 0.199698, 0.458730, 0.203543, 0.595083] 3.251873
[0.825000, -0.100298, 0.332414, 0.032837, 0.445987, 0.173146, 0.589250] 3.263034
[0.800000, -0.092279, 0.328825, 0.027647, 0.433804, 0.165962, 0.590041] 3.316843
[0.775000, -0.085175, 0.326107, 0.023356, 0.421467, 0.158723, 0.591194] 3.405223
[0.750000, -0.077414, 0.324597, 0.019707, 0.408975, 0.150318, 0.591930] 3.517529
[0.725000, -0.057852, 0.368064, 0.043438, 0.381910, 0.116799, 0.571747] 3.582895
[0.700000, -0.050303, 0.3640

In [26]:
# Persist the optimizer results (conf_para_cr, keyed by rounded c_x).
# Note that the file is named conf_data_* although it stores conf_para_*.
with open("conf_data_cr.dat", "wb") as f:
    pickle.dump(conf_para_cr, f)

In [27]:
# Re-evaluate every optimum with to_list=True to get the unweighted
# per-benchmark errors (a pandas Series) instead of the scalar objective.
conf_data_cr = {}
for c_x in conf_para_cr:
    conf_data_cr[c_x] = err_our(gen_para_cr(c_x)(conf_para_cr[c_x].x), True)

In [28]:
# Export the error table: one column per c_x, one row per benchmark set.
pd.DataFrame.from_dict(conf_data_cr).to_csv("conf_data_cr.csv")

## Confined Optimization: XYG6+1/SIEPA

In [29]:
def gen_para_siepa(c_x):
    """Return a builder of XYG6+1/SIEPA coefficient mappings for a fixed ``c_x``.

    ``c_x`` is used as the ``eng_exx_HF`` coefficient. The returned function
    takes ``(c_b88, c_vwn, c_lyp, c_os, c_ss, c_cr)``. ``eng_purexc_LDA_X`` is
    ``1 - c_x - c_b88``. ``c_cr`` splits the opposite-spin and same-spin
    correlation weights between the ``MP2`` components (share ``1 - c_cr``)
    and the ``SIEPA`` components (share ``c_cr``).
    """
    def inner(arr):
        c_b88, c_vwn, c_lyp, c_os, c_ss, c_cr = arr
        entries = [
            ("eng_noxc", 1),
            ("eng_exx_HF", c_x),
            ("eng_purexc_LDA_X", (1 - c_x - c_b88)),
            ("eng_purexc_GGA_X_B88", c_b88),
            ("eng_purexc_LDA_C_VWN_RPA", c_vwn),
            ("eng_purexc_GGA_C_LYP", c_lyp),
            ("eng_corr_MP2_OS", c_os * (1 - c_cr)),
            ("eng_corr_MP2_SS", c_ss * (1 - c_cr)),
            ("eng_corr_SIEPA_OS", c_os * c_cr),
            ("eng_corr_SIEPA_SS", c_ss * c_cr),
        ]
        return dict(entries)

    return inner

In [30]:
def opt_para_siepa(c_x=0.9105, arr=None):
    """Minimise ``err_our`` over the six free XYG6+1/SIEPA parameters at fixed ``c_x``.

    Parameters
    ----------
    c_x : float, default 0.9105
        Fixed ``eng_exx_HF`` coefficient.
    arr : sequence of 6 floats, optional
        Starting point ``(c_b88, c_vwn, c_lyp, c_os, c_ss, c_cr)``; a built-in
        guess is used when omitted.

    Returns
    -------
    scipy.optimize.OptimizeResult
        Result of an L-BFGS-B run with ``ftol=1e-5``. No bounds are imposed,
        so ``c_cr`` is free to leave the interval [0, 1].
    """
    if arr is not None:
        x0 = arr
    else:
        x0 = [-0.0681, 0.1800, 0.2244, 0.4695, 0.2426, 0.7]
    make_coeff = gen_para_siepa(c_x)

    def objective(free):
        return err_our(make_coeff(free))

    # For a progress trace, additionally pass
    # callback=lambda x: print(str_arr(x), f"{objective(x):.6f}")
    result = scipy.optimize.minimize(
        objective, x0, method="L-BFGS-B", options={"ftol": 1e-5})
    return result

In [31]:
# Scan c_x downward from 1.0 in steps of 0.025 (the stop value -0.025 is
# exclusive). The first run starts from opt_para_siepa's built-in guess
# (arr=None); every later run is warm-started from the previous optimum.
conf_para_siepa = {}
arr = None
for c_x in np.arange(1.0, -0.025, -0.025):
    res = opt_para_siepa(c_x, arr)
    arr = list(res.x)
    print(str_arr([c_x] + arr), f"{res.fun:.6f}")
    # Keys are rounded, presumably to keep float noise from np.arange out of them.
    conf_para_siepa[np.round(c_x, 4)] = res

[1.000000, -0.147379, 0.252347, 0.188557, 0.503563, 0.159285, 1.511444] 4.411659
[0.975000, -0.141300, 0.248645, 0.183956, 0.491953, 0.152927, 1.511495] 4.251011
[0.950000, -0.134217, 0.245194, 0.179246, 0.480588, 0.146109, 1.511071] 4.102870
[0.925000, -0.125576, 0.242467, 0.174504, 0.469471, 0.138647, 1.509774] 3.979009
[0.900000, -0.117477, 0.241495, 0.171103, 0.457602, 0.130961, 1.507123] 3.880078
[0.875000, -0.109447, 0.241415, 0.168360, 0.445676, 0.122944, 1.504322] 3.811963
[0.850000, -0.106622, 0.277540, 0.181282, 0.429681, 0.061666, 1.400831] 3.697445
[0.825000, -0.101496, 0.274119, 0.177097, 0.417890, 0.055800, 1.401331] 3.674202
[0.800000, -0.094441, 0.272294, 0.173134, 0.406595, 0.049318, 1.400198] 3.695175
[0.775000, -0.081052, 0.278377, 0.170780, 0.395144, 0.039366, 1.391815] 3.734475
[0.750000, -0.061778, 0.307697, 0.173948, 0.375801, 0.028858, 1.357336] 3.768790
[0.725000, -0.056232, 0.304323, 0.168969, 0.362350, 0.024259, 1.356517] 3.859109
[0.700000, -0.040222, 0.3177

In [32]:
# Persist the optimizer results (conf_para_siepa, keyed by rounded c_x).
# Note that the file is named conf_data_* although it stores conf_para_*.
with open("conf_data_siepa.dat", "wb") as f:
    pickle.dump(conf_para_siepa, f)

In [33]:
# Re-evaluate every optimum with to_list=True to get the unweighted
# per-benchmark errors (a pandas Series) instead of the scalar objective.
conf_data_siepa = {}
for c_x in conf_para_siepa:
    conf_data_siepa[c_x] = err_our(gen_para_siepa(c_x)(conf_para_siepa[c_x].x), True)

In [34]:
# Export the error table: one column per c_x, one row per benchmark set.
pd.DataFrame.from_dict(conf_data_siepa).to_csv("conf_data_siepa.csv")

## Confined Optimization: XYG6+1/cr, scanned over c_cr (c_x optimized freely)

In [35]:
def gen_para_cr(c_cr):
    """Return a builder of XYG6+1/cr coefficient mappings for a fixed ``c_cr``.

    NOTE: this redefines the ``gen_para_cr`` of the earlier "XYG6+1/cr"
    section with swapped roles. Here ``c_cr`` is fixed and the
    ``eng_exx_HF`` coefficient ``c_x`` is the first free parameter. Once this
    cell has run, re-running the earlier section's cells without re-running
    its definitions silently uses this version.

    The returned function takes ``(c_x, c_b88, c_vwn, c_lyp, c_os, c_ss)``.
    ``eng_purexc_LDA_X`` is ``1 - c_x - c_b88``. The opposite-spin and
    same-spin correlation weights are split between the ``MP2`` components
    (share ``1 - c_cr``) and the ``MP2CR`` components (share ``c_cr``).
    """
    def inner(arr):
        c_x, c_b88, c_vwn, c_lyp, c_os, c_ss = arr
        coeff = {}
        coeff["eng_noxc"] = 1
        coeff["eng_exx_HF"] = c_x
        coeff["eng_purexc_LDA_X"] = 1 - c_x - c_b88
        coeff["eng_purexc_GGA_X_B88"] = c_b88
        coeff["eng_purexc_LDA_C_VWN_RPA"] = c_vwn
        coeff["eng_purexc_GGA_C_LYP"] = c_lyp
        coeff["eng_corr_MP2_OS"] = c_os * (1 - c_cr)
        coeff["eng_corr_MP2_SS"] = c_ss * (1 - c_cr)
        coeff["eng_corr_MP2CR_OS"] = c_os * c_cr
        coeff["eng_corr_MP2CR_SS"] = c_ss * c_cr
        return coeff

    return inner

In [36]:
def opt_para_cr(c_x=0.7, arr=None):
    """Minimise ``err_our`` over six free parameters at a fixed cr mixing ratio.

    NOTE: this redefines the ``opt_para_cr`` of the earlier "XYG6+1/cr"
    section. Despite its name, the first parameter is forwarded to
    ``gen_para_cr`` as the fixed ``c_cr``; the name ``c_x`` is kept only so
    that existing keyword callers continue to work.

    Parameters
    ----------
    c_x : float, default 0.7
        The fixed ``c_cr`` value (see note above).
    arr : sequence of 6 floats, optional
        Starting point ``(c_x, c_b88, c_vwn, c_lyp, c_os, c_ss)``; a built-in
        guess is used when omitted.

    Returns
    -------
    scipy.optimize.OptimizeResult
        Result of an unbounded L-BFGS-B run with ``ftol=1e-5``.
    """
    c_cr = c_x  # the argument really is the fixed mixing ratio
    x0 = arr if arr is not None else [0.9105, -0.0681, 0.1800, 0.2244, 0.4695, 0.2426]
    coeff_of = gen_para_cr(c_cr)

    def objective(free):
        return err_our(coeff_of(free))

    # For a progress trace, additionally pass
    # callback=lambda x: print(str_arr(x), f"{objective(x):.6f}")
    return scipy.optimize.minimize(
        objective, x0, method="L-BFGS-B", options={"ftol": 1e-5})

In [37]:
# Scan c_cr downward from 1.0 in steps of 0.025 (the stop value -0.025 is
# exclusive), optimizing c_x together with the other parameters. The first run
# starts from opt_para_cr's built-in guess; later runs are warm-started.
# NOTE: conf_para_cr is rebound here, replacing the in-memory results of the
# earlier c_x scan (those were pickled to conf_data_cr.dat by an earlier cell).
conf_para_cr = {}
arr = None
for c_cr in np.arange(1.0, -0.025, -0.025):
    res = opt_para_cr(c_cr, arr)
    arr = list(res.x)
    # The fixed c_cr is printed last, after the optimized parameters.
    print(str_arr(arr + [c_cr]), f"{res.fun:.6f}")
    # Keys are rounded, presumably to keep float noise from np.arange out of them.
    conf_para_cr[np.round(c_cr, 4)] = res

[0.849814, -0.012982, 0.174025, 0.184535, 0.480909, 0.323224, 1.000000] 4.303027
[0.850833, -0.012588, 0.173705, 0.184155, 0.480121, 0.322866, 0.975000] 4.201244
[0.855036, -0.014727, 0.173539, 0.184293, 0.480972, 0.323307, 0.950000] 4.099764
[0.855590, -0.015044, 0.173452, 0.184279, 0.480697, 0.323263, 0.925000] 3.999136
[0.856588, -0.015415, 0.173249, 0.184184, 0.479917, 0.322898, 0.900000] 3.899493
[0.855790, -0.039518, 0.177594, 0.166757, 0.491137, 0.274188, 0.875000] 3.810438
[0.850509, -0.045839, 0.197284, 0.165893, 0.481740, 0.248414, 0.850000] 3.741138
[0.851926, -0.048050, 0.197503, 0.165490, 0.480445, 0.247973, 0.825000] 3.686981
[0.857339, -0.054197, 0.201053, 0.163726, 0.481436, 0.243584, 0.800000] 3.634123
[0.857770, -0.053871, 0.200993, 0.162427, 0.481353, 0.242392, 0.775000] 3.582632
[0.858698, -0.115841, 0.309491, -0.003018, 0.494983, 0.212968, 0.750000] 3.513272
[0.860305, -0.117449, 0.311665, -0.000785, 0.490667, 0.212852, 0.725000] 3.463531
[0.860856, -0.117193, 0.31

In [38]:
# Persist the optimizer results of the c_cr scan (keyed by rounded c_cr).
# Note that the file is named conf_data_* although it stores conf_para_cr.
with open("conf_data_cr_of_cr.dat", "wb") as f:
    pickle.dump(conf_para_cr, f)

In [39]:
# Re-evaluate every optimum with to_list=True to get the unweighted
# per-benchmark errors (a pandas Series) instead of the scalar objective.
# NOTE: conf_data_cr is rebound here, replacing the earlier c_x-scan table.
conf_data_cr = {}
for c_cr in conf_para_cr:
    conf_data_cr[c_cr] = err_our(gen_para_cr(c_cr)(conf_para_cr[c_cr].x), True)

In [40]:
# Export the error table: one column per c_cr, one row per benchmark set.
pd.DataFrame.from_dict(conf_data_cr).to_csv("conf_data_cr_of_cr.csv")