In [1]:
import pandas as pd
import json
import numpy as np
import scipy
import scipy.optimize
import pickle
# from multiprocessing.pool import ThreadPool as Pool
from concurrent.futures import ProcessPoolExecutor as Pool

In [2]:
def eng_by_coeff(df, coeff):
    """Return the linear combination of rows of ``df`` weighted by ``coeff``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table indexed so that ``df.loc[label]`` works for every key of ``coeff``.
    coeff : dict
        Maps a row label of ``df`` to the scalar weight applied to that row.

    Returns
    -------
    The sum over ``coeff`` of ``weight * df.loc[label]`` (accumulated in the
    iteration order of ``coeff``), or ``None`` when ``coeff`` is empty.
    """
    weighted_rows = (weight * df.loc[label] for label, weight in coeff.items())
    # The first term seeds the accumulator; an empty mapping yields None.
    total = next(weighted_rows, None)
    for row in weighted_rows:
        total += row
    return total

In [3]:
def str_arr(arr):
    """Format an iterable of numbers as ``"[a, b, ...]"`` with six decimals each."""
    body = ", ".join(format(value, ".6f") for value in arr)
    return f"[{body}]"

## Read data

In [4]:
# Parse the two reference files; the cells below iterate over their items and
# read the "systems", "stoichiometry" and "ref" (plus "bond" for MN15) fields.
with open("../MN15_REF.json") as mn15_file:
    MN15_REF = json.load(mn15_file)
with open("../GMTKN55_REF.json") as gmtkn55_file:
    GMTKN55_REF = json.load(gmtkn55_file)

In [5]:
# Both tables share one layout: the first two lines form a two-level column
# header and the first column is the row index.
df_GMTKN55_b3lyp, df_MN15_b3lyp = (
    pd.read_csv(csv_path, header=[0, 1], index_col=[0])
    for csv_path in ("../GMTKN55_b3lyp.csv", "../MN15_b3lyp.csv")
)

In [6]:
# Per-reaction table for GMTKN55: one column per "dataset/index" key of
# GMTKN55_REF, one row per row of the B3LYP table, plus a "ref" row that
# stores the reference value of each reaction.
df_react_GMTKN55 = pd.DataFrame(
    index=list(df_GMTKN55_b3lyp.index) + ["ref"],
    columns=pd.MultiIndex.from_tuples([v.split("/") for v in GMTKN55_REF.keys()]),
    data=0.0)
for entry, info in GMTKN55_REF.items():
    dataset, idx = entry.split("/")
    # Accumulate the stoichiometry-weighted rows of every participating system.
    for syst, stoi in zip(info["systems"], info["stoichiometry"]):
        dat, syst = syst.split("/")
        df_react_GMTKN55[(dataset, idx)] += stoi * df_GMTKN55_b3lyp[dat][syst]
    # Write the reference once per reaction, after all additions.
    # Assumption (verify): the B3LYP table has no "ref" row, so each
    # index-aligned addition above leaves NaN in that row; assigning last
    # overwrites it.  Doing it outside the inner loop also covers a reaction
    # that lists no systems.
    df_react_GMTKN55.loc["ref", (dataset, idx)] = info["ref"]

In [7]:
# Per-reaction table for MN15: one column per "dataset/index" key of MN15_REF,
# one row per row of the B3LYP table, plus a "ref" row (reference value) and a
# "bond" row (the divisor used by err_our) for each reaction.
df_react_MN15 = pd.DataFrame(
    index=list(df_MN15_b3lyp.index) + ["ref", "bond"],
    columns=pd.MultiIndex.from_tuples([v.split("/") for v in MN15_REF.keys()]),
    data=0.0)
for entry, info in MN15_REF.items():
    dataset, idx = entry.split("/")
    # Systems are looked up inside the reaction's own dataset here.
    for syst, stoi in zip(info["systems"], info["stoichiometry"]):
        df_react_MN15[(dataset, idx)] += stoi * df_MN15_b3lyp[dataset][syst]
    # Write the metadata rows once per reaction, after all additions.
    # Assumption (verify): the B3LYP table has no "ref"/"bond" rows, so each
    # index-aligned addition above leaves NaN there; assigning last overwrites
    # it.  Doing it outside the inner loop also covers a reaction that lists
    # no systems.
    df_react_MN15.loc["ref", (dataset, idx)] = info["ref"]
    df_react_MN15.loc["bond", (dataset, idx)] = info["bond"]

## Useful functions

In [8]:
def err_wtmad2(para):
    """Weighted error of a coefficient set on the GMTKN55 reaction table.

    The reaction values are ``eng_by_coeff(df_react_GMTKN55, para) * 627.51``
    (presumably Hartree -> kcal/mol; confirm the units of the input table).
    For every first-level column label (dataset) the summed absolute deviation
    from the "ref" row is scaled by ``56.84 / mean(|ref|)`` of that dataset;
    the total is divided by the number of columns of the table.

    Parameters
    ----------
    para : dict
        Row label -> coefficient, as accepted by ``eng_by_coeff``.

    Returns
    -------
    The scalar weighted error.
    """
    reference = df_react_GMTKN55.loc["ref"]
    deviation = eng_by_coeff(df_react_GMTKN55, para) * 627.51 - reference

    weighted_total = sum(
        56.84 * deviation[dataset].abs().sum() / reference[dataset].abs().mean()
        for dataset in deviation.index.levels[0]
    )
    return weighted_total / df_react_GMTKN55.columns.size

In [9]:
def err_our(para, to_list=False):
    """Combined GMTKN55 / MN15 error measure for a coefficient set.

    The "GMTKN55" entry is ``err_wtmad2(para)``.  For every first-level column
    label (dataset) of ``df_react_MN15`` the entry is the mean absolute value
    of ``(E * 627.51 - ref) / bond`` over that dataset's columns.

    Parameters
    ----------
    para : dict
        Row label -> coefficient, as accepted by ``eng_by_coeff``.
    to_list : bool, optional
        When true, return the per-dataset errors as a ``pandas.Series``
        (unweighted).  When false (default), return one weighted scalar.

    Returns
    -------
    pandas.Series or scalar
        The weighted scalar is the sum of all entries divided by 20 after
        multiplying the named entries by their weights below.
    """
    per_set = {"GMTKN55": err_wtmad2(para)}

    energies = eng_by_coeff(df_react_MN15, para) * 627.51
    per_bond = (energies - df_react_MN15.loc["ref"]) / df_react_MN15.loc["bond"]
    per_set.update(
        (dataset, per_bond[dataset].abs().mean())
        for dataset in per_bond.index.levels[0]
    )
    per_set = pd.Series(per_set)

    if to_list:
        return per_set

    # Weights add up to 20, hence the common divisor.
    weights = {
        "GMTKN55": 8,
        "MR-MGM-BE4": 4,
        "MR-MGN-BE17": 1,
        "MR-TM-BE13": 4,
        "SR-MGM-BE9": 1,
        "SR-TM-BE17": 2,
    }
    per_set /= 20
    for name, weight in weights.items():
        per_set[name] *= weight
    return per_set.sum()

## Confined Optimization: XYG7

In [10]:
def gen_para_xyg7(c_x):
    """Build the XYG7 coefficient mapper for a fixed ``eng_exx_HF`` weight.

    Parameters
    ----------
    c_x : float
        Coefficient assigned to ``"eng_exx_HF"``.

    Returns
    -------
    callable
        ``inner(arr)`` takes exactly six values
        ``(c_s, c_b88, c_vwn, c_lyp, c_os, c_ss)`` and returns the dict of
        energy-component label -> coefficient (``"eng_noxc"`` is fixed at 1).
    """
    free_labels = (
        "eng_purexc_LDA_X",
        "eng_purexc_GGA_X_B88",
        "eng_purexc_LDA_C_VWN_RPA",
        "eng_purexc_GGA_C_LYP",
        "eng_corr_MP2_OS",
        "eng_corr_MP2_SS",
    )

    def inner(arr):
        # Unpacking enforces that exactly six free coefficients are supplied.
        c_s, c_b88, c_vwn, c_lyp, c_os, c_ss = arr
        para = {"eng_noxc": 1, "eng_exx_HF": c_x}
        para.update(zip(free_labels, (c_s, c_b88, c_vwn, c_lyp, c_os, c_ss)))
        return para

    return inner

In [11]:
def opt_para_xyg7(c_x=0.8865, arr=None, n_epoch=20):
    """Minimise ``err_wtmad2`` over the six free XYG7 coefficients at fixed ``c_x``.

    Runs ``n_epoch`` L-BFGS-B minimisations from randomly perturbed starting
    points and returns the result with the lowest objective value.

    Parameters
    ----------
    c_x : float, optional
        Fixed coefficient passed to ``gen_para_xyg7``.
    arr : sequence of float, optional
        Initial guess for the six free coefficients; a built-in guess is used
        when omitted.  The caller's sequence is never modified.
    n_epoch : int, optional
        Number of perturbed restarts (default 20, the original fixed value).

    Returns
    -------
    scipy.optimize.OptimizeResult or None
        The best result over all epochs; ``None`` only when ``n_epoch`` is 0.

    Notes
    -----
    Perturbations are drawn from NumPy's global random generator, so results
    depend on its state at call time.
    """
    if arr is None:
        arr = [0.2162, -0.1432, 0.4658, 0.0739, 0.3870, 0.2595]

    to_para = gen_para_xyg7(c_x)

    def obj(x):
        return err_wtmad2(to_para(x))

    res_min = None
    for _ in range(n_epoch):
        # Fresh float copy: protects the caller's data and allows integer input.
        arr = np.array(arr, dtype=float)
        # Uniform shift in [-0.25, 0.25).  The shift is applied to the
        # previous epoch's start, so starting points perform a random walk.
        arr += 0.5 * np.random.random(len(arr)) - 0.25
        res = scipy.optimize.minimize(
            obj, arr,
            method="L-BFGS-B",
            options={"ftol": 1e-5},
        )
        print(str_arr([c_x] + list(res.x)), f"{res.fun:.6f}")
        # Keep the best epoch.  The original never updated its threshold and
        # therefore always returned the last epoch instead of the best one.
        if res_min is None or res.fun < res_min.fun:
            res_min = res
    return res_min

In [12]:
# Seed NumPy's global random generator in the process that runs this cell.
np.random.seed(0)
# Grid of c_x values to scan: starts at 1.0 and moves in steps of -0.025
# towards the exclusive stop value -0.025.  Values come from floating-point
# stepping, so they are rounded before being used as dictionary keys later.
lst_c_x = np.arange(1.0, -0.025, -0.025)

def get_res(c_x):
    """Optimise the free coefficients for one ``c_x`` and report the result.

    Intended as the per-task function for the process pool.

    Parameters
    ----------
    c_x : float
        Fixed coefficient forwarded to ``opt_para_xyg7``.

    Returns
    -------
    The optimisation result returned by ``opt_para_xyg7(c_x)``.
    """
    # Seed per task.  A seed set once in the parent process does not make the
    # random restarts reproducible: which worker handles which task (and hence
    # which part of the random stream a task consumes) varies between runs.
    # Deriving the seed from c_x makes every grid point deterministic.
    np.random.seed(int(round(abs(float(c_x)) * 1e4)))
    res = opt_para_xyg7(c_x)
    print("[Final]", str_arr([c_x] + list(res.x)), f"{res.fun:.6f}")
    return res

In [13]:
# Run one optimisation per grid value across 16 worker processes.
# Executor.map returns a lazy, one-shot iterator: materialise it into a list
# so the results can be consumed more than once (re-running the next cell
# would otherwise see an exhausted iterator and build an empty dict) and so
# that any worker exception surfaces here.
with Pool(16) as pool:
    lst_res = list(pool.map(get_res, lst_c_x))

[0.800000, 0.278851, -0.120380, 0.498073, 0.059383, 0.344498, 0.196412] 2.453437
[0.925000, 0.183834, -0.146952, 0.448370, 0.091511, 0.406580, 0.278926] 2.039592
[0.825000, 0.257480, -0.124545, 0.486439, 0.072777, 0.352406, 0.218989] 2.233693
[0.875000, 0.219775, -0.134674, 0.462494, 0.081157, 0.381051, 0.254684] 1.965052
[0.850000, 0.233350, -0.127540, 0.481256, 0.089705, 0.358106, 0.245224] 2.055104
[0.725000, 0.439822, -0.215572, 0.726890, -0.241552, 0.319102, 0.114699] 3.234033
[0.775000, 0.367538, -0.189185, 0.642242, -0.137861, 0.341297, 0.156361] 2.674064
[0.625000, 0.578304, -0.266189, 0.913705, -0.454651, 0.260460, 0.059849] 4.518007
[0.700000, 0.478806, -0.233721, 0.785820, -0.308738, 0.301162, 0.106749] [0.900000, 0.200043, -0.137740, 0.447070, 0.087306, 0.396485, 0.269629]3.539134
 1.966164
[0.675000, 0.516275, -0.249523, 0.838449, -0.369356, 0.287083, 0.090022] 3.855691
[0.750000, 0.399488, -0.197339, 0.678356, -0.181393, 0.329092, 0.137846] 2.942294
[1.000000, 0.058932, -

In [14]:
# Map each grid value (rounded to 4 decimals to strip floating-point stepping
# noise) to its optimisation result.
conf_para_xyg7 = dict(zip((np.round(c_x, 4) for c_x in lst_c_x), lst_res))

In [15]:
# Persist the c_x -> optimisation-result mapping as a binary pickle.
with open("conf_gmtkn55_xyg7.dat", "wb") as dump_file:
    pickle.dump(conf_para_xyg7, dump_file)

In [16]:
# Per-dataset errors (err_our with to_list=True) of the optimised coefficients
# at every scanned c_x.
conf_data_xyg7 = {
    c_x: err_our(gen_para_xyg7(c_x)(res.x), True)
    for c_x, res in conf_para_xyg7.items()
}

In [17]:
# One column per c_x, one row per dataset error.
pd.DataFrame(conf_data_xyg7).to_csv("conf_gmtkn55_xyg7.csv")

## Confined Optimization: XYG6+1/cr

In [18]:
def gen_para_xyg7(c_x):
    """Build the coefficient mapper of the XYG6+1/cr model for a fixed ``c_x``.

    NOTE: this reuses the name ``gen_para_xyg7`` and therefore replaces the
    XYG7 mapper defined earlier in the notebook from this cell onwards.

    Parameters
    ----------
    c_x : float
        Coefficient assigned to ``"eng_exx_HF"``.

    Returns
    -------
    callable
        ``inner(arr)`` takes exactly six values
        ``(c_b88, c_vwn, c_lyp, c_os, c_ss, c_cr)`` and returns the dict of
        energy-component label -> coefficient.  The LDA exchange weight is
        ``1 - c_x - c_b88``; ``c_cr`` splits each MP2 weight between the
        ``MP2`` and ``MP2CR`` components.
    """
    def inner(arr):
        c_b88, c_vwn, c_lyp, c_os, c_ss, c_cr = arr
        plain_share = 1 - c_cr  # fraction left on the plain MP2 components
        para = {"eng_noxc": 1, "eng_exx_HF": c_x}
        para["eng_purexc_LDA_X"] = 1 - c_x - c_b88
        para["eng_purexc_GGA_X_B88"] = c_b88
        para["eng_purexc_LDA_C_VWN_RPA"] = c_vwn
        para["eng_purexc_GGA_C_LYP"] = c_lyp
        para["eng_corr_MP2_OS"] = c_os * plain_share
        para["eng_corr_MP2_SS"] = c_ss * plain_share
        para["eng_corr_MP2CR_OS"] = c_os * c_cr
        para["eng_corr_MP2CR_SS"] = c_ss * c_cr
        return para

    return inner

In [19]:
def opt_para_xyg7(c_x=0.851546, arr=None):
    """Minimise ``err_wtmad2`` over the six free XYG6+1/cr coefficients at fixed ``c_x``.

    NOTE: this reuses the name ``opt_para_xyg7`` and replaces the XYG7
    optimiser defined earlier in the notebook from this cell onwards.

    A single L-BFGS-B minimisation is started from ``arr`` without random
    perturbation, so callers can warm-start from a neighbouring solution.

    Parameters
    ----------
    c_x : float, optional
        Fixed coefficient passed to ``gen_para_xyg7``.
    arr : sequence of float, optional
        Starting values ``(c_b88, c_vwn, c_lyp, c_os, c_ss, c_cr)``; a
        built-in guess is used when omitted.  The caller's sequence is never
        modified.

    Returns
    -------
    scipy.optimize.OptimizeResult
        Always the minimiser's result.  The original compared the objective
        against a fixed threshold of 1000 and returned ``None`` above it,
        which made callers fail on ``res.x``.
    """
    if arr is None:
        arr = [-0.061062, 0.168713, 0.204008, 0.460703, 0.214325, 0.596938]

    # Float copy: protects the caller's data and allows integer input.
    start = np.array(arr, dtype=float)
    to_para = gen_para_xyg7(c_x)

    def obj(x):
        return err_wtmad2(to_para(x))

    res = scipy.optimize.minimize(
        obj, start,
        method="L-BFGS-B",
        options={"ftol": 1e-5},
    )
    print(str_arr([c_x] + list(res.x)), f"{res.fun:.6f}")
    return res

In [20]:
# Scan the fixed coefficient from 1.0 downwards in steps of 0.025, warm-starting
# each optimisation from the previous grid point's solution.
# The scanned value is passed as the c_x argument of opt_para_xyg7 (the
# "eng_exx_HF" weight); the cr coefficient is the last element of `arr`, so
# the loop variable is named c_x rather than c_cr.
conf_para_cr = {}
arr = None
for c_x in np.arange(1.0, -0.025, -0.025):
    res = opt_para_xyg7(c_x, arr)
    arr = list(res.x)
    print(str_arr([c_x] + arr), f"{res.fun:.6f}")
    # Round the key to strip floating-point stepping noise.
    conf_para_cr[np.round(c_x, 4)] = res

[1.000000, -0.053406, 0.142552, 0.274302, 0.527598, 0.344829, 0.386098] 2.584923
[1.000000, -0.053406, 0.142552, 0.274302, 0.527598, 0.344829, 0.386098] 2.584923
[0.975000, -0.047688, 0.161956, 0.247582, 0.517211, 0.343534, 0.423784] 2.353847
[0.975000, -0.047688, 0.161956, 0.247582, 0.517211, 0.343534, 0.423784] 2.353847
[0.950000, -0.041845, 0.158935, 0.241465, 0.510884, 0.328622, 0.434011] 2.165190
[0.950000, -0.041845, 0.158935, 0.241465, 0.510884, 0.328622, 0.434011] 2.165190
[0.925000, -0.033588, 0.156219, 0.236978, 0.509610, 0.311667, 0.466611] 2.036347
[0.925000, -0.033588, 0.156219, 0.236978, 0.509610, 0.311667, 0.466611] 2.036347
[0.900000, -0.025607, 0.164032, 0.239456, 0.490762, 0.299802, 0.472557] 1.983986
[0.900000, -0.025607, 0.164032, 0.239456, 0.490762, 0.299802, 0.472557] 1.983986
[0.875000, -0.030220, 0.207381, 0.173175, 0.482996, 0.308543, 0.602752] 2.011533
[0.875000, -0.030220, 0.207381, 0.173175, 0.482996, 0.308543, 0.602752] 2.011533
[0.850000, -0.021924, 0.2085

In [21]:
# Persist the c_x -> optimisation-result mapping as a binary pickle.
with open("conf_gmtkn55_cr.dat", "wb") as dump_file:
    pickle.dump(conf_para_cr, dump_file)

In [22]:
# Per-dataset errors (err_our with to_list=True) of the XYG6+1/cr results at
# every scanned c_x.  NOTE: this rebinds conf_data_xyg7, replacing the XYG7
# table built earlier in the notebook.
conf_data_xyg7 = {
    c_x: err_our(gen_para_xyg7(c_x)(res.x), True)
    for c_x, res in conf_para_cr.items()
}

In [23]:
# One column per c_x, one row per dataset error.
pd.DataFrame(conf_data_xyg7).to_csv("conf_gmtkn55_cr.csv")