In [1]:
import warnings
import sys
from pyscf import data

# Install a blanket "ignore" filter so warnings raised by later cells are not displayed.
warnings.filterwarnings("ignore")

In [2]:
import numpy as np
import pandas as pd
import h5py
from atom_rdf_pack.atom_rdf import get_rmsd_error, rad
from matplotlib import pyplot as plt

# Remove the row-display limit so DataFrames are rendered with all of their rows.
pd.set_option('display.max_rows', None)

In [3]:
# Labels of the form "<symbol>+<integer>" (presumably element symbol and ionic charge — confirm).
atom_config = [
    "B+1", "B+3",
    "C+2", "C+4",
    "N+3", "N+5",
    "O+4", "O+6",
    "F+5", "F+7",
    "Ne+0", "Ne+6", "Ne+8",
]
# Same labels with "Be+0" appended at the end.
atom_config_with_be = [*atom_config, "Be+0"]

In [4]:
# Expand every label into a (label, text before "+", integer after "+") tuple.
atom_cfg = [
    (label, label.split("+")[0], int(label.split("+")[1]))
    for label in atom_config
]
atom_cfg_with_be = [
    (label, label.split("+")[0], int(label.split("+")[1]))
    for label in atom_config_with_be
]

In [5]:
def h5group_to_dict(grp):
    """Recursively copy an HDF5 group into a plain nested dict.

    Every ``h5py.Dataset`` member is read in full with ``[()]``; any other
    member is treated as a sub-group and converted by a recursive call.

    Parameters
    ----------
    grp
        Object exposing ``.items()`` that yields ``(name, member)`` pairs
        (an ``h5py.File`` or ``h5py.Group`` in this notebook).

    Returns
    -------
    dict
        Mapping from member name to the dataset contents or a nested dict.
    """
    return {
        name: node[()] if isinstance(node, h5py.Dataset) else h5group_to_dict(node)
        for name, node in grp.items()
    }

In [6]:
def get_rdf_error(dct1, dct2):
    """Return the RHO / GRD / LR errors between two data sets as a Series.

    For each of the keys ``"RHO"``, ``"GRD"`` and ``"LR"`` the value of
    ``get_rmsd_error(dct1[key], dct2[key], scale=key)`` is collected; the
    resulting Series is then divided by ``data.nist.BOHR``.

    Parameters
    ----------
    dct1, dct2
        Mappings that both provide the keys ``"RHO"``, ``"GRD"`` and ``"LR"``.

    Returns
    -------
    pandas.Series
        Indexed by ``"RHO"``, ``"GRD"``, ``"LR"`` in that order.
    """
    errors = {}
    for key in ("RHO", "GRD", "LR"):
        errors[key] = get_rmsd_error(dct1[key], dct2[key], scale=key)
    return pd.Series(errors) / data.nist.BOHR

## Load Data

In [7]:
%%time
# Read the CCSD / aug-cc-pwcv5z group of rdf.h5 into memory; it is used as the reference below.
with h5py.File("rdf.h5", "r") as h5_file:
    ref_group = h5_file["CCSD"]["aug-cc-pwcv5z"]
    dict_ref = h5group_to_dict(ref_group)

CPU times: user 1.64 ms, sys: 12.9 ms, total: 14.6 ms
Wall time: 32.4 ms


In [8]:
%%time
# Read the whole of rdf-low.h5 into a nested dict.
with h5py.File("rdf-low.h5", "r") as h5_file:
    dict_low = h5group_to_dict(h5_file)

CPU times: user 1.48 s, sys: 1.59 s, total: 3.07 s
Wall time: 6.74 s


In [9]:
%%time
# Read the whole of rdf-dh.h5 into a nested dict.
with h5py.File("rdf-dh.h5", "r") as h5_file:
    dict_dh = h5group_to_dict(h5_file)

CPU times: user 416 ms, sys: 536 ms, total: 952 ms
Wall time: 2.12 s


## RDF Error Analysis

In [10]:
def get_tab_err(dat, ref):
    """Tabulate ``get_rdf_error`` for every label in ``atom_config_with_be``.

    Parameters
    ----------
    dat, ref
        Mappings keyed by the labels of ``atom_config_with_be``; ``dat[atom]``
        and ``ref[atom]`` are passed to ``get_rdf_error`` as a pair.

    Returns
    -------
    pandas.DataFrame
        One column per label, holding the Series returned by
        ``get_rdf_error`` for that label.
    """
    return pd.DataFrame({
        atom: get_rdf_error(dat[atom], ref[atom])
        for atom in atom_config_with_be
    })

In [11]:
# Two-level column index: task ("RHO", "GRD", "LR") on the outer level, atom label on the inner level.
column = pd.MultiIndex.from_product([["RHO", "GRD", "LR"], atom_config_with_be])

In [12]:
%%time
# RDF errors of every method in dict_low (aug-cc-pwcv5z entry) against dict_ref, one row per method.
column = pd.MultiIndex.from_tuples([
    (task, atom)
    for task in ["RHO", "GRD", "LR"]
    for atom in atom_config_with_be
])
# reshape(-1) flattens the error table row by row; this lines up with `column`
# as long as the table's rows are ordered RHO, GRD, LR and its columns follow atom_config_with_be.
err_by_xc = {
    xc: pd.Series(
        np.array(get_tab_err(by_basis["aug-cc-pwcv5z"], dict_ref)).reshape(-1),
        index=column,
    )
    for xc, by_basis in dict_low.items()
}
tab_full_low = pd.DataFrame(err_by_xc).T
tab_full_low.to_csv("Table-RDF-low.csv")

CPU times: user 397 ms, sys: 1.96 ms, total: 399 ms
Wall time: 401 ms


In [13]:
%%time
# RDF errors of every method in dict_low (aug-cc-pv5z entry) against dict_ref, one row per method.
column = pd.MultiIndex.from_tuples([
    (task, atom)
    for task in ["RHO", "GRD", "LR"]
    for atom in atom_config_with_be
])
# reshape(-1) flattens the error table row by row; this lines up with `column`
# as long as the table's rows are ordered RHO, GRD, LR and its columns follow atom_config_with_be.
err_by_xc = {
    xc: pd.Series(
        np.array(get_tab_err(by_basis["aug-cc-pv5z"], dict_ref)).reshape(-1),
        index=column,
    )
    for xc, by_basis in dict_low.items()
}
tab_full_pv5z_low = pd.DataFrame(err_by_xc).T
tab_full_pv5z_low.to_csv("Table-RDF-low-pv5z.csv")

CPU times: user 398 ms, sys: 825 µs, total: 399 ms
Wall time: 401 ms


In [14]:
# Largest error over all atom labels, per method (rows) and per task (columns).
pd.DataFrame({
    task: tab_full_low[task].max(axis=1)
    for task in ("RHO", "GRD", "LR")
})

Unnamed: 0,RHO,GRD,LR
APFD,1.666953,1.780747,1.812968
B1B95,1.618496,1.942107,2.033329
B3LYP,2.122773,1.712187,1.905967
B3LYP*,2.445838,1.706096,1.823662
B3LYPV1R,2.108666,1.713396,1.907803
B3P86,1.701735,1.871877,1.936202
B3PW91,1.651938,1.756371,1.813514
B97-1,1.721124,1.961903,1.942481
B97-2,1.911641,2.017747,1.596285
B97-3,1.623921,1.84697,1.879701


In [15]:
%%time
# RDF errors of every method in dict_dh (aug-cc-pwcv5z entry) against dict_ref, one row per method.
column = pd.MultiIndex.from_tuples([
    (task, atom)
    for task in ["RHO", "GRD", "LR"]
    for atom in atom_config_with_be
])
# reshape(-1) flattens the error table row by row; this lines up with `column`
# as long as the table's rows are ordered RHO, GRD, LR and its columns follow atom_config_with_be.
err_by_xc = {
    xc: pd.Series(
        np.array(get_tab_err(by_basis["aug-cc-pwcv5z"], dict_ref)).reshape(-1),
        index=column,
    )
    for xc, by_basis in dict_dh.items()
}
tab_full_dh = pd.DataFrame(err_by_xc).T
tab_full_dh.to_csv("Table-RDF-dh.csv")

CPU times: user 133 ms, sys: 569 µs, total: 134 ms
Wall time: 135 ms


In [16]:
%%time
# RDF errors of every method in dict_dh (aug-cc-pv5z entry) against dict_ref, one row per method.
column = pd.MultiIndex.from_tuples([
    (task, atom)
    for task in ["RHO", "GRD", "LR"]
    for atom in atom_config_with_be
])
# reshape(-1) flattens the error table row by row; this lines up with `column`
# as long as the table's rows are ordered RHO, GRD, LR and its columns follow atom_config_with_be.
err_by_xc = {
    xc: pd.Series(
        np.array(get_tab_err(by_basis["aug-cc-pv5z"], dict_ref)).reshape(-1),
        index=column,
    )
    for xc, by_basis in dict_dh.items()
}
tab_full_dh_pv5z = pd.DataFrame(err_by_xc).T
tab_full_dh_pv5z.to_csv("Table-RDF-dh-pv5z.csv")

CPU times: user 129 ms, sys: 5.15 ms, total: 134 ms
Wall time: 135 ms


In [17]:
# Largest error over all atom labels, per method (rows) and per task (columns).
pd.DataFrame({
    task: tab_full_dh[task].max(axis=1)
    for task in ("RHO", "GRD", "LR")
})

Unnamed: 0,RHO,GRD,LR
B2GP_PLYP,1.442336,1.570672,1.405187
B2GP_PLYP_D3BJ,1.442336,1.583475,1.39606
B2PLYP,1.408461,1.575831,1.54153
DSD_BLYP_D3BJ,1.413456,1.530166,1.319359
DSD_PBEB95_D3BJ,1.367139,1.503894,1.31325
DSD_PBEP86_D3BJ,1.432707,1.539768,1.302873
DSD_PBEPBE_D3BJ,1.379039,1.437469,1.193828
LS1DH_PBE,1.572222,1.614176,1.241276
PBE0_2,1.550024,1.597471,1.210851
PBE0_DH,1.651873,1.700858,1.479808


## Energy Analysis

In [20]:
%%time
# Read the whole of etot-low.h5 into a nested dict.
with h5py.File("etot-low.h5", "r") as h5_file:
    dict_eng_low = h5group_to_dict(h5_file)

CPU times: user 408 ms, sys: 216 ms, total: 624 ms
Wall time: 1.24 s


In [21]:
%%time
# Pick the aug-cc-pwcv5z entry of every method in dict_eng_low and export one row per method.
etot_by_xc = {
    xc: by_basis["aug-cc-pwcv5z"]
    for xc, by_basis in dict_eng_low.items()
}
pd.DataFrame(etot_by_xc).T.to_csv("Table-etot-low.csv")

CPU times: user 4.94 ms, sys: 0 ns, total: 4.94 ms
Wall time: 6.41 ms


In [22]:
%%time
# Pick the aug-cc-pv5z entry of every method in dict_eng_low and export one row per method.
etot_by_xc = {
    xc: by_basis["aug-cc-pv5z"]
    for xc, by_basis in dict_eng_low.items()
}
pd.DataFrame(etot_by_xc).T.to_csv("Table-etot-low-pv5z.csv")

CPU times: user 4.45 ms, sys: 0 ns, total: 4.45 ms
Wall time: 5.89 ms


In [23]:
%%time
# Read the whole of etot-dh.h5 into a nested dict.
with h5py.File("etot-dh.h5", "r") as h5_file:
    dict_eng_dh = h5group_to_dict(h5_file)

CPU times: user 133 ms, sys: 69.3 ms, total: 202 ms
Wall time: 398 ms


In [24]:
%%time
# Pick the aug-cc-pwcv5z entry of every method in dict_eng_dh and export one row per method.
etot_by_xc = {
    xc: by_basis["aug-cc-pwcv5z"]
    for xc, by_basis in dict_eng_dh.items()
}
pd.DataFrame(etot_by_xc).T.to_csv("Table-etot-dh.csv")

CPU times: user 1.37 ms, sys: 1.27 ms, total: 2.64 ms
Wall time: 3.93 ms


In [25]:
%%time
# Pick the aug-cc-pv5z entry of every method in dict_eng_dh and export one row per method.
etot_by_xc = {
    xc: by_basis["aug-cc-pv5z"]
    for xc, by_basis in dict_eng_dh.items()
}
pd.DataFrame(etot_by_xc).T.to_csv("Table-etot-dh-pv5z.csv")

CPU times: user 1.55 ms, sys: 1.44 ms, total: 2.99 ms
Wall time: 4.1 ms


## Auxiliary

In [27]:
%%time
# Read the whole of conv-low.h5 into a nested dict (method -> basis -> atom -> flag).
with h5py.File("conv-low.h5", "r") as h5_file:
    dict_conv = h5group_to_dict(h5_file)

CPU times: user 401 ms, sys: 224 ms, total: 625 ms
Wall time: 1.2 s


In [28]:
# List every (method, basis, atom) whose stored flag is falsy
# (presumably calculations that did not converge — confirm against how conv-low.h5 is written).
for method, by_basis in dict_conv.items():
    for basis, by_atom in by_basis.items():
        for atom, flag in by_atom.items():
            if not flag:
                print(method, basis, atom)

GLYP aug-cc-pv5z C+4
GLYP aug-cc-pwcv5z B+3
GLYP aug-cc-pwcv5z N+5
GLYP aug-cc-pwcv5z Ne+8
GLYP aug-cc-pwcv5z O+6
GOP aug-cc-pv5z C+4
GOP aug-cc-pv5z Ne+8
GOP aug-cc-pwcv5z N+5
GOP aug-cc-pwcv5z Ne+8
GOP aug-cc-pwcv5z O+6
GP86 aug-cc-pv5z Ne+8
GP86 aug-cc-pwcv5z N+5
GP86 aug-cc-pwcv5z Ne+8
GP86 aug-cc-pwcv5z O+6
GPBE aug-cc-pv5z Ne+8
GPBE aug-cc-pwcv5z F+7
GPBE aug-cc-pwcv5z N+5
GPBE aug-cc-pwcv5z Ne+8
GPBE aug-cc-pwcv5z O+6
GPW91 aug-cc-pv5z Ne+8
GPW91 aug-cc-pwcv5z F+7
GPW91 aug-cc-pwcv5z N+5
GPW91 aug-cc-pwcv5z Ne+8
GPW91 aug-cc-pwcv5z O+6
GPZ81 aug-cc-pv5z C+4
GPZ81 aug-cc-pv5z Ne+8
GPZ81 aug-cc-pwcv5z B+1
GPZ81 aug-cc-pwcv5z N+5
GPZ81 aug-cc-pwcv5z Ne+8
GPZ81 aug-cc-pwcv5z O+6
GVWN aug-cc-pv5z C+4
GVWN aug-cc-pv5z Ne+8
GVWN aug-cc-pwcv5z B+1
GVWN aug-cc-pwcv5z N+5
GVWN aug-cc-pwcv5z Ne+8
GVWN aug-cc-pwcv5z O+6
GVWN1RPA aug-cc-pv5z C+4
GVWN1RPA aug-cc-pv5z Ne+8
GVWN1RPA aug-cc-pwcv5z B+1
GVWN1RPA aug-cc-pwcv5z B+3
GVWN1RPA aug-cc-pwcv5z N+5
GVWN1RPA aug-cc-pwcv5z Ne+8
GVWN1RPA aug-