In [32]:
from pathlib import Path
import copy

import torch
import pyscf
import numpy as np

from cadft.utils import load_to_gpu, NAO
from cadft.utils import FCNet, add_args, DataBase, BasicDataset
from cadft import CC_DFT_DATA, Mol, add_args, gen_logger


def gen_dm1(dft2cc, dm1, model_dict, device):
    dm1_cc = np.zeros((dft2cc.mol.nao, dft2cc.mol.nao))
    for i in range(dft2cc.mol.natm):
        for j in range(dft2cc.mol.natm):
            atom_name = dft2cc.atom_info["atom"][i] + dft2cc.atom_info["atom"][j]
            input_mat = (
                dm1[dft2cc.atom_info["slice"][i], dft2cc.atom_info["slice"][j]]
            ).flatten()
            input_mat = (
                torch.as_tensor(input_mat.copy())
                .to(torch.float64)
                .contiguous()
                .to(device=device)
                .requires_grad_(True)
            )
            output_mat = model_dict[atom_name + "1"](input_mat)
            dm1_cc[dft2cc.atom_info["slice"][i], dft2cc.atom_info["slice"][j]] = (
                output_mat
                .detach()
                .cpu()
                .numpy()
                .reshape(
                    NAO[dft2cc.atom_info["atom"][i]], NAO[dft2cc.atom_info["atom"][j]]
                )
            )

    return dm1_cc


def gen_f_mat(dft2cc, dm1_cc, model_dict, device):
    f_mat = np.zeros((dft2cc.mol.nao, dft2cc.mol.nao))
    ene_xc = 0
    for i in range(dft2cc.mol.natm):
        for j in range(dft2cc.mol.natm):
            atom_name = dft2cc.atom_info["atom"][i] + dft2cc.atom_info["atom"][j]
            input_mat = (
                dm1_cc[dft2cc.atom_info["slice"][i], dft2cc.atom_info["slice"][j]]
            ).flatten()
            input_mat = (
                torch.as_tensor(input_mat.copy())
                .to(torch.float64)
                .contiguous()
                .to(device=device)
                .requires_grad_(True)
            )
            e_xc = model_dict[atom_name + "2"](input_mat)
            grad_dms = torch.autograd.grad(e_xc, input_mat)
            f_mat[dft2cc.atom_info["slice"][i], dft2cc.atom_info["slice"][j]] = (
                grad_dms[0]
                .detach()
                .cpu()
                .numpy()
                .reshape(
                    NAO[dft2cc.atom_info["atom"][i]], NAO[dft2cc.atom_info["atom"][j]]
                )
            )
            ene_xc += e_xc.detach().cpu().numpy()[0]

    return f_mat, ene_xc


key_l = []
model_dict = {}

ATOM_LIST = [
    "H",
    "C",
]

molecular = copy.deepcopy(Mol["Methane"])
dft2cc = CC_DFT_DATA(
    molecular,
    name="test",
    basis="cc-pvdz",
    if_basis_str=True,
)

mf = pyscf.scf.RHF(dft2cc.mol)
mf.kernel()
mycc = pyscf.cc.CCSD(mf)
mycc.kernel()

dm1_cc = mycc.make_rdm1(ao_repr=True)
e_cc = mycc.e_tot

mdft = pyscf.scf.RKS(dft2cc.mol)
mdft.xc = "b3lyp"
mdft.kernel()
dm1_dft = mdft.make_rdm1(ao_repr=True)

converged SCF energy = -39.9436908748407
E(CCSD) = -40.14735754407338  E_corr = -0.2036666692326493
converged SCF energy = -40.2951547559176


In [57]:
dir_checkpoint = Path("./checkpoint2024-04-30-20-50-32")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for i_atom in ATOM_LIST:
    for j_atom in ATOM_LIST:
        atom_name = i_atom + j_atom
        key_l.append(atom_name)

        model_dict[atom_name + "2"] = FCNet(NAO[i_atom] * NAO[j_atom], 100, 1).to(
            device
        )
        model_dict[atom_name + "2"].double()
        list_of_path = dir_checkpoint.glob(f"{atom_name}-2-*.pth")
        load_path = max(list_of_path, key=lambda p: p.stat().st_ctime)
        state_dict = torch.load(load_path, map_location=device)
        model_dict[atom_name + "2"].load_state_dict(state_dict)
        print(f"Model loaded from {load_path}")

        model_dict[atom_name + "1"] = FCNet(
            NAO[i_atom] * NAO[j_atom], 100, NAO[i_atom] * NAO[j_atom]
        ).to(device)
        model_dict[atom_name + "1"].double()
        list_of_path = dir_checkpoint.glob(f"{atom_name}-1-*.pth")
        load_path = max(list_of_path, key=lambda p: p.stat().st_ctime)
        state_dict = torch.load(load_path, map_location=device)
        model_dict[atom_name + "1"].load_state_dict(state_dict)
        print(f"Model loaded from {load_path}")

Model loaded from checkpoint2024-04-30-20-50-32/HH-2-30000.pth
Model loaded from checkpoint2024-04-30-20-50-32/HH-1-30000.pth
Model loaded from checkpoint2024-04-30-20-50-32/HC-2-30000.pth
Model loaded from checkpoint2024-04-30-20-50-32/HC-1-30000.pth
Model loaded from checkpoint2024-04-30-20-50-32/CH-2-30000.pth
Model loaded from checkpoint2024-04-30-20-50-32/CH-1-30000.pth
Model loaded from checkpoint2024-04-30-20-50-32/CC-2-30000.pth
Model loaded from checkpoint2024-04-30-20-50-32/CC-1-30000.pth


In [59]:
dm1_predict = gen_dm1(dft2cc, dm1_dft, model_dict, device)
print(np.mean(np.abs(dm1_predict - dm1_cc)))
f_mat, ene_xc = gen_f_mat(dft2cc, dm1_predict, model_dict, device)
h1e = dft2cc.mol.intor("int1e_nuc") + dft2cc.mol.intor("int1e_kin")
eri = dft2cc.mol.intor("int2e")
print(
    1000
    * (
        ene_xc
        + np.einsum("pqrs,pq,rs", eri, dm1_predict, dm1_predict) / 2
        + np.sum(h1e * dm1_predict)
        + dft2cc.mol.energy_nuc()
        - e_cc
    )
)

0.00027872076417977565
0.20315933647907514


In [19]:
print(np.einsum("pqrs,pq,rs", eri, dm1_predict, dm1_predict))
dm1_predict_flatten = dm1_predict.flatten()
eri_flatten = eri.reshape(
    dft2cc.mol.nao * dft2cc.mol.nao, dft2cc.mol.nao * dft2cc.mol.nao
)
print(dm1_predict_flatten.shape, eri.shape)
dm1_predict_flatten @ eri_flatten @ dm1_predict_flatten

64.97823500818082
(1156,) (34, 34, 34, 34)


64.97823500818089

In [8]:
# print(np.einsum("pqrs,pr->qs", eri, dm1_cc))
# print(f_mat)

In [3]:
import scipy.linalg as LA
import opt_einsum as oe

mat_s = dft2cc.mol.intor("int1e_ovlp")
mat_hs = LA.fractional_matrix_power(mat_s, -0.5).real
nocc = dft2cc.mol.nelec[0]

mdft = pyscf.scf.RKS(dft2cc.mol)
mdft.grids.build()
ao = pyscf.dft.numint.eval_ao(dft2cc.mol, mdft.grids.coords)

dm1 = dm1_cc.copy()
for i in range(100):
    f_mat, ene_xc = gen_f_mat(dft2cc, dm1, model_dict, device)
    vj = np.einsum("pqrs,pq->rs", eri, dm1)
    fock_a = mat_hs @ (h1e + vj + f_mat) @ mat_hs
    _, mo = np.linalg.eigh(fock_a)
    mo = mat_hs @ mo
    dm1_old = dm1.copy()
    dm1 = 2 * mo[:, :nocc] @ mo[:, :nocc].T
    dm1 = 0.01 * dm1 + 0.99 * dm1_old
    
    dm_cc_r = oe.contract(
        "" "uv,gu,gv,g->g",
        dm1_cc,
        ao,
        ao,
        mdft.grids.weights,
        optimize="auto",
    )

    dm_cc_real_r = oe.contract(
        "uv,gu,gv,g->g",
        dm1,
        ao,
        ao,
        mdft.grids.weights,
        optimize="auto",
    )

    nn_ene = (
        ene_xc
        + np.einsum("pqrs,pq,rs", eri, dm1, dm1) / 2
        + np.sum(h1e * dm1)
        + dft2cc.mol.energy_nuc()
    )
    print(
        f"CCSD energy: {e_cc}",
        f"NN energy: {nn_ene}",
    )
    print(np.sum(np.abs(dm_cc_r - dm_cc_real_r)))

    print(np.sum(dm_cc_r * mdft.grids.coords[:, 0]))
    print(np.sum(dm_cc_real_r * mdft.grids.coords[:, 0]))
    print(np.sum(dm_cc_r * mdft.grids.coords[:, 1]))
    print(np.sum(dm_cc_real_r * mdft.grids.coords[:, 1]))
    print(np.sum(dm_cc_r * mdft.grids.coords[:, 2]))
    print(np.sum(dm_cc_real_r * mdft.grids.coords[:, 2]))

CCSD energy: -40.14735754380377 NN energy: -39.99770520302959
0.06648199446414402
-1.49741330446318e-14
0.006504791491696665
6.748941683287768e-15
0.009288352065045372
-1.0473392986209973e-16
-0.008936148740616259
CCSD energy: -40.14735754380377 NN energy: -40.62979335343853
0.13036053589518118
-1.49741330446318e-14
0.00839967706960866
6.748941683287768e-15
0.018042449114605584
-1.0473392986209973e-16
-0.011948329206819255
CCSD energy: -40.14735754380377 NN energy: -41.24523971318373
0.19210619681395252
-1.49741330446318e-14
0.007097667342143496
6.748941683287768e-15
0.01808562709090069
-1.0473392986209973e-16
-0.018414730613095863
CCSD energy: -40.14735754380377 NN energy: -41.73283541981494
0.2437905905532322
-1.49741330446318e-14
0.015351934646489207
6.748941683287768e-15
0.008291055707002478
-1.0473392986209973e-16
-0.022802561683449785
CCSD energy: -40.14735754380377 NN energy: -42.49196705035505
0.2889631637400022
-1.49741330446318e-14
0.03048984783992817
6.748941683287768e-15
0.

In [6]:
print(np.linspace(-0.25, 0.25, 11))
print(np.linspace(-0.225, 0.225, 10))

[-0.25 -0.2  -0.15 -0.1  -0.05  0.    0.05  0.1   0.15  0.2   0.25]
[-0.225 -0.175 -0.125 -0.075 -0.025  0.025  0.075  0.125  0.175  0.225]


In [8]:
import h5py
import numpy as np
from pathlib import Path

ATOM_LIST = [
    "H",
    "C",
]
ATOM_STR_DICT = [
    "Methane",
    "Ethane",
    "Ethylene",
    "Acetylene",
    "Allene",
    "Cyclopropene",
    "Propyne",
    "Cyclopropane",
    "Propylene",
    "Propane",
    "Isobutane",
    "Butane",
    "Butadiene",
    "Butyne",
    "Bicyclobutane",
    "Cyclopropylmethyl",
    "Cyclobutane",
    "Spiropentane",
    "Benzene",
    "Pentane",
    "Isopentane",
    "Neopentane",
    "Cyclopentane",
]


def numpy_to_hdf5():
    path = Path("./") / "data"
    path_h5py = Path("./") / "data" / "file.h5"
    with h5py.File(path_h5py, "a") as f:
        for i_atom in ATOM_LIST:
            for j_atom in ATOM_LIST:
                atom_name = i_atom + j_atom
                grp = f.create_group(atom_name)
                for i_molecular in ATOM_STR_DICT:
                    dset = grp.create_group(i_molecular)
                    for (
                        extend_atom,
                        extend_xyz,
                        distance,
                        magic_str,
                    ) in (
                        [0, 1],
                        [1, 2, 3],
                        np.linspace(-0.25, 0.25, 11),
                        [
                            "weight/energy_nuc",
                            "weight/e_ccsd",
                            f"{atom_name}/input/input",
                            f"{atom_name}/input/output_dm1",
                            f"{atom_name}/input/output_exc",
                        ],
                    ):
                        data = np.load(
                            path
                            / f"{atom_name}/{magic_str}_{extend_atom}_{extend_xyz}_{distance}.npy"
                        )
                        dset.create_dataset(
                            f"{magic_str}_{i_molecular}_{extend_atom}_{extend_xyz}_{distance}",
                            data=data,
                        )


numpy_to_hdf5()
# f.create_dataset("dm1_cc", data=dm1_cc)
# f.create_dataset("eri", data=eri)
# f.create_dataset("h1e", data=h1e)
# f.create_dataset("mat_s", data=mat_s)
# f.create_dataset("dm1", data=dm1)

TypeError: One of data, shape or dtype must be specified