In [18]:
from pathlib import Path
import copy

import torch
import pyscf
from pyscf import dft
import numpy as np

from cadft import Mol
from cadft.utils import gen_basis
from cadft.utils import Grid

# Work on a deep copy of the methane geometry entry and build the PySCF
# molecule with the basis produced by the project helper `gen_basis`.
molecular = copy.deepcopy(Mol["Methane"])
mol = pyscf.M(
    atom=molecular,
    basis=gen_basis(molecular, "cc-pvdz", True),
)

# Disabled experiment: build the grid from a single carbon atom instead of
# the full molecule.
# molecular_grids = [
#     ["C", 0, 0, 0],
# ]
# mol_grids = pyscf.M(
#     atom=molecular_grids,
#     basis=gen_basis(molecular_grids, "cc-pvdz", True),
# )

# Integration grid and the atomic-orbital values on every grid point.
grids = Grid(mol, level=0)
coords = grids.coords
weights = grids.weights
ao_value = dft.numint.eval_ao(mol, coords)

print(coords.shape)

# Reference restricted Kohn-Sham calculation and its density matrix.
mdft = pyscf.scf.RKS(mol)
mdft.kernel()
dm1_dft = mdft.make_rdm1(ao_repr=True)
print(np.shape(ao_value))

# Density on the grid from the reference density matrix.
rho_dft = dft.numint.eval_rho(mol, ao_value, dm1_dft)

# Design matrix of the linear problem rho = mat_a @ vec(dm):
# column j * nao + k holds ao_value[:, j] * ao_value[:, k].
# A single broadcasted product replaces the nao**2-iteration Python loop and
# yields exactly the same entries in the same column order.
# Columns (j, k) and (k, j) are identical, so the system is rank deficient;
# `lstsq` then returns the minimum-norm solution.
mat_a = (ao_value[:, :, None] * ao_value[:, None, :]).reshape(
    len(ao_value), mol.nao * mol.nao
)

# Disabled experiment: perturb the target density with Gaussian noise to see
# how sensitive the fitted density matrix is.
# rho_dft_error = 1e-4 * np.random.normal(size=rho_dft.shape)
# rho_dft += rho_dft_error

# Recover the density matrix from the grid density by least squares.
dm1_dft_fit = np.linalg.lstsq(mat_a, rho_dft, rcond=None)[0].reshape(mol.nao, mol.nao)

# Element-wise deviation of the fitted density matrix from the reference.
print((dm1_dft - dm1_dft_fit))

# data = np.load("data/grids/data_Methane_0_1_0.0000.npz")
# weights = data["weights"]
# rho_cc = data["rho_cc"] * weights
# rho_dft = data["rho_dft"] * weights
# coords_r = np.sqrt(data["coords_r"])
# exc_over_dm_cc_grids = data["exc_over_dm_cc_grids"] * rho_cc

# r = coords_r

# bohr = 0.52917721067

# mat_a = []
# for atom_basis in mol.basis.values():
#     for i in atom_basis:
#         for j in range(len(i[1:])):
#             print(j)
#             mat_a.append(np.exp(-i[1:][j][0] * bohr**2 * r**2))
#             # for k in range(j, len(i[1:])):
#             #     mat_a.append(np.exp(-(i[1:][j][0] + i[1:][k][0]) * r**2) * r ** i[0])
# mat_a = np.array(mat_a).T

# print(mat_a.shape)
# mol.basis

(6450, 3)
converged SCF energy = -39.8972142130528
(6450, 34)
[[-5.08348919e-12 -4.99726163e-10  4.54892866e-10  1.80158760e-11
  -9.73463881e-12  1.05139259e-11  3.08154952e-10 -1.29762048e-09
   2.57501766e-12 -1.64654654e-11 -1.47107554e-13 -1.41379530e-11
   2.28654219e-12 -8.71501955e-12  4.46220352e-11  4.35433967e-10
   4.77761129e-11 -7.17821982e-12  4.82827770e-12  1.26242419e-11
  -7.44497995e-10 -1.35060071e-11 -3.77060189e-11  2.11006213e-13
   2.46006757e-11  1.60696879e-09 -2.65921694e-11  5.15921958e-11
   4.37714111e-12 -3.66609729e-11 -2.36258873e-09  1.12137227e-11
  -3.43331735e-11  2.49473089e-12]
 [ 4.47144342e-10 -1.44716572e-10 -3.04050563e-10 -1.13821761e-11
   2.33644013e-12 -1.34165862e-12 -1.11527355e-10 -2.38367422e-10
   2.86469667e-12 -2.16035014e-11 -7.89110201e-12 -2.09994600e-11
  -3.27557318e-13 -1.42612250e-11  8.41135495e-11  8.47937387e-10
   7.89472862e-11 -1.84208100e-11 -2.47835876e-13  5.76910741e-11
   2.81311446e-10 -5.58568296e-11 -5.78522542

In [17]:
# Display the noise vector `rho_dft_error`.
# NOTE(review): the only assignment to `rho_dft_error` visible in this notebook
# is commented out (`# rho_dft_error = 1e-4 * np.random.normal(...)`), so this
# cell appears to rely on leftover kernel state and would raise NameError on a
# fresh kernel -- confirm and restore the definition (with a fixed seed) if the
# output is still needed.
rho_dft_error

array([ 5.00932522e-05,  9.76160444e-05, -1.59873422e-04, -2.46005287e-05,
       -8.97547086e-05,  3.12500760e-05, -2.45922856e-05,  6.49743029e-05,
       -2.09973615e-05, -1.72330803e-04, -9.76316881e-06,  6.66287267e-05,
        3.96072733e-05, -1.37693393e-05,  8.38918344e-06, -8.69846150e-05,
       -1.37818734e-05,  1.64684338e-04,  7.04285425e-05,  1.54972193e-06,
       -6.44914005e-05, -3.82712444e-04, -2.20394532e-05,  7.55819599e-06,
       -1.84448360e-04, -1.71390329e-05, -5.92187147e-06, -7.79050234e-05,
       -4.80056374e-05, -2.25177256e-05,  3.83108405e-05,  5.86819924e-05,
       -1.39018079e-04, -2.79461611e-05,  1.44258306e-05,  2.65135512e-05,
       -5.93853390e-05, -4.25376116e-05,  2.34353235e-05, -6.54390290e-05,
        6.22192967e-06, -3.70391508e-05, -5.88608778e-05, -1.81100795e-05,
       -1.52381254e-04,  8.97083502e-05,  7.23327566e-07, -1.10620623e-04,
       -1.09848382e-04,  6.94813409e-05, -3.42005211e-05, -4.40991598e-05,
        1.26956850e-05, -

In [24]:
from scipy.optimize import minimize


# Accumulate the L1 residuals of least-squares fits of three quantities
# (rho_dft, rho_cc, exc_over_dm_cc_grids) onto the columns of `mat_a`,
# one fit per (i_atom, i) slice.
#
# NOTE(review): this cell indexes `rho_dft`, `rho_cc`, `exc_over_dm_cc_grids`
# and `weights` with three indices, so it needs 3-D arrays. The recorded run
# failed with "IndexError: too many indices for array" because the array was
# 1-D at that point. The 3-D arrays, `r`, and a matching `mat_a` presumably
# come from the commented-out `np.load(...)` / Gaussian-basis code in the
# first cell -- confirm and restore before running.
# NOTE(review): `plt` is not imported in the visible cells -- confirm that
# matplotlib.pyplot is imported elsewhere.
error_0 = 0
error_1 = 0
error_2 = 0

# 5 and 302 are hard-coded loop extents for the first and last array axes
# (presumably atoms and angular grid points -- TODO confirm).
for i_atom in range(5):
    for i in range(302):
        slice_ = (i_atom, slice(None), i)
        rho_0 = rho_dft[slice_]
        rho_1 = rho_cc[slice_]
        exc = exc_over_dm_cc_grids[slice_]
        weight = weights[slice_]  # not used by the fits below

        # plt.plot(r, rho_1, "--", c="b")
        # plt.plot(r, exc, c="g")

        # for j, gaussian_exp_i in enumerate(gaussian_exp):
        #     plt.plot(r, mat_a[:, j], c="r")

        # Least-squares fit of rho_0 and its absolute residual.
        x_vec = np.linalg.lstsq(mat_a, rho_0, rcond=-1)[0]
        rho_0_fit = mat_a @ x_vec
        error_0 += np.sum(np.abs(rho_0 - rho_0_fit))

        # Least-squares fit of rho_1. The residual must be measured against
        # rho_1's own fit (the original compared it with rho_0_fit, a
        # copy-paste slip that mixed the two densities).
        x_vec = np.linalg.lstsq(mat_a, rho_1, rcond=-1)[0]
        rho_1_fit = mat_a @ x_vec
        error_1 += np.sum(np.abs(rho_1 - rho_1_fit))

        # Least-squares fit of exc; slices with a large residual are reported
        # and plotted (target in green, fit in red).
        x_vec = np.linalg.lstsq(mat_a, exc, rcond=-1)[0]
        exc_fit = mat_a @ x_vec
        error = np.sum(np.abs(exc - exc_fit))
        if error > 1e-4:
            print(i_atom, i, error)
            plt.plot(r, exc, c="g")
            plt.plot(r, exc_fit, c="r")
        error_2 += error

print(error_0, error_1, error_2)

IndexError: too many indices for array: array is 1-dimensional, but 3 were indexed

In [None]:
# Fit one slice (first index 4, last index 298) of `exc_over_dm_cc_grids`
# onto the columns of `mat_a` through the normal equations, then print the
# point-wise absolute residual.
# NOTE(review): `exc_over_dm_cc_grids` and `mat_a` must come from earlier
# cells; no active definition of `exc_over_dm_cc_grids` is visible in this
# notebook -- confirm.
slice_ = (4, slice(None), 298)
exc = exc_over_dm_cc_grids[slice_]
# x_vec = np.linalg.lstsq(mat_a, exc, rcond=-1)[0]
# Solve (A^T A) x = A^T b directly instead of forming inv(A^T A) explicitly:
# same estimator, but without multiplying by an explicitly inverted,
# potentially ill-conditioned Gram matrix.
x_vec = np.linalg.solve(mat_a.T @ mat_a, mat_a.T @ exc)
exc_fit = mat_a @ x_vec
print(np.abs(exc - exc_fit))

[5.84373777e-04 5.83121804e-04 5.74635703e-04 5.43564246e-04
 4.62120748e-04 2.91625512e-04 5.04596091e-06 4.33452869e-04
 9.23643298e-04 1.30257179e-03 1.33686914e-03 8.65583060e-04
 6.52911764e-05 1.20308902e-03 2.22299055e-03 2.85705121e-03
 2.95107929e-03 2.48703342e-03 1.58286207e-03 4.52392499e-04
 6.64707312e-04 1.56881281e-03 2.13655641e-03 2.32600659e-03
 2.16376775e-03 1.72524214e-03 1.11282193e-03 4.34435360e-04
 2.14466241e-04 7.62606227e-04 1.16730703e-03 1.41286463e-03
 1.50525227e-03 1.46545484e-03 1.32298151e-03 1.11032552e-03
 8.58656868e-04 5.94822307e-04 3.39630949e-04 1.07313513e-04
 9.40579298e-05 2.61483077e-04 3.95601318e-04 4.99206701e-04
 5.75929279e-04 6.29212138e-04 6.61632644e-04 6.74563581e-04
 6.68148899e-04 6.41570598e-04 5.93595428e-04 5.23409390e-04
 4.31775826e-04 3.22563500e-04 2.04569548e-04 9.30312442e-05
 8.95362980e-06 2.69890643e-05 7.05942610e-06 5.28056885e-05
 1.18752852e-04 1.59982022e-04 1.64063419e-04 1.35775382e-04
 8.80382283e-05 3.461166

In [None]:
# Show the inverse of the Gram matrix A^T A of the design matrix `mat_a`
# (the matrix that appears in the normal-equation solution).
np.linalg.inv(np.matmul(mat_a.T, mat_a))

array([[ 1.47728054e+00, -2.54603211e+00,  2.84391227e+00,
        -4.52485693e+00,  3.15477449e+01, -2.48899196e+02,
         6.60750023e+02, -1.81771334e+04,  9.14473195e+04,
         3.23383190e+02, -1.73915788e+04, -6.21339549e+04,
        -1.04465321e+05,  8.93585166e+04, -1.49643522e+02,
         1.71615395e+04, -5.35640238e+03,  1.50645774e+04,
        -6.12225037e+03],
       [-2.54603197e+00,  6.73818250e+00, -1.05279598e+01,
         1.91388155e+01, -1.39468219e+02,  1.11668396e+03,
        -2.97917654e+03,  8.25059449e+04, -4.12987633e+05,
        -1.44778163e+03,  7.84746328e+04,  2.80223833e+05,
         4.71784772e+05, -4.03525063e+05,  6.67341827e+02,
        -7.74395265e+04,  2.41092423e+04, -6.80373033e+04,
         2.76622129e+04],
       [ 2.84391023e+00, -1.05279528e+01,  2.38696998e+01,
        -5.66156809e+01,  4.66698516e+02, -3.91558603e+03,
         1.06196092e+04, -3.00862963e+05,  1.48021813e+06,
         5.04152388e+03, -2.80451125e+05, -9.99649741e+05,
    