In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from mlelec.data.dataset import precomputed_molecules, MoleculeDataset, MLDataset
import torch
from ase.io import read
import ase
from mlelec.models.linear import LinearTargetModel

In [3]:
h_data = MoleculeDataset(mol_name='water_1000', frame_slice=slice(0,10), data_path = 'examples/data/water_1000/sto-3g', aux_path = 'examples/data/water_1000/sto-3g', device='cuda', aux=['overlap', 'orbitals']) #frames =frames, frame_slice=':4', target_data={'fock': h}, aux=['overlap', 'orbitals'],aux_data = {'overlap': over, 'orbitals':orbs }
h_ml = MLDataset(molecule_data=h_data, device ='cuda')

Loading structures
examples/data/water_1000/sto-3g/fock.hickle


In [4]:
# h_ml._shuffle(random_seed=5381)
# h_ml._split_indices(train_frac=0.7, val_frac=0.2)

In [5]:
for f in h_ml.structures:
    f.pbc = False

## training on a tiny dataset for now 

In [6]:
linmod = LinearTargetModel(dataset = h_ml, metrics = "l2_loss", nlayers = 1, nout = 1, nhidden = 10, bias = False, device = 'cuda')


Computing features with default hypers


In [7]:
optimizer = torch.optim.Adam(linmod.parameters(), lr=0.01)
for epoch in range(100):
    loss = linmod.forward()
    loss.backward()
    optimizer.step()

    if epoch%10 == 0:
        print(torch.sqrt(loss.detach()))

tensor(69.4316, device='cuda:0', dtype=torch.float64)
tensor(68.0965, device='cuda:0', dtype=torch.float64)
tensor(65.6010, device='cuda:0', dtype=torch.float64)
tensor(61.4844, device='cuda:0', dtype=torch.float64)
tensor(55.5974, device='cuda:0', dtype=torch.float64)
tensor(48.7347, device='cuda:0', dtype=torch.float64)
tensor(42.7062, device='cuda:0', dtype=torch.float64)
tensor(37.9929, device='cuda:0', dtype=torch.float64)
tensor(31.7903, device='cuda:0', dtype=torch.float64)
tensor(19.8591, device='cuda:0', dtype=torch.float64)


The default features and model is quite bad - so no wonder losses are high

## Predictions

In [9]:
linmod.forward()
fock = linmod.reconstructed_tensor
print(fock.shape)

torch.Size([10, 7, 7])


## plugging in predicted fock matrix into pyscfad

In [10]:

import os
os.environ['PYSCFAD_BACKEND']='torch'

import torch
from pyscf import gto

from pyscfad import numpy as np
from pyscfad import ops
from pyscfad.ml.scf import hf
import pyscf.pbc.tools.pyscf_ase as pyscf_ase


Using PyTorch backend.




In [11]:

mol = gto.Mole()
mol.atom = pyscf_ase.ase_atoms_to_pyscf(h_ml.structures[0])
mol.basis = 'sto-3g'
mol.build()
fock = linmod.reconstructed_tensor[0]


mf = hf.SCF(mol)

mo_energy, mo_coeff = mf.eig(fock, s = torch.eye(fock.shape[-1], dtype = fock.dtype))
mo_occ = mf.get_occ(mo_energy) # get_occ returns a numpy array
mo_occ = ops.convert_to_tensor(mo_occ)


In [12]:

dm1 = mf.make_rdm1(mo_coeff, mo_occ)
dip = mf.dip_moment(dm=dm1)
dip_norm = np.linalg.norm(dip)
dip_norm.backward()
print(fock.grad)

RuntimeError: expected mat1 and mat2 to have the same dtype, but got: double != float

In [12]:
mocc = mo_coeff[:, mo_occ>0]
dm = np.dot(mocc*mo_occ[mo_occ>0], mocc.T)

RuntimeError: expected mat1 and mat2 to have the same dtype, but got: double != float

In [None]:
print(mocc.dtype)
print(mo_occ[mo_occ>0].dtype)

In [None]:
print(mocc.conj().T.dtype)