In [1]:
%load_ext autoreload
%autoreload 2

## PYSCF AD

In [2]:
import os
# os.environ['PYSCFAD_BACKEND']='jax'
import pyscf
from pyscfad import gto, scf

"""
Analytic nuclear gradient for RHF computed by auto-differentiation
"""

mol = gto.Mole()
mol.atom    = 'H 0 0 0; H 0 0 0.74'  # in Angstrom
mol.basis   = '631g'
mol.verbose = 5
mol.build()

mf = scf.RHF(mol)
mf.kernel()
jac = mf.energy_grad()

Using PyTorch backend.
System: uname_result(system='Linux', node='cosmopc8', release='5.15.0-88-generic', version='#98-Ubuntu SMP Mon Oct 2 15:18:56 UTC 2023', machine='x86_64')  Threads 8
Python 3.11.5 (main, Sep 11 2023, 13:54:46) [GCC 11.2.0]
numpy 1.26.2  scipy 1.11.3
Date: Tue Nov 14 10:23:44 2023
PySCF version 2.3.0
PySCF path  /home/nigam/miniconda3/lib/python3.11/site-packages/pyscf

[CONFIG] conf_file /home/nigam/.pyscf_conf.py
[INPUT] verbose = 5
[INPUT] max_memory = 4000 
[INPUT] num. atoms = 2
[INPUT] num. electrons = 2
[INPUT] charge = 0
[INPUT] spin (= nelec alpha-beta = 2S) = 0
[INPUT] symmetry False subgroup None
[INPUT] Mole.unit = angstrom
[INPUT] Symbol           X                Y                Z      unit          X                Y                Z       unit  Magmom
[INPUT]  1 H      0.000000000000   0.000000000000   0.000000000000 AA    0.000000000000   0.000000000000   0.000000000000 Bohr   0.0
[INPUT]  2 H      0.000000000000   0.000000000000   0.740000000000

TypeError: Value tensor([[inf, nan],
        [nan, inf]], device='cuda:0', dtype=torch.float64) with type <class 'torch.Tensor'> is not a valid JAX type

In [3]:
import os
os.environ['PYSCFAD_BACKEND']='torch'
import torch
from pyscf import gto

from pyscfad import numpy as np
# import numpy as np 
from pyscfad import ops
from pyscfad.ml.scf import hf

mol = gto.Mole()
mol.atom = 'H 0 0 0; H 0 0 0.74'
mol.basis = 'sto3g'
mol.build()

# fock = torch.rand(mol.nao, mol.nao, dtype=float)
# fock = .5 * (fock + fock.T.conj())
# fock = torch.autograd.Variable(fock, requires_grad=True)

mf = hf.SCF(mol)
s = mf.get_ovlp()
# dm = mf.make_rdm1()
mf.kernel()
f = mf.get_fock()
print(f)
# mo_energy, mo_coeff = mf.eig(fock, s)
# mo_occ = mf.get_occ(mo_energy) # get_occ returns a numpy array
# mo_occ = ops.convert_to_tensor(mo_occ)
# dm1 = mf.make_rdm1(mo_coeff, mo_occ)
# dip = mf.dip_moment(dm=dm1)
# dip_norm = np.linalg.norm(dip)
# dip_norm.backward()
# print(fock.grad)

AttributeError: Mole object does not have method detach

In [None]:
import pyscf
from pyscfad import gto, scf

"""
Analytic nuclear gradient for RHF computed by auto-differentiation
"""

mol = gto.Mole()
mol.atom    = 'H 0 0 0; H 0 0 0.74'  # in Angstrom
mol.basis   = '631g'
mol.verbose = 5
mol.build()

mf = scf.RHF(mol)
mf.kernel()
jac = mf.energy_grad()

System: uname_result(system='Linux', node='cosmopc8', release='5.15.0-88-generic', version='#98-Ubuntu SMP Mon Oct 2 15:18:56 UTC 2023', machine='x86_64')  Threads 4
Python 3.11.5 (main, Sep 11 2023, 13:54:46) [GCC 11.2.0]
numpy 1.26.2  scipy 1.11.3
Date: Tue Nov 14 10:20:09 2023
PySCF version 2.3.0
PySCF path  /home/nigam/miniconda3/lib/python3.11/site-packages/pyscf

[ENV] PYSCFAD_BACKEND torch
[CONFIG] conf_file None
[INPUT] verbose = 5
[INPUT] max_memory = 4000 
[INPUT] num. atoms = 2
[INPUT] num. electrons = 2
[INPUT] charge = 0
[INPUT] spin (= nelec alpha-beta = 2S) = 0
[INPUT] symmetry False subgroup None
[INPUT] Mole.unit = angstrom
[INPUT] Symbol           X                Y                Z      unit          X                Y                Z       unit  Magmom
[INPUT]  1 H      0.000000000000   0.000000000000   0.000000000000 AA    0.000000000000   0.000000000000   0.000000000000 Bohr   0.0
[INPUT]  2 H      0.000000000000   0.000000000000   0.740000000000 AA    0.00000000

AttributeError: Mole object does not have method detach

## mlelec 

In [2]:
from data.pyscf_calculator import calculator
import numpy as np 
import torch 
from collections import defaultdict



In [3]:
from data.dataset import precomputed_molecules, MoleculeDataset, MLDataset

In [4]:
water_data = MoleculeDataset(mol_name='water_1000', data_path = '../../examples/data/water_1000/sto-3g', aux_path = '../../examples/data/water_1000/sto-3g')

Loading structures
../../examples/data/water_1000/sto-3g/fock.hickle


In [5]:
water_ml = MLDataset(water_data)

In [10]:
water_ml._shuffle(random_seed=23724)
water_ml._split_indices(0.7, 0.2)

In [17]:
dataloader = torch.utils.data.DataLoader(water_ml, batch_size=4,
                                              shuffle=True, num_workers=2)

In [19]:
dataloader.dataset.train

range(0, 1000)

In [4]:
calc = calculator(
        path="/Users/jigyasa/scratch/my_mlelec/examples/data/water/",
        mol_name="water_1000",
        dft=False,
        frame_slice="0:1",
    )
calc.calculate(   basis_set="def2-tzvp",)

Loading
Number of frames:  1
System: uname_result(system='Darwin', node='Jigyasas-MacBook-Pro.local', release='22.4.0', version='Darwin Kernel Version 22.4.0: Mon Mar  6 21:01:02 PST 2023; root:xnu-8796.101.5~3/RELEASE_ARM64_T8112', machine='arm64')  Threads 1
Python 3.10.10 (main, Mar 21 2023, 13:41:05) [Clang 14.0.6 ]
numpy 1.23.5  scipy 1.8.1
Date: Wed Oct 25 08:51:18 2023
PySCF version 2.2.1
PySCF path  /Users/jigyasa/miniconda3/lib/python3.10/site-packages/pyscf

[CONFIG] conf_file None
[INPUT] verbose = 5
[INPUT] max_memory = 4000 
[INPUT] num. atoms = 3
[INPUT] num. electrons = 10
[INPUT] charge = 0
[INPUT] spin (= nelec alpha-beta = 2S) = 0
[INPUT] symmetry False subgroup None
[INPUT] Mole.unit = angstrom
[INPUT] Symbol           X                Y                Z      unit          X                Y                Z       unit  Magmom
[INPUT]  1 O      0.000000000000   0.000000000000   0.000000000000 AA    0.000000000000   0.000000000000   0.000000000000 Bohr   0.0
[INPUT]  

In [5]:
for o in calc.ao_labels['O']:
    print(o)

1s
2s
3s
4s
5s
2px
2py
2pz
3px
3py
3pz
4px
4py
4pz
3dxy
3dyz
3dz^2
3dxz
3dx2-y2
4dxy
4dyz
4dz^2
4dxz
4dx2-y2
4f-3
4f-2
4f-1
4f+0
4f+1
4f+2
4f+3


In [36]:
calc.save_results(path = './')

1 s
2 s
3 s
4 s
5 s
2 px
2 py
2 pz
3 px
3 py
3 pz
4 px
4 py
4 pz
3 dxy
3 dyz
3 dz^2
3 dxz
3 dx2-y2
4 dxy
4 dyz
4 dz^2
4 dxz
4 dx2-y2
4 f-3
4 f-2
4 f-1
4 f+0
4 f+1
4 f+2
4 f+3
1 s
2 s
3 s
2 px
2 py
2 pz
All done, results saved at:  ./


  assert len(self.results[k]) == self.nframes


In [58]:
d = defaultdict(list) 

labels = ['0 O 1s    ', '0 O 2s    ', '0 O 2px   ', '0 O 2py   ', '0 O 2pz   ', '1 H 1s    ', '2 H 1s    ']
for l in labels: 
    _, elem, b = l.split()[:3]
    if b not in d[elem]:
        d[elem].append(convert_str_to_nlm(b))



1 [0, 0]
2 [0, 0]
2 [1, 1]
2 [1, -1]
2 [1, 0]
1 [0, 0]
1 [0, 0]


In [59]:
d

defaultdict(list,
            {'O': [[1, 0, 0], [2, 0, 0], [2, 1, 1], [2, 1, -1], [2, 1, 0]],
             'H': [[1, 0, 0], [1, 0, 0]]})

In [55]:
d_nlm = {i:[] for i in d.keys()}
for k in d.keys():
    for i in d[k]:
        d_nlm[k].append(convert_str_to_nlm(i))

1 [0, 0]
2 [0, 0]
2 [1, 1]
2 [1, -1]
2 [1, 0]
1 [0, 0]


In [57]:
d

defaultdict(list, {'O': ['1s', '2s', '2px', '2py', '2pz'], 'H': ['1s']})

In [56]:
d_nlm

{'O': [[1, 0, 0], [2, 0, 0], [2, 1, 1], [2, 1, -1], [2, 1, 0]],
 'H': [[1, 0, 0]]}

In [34]:
import re
x = "1s"
# y = "4d^2"
y = "4f+2"
match = re.match(r"([0-9]+)(.+)", x, re.I)
print(x, match, "1")
n, lm = match.groups()
print(n, lm, "2")

match = re.match(r"([0-9]+)(.+)",y, re.I)
print(y, match, "1")
n, lm = match.groups()
print(n, lm, "2")


1s <re.Match object; span=(0, 2), match='1s'> 1
1 s 2
4f+2 <re.Match object; span=(0, 4), match='4f+2'> 1
4 f+2 2
