Copyright (c) 2023 Graphcore Ltd. All rights reserved.

# DFT dataset generation using PySCF IPU

In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Dependencies and configuration

Install JAX experimental for IPU (and its add-ons).

Install `pyscf-ipu`:

In [4]:
# PySCF IPU dependencies: editable install of the parent directory with the "cpu" extra.
%pip install -e "..[cpu]"

Looking in indexes: https://awf%40graphcore.ai:****@artifactory.sourcevertex.net:443/api/pypi/pypi-virtual/simple, https://pypi.python.org/simple/
Obtaining file:///home/awf/dev/gc-gh-public/pyscf-ipu
  Preparing metadata (setup.py) ... [?25ldone
Installing collected packages: pyscf-ipu
  Attempting uninstall: pyscf-ipu
    Found existing installation: pyscf-ipu 0.0.1
    Uninstalling pyscf-ipu-0.0.1:
      Successfully uninstalled pyscf-ipu-0.0.1
  Running setup.py develop for pyscf-ipu
Successfully installed pyscf-ipu
Note: you may need to restart the kernel to use updated packages.


# Download and preprocess GDB 11 dataset

In [5]:
load = False
gdb_filename = "../gdb/gdb11_size09.smi"
out_filename = gdb_filename.replace(".smi", "_sorted.csv")
if load:
  # Download and extract GDB11 dataset.
  !wget -p -O ./gdb/gdb11.tgz https://zenodo.org/record/5172018/files/gdb11.tgz\?download\=1
  !tar -xvf ./gdb/gdb11.tgz --directory ./gdb/

  from  gdb import sortgdb

  # Filter & sort GDB11 dataset (size 9).
  gdb_sorted = sortgdb.sort_gdb(gdb_filename, keep_only_atoms_count=9)
  # Save output as csv.
  gdb_sorted.to_csv(out_filename, index=False, header=False)

import os
os.system(f'ls -l {out_filename}')
assert os.path.getsize(out_filename) == 6985727

-rw-r--r-- 1 awf awf 6985727 Sep 14 17:55 ../gdb/gdb11_size09_sorted.csv


In [6]:
import os
# PySCF IPU setup: set JAX_IPU_USE_MODEL to "True".
# NOTE(review): presumably this selects the IPU model (software emulation)
# instead of IPU hardware - confirm against the JAX-for-IPU documentation.
os.environ["JAX_IPU_USE_MODEL"] = "True"

# PySCF IPU setup: use a single device per process.
os.environ["JAX_IPU_DEVICE_COUNT"] = "1"
# JAX/XLA IPU compilation cache.
os.environ['TF_POPLAR_FLAGS'] = """
  --executable_cache_path=/tmp/ipu-ef-cache
"""

# The environment variables above are set before JAX is imported; keep this order.
# First import of JAX and TessellateIPU may take a few minutes...
import jax
import tessellate_ipu

# Create a DFT dataset using PySCF IPU

In the following example, we use only a single IPU. Multiple IPUs can be used by simply launching a collection of PySCF IPU processes instead of a single one.

In [24]:
# Equivalent to command line:
# python density_functional_theory.py  -generate  -save  -fname <dataset_name>
#        -level 0  -plevel 0  -num_conformers <num_conformers>
#        -gdb 9  -float32

import time
from pyscf_ipu.dft import get_args, process_args, _eigh, recompute

# get_args is called with an empty argument list; the fields below are then
# overridden by hand instead of being passed on a command line.
args = get_args([])
args.backend = 'cpu'
args.generate = True
args.save = True
args.fname = "notebook_dataset"

args.level = 0
args.plevel = 0
args.num_conformers = 32 # TODO 1000
args.id = 1
args.limit = 33 # Only the first 33 SMILES entries are kept (see the slicing in the next cell).

args.float32 = True

process_args(args)

if args.nan:
    jax.config.update("jax_debug_nans", True)

backend = args.backend
eigh = _eigh

t0 = time.time()
print("loading gdb data")

# Load GDB09 data. The context manager closes the file handle as soon as the
# contents have been read. Splitting on "\n" keeps a final empty entry when the
# file ends with a newline.
with open("../gdb/gdb11_size09_sorted.csv", "r") as smiles_file:
    args.smiles = smiles_file.read().split("\n")

print("DONE!", time.time()-t0)


[BASIS] STO-3G
loading gdb data
DONE! 0.03354763984680176


In [21]:
from rdkit import Chem
# Import AllChem explicitly: `Chem.AllChem` is only reachable as an attribute
# if some other module has already imported `rdkit.Chem.AllChem`.
from rdkit.Chem import AllChem

from pyscf_ipu.dft import angstrom_to_bohr, get_atom_string, jax_dft

print("Length GDB: ", len(args.smiles))

if args.limit != -1:
    args.smiles = args.smiles[:args.limit]

# Find the first SMILES (starting at index args.id) that RDKit can parse and
# embed in 3D, and build the atom/position string passed to recompute.
string = None
for i in range(int(args.id), min(int(args.id)+1000, len(args.smiles))):
    smile = args.smiles[i]

    print(smile)

    b = Chem.MolFromSmiles(smile)
    # MolFromSmiles returns None for SMILES it cannot parse; skip those.
    if b is None: continue
    if not args.nohs: b = Chem.AddHs(b, explicitOnly=False)
    atoms = [atom.GetSymbol() for atom in b.GetAtoms()]

    # EmbedMolecule returns -1 when no 3D conformer could be generated.
    e = AllChem.EmbedMolecule(b)
    if e == -1: continue

    # Conformer coordinates are scaled by angstrom_to_bohr before formatting.
    locs = b.GetConformer().GetPositions() * angstrom_to_bohr
    atom_string, string = get_atom_string(" ".join(atoms), locs)

    print(string)
    break

# Fail with a clear message instead of a NameError (or a stale `string` left
# over from an earlier run) when no molecule could be embedded.
if string is None:
    raise RuntimeError("No SMILES in the selected range could be parsed and embedded.")

# NOTE(review): the saved output of this cell ends with
# "ValueError: Non-hashable static arguments are not supported", raised during a
# call to '_do_compute' inside recompute. Confirm against the installed
# pyscf_ipu version before relying on this cell.
recompute(args, None, 0, 0, our_fun=jax_dft, str=string)

Length GDB:  444285
FC(F)=C(F)C#CC#N
F  -5.976908 -0.272344 1.162637; C  -3.645289 -0.453388 0.099800; F  -3.255826 -1.468704 -2.208022; C  -1.700322 0.395499 1.375347; F  -1.910616 1.443580 3.702194; C  0.784915 0.247064 0.338877; C  2.897062 0.159965 -0.536063; C  5.393510 0.038812 -1.547242; N 7.413474 -0.090484 -2.387528; 
	 33
>>>  2 3
_2_3
14_GDB9_f32True_grid0_backendcpu_2_3


  0%|          | 0/2 [00:00<?, ?it/s]

[PAD] Last molecule had grisize=9816 we're using 10797. 
[Fc1nnc(=O)oc1F]
[conformers] 32
16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[1 / 32] Hs=    0 -565.726910 3257.8 0.0 0.0 0.1 4.6 0.3 381.1 0.1 25.4 1.7 0.2 0.3 0.1 0.0 0.0 2.2 0.1 0.0 0.0 0.2 0.3 0.9 0.2 2.9 3678.5 [1 ; 0]:   0%|          | 0/2 [00:04<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[2 / 32] Hs=    0 -565.719822 3499.4 0.0 0.0 0.1 4.2 0.3 372.5 0.1 25.7 2.1 0.3 0.3 0.1 0.0 0.0 2.1 0.1 0.0 0.0 0.2 0.3 2.7 0.2 2.6 3913.3 [1 ; 0]:   0%|          | 0/2 [00:08<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[3 / 32] Hs=    0 -565.720691 3446.0 0.0 0.0 0.1 4.0 0.3 383.4 0.0 20.2 1.7 0.2 0.3 0.1 0.0 0.0 2.0 0.1 0.0 0.0 0.2 0.3 12.9 0.2 2.6 3874.6 [1 ; 0]:   0%|          | 0/2 [00:11<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[4 / 32] Hs=    0 -565.718995 3385.9 0.0 0.0 0.1 3.5 0.4 382.0 0.1 19.9 1.7 0.2 0.4 0.4 0.0 0.0 2.6 0.1 0.0 0.0 0.2 0.4 0.5 0.2 2.2 3800.8 [1 ; 0]:   0%|          | 0/2 [00:15<?, ?it/s] 

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[5 / 32] Hs=    0 -565.721342 3377.4 0.0 0.0 0.1 2.5 0.2 356.6 0.0 18.4 1.9 0.3 0.3 0.1 0.0 0.0 2.3 0.1 0.0 0.0 0.2 0.3 0.5 0.2 2.6 3764.0 [1 ; 0]:   0%|          | 0/2 [00:19<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[6 / 32] Hs=    0 -565.727083 3305.1 0.0 0.0 0.1 2.5 0.2 352.6 0.0 21.3 2.0 0.3 0.3 0.1 0.0 0.0 2.3 0.1 0.0 0.0 0.2 0.4 0.4 0.2 2.1 3690.2 [1 ; 0]:   0%|          | 0/2 [00:23<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[7 / 32] Hs=    0 -565.718795 3350.3 0.0 0.0 0.1 3.9 0.3 378.8 0.1 23.0 1.8 0.2 0.3 0.1 0.0 0.0 2.2 0.1 0.0 0.0 0.2 0.3 0.5 0.2 2.5 3764.9 [1 ; 0]:   0%|          | 0/2 [00:27<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[8 / 32] Hs=    0 -565.730291 3149.6 0.0 0.0 0.1 3.9 0.3 414.5 0.1 25.5 1.8 0.3 0.3 0.1 0.0 0.0 2.9 0.5 0.0 0.0 0.2 0.3 0.6 0.2 6.6 3607.8 [1 ; 0]:   0%|          | 0/2 [00:30<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[9 / 32] Hs=    0 -565.723428 3571.9 0.0 0.0 0.1 2.7 0.2 383.7 0.0 24.0 1.7 0.3 0.3 0.1 0.0 0.0 2.1 0.1 0.0 0.0 0.2 0.3 0.5 0.2 2.5 3990.9 [1 ; 0]:   0%|          | 0/2 [00:34<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[10 / 32] Hs=    0 -565.721805 3678.1 0.0 0.0 0.1 3.0 0.2 372.1 0.0 21.8 2.0 0.3 0.3 0.1 0.0 0.0 2.4 0.1 0.0 0.0 0.2 0.3 0.5 0.2 2.5 4084.2 [1 ; 0]:   0%|          | 0/2 [00:38<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[11 / 32] Hs=    0 -565.722730 3297.0 0.0 0.0 0.1 2.6 0.3 422.4 0.1 24.6 1.8 0.3 0.3 0.1 0.0 0.0 2.0 0.1 0.0 0.0 0.2 0.3 1.9 0.4 2.4 3756.9 [1 ; 0]:   0%|          | 0/2 [00:42<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[12 / 32] Hs=    0 -565.721549 3147.5 0.0 0.0 0.1 3.0 0.2 371.9 0.0 19.8 1.7 0.2 0.3 0.1 0.0 0.0 2.0 0.1 0.0 0.0 0.2 0.3 0.5 0.2 2.4 3550.5 [1 ; 0]:   0%|          | 0/2 [00:46<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[13 / 32] Hs=    0 -565.728161 3244.9 0.0 0.0 0.1 2.8 0.4 359.1 0.1 19.0 2.0 0.3 0.3 0.1 0.0 0.0 2.3 0.1 0.0 0.0 0.2 0.3 0.5 0.2 2.1 3634.8 [1 ; 0]:   0%|          | 0/2 [00:49<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[14 / 32] Hs=    0 -565.722691 3461.8 0.0 0.0 0.1 3.4 0.3 362.2 0.0 21.1 1.6 0.3 0.3 0.2 0.0 0.0 2.4 0.1 0.0 0.0 0.2 0.3 0.5 0.2 2.2 3857.2 [1 ; 0]:   0%|          | 0/2 [00:53<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[15 / 32] Hs=    0 -565.721002 3366.1 0.0 0.0 0.1 2.5 0.2 397.1 0.1 25.8 2.9 0.9 0.4 0.2 0.0 0.1 2.8 0.2 0.0 0.0 0.2 0.4 2.0 0.2 2.3 3804.5 [1 ; 0]:   0%|          | 0/2 [00:57<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[16 / 32] Hs=    0 -565.725132 3683.2 0.0 0.0 0.1 4.3 0.3 405.0 0.1 23.5 1.8 0.3 0.3 0.1 0.0 0.0 2.1 0.1 0.0 0.0 0.2 0.3 0.5 0.2 2.6 4125.0 [1 ; 0]:   0%|          | 0/2 [01:01<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[17 / 32] Hs=    0 -565.727104 3574.0 0.0 0.0 0.1 3.9 0.2 396.6 0.1 27.4 2.3 0.4 0.4 0.2 0.0 0.0 2.7 0.2 0.0 0.0 0.2 0.4 1.7 0.2 2.6 4013.6 [1 ; 0]:   0%|          | 0/2 [01:05<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[18 / 32] Hs=    0 -565.722371 3446.6 0.0 0.0 0.1 3.9 0.3 380.2 0.1 19.7 1.7 0.3 0.3 0.1 0.0 0.0 2.0 0.1 0.0 0.0 0.2 0.3 8.7 0.2 2.5 3867.3 [1 ; 0]:   0%|          | 0/2 [01:09<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[19 / 32] Hs=    0 -565.726093 3414.4 0.0 0.0 0.1 2.4 0.2 393.0 0.1 24.2 2.1 0.4 0.4 0.1 0.0 0.1 2.9 0.2 0.0 0.0 0.2 0.3 0.5 0.2 2.4 3844.2 [1 ; 0]:   0%|          | 0/2 [01:13<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[20 / 32] Hs=    0 -565.726885 3283.1 0.1 0.0 0.1 4.3 0.2 374.2 0.1 19.7 1.6 0.2 0.3 0.1 0.0 0.0 2.9 0.1 0.0 0.0 0.2 0.4 0.5 0.2 2.4 3690.7 [1 ; 0]:   0%|          | 0/2 [01:16<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[21 / 32] Hs=    0 -565.720919 3274.5 0.0 0.0 0.1 2.9 0.2 385.3 0.1 22.5 1.7 0.3 0.3 0.1 0.0 0.0 2.1 0.1 0.0 0.0 0.2 0.4 0.9 0.2 2.8 3694.7 [1 ; 0]:   0%|          | 0/2 [01:20<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[22 / 32] Hs=    0 -565.719596 3470.5 0.0 0.0 0.1 3.8 0.2 384.5 0.1 27.6 2.0 0.3 0.3 0.1 0.0 0.0 2.0 0.1 0.0 0.0 0.2 0.3 0.6 0.2 2.4 3895.3 [1 ; 0]:   0%|          | 0/2 [01:24<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[23 / 32] Hs=    0 -565.723404 3547.2 0.0 0.0 0.1 2.4 0.2 373.9 0.0 17.5 2.0 0.2 0.3 0.1 0.0 0.0 2.2 0.1 0.0 0.0 0.2 0.4 0.5 0.2 2.0 3949.5 [1 ; 0]:   0%|          | 0/2 [01:28<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[24 / 32] Hs=    0 -565.715693 3836.7 0.0 0.0 0.1 3.9 0.3 735273.3 0.1 26.1 2.0 0.3 0.4 0.2 0.0 0.0 2.9 0.1 0.0 0.0 0.2 0.4 1.7 0.2 265.8 739414.7 [1 ; 0]:   0%|          | 0/2 [13:47<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[25 / 32] Hs=    0 -565.720925 3836.6 0.0 0.0 0.1 3.5 0.2 330.6 0.1 16.8 1.5 0.2 0.2 0.1 0.0 0.0 1.9 0.1 0.0 0.0 0.3 0.4 2.6 0.2 2.2 4197.6 [1 ; 0]:   0%|          | 0/2 [13:52<?, ?it/s]       

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[26 / 32] Hs=    0 -565.725878 3861.9 0.1 0.0 0.1 3.9 0.3 354.6 0.1 27.0 1.7 0.2 0.3 0.1 0.0 0.0 2.0 0.1 0.0 0.0 0.2 0.2 0.4 0.2 2.4 4255.8 [1 ; 0]:   0%|          | 0/2 [13:56<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[27 / 32] Hs=    0 -565.718065 3883.0 0.0 0.0 0.1 2.4 0.2 360.0 0.1 24.9 2.0 0.2 0.3 0.1 0.0 0.0 2.2 0.2 0.0 0.0 0.2 0.3 0.5 0.2 2.1 4279.0 [1 ; 0]:   0%|          | 0/2 [14:00<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[28 / 32] Hs=    0 -565.720971 3978.8 0.0 0.0 0.1 3.6 0.2 351.9 0.1 24.7 1.8 0.2 0.3 0.1 0.0 0.0 2.2 0.1 0.0 0.0 0.2 0.3 0.5 0.2 2.5 4367.8 [1 ; 0]:   0%|          | 0/2 [14:04<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[29 / 32] Hs=    0 -565.719820 4045.1 0.0 0.0 0.1 2.7 0.4 364.3 0.1 20.6 2.2 0.2 0.3 0.1 0.0 0.0 1.9 0.1 0.0 0.0 0.2 0.3 0.5 0.2 2.8 4442.1 [1 ; 0]:   0%|          | 0/2 [14:09<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[30 / 32] Hs=    0 -565.722699 3935.1 0.0 0.0 0.1 2.5 0.2 355.3 0.1 25.1 2.0 0.2 0.3 0.1 0.0 0.0 2.0 0.1 0.0 0.0 0.1 0.2 0.5 0.2 2.8 4326.9 [1 ; 0]:   0%|          | 0/2 [14:13<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[31 / 32] Hs=    0 -565.724055 4599.7 0.0 0.0 0.1 3.2 0.3 357.4 0.1 21.3 2.3 0.3 0.3 0.1 0.0 0.0 2.4 0.1 0.0 0.0 0.2 0.4 0.5 0.2 2.7 4991.6 [1 ; 0]:   0%|          | 0/2 [14:18<?, ?it/s]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[31 / 32] Hs=    0 -565.724055 4599.7 0.0 0.0 0.1 3.2 0.3 357.4 0.1 21.3 2.3 0.3 0.3 0.1 0.0 0.0 2.4 0.1 0.0 0.0 0.2 0.4 0.5 0.2 2.7 4991.6 [1 ; 0]:  50%|█████     | 1/2 [14:22<14:22, 862.94s/it]

[Fc1nonc(F)c1=O]
[conformers] 32
16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[1 / 32] Hs=    0 -565.667359 4073.7 0.0 0.0 0.1 2.7 0.4 359.1 0.1 25.1 1.9 0.2 0.3 0.1 0.0 0.1 2.2 0.1 0.0 0.0 0.2 0.3 3.9 0.2 2.5 4473.2 [2 ; 0]:  50%|█████     | 1/2 [14:27<14:22, 862.94s/it] 

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[2 / 32] Hs=    0 -565.664841 3935.5 0.0 0.0 0.1 2.7 0.4 355.5 0.1 25.3 1.7 0.2 0.2 0.1 0.0 0.0 2.8 0.1 0.0 0.0 0.2 0.3 1.3 0.2 2.6 4329.3 [2 ; 0]:  50%|█████     | 1/2 [14:32<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[3 / 32] Hs=    0 -565.674001 3889.4 0.0 0.0 0.1 3.7 0.3 340.0 0.0 20.6 2.1 0.3 0.3 0.1 0.0 0.0 2.2 0.1 0.0 0.0 0.2 0.3 0.4 0.2 3.0 4263.3 [2 ; 0]:  50%|█████     | 1/2 [14:36<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[4 / 32] Hs=    0 -565.663106 3924.3 0.0 0.0 0.1 2.4 0.2 369.7 0.1 25.3 1.8 0.3 0.2 0.1 0.0 0.0 2.1 0.2 0.0 0.0 0.2 0.2 3.6 0.1 2.1 4333.0 [2 ; 0]:  50%|█████     | 1/2 [14:40<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[5 / 32] Hs=    0 -565.668459 4089.6 0.0 0.0 0.1 2.5 0.2 367.6 0.0 24.3 2.2 0.2 0.3 0.1 0.0 0.0 2.0 0.1 0.0 0.0 0.2 0.2 0.6 0.2 2.7 4493.1 [2 ; 0]:  50%|█████     | 1/2 [14:45<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[6 / 32] Hs=    0 -565.655951 3836.7 0.0 0.0 0.1 2.4 0.2 356.7 0.1 23.0 2.0 0.3 0.5 0.2 0.0 0.0 2.3 0.1 0.0 0.0 0.2 0.2 0.4 0.2 3.2 4228.8 [2 ; 0]:  50%|█████     | 1/2 [14:49<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[7 / 32] Hs=    0 -565.672524 4047.6 0.0 0.0 0.1 4.0 0.3 358.2 0.1 24.9 2.0 0.3 0.3 0.1 0.0 0.0 2.3 0.1 0.0 0.0 0.2 0.3 0.9 0.2 2.9 4444.8 [2 ; 0]:  50%|█████     | 1/2 [14:53<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[8 / 32] Hs=    0 -565.663828 3879.8 0.0 0.0 0.1 2.6 0.2 346.0 0.1 20.2 2.4 0.2 0.3 0.1 0.0 0.0 2.1 0.1 0.0 0.0 0.2 0.2 0.5 0.2 2.6 4257.9 [2 ; 0]:  50%|█████     | 1/2 [14:58<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[9 / 32] Hs=    0 -565.660599 4104.0 0.0 0.0 0.1 3.7 0.3 361.4 0.1 25.6 1.9 0.2 0.3 0.1 0.0 0.0 2.0 0.1 0.0 0.0 0.2 0.3 0.6 0.2 3.1 4504.2 [2 ; 0]:  50%|█████     | 1/2 [15:02<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[10 / 32] Hs=    0 -565.666104 3844.8 0.0 0.0 0.1 2.6 0.2 368.4 0.1 23.7 2.0 0.3 0.3 0.1 0.0 0.0 2.4 0.1 0.0 0.0 0.3 0.4 8.1 0.2 2.5 4256.6 [2 ; 0]:  50%|█████     | 1/2 [15:07<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[11 / 32] Hs=    0 -565.678483 4033.9 0.0 0.0 0.1 2.7 0.2 361.4 0.0 24.1 1.8 0.3 0.3 0.2 0.0 0.0 2.5 0.1 0.0 0.0 0.2 0.2 12.9 0.2 2.2 4443.3 [2 ; 0]:  50%|█████     | 1/2 [15:11<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[12 / 32] Hs=    0 -565.654841 4087.7 0.0 0.0 0.1 2.5 0.2 379.6 0.2 20.2 2.3 0.3 0.3 0.1 0.0 0.0 2.3 0.2 0.0 0.0 0.2 0.3 0.6 0.2 2.4 4499.7 [2 ; 0]:  50%|█████     | 1/2 [15:15<14:22, 862.94s/it] 

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[13 / 32] Hs=    0 -565.666894 3958.2 0.0 0.0 0.1 2.6 0.2 373.9 0.1 20.0 2.4 0.4 0.4 0.1 0.0 0.0 2.0 0.1 0.0 0.0 0.2 0.3 0.5 0.2 2.9 4364.6 [2 ; 0]:  50%|█████     | 1/2 [15:20<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[14 / 32] Hs=    0 -565.663644 4199.3 0.0 0.0 0.1 2.5 0.2 370.4 0.1 20.4 2.3 0.3 0.3 0.1 0.0 0.0 1.9 0.2 0.0 0.0 0.1 0.2 0.5 0.2 2.4 4601.5 [2 ; 0]:  50%|█████     | 1/2 [15:24<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[15 / 32] Hs=    0 -565.674652 3797.2 0.0 0.0 0.1 3.9 0.3 359.5 0.0 20.0 2.0 0.3 0.3 0.1 0.0 0.0 2.2 0.1 0.0 0.0 0.2 0.3 0.5 0.2 2.4 4189.6 [2 ; 0]:  50%|█████     | 1/2 [15:29<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[16 / 32] Hs=    0 -565.669229 4142.4 0.0 0.0 0.1 2.5 0.2 364.8 0.1 25.3 2.3 0.3 0.4 0.2 0.0 0.0 2.7 0.2 0.0 0.0 0.2 0.4 0.5 0.2 2.9 4545.7 [2 ; 0]:  50%|█████     | 1/2 [15:33<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[17 / 32] Hs=    0 -565.656185 4071.9 0.0 0.0 0.1 2.5 0.2 358.0 0.1 30.0 1.7 0.3 0.3 0.1 0.0 0.0 2.4 0.1 0.0 0.0 0.2 0.3 8.3 1.4 2.8 4480.7 [2 ; 0]:  50%|█████     | 1/2 [15:38<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[18 / 32] Hs=    0 -565.671447 4055.6 0.0 0.0 0.1 3.2 0.2 385.7 0.1 24.9 2.0 0.3 0.3 0.1 0.0 0.0 2.4 0.1 0.0 0.0 0.2 0.3 0.5 0.2 2.5 4478.7 [2 ; 0]:  50%|█████     | 1/2 [15:42<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[19 / 32] Hs=    0 -565.658242 4163.9 0.0 0.0 0.1 2.6 0.2 376.0 0.1 24.3 1.7 0.2 0.2 0.1 0.0 0.0 2.1 0.1 0.0 0.0 0.2 0.2 0.5 0.2 3.8 4576.5 [2 ; 0]:  50%|█████     | 1/2 [15:47<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[20 / 32] Hs=    0 -565.657614 4044.5 0.0 0.0 0.1 3.0 0.3 392.3 0.1 22.1 2.3 0.3 0.3 0.2 0.0 0.0 2.0 0.1 0.0 0.0 0.2 0.2 0.4 0.2 2.4 4471.0 [2 ; 0]:  50%|█████     | 1/2 [15:51<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[21 / 32] Hs=    0 -565.661279 4096.6 0.0 0.0 0.1 2.8 0.4 393.6 0.1 24.7 2.1 0.3 0.3 0.1 0.0 0.0 2.4 0.1 0.0 0.0 0.2 0.3 0.6 0.2 2.3 4527.2 [2 ; 0]:  50%|█████     | 1/2 [15:56<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[22 / 32] Hs=    0 -565.652521 4191.0 0.0 0.0 0.1 2.7 0.2 382.8 0.1 25.4 2.0 0.3 0.3 0.1 0.0 0.0 2.2 0.2 0.0 0.0 0.2 0.3 17.1 0.8 2.6 4628.4 [2 ; 0]:  50%|█████     | 1/2 [16:00<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[23 / 32] Hs=    0 -565.663381 4180.3 0.0 0.0 0.1 2.6 0.2 388.1 0.1 24.7 2.0 0.2 0.3 0.1 0.0 0.0 2.4 0.1 0.0 0.0 0.2 0.4 0.6 0.2 2.2 4604.8 [2 ; 0]:  50%|█████     | 1/2 [16:05<14:22, 862.94s/it] 

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[24 / 32] Hs=    0 -565.672649 3850.6 0.0 0.0 0.1 2.6 0.2 401.6 0.1 24.9 1.8 0.2 0.3 0.1 0.0 0.0 2.2 0.1 0.0 0.0 0.2 0.2 0.5 0.2 2.4 4288.3 [2 ; 0]:  50%|█████     | 1/2 [16:09<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[25 / 32] Hs=    0 -565.661972 3906.6 0.0 0.0 0.1 2.5 0.2 382.8 0.1 23.3 1.8 0.2 0.3 0.1 0.0 0.0 2.1 0.1 0.0 0.0 0.2 0.2 0.5 0.2 2.7 4324.0 [2 ; 0]:  50%|█████     | 1/2 [16:14<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[26 / 32] Hs=    0 -565.657639 3986.3 0.0 0.0 0.1 3.8 0.2 377.9 0.1 21.3 2.1 0.3 0.3 0.1 0.0 0.0 2.1 0.1 0.0 0.0 0.2 0.3 0.5 0.2 3.0 4398.9 [2 ; 0]:  50%|█████     | 1/2 [16:18<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[27 / 32] Hs=    0 -565.661481 4026.4 0.0 0.0 0.1 2.7 0.2 365.9 0.1 24.7 1.9 0.2 0.3 0.2 0.0 0.0 2.0 0.1 0.0 0.0 0.2 0.2 0.5 0.2 3.3 4429.2 [2 ; 0]:  50%|█████     | 1/2 [16:22<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[28 / 32] Hs=    0 -565.664385 4128.1 0.0 0.0 0.1 2.6 0.2 375.4 0.1 24.7 2.1 0.3 0.3 0.1 0.0 0.0 2.5 0.1 0.0 0.0 0.2 0.3 0.5 0.2 2.3 4540.1 [2 ; 0]:  50%|█████     | 1/2 [16:27<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[29 / 32] Hs=    0 -565.665912 4174.3 0.0 0.0 0.1 4.0 0.3 405.8 0.1 24.2 1.9 0.3 0.3 0.1 0.0 0.0 2.6 0.1 0.0 0.0 0.2 0.3 0.5 0.2 2.4 4617.7 [2 ; 0]:  50%|█████     | 1/2 [16:32<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[30 / 32] Hs=    0 -565.672740 3934.5 0.1 0.0 0.1 2.6 0.2 380.5 0.0 24.5 2.1 0.3 0.3 0.1 0.0 0.0 2.2 0.1 0.0 0.0 0.2 0.3 0.4 0.2 2.8 4351.5 [2 ; 0]:  50%|█████     | 1/2 [16:36<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[31 / 32] Hs=    0 -565.672078 3825.9 0.0 0.0 0.1 2.6 0.2 369.7 0.1 24.9 2.1 0.2 0.3 0.1 0.0 0.0 2.1 0.1 0.0 0.0 0.2 0.3 0.4 0.2 2.4 4231.9 [2 ; 0]:  50%|█████     | 1/2 [16:40<14:22, 862.94s/it]

16.4025 (45, 45, 45, 45) 15
7.77384 (4, 10797, 45) 20


[31 / 32] Hs=    0 -565.672078 3825.9 0.0 0.0 0.1 2.6 0.2 369.7 0.1 24.9 2.1 0.2 0.3 0.1 0.0 0.0 2.1 0.1 0.0 0.0 0.2 0.3 0.4 0.2 2.4 4231.9 [2 ; 0]: 100%|██████████| 2/2 [16:44<00:00, 502.26s/it]


72131
cpu


ValueError: Non-hashable static arguments are not supported. An error occurred during a call to '_do_compute' while trying to hash an object of type <class 'argparse.Namespace'>, Namespace(C=-1, H=False, backend='cpu', basis='STO-3G', benchmark=False, checkc=False, choleskycpu=False, debug=False, density_mixing=False, enable64=False, float16=False, float32=True, fname='notebook_dataset', forloop=False, gdb=9, geneigh=False, generate=True, gname='', he=False, id=1, intv=1, ipumult=False, its=20, jit=False, level=0, limit=33, methane=False, multv=2, nan=False, nohs=False, num=10, num_conformers=32, numerror=False, plevel=0, profile=False, pyscf=False, randeri=False, randomSeed=43, rattled_std=0, resume=False, save=True, scale_ao=1, scale_cholesky=1, scale_eigvects=1, scale_eri=1, scale_errvec=1, scale_ghamil=1, scale_overlap=1, scale_sdf=1, scale_vj=1, scale_w=1, seperate=False, sk=(-2,), skip=0, skip_minao=False, skipdiis=False, skipeigh=False, skiperi=False, skippyscf=False, skipus=False, smiles=['FC(F)(C#N)C(=O)C#N', 'FC(F)=C(F)C#CC#N', 'Fc1nnc(=O)oc1F', 'Fc1nonc(F)c1=O', 'O=c1nc(nno1)C#N', 'Fc1onc2OC(=O)c12', 'O=C(C#N)c1nnno1', 'Fc1onc2nnoc12', 'Fc1onc2C(=O)Oc12', '[O-][N+](=O)c1noc(=O)o1', 'Fc1noc(C#N)c1F', 'FC(F)=C(F)C(F)(F)F', 'FC(F)(F)C(F)(F)C#N', 'Fc1nc2nnoc2o1', 'Fc1nc(C#N)c(F)o1', 'N#Cc1nnoc1C#N', 'O=C1Oc2nonc2O1', 'Fc1onc(C#N)c1F', 'Fc1oc(F)c(F)c1F', 'O=C1ON=C(C#N)C1=O', 'FC(F)(F)c1nnno1', 'O=C1OC(=O)C(=N1)C#N', 'Fc1noc(=O)oc1=O', 'FC(F)(F)c1nnon1', 'O=C1OC(=O)C(=O)C1=O', '[O-][N+](=O)c1onnc1F', 'Fc1noc(=O)nc1F', 'Fc1noc(=O)c(=O)o1', 'n1nnc2nonc2n1', 'O=c1nnoc(=O)nn1', '[O-][N+](=O)c1noc(F)n1', 'Fc1nnc(F)c(=O)o1', 'Fc1nc(=O)c(F)no1'], spin=0, split=[1, 16], step=1, str='', threads=1, threads_int=1, uniform_pyscf=-1, verbose=False, xc='b3lyp'). The error was:
TypeError: unhashable type: 'Namespace'


# Loading & visualizing generated data

After the dataset has been created, we can load the data.
(You may wish to spin up a new notebook, and view the data as 
it's being generated in this one).

In [10]:
import pandas as pd

In [11]:
# Output DFT dataset is a compressed CSV file.
# NOTE: it may take a couple of minutes before the file is generated.
rootpath = f'./data/generated/{args.fname}/'
# Each entry of rootpath is expected to be a directory containing data.csv;
# sort by modification time so the most recently modified entry comes last.
paths = sorted(os.listdir(rootpath), key=lambda x: os.path.getmtime(os.path.join(rootpath, x)))
if not paths:
    raise FileNotFoundError(f"No generated data found in {rootpath!r}; run the generation cell first.")
filename = os.path.join(rootpath, paths[-1], "data.csv")

df = pd.read_csv(filename, compression="gzip")

In [12]:
# Display the DataFrame loaded from data.csv.
df

Unnamed: 0.1,Unnamed: 0,smile,atoms,atom_positions,energies,std,pyscf_energies,pyscf_hlgap,pyscf_homo,pyscf_lumo,times,homo,lumo,hlgap,N,basis
0,0,CC(C(F)C=O)=C(F)F,CCCFCOCFFHHHHH,"[-2.310676011466988, -1.501283183213607, -1.07...","[-15234.328062231103, -15216.222079159548, -15...",0.001670,[0.0],0,0,0,[3.6225e+03 0.0000e+00 0.0000e+00 1.0000e-01 3...,-3.627531,1.996017,5.623548,50,STO-3G
1,0,CC(C(F)C=O)=C(F)F,CCCFCOCFFHHHHH,"[-2.8138999079144, -1.8577504582035314, -0.781...","[-15233.707002997247, -15214.106921784089, -15...",0.001685,[0.0],0,0,0,[3.4137e+03 0.0000e+00 0.0000e+00 1.0000e-01 2...,-3.810637,2.080500,5.891137,50,STO-3G
2,0,CC(C(F)C=O)=C(F)F,CCCFCOCFFHHHHH,"[-3.3157674827315, -0.5013038903612104, -0.791...","[-15233.268688412909, -15213.38878456933, -152...",0.001627,[0.0],0,0,0,[3.4426e+03 0.0000e+00 0.0000e+00 1.0000e-01 2...,-3.781198,2.014855,5.796053,50,STO-3G
3,0,CC(C(F)C=O)=C(F)F,CCCFCOCFFHHHHH,"[-2.2718425484681184, -1.511352692749988, -0.9...","[-15233.92559508189, -15205.603897519642, -152...",0.002464,[0.0],0,0,0,[3.3526e+03 0.0000e+00 0.0000e+00 1.0000e-01 2...,-3.801813,1.896403,5.698216,50,STO-3G
4,0,CC(C(F)C=O)=C(F)F,CCCFCOCFFHHHHH,"[-1.8202534347923944, -2.188123753845587, 0.66...","[-15234.38905939559, -15219.393843797065, -152...",0.001875,[0.0],0,0,0,[3.355e+03 0.000e+00 0.000e+00 1.000e-01 3.700...,-3.760773,1.895937,5.656710,50,STO-3G
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,0,COC(=O)C=C(F)C#C,COCOCCFCCHHHHH,"[-5.829719643481505, -1.0442410230540844, -0.3...","[-12921.907973375575, -12847.172828548093, -12...",0.002065,[0.0],0,0,0,[3.5821e+03 0.0000e+00 0.0000e+00 1.0000e-01 2...,-3.557515,0.994091,4.551606,50,STO-3G
69,0,COC(=O)C=C(F)C#C,COCOCCFCCHHHHH,"[-3.879745074030985, -3.9453784376908647, 1.89...","[-12896.409026279161, -12448.225227459361, -12...",0.005064,[0.0],0,0,0,[3.8648e+03 0.0000e+00 0.0000e+00 1.0000e-01 3...,-1.520286,-0.384428,1.135859,50,STO-3G
70,0,COC(=O)C=C(F)C#C,COCOCCFCCHHHHH,"[4.603785661294676, 2.201512234181421, -1.8361...","[-12921.780569965858, -12843.884053121841, -12...",0.001627,[0.0],0,0,0,[3.5696e+03 0.0000e+00 0.0000e+00 1.0000e-01 2...,-3.408105,1.005516,4.413621,50,STO-3G
71,0,COC(=O)C=C(F)C#C,COCOCCFCCHHHHH,"[-1.8221122964701737, -0.056891745197611164, -...","[-12921.990923514906, -12848.056616998725, -12...",0.000762,[0.0],0,0,0,[3.6236e+03 0.0000e+00 0.0000e+00 1.0000e-01 2...,-3.538141,0.812941,4.351082,50,STO-3G


In [13]:
# HLgap data: in the table displayed above, `hlgap` equals `lumo - homo`.
df["hlgap"]

0     5.623548
1     5.891137
2     5.796053
3     5.698216
4     5.656710
        ...   
68    4.551606
69    1.135859
70    4.413621
71    4.351082
72    3.920660
Name: hlgap, Length: 73, dtype: float64