In [1]:
from torch.utils.data import DataLoader 
import torch
import matplotlib.pyplot as plt
from pyscf import gto, dft, scf 
from kspies import wy, zmp
from dpyscfl.scf import *
from dpyscfl.net import *
from dpyscfl.utils import *
from dpyscfl.losses import *
import os, shutil, copy
from ase import Atoms
from ase.units import Bohr, Hartree
import pickle
import pandas as pd
from ase.io import read, write
from torchviz import make_dot
import pylibnxc

# The Published Network

The published network is saved in `dpyscfl/models/xcdiff/MODEL_MGGA`.

In [2]:
# Directory of the published MGGA model; the TorchScript module is the file named 'xc'.
xcdp = '/home/awills/Documents/Research/dpyscfl/models/xcdiff/MODEL_MGGA/'
xcd = torch.jit.load(os.path.join(xcdp, 'xc'))
print(xcd)
# Dump every named parameter tensor of the loaded module for later comparison.
for name, param in xcd.named_parameters():
    print(name, param)

RecursiveScriptModule(
  original_name=XC
  (heg_model): RecursiveScriptModule(original_name=LDA_X)
  (pw_model): RecursiveScriptModule(original_name=PW_C)
  (grid_models): RecursiveScriptModule(
    original_name=ModuleList
    (0): RecursiveScriptModule(
      original_name=XC_L
      (net): RecursiveScriptModule(
        original_name=Sequential
        (0): RecursiveScriptModule(original_name=Linear)
        (1): RecursiveScriptModule(original_name=GELU)
        (2): RecursiveScriptModule(original_name=Linear)
        (3): RecursiveScriptModule(original_name=GELU)
        (4): RecursiveScriptModule(original_name=Linear)
        (5): RecursiveScriptModule(original_name=GELU)
        (6): RecursiveScriptModule(original_name=Linear)
      )
      (tanh): RecursiveScriptModule(original_name=Tanh)
      (sig): RecursiveScriptModule(original_name=Sigmoid)
      (lobf): RecursiveScriptModule(
        original_name=LOB
        (sig): RecursiveScriptModule(original_name=Sigmoid)
      )
   

In [3]:
# Display the saved tensors keyed by parameter name (e.g. 'grid_models.0.net.0.weight').
xcd.state_dict()

OrderedDict([('grid_models.0.net.0.weight',
              tensor([[ -1.7256,  -0.7583],
                      [ -0.7135,  -0.4243],
                      [ -0.0929,  -4.0847],
                      [  0.1295,  -0.6161],
                      [-11.2182,  -1.2587],
                      [-47.5842,   0.0798],
                      [ -7.2921,  -0.4032],
                      [ -0.4546,   0.1030],
                      [ -1.5343,  -1.2948],
                      [ -3.5488,  -2.6906],
                      [  0.8545,   0.3988],
                      [ -1.1457,   0.0605],
                      [ -3.8406,   0.0731],
                      [ -0.6009,   0.1400],
                      [  0.7541,  -0.1070],
                      [ -2.9376,  -0.1989]])),
             ('grid_models.0.net.0.bias',
              tensor([ 1.0763,  0.9693, -0.1023,  0.1779, -2.7520,  0.3602, -0.3192,  1.3729,
                      -0.1036, -1.5152, -0.5317,  0.4798,  0.6637,  0.4314, -0.0635,  0.3054])),
             ('g

This is loaded into the training script with `dpyscfl.net.get_scf`, so we must verify that it is loaded correctly.

In [4]:
# Load the same 'xc' file through the training-script entry point and dump its
# parameters so they can be checked against the TorchScript load.
xcds = get_scf(xctype='MGGA', path=os.path.join(xcdp, 'xc'))
print(xcds.xc)
for name, param in xcds.xc.named_parameters():
    print(name, param)

FREEC False
XC(
  (heg_model): LDA_X()
  (pw_model): PW_C()
  (grid_models): ModuleList(
    (0): XC_L(
      (net): Sequential(
        (0): Linear(in_features=2, out_features=16, bias=True)
        (1): GELU()
        (2): Linear(in_features=16, out_features=16, bias=True)
        (3): GELU()
        (4): Linear(in_features=16, out_features=16, bias=True)
        (5): GELU()
        (6): Linear(in_features=16, out_features=1, bias=True)
      )
      (tanh): Tanh()
      (lobf): LOB(
        (sig): Sigmoid()
      )
      (sig): Sigmoid()
    )
    (1): C_L(
      (net): Sequential(
        (0): Linear(in_features=4, out_features=16, bias=True)
        (1): GELU()
        (2): Linear(in_features=16, out_features=16, bias=True)
        (3): GELU()
        (4): Linear(in_features=16, out_features=16, bias=True)
        (5): GELU()
        (6): Linear(in_features=16, out_features=1, bias=True)
        (7): Softplus(beta=1, threshold=20)
      )
      (sig): Sigmoid()
      (tanh): Tanh(



**Note:** Prior to rearranging where `xc` is loaded in `get_scf`, the weights here were not correct.

Most molecules trained are from [G2/97](https://doi.org/10.1063/1.473182) dataset.

## 10 Linear Closed-Shell Molecules
- $\mathrm{H_2}$
- $\mathrm{N_2}$
- $\mathrm{LiF}$
- $\mathrm{CNH}$
- $\mathrm{CO_2}$
- $\mathrm{F_2}$
- $\mathrm{C_2H_2}$
- $\mathrm{OC}$
- $\mathrm{LiH}$
- $\mathrm{Na_2}$

RKS may be used, since these molecules are closed-shell.

## 3 Linear Open-Shell Molecules
- $\mathrm{NO}$
- $\mathrm{CH}$
- $\mathrm{OH}$

UKS should be used, since these molecules are open-shell.

## 8 Non-Linear Closed-Shell Molecules
- $\mathrm{NO_2}$
- $\mathrm{NH}$
- $\mathrm{O_3}$
- $\mathrm{N_2O}$
- $\mathrm{CH_3}$
- $\mathrm{CH_2}$
- $\mathrm{H_2O}$
- $\mathrm{NH_3}$

## 2 Ionization Potentials
- $\mathrm{Li} \to \mathrm{Li}^+$
- $\mathrm{C} \to \mathrm{C}^+$

## 3 Reaction Barrier Heights (from [BH76](https://pubs.acs.org/doi/10.1021/jp045141s))
- $\mathrm{OH} + \mathrm{N_2}\to \mathrm{H}+\mathrm{N_2O}$
- $\mathrm{OH} + \mathrm{CH_3}\to\mathrm{O}+\mathrm{CH_4}$
- $\mathrm{HF} + \mathrm{F} \to \mathrm{H} + \mathrm{F_2}$

## 10 Validation Choices: 8 Atomization Energies, 2 Barrier Heights
- $\mathrm{C_2H_2}$
- $\mathrm{BeH}$
- $\mathrm{NO_2}$
- $\mathrm{S_2}$
- $\mathrm{CH_4}$
- $\mathrm{PF_3}$
- $\mathrm{CH_2}$
- $\mathrm{C_2H_4O_2}$
- $\mathrm{N_2O} + \mathrm{H} \to \mathrm{OH} + \mathrm{N_2}$
- $\mathrm{OH} + \mathrm{Cl} \to \mathrm{O} + \mathrm{HCl}$

## Haunschild G2/97 Reference Energies

These have been calculated (scraped) from tables in the [Haunschild 2012](https://aip.scitation.org/doi/10.1063/1.4704796) paper, and stored in `../data/haunschild_g2/g2_97.traj`.

In [2]:
# Haunschild G2/97 reference trajectory; ':' reads every frame into a list.
datapath = '/home/awills/Documents/Research/dpyscfl/data/haunschild_g2/g2_97.traj'
g297ref = read(datapath, ':')
# One converted entry per reference frame, in the same order as g297ref.
g297mol = list(map(ase_atoms_to_mol, g297ref))

  a = np.array(obj)


In [3]:
# Formulas selected for training, spelled as they may appear in the reference set.
train_formulas = ['H2', 'N2', 'LiF', 'CNH', 'CO2', 'F2', 'C2H2', 'OC', 'LiH', 'Na2', 'NO', 'CH', 'OH',
                 'NO2', 'NH', 'O3', 'N2O', 'CH3', 'CH2', 'H2O', 'NH3']
# Formulas treated as closed-shell in training (not consulted by the loop below).
closed_formulas = ['H2', 'N2', 'LiF', 'CNH', 'CO2', 'F2', 'C2H2', 'OC', 'LiH', 'Na2',
                  'NO2', 'NH', 'O3', 'N2O', 'CH3', 'CH2', 'H2O', 'NH3']
# Formulas expected to carry the open-shell flag in the reference data.
open_formulas = ['NO', 'CH', 'OH']
train_idcs = []
for idx, atom in enumerate(g297ref):
    symb = str(atom.symbols)
    form = atom.get_chemical_formula()
    # A frame qualifies if either spelling (stored symbol order or the
    # get_chemical_formula() string) is one of the training formulas.
    if symb not in train_formulas and form not in train_formulas:
        continue
    # Does the reference trajectory's open-shell flag agree with our expectation?
    expected_shell = symb in open_formulas or form in open_formulas
    oc = atom.info['openshell']
    match = expected_shell == oc
    print(idx, atom.symbols, form, atom.calc.results['energy'], match)
    train_idcs.append(idx)

0 OC CO 11.271868661369064 True
2 H2 H2 4.7440371124327285 True
11 CO2 CO2 16.934698271599974 True
15 OH2 H2O 10.1045409791693 True
18 CNH CHN 13.58724648435963 True
21 N2O N2O 11.74997241685051 True
23 NO2 NO2 9.888549601232311 False
25 LiF FLi 6.052629788935811 True
26 LiH HLi 2.510951589865373 True
29 Na2 Na2 0.7439472700727967 True
58 NH3 H3N 12.913776247394873 True
83 NO NO 6.625670253721614 True
84 NH HN 3.5966088278164094 False
101 C2H2 C2H2 17.575624702100136 True
105 CH2 CH2 7.850415989365281 False
106 CH2 CH2 8.262706633047307 False
107 OH HO 4.646302050343198 True
108 CH CH 3.644077182468589 True
113 N2 N2 9.906376144900815 True
114 F2 F2 1.6822245946714354 True
116 CH3 CH3 13.340991439264483 False
144 O3 O3 6.382109918715892 True


In [5]:
# Sanity check: more indices than formulas means at least one formula matched several frames.
print(len(train_formulas), len(train_idcs))

21 22


In [6]:
# Inspect the metadata of the two frames (indices 105 and 106) that both matched 'CH2'.
print(g297ref[105].info , g297ref[106].info)

{'CH2': True, 'name': 'Singlet carbene', 'openshell': True} {'CH2': True, 'name': 'Triplet carbene', 'openshell': True}


Methylene ($\mathrm{CH_2}$) matches twice -- the singlet and triplet states. Since it is assumed to be closed-shell in training, we train on the singlet state.

In [7]:
# Keep only the singlet CH2 frame. Filtering on the reference entry's 'name'
# tag rather than the positional index 106 keeps this correct if the
# trajectory file is ever reordered, and the cell stays idempotent on re-run.
train_idcs = [i for i in train_idcs if g297ref[i].info.get('name') != 'Triplet carbene']

In [8]:
# Compare each selected frame's metadata (incl. the 'openshell' flag) with the
# spin stored on element 1 of the corresponding converted entry.
for idx in train_idcs:
    ref_entry = g297ref[idx]
    mol_entry = g297mol[idx][1]
    print(ref_entry.info, mol_entry.spin)

{'CO': True, 'name': 'Carbon monoxide', 'openshell': False} 0
{'H2': True, 'name': 'Dihydrogen', 'openshell': False} 0
{'CO2': True, 'name': 'Carbon dioxide', 'openshell': False} 0
{'H2O': True, 'name': 'Water', 'openshell': False} 0
{'CNH': True, 'name': 'Hydrogen cyanide', 'openshell': False} 0
{'N2O': True, 'name': 'Nitrous oxide', 'openshell': False} 0
{'NO2': True, 'name': 'Nitrogen dioxide', 'openshell': True} 1
{'LiF': True, 'name': 'Lithium fluoride', 'openshell': False} 0
{'LiH': True, 'name': 'Lithium hydride', 'openshell': False} 0
{'Na2': True, 'name': 'Disodium', 'openshell': False} 0
{'NH3': True, 'name': 'Ammonia', 'openshell': False} 0
{'NO': True, 'name': 'Nitric oxide', 'openshell': True} 1
{'NH': True, 'name': 'Imidogen', 'openshell': True} 0
{'C2H2': True, 'name': 'Acetylene', 'openshell': False} 0
{'CH2': True, 'name': 'Singlet carbene', 'openshell': True} 0
{'HO': True, 'name': 'Hydroxyl radical', 'openshell': True} 1
{'CH': True, 'name': 'Methylidyne radical', 'o

## BH76 reactions are coded in a specific way

In [3]:
# Read all frames (index ':') of the reaction trajectory and of the SCAN trajectory.
# NOTE(review): `scan` is not referenced again in the cells visible here.
rxn = read('/home/awills/Documents/Research/xcdiff/data/haunschild_scan_reaction.traj', ':')
scan = read('/home/awills/Documents/Research/xcdiff/data/haunschild_scan.traj', ':')

In [5]:
# List the frames whose info dict has no (truthy) 'grid_level' entry.
for at in rxn:
    if at.info.get('grid_level'):
        continue
    print(at.get_chemical_formula(), at.calc, at.info)

H2 SinglePointCalculator(energy=4.7440371124327285) {'H2': True, 'name': 'Dihydrogen', 'n_rad': 2, 'n_ang': 10, 'pol': False}
N2 SinglePointCalculator(energy=9.906376144900815) {'N2': True, 'name': 'Dinitrogen', 'n_rad': 6, 'n_ang': 10, 'pol': False}
FLi SinglePointCalculator(energy=6.052629788935811) {'LiF': True, 'name': 'Lithium fluoride', 'n_rad': 3, 'n_ang': 15, 'pol': False}
CHN SinglePointCalculator(energy=13.58724648435963) {'CNH': True, 'name': 'Hydrogen cyanide', 'n_rad': 2, 'n_ang': 8, 'spin': 0, 'pol': False}
CO2 SinglePointCalculator(energy=16.934698271599974) {'CO2': True, 'name': 'Carbon dioxide', 'n_rad': 7, 'n_ang': 12}
Cl2 SinglePointCalculator(energy=2.5726189938349067) {'Cl2': True, 'name': 'Dichlorine', 'n_rad': 3, 'n_ang': 20, 'spin': 0, 'pol': True}
F2 SinglePointCalculator(energy=1.6822245946714354) {'F2': True, 'name': 'Difluorine', 'n_rad': 8, 'n_ang': 20, 'spin': 0, 'pol': True}
O2 SinglePointCalculator(energy=5.24307669245509) {'O2': True, 'name': 'Dioxygen'

In [7]:
# List the frames whose info dict carries a truthy 'sym' flag.
for at in rxn:
    if not at.info.get('sym', False):
        continue
    print(at.get_chemical_formula(), at.calc, at.info)

`prep_data.py` has been run on these molecules, and on the charged/neutral atoms with mixing flagged. However, some are not needed in training -- O2, for instance.

In [13]:
# Show every reaction-set frame that matches none of the training formulas,
# under either spelling (formula string or stored symbol order).
for i,at in enumerate(rxn):
    form = at.get_chemical_formula()
    symb = str(at.symbols)
    if form in train_formulas or symb in train_formulas:
        continue
    print(form, at.info)

Cl2 {'Cl2': True, 'name': 'Dichlorine', 'n_rad': 3, 'n_ang': 20, 'spin': 0, 'pol': True}
O2 {'O2': True, 'name': 'Dioxygen', 'n_rad': 3, 'n_ang': 10}
HCl {'HCl': True, 'name': 'Hydrogen chloride', 'n_rad': 3, 'n_ang': 10}
C {'spin': 2, 'sym': False, 'grid_level': 4, 'basis': '6-311+G*', 'pol': False}
Cl {'spin': 1, 'sym': False, 'grid_level': 5, 'basis': '6-311+G*', 'pol': False}
F {'spin': 1, 'sym': False, 'grid_level': 4, 'basis': '6-311+G*', 'pol': False}
H {'spin': 1, 'sym': False, 'grid_level': 1, 'basis': '6-311+G', 'pol': False}
Li {'spin': 1, 'sym': False, 'grid_level': 2, 'basis': '6-311+G', 'pol': False}
N {'spin': 3, 'sym': False, 'grid_level': 3, 'basis': '6-311+G*', 'pol': False}
Na {'spin': 1, 'sym': False, 'grid_level': 1, 'basis': '6-311+G*', 'pol': False}
O {'spin': 2, 'sym': False, 'grid_level': 4, 'basis': '6-311++G(3df,2pd)', 'pol': False}
H {'multiplicity': 2, 'grid_level': 1, 'sym': False, 'sc': False, 'reaction': 'reactant', 'reference_height': None, 'spin': 1}
H

In [6]:
# Read all frames of the fractional/charged-atom results trajectory.
frac = read('/home/awills/Documents/Research/swxcd/aegis/fc0fcp/results.traj', ':')

  a = np.array(obj)


In [6]:
# Show every frame of `frac` that matches none of the training formulas.
# Requires the cell that reads `frac` to have been run first.
for i,at in enumerate(frac):
    form = at.get_chemical_formula()
    # Stringify the symbols, as the other selection cells do, so membership in
    # train_formulas is a plain string comparison rather than a comparison of
    # an ASE Symbols object against str.
    symb = str(at.symbols)
    if (form not in train_formulas) and (symb not in train_formulas):
        print(form, at.info)

NameError: name 'frac' is not defined

The prepared data (on my device) are respectively stored in `/home/awills/Documents/Research/swxcd/data/hscanrxnsym` and `/home/awills/Documents/Research/swxcd/data/fc0fcp`, so we want to create a new directory and symlink the previous files in an appropriately indexed manner, with the caveat that the prior folder used the old `prep_data.py` script, so the inputs are stored differently.

In [10]:
def cat_traj_write(traj1, traj2, writeloc, writefile='results.traj', traj1inf=None, traj2inf=None):
    """Concatenate two trajectories and write them to one file.

    Parameters
    ----------
    traj1, traj2 : iterable
        Frames to concatenate; all of ``traj1`` precede all of ``traj2``.
    writeloc : str
        Destination directory; created if it does not exist yet.
    writefile : str
        File name inside ``writeloc``.
    traj1inf, traj2inf : dict, optional
        Extra entries merged into ``at.info`` of every frame of the respective
        trajectory (extra entries win on key clashes). ``None`` or an empty
        dict leaves the frames untouched.

    Notes
    -----
    Frames are tagged in place: each affected frame gets a new ``info`` dict,
    so the caller's trajectory objects show the added keys afterwards.
    """
    newtraj = []
    for traj, extra in ((traj1, traj1inf), (traj2, traj2inf)):
        for at in traj:
            if extra:
                at.info = {**at.info, **extra}
            newtraj.append(at)
    # Writing into a directory that does not exist would fail; create it first.
    if writeloc:
        os.makedirs(writeloc, exist_ok=True)
    write(filename=os.path.join(writeloc, writefile), images=newtraj)
# Re-read both source trajectories, then write the concatenated trajectory.
# Reaction-set frames are tagged 'oldprep' and frac-set frames are tagged
# 'supp' in their info dicts.
rxn = read('/home/awills/Documents/Research/xcdiff/data/haunschild_scan_reaction.traj', ':')
frac = read('/home/awills/Documents/Research/swxcd/aegis/fc0fcp/results.traj', ':')
cat_traj_write(rxn, frac, '/home/awills/Documents/Research/swxcd/data/xcdsymfrac', traj1inf = {"oldprep":True},
              traj2inf= {'supp':True})

Symlink: 
```
ln -s source_file symbolic_link
```

In [8]:
def ln_cat_prep_dirs(traj1, traj2, traj1dat, traj2dat, destdir):
    """Merge two prepared-data directories into ``destdir`` via symlinks.

    ``destdir/data_len.npy`` receives the sum of the two source
    ``data_len.npy`` values. ``destdir/data_<k>.pckl`` links to
    ``traj1dat/data_<k>.pckl`` for each frame of ``traj1``; the frames of
    ``traj2`` follow, with their indices shifted by the number of ``traj1``
    frames.

    Parameters
    ----------
    traj1, traj2 : iterable
        Only their lengths/ordering are used (one pickle per frame).
    traj1dat, traj2dat : str
        Source directories holding ``data_len.npy`` and ``data_<i>.pckl``.
    destdir : str
        Destination directory; created if missing.

    Notes
    -----
    Re-running is safe: an existing symlink at a destination path is replaced.
    A regular file at a destination path is not overwritten, and
    ``os.symlink`` raises ``FileExistsError`` in that case.
    """
    n_traj1 = np.load(os.path.join(traj1dat, 'data_len.npy'))
    n_traj2 = np.load(os.path.join(traj2dat, 'data_len.npy'))
    n_traj_cat = n_traj1 + n_traj2
    print(n_traj_cat)
    if destdir:
        os.makedirs(destdir, exist_ok=True)
    np.save(os.path.join(destdir, 'data_len.npy'), n_traj_cat)

    def _link(src_dir, src_idx, dest_idx):
        # Link targets are resolved relative to the link's own directory, so a
        # relative source path would dangle; store an absolute target instead.
        src = os.path.abspath(os.path.join(src_dir, 'data_{}.pckl'.format(src_idx)))
        dest = os.path.join(destdir, 'data_{}.pckl'.format(dest_idx))
        if os.path.islink(dest):
            os.remove(dest)
        os.symlink(src, dest)

    n_first = 0
    for idx, _ in enumerate(traj1):
        _link(traj1dat, idx, idx)
        n_first += 1
    for idx, _ in enumerate(traj2):
        _link(traj2dat, idx, n_first + idx)

In [9]:
# Symlink the prepared pickles of both source directories into the merged
# directory; the printed value is the combined data length.
ln_cat_prep_dirs(rxn, frac, '/home/awills/Documents/Research/swxcd/data/hscanrxnsym',
                '/home/awills/Documents/Research/swxcd/data/fc0fcp', 
                 destdir='/home/awills/Documents/Research/swxcd/data/xcdsymfrac')

102


In [16]:
# Total frame count of both trajectories; should equal the combined data length printed above.
len(rxn) + len(frac)

102

In [38]:
# Read all frames of the merged trajectory.
# NOTE(review): the earlier cells write the merged data to .../data/xcdsymfrac,
# while this reads .../data/rxnfrac — confirm the directory was renamed or copied.
rxnfrac = read('/home/awills/Documents/Research/swxcd/data/rxnfrac/results.traj', ':')

In [39]:
# Show formula and info tags of every merged frame.
for i, at in enumerate(rxnfrac):
    formula, tags = at.get_chemical_formula(), at.info
    print(formula, tags)

H2 {'H2': True, 'name': 'Dihydrogen', 'n_rad': 2, 'n_ang': 10, 'pol': False, 'oldprep': True}
N2 {'N2': True, 'name': 'Dinitrogen', 'n_rad': 6, 'n_ang': 10, 'pol': False, 'oldprep': True}
FLi {'LiF': True, 'name': 'Lithium fluoride', 'n_rad': 3, 'n_ang': 15, 'pol': False, 'oldprep': True}
CHN {'CNH': True, 'name': 'Hydrogen cyanide', 'n_rad': 2, 'n_ang': 8, 'spin': 0, 'pol': False, 'oldprep': True}
CO2 {'CO2': True, 'name': 'Carbon dioxide', 'n_rad': 7, 'n_ang': 12, 'oldprep': True}
Cl2 {'Cl2': True, 'name': 'Dichlorine', 'n_rad': 3, 'n_ang': 20, 'spin': 0, 'pol': True, 'oldprep': True}
F2 {'F2': True, 'name': 'Difluorine', 'n_rad': 8, 'n_ang': 20, 'spin': 0, 'pol': True, 'oldprep': True}
O2 {'O2': True, 'name': 'Dioxygen', 'n_rad': 3, 'n_ang': 10, 'oldprep': True}
C2H2 {'C2H2': True, 'name': 'Acetylene', 'n_rad': 3, 'n_ang': 10, 'oldprep': True}
CO {'CO': True, 'name': 'Carbon monoxide', 'n_rad': 7, 'n_ang': 15, 'oldprep': True}
HCl {'HCl': True, 'name': 'Hydrogen chloride', 'n_rad': 

In [44]:
# Build `molecules`: a dict mapping a label to a list of frame indices into `atoms`.
# The insertion order below (molecules, pure atoms, neutral / charged / fractional
# supplemental atoms) determines the key order of the dict.
atoms = rxnfrac
print("PARSING NON-ATOMIC NON-REACTION MOLECULES")
# Multi-atom frames without a truthy 'reaction' tag. The key is the frame index
# right-aligned to width 3 followed by the concatenated symbols (e.g. '  3CNH'),
# so repeated compositions stay distinct; the value starts as [idx].
molecules = {'{:3d}'.format(idx) + ''.join(a.get_chemical_symbols()): [idx] for idx, a in enumerate(atoms) if len(a.positions) > 1 and not a.info.get('reaction') }
print(molecules)
print("PARSING NEUTRAL, PURE NON-REACTION ATOMS. CHARGE FLAG NOT SET.")
# Single-atom frames with none of the 'reaction', 'fractional', 'charge' or 'supp'
# tags set to a truthy value. Keyed by bare symbol, so if a symbol occurs in
# several such frames the last one wins.
pure_atoms = {''.join(a.get_chemical_symbols()): [idx] for idx, a in enumerate(atoms) if len(a.positions) == 1 and not a.info.get('reaction') and not a.info.get('fractional') and not a.info.get('charge') and not a.info.get('supp')}
print(pure_atoms)
molecules.update(pure_atoms)
print("PARSING SUPPLEMENTAL NEUTRAL, PURE ATOMS (FROM FRAC DATASET)")
# Single-atom 'supp' frames whose 'charge' is missing or falsy (an explicit 0 counts
# as neutral). Key is '<symbol>_n0'.
# NOTE(review): unlike pure_atoms, this filter does not exclude 'fractional' frames —
# confirm fractional frames always carry a truthy 'charge', otherwise they land here too.
n_atoms = {''.join(a.get_chemical_symbols())+'_n0': [idx] for idx, a in enumerate(atoms) if len(a.positions)==1 and a.info.get('supp') and not a.info.get('charge')}
print(n_atoms)
molecules.update(n_atoms)
print("PARSING SUPPLEMENTAL CHARGED, PURE ATOMS")
# Single-atom 'supp' frames with a truthy 'charge'; key is '<symbol>_c<charge>'.
c_atoms = {''.join(a.get_chemical_symbols())+'_c{}'.format(a.info['charge']): [idx] for idx, a in enumerate(atoms) if len(a.positions)==1 and a.info.get('supp') and a.info.get('charge')}
print(c_atoms)
molecules.update(c_atoms)
print("PARSING SUPPLEMENTAL FRACTIONAL ATOMS")
# Single-atom 'supp' frames with a truthy 'fractional'; key is '<symbol>_f<fractional>'.
frac_atoms = {''.join(a.get_chemical_symbols())+'_f{}'.format(a.info['fractional']): [idx] for idx, a in enumerate(atoms) if len(a.positions)==1 and a.info.get('supp') and a.info.get('fractional')}
print(frac_atoms)
molecules.update(frac_atoms)
def split(el):
    """Break a concatenated symbol string into chunks that each start at a capital letter.

    For example ``'LiF'`` -> ``['Li', 'F']`` and ``'CNH'`` -> ``['C', 'N', 'H']``.
    Text before the first capital is kept as its own chunk; empty pieces are
    discarded, so an empty string gives ``[]``.
    """
    import re
    pieces = re.split("([A-Z][^A-Z]*)", el)
    return list(filter(None, pieces))

# Append to every molecule entry the frame indices of its constituent pure atoms,
# giving [molecule_idx, atom_idx, atom_idx, ...].
for molecule in molecules:
    comp = []
    # Skip the '<symbol>_...' supplemental entries (neutral, charged or fractional atoms).
    if '_' in molecule:
        continue
    # molecule[3:] drops the 3-character index prefix. For bare pure-atom keys
    # ('C', 'Cl', ...) the slice is empty, so nothing is appended.
    # A symbol with no entry in pure_atoms raises KeyError here.
    for a in split(molecule[3:]):
        comp.append(pure_atoms[a][0])
    molecules[molecule] += comp
# Number of atoms in each frame, keyed by frame index.
a_count = {idx: len(at.positions) for idx,at in enumerate(atoms)}
#a_count = {a: np.sum([a in molecules[mol] for mol in molecules]) for a in np.unique([m  for mol in molecules for m in molecules[mol]])}
print("PARSING REACTIONS")
reactions = {}
for idx, a in enumerate(atoms):
    # Frame must have a truthy 'reaction' tag ...
    if not a.info.get('reaction'): continue
    # ... and must not itself be a reactant.
    if a.info.get('reaction') == 'reactant': continue
    # For the remaining frames 'reaction' is used as an integer n: the entry lists
    # this frame followed by the n frames immediately before it (idx-n ... idx-1).
    # NOTE(review): presumably n is the number of participating frames stored
    # just ahead of this one in the trajectory — confirm against prep_data.py.
    reactions['{:3d}'.format(idx) + ''.join(a.get_chemical_symbols())] = \
        [idx] + [idx + i for i in np.arange(-a.info.get('reaction'),0,1).astype(int)]
print(reactions)
molecules.update(reactions)
print("MOLECULES TO TRAIN ON")
print(molecules)

PARSING NON-ATOMIC NON-REACTION MOLECULES
{'  0HH': [0], '  1NN': [1], '  2LiF': [2], '  3CNH': [3], '  4COO': [4], '  5ClCl': [5], '  6FF': [6], '  7OO': [7], '  8CCHH': [8], '  9OC': [9], ' 10ClH': [10], ' 11LiH': [11], ' 12NaNa': [12], ' 21NO': [21], ' 22NOO': [22], ' 23NH': [23], ' 24OOO': [24], ' 25NNO': [25], ' 26CHHH': [26], ' 27CHH': [27], ' 28OHH': [28], ' 29NHHH': [29], ' 30OH': [30], ' 31CH': [31]}
PARSING NEUTRAL, PURE NON-REACTION ATOMS. CHARGE FLAG NOT SET.
{'C': [13], 'Cl': [14], 'F': [15], 'H': [16], 'Li': [17], 'N': [18], 'Na': [19], 'O': [20]}
PARSING SUPPLEMENTAL NEUTRAL, PURE ATOMS (FROM FRAC DATASET)
{'H_n0': [84], 'He_n0': [85], 'Li_n0': [86], 'Be_n0': [87], 'B_n0': [88], 'C_n0': [89], 'N_n0': [90], 'O_n0': [91], 'F_n0': [92], 'Ne_n0': [93], 'Na_n0': [94], 'Mg_n0': [95], 'Al_n0': [96], 'Si_n0': [97], 'P_n0': [98], 'S_n0': [99], 'Cl_n0': [100], 'Ar_n0': [101]}
PARSING SUPPLEMENTAL CHARGED, PURE ATOMS
{'H_c1': [66], 'He_c1': [67], 'Li_c1': [68], 'Be_c1': [69], 'B_c1

In [45]:
# Display the assembled label -> frame-index lists.
molecules

{'  0HH': [0, 16, 16],
 '  1NN': [1, 18, 18],
 '  2LiF': [2, 17, 15],
 '  3CNH': [3, 13, 18, 16],
 '  4COO': [4, 13, 20, 20],
 '  5ClCl': [5, 14, 14],
 '  6FF': [6, 15, 15],
 '  7OO': [7, 20, 20],
 '  8CCHH': [8, 13, 13, 16, 16],
 '  9OC': [9, 20, 13],
 ' 10ClH': [10, 14, 16],
 ' 11LiH': [11, 17, 16],
 ' 12NaNa': [12, 19, 19],
 ' 21NO': [21, 18, 20],
 ' 22NOO': [22, 18, 20, 20],
 ' 23NH': [23, 18, 16],
 ' 24OOO': [24, 20, 20, 20],
 ' 25NNO': [25, 18, 18, 20],
 ' 26CHHH': [26, 13, 16, 16, 16],
 ' 27CHH': [27, 13, 16, 16],
 ' 28OHH': [28, 20, 16, 16],
 ' 29NHHH': [29, 18, 16, 16, 16],
 ' 30OH': [30, 20, 16],
 ' 31CH': [31, 13, 16],
 'C': [13],
 'Cl': [14],
 'F': [15],
 'H': [16],
 'Li': [17],
 'N': [18],
 'Na': [19],
 'O': [20],
 'H_n0': [84],
 'He_n0': [85],
 'Li_n0': [86],
 'Be_n0': [87],
 'B_n0': [88],
 'C_n0': [89],
 'N_n0': [90],
 'O_n0': [91],
 'F_n0': [92],
 'Ne_n0': [93],
 'Na_n0': [94],
 'Mg_n0': [95],
 'Al_n0': [96],
 'Si_n0': [97],
 'P_n0': [98],
 'S_n0': [99],
 'Cl_n0': [100]

In [28]:
# Indexed listing of the reaction-set frames. The 'oldprep' tag in the output
# comes from cat_traj_write, which replaces each frame's info dict in place.
for idx,at in enumerate(rxn):
    print(idx, at.get_chemical_formula(), at.calc, at.info)

0 H2 SinglePointCalculator(energy=4.7440371124327285) {'H2': True, 'name': 'Dihydrogen', 'n_rad': 2, 'n_ang': 10, 'pol': False, 'oldprep': True}
1 N2 SinglePointCalculator(energy=9.906376144900815) {'N2': True, 'name': 'Dinitrogen', 'n_rad': 6, 'n_ang': 10, 'pol': False, 'oldprep': True}
2 FLi SinglePointCalculator(energy=6.052629788935811) {'LiF': True, 'name': 'Lithium fluoride', 'n_rad': 3, 'n_ang': 15, 'pol': False, 'oldprep': True}
3 CHN SinglePointCalculator(energy=13.58724648435963) {'CNH': True, 'name': 'Hydrogen cyanide', 'n_rad': 2, 'n_ang': 8, 'spin': 0, 'pol': False, 'oldprep': True}
4 CO2 SinglePointCalculator(energy=16.934698271599974) {'CO2': True, 'name': 'Carbon dioxide', 'n_rad': 7, 'n_ang': 12, 'oldprep': True}
5 Cl2 SinglePointCalculator(energy=2.5726189938349067) {'Cl2': True, 'name': 'Dichlorine', 'n_rad': 3, 'n_ang': 20, 'spin': 0, 'pol': True, 'oldprep': True}
6 F2 SinglePointCalculator(energy=1.6822245946714354) {'F2': True, 'name': 'Difluorine', 'n_rad': 8, 'n

In [None]:
# NOTE(review): within the cells visible here, `dataset` is only created in the
# following cell, so on a fresh kernel this raises NameError unless that cell
# is run first.
dataset[[40, 38, 39]]

In [None]:
# Pick one label by position; this relies on the insertion order of `molecules`.
m_idx = 10
molecule = list(molecules.keys())[m_idx]
# Prepared data for the merged trajectory, wrapped in a loader.
dataset = MemDatasetRead('/home/awills/Documents/Research/swxcd/data/rxnfrac/')
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False) # Don't change batch size!

In [None]:
# Display the dataset object created in the previous cell.
dataset

In [None]:
# NOTE(review): `idatasetmat` is not defined in any cell visible here —
# presumably leftover scratch; confirm or remove.
idatasetmat

In [None]:
# NOTE(review): `idataset` is not defined in any cell visible here —
# presumably leftover scratch; confirm or remove.
idataset

In [None]:
# Plain-text dump of the label -> frame-index lists.
print(molecules)

In [None]:
# Display of the same `molecules` dict as the print cell above.
molecules