In [8]:
from torch.utils.data import DataLoader 
import torch
import matplotlib.pyplot as plt
from pyscf import gto, dft, scf 
from pyscf.lib import chkfile
# from kspies import wy, zmp
from dpyscfl.scf import *
from dpyscfl.net import *
from dpyscfl.utils import *
from dpyscfl.losses import *
import os, shutil, copy
from ase import Atoms, Atom
from ase.units import Bohr, Hartree, kcal, mol
import pickle
import pandas as pd
from ase.io import read, write
# from torchviz import make_dot
import pylibnxc
# Unit-conversion helpers built from the ase.units constants imported above.
# kcal_mol is one kcal/mol in ASE's internal units.
kcal_mol = kcal / mol
# Multiply an energy in Hartree by this factor to express it in kcal/mol
# (e.g. 0.132 Hartree -> ~82.8 kcal/mol).
Hart_to_kcalmol = Hartree / kcal_mol


# Spin to assign to an isolated atom of each element, because pyscf does not
# guess this correctly on its own. Used by get_spin for single-atom
# structures that carry no explicit 'spin' entry.
# NOTE(review): values look like the number of unpaired electrons in the
# atomic ground state -- confirm against the convention the SCF code expects.
spins_dict = dict(
    Al=1,
    B=1,
    Li=1,
    Na=1,
    Si=2,
    Be=0,
    C=2,
    Cl=1,
    F=1,
    H=1,
    N=3,
    O=2,
    P=3,
    S=2,
)

def get_spin(at):
    """Choose the spin value to use for a structure.

    Decision order:
      1. A single atom without a 'spin' key in ``at.info`` is looked up in
         ``spins_dict`` by its chemical symbol.
      2. An explicit ``at.info['spin']`` that is not None is used as-is.
         This includes an explicit 0, which must not be overridden by the
         heuristics below.
      3. 'radical' appearing in ``at.info['name']`` gives spin 1.
      4. A truthy ``at.info['openshell']`` gives spin 2.
      5. Otherwise spin 0.

    Parameters
    ----------
    at : object exposing ``positions``, ``symbols`` and an ``info`` dict
        (in this notebook, an ``ase.Atoms`` instance).

    Returns
    -------
    The selected spin value.

    Raises
    ------
    KeyError
        For a single atom without a 'spin' entry whose symbol is not
        listed in ``spins_dict``.
    """
    #if single atom and spin is not specified in at.info dictionary, use spins_dict
    if len(at.positions) == 1 and 'spin' not in at.info:
        return spins_dict[str(at.symbols)]

    specified = at.info.get('spin', None)
    # Compare against None rather than testing truthiness: a closed-shell
    # system explicitly tagged with spin 0 is a valid specification.
    if specified is not None:
        print('Spin specified in atom info.')
        return specified

    if 'radical' in at.info.get('name', ''):
        print('Radical specified in atom.info["name"], assuming spin 1.')
        return 1

    if at.info.get('openshell', None):
        print("Openshell specified in atom info, attempting spin 2.")
        return 2

    print("No specifications in atom info to help, assuming no spin.")
    return 0


The referenced G2/97 dataset is located [here](https://pubs.aip.org/aip/jcp/article/136/16/164102/190335/New-accurate-reference-energies-for-the-G2-97-test).

The referenced BH76 dataset may be found [here](http://www.thch.uni-bonn.de/tc.old/downloads/GMTKN/GMTKN55/BH76.html).

# Small Subset for Validation

- BF3 = 1972.30 kJ/mol
- C2H2 = 1695.79 kJ/mol
- LiH = 242.27 kJ/mol
- NO = 639.28 kJ/mol

And the constituent atoms, with energies from the [reference](https://journals.aps.org/pra/pdf/10.1103/PhysRevA.47.3649) (table XI, neutral column -- energies are already given in Hartrees):
- B = -24.65391
- F = -99.7339
- C = -37.8450
- H = -0.5
- Li = -7.47806
- N = -54.5892
- O = -75.0673

For BF3, 1972.30 kJ/mol = 20.4414 eV. So energies in the below trajectory are stored in eV.

In [12]:
# Load every frame of the G2/97 trajectory in this cell. Without this, `g2`
# is first assigned in a later cell, so this loop raises NameError on a
# fresh kernel (Restart & Run All).
g2 = read('/home/awills/Documents/Research/xcdiff/data/haunschild_g2/g2_97.traj', ':')
# Show each structure together with its metadata dictionary.
for at in g2:
    print(at, at.info)

Atoms(symbols='OC', pbc=False, calculator=SinglePointCalculator(...)) {'CO': True, 'name': 'Carbon monoxide', 'openshell': False}
Atoms(symbols='CS', pbc=False, calculator=SinglePointCalculator(...)) {'CS': True, 'name': 'Carbon monosulfide', 'openshell': False}
Atoms(symbols='H2', pbc=False, calculator=SinglePointCalculator(...)) {'H2': True, 'name': 'Dihydrogen', 'openshell': False}
Atoms(symbols='FH', pbc=False, calculator=SinglePointCalculator(...)) {'HF': True, 'name': 'Hydrogen fluoride', 'openshell': False}
Atoms(symbols='P2', pbc=False, calculator=SinglePointCalculator(...)) {'P2': True, 'name': 'Diphosphorus', 'openshell': False}
Atoms(symbols='S2', pbc=False, calculator=SinglePointCalculator(...)) {'S2': True, 'name': 'Disulfur', 'openshell': True}
Atoms(symbols='SH', pbc=False, calculator=SinglePointCalculator(...)) {'HS': True, 'name': 'Mercapto radical', 'openshell': True}
Atoms(symbols='OS', pbc=False, calculator=SinglePointCalculator(...)) {'OS': True, 'name': 'Sulfur mo

In [13]:
# Build a small validation subset: four G2/97 molecules plus the isolated
# atoms they are made of, then write everything to one trajectory file.
g2 = read('/home/awills/Documents/Research/xcdiff/data/haunschild_g2/g2_97.traj', ':')
# print(g2)
vals = ['BF3', 'LiH', 'NO', 'C2H2']
atvals = ['B','F','C','H','Li','N','O']
# Atomic energies, listed in the same order as atvals.
atens = [-24.65391, -99.7339, -37.8450, -0.5, -7.47806, -54.5892, -75.0673]
g2_sub = []
for idx, at in enumerate(g2):
    # Only the molecules named in vals are kept.
    if str(at.symbols) not in vals:
        continue
    # Keep the stored energy both as-is ('e_ev') and divided by Hartree ('e_H').
    at.info.update(
        e_H=at.calc.results['energy']/Hartree,
        e_ev=at.calc.results['energy'],
    )
    # Both targets are the negated 'e_H' value.
    at.info['atomization'] = at.info['target_energy'] = -at.info['e_H']
    g2_sub.append(at)
    print(at.symbols)
    print(at.calc.results['energy'])
    print(at.info)
for idx, at in enumerate(atvals):
    # One-atom structure; all three energy fields carry the same tabulated value.
    single = Atoms(symbols=at)
    single.info.update(
        atomization=atens[idx],
        target_energy=atens[idx],
        e_H=atens[idx],
    )
    single.info['spin'] = get_spin(single)
    g2_sub.append(single)

    print(single.symbols, single.info)
write('/home/awills/Documents/Research/swxcd/scont/valset/val.traj', g2_sub)

LiH
2.510951589865373
{'LiH': True, 'name': 'Lithium hydride', 'openshell': False, 'e_H': 0.09227576969496765, 'e_ev': 2.510951589865373, 'atomization': -0.09227576969496765, 'target_energy': -0.09227576969496765}
NO
6.625670253721614
{'NO': True, 'name': 'Nitric oxide', 'openshell': True, 'e_H': 0.24348889276674332, 'e_ev': 6.625670253721614, 'atomization': -0.24348889276674332, 'target_energy': -0.24348889276674332}
C2H2
17.575624702100136
{'C2H2': True, 'name': 'Acetylene', 'openshell': False, 'e_H': 0.6458922998762915, 'e_ev': 17.575624702100136, 'atomization': -0.6458922998762915, 'target_energy': -0.6458922998762915}
BF3
20.441448882203634
{'BF3': True, 'name': 'Boron trifluoride', 'openshell': False, 'e_H': 0.7512093968274431, 'e_ev': 20.441448882203634, 'atomization': -0.7512093968274431, 'target_energy': -0.7512093968274431}
B {'atomization': -24.65391, 'target_energy': -24.65391, 'e_H': -24.65391, 'spin': 1}
F {'atomization': -99.7339, 'target_energy': -99.7339, 'e_H': -99.73

### The CCSD(T) calculations of this subset

In [25]:
# Load every frame of the CCSD(T) results for the subset, then list each
# entry's position, structure, calculator results and info dictionary.
cc_sub = read(
    '/home/awills/Documents/Research/swxcd/scont/valset/ccsdt/results.traj',
    index=':',
)
for idx, at in enumerate(cc_sub):
    print(idx, at, at.calc.results, at.info)

0 Atoms(symbols='LiH', pbc=False, calculator=SinglePointCalculator(...)) {'energy': -8.038079906478368} {'LiH': True, 'name': 'Lithium hydride', 'openshell': False, 'e_H': 0.09227576969496765, 'e_ev': 2.510951589865373}
1 Atoms(symbols='NO', pbc=False, calculator=SinglePointCalculator(...)) {'energy': -129.7632739631654} {'NO': True, 'name': 'Nitric oxide', 'openshell': True, 'e_H': 0.24348889276674332, 'e_ev': 6.625670253721614}
2 Atoms(symbols='C2H2', pbc=False, calculator=SinglePointCalculator(...)) {'energy': -77.2336470993193} {'C2H2': True, 'name': 'Acetylene', 'openshell': False, 'e_H': 0.6458922998762915, 'e_ev': 17.575624702100136}
3 Atoms(symbols='BF3', pbc=False, calculator=SinglePointCalculator(...)) {'energy': -324.2808406836368} {'BF3': True, 'name': 'Boron trifluoride', 'openshell': False, 'e_H': 0.7512093968274431, 'e_ev': 20.441448882203634}


# Published Validation

In [2]:
# Convert three reference energies from Hartree to kcal/mol.
# OH+N2 -> H+N2O ref energy is 0.132 Hart
ohn2, ohch3, hff = (
    e_hart*Hart_to_kcalmol for e_hart in (0.132, 0.0129, 0.169)
)
# Display the converted values as the cell output.
ohn2, ohch3, hff

(82.83125055346386, 8.094872213179421, 106.04910108738933)

In [3]:
# Convert two energies given in kcal/mol (17.7 and 9.9) to Hartree.
n2oh, ohcl = (
    e_kcal/Hart_to_kcalmol for e_kcal in (17.7, 9.9)
)
# Display the converted values as the cell output.
n2oh, ohcl

(0.028206745454023522, 0.01577665423699621)

The validation set consists of the atomization energies of C2H2, BeH, NO2, S2, CH4, PF3, CH2 and C2H4O2 from G2/97, plus the reactions N2O + H -> OH + N2 and OH + Cl -> O + HCl from BH76.

In [4]:
# Load all frames of the BH76 structures and of the G2/97 results trajectory.
bh76 = read(
    '/home/awills/Documents/Research/datasets/BH76/strucs_cm.traj',
    index=':',
)
g297 = read(
    '/home/awills/Documents/Research/swxcd/aegis/g297_ps/caresults_os.traj',
    index=':',
)

  a = np.array(obj)


In [5]:
# List every G2/97 entry (position, formula, metadata) followed by a blank
# line, so the indices needed for the validation subset can be read off.
for idx, at in enumerate(g297):
    print(idx, at.get_chemical_formula(), at.info, end='\n\n')

0 CO {'CO': True, 'energy': -113.20173021777055, 'atomization': 0.4142335363320095, 'atomization_ev': 11.271868661369064, 'atomization_H': 0.4142335363320095, 'openshell': False, 'name': 'Carbon monoxide'}

1 CS {'CS': True, 'energy': -435.87414183611736, 'atomization': 0.2740316507303354, 'atomization_ev': 7.456781030917735, 'atomization_H': 0.2740316507303354, 'openshell': False, 'name': 'Carbon monosulfide'}

2 H2 {'H2': True, 'energy': -1.1723488681184566, 'atomization': 0.17434014967795244, 'atomization_ev': 4.7440371124327285, 'atomization_H': 0.17434014967795244, 'openshell': False, 'name': 'Dihydrogen'}

3 HF {'HF': True, 'energy': -100.36472229755955, 'atomization': 0.2258693892950415, 'atomization_ev': 6.146219143195457, 'atomization_H': 0.2258693892950415, 'openshell': False, 'name': 'Hydrogen fluoride'}

4 P2 {'P2': True, 'energy': -682.1310084499595, 'atomization': 0.18636071884282296, 'atomization_ev': 5.071133460210626, 'atomization_H': 0.18636071884282296, 'openshell': 

In [6]:
# Positions in g297 of the molecules used for validation.
# Per-entry labels follow the original ordering note:
# S2, C2H2, BeH, NO2, CH4, PF3, CH2, C2H4O2
g2inds = [
    5,    # S2
    101,  # C2H2
    14,   # BeH
    23,   # NO2
    10,   # CH4
    27,   # PF3
    105,  # CH2
    85,   # C2H4O2
]

In [7]:
# List every BH76 entry (position, formula, metadata) so the indices of the
# species needed for the validation reactions can be read off.
for idx, at in enumerate(bh76):
    print(f"{idx} {at.get_chemical_formula()} {at.info}")

0 C2H5 {'': True, 'name': 'C2H5', 'dirname': 'C2H5', 'dataset': 'bh76', 'charge': 0, 'multiplicity': 2, 'spin': 1}
1 C2H6 {'': True, 'name': 'C2H6', 'dirname': 'C2H6', 'dataset': 'bh76', 'charge': 0, 'multiplicity': 1, 'spin': 0}
2 C5H8 {'': True, 'name': 'C5H8', 'dirname': 'C5H8', 'dataset': 'bh76', 'charge': 0, 'multiplicity': 1, 'spin': 0}
3 CH3O {'': True, 'name': 'CH2OH', 'dirname': 'CH2OH', 'dataset': 'bh76', 'charge': 0, 'multiplicity': 2, 'spin': 1}
4 CH4 {'': True, 'name': 'CH4', 'dirname': 'CH4', 'dataset': 'bh76', 'charge': 0, 'multiplicity': 1, 'spin': 0}
5 H2 {'': True, 'name': 'H2', 'dirname': 'H2', 'dataset': 'bh76', 'charge': 0, 'multiplicity': 1, 'spin': 0}
6 H2O {'': True, 'name': 'H2O', 'dirname': 'H2O', 'dataset': 'bh76', 'charge': 0, 'multiplicity': 1, 'spin': 0}
7 H2S {'': True, 'name': 'H2S', 'dirname': 'H2S', 'dataset': 'bh76', 'charge': 0, 'multiplicity': 1, 'spin': 0}
8 HS {'': True, 'name': 'HS', 'dirname': 'HS', 'dataset': 'bh76', 'charge': 0, 'multiplicity'

For N2O + H -> OH + N2, the reaction is the first in the dataset: -H - N2O + n2ohts = 17.7 kcal/mol.

For OH + Cl -> O + HCl, the reaction is the 66th: -OH - Cl + RKT17 = 9.9 kcal/mol.

In [8]:
# Positions in bh76 of the species entering the two validation reactions:
#-H - N2O -> n2ohts, -OH - CL -> rkt17
# NOTE(review): per-entry labels are taken from the comments in the cell
# that annotates bhval -- confirm against the bh76 listing.
bhinds = [
    63,  # H
    82,  # N2O
    83,  # n2ohts
    84,  # OH
    49,  # CL
    33,  # RKT17
]

In [9]:
# Pull the selected structures out of the two datasets, keeping the order
# of the index lists.
g2val = [g297[g2_index] for g2_index in g2inds]
bhval = [bh76[bh_index] for bh_index in bhinds]
# One value per bhval entry: zero everywhere except at positions 2 and 5,
# which hold 17.7 and 9.9 kcal/mol converted to Hartree.
bhvalr = [
    0,
    0,
    17.7/Hart_to_kcalmol,
    0,
    0,
    9.9/Hart_to_kcalmol,
]

In [10]:
# Tag each selected BH76 structure with:
#   'product' - label of the reaction group it belongs to,
#   'enmult'  - multiplier applied to its energy (-1 or +1),
#   'bh_ref'  - reference value in Hartree, set only on entries 2 and 5.

# Reaction group 'n2ohts': H, N2O, n2ohts
#H
bhval[0].info.update(product='n2ohts', enmult=-1)
#N2O
bhval[1].info.update(product='n2ohts', enmult=-1)
#n2ohts
bhval[2].info.update(product='n2ohts', enmult=1, bh_ref=17.7/Hart_to_kcalmol)

# Reaction group 'RKT17': OH, CL, RKT17
#OH
bhval[3].info.update(product='RKT17', enmult=-1)
#CL
bhval[4].info.update(product='RKT17', enmult=-1)
#RKT17
bhval[5].info.update(product='RKT17', enmult=1, bh_ref=9.9/Hart_to_kcalmol)

In [11]:
# Combined validation set: G2/97 molecules first, then the BH76 species.
val = [*g2val, *bhval]
# Store the combined set as a single trajectory file.
write(
    '/home/awills/Documents/Research/dpyscfl/data/validation/val.traj',
    val,
)

In [18]:
# Re-load the validation set and a results trajectory, then make sure every
# validation structure carries an 'energy' entry in its info dictionary.
tval = read('/home/awills/Documents/Research/dpyscfl/data/validation/val.traj', ':')
tvalc = read('/home/awills/Documents/Research/swxcd/aegis/validation/results.traj', ':')
for idx, at in enumerate(tval):
    print(idx, at, at.info)
    try:
        print(at.info['energy'])
    except KeyError:
        # Only a missing key triggers the back-fill; any other error surfaces.
        # assumes tvalc holds the same structures in the same order as tval -- TODO confirm
        at.info['energy'] = tvalc[idx].calc.results['energy']
        print("No energy in info dict.")
# Write tval, the list that was just updated. This cell previously wrote
# `val`, the list built before the reload, so the back-filled energies never
# reached the output file.
write('/home/awills/Documents/Research/swxcd/aegis/validation/val_c.traj', tval)

0 Atoms(symbols='S2', pbc=False, calculator=SinglePointCalculator(...)) {'S2': True, 'energy': -795.7439189734063, 'atomization': 0.1652790172181213, 'atomization_ev': 4.497471139250339, 'atomization_H': 0.1652790172181213, 'openshell': True, 'name': 'Disulfur'}
-795.7439189734063
1 Atoms(symbols='C2H2', pbc=False, calculator=SinglePointCalculator(...)) {'C2H2': True, 'energy': -77.23364708420903, 'atomization': 0.6458922998762915, 'atomization_ev': 17.575624702100136, 'atomization_H': 0.6458922998762915, 'openshell': False, 'name': 'Acetylene'}
-77.23364708420903
2 Atoms(symbols='BeH', pbc=False, calculator=SinglePointCalculator(...)) {'BeH': True, 'energy': -15.213086658834586, 'atomization': 0.08093697552392216, 'atomization_ev': 2.2024072846262093, 'atomization_H': 0.08093697552392216, 'openshell': True, 'name': 'Beryllium monohydride'}
-15.213086658834586
3 Atoms(symbols='NO2', pbc=False, calculator=SinglePointCalculator(...)) {'NO2': True, 'energy': -204.87297985889361, 'atomizat

In [21]:
# Ensure every validation structure has both an 'energy' and an
# 'atomization' entry in its info dictionary, then write the result.
for idx, at in enumerate(tval):
    print(idx, at, at.info)
    try:
        print(at.info['energy'])
    except KeyError:
        # Only a missing key triggers the back-fill; a bare `except:` would
        # also hide unrelated failures (and KeyboardInterrupt).
        # assumes tvalc holds the same structures in the same order as tval -- TODO confirm
        at.info['energy'] = tvalc[idx].calc.results['energy']
        print("No energy in info dict.")
    try:
        print(at.info['atomization'])
    except KeyError:
        # Structures without an atomization entry get NaN as a placeholder.
        at.info['atomization'] = np.nan
write('/home/awills/Documents/Research/swxcd/aegis/validation/val_c.traj', tval)

0 Atoms(symbols='S2', pbc=False, calculator=SinglePointCalculator(...)) {'S2': True, 'energy': -795.7439189734063, 'atomization': 0.1652790172181213, 'atomization_ev': 4.497471139250339, 'atomization_H': 0.1652790172181213, 'openshell': True, 'name': 'Disulfur'}
-795.7439189734063
0.1652790172181213
1 Atoms(symbols='C2H2', pbc=False, calculator=SinglePointCalculator(...)) {'C2H2': True, 'energy': -77.23364708420903, 'atomization': 0.6458922998762915, 'atomization_ev': 17.575624702100136, 'atomization_H': 0.6458922998762915, 'openshell': False, 'name': 'Acetylene'}
-77.23364708420903
0.6458922998762915
2 Atoms(symbols='BeH', pbc=False, calculator=SinglePointCalculator(...)) {'BeH': True, 'energy': -15.213086658834586, 'atomization': 0.08093697552392216, 'atomization_ev': 2.2024072846262093, 'atomization_H': 0.08093697552392216, 'openshell': True, 'name': 'Beryllium monohydride'}
-15.213086658834586
0.08093697552392216
3 Atoms(symbols='NO2', pbc=False, calculator=SinglePointCalculator(..