In [6]:
import geomlib

In [7]:
from pathlib import Path

In [8]:
import numpy as np

In [22]:
from csv import DictReader

### S22

In [74]:
# Load every S22 complex geometry. The order in which glob yields paths is
# not guaranteed (it depends on the filesystem), so the paths are sorted to
# keep the list and its displayed repr reproducible across machines and
# re-runs. The statistics computed from `s22` are order-independent.
s22 = [
    geomlib.readfile(path)
    for path in sorted(Path('s22/build/geoms').glob('*-complex.xyz'))
]
s22

[H6N2,
 C4H8,
 C12H12,
 C8H8N4,
 C8H8N4O4,
 C14H13N,
 C10H11N7O2,
 C4H6,
 C6H8O,
 C6H9N,
 C7H7N,
 H4O2,
 C12H12,
 C14H13N,
 C12H12O2,
 C7H10,
 C2H4O4,
 C2H6N2O2,
 C8H8N4O4,
 C10H11N3O,
 C10H11N7O2,
 C2H8]

In [62]:
# Distinct fragment counts, as reported by get_fragments(), over the S22 set.
{len(mol.get_fragments()) for mol in s22}

{2}

In [50]:
# Median of len(mol.atoms) over the S22 complexes.
np.median(
    [len(mol.atoms) for mol in s22]
)

18.5

In [51]:
# Median of the `mass` attribute over the S22 complexes.
np.median(
    [mol.mass for mol in s22]
)

130.68005000000005

In [53]:
# Read the S22 energies into a list of per-row dicts. The file is opened in
# a `with` block so the handle is closed once the rows are materialised, and
# with newline='' as the csv module expects for file objects it parses.
with Path('s22/build/energies.csv').open(newline='') as s22_csv:
    s22_enes = list(DictReader(s22_csv))

In [56]:
# Median of the 'CCSD(T) /CBS CP' column over all S22 energy rows.
np.median([
    float(entry['CCSD(T) /CBS CP'])
    for entry in s22_enes
])

-4.7400000000000002

### S12L

In [76]:
# Load every S12L complex geometry. The order in which glob yields paths is
# not guaranteed (it depends on the filesystem), so the paths are sorted to
# keep the list and its displayed repr reproducible across machines and
# re-runs. The statistics computed from `s12l` are order-independent.
s12l = [
    geomlib.readfile(path)
    for path in sorted(Path('s12l/geoms').glob('*-complex_*.aims'))
]
s12l

[C54H34N4,
 C50H34N2,
 C55H47N5O19,
 C48ClH44N5O15,
 C120H28,
 C130H28,
 C62H68N8O6,
 C64H66N6O6,
 C40H48N25O12,
 C39H46N25O12,
 C60FeH72N30O14,
 C52H58N28O15]

In [77]:
# Distinct fragment counts, as reported by get_fragments(), over the S12L set.
{len(mol.get_fragments()) for mol in s12l}

{2}

In [78]:
# Median of len(mol.atoms) over the S12L complexes.
np.median(
    [len(mol.atoms) for mol in s12l]
)

134.0

In [79]:
# Median of the `mass` attribute over the S12L complexes.
np.median(
    [mol.mass for mol in s12l]
)

1063.9542500000016

In [80]:
# Read the S12L energies into a list of per-row dicts. The file is opened in
# a `with` block so the handle is closed once the rows are materialised, and
# with newline='' as the csv module expects for file objects it parses.
with Path('s12l/energies.csv').open(newline='') as s12l_csv:
    s12l_enes = list(DictReader(s12l_csv))

In [82]:
# Median of the 'ref' column over all S12L energy rows.
np.median([
    float(entry['ref'])
    for entry in s12l_enes
])

-28.100000000000001

### X23

In [38]:
# Build a name-sorted list of (file stem, geometry) pairs for the X23 set.
x23 = []
for p in Path().glob('x23/original-data/geoms-alberto/*.xyz'):
    # Skip files whose *name* contains '_g' (presumably a non-crystal variant
    # of the same system -- TODO confirm). The check is made on the stem, not
    # the full path string, so a '_g' in a parent directory name can never
    # silently filter out every geometry.
    if '_g' in p.stem:
        continue
    # 'xyzc' selects the reader format in geomlib (assumed to be xyz with
    # cell information -- verify against geomlib).
    c = geomlib.readfile(p, 'xyzc')
    x23.append((p.stem, c.complete_molecules()))
# Sort by stem so the order is deterministic and can be lined up with the
# energy rows, which are sorted by their 'geomname' column.
x23.sort(key=lambda pair: pair[0])
x23

[('CO2', C4O8),
 ('acetic', C8H16O8),
 ('adaman', C20H32),
 ('ammonia', H12N4),
 ('anthracene', C28H20),
 ('benzene', C24H24),
 ('cyanamide', C8H16N16),
 ('cytosine', C16H20N12O4),
 ('ethcar', C6H14N2O4),
 ('formamide', C4H12N4O4),
 ('hexamine', C6H12N4),
 ('hexdio', C12H16O4),
 ('imdazole', C12H16N8),
 ('naph', C20H16),
 ('oxaca', C8H8O16),
 ('oxacb', C4H4O8),
 ('pyrazine', C8H8N4),
 ('pyrazole', C24H32N16),
 ('succinic', C8H12O8),
 ('triazine', C18H18N18),
 ('trioxane', C18H36O18),
 ('uracil', C16H16N8O8),
 ('urea', C2H8N4O2)]

In [27]:
# Median atoms per fragment: each entry's atom count divided by the number
# of fragments returned by get_fragments().
np.median([
    len(geom.atoms) / len(geom.get_fragments())
    for _name, geom in x23
])

10.0

In [29]:
# Median of len(geom.atoms) over the X23 entries.
np.median(
    [len(geom.atoms) for _name, geom in x23]
)

32.0

In [30]:
# Median mass per fragment: each entry's `mass` divided by the number of
# fragments returned by get_fragments().
np.median([
    geom.mass / len(geom.get_fragments())
    for _name, geom in x23
])

89.092900000000014

In [31]:
# Median of the `mass` attribute over the X23 entries.
np.median(
    [geom.mass for _name, geom in x23]
)

256.34039999999993

In [36]:
# Read the X23 energies and sort the rows by their 'geomname' column. The
# file is opened in a `with` block so the handle is closed once the rows are
# materialised (sorted() consumes the reader before the block exits), and
# with newline='' as the csv module expects for file objects it parses.
with Path('x23/original-data/energies.csv').open(newline='') as x23_csv:
    x23_enes = sorted(DictReader(x23_csv), key=lambda row: row['geomname'])

In [44]:
# Median 'ref' energy per fragment. Geometries and energy rows are paired
# purely by position: x23 is sorted by file stem and x23_enes by 'geomname'.
# NOTE(review): this assumes the two name lists match one-to-one; zip would
# silently stop at the shorter list if they do not -- confirm.
np.median([
    float(entry['ref']) / len(geom.get_fragments())
    for (_name, geom), entry in zip(x23, x23_enes)
])

-30.645296349999999

In [45]:
# Median of the 'ref' column over all X23 energy rows.
np.median([
    float(entry['ref'])
    for entry in x23_enes
])

-81.735690809999994

### S66

In [59]:
# Load the S66x8 complex geometries whose file name carries the '-1.0-' tag.
# The order in which glob yields paths is not guaranteed (it depends on the
# filesystem), so the paths are sorted to keep the list and its displayed
# repr reproducible across machines and re-runs. The statistics computed
# from `s66` are order-independent.
s66 = [
    geomlib.readfile(path)
    for path in sorted(Path('s66x8/geoms').glob('*-1.0-complex_*.aims'))
]
s66

[H4O2,
 C2H10N2,
 C4H12N2O,
 CH7NO,
 C4H11NO2,
 C4H12N2O,
 C6H14N2O2,
 C3H9NO2,
 C8H8N4O4,
 C5H7NO,
 C6H9NO,
 CH6O2,
 C4H8O4,
 C4H10N2O2,
 C6H8N2O4,
 C6H9N3O3,
 C12H12,
 C10H10N2,
 C8H8N4O4,
 C11H11N,
 C10H10N2O2,
 C9H9N3O2,
 CH7NO,
 C8H10,
 C6H8N2O2,
 C6H6N2O2,
 C7H9N,
 C10H24,
 C10H24,
 C10H24,
 C10H22,
 C10H20,
 C11H16,
 C3H9NO2,
 C11H18,
 C9H16N2O2,
 C9H14N2O2,
 C9H16N2O2,
 C7H16,
 C7H14,
 C8H19NO,
 C12H12,
 C10H10N2,
 C11H11N,
 C2H8O2,
 C8H8,
 C4H4,
 C8H10O2,
 C8H11NO,
 C6H8O,
 C7H10O,
 C7H11N,
 C9H13NO,
 C10H10N2,
 C2H4O,
 C2H9NO,
 C4H6O2,
 C7H16O2,
 C7H17NO,
 C8H10O2,
 C5H11NO,
 C7H7N,
 C6H10N2,
 C4H11NO2,
 CH6O2,
 C2H9NO]

In [61]:
# Distinct fragment counts, as reported by get_fragments(), over the S66 set.
{len(mol.get_fragments()) for mol in s66}

{2}

In [63]:
# Median of len(mol.atoms) over the S66 complexes.
np.median(
    [len(mol.atoms) for mol in s66]
)

19.5

In [64]:
# Median of the `mass` attribute over the S66 complexes.
np.median(
    [mol.mass for mol in s66]
)

125.65945000000002

In [67]:
# Read the S66x8 energies into a list of per-row dicts. The file is opened
# in a `with` block so the handle is closed once the rows are materialised,
# and with newline='' as the csv module expects for file objects it parses.
with Path('s66x8/energies.csv').open(newline='') as s66_csv:
    s66_enes = list(DictReader(s66_csv))

In [70]:
# Median 'ref' energy over the S66x8 rows whose 'scale' value parses to
# exactly 1.0.
np.median([
    float(entry['ref'])
    for entry in s66_enes
    if float(entry['scale']) == 1.0
])

-4.1400000000000006