# The Light Atom Curriculum

We use curriculum learning in OneQMC to get the most gradient steps on our model for the lowest cost. The aim is to do as many gradient steps as possible on the smallest possible molecules, increasing molecule size only to "teach" new concepts.

The currently proposed levels are:

|         | Max electrons | Max nuclei |
| ------- | ------------- | ---------- |
| Level 1 | 10            | 5          |
| Level 2 | 24            | 8          |

## References
We have exclusively used the following resources to find equilibrium geometries:
 - NIST experimental geometries
 - Own calculation using `pyscf`

Perturbed geometries are generated in this notebook.

In [1]:
# Formulas whose geometries are written to the Level 1 dataset as well as to
# Level 2; every formula not listed here is written to Level 2 only.
level1_formulas = {
    "H2",
    "HF",
    "H2O",
    "CH2",
    "CH4",
    "NH3",
    "BH3",
    "LiH",
}

## Set of molecules
The set of molecules for inclusion is listed in the paper.

In this notebook, we generate the necessary geometries.

*Units*: units used in the notebooks are Angstroms unless specified. Conversion occurs when calling `to_molecule(unit="angstrom")`.

In [27]:
# Switch to write output files: every block that writes JSON to
# ../data/lightatomcurriculum/ is guarded by this flag, so with False the
# geometries are still generated in memory but nothing is written to disk.
SAVE_FILES = False

### Dimers

In [12]:
from collections import defaultdict
import os
import json
import numpy as np
from oneqmc.molecule import Molecule

In [13]:
# Reference bond length of each diatomic; make_dimer scales these values and
# passes the resulting coordinates to Molecule.make with unit="angstrom".
dimer_bond_lengths = dict(
    H2=0.7414,
    C2=1.2,
    CO=1.128,
    N2=1.0976,
    O2=1.21,
    HF=0.917,
    F2=1.42,
    NO=1.15,
    LiH=1.5958,
    LiF=1.57398,
)
# Per-atom charges of each diatomic, in the same order as the atoms are placed
# by make_dimer (first atom at the origin, second atom along x).
dimer_charges = {
    formula: np.array(pair)
    for formula, pair in (
        ("H2", [1, 1]),
        ("C2", [6, 6]),
        ("CO", [6, 8]),
        ("N2", [7, 7]),
        ("O2", [8, 8]),
        ("HF", [1, 9]),
        ("F2", [9, 9]),
        ("NO", [7, 8]),
        ("LiH", [3, 1]),
        ("LiF", [3, 9]),
    )
}

In [14]:
# Bond-length multipliers for the "stretch" scan: 25 evenly spaced values on
# [0.7, 1] followed by 25 log-spaced values from 1.5**0.01 up to 1.5**1.0.
fractions_stretch = np.hstack(
    (
        np.linspace(0.7, 1, num=25),
        np.logspace(0.01, 1.0, num=25, base=1.5),
    )
)
# Bond-length multipliers for the "break" scan: 10 evenly spaced values on
# [0.6, 0.7] followed by 40 log-spaced values. No base is passed here, so
# NumPy's default base of 10 applies (10**0.2 ~ 1.58 up to 10**1.25 ~ 17.8).
fractions_break = np.hstack(
    (
        np.linspace(0.6, 0.7, num=10),
        np.logspace(0.2, 1.25, num=40),
    )
)

In [15]:
def make_dimer(formula, frac):
    """Build a neutral diatomic whose bond is scaled by ``frac``.

    The first atom is placed at the origin and the second on the x axis at a
    distance of ``dimer_bond_lengths[formula] * frac``; coordinates are handed
    to ``Molecule.make`` with ``unit="angstrom"``.

    Args:
        formula: key into ``dimer_bond_lengths`` and ``dimer_charges``.
        frac: multiplier applied to the reference bond length.

    Returns:
        The molecule converted via ``.to_qcelemental().dict(encoding="json")``.
    """
    separation = dimer_bond_lengths[formula] * frac
    coords = np.array(
        [
            [0.0, 0.0, 0.0],
            [separation, 0.0, 0.0],
        ]
    )
    # Only "NO" and "OH" are built with spin 1; every other formula gets 0.
    spin = 1 if formula in ("NO", "OH") else 0
    molecule = Molecule.make(
        coords=coords,
        charges=dimer_charges[formula],
        charge=0,
        spin=spin,
        unit="angstrom",
    )
    return molecule.to_qcelemental().dict(encoding="json")

In [16]:
# dimers[scan][formula]["<formula>/<frac>"] holds the serialized geometry for
# that formula at that bond-length multiplier, for both scan types.
dimers = {"stretch": defaultdict(dict), "break": defaultdict(dict)}
for formula in dimer_charges:
    for name, fractions in (
        ("stretch", fractions_stretch),
        ("break", fractions_break),
    ):
        dimers[name][formula].update(
            {f"{formula}/{frac}": make_dimer(formula, frac) for frac in fractions}
        )

In [17]:
# Write one JSON file per (level, formula, scan type). Formulas listed in
# level1_formulas go to both level directories, all others to level2 only.
if SAVE_FILES:
    for name, per_formula in dimers.items():
        for formula, structures in per_formula.items():
            for level in ([1, 2] if formula in level1_formulas else [2]):
                out_dir = f"../data/lightatomcurriculum/level{level}/{formula}"
                os.makedirs(out_dir, exist_ok=True)
                with open(f"{out_dir}/{name}.json", "w") as f:
                    json.dump(structures, f)

### Trimers

In [18]:
from scipy.stats import truncnorm

In [28]:
from oneqmc.analysis.visual import show_mol
from oneqmc.analysis.zmatrix import ZMatrix

In [29]:
def angle_to_rad(angle):
    """Convert ``angle`` from degrees to radians."""
    # Multiply before dividing so the floating-point result is unchanged.
    scaled = angle * np.pi
    return scaled / 180

In [30]:
# Reference trimer geometries keyed by formula. Each value is
# (per-atom charges, l1, l2, angle) as consumed by make_z_matrix: atoms 1 and
# 2 are placed at distances l1 and l2 from atom 0 (the first entry of the
# charges tuple), and `angle` is given in degrees and converted with
# angle_to_rad. Geometries are later converted with to_molecule(unit="angstrom").
trimer_eq_structures = {
    "H2O": ((8, 1, 1), 0.9584, 0.9584, 104.45),
    "O3": ((8, 8, 8), 1.278, 1.278, 116.8),
    "N2O": ((7, 7, 8), 1.126, 1.186, 180),
    "FCN": ((6, 9, 7), 1.262, 1.159, 180),
    "HCN": ((6, 1, 7), 1.064, 1.156, 180),
    "CNH": ((7, 6, 1), 1.173, 0.986, 180),
    "HNO": ((7, 1, 8), 1.090, 1.209, 108.047),
    "CH2": ((6, 1, 1), 1.085, 1.085, 135.5),
    "CHF": ((6, 1, 9), 1.138, 1.305, 104.1),
    "LiOH": ((8, 3, 1), 1.57790820376218, 0.947123224823465, 180),
    "Li2O": ((8, 3, 3), 1.6155583875799349, 1.6155583875799349, 180),
    "LiCN": ((6, 3, 7), 1.9116, 1.16213, 180),
    "LiOF": ((8, 3, 9), 1.72009017661866, 1.5058795746008, 65.78235739),
}

In [31]:
def make_z_matrix(x):
    """Build the ZMatrix of a trimer from a ``(charges, l1, l2, angle)`` spec.

    Atoms 1 and 2 are both attached to atom 0, at distances ``l1`` and ``l2``;
    ``angle`` is given in degrees and is the angle of atom 2 measured against
    atom 1. The ZMatrix is created with total charge 0 and a spin equal to the
    parity of the summed charges.
    """
    charges = x[0]
    first_bond = x[1]
    second_bond = x[2]
    angle_deg = x[3]
    total_spin = sum(charges) % 2
    rows = [
        (),
        ((0, first_bond),),
        ((0, second_bond), (1, angle_to_rad(angle_deg))),
    ]
    return ZMatrix(0, total_spin, charges, rows)

#### Bending

50 structures.
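Vary the bond angle by sampling from a normal distribution centred on the equilibrium angle with a standard deviation of 30 degrees (truncated at ±3 standard deviations), then clamp the result to a minimum bond angle of 60 degrees and a maximum of 300 degrees.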

In [32]:
def make_trimer_bent(x):
    """Return the trimer spec ``x`` with a randomly perturbed bond angle.

    The new angle is drawn from a normal distribution centred on the original
    angle with a standard deviation of 30 degrees, truncated at +/-3 standard
    deviations, and then limited to the range [60, 300] degrees. Charges and
    bond lengths are passed through unchanged.
    """
    species, l1, l2, angle = x
    sampled = truncnorm.rvs(-3, 3, loc=angle, scale=30)
    if not sampled > 60:
        sampled = 60
    elif sampled > 300:
        sampled = 300
    return species, l1, l2, sampled

In [33]:
# For every trimer, sample 50 angle-perturbed geometries and, when saving is
# enabled, write them to bend.json in each applicable level directory.
for formula, spec in trimer_eq_structures.items():
    structures = {}
    for index in range(50):
        molecule = make_z_matrix(make_trimer_bent(spec)).to_molecule(unit="angstrom")
        structures[str(index)] = molecule.to_qcelemental().dict(encoding="json")

    if not SAVE_FILES:
        continue
    for level in ([1, 2] if formula in level1_formulas else [2]):
        out_dir = f"../data/lightatomcurriculum/level{level}/{formula}"
        os.makedirs(out_dir, exist_ok=True)
        with open(f"{out_dir}/bend.json", "w") as f:
            json.dump(structures, f)

#### Bond length distortion, no bending

50 structures.
Each bond length is multiplied by a factor drawn from a LogTruncatedNormal distribution: the exponential of a normal variable with mean 0 and standard deviation 0.33, truncated at ±2 standard deviations.

In [34]:
def make_trimer_stretched(x):
    """Return the trimer spec ``x`` with both bond lengths randomly rescaled.

    Each bond is multiplied by ``exp(z)``, where ``z`` is drawn independently
    from a normal distribution with mean 0 and standard deviation 0.33,
    truncated at +/-2 standard deviations. Charges and angle are unchanged.
    """
    species, l1, l2, angle = x
    # Draw for the first bond before the second to keep the sampling order.
    log_scales = [truncnorm.rvs(-2, 2, loc=0.0, scale=0.33) for _ in range(2)]
    new_l1 = l1 * np.exp(log_scales[0])
    new_l2 = l2 * np.exp(log_scales[1])
    return species, new_l1, new_l2, angle

In [35]:
# For every trimer, sample 50 bond-length-perturbed geometries and, when
# saving is enabled, write them to stretch.json in each applicable level.
for formula, spec in trimer_eq_structures.items():
    structures = {}
    for index in range(50):
        z_matrix = make_z_matrix(make_trimer_stretched(spec))
        molecule = z_matrix.to_molecule(unit="angstrom")
        structures[str(index)] = molecule.to_qcelemental().dict(encoding="json")
    if not SAVE_FILES:
        continue
    for level in ([1, 2] if formula in level1_formulas else [2]):
        out_dir = f"../data/lightatomcurriculum/level{level}/{formula}"
        os.makedirs(out_dir, exist_ok=True)
        with open(f"{out_dir}/stretch.json", "w") as f:
            json.dump(structures, f)

#### Bond breaking, no bending

50 structures.
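One bond, chosen at random with equal probability, is distorted with a LogTruncatedNormal(0.5, 1) distribution (the underlying normal is truncated to between -1.2 and +2 standard deviations); the other bond is distorted as above.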


In [36]:
def make_trimer_broken(x):
    """Return the trimer spec ``x`` with one bond strongly stretched.

    With probability 0.5 each, either the first or the second bond is
    multiplied by ``exp(z)`` with ``z`` drawn from a normal distribution with
    mean 0.5 and standard deviation 1.0, truncated to [-1.2, 2] standard
    deviations. The other bond gets the milder factor with mean 0.0, standard
    deviation 0.33 and truncation at +/-2 standard deviations. Charges and
    angle are unchanged.
    """
    species, l1, l2, angle = x

    def _mild_factor():
        return np.exp(truncnorm.rvs(-2, 2, loc=0.0, scale=0.33))

    def _breaking_factor():
        return np.exp(truncnorm.rvs(-1.2, 2, loc=0.5, scale=1.0))

    break_first_bond = np.random.rand() < 0.5
    if break_first_bond:
        first_factor, second_factor = _breaking_factor, _mild_factor
    else:
        first_factor, second_factor = _mild_factor, _breaking_factor
    # The first bond's factor is always sampled before the second bond's.
    new_l1 = l1 * first_factor()
    new_l2 = l2 * second_factor()
    return species, new_l1, new_l2, angle

In [None]:
# For every trimer, sample 50 geometries with one bond strongly stretched
# and, when saving is enabled, write them to break.json in each applicable
# level directory.
for formula, spec in trimer_eq_structures.items():
    structures = {}
    for index in range(50):
        broken_spec = make_trimer_broken(spec)
        molecule = make_z_matrix(broken_spec).to_molecule(unit="angstrom")
        structures[str(index)] = molecule.to_qcelemental().dict(encoding="json")
    if not SAVE_FILES:
        continue
    for level in ([1, 2] if formula in level1_formulas else [2]):
        out_dir = f"../data/lightatomcurriculum/level{level}/{formula}"
        os.makedirs(out_dir, exist_ok=True)
        with open(f"{out_dir}/break.json", "w") as f:
            json.dump(structures, f)

#### Combine bond stretching and bending

50 structures, apply both operations together

In [39]:
# For every trimer, sample 50 geometries with the angle perturbed first and
# the bond lengths rescaled afterwards; written to bend_stretch.json.
for formula, spec in trimer_eq_structures.items():
    structures = {}
    for index in range(50):
        perturbed = make_trimer_stretched(make_trimer_bent(spec))
        molecule = make_z_matrix(perturbed).to_molecule(unit="angstrom")
        structures[str(index)] = molecule.to_qcelemental().dict(encoding="json")
    if SAVE_FILES:
        # Only at Level 2
        out_dir = f"../data/lightatomcurriculum/level2/{formula}"
        os.makedirs(out_dir, exist_ok=True)
        with open(f"{out_dir}/bend_stretch.json", "w") as f:
            json.dump(structures, f)

#### Combine bond breaking and bending

100 structures, apply both operations together


In [40]:
# For every trimer, sample 100 geometries with the angle perturbed first and
# one bond strongly stretched afterwards; written to bend_break.json.
for formula, spec in trimer_eq_structures.items():
    structures = {}
    for index in range(100):
        perturbed = make_trimer_broken(make_trimer_bent(spec))
        molecule = make_z_matrix(perturbed).to_molecule(unit="angstrom")
        structures[str(index)] = molecule.to_qcelemental().dict(encoding="json")
    if SAVE_FILES:
        # Only at Level 2
        out_dir = f"../data/lightatomcurriculum/level2/{formula}"
        os.makedirs(out_dir, exist_ok=True)
        with open(f"{out_dir}/bend_break.json", "w") as f:
            json.dump(structures, f)

### Molecules with four or more atoms

In [41]:
# Reference Z-matrices for molecules with four or more atoms, keyed by
# formula. A value is either a single ZMatrix or a dict of named variants.
tetramer_eq_z = dict()
# Per-formula tuples of integer indices.
# NOTE(review): presumably these select the Z-matrix rows whose bond may be
# broken -- confirm against the code that consumes this mapping.
bonds_to_break = dict()

#### Li chemistry

In [42]:
tetramer_eq_z["LiNH2"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[3, 7, 1, 1],
    lines=[
        (),
        ((0, 1.72581081005422),),
        ((1, 1.01264305167221), (0, 2.234126438504645)),
        ((1, 1.01267655774191), (0, 2.2324879445860906), (2, 3.140480304297601)),
    ],
)
bonds_to_break["LiNH2"] = (1, 2)

In [43]:
tetramer_eq_z["LiCH3"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[3, 6, 1, 1, 1],
    lines=[
        (),
        ((0, 1.96621),),
        ((1, 1.09592454023988), (0, 1.970575300063215)),
        ((1, 1.09591826214367), (0, 1.9705777205925834), (2, -2.094388201005705)),
        ((1, 1.09591040464082), (0, 1.9705807501071213), (2, 2.0943912200282138)),
    ],
)
bonds_to_break["LiCH3"] = (1, 2)

In [44]:
# in cyclic position.
tetramer_eq_z["LiCH2NH2"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[3, 7, 6, 1, 1, 1, 1],
    lines=[
        (),
        ((0, 1.88178027213594),),
        ((1, 1.53116627212723), (0, 1.1659618791386894)),
        ((2, 1.08565599012763), (1, 2.5100283325008785), (0, -1.5998389968449105)),
        ((2, 1.09147956343671), (1, 1.8973526096634958), (0, 1.7333982644126087)),
        ((1, 1.00968671814578), (0, 1.9629584206458512), (2, 1.77730510408884)),
        ((1, 1.00978142115014), (0, 2.432821208247024), (2, -1.7439583526978453)),
    ],
)

In [45]:
tetramer_eq_z["LiCH2CH3"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[3, 6, 6, 1, 1, 1, 1, 1],
    lines=[
        (),
        ((0, 1.98450767924944),),
        ((1, 1.53467003218933), (0, 2.0746051537856003)),
        ((1, 1.0953104470423), (0, 1.888575287429959), (2, -2.162627718874665)),
        ((1, 1.09530950936254), (0, 1.8885770085646891), (2, 2.1626125369958182)),
        ((2, 1.09071223899799), (1, 1.6032833163148452), (0, 2.2143303717638996)),
        ((2, 1.09379720876404), (1, 2.596900038307089), (0, -1.033134161393413e-05)),
        ((2, 1.09071166813233), (1, 1.6032856927254824), (0, -2.214335460012257)),
    ],
)
bonds_to_break["LiCH2CH3"] = (1, 2, 4, 6)

In [46]:
tetramer_eq_z["LiOCH3"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[3, 8, 6, 1, 1, 1],
    lines=[
        (),
        ((0, 1.58340772847678),),
        ((1, 1.37166293093456), (0, 3.1397223959394447)),
        ((2, 1.09674260111477), (1, 1.9632399756322412), (0, 0.002479076741819717)),
        ((2, 1.09673216899114), (1, 1.961756089175044), (0, -2.0922792758796507)),
        ((2, 1.09674375193114), (1, 1.9617500446310316), (0, 2.097214236119077)),
    ],
)

bonds_to_break["LiOCH3"] = (1, 2, 3)

In [47]:
# in cyclic position
tetramer_eq_z["LiONH2"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[3, 8, 7, 1, 1],
    lines=[
        (),
        ((0, 1.6899005646487),),
        ((1, 1.43982743080551), (0, 1.2566078562745961)),
        ((2, 1.0117671717841), (1, 2.219812091256996), (0, 1.5780604416685318)),
        ((2, 1.01174490762247), (1, 2.222009673213329), (0, -1.5787417083257935)),
    ],
)

In [48]:
# in cyclic position
tetramer_eq_z["LiNHOH"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[3, 7, 8, 1, 1],
    lines=[
        (),
        ((0, 1.76785759203053),),
        ((1, 1.53437534195516), (0, 1.145658335529353)),
        ((1, 1.01171341302762), (0, 2.2738710765850825), (2, -1.4504380490758626)),
        ((2, 0.95442772052157), (1, 2.178420912159825), (0, -1.527206774584264)),
    ],
)

In [49]:
tetramer_eq_z["LiCCLi"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[3, 6, 6, 3],
    lines=[
        (),
        ((0, 1.87449),),
        ((1, 1.24464), (0, 3.141592653589793)),
        ((2, 1.87449), (1, 3.141592653589793), (0, 0.0)),
    ],
)

bonds_to_break["LiCCLi"] = (1, 2)

In [50]:
tetramer_eq_z["LiCHCH2"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[3, 6, 6, 1, 1, 1],
    lines=[
        (),
        ((0, 1.94454734521945),),
        ((1, 1.34596485243858), (0, 2.0984943387369364)),
        ((1, 1.09031771653954), (0, 2.2437889634575967), (2, 3.140883651198112)),
        ((2, 1.08557060599484), (1, 1.5078035405924588), (0, 3.1411309250947834)),
        ((2, 1.08616379506039), (1, 2.792326384540678), (0, 0.0009643783364423463)),
    ],
)
bonds_to_break["LiCHCH2"] = (1, 2, 3, 4)

In [51]:
# in cyclic position
tetramer_eq_z["LiCH2F"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[3, 9, 6, 1, 1],
    lines=[
        (),
        ((0, 1.7566772047818),),
        ((1, 1.54784831937758), (0, 1.2237568480235776)),
        ((2, 1.08691857100705), (1, 2.19732811431559), (0, -1.4468749095092879)),
        ((2, 1.08691492767373), (1, 2.1973327538694325), (0, 1.446864808825724)),
    ],
)

In [52]:
# in cyclic position
tetramer_eq_z["LiCHO"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[3, 8, 6, 1],
    lines=[
        (),
        ((0, 1.78784675719145),),
        ((1, 1.25824084125417), (0, 1.2789983907179363)),
        ((2, 1.1071714463894), (1, 3.123510712419773), (0, -0.0004829649489987146)),
    ],
)

In [53]:
tetramer_eq_z["LiOOLi"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[3, 8, 8, 3],
    lines=[
        (),
        ((0, 1.85882057073834),),
        ((1, 1.31567690414478), (0, 1.8207737093915977)),
        ((2, 1.85873820719864), (1, 1.6557787362432839), (0, -1.8380179925742255)),
    ],
)
bonds_to_break["LiOOLi"] = (1, 2)

In [54]:
tetramer_eq_z["LiBH4"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[3, 5, 1, 1, 1, 1],
    lines=[
        (),
        ((0, 1.90924721437534),),
        ((1, 1.23628663885039), (0, 1.1658297333218905)),
        ((1, 1.23625069852356), (0, 1.1660809422348508), (2, 2.0942420406467885)),
        ((1, 1.23624510247766), (0, 1.1656393196551036), (2, -2.094541090046249)),
        ((1, 1.1882486990946), (0, 3.141334267894829), (2, -1.4580454733908679)),
    ],
)
bonds_to_break["LiBH4"] = (1, 2, 5)

#### Boron chemistry

In [55]:
tetramer_eq_z["BH3"] = ZMatrix(
    0,
    0,
    [5, 1, 1, 1],
    [
        (),
        ((0, 1.190),),
        ((0, 1.190), (1, angle_to_rad(120))),
        ((0, 1.190), (1, -angle_to_rad(120)), (2, angle_to_rad(0))),
    ],
)

In [56]:
tetramer_eq_z["CH3BH2"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[6, 5, 1, 1, 1, 1, 1],
    lines=[
        (),
        ((0, 1.55903933590529),),
        ((0, 1.09404472755916), (1, 1.9040808647960223)),
        ((0, 1.0940452672993), (1, 1.9040847104395386), (2, -1.9868365144705564)),
        ((0, 1.08596529433495), (1, 2.0185417036785718), (2, 2.1481678497717267)),
        ((1, 1.18883355319405), (0, 2.08089386409013), (2, 0.9934159244538162)),
        ((1, 1.1882863636767), (0, 2.1368670828672434), (2, -2.148171146512614)),
    ],
)

In [57]:
tetramer_eq_z["CH2BH"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[6, 5, 1, 1, 1],
    lines=[
        (),
        ((0, 1.38203616758028),),
        ((0, 1.08050970092822), (1, 2.132567892654363)),
        ((0, 1.08049862096164), (1, 2.1325094562602787), (2, -3.1415532037061857)),
        ((1, 1.16830281322952), (0, 3.1415290203569795), (2, -1.834925709915037)),
    ],
)
bonds_to_break["CH2BH"] = (1, 2, 4)

In [58]:
tetramer_eq_z["CH3BO"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[6, 5, 8, 1, 1, 1],
    lines=[
        (),
        ((0, 1.53815699423043),),
        ((1, 1.20766340550668), (0, 3.141584871473522)),
        ((0, 1.08744207008925), (1, 1.9246860196402187), (2, 2.4798119452842298)),
        ((0, 1.08743529706369), (1, 1.9246960500515837), (2, 0.38542258378621547)),
        ((0, 1.08743151917718), (1, 1.924700053756777), (2, -1.7089845646105155)),
    ],
)
bonds_to_break["CH3BO"] = (1, 2, 3)

In [59]:
tetramer_eq_z["CH2BOH"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[6, 5, 8, 1, 1, 1],
    lines=[
        (),
        ((0, 1.38529910282942),),
        ((1, 1.31785048639821), (0, 3.112285937811738)),
        ((0, 1.07630603691515), (1, 2.1233409387552142), (2, -1.5794639994598352)),
        ((0, 1.07636767235922), (1, 2.1230209609165533), (2, 1.5605534708404978)),
        ((2, 0.958101583758215), (1, 2.0135723377208925), (0, 0.009068928831451368)),
    ],
)

In [60]:
tetramer_eq_z["CH3BNH"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[6, 5, 7, 1, 1, 1, 1],
    lines=[
        (),
        ((0, 1.54202582455677),),
        ((1, 1.2356316517474), (0, 3.141284213525426)),
        ((0, 1.08650010294523), (1, 1.9327938285600001), (2, 2.1628359516009565)),
        ((0, 1.08647026254748), (1, 1.932703007543492), (2, 0.06853642277317222)),
        ((0, 1.08647325857565), (1, 1.9327554975333927), (2, -2.026038845509357)),
        ((2, 0.9854119506582), (1, 3.141042096182353), (0, -0.7178948917747383)),
    ],
)
bonds_to_break["CH3BNH"] = (1, 2, 3, 6)

In [61]:
tetramer_eq_z["CH3NBH"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[6, 7, 5, 1, 1, 1, 1],
    lines=[
        (),
        ((0, 1.41004032740202),),
        ((1, 1.23350706086345), (0, 3.1412025367735814)),
        ((0, 1.08699697446681), (1, 1.9332801824874764), (2, 1.4737820243436486)),
        ((0, 1.08700401857583), (1, 1.9333176656578994), (2, -0.6205973873513414)),
        ((0, 1.08700805378801), (1, 1.9333144243840992), (2, -2.715037463608642)),
        ((2, 1.16734618040237), (1, 3.141535958172633), (0, 0.28330366688153064)),
    ],
)
bonds_to_break["CH3NBH"] = (1, 2, 3, 6)

In [62]:
tetramer_eq_z["CH2BNH2"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[6, 5, 7, 1, 1, 1, 1],
    lines=[
        (),
        ((0, 1.38904975396852),),
        ((1, 1.36555200065761), (0, 3.1408435228020153)),
        ((0, 1.07743184197424), (1, 2.1290037084687654), (2, 2.5797681877763727)),
        ((0, 1.07742435066226), (1, 2.1290846664408085), (2, -0.5624219886168652)),
        ((2, 1.00165912025998), (1, 2.146155672164305), (0, 2.132526728328857)),
        ((2, 1.00161214783967), (1, 2.146872456744653), (0, -1.008582633170937)),
    ],
)

In [63]:
tetramer_eq_z["CH2BF"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[6, 5, 9, 1, 1],
    lines=[
        (),
        ((0, 1.38335003158998),),
        ((1, 1.28263120206083), (0, 3.141466186923739)),
        ((0, 1.07757907463907), (1, 2.1167231792244374), (2, -0.03382860297659784)),
        ((0, 1.07752871539463), (1, 2.118596890567286), (2, 3.107785548574707)),
    ],
)
bonds_to_break["CH2BF"] = (1, 2, 3)

In [64]:
# BHF2 with atoms listed as B, H, F, F (charges 5, 1, 9, 9). All three
# substituents are attached to atom 0; the final dihedral of 180 degrees puts
# the second F on the opposite side of the first one.
tetramer_eq_z["BHF2"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[5, 1, 9, 9],
    lines=[
        (),
        ((0, 1.189),),
        # The angle must be measured against atom 1, not atom 0: atom 0 is
        # already the bond reference, so using it twice leaves the angle
        # undefined. Every other Z-matrix in this notebook follows the
        # (bond ref 0, angle ref 1) pattern for its third atom.
        ((0, 1.311), (1, angle_to_rad(120.85))),
        ((0, 1.311), (1, angle_to_rad(120.85)), (2, angle_to_rad(180))),
    ],
)

In [65]:
# This is an approximate structure using the same bond lengths as in BHF2 and a simple planar geometry
# Atoms are listed as B, H, H, F (charges 5, 1, 1, 9), all attached to atom 0.
tetramer_eq_z["BH2F"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[5, 1, 1, 9],
    lines=[
        (),
        ((0, 1.189),),
        # The angle must be measured against atom 1, not atom 0: atom 0 is
        # already the bond reference, so using it twice leaves the angle
        # undefined.
        ((0, 1.189), (1, angle_to_rad(120))),
        ((0, 1.311), (1, angle_to_rad(120)), (2, angle_to_rad(180))),
    ],
)

In [66]:
tetramer_eq_z["HBO2H2"] = {
    "E": ZMatrix(
        total_charge=0,
        total_spin=0,
        charges=[8, 5, 8, 1, 1, 1],
        lines=[
            (),
            ((0, 1.36202530082961),),
            ((1, 1.3530031505137), (0, 2.0877575487437654)),
            ((0, 0.954086260146325), (1, -1.967802406403173), (2, -3.1415884432272145)),
            ((1, 1.18690984088936), (0, 2.1349100303234625), (2, 3.1415891901354915)),
            ((2, 0.958035534257472), (1, 1.424737969392847), (0, -3.141591520513491)),
        ],
    ),
    "Z": ZMatrix(
        total_charge=0,
        total_spin=0,
        charges=[8, 5, 8, 1, 1, 1],
        lines=[
            (),
            ((0, 1.36202530082961),),
            ((1, 1.3530031505137), (0, 2.0877575487437654)),
            ((0, 0.954086260146325), (1, 1.967802406403173), (2, -3.1415884432272145)),
            ((1, 1.18690984088936), (0, 2.1349100303234625), (2, 3.1415891901354915)),
            ((2, 0.958035534257472), (1, 1.424737969392847), (0, -3.141591520513491)),
        ],
    ),
}

In [67]:
tetramer_eq_z["HBCBH"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[5, 6, 5, 1, 1],
    lines=[
        (),
        ((0, 1.35619872308596),),
        ((1, 1.35620744320329), (0, 3.14145704490849)),
        ((0, 1.17011547485708), (1, 3.1415596493711915), (2, -2.3814137977278165)),
        ((2, 1.17011263812507), (1, 3.1415469987128333), (0, 2.8396831760287222)),
    ],
)
bonds_to_break["HBCBH"] = (1, 3)

In [68]:
tetramer_eq_z["BH2CHCH2"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[5, 6, 6, 1, 1, 1, 1, 1],
    lines=[
        (),
        ((0, 1.53643151562313),),
        ((1, 1.34009221193916), (0, 2.105307533292076)),
        ((0, 1.18698814219014), (1, 2.126927913517519), (2, -3.1415921130246196)),
        ((0, 1.18848621262512), (1, 2.071184900124364), (2, 9.840961964880719e-06)),
        ((1, 1.07934862199384), (0, 2.124680055508159), (2, 3.141581433460676)),
        ((2, 1.07916666706306), (1, 1.5461635320210223), (0, 3.1415818964168523)),
        ((2, 1.07947837407704), (1, 2.700820175261583), (0, 1.498323428486764e-05)),
    ],
)

In [69]:
tetramer_eq_z["CH3BCH2"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[6, 5, 6, 1, 1, 1, 1, 1],
    lines=[
        (),
        ((0, 1.53114246041967),),
        ((1, 1.38201168739631), (0, 3.1083045179935835)),
        ((0, 1.0924939169167), (1, 1.8592490485702478), (2, 8.160542260870317e-05)),
        ((0, 1.08610042100167), (1, 1.9712249923136973), (2, -2.048017501803168)),
        ((0, 1.08609900165685), (1, 1.9712222575022098), (2, 2.0481901992111813)),
        ((2, 1.07897608949411), (1, 2.1138493918057426), (0, 3.1415196807244348)),
        ((2, 1.07869136146536), (1, 2.153467987260626), (0, -9.175623868275908e-05)),
    ],
)

In [70]:
tetramer_eq_z["H2BCCH"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[5, 6, 6, 1, 1, 1],
    lines=[
        (),
        ((0, 1.50848000003315),),
        ((1, 1.21539000761073), (0, 3.141486033198974)),
        ((0, 1.18397094525161), (1, 2.0891453727206755), (2, 0.024454205613543525)),
        ((0, 1.18395501662859), (1, 2.089572114242892), (2, -3.1171481604081053)),
        ((2, 1.0593000047201), (1, 3.141551013873896), (0, -3.092873741556357)),
    ],
)

In [71]:
tetramer_eq_z["HBCCH2"] = ZMatrix(
    total_charge=0,
    total_spin=0,
    charges=[5, 6, 6, 1, 1, 1],
    lines=[
        (),
        ((0, 1.36002959118543),),
        ((1, 1.32290075674632), (0, 3.139329183191186)),
        ((0, 1.16754542708196), (1, 3.140957410699107), (2, 1.5215155319813012)),
        ((2, 1.08404753355192), (1, 2.1272865065090376), (0, 1.3040893290131934)),
        ((2, 1.0839955736072), (1, 2.126188091577278), (0, -1.8385269244171083)),
    ],
)

#### Non-boron chemistry

In [72]:
# This structure has E/Z isomerism
# The two variants share all bond lengths and angles and differ only in the
# sign of the final dihedral (+119.8 vs -119.8 degrees).
tetramer_eq_z["H2O2"] = {
    "E": ZMatrix(
        0,
        0,
        [1, 8, 8, 1],
        [
            (),
            ((0, 0.950),),
            ((1, 1.475), (0, angle_to_rad(94.8))),
            ((2, 0.950), (1, angle_to_rad(94.8)), (0, angle_to_rad(119.8))),
        ],
    ),
    "Z": ZMatrix(
        0,
        0,
        [1, 8, 8, 1],
        [
            (),
            ((0, 0.950),),
            ((1, 1.475), (0, angle_to_rad(94.8))),
            ((2, 0.950), (1, angle_to_rad(94.8)), (0, -angle_to_rad(119.8))),
        ],
    ),
}

In [73]:
tetramer_eq_z["C2H2"] = ZMatrix(
    0,
    0,
    [1, 6, 6, 1],
    [
        (),
        ((0, 1.063),),
        ((1, 1.203), (0, angle_to_rad(180))),
        ((2, 1.063), (1, angle_to_rad(180)), (0, 0.0)),
    ],
)
bonds_to_break["C2H2"] = (1, 2)

In [74]:
tetramer_eq_z["NH3"] = ZMatrix(
    0,
    0,
    [7, 1, 1, 1],
    [
        (),
        ((0, 1.012),),
        ((0, 1.012), (1, angle_to_rad(106.67))),
        ((0, 1.012), (1, angle_to_rad(106.67)), (2, angle_to_rad(112.15))),
    ],
)

In [75]:
# This structure has E/Z isomerism
# The two variants differ only in the sign of the bond angle on the last row
# (+106.85 vs -106.85 degrees); the dihedral is 180 degrees in both.
tetramer_eq_z["H2N2"] = {
    "E": ZMatrix(
        0,
        0,
        [1, 7, 7, 1],
        [
            (),
            ((0, 1.028),),
            ((1, 1.252), (0, angle_to_rad(106.85))),
            ((2, 1.028), (1, angle_to_rad(106.85)), (0, angle_to_rad(180))),
        ],
    ),
    "Z": ZMatrix(
        0,
        0,
        [1, 7, 7, 1],
        [
            (),
            ((0, 1.028),),
            ((1, 1.252), (0, angle_to_rad(106.85))),
            ((2, 1.028), (1, -angle_to_rad(106.85)), (0, angle_to_rad(180))),
        ],
    ),
}

In [76]:
# show_mol(tetramer_eq_z["H2N2"]["E"].to_molecule(unit="angstrom"))
# show_mol(tetramer_eq_z["H2N2"]["Z"].to_molecule(unit="angstrom"))

In [77]:
# This structure has syn/anti isomerism
# The two variants differ only in the sign of the bond angle on the last row
# (+110.6 vs -110.6 degrees); the dihedral is 180 degrees in both.
tetramer_eq_z["HNO2"] = {
    "E": ZMatrix(
        0,
        0,
        [1, 8, 7, 8],
        [
            (),
            ((0, 0.959),),
            ((1, 1.442), (0, angle_to_rad(102.1))),
            ((2, 1.169), (1, angle_to_rad(110.6)), (0, angle_to_rad(180))),
        ],
    ),
    "Z": ZMatrix(
        0,
        0,
        [1, 8, 7, 8],
        [
            (),
            ((0, 0.959),),
            ((1, 1.442), (0, angle_to_rad(102.1))),
            ((2, 1.169), (1, -angle_to_rad(110.6)), (0, angle_to_rad(180))),
        ],
    ),
}

In [78]:
# show_mol(tetramer_eq_z["HNO2"]["E"].to_molecule(unit="angstrom"))
# show_mol(tetramer_eq_z["HNO2"]["Z"].to_molecule(unit="angstrom"))

In [79]:
tetramer_eq_z["CH2O"] = ZMatrix(
    0,
    0,
    [6, 1, 1, 8],
    [
        (),
        ((0, 1.111),),
        ((0, 1.111), (1, angle_to_rad(116.133))),
        ((0, 1.205), (1, angle_to_rad(121.9)), (2, angle_to_rad(180))),
    ],
)
bonds_to_break["CH2O"] = (1, 3)

In [80]:
# Formyl fluoride with atoms listed as C, H, O, F (charges 6, 1, 8, 9). All
# three substituents are attached to the carbon (atom 0), and the 180 degree
# dihedral puts F on the opposite side of O.
tetramer_eq_z["CHOF"] = ZMatrix(
    0,
    0,
    [6, 1, 8, 9],
    [
        (),
        ((0, 1.095),),
        ((0, 1.181), (1, angle_to_rad(127.3))),
        # C-F bond length. This was previously 1.095, a copy of the C-H value
        # above; the experimental C-F distance in formyl fluoride is 1.338.
        ((0, 1.338), (1, angle_to_rad(109.9)), (2, angle_to_rad(180))),
    ],
)

In [81]:
tetramer_eq_z["CH4"] = ZMatrix(
    0,
    0,
    [6, 1, 1, 1, 1],
    [
        (),
        ((0, 1.087),),
        ((0, 1.087), (1, angle_to_rad(109.471))),
        ((0, 1.087), (1, angle_to_rad(109.471)), (2, angle_to_rad(120.0))),
        ((0, 1.087), (1, angle_to_rad(109.471)), (2, -angle_to_rad(120.0))),
    ],
)
# Breaking every bond with equal probability is fine here

In [82]:
tetramer_eq_z["CH3F"] = ZMatrix(
    0,
    0,
    [6, 1, 1, 1, 9],
    [
        (),
        ((0, 1.087),),
        ((0, 1.087), (1, angle_to_rad(110.2))),
        ((0, 1.087), (1, angle_to_rad(110.2)), (2, angle_to_rad(120.0))),
        ((0, 1.383), (1, angle_to_rad(108.73)), (2, -angle_to_rad(120.0))),
    ],
)
# Only break one of the C-H bonds and the C-F bonds
bonds_to_break["CH3F"] = (1, 4)

In [83]:
tetramer_eq_z["HCOOH"] = ZMatrix(
    0,
    0,
    [6, 8, 8, 1, 1],
    [
        (),
        ((0, 1.343),),
        ((0, 1.202), (1, angle_to_rad(124.9))),
        ((0, 1.097), (2, angle_to_rad(124.1)), (1, angle_to_rad(180.0))),
        ((1, 0.972), (0, -angle_to_rad(106.3)), (2, angle_to_rad(180.0))),
    ],
)

In [84]:
# show_mol(tetramer_eq_z["HCOOH"].to_molecule(unit="angstrom"))

In [85]:
# This is not really E/Z isomerism, but there are two structures we want to use here
tetramer_eq_z["N2H4"] = {
    "E": ZMatrix(
        0,
        0,
        [7, 7, 1, 1, 1, 1],
        [
            (),
            ((0, 1.446),),
            ((0, 1.016), (1, angle_to_rad(108.85))),
            ((0, 1.016), (1, angle_to_rad(108.85)), (2, angle_to_rad(120.0))),
            ((1, 1.016), (0, angle_to_rad(108.85)), (2, angle_to_rad(0.0))),
            ((1, 1.016), (0, angle_to_rad(108.85)), (4, angle_to_rad(120.0))),
        ],
    ),
    "Z": ZMatrix(
        0,
        0,
        [7, 7, 1, 1, 1, 1],
        [
            (),
            ((0, 1.446),),
            ((0, 1.016), (1, angle_to_rad(108.85))),
            ((0, 1.016), (1, angle_to_rad(108.85)), (2, angle_to_rad(120.0))),
            ((1, 1.016), (0, angle_to_rad(108.85)), (2, angle_to_rad(0.0))),
            ((1, 1.016), (0, angle_to_rad(108.85)), (4, -angle_to_rad(120.0))),
        ],
    ),
}
bonds_to_break["N2H4"] = (1, 2, 4)

In [86]:
tetramer_eq_z["C2H4"] = ZMatrix(
    0,
    0,
    [6, 6, 1, 1, 1, 1],
    [
        (),
        ((0, 1.339),),
        ((0, 1.086), (1, angle_to_rad(121.2))),
        ((0, 1.086), (1, -angle_to_rad(121.2)), (2, angle_to_rad(0.0))),
        ((1, 1.086), (0, angle_to_rad(121.2)), (2, angle_to_rad(0.0))),
        ((1, 1.086), (0, -angle_to_rad(121.2)), (4, angle_to_rad(0.0))),
    ],
)
bonds_to_break["C2H4"] = (1, 2)

In [87]:
tetramer_eq_z["C2H6"] = {
    "Z": ZMatrix(
        0,
        0,
        [6, 6, 1, 1, 1, 1, 1, 1],
        [
            (),
            ((0, 1.536),),
            ((0, 1.091), (1, angle_to_rad(110.91))),
            ((0, 1.091), (1, angle_to_rad(110.91)), (2, angle_to_rad(120.0))),
            ((0, 1.091), (1, angle_to_rad(110.91)), (2, -angle_to_rad(120.0))),
            ((1, 1.091), (0, angle_to_rad(110.91)), (2, angle_to_rad(0.0))),
            ((1, 1.091), (0, angle_to_rad(110.91)), (2, angle_to_rad(120.0))),
            ((1, 1.091), (0, angle_to_rad(110.91)), (2, -angle_to_rad(120.0))),
        ],
    ),
    "E": ZMatrix(
        0,
        0,
        [6, 6, 1, 1, 1, 1, 1, 1],
        [
            (),
            ((0, 1.536),),
            ((0, 1.091), (1, angle_to_rad(110.91))),
            ((0, 1.091), (1, angle_to_rad(110.91)), (2, angle_to_rad(120.0))),
            ((0, 1.091), (1, angle_to_rad(110.91)), (2, -angle_to_rad(120.0))),
            ((1, 1.091), (0, angle_to_rad(110.91)), (2, angle_to_rad(60.0))),
            ((1, 1.091), (0, angle_to_rad(110.91)), (2, angle_to_rad(180.0))),
            ((1, 1.091), (0, angle_to_rad(110.91)), (2, -angle_to_rad(60.0))),
        ],
    ),
}
bonds_to_break["C2H6"] = (1, 2)

In [88]:
# Methylamine (CH3NH2) reference geometry in two conformations.
# Atom order follows the nuclear charges: C0, N1, H2-H4 (bonded to C0) and
# H5-H6 (bonded to N1). Each Z-matrix line holds (reference atom, value)
# pairs: bond length in Angstrom, then bond angle, then dihedral.
# "E": the N-H hydrogens sit at dihedrals +60/180 w.r.t. H2.
# "Z": the N-H hydrogens sit at dihedrals +120/-120 w.r.t. H2.
# NOTE(review): the two leading zeros are presumably total charge and spin --
# confirm against the ZMatrix constructor.
tetramer_eq_z["CH3NH2"] = {
    "E": ZMatrix(
        0,
        0,
        [6, 7, 1, 1, 1, 1, 1],
        [
            (),
            ((0, 1.471),),
            ((0, 1.093), (1, angle_to_rad(112))),
            ((0, 1.093), (1, angle_to_rad(112)), (2, angle_to_rad(120.0))),
            ((0, 1.093), (1, angle_to_rad(112)), (2, -angle_to_rad(120.0))),
            ((1, 1.018), (0, angle_to_rad(111)), (2, angle_to_rad(60.0))),
            ((1, 1.018), (0, angle_to_rad(111)), (2, angle_to_rad(180.0))),
        ],
    ),
    "Z": ZMatrix(
        0,
        0,
        [6, 7, 1, 1, 1, 1, 1],
        [
            (),
            ((0, 1.471),),
            ((0, 1.093), (1, angle_to_rad(112))),
            ((0, 1.093), (1, angle_to_rad(112)), (2, angle_to_rad(120.0))),
            ((0, 1.093), (1, angle_to_rad(112)), (2, -angle_to_rad(120.0))),
            ((1, 1.018), (0, angle_to_rad(111)), (2, angle_to_rad(120.0))),
            ((1, 1.018), (0, angle_to_rad(111)), (2, -angle_to_rad(120.0))),
        ],
    ),
}
# Z-matrix lines whose bond make_tetramer_broken may elongate:
# line 1 (C0-N1), line 2 (C0-H2) and line 5 (N1-H5).
bonds_to_break["CH3NH2"] = (1, 2, 5)

In [89]:
# Acetonitrile (CH3CN) reference geometry (single conformer).
# Atom order follows the nuclear charges: C0, C1, H2-H4 (bonded to C0) and
# N5 (bonded to C1 with a 180 degree C0-C1-N5 angle, i.e. a linear C-C-N).
# Each Z-matrix line holds (reference atom, value) pairs: bond length in
# Angstrom, then bond angle, then dihedral.
# NOTE(review): the two leading zeros are presumably total charge and spin --
# confirm against the ZMatrix constructor.
tetramer_eq_z["CH3CN"] = ZMatrix(
    0,
    0,
    [6, 6, 1, 1, 1, 7],
    [
        (),
        ((0, 1.458),),
        ((0, 1.104), (1, angle_to_rad(109.44))),
        ((0, 1.104), (1, angle_to_rad(109.44)), (2, angle_to_rad(120.0))),
        ((0, 1.104), (1, angle_to_rad(109.44)), (2, -angle_to_rad(120.0))),
        ((1, 1.157), (0, angle_to_rad(180)), (2, angle_to_rad(180.0))),
    ],
)

# Z-matrix lines whose bond make_tetramer_broken may elongate:
# line 1 (C0-C1), line 2 (C0-H2) and line 5 (C1-N5).
bonds_to_break["CH3CN"] = (1, 2, 5)

In [90]:
# Methanol (CH3OH) reference geometry in two conformations.
# Atom order follows the nuclear charges: O0, H1 (bonded to O0), C2 (bonded
# to O0) and H3-H5 (bonded to C2). Each Z-matrix line holds (reference atom,
# value) pairs: bond length in Angstrom, then bond angle, then dihedral.
# "E": the methyl hydrogens sit at dihedrals +60/180/-60 w.r.t. H1.
# "Z": the methyl hydrogens sit at dihedrals 0/+120/-120 w.r.t. H1.
# NOTE(review): the two leading zeros are presumably total charge and spin --
# confirm against the ZMatrix constructor.
tetramer_eq_z["CH3OH"] = {
    "E": ZMatrix(
        0,
        0,
        [8, 1, 6, 1, 1, 1],
        [
            (),
            ((0, 0.956),),
            ((0, 1.427), (1, angle_to_rad(108.87))),
            ((2, 1.096), (0, angle_to_rad(109.03)), (1, angle_to_rad(60.0))),
            ((2, 1.096), (0, angle_to_rad(109.03)), (1, angle_to_rad(180.0))),
            ((2, 1.096), (0, angle_to_rad(109.03)), (1, -angle_to_rad(60.0))),
        ],
    ),
    "Z": ZMatrix(
        0,
        0,
        [8, 1, 6, 1, 1, 1],
        [
            (),
            ((0, 0.956),),
            ((0, 1.427), (1, angle_to_rad(108.87))),
            ((2, 1.096), (0, angle_to_rad(109.03)), (1, angle_to_rad(0.0))),
            ((2, 1.096), (0, angle_to_rad(109.03)), (1, angle_to_rad(120.0))),
            ((2, 1.096), (0, angle_to_rad(109.03)), (1, -angle_to_rad(120.0))),
        ],
    ),
}
# Z-matrix lines whose bond make_tetramer_broken may elongate:
# line 1 (O0-H1), line 2 (O0-C2) and line 3 (C2-H3).
bonds_to_break["CH3OH"] = (1, 2, 3)

In [91]:
# Acetaldehyde (CH3CHO) reference geometry in two conformations.
# Atom order follows the nuclear charges: C0, C1, O2 (bonded to C0), H3
# (bonded to C0, dihedral 180 degrees w.r.t. O2) and H4-H6 (bonded to C1).
# Each Z-matrix line holds (reference atom, value) pairs: bond length in
# Angstrom, then bond angle, then dihedral.
# "E": the methyl hydrogens sit at dihedrals +120/-120/0 w.r.t. H3.
# "Z": the methyl hydrogens sit at dihedrals +60/-60/180 w.r.t. H3.
# NOTE(review): the two leading zeros are presumably total charge and spin --
# confirm against the ZMatrix constructor.
tetramer_eq_z["CH3CHO"] = {
    "E": ZMatrix(
        0,
        0,
        [6, 6, 8, 1, 1, 1, 1],
        [
            (),
            ((0, 1.501),),
            ((0, 1.216), (1, angle_to_rad(123.9))),
            ((0, 1.114), (1, angle_to_rad(117.5)), (2, angle_to_rad(180.0))),
            ((1, 1.086), (0, angle_to_rad(110.618)), (3, angle_to_rad(120.0))),
            ((1, 1.086), (0, angle_to_rad(110.618)), (3, angle_to_rad(-120.0))),
            ((1, 1.086), (0, angle_to_rad(110.618)), (3, angle_to_rad(0.0))),
        ],
    ),
    "Z": ZMatrix(
        0,
        0,
        [6, 6, 8, 1, 1, 1, 1],
        [
            (),
            ((0, 1.501),),
            ((0, 1.216), (1, angle_to_rad(123.9))),
            ((0, 1.114), (1, angle_to_rad(117.5)), (2, angle_to_rad(180.0))),
            ((1, 1.086), (0, angle_to_rad(110.618)), (3, angle_to_rad(60.0))),
            ((1, 1.086), (0, angle_to_rad(110.618)), (3, angle_to_rad(-60.0))),
            ((1, 1.086), (0, angle_to_rad(110.618)), (3, angle_to_rad(180.0))),
        ],
    ),
}
# Z-matrix lines whose bond make_tetramer_broken may elongate:
# line 1 (C0-C1), line 2 (C0-O2), line 3 (C0-H3) and line 4 (C1-H4).
bonds_to_break["CH3CHO"] = (1, 2, 3, 4)

In [92]:
# Formamide (HCONH2) reference geometry (single conformer).
# Atom order follows the nuclear charges: C0, N1, H2-H3 (bonded to N1),
# O4 (bonded to C0) and H5 (bonded to C0). All dihedrals are 0 or 180
# degrees, so the reference structure is planar.
# Each Z-matrix line holds (reference atom, value) pairs: bond length in
# Angstrom, then bond angle, then dihedral.
# There is no bonds_to_break entry for this formula in this cell; without
# one, make_tetramer_broken picks any Z-matrix line at random.
# NOTE(review): the two leading zeros are presumably total charge and spin --
# confirm against the ZMatrix constructor.
tetramer_eq_z["HCONH2"] = ZMatrix(
    0,
    0,
    [6, 7, 1, 1, 8, 1],
    [
        (),
        ((0, 1.350),),
        ((1, 1.001), (0, angle_to_rad(118.5))),
        ((1, 1.001), (2, angle_to_rad(121.6)), (0, angle_to_rad(180.0))),
        ((0, 1.210), (1, angle_to_rad(124.7)), (2, angle_to_rad(0.0))),
        ((0, 1.090), (4, angle_to_rad(122.5)), (1, angle_to_rad(180.0))),
    ],
)

In [93]:
# Vinyl alcohol (CH2CHOH) reference geometry in two planar conformations.
# Atom order follows the nuclear charges: C0, C1, O2 (bonded to C0), H3
# (bonded to C0), H4-H5 (bonded to C1) and H6 (bonded to O2).
# Each Z-matrix line holds (reference atom, value) pairs: bond length in
# Angstrom, then bond angle, then dihedral.
# The two entries differ only in the last line: the hydroxyl hydrogen H6 has
# a dihedral of 180 degrees ("E") or 0 degrees ("Z") w.r.t. C1.
# There is no bonds_to_break entry for this formula in this cell; without
# one, make_tetramer_broken picks any Z-matrix line at random.
# NOTE(review): the two leading zeros are presumably total charge and spin --
# confirm against the ZMatrix constructor.
tetramer_eq_z["CH2CHOH"] = {
    "E": ZMatrix(
        0,
        0,
        [6, 6, 8, 1, 1, 1, 1],
        [
            (),
            ((0, 1.326),),
            ((0, 1.372), (1, angle_to_rad(126.2))),
            ((0, 1.097), (1, angle_to_rad(129.1)), (2, angle_to_rad(180.0))),
            ((1, 1.086), (0, angle_to_rad(121.7)), (2, angle_to_rad(0.0))),
            ((1, 1.086), (0, angle_to_rad(121.7)), (2, angle_to_rad(180.0))),
            ((2, 0.960), (0, angle_to_rad(108.3)), (1, angle_to_rad(180.0))),
        ],
    ),
    "Z": ZMatrix(
        0,
        0,
        [6, 6, 8, 1, 1, 1, 1],
        [
            (),
            ((0, 1.326),),
            ((0, 1.372), (1, angle_to_rad(126.2))),
            ((0, 1.097), (1, angle_to_rad(129.1)), (2, angle_to_rad(180.0))),
            ((1, 1.086), (0, angle_to_rad(121.7)), (2, angle_to_rad(0.0))),
            ((1, 1.086), (0, angle_to_rad(121.7)), (2, angle_to_rad(180.0))),
            ((2, 0.960), (0, angle_to_rad(108.3)), (1, angle_to_rad(0.0))),
        ],
    ),
}

In [94]:
# Vinyl fluoride (CH2CHF) reference geometry (single, planar conformer).
# Atom order follows the nuclear charges: C0, C1, F2 (bonded to C0 at the
# 1.347 Angstrom C-F distance), H3 (bonded to C0) and H4-H5 (bonded to C1).
# The third nucleus must be fluorine (Z=9); it was previously entered as 7
# (nitrogen), which silently produced a different molecule under this key.
# Each Z-matrix line holds (reference atom, value) pairs: bond length in
# Angstrom, then bond angle, then dihedral.
# NOTE(review): the two leading zeros are presumably total charge and spin --
# confirm against the ZMatrix constructor.
tetramer_eq_z["CH2CHF"] = ZMatrix(
    0,
    0,
    [6, 6, 9, 1, 1, 1],
    [
        (),
        ((0, 1.329),),
        ((0, 1.347), (1, angle_to_rad(120.8))),
        ((0, 1.082), (2, angle_to_rad(110)), (1, angle_to_rad(180.0))),
        ((1, 1.087), (0, angle_to_rad(120.9)), (2, angle_to_rad(0.0))),
        ((1, 1.077), (0, angle_to_rad(119)), (2, angle_to_rad(180.0))),
    ],
)

In [95]:
# Ethylene oxide (C2H4O) reference geometry: a three-membered C-C-O ring.
# Atom order follows the nuclear charges: C0, C1, O2 (bonded to C0; the
# 59.2 degree C1-C0-O2 angle closes the ring), H3-H4 (bonded to C0) and
# H5-H6 (bonded to C1).
# Each Z-matrix line holds (reference atom, value) pairs: bond length in
# Angstrom, then bond angle, then dihedral.
# NOTE(review): the two leading zeros are presumably total charge and spin --
# confirm against the ZMatrix constructor.
tetramer_eq_z["C2H4O"] = ZMatrix(
    0,
    0,
    [6, 6, 8, 1, 1, 1, 1],
    [
        (),
        ((0, 1.459),),
        ((0, 1.425), (1, angle_to_rad(59.2))),
        ((0, 1.084), (1, angle_to_rad(119.078)), (2, angle_to_rad(103.017))),
        ((0, 1.084), (1, angle_to_rad(119.078)), (2, -angle_to_rad(103.017))),
        ((1, 1.084), (0, angle_to_rad(119.078)), (2, angle_to_rad(103.017))),
        ((1, 1.084), (0, angle_to_rad(119.078)), (2, -angle_to_rad(103.017))),
    ],
)
# Trying to break the ring structure sounds dangerous
# We will perturb the ring via bending anyway
# Only break bonds to H
# Line 3 is the C0-H3 bond, the only one make_tetramer_broken may elongate.
bonds_to_break["C2H4O"] = (3,)

In [96]:
# Oxaziridine structure determined in-house
# Three-membered C-N-O ring. Atom order follows the nuclear charges: C0, N1,
# O2 (bonded to C0), H3-H4 (bonded to C0) and H5 (bonded to N1).
# Each Z-matrix line holds (reference atom, value) pairs: bond length in
# Angstrom, then bond angle, then dihedral.
# NOTE(review): the two leading zeros are presumably total charge and spin --
# confirm against the ZMatrix constructor.
tetramer_eq_z["CH3NO"] = ZMatrix(
    0,
    0,
    [6, 7, 8, 1, 1, 1],
    [
        (),
        ((0, 1.44147868584659),),
        ((0, 1.3949303989558), (1, angle_to_rad(63.6516477129233))),
        (
            (0, 1.09746021071791),
            (1, angle_to_rad(115.572867977747)),
            (2, angle_to_rad(108.549120943775)),
        ),
        (
            (0, 1.09951330631421),
            (1, angle_to_rad(119.626244922843)),
            (2, angle_to_rad(-106.785455851276)),
        ),
        (
            (1, 1.03186342511836),
            (0, angle_to_rad(106.237646977911)),
            (2, angle_to_rad(-94.185723295857)),
        ),
    ],
)
# Trying to break the ring structure sounds dangerous
# We will perturb the ring via bending anyway
# Only break bonds to H
# Lines 3-5 are the C0-H3, C0-H4 and N1-H5 bonds.
bonds_to_break["CH3NO"] = (3, 4, 5)

In [97]:
# show_mol(tetramer_eq_z["CH3NO"].to_molecule(unit="angstrom"))

In [98]:
# Methanimine (CH2NH) reference geometry (single, planar conformer).
# Atom order follows the nuclear charges: C0, N1, H2-H3 (bonded to C0) and
# H4 (bonded to N1). All dihedrals are 0; H3 is placed on the opposite side
# of the C-N axis from H2 through the negated bond angle.
# Each Z-matrix line holds (reference atom, value) pairs: bond length in
# Angstrom, then bond angle, then dihedral.
# There is no bonds_to_break entry for this formula in this cell; without
# one, make_tetramer_broken picks any Z-matrix line at random.
# NOTE(review): the two leading zeros are presumably total charge and spin --
# confirm against the ZMatrix constructor.
tetramer_eq_z["CH2NH"] = ZMatrix(
    0,
    0,
    [6, 7, 1, 1, 1],
    [
        (),
        ((0, 1.273),),
        ((0, 1.103), (1, angle_to_rad(123.4))),
        ((0, 1.081), (1, -angle_to_rad(119.7)), (2, angle_to_rad(0))),
        ((1, 1.023), (0, angle_to_rad(110.5)), (2, angle_to_rad(0))),
    ],
)

In [99]:
# Hydroxylamine (NH2OH) reference geometry (single conformer).
# Atom order follows the nuclear charges: N0, O1, H2-H3 (bonded to N0) and
# H4 (bonded to O1).
# Each Z-matrix line holds (reference atom, value) pairs: bond length in
# Angstrom, then bond angle, then dihedral.
# There is no bonds_to_break entry for this formula in this cell; without
# one, make_tetramer_broken picks any Z-matrix line at random.
# NOTE(review): the two leading zeros are presumably total charge and spin --
# confirm against the ZMatrix constructor.
tetramer_eq_z["NH2OH"] = ZMatrix(
    0,
    0,
    [7, 8, 1, 1, 1],
    [
        (),
        ((0, 1.453),),
        ((0, 1.016), (1, angle_to_rad(107.01))),
        ((0, 1.016), (1, angle_to_rad(107.01)), (2, angle_to_rad(120))),
        ((1, 0.962), (0, angle_to_rad(101.37)), (2, angle_to_rad(120))),
    ],
)

In [100]:
# Fluoroamine (NH2F) reference geometry (single conformer).
# Atom order follows the nuclear charges: N0, H1, H2 and F3, all bonded to N0.
# Each Z-matrix line holds (reference atom, value) pairs: bond length in
# Angstrom, then bond angle, then dihedral.
# NOTE(review): the two leading zeros are presumably total charge and spin --
# confirm against the ZMatrix constructor.
tetramer_eq_z["NH2F"] = ZMatrix(
    0,
    0,
    [7, 1, 1, 9],
    [
        (),
        ((0, 1.023),),
        ((0, 1.023), (1, angle_to_rad(106.27))),
        ((0, 1.433), (1, angle_to_rad(101.08)), (2, angle_to_rad(120))),
    ],
)
# Z-matrix lines whose bond make_tetramer_broken may elongate:
# line 1 (N0-H1) and line 3 (N0-F3).
bonds_to_break["NH2F"] = (1, 3)

In [101]:
# NH4F reference geometry: four hydrogens around N0 with tetrahedral angles
# (109.471 degrees) and a fluorine placed 2.0 Angstrom from H1 on the
# extension of the N0-H1 bond (N0-H1-F5 angle of 180 degrees).
# Atom order follows the nuclear charges: N0, H1-H4 (bonded to N0) and F5.
# Note that H1 uses a slightly longer N-H distance (1.029) than H2-H4 (1.023).
# Each Z-matrix line holds (reference atom, value) pairs: bond length in
# Angstrom, then bond angle, then dihedral.
# NOTE(review): the two leading zeros are presumably total charge and spin --
# confirm against the ZMatrix constructor.
tetramer_eq_z["NH4F"] = ZMatrix(
    0,
    0,
    [7, 1, 1, 1, 1, 9],
    [
        (),
        ((0, 1.029),),
        ((0, 1.023), (1, angle_to_rad(109.471))),
        ((0, 1.023), (1, angle_to_rad(109.471)), (2, angle_to_rad(120))),
        ((0, 1.023), (1, angle_to_rad(109.471)), (2, -angle_to_rad(120))),
        ((1, 2.0), (0, angle_to_rad(180)), (2, angle_to_rad(0))),
    ],
)
# Z-matrix lines whose distance make_tetramer_broken may elongate:
# line 1 (N0-H1), line 2 (N0-H2) and line 5 (H1...F5).
bonds_to_break["NH4F"] = (1, 2, 5)

#### Bond bending and mixture between stereoisomers

In this stage we apply the following random operations:
 - choose one of two stereoisomers when they exist
 - apply the same angle randomisation as we did with trimers
 - check that no two atoms are now closer than the minimum bond length of the original

In [102]:
def min_pdist(cart):
    """Return the smallest distance between any two distinct rows of ``cart``.

    ``cart`` is indexed as ``cart[point, coordinate]``. Every unordered pair of
    points is considered exactly once and self-distances are excluded.
    """
    n_points = cart.shape[0]
    # Full (n_points, n_points) table of pairwise separations via broadcasting.
    separations = np.linalg.norm(cart[:, None, :] - cart, axis=-1)
    # Strict upper triangle: each pair once, diagonal left out.
    rows, cols = np.triu_indices(n_points, k=1)
    return np.min(separations[rows, cols])


def check_no_collision(zmatrix, new_zmatrix, tol=0.99):
    """Tell whether a perturbed geometry keeps its atoms sufficiently apart.

    The closest pair of atoms in ``new_zmatrix`` must be strictly further apart
    than ``tol`` times the closest pair in the reference ``zmatrix``. Both
    arguments only need to provide ``to_cartesian()``.
    """
    reference_gap = min_pdist(zmatrix.to_cartesian())
    perturbed_gap = min_pdist(new_zmatrix.to_cartesian())
    threshold = tol * reference_gap
    return perturbed_gap > threshold

In [103]:
def make_tetramer_bent(zmatrix):
    """Return ``zmatrix`` with every bond angle and dihedral randomly perturbed.

    Each angle is redrawn from a normal distribution centred on its reference
    value with a standard deviation of 24 degrees, truncated at three standard
    deviations. Line 2 only carries a bond angle; lines 3 and later carry a
    bond angle (entry 1) and a dihedral (entry 2). Bond lengths are untouched.

    Sampling is repeated (up to 100 attempts) until ``check_no_collision``
    accepts the perturbed geometry with its default tolerance.

    Raises:
        RuntimeError: if no attempt produces a collision-free geometry.
    """
    max_attempts = 100
    sigma = angle_to_rad(24)
    for _ in range(max_attempts):  # rejection sampling
        a1 = zmatrix.lines[2][1][1]
        new_a1 = truncnorm.rvs(-3, 3, loc=a1, scale=sigma)
        new_zmatrix = zmatrix.replace_single_value(2, 1, new_a1)
        for ix in range(3, len(zmatrix.lines)):
            # entry 1 is the bond angle, entry 2 the dihedral
            for entry in (1, 2):
                reference = zmatrix.lines[ix][entry][1]
                sampled = truncnorm.rvs(-3, 3, loc=reference, scale=sigma)
                new_zmatrix = new_zmatrix.replace_single_value(ix, entry, sampled)

        if check_no_collision(zmatrix, new_zmatrix):
            return new_zmatrix
    # A bare `raise` here has no active exception to re-raise and only yields
    # an uninformative RuntimeError; say explicitly what went wrong.
    raise RuntimeError(
        f"make_tetramer_bent: no collision-free geometry in {max_attempts} attempts"
    )

In [104]:
# Generate 50 bent structures per molecule and optionally write them to
# `bend.json` in every curriculum level the molecule belongs to.
for formula, zmatrix in tetramer_eq_z.items():
    structures = {}
    for i in range(50):
        # Draw the stereoisomer afresh for every structure. The loop variable
        # `zmatrix` must not be rebound: doing so turns the dict into a single
        # ZMatrix after the first draw and fixes the isomer for all structures.
        if isinstance(zmatrix, dict):
            eq_zmatrix = zmatrix["E"] if np.random.rand() > 0.5 else zmatrix["Z"]
        else:
            eq_zmatrix = zmatrix

        structures[str(i)] = (
            make_tetramer_bent(eq_zmatrix)
            .to_molecule(unit="angstrom")
            .to_qcelemental()
            .dict(encoding="json")
        )
    if SAVE_FILES:
        # Level-1 molecules are also part of level 2.
        levels = [1, 2] if formula in level1_formulas else [2]
        for level in levels:
            os.makedirs(
                f"../data/lightatomcurriculum/level{level}/{formula}",
                exist_ok=True,
            )
            with open(
                f"../data/lightatomcurriculum/level{level}/{formula}/bend.json",
                "w",
            ) as f:
                json.dump(structures, f)

#### Bond stretching, no bending

50 structures.
Bonds are distorted by multiplication with a LogTruncatedNormal distribution with mean 0 and standard deviation 0.25 (truncated at two standard deviations). We include the mixture between stereoisomers and the check on minimum distances, whose tolerance is now set to 0.6 times the original minimum distance (allowing quite a lot of compression).

In [105]:
def make_tetramer_stretched(zmatrix):
    """Return ``zmatrix`` with every bond length randomly rescaled.

    Each bond length (entry 0 of lines 1 and later) is multiplied by
    ``exp(z)``, where ``z`` is normal with mean 0 and standard deviation 0.25,
    truncated at two standard deviations. Angles and dihedrals are untouched.

    Sampling is repeated (up to 100 attempts) until the closest pair of atoms
    is further apart than 0.6 times the closest pair of the input geometry.

    Raises:
        RuntimeError: if no attempt produces a collision-free geometry.
    """
    max_attempts = 100
    for _ in range(max_attempts):  # rejection sampling
        new_zmatrix = zmatrix
        for ix in range(1, len(zmatrix.lines)):
            length = zmatrix.lines[ix][0][1]
            new_length = length * np.exp(truncnorm.rvs(-2, 2, loc=0.0, scale=0.25))
            new_zmatrix = new_zmatrix.replace_single_value(ix, 0, new_length)

        if check_no_collision(zmatrix, new_zmatrix, tol=0.6):
            return new_zmatrix
    # A bare `raise` here has no active exception to re-raise and only yields
    # an uninformative RuntimeError; say explicitly what went wrong.
    raise RuntimeError(
        f"make_tetramer_stretched: no collision-free geometry in {max_attempts} attempts"
    )

In [106]:
# Generate 50 stretched structures per molecule and optionally write them to
# `stretch.json` in every curriculum level the molecule belongs to.
for formula, zmatrix in tetramer_eq_z.items():
    structures = {}
    for i in range(50):
        # Draw the stereoisomer afresh for every structure. The loop variable
        # `zmatrix` must not be rebound: doing so turns the dict into a single
        # ZMatrix after the first draw and fixes the isomer for all structures.
        if isinstance(zmatrix, dict):
            eq_zmatrix = zmatrix["E"] if np.random.rand() > 0.5 else zmatrix["Z"]
        else:
            eq_zmatrix = zmatrix

        structures[str(i)] = (
            make_tetramer_stretched(eq_zmatrix)
            .to_molecule(unit="angstrom")
            .to_qcelemental()
            .dict(encoding="json")
        )
    # Writing is controlled by SAVE_FILES alone; a leftover per-formula
    # override used to bypass the switch here.
    if SAVE_FILES:
        # Level-1 molecules are also part of level 2.
        levels = [1, 2] if formula in level1_formulas else [2]
        for level in levels:
            os.makedirs(
                f"../data/lightatomcurriculum/level{level}/{formula}",
                exist_ok=True,
            )
            with open(
                f"../data/lightatomcurriculum/level{level}/{formula}/stretch.json",
                "w",
            ) as f:
                json.dump(structures, f)

#### Bond breaking, no bending

50 structures.
Choose one bond (from `bonds_to_break` when the molecule has an entry there, otherwise any bond of the Z-matrix) and multiply its length by LogTruncatedNormal(0.5, 1.0); the other bonds are marginally distorted as well.

In [107]:
def make_tetramer_broken(zmatrix, formula):
    """Return ``zmatrix`` with one bond strongly elongated and the rest jittered.

    The bond to break is drawn once, from ``bonds_to_break[formula]`` when the
    formula has an entry there and from all Z-matrix lines (1 and later)
    otherwise. Its length is multiplied by ``exp(z)`` with ``z`` normal
    (mean 0.5, standard deviation 1.0) truncated to [-0.5, 2.5]. Every other
    bond length is multiplied by ``exp(z)`` with ``z`` normal (mean 0,
    standard deviation 0.25) truncated at two standard deviations. Angles and
    dihedrals are untouched.

    Sampling is repeated (up to 100 attempts, always for the same bond) until
    the closest pair of atoms is further apart than 0.5 times the closest pair
    of the input geometry.

    Raises:
        RuntimeError: if no attempt produces a collision-free geometry.
    """
    max_attempts = 100
    all_available_lines = list(range(1, len(zmatrix.lines)))
    if formula in bonds_to_break:
        ix = np.random.choice(bonds_to_break[formula])
    else:
        ix = np.random.choice(all_available_lines)
    # The remaining lines only receive the mild distortion.
    all_available_lines.remove(ix)
    for _ in range(max_attempts):  # rejection sampling
        a1 = zmatrix.lines[ix][0][1]
        new_a1 = a1 * np.exp(truncnorm.rvs(-1, 2, loc=0.5, scale=1.0))
        new_zmatrix = zmatrix.replace_single_value(ix, 0, new_a1)
        for other_ix in all_available_lines:
            a2 = zmatrix.lines[other_ix][0][1]
            new_a2 = a2 * np.exp(truncnorm.rvs(-2, 2, loc=0.0, scale=0.25))
            new_zmatrix = new_zmatrix.replace_single_value(other_ix, 0, new_a2)

        if check_no_collision(zmatrix, new_zmatrix, tol=0.5):
            return new_zmatrix
    # A bare `raise` here has no active exception to re-raise and only yields
    # an uninformative RuntimeError; say explicitly what went wrong.
    raise RuntimeError(
        f"make_tetramer_broken: no collision-free geometry for {formula!r} "
        f"(line {ix}) in {max_attempts} attempts"
    )

In [108]:
# show_mol(make_tetramer_broken(tetramer_eq_z["CH3CHO"]["E"], "CH3CHO").to_molecule(unit="angstrom"))

In [109]:
# Generate 50 bond-broken structures per molecule and optionally write them to
# `break.json` in every curriculum level the molecule belongs to.
for formula, zmatrix in tetramer_eq_z.items():
    structures = {}
    for i in range(50):
        # Draw the stereoisomer afresh for every structure. The loop variable
        # `zmatrix` must not be rebound: doing so turns the dict into a single
        # ZMatrix after the first draw and fixes the isomer for all structures.
        if isinstance(zmatrix, dict):
            eq_zmatrix = zmatrix["E"] if np.random.rand() > 0.5 else zmatrix["Z"]
        else:
            eq_zmatrix = zmatrix

        structures[str(i)] = (
            make_tetramer_broken(eq_zmatrix, formula)
            .to_molecule(unit="angstrom")
            .to_qcelemental()
            .dict(encoding="json")
        )
    if SAVE_FILES:
        # Level-1 molecules are also part of level 2.
        levels = [1, 2] if formula in level1_formulas else [2]
        for level in levels:
            os.makedirs(
                f"../data/lightatomcurriculum/level{level}/{formula}",
                exist_ok=True,
            )
            with open(
                f"../data/lightatomcurriculum/level{level}/{formula}/break.json",
                "w",
            ) as f:
                json.dump(structures, f)

#### Combine stretching and bending

In [110]:
# Generate 50 structures per molecule that are first bent and then stretched,
# and optionally write them to `bend_stretch.json` (level 2 only).
for formula, zmatrix in tetramer_eq_z.items():
    structures = {}
    for i in range(50):
        # Draw the stereoisomer afresh for every structure. The loop variable
        # `zmatrix` must not be rebound: doing so turns the dict into a single
        # ZMatrix after the first draw and fixes the isomer for all structures.
        if isinstance(zmatrix, dict):
            eq_zmatrix = zmatrix["E"] if np.random.rand() > 0.5 else zmatrix["Z"]
        else:
            eq_zmatrix = zmatrix

        structures[str(i)] = (
            make_tetramer_stretched(make_tetramer_bent(eq_zmatrix))
            .to_molecule(unit="angstrom")
            .to_qcelemental()
            .dict(encoding="json")
        )
    if SAVE_FILES:
        os.makedirs(
            f"../data/lightatomcurriculum/level2/{formula}", exist_ok=True
        )
        with open(
            f"../data/lightatomcurriculum/level2/{formula}/bend_stretch.json", "w"
        ) as f:
            json.dump(structures, f)

#### Combine breaking and bending, 100 structures

In [111]:
# show_mol(make_tetramer_broken(make_tetramer_bent(tetramer_eq_z["CH3NO"]), "CH3NO").to_molecule(unit="angstrom"))

In [112]:
# Generate 100 structures per molecule that are first bent and then have one
# bond broken, and optionally write them to `bend_break.json` (level 2 only).
for formula, zmatrix in tetramer_eq_z.items():
    structures = {}
    for i in range(100):
        # Draw the stereoisomer afresh for every structure. The loop variable
        # `zmatrix` must not be rebound: doing so turns the dict into a single
        # ZMatrix after the first draw and fixes the isomer for all structures.
        if isinstance(zmatrix, dict):
            eq_zmatrix = zmatrix["E"] if np.random.rand() > 0.5 else zmatrix["Z"]
        else:
            eq_zmatrix = zmatrix

        structures[str(i)] = (
            make_tetramer_broken(make_tetramer_bent(eq_zmatrix), formula)
            .to_molecule(unit="angstrom")
            .to_qcelemental()
            .dict(encoding="json")
        )
    if SAVE_FILES:
        os.makedirs(
            f"../data/lightatomcurriculum/level2/{formula}", exist_ok=True
        )
        with open(
            f"../data/lightatomcurriculum/level2/{formula}/bend_break.json", "w"
        ) as f:
            json.dump(structures, f)

### Reactions

We include some hypothetical reaction mechanisms for basic reactions.

##### 2HF -> H2 + F2 and H4
Bond lengths are given in our data. Our mechanism brings two HF molecules together until they are at a distance of the F2 bond length, then we move each away towards its final pair using linear interpolation. Everything happens in the x-y plane.

In [113]:
def make_2hf_reaction():
    """Build the 50-frame planar path for 2HF -> H2 + F2.

    Atoms are ordered F, F, H, H and all lie in the x-y plane. The first 25
    frames slide two HF molecules (each parallel to the y axis) towards each
    other along x, from a half-separation of 5 down to half of the larger of
    the H2 and F2 bond lengths. The next 25 frames pull the F pair and the H
    pair apart along y up to +/-5 while each pair relaxes linearly to its own
    dimer bond length.

    Returns a list of 50 ``Molecule`` objects with coordinates in Angstrom.
    """
    hh = dimer_bond_lengths["H2"]
    hf = dimer_bond_lengths["HF"]
    ff = dimer_bond_lengths["F2"]
    start_x, end_x = 5, max(hh, ff) / 2
    start_y, end_y = hf / 2, 5

    def snapshot(xf, xh, y):
        # Fluorines on the lower row (-y), hydrogens on the upper row (+y).
        coords = np.array(
            [
                [-xf, -y, 0],
                [xf, -y, 0],
                [-xh, y, 0],
                [xh, y, 0],
            ]
        )
        return Molecule.make(
            coords=coords,
            charges=np.array([9, 9, 1, 1]),
            charge=0,
            spin=0,
            unit="angstrom",
        )

    fractions = [step / 24 for step in range(25)]
    frames = []

    # Stage 1: approach along x at the fixed HF bond length.
    for t in fractions:
        x = t * end_x + (1 - t) * start_x
        frames.append(snapshot(x, x, start_y))

    # Stage 2: separate along y into the H2 and F2 products.
    for t in fractions:
        y = t * end_y + (1 - t) * start_y
        xh = t * hh / 2 + (1 - t) * end_x
        xf = t * ff / 2 + (1 - t) * end_x
        frames.append(snapshot(xf, xh, y))
    return frames

In [114]:
# Serialise the 2HF reaction path, keyed by frame index, and optionally save it.
mols = {
    str(idx): mol.to_qcelemental().dict(encoding="json")
    for idx, mol in enumerate(make_2hf_reaction())
}
if SAVE_FILES:
    os.makedirs("../data/lightatomcurriculum/level2/2HF", exist_ok=True)
    with open("../data/lightatomcurriculum/level2/2HF/reaction.json", "w") as f:
        json.dump(mols, f)

In [115]:
def make_h4_reaction():
    """Build the 50-frame planar path bringing two H2 molecules into a square.

    The four hydrogens sit on the corners of a rectangle centred at the origin
    of the x-y plane. Its half-width shrinks linearly from 4 to 0.8 while its
    half-height grows from half the H2 bond length to 0.8, ending in a square
    of side 1.6.

    Returns a list of 50 ``Molecule`` objects with coordinates in Angstrom.
    """
    half_side = 1.6 / 2
    half_width_start = 4
    half_height_start = dimer_bond_lengths["H2"] / 2

    def rectangle(t):
        x = t * half_side + (1 - t) * half_width_start
        y = t * half_side + (1 - t) * half_height_start
        corners = np.array(
            [
                [-x, -y, 0],
                [x, -y, 0],
                [-x, y, 0],
                [x, y, 0],
            ]
        )
        return Molecule.make(
            coords=corners,
            charges=np.array([1, 1, 1, 1]),
            charge=0,
            spin=0,
            unit="angstrom",
        )

    return [rectangle(step / 49) for step in range(50)]

In [116]:
# Serialise the H4 reaction path, keyed by frame index, and optionally save it.
mols = {
    str(idx): mol.to_qcelemental().dict(encoding="json")
    for idx, mol in enumerate(make_h4_reaction())
}
if SAVE_FILES:
    for level in (2,):
        os.makedirs(f"../data/lightatomcurriculum/level{level}/H4", exist_ok=True)
        with open(
            f"../data/lightatomcurriculum/level{level}/H4/reaction.json", "w"
        ) as f:
            json.dump(mols, f)

#### Decomposition reactions

These are small enough to fit into our dataset.

- N2H4 -> N2H2 + H2
- H2O2 -> H2O + O.
- C2H6 -> C2H4 + H2
- C2H4 -> C2H2 + H2
- CH3OH -> CO + 2H2

In [117]:
def make_n2h4_reaction(lambda_):
    """Return the Z-matrix at progress ``lambda_`` along N2H4 -> N2H2 + H2.

    All internal coordinates are interpolated linearly between their value at
    ``lambda_ = 0`` and their value at ``lambda_ = 1``. Atom order is
    N0, N1, H2 (leaving, placed from N0), H3 (stays on N0), H4 (stays on N1)
    and H5 (leaving, placed relative to H2 at the evolving H-H distance).
    Lengths are in Angstrom, angles are given in degrees and converted with
    ``angle_to_rad``.
    """

    def blend(start, end):
        return start * (1 - lambda_) + lambda_ * end

    nn_len = blend(1.446, 1.252)
    bound_nh_len = blend(1.016, 1.028)
    unbound_nh_len = blend(1.016, 4)
    angle_nh = blend(107, 90)
    dihedral_nh = blend(120, 90)
    bound_dihedral = blend(120, 180)
    # Starting H-H distance is estimated from the current N-N length.
    hh_len = blend(nn_len + 2 * np.sin(angle_to_rad(17)), 0.7414)

    charges = [7, 7, 1, 1, 1, 1]
    lines = [
        (),
        ((0, nn_len),),
        ((0, unbound_nh_len), (1, angle_to_rad(angle_nh))),
        ((0, bound_nh_len), (1, angle_to_rad(107)), (2, angle_to_rad(dihedral_nh))),
        (
            (1, bound_nh_len),
            (0, angle_to_rad(107)),
            (3, -angle_to_rad(bound_dihedral)),
        ),
        ((2, hh_len), (0, angle_to_rad(180 - angle_nh)), (1, angle_to_rad(0))),
    ]
    return ZMatrix(0, 0, charges, lines)

In [118]:
# show_mol(make_n2h4_reaction(0.75).to_molecule(unit="angstrom"))

In [119]:
# Sample the N2H4 decomposition path at 50 evenly spaced progress values,
# serialise each frame (keyed by index) and optionally save the result.
mols = {
    str(idx): make_n2h4_reaction(progress)
    .to_molecule(unit="angstrom")
    .to_qcelemental()
    .dict(encoding="json")
    for idx, progress in enumerate(np.linspace(0, 1, 50))
}
if SAVE_FILES:
    os.makedirs("../data/lightatomcurriculum/level2/N2H4", exist_ok=True)
    with open("../data/lightatomcurriculum/level2/N2H4/reaction.json", "w") as f:
        json.dump(mols, f)

In [120]:
def make_c2h6_reaction(lambda_):
    """Return the Z-matrix at progress ``lambda_`` along C2H6 -> C2H4 + H2.

    All internal coordinates are interpolated linearly between their value at
    ``lambda_ = 0`` and their value at ``lambda_ = 1``. Atom order is
    C0, C1, H2 (leaving, placed from C0), H3-H4 (stay on C0), H5 (leaving,
    placed relative to H2 at the evolving H-H distance) and H6-H7 (stay on
    C1). Lengths are in Angstrom, angles are given in degrees and converted
    with ``angle_to_rad``.
    """
    cc_len = 1.536 * (1 - lambda_) + lambda_ * 1.339
    bound_ch_len = 1.091 * (1 - lambda_) + lambda_ * 1.086
    unbound_ch_len = 1.091 * (1 - lambda_) + lambda_ * 4
    angle_ch = 110.91 * (1 - lambda_) + lambda_ * 90
    # Dihedral of the remaining hydrogens w.r.t. the leaving H2; at 90 degrees
    # the two hydrogens on each carbon are 180 degrees apart (planar product).
    dihedral_ch = 120 * (1 - lambda_) + lambda_ * 90
    # NOTE(review): starts from 110.9 whereas angle_ch starts from 110.91.
    bound_angle = 110.9 * (1 - lambda_) + lambda_ * 121.2
    # Starting H-H distance is estimated from the current C-C length.
    hh_len_init = cc_len + 2 * np.sin(angle_to_rad(20.91))
    hh_len = hh_len_init * (1 - lambda_) + lambda_ * 0.7414

    return ZMatrix(
        0,
        0,
        [6, 6, 1, 1, 1, 1, 1, 1],
        [
            (),
            ((0, cc_len),),
            ((0, unbound_ch_len), (1, angle_to_rad(angle_ch))),
            (
                (0, bound_ch_len),
                (1, angle_to_rad(bound_angle)),
                (2, angle_to_rad(dihedral_ch)),
            ),
            (
                (0, bound_ch_len),
                (1, angle_to_rad(bound_angle)),
                (2, -angle_to_rad(dihedral_ch)),
            ),
            ((2, hh_len), (0, angle_to_rad(180 - angle_ch)), (1, angle_to_rad(0.0))),
            (
                (1, bound_ch_len),
                (0, angle_to_rad(bound_angle)),
                (2, angle_to_rad(dihedral_ch)),
            ),
            (
                (1, bound_ch_len),
                (0, angle_to_rad(bound_angle)),
                (2, -angle_to_rad(dihedral_ch)),
            ),
        ],
    )

In [121]:
# show_mol(make_c2h6_reaction(0.75).to_molecule(unit="angstrom"))

In [122]:
# Sample the C2H6 decomposition path at 50 evenly spaced progress values,
# serialise each frame (keyed by index) and optionally save the result.
mols = {
    str(idx): make_c2h6_reaction(progress)
    .to_molecule(unit="angstrom")
    .to_qcelemental()
    .dict(encoding="json")
    for idx, progress in enumerate(np.linspace(0, 1, 50))
}
if SAVE_FILES:
    os.makedirs("../data/lightatomcurriculum/level2/C2H6", exist_ok=True)
    with open("../data/lightatomcurriculum/level2/C2H6/reaction.json", "w") as f:
        json.dump(mols, f)

In [123]:
def make_c2h4_reaction(lambda_):
    """Return the Z-matrix at progress ``lambda_`` along C2H4 -> C2H2 + H2.

    All internal coordinates are interpolated linearly between their value at
    ``lambda_ = 0`` and their value at ``lambda_ = 1``. Atom order is
    C0, C1, H2 (leaving, placed from C0), H3 (stays on C0), H4 (leaving,
    placed relative to H2 at the evolving H-H distance) and H5 (stays on C1).
    Lengths are in Angstrom, angles are given in degrees and converted with
    ``angle_to_rad``.
    """

    def blend(start, end):
        return start * (1 - lambda_) + lambda_ * end

    cc_len = blend(1.339, 1.203)
    bound_ch_len = blend(1.086, 1.063)
    unbound_ch_len = blend(1.086, 4)
    # The remaining C-H bonds straighten out to a linear H-C-C-H product.
    bound_ch_angle = blend(120, 180)
    angle_ch = blend(120, 90)
    # Starting H-H distance is estimated from the current C-C length.
    hh_len = blend(cc_len + 2 * np.sin(angle_to_rad(31.2)), 0.7414)

    charges = [6, 6, 1, 1, 1, 1]
    lines = [
        (),
        ((0, cc_len),),
        ((0, unbound_ch_len), (1, angle_to_rad(angle_ch))),
        (
            (0, bound_ch_len),
            (1, angle_to_rad(bound_ch_angle)),
            (2, angle_to_rad(180)),
        ),
        ((2, hh_len), (0, angle_to_rad(180 - angle_ch)), (1, 0)),
        (
            (1, bound_ch_len),
            (0, angle_to_rad(bound_ch_angle)),
            (2, angle_to_rad(180)),
        ),
    ]
    return ZMatrix(0, 0, charges, lines)

In [124]:
# Sample the C2H4 decomposition path at 50 evenly spaced progress values,
# serialise each frame (keyed by index) and optionally save the result.
mols = {
    str(idx): make_c2h4_reaction(progress)
    .to_molecule(unit="angstrom")
    .to_qcelemental()
    .dict(encoding="json")
    for idx, progress in enumerate(np.linspace(0, 1, 50))
}
if SAVE_FILES:
    os.makedirs("../data/lightatomcurriculum/level2/C2H4", exist_ok=True)
    with open("../data/lightatomcurriculum/level2/C2H4/reaction.json", "w") as f:
        json.dump(mols, f)

In [125]:
def make_h2o2_reaction(lambda_):
    """Return the Z-matrix at progress ``lambda_`` along H2O2 -> H2O + O.

    Atom order is H0, O1, O2, H3. O2 moves away from O1 (1.475 -> 4 Angstrom)
    while H3, which starts out at the O-H distance from O2, is expressed
    relative to O1 and ends up at the water O-H distance of 0.9584 from it.
    All coordinates are interpolated linearly in ``lambda_``.
    """

    def blend(start, end):
        return start * (1 - lambda_) + end * lambda_

    oh, oo = 0.950, 1.475
    hoo_rad = angle_to_rad(94.8)

    # Cosine law: initial distance from O1 to the hydrogen bonded to O2.
    init_oh_far_len = np.sqrt(oh**2 + oo**2 - 2 * oo * oh * np.cos(hoo_rad))
    angle = blend(94.8, 90)
    dihedral = blend(119.8, 104.45)
    # Sine law: initial O2-O1-H3 angle, opening up to 90 degrees.
    special_rad = blend(
        np.arcsin(oh * np.sin(hoo_rad) / init_oh_far_len), angle_to_rad(90)
    )

    charges = [1, 8, 8, 1]
    lines = [
        (),
        ((0, blend(oh, 0.9584)),),
        ((1, blend(oo, 4)), (0, angle_to_rad(angle))),
        (
            (1, blend(init_oh_far_len, 0.9584)),
            (2, special_rad),
            (0, -angle_to_rad(dihedral)),
        ),
    ]
    return ZMatrix(0, 0, charges, lines)

In [126]:
# Sample the H2O2 decomposition path at 50 evenly spaced progress values,
# serialise each frame (keyed by index) and optionally save the result.
mols = {
    str(idx): make_h2o2_reaction(progress)
    .to_molecule(unit="angstrom")
    .to_qcelemental()
    .dict(encoding="json")
    for idx, progress in enumerate(np.linspace(0, 1, 50))
}
if SAVE_FILES:
    os.makedirs("../data/lightatomcurriculum/level2/H2O2", exist_ok=True)
    with open("../data/lightatomcurriculum/level2/H2O2/reaction.json", "w") as f:
        json.dump(mols, f)

In [127]:
def make_methanol_synthesis_reaction(lambda_):
    """Return the Z-matrix at progress ``lambda_`` between CH3OH and CO + 2 H2.

    ``lambda_ = 0`` is methanol (the "Z" reference geometry, methyl hydrogens
    at dihedrals 0/+120/-120 w.r.t. the hydroxyl hydrogen); ``lambda_ = 1`` is
    CO with both H2 molecules 4 Angstrom away. Atom order is O0, H1 (on O0),
    C2, H3-H5 (on C2). H1 and H3 leave together as one H2, H4 and H5 as the
    other. All coordinates are interpolated linearly in ``lambda_``.
    """
    co_bond_length = 1.427 * (1 - lambda_) + dimer_bond_lengths["CO"] * lambda_
    oh_bond_length = 0.956 * (1 - lambda_) + 4 * lambda_
    ch_bond_length = 1.096 * (1 - lambda_) + 4 * lambda_
    # Final dihedral of H4/H5: both 4 Angstrom from C and one H2 bond apart.
    hch_final_rad = (
        np.pi - np.arcsin(dimer_bond_lengths["H2"] / (2 * 4)) - 0.0055
    )  # Correction for bad numerics
    hch_rad = angle_to_rad(120) * (1 - lambda_) + hch_final_rad * lambda_
    # Final C-O-H1 and O-C-H3 angles: base angle of the trapezoid with the CO
    # bond as base, the H1-H3 bond as top and two sides of length 4.
    coh_rad_final = np.arccos((dimer_bond_lengths["CO"] - dimer_bond_lengths["H2"]) / 8)
    coh_rad = angle_to_rad(108.87) * (1 - lambda_) + coh_rad_final * lambda_
    och_rad = angle_to_rad(109.03) * (1 - lambda_) + coh_rad_final * lambda_
    return ZMatrix(
        0,
        0,
        [8, 1, 6, 1, 1, 1],
        [
            (),
            ((0, oh_bond_length),),
            ((0, co_bond_length), (1, coh_rad)),
            # H3 pairs up with H1. Its O-C-H angle must start from the
            # equilibrium O-C-H value (och_rad), not from the C-O-H value.
            ((2, ch_bond_length), (0, och_rad), (1, 0.0)),
            ((2, ch_bond_length), (0, angle_to_rad(109.03)), (1, hch_rad)),
            ((2, ch_bond_length), (0, angle_to_rad(109.03)), (1, -hch_rad)),
        ],
    )

In [128]:
# Sample the CH3OH reaction path at 50 evenly spaced progress values,
# serialise each frame (keyed by index) and optionally save the result.
mols = {
    str(idx): make_methanol_synthesis_reaction(progress)
    .to_molecule(unit="angstrom")
    .to_qcelemental()
    .dict(encoding="json")
    for idx, progress in enumerate(np.linspace(0, 1, 50))
}
if SAVE_FILES:
    os.makedirs("../data/lightatomcurriculum/level2/CH3OH", exist_ok=True)
    with open("../data/lightatomcurriculum/level2/CH3OH/reaction.json", "w") as f:
        json.dump(mols, f)

In [129]:
def make_hydrogen_combustion_reaction(lambda_):
    """Return the Z-matrix at progress ``lambda_`` between 2 H2O and 2 H2 + O2.

    ``lambda_ = 0`` is two water molecules with their oxygens 4 Angstrom
    apart; ``lambda_ = 1`` has the oxygens at the O2 bond length and each
    hydrogen pair at the H2 bond length, 4 Angstrom from its former oxygen.
    Atom order is O0, H1, H2, O3, H4, H5. All coordinates are interpolated
    linearly in ``lambda_``; angles are in degrees until converted.
    """

    def blend(start, end):
        return start * (1 - lambda_) + end * lambda_

    # H-H distance inside a water molecule, relaxing to the H2 bond length.
    hh_len = blend(
        2 * 0.9584 * np.sin(angle_to_rad(104.45 / 2)), dimer_bond_lengths["H2"]
    )
    ohh_angle = blend((180 - 104.45) / 2, 85)
    oo_len = blend(4, dimer_bond_lengths["O2"])
    hoo_angle = blend((360 - 104.45) / 2, 169.4)
    oh_len = blend(0.9584, 4)

    charges = [8, 1, 1, 8, 1, 1]
    lines = [
        (),
        ((0, oh_len),),
        ((1, hh_len), (0, angle_to_rad(ohh_angle))),
        ((0, oo_len), (1, -angle_to_rad(hoo_angle)), (2, angle_to_rad(0))),
        ((3, oh_len), (0, angle_to_rad(hoo_angle)), (2, angle_to_rad(0))),
        ((4, hh_len), (3, -angle_to_rad(ohh_angle)), (0, angle_to_rad(0))),
    ]
    return ZMatrix(0, 0, charges, lines)

In [130]:
# Sample the 2 H2O reaction path at 50 evenly spaced progress values,
# serialise each frame (keyed by index) and optionally save the result.
mols = {
    str(idx): make_hydrogen_combustion_reaction(progress)
    .to_molecule(unit="angstrom")
    .to_qcelemental()
    .dict(encoding="json")
    for idx, progress in enumerate(np.linspace(0, 1, 50))
}
if SAVE_FILES:
    os.makedirs("../data/lightatomcurriculum/level2/2H2O", exist_ok=True)
    with open("../data/lightatomcurriculum/level2/2H2O/reaction.json", "w") as f:
        json.dump(mols, f)

### Hydrogen bonding

We include H-bonding with the following pairs:
 - H2O/H2O
 - H2O/NH3
 - H2O/HF

In [131]:
def make_water_ammonia_h_bond(bond_length):
    """Z-matrix of a water/ammonia pair where an N-H hydrogen points at the O.

    Atom order is O0, H1, H2 (water), H3 (ammonia hydrogen placed
    ``bond_length`` from O0), N4 (collinear with O0 and H3) and H5, H6 (the
    remaining ammonia hydrogens). Lengths are in Angstrom.
    """
    # We do not use the exact geometries of the molecules, but instead we use
    # an idealised tetrahedral geometry
    tetrahedral = angle_to_rad(109.471)
    straight = angle_to_rad(180.0)
    oh_len = 0.9584
    nh_len = 1.012

    charges = [8, 1, 1, 1, 7, 1, 1]
    lines = [
        (),
        ((0, oh_len),),
        ((0, oh_len), (1, tetrahedral)),
        ((0, bond_length), (2, tetrahedral), (1, angle_to_rad(120.0))),
        ((3, nh_len), (0, straight), (1, straight)),
        ((4, nh_len), (3, tetrahedral), (1, angle_to_rad(60.0))),
        ((4, nh_len), (3, tetrahedral), (1, straight)),
    ]
    return ZMatrix(0, 0, charges, lines)


def make_water_water_h_bond(bond_length):
    """Z-matrix of a water dimer where an O-H hydrogen points at the other O.

    Atom order is O0, H1, H2 (acceptor water), H3 (donor hydrogen placed
    ``bond_length`` from O0), O4 (collinear with O0 and H3) and H5 (the
    remaining donor hydrogen). Lengths are in Angstrom.
    """
    # We do not use the exact geometries of the molecules, but instead we use
    # an idealised tetrahedral geometry
    tetrahedral = angle_to_rad(109.471)
    oh_len = 0.9584

    charges = [8, 1, 1, 1, 8, 1]
    lines = [
        (),
        ((0, oh_len),),
        ((0, oh_len), (1, tetrahedral)),
        ((0, bond_length), (2, tetrahedral), (1, angle_to_rad(120.0))),
        ((3, oh_len), (0, angle_to_rad(180.0)), (1, angle_to_rad(60.0))),
        ((4, oh_len), (3, tetrahedral), (1, angle_to_rad(180.0))),
    ]
    return ZMatrix(0, 0, charges, lines)


def make_ammonia_water_h_bond(bond_length):
    """Z-matrix of an ammonia/water pair where an O-H hydrogen points at the N.

    Atom order is N0, H1, H2, H3 (ammonia), H4 (water hydrogen placed
    ``bond_length`` from N0), O5 (collinear with N0 and H4) and H6 (the
    remaining water hydrogen). Lengths are in Angstrom.
    """
    # We do not use the exact geometries of the molecules, but instead we use
    # an idealised tetrahedral geometry
    tetrahedral = angle_to_rad(109.471)
    nh_len = 1.012
    oh_len = 0.9584

    charges = [7, 1, 1, 1, 1, 8, 1]
    lines = [
        (),
        ((0, nh_len),),
        ((0, nh_len), (1, tetrahedral)),
        ((0, nh_len), (2, tetrahedral), (1, -angle_to_rad(120))),
        ((0, bond_length), (3, tetrahedral), (2, angle_to_rad(120.0))),
        ((4, oh_len), (0, angle_to_rad(180.0)), (1, angle_to_rad(60.0))),
        ((5, oh_len), (4, tetrahedral), (1, angle_to_rad(180.0))),
    ]
    return ZMatrix(0, 0, charges, lines)


def make_water_hf_h_bond(bond_length):
    """Z-matrix of a water/HF pair where the HF hydrogen points at the O.

    Atom order is O0, H1, H2 (water), H3 (HF hydrogen placed ``bond_length``
    from O0) and F4 (collinear with O0 and H3). Lengths are in Angstrom.
    """
    # We do not use the exact geometries of the molecules, but instead we use
    # an idealised tetrahedral geometry
    tetrahedral = angle_to_rad(109.471)
    straight = angle_to_rad(180.0)
    oh_len = 0.9584

    charges = [8, 1, 1, 1, 9]
    lines = [
        (),
        ((0, oh_len),),
        ((0, oh_len), (1, tetrahedral)),
        ((0, bond_length), (2, tetrahedral), (1, angle_to_rad(120.0))),
        ((3, 0.917), (0, straight), (1, straight)),
    ]
    return ZMatrix(0, 0, charges, lines)


def make_hf_water_h_bond(bond_length):
    """Z-matrix of an HF/water pair where an O-H hydrogen points at the F.

    Atom order is F0, H1 (HF), H2 (water hydrogen placed ``bond_length`` from
    F0), O3 (collinear with F0 and H2) and H4 (the remaining water hydrogen).
    Lengths are in Angstrom.
    """
    # We do not use the exact geometries of the molecules, but instead we use
    # an idealised tetrahedral geometry
    tetrahedral = angle_to_rad(109.471)
    straight = angle_to_rad(180.0)
    oh_len = 0.9584

    charges = [9, 1, 1, 8, 1]
    lines = [
        (),
        ((0, 0.917),),
        ((0, bond_length), (1, tetrahedral)),
        ((2, oh_len), (0, straight), (1, straight)),
        ((3, oh_len), (2, tetrahedral), (1, angle_to_rad(120.0))),
    ]
    return ZMatrix(0, 0, charges, lines)

In [132]:
# show_mol(make_water_water_h_bond(2).to_molecule(unit="angstrom"))

In [133]:
# For every hydrogen-bonded pair, scan the hydrogen-bond distance over 50
# evenly spaced values between 1.0 and 5.0 Angstrom, serialise each frame
# (keyed by index) and optionally write the scan to `hbonding.json`.
for fn, formula in [
    (make_water_ammonia_h_bond, "H2O_NH3"),
    (make_ammonia_water_h_bond, "NH3_H2O"),
    (make_water_hf_h_bond, "H2O_HF"),
    (make_hf_water_h_bond, "HF_H2O"),
    (make_water_water_h_bond, "2H2O"),
]:
    mols = {
        str(idx): fn(dist)
        .to_molecule(unit="angstrom")
        .to_qcelemental()
        .dict(encoding="json")
        for idx, dist in enumerate(np.linspace(1.0, 5.0, 50))
    }
    if SAVE_FILES:
        os.makedirs(
            f"../data/lightatomcurriculum/level2/{formula}", exist_ok=True
        )
        with open(
            f"../data/lightatomcurriculum/level2/{formula}/hbonding.json", "w"
        ) as f:
            json.dump(mols, f)