#### For each of the QCA hdf5 files, compute and store valence and nonbonded energies and forces using Parsley 1.2

In [1]:
import matplotlib.pyplot as plt
from openforcefield.topology import Molecule
from tqdm import tqdm
import numpy as np
import pandas as pd

import torch
# Make float64 the default dtype for floating-point torch tensors created from here on.
torch.set_default_dtype(torch.float64)



#### Fetch files

In [2]:
from glob import glob
# Glob pattern matching every .h5 file in the QCA data directory.
path_to_dataset = '../../data/qca/*.h5'

# Expand the pattern into concrete file paths; the bare name on the last line
# displays the resulting list as the cell output.
paths_to_datasets = glob(path_to_dataset)
paths_to_datasets

['../../data/qca/eMolecules.h5',
 '../../data/qca/Roche.h5',
 '../../data/qca/Coverage.h5',
 '../../data/qca/Bayer.h5',
 '../../data/qca/Pfizer.h5']

#### For a given offmol, create two OpenMM simulations: one containing only valence forces, and one containing only nonbonded forces

In [3]:
from openforcefield.typing.engines.smirnoff import ForceField
# Load the openff-1.2.0 force field once; the simulation factories below use this
# module-level object to parameterize each molecule.
forcefield = ForceField('openff-1.2.0.offxml')

In [4]:
from simtk import unit
from simtk import openmm as mm
from simtk.openmm import app
from functools import lru_cache

from espaloma.units import DISTANCE_UNIT, ENERGY_UNIT, FORCE_UNIT

# Single OpenMM 'Reference' platform object, shared by every Simulation built below.
platform = mm.Platform.getPlatformByName('Reference')

@lru_cache(2**20)
def make_nb_only_sim(offmol):
    """Build an OpenMM Simulation for `offmol` that contains only nonbonded forces.

    The molecule is parameterized with the module-level `forcefield`, then every
    force whose class name does not contain 'Nonbond' is removed from the system.
    The result is memoized with `lru_cache`, so `offmol` must be hashable.

    NOTE(review): the cache is keyed on `offmol`'s own __hash__/__eq__ -- confirm
    that two molecules which compare equal also share the same atom ordering,
    otherwise a cached simulation could be reused with mismatched positions.

    Returns
    -------
    app.Simulation
        Simulation on the module-level `platform` with a `VerletIntegrator(1.0)`.
    """
    topology = offmol.to_topology()
    openmm_topology = topology.to_openmm()
    system = forcefield.create_openmm_system(topology)

    # Visit force indices from last to first so that removing a force does not
    # shift the indices that are still to be visited.
    for index in reversed(range(system.getNumForces())):
        class_name = system.getForce(index).__class__.__name__
        if 'Nonbond' not in class_name:
            system.removeForce(index)

    # Sanity check: only nonbonded forces may remain.
    remaining = [force.__class__.__name__ for force in system.getForces()]
    assert all('Nonbond' in class_name for class_name in remaining)

    integrator = mm.VerletIntegrator(1.0)
    return app.Simulation(openmm_topology, system, integrator, platform)


@lru_cache(2**20)
def make_valence_only_sim(offmol):
    """Build an OpenMM Simulation for `offmol` with all nonbonded forces removed.

    The molecule is parameterized with the module-level `forcefield`, then every
    force whose class name contains 'Nonbond' is removed from the system, leaving
    the remaining (valence) terms. The result is memoized with `lru_cache`, so
    `offmol` must be hashable.

    NOTE(review): the cache is keyed on `offmol`'s own __hash__/__eq__ -- confirm
    that two molecules which compare equal also share the same atom ordering,
    otherwise a cached simulation could be reused with mismatched positions.

    Returns
    -------
    app.Simulation
        Simulation on the module-level `platform` with a `VerletIntegrator(1.0)`.
    """
    topology = offmol.to_topology()
    openmm_topology = topology.to_openmm()
    system = forcefield.create_openmm_system(topology)

    # Visit force indices from last to first so that removing a force does not
    # shift the indices that are still to be visited.
    for index in reversed(range(system.getNumForces())):
        class_name = system.getForce(index).__class__.__name__
        if 'Nonbond' in class_name:
            system.removeForce(index)

    # Sanity check: no nonbonded force may remain.
    remaining = [force.__class__.__name__ for force in system.getForces()]
    assert all('Nonbond' not in class_name for class_name in remaining)

    integrator = mm.VerletIntegrator(1.0)
    return app.Simulation(openmm_topology, system, integrator, platform)

Using backend: pytorch


In [5]:
def get_potential_energies(sim, xyz: unit.Quantity) -> np.ndarray:
    """Evaluate the potential energy of `sim` at every set of positions in `xyz`.

    Parameters
    ----------
    sim
        Object exposing an OpenMM-style `context` (`setPositions` / `getState`).
    xyz : unit.Quantity
        Iterable of position sets; each element is passed to `setPositions` as-is.

    Returns
    -------
    np.ndarray
        One energy per element of `xyz`, expressed in `ENERGY_UNIT`.

    Side effect: the context of `sim` is left at the last positions evaluated.
    """
    def _energy_at(positions):
        # The context must hold the new positions before the state is queried.
        sim.context.setPositions(positions)
        state = sim.context.getState(getEnergy=True)
        return state.getPotentialEnergy().value_in_unit(ENERGY_UNIT)

    return np.array([_energy_at(positions) for positions in xyz])

In [8]:
def add_mm_energies(df):
    """Add per-row valence and nonbonded MM energies to `df`, in place.

    For every row, the molecule in column 'offmol' and the coordinates in column
    'xyz' (multiplied by `DISTANCE_UNIT`) are evaluated with the valence-only and
    nonbonded-only simulations. The resulting energy arrays are written to the
    new columns 'mm_energies_valence' and 'mm_energies_nonbonded'.

    Results are collected in object arrays and assigned as whole columns. The
    previous chained assignment (`df[col][key] = value`) may write to a temporary
    object and is a no-op under pandas Copy-on-Write.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns 'offmol' and 'xyz'; modified in place.

    Returns
    -------
    dict
        Maps the index key of each failing row to the exception it raised. A
        failing row keeps None for whatever was not computed before the failure.

    Raises
    ------
    KeyError
        If `df` lacks the 'offmol' or 'xyz' column.
    """
    n_rows = len(df)
    # np.empty with dtype=object initializes every slot to None, which matches
    # the "not computed" placeholder used for failed rows.
    valence_column = np.empty(n_rows, dtype=object)
    nonbonded_column = np.empty(n_rows, dtype=object)

    exceptions = dict()

    rows = zip(df.index, df['offmol'], df['xyz'])
    # zip has no length, so pass the total explicitly to keep the progress bar informative.
    for position, (key, offmol, raw_xyz) in enumerate(tqdm(rows, total=n_rows)):
        try:
            xyz = raw_xyz * DISTANCE_UNIT

            # Integer indexing stores each energy array as one object in its slot.
            valence_column[position] = get_potential_energies(make_valence_only_sim(offmol), xyz)
            nonbonded_column[position] = get_potential_energies(make_nb_only_sim(offmol), xyz)
        except Exception as e:
            # Best effort: record the failure and carry on with the next row.
            exceptions[key] = e

    df['mm_energies_valence'] = valence_column
    df['mm_energies_nonbonded'] = nonbonded_column
    return exceptions

In [9]:
dfs = dict()
# Keep the failures of every dataset, keyed by file path. A single variable
# reassigned on each iteration would retain only the last dataset's failures.
energy_exceptions = dict()
for path in paths_to_datasets:
    dfs[path] = pd.read_hdf(path)
    energy_exceptions[path] = add_mm_energies(dfs[path])

100%|██████████| 2201/2201 [00:58<00:00, 37.88it/s]
100%|██████████| 202/202 [00:03<00:00, 57.45it/s]
 59%|█████▉    | 215/365 [00:37<00:54,  2.76it/s]RDKit ERROR: [01:16:22] UFFTYPER: Unrecognized charge state for atom: 8
RDKit ERROR: [01:16:25] UFFTYPER: Unrecognized charge state for atom: 8
Problematic atoms are:
Atom atomic num: 7, name: , idx: 8, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 3, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 7, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 8, name: , idx: 11, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 7, name: , idx: 8, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 3, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 7, aromatic: False, chiral: False
bond o

Problematic atoms are:
Atom atomic num: 7, name: , idx: 17, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 8, aromatic: True, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 11, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 16, name: , idx: 23, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 7, name: , idx: 17, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 8, aromatic: True, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 11, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 16, name: , idx: 23, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 7, name: , idx: 17, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 8, aromatic: True, chiral: Fa

Problematic atoms are:
Atom atomic num: 7, name: , idx: 29, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 16, aromatic: True, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 22, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 16, name: , idx: 37, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 7, name: , idx: 29, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 16, aromatic: True, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 22, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 16, name: , idx: 37, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 7, name: , idx: 29, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 16, aromatic: True, chiral:

Problematic atoms are:
Atom atomic num: 7, name: , idx: 16, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 6, aromatic: True, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 11, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 13, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 7, name: , idx: 26, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 9, aromatic: True, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 20, aromatic: False, chiral: True
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 21, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 7, name: , idx: 26, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 9, aromatic: True, chiral: False

Problematic atoms are:
Atom atomic num: 7, name: , idx: 22, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 8, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 20, aromatic: False, chiral: True
bond order: 1, chiral: False to atom atomic num: 8, name: , idx: 24, aromatic: False, chiral: False
Atom atomic num: 7, name: , idx: 23, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 9, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 21, aromatic: False, chiral: True
bond order: 1, chiral: False to atom atomic num: 8, name: , idx: 25, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 7, name: , idx: 26, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 8, aromatic: False, chiral: False
bond order: 1, chiral

Problematic atoms are:
Atom atomic num: 7, name: , idx: 15, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 8, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 9, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 11, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 7, name: , idx: 13, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 7, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 8, aromatic: False, chiral: False
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 11, aromatic: False, chiral: False

Problematic atoms are:
Atom atomic num: 7, name: , idx: 13, aromatic: False, chiral: True with bonds:
bond order: 1, chiral: False to atom atomic num: 6, name: , idx: 7, aromatic: False, chiral: Fal

#### Now also compute gradients (negated forces)

In [10]:
def get_gradients(sim, xyz: unit.Quantity) -> np.ndarray:
    """Evaluate the energy gradient of `sim` at every set of positions in `xyz`.

    The gradient is the negated force reported by the context.

    Parameters
    ----------
    sim
        Object exposing an OpenMM-style `context` (`setPositions` / `getState`).
    xyz : unit.Quantity
        Iterable of position sets; each element is passed to `setPositions` as-is.

    Returns
    -------
    np.ndarray
        One gradient array per element of `xyz`, expressed in `FORCE_UNIT`.

    Side effect: the context of `sim` is left at the last positions evaluated.
    """
    def _gradient_at(positions):
        # The context must hold the new positions before the state is queried.
        sim.context.setPositions(positions)
        forces = sim.context.getState(getForces=True).getForces(asNumpy=True)
        # Gradient of the energy = minus the force.
        return - forces.value_in_unit(FORCE_UNIT)

    return np.array([_gradient_at(positions) for positions in xyz])

In [15]:
def add_mm_gradients(df):
    """Add per-row valence and nonbonded MM gradients to `df`, in place.

    For every row, the molecule in column 'offmol' and the coordinates in column
    'xyz' (multiplied by `DISTANCE_UNIT`) are evaluated with the valence-only and
    nonbonded-only simulations. The resulting gradient arrays are written to the
    new columns 'mm_gradients_valence' and 'mm_gradients_nonbonded'.

    Results are collected in object arrays and assigned as whole columns. The
    previous chained assignment (`df[col][key] = value`) may write to a temporary
    object and is a no-op under pandas Copy-on-Write.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns 'offmol' and 'xyz'; modified in place.

    Returns
    -------
    dict
        Maps the index key of each failing row to the exception it raised. A
        failing row keeps None in both gradient columns.

    Raises
    ------
    KeyError
        If `df` lacks the 'offmol' or 'xyz' column.
    """
    n_rows = len(df)
    # np.empty with dtype=object initializes every slot to None, which matches
    # the "not computed" placeholder used for failed rows.
    valence_column = np.empty(n_rows, dtype=object)
    nonbonded_column = np.empty(n_rows, dtype=object)

    exceptions = dict()

    rows = zip(df.index, df['offmol'], df['xyz'])
    # zip has no length, so pass the total explicitly to keep the progress bar informative.
    for position, (key, offmol, raw_xyz) in enumerate(tqdm(rows, total=n_rows)):
        try:
            xyz = raw_xyz * DISTANCE_UNIT

            # Compute both before storing either, so a failure leaves the row empty in both columns.
            valence_gradients = get_gradients(make_valence_only_sim(offmol), xyz)
            nb_gradients = get_gradients(make_nb_only_sim(offmol), xyz)

            # Integer indexing stores each gradient array as one object in its slot.
            valence_column[position] = valence_gradients
            nonbonded_column[position] = nb_gradients
        except Exception as e:
            # Best effort: record the failure and carry on with the next row.
            exceptions[key] = e

    df['mm_gradients_valence'] = valence_column
    df['mm_gradients_nonbonded'] = nonbonded_column

    return exceptions

In [16]:
# Reuse the DataFrames already held in `dfs`. Re-reading each file here would
# replace them with fresh copies from disk and drop the mm_energies_* columns
# computed earlier, so only the gradients would end up being saved.
gradient_exceptions = dict()
for path in paths_to_datasets:
    if path not in dfs:
        # Fall back to disk only for a path that has not been loaded yet.
        dfs[path] = pd.read_hdf(path)
    # Keep the per-row failures of every dataset instead of discarding them.
    gradient_exceptions[path] = add_mm_gradients(dfs[path])

100%|██████████| 2201/2201 [01:05<00:00, 33.81it/s]
100%|██████████| 202/202 [00:04<00:00, 50.06it/s]
100%|██████████| 365/365 [00:11<00:00, 31.47it/s]
100%|██████████| 1838/1838 [01:23<00:00, 21.97it/s]
100%|██████████| 197/197 [00:05<00:00, 37.70it/s]


#### Now save

In [18]:
# Write each augmented DataFrame back to the file it was read from, under key 'df'.
# `key` is passed by keyword: newer pandas deprecates positional arguments to
# to_hdf other than the path.
# NOTE(review): to_hdf opens the file in append mode by default. If the input
# files store their table under a key other than 'df', each file will end up
# holding two datasets -- confirm the key used by the source files.
for path in paths_to_datasets:
    dfs[path].to_hdf(path, key='df')