diff --git a/.github/workflows/ut.yml b/.github/workflows/ut.yml index caa0c77c4..0371fe8db 100644 --- a/.github/workflows/ut.yml +++ b/.github/workflows/ut.yml @@ -22,7 +22,7 @@ jobs: $CONDA/bin/conda update -n base -c defaults conda conda install pip conda update pip - conda install numpy openmm pytest -c conda-forge + conda install numpy openmm pytest rdkit biopandas openbabel -c conda-forge pip install jax jax_md pip install mdtraj==1.9.7 pymbar==4.0.1 - name: Install DMFF diff --git a/README.md b/README.md index 4ad11df2c..1b530e68b 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,20 @@ # DMFF +[![doi:10.26434/chemrxiv-2022-2c7gv](https://img.shields.io/badge/DOI-10.26434%2Fchemrxiv--2022--2c7gv-blue)](https://doi.org/10.26434/chemrxiv-2022-2c7gv) + +## About DMFF + **DMFF** (**D**ifferentiable **M**olecular **F**orce **F**ield) is a Jax-based python package that provides a full differentiable implementation of molecular force field models. This project aims to establish an extensible codebase to minimize the efforts in force field parameterization, and to ease the force and virial tensor evaluations for advanced complicated potentials (e.g., polarizable models with geometry-dependent atomic parameters). Currently, this project mainly focuses on the molecular systems such as: water, biological macromolecules (peptides, proteins, nucleic acids), organic polymers, and small organic molecules (organic electrolyte, drug-like molecules) etc. We support both the conventional point charge models (OPLS and AMBER like) and multipolar polarizable models (AMOEBA and MPID like). The entire project is backed by the XLA technique in JAX, thus can be "jitted" and run in GPU devices much more efficiently compared to normal python codes. The behavior of organic molecular systems (e.g., protein folding, polymer structure, etc.) is often determined by a complex effect of many different types of interactions. 
The existing organic molecular force fields are mainly empirically fitted and their performance relies heavily on error cancellation. Therefore, the transferability and the prediction power of these force fields are insufficient. For new molecules, the parameter fitting process requires essential manual intervention and can be quite cumbersome. In order to automate the parametrization process and increase the robustness of the model, it is necessary to apply modern AI techniques in conventional force field development. This project serves for this purpose by utilizing the automatic differentiable programming technique to develop a codebase, which allows a more convenient incorporation of modern AI optimization techniques. It also helps the realization of many exciting functions including (but not limited to): hybrid machine learning/force field models and parameter optimization based on trajectory. +### License and credits + +The project DMFF is licensed under [GNU LGPL v3.0](LICENSE). If you use this code in any future publications, please cite this using `Wang X, Li J, Yang L, Chen F, Wang Y, Chang J, et al. DMFF: An Open-Source Automatic +Differentiable Platform for Molecular Force Field +Development and Molecular Dynamics +Simulation. ChemRxiv. Cambridge: Cambridge Open Engage; 2022; This content is a preprint and has not been peer-reviewed.` + ## User Guide + [1. Introduction](docs/user_guide/introduction.md) @@ -18,9 +29,20 @@ The behavior of organic molecular systems (e.g., protein folding, polymer struct + [3. Coding conventions](docs/dev_guide/convention.md) + [4. Document writing](docs/dev_guide/write_docs.md) -## Modules -+ [1. ADMP](docs/modules/admp.md) +## Code Structure + +The code is organized as follows: ++ `examples`: demos presented in Jupyter Notebook. ++ `docs`: documentation. ++ `package`: files for constructing packages or images, such as conda recipe and docker files. ++ `tests`: unit tests. 
++ `dmff`: DMFF python codes ++ `dmff/admp`: source code of automatic differentiable multipolar polarizable (ADMP) force field module. ++ `dmff/classical`: source code of classical force field module. ++ `dmff/common`: source code of common functions, such as neighbor list. ++ `dmff/generators`: source code of force generators. ++ `dmff/sgnn`: source code of subgraph neural network force field model. ## Support and Contribution diff --git a/dmff/api.py b/dmff/api.py index d2af46683..ddc0533fc 100644 --- a/dmff/api.py +++ b/dmff/api.py @@ -1,4 +1,5 @@ import linecache +from typing import Callable, Dict, Any import numpy as np import jax.numpy as jnp @@ -82,7 +83,7 @@ def totalPE(positions, box, pairs, params): class Hamiltonian(app.forcefield.ForceField): - def __init__(self, *xmlnames): + def __init__(self, *xmlnames, **kwargs): super().__init__(*xmlnames) self._pseudo_ff = app.ForceField(*xmlnames) # parse XML forcefields @@ -104,6 +105,9 @@ def __init__(self, *xmlnames): self.extractParameterTree() # hook generators to self._forces + # use noOmmSys to disable all traditional openmm system + if kwargs.get("noOmmSys", False): + self._forces = [] for jaxGen in self._jaxGenerators: self._forces.append(jaxGen) @@ -184,6 +188,29 @@ def createPotential(self, print(e) pass + # virtual site + try: + addVsiteFunc = generator.getAddVsiteFunc() + self.setAddVirtualSiteFunc(addVsiteFunc) + vsiteObj = generator.getVsiteObj() + self.setVirtualSiteObj(vsiteObj) + except AttributeError as e: + pass + + # covalent map + try: + cov_map = generator.covalent_map + self.setCovalentMap(cov_map) + except AttributeError as e: + pass + + # topology matrix (for BCC usage) + try: + top_mat = generator.getTopologyMatrix() + self.setTopologyMatrix(top_mat) + except AttributeError as e: + pass + return potObj def render(self, filename): @@ -201,4 +228,151 @@ def update_iter(node, ref): else: node[key] = ref[key] - update_iter(self.paramtree, paramtree) \ No newline at end of file +
update_iter(self.paramtree, paramtree) + + def setCovalentMap(self, cov_map: jnp.ndarray): + self._cov_map = cov_map + + def getCovalentMap(self) -> jnp.ndarray: + """ + Get covalent map + """ + if hasattr(self, "_cov_map"): + return self._cov_map + else: + raise DMFFException("Covalent map is not set.") + + def getAddVirtualSiteFunc(self) -> Callable: + return self._add_vsite_coords + + def setAddVirtualSiteFunc(self, func: Callable): + self._add_vsite_coords = func + + def setVirtualSiteObj(self, vsite): + self._vsite = vsite + + def getVirtualSiteObj(self): + return self._vsite + + def setTopologyMatrix(self, top_mat): + self._top_mat = top_mat + + def getTopologyMatrix(self): + return self._top_mat + + def addVirtualSiteCoords(self, pos: jnp.ndarray, params: Dict[str, Any]) -> jnp.ndarray: + """ + Add coordinates for virtual sites + + Parameters + ---------- + pos: jnp.ndarray + Coordinates without virtual sites + params: dict + Paramtree of hamiltonian, i.e. `dmff.Hamiltonian.paramtree` + + Return + ------ + newpos: jnp.ndarray + + Examples + -------- + >>> import jax.numpy as jnp + >>> import openmm.app as app + >>> from rdkit import Chem + >>> from dmff import Hamiltonian + >>> pdb = app.PDBFile("tests/data/chlorobenzene.pdb") + >>> pos = jnp.array(pdb.getPositions(asNumpy=True)._value) + >>> mol = Chem.MolFromMolFile("tests/data/chlorobenzene.mol", removeHs=False) + >>> h = Hamiltonian("tests/data/cholorobenzene_vsite.xml") + >>> potObj = h.createPotential(pdb.topology, rdmol=mol) + >>> newpos = h.addVirtualSiteCoords(pos, h.paramtree) + + """ + func = self.getAddVirtualSiteFunc() + newpos = func(pos, params) + return newpos + + def addVirtualSiteToMol(self, rdmol, params): + """ + Add coordinates for rdkit.Chem.Mol object + + Parameters + ---------- + rdmol: rdkit.Chem.Mol + Mol object to which virtual sites are added + params: dict + Paramtree of hamiltonian, i.e. 
`dmff.Hamiltonian.paramtree` + + Return + ------ + newmol: rdkit.Chem.Mol + Mol object with virtual sites added + + Examples + -------- + >>> import jax.numpy as jnp + >>> import openmm.app as app + >>> from rdkit import Chem + >>> from dmff import Hamiltonian + >>> pdb = app.PDBFile("tests/data/chlorobenzene.pdb") + >>> mol = Chem.MolFromMolFile("tests/data/chlorobenzene.mol", removeHs=False) + >>> h = Hamiltonian("tests/data/cholorobenzene_vsite.xml") + >>> potObj = h.createPotential(pdb.topology, rdmol=mol) + >>> newmol = h.addVirtualSiteToMol(mol, h.paramtree) + """ + vsiteObj = self.getVirtualSiteObj() + newmol = vsiteObj.addVirtualSiteToMol( + rdmol, + params['NonbondedForce']['vsite_types'], + params['NonbondedForce']['vsite_distances'] + ) + return newmol + + @staticmethod + def buildTopologyFromMol(rdmol, resname: str = "MOL") -> app.Topology: + """ + Build openmm.app.Topology from rdkit.Chem.Mol Object + + Parameters + ---------- + rdmol: rdkit.Chem.Mol + Mol object + resname: str + Name of the added residue, default "MOL" + + Return + ------ + top: `openmm.app.Topology` + Topology built based on the input rdkit Mol object + """ + from rdkit import Chem + + top = app.Topology() + chain = top.addChain(0) + res = top.addResidue(resname, chain, "1", "") + + atCount = {} + addedAtoms = [] + for idx, atom in enumerate(rdmol.GetAtoms()): + symb = atom.GetSymbol().upper() + atCount.update({symb: atCount.get(symb, 0) + 1}) + ele = app.Element.getBySymbol(symb) + atName = f'{symb}{atCount[symb]}' + + addedAtom = top.addAtom(atName, ele, res, str(idx+1)) + addedAtoms.append(addedAtom) + + bondTypeMap = { + Chem.rdchem.BondType.SINGLE: app.Single, + Chem.rdchem.BondType.DOUBLE: app.Double, + Chem.rdchem.BondType.TRIPLE: app.Triple, + Chem.rdchem.BondType.AROMATIC: app.Aromatic + } + for bond in rdmol.GetBonds(): + top.addBond( + addedAtoms[bond.GetBeginAtomIdx()], + addedAtoms[bond.GetEndAtomIdx()], + type=bondTypeMap.get(bond.GetBondType(), None) + ) + return top \ 
No newline at end of file diff --git a/dmff/classical/inter.py b/dmff/classical/inter.py index b33f2f1b8..8438b9ee5 100644 --- a/dmff/classical/inter.py +++ b/dmff/classical/inter.py @@ -1,4 +1,4 @@ -from typing import Iterable, Tuple +from typing import Iterable, Tuple, Optional import jax.numpy as jnp import numpy as np @@ -130,10 +130,11 @@ def get_energy(box, epsilon, sigma, epsfix, sigfix): class CoulNoCutoffForce: # E=\frac{{q}_{1}{q}_{2}}{4\pi\epsilon_0\epsilon_1 r} - def __init__(self, map_prm, epsilon_1=1.0) -> None: + def __init__(self, map_prm, epsilon_1=1.0, topology_matrix=None) -> None: self.eps_1 = epsilon_1 self.map_prm = map_prm + self.top_mat = topology_matrix def generate_get_energy(self): def get_coul_energy(dr_vec, chrgprod, box): @@ -145,7 +146,6 @@ def get_coul_energy(dr_vec, chrgprod, box): return E def get_energy(positions, box, pairs, charges, mscales): - pairs = pairs.at[:, :2].set(regularize_pairs(pairs[:, :2])) mask = pair_buffer_scales(pairs[:, :2]) map_prm = jnp.array(self.map_prm) @@ -163,9 +163,16 @@ def get_energy(positions, box, pairs, charges, mscales): E_inter = get_coul_energy(dr_vec, chrgprod_scale, box) - return jnp.sum(E_inter * mask) - - return get_energy + return jnp.sum(E_inter * mask) + + def get_energy_bcc(positions, box, pairs, pre_charges, bcc, mscales): + charges = pre_charges + jnp.dot(self.top_mat, bcc).flatten() + return get_energy(positions, box, pairs, charges, mscales) + + if self.top_mat is None: + return get_energy + else: + return get_energy_bcc class CoulReactionFieldForce: @@ -177,6 +184,7 @@ def __init__( epsilon_1=1.0, epsilon_solv=78.5, isPBC=True, + topology_matrix=None ) -> None: self.r_cut = r_cut @@ -186,6 +194,7 @@ def __init__( self.eps_1 = epsilon_1 self.map_prm = map_prm self.ifPBC = isPBC + self.top_mat = topology_matrix def generate_get_energy(self): def get_rf_energy(dr_vec, chrgprod, box): @@ -204,7 +213,6 @@ def get_rf_energy(dr_vec, chrgprod, box): return E def get_energy(positions, box, 
from typing import Callable, Dict, List, Optional, Tuple

import jax.numpy as jnp
import numpy as np


class VirtualSite:
    """
    Class for manipulation of virtual sites
    """

    def __init__(self, matches_dict: Dict[Tuple[int, ...], int]):
        """
        Initialize a virtual site object

        Parameters
        ----------
        matches_dict: dict
            Maps a tuple of matched atom indices to the index of the virtual
            site parameter (used to index `vtypes` and `vdist`). One virtual
            site is created per entry, in the dictionary's iteration order.
        """
        self.num_vsites = len(matches_dict)
        self.matches = list(matches_dict.keys())
        self.indices = list(matches_dict.values())

    def getAddVirtualSiteFunc(self) -> Callable:
        """
        Get function to compute virtual site coordinates

        Return
        ------
        add_vsite_position: Callable
            `add_vsite_position(pos, vtypes, vdist)` returns a copy of `pos`
            with one extra row per virtual site appended.
        """

        def unit(vec: jnp.ndarray) -> jnp.ndarray:
            return vec / jnp.linalg.norm(vec, ord=2)

        def add_vsite_position(pos: jnp.ndarray, vtypes: jnp.ndarray, vdist: jnp.ndarray) -> jnp.ndarray:
            n_atoms = pos.shape[0]
            newpos = jnp.zeros((n_atoms + self.num_vsites, pos.shape[1]))
            newpos = newpos.at[:n_atoms].set(pos)
            for i in range(self.num_vsites):
                match = self.matches[i]
                idx = self.indices[i]
                if vtypes[idx] == 1:
                    # type 1: along the direction match[1] -> match[0],
                    # at distance vdist[idx] from match[0]
                    nvec = unit(newpos[match[0]] - newpos[match[1]])
                    newpos = newpos.at[n_atoms + i].set(newpos[match[0]] + vdist[idx] * nvec)
                elif vtypes[idx] == 2:
                    # type 2: along the normalized sum of the unit vectors
                    # match[1] -> match[0] and match[2] -> match[0]
                    nvec1 = unit(newpos[match[0]] - newpos[match[1]])
                    nvec2 = unit(newpos[match[0]] - newpos[match[2]])
                    nvec = unit(nvec1 + nvec2)
                    newpos = newpos.at[n_atoms + i].set(newpos[match[0]] + vdist[idx] * nvec)
                # any other type value leaves the appended row at zero
            return newpos

        return add_vsite_position

    def addVirtualSiteToMol(self, rdmol, vtypes=None, vdist=None):
        """
        Add virtual site to rdkit.Chem.Mol object

        Parameters
        ----------
        rdmol: rdkit.Chem.Mol
            Mol object to which virtual sites are added
        vtypes: jnp.ndarray or None
            Virtual site types, can be obtained from
            `dmff.Hamiltonian.paramtree['NonbondedForce']['vsite_types']`
        vdist: jnp.ndarray or None
            Virtual site distances params, can be obtained from
            `dmff.Hamiltonian.paramtree['NonbondedForce']['vsite_distances']`

        Return
        ------
        newmol: rdkit.Chem.Mol
            Mol with virtual sites added. Coordinates of the virtual sites
            are only set when both `vtypes` and `vdist` are `jnp.ndarray`.
        """
        if isinstance(vtypes, jnp.ndarray) and isinstance(vdist, jnp.ndarray):
            func = self.getAddVirtualSiteFunc()
            # convert between angstrom and nm
            pos = jnp.array(rdmol.GetConformer(0).GetPositions()) / 10
            addCoords = func(pos, vtypes, vdist) * 10
        else:
            addCoords = None

        return self.add_dummy(rdmol, [m[0] for m in self.matches], addCoords)

    @staticmethod
    def add_dummy(mol, parentAtomIdx: List[int], addCoords: Optional[np.ndarray]):
        """
        Add dummy atoms to rdkit.Chem.Mol object and make a dummy bond between
        each dummy atom and its parent atom

        Parameters
        ----------
        mol: rdkit.Chem.Mol
            Molecule to add dummy atoms
        parentAtomIdx: List[int]
            Index of the parent atom of each dummy atom; one dummy atom is
            added per entry
        addCoords: numpy.ndarray or None
            Coordinates of all atoms including the virtual sites (original
            atoms first). In unit of Angstrom

        Return
        ------
        newmol: rdkit.Chem.Mol
            Copy of `mol` with the dummy atoms appended
        """
        from rdkit import Chem
        ori_num_atoms = mol.GetNumAtoms()
        rwmol = Chem.RWMol(mol)

        duIdxs = []
        for pidx in parentAtomIdx:
            duIdx = rwmol.AddAtom(Chem.Atom(0))
            rwmol.AddBond(duIdx, pidx)
            duIdxs.append(duIdx)
        # Convert once, after all dummy atoms were added. Doing this inside
        # the loop left `newmol` undefined when `parentAtomIdx` was empty.
        newmol = rwmol.GetMol()

        if addCoords is not None:
            assert len(addCoords) == len(parentAtomIdx) + ori_num_atoms, "Number of atoms in coordinates doesn't match"
            conf = newmol.GetConformer()
            for duIdx in duIdxs:
                conf.SetAtomPosition(duIdx, [float(x) for x in addCoords[duIdx]])

        return newmol
""" # not convert to float for atom types + self.useSmirks = False + atypes = fftree.get_attribs("AtomTypes/Type", "name", convert_to_float=False) aclasses = fftree.get_attribs("AtomTypes/Type", "class", convert_to_float=False) self.class2type = {} @@ -288,8 +290,13 @@ def __init__(self, fftree: ForcefieldTree, parser): tmp.append((nit, self.class2type.get(node.attrs[key], [None]))) elif key == "class": tmp.append((1, self.class2type.get(node.attrs[key], [None]))) - tmp = sorted(tmp, key=lambda x: x[0]) - self.functions.append([i[1] for i in tmp]) + elif key == "smirks": + self.useSmirks = True + self.functions.append(node.attrs[key]) + + if not self.useSmirks: + tmp = sorted(tmp, key=lambda x: x[0]) + self.functions.append([i[1] for i in tmp]) def matchGeneral(self, types): matches = [] @@ -300,6 +307,82 @@ def matchGeneral(self, types): if len(matches) == 0: return False, False, -1 return matches[-1] + + def matchSmirks(self, rdmol): + """ + Match smirks + """ + from rdkit import Chem + + if rdmol is None: + raise DMFFException("No rdkit.Chem.Mol object is provided") + + matches_dict = {} + for idx, smk in enumerate(self.functions): + patt = Chem.MolFromSmarts(smk) + matches = rdmol.GetSubstructMatches(patt) + for match in matches: + if len(match) == 2: + canonical_match = (min(match), max(match)) + elif len(match) == 3: + canonical_match = (min([match[0], match[2]]), match[1], max([match[0], match[2]])) + elif len(match) == 4: + canonical_match = (match[3], match[2], match[1], match[0]) if match[2] < match[1] else match + elif len(match) == 1: + canonical_match = match + else: + raise DMFFException(f"Invalid SMIRKS: {smk}") + matches_dict.update({canonical_match: idx}) + + return matches_dict + + def matchSmirksNoSort(self, rdmol): + """ + Match smirks, but no sorting the matched atom indices + """ + from rdkit import Chem + + if rdmol is None: + raise DMFFException("No rdkit.Chem.Mol object is provided") + + matches_dict = {} + for idx, smk in 
enumerate(self.functions): + patt = Chem.MolFromSmarts(smk) + matches = rdmol.GetSubstructMatches(patt) + for match in matches: + matches_dict.update({match: idx}) + + return matches_dict + + def matchSmirksImproper(self, rdmol): + """ + Match smirks for improper torsions + """ + from rdkit import Chem + + if rdmol is None: + raise DMFFException("No rdkit.Chem.Mol object is provided") + + matches_dict = {} + for idx, smk in enumerate(self.functions): + patt = Chem.MolFromSmarts(smk) + matches = rdmol.GetSubstructMatches(patt) + hasWildcard = "*" in smk + for match in matches: + # Be the most consistent with AMBER, in which ordering is determined in this way + atnums = [rdmol.GetAtomWithIdx(i).GetAtomicNum() for i in match] + if hasWildcard: + if atnums[1] == atnums[2] and match[1] > match[2]: + canonical_match = (match[2], match[1], match[0], match[3]) + elif atnums[1] != 6 and (atnums[2] == 6 or atnums[1] < atnums[2]): + canonical_match = (match[2], match[1], match[0], match[3]) + else: + canonical_match = (match[1], match[2], match[0], match[3]) + else: + canonical_match = match + matches_dict.update({canonical_match: idx}) + + return matches_dict def _match(self, types, term): if len(types) != len(term): diff --git a/dmff/generators/classical.py b/dmff/generators/classical.py index b4584ba9a..cda71da45 100644 --- a/dmff/generators/classical.py +++ b/dmff/generators/classical.py @@ -1,5 +1,6 @@ from collections import defaultdict from typing import Dict +from copy import deepcopy import numpy as np import jax.numpy as jnp @@ -26,7 +27,8 @@ LennardJonesLongRangeFreeEnergyForce, CoulombPMEFreeEnergyForce ) -from dmff.admp.pme import setup_ewald_parameters +from dmff.classical.vsite import VirtualSite +from dmff.admp.pme import setup_ewald_parameters from dmff.utils import jit_condition, isinstance_jnp, DMFFException, findItemInList from dmff.fftree import ForcefieldTree, TypeMatcher from dmff.api import Hamiltonian, build_covalent_map @@ -77,24 +79,42 @@ def 
createForce(self, sys, data, nonbondedMethod, nonbondedCutoff, args): matcher = TypeMatcher(self.fftree, "HarmonicBondForce/Bond") map_atom1, map_atom2, map_param = [], [], [] - n_bonds = len(data.bonds) - # build map - for i in range(n_bonds): - idx1 = data.bonds[i].atom1 - idx2 = data.bonds[i].atom2 - type1 = data.atomType[data.atoms[idx1]] - type2 = data.atomType[data.atoms[idx2]] - ifFound, ifForward, nfunc = matcher.matchGeneral([type1, type2]) - if not ifFound: - raise BaseException( - f"No parameter for bond ({idx1},{type1}) - ({idx2},{type2})" - ) - map_atom1.append(idx1) - map_atom2.append(idx2) - map_param.append(nfunc) + + if not matcher.useSmirks: + n_bonds = len(data.bonds) + # build map + for i in range(n_bonds): + idx1 = data.bonds[i].atom1 + idx2 = data.bonds[i].atom2 + type1 = data.atomType[data.atoms[idx1]] + type2 = data.atomType[data.atoms[idx2]] + ifFound, ifForward, nfunc = matcher.matchGeneral([type1, type2]) + if not ifFound: + raise DMFFException( + f"No parameter for bond ({idx1},{type1}) - ({idx2},{type2})" + ) + map_atom1.append(idx1) + map_atom2.append(idx2) + map_param.append(nfunc) + else: + rdmol = args.get("rdmol", None) + matches_dict = matcher.matchSmirks(rdmol) + for bond in rdmol.GetBonds(): + beginAtomIdx = bond.GetBeginAtomIdx() + endAtomIdx = bond.GetEndAtomIdx() + query = (beginAtomIdx, endAtomIdx) if beginAtomIdx < endAtomIdx else (endAtomIdx, beginAtomIdx) + map_atom1.append(query[0]) + map_atom2.append(query[1]) + try: + map_param.append(matches_dict[query]) + except KeyError as e: + raise DMFFException( + f"No parameter for bond between Atom{beginAtomIdx} and Atom{endAtomIdx}" + ) + map_atom1 = np.array(map_atom1, dtype=int) map_atom2 = np.array(map_atom2, dtype=int) - map_param = np.array(map_param, dtype=int) + map_param = np.array(map_param, dtype=int) bforce = HarmonicBondJaxForce(map_atom1, map_atom2, map_param) @@ -137,25 +157,46 @@ def createForce(self, sys, data, nonbondedMethod, nonbondedCutoff, args): matcher = 
TypeMatcher(self.fftree, "HarmonicAngleForce/Angle") map_atom1, map_atom2, map_atom3, map_param = [], [], [], [] - n_angles = len(data.angles) - for nangle in range(n_angles): - idx1 = data.angles[nangle][0] - idx2 = data.angles[nangle][1] - idx3 = data.angles[nangle][2] - type1 = data.atomType[data.atoms[idx1]] - type2 = data.atomType[data.atoms[idx2]] - type3 = data.atomType[data.atoms[idx3]] - ifFound, ifForward, nfunc = matcher.matchGeneral( - [type1, type2, type3]) - if not ifFound: - print( - f"No parameter for angle ({idx1},{type1}) - ({idx2},{type2}) - ({idx3},{type3})" - ) - else: - map_atom1.append(idx1) - map_atom2.append(idx2) - map_atom3.append(idx3) - map_param.append(nfunc) + + if not matcher.useSmirks: + n_angles = len(data.angles) + for nangle in range(n_angles): + idx1 = data.angles[nangle][0] + idx2 = data.angles[nangle][1] + idx3 = data.angles[nangle][2] + type1 = data.atomType[data.atoms[idx1]] + type2 = data.atomType[data.atoms[idx2]] + type3 = data.atomType[data.atoms[idx3]] + ifFound, ifForward, nfunc = matcher.matchGeneral( + [type1, type2, type3]) + if not ifFound: + print( + f"No parameter for angle ({idx1},{type1}) - ({idx2},{type2}) - ({idx3},{type3})" + ) + else: + map_atom1.append(idx1) + map_atom2.append(idx2) + map_atom3.append(idx3) + map_param.append(nfunc) + else: + from rdkit import Chem + + rdmol = args.get("rdmol", None) + matches_dict = matcher.matchSmirks(rdmol) + angle_patt = Chem.MolFromSmarts("[*:1]~[*:2]~[*:3]") + angles = rdmol.GetSubstructMatches(angle_patt) + for angle in angles: + canonical_angle = (min([angle[0], angle[2]]), angle[1], max([angle[0], angle[2]])) + map_atom1.append(canonical_angle[0]) + map_atom2.append(canonical_angle[1]) + map_atom3.append(canonical_angle[2]) + try: + map_param.append(matches_dict[canonical_angle]) + except KeyError as e: + raise DMFFException( + f"No parameter for angle Atom{canonical_angle[0]}-Atom{canonical_angle[1]}-Atom{canonical_angle[2]}" + ) + map_atom1 = np.array(map_atom1, 
dtype=int) map_atom2 = np.array(map_atom2, dtype=int) map_atom3 = np.array(map_atom3, dtype=int) @@ -294,6 +335,10 @@ def overwrite(self): self.fftree.set_node("PeriodicTorsionForce/Improper", impr_data) def createForce(self, sys, data, nonbondedMethod, nonbondedCutoff, args): + """ + Create force for torsions + """ + # Proper Torsions proper_matcher = TypeMatcher(self.fftree, "PeriodicTorsionForce/Proper") map_prop_atom1 = {i: [] for i in range(1, self.max_pred_prop + 1)} @@ -302,24 +347,55 @@ def createForce(self, sys, data, nonbondedMethod, nonbondedCutoff, args): map_prop_atom4 = {i: [] for i in range(1, self.max_pred_prop + 1)} map_prop_param = {i: [] for i in range(1, self.max_pred_prop + 1)} n_matched_props = 0 - for torsion in data.propers: - types = [data.atomType[data.atoms[torsion[i]]] for i in range(4)] - ifFound, ifForward, nnode = proper_matcher.matchGeneral(types) - if not ifFound: - continue - # find terms for node - for periodicity in range(1, self.max_pred_prop + 1): - idx = findItemInList( - nnode, self.meta[f"prop_nodeidx"][f"{periodicity}"]) - if idx < 0: - continue - n_matched_props += 1 - map_prop_atom1[periodicity].append(torsion[0]) - map_prop_atom2[periodicity].append(torsion[1]) - map_prop_atom3[periodicity].append(torsion[2]) - map_prop_atom4[periodicity].append(torsion[3]) - map_prop_param[periodicity].append(idx) + if not proper_matcher.useSmirks: + for torsion in data.propers: + types = [data.atomType[data.atoms[torsion[i]]] for i in range(4)] + ifFound, ifForward, nnode = proper_matcher.matchGeneral(types) + if not ifFound: + continue + # find terms for node + for periodicity in range(1, self.max_pred_prop + 1): + idx = findItemInList( + nnode, self.meta[f"prop_nodeidx"][f"{periodicity}"]) + if idx < 0: + continue + n_matched_props += 1 + map_prop_atom1[periodicity].append(torsion[0]) + map_prop_atom2[periodicity].append(torsion[1]) + map_prop_atom3[periodicity].append(torsion[2]) + map_prop_atom4[periodicity].append(torsion[3]) + 
map_prop_param[periodicity].append(idx) + else: + from rdkit import Chem + + rdmol = args.get("rdmol", None) + proper_patt = Chem.MolFromSmarts("[*:1]~[*:2]-[*:3]~[*:4]") + propers = rdmol.GetSubstructMatches(proper_patt) + matches_dict = proper_matcher.matchSmirks(rdmol) + for match in propers: + torsion = (match[3], match[2], match[1], match[0]) if match[2] < match[1] else match + try: + nnode = matches_dict[torsion] + ifFound = True + n_matched_props += 1 + except KeyError: + ifFound = False + + if not ifFound: + continue + + for periodicity in range(1, self.max_pred_prop + 1): + idx = findItemInList(nnode, self.meta['prop_nodeidx'][f"{periodicity}"]) + if idx < 0: + continue + map_prop_atom1[periodicity].append(torsion[0]) + map_prop_atom2[periodicity].append(torsion[1]) + map_prop_atom3[periodicity].append(torsion[2]) + map_prop_atom4[periodicity].append(torsion[3]) + map_prop_param[periodicity].append(idx) + + # Improper Torsions impr_matcher = TypeMatcher(self.fftree, "PeriodicTorsionForce/Improper") try: @@ -327,47 +403,69 @@ def createForce(self, sys, data, nonbondedMethod, nonbondedCutoff, args): "ordering")[0] except KeyError as e: ordering = "default" + map_impr_atom1 = {i: [] for i in range(1, self.max_pred_impr + 1)} map_impr_atom2 = {i: [] for i in range(1, self.max_pred_impr + 1)} map_impr_atom3 = {i: [] for i in range(1, self.max_pred_impr + 1)} map_impr_atom4 = {i: [] for i in range(1, self.max_pred_impr + 1)} map_impr_param = {i: [] for i in range(1, self.max_pred_impr + 1)} n_matched_imprs = 0 - for impr in data.impropers: - match = impr_matcher.matchImproper(impr, data, ordering=ordering) - if match is not None: - (a1, a2, a3, a4, nnode) = match + + if not impr_matcher.useSmirks: + for impr in data.impropers: + match = impr_matcher.matchImproper(impr, data, ordering=ordering) + if match is not None: + (a1, a2, a3, a4, nnode) = match + n_matched_imprs += 1 + # find terms for node + for periodicity in range(1, self.max_pred_impr + 1): + idx = 
findItemInList( + nnode, self.meta[f"impr_nodeidx"][f"{periodicity}"]) + if idx < 0: + continue + if ordering == 'smirnoff': + # Add all torsions in trefoil + map_impr_atom1[periodicity].append(a1) + map_impr_atom2[periodicity].append(a2) + map_impr_atom3[periodicity].append(a3) + map_impr_atom4[periodicity].append(a4) + map_impr_param[periodicity].append(idx) + map_impr_atom1[periodicity].append(a1) + map_impr_atom2[periodicity].append(a3) + map_impr_atom3[periodicity].append(a4) + map_impr_atom4[periodicity].append(a2) + map_impr_param[periodicity].append(idx) + map_impr_atom1[periodicity].append(a1) + map_impr_atom2[periodicity].append(a4) + map_impr_atom3[periodicity].append(a2) + map_impr_atom4[periodicity].append(a3) + map_impr_param[periodicity].append(idx) + else: + map_impr_atom1[periodicity].append(a1) + map_impr_atom2[periodicity].append(a2) + map_impr_atom3[periodicity].append(a3) + map_impr_atom4[periodicity].append(a4) + map_impr_param[periodicity].append(idx) + else: + rdmol = args.get("rdmol", None) + + if rdmol is None: + raise DMFFException("No rdkit.Chem.Mol object is provided") + + matches_dict = impr_matcher.matchSmirksImproper(rdmol) + for torsion, nnode in matches_dict.items(): n_matched_imprs += 1 - # find terms for node - for periodicity in range(1, self.max_pred_impr + 1): - idx = findItemInList( - nnode, self.meta[f"impr_nodeidx"][f"{periodicity}"]) + for periodicity in range(1, self.max_pred_impr+ 1): + idx = findItemInList(nnode, self.meta['impr_nodeidx'][f"{periodicity}"]) if idx < 0: continue - if ordering == 'smirnoff': - # Add all torsions in trefoil - map_impr_atom1[periodicity].append(a1) - map_impr_atom2[periodicity].append(a2) - map_impr_atom3[periodicity].append(a3) - map_impr_atom4[periodicity].append(a4) - map_impr_param[periodicity].append(idx) - map_impr_atom1[periodicity].append(a1) - map_impr_atom2[periodicity].append(a3) - map_impr_atom3[periodicity].append(a4) - map_impr_atom4[periodicity].append(a2) - 
map_impr_param[periodicity].append(idx) - map_impr_atom1[periodicity].append(a1) - map_impr_atom2[periodicity].append(a4) - map_impr_atom3[periodicity].append(a2) - map_impr_atom4[periodicity].append(a3) - map_impr_param[periodicity].append(idx) - else: - map_impr_atom1[periodicity].append(a1) - map_impr_atom2[periodicity].append(a2) - map_impr_atom3[periodicity].append(a3) - map_impr_atom4[periodicity].append(a4) - map_impr_param[periodicity].append(idx) - + map_impr_atom1[periodicity].append(torsion[0]) + map_impr_atom2[periodicity].append(torsion[1]) + map_impr_atom3[periodicity].append(torsion[2]) + map_impr_atom4[periodicity].append(torsion[3]) + map_impr_param[periodicity].append(idx) + + # Sum proper and improper torsions props = [ PeriodicTorsionJaxForce(jnp.array(map_prop_atom1[p], dtype=int), jnp.array(map_prop_atom2[p], dtype=int), @@ -413,7 +511,7 @@ def getJaxPotential(self): class NonbondedJaxGenerator: - def __init__(self, ff): + def __init__(self, ff: Hamiltonian): self.name = "NonbondedForce" self.ff = ff self.fftree = ff.fftree @@ -427,6 +525,9 @@ def __init__(self, ff): self.ra2idx = {} self.idx2rai = {} + self.useBCC = False + self.useVsite = False + def extract(self): self.from_residue = self.fftree.get_attribs( "NonbondedForce/UseAttributeFromResidue", "name") @@ -438,6 +539,7 @@ def extract(self): for prm in self.from_force: vals = self.fftree.get_attribs("NonbondedForce/Atom", prm) self.paramtree[self.name][prm] = jnp.array(vals) + # Build per-atom array for from_residue residues = self.fftree.get_nodes("Residues/Residue") resvals = {k: [] for k in self.from_residue} @@ -454,6 +556,7 @@ def extract(self): resvals[prm].extend(atomval) for prm in self.from_residue: self.paramtree[self.name][prm] = jnp.array(resvals[prm]) + # Build coulomb14scale and lj14scale coulomb14scale, lj14scale = self.fftree.get_attribs( "NonbondedForce", ["coulomb14scale", "lj14scale"])[0] @@ -461,6 +564,18 @@ def extract(self): [coulomb14scale]) 
self.paramtree[self.name]["lj14scale"] = jnp.array([lj14scale]) + # Build BondChargeCorrection + bccs = self.fftree.get_attribs("NonbondedForce/BondChargeCorrection", "bcc") + self.paramtree[self.name]['bcc'] = jnp.array(bccs).reshape(-1, 1) + self.useBCC = len(bccs) > 0 + + # Build VirtualSite + vsite_types = self.fftree.get_attribs("NonbondedForce/VirtualSite", "vtype") + self.paramtree[self.name]['vsite_types'] = jnp.array(vsite_types, dtype=int) + vsite_distance = self.fftree.get_attribs("NonbondedForce/VirtualSite", "distance") + self.paramtree[self.name]['vsite_distances'] = jnp.array(vsite_distance) + self.useVsite = len(vsite_types) > 0 + def overwrite(self): # write coulomb14scale self.fftree.set_attrib("NonbondedForce", "coulomb14scale", @@ -486,9 +601,18 @@ def overwrite(self): [d for d in data if d[0] == resnode.attrs["name"]], key=lambda x: x[1]) resnode.set_attrib("Atom", prm, [t[2] for t in tmp]) - - def createForce(self, system, data, nonbondedMethod, nonbondedCutoff, - args): + + # write BCC + if self.useBCC: + self.fftree.set_attrib( + "NonbondedForce/BondChargeCorrection", "bcc", + self.paramtree[self.name]['bcc'] + ) + + def createForce(self, system, data, nonbondedMethod, nonbondedCutoff, args): + # Build Covalent Map + self.covalent_map = build_covalent_map(data, 6) + methodMap = { app.NoCutoff: "NoCutoff", app.CutoffPeriodic: "CutoffPeriodic", @@ -509,7 +633,6 @@ def createForce(self, system, data, nonbondedMethod, nonbondedCutoff, mscales_lj = mscales_lj.at[2].set( self.paramtree[self.name]["lj14scale"][0]) - # Coulomb: only support PME for now # set PBC if nonbondedMethod not in [app.NoCutoff, app.CutoffNonPeriodic]: ifPBC = True @@ -517,20 +640,73 @@ def createForce(self, system, data, nonbondedMethod, nonbondedCutoff, ifPBC = False nbmatcher = TypeMatcher(self.fftree, "NonbondedForce/Atom") - # load LJ from types - maps = {} - for prm in self.from_force: - maps[prm] = [] - for atom in data.atoms: - atype = data.atomType[atom] - ifFound, 
_, nnode = nbmatcher.matchGeneral([atype]) - if not ifFound: - raise DMFFException( - "AtomType of %s mismatched in NonbondedForce" % - (str(atom))) - maps[prm].append(nnode) - maps[prm] = jnp.array(maps[prm], dtype=int) + + rdmol = args.get("rdmol", None) + + if self.useVsite: + vsitematcher = TypeMatcher(self.fftree, "NonbondedForce/VirtualSite") + vsite_matches_dict = vsitematcher.matchSmirksNoSort(rdmol) + vsiteObj = VirtualSite(vsite_matches_dict) + + def addVsiteFunc(pos, params): + func = vsiteObj.getAddVirtualSiteFunc() + newpos = func(pos, params[self.name]['vsite_types'], params[self.name]['vsite_distances']) + return newpos + + self._addVsiteFunc = addVsiteFunc + rdmol = vsiteObj.addVirtualSiteToMol(rdmol) + self.vsiteObj = vsiteObj + + # expand covalent map + ori_dim = self.covalent_map.shape[0] + new_dim = ori_dim + len(vsite_matches_dict) + cov_map = np.zeros((new_dim, new_dim), dtype=int) + cov_map[:ori_dim, :ori_dim] += np.array(self.covalent_map, dtype=int) + + map_to_parents = np.arange(new_dim) + for i, match in enumerate(vsite_matches_dict.keys()): + map_to_parents[ori_dim + i] = match[0] + for i in range(len(vsite_matches_dict)): + parent_i = map_to_parents[ori_dim + i] + for j in range(new_dim): + parent_j = map_to_parents[j] + cov_map[ori_dim + i, j] = cov_map[parent_i, parent_j] + cov_map[j, ori_dim + i] = cov_map[parent_j, parent_i] + # keep diagonal 0 + cov_map[ori_dim + i, ori_dim + i] = 0 + # keep vsite and its parent atom 1 + cov_map[parent_i, ori_dim + i] = 1 + cov_map[ori_dim + i, parent_i] = 1 + self.covalent_map = jnp.array(cov_map) + + # Load Lennard-Jones parameters + maps = {} + if not nbmatcher.useSmirks: + for prm in self.from_force: + maps[prm] = [] + for atom in data.atoms: + atype = data.atomType[atom] + ifFound, _, nnode = nbmatcher.matchGeneral([atype]) + if not ifFound: + raise DMFFException( + "AtomType of %s mismatched in NonbondedForce" % + (str(atom))) + maps[prm].append(nnode) + maps[prm] = jnp.array(maps[prm], 
dtype=int) + else: + lj_matches_dict = nbmatcher.matchSmirks(rdmol) + for prm in self.from_force: + maps[prm] = [] + for i in range(rdmol.GetNumAtoms()): + try: + maps[prm].append(lj_matches_dict[(i,)]) + except KeyError as e: + raise DMFFException( + f"No parameter for atom {i}" + ) + maps[prm] = jnp.array(maps[prm], dtype=int) + for prm in self.from_residue: maps[prm] = [] for atom in data.atoms: @@ -538,10 +714,56 @@ def createForce(self, system, data, nonbondedMethod, nonbondedCutoff, aidx = data.atomTemplateIndexes[atom] resname, aname = templateName, atom.name maps[prm].append(self.ra2idx[(resname, aidx)]) + + # Virtual Site + if self.useVsite: + # expand charges + chg = jnp.zeros( + (len(self.paramtree[self.name]['charge']) + len(vsite_matches_dict),), + dtype=self.paramtree[self.name]['charge'].dtype + ) + self.paramtree[self.name]['charge'] = chg.at[:len(self.paramtree[self.name]['charge'])].set( + self.paramtree[self.name]['charge'] + ) + maps_chg = [int(x) for x in maps['charge']] + for i in range(len(vsite_matches_dict)): + maps_chg.append(len(maps['charge']) + i) + maps['charge'] = jnp.array(maps_chg, dtype=int) + + # BCC parameters + if self.useBCC: + bccmatcher = TypeMatcher(self.fftree, "NonbondedForce/BondChargeCorrection") + + if bccmatcher.useSmirks: + bcc_matches_dict = bccmatcher.matchSmirksNoSort(rdmol) + self.top_mat = np.zeros((rdmol.GetNumAtoms(), self.paramtree[self.name]['bcc'].shape[0])) + + for bond in rdmol.GetBonds(): + beginAtomIdx = bond.GetBeginAtomIdx() + endAtomIdx = bond.GetEndAtomIdx() + query1, query2 = (beginAtomIdx, endAtomIdx), (endAtomIdx, beginAtomIdx) + if query1 in bcc_matches_dict: + nnode = bcc_matches_dict[query1] + self.top_mat[query1[0], nnode] += 1 + self.top_mat[query1[1], nnode] -= 1 + elif query2 in bcc_matches_dict: + nnode = bcc_matches_dict[query2] + self.top_mat[query2[0], nnode] += 1 + self.top_mat[query2[1], nnode] -= 1 + else: + raise DMFFException( + f"No BCC parameter for bond between 
Atom{beginAtomIdx} and Atom{endAtomIdx}" + ) + else: + raise DMFFException( + "Only SMIRKS-based parametrization is supported for BCC" + ) + else: + self.top_mat = None + + # NBFIX map_nbfix = [] - map_nbfix = np.array(map_nbfix, dtype=int).reshape((-1, 2)) - - self.covalent_map = build_covalent_map(data, 6) + map_nbfix = jnp.array(map_nbfix, dtype=jnp.int32).reshape(-1, 2) if unit.is_quantity(nonbondedCutoff): r_cut = nonbondedCutoff.value_in_unit(unit.nanometer) @@ -569,7 +791,6 @@ def createForce(self, system, data, nonbondedMethod, nonbondedCutoff, self.coeff_method) map_lj = jnp.array(maps["sigma"]) - map_nbfix = jnp.array(map_nbfix) map_charge = jnp.array(maps["charge"]) # Free Energy Settings # @@ -664,15 +885,17 @@ def createForce(self, system, data, nonbondedMethod, nonbondedCutoff, # use Reaction Field coulforce = CoulReactionFieldForce(r_cut, map_charge, - isPBC=ifPBC) + isPBC=ifPBC, + topology_matrix=self.top_mat) if nonbondedMethod is app.NoCutoff: # use NoCutoff - coulforce = CoulNoCutoffForce(map_charge) + coulforce = CoulNoCutoffForce(map_charge, topology_matrix=self.top_mat) else: coulforce = CoulombPMEForce(r_cut, map_charge, kappa, - (K1, K2, K3)) + (K1, K2, K3), topology_matrix=self.top_mat) else: assert nonbondedMethod is app.PME, "Only PME is supported in free energy calculations" + assert not self.useBCC, "BCC usage in free energy calculations is not supported yet" coulforce = CoulombPMEFreeEnergyForce(r_cut, map_charge, kappa, (K1, K2, K3), @@ -699,8 +922,13 @@ def potential_fn(positions, box, pairs, params): params[self.name]["sigma"], params[self.name]["epsfix"], params[self.name]["sigfix"], mscales_lj) - coulE = coulenergy(positions, box, pairs, - params[self.name]["charge"], mscales_coul) + + if not self.useBCC: + coulE = coulenergy(positions, box, pairs, + params[self.name]["charge"], mscales_coul) + else: + coulE = coulenergy(positions, box, pairs, + params[self.name]["charge"], params[self.name]["bcc"], mscales_coul) if 
useDispersionCorrection: ljDispEnergy = ljDispEnergyFn(box, @@ -745,6 +973,26 @@ def potential_fn(positions, box, pairs, params, vdwLambda, def getJaxPotential(self): return self._jaxPotential + def getAddVsiteFunc(self): + """ + Get function to add coordinates for virtual sites + """ + return self._addVsiteFunc + + def getVsiteObj(self): + """ + Get `dmff.classical.vsite.VirtualSite` object + """ + if self.useVsite: + return self.vsiteObj + else: + return None + + def getTopologyMatrix(self): + """ + Get topology Matrix + """ + return self.top_mat dmff.api.jaxGenerators["NonbondedForce"] = NonbondedJaxGenerator diff --git a/docs/assets/arch.svg b/docs/assets/arch.svg new file mode 100644 index 000000000..f033f9175 --- /dev/null +++ b/docs/assets/arch.svg @@ -0,0 +1,121 @@ + + + + + + + + + + + + + + + + + +ForceField.xml + + +d +mff.Hamiltonian + + +o +penmm.Topology + + + +Force Generators + + + +createPotential + + + +Differentiable +Parameters + + +Init +Calculators + + + + + +potential_fn + + +Positions + + +Pairs + + + + +Input + + + +grad_potential_fn + +j +ax.grad + + + + + + +Forces + + +Virial + + +dVdl + + +Param Grad + + +Energy + +Frontend +Backend + + +Bond + + +Angle + + +Torsion + + +vdW + + +Coulomb + + +MPID +- +like + + + +dmff.NeighborList + + + + +Box + + + diff --git a/docs/assets/clpy.png b/docs/assets/clpy.png new file mode 100644 index 000000000..4a2abebc5 Binary files /dev/null and b/docs/assets/clpy.png differ diff --git a/docs/assets/smirks.png b/docs/assets/smirks.png new file mode 100644 index 000000000..c79969bff Binary files /dev/null and b/docs/assets/smirks.png differ diff --git a/docs/assets/vsite.png b/docs/assets/vsite.png new file mode 100644 index 000000000..ca4874217 Binary files /dev/null and b/docs/assets/vsite.png differ diff --git a/docs/dev_guide/arch.md b/docs/dev_guide/arch.md index 4b1aa8c62..d31543c90 100644 --- a/docs/dev_guide/arch.md +++ b/docs/dev_guide/arch.md @@ -1,6 +1,6 @@ # 2. 
Software architecture -![arch](../assets/arch.png) +![arch](../assets/arch.svg) The overall architechture of DMFF can be divided into two parts: 1. parser & typing and 2. calculators. We usually refer to the former as the *frontend* and the latter as the *backend* for ease of description. diff --git a/docs/gen_ref_pages.py b/docs/gen_ref_pages.py index 77fbb0a3c..ff00dc02c 100644 --- a/docs/gen_ref_pages.py +++ b/docs/gen_ref_pages.py @@ -7,6 +7,9 @@ nav = mkdocs_gen_files.Nav() for path in sorted(Path("dmff").rglob("*.py")): # + + if path.name.startswith("_"): + continue module_path = path.relative_to('dmff').with_suffix("") # diff --git a/docs/index.md b/docs/index.md index 5406747fb..d66fa2f09 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,25 +1,47 @@ # DMFF +[![doi:10.26434/chemrxiv-2022-2c7gv](https://img.shields.io/badge/DOI-10.26434%2Fchemrxiv--2022--2c7gv-blue)](https://doi.org/10.26434/chemrxiv-2022-2c7gv) + +## About DMFF + **DMFF** (**D**ifferentiable **M**olecular **F**orce **F**ield) is a Jax-based python package that provides a full differentiable implementation of molecular force field models. This project aims to establish an extensible codebase to minimize the efforts in force field parameterization, and to ease the force and virial tensor evaluations for advanced complicated potentials (e.g., polarizable models with geometry-dependent atomic parameters). Currently, this project mainly focuses on the molecular systems such as: water, biological macromolecules (peptides, proteins, nucleic acids), organic polymers, and small organic molecules (organic electrolyte, drug-like molecules) etc. We support both the conventional point charge models (OPLS and AMBER like) and multipolar polarizable models (AMOEBA and MPID like). The entire project is backed by the XLA technique in JAX, thus can be "jitted" and run in GPU devices much more efficiently compared to normal python codes. 
The behavior of organic molecular systems (e.g., protein folding, polymer structure, etc.) is often determined by a complex effect of many different types of interactions. The existing organic molecular force fields are mainly empirically fitted and their performance relies heavily on error cancellation. Therefore, the transferability and the prediction power of these force fields are insufficient. For new molecules, the parameter fitting process requires essential manual intervention and can be quite cumbersome. In order to automate the parametrization process and increase the robustness of the model, it is necessary to apply modern AI techniques in conventional force field development. This project serves for this purpose by utilizing the automatic differentiable programming technique to develop a codebase, which allows a more convenient incorporation of modern AI optimization techniques. It also helps the realization of many exciting functions including (but not limited to): hybrid machine learning/force field models and parameter optimization based on trajectory. +### License and credits + +The project DMFF is licensed under [GNU LGPL v3.0](LICENSE). If you use this code in any future publications, please cite this using `Wang X, Li J, Yang L, Chen F, Wang Y, Chang J, et al. DMFF: An Open-Source Automatic +Differentiable Platform for Molecular Force Field +Development and Molecular Dynamics +Simulation. ChemRxiv. Cambridge: Cambridge Open Engage; 2022; This content is a preprint and has not been peer-reviewed.` + ## User Guide + [1. Introduction](user_guide/introduction.md) + [2. Installation](user_guide/installation.md) -+ [3. Compute energy and forces](user_guide/compute.md) -+ [4. Compute gradients with auto differentiable framework](user_guide/auto_diff.md) -+ [5. Theories](user_guide/theory.md) -+ [6. Introduction to force field xml files](user_guide/xml_spec.md) ++ [3. Basic usage](user_guide/usage.md) ++ [4. 
XML format force field](user_guide/xml_spec.md) ++ [5. Theory](user_guide/theory.md) ## Developer Guide + [1. Introduction](dev_guide/introduction.md) -+ [2. Architecture](dev_guide/arch.md) -+ [3. Convention](dev_guide/convention.md) - -## Modules -+ [1. ADMP](modules/admp.md) ++ [2. Software architecture](dev_guide/arch.md) ++ [3. Coding conventions](dev_guide/convention.md) ++ [4. Document writing](dev_guide/write_docs.md) + +## Code Structure +The code is organized as follows: + ++ `examples`: demos presented in Jupyter Notebook. ++ `docs`: documentation. ++ `package`: files for constructing packages or images, such as conda recipe and docker files. ++ `tests`: unit tests. ++ `dmff`: DMFF python codes ++ `dmff/admp`: source code of automatic differentiable multipolar polarizable (ADMP) force field module. ++ `dmff/classical`: source code of classical force field module. ++ `dmff/common`: source code of common functions, such as neighbor list. ++ `dmff/generators`: source code of force generators. ++ `dmff/sgnn`: source code of subgraph neural network force field model. 
## Support and Contribution diff --git a/docs/user_guide/installation.md b/docs/user_guide/installation.md index d4b3c4687..d4cd73550 100644 --- a/docs/user_guide/installation.md +++ b/docs/user_guide/installation.md @@ -17,6 +17,10 @@ pip install jax-md==0.2.0 ```bash conda install -c conda-forge openmm==7.7.0 ``` ++ Install [RDKit](https://www.rdkit.org/) (required for SMIRKS-based parametrization): +```bash +conda install -c conda-forge rdkit +``` ## 2.2 Install DMFF from Source Code One can download the DMFF source code from github: ```bash diff --git a/docs/user_guide/theory.md b/docs/user_guide/theory.md index 720476852..34e4928b7 100644 --- a/docs/user_guide/theory.md +++ b/docs/user_guide/theory.md @@ -10,7 +10,7 @@ DMFF project aims to implement organic molecular force fields using a differenti All interations involved in DMFF are briefly introduced below and the users are encouraged to read the references for more mathematical details: -## Electrostatic Interaction +## 5.1 Electrostatic Interaction The electrostatic interaction between two atoms can be described using multipole expansion, in which the electron cloud of an atom can be expanded as a series of multipole moments including charges, dipoles, quadrupoles, and octupoles etc. If only the charges (zero-moment) are considered, it is reduced to the point charge model in classical force fields: @@ -33,7 +33,7 @@ $$0, 10, 1c, 1s, 20, 21c, 21s, 22c, 22s, ...$$ The $T_{tu}^{AB}$ represents the interaction tensor between multipoles. The mathematical expression of these tensors can be found in the appendix F of Ref 1. The user can also find the conversion rule between different representations in Ref 1 & 5. -## Coordinate System for Multipoles +## 5.2 Coordinate System for Multipoles Different to charges, the definition of multipole moments depends on the coordinate system. The exact value of the moment tensor will be rotated in accord to different coordinate systems. 
There are three types of frames involved in DMFF, each used in a different scenario: @@ -44,7 +44,7 @@ Different to charges, the definition of multipole moments depends on the coordin - Quasi internal frame, aka. QI frame: this frame is defined for each pair of interaction sites, in which the z-axis is pointing from one site to another. In this frame, the real-space interaction tensor ($T_{tu}^{AB}$) can be greatly simplified due to symmetry. We thus use this frame in the real space calculation of PME. -## Polarization Interaction +## 5.3 Polarization Interaction DMFF supports polarizable force fields, in which the dipole moment of the atom can respond to the change of the external electric field. In practice, each atom has not only permanent multipoles $Q_t$, but also induced dipoles $U_{ind}$. The induced dipole-induced dipole and induced dipole-permanent multipole interactions needs to be damped at short-range to avoid polarization catastrophe. In DMFF, we use the Thole damping scheme identical to MPID (ref 6), which introduces a damping width ($a_i$) for each atom $i$. The damping function is then computed and applied to the corresponding interaction tensor. Taking $U_{ind}$-permanent charge interaction as an example, the definition of damping function is: @@ -76,7 +76,7 @@ where the off-diagonal term of $K$ matrix is induced-induced dipole interaction, In the current version, we temporarily assume that the polarizability is spherically symmetric, thus the polarizability $\alpha_i$ is a scalar, not a tensor. **Thus the inputs (`polarizabilityXX, polarizabilityYY, polarizabilityZZ`) in the xml API is averaged internally**. In future, it is relatively simple to relax this restriction: simply change the reciprocal of the polarizability to the inverse of the matrix when calculating the diagonal terms of the $K$ matrix. 
-## Dispersion Interaction +## 5.4 Dispersion Interaction In ADMP, we assume that the following expansion is used for the long-range dispersion interaction: @@ -96,7 +96,7 @@ In ADMP, this long-range dispersion is computed using PME (*vida infra*), just a In the classical module, dispersions are treated as short-range interactions using standard cutoff scheme. -## Long-Range Interaction with PME +## 5.5 Long-Range Interaction with PME The long-range potential includes electrostatic, polarization, and dispersion (in ADMP) interactions. Taking charge-charge interaction as example, the interaction decays in the form of $O(\frac{1}{r})$, and its energy does not converge with the increase of cutoff distance. The multipole electrostatics and dispersion interactions also converge slow with respect to cutoff distance. We therefore use Particle Meshed Ewald(PME) method to calculate these interactions. @@ -141,7 +141,7 @@ where the user needs to specify the cutoff distance $r_c$ when building the neig In the current version, the dispersion PME calculator uses the same parameters as in electrostatic PME. -## Short-Range Interaction +## 5.6 Short-Range Interaction Short-range pair interaction refers to all interactions with the following form: @@ -161,13 +161,14 @@ v(r)=\frac{C^{12}}{r^{12}} $$ - Tang-Tonnies Damping: damping function for short-range electrostatic and dispersion energies. + $$ - f_n(r, \beta) = 1 - e^{-\beta r}\sum_{k=0}^n {\frac{(\beta r)^k}{k!}} +f_n(r,\beta)=1-e^{-\beta r} \sum_{k=0}^{n}\frac{(\beta r)^k}{k!} $$ In ADMP, the user can define a pairwise kernel function $f(dr)=f(dr, m, a_i,a_j,b_i,b_j,\dots)$ ($a_i, b_i$ are atomic parameters), then use `generate_pairwise_interaction` to raise the kernel function into an energy calculator (see details in ADMP manual). -## Combination Rule +## 5.7 Combination Rule For most traditional force fields, pairwise parameters between interacting particles are determined by atomic parameters. 
This mathematical relationship is called the combination rule. For example, in the calculation of LJ potential, the following combination rule may be used: @@ -181,7 +182,7 @@ $$ In ADMP module, we do not make any assumptions about the specific mathematical forms of the combination rule and $v(r)$. Users need to write them in the definition of the pairwise kernel function. -## Neighbor List +## 5.8 Neighbor List All DMFF real space calculations depends on neighbor list (or "pair list" as we sometimes call in DMFF). Its purpose is to keep a record of all the "neighbors" within a certain distance of the central atom, thus avoiding to go over all pairs explicitly. @@ -189,7 +190,7 @@ In DMFF, we use external code ([jax-md](https://github.com/google/jax-md)) to bu Since the pair list only provides atom **id** information, it does not take part in the differentiation process, so it can be fed in as a normal numpy array (instead of a jax numpy array). -## Topological scaling +## 5.9 Topological scaling In order to avoid double-counting with the bonding term, we often need to scale the non-bonding interactions between two atoms that are topologically connected. The scaling factor depends on the topological distance between the two atoms. We define two atoms separated by one bond as "1-2" interaction, and those separated by two bonds as "1-3" interaction, and so on. For example, in the OPLS-AA force field, all "1-2" nonbonding interactions are turned off completely, while all "1-3" non-bonding interactions are scaled by 50%. DMFF supports such feature, and important variables related to topological scaling include: @@ -200,34 +201,36 @@ In order to avoid double-counting with the bonding term, we often need to scale - `pScales`/`dScales`: similar to `mScales`, but only related to polarizable calculations. They are scaling factors for induced-perm and induced-induced interactions, respectively. 
-## General Many-Body Interactions (such as ML force field) +## 5.10 General Many-Body Interactions - TODO: +(such as ML force field) TBA -## Bonding Interaction +## 5.11 Bonded Interaction Intramolecular bonding interactions refer to all interactions that depend on internal coordinates (IC), such as bonds, angles, and dihedrals, etc. - * Harmonic Bonding Terms ++ Harmonic Bonding Terms + The definition of the bonding term in DMFF is the same as in OpenMM. For each bond, we have: + $$ E=\frac{1}{2}k(x-x_0)^2 $$ - Note prefactor $1/2$ before the force constant. - * Harmonic Angle Terms - we have: ++ Harmonic Angle Terms + $$ E=\frac{1}{2} k\left(\theta-\theta_{0}\right)^{2} $$ - * Dihedral Terms ++ Dihedral Terms + 1. Proper dihedral 2. Improper dihedral - * Multi IC coupling term ++ Multi IC coupling term -## Typification +## 5.12 Typification Before energy calculation, atomic and IC parameters (such as charge, multipole moment, dispersion coefficient, polarizability, force constant of each bond and angle, etc.) need to be assigned first. @@ -237,7 +240,7 @@ In DMFF, the input parameters that need to be optimized are called **force field The design of the high-level DMFF API is based on the existing framework of OpenMM. DMFF needs to keep the derivation chain uninterrupted when dispatching the force field params into atomic params. Therefore, maintaining the basic design logic of OpenMM, we rewrite the typification part of OpenMM using Jax. Briefly speaking, OpenMM/DMFF requires the users to clearly define the type of each atom in each residue and the connection mode between atoms in residue templates. Then the residue templates are used to match the PDB file to typify the whole system. See the following [documents](../dev_guide/arch.MD) for details. -## References +## 5.13 References 1. [Anthony's book](https://oxford.universitypressscholarship.com/view/10.1093/acprof:oso/9780199672394.001.0001/acprof-9780199672394) 2. [The Multipolar Ewald paper in JCTC: J. Chem. 
Theory Comput. 2015, 11, 2, 436–450](https://pubs.acs.org/doi/abs/10.1021/ct5007983) diff --git a/docs/user_guide/usage.md b/docs/user_guide/usage.md index 8f6b00a82..4eeefbf34 100644 --- a/docs/user_guide/usage.md +++ b/docs/user_guide/usage.md @@ -1,4 +1,5 @@ # 3. Basic usage +This chapter will introduce some basic usage of DMFF. All scripts can be found in `examples/` directory in which Jupyter notebook-based demos are provided. ## 3.1 Compute energy DMFF uses OpenMM to parse input files, including coordinates file, topology specification file and force field parameter file. Then, the core class `Hamiltonian` inherited from `openmm.ForceField` will be initialized and the method `createPotential` will be called to create differentiable potential energy functions for different energy terms. Take parametrzing an organic moleclue with GAFF2 force field as an example: ```python @@ -81,7 +82,7 @@ force = -pos_grad_func(positions, box, pairs, params) ## 3.3 Compute parametric gradients Similarly, the derivative of energy with regard to force field parameters can also be computed easily. -``` +```python param_grad_func = jax.grad(nbfunc, argnums=-1) pgrad = param_grad_func(positions, box, pairs, params) print(pgrad["NonbondedForce"]["charge"]) @@ -103,3 +104,102 @@ print(pgrad["NonbondedForce"]["charge"]) 485.1427 512.1267 558.55896 560.4667 562.812 333.74194 ] ``` + +## 3.4 Parametrize molecules with SMIRKS-based force field +### 3.4.1 Background +Besides atom-typing based methods, DMFF also supports assigning force field parameters with [SMIRKS](https://www.daylight.com/dayhtml/doc/theory/theory.smirks.html). SMIRKS is an extenstion of [SMARTS](https://www.daylight.com/dayhtml/doc/theory/theory.smarts.html) language which allows users not only to specify chemical substructures with certain patterns, but also to numerically tag the matched atoms for assigning parameters. 
This approach avoids the duplicate atom-typing definition process, which enables new parameters to be easily introduced to existing force field parameter sets. [OpenFF](https://github.com/openforcefield/openff-toolkit)[[1-2]](#sminorff) series are examples of SMIRKS-based force fields for organic molecules. + +![smirks](../assets/smirks.png) +
Fig. 1 Illustration of matching chemical substructures with SMIRKS language. Adapted from [[1]](#sminorff).
+ +### 3.4.2 Parametrize molecules with SMIRKS by DMFF +The SMIRKS pattern matching is supported by RDKit package, which can be installed with conda: +```bash +conda install rdkit -c conda-forge +``` +To begin with, we need a molecule encoded in `rdkit.Chem.Mol` object. As an example, we will load an N-methylacetamide molecule defined in `examples/smirks/C3H7NO.mol`: +```python +from rdkit import Chem +from dmff import Hamiltonian + +mol = Chem.MolFromMolFile("C3H7NO.mol", removeHs=False) # hydrogens must be preserved +``` +Then load force field parameters in xml format. Instructions about how to write a SMIRKS-based force field XML file can be found in the [Chapter 4](xml_spec.md). + +```python +h_smk = Hamiltonian("C3H7NO.xml", noOmmSys=True) +``` +Note that the argument `noOmmSys` is set to `True` so that DMFF will not create an openmm system, as `openmm` does not support SMIRKS-based force field definitions. + +Build an openmm topology and parametrize the molecule to create differentiable potential energy functions: +```python +top = h_smk.buildTopologyFromMol(mol) +potObj = h_smk.createPotential(top, rdmol=mol) +``` +So far, we can utilize this `dmff.Potential` object to calculate energy and forces as we did in the previous sections. + +### 3.4.3 Bond Charge Correction (BCC) and Virtual Sites +Bond charge correction[[3-4]](#bcc) is an approach to obtain high-accuracy atomic partial charges (e.g. HF/6-31G* ESP-fit charges) by adopting corrections to low-accuracy atomic charges (e.g. AM1 Mulliken charges). In order to ensure a zero of total correction values within a molecule, these correction parameters are usually defined based on bond types, which suggests that they can also be defined by SMIRKS patterns. + +Virtual sites are additional off-centered charged sites which are introduced to improve the description of electrostatic effects caused by sigma hole (halogen bond) or lone pairs. 
The positions of virtual sites are calculated directly from their parent atoms, not by integrating the equations of motion. This approach is well known for its application in TIP4P[[5]](#tip4p) and TIP5P[[6]](#opls4) water models, and it also proves to be useful in drug-like molecular force fields like OPLS series[[7-8]](#opls3). Basically, the parameters to define a virtual site include: where to add virtual sites, how the virtual sites' positions are determined and the charges. + +Not surprisingly, all these parameters can be defined with SMIRKS patterns and parsed by DMFF by adding terms in `<NonbondedForce>`, such as: +```xml + + + + + + + + + +``` +In [Chapter 4](xml_spec.md), we will explain the meaning of these XML-format parameters in detail. Here, we will give a simple example to parametrize 2-chloropyridine with BCC parameters and virtual sites. + +As introduced above, we first load molecule and force field parameters. +```python +import jax.numpy as jnp +from rdkit import Chem +from dmff import Hamiltonian + +mol = Chem.MolFromMolFile("clpy.mol", removeHs=False) +h_vsite = Hamiltonian("clpy_vsite.xml", noOmmSys=True) +``` +Next, we build the dmff potential. We can see the BCC and virtual site parameters are successfully parsed. +```python +top = h_vsite.buildTopologyFromMol(mol) +potObj = h_vsite.createPotential(top, rdmol=mol) +``` + +Then we can add virtual sites to the molecule and obtain a new `rdkit.Chem.Mol` object. +```python +mol_vsite = h_vsite.addVirtualSiteToMol(rdmol, h_vsite.getParameters()) +Chem.MolToMolFile(mol_vsite, "clpy_vsite.mol") +``` +By dumping this molecule to a mol file and visualizing it, we can see that, as expected, two virtual sites are added along the bond between aromatic carbon (arC) and chlorine and also along the bisector of the arC-N-arC angle. +![clpy](../assets/clpy.png) + +Alternatively, we can also add coordinates of virtual sites by taking atomic positions matrix as an input. 
+```python +pos = jnp.array(mol.GetConformer().GetPositions()) / 10 # convert angstrom to nm +pos_vsite = h_vsite.addVirtualSiteCoords(pos, h_vsite.getParameters()) +``` + +## 3.5 Reference +
+- [1] [Mobley, David L., et al. "Escaping atom types in force fields using direct chemical perception." Journal of chemical theory and computation 14.11 (2018): 6076-6092.](https://doi.org/10.1021/acs.jctc.8b00640) +
+- [2] [Qiu, Yudong, et al. "Development and Benchmarking of Open Force Field v1. 0.0—the Parsley Small-Molecule Force Field." Journal of Chemical Theory and Computation 17.10 (2021): 6262-6280.](https://doi.org/10.1021/acs.jctc.1c00571) +
+- [3] [Jakalian, Araz, et al. "Fast, efficient generation of high‐quality atomic charges. AM1‐BCC model: I. Method." Journal of computational chemistry 21.2 (2000): 132-146.](https://doi.org/10.1002/(SICI)1096-987X(20000130)21:2<132::AID-JCC5>3.0.CO;2-P) +
+- [4] [Jakalian, Araz, David B. Jack, and Christopher I. Bayly. "Fast, efficient generation of high‐quality atomic charges. AM1‐BCC model: II. Parameterization and validation." Journal of computational chemistry 23.16 (2002): 1623-1641.](https://doi.org/10.1002/jcc.10128) +
+- [5] [Jorgensen, William L., et al. "Comparison of simple potential functions for simulating liquid water." The Journal of chemical physics 79.2 (1983): 926-935.](https://doi.org/10.1063/1.445869) +
+- [6] [Mahoney, Michael W., and William L. Jorgensen. "A five-site model for liquid water and the reproduction of the density anomaly by rigid, nonpolarizable potential functions." The Journal of chemical physics 112.20 (2000): 8910-8922.](https://doi.org/10.1063/1.481505) +
+- [7] [Harder, Edward, et al. "OPLS3: a force field providing broad coverage of drug-like small molecules and proteins." Journal of chemical theory and computation 12.1 (2016): 281-296.](https://doi.org/10.1021/acs.jctc.5b00864) +
+- [8] [Lu, Chao, et al. "OPLS4: Improving force field accuracy on challenging regimes of chemical space." Journal of chemical theory and computation 17.7 (2021): 4291-4300.](https://doi.org/10.1021/acs.jctc.1c00302) \ No newline at end of file diff --git a/docs/user_guide/xml_spec.md b/docs/user_guide/xml_spec.md index 7386c6345..24f7aff09 100644 --- a/docs/user_guide/xml_spec.md +++ b/docs/user_guide/xml_spec.md @@ -1,22 +1,22 @@ -# 4. XML format force field +# 4. XML Format Force Field -The design of openmm force field file is quite modular and convenient to use. Unfortunately, only limited documentations are available right now to explain the details of the file format. Here, the format and the meaning of the OpenMM XML file are sorted in details in below. +DMFF utilizes the XML format to describe force fields, which is compatible with OpenMM. The design of this format is quite modular and convenient to use. Unfortunately, only limited documentation is available right now to explain the details of the file format. Here, the format and the meaning of the OpenMM XML file are explained in detail below. Overall speaking, the typification process is composed by the following steps: 1. Build the residue topology (draw all bonds) according to the residue name. This is done by matching the residue template in the topology file. -2. Match the residue topology with the right parameter template. +2. Match the residue topology with corresponding parameter templates. 3. Use the information in the matched parameter template to define the potential function. More specifically, go over all forces (i.e., potential components), and for each force (e.g., the bond stretching potential), match all terms (e.g., find all bonds) and register these terms in the total potential. The files involved in this process are introduced below. -## Topology file +## 4.1 Topology File -Topology file is used to describe the bonding information of residues. 
Whenever a residue name is matched, the OpenMM topology module will add keys for atoms according to the information in the topology file. +Bonding information is necessary for DMFF to assign force field parameters correctly. Such information can be provided either by the PDB file (via the `CONECT` keyword) or by a topology file in XML format. -An example of the residue topology XML file is as follows: +An example of the topology XML file of alanine is as follows: ```xml @@ -39,31 +39,24 @@ An example of the residue topology XML file is as follows: ``` -Where "-C" indicates an external bond between the "C" atom and the **previous** residue. During typification, the code will try to match all atoms in the topology template with the actual atoms in the real structure. Once a match is successful, the matched atom will be bonded accordingly. If the match fails, this template atom will be skipped. Therefore, the actual number of bonds in the matched structure can be less than the number of bonds defined in the template. +In the topology file, the chemical bonds are defined per residue and specified by the names of their constituent atoms. Note that here "-C" indicates an external bond between the "C" atom and the **previous** residue. The program (`openmm.topology` module) will first match the residues in the provided structure with the residues defined in the topology files by their residue names. Whenever a residue is matched, the program will then try to match all atoms in the topology template with the actual atoms in the real structure. Once a match is successful, the matched atom will be bonded accordingly. If the match fails, this template atom will be skipped. Therefore, the actual number of bonds in the matched structure can be less than the number of bonds defined in the template. It should also be noted that disulfide bonds are not registered in this step. 
The `openmm.topology` module will look for SG atoms in Cys that are not connected to HG, and connect SG atom pairs within 0.3 nm as disulfide bonds. -The XML file registration method is as follows: +For common molecules such as water, protein, DNA and RNA, the topology files have been constructed and loaded internally, so the users do not need to provide an external one. But for other arbitrary molecules, such as a drug-like organic molecule, an external topology file is required to provide bonding information. Otherwise, users must load pdb files with correct bonding information, which is specified by the `CONECT` keyword. + +An external topology XML file is registered as: ``` python -try: - import openmm.app as app -except: - import simtk.openmm.app as app +import openmm.app as app -app.Topology.loadBondDefinations("residues.xml") # register residue topology - -# Create topology and add atoms and residues to it, which is automatically performed when reading PDB -top = app.Topology() -... -top.createStandardBonds() # Connect keys according to template files +app.Topology.loadBondDefinitions("topology.xml") # register residue topology ``` -After this process, the bonding topologies are constructed in the matched residue, but the force field parameters are not yet assigned. It should be noted that disulfide bonds are not registered in this step. The OpenMM topology class will look for SG atoms in Cys that are not connected to Hg, and connect SG atom pairs within 0.3nm as disulfide bonds. +It should be noted that after this process, the bonding topologies are constructed for the matched residues in the given structure, but the force field parameters are not yet assigned; this is discussed in the next section. -## Force field Parameter File -After all bonds are constructed using the topology file, the force field parameters will be assigned using the force field parameter file. 
+## 4.2 Force Field Parameter File -The force field parameter file is as follows: +The force field parameters are specified in an XML-formatted force field parameter file. A sample force field file of water is given as follows: ``` xml @@ -93,25 +86,18 @@ The force field parameter file is as follows:
``` -This file can be further divided into the residue part and the force field part. +The force field parameter file must have one root element `<ForceField>` and can contain several children, which can be mainly divided into two parts: (1) the residue part and (2) the force field part. They are described in detail below. -### Residue Part -``` xml - - - - - - - - - - - - ... - -``` -The `` node of the residue part defines all the atoms involved in the residue and some paramemters per atom, which can be called by the force field part on demand. The `` node defines the bonding information of the residue. The information contained in this part is different from that in the topology file discussed above. Take ALA as an example, we usually have at least three states for ALA, N-end, C-end and in-chain. The corresponding parameter templates in the force field file are as follows: +### 4.2.1 Residue Part +The detailed description of this part can be found in the [OpenMM user guide](http://docs.openmm.org/latest/userguide/application/05_creating_ffs.html). Here we just provide a brief summary. + +The residue part refers to the contents within the tag `<Residues>`, in which each residue template is defined in a `<Residue>` tag. The residue template contains the following nodes: + ++ `<Atom>`: specifies the name of the atom and its atom type, as well as some atom-level parameters (usually atomic charges). ++ `<Bond>`: specifies atom pairs that are bonded to each other. ++ `<ExternalBond>`: specifies atoms that will be bonded to an atom of a different residue, such as the amide bond between amino acid residues. + +It should be clarified that the bonding information contained in `<Residue>` tags may be different from that in the topology file. Taking alanine (ALA) as an example, we usually have at least three states for ALA: at the N-terminus, at the C-terminus, or inside a chain. 
The corresponding parameter templates in the force field file are as follows: ``` xml @@ -189,10 +175,17 @@ The `` node of the residue part defines all the atoms involved in the resi ``` -In this example, the atom numbers and the bonding configurations of ALA, CALA and NALA are different. When matching each ALA, OpenMM will try to match CALA, NALA, and ALA separately. It will compare each parameter template with the topology of the residue, and select the one with the right number of atoms, element composition, and bonding configurations as the matched template. The parameter template contains atom type and class information, which are then used to assign force field parameters. +In this example, the atom numbers and the bonding configurations of ALA, CALA and NALA are different. When matching each ALA, OpenMM will try to match CALA, NALA, and ALA separately. It will compare each parameter template with the topology of the residue, and select the one with exactly the correct number of atoms, element composition, and bonding configurations as the matched template. The parameter template contains atom type and class information, which are then used to assign force field parameters. + + +### 4.2.2 Force Field Part +DMFF supports two schemes to assign force field parameters: the atom-typing scheme and the SMIRKS-based scheme. The structures of the force field files for these two schemes are generally similar, with only slight differences. -### Forcefield Part +#### 4.2.2.1 Atom-typing Scheme +The atom-typing scheme is the most widely adopted way to assign parameters. In this scheme, the types of atoms are first defined, and the parameters of bonds/angles/dihedrals are then defined and assigned according to atom types. In OpenMM, the force field parameters are constructed in this scheme, and DMFF provides full support for it. 
Again, a detailed description of how to write and interpret this kind of force field file can also be found in the [OpenMM user guide](http://docs.openmm.org/latest/userguide/application/05_creating_ffs.html). + +An example of TIP3P water parameters is provided: ```xml @@ -215,8 +208,90 @@ In this example, the atom numbers and the bonding configurations of ALA, CALA an ``` -The `` node defines all atom types. The `type` label of each atom in the residue part will match the `name` label of each child node of ``. For each atom type, it also defines a `class` tag for different matching scenarios. The `name` tag of different `` must be different, but the `class` tag can be the same. +The `<AtomTypes>` node contains all atom types, each of which is defined in a child node `<Type>`. The `type` label of each atom in the residue part (i.e. `Residues/Residue/Atom`) will match the `name` label of a `<Type>` node. Each atom type also has a `class` tag for a different matching scenario, which is designed for situations where there are many rather similar atom types that can be grouped together for simplification. For example, in small-molecule force fields (e.g. GAFF), the number of Lennard-Jones (LJ) parameters is much smaller than that of valence parameters, such as dihedrals. We may need different atom types in order to specify these valence terms, but we can group the atom types with the same LJ parameters into a certain "atom class", and this will simplify the definition of the force field. Thus, be aware that the `name` tag of different `<Type>` nodes must be different, but the `class` tag can be the same. + +The `<*Force>` nodes define the parameters as well as the matching rules of different types of interactions. For example, `<HarmonicBondForce>` defines harmonic bonds, and the `<NonbondedForce>` node defines intermolecular interactions. In each kind of force, there are many child nodes, and each of them defines a specific type. 
In each type, several tags with names beginning with `type` or `class` define its constituent atoms, along with other parameters necessary to build the interaction. For instance, for `<HarmonicBondForce>`, there are many `<Bond>` nodes defining different bond types, and in each node the `class1` and `class2` tags specify its two constituent atom classes (or `type1` and `type2` specify atom types), while `k` and `length` define its force constant (in kJ/mol/nm<sup>2</sup>) and equilibrium bond length (in nm), respectively. + +Again, for more information about each force, one can refer to the [OpenMM user guide](http://docs.openmm.org/latest/userguide/application/05_creating_ffs.html#writing-the-xml-file). + +#### 4.2.2.2 SMIRKS-based Scheme + +DMFF also supports assigning force field parameters with [SMIRKS](https://www.daylight.com/dayhtml/doc/theory/theory.smirks.html). SMIRKS is an extension of the [SMARTS](https://www.daylight.com/dayhtml/doc/theory/theory.smarts.html) language which allows users not only to specify chemical substructures with certain patterns, but also to numerically tag the matched atoms for assigning parameters. + +An example SMIRKS-based force field for the ethane molecule is as follows: + +```xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +``` + +The convention of this scheme in an XML-formatted force field is very similar to that of the atom-typing scheme; the only change is that the `type` (or `class`) tags specifying atom types are replaced with the tag `smirks`. Note that the `<AtomTypes>` and `<Residues>` nodes are also required, as DMFF needs them to look up atomic charges and element types, but the names of the atom types are not used in matching interaction types. + +In DMFF, SMIRKS patterns are matched sequentially, which means that while building each type of interaction, the program will try to match all the patterns defined in that force and assign parameters with the last matched pattern. 
Hence, users need to put patterns with more specificity behind patterns with less specificity in order to assign exactly the correct parameters. For example, in the example above, there are two similar smirks patterns in harmonic bonds: `"[#6:1]-[#6:1]"` and `"[#6X4:1]-[#6X4:1]"`. The program will finally use the latter one for the ethane molecule, which is a more specifically defined parameter than the former one. + +The SMIRKS-based scheme has two additional features that are not supported by the atom-typing scheme: bond charge corrections (BCC) and virtual sites. They are defined with `<BondChargeCorrection>` and `<VirtualSite>` nodes respectively under `<NonbondedForce>`. + +In `<BondChargeCorrection>`, a pattern matching two atoms is defined with the `smirks` tag; the correction value defined in the tag `bcc` is added to the partial charge of the first atom and subtracted from the charge of the second atom. + +In `<VirtualSite>`, three tags are required: `smirks`, `vtype` and `distance`. The tag `smirks` specifies the substructure pattern to which the virtual site is to be added, with the first atom as the parent atom of the virtual site. The tag `vtype` defines the type of the virtual site. Currently only two types are supported: along a bond (`"1"`) and along the bisector of an angle (`"2"`). The tag `distance` defines the distance between the virtual site and its parent atom, in nm. + +The charge of the virtual site can currently only be specified by bond charge corrections, so the user needs to define corresponding BCC terms that can match an atom with atomic number 0 (i.e. `"[#0]"`). + +Here is an example of several virtual site definitions and a graphical illustration: +```xml + + + +``` -The `<*Force>` node defines the matching rule of a potential function. For example, `` defines harmonic bond, and the `` node defines intermolecular interaction. 
For more information about each force, the readers are referred to this document: [details](http://docs.openmm.org/latest/userguide/application/05_creating_ffs.html#writing-the-xml-file). 
+![vsite](../assets/vsite.png) -In the matching process, OpenMM will iterate all atoms, bonds, angles, dihedrals and impropers, and add all matched entries to the total potential function. Matching can be carried out according to the `type` tag, corresponding to the `name` of each atom defined in ``; It can also be based on the `class` tag, corresponding to the `class` of each atom in ``. This design is applicable to the situation when there are many types of atoms but they are roughly the same. For example, there are few kinds of LJ parameters in small molecular force field, but there are many kinds of intramolecular force parameters. We can even create a separate type for a specific small molecule to define the intra molecular interaction, but it belongs to the same class on LJ, so as to achieve the effect that the small molecule parameters can be tuned and do not affect each other. diff --git a/examples/smirks/C3H7NO.mol b/examples/smirks/C3H7NO.mol new file mode 100644 index 000000000..ba04b279b --- /dev/null +++ b/examples/smirks/C3H7NO.mol @@ -0,0 +1,28 @@ + + RDKit 3D + + 12 11 0 0 0 0 0 0 0 0999 V2000 + -1.8857 -0.0588 0.0187 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.4726 -0.4185 -0.3545 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.2138 -1.3729 -1.0799 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.4819 0.4035 0.2006 N 0 0 0 0 0 0 0 0 0 0 0 0 + 1.8857 0.1345 0.0172 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.9470 0.9478 0.4417 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.5115 -0.0907 -0.8776 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.2561 -0.7782 0.7535 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2092 1.0317 0.9448 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4473 1.0406 0.2554 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0840 -0.1673 -1.0148 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.1785 -0.6717 0.6950 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 + 2 3 2 0 + 2 4 1 0 + 4 5 1 0 + 1 6 1 0 + 1 7 1 0 + 1 8 1 0 + 4 9 1 0 + 5 10 1 0 + 5 11 1 0 + 5 12 1 0 +M END diff --git a/examples/smirks/C3H7NO.xml b/examples/smirks/C3H7NO.xml new file mode 100644 index 000000000..942af040e --- /dev/null +++ 
b/examples/smirks/C3H7NO.xml @@ -0,0 +1,83 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/examples/smirks/clpy.mol b/examples/smirks/clpy.mol new file mode 100644 index 000000000..4d0b10a78 --- /dev/null +++ b/examples/smirks/clpy.mol @@ -0,0 +1,27 @@ + + RDKit 3D + + 11 11 0 0 0 0 0 0 0 0999 V2000 + 2.8623 -1.1338 -0.4470 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2590 -0.5725 -0.2105 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0072 0.7619 0.0614 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3166 1.1502 0.2425 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3245 0.1954 0.1448 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.9657 -1.1156 -0.1315 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3081 -1.5238 -0.3122 N 0 0 0 0 0 0 0 0 0 0 0 0 + 1.8148 1.4810 0.1311 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5605 2.1868 0.4577 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3663 0.4655 0.2806 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7178 -1.8952 -0.2170 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 + 2 3 2 0 + 3 4 1 0 + 4 5 2 0 + 5 6 1 0 + 6 7 2 0 + 7 2 1 0 + 3 8 1 0 + 4 9 1 0 + 5 10 1 0 + 6 11 1 0 +M END diff --git a/examples/smirks/clpy_vsite.mol b/examples/smirks/clpy_vsite.mol new file mode 100644 index 000000000..02699f734 --- /dev/null +++ b/examples/smirks/clpy_vsite.mol @@ -0,0 +1,31 @@ + + RDKit 3D + + 13 13 0 0 0 0 0 0 0 0999 V2000 + 2.8623 -1.1338 -0.4470 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2590 -0.5725 -0.2105 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0072 0.7619 0.0614 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3166 1.1502 0.2425 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3245 0.1954 0.1448 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.9657 -1.1156 -0.1315 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3081 -1.5238 -0.3122 N 0 0 0 0 0 0 0 0 0 0 0 0 + 1.8148 1.4810 0.1311 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5605 2.1868 0.4577 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3663 0.4655 0.2806 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7178 -1.8952 -0.2170 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.3580 -1.6574 -0.6676 R 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3985 
-1.9053 -0.3914 R 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 + 2 3 2 0 + 3 4 1 0 + 4 5 2 0 + 5 6 1 0 + 6 7 2 0 + 7 2 1 0 + 3 8 1 0 + 4 9 1 0 + 5 10 1 0 + 6 11 1 0 + 12 1 0 0 + 13 7 0 0 +M END diff --git a/examples/smirks/clpy_vsite.xml b/examples/smirks/clpy_vsite.xml new file mode 100644 index 000000000..6d9d89299 --- /dev/null +++ b/examples/smirks/clpy_vsite.xml @@ -0,0 +1,84 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/examples/smirks/demo.ipynb b/examples/smirks/demo.ipynb new file mode 100644 index 000000000..e7e98caeb --- /dev/null +++ b/examples/smirks/demo.ipynb @@ -0,0 +1,276 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Parametrize Molecules with SMIRKS-based Force Field" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Basic Usage" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "SMIRKS is an extension of SMARTS language that enables users to define chemical substructures with certain patterns as well as to numerically tag the matching atoms. This allowed force field developers to introduce new parameters more easily by avoiding starting from defining new atom types. DMFF can deal with SMIRKS-based force field in XML format to create differentiable potential functions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The usage of SMIRKS-based force field is generally the same as conventional force field based on atom-typing scheme, with the only difference such that we need an extra `rdkit.Chem.Mol` as input because the matching of SMIRKS pattern is powered by rdkit." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As an example, we will first load a N-methylacetamide molecule defined in a mol file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import jax.numpy as jnp\n", + "from rdkit import Chem\n", + "from dmff import Hamiltonian, NeighborList\n", + "\n", + "mol = Chem.MolFromMolFile(\"C3H7NO.mol\", removeHs=False) # hydrogens must be preserved" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then load force field parameters in xml format. Instructions about how to write a SMIRKS-based force field XML file can be found in Chapter 4 of the user's guide." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "h_smk = Hamiltonian(\"C3H7NO.xml\", noOmmSys=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that the argument noOmmSys is set to True so that DMFF will not create an openmm system, as openmm does not support SMIRKS-based force field definitions.\n", + "\n", + "Finally, we build an openmm topology and parametrize the molecule to create differentiable potential energy functions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top = h_smk.buildTopologyFromMol(mol)\n", + "potObj = h_smk.createPotential(top, rdmol=mol)\n", + "func = potObj.getPotentialFunc()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "So far, we can utilize this dmff.Potential object to calculate energy and forces as we did in the previous sections." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pos = jnp.array(mol.GetConformer().GetPositions()) / 10 # angstrom -> nm\n", + "box = jnp.eye(3, dtype=jnp.float32)\n", + "nblist = NeighborList(box, 1.0, h_smk.getCovalentMap())\n", + "nblist.allocate(pos)\n", + "pairs = nblist.pairs\n", + "energy = func(pos, box, pairs, h_smk.getParameters())\n", + "print(energy)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bond Charge Correction and Virtual Sites\n", + "\n", + "This section mainly introduces how to use BCC and virtual sites in DMFF." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, import required libraries:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import jax\n", + "import jax.numpy as jnp\n", + "from rdkit import Chem\n", + "from dmff import Hamiltonian, NeighborList" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load the molecule and SMIRKS-based force field file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mol = Chem.MolFromMolFile(\"clpy.mol\", removeHs=False)\n", + "h_vsite = Hamiltonian(\"clpy_vsite.xml\", noOmmSys=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "BCC and virtual site parameters are parsed into `h_vsite.getParameters()['NonbondedForce']`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = h_vsite.getParameters()\n", + "print(params['NonbondedForce'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Build OpenMM topology and create DMFF potential" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "top = h_vsite.buildTopologyFromMol(mol)\n", + 
"potObj = h_vsite.createPotential(top, rdmol=mol)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add virtual site to RDKit Mol object" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mol_vsite = h_vsite.addVirtualSiteToMol(mol, h_vsite.getParameters())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calculate energy, forces and parametric gradients" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pos_vsite = jnp.array(mol_vsite.GetConformer().GetPositions()) / 10 # angstrom -> nm\n", + "box = jnp.eye(3, dtype=jnp.float32)\n", + "nblist = NeighborList(box, 1.0, h_vsite.getCovalentMap())\n", + "nblist.allocate(pos_vsite)\n", + "pairs_vsite = nblist.pairs\n", + "\n", + "nbfunc_vsite = jax.value_and_grad(\n", + " potObj.dmff_potentials['NonbondedForce'], \n", + " argnums=-1, \n", + " allow_int=True # set to True since the type of virtual sites are speicified as integars\n", + ")\n", + "nbene_vsite, nbene_grad_vsite = nbfunc_vsite(pos_vsite, box, pairs_vsite, params)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, we can also add coordinates of virtual sites by taking atomic positions matrix as an input." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pos = jnp.array(mol.GetConformer().GetPositions()) / 10 # convert angstrom to nm\n", + "pos_vsite = h_vsite.addVirtualSiteCoords(pos, h_vsite.getParameters())\n", + "print(pos_vsite.shape)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dmff", + "language": "python", + "name": "dmff" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "vscode": { + "interpreter": { + "hash": "44fe82502fda871be637af1aa98d2b3ddaac01204dd30f1519cbec4e95000815" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/package/docker/develop_cpu.dockerfile b/package/docker/develop_cpu.dockerfile new file mode 100644 index 000000000..ccd60f6cc --- /dev/null +++ b/package/docker/develop_cpu.dockerfile @@ -0,0 +1,27 @@ +FROM ubuntu:20.04 +SHELL ["/bin/bash", "-c"] +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install wget -y && apt-get clean all + +RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ + bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/miniconda && \ + /opt/miniconda/bin/conda init bash && \ + rm -rf Miniconda3-latest-Linux-x86_64.sh && \ + rm -rf /var/lib/apt/lists/* + +ENV PATH=/opt/miniconda/bin:$PATH + +RUN conda create -n dmff python=3.8 openmm=7.7.0 rdkit freud pytest openbabel biopandas mdtraj==1.9.7 -c conda-forge -y && \ + conda init && \ + source activate dmff && \ + pip install pymbar==4.0.1 && \ + pip install jaxlib[cpu]==0.3.15 && \ + pip install jax==0.3.15 && \ + pip install jax-md==0.2.0 && \ + pip install tqdm && \ + conda remove cudatoolkit --force -y && \ + conda clean --all -y && \ + rm -rf /root/.cache/pip && \ + echo "source activate dmff" >> ~/.bashrc 
+ +SHELL ["/bin/bash", "-c"] \ No newline at end of file diff --git a/tests/data/chloropyridine.mol b/tests/data/chloropyridine.mol new file mode 100644 index 000000000..4d0b10a78 --- /dev/null +++ b/tests/data/chloropyridine.mol @@ -0,0 +1,27 @@ + + RDKit 3D + + 11 11 0 0 0 0 0 0 0 0999 V2000 + 2.8623 -1.1338 -0.4470 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2590 -0.5725 -0.2105 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0072 0.7619 0.0614 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3166 1.1502 0.2425 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3245 0.1954 0.1448 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.9657 -1.1156 -0.1315 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3081 -1.5238 -0.3122 N 0 0 0 0 0 0 0 0 0 0 0 0 + 1.8148 1.4810 0.1311 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5605 2.1868 0.4577 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3663 0.4655 0.2806 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7178 -1.8952 -0.2170 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 + 2 3 2 0 + 3 4 1 0 + 4 5 2 0 + 5 6 1 0 + 6 7 2 0 + 7 2 1 0 + 3 8 1 0 + 4 9 1 0 + 5 10 1 0 + 6 11 1 0 +M END diff --git a/tests/data/chloropyridine.xml b/tests/data/chloropyridine.xml new file mode 100644 index 000000000..4d371888b --- /dev/null +++ b/tests/data/chloropyridine.xml @@ -0,0 +1,55 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/chloropyridine_vsite.mol b/tests/data/chloropyridine_vsite.mol new file mode 100644 index 000000000..02699f734 --- /dev/null +++ b/tests/data/chloropyridine_vsite.mol @@ -0,0 +1,31 @@ + + RDKit 3D + + 13 13 0 0 0 0 0 0 0 0999 V2000 + 2.8623 -1.1338 -0.4470 Cl 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2590 -0.5725 -0.2105 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.0072 0.7619 0.0614 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3166 1.1502 0.2425 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3245 0.1954 0.1448 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.9657 -1.1156 -0.1315 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3081 -1.5238 -0.3122 N 0 0 0 0 0 0 0 0 0 0 0 0 + 1.8148 1.4810 0.1311 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5605 2.1868 0.4577 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3663 0.4655 
0.2806 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7178 -1.8952 -0.2170 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.3580 -1.6574 -0.6676 R 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3985 -1.9053 -0.3914 R 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 + 2 3 2 0 + 3 4 1 0 + 4 5 2 0 + 5 6 1 0 + 6 7 2 0 + 7 2 1 0 + 3 8 1 0 + 4 9 1 0 + 5 10 1 0 + 6 11 1 0 + 12 1 0 0 + 13 7 0 0 +M END diff --git a/tests/data/chloropyridine_vsite.xml b/tests/data/chloropyridine_vsite.xml new file mode 100644 index 000000000..d1bb92169 --- /dev/null +++ b/tests/data/chloropyridine_vsite.xml @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/ethane.mol b/tests/data/ethane.mol new file mode 100644 index 000000000..b48261356 --- /dev/null +++ b/tests/data/ethane.mol @@ -0,0 +1,20 @@ + + RDKit 3D + + 8 7 0 0 0 0 0 0 0 0999 V2000 + -0.7558 0.0071 -0.0160 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7558 -0.0071 0.0160 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1627 -0.1018 0.9937 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1225 0.9487 -0.4356 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1350 -0.8147 -0.6307 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1350 0.8148 0.6307 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1627 0.1018 -0.9937 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1226 -0.9487 0.4356 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 + 1 3 1 0 + 1 4 1 0 + 1 5 1 0 + 2 6 1 0 + 2 7 1 0 + 2 8 1 0 +M END diff --git a/tests/data/ethane.pdb b/tests/data/ethane.pdb new file mode 100644 index 000000000..e5370346a --- /dev/null +++ b/tests/data/ethane.pdb @@ -0,0 +1,20 @@ +COMPND UNNAMED +AUTHOR GENERATED BY OPEN BABEL 3.1.0 +HETATM 1 C1 MOL 1 -0.756 0.007 -0.016 1.00 0.00 C +HETATM 2 C2 MOL 1 0.756 -0.007 0.016 1.00 0.00 C +HETATM 3 H1 MOL 1 -1.163 -0.102 0.994 1.00 0.00 H +HETATM 4 H2 MOL 1 -1.123 0.949 -0.436 1.00 0.00 H +HETATM 5 H3 MOL 1 -1.135 -0.815 -0.631 1.00 0.00 H +HETATM 6 H4 MOL 1 1.135 0.815 0.631 1.00 0.00 H +HETATM 7 H5 MOL 1 1.163 0.102 -0.994 1.00 0.00 H +HETATM 8 H6 MOL 1 1.123 -0.949 0.436 1.00 0.00 H +CONECT 1 2 3 4 5 
+CONECT 2 1 6 7 8 +CONECT 3 1 +CONECT 4 1 +CONECT 5 1 +CONECT 6 2 +CONECT 7 2 +CONECT 8 2 +MASTER 0 0 0 0 0 0 0 0 8 0 8 0 +END diff --git a/tests/data/ethane.xml b/tests/data/ethane.xml new file mode 100644 index 000000000..4b7f9ea65 --- /dev/null +++ b/tests/data/ethane.xml @@ -0,0 +1,76 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/ethane_smirks.xml b/tests/data/ethane_smirks.xml new file mode 100644 index 000000000..d70c36316 --- /dev/null +++ b/tests/data/ethane_smirks.xml @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/formaldehyde.mol b/tests/data/formaldehyde.mol new file mode 100644 index 000000000..892aa5d11 --- /dev/null +++ b/tests/data/formaldehyde.mol @@ -0,0 +1,12 @@ + + RDKit 3D + + 4 3 0 0 0 0 0 0 0 0999 V2000 + 1.2097 -0.0770 0.0173 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0123 0.0008 -0.1002 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.6579 -0.8919 -0.0108 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5395 0.9681 -0.0064 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 2 0 + 2 3 1 0 + 2 4 1 0 +M END diff --git a/tests/data/formaldehyde.pdb b/tests/data/formaldehyde.pdb new file mode 100644 index 000000000..7a2972478 --- /dev/null +++ b/tests/data/formaldehyde.pdb @@ -0,0 +1,12 @@ +COMPND UNNAMED +AUTHOR GENERATED BY OPEN BABEL 3.1.0 +HETATM 1 O1 MOL 1 1.210 -0.077 0.017 1.00 0.00 O +HETATM 2 C1 MOL 1 -0.012 0.001 -0.100 1.00 0.00 C +HETATM 3 H1 MOL 1 -0.658 -0.892 -0.011 1.00 0.00 H +HETATM 4 H2 MOL 1 -0.539 0.968 -0.006 1.00 0.00 H +CONECT 1 2 2 +CONECT 2 1 1 3 4 +CONECT 3 2 +CONECT 4 2 +MASTER 0 0 0 0 0 0 0 0 4 0 4 0 +END diff --git a/tests/data/formaldehyde.xml b/tests/data/formaldehyde.xml new file mode 100644 index 000000000..87c349b49 --- /dev/null +++ b/tests/data/formaldehyde.xml @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ diff --git a/tests/data/formaldehyde_smirks.xml b/tests/data/formaldehyde_smirks.xml new file mode 100644 index 000000000..6554415cd --- /dev/null +++ b/tests/data/formaldehyde_smirks.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/methane.mol b/tests/data/methane.mol new file mode 100644 index 000000000..f96dbc4dc --- /dev/null +++ b/tests/data/methane.mol @@ -0,0 +1,14 @@ + + RDKit 3D + + 5 4 0 0 0 0 0 0 0 0999 V2000 + 0.0000 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.6757 0.8538 -0.0852 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3946 -0.8360 -0.5816 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0850 -0.2938 1.0485 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9854 0.2760 -0.3817 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 + 1 3 1 0 + 1 4 1 0 + 1 5 1 0 +M END diff --git a/tests/data/methane.pdb b/tests/data/methane.pdb new file mode 100644 index 000000000..3b4545565 --- /dev/null +++ b/tests/data/methane.pdb @@ -0,0 +1,14 @@ +COMPND UNNAMED +AUTHOR GENERATED BY OPEN BABEL 3.1.0 +HETATM 1 C1 UNL 1 0.000 0.000 0.000 1.00 0.00 C +HETATM 2 H1 UNL 1 -0.676 0.854 -0.085 1.00 0.00 H +HETATM 3 H2 UNL 1 -0.395 -0.836 -0.582 1.00 0.00 H +HETATM 4 H3 UNL 1 0.085 -0.294 1.048 1.00 0.00 H +HETATM 5 H4 UNL 1 0.985 0.276 -0.382 1.00 0.00 H +CONECT 1 2 3 4 5 +CONECT 2 1 +CONECT 3 1 +CONECT 4 1 +CONECT 5 1 +MASTER 0 0 0 0 0 0 0 0 5 0 5 0 +END diff --git a/tests/data/methane.xml b/tests/data/methane.xml index 4bb3fea3c..099d393ad 100644 --- a/tests/data/methane.xml +++ b/tests/data/methane.xml @@ -1,6 +1,6 @@ - + diff --git a/tests/data/methane_smirks.xml b/tests/data/methane_smirks.xml new file mode 100644 index 000000000..10c8df95f --- /dev/null +++ b/tests/data/methane_smirks.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/test_api.py b/tests/test_api.py index 31786910d..d256881fc 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,11 +1,16 @@ +import 
subprocess +from pathlib import Path import openmm.app as app import openmm.unit as unit -import numpy as np -import jax.numpy as jnp import numpy.testing as npt import pytest +from rdkit import Chem +from rdkit.Chem import AllChem +from biopandas.pdb import PandasPdb + from dmff import Hamiltonian + class TestADMPAPI: """ Test ADMP related generators @@ -158,3 +163,56 @@ def test_parse_multiple_files(self): h = Hamiltonian("tests/data/methane.xml", "tests/data/tip3p.xml") potentials = h.createPotential(pdb.topology) + +def check_same_topology(top1: app.Topology, top2: app.Topology): + assert top1.getNumChains() == top2.getNumChains(), "Number of chains are not the same" + assert top1.getNumResidues() == top2.getNumResidues(), "Number of residues are not the same" + assert top1.getNumAtoms() == top2.getNumAtoms(), "Number of atoms are not the same" + assert top1.getNumBonds() == top2.getNumBonds(), "Number of bonds are not the same" + + atoms1, atoms2 = [at for at in top1.atoms()], [at for at in top2.atoms()] + for i in range(top1.getNumAtoms()): + assert atoms1[i].element == atoms2[i].element, f"Atom {i} are not the same" + + bonds1 = { + (int(bond.atom1.id) - 1, int(bond.atom2.id) - 1): True \ + for bond in top1.bonds() + } + bonds2 = { + (int(bond.atom1.id) - 1, int(bond.atom2.id) - 1): True \ + for bond in top2.bonds() + } + for key in bonds1.keys(): + if (key not in bonds2) and ((key[1], key[0]) not in bonds2): + raise KeyError(f"Bond (key) not match") + + +def fix_pdb(path): + pdb = PandasPdb() + pdb.read_pdb(path) + atCount = {} + for i in range(pdb.df['HETATM'].shape[0]): + atName = pdb.df['HETATM'].loc[i, 'atom_name'] + atCount.update({atName: atCount.get(atName, 0) + 1}) + pdb.df['HETATM'].loc[i, 'atom_name'] = f"{atName}{atCount.get(atName)}" + pdb.to_pdb(path) + + +@pytest.mark.parametrize( + "smi", + ["CC", "CO", "C=C", "CC#N", "C1=CC=CC=C1"] +) +def test_build_top_from_rdkit(smi, tmpdir): + mol = Chem.AddHs(Chem.MolFromSmiles(smi)) + 
AllChem.EmbedMolecule(mol) + AllChem.MMFFOptimizeMolecule(mol) + key = Chem.MolToInchiKey(mol) + wdir = Path(tmpdir).resolve() + molfile = str(wdir / f"{key}.mol") + pdbfile = str(wdir / f"{key}.pdb") + Chem.MolToMolFile(mol, str(wdir / f"{key}.mol")) + subprocess.run(["obabel", molfile, "-O", pdbfile], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + fix_pdb(pdbfile) + ref_top = app.PDBFile(pdbfile).getTopology() + test_top = Hamiltonian.buildTopologyFromMol(mol) + check_same_topology(ref_top, test_top) diff --git a/tests/test_classical/test_smirks.py b/tests/test_classical/test_smirks.py new file mode 100644 index 000000000..3f200b71a --- /dev/null +++ b/tests/test_classical/test_smirks.py @@ -0,0 +1,109 @@ +import pytest + +import numpy as np +import jax +import jax.numpy as jnp +from rdkit import Chem +import openmm.app as app + +from dmff import Hamiltonian, NeighborList + + +@pytest.mark.parametrize( + "name", + ["methane", "ethane", "formaldehyde"] +) +def test_smirks(name: str): + pdb = app.PDBFile(f"tests/data/{name}.pdb") + h_typing = Hamiltonian(f"tests/data/{name}.xml") + pot_typing = h_typing.createPotential(pdb.topology) + cov_map = h_typing.getCovalentMap() + + pos = jnp.array(pdb.getPositions(asNumpy=True)) + box = jnp.eye(3, dtype=jnp.float32) + nblist = NeighborList(box, 1.0, cov_map) + nblist.allocate(pos) + pairs = nblist.pairs + + ref_data = {} + for key, pot in pot_typing.dmff_potentials.items(): + ref_data[key] = pot(pos, box, pairs, h_typing.paramtree) + + rdmol = Chem.MolFromMolFile(f"tests/data/{name}.mol", removeHs=False) + h_smirks = Hamiltonian(f"tests/data/{name}_smirks.xml", noOmmSys=True) + top = h_smirks.buildTopologyFromMol(rdmol) + pot_smirks = h_smirks.createPotential(top, rdmol=rdmol) + + # print(ref_data) + + for key in ref_data.keys(): + value_smirks = pot_smirks.dmff_potentials[key](pos, box, pairs, h_smirks.paramtree) + assert jnp.allclose(value_smirks, ref_data[key], atol=1e-6), f"{key} does not match" + + 
+@pytest.mark.parametrize( + "name", + ["chloropyridine"] +) +def test_vsite(name: str): + rdmol = Chem.MolFromMolFile(f"tests/data/{name}.mol", removeHs=False) + h_smirks = Hamiltonian(f"tests/data/{name}_vsite.xml", noOmmSys=True) + top = h_smirks.buildTopologyFromMol(rdmol) + pot_vsite = h_smirks.createPotential(top, rdmol=rdmol) + newmol = h_smirks.addVirtualSiteToMol(rdmol, h_smirks.paramtree) + # Chem.MolToMolFile(newmol, f"tests/data/{name}_vsite.mol") + rdmol_vsite = Chem.MolFromMolFile(f"tests/data/{name}_vsite.mol", removeHs=False) + + pos_vsite = jnp.array(newmol.GetConformer().GetPositions()) / 10 + box = jnp.eye(3, dtype=jnp.float32) + nblist = NeighborList(box, 1.0, h_smirks.getCovalentMap()) + nblist.allocate(pos_vsite) + pairs_vsite = nblist.pairs + + nbfunc_vsite = jax.value_and_grad(pot_vsite.dmff_potentials['NonbondedForce'], argnums=-1, allow_int=True) + nbene_vsite, nbene_grad_vsite = nbfunc_vsite(pos_vsite, box, pairs_vsite, h_smirks.paramtree) + nbene_dbcc = jnp.dot( + h_smirks.getTopologyMatrix().T, + nbene_grad_vsite['NonbondedForce']['charge'].reshape(-1, 1) + ) + # test grad bcc + assert jnp.allclose(nbene_dbcc, nbene_grad_vsite['NonbondedForce']['bcc']) + + # test vsite coordinates + assert np.allclose( + newmol.GetConformer().GetPositions(), + rdmol_vsite.GetConformer().GetPositions(), + atol=1e-4 + ) + + h_typing = Hamiltonian(f"tests/data/{name}.xml", removeHs=False) + pot_typing = h_typing.createPotential(top) + pos = jnp.array(rdmol.GetConformer().GetPositions()) / 10 + box = jnp.eye(3, dtype=jnp.float32) + nblist = NeighborList(box, 1.0, h_typing.getCovalentMap()) + nblist.allocate(pos) + pairs = nblist.pairs + nbfunc = jax.value_and_grad(pot_typing.dmff_potentials['NonbondedForce'], argnums=-1, allow_int=True) + nbene, nbene_grad = nbfunc(pos, box, pairs, h_typing.paramtree) + # test energies + assert np.allclose(nbene, nbene_vsite, atol=1e-6) + + +@pytest.mark.parametrize( + "name", + ["ethane"] +) +def test_bcc(name: str): + 
rdmol = Chem.MolFromMolFile(f"tests/data/{name}.mol", removeHs=False) + h_smirks = Hamiltonian(f"tests/data/{name}_smirks.xml", noOmmSys=True) + top = h_smirks.buildTopologyFromMol(rdmol) + h_smirks.createPotential(top, rdmol=rdmol) + bccchg = jnp.dot(h_smirks.getTopologyMatrix(), h_smirks.paramtree['NonbondedForce']['bcc']).flatten() + prechg = h_smirks.paramtree['NonbondedForce']['charge'] + + h_typing = Hamiltonian(f"tests/data/{name}.xml") + h_typing.createPotential(top) + refchg = h_typing.paramtree['NonbondedForce']['charge'] + + assert jnp.allclose(bccchg+prechg, refchg, atol=1e-6) +