# Benchmarks for `guess_bonds`

In [3]:
from MDAnalysis.topology.guessers import guess_bonds
import numpy as np
import MDAnalysis as mda

In [4]:
# Load both benchmark systems (small.gro and big.gro) as Universes.
u1, u2 = map(mda.Universe, ('small.gro', 'big.gro'))

In [5]:
# Guess bonds for u1 with the guess_bonds imported from
# MDAnalysis.topology.guessers, passing the unit cell dimensions as the box.
bonds = guess_bonds(u1.atoms, u1.atoms.positions, box=u1.dimensions)

In [7]:
len(bonds)

8284

In [12]:
from MDAnalysis.topology import tables
from MDAnalysis.lib import distances
def old_guess_bonds(atoms, coords, box=None, **kwargs):
    r"""Guess which pairs of atoms are bonded, based on their distance.

    A bond between two atoms is created if the two atoms are within

    .. math::

          d < f \cdot (R_1 + R_2)

    of each other, where :math:`R_1` and :math:`R_2` are the VdW radii
    of the atoms and :math:`f` is an ad-hoc *fudge_factor*. This is
    the `same algorithm that VMD uses`_.

    This is the reference implementation that the notebook times against
    ``guess_bonds``; the per-atom search loop is deliberately left as is
    so that it stays a meaningful baseline.

    Parameters
    ----------
    atoms : AtomGroup
         atoms for which bonds should be guessed
    coords : array
         coordinates of the atoms (i.e., ``AtomGroup.positions``)
    fudge_factor : float, optional
        The factor by which atoms must overlap each other to be considered
        a bond.  Larger values will increase the number of bonds found.
        [0.72]
    vdwradii : dict, optional
        To supply custom vdwradii for atoms in the algorithm. Must be a dict
        of format {type: radius}. The default table of van der Waals radii
        is hard-coded as :data:`MDAnalysis.topology.tables.vdwradii`.  Any
        user defined vdwradii passed as an argument will supersede the table
        values. [``None``]
    lower_bound : float, optional
        The minimum bond length. All bonds found shorter than this length
        will be ignored. This is useful for parsing PDB with altloc records
        where atoms with altloc A and B may be very close together and there
        should be no chemical bond between them. [0.1]
    box : array_like, optional
        Bonds are found using a distance search; if unit cell information is
        given, periodic boundary conditions will be considered in the
        distance search. [``None``]

    Returns
    -------
    tuple
        Tuple of ``(index_i, index_j)`` pairs suitable for use in Universe
        topology building. Empty if `atoms` is empty.

    Warnings
    --------
    No check is done after the bonds are guessed to see if Lewis
    structure is correct. This is wrong and will burn somebody.

    Raises
    ------
    ValueError
        If `atoms` and `coords` differ in length, or if an atom type has
        no entry in the (augmented) vdW radii table.


    .. _`same algorithm that VMD uses`:
       http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.1/ug/node26.html

    .. versionadded:: 0.7.7
    .. versionchanged:: 0.9.0
       Updated method internally to use more :mod:`numpy`, should work
       faster.  Should also use less memory, previously scaled as
       :math:`O(n^2)`.  *vdwradii* argument now augments table list
       rather than replacing entirely.
    """
    # why not just use atom.positions?
    if len(atoms) != len(coords):
        raise ValueError("'atoms' and 'coord' must be the same length")

    # No atoms means no bonds.  Returning here also keeps max() below from
    # failing on an empty sequence with an unrelated error message.
    if len(atoms) == 0:
        return ()

    fudge_factor = kwargs.get('fudge_factor', 0.72)
    lower_bound = kwargs.get('lower_bound', 0.1)

    vdwradii = tables.vdwradii.copy()  # so I don't permanently change it
    user_vdwradii = kwargs.get('vdwradii', None)
    if user_vdwradii:  # this should make algo use their values over defaults
        vdwradii.update(user_vdwradii)

    # Try using types, then elements
    atomtypes = atoms.types

    # check that all types have a defined vdw
    missing = [t for t in set(atomtypes) if t not in vdwradii]
    if missing:
        # str() so that non-string type values still yield the intended
        # ValueError rather than a TypeError from str.join
        raise ValueError(("vdw radii for types: " +
                          ", ".join(str(t) for t in missing) +
                          ". These can be defined manually using the" +
                          " keyword 'vdwradii'"))

    if box is not None:
        box = np.asarray(box)

    # to speed up checking, calculate the largest vdW radius present; any
    # pair further apart than (vdw_i + max_vdw) * fudge_factor cannot be
    # bonded, so this is used to quickly mask distance results later
    max_vdw = max([vdwradii[t] for t in atomtypes])

    bonds = []

    for i, atom in enumerate(atoms[:-1]):
        vdw_i = vdwradii[atomtypes[i]]
        max_d = (vdw_i + max_vdw) * fudge_factor

        # using self_distance_array scales O(n^2)
        # 20,000 atoms = 1.6 Gb memory
        # Instead compute one row at a time: atom i against all later atoms.
        dist = distances.distance_array(coords[i][None, :], coords[i + 1:],
                                        box=box)[0]
        idx = np.where((dist > lower_bound) & (dist <= max_d))[0]

        for a in idx:
            # `a` is an offset into coords[i + 1:], so shift it back
            j = i + 1 + a
            atom_j = atoms[j]

            if dist[a] < (vdw_i + vdwradii[atomtypes[j]]) * fudge_factor:
                # because of method used, same bond won't be seen twice,
                # so don't need to worry about duplicates
                bonds.append((atom.index, atom_j.index))

    return tuple(bonds)

In [13]:
# Run the inlined reference implementation on the same atoms, positions and
# box, so that its result can be compared with `bonds`.
bonds_old = old_guess_bonds(u1.atoms, u1.atoms.positions, box=u1.dimensions)

In [14]:
len(bonds_old)

8284

In [15]:
%timeit old_guess_bonds(u1.atoms, u1.atoms.positions, box=u1.dimensions)

3.19 s ± 108 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [16]:
%timeit guess_bonds(u1.atoms, u1.atoms.positions, box=u1.dimensions)

394 ms ± 26 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
