Skip to content

Commit

Permalink
Improve reaction alignment by only transferring coordinates for the l…
Browse files Browse the repository at this point in the history
…argest part.
  • Loading branch information
johnmay committed Jun 25, 2016
1 parent c1c4ead commit f9d9c5c
Show file tree
Hide file tree
Showing 3 changed files with 215 additions and 75 deletions.
49 changes: 49 additions & 0 deletions base/core/src/main/java/org/openscience/cdk/graph/GraphUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,13 @@
package org.openscience.cdk.graph;

import com.google.common.collect.Maps;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;

import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import static java.util.Arrays.copyOf;

Expand Down Expand Up @@ -90,6 +93,52 @@ public static int[][] toAdjList(IAtomContainer container) {
return graph;
}

/**
* Create an adjacent list representation of the {@literal container} that only
* includes bonds that are in the set provided as an argument.
*
* @param container the molecule
* @return adjacency list representation stored as an {@literal int[][]}.
* @throws NullPointerException the container was null
* @throws IllegalArgumentException a bond was found which contained atoms
* not in the molecule
*/
public static int[][] toAdjListSubgraph(IAtomContainer container, Set<IBond> include) {

if (container == null) throw new NullPointerException("atom container was null");

int n = container.getAtomCount();

int[][] graph = new int[n][DEFAULT_DEGREE];
int[] degree = new int[n];

for (IBond bond : container.bonds()) {

if (!include.contains(bond))
continue;

int v = container.getAtomNumber(bond.getAtom(0));
int w = container.getAtomNumber(bond.getAtom(1));

if (v < 0 || w < 0)
throw new IllegalArgumentException("bond at index " + container.getBondNumber(bond)
+ " contained an atom not pressent in molecule");

graph[v][degree[v]++] = w;
graph[w][degree[w]++] = v;

// if the vertex degree of v or w reaches capacity, double the size
if (degree[v] == graph[v].length) graph[v] = copyOf(graph[v], degree[v] * 2);
if (degree[w] == graph[w].length) graph[w] = copyOf(graph[w], degree[w] * 2);
}

for (int v = 0; v < n; v++) {
graph[v] = copyOf(graph[v], degree[v]);
}

return graph;
}

/**
* Create an adjacent list representation of the {@code container} and
* fill in the {@code bondMap} for quick lookup.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,12 @@
import org.openscience.cdk.stereo.ExtendedTetrahedral;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
* @cdk.module standard
Expand Down Expand Up @@ -397,4 +400,87 @@ public static IReaction toReaction(IAtomContainer mol) {

return rxn;
}

/**
* Bi-direction int-tuple for looking up bonds by index.
*/
private static final class IntTuple {
private final int beg, end;

public IntTuple(int beg, int end) {
this.beg = beg;
this.end = end;
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
IntTuple that = (IntTuple) o;
return (this.beg == that.beg && this.end == that.end) ||
(this.beg == that.end && this.end == that.beg);
}

@Override
public int hashCode() {
return beg ^ end;
}
}

/**
* Collect the set of bonds that mapped in both a reactant and a product. The method uses
* the {@link CDKConstants#ATOM_ATOM_MAPPING} property of atoms.
*
* @param reaction reaction
* @return mapped bonds
*/
public static Set<IBond> findMappedBonds(IReaction reaction) {
Set<IBond> mapped = new HashSet<>();

// first we collect the occurrance of mapped bonds from reacants then products
Set<IntTuple> mappedReactantBonds = new HashSet<>();
Set<IntTuple> mappedProductBonds = new HashSet<>();
for (IAtomContainer reactant : reaction.getReactants().atomContainers()) {
for (IBond bond : reactant.bonds()) {
Integer begidx = bond.getAtom(0).getProperty(CDKConstants.ATOM_ATOM_MAPPING);
Integer endidx = bond.getAtom(1).getProperty(CDKConstants.ATOM_ATOM_MAPPING);
if (begidx != null && endidx != null)
mappedReactantBonds.add(new IntTuple(begidx, endidx));
}
}
// fail fast
if (mappedReactantBonds.isEmpty())
return Collections.emptySet();

for (IAtomContainer product : reaction.getProducts().atomContainers()) {
for (IBond bond : product.bonds()) {
Integer begidx = bond.getAtom(0).getProperty(CDKConstants.ATOM_ATOM_MAPPING);
Integer endidx = bond.getAtom(1).getProperty(CDKConstants.ATOM_ATOM_MAPPING);
if (begidx != null && endidx != null)
mappedProductBonds.add(new IntTuple(begidx, endidx));
}
}
// fail fast
if (mappedProductBonds.isEmpty())
return Collections.emptySet();

// repeat above but now store any that are different or unmapped as being mapped
for (IAtomContainer reactant : reaction.getReactants().atomContainers()) {
for (IBond bond : reactant.bonds()) {
Integer begidx = bond.getAtom(0).getProperty(CDKConstants.ATOM_ATOM_MAPPING);
Integer endidx = bond.getAtom(1).getProperty(CDKConstants.ATOM_ATOM_MAPPING);
if (begidx != null && endidx != null && mappedProductBonds.contains(new IntTuple(begidx, endidx)))
mapped.add(bond);
}
}
for (IAtomContainer product : reaction.getProducts().atomContainers()) {
for (IBond bond : product.bonds()) {
Integer begidx = bond.getAtom(0).getProperty(CDKConstants.ATOM_ATOM_MAPPING);
Integer endidx = bond.getAtom(1).getProperty(CDKConstants.ATOM_ATOM_MAPPING);
if (begidx != null && endidx != null && mappedReactantBonds.contains(new IntTuple(begidx, endidx)))
mapped.add(bond);
}
}
return mapped;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.geometry.GeometryUtil;
import org.openscience.cdk.graph.ConnectedComponents;
import org.openscience.cdk.graph.ConnectivityChecker;
import org.openscience.cdk.graph.Cycles;
import org.openscience.cdk.graph.GraphUtil;
Expand All @@ -54,8 +55,8 @@
import org.openscience.cdk.tools.LoggingToolFactory;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import org.openscience.cdk.tools.manipulator.AtomContainerSetManipulator;
import org.openscience.cdk.tools.manipulator.ReactionManipulator;
import org.openscience.cdk.tools.manipulator.RingSetManipulator;
import uk.ac.ebi.beam.Atom;

import javax.vecmath.Point2d;
import javax.vecmath.Vector2d;
Expand All @@ -72,7 +73,6 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/**
* Generates 2D coordinates for a molecule for which only connectivity is known
Expand Down Expand Up @@ -183,89 +183,94 @@ public final void generateCoordinates(IAtomContainer mol) throws CDKException {
* @throws CDKException problem with layout
*/
public final void generateCoordinates(final IReaction reaction) throws CDKException {
final Map<IntTuple,IBond> bmap = new HashMap<>();

final Set<IBond> duplicated = new HashSet<>();

for (IAtomContainer product : reaction.getProducts().atomContainers()) {
setMolecule(product, false);
generateCoordinates();
if (!alignMappedReaction)
continue;
for (IBond bond : product.bonds()) {
Integer begidx = bond.getAtom(0).getProperty(CDKConstants.ATOM_ATOM_MAPPING);
Integer endidx = bond.getAtom(1).getProperty(CDKConstants.ATOM_ATOM_MAPPING);
if (begidx != null && endidx != null) {
// overwrite is allowed but stored for fixing later
IBond old = bmap.put(new IntTuple(begidx, endidx), bond);
if (old != null)
duplicated.add(old);
// layout products and agents
for (IAtomContainer mol : reaction.getProducts().atomContainers())
generateCoordinates(mol);
for (IAtomContainer mol : reaction.getAgents().atomContainers())
generateCoordinates(mol);

// do not align = simple layout of reactants
if (alignMappedReaction) {
final Set<IBond> mapped = ReactionManipulator.findMappedBonds(reaction);

Multimap<Integer, Map<Integer, IAtom>> refmap = HashMultimap.create();

for (IAtomContainer mol : reaction.getProducts().atomContainers()) {
Cycles.markRingAtomsAndBonds(mol);
final ConnectedComponents cc = new ConnectedComponents(GraphUtil.toAdjListSubgraph(mol, mapped));
final IAtomContainerSet parts = ConnectivityChecker.partitionIntoMolecules(mol, cc.components());
for (IAtomContainer part : parts.atomContainers()) {
// skip single atoms (unmapped)
if (part.getAtomCount() == 1)
continue;
final Map<Integer, IAtom> map = new HashMap<>();
for (IAtom atom : part.atoms()) {
// safe as substructure should only be mapped bonds and therefore atoms!
int idx = atom.getProperty(CDKConstants.ATOM_ATOM_MAPPING);
if (map.put(idx, atom) == null)
refmap.put(idx, map);
}
}
}
// remove bad maps
for (IBond bond : duplicated) {
Integer begidx = bond.getAtom(0).getProperty(CDKConstants.ATOM_ATOM_MAPPING);
Integer endidx = bond.getAtom(1).getProperty(CDKConstants.ATOM_ATOM_MAPPING);
bmap.remove(new IntTuple(begidx, endidx));
}

}
final Set<IAtom> afix = new HashSet<>();
final Set<IBond> bfix = new HashSet<>();
for (IAtomContainer mol : reaction.getAgents().atomContainers()) {
copyMappedCoords(bmap, afix, bfix, mol);
setMolecule(mol, false, afix, bfix);
generateCoordinates();
}
for (IAtomContainer mol : reaction.getReactants().atomContainers()) {
copyMappedCoords(bmap, afix, bfix, mol);
setMolecule(mol, false, afix, bfix);
generateCoordinates();
}
}
Set<IAtom> afix = new HashSet<>();
Set<IBond> bfix = new HashSet<>();

private static void copyMappedCoords(Map<IntTuple, IBond> bmap,
Set<IAtom> afix, Set<IBond> bfix,
IAtomContainer mol) {
// stiochiometry can mess up alignment
Set<Integer> mapvisit = new TreeSet<>();
afix.clear();
bfix.clear();
if (!bmap.isEmpty()) {
// we only copy coordinates for bonded atoms
for (IBond bond : mol.bonds()) {
IAtom beg = bond.getAtom(0);
IAtom end = bond.getAtom(1);
Integer begmapidx = beg.getProperty(CDKConstants.ATOM_ATOM_MAPPING);
Integer endmapidx = end.getProperty(CDKConstants.ATOM_ATOM_MAPPING);
if (begmapidx != null && endmapidx != null) {
if (!mapvisit.add(begmapidx)) continue; // already have coords for beg in this mol
if (!mapvisit.add(endmapidx)) continue; // already have coords for end in this mol
IBond mbond = bmap.get(new IntTuple(begmapidx, endmapidx));
if (mbond != null) {
IAtom mbeg = mbond.getAtom(0);
IAtom mend = mbond.getAtom(1);
if (mbeg.getProperty(CDKConstants.ATOM_ATOM_MAPPING).equals(begmapidx)) {
beg.setPoint2d(new Point2d(mbeg.getPoint2d()));
end.setPoint2d(new Point2d(mend.getPoint2d()));
afix.add(beg);
afix.add(end);
} else {
beg.setPoint2d(new Point2d(mend.getPoint2d()));
end.setPoint2d(new Point2d(mbeg.getPoint2d()));
afix.add(beg);
afix.add(end);
}
for (IAtomContainer mol : reaction.getReactants().atomContainers()) {
Cycles.markRingAtomsAndBonds(mol);
final ConnectedComponents cc = new ConnectedComponents(GraphUtil.toAdjListSubgraph(mol, mapped));
final IAtomContainerSet parts = ConnectivityChecker.partitionIntoMolecules(mol, cc.components());

// we only aligned the largest part
IAtomContainer largest = null;
for (IAtomContainer part : parts.atomContainers()) {
if (largest == null || part.getBondCount() > largest.getBondCount())
largest = part;
}

afix.clear();
bfix.clear();

if (largest != null && largest.getAtomCount() > 1) {

int idx = largest.getAtom(0).getProperty(CDKConstants.ATOM_ATOM_MAPPING);

// select the largest and use those coordinates
Map<Integer, IAtom> reference = select(refmap.get(idx));
for (IAtom atom : largest.atoms()) {
idx = atom.getProperty(CDKConstants.ATOM_ATOM_MAPPING);
final IAtom src = reference.get(idx);
if (src == null) continue;
atom.setPoint2d(new Point2d(src.getPoint2d()));
afix.add(atom);
}
}

if (!afix.isEmpty()) {
for (IBond bond : mol.bonds()) {
if (afix.contains(bond.getAtom(0)) && afix.contains(bond.getAtom(1)))
bfix.add(bond);
}
}

setMolecule(mol, false, afix, bfix);
generateCoordinates();
}

} else {
for (IAtomContainer mol : reaction.getReactants().atomContainers())
generateCoordinates(mol);
}
if (!afix.isEmpty()) {
for (IBond bond : mol.bonds()) {
if (afix.contains(bond.getAtom(0)) && afix.contains(bond.getAtom(1)))
bfix.add(bond);
}
}

private Map<Integer, IAtom> select(Collection<Map<Integer, IAtom>> refs) {
Map<Integer, IAtom> largest = Collections.emptyMap();
for (Map<Integer, IAtom> ref : refs) {
if (ref.size() > largest.size())
largest = ref;
}
return largest;
}

public void setMolecule(IAtomContainer mol, boolean clone) {
Expand Down

0 comments on commit f9d9c5c

Please sign in to comment.