Skip to content

Commit

Permalink
Hide away post match query filters, users should not have to worry ab…
Browse files Browse the repository at this point in the history
…out these and their previous locations made recursive SMARTS difficult.
  • Loading branch information
johnmay committed Oct 15, 2018
1 parent f8f22b0 commit 47067b7
Show file tree
Hide file tree
Showing 19 changed files with 203 additions and 215 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016 John May <jwmay@users.sf.net>
* Copyright (C) 2016-2018 The Chemistry Development Kit (CDK) project
*
* Contact: cdk-devel@lists.sourceforge.net
*
Expand All @@ -18,10 +18,10 @@
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 U
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

package org.openscience.cdk.smarts;
package org.openscience.cdk.isomorphism;

import com.google.common.base.Predicate;
import com.google.common.collect.ArrayListMultimap;
Expand Down Expand Up @@ -66,12 +66,12 @@
*
* @see <a href="http://www.daylight.com/dayhtml/doc/theory/theory.smarts.html">Daylight Theory Manual</a>
*/
final class SmartsAamFilter implements Predicate<int[]> {
final class AtomMapFilter implements Predicate<int[]> {

private final List<MappedPairs> mapped = new ArrayList<>();
private final IAtomContainer target;

SmartsAamFilter(IAtomContainer query, IAtomContainer target) {
AtomMapFilter(IAtomContainer query, IAtomContainer target) {

Multimap<Integer,Integer> reactInvMap = null;
Multimap<Integer,Integer> prodInvMap = null;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2013 European Bioinformatics Institute (EMBL-EBI)
* John May <jwmay@users.sf.net>
* John May
* 2018 John Mayfield (ne May)
*
* Contact: cdk-devel@lists.sourceforge.net
*
Expand All @@ -19,7 +20,7 @@
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 U
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

package org.openscience.cdk.isomorphism;
Expand Down Expand Up @@ -57,7 +58,7 @@
* @cdk.module isomorphism
* @see Pattern
*/
public final class ComponentGrouping implements Predicate<int[]> {
final class ComponentFilter implements Predicate<int[]> {

/**
* Key indicates where the grouping should be stored in the query
Expand All @@ -77,7 +78,7 @@ public final class ComponentGrouping implements Predicate<int[]> {
* @param query query structure
* @param target target structure
*/
public ComponentGrouping(IAtomContainer query, IAtomContainer target) {
public ComponentFilter(IAtomContainer query, IAtomContainer target) {
this(query.getProperty(KEY) == null ? determineComponents(query, false) : query.getProperty(KEY, int[].class),
determineComponents(target, true));
}
Expand Down Expand Up @@ -121,7 +122,7 @@ private static int[] determineComponents(IAtomContainer target, boolean auto) {
* @param grouping query grouping
* @param targetComponents connected component of the target
*/
public ComponentGrouping(int[] grouping, int[] targetComponents) {
public ComponentFilter(int[] grouping, int[] targetComponents) {
this.queryComponents = grouping;
this.targetComponents = targetComponents;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,10 @@ public Mappings limit(int limit) {
* the query.
*
* @return fluent-api instance
* @deprecated Results now automatically consider stereo if it's present; to
* match without stereochemistry, remove the stereo features.
*/
@Deprecated
public Mappings stereochemistry() {
// query structures currently have special requirements (i.e. SMARTS)
if (query instanceof IQueryAtomContainer) return this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,16 @@

package org.openscience.cdk.isomorphism;

import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IReaction;
import org.openscience.cdk.isomorphism.matchers.IQueryAtom;
import org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer;
import org.openscience.cdk.tools.manipulator.ReactionManipulator;

import java.util.Map;

/**
* A structural pattern for finding an exact matching in a target compound.
*
Expand All @@ -36,6 +42,40 @@
*/
public abstract class Pattern {

/** Additional filters on results. */
private boolean hasStereo, hasQueryStereo, hasCompGrp, hasRxnMap;

/**
 * Inspect the query and record which post-match filters must be applied to
 * the mappings produced by this pattern. The following flags are set:
 * <ul>
 *   <li>{@code hasStereo} - the query has at least one stereo element</li>
 *   <li>{@code hasCompGrp} - the query carries the {@link ComponentFilter#KEY}
 *       property, or an atom has a non-zero
 *       {@link CDKConstants#REACTION_GROUP}</li>
 *   <li>{@code hasRxnMap} - an atom has a non-zero
 *       {@link CDKConstants#ATOM_ATOM_MAPPING}</li>
 *   <li>{@code hasQueryStereo} - an atom is an {@link IQueryAtom}</li>
 * </ul>
 *
 * @param query the query structure to inspect
 */
void determineFilters(IAtomContainer query) {
    // every flag is recomputed from scratch so that a repeated invocation
    // can not inherit state from a previously inspected query
    hasRxnMap = false;
    hasQueryStereo = false;
    hasStereo = query.stereoElements().iterator().hasNext();
    hasCompGrp = query.getProperty(ComponentFilter.KEY) != null;

    for (IAtom atom : query.atoms()) {
        Integer compId = atom.getProperty(CDKConstants.REACTION_GROUP);
        Integer mapIdx = atom.getProperty(CDKConstants.ATOM_ATOM_MAPPING);
        if (mapIdx != null && mapIdx != 0) {
            hasRxnMap = true;
        }
        if (compId != null && compId != 0) {
            hasCompGrp = true;
        }
        if (atom instanceof IQueryAtom) {
            hasQueryStereo = true;
        }
        // nothing more can be learnt once all atom-derived flags are set
        if (hasRxnMap && hasCompGrp && hasQueryStereo) {
            break;
        }
    }
}

/**
 * Apply the post-match filters selected by {@code determineFilters} to the
 * provided mappings. Filters are chained in a fixed order: stereochemistry,
 * then component grouping, then atom-atom mapping. A filter is only added
 * when its corresponding flag is set.
 *
 * @param mappings the unfiltered mappings
 * @param query    the query structure
 * @param target   the target structure
 * @return the mappings with every required filter applied
 */
Mappings filter(Mappings mappings, IAtomContainer query, IAtomContainer target) {
    Mappings filtered = mappings;

    // stereo check: the query-aware filter is used when query atoms are present
    if (hasStereo) {
        if (hasQueryStereo) {
            filtered = filtered.filter(new QueryStereoFilter(query, target));
        } else {
            filtered = filtered.filter(new StereoMatch(query, target));
        }
    }

    // component grouping
    if (hasCompGrp) {
        filtered = filtered.filter(new ComponentFilter(query, target));
    }

    // atom-atom mapping
    if (hasRxnMap) {
        filtered = filtered.filter(new AtomMapFilter(query, target));
    }

    return filtered;
}

/**
* Find a matching of this pattern in the {@code target}. If no such order
* exists, an empty mapping is returned. Depending on the implementation
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2013 European Bioinformatics Institute (EMBL-EBI)
* John May <jwmay@users.sf.net>
* John May
* 2018 John Mayfield (ne May)
*
* Contact: cdk-devel@lists.sourceforge.net
*
Expand All @@ -19,10 +20,10 @@
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 U
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

package org.openscience.cdk.smarts;
package org.openscience.cdk.isomorphism;

import com.google.common.base.Predicate;
import com.google.common.collect.Maps;
Expand Down Expand Up @@ -55,7 +56,7 @@
* @cdk.module smarts
* @cdk.githash
*/
final class SmartsStereoFilter implements Predicate<int[]> {
final class QueryStereoFilter implements Predicate<int[]> {

/** Query and target containers. */
private final IAtomContainer query, target;
Expand All @@ -79,7 +80,7 @@ final class SmartsStereoFilter implements Predicate<int[]> {
* @param query query container
* @param target target container
*/
public SmartsStereoFilter(IAtomContainer query, IAtomContainer target) {
public QueryStereoFilter(IAtomContainer query, IAtomContainer target) {

if (!(query instanceof IQueryAtomContainer))
throw new IllegalArgumentException("match predicate is for SMARTS only");
Expand Down Expand Up @@ -131,7 +132,7 @@ public boolean apply(final int[] mapping) {
private boolean checkTetrahedral(int u, int[] mapping) {

int v = mapping[u];

if (targetTypes[v] != null && targetTypes[v] != Type.Tetrahedral)
return false;

Expand All @@ -155,7 +156,6 @@ private boolean checkTetrahedral(int u, int[] mapping) {

int[] vs = neighbors(targetElement, targetMap);
int q = permutationParity(vs) * parity(targetElement.getStereo());

q *= p;
if (q < 0)
return ((QueryAtom) queryAtom).getExpression().matches(targetAtom, IStereoElement.LEFT);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer;

import java.util.Iterator;
import java.util.Map;

import static org.openscience.cdk.graph.GraphUtil.EdgeToBondMap;

Expand Down Expand Up @@ -88,9 +89,6 @@ public final class Ullmann extends Pattern {
/** The bond matcher to determine atom feasibility. */
private final BondMatcher bondMatcher;

/** Is the query matching query atoms/bonds etc? */
private final boolean queryMatching;

/**
* Non-public constructor; for now the atom/bond semantics are fixed.
*
Expand All @@ -104,20 +102,21 @@ private Ullmann(IAtomContainer query, AtomMatcher atomMatcher, BondMatcher bondM
this.bondMatcher = bondMatcher;
this.bonds1 = EdgeToBondMap.withSpaceFor(query);
this.g1 = GraphUtil.toAdjList(query, bonds1);
this.queryMatching = query instanceof IQueryAtomContainer;
determineFilters(query);
}

@Override
public int[] match(IAtomContainer target) {
return matchAll(target).stereochemistry().first();
return matchAll(target).first();
}

@Override
public Mappings matchAll(IAtomContainer target) {
EdgeToBondMap bonds2 = EdgeToBondMap.withSpaceFor(target);
int[][] g2 = GraphUtil.toAdjList(target, bonds2);
Iterable<int[]> iterable = new UllmannIterable(query, target, g1, g2, bonds1, bonds2, atomMatcher, bondMatcher);
return new Mappings(query, target, iterable);
Mappings mappings = new Mappings(query, target, iterable);
return filter(mappings, query, target);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@

package org.openscience.cdk.isomorphism;

import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.graph.GraphUtil;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer;

Expand Down Expand Up @@ -92,9 +94,6 @@ public final class VentoFoggia extends Pattern {
/** Search for a subgraph. */
private final boolean subgraph;

/** Is the query matching query atoms/bonds etc? */
private final boolean queryMatching;

/**
* Non-public constructor; for now the atom/bond semantics are fixed.
*
Expand All @@ -110,13 +109,13 @@ private VentoFoggia(IAtomContainer query, AtomMatcher atomMatcher, BondMatcher b
this.bonds1 = EdgeToBondMap.withSpaceFor(query);
this.g1 = GraphUtil.toAdjList(query, bonds1);
this.subgraph = substructure;
this.queryMatching = query instanceof IQueryAtomContainer;
determineFilters(query);
}

/**{@inheritDoc} */
@Override
public int[] match(IAtomContainer target) {
return matchAll(target).stereochemistry().first();
return matchAll(target).first();
}

/**{@inheritDoc} */
Expand All @@ -135,9 +134,14 @@ public Mappings matchAll(final IAtomContainer target) {
bonds2 = cached.bmap;
g2 = cached.g;

Iterable<int[]> iterable = new VFIterable(query, target, g1, g2, bonds1, bonds2, atomMatcher, bondMatcher,
subgraph);
return new Mappings(query, target, iterable);
Iterable<int[]> iterable = new VFIterable(query, target,
g1, g2,
bonds1, bonds2,
atomMatcher, bondMatcher,
subgraph);

Mappings mappings = new Mappings(query, target, iterable);
return filter(mappings, query, target);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.isomorphism.AtomMatcher;
import org.openscience.cdk.isomorphism.BondMatcher;
import org.openscience.cdk.isomorphism.ComponentGrouping;
import org.openscience.cdk.isomorphism.Pattern;
import org.openscience.cdk.isomorphism.VentoFoggia;
import static org.openscience.cdk.isomorphism.matchers.Expr.Type.*;

Expand Down Expand Up @@ -370,11 +370,8 @@ private boolean matches(Type type, IAtom atom, int stereo) {
left.type == OR && left.left.type == STEREOCHEMISTRY));

case RECURSIVE:
for (int[] match : VentoFoggia.findSubstructure(query,
AtomMatcher.forQuery(),
BondMatcher.forQuery())
.matchAll(atom.getContainer())
.filter(new ComponentGrouping(query, atom.getContainer()))) {
for (int[] match : Pattern.findSubstructure(query)
.matchAll(atom.getContainer())) {
if (match[0] == atom.getIndex())
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ static IAtomContainer ethylAlcoholHydrate() {
return m;
}

static ComponentGrouping create(int[] grouping, IAtomContainer container) {
return new ComponentGrouping(grouping, new ConnectedComponents(GraphUtil.toAdjList(container)).components());
static ComponentFilter create(int[] grouping, IAtomContainer container) {
return new ComponentFilter(grouping, new ConnectedComponents(GraphUtil.toAdjList(container)).components());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,9 @@
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.FluentIterable;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.isomorphism.ComponentGrouping;
import org.openscience.cdk.isomorphism.Pattern;
import org.openscience.cdk.isomorphism.SmartsStereoMatch;
import org.openscience.cdk.isomorphism.matchers.IQueryAtom;
import org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer;

Expand Down Expand Up @@ -62,9 +59,8 @@ public RecursiveSmartsAtom(final IQueryAtomContainer query) {
@Override
public BitSet load(IAtomContainer target) throws Exception {
BitSet hits = new BitSet();
for (int[] mapping : FluentIterable.from(Pattern.findSubstructure(query).matchAll(target))
.filter(new SmartsStereoMatch(query, target))
.filter(new ComponentGrouping(query, target))) {
for (int[] mapping : Pattern.findSubstructure(query)
.matchAll(target)) {
hits.set(mapping[0]);
}
return hits;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.interfaces.IRingSet;
import org.openscience.cdk.isomorphism.ComponentGrouping;
import org.openscience.cdk.isomorphism.SmartsStereoMatch;
import org.openscience.cdk.isomorphism.Ullmann;
import org.openscience.cdk.isomorphism.VentoFoggia;
import org.openscience.cdk.isomorphism.matchers.IQueryAtom;
import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer;
Expand Down Expand Up @@ -366,9 +364,10 @@ public boolean matches(IAtomContainer atomContainer, boolean forceInitialization
}
}
} else {
mappings = FluentIterable.from(VentoFoggia.findSubstructure(query).matchAll(atomContainer))
.filter(new SmartsStereoMatch(query, atomContainer))
.filter(new ComponentGrouping(query, atomContainer)).toList();
mappings = FluentIterable.from(VentoFoggia.findSubstructure(query)
.matchAll(atomContainer)
.filter(new SmartsStereoMatch(query, atomContainer)))
.toList();
}

return !mappings.isEmpty();
Expand Down

0 comments on commit 47067b7

Please sign in to comment.