Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Pattern implementation for SMARTS queries.

Signed-off-by: Egon Willighagen <egonw@users.sourceforge.net>
  • Loading branch information...
commit 1d510370b4d66ca54d2437fcb7f0e38f0833f1f6 1 parent 55e30c6
@johnmay johnmay authored egonw committed
View
202 tool/smarts/src/main/java/org/openscience/cdk/smiles/smarts/SmartsPattern.java
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2014 European Bioinformatics Institute (EMBL-EBI)
+ * John May <jwmay@users.sf.net>
+ *
+ * Contact: cdk-devel@lists.sourceforge.net
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version. All we ask is that proper credit is given
+ * for our work, which includes - but is not limited to - adding the above
+ * copyright notice to the beginning of your source code files, and to any
+ * copyright notice that you may distribute with programs based on this work.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+package org.openscience.cdk.smiles.smarts;
+
+import org.openscience.cdk.aromaticity.Aromaticity;
+import org.openscience.cdk.aromaticity.ElectronDonation;
+import org.openscience.cdk.exception.CDKException;
+import org.openscience.cdk.graph.Cycles;
+import org.openscience.cdk.interfaces.IAtomContainer;
+import org.openscience.cdk.interfaces.IChemObjectBuilder;
+import org.openscience.cdk.isomorphism.ComponentGrouping;
+import org.openscience.cdk.isomorphism.Mappings;
+import org.openscience.cdk.isomorphism.Pattern;
+import org.openscience.cdk.isomorphism.SmartsStereoMatch;
+import org.openscience.cdk.isomorphism.matchers.smarts.SmartsMatchers;
+import org.openscience.cdk.smiles.smarts.parser.SMARTSParser;
+import org.openscience.cdk.tools.LoggingToolFactory;
+
+import java.io.IOException;
+
+/**
+ * A {@link Pattern} for matching a single SMARTS query against multiple target
+ * compounds. The class should <b>not</b> be used for matching many queries
+ * against a single target as in substructure keyed fingerprints. The {@link
+ * SMARTSQueryTool} is currently a better option as less target initialisation
+ * is performed.
+ *
+ * Simple usage (here {@code builder} is an {@link IChemObjectBuilder} and
+ * {@code acs} a collection of target containers):
+ *
+ * <blockquote><pre>
+ * Pattern ptrn = SmartsPattern.create("O[C@?H](C)CC", builder);
+ *
+ * for (IAtomContainer ac : acs) {
+ * if (ptrn.matches(ac)) {
+ * // 'ac' contains the pattern
+ * }
+ * }
+ * </pre></blockquote>
+ *
+ * Obtaining a {@link Mappings} instance and determining the number of unique
+ * matches:
+ *
+ * <blockquote><pre>
+ * Pattern ptrn = SmartsPattern.create("O[C@?H](C)CC", builder);
+ * int nUniqueHits = 0;
+ *
+ * for (IAtomContainer ac : acs) {
+ * nUniqueHits += ptrn.matchAll(ac)
+ * .countUnique();
+ * }
+ * </pre></blockquote>
+ *
+ * @author John May
+ */
+public final class SmartsPattern extends Pattern {
+
+ /** Parsed query, produced once from the SMARTS string in the constructor. */
+ private final IAtomContainer query;
+
+ /** Subgraph mapping, a substructure pattern built from {@link #query}. */
+ private final Pattern pattern;
+
+ /**
+ * Include invariants about ring size / number. Decided once from the
+ * SMARTS string (see {@link #ringSizeOrNumber(String)}) and handed to
+ * {@code SmartsMatchers.prepare} for every target.
+ */
+ private final boolean ringInfo;
+
+ /**
+ * Aromaticity model applied to every target before matching: Daylight
+ * electron donation over {@code Cycles.or(Cycles.all(), Cycles.relevant())}.
+ */
+ private final Aromaticity arom = new Aromaticity(ElectronDonation.daylight(),
+ Cycles.or(Cycles.all(), Cycles.relevant()));
+
+ /**
+ * Internal constructor, use {@link #create(String, IChemObjectBuilder)}.
+ * Any exception raised while parsing is wrapped as the cause of an
+ * {@link IOException}.
+ *
+ * @param smarts pattern
+ * @param builder the builder
+ * @throws IOException the pattern could not be parsed
+ */
+ private SmartsPattern(final String smarts, IChemObjectBuilder builder) throws IOException {
+ try {
+ this.query = SMARTSParser.parse(smarts, builder);
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+ this.pattern = Pattern.findSubstructure(query);
+
+ // X<num>, R and @ are cheap and always done, but R<num> and r<num> are
+ // not; we inspect the SMARTS pattern string to determine if ring
+ // size or number queries are needed
+ this.ringInfo = ringSizeOrNumber(smarts);
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * This implementation returns the first mapping from
+ * {@link #matchAll(IAtomContainer)}, so the same target initialisation
+ * is performed.
+ */
+ @Override public int[] match(IAtomContainer container) {
+ return matchAll(container).first();
+ }
+
+ /**
+ * Obtain the mappings of the query pattern against the target compound. Any
+ * initialisations required for the SMARTS match are automatically
+ * performed. The Daylight aromaticity model is applied clearing existing
+ * aromaticity. <b>Do not use this for matching multiple SMARTS against the
+ * same container</b>.
+ *
+ * If applying the aromaticity model fails with a {@link CDKException} the
+ * error is logged and matching continues on the target as it is. The
+ * returned mappings are additionally filtered for stereochemistry and
+ * component grouping when the query specifies them.
+ *
+ * <blockquote><pre>
+ * Pattern ptrn = SmartsPattern.create("O[C@?H](C)CC", builder);
+ * int nUniqueHits = 0;
+ *
+ * for (IAtomContainer ac : acs) {
+ * nUniqueHits += ptrn.matchAll(ac)
+ * .countUnique();
+ * }
+ * </pre></blockquote>
+ *
+ * See {@link Mappings} for available methods.
+ *
+ * @param target the target compound in which we want to match the pattern
+ * @return mappings of the query to the target compound
+ */
+ @Override public Mappings matchAll(final IAtomContainer target) {
+
+ // TODO: prescreen target for element frequency before initialising
+ // invariants and applying aromaticity, requires pattern enumeration -
+ // see http://www.daylight.com/meetings/emug00/Sayle/substruct.html.
+
+ // assign additional atom invariants for SMARTS queries, a CDK quirk
+ // as each atom knows not the molecule from whence it came
+ SmartsMatchers.prepare(target, ringInfo);
+
+ // apply the daylight aromaticity model
+ try {
+ arom.apply(target);
+ } catch (CDKException e) {
+ LoggingToolFactory.createLoggingTool(getClass()).error(e);
+ }
+
+ Mappings mappings = pattern.matchAll(target);
+
+ // stereochemistry and component grouping filters are skipped if the
+ // query does not contain them
+ if (query.stereoElements().iterator().hasNext())
+ mappings = mappings.filter(new SmartsStereoMatch(query, target));
+ if (query.getProperty(ComponentGrouping.KEY) != null)
+ mappings = mappings.filter(new ComponentGrouping(query, target));
+
+ // Note: Mappings is lazy, we can't reset aromaticity etc as the
+ // substructure match may not have finished
+
+ return mappings;
+ }
+
+ /**
+ * Create a {@link Pattern} that will match the given {@code smarts} query.
+ *
+ * @param smarts SMARTS pattern string
+ * @param builder chem object builder used to create objects
+ * @return a new pattern
+ * @throws java.io.IOException the smarts could not be parsed
+ */
+ public static SmartsPattern create(String smarts, IChemObjectBuilder builder) throws IOException {
+ return new SmartsPattern(smarts, builder);
+ }
+
+ /**
+ * Checks a smarts string for !R, R<num> or r<num>. If found then the more
+ * expensive ring info needs to be initialised before querying.
+ *
+ * This is a plain character scan of the string rather than a parse: it
+ * reports {@code true} for any 'r' or 'R' directly followed by a digit and
+ * for any '!' directly followed by 'R'.
+ *
+ * @param smarts pattern string
+ * @return the pattern has a ring size or number query
+ */
+ static boolean ringSizeOrNumber(final String smarts) {
+ for (int i = 0, end = smarts.length() - 1; i <= end; i++) {
+ char c = smarts.charAt(i);
+ if ((c == 'r' || c == 'R') && i < end && Character.isDigit(smarts.charAt(i + 1)))
+ return true;
+ // !R = R0
+ if (c == '!' && i < end && smarts.charAt(i + 1) == 'R')
+ return true;
+ }
+ return false;
+ }
+}
View
94 tool/smarts/src/test/java/org/openscience/cdk/smiles/smarts/SmartsPatternTest.java
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2014 European Bioinformatics Institute (EMBL-EBI)
+ * John May <jwmay@users.sf.net>
+ *
+ * Contact: cdk-devel@lists.sourceforge.net
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version. All we ask is that proper credit is given
+ * for our work, which includes - but is not limited to - adding the above
+ * copyright notice to the beginning of your source code files, and to any
+ * copyright notice that you may distribute with programs based on this work.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.openscience.cdk.smiles.smarts;
+
+import org.junit.Test;
+import org.openscience.cdk.interfaces.IAtomContainer;
+import org.openscience.cdk.interfaces.IChemObjectBuilder;
+import org.openscience.cdk.silent.SilentChemObjectBuilder;
+import org.openscience.cdk.smiles.SmilesParser;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Unit tests for {@link SmartsPattern}.
+ *
+ * The {@code ringSizeOrNumber_*} tests pin which SMARTS strings are reported
+ * as needing ring size / number information: plain ring membership
+ * ({@code [R]}), connectivity ({@code [X2]}) and element symbols containing
+ * an 'r' or 'R' must not be flagged, whereas {@code [!R]}, {@code [R0]},
+ * {@code [R2]} and {@code [r5]} must be.
+ *
+ * The {@code components} and {@code stereochemistry} tests match patterns
+ * built with {@link SmartsPattern#create(String, IChemObjectBuilder)}
+ * against targets parsed from SMILES, checking that component grouping and
+ * tetrahedral stereo in the query are respected.
+ *
+ * @author John May
+ */
+public class SmartsPatternTest {
+
+ IChemObjectBuilder bldr = SilentChemObjectBuilder.getInstance();
+
+ @Test public void ringSizeOrNumber_membership() throws Exception {
+ assertFalse(SmartsPattern.ringSizeOrNumber("[R]"));
+ }
+
+ @Test public void ringSizeOrNumber_ringConnectivity() throws Exception {
+ assertFalse(SmartsPattern.ringSizeOrNumber("[X2]"));
+ }
+
+ @Test public void ringSizeOrNumber_elements() throws Exception {
+ assertFalse(SmartsPattern.ringSizeOrNumber("[Br]"));
+ assertFalse(SmartsPattern.ringSizeOrNumber("[Cr]"));
+ assertFalse(SmartsPattern.ringSizeOrNumber("[Fr]"));
+ assertFalse(SmartsPattern.ringSizeOrNumber("[Sr]"));
+ assertFalse(SmartsPattern.ringSizeOrNumber("[Ra]"));
+ assertFalse(SmartsPattern.ringSizeOrNumber("[Re]"));
+ assertFalse(SmartsPattern.ringSizeOrNumber("[Rf]"));
+ }
+
+ @Test public void ringSizeOrNumber_negatedMembership() throws Exception {
+ assertTrue(SmartsPattern.ringSizeOrNumber("[!R]"));
+ }
+
+ @Test public void ringSizeOrNumber_membershipZero() throws Exception {
+ assertTrue(SmartsPattern.ringSizeOrNumber("[R0]"));
+ }
+
+ @Test public void ringSizeOrNumber_membershipTwo() throws Exception {
+ assertTrue(SmartsPattern.ringSizeOrNumber("[R2]"));
+ }
+
+ @Test public void ringSizeOrNumber_ringSize() throws Exception {
+ assertTrue(SmartsPattern.ringSizeOrNumber("[r5]"));
+ }
+
+ @Test public void components() throws Exception {
+ assertTrue(SmartsPattern.create("(O).(O)", bldr).matches(smi("O.O")));
+ assertFalse(SmartsPattern.create("(O).(O)", bldr).matches(smi("OO")));
+ }
+
+ @Test public void stereochemistry() throws Exception {
+ assertTrue(SmartsPattern.create("C[C@H](O)CC", bldr)
+ .matches(smi("C[C@H](O)CC")));
+ assertFalse(SmartsPattern.create("C[C@H](O)CC", bldr)
+ .matches(smi("C[C@@H](O)CC")));
+ assertFalse(SmartsPattern.create("C[C@H](O)CC", bldr)
+ .matches(smi("CC(O)CC")));
+ }
+
+ IAtomContainer smi(String smi) throws Exception {
+ return new SmilesParser(bldr).parseSmiles(smi);
+ }
+}
Please sign in to comment.
Something went wrong with that request. Please try again.