Skip to content

Commit

Permalink
Merge pull request #231 from cdk/patch/smarts-ecfp
Browse files Browse the repository at this point in the history
extracting the substructure patterns of CircularFingerprint.
  • Loading branch information
egonw committed Oct 9, 2016
2 parents fa6f843 + 6889bf8 commit 02872f8
Show file tree
Hide file tree
Showing 5 changed files with 936 additions and 91 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,23 @@

import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.zip.CRC32;

import javax.vecmath.Point2d;
import javax.vecmath.Point3d;

import com.google.common.primitives.Ints;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;



/**
* <p>Circular fingerprints: for generating fingerprints that are functionally equivalent to ECFP-2/4/6 and FCFP-2/4/6
* fingerprints, which are partially described by Rogers et al. {@cdk.cite Rogers2010}.
Expand Down Expand Up @@ -112,6 +117,7 @@ public FP(int hashCode, int iteration, int[] atoms) {
this.atoms = atoms;
}
}


// ------------ private members ------------

Expand Down Expand Up @@ -1238,7 +1244,7 @@ private int findBond(int a1, int a2) {
if (atomAdj[a1][n] == a2) return bondAdj[a1][n];
return -1;
}

/*
* for debugging convenience: revive if necessary private void wr(String
* str) {System.out.println(str);} private String arrayStr(int[] val) { if
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/*
* Copyright (c) 2016 John May <jwmay@users.sf.net>
*
* Contact: cdk-devel@lists.sourceforge.net
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or (at
* your option) any later version. All we ask is that proper credit is given
* for our work, which includes - but is not limited to - adding the above
* copyright notice to the beginning of your source code files, and to any
* copyright notice that you may distribute with programs based on this work.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
package org.openscience.cdk.fingerprint;

import org.junit.Test;
import org.openscience.cdk.CDKTestCase;
import org.openscience.cdk.fingerprint.CircularFingerprinter.FP;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.silent.SilentChemObjectBuilder;
import org.openscience.cdk.smarts.SmartsFragmentExtractor;
import org.openscience.cdk.smiles.SmilesParser;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

import static org.hamcrest.CoreMatchers.everyItem;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.collection.IsIn.isIn;

/**
 * Checks that the atom sets reported by {@link CircularFingerprinter} are
 * rendered by {@link SmartsFragmentExtractor} as one of the SMARTS strings
 * listed for each test molecule.
 *
 * @cdk.module test-standard
 */
public class CircularFingerprintSmartsTest extends CDKTestCase {

    private static final ILoggingTool logger = LoggingToolFactory
            .createLoggingTool(CircularFingerprintSmartsTest.class);

    /** Shared SMILES parser used to build every test molecule. */
    public static final SmilesParser parser = new SmilesParser(
            SilentChemObjectBuilder.getInstance());

    @Test
    public void testMol1() throws Exception {
        String molSmiles = "CC";
        String[][] expectedFPSmarts = { { "C*" }, { "CC" } };
        checkFPSmartsForMolecule(molSmiles, expectedFPSmarts);
    }

    @Test
    public void testMol2() throws Exception {
        String molSmiles = "CCC";
        String[][] expectedFPSmarts = {
                { "C*" },
                { "C(*)*" },
                { "CC*", "C(*)C" },
                { "CCC" } };
        checkFPSmartsForMolecule(molSmiles, expectedFPSmarts);
    }

    @Test
    public void testMol3() throws Exception {
        String molSmiles = "CCN";
        String[][] expectedFPSmarts = {
                { "C*" },
                { "C(*)*" },
                { "N*" },
                { "CC*", "C(*)C" },
                { "C(*)N", "NC*" },
                { "CCN", "NCC", "C(C)N", "C(N)C" } };
        checkFPSmartsForMolecule(molSmiles, expectedFPSmarts);
    }

    @Test
    public void testMol4() throws Exception {
        String molSmiles = "C1CC1";
        String[][] expectedFPSmarts = {
                { "C(*)*" },
                { "C1CC1", "C(C1)C1" } };
        checkFPSmartsForMolecule(molSmiles, expectedFPSmarts);
    }

    @Test
    public void testMol5() throws Exception {
        String molSmiles = "C1CCC1";
        String[][] expectedFPSmarts = {
                { "C(*)*" },
                { "C(C*)C*", "C(CC*)*", "C(*)CC*" },
                { "C1CCC1", "C(CC1)C1", "C(C1)CC1" } };
        checkFPSmartsForMolecule(molSmiles, expectedFPSmarts);
    }

    @Test
    public void testMol6() throws Exception {
        String molSmiles = "CC[C-]";
        String[][] expectedFPSmarts = {
                { "C*" },
                { "C(*)*" },
                { "[C-]*" },
                { "CC*", "C(*)C" },
                { "[C-]C*", "C(*)[C-]" },
                { "CC[C-]", "C(C)[C-]", "[C-]CC", "C([C-])C" } };
        checkFPSmartsForMolecule(molSmiles, expectedFPSmarts);
    }

    @Test
    public void testMol7() throws Exception {
        String molSmiles = "c1ccccc1";
        String[][] expectedFPSmarts = {
                { "c(a)a" },
                { "c(a)cca", "c(ca)ca", "c(cca)a" },
                { "c(a)cccca", "c(ca)ccca", "c(cca)cca", "c(ccca)ca",
                        "c(cccca)a" },
                { "c1ccccc1", "c(c1)cccc1", "c(cc1)ccc1", "c(ccc1)cc1",
                        "c(cccc1)c1" } };
        checkFPSmartsForMolecule(molSmiles, expectedFPSmarts);
    }

    /**
     * Parses the molecule, computes its circular fingerprints, converts the
     * atom set of every fingerprint to SMARTS and verifies that each generated
     * SMARTS is one of the expected strings.
     *
     * @param moleculeSmiles   SMILES of the molecule under test
     * @param expectedFPSmarts accepted SMARTS; it is a two-dimensional array
     *                         because each row lists several equivalent
     *                         variants of one pattern, e.g. {@code CCC} and
     *                         {@code C(C)C}
     * @throws Exception if the SMILES cannot be parsed or the fingerprint
     *                   calculation fails
     */
    private void checkFPSmartsForMolecule(String moleculeSmiles,
            String[][] expectedFPSmarts) throws Exception {

        // Any variant of any row is acceptable, so flatten rows into one set.
        Set<String> expected = new HashSet<>();
        for (String[] variants : expectedFPSmarts) {
            Collections.addAll(expected, variants);
        }

        IAtomContainer mol = parser.parseSmiles(moleculeSmiles);

        CircularFingerprinter circ = new CircularFingerprinter();
        circ.calculate(mol);
        SmartsFragmentExtractor subsmarts = new SmartsFragmentExtractor(mol);
        subsmarts.setMode(SmartsFragmentExtractor.MODE_JCOMPOUNDMAPPER);

        int numFP = circ.getFPCount();
        Set<String> actual = new HashSet<>();
        for (int i = 0; i < numFP; i++) {
            FP fp = circ.getFP(i);
            actual.add(subsmarts.generate(fp.atoms));
        }

        logger.debug("SMARTS generated for " + moleculeSmiles + ": " + actual);

        // everyItem(...) is satisfied by an empty collection, so without this
        // guard the test would pass even if no fingerprint was produced.
        if (actual.isEmpty()) {
            throw new AssertionError(
                    "No fingerprint SMARTS were generated for " + moleculeSmiles);
        }

        assertThat("Unexpected SMARTS generated for " + moleculeSmiles,
                actual, everyItem(isIn(expected)));
    }
}

0 comments on commit 02872f8

Please sign in to comment.