Skip to content

Commit

Permalink
Implement counting fps for 'SubstructureFingerprints'
Browse files Browse the repository at this point in the history
- Substructures specified via SMARTS are counted using the Vento-Foggia algorithm
- Tests have been implemented
  • Loading branch information
bachi55 authored and johnmay committed Jun 24, 2018
1 parent c3a253c commit f47df30
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,16 @@

import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.isomorphism.Pattern;
import org.openscience.cdk.isomorphism.VentoFoggia;
import org.openscience.cdk.isomorphism.matchers.smarts.SmartsMatchers;
import org.openscience.cdk.smiles.smarts.SMARTSQueryTool;
import org.openscience.cdk.smiles.smarts.parser.SMARTSParser;

import java.util.BitSet;
import java.util.Map;
import java.util.TreeMap;

/**
* {@link IFingerprinter} that gives a bit set which has a size equal to the number
Expand Down Expand Up @@ -397,8 +403,6 @@ public SubstructureFingerprinter(String[] smarts) {
* <li>Countable MACCS patterns:</li>
* </ul>
*
* @todo: Somehow we should handle this exceptions in a way, that it is transparent to the user why 'smarts==null'.
*
* @param type The desired type of substructures.
*/
public SubstructureFingerprinter(Type type) {
Expand Down Expand Up @@ -440,6 +444,61 @@ public IBitFingerprint getBitFingerprint(IAtomContainer atomContainer) throws CD
return new BitSetFingerprint(fingerPrint);
}


/**
 * {@inheritDoc}
 *
 * <p>The returned fingerprint has one bin per entry of {@code smarts}: the bin
 * with hash {@code i} stores the value of
 * {@code matchAll(atomContainer).stereochemistry().countUnique()} for the
 * pattern parsed from {@code smarts[i]}. A bin is created for every pattern,
 * including patterns whose count is zero.
 *
 * @param atomContainer the structure to search; it is handed to
 *                      {@code SmartsMatchers.prepare} before any matching is done
 * @return a count fingerprint indexed by the position of each SMARTS pattern
 * @throws CDKException if no substructures were defined ({@code smarts} is {@code null})
 */
@Override
public ICountFingerprint getCountFingerprint(IAtomContainer atomContainer) throws CDKException {
    if (smarts == null) {
        throw new CDKException("No substructures were defined");
    }

    IChemObjectBuilder aCBuilder = atomContainer.getBuilder();

    // init SMARTS invariants (connectivity, degree, etc)
    SmartsMatchers.prepare(atomContainer, false);

    // key: index of the pattern within 'smarts'; value: its match count
    final Map<Integer, Integer> map = new TreeMap<Integer, Integer>();
    for (int i = 0; i < smarts.length; i++) {
        Pattern pattern = VentoFoggia.findSubstructure(SMARTSParser.parse(smarts[i], aCBuilder));
        map.put(i, pattern.matchAll(atomContainer).stereochemistry().countUnique());
    }

    // Flatten the sorted map into parallel arrays for positional access.
    final int sz = map.size();
    final int[] hash = new int[sz];
    final int[] count = new int[sz];
    int n = 0;
    for (Map.Entry<Integer, Integer> bin : map.entrySet()) {
        hash[n] = bin.getKey();
        count[n] = bin.getValue();
        n++;
    }

    return new ICountFingerprint() {

        @Override
        public long size() {
            return smarts.length;
        }

        // Every pattern gets a bin (zero counts included), so this equals the
        // number of patterns rather than the number of non-zero bins.
        @Override
        public int numOfPopulatedbins() {
            return sz;
        }

        @Override
        public int getCount(int index) {
            return count[index];
        }

        @Override
        public int getHash(int index) {
            return hash[index];
        }

        // Merging is not implemented for this fingerprint; the call is ignored.
        @Override
        public void merge(ICountFingerprint fp) {}

        // Switching to bit-fingerprint behaviour is not implemented; the call is ignored.
        @Override
        public void setBehaveAsBitFingerprint(boolean behaveAsBitFingerprint) {}

        @Override
        public boolean hasHash(int hash) {
            return map.containsKey(hash);
        }

        // A hash without a bin has a count of zero. Returning map.get(hash)
        // directly would unbox null and throw a NullPointerException.
        @Override
        public int getCountForHash(int hash) {
            final Integer hits = map.get(hash);
            return hits == null ? 0 : hits;
        }
    };
}

/** {@inheritDoc} */
@Override
public Map<String, Integer> getRawFingerprint(IAtomContainer iAtomContainer) throws CDKException {
Expand All @@ -463,11 +522,4 @@ public int getSize() {
public String getSubstructure(int bitIndex) {
// Returns the SMARTS string stored at position bitIndex of the smarts array.
// No range check is done here, so an invalid index fails at the array access.
return smarts[bitIndex];
}

/** {@inheritDoc} */
@Override
public ICountFingerprint getCountFingerprint(IAtomContainer container) throws CDKException {
// This variant does not compute a count fingerprint: it ignores the container
// and unconditionally throws.
throw new UnsupportedOperationException();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ public void testCountableMACCSBinary2() throws Exception {
mol = parser.parseSmiles("C([S](O)(=O)=O)C1=C(C=CC=C1)CCCC[N+](=O)[O-]");
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
Aromaticity.cdkLegacy().apply(mol);

bs = printer.getBitFingerprint(mol).asBitSet();

Assert.assertTrue(bs.get(46));
Expand Down Expand Up @@ -209,6 +210,111 @@ public void testCountableMACCSBinary2() throws Exception {
Assert.assertFalse(bs.get(91));
}

// NOTE(review): this test has an empty body, so it passes without checking anything.
// Presumably a placeholder; the count fingerprint itself is exercised by
// testCountableMACCSCount2 and testCountableMACCSCount_Rings. Confirm whether
// real assertions belong here.
@Test
public void testGetCountFingerprint() throws Exception {}

/**
 * Checks selected bins of the COUNTABLE_MACCS166 count fingerprint for two
 * molecules, including bins that are expected to be zero.
 *
 * Uses assertEquals(expected, actual) so that a failure reports both the
 * expected and the actual count.
 */
@Test
public void testCountableMACCSCount2() throws Exception {
    SmilesParser parser = new SmilesParser(SilentChemObjectBuilder.getInstance());
    IFingerprinter printer = new SubstructureFingerprinter(SubstructureFingerprinter.Type.COUNTABLE_MACCS166);
    IAtomContainer mol;
    ICountFingerprint cfp;

    // Test molecule 1
    mol = parser.parseSmiles("C([S](O)(=O)=O)C1=C(C=CC=C1)CCCC[N+](=O)[O-]");
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
    Aromaticity.cdkLegacy().apply(mol);
    cfp = printer.getCountFingerprint(mol);

    Assert.assertEquals(2, cfp.getCountForHash(46));
    Assert.assertEquals(1, cfp.getCountForHash(27));
    Assert.assertEquals(2, cfp.getCountForHash(59));
    Assert.assertEquals(1, cfp.getCountForHash(49));
    Assert.assertEquals(1, cfp.getCountForHash(111));
    Assert.assertEquals(3, cfp.getCountForHash(129));
    Assert.assertEquals(2, cfp.getCountForHash(115));
    Assert.assertEquals(3, cfp.getCountForHash(120));
    Assert.assertEquals(3, cfp.getCountForHash(41));

    // Bins expected to be empty for molecule 1
    Assert.assertEquals(0, cfp.getCountForHash(93));
    Assert.assertEquals(0, cfp.getCountForHash(91));
    Assert.assertEquals(0, cfp.getCountForHash(24));

    // Test molecule 2: Diatrizoic acid
    mol = parser.parseSmiles("CC(=O)NC1=C(C(=C(C(=C1I)C(=O)O)I)NC(=O)C)I");
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
    Aromaticity.cdkLegacy().apply(mol);
    cfp = printer.getCountFingerprint(mol);

    Assert.assertEquals(3, cfp.getCountForHash(15));
    Assert.assertEquals(3, cfp.getCountForHash(135));
    Assert.assertEquals(4, cfp.getCountForHash(139));
    Assert.assertEquals(3, cfp.getCountForHash(93));
    Assert.assertEquals(6, cfp.getCountForHash(73));

    // Bin expected to be empty for molecule 2
    Assert.assertEquals(0, cfp.getCountForHash(91));
}

/**
 * Checks the ring-related bins of the COUNTABLE_MACCS166 count fingerprint
 * for molecules with different numbers and sizes of rings.
 *
 * Uses assertEquals(expected, actual) so that a failure reports both the
 * expected and the actual count.
 */
@Test
public void testCountableMACCSCount_Rings() throws Exception {
    SmilesParser parser = new SmilesParser(SilentChemObjectBuilder.getInstance());
    IFingerprinter printer = new SubstructureFingerprinter(SubstructureFingerprinter.Type.COUNTABLE_MACCS166);
    IAtomContainer mol;
    ICountFingerprint cfp;

    // Aromatic 6-rings
    mol = parser.parseSmiles("C1=CC=CC(=C1)CCCC2=CC=CC=C2");
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
    Aromaticity.cdkLegacy().apply(mol);
    cfp = printer.getCountFingerprint(mol);

    Assert.assertEquals(2, cfp.getCountForHash(128)); // 6-ring
    Assert.assertEquals(2, cfp.getCountForHash(111)); // aromaticity

    Assert.assertEquals(0, cfp.getCountForHash(7)); // 7-ring
    Assert.assertEquals(0, cfp.getCountForHash(82)); // 5-ring

    // Non-aromatic 6-rings
    mol = parser.parseSmiles("C1CC(CCC1)CCCCC2CCCCC2");
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
    Aromaticity.cdkLegacy().apply(mol);
    cfp = printer.getCountFingerprint(mol);

    Assert.assertEquals(2, cfp.getCountForHash(128)); // 6-ring

    Assert.assertEquals(0, cfp.getCountForHash(111)); // aromaticity
    Assert.assertEquals(0, cfp.getCountForHash(7)); // 7-ring
    Assert.assertEquals(0, cfp.getCountForHash(82)); // 5-ring

    // Aromatic 6-ring, 3-ring and 4-ring
    mol = parser.parseSmiles("C1CC1C(CCC2CCC2)CC3=CC=CC=C3");
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
    Aromaticity.cdkLegacy().apply(mol);
    cfp = printer.getCountFingerprint(mol);

    Assert.assertEquals(1, cfp.getCountForHash(128)); // 6-ring
    Assert.assertEquals(1, cfp.getCountForHash(111)); // aromaticity
    Assert.assertEquals(1, cfp.getCountForHash(10)); // 3-ring
    Assert.assertEquals(1, cfp.getCountForHash(1)); // 4-ring

    Assert.assertEquals(0, cfp.getCountForHash(7)); // 7-ring
    Assert.assertEquals(0, cfp.getCountForHash(82)); // 5-ring

    // Aromatic 6-ring, five 3-rings and one 4-ring
    mol = parser.parseSmiles("C1(CC1C(CCC2CCC2)CC3=CC=CC=C3)C(C(C(C4CC4)C5CC5)C6CC6)C7CC7");
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
    Aromaticity.cdkLegacy().apply(mol);
    cfp = printer.getCountFingerprint(mol);

    Assert.assertEquals(1, cfp.getCountForHash(128)); // 6-ring
    Assert.assertEquals(1, cfp.getCountForHash(111)); // aromaticity
    Assert.assertEquals(5, cfp.getCountForHash(10)); // 3-ring
    Assert.assertEquals(1, cfp.getCountForHash(1)); // 4-ring

    Assert.assertEquals(0, cfp.getCountForHash(7)); // 7-ring
    Assert.assertEquals(0, cfp.getCountForHash(82)); // 5-ring
}

@Test
public void testCountableMACCSBinary_Rings() throws Exception {
SmilesParser parser = new SmilesParser(SilentChemObjectBuilder.getInstance());
Expand Down

0 comments on commit f47df30

Please sign in to comment.