diff --git a/descriptor/fingerprint/src/main/java/org/openscience/cdk/fingerprint/AtomPairs2DFingerprinter.java b/descriptor/fingerprint/src/main/java/org/openscience/cdk/fingerprint/AtomPairs2DFingerprinter.java index 4354eb625a..4707662efa 100644 --- a/descriptor/fingerprint/src/main/java/org/openscience/cdk/fingerprint/AtomPairs2DFingerprinter.java +++ b/descriptor/fingerprint/src/main/java/org/openscience/cdk/fingerprint/AtomPairs2DFingerprinter.java @@ -5,189 +5,204 @@ */ - package org.openscience.cdk.fingerprint; +import org.openscience.cdk.config.Elements; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.graph.AllPairsShortestPaths; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; + import java.util.ArrayList; import java.util.BitSet; +import java.util.Collections; import java.util.HashMap; -import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; -import java.util.TreeMap; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.graph.AllPairsShortestPaths; -import org.openscience.cdk.interfaces.IAtom; //import org.openscience.cdk.graph.matrix.TopologicalMatrix; -import org.openscience.cdk.interfaces.IAtomContainer; -import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; /** * Generates an atom pair 2D fingerprint as implemented in PaDEL given an {@link IAtomContainer}, that * extends the {@link Fingerprinter}. * - * @author Lyle Burgoon - * @cdk.created 2018-02-05 - * @cdk.keyword fingerprint - * @cdk.keyword similarity - * @cdk.module fingerprint + * @author Lyle Burgoon + * @cdk.created 2018-02-05 + * @cdk.keyword fingerprint + * @cdk.keyword similarity + * @cdk.module fingerprint * @cdk.githash - * - * @see org.openscience.cdk.fingerprint.Fingerprinter + * @see org.openscience.cdk.fingerprint.Fingerprinter */ public class AtomPairs2DFingerprinter extends AbstractFingerprinter implements IFingerprinter { - - private static final int maxDistance = 10; - public String[] names; - private static final String[] atypes = {"C", "N", "O", "S", "P", "F", "Cl", "Br", "I", "B", "Si", "X"}; - private ArrayList atypesInt; - - private Map map = new TreeMap(); - private HashMap bit_counter_map = new HashMap(); - private ArrayList alAtomPairs = new ArrayList(); + private static final int MAX_DISTANCE = 10; + private static final String[] atypes = {"C", "N", "O", "S", "P", "F", "Cl", "Br", "I", "B", "Si", "X"}; + + private final Map pathToBit = new HashMap<>(); + private final Map bitToPath = new HashMap<>(); public AtomPairs2DFingerprinter() { - int distance_counter; - for(distance_counter=1; distance_counter < 11; distance_counter++){ - for(int i = 0; i < atypes.length; i++){ - for(int j = i; j < atypes.length; j++){ - String key_name = distance_counter + "_" + atypes[i] + "_" + atypes[j]; - if(key_name != null){ - bit_counter_map.put(key_name, 0); - alAtomPairs.add(key_name); - } - - } - } - } - alAtomPairs.trimToSize(); + for (int dist = 1; dist <= MAX_DISTANCE; dist++) { + for (int i = 0; i < atypes.length; i++) { + for (int j = i; j < atypes.length; j++) { + final String key_name = dist + "_" + atypes[i] + "_" + atypes[j]; + pathToBit.put(key_name, pathToBit.size()); + bitToPath.put(bitToPath.size(), key_name); + } + } + } } @Override public int getSize() { - return bit_counter_map.size(); + return pathToBit.size(); } - - private BitSet calculate(IAtomContainer container){ - IAtomContainer local = AtomContainerManipulator.removeHydrogens(container); - AllPairsShortestPaths shortest_paths = new AllPairsShortestPaths(local); - BitSet fp = new BitSet(alAtomPairs.size()); - for(int i = 0; i < local.getAtomCount(); i++){ - //for each atom we are going to get the atoms that are within distance 10 - if(local.getAtomCount() > 11){ - for(int j = i+1; j < 11; j++){ - IAtom[] atoms = shortest_paths.from(i).atomsTo(j); - String key_test_string = j + "_" + atoms[0].getAtomTypeName() + "_" + atoms[atoms.length-1].getAtomTypeName(); - String key_test_string2 = j + "_" + atoms[atoms.length-1].getAtomTypeName() + "_" + atoms[0].getAtomTypeName(); - if(bit_counter_map.containsKey(key_test_string)){ - int coordinate = alAtomPairs.indexOf(key_test_string); - fp.set(coordinate, true); -// bit_counter_map.compute(key_test_string, (k,v) -> v+1); - Integer previousValue = bit_counter_map.get(key_test_string); - bit_counter_map.put(key_test_string, previousValue + 1); - } - else if(bit_counter_map.containsKey(key_test_string2)){ - int coordinate = alAtomPairs.indexOf(key_test_string2); - fp.set(coordinate, true); -// bit_counter_map.compute(key_test_string2, (k,v) -> v+1); - Integer previousValue = bit_counter_map.get(key_test_string); - bit_counter_map.put(key_test_string, previousValue + 1); - } - } - } - else{ - for(int j = i+1; j < local.getAtomCount(); j++){ - IAtom[] atoms = shortest_paths.from(i).atomsTo(j); - String key_test_string = j + "_" + atoms[0].getSymbol() + "_" + atoms[atoms.length-1].getSymbol(); - String key_test_string2 = j + "_" + atoms[atoms.length-1].getSymbol() + "_" + atoms[0].getSymbol(); - if(bit_counter_map.containsKey(key_test_string)){ - int coordinate = alAtomPairs.indexOf(key_test_string); - fp.set(coordinate, true); -// bit_counter_map.compute(key_test_string, (k,v) -> v+1); - Integer previousValue = bit_counter_map.get(key_test_string); - bit_counter_map.put(key_test_string, previousValue + 1); - } - else if(bit_counter_map.containsKey(key_test_string2)){ - int coordinate = alAtomPairs.indexOf(key_test_string2); - fp.set(coordinate, true); -// bit_counter_map.compute(key_test_string2, (k,v) -> v+1); - Integer previousValue = bit_counter_map.get(key_test_string); - bit_counter_map.put(key_test_string, previousValue + 1); - } - } - } - + private static boolean isHalogen(final IAtom atom) { + switch (atom.getAtomicNumber()) { + case 9: // F + case 17: // Cl + case 35: // Br + case 53: // I + return true; + default: + return false; } - return fp; } - @Override - public IBitFingerprint getBitFingerprint(IAtomContainer container) throws CDKException { - BitSet fp = calculate(container); + private static boolean include(final IAtom atom) { + switch (atom.getAtomicNumber()) { + case 5: // B + case 6: // C + case 7: // N + case 8: // O + case 14: // Si + case 15: // P + case 16: // S + case 9: // F + case 17: // Cl + case 35: // Br + case 53: // I + return true; + default: + return false; + } + } + + private static String encodePath(int dist, IAtom a, IAtom b) { + return dist + "_" + a.getSymbol() + "_" + b.getSymbol(); + } + + private static String encodeHalPath(int dist, IAtom a, IAtom b) { + return dist + "_" + (isHalogen(a) ? "X" : a.getSymbol()) + "_" + + (isHalogen(b) ? "X" : b.getSymbol()); + } + + private void calculate(List paths, IAtomContainer mol) { + AllPairsShortestPaths apsp = new AllPairsShortestPaths(mol); + int numAtoms = mol.getAtomCount(); + for (int i = 0; i < numAtoms; i++) { + if (!include(mol.getAtom(i))) + continue; + for (int j = i + 1; j < numAtoms; j++) { + if (!include(mol.getAtom(j))) continue; + final int dist = apsp.from(i).distanceTo(j); + if (dist > MAX_DISTANCE) + continue; + final IAtom beg = mol.getAtom(i); + final IAtom end = mol.getAtom(j); + paths.add(encodePath(dist, beg, end)); + paths.add(encodePath(dist, end, beg)); + if (isHalogen(mol.getAtom(i)) || isHalogen(mol.getAtom(j))) { + paths.add(encodeHalPath(dist, beg, end)); + paths.add(encodeHalPath(dist, end, beg)); + } + } + } + } + + @Override + public IBitFingerprint getBitFingerprint(IAtomContainer container) throws CDKException { + BitSet fp = new BitSet(pathToBit.size()); + List paths = new ArrayList<>(); + calculate(paths, container); + for (String path : paths) + fp.set(pathToBit.get(path)); return new BitSetFingerprint(fp); - } + } - /** - * Invalid: it is not appropriate to convert the integer hash codes into strings. - */ @Override - public Map getRawFingerprint(IAtomContainer mol) throws CDKException { - throw new UnsupportedOperationException(); + public Map getRawFingerprint(IAtomContainer mol) throws + CDKException { + Map raw = new HashMap<>(); + List paths = new ArrayList<>(); + calculate(paths, mol); + + Collections.sort(paths); + int count = 0; + String prev = null; + for (String path : paths) { + if (prev == null || !path.equals(prev)) { + if (count > 0) + raw.put(prev, count); + count = 1; + prev = path; + } else { + ++count; + } + } + if (count > 0) + raw.put(prev, count); + + return raw; } - @Override - public ICountFingerprint getCountFingerprint(IAtomContainer container) throws CDKException { - BitSet fp = calculate(container); - final BitSet fp2 = fp; - + @Override + public ICountFingerprint getCountFingerprint(IAtomContainer mol) throws CDKException { + final Map raw = getRawFingerprint(mol); + final List keys = new ArrayList<>(raw.keySet()); return new ICountFingerprint() { - @Override public long size() { - return alAtomPairs.size(); + return pathToBit.size(); } @Override public int numOfPopulatedbins() { - return alAtomPairs.size(); + return keys.size(); } @Override - public int getCount(int index){ - String key = alAtomPairs.get(index); - return bit_counter_map.get(key).intValue(); + public int getCount(int index) { + return raw.get(keys.get(index)); } @Override public int getHash(int index) { - if(fp2.get(index)){ - return index; - } - else{ - return 0; - } + return pathToBit.get(keys.get(index)); } @Override - public void merge(ICountFingerprint fp) {} + public void merge(ICountFingerprint fp) { + + } @Override - public void setBehaveAsBitFingerprint(boolean behaveAsBitFingerprint) {} + public void setBehaveAsBitFingerprint( + boolean behaveAsBitFingerprint) { + + } @Override public boolean hasHash(int hash) { - return fp2.get(hash); + return bitToPath.containsKey(hash); } @Override public int getCountForHash(int hash) { - String key = alAtomPairs.get(hash); - return bit_counter_map.get(key).intValue(); + return raw.get(bitToPath.get(hash)); } }; - } - - + } } diff --git a/descriptor/fingerprint/src/test/java/org/openscience/cdk/fingerprint/AtomPairs2DFingerprintTest.java b/descriptor/fingerprint/src/test/java/org/openscience/cdk/fingerprint/AtomPairs2DFingerprintTest.java index cbf63b881c..f71c10f0cf 100644 --- a/descriptor/fingerprint/src/test/java/org/openscience/cdk/fingerprint/AtomPairs2DFingerprintTest.java +++ b/descriptor/fingerprint/src/test/java/org/openscience/cdk/fingerprint/AtomPairs2DFingerprintTest.java @@ -1,4 +1,4 @@ -/* This work is thhe product of a US Government employee as part of his/her regular duties +/* This work is the product of a US Government employee as part of his/her regular duties * and is thus in the public domain. * * Author: Lyle D. Burgoon, Ph.D. @@ -7,30 +7,13 @@ */ package org.openscience.cdk.fingerprint; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.BitSet; -import java.util.List; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; - import org.junit.Assert; -import org.junit.Before; import org.junit.Test; -import org.openscience.cdk.DefaultChemObjectBuilder; -import org.openscience.cdk.aromaticity.Aromaticity; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.exception.InvalidSmilesException; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.silent.SilentChemObjectBuilder; import org.openscience.cdk.smiles.SmilesParser; -import org.openscience.cdk.tools.CDKHydrogenAdder; -import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; -import org.openscience.cdk.fingerprint.ICountFingerprint; -import static org.hamcrest.CoreMatchers.is; +import java.util.Map; /** * @cdk.module test-fingerprint @@ -41,40 +24,64 @@ public class AtomPairs2DFingerprintTest extends AbstractFingerprinterTest { @Test public void testFingerprint() throws Exception { - /* + /* * We are going to test hexane. Hexane is a good test b/c it has 10 carbons. * Since the max distance for this fingerprint is 10, the final C-C fingerprint slot * at distance 10 should return false, while all the other C-C fingerprint slots * should return true. */ - IFingerprinter printer = new AtomPairs2DFingerprinter(); - IAtomContainer mol1 = parser.parseSmiles("cccccccccc"); - BitSetFingerprint bsfp = (BitSetFingerprint) printer.getBitFingerprint(mol1); + IFingerprinter printer = new AtomPairs2DFingerprinter(); + IAtomContainer mol1 = parser.parseSmiles("cccccccccc"); + BitSetFingerprint bsfp = (BitSetFingerprint) printer.getBitFingerprint(mol1); Assert.assertEquals(9, bsfp.cardinality()); - Assert.assertEquals(true, bsfp.get(0)); //Distance 1 - Assert.assertEquals(true, bsfp.get(78)); //Distance 2 - Assert.assertEquals(true, bsfp.get(156)); //Distance 3 - Assert.assertEquals(true, bsfp.get(234)); //Distance 4 - Assert.assertEquals(true, bsfp.get(312)); //Distance 5 - Assert.assertEquals(true, bsfp.get(390)); //Distance 6 - Assert.assertEquals(true, bsfp.get(468)); //Distance 7 - Assert.assertEquals(true, bsfp.get(546)); //Distance 8 - Assert.assertEquals(true, bsfp.get(624)); //Distance 9 - Assert.assertEquals(false, bsfp.get(702)); //Distance 10 + Assert.assertEquals(true, bsfp.get(0)); //Distance 1 + Assert.assertEquals(true, bsfp.get(78)); //Distance 2 + Assert.assertEquals(true, bsfp.get(156)); //Distance 3 + Assert.assertEquals(true, bsfp.get(234)); //Distance 4 + Assert.assertEquals(true, bsfp.get(312)); //Distance 5 + Assert.assertEquals(true, bsfp.get(390)); //Distance 6 + Assert.assertEquals(true, bsfp.get(468)); //Distance 7 + Assert.assertEquals(true, bsfp.get(546)); //Distance 8 + Assert.assertEquals(true, bsfp.get(624)); //Distance 9 + Assert.assertEquals(false, bsfp.get(702)); //Distance 10 + } + + @Test + public void testHalogen() throws Exception { + IFingerprinter printer = new AtomPairs2DFingerprinter(); + IAtomContainer mol1 = parser.parseSmiles("Clc1ccccc1"); + Map map = printer.getRawFingerprint(mol1); + Assert.assertTrue(map.containsKey("1_X_C")); + Assert.assertTrue(map.containsKey("1_Cl_C")); + Assert.assertTrue(map.containsKey("2_X_C")); + Assert.assertTrue(map.containsKey("2_Cl_C")); + Assert.assertTrue(map.containsKey("3_X_C")); + Assert.assertTrue(map.containsKey("3_Cl_C")); + Assert.assertTrue(map.containsKey("4_X_C")); + Assert.assertTrue(map.containsKey("4_Cl_C")); + } + + @Test + public void ignoredAtom() throws Exception { + IFingerprinter printer = new AtomPairs2DFingerprinter(); + IAtomContainer mol1 = parser.parseSmiles("[Te]1cccc1"); + Map map = printer.getRawFingerprint(mol1); + Assert.assertTrue(map.containsKey("1_C_C")); + Assert.assertTrue(map.containsKey("2_C_C")); } - + @Test public void testGetCountFingerprint() throws Exception { - IFingerprinter printer = new AtomPairs2DFingerprinter(); - IAtomContainer mol1 = parser.parseSmiles("cccccccccc"); - ICountFingerprint icfp = printer.getCountFingerprint(mol1); + IFingerprinter printer = new AtomPairs2DFingerprinter(); + IAtomContainer mol1 = parser.parseSmiles("cccccccccc"); + ICountFingerprint icfp = printer.getCountFingerprint(mol1); Assert.assertEquals(780, icfp.numOfPopulatedbins()); Assert.assertEquals(780, icfp.size()); - + } - + @Test public void testGetRawFingerprint() throws Exception { - IFingerprinter printer = new AtomPairs2DFingerprinter(); + IFingerprinter printer = new AtomPairs2DFingerprinter(); } }