Skip to content

Commit

Permalink
Extend standard-substructure for countable MACCS
Browse files Browse the repository at this point in the history
- Remove static modifier for the smarts pattern, to allow reuse of the class.
- Countable MACCS fingerprints are supported now as well, following the
  SMARTS list structure given before.
  • Loading branch information
bachi55 authored and johnmay committed Jun 24, 2018
1 parent 9406010 commit c83b514
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,46 @@
* @cdk.githash
*/
public class StandardSubstructureSets {

private static String[] smarts = null;

/**
* Returns the SMARTS patterns of the functional groups.
*
* If the {@code smarts} field already holds an array, that array is returned
* directly; otherwise the patterns are read from the InteLigand SMARTS
* resource file via {@code readSMARTSPattern}.
*
* @return An array with the SMARTS patterns of the functional groups.
* @throws Exception if the SMARTS pattern list cannot be loaded
*/
public static String[] getFunctionalGroupSMARTS() throws Exception {
// Short-circuit when a pattern array has already been stored in the field.
if (smarts != null) return smarts;
// Otherwise load the patterns from the resource path given below.
return readSMARTSPattern("org/openscience/cdk/fingerprint/data/SMARTS_InteLigand.txt");
}

/**
 * Provides the countable subset of the MACCS fingerprint definitions as SMARTS patterns.
 *
 * <p>The subset relates to the MACCS keys as follows:
 * <ul>
 * <li>Patterns that have an obvious counting nature are kept, <i>e.g., 6-Ring, C=O, etc.</i></li>
 * <li>Patterns like <i>"Is there at least 1 of this and that?", "Are there at least 2 ..."</i> etc. are merged.</li>
 * <li>Patterns clearly corresponding to binary properties, <i>e.g., actinide group ([Ac,Th,Pa,...]), isotope, etc.,</i> have been removed.</li>
 * </ul>
 *
 * @return countable subset of the MACCS fingerprint definitions
 * @throws Exception if the SMARTS pattern list cannot be loaded
 */
public static String[] getCountableMACCSSMARTS() throws Exception {
    // Resource that lists the countable MACCS keys, one pattern per line.
    final String countableMaccsResource = "org/openscience/cdk/fingerprint/data/SMARTS_countable_MACCS_keys.txt";
    String[] countablePatterns = readSMARTSPattern(countableMaccsResource);
    return countablePatterns;
}

String filename = "org/openscience/cdk/fingerprint/data/SMARTS_InteLigand.txt";
/**
* Load a list of SMARTS patterns from the specified resource file.
*
* Each line in the file corresponds to a pattern with the following structure:
* PATTERN_DESCRIPTION: SMARTS_PATTERN, <i>e.g., Thioketone: [#6][CX3](=[SX1])[#6]</i>
*
* Empty lines and lines starting with a "#" are skipped.
*
* @param filename name of the class-path resource that lists the SMARTS patterns to be loaded
* @return array of strings containing the loaded SMARTS patterns
* @throws Exception if there is an error reading the SMARTS patterns
*/
private static String[] readSMARTSPattern(String filename) throws Exception {
InputStream ins = StandardSubstructureSets.class.getClassLoader().getResourceAsStream(filename);
BufferedReader reader = new BufferedReader(new InputStreamReader(ins));

Expand All @@ -43,9 +70,6 @@ public static String[] getFunctionalGroupSMARTS() throws Exception {
s.append(toks[toks.length - 1]);
tmp.add(s.toString().trim());
}
smarts = tmp.toArray(new String[]{});
return smarts;

return tmp.toArray(new String[]{});
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,12 @@ public void testGetFunctionalGroupSubstructureSet() throws Exception {
Assert.assertEquals(307, smarts.length);
}

/** Checks that the countable MACCS SMARTS set can be loaded and has the expected size. */
@Test
public void testGetCountableMACCSSMARTSSubstructureSet() throws Exception {
    final String[] countablePatterns = StandardSubstructureSets.getCountableMACCSSMARTS();
    Assert.assertNotNull(countablePatterns);
    // currently two smarts are ignored!
    final int expectedPatternCount = 141;
    Assert.assertEquals(expectedPatternCount, countablePatterns.length);
}

}

0 comments on commit c83b514

Please sign in to comment.