Skip to content

Commit

Permalink
Extract and generalise SMARTS substructure functionality to a separat…
Browse files Browse the repository at this point in the history
…e class. Copies exact functionality ATM.
  • Loading branch information
johnmay committed Oct 6, 2016
1 parent 93922bf commit a43a478
Show file tree
Hide file tree
Showing 4 changed files with 472 additions and 229 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -508,235 +508,7 @@ private void considerNewFP(FP newFP) {
if (fp.iteration < newFP.iteration || fp.hashCode < newFP.hashCode) return;
fplist.set(hit, newFP);
}

// ------------ Generation of fingerprint smarts ------------

// Helper variables for getFPSmarts(): scratch state that is reset at the start of every
// getFPSmarts() call and then mutated by the recursive nodeToString() traversal.
// Atom index -> traversal node; an atom is present once it has been registered.
private HashMap<Integer, AtomNode> nodes = new HashMap<Integer, AtomNode>();
// Atom index -> concatenated ring-closure labels to emit right after that atom.
private HashMap<Integer, String> atomIndexes = new HashMap<Integer, String>();
// Atom indexes in the order they were registered during the traversal.
private List<Integer> traversedAtoms = new ArrayList<Integer>();
// Bond indexes that have already been given a ring-closure label.
private List<Integer> ringClosures = new ArrayList<Integer>();
// Fingerprint currently being converted; its atoms[] decides which neighbours are external.
private FP curFP = null;
// Molecule passed to getFPSmarts(); used to look up atoms when writing atom expressions.
private IAtomContainer curFPMolecule = null;
// Next ring-closure number to hand out (reset to 1 by getFPSmarts()).
private int curIndex;

/**
 * Node of the traversal tree built while a fingerprint fragment is written out as SMARTS.
 */
private static class AtomNode
{
// Atom index of the node this one was reached from; -1 marks the root node.
private int parent;
// Atom index represented by this node.
private int atom;
}

/**
* Determines the structural fragment corresponding to a particular FP object
* and returns it in SMARTS notation.
* This method must be called immediately after calculate(), since it relies on the
* internal state of the CircularFingerprinter object.
*
* @param fp - the fingerprint
* @param molecule - the molecule for which the fingerprints were calculated
* @return the fragment as smarts/smiles
*/
public String getFPSmarts(FP fp, IAtomContainer molecule)
{
    // A fingerprint without atoms has no fragment to describe.
    if (fp.atoms == null || fp.atoms.length == 0)
        return null;

    // Reset the traversal state shared with nodeToString().
    curFP = fp;
    curFPMolecule = molecule;
    curIndex = 1;
    nodes.clear();
    traversedAtoms.clear();
    atomIndexes.clear();
    ringClosures.clear();

    // The first atom of the fingerprint becomes the root of the traversal tree;
    // a parent of -1 marks it as having no parent.
    final AtomNode root = new AtomNode();
    root.atom = fp.atoms[0];
    root.parent = -1;
    traversedAtoms.add(root.atom);
    nodes.put(root.atom, root);

    // Recursively write out the root and every fingerprint atom reachable from it.
    return nodeToString(root.atom);
}


/**
* Recursive generation of a smarts string for particular AtomNode
* @param atom index of atom
* @return SMARTS expression
*/
private String nodeToString(int atom)
{
    final AtomNode current = nodes.get(atom);
    final List<String> branchList = new ArrayList<String>();

    for (int k = 0; k < atomAdj[atom].length; k++)
    {
        final int nbr = atomAdj[atom][k];

        // The parent atom has already been written; do not walk back to it.
        if (nbr == current.parent)
            continue;

        final int bond = bondAdj[atom][k];
        final boolean registered = nodes.get(nbr) != null;

        // Ring-closure bond that has already received its label from the other end.
        if (registered && ringClosures.contains(bond))
            continue;

        // Aromatic bonds are left implicit; other bonds use their order symbol
        // (which is itself empty for single bonds).
        final String bondSym = bondArom[bond] ? "" : bondToString1(bondOrder[bond]);

        if (registered)
        {
            // Ring closure: label both atoms, the bond symbol goes on this atom only.
            ringClosures.add(bond);
            final String label = ((curIndex > 9) ? "%" : "") + curIndex;
            addIndexToAtom(bondSym + label, atom);
            addIndexToAtom(label, nbr);
            curIndex++;
            continue;
        }

        if (Ints.indexOf(curFP.atoms, nbr) == -1)
        {
            // Neighbour lies outside the fingerprint: emit a peripheral placeholder,
            // ":a" across an aromatic bond, otherwise the bond symbol followed by "*".
            branchList.add(bondArom[bond] ? ":a" : bondSym + "*");
            continue;
        }

        // First visit of a fingerprint atom: register it, then descend into it.
        final AtomNode child = new AtomNode();
        child.parent = atom;
        child.atom = nbr;
        traversedAtoms.add(child.atom);
        nodes.put(child.atom, child);
        branchList.add(bondSym + nodeToString(nbr));
    }

    // Atom expression first, then any ring-closure labels collected for this atom
    // (they may have been added while descendants were being processed).
    final StringBuilder out = new StringBuilder();
    out.append(getAtomSmarts(curFPMolecule, atom));
    final String closures = atomIndexes.get(atom);
    if (closures != null)
        out.append(closures);

    // Every branch except the last is parenthesised; the last continues the chain.
    final int lastBranch = branchList.size() - 1;
    for (int k = 0; k < lastBranch; k++)
        out.append('(').append(branchList.get(k)).append(')');
    if (lastBranch >= 0)
        out.append(branchList.get(lastBranch));

    return out.toString();
}

/**
 * Appends a ring-closure label to the labels already stored for an atom.
 *
 * @param ind  label text to append
 * @param atom index of the atom the label belongs to
 */
private void addIndexToAtom(String ind, final int atom)
{
    final String existing = atomIndexes.get(atom);
    // First label for this atom is stored as-is; later ones are concatenated after it.
    atomIndexes.put(atom, existing == null ? ind : existing + ind);
}


/**
 * Converts a bond order to its bond symbol.
 *
 * @param boOrder the bond order
 * @return "=" for 2, "#" for 3, and an empty string for every other value
 *         (a single bond is implicit and therefore written as nothing)
 */
private String bondToString1(int boOrder)
{
    if (boOrder == 2)
        return "=";
    if (boOrder == 3)
        return "#";
    return "";
}

/**
 * Builds the atom expression for one atom of the molecule.
 * The element symbol is written in lower case when the atom is flagged in atomArom, and the
 * expression is wrapped in square brackets (together with the charge, if any) when the atom
 * has a non-zero formal charge or its element is not one of B, C, N, O, P, S, F, Cl, Br, I.
 *
 * @param mol   the molecule the atom belongs to
 * @param atNum index of the atom within the molecule
 * @return the atom expression, e.g. "C", "c", "[N+]" or "[Se]"
 */
private String getAtomSmarts(IAtomContainer mol, int atNum)
{
    IAtom at = mol.getAtom(atNum);
    Integer chrg = at.getFormalCharge();
    String atStr = at.getSymbol();

    // Set when the atom cannot be written as a bare element symbol.
    boolean complex = false;

    // An unset (null) charge is treated the same as a zero charge.
    String chStr = "";
    if (chrg != null && chrg != 0)
    {
        chStr = getChargeSmartsStr(chrg);
        complex = true;
    }

    // NOTE(review): the switch unboxes the atomic number; if getAtomicNumber() can return
    // null for some atoms this would throw - confirm against the IAtom contract.
    switch (at.getAtomicNumber())
    {
        case 5: // B
        case 6: // C
        case 7: // N
        case 8: // O
        case 15: // P
        case 16: // S
        case 9: // F
        case 17: // Cl
        case 35: // Br
        case 53: // I
            break;
        default:
            // every other element has to be bracketed
            complex = true;
            break;
    }

    // Aromatic atoms are written in lower case. Locale.ROOT keeps the result independent of
    // the default locale (e.g. "I" must not turn into a dotless i under a Turkish locale).
    if (atomArom[atNum])
        atStr = atStr.toLowerCase(java.util.Locale.ROOT);

    if (complex)
        atStr = "[" + atStr + chStr + "]";

    return atStr;
}

/**
 * Formats a formal charge as a charge suffix.
 *
 * @param chrg the formal charge
 * @return "+" or "-" for unit charges, the signed number (e.g. "+2", "-3") for larger
 *         magnitudes, and an empty string for zero
 */
private String getChargeSmartsStr(int chrg)
{
    switch (chrg)
    {
        case 0:
            return "";
        case 1:
            return "+";
        case -1:
            return "-";
        default:
            // Negative numbers already carry their sign; positives need an explicit '+'.
            return (chrg > 0) ? "+" + chrg : String.valueOf(chrg);
    }
}


// ------------ molecule analysis: cached cheminformatics ------------

// summarize preliminary information about the molecular structure, to make sure the rest all goes quickly
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.openscience.cdk.fingerprint.CircularFingerprinter.FP;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.silent.SilentChemObjectBuilder;
import org.openscience.cdk.smarts.SubstructureSmarts;
import org.openscience.cdk.smiles.SmilesParser;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;
Expand Down Expand Up @@ -110,12 +111,14 @@ private void checkFPSmartsForMolecule(String moleculeSmiles,

CircularFingerprinter circ = new CircularFingerprinter();
circ.calculate(mol);
SubstructureSmarts subsmarts = new SubstructureSmarts(mol);
subsmarts.setIncludePeripheralBonds(true);
int numFP = circ.getFPCount();

Set<String> actual = new HashSet<>();
for (int i = 0; i < numFP; i++) {
FP fp = circ.getFP(i);
actual.add(circ.getFPSmarts(fp, mol));
actual.add(subsmarts.generate(fp.atoms));
}

assertThat(actual, everyItem(isIn(expected)));
Expand Down

0 comments on commit a43a478

Please sign in to comment.