Skip to content

Commit

Permalink
Better radical and valence support in V3000.
Browse files Browse the repository at this point in the history
  • Loading branch information
johnmay committed Aug 11, 2016
1 parent 700f2c2 commit 04966f8
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,7 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce
hasQueryBonds = true; // also counts aromatic bond as query
} else {
int unpaired = outputContainer.getConnectedSingleElectronsCount(outputContainer.getAtom(i));
applyMDLValenceModel(outputContainer.getAtom(i), valence + unpaired);
applyMDLValenceModel(outputContainer.getAtom(i), valence + unpaired, unpaired);
}
}

Expand Down Expand Up @@ -542,13 +542,14 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce
* 0 - this is the case when a query bond was read for an atom.
*
* @param atom the atom to apply the model to
* @param unpaired unpaired electron count
* @param explicitValence the explicit valence (bond order sum)
*/
private void applyMDLValenceModel(IAtom atom, int explicitValence) {
private void applyMDLValenceModel(IAtom atom, int explicitValence, int unpaired) {

if (atom.getValency() != null) {
if (atom.getValency() >= explicitValence)
atom.setImplicitHydrogenCount(atom.getValency() - explicitValence);
atom.setImplicitHydrogenCount(atom.getValency() - (explicitValence - unpaired));
else
atom.setImplicitHydrogenCount(0);
} else {
Expand Down
148 changes: 120 additions & 28 deletions storage/io/src/main/java/org/openscience/cdk/io/MDLV3000Reader.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,10 @@
import org.openscience.cdk.interfaces.IChemObject;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.interfaces.IPseudoAtom;
import org.openscience.cdk.interfaces.ISingleElectron;
import org.openscience.cdk.io.formats.IResourceFormat;
import org.openscience.cdk.io.formats.MDLV3000Format;
import org.openscience.cdk.isomorphism.matchers.IQueryBond;
import org.openscience.cdk.sgroup.Sgroup;
import org.openscience.cdk.sgroup.SgroupType;
import org.openscience.cdk.tools.ILoggingTool;
Expand Down Expand Up @@ -151,6 +153,8 @@ public IAtomContainer readMolecule(IChemObjectBuilder builder) throws CDKExcepti
}

public IAtomContainer readConnectionTable(IChemObjectBuilder builder) throws CDKException {


logger.info("Reading CTAB block");
IAtomContainer readData = builder.newInstance(IAtomContainer.class);
boolean foundEND = false;
Expand All @@ -175,6 +179,27 @@ public IAtomContainer readConnectionTable(IChemObjectBuilder builder) throws CDK
}
lastLine = readLine();
}

// Post-processing pass over the atoms of the container that was just read:
// sum the orders of each atom's bonds and hand the result to
// applyMDLValenceModel together with the atom's single-electron count.
for (IAtom atom : readData.atoms()) {
// XXX: slow method is slow
// NOTE(review): presumably refers to the per-atom getConnectedBondsList
// lookup below - confirm before optimising.
int valence = 0;
for (IBond bond : readData.getConnectedBondsList(atom)) {
// a query bond or a bond without an order cannot contribute to the sum;
// -1 is used as a sentinel and the remaining bonds are not inspected
if (bond instanceof IQueryBond || bond.getOrder() == IBond.Order.UNSET) {
valence = -1;
break;
}
else {
valence += bond.getOrder().numeric();
}
}
if (valence < 0) {
// sentinel hit: only a warning is logged, the valence model is not applied
logger.warn("Cannot set valence for atom with query bonds"); // also counts aromatic bond as query
} else {
// the single-electron count is added to the bond order sum for the
// second argument and is also passed separately as the third argument
final int unpaired = readData.getConnectedSingleElectronsCount(atom);
applyMDLValenceModel(atom, valence + unpaired, unpaired);
}
}

return readData;
}

Expand Down Expand Up @@ -310,13 +335,41 @@ public void readAtomBlock(IAtomContainer readData) throws CDKException {
String key = keys.next();
String value = options.get(key);
try {
if (key.equals("CHG")) {
int charge = Integer.parseInt(value);
if (charge != 0) { // zero is no charge specified
atom.setFormalCharge(charge);
}
} else {
logger.warn("Not parsing key: " + key);
switch (key) {
case "CHG":
int charge = Integer.parseInt(value);
if (charge != 0) { // zero is no charge specified
atom.setFormalCharge(charge);
}
break;
case "RAD":
int numElectons = MDLV2000Writer.SPIN_MULTIPLICITY.ofValue(Integer.parseInt(value))
.getSingleElectrons();
while (numElectons-- > 0) {
readData.addSingleElectron(readData.getBuilder().newInstance(ISingleElectron.class, atom));
}
break;
case "VAL":
if (!(atom instanceof IPseudoAtom)) {
try {
int valence = Integer.parseInt(value);
if (valence != 0) {
//15 is defined as 0 in mol files
if (valence == 15)
atom.setValency(0);
else
atom.setValency(valence);
}
} catch (Exception exception) {
handleError("Could not parse valence information field", lineNumber, 0, 0, exception);
}
} else {
logger.error("Cannot set valence information for a non-element!");
}
break;
default:
logger.warn("Not parsing key: " + key);
break;
}
} catch (Exception exception) {
String error = "Error while parsing key/value " + key + "=" + value + ": "
Expand Down Expand Up @@ -408,27 +461,32 @@ public void readBondBlock(IAtomContainer readData) throws CDKException {
for (String key : options.keySet()) {
String value = options.get(key);
try {
if (key.equals("CFG")) {
int configuration = Integer.parseInt(value);
if (configuration == 0) {
bond.setStereo(IBond.Stereo.NONE);
} else if (configuration == 1) {
bond.setStereo(IBond.Stereo.UP);
} else if (configuration == 2) {
bond.setStereo((IBond.Stereo) CDKConstants.UNSET);
} else if (configuration == 3) {
bond.setStereo(IBond.Stereo.DOWN);
}
} else if (key.equals("ENDPTS")) {
String[] endptStr = value.split(" ");
// skip first value that is count
for (int i = 1; i < endptStr.length; i++) {
endpts.add(readData.getAtom(Integer.parseInt(endptStr[i]) - 1));
}
} else if (key.equals("ATTACH")) {
attach = value;
} else {
logger.warn("Not parsing key: " + key);
switch (key) {
case "CFG":
int configuration = Integer.parseInt(value);
if (configuration == 0) {
bond.setStereo(IBond.Stereo.NONE);
} else if (configuration == 1) {
bond.setStereo(IBond.Stereo.UP);
} else if (configuration == 2) {
bond.setStereo((IBond.Stereo) CDKConstants.UNSET);
} else if (configuration == 3) {
bond.setStereo(IBond.Stereo.DOWN);
}
break;
case "ENDPTS":
String[] endptStr = value.split(" ");
// skip first value that is count
for (int i = 1; i < endptStr.length; i++) {
endpts.add(readData.getAtom(Integer.parseInt(endptStr[i]) - 1));
}
break;
case "ATTACH":
attach = value;
break;
default:
logger.warn("Not parsing key: " + key);
break;
}
} catch (Exception exception) {
String error = "Error while parsing key/value " + key + "=" + value + ": "
Expand Down Expand Up @@ -626,4 +684,38 @@ public void close() throws IOException {

/**
 * Does nothing: the body is empty, so calling this method sets up no
 * settings.
 */
private void initIOSettings() {
    // no-op
}

/**
 * Sets the implicit hydrogen count of an atom and, when the atom has no
 * valency yet, its valency as well.
 *
 * <p>If the atom already carries a valency, that value is kept. The
 * hydrogen count becomes {@code valency - (explicitValence - unpaired)}
 * when the valency is at least {@code explicitValence}, and {@code 0}
 * otherwise.
 *
 * <p>If the atom has no valency, one is looked up with
 * {@link MDLValence#implicitValence(int, int, int)} from the atomic number
 * and formal charge (each treated as {@code 0} when {@code null}). The
 * larger of that value and {@code explicitValence} is stored as the
 * valency, and the hydrogen count is the difference between the stored
 * valency and {@code explicitValence}.
 *
 * <p>This method does not check for a negative {@code explicitValence};
 * the caller in {@code readConnectionTable} skips atoms whose bond order
 * sum could not be determined.
 *
 * @param atom            the atom to apply the model to
 * @param explicitValence the explicit valence; the caller in
 *                        {@code readConnectionTable} passes the bond order
 *                        sum plus the unpaired electron count
 * @param unpaired        the unpaired (single) electron count of the atom
 */
private void applyMDLValenceModel(IAtom atom, int explicitValence, int unpaired) {
    final Integer presetValency = atom.getValency();

    // A valency is already present on the atom: only the hydrogen count is derived.
    if (presetValency != null) {
        final int hydrogens = presetValency >= explicitValence
                ? presetValency - (explicitValence - unpaired)
                : 0;
        atom.setImplicitHydrogenCount(hydrogens);
        return;
    }

    // No valency on the atom: ask the MDL valence table, defaulting missing
    // atomic number / charge to zero.
    final Integer atomicNumber = atom.getAtomicNumber();
    final int element = atomicNumber == null ? 0 : atomicNumber;

    final Integer formalCharge = atom.getFormalCharge();
    final int charge = formalCharge == null ? 0 : formalCharge;

    final int modelValence = MDLValence.implicitValence(element, charge, explicitValence);

    // Never store a valency below what is already used; in that case the
    // difference below is zero and no hydrogens are added.
    final int valency = Math.max(modelValence, explicitValence);
    atom.setValency(valency);
    atom.setImplicitHydrogenCount(valency - explicitValence);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -137,4 +137,12 @@ public void testPseudoAtomLabels() throws Exception {
assertThat(sgroups.get(0).getType(), is(SgroupType.ExtMulticenter));
}
}

/**
 * Reads the {@code CH3.mol} resource (a single carbon with {@code RAD=2}
 * and {@code VAL=3}) and checks that one single electron is present and
 * that the first atom ends up with three implicit hydrogens.
 */
@Test
public void radicalsInCH3() throws Exception {
    try (MDLV3000Reader mdlr = new MDLV3000Reader(getClass().getResourceAsStream("CH3.mol"))) {
        final IAtomContainer methyl = mdlr.read(new org.openscience.cdk.AtomContainer(0, 0, 0, 0));

        // RAD=2 is expected to yield exactly one single electron
        assertThat(methyl.getSingleElectronCount(), is(1));

        // VAL=3 with no bonds leaves three implicit hydrogens on the carbon
        assertThat(methyl.getAtom(0).getImplicitHydrogenCount(), is(3));
    }
}
}
11 changes: 11 additions & 0 deletions storage/io/src/test/resources/org/openscience/cdk/io/CH3.mol
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@



0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 1 0 0 0 0
M V30 BEGIN ATOM
M V30 1 C 1.8704 8.5741 0 0 RAD=2 VAL=3
M V30 END ATOM
M V30 END CTAB
M END

0 comments on commit 04966f8

Please sign in to comment.