Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correct MDL radical to/from electron count semantics. #222

Merged
merged 2 commits into from
Aug 16, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@
import org.openscience.cdk.interfaces.IIsotope;
import org.openscience.cdk.interfaces.IPseudoAtom;
import org.openscience.cdk.interfaces.ISingleElectron;
import org.openscience.cdk.interfaces.IStereoElement;
import org.openscience.cdk.interfaces.ITetrahedralChirality;
import org.openscience.cdk.interfaces.ITetrahedralChirality.Stereo;
import org.openscience.cdk.io.formats.IResourceFormat;
import org.openscience.cdk.io.formats.MDLV2000Format;
Expand Down Expand Up @@ -506,7 +504,7 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce
hasQueryBonds = true; // also counts aromatic bond as query
} else {
int unpaired = outputContainer.getConnectedSingleElectronsCount(outputContainer.getAtom(i));
applyMDLValenceModel(outputContainer.getAtom(i), valence + unpaired);
applyMDLValenceModel(outputContainer.getAtom(i), valence + unpaired, unpaired);
}
}

Expand Down Expand Up @@ -544,13 +542,14 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce
* 0 - this is the case when a query bond was read for an atom.
*
* @param atom the atom to apply the model to
* @param unpaired unpaired electron count
* @param explicitValence the explicit valence (bond order sum)
*/
private void applyMDLValenceModel(IAtom atom, int explicitValence) {
private void applyMDLValenceModel(IAtom atom, int explicitValence, int unpaired) {

if (atom.getValency() != null) {
if (atom.getValency() >= explicitValence)
atom.setImplicitHydrogenCount(atom.getValency() - explicitValence);
atom.setImplicitHydrogenCount(atom.getValency() - (explicitValence - unpaired));
else
atom.setImplicitHydrogenCount(0);
} else {
Expand Down Expand Up @@ -1886,24 +1885,11 @@ private void readPropertiesSlow(BufferedReader input, IAtomContainer container,
StringTokenizer st = new StringTokenizer(line.substring(9));
for (int i = 1; i <= infoCount; i++) {
int atomNumber = Integer.parseInt(st.nextToken().trim());
int spinMultiplicity = Integer.parseInt(st.nextToken().trim());
MDLV2000Writer.SPIN_MULTIPLICITY spin = MDLV2000Writer.SPIN_MULTIPLICITY.NONE;
if (spinMultiplicity > 0) {
int rad = Integer.parseInt(st.nextToken().trim());
MDLV2000Writer.SPIN_MULTIPLICITY spin = MDLV2000Writer.SPIN_MULTIPLICITY.None;
if (rad > 0) {
IAtom radical = container.getAtom(atomNumber - 1);
switch (spinMultiplicity) {
case 1:
spin = MDLV2000Writer.SPIN_MULTIPLICITY.DOUBLET;
break;
case 2:
spin = MDLV2000Writer.SPIN_MULTIPLICITY.SINGLET;
break;
case 3:
spin = MDLV2000Writer.SPIN_MULTIPLICITY.TRIPLET;
break;
default:
logger.debug("Invalid spin multiplicity found: " + spinMultiplicity);
break;
}
spin = MDLV2000Writer.SPIN_MULTIPLICITY.ofValue(rad);
for (int j = 0; j < spin.getSingleElectrons(); j++) {
container.addSingleElectron(container.getBuilder().newInstance(ISingleElectron.class,
radical));
Expand Down
66 changes: 33 additions & 33 deletions storage/io/src/main/java/org/openscience/cdk/io/MDLV2000Writer.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,35 +24,12 @@
*/
package org.openscience.cdk.io;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.config.Isotopes;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IBond.Order;
import org.openscience.cdk.interfaces.IChemFile;
import org.openscience.cdk.interfaces.IChemModel;
import org.openscience.cdk.interfaces.IChemObject;
Expand All @@ -74,6 +51,28 @@
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import org.openscience.cdk.tools.manipulator.ChemFileManipulator;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* Writes MDL molfiles, which contains a single molecule (see {@cdk.cite DAL92}).
* For writing a MDL molfile you can this code:
Expand Down Expand Up @@ -118,7 +117,10 @@ public class MDLV2000Writer extends DefaultChemObjectWriter {
*/
public enum SPIN_MULTIPLICITY {

NONE(0, 0), SINGLET(2, 1), DOUBLET(1, 2), TRIPLET(3, 2);
None(0, 0),
Monovalent(2, 1),
DivalentSinglet(1, 2),
DivalentTriplet(3, 2);

// the radical SDF value
private final int value;
Expand Down Expand Up @@ -158,13 +160,13 @@ public int getSingleElectrons() {
public static SPIN_MULTIPLICITY ofValue(int value) throws CDKException {
switch (value) {
case 0:
return NONE;
return None;
case 1:
return DOUBLET;
return DivalentSinglet;
case 2:
return SINGLET;
return Monovalent;
case 3:
return TRIPLET;
return DivalentTriplet;
default:
throw new CDKException("unknown spin multiplicity: " + value);
}
Expand Down Expand Up @@ -712,13 +714,11 @@ else if (valence > 0 && valence < 15)
case 0:
continue;
case 1:
atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.SINGLET);
atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.Monovalent);
break;
case 2:
atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.DOUBLET);
break;
case 3:
atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.TRIPLET);
// information loss, divalent but singlet or triplet?
atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.DivalentSinglet);
break;
default:
logger.debug("Invalid number of radicals found: " + eCount);
Expand Down
148 changes: 120 additions & 28 deletions storage/io/src/main/java/org/openscience/cdk/io/MDLV3000Reader.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,10 @@
import org.openscience.cdk.interfaces.IChemObject;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.interfaces.IPseudoAtom;
import org.openscience.cdk.interfaces.ISingleElectron;
import org.openscience.cdk.io.formats.IResourceFormat;
import org.openscience.cdk.io.formats.MDLV3000Format;
import org.openscience.cdk.isomorphism.matchers.IQueryBond;
import org.openscience.cdk.sgroup.Sgroup;
import org.openscience.cdk.sgroup.SgroupType;
import org.openscience.cdk.tools.ILoggingTool;
Expand Down Expand Up @@ -151,6 +153,8 @@ public IAtomContainer readMolecule(IChemObjectBuilder builder) throws CDKExcepti
}

public IAtomContainer readConnectionTable(IChemObjectBuilder builder) throws CDKException {


logger.info("Reading CTAB block");
IAtomContainer readData = builder.newInstance(IAtomContainer.class);
boolean foundEND = false;
Expand All @@ -175,6 +179,27 @@ public IAtomContainer readConnectionTable(IChemObjectBuilder builder) throws CDK
}
lastLine = readLine();
}

for (IAtom atom : readData.atoms()) {
// XXX: slow method is slow
int valence = 0;
for (IBond bond : readData.getConnectedBondsList(atom)) {
if (bond instanceof IQueryBond || bond.getOrder() == IBond.Order.UNSET) {
valence = -1;
break;
}
else {
valence += bond.getOrder().numeric();
}
}
if (valence < 0) {
logger.warn("Cannot set valence for atom with query bonds"); // also counts aromatic bond as query
} else {
final int unpaired = readData.getConnectedSingleElectronsCount(atom);
applyMDLValenceModel(atom, valence + unpaired, unpaired);
}
}

return readData;
}

Expand Down Expand Up @@ -310,13 +335,41 @@ public void readAtomBlock(IAtomContainer readData) throws CDKException {
String key = keys.next();
String value = options.get(key);
try {
if (key.equals("CHG")) {
int charge = Integer.parseInt(value);
if (charge != 0) { // zero is no charge specified
atom.setFormalCharge(charge);
}
} else {
logger.warn("Not parsing key: " + key);
switch (key) {
case "CHG":
int charge = Integer.parseInt(value);
if (charge != 0) { // zero is no charge specified
atom.setFormalCharge(charge);
}
break;
case "RAD":
int numElectons = MDLV2000Writer.SPIN_MULTIPLICITY.ofValue(Integer.parseInt(value))
.getSingleElectrons();
while (numElectons-- > 0) {
readData.addSingleElectron(readData.getBuilder().newInstance(ISingleElectron.class, atom));
}
break;
case "VAL":
if (!(atom instanceof IPseudoAtom)) {
try {
int valence = Integer.parseInt(value);
if (valence != 0) {
//15 is defined as 0 in mol files
if (valence == 15)
atom.setValency(0);
else
atom.setValency(valence);
}
} catch (Exception exception) {
handleError("Could not parse valence information field", lineNumber, 0, 0, exception);
}
} else {
logger.error("Cannot set valence information for a non-element!");
}
break;
default:
logger.warn("Not parsing key: " + key);
break;
}
} catch (Exception exception) {
String error = "Error while parsing key/value " + key + "=" + value + ": "
Expand Down Expand Up @@ -408,27 +461,32 @@ public void readBondBlock(IAtomContainer readData) throws CDKException {
for (String key : options.keySet()) {
String value = options.get(key);
try {
if (key.equals("CFG")) {
int configuration = Integer.parseInt(value);
if (configuration == 0) {
bond.setStereo(IBond.Stereo.NONE);
} else if (configuration == 1) {
bond.setStereo(IBond.Stereo.UP);
} else if (configuration == 2) {
bond.setStereo((IBond.Stereo) CDKConstants.UNSET);
} else if (configuration == 3) {
bond.setStereo(IBond.Stereo.DOWN);
}
} else if (key.equals("ENDPTS")) {
String[] endptStr = value.split(" ");
// skip first value that is count
for (int i = 1; i < endptStr.length; i++) {
endpts.add(readData.getAtom(Integer.parseInt(endptStr[i]) - 1));
}
} else if (key.equals("ATTACH")) {
attach = value;
} else {
logger.warn("Not parsing key: " + key);
switch (key) {
case "CFG":
int configuration = Integer.parseInt(value);
if (configuration == 0) {
bond.setStereo(IBond.Stereo.NONE);
} else if (configuration == 1) {
bond.setStereo(IBond.Stereo.UP);
} else if (configuration == 2) {
bond.setStereo((IBond.Stereo) CDKConstants.UNSET);
} else if (configuration == 3) {
bond.setStereo(IBond.Stereo.DOWN);
}
break;
case "ENDPTS":
String[] endptStr = value.split(" ");
// skip first value that is count
for (int i = 1; i < endptStr.length; i++) {
endpts.add(readData.getAtom(Integer.parseInt(endptStr[i]) - 1));
}
break;
case "ATTACH":
attach = value;
break;
default:
logger.warn("Not parsing key: " + key);
break;
}
} catch (Exception exception) {
String error = "Error while parsing key/value " + key + "=" + value + ": "
Expand Down Expand Up @@ -626,4 +684,38 @@ public void close() throws IOException {

private void initIOSettings() {}

/**
* Applies the MDL valence model to atoms using the explicit valence (bond
* order sum) and charge to determine the correct number of implicit
* hydrogens. The model is not applied if the explicit valence is less than
* 0 - this is the case when a query bond was read for an atom.
*
* @param atom the atom to apply the model to
* @param explicitValence the explicit valence (bond order sum)
*/
private void applyMDLValenceModel(IAtom atom, int explicitValence, int unpaired) {

if (atom.getValency() != null) {
if (atom.getValency() >= explicitValence)
atom.setImplicitHydrogenCount(atom.getValency() - (explicitValence - unpaired));
else
atom.setImplicitHydrogenCount(0);
} else {
Integer element = atom.getAtomicNumber();
if (element == null) element = 0;

Integer charge = atom.getFormalCharge();
if (charge == null) charge = 0;

int implicitValence = MDLValence.implicitValence(element, charge, explicitValence);
if (implicitValence < explicitValence) {
atom.setValency(explicitValence);
atom.setImplicitHydrogenCount(0);
} else {
atom.setValency(implicitValence);
atom.setImplicitHydrogenCount(implicitValence - explicitValence);
}
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,16 @@ private void writeAtomBlock(IAtomContainer mol, IAtom[] atoms, Map<IChemObject,
final int chg = nullAsZero(atom.getFormalCharge());
final int mass = nullAsZero(atom.getMassNumber());
final int hcnt = nullAsZero(atom.getImplicitHydrogenCount());
final int rad = mol.getConnectedSingleElectronsCount(atom);
final int elec = mol.getConnectedSingleElectronsCount(atom);
int rad = 0;
switch (elec) {
case 1: // 2
rad = MDLV2000Writer.SPIN_MULTIPLICITY.Monovalent.getValue();
break;
case 2: // 1 or 3? Information loss as to which
rad = MDLV2000Writer.SPIN_MULTIPLICITY.DivalentSinglet.getValue();
break;
}

int expVal = 0;
for (IBond bond : mol.getConnectedBondsList(atom)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,4 +137,12 @@ public void testPseudoAtomLabels() throws Exception {
assertThat(sgroups.get(0).getType(), is(SgroupType.ExtMulticenter));
}
}

@Test public void radicalsInCH3() throws Exception {
try (MDLV3000Reader reader = new MDLV3000Reader(getClass().getResourceAsStream("CH3.mol"))) {
IAtomContainer container = reader.read(new org.openscience.cdk.AtomContainer(0, 0, 0, 0));
assertThat(container.getSingleElectronCount(), is(1));
assertThat(container.getAtom(0).getImplicitHydrogenCount(), is(3));
}
}
}
Loading