diff --git a/storage/io/src/main/java/org/openscience/cdk/io/MDLV2000Reader.java b/storage/io/src/main/java/org/openscience/cdk/io/MDLV2000Reader.java index 4a480131565..c786d06338f 100644 --- a/storage/io/src/main/java/org/openscience/cdk/io/MDLV2000Reader.java +++ b/storage/io/src/main/java/org/openscience/cdk/io/MDLV2000Reader.java @@ -41,8 +41,6 @@ import org.openscience.cdk.interfaces.IIsotope; import org.openscience.cdk.interfaces.IPseudoAtom; import org.openscience.cdk.interfaces.ISingleElectron; -import org.openscience.cdk.interfaces.IStereoElement; -import org.openscience.cdk.interfaces.ITetrahedralChirality; import org.openscience.cdk.interfaces.ITetrahedralChirality.Stereo; import org.openscience.cdk.io.formats.IResourceFormat; import org.openscience.cdk.io.formats.MDLV2000Format; @@ -506,7 +504,7 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce hasQueryBonds = true; // also counts aromatic bond as query } else { int unpaired = outputContainer.getConnectedSingleElectronsCount(outputContainer.getAtom(i)); - applyMDLValenceModel(outputContainer.getAtom(i), valence + unpaired); + applyMDLValenceModel(outputContainer.getAtom(i), valence + unpaired, unpaired); } } @@ -544,13 +542,14 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce * 0 - this is the case when a query bond was read for an atom. * * @param atom the atom to apply the model to + * @param unpaired unpaired electron count * @param explicitValence the explicit valence (bond order sum) */ - private void applyMDLValenceModel(IAtom atom, int explicitValence) { + private void applyMDLValenceModel(IAtom atom, int explicitValence, int unpaired) { if (atom.getValency() != null) { if (atom.getValency() >= explicitValence) - atom.setImplicitHydrogenCount(atom.getValency() - explicitValence); + atom.setImplicitHydrogenCount(atom.getValency() - (explicitValence - unpaired)); else atom.setImplicitHydrogenCount(0); } else { @@ -1886,24 +1885,11 @@ private void readPropertiesSlow(BufferedReader input, IAtomContainer container, StringTokenizer st = new StringTokenizer(line.substring(9)); for (int i = 1; i <= infoCount; i++) { int atomNumber = Integer.parseInt(st.nextToken().trim()); - int spinMultiplicity = Integer.parseInt(st.nextToken().trim()); - MDLV2000Writer.SPIN_MULTIPLICITY spin = MDLV2000Writer.SPIN_MULTIPLICITY.NONE; - if (spinMultiplicity > 0) { + int rad = Integer.parseInt(st.nextToken().trim()); + MDLV2000Writer.SPIN_MULTIPLICITY spin = MDLV2000Writer.SPIN_MULTIPLICITY.None; + if (rad > 0) { IAtom radical = container.getAtom(atomNumber - 1); - switch (spinMultiplicity) { - case 1: - spin = MDLV2000Writer.SPIN_MULTIPLICITY.DOUBLET; - break; - case 2: - spin = MDLV2000Writer.SPIN_MULTIPLICITY.SINGLET; - break; - case 3: - spin = MDLV2000Writer.SPIN_MULTIPLICITY.TRIPLET; - break; - default: - logger.debug("Invalid spin multiplicity found: " + spinMultiplicity); - break; - } + spin = MDLV2000Writer.SPIN_MULTIPLICITY.ofValue(rad); for (int j = 0; j < spin.getSingleElectrons(); j++) { container.addSingleElectron(container.getBuilder().newInstance(ISingleElectron.class, radical)); diff --git a/storage/io/src/main/java/org/openscience/cdk/io/MDLV2000Writer.java b/storage/io/src/main/java/org/openscience/cdk/io/MDLV2000Writer.java index 143fb8d5f9f..a2ec44bc818 100644 --- a/storage/io/src/main/java/org/openscience/cdk/io/MDLV2000Writer.java +++ b/storage/io/src/main/java/org/openscience/cdk/io/MDLV2000Writer.java @@ -24,35 +24,12 @@ */ package org.openscience.cdk.io; -import java.io.BufferedWriter; -import java.io.IOException; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.StringWriter; -import java.io.Writer; -import java.nio.charset.StandardCharsets; -import java.text.NumberFormat; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - import org.openscience.cdk.CDKConstants; import org.openscience.cdk.config.Isotopes; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; -import org.openscience.cdk.interfaces.IBond.Order; import org.openscience.cdk.interfaces.IChemFile; import org.openscience.cdk.interfaces.IChemModel; import org.openscience.cdk.interfaces.IChemObject; @@ -74,6 +51,28 @@ import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import org.openscience.cdk.tools.manipulator.ChemFileManipulator; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.StringWriter; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.text.NumberFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + /** * Writes MDL molfiles, which contains a single molecule (see {@cdk.cite DAL92}). * For writing a MDL molfile you can this code: @@ -118,7 +117,10 @@ public class MDLV2000Writer extends DefaultChemObjectWriter { */ public enum SPIN_MULTIPLICITY { - NONE(0, 0), SINGLET(2, 1), DOUBLET(1, 2), TRIPLET(3, 2); + None(0, 0), + Monovalent(2, 1), + DivalentSinglet(1, 2), + DivalentTriplet(3, 2); // the radical SDF value private final int value; @@ -158,13 +160,13 @@ public int getSingleElectrons() { public static SPIN_MULTIPLICITY ofValue(int value) throws CDKException { switch (value) { case 0: - return NONE; + return None; case 1: - return DOUBLET; + return DivalentSinglet; case 2: - return SINGLET; + return Monovalent; case 3: - return TRIPLET; + return DivalentTriplet; default: throw new CDKException("unknown spin multiplicity: " + value); } @@ -712,13 +714,11 @@ else if (valence > 0 && valence < 15) case 0: continue; case 1: - atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.SINGLET); + atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.Monovalent); break; case 2: - atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.DOUBLET); - break; - case 3: - atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.TRIPLET); + // information loss, divalent but singlet or triplet? + atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.DivalentSinglet); break; default: logger.debug("Invalid number of radicals found: " + eCount); diff --git a/storage/io/src/main/java/org/openscience/cdk/io/MDLV3000Reader.java b/storage/io/src/main/java/org/openscience/cdk/io/MDLV3000Reader.java index bd2c0621909..29061b3cff5 100644 --- a/storage/io/src/main/java/org/openscience/cdk/io/MDLV3000Reader.java +++ b/storage/io/src/main/java/org/openscience/cdk/io/MDLV3000Reader.java @@ -47,8 +47,10 @@ import org.openscience.cdk.interfaces.IChemObject; import org.openscience.cdk.interfaces.IChemObjectBuilder; import org.openscience.cdk.interfaces.IPseudoAtom; +import org.openscience.cdk.interfaces.ISingleElectron; import org.openscience.cdk.io.formats.IResourceFormat; import org.openscience.cdk.io.formats.MDLV3000Format; +import org.openscience.cdk.isomorphism.matchers.IQueryBond; import org.openscience.cdk.sgroup.Sgroup; import org.openscience.cdk.sgroup.SgroupType; import org.openscience.cdk.tools.ILoggingTool; @@ -151,6 +153,8 @@ public IAtomContainer readMolecule(IChemObjectBuilder builder) throws CDKExcepti } public IAtomContainer readConnectionTable(IChemObjectBuilder builder) throws CDKException { + + logger.info("Reading CTAB block"); IAtomContainer readData = builder.newInstance(IAtomContainer.class); boolean foundEND = false; @@ -175,6 +179,27 @@ public IAtomContainer readConnectionTable(IChemObjectBuilder builder) throws CDK } lastLine = readLine(); } + + for (IAtom atom : readData.atoms()) { + // XXX: slow method is slow + int valence = 0; + for (IBond bond : readData.getConnectedBondsList(atom)) { + if (bond instanceof IQueryBond || bond.getOrder() == IBond.Order.UNSET) { + valence = -1; + break; + } + else { + valence += bond.getOrder().numeric(); + } + } + if (valence < 0) { + logger.warn("Cannot set valence for atom with query bonds"); // also counts aromatic bond as query + } else { + final int unpaired = readData.getConnectedSingleElectronsCount(atom); + applyMDLValenceModel(atom, valence + unpaired, unpaired); + } + } + return readData; } @@ -310,13 +335,41 @@ public void readAtomBlock(IAtomContainer readData) throws CDKException { String key = keys.next(); String value = options.get(key); try { - if (key.equals("CHG")) { - int charge = Integer.parseInt(value); - if (charge != 0) { // zero is no charge specified - atom.setFormalCharge(charge); - } - } else { - logger.warn("Not parsing key: " + key); + switch (key) { + case "CHG": + int charge = Integer.parseInt(value); + if (charge != 0) { // zero is no charge specified + atom.setFormalCharge(charge); + } + break; + case "RAD": + int numElectons = MDLV2000Writer.SPIN_MULTIPLICITY.ofValue(Integer.parseInt(value)) + .getSingleElectrons(); + while (numElectons-- > 0) { + readData.addSingleElectron(readData.getBuilder().newInstance(ISingleElectron.class, atom)); + } + break; + case "VAL": + if (!(atom instanceof IPseudoAtom)) { + try { + int valence = Integer.parseInt(value); + if (valence != 0) { + //15 is defined as 0 in mol files + if (valence == 15) + atom.setValency(0); + else + atom.setValency(valence); + } + } catch (Exception exception) { + handleError("Could not parse valence information field", lineNumber, 0, 0, exception); + } + } else { + logger.error("Cannot set valence information for a non-element!"); + } + break; + default: + logger.warn("Not parsing key: " + key); + break; } } catch (Exception exception) { String error = "Error while parsing key/value " + key + "=" + value + ": " @@ -408,27 +461,32 @@ public void readBondBlock(IAtomContainer readData) throws CDKException { for (String key : options.keySet()) { String value = options.get(key); try { - if (key.equals("CFG")) { - int configuration = Integer.parseInt(value); - if (configuration == 0) { - bond.setStereo(IBond.Stereo.NONE); - } else if (configuration == 1) { - bond.setStereo(IBond.Stereo.UP); - } else if (configuration == 2) { - bond.setStereo((IBond.Stereo) CDKConstants.UNSET); - } else if (configuration == 3) { - bond.setStereo(IBond.Stereo.DOWN); - } - } else if (key.equals("ENDPTS")) { - String[] endptStr = value.split(" "); - // skip first value that is count - for (int i = 1; i < endptStr.length; i++) { - endpts.add(readData.getAtom(Integer.parseInt(endptStr[i]) - 1)); - } - } else if (key.equals("ATTACH")) { - attach = value; - } else { - logger.warn("Not parsing key: " + key); + switch (key) { + case "CFG": + int configuration = Integer.parseInt(value); + if (configuration == 0) { + bond.setStereo(IBond.Stereo.NONE); + } else if (configuration == 1) { + bond.setStereo(IBond.Stereo.UP); + } else if (configuration == 2) { + bond.setStereo((IBond.Stereo) CDKConstants.UNSET); + } else if (configuration == 3) { + bond.setStereo(IBond.Stereo.DOWN); + } + break; + case "ENDPTS": + String[] endptStr = value.split(" "); + // skip first value that is count + for (int i = 1; i < endptStr.length; i++) { + endpts.add(readData.getAtom(Integer.parseInt(endptStr[i]) - 1)); + } + break; + case "ATTACH": + attach = value; + break; + default: + logger.warn("Not parsing key: " + key); + break; } } catch (Exception exception) { String error = "Error while parsing key/value " + key + "=" + value + ": " @@ -626,4 +684,38 @@ public void close() throws IOException { private void initIOSettings() {} + /** + * Applies the MDL valence model to atoms using the explicit valence (bond + * order sum) and charge to determine the correct number of implicit + * hydrogens. The model is not applied if the explicit valence is less than + * 0 - this is the case when a query bond was read for an atom. + * + * @param atom the atom to apply the model to + * @param explicitValence the explicit valence (bond order sum) + */ + private void applyMDLValenceModel(IAtom atom, int explicitValence, int unpaired) { + + if (atom.getValency() != null) { + if (atom.getValency() >= explicitValence) + atom.setImplicitHydrogenCount(atom.getValency() - (explicitValence - unpaired)); + else + atom.setImplicitHydrogenCount(0); + } else { + Integer element = atom.getAtomicNumber(); + if (element == null) element = 0; + + Integer charge = atom.getFormalCharge(); + if (charge == null) charge = 0; + + int implicitValence = MDLValence.implicitValence(element, charge, explicitValence); + if (implicitValence < explicitValence) { + atom.setValency(explicitValence); + atom.setImplicitHydrogenCount(0); + } else { + atom.setValency(implicitValence); + atom.setImplicitHydrogenCount(implicitValence - explicitValence); + } + } + } + } diff --git a/storage/io/src/main/java/org/openscience/cdk/io/MDLV3000Writer.java b/storage/io/src/main/java/org/openscience/cdk/io/MDLV3000Writer.java index c0021a98151..9472eaf7db6 100644 --- a/storage/io/src/main/java/org/openscience/cdk/io/MDLV3000Writer.java +++ b/storage/io/src/main/java/org/openscience/cdk/io/MDLV3000Writer.java @@ -221,7 +221,16 @@ private void writeAtomBlock(IAtomContainer mol, IAtom[] atoms, Map