diff --git a/storage/io/src/main/java/org/openscience/cdk/io/MDLV2000Reader.java b/storage/io/src/main/java/org/openscience/cdk/io/MDLV2000Reader.java index 241a03cb71a..c786d06338f 100644 --- a/storage/io/src/main/java/org/openscience/cdk/io/MDLV2000Reader.java +++ b/storage/io/src/main/java/org/openscience/cdk/io/MDLV2000Reader.java @@ -504,7 +504,7 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce hasQueryBonds = true; // also counts aromatic bond as query } else { int unpaired = outputContainer.getConnectedSingleElectronsCount(outputContainer.getAtom(i)); - applyMDLValenceModel(outputContainer.getAtom(i), valence + unpaired); + applyMDLValenceModel(outputContainer.getAtom(i), valence + unpaired, unpaired); } } @@ -542,13 +542,14 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce * 0 - this is the case when a query bond was read for an atom. * * @param atom the atom to apply the model to + * @param unpaired number of unpaired electrons on the atom, already added into explicitValence by the caller * @param explicitValence the explicit valence (bond order sum) */ - private void applyMDLValenceModel(IAtom atom, int explicitValence) { + private void applyMDLValenceModel(IAtom atom, int explicitValence, int unpaired) { if (atom.getValency() != null) { if (atom.getValency() >= explicitValence) - atom.setImplicitHydrogenCount(atom.getValency() - explicitValence); + atom.setImplicitHydrogenCount(atom.getValency() - (explicitValence - unpaired)); else atom.setImplicitHydrogenCount(0); } else { diff --git a/storage/io/src/main/java/org/openscience/cdk/io/MDLV3000Reader.java b/storage/io/src/main/java/org/openscience/cdk/io/MDLV3000Reader.java index bd2c0621909..29061b3cff5 100644 --- a/storage/io/src/main/java/org/openscience/cdk/io/MDLV3000Reader.java +++ b/storage/io/src/main/java/org/openscience/cdk/io/MDLV3000Reader.java @@ -47,8 +47,10 @@ import org.openscience.cdk.interfaces.IChemObject; import org.openscience.cdk.interfaces.IChemObjectBuilder; import 
org.openscience.cdk.interfaces.IPseudoAtom; +import org.openscience.cdk.interfaces.ISingleElectron; import org.openscience.cdk.io.formats.IResourceFormat; import org.openscience.cdk.io.formats.MDLV3000Format; +import org.openscience.cdk.isomorphism.matchers.IQueryBond; import org.openscience.cdk.sgroup.Sgroup; import org.openscience.cdk.sgroup.SgroupType; import org.openscience.cdk.tools.ILoggingTool; @@ -151,6 +153,8 @@ public IAtomContainer readMolecule(IChemObjectBuilder builder) throws CDKExcepti } public IAtomContainer readConnectionTable(IChemObjectBuilder builder) throws CDKException { + + logger.info("Reading CTAB block"); IAtomContainer readData = builder.newInstance(IAtomContainer.class); boolean foundEND = false; @@ -175,6 +179,27 @@ public IAtomContainer readConnectionTable(IChemObjectBuilder builder) throws CDK } lastLine = readLine(); } + + for (IAtom atom : readData.atoms()) { + // XXX: slow - the connected bonds are looked up separately for every atom + int valence = 0; + for (IBond bond : readData.getConnectedBondsList(atom)) { + if (bond instanceof IQueryBond || bond.getOrder() == IBond.Order.UNSET) { + valence = -1; + break; + } + else { + valence += bond.getOrder().numeric(); + } + } + if (valence < 0) { + logger.warn("Cannot set valence for atom with query bonds"); // also counts aromatic bond as query + } else { + final int unpaired = readData.getConnectedSingleElectronsCount(atom); + applyMDLValenceModel(atom, valence + unpaired, unpaired); + } + } + return readData; } @@ -310,13 +335,41 @@ public void readAtomBlock(IAtomContainer readData) throws CDKException { String key = keys.next(); String value = options.get(key); try { - if (key.equals("CHG")) { - int charge = Integer.parseInt(value); - if (charge != 0) { // zero is no charge specified - atom.setFormalCharge(charge); - } - } else { - logger.warn("Not parsing key: " + key); + switch (key) { + case "CHG": + int charge = Integer.parseInt(value); + if (charge != 0) { // zero is no charge specified + atom.setFormalCharge(charge); + } + 
break; + case "RAD": + int numElectons = MDLV2000Writer.SPIN_MULTIPLICITY.ofValue(Integer.parseInt(value)) + .getSingleElectrons(); + while (numElectons-- > 0) { + readData.addSingleElectron(readData.getBuilder().newInstance(ISingleElectron.class, atom)); + } + break; + case "VAL": + if (!(atom instanceof IPseudoAtom)) { + try { + int valence = Integer.parseInt(value); + if (valence != 0) { + // a VAL of 15 means a valence of zero in mol files + if (valence == 15) + atom.setValency(0); + else + atom.setValency(valence); + } + } catch (Exception exception) { + handleError("Could not parse valence information field", lineNumber, 0, 0, exception); + } + } else { + logger.error("Cannot set valence information for a non-element!"); + } + break; + default: + logger.warn("Not parsing key: " + key); + break; } } catch (Exception exception) { String error = "Error while parsing key/value " + key + "=" + value + ": " @@ -408,27 +461,32 @@ public void readBondBlock(IAtomContainer readData) throws CDKException { for (String key : options.keySet()) { String value = options.get(key); try { - if (key.equals("CFG")) { - int configuration = Integer.parseInt(value); - if (configuration == 0) { - bond.setStereo(IBond.Stereo.NONE); - } else if (configuration == 1) { - bond.setStereo(IBond.Stereo.UP); - } else if (configuration == 2) { - bond.setStereo((IBond.Stereo) CDKConstants.UNSET); - } else if (configuration == 3) { - bond.setStereo(IBond.Stereo.DOWN); - } - } else if (key.equals("ENDPTS")) { - String[] endptStr = value.split(" "); - // skip first value that is count - for (int i = 1; i < endptStr.length; i++) { - endpts.add(readData.getAtom(Integer.parseInt(endptStr[i]) - 1)); - } - } else if (key.equals("ATTACH")) { - attach = value; - } else { - logger.warn("Not parsing key: " + key); + switch (key) { + case "CFG": + int configuration = Integer.parseInt(value); + if (configuration == 0) { + bond.setStereo(IBond.Stereo.NONE); + } else if (configuration == 1) { + bond.setStereo(IBond.Stereo.UP); 
+ } else if (configuration == 2) { + bond.setStereo((IBond.Stereo) CDKConstants.UNSET); + } else if (configuration == 3) { + bond.setStereo(IBond.Stereo.DOWN); + } + break; + case "ENDPTS": + String[] endptStr = value.split(" "); + // skip first value that is count + for (int i = 1; i < endptStr.length; i++) { + endpts.add(readData.getAtom(Integer.parseInt(endptStr[i]) - 1)); + } + break; + case "ATTACH": + attach = value; + break; + default: + logger.warn("Not parsing key: " + key); + break; } } catch (Exception exception) { String error = "Error while parsing key/value " + key + "=" + value + ": " @@ -626,4 +684,38 @@ public void close() throws IOException { private void initIOSettings() {} + /** + * Applies the MDL valence model to atoms using the explicit valence (bond + * order sum) and charge to determine the correct number of implicit + * hydrogens. The model is not applied if the explicit valence is less than + * 0 - this is the case when a query bond was read for an atom. + * @param atom the atom to apply the model to + * @param explicitValence the explicit valence (bond order sum plus unpaired electrons) + * @param unpaired the unpaired electron count, already included in explicitValence + */ + private void applyMDLValenceModel(IAtom atom, int explicitValence, int unpaired) { + + if (atom.getValency() != null) { + if (atom.getValency() >= explicitValence) + atom.setImplicitHydrogenCount(atom.getValency() - (explicitValence - unpaired)); + else + atom.setImplicitHydrogenCount(0); + } else { + Integer element = atom.getAtomicNumber(); + if (element == null) element = 0; + + Integer charge = atom.getFormalCharge(); + if (charge == null) charge = 0; + + int implicitValence = MDLValence.implicitValence(element, charge, explicitValence); + if (implicitValence < explicitValence) { + atom.setValency(explicitValence); + atom.setImplicitHydrogenCount(0); + } else { + atom.setValency(implicitValence); + atom.setImplicitHydrogenCount(implicitValence - explicitValence); + } + } + } + } diff --git a/storage/io/src/test/java/org/openscience/cdk/io/MDLV3000ReaderTest.java 
b/storage/io/src/test/java/org/openscience/cdk/io/MDLV3000ReaderTest.java index 931be5bcc6d..ad9f2909f65 100644 --- a/storage/io/src/test/java/org/openscience/cdk/io/MDLV3000ReaderTest.java +++ b/storage/io/src/test/java/org/openscience/cdk/io/MDLV3000ReaderTest.java @@ -137,4 +137,12 @@ public void testPseudoAtomLabels() throws Exception { assertThat(sgroups.get(0).getType(), is(SgroupType.ExtMulticenter)); } } + + @Test public void radicalsInCH3() throws Exception { + try (MDLV3000Reader reader = new MDLV3000Reader(getClass().getResourceAsStream("CH3.mol"))) { + IAtomContainer container = reader.read(new org.openscience.cdk.AtomContainer(0, 0, 0, 0)); + assertThat(container.getSingleElectronCount(), is(1)); + assertThat(container.getAtom(0).getImplicitHydrogenCount(), is(3)); + } + } } diff --git a/storage/io/src/test/resources/org/openscience/cdk/io/CH3.mol b/storage/io/src/test/resources/org/openscience/cdk/io/CH3.mol new file mode 100644 index 00000000000..6f0b39b92d2 --- /dev/null +++ b/storage/io/src/test/resources/org/openscience/cdk/io/CH3.mol @@ -0,0 +1,11 @@ + + + + 0 0 0 0 0 999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 1 0 0 0 0 +M V30 BEGIN ATOM +M V30 1 C 1.8704 8.5741 0 0 RAD=2 VAL=3 +M V30 END ATOM +M V30 END CTAB +M END