Permalink
Browse files

Updated MF manipulator to properly evaluate total mass and generate formula string correctly, even when there are multiple isotopes of the same element. Added unit test. Simplified molecular formula to properly use the isotopes map as an actual map.

Signed-off-by: Egon Willighagen <egonw@users.sourceforge.net>
  • Loading branch information...
rajarshi authored and egonw committed Oct 16, 2010
1 parent e280bc2 commit 1e54d9368b5bf7856c8a14ba2958a9ba5bb31051
@@ -162,7 +162,7 @@ public Integer getCharge() {
*/
@TestMethod("testGetIsotopeCount_IIsotope")
public int getIsotopeCount(IIsotope isotope) {
- return !contains(isotope) ? 0 : isotopes.get(getIsotope(isotope));
+ return !isotopes.containsKey(isotope) ? 0 : isotopes.get(isotope);
}
/**
@@ -24,34 +24,25 @@
* */
package org.openscience.cdk.tools.manipulator;
-import java.io.IOException;
-import java.io.FileReader;
-import java.io.FileNotFoundException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.HashMap;
-
import org.openscience.cdk.CDKConstants;
-import org.openscience.cdk.DefaultChemObjectBuilder;
-import org.openscience.cdk.ChemFile;
-import org.openscience.cdk.io.MDLReader;
-import org.openscience.cdk.tools.CDKHydrogenAdder;
-import org.openscience.cdk.smiles.SmilesParser;
import org.openscience.cdk.annotations.TestClass;
import org.openscience.cdk.annotations.TestMethod;
import org.openscience.cdk.config.AtomTypeFactory;
import org.openscience.cdk.config.IsotopeFactory;
import org.openscience.cdk.exception.CDKException;
-import org.openscience.cdk.exception.InvalidSmilesException;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IAtomType;
+import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.interfaces.IElement;
import org.openscience.cdk.interfaces.IIsotope;
import org.openscience.cdk.interfaces.IMolecularFormula;
-import org.openscience.cdk.interfaces.IChemObjectBuilder;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
/**
* Class with convenience methods that provide methods to manipulate
@@ -64,35 +55,7 @@
*/
@TestClass("org.openscience.cdk.formula.MolecularFormulaManipulatorTest")
public class MolecularFormulaManipulator {
-
- public static void main(String[] args) throws CDKException, FileNotFoundException {
- SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance());
-// IAtomContainer mol = sp.parseSmiles("[H]OC3([H])(C([H])(O[H])C([H])(OP(=O)(O[H])O[H])C([H])(OP(=O)(O[H])O[H])C([H])(O[H])C3([H])(OP(=O)(O[H])OC([H])([H])C([H])(OC(=O)C([H])([H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H])C([H])([H])OC(=O)C([H])([H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])N([H])C(=O)C([H])([H])C([H])([H])C([H])([H])C([H])([H])C1([H])(C2([H])(N([H])C(=O)N([H])C2([H])(C([H])([H])S1)))))");
-// IAtomContainer mol = sp.parseSmiles("[nH]1c(=O)nc2c(c1=O)nc1c(n2C[C@@H]([C@@H]([C@@H](COP(=O)(OP(=O)(OC[C@@H]2[C@H]([C@H]([C@@H](O2)n2c3ncnc(c3nc2)N)O)O)O)O)O)O)O)cc(c(c1)C)C");
- IAtomContainer mol = sp.parseSmiles("[C@H]1([C@H]([C@@H]([C@H]([C@@H](O1)O)O)O)O)CO");
-
-// MDLReader reader = new MDLReader(new FileReader("/Users/guhar/Downloads/45002.sdf"));
-// ChemFile chemFile = (ChemFile) reader.read(DefaultChemObjectBuilder.getInstance().newInstance(ChemFile.class));
-// List<IAtomContainer> mols = ChemFileManipulator.getAllAtomContainers(chemFile);
-// mol = mols.get(0);
-
- AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
- CDKHydrogenAdder ha = CDKHydrogenAdder.getInstance(DefaultChemObjectBuilder.getInstance());
- ha.addImplicitHydrogens(mol);
- AtomContainerManipulator.convertImplicitToExplicitHydrogens(mol);
-
- int nh = 0;
- for (IAtom atom : mol.atoms()) {
- if (atom.getSymbol().equals("H")) nh++;
- }
- System.out.println("nh = " + nh);
- IMolecularFormula molecularFormula = MolecularFormulaManipulator.getMolecularFormula(mol);
- System.out.println(MolecularFormulaManipulator.getString(molecularFormula));
-
-
- Iterable<IIsotope> map = molecularFormula.isotopes();
- for (IIsotope iso : map) System.out.println(iso.getSymbol());
- }
+
/**
* Checks a set of Nodes for the occurrence of each isotopes
* instance in the molecular formula. In short number of atoms.
@@ -221,17 +184,27 @@ public static IMolecularFormula removeElement(IMolecularFormula formula, IElemen
*/
@TestMethod("testGetString_IMolecularFormula_arrayString_boolean")
public static String getString(IMolecularFormula formula, String[] orderElements, boolean setOne) {
- String stringMF = "";
+ StringBuffer stringMF = new StringBuffer();
List<IIsotope> isotopesList = putInOrder(orderElements, formula);
+
+ // collect elements in a map - since different isotopes of the
+ // same element will get repeated in the formula
+ List<String> elemSet = new ArrayList<String>();
for (IIsotope isotope : isotopesList) {
- int elemCount = getElementCount(formula, isotope);
- System.out.println(isotope.getSymbol()+" -> "+elemCount);
- if (elemCount == 1 && !setOne)
- stringMF = stringMF + isotope.getSymbol();
- else
- stringMF = stringMF + isotope.getSymbol() + getElementCount(formula, isotope);
+ String symbol = isotope.getSymbol();
+ if (!elemSet.contains(symbol)) elemSet.add(symbol);
}
- return stringMF;
+
+ for (String elem : elemSet) {
+ int count = 0;
+ for (IIsotope isotope : formula.isotopes()) {
+ if (isotope.getSymbol().equals(elem)) count += formula.getIsotopeCount(isotope);
+ }
+ stringMF.append(elem);
+ if (count == 1 && !setOne) continue;
+ else stringMF.append(count);
+ }
+ return stringMF.toString();
}
/**
@@ -648,13 +621,13 @@ else if(charge < 0)
public static double getTotalMassNumber(IMolecularFormula formula) {
double mass = 0.0;
for (IIsotope isotope : formula.isotopes()) {
+
try {
IIsotope isotope2 = IsotopeFactory.getInstance(formula.getBuilder()).getMajorIsotope(isotope.getSymbol());
- mass += isotope2.getAtomicNumber() * formula.getIsotopeCount(isotope);
+ mass += isotope2.getMassNumber() * formula.getIsotopeCount(isotope);
} catch (IOException e) {
e.printStackTrace();
- }
-
+ }
}
return mass;
}
@@ -0,0 +1,150 @@
+PtdIns-(4,5)-P2-biotin
+ ChemDraw09091017372D
+
+ 65 67 0 0 0 0 0 0 0 0999 V2000
+ -5.7601 -2.5558 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -5.7601 -3.3808 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -5.0457 -3.7933 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -4.3312 -3.3808 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -4.3312 -2.5558 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -5.0457 -2.1433 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -5.0457 -1.3183 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -6.4746 -2.1433 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -6.4746 -3.7933 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -3.6167 -2.1433 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -5.0457 -0.4933 0.0000 P 0 0 0 0 0 0 0 0 0 0 0 0
+ -4.2207 -0.4933 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -5.0457 0.3317 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -5.8707 -0.4933 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -6.2832 0.2211 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -5.8707 0.9356 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -6.2832 1.6501 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -5.9325 2.4883 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -5.0457 0.9356 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -5.2181 2.9008 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -4.3312 1.3481 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -5.2181 3.7258 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -4.3312 2.1731 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -3.6167 0.9356 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -2.9023 1.3481 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -2.1878 0.9356 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -1.4733 1.3481 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -0.7588 0.9356 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -0.0444 1.3481 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 0.6701 0.9356 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -4.5036 2.4883 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -3.7891 2.9008 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -3.0747 2.4883 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -2.3602 2.9008 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -1.6457 2.4883 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -0.9313 2.9008 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ -0.2168 2.4883 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 0.4977 2.9008 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
+ 1.2122 2.4883 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 1.9266 2.9008 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.6411 2.4883 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 3.3556 2.9008 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 1.2122 1.6633 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.0700 2.4883 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.7845 2.9008 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.5382 2.5653 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.0902 3.1783 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.6777 3.8928 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.8708 3.7213 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.7640 4.7133 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.0103 5.0489 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.4583 4.4358 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.0739 3.5078 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.4746 4.1063 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.8388 5.8558 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -3.6167 -3.7933 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -2.9023 -3.3808 0.0000 P 0 0 0 0 0 0 0 0 0 0 0 0
+ -2.9023 -2.5558 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -2.9023 -4.2058 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -2.1878 -3.7933 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -5.0457 -4.6183 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -4.3312 -5.0308 0.0000 P 0 0 0 0 0 0 0 0 0 0 0 0
+ -3.6167 -4.6183 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -5.0457 -5.4433 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ -4.3312 -5.8558 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 1 2 1 0
+ 2 3 1 0
+ 3 4 1 0
+ 4 5 1 0
+ 5 6 1 0
+ 6 1 1 0
+ 6 7 1 1
+ 1 8 1 1
+ 2 9 1 1
+ 5 10 1 6
+ 7 11 1 0
+ 11 12 1 0
+ 11 13 2 0
+ 11 14 1 0
+ 14 15 1 0
+ 15 16 1 0
+ 16 17 1 0
+ 17 18 1 0
+ 16 19 1 1
+ 18 20 1 0
+ 19 21 1 0
+ 20 22 2 0
+ 21 23 2 0
+ 21 24 1 0
+ 24 25 1 0
+ 25 26 1 0
+ 26 27 1 0
+ 27 28 1 0
+ 28 29 1 0
+ 29 30 1 0
+ 20 31 1 0
+ 31 32 1 0
+ 32 33 1 0
+ 33 34 1 0
+ 34 35 1 0
+ 35 36 1 0
+ 36 37 1 0
+ 37 38 1 0
+ 38 39 1 0
+ 39 40 1 0
+ 40 41 1 0
+ 41 42 1 0
+ 39 43 2 0
+ 42 44 1 0
+ 44 45 1 6
+ 45 46 1 0
+ 46 47 1 0
+ 47 48 1 0
+ 48 49 1 0
+ 49 45 1 0
+ 48 50 1 0
+ 50 51 1 0
+ 51 52 1 0
+ 52 49 1 0
+ 49 53 1 6
+ 48 54 1 6
+ 51 55 2 0
+ 4 56 1 1
+ 56 57 1 0
+ 57 58 2 0
+ 57 59 1 0
+ 57 60 1 0
+ 3 61 1 6
+ 61 62 1 0
+ 62 63 2 0
+ 62 64 1 0
+ 62 65 1 0
+M STY 1 1 SUP
+M SLB 1 1 1
+M SAL 1 5 56 57 58 59 60
+M SBL 1 1 58
+M SMT 1 OPO3H2
+M SBV 1 58 -0.7145 0.4125
+M STY 1 2 SUP
+M SLB 1 2 2
+M SAL 2 5 61 62 63 64 65
+M SBL 2 1 63
+M SMT 2 OPO3H2
+M SBV 2 63 0.0000 0.8250
+M END
+
@@ -20,25 +20,29 @@
*/
package org.openscience.cdk.tools.manipulator;
-import java.io.IOException;
-import java.util.List;
-
import org.junit.Assert;
import org.junit.Test;
import org.openscience.cdk.CDKTestCase;
+import org.openscience.cdk.ChemFile;
import org.openscience.cdk.DefaultChemObjectBuilder;
import org.openscience.cdk.config.IsotopeFactory;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.exception.InvalidSmilesException;
import org.openscience.cdk.formula.MolecularFormula;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
+import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.interfaces.IElement;
import org.openscience.cdk.interfaces.IIsotope;
import org.openscience.cdk.interfaces.IMolecularFormula;
-import org.openscience.cdk.interfaces.IChemObjectBuilder;
+import org.openscience.cdk.io.MDLV2000Reader;
import org.openscience.cdk.nonotify.NoNotificationChemObjectBuilder;
import org.openscience.cdk.smiles.SmilesParser;
+import org.openscience.cdk.tools.CDKHydrogenAdder;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
/**
* Checks the functionality of the MolecularFormulaManipulator.
@@ -1147,4 +1151,27 @@ public void testWithH_Initial() {
Assert.assertTrue(MolecularFormulaManipulator.compare(formula1, MolecularFormulaManipulator.getMolecularFormula(formula, builder)));
Assert.assertEquals("C5H13NO2", MolecularFormulaManipulator.getString(ff));
}
+
+ /**
+ * @cdk.bug 3071473
+ */
+ @Test
+ public void testFromMol() throws CDKException {
+ String filename = "data/mdl/formulatest.mol";
+ InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename);
+ MDLV2000Reader reader = new MDLV2000Reader(ins);
+ ChemFile chemFile = reader.read(new ChemFile());
+ Assert.assertNotNull(chemFile);
+ List<IAtomContainer> mols = ChemFileManipulator.getAllAtomContainers(chemFile);
+ IAtomContainer mol = mols.get(0);
+
+ AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
+ CDKHydrogenAdder ha = CDKHydrogenAdder.getInstance(DefaultChemObjectBuilder.getInstance());
+ ha.addImplicitHydrogens(mol);
+ AtomContainerManipulator.convertImplicitToExplicitHydrogens(mol);
+
+ IMolecularFormula molecularFormula = MolecularFormulaManipulator.getMolecularFormula(mol);
+ String formula2 = MolecularFormulaManipulator.getString(molecularFormula);
+ Assert.assertTrue(formula2.equals("C35H64N3O21P3S"));
+ }
}

0 comments on commit 1e54d93

Please sign in to comment.