+ * IsotopeFactory ifac = Isotopes.getInstance(); + * IIsotope c = ifac.getMajorIsotope("C"); + * IIsotope h = ifac.getMajorIsotope("H"); + * IIsotope n = ifac.getMajorIsotope("N"); + * IIsotope o = ifac.getMajorIsotope("O"); + * IIsotope p = ifac.getMajorIsotope("P"); + * IIsotope s = ifac.getMajorIsotope("S"); + * + * MolecularFormulaRange mfRange = new MolecularFormulaRange(); + * mfRange.addIsotope(c, 0, 50); + * mfRange.addIsotope(h, 0, 100); + * mfRange.addIsotope(o, 0, 50); + * mfRange.addIsotope(n, 0, 50); + * mfRange.addIsotope(p, 0, 10); + * mfRange.addIsotope(s, 0, 10); + * + * MolecularFormulaGenerator mfg = new MolecularFormulaGenerator(builder, minMass, + * maxMass, mfRange); + * double minMass = 133.003; + * double maxMass = 133.005; + * IMolecularFormulaSet mfSet = mfg.getAllFormulas(); + *+ * + * The code was originally developed for a MZmine 2 framework module, published + * in Pluskal et al. {@cdk.cite Pluskal2012}. + * + * @cdk.module formula + * @author Tomas Pluskal + * @cdk.created 2014-12-28 + * @cdk.githash + */ +class FullEnumerationFormulaGenerator implements IFormulaGenerator { + + private static final ILoggingTool logger = LoggingToolFactory + .createLoggingTool(FullEnumerationFormulaGenerator.class); + + private final IChemObjectBuilder builder; + + /** + * Mass range to search by this instance of MolecularFormulaGenerator + */ + private final double minMass, maxMass; + + /** + * Internal arrays of isotopes (elements) used for the formula generation, + * their minimal and maximal counts, and the current counts, which + * correspond to the latest formula candidate. + * + * For example, let's assume we set isotopes=[C,H,N], minCounts=[0,0,0], and + * maxCounts=[3,3,3]. Then, currentCounts will be iterated in the following + * order: [0,0,0], [1,0,0], [2,0,0], [3,0,0], [0,1,0], [1,1,0], [2,1,0], + * [3,1,0], [0,2,0], [1,2,0], [2,2,0], etc. + * + * The lastIncreasedPosition index indicates the last position in + * currentCounts that was increased by calling increaseCounter(position) + */ + private final IIsotope isotopes[]; + private final int minCounts[], maxCounts[], currentCounts[]; + private int lastIncreasedPosition = 0; + + /** + * A flag indicating that the formula generation is running. If the search + * is finished (the whole search space has been examined) or the search is + * canceled by calling the cancel() method, this flag is set to false. + * + * @see #cancel() + */ + private volatile boolean searchRunning = true; + + /** + * Initiate the MolecularFormulaGenerator. + * + * @param minMass + * Lower boundary of the target mass range + * @param maxMass + * Upper boundary of the target mass range + * @param mfRange + * A range of elemental compositions defining the search space + * @throws IllegalArgumentException + * In case some of the isotopes in mfRange has undefined exact + * mass or in case illegal parameters are provided (e.g., + * negative mass values or empty MolecularFormulaRange) + * @see MolecularFormulaRange + */ + public FullEnumerationFormulaGenerator(final IChemObjectBuilder builder, + final double minMass, final double maxMass, + final MolecularFormulaRange mfRange) { + + logger.info("Initiate MolecularFormulaGenerator, mass range ", minMass, + "-", maxMass); + + // Check parameter values + if ((minMass < 0.0) || (maxMass < 0.0)) { + throw (new IllegalArgumentException( + "The minimum and maximum mass values must be >=0")); + } + + if ((minMass > maxMass)) { + throw (new IllegalArgumentException( + "Minimum mass must be <= maximum mass")); + } + + if ((mfRange == null) || (mfRange.getIsotopeCount() == 0)) { + throw (new IllegalArgumentException( + "The MolecularFormulaRange parameter must be non-null and must contain at least one isotope")); + } + + // Save the parameters + this.builder = builder; + this.minMass = minMass; + this.maxMass = maxMass; + + // Sort the elements by mass in ascending order. That speeds up + // the search. + final TreeSet
* IsotopeFactory ifac = Isotopes.getInstance(); * IIsotope c = ifac.getMajorIsotope("C"); @@ -48,7 +19,7 @@ * IIsotope o = ifac.getMajorIsotope("O"); * IIsotope p = ifac.getMajorIsotope("P"); * IIsotope s = ifac.getMajorIsotope("S"); - * + * * MolecularFormulaRange mfRange = new MolecularFormulaRange(); * mfRange.addIsotope(c, 0, 50); * mfRange.addIsotope(h, 0, 100); @@ -56,63 +27,36 @@ * mfRange.addIsotope(n, 0, 50); * mfRange.addIsotope(p, 0, 10); * mfRange.addIsotope(s, 0, 10); - * + * * MolecularFormulaGenerator mfg = new MolecularFormulaGenerator(builder, minMass, * maxMass, mfRange); * double minMass = 133.003; * double maxMass = 133.005; * IMolecularFormulaSet mfSet = mfg.getAllFormulas(); *- * - * The code was originally developed for a MZmine 2 framework module, published - * in Pluskal et al. {@cdk.cite Pluskal2012}. - * + * + * This class offers two implementations: The Round Robin algorithm {@cdk.cite Boecker2008} on mass ranges + * {@cdk.cite Duehrkop2013} is used on most inputs. For special cases (e.g. single elements, extremely large mass ranges) + * a full enumeration algorithm {@cdk.cite Pluskal2012} is used. + * + * The Round Robin algorithm was originally developed for the SIRIUS 3 software. The full enumeration algorithm was + * originally developed for a MZmine 2 framework module, published in Pluskal et al. {@cdk.cite Pluskal2012}. + * * @cdk.module formula - * @author Tomas Pluskal + * @author Tomas Pluskal, Kai Dührkop, Marcus Ludwig * @cdk.created 2014-12-28 * @cdk.githash */ -public class MolecularFormulaGenerator { - - private static final ILoggingTool logger = LoggingToolFactory - .createLoggingTool(MolecularFormulaGenerator.class); - - private final IChemObjectBuilder builder; +public class MolecularFormulaGenerator implements IFormulaGenerator { /** - * Mass range to search by this instance of MolecularFormulaGenerator + * The chosen implementation */ - private final double minMass, maxMass; - - /** - * Internal arrays of isotopes (elements) used for the formula generation, - * their minimal and maximal counts, and the current counts, which - * correspond to the latest formula candidate. - * - * For example, let's assume we set isotopes=[C,H,N], minCounts=[0,0,0], and - * maxCounts=[3,3,3]. Then, currentCounts will be iterated in the following - * order: [0,0,0], [1,0,0], [2,0,0], [3,0,0], [0,1,0], [1,1,0], [2,1,0], - * [3,1,0], [0,2,0], [1,2,0], [2,2,0], etc. - * - * The lastIncreasedPosition index indicates the last position in - * currentCounts that was increased by calling increaseCounter(position) - */ - private final IIsotope isotopes[]; - private final int minCounts[], maxCounts[], currentCounts[]; - private int lastIncreasedPosition = 0; - - /** - * A flag indicating that the formula generation is running. If the search - * is finished (the whole search space has been examined) or the search is - * canceled by calling the cancel() method, this flag is set to false. - * - * @see #cancel() - */ - private boolean searchRunning = true; + protected final IFormulaGenerator formulaGenerator; /** * Initiate the MolecularFormulaGenerator. - * + * * @param minMass * Lower boundary of the target mass range * @param maxMass @@ -126,65 +70,47 @@ public class MolecularFormulaGenerator { * @see MolecularFormulaRange */ public MolecularFormulaGenerator(final IChemObjectBuilder builder, - final double minMass, final double maxMass, - final MolecularFormulaRange mfRange) { - - logger.info("Initiate MolecularFormulaGenerator, mass range ", minMass, - "-", maxMass); - - // Check parameter values - if ((minMass < 0.0) || (maxMass < 0.0)) { - throw (new IllegalArgumentException( - "The minimum and maximum mass values must be >=0")); - } - - if ((minMass > maxMass)) { - throw (new IllegalArgumentException( - "Minimum mass must be <= maximum mass")); - } - - if ((mfRange == null) || (mfRange.getIsotopeCount() == 0)) { - throw (new IllegalArgumentException( - "The MolecularFormulaRange parameter must be non-null and must contain at least one isotope")); - } - - // Save the parameters - this.builder = builder; - this.minMass = minMass; - this.maxMass = maxMass; - - // Sort the elements by mass in ascending order. That speeds up - // the search. - final TreeSet
+ * It would be also the case when the given alphabet is a subset of this decomposers alphabet. However,
+ * if the alphabet size of the decomposer is much larger, the decomposer might be slower anyways due to
+ * larger memory footprint. As we expect that the alphabet does not change that often, it might be
+ * sufficient to just compare the arrays.
+ */
+ boolean isCompatible(IIsotope[] elements) {
+ return Arrays.equals(elements, this.elements);
+ }
+
+ /*
+
+ */
+ private double findOptimalPrecision() {
+ return 1d / 5963.337687d; // This blowup is optimized for organic compounds based on the CHNOPS alphabet
+ }
+
+ /**
+ * Initializes the decomposer. Computes the extended residue table. This have to be done only one time for
+ * a given alphabet, independently from the masses you want to decompose. This method is called automatically
+ * if you compute the decompositions, so call it only if you want to control the time of the initialisation.
+ */
+ private void init() {
+ if (ERTs != null) return;
+ synchronized (this) {
+ if (ERTs != null) return;
+ discretizeMasses();
+ divideByGCD();
+ computeLCMs();
+ calcERT();
+ computeErrors();
+ }
+ }
+
+ /**
+ * Check if a mass is decomposable. This is done in constant time (especially: it is very very very fast!).
+ * But it doesn't check if there is a valid decomposition. Therefore, even if the method returns true,
+ * all decompositions may be invalid for the given validator or given bounds.
+ * #decompose(mass) uses this function before starting the decomposition, therefore this method should only
+ * be used if you don't want to start the decomposition algorithm.
+ *
+ * @return true if the mass is decomposable, ignoring bounds or any additional filtering rule
+ */
+ boolean maybeDecomposable(double from, double to) {
+ init();
+ final int[][][] ERTs = this.ERTs;
+ final int[] minmax = new int[2];
+ //normal version seems to be faster, because it returns after first hit
+ integerBound(from, to, minmax);
+ final int a = weights.get(0).getIntegerMass();
+ for (int i = minmax[0]; i <= minmax[1]; ++i) {
+ final int r = i % a;
+ if (i >= ERTs[0][r][weights.size() - 1]) return true;
+ }
+ return false;
+ }
+
+ /**
+ * Returns an iterator over all decompositons of this mass range
+ *
+ * @param from lowest mass to decompose
+ * @param to (inclusive) largest mass to decompose
+ * @param boundaries defines lowerbounds and upperbounds for the number of elements
+ */
+ DecompIterator decomposeIterator(double from, double to, MolecularFormulaRange boundaries) {
+ init();
+ if (to < 0d || from < 0d)
+ throw new IllegalArgumentException("Expect positive mass for decomposition: [" + from + ", " + to + "]");
+ if (to < from) throw new IllegalArgumentException("Negative range given: [" + from + ", " + to + "]");
+ final int[] minValues = new int[weights.size()];
+ final int[] boundsarray = new int[weights.size()];
+ double cfrom = from, cto = to;
+ Arrays.fill(boundsarray, Integer.MAX_VALUE);
+ if (boundaries != null) {
+ for (int i = 0; i < boundsarray.length; i++) {
+ IIsotope el = weights.get(i).getOwner();
+ int max = boundaries.getIsotopeCountMax(el);
+ int min = boundaries.getIsotopeCountMin(el);
+ if (min >= 0 || max >= 0) {
+ boundsarray[i] = max - min;
+ minValues[i] = min;
+ if (minValues[i] > 0) {
+ final double reduceWeightBy = weights.get(i).getMass() * min;
+ cfrom -= reduceWeightBy;
+ cto -= reduceWeightBy;
+ }
+ }
+ }
+ }
+ final int[] minmax = new int[2];
+ integerBound(cfrom, cto, minmax);
+ final int deviation = minmax[1] - minmax[0];
+ //calculate the required ERTs
+ if ((1 << (ERTs.length - 1)) <= deviation) {
+ calcERT(deviation);
+ }
+ final int[][][] ERTs = this.ERTs;
+
+ //take ERT with required deviation
+ int[][] currentERT;
+ if (deviation == 0) currentERT = ERTs[0];
+ else currentERT = ERTs[32 - Integer.numberOfLeadingZeros(deviation)];
+
+ return new DecompIterator(currentERT, minmax[0], minmax[1], from, to, minValues, boundsarray, weights);
+ }
+
+ /**
+ * calculates ERTs to look up whether a mass or lower masses within a certain deviation are decomposable.
+ * only ERTs for deviation 2^x are calculated
+ */
+ private void calcERT(int deviation) {
+ final int[][][] ERTs = this.ERTs;
+ final int currentLength = ERTs.length;
+
+ // we have to extend the ERT table
+
+ int[][] lastERT = ERTs[ERTs.length - 1];
+ int[][] nextERT = new int[lastERT.length][weights.size()];
+ if (currentLength == 1) {
+ //first line compares biggest residue and 0
+ for (int j = 0; j < weights.size(); j++) {
+ nextERT[0][j] = Math.min(lastERT[nextERT.length - 1][j], lastERT[0][j]);
+ }
+ for (int i = 1; i < nextERT.length; i++) {
+ for (int j = 0; j < weights.size(); j++) {
+ nextERT[i][j] = Math.min(lastERT[i][j], lastERT[i - 1][j]);
+ }
+ }
+ } else {
+ int step = (1 << (currentLength - 2));
+ for (int i = step; i < nextERT.length; i++) {
+ for (int j = 0; j < weights.size(); j++) {
+ nextERT[i][j] = Math.min(lastERT[i][j], lastERT[i - step][j]);
+ }
+ }
+ //first lines compared with last lines (greatest residues) because of modulo's cyclic characteristic
+ for (int i = 0; i < step; i++) {
+ for (int j = 0; j < weights.size(); j++) {
+ nextERT[i][j] = Math.min(lastERT[i][j], lastERT[i + nextERT.length - step][j]);
+ }
+ }
+ }
+
+ // now store newly calculated ERT
+ synchronized (this) {
+ final int[][][] tables = this.ERTs;
+ if (tables.length == currentLength) {
+ this.ERTs = Arrays.copyOf(this.ERTs, this.ERTs.length + 1);
+ this.ERTs[this.ERTs.length - 1] = nextERT;
+ } else {
+ // another background thread did already compute the ERT. So we don't have to do this again
+ }
+ }
+ // recursively calculate ERTs for higher deviations
+ // current ERT is already sufficient
+ if ((1 << (currentLength - 1)) <= deviation) calcERT(deviation);
+ }
+
+ private void calcERT() {
+ int firstLongVal = weights.get(0).getIntegerMass();
+ int[][] ERT = new int[firstLongVal][weights.size()];
+ int d, r, n, argmin;
+
+ //Init
+ ERT[0][0] = 0;
+ for (int i = 1; i < ERT.length; ++i) {
+ ERT[i][0] = Integer.MAX_VALUE; // should be infinity
+ }
+
+ //Filling the Table, j loops over columns
+ for (int j = 1; j < ERT[0].length; ++j) {
+ ERT[0][j] = 0; // Init again
+ d = gcd(firstLongVal, weights.get(j).getIntegerMass());
+ for (int p = 0; p < d; p++) { // Need to start d Round Robin loops
+ if (p == 0) {
+ n = 0; // 0 is the min in the complete RT or the first p-loop
+ } else {
+ n = Integer.MAX_VALUE; // should be infinity
+ argmin = p;
+ for (int i = p; i < ERT.length; i += d) { // Find Minimum in specific part of ERT
+ if (ERT[i][j - 1] < n) {
+ n = ERT[i][j - 1];
+ argmin = i;
+ }
+ }
+ ERT[argmin][j] = n;
+ }
+ if (n == Integer.MAX_VALUE) { // Minimum of the specific part of ERT was infinity
+ for (int i = p; i < ERT.length; i += d) { // Fill specific part of ERT with infinity
+ ERT[i][j] = Integer.MAX_VALUE;
+ }
+ } else { // Do normal loop
+ for (long i = 1; i < ERT.length / d; ++i) { // i is just a counter
+ n += weights.get(j).getIntegerMass();
+ if (n < 0) {
+ throw new ArithmeticException("Integer overflow occurs. DECOMP cannot calculate decompositions for the given alphabet as it exceeds the 32 bit integer space. Please use a smaller precision value.");
+ }
+ r = n % firstLongVal;
+ if (ERT[r][j - 1] < n) n = ERT[r][j - 1]; // get the min
+ ERT[r][j] = n;
+ }
+ }
+ } // end for p
+ } // end for j
+ synchronized (this) {
+ if (this.ERTs == null) {
+ this.ERTs = new int[][][]{ERT};
+ }
+ }
+ }
+
+
+ private void discretizeMasses() {
+ // compute integer masses
+ for (final ChemicalElement weight : weights) {
+ weight.setIntegerMass((int) (weight.getMass() / precision));
+ }
+ }
+
+ private void divideByGCD() {
+ if (weights.size() > 0) {
+ int d = gcd(weights.get(0).getIntegerMass(), weights.get(1).getIntegerMass());
+ for (int i = 2; i < weights.size(); ++i) {
+ d = gcd(d, weights.get(i).getIntegerMass());
+ if (d == 1) return;
+ }
+ precision *= d;
+ for (ChemicalElement weight : weights) {
+ weight.setIntegerMass(weight.getIntegerMass() / d);
+ }
+ }
+ }
+
+ private void computeLCMs() {
+ final ChemicalElement first = weights.get(0);
+ first.setL(1);
+ first.setLcm(first.getIntegerMass());
+
+ for (int i = 1; i < weights.size(); i++) {
+ final ChemicalElement weight = weights.get(i);
+ int temp = first.getIntegerMass() / gcd(first.getIntegerMass(), weight.getIntegerMass());
+ weight.setL(temp);
+ weight.setLcm(temp * weight.getIntegerMass());
+ }
+ }
+
+ private void computeErrors() {
+ this.minError = 0d;
+ this.maxError = 0d;
+ for (ChemicalElement weight : weights) {
+ final double error = (precision * weight.getIntegerMass() - weight.getMass()) / weight.getMass();
+ minError = Math.min(minError, error);
+ maxError = Math.max(maxError, error);
+ }
+ }
+
+ private void integerBound(double from, double to, int[] bounds) {
+ final double fromD = Math.ceil((1 + minError) * from / precision);
+ final double toD = Math.floor((1 + maxError) * to / precision);
+ if (fromD > Integer.MAX_VALUE || toD > Integer.MAX_VALUE) {
+ throw new ArithmeticException("Given mass is too large to decompose. Please use a smaller precision value, i.e. mass/precision have to be within 32 bit integer space");
+ }
+
+ bounds[0] = Math.max(0, (int) fromD);
+ bounds[1] = Math.max(0, (int) toD);
+ }
+
+ /**
+ * Iterator implementation of the loop
+ * We do not use static classes. This gives us the possibility to make some of the variables behave thread safe
+ * and resistant against changes from the user.
+ */
+ static class DecompIterator {
+ // final initialization values
+ final int[][] ERT;
+ final double minDoubleMass;
+ final double maxDoubleMass;
+ final int[] buffer;
+ final int[] minValues;
+ final int[] maxValues;
+ final List