-
Notifications
You must be signed in to change notification settings - Fork 154
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement round robin algorithm for decomposing molecular formulas, taken from SIRIUS
- Loading branch information
1 parent
397a0c5
commit 86b4065
Showing
14 changed files
with
2,144 additions
and
288 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
53 changes: 53 additions & 0 deletions
53
tool/formula/src/main/java/org/openscience/cdk/decomp/Alphabet.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
/* | ||
* This file is part of the SIRIUS library for analyzing MS and MS/MS data | ||
* | ||
* Copyright (C) 2013-2015 Kai Dührkop | ||
* | ||
* This library is free software; you can redistribute it and/or | ||
* modify it under the terms of the GNU Lesser General Public | ||
* License as published by the Free Software Foundation; either | ||
* version 2.1 of the License, or (at your option) any later version. | ||
* | ||
* This library is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
* Lesser General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License along with SIRIUS. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
package org.openscience.cdk.decomp; | ||
|
||
/**
 * Describes the alphabet over which a given weight is decomposed. An alphabet is an indexed sequence of
 * characters of type T, each of which is associated with a weight. Characters can be looked up by their
 * index, and the index of a character can be looked up from the character itself.
 *
 * @param <T> type of a single character in the alphabet
 */
interface Alphabet<T> {

    /**
     * Reports how many characters the alphabet contains.
     *
     * @return size of the alphabet; character indices run from 0 (inclusive) up to size (exclusive)
     */
    int size();

    /**
     * Looks up the weight of a character by its index.
     *
     * @param i index of the character
     * @return weight of character c_i
     */
    double weightOf(int i);

    /**
     * Looks up a character by its index.
     *
     * @param i index of the character
     * @return character c_i
     */
    T get(int i);

    /**
     * Maps a character back to its index. Implementations should keep this reasonably cheap: internally a
     * re-ordered copy of the alphabet is used, and its entries have to be mapped back to the original
     * alphabet.
     *
     * @param character the character to look up
     * @return the index of the character
     */
    int indexOf(T character);

}
105 changes: 105 additions & 0 deletions
105
tool/formula/src/main/java/org/openscience/cdk/decomp/ChemicalAlphabet.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
/* | ||
* This file is part of the SIRIUS library for analyzing MS and MS/MS data | ||
* | ||
* Copyright (C) 2013-2015 Kai Dührkop | ||
* | ||
* This library is free software; you can redistribute it and/or | ||
* modify it under the terms of the GNU Lesser General Public | ||
* License as published by the Free Software Foundation; either | ||
* version 2.1 of the License, or (at your option) any later version. | ||
* | ||
* This library is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
* Lesser General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License along with SIRIUS. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
package org.openscience.cdk.decomp; | ||
|
||
import org.openscience.cdk.formula.MolecularFormulaRange; | ||
import org.openscience.cdk.interfaces.IChemObjectBuilder; | ||
import org.openscience.cdk.interfaces.IIsotope; | ||
import org.openscience.cdk.interfaces.IMolecularFormula; | ||
|
||
import java.util.Arrays; | ||
|
||
/** | ||
* Implements the {@link Alphabet} for chemical elements. | ||
*/ | ||
public class ChemicalAlphabet implements Alphabet<IIsotope> { | ||
|
||
/** | ||
* Is used to convert compomeres to IMolecularFormula instances | ||
*/ | ||
protected final IChemObjectBuilder objectBuilder; | ||
|
||
/** | ||
* The characters (chemical elements) of the alphabet | ||
*/ | ||
protected final IIsotope[] characters; | ||
|
||
/** | ||
* Construct a new chemical alphabet from the given search space using the given object builder. | ||
* | ||
* @param molecularFormulaRange Search spacel, defining the allowed elements | ||
*/ | ||
public ChemicalAlphabet(IChemObjectBuilder builder, MolecularFormulaRange molecularFormulaRange) { | ||
this.objectBuilder = builder; | ||
IIsotope[] chars = new IIsotope[molecularFormulaRange.getIsotopeCount()]; | ||
int k=0; | ||
for (IIsotope i : molecularFormulaRange.isotopes()) { | ||
if (molecularFormulaRange.getIsotopeCountMax(i) > 0) chars[k++] = i; | ||
} | ||
if (k < chars.length) chars = Arrays.copyOf(chars, k); | ||
this.characters = chars; | ||
} | ||
|
||
/** | ||
* Translates a compomere (multiset of characters) into a IMolecularFormula | ||
*/ | ||
public IMolecularFormula buildFormulaFromCompomere(int[] compomere, int[] orderedIndizes) { | ||
IMolecularFormula formula = objectBuilder.newInstance(IMolecularFormula.class); | ||
for (int k=0; k < orderedIndizes.length; ++k) { | ||
if (compomere[k] > 0) formula.addIsotope(characters[orderedIndizes[k]], compomere[k]); | ||
} | ||
return formula; | ||
} | ||
|
||
/** | ||
* Checks if two chemical alphabets are compatible. In theory, an alphabet would be compatible if it is a subset | ||
* of another alphabet. However, we directly check for equality to keep this operation symetric. | ||
* | ||
* A decomposer can decompose every mass with alphabet as long as the alphabet is compatible to the decomposers | ||
* own alphabet. | ||
*/ | ||
public boolean isCompatible(ChemicalAlphabet other) { | ||
return Arrays.equals(characters, other.characters); | ||
} | ||
|
||
@Override | ||
public int size() { | ||
return characters.length; | ||
} | ||
|
||
@Override | ||
public double weightOf(int i) { | ||
return characters[i].getExactMass(); | ||
} | ||
|
||
@Override | ||
public IIsotope get(int i) { | ||
return characters[i]; | ||
} | ||
|
||
/** | ||
* maps each character to its index. This operation is quite slow, but have to be done only once when starting the | ||
* decomposer. Therefore, we don't need a hash table here. | ||
*/ | ||
@Override | ||
public int indexOf(IIsotope character) { | ||
for (int k=0; k < characters.length; ++k) | ||
if (characters[k]==character) return k; | ||
return -1; | ||
} | ||
} |
63 changes: 63 additions & 0 deletions
63
tool/formula/src/main/java/org/openscience/cdk/decomp/DecompIterator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
/* | ||
* This file is part of the SIRIUS library for analyzing MS and MS/MS data | ||
* | ||
* Copyright (C) 2013-2015 Kai Dührkop | ||
* | ||
* This library is free software; you can redistribute it and/or | ||
* modify it under the terms of the GNU Lesser General Public | ||
* License as published by the Free Software Foundation; either | ||
* version 2.1 of the License, or (at your option) any later version. | ||
* | ||
* This library is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
* Lesser General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License along with SIRIUS. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
package org.openscience.cdk.decomp; | ||
|
||
/** | ||
* This class allows to iterate over decompositions instead of keeping all decompositions in memory. It's slightly | ||
* slower than generating all decompositions within a single loop, but it offers more flexibility (i.e. find a single | ||
* decomposition that satisfies some rule). | ||
* @param <T> type of the characters of the alphabet | ||
*/ | ||
public interface DecompIterator<T> { | ||
|
||
/** | ||
* moves the iterator one step. | ||
* @return true, if a new decomposition is found. false, if the iterator reached the end. | ||
*/ | ||
public boolean next(); | ||
|
||
/** | ||
* Give access to the current compomere. Please note that this array is only valid during the current iteration step | ||
* and might be changed afterwards. Furthermore, it is absolutely forbidden to write anything into this array. | ||
* However, you are free to clone the array and do anything with its copy. | ||
* | ||
* @return the compomere (a tuple (a_1,...,a_n) with a_i is the amount of the i-th character in the ordered alphabet | ||
*/ | ||
public int[] getCurrentCompomere(); | ||
|
||
/** | ||
* @return the underlying (possibly unordered) alphabet | ||
*/ | ||
public Alphabet<T> getAlphabet(); | ||
|
||
/** | ||
* The order of characters in the compomere might be different to the order of characters in the alphabet (i.e. | ||
* the characters in the compomere are always ordered by mass). This array maps the i-th character in the compomere | ||
* to it's appropiate index in the alphabet | ||
* @return mapping of positions in compomere to character indizes in alphabet | ||
*/ | ||
public int[] getAlphabetOrder(); | ||
|
||
/** | ||
* Returns the character on the given position in the compomere | ||
* @param index index in compomere | ||
* @return corresponding character in alphabet | ||
*/ | ||
public T getCharacterAt(int index); | ||
|
||
} |
52 changes: 52 additions & 0 deletions
52
tool/formula/src/main/java/org/openscience/cdk/decomp/DecomposerFactory.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
/* | ||
* This file is part of the SIRIUS library for analyzing MS and MS/MS data | ||
* | ||
* Copyright (C) 2013-2015 Kai Dührkop | ||
* | ||
* This library is free software; you can redistribute it and/or | ||
* modify it under the terms of the GNU Lesser General Public | ||
* License as published by the Free Software Foundation; either | ||
* version 2.1 of the License, or (at your option) any later version. | ||
* | ||
* This library is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
* Lesser General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License along with SIRIUS. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
package org.openscience.cdk.decomp; | ||
|
||
import org.openscience.cdk.interfaces.IIsotope; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
public final class DecomposerFactory { | ||
|
||
protected List<RangeMassDecomposer<IIsotope>> decomposerCache; | ||
protected static final int maximalNumberOfCachedDecomposers = 10; | ||
|
||
protected final static DecomposerFactory instance = new DecomposerFactory(); | ||
|
||
public static DecomposerFactory getInstance() { | ||
return instance; | ||
} | ||
|
||
public DecomposerFactory() { | ||
this.decomposerCache = new ArrayList<>(maximalNumberOfCachedDecomposers); | ||
} | ||
|
||
public RangeMassDecomposer<IIsotope> getDecomposerFor(ChemicalAlphabet alphabet) { | ||
for (RangeMassDecomposer<IIsotope> decomposer : decomposerCache) { | ||
if (((ChemicalAlphabet)decomposer.getAlphabet()).isCompatible(alphabet)) { | ||
return decomposer; | ||
} | ||
} | ||
if (decomposerCache.size()>= maximalNumberOfCachedDecomposers) decomposerCache.remove(0); | ||
final RangeMassDecomposer<IIsotope> decomposer = new RangeMassDecomposer<>(alphabet); | ||
decomposerCache.add(decomposer); | ||
return decomposer; | ||
} | ||
|
||
} |
44 changes: 44 additions & 0 deletions
44
tool/formula/src/main/java/org/openscience/cdk/decomp/Interval.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
/* | ||
* This file is part of the SIRIUS library for analyzing MS and MS/MS data | ||
* | ||
* Copyright (C) 2013-2015 Kai Dührkop | ||
* | ||
* This library is free software; you can redistribute it and/or | ||
* modify it under the terms of the GNU Lesser General Public | ||
* License as published by the Free Software Foundation; either | ||
* version 2.1 of the License, or (at your option) any later version. | ||
* | ||
* This library is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
* Lesser General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License along with SIRIUS. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
package org.openscience.cdk.decomp; | ||
|
||
/**
 * A simple immutable POJO which defines a range from min to max (including max).
 *
 * The bounds are stored as given; the constructor does not check that {@code min <= max}. Two intervals are
 * equal if they have the same min and the same max.
 */
public class Interval {

    private final int min;
    private final int max;

    /**
     * @param min lower bound of the range
     * @param max upper bound of the range (inclusive)
     */
    public Interval(int min, int max) {
        this.min = min;
        this.max = max;
    }

    /**
     * @return lower bound of the range
     */
    public int getMin() {
        return min;
    }

    /**
     * @return upper bound of the range (inclusive)
     */
    public int getMax() {
        return max;
    }

    /**
     * Compares by value: another Interval is equal if both bounds match.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Interval)) {
            return false;
        }
        final Interval other = (Interval) obj;
        return min == other.min && max == other.max;
    }

    /**
     * Hash code derived from both bounds, consistent with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return 31 * min + max;
    }

    /**
     * @return the range formatted as {@code (min .. max)}
     */
    @Override
    public String toString() {
        return "(" + min + " .. " + max + ")";
    }
}
Oops, something went wrong.