/
plugin_setup.py
282 lines (263 loc) · 14.1 KB
/
plugin_setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# ----------------------------------------------------------------------------
# Copyright (c) 2016-2018, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import q2_qemistree
import importlib
from ._fingerprint import (compute_fragmentation_trees,
rerank_molecular_formulas,
predict_fingerprints)
from ._hierarchy import make_hierarchy
from ._prune_hierarchy import prune_hierarchy
from ._classyfire import get_classyfire_taxonomy
from ._semantics import (MassSpectrometryFeatures, MGFDirFmt,
SiriusFolder, SiriusDirFmt,
ZodiacFolder, ZodiacDirFmt,
CSIFolder, CSIDirFmt,
FeatureData, TSVMoleculesFormat, Molecules)
from ._plot import plot
from qiime2.plugin import (Plugin, Str, Range, Choices, Float, Int, Bool, List,
Citations)
from q2_types.feature_table import FeatureTable, Frequency
from q2_types.tree import Phylogeny, Rooted
# Bibliography shipped inside the q2_qemistree package; individual entries
# are looked up by key when actions are registered further down.
citations = Citations.load('citations.bib', package='q2_qemistree')

# The plugin object that the types, formats, methods and visualizers in this
# module are registered on.
plugin = Plugin(
    # identity
    name='qemistree',
    package='q2_qemistree',
    version=q2_qemistree.__version__,
    website='https://github.com/biocore/q2-qemistree',
    # user-facing text
    short_description='Plugin for exploring chemical diversity.',
    description='Hierarchical orderings for mass spectrometry data',
    citations=citations,
)
# type registration
# Each row is (format, semantic type, artifact type bound to that format).
# For every row the format is registered as a view, the semantic type is
# registered, and then the artifact type is tied to the format -- in that
# order, row by row.
for _fmt, _semantic_type, _artifact_type in (
        (MGFDirFmt, MassSpectrometryFeatures, MassSpectrometryFeatures),
        (SiriusDirFmt, SiriusFolder, SiriusFolder),
        (ZodiacDirFmt, ZodiacFolder, ZodiacFolder),
        (CSIDirFmt, CSIFolder, CSIFolder),
        # Molecules is bound to its format as FeatureData[Molecules].
        (TSVMoleculesFormat, Molecules, FeatureData[Molecules]),
):
    plugin.register_views(_fmt)
    plugin.register_semantic_types(_semantic_type)
    plugin.register_semantic_type_to_format(_artifact_type,
                                            artifact_format=_fmt)
# Do not leave the loop variables behind in the module namespace.
del _fmt, _semantic_type, _artifact_type
# Shared parameter-name -> QIIME 2 primitive type table. Each action
# registered below picks the subset of entries it needs by name.
PARAMS = dict(
    ionization_mode=Str % Choices(['positive', 'negative', 'auto']),
    database=Str % Choices(['all', 'pubchem']),
    sirius_path=Str,
    profile=Str % Choices(['qtof', 'orbitrap', 'fticr']),
    fingerid_db=Str % Choices(['all', 'pubchem', 'bio', 'kegg', 'hmdb']),
    ppm_max=Int % Range(0, 30, inclusive_end=True),
    n_jobs=Int % Range(1, None),
    num_candidates=Int % Range(5, 100, inclusive_end=True),
    tree_timeout=Int % Range(600, 3000, inclusive_end=True),
    maxmz=Int % Range(100, 850, inclusive_end=True),
    zodiac_threshold=Float % Range(0, 1, inclusive_end=True),
    java_flags=Str,
)
# Shared parameter-name -> help-text table. Each action registered below
# picks the subset of entries it needs by name.
PARAMS_DESC = dict(
    ionization_mode='Ionization mode for mass spectrometry',
    database='search formulas in given database',
    sirius_path='path to Sirius executable',
    ppm_max='allowed parts per million tolerance for decomposing masses',
    profile='configuration profile for mass-spec platform used',
    n_jobs='Number of cpu cores to use',
    num_candidates='number of fragmentation trees to compute per feature',
    tree_timeout='time for computation per fragmentation tree in seconds',
    fingerid_db='search structure in given database',
    maxmz='consider compounds with a precursor mz lower or equal to this',
    zodiac_threshold='threshold filter for molecular formula re-ranking',
    java_flags=(
        'Setup additional flags for the Java virtual machine. For Sirius '
        'it is recommended that you modify the initial and maximum heap '
        'size. For example to set an initial and maximum heap size of '
        '16GB and 64GB (respectively) specify "-Xms16G -Xmx64G". Note '
        'that the quotes are important.'
    ),
)
# method registration
# Names of the shared PARAMS / PARAMS_DESC entries exposed by this action.
# 'features' has no entry in either table (it is declared under `inputs`),
# so the filters below never match it.
keys = [
    'sirius_path', 'features', 'ppm_max', 'tree_timeout', 'maxmz', 'n_jobs',
    'num_candidates', 'database', 'profile', 'java_flags', 'ionization_mode',
]
plugin.methods.register_function(
    function=compute_fragmentation_trees,
    name='Compute fragmentation trees for candidate molecular formulas',
    description='Use Sirius to compute fragmentation trees',
    citations=[citations['duhrkop2015sirius']],
    inputs={'features': MassSpectrometryFeatures},
    input_descriptions={
        'features': 'List of MS1 ions and corresponding MS2 ions for each '
                    'MS1.',
    },
    # Keep the shared tables' own ordering; only membership in `keys` counts.
    parameters={name: PARAMS[name] for name in PARAMS if name in keys},
    parameter_descriptions={
        name: PARAMS_DESC[name] for name in PARAMS_DESC if name in keys
    },
    outputs=[('fragmentation_trees', SiriusFolder)],
    output_descriptions={
        'fragmentation_trees': 'fragmentation trees computed per feature by '
                               'Sirius',
    },
)
# Register the Zodiac re-ranking step. `keys` names the shared PARAMS /
# PARAMS_DESC entries this action exposes.
keys = ['sirius_path', 'zodiac_threshold', 'n_jobs', 'java_flags']
plugin.methods.register_function(
    function=rerank_molecular_formulas,
    name='Reranks candidate molecular formulas',
    description='Use Zodiac to rerank candidate molecular formulas',
    inputs={'features': MassSpectrometryFeatures,
            'fragmentation_trees': SiriusFolder},
    parameters={k: v for k, v in PARAMS.items() if k in keys},
    input_descriptions={
        'features': 'List of MS1 ions and corresponding '
                    'MS2 ions for each MS1.',
        # This input previously had no description at all.
        'fragmentation_trees': 'fragmentation trees computed per feature '
                               'by Sirius',
    },
    parameter_descriptions={k: v
                            for k, v in PARAMS_DESC.items() if k in keys},
    outputs=[('molecular_formulas', ZodiacFolder)],
    # The trailing space after 'reranking' matters: without it the adjacent
    # literals join into "rerankingusing Zodiac".
    output_descriptions={'molecular_formulas': 'Top scored molecular formula '
                                               'per feature after reranking '
                                               'using Zodiac'},
    citations=[citations['duhrkop2015sirius']]
)
# Register the CSI:FingerID step. `keys` names the shared PARAMS /
# PARAMS_DESC entries this action exposes.
keys = ['sirius_path', 'ppm_max', 'n_jobs', 'fingerid_db', 'java_flags']
plugin.methods.register_function(
    function=predict_fingerprints,
    name='Predict fingerprints for molecular formulas',
    # This action consumes molecular formulas and emits fingerprints; the
    # description used to say it predicted "molecular formulas".
    description='Use CSI:FingerID to predict molecular fingerprints',
    inputs={'molecular_formulas': ZodiacFolder},
    parameters={k: v for k, v in PARAMS.items() if k in keys},
    input_descriptions={
        'molecular_formulas': 'The output from running Zodiac'},
    parameter_descriptions={k: v
                            for k, v in PARAMS_DESC.items() if k in keys},
    outputs=[('predicted_fingerprints', CSIFolder)],
    output_descriptions={'predicted_fingerprints': 'Predicted substructures '
                                                   'per feature using '
                                                   'CSI:FingerID'},
    citations=[citations['duhrkop2015sirius']]
)
# Register the tree-building step: it takes lists of CSI:FingerID results,
# feature tables and library matches, and emits a rooted tree plus a feature
# table and feature data.
plugin.methods.register_function(
    function=make_hierarchy,
    name='Create a molecular tree',
    description='Build a phylogeny based on molecular substructures',
    inputs={
        'csi_results': List[CSIFolder],
        'feature_tables': List[FeatureTable[Frequency]],
        'library_matches': List[FeatureData[Molecules]],
    },
    input_descriptions={
        'csi_results': 'one or more CSI:FingerID output folders',
        'feature_tables': (
            'one or more feature tables with mass-spec feature '
            'intensity per sample'
        ),
        'library_matches': (
            'one or more tables with MS/MS library match for '
            'mass-spec features'
        ),
    },
    parameters={
        'qc_properties': Bool,
        'metric': Str % Choices(['euclidean', 'jaccard']),
    },
    parameter_descriptions={
        'qc_properties': (
            'filters molecular properties to retain PUBCHEM fingerprints'
        ),
        'metric': (
            'metric for hierarchical clustering of fingerprints. If the '
            'Jaccard metric is selected, molecular fingerprints are first '
            'binarized (probabilities above 0.5 are True, and False '
            'otherwise).'
        ),
    },
    outputs=[
        ('tree', Phylogeny[Rooted]),
        ('feature_table', FeatureTable[Frequency]),
        ('feature_data', FeatureData[Molecules]),
    ],
    output_descriptions={
        'tree': (
            'Tree of relatedness between mass spectrometry features based '
            'on the chemical substructures within those features'
        ),
        'feature_table': (
            'filtered feature table that contains only the features '
            'present in the tree'
        ),
        'feature_data': (
            'mapping of unique feature identifiers in input feature tables '
            'to MD5 hash of feature fingerprints'
        ),
    },
)
# Register the Classyfire annotation step: feature data in, feature data
# with Classyfire annotations out. The action takes no parameters.
plugin.methods.register_function(
    function=get_classyfire_taxonomy,
    name='Generate Classyfire annotations',
    description='Predicts chemical taxonomy based on molecule structures',
    citations=[citations['djoumbou2016classyfire']],
    inputs={'feature_data': FeatureData[Molecules]},
    input_descriptions={
        'feature_data': (
            'Feature data table that maps MD5 hash of mass-spec features '
            'to their structural annotations (SMILES)'
        ),
    },
    parameters={},
    parameter_descriptions={},
    outputs=[('classified_feature_data', FeatureData[Molecules])],
    output_descriptions={
        'classified_feature_data': (
            'Feature data table that contains Classyfire annotations per '
            'mass-spec feature'
        ),
    },
)
# Register the tree-pruning step: feature data plus a rooted tree in, a
# pruned rooted tree out.
plugin.methods.register_function(
    function=prune_hierarchy,
    name='Prune hierarchy of molecules',
    description='Removes the tips of the tree based on feature data',
    inputs={
        'feature_data': FeatureData[Molecules],
        'tree': Phylogeny[Rooted],
    },
    input_descriptions={
        'feature_data': 'Feature data table with molecules to keep',
        'tree': 'Tree of relatedness of molecules.',
    },
    parameters={'column': Str},
    parameter_descriptions={
        'column': (
            'A column in feature data table. Features with missing values '
            'in this column will be removed from the tree. If no column '
            'name is specified then the tree will be pruned to only '
            'contain features in the feature data.'
        ),
    },
    outputs=[('pruned_tree', Phylogeny[Rooted])],
    output_descriptions={
        'pruned_tree': (
            'Pruned tree of molecules with tips that are in feature data'
        ),
    },
)
# Register the iTOL plotting visualizer: a grouped feature table, a rooted
# tree and feature metadata in; the visualization is produced by `plot`.
plugin.visualizers.register_function(
    function=plot,
    name='Generate an annotated qemistree plot in iTOL',
    description=(
        'Plots the phenetic tree in iTOL with clade colors, feature '
        'labels and relative abundance per sample group'
    ),
    citations=[citations['letunic2019itol']],
    inputs={
        'grouped_table': FeatureTable[Frequency],
        'tree': Phylogeny[Rooted],
        'feature_metadata': FeatureData[Molecules],
    },
    input_descriptions={
        'grouped_table': (
            'Feature table of samples grouped by categories. We recommend '
            'collapsing feature table by a sample metadata category using '
            '`qiime feature-table group`. We can then plot the prevalence of '
            'these categories for each molecule on the tree'
        ),
        'tree': 'Phenetic tree',
        'feature_metadata': 'Feature metadata',
    },
    parameters={
        'category': Str,
        # Accepted palette names; see the parameter description for a link
        # to examples.
        'color_palette': Str % Choices([
            'Pastel1', 'Pastel2', 'Paired', 'Accent', 'Dark2',
            'Set1', 'Set2', 'Set3',
            'tab10', 'tab20', 'tab20b', 'tab20c',
            'Greys', 'Purples', 'Blues', 'Greens', 'Oranges', 'Reds',
            'YlOrBr', 'YlOrRd', 'OrRd', 'PuRd', 'RdPu', 'BuPu',
            'GnBu', 'PuBu', 'YlGnBu', 'PuBuGn', 'BuGn', 'YlGn',
        ]),
        'ms2_label': Bool,
        'parent_mz': Bool,
    },
    parameter_descriptions={
        'category': (
            'The feature data column used to color and label the tips'
        ),
        'color_palette': (
            'The color palette to use for coloring tips. For examples, see: '
            'https://matplotlib.org/tutorials/colors/colormaps.html'
        ),
        'ms2_label': 'Whether to label the tips with the MS2 value',
        'parent_mz': (
            'If the feature is unclassified, label the tips using this '
            'parent mass of the molecule'
        ),
    },
)
# Imported only for its import-time effects; the returned module object is
# discarded. NOTE(review): presumably this registers the plugin's
# transformers and has to run after `plugin` is defined above -- confirm
# against q2_qemistree/_transformer.py.
importlib.import_module('q2_qemistree._transformer')