-
Notifications
You must be signed in to change notification settings - Fork 3
/
general.xml
415 lines (391 loc) · 18.9 KB
/
general.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
<tool id="singularity_scriptrunner" name="scriptrunner" version="0.1" profile="22.05">
<description>singularity</description>
<!-- Creator metadata for this tool -->
<creator>
    <person givenName="Matthias"
            familyName="Bernt"
            email="m.bernt@ufz.de"/>
    <organization name="Helmholtz Centre for Environmental Research - UFZ"
                  url="https://www.ufz.de/"/>
</creator>
<command detect_errors="aggressive"><![CDATA[
#import re
## Copy the rendered script (the "script" configfile) into the job working directory (JWD)
mkdir script &&
cp '$script' script/script &&
## Symlink every dataset parameter into inputs/ so the script sees readable file names
mkdir inputs &&
#for $p in $parameters
#if $p.type_cond.type_sel == "data"
#if $p.type_cond.filename != ''
## a user supplied file name takes precedence
#set fname = $p.type_cond.filename
#else
## default name: element identifier (every character that is not whitespace, a word character or a dot is replaced by an underscore) plus the datatype extension
#set fname=re.sub('[^\s\w\.]', '_', str($p.type_cond.param.element_identifier)) + "." + $p.type_cond.param.ext
#end if
ln -s '$p.type_cond.param' inputs/'$fname' &&
#end if
#end for
## Container runtime, extra runtime parameters, image and interpreter all come from the selected row of the image data table
$image.fields.container_type
exec
## --cpus 1 # disabled because rootless cgroups requires cgroups v2
## --memory "\$((1024 * \${GALAXY_MEMORY_MB:-8192}))" # not needed on EVE
## bind Galaxy's file dir, otherwise we need to copy input file to JWD
--bind '$__app__.config.file_path:$__app__.config.file_path'
$image.fields.container_params
'$image.fields.image'
$image.fields.interpreter 'script/script'
## Pass the parameters to the script in the order given: datasets as inputs/<name> (same naming rule as for the symlinks above), text parameters as quoted literals
#for $p in $parameters
#if $p.type_cond.type_sel == "data"
#if $p.type_cond.filename != ''
#set fname = $p.type_cond.filename
#else
#set fname=re.sub('[^\s\w\.]', '_', str($p.type_cond.param.element_identifier)) + "." + $p.type_cond.param.ext
#end if
inputs/'$fname'
#else
'$p.type_cond.param'
#end if
#end for
]]></command>
<configfiles>
    <!-- The text of the "code" parameter is written verbatim to a file that the command references as $script -->
    <configfile name="script">$code</configfile>
</configfiles>
<inputs>
    <!-- Interpreter names are read from column 3 of the scripting_images data table;
         only rows whose column 4 matches singularity or apptainer are offered -->
    <param name="interpreter" type="select" label="Interpreter">
        <options from_data_table="scripting_images">
            <column name="name" index="3"/>
            <column name="value" index="3"/>
            <filter type="regexp" column="4" value="singularity|apptainer"/>
        </options>
        <validator type="no_options" message="No interpreter available. Contact your Galaxy administrator." />
    </param>
    <!-- Images from the same data table, restricted to rows whose column 3 equals the chosen interpreter -->
    <param name="image" type="select" label="Image" >
        <options from_data_table="scripting_images">
            <filter type="regexp" column="4" value="singularity|apptainer"/>
            <filter type="param_value" column="3" ref="interpreter"/>
        </options>
        <validator type="no_options" message="No interpreter / image is available. Contact your Galaxy administrator." />
    </param>
    <!-- Each repeat instance becomes one positional argument of the script, in the order given -->
    <repeat name="parameters" title="Parameters" min="1" default="1" help="Supply one or more parameters">
        <conditional name="type_cond">
            <param name="type_sel" type="select" label="Parameter type">
                <option value="data">Dataset</option>
                <option value="text">Text</option>
                <!-- Not sure if int/float make sense .. can they be connected to text in WFs? -->
            </param>
            <when value="data">
                <param name="param" type="data" format="data" label="Dataset"/>
                <param name="filename" type="text" label="File name" help="Set if you want to access the data set with a specific file name. Only alphanumeric characters, dash, underscore and dot are allowed (all other characters are replaced by an underscore). Default is Galaxy's data set name.">
                    <!-- keep the help text above in sync with the characters allowed here -->
                    <sanitizer invalid_char="_">
                        <valid initial="string.ascii_letters,string.digits">
                            <add value="_" />
                            <add value="-" />
                            <add value="." />
                        </valid>
                    </sanitizer>
                    <!-- file names must not start with dash -->
                    <validator type="regex" negate="true" message="Filenames must not start with a dash">^[-].*$</validator>
                </param>
            </when>
            <when value="text">
                <param name="param" type="text" label="Text parameter" help=""/>
            </when>
        </conditional>
    </repeat>
    <!-- The script source; all printable characters are accepted so that the code reaches the configfile unmodified -->
    <param name="code" type="text" area="true" label="Script to execute" help="">
        <sanitizer>
            <valid initial="string.printable"/>
        </sanitizer>
    </param>
</inputs>
<outputs>
    <!-- Files discovered after the job are gathered into a single list collection;
         the pattern derives element name and datatype from each file name -->
    <collection name="output" type="list" label="Outputs">
        <discover_datasets pattern="__designation_and_ext__"/>
    </collection>
</outputs>
<tests>
<!-- python: read the TSV dataset given as first argument and write it as data.csv -->
<test>
<param name="interpreter" value="python"/>
<param name="image" value="python_continuumio_anaconda"/>
<repeat name="parameters">
<conditional name="type_cond">
<param name="type_sel" value="data"/>
<param name="param" value="test.tsv" ftype="tabular"/>
</conditional>
</repeat>
<param name="code" value='import sys; import pandas as pd; df = pd.read_csv(sys.argv[1], sep="\t"); df.to_csv("data.csv", index=False, sep=",");'/>
<output_collection name="output" type="list" count="1">
<element name="data" ftype="csv">
<assert_contents>
<has_line line="1,2" />
<has_n_lines n="3"/>
<has_n_columns n="2" sep=","/>
</assert_contents>
</element>
</output_collection>
<!-- the command is expected to run singularity with the additional parameter \-\-cleanenv (presumably configured for this image in the data table) -->
<assert_command>
<has_text text="singularity"/>
<has_text text="--cleanenv"/>
</assert_command>
</test>
<!-- python: plot with matplotlib; the dataset is accessed under a user defined file name, which is checked via stdout -->
<test>
<param name="interpreter" value="python"/>
<param name="image" value="python_continuumio_anaconda"/>
<repeat name="parameters">
<conditional name="type_cond">
<param name="param" value="test.tsv" ftype="tabular"/>
<param name="filename" value="custom_name.tsv"/>
</conditional>
</repeat>
<param name="code" value='import sys; import pandas as pd; from matplotlib.backends.backend_pdf import PdfPages; df = pd.read_csv(sys.argv[1], sep="\t"); fh = PdfPages("points.pdf"); plt = df.plot(); fh.savefig(); fh.close(); print(f"plotted {sys.argv[1]}")'/>
<output_collection name="output" type="list" count="1">
<element name="points" ftype="pdf">
<assert_contents>
<has_text text="PDF" />
</assert_contents>
</element>
</output_collection>
<assert_stdout>
<has_line line="plotted inputs/custom_name.tsv"/>
</assert_stdout>
</test>
<!-- python: installing libraries from within the script ("forbidden") is expected to fail -->
<test expect_failure="true">
<param name="interpreter" value="python"/>
<param name="image" value="python_continuumio_anaconda"/>
<param name="code" value='import pip; pip.main(["install", "biopython"]); import Bio'/>
</test>
<!-- python: read a binary file (here HDF5) and write it as data.csv -->
<test>
<param name="interpreter" value="python"/>
<param name="image" value="python_continuumio_anaconda"/>
<repeat name="parameters">
<conditional name="type_cond">
<param name="param" value="test.h5" ftype="h5"/>
</conditional>
</repeat>
<param name="code" value='import sys; import os; import pandas as pd; df = pd.read_hdf(sys.argv[1]); df.to_csv("data.csv", index=False, sep=",");'/>
<output_collection name="output" type="list" count="1">
<element name="data" ftype="csv">
<assert_contents>
<has_line line="1,2" />
<has_n_lines n="3"/>
<has_n_columns n="2" sep=","/>
</assert_contents>
</element>
</output_collection>
</test>
<!-- python: two text parameters; the first names the output file, the second (containing a space) is echoed to stdout -->
<test>
<param name="interpreter" value="python"/>
<param name="image" value="python_continuumio_anaconda"/>
<repeat name="parameters">
<conditional name="type_cond">
<param name="type_sel" value="text"/>
<param name="param" value="filename.csv"/>
</conditional>
</repeat>
<repeat name="parameters">
<conditional name="type_cond">
<param name="type_sel" value="text"/>
<param name="param" value="some value"/>
</conditional>
</repeat>
<param name="code" value='import sys; fh = open(sys.argv[1], "w"); fh.write("Hello,world\n"); fh.write("Bye,world\n"); fh.close(); print(sys.argv[2]);'/>
<output_collection name="output" type="list" count="1">
<element name="filename" ftype="csv">
<assert_contents>
<has_line line="Hello,world"/>
<has_n_lines n="2"/>
<has_n_columns n="2" sep=","/>
</assert_contents>
</element>
</output_collection>
<assert_stdout>
<has_line line='some value'/>
</assert_stdout>
</test>
<!-- R: read the TSV dataset given as first argument and write it as data.csv -->
<test>
<param name="interpreter" value="Rscript"/>
<param name="image" value="r_rocker_tidyverse"/>
<repeat name="parameters">
<conditional name="type_cond">
<param name="param" value="test.tsv" ftype="tabular"/>
</conditional>
</repeat>
<param name="code" value='args = commandArgs(trailingOnly = TRUE); data = read.delim(args[1]); write.csv(data, "data.csv", row.names=FALSE)'/>
<output_collection name="output" type="list" count="1">
<element name="data" ftype="csv">
<assert_contents>
<has_line line="1,2" />
<has_n_lines n="3"/>
<has_n_columns n="2" sep=","/>
</assert_contents>
</element>
</output_collection>
<!-- the command is expected to run apptainer without the additional parameter \-\-cleanenv (presumably none are configured for this image in the data table) -->
<assert_command>
<has_text text="apptainer"/>
<has_text text="--cleanenv" negate="true"/>
</assert_command>
</test>
<!-- R: use a tidyverse library (ggplot2); the dataset is accessed under a user defined file name, which is checked via stdout -->
<test>
<param name="interpreter" value="Rscript"/>
<param name="image" value="r_rocker_tidyverse"/>
<repeat name="parameters">
<conditional name="type_cond">
<param name="param" value="test.tsv" ftype="tabular"/>
<param name="filename" value="custom_name.tsv"/>
</conditional>
</repeat>
<param name="code" value='library(ggplot2); args = commandArgs(trailingOnly = TRUE); data = read.delim(args[1]); pdf("points.pdf"); ggplot(data, aes(x=A, y=B)) + geom_point(); dev.off(); print(paste("plotted", args[1]))'/>
<output_collection name="output" type="list" count="1">
<element name="points" ftype="pdf">
<assert_contents>
<has_text text="PDF" />
</assert_contents>
</element>
</output_collection>
<assert_stdout>
<has_line line='[1] "plotted inputs/custom_name.tsv"'/>
</assert_stdout>
</test>
<!-- R: installing libraries from within the script is expected to fail (install.packages and BiocManager) -->
<test expect_failure="true">
<param name="interpreter" value="Rscript"/>
<param name="image" value="r_rocker_tidyverse"/>
<param name="code" value='install.packages("maybe"); library(maybe); print("success")'/>
</test>
<test expect_failure="true">
<param name="interpreter" value="Rscript"/>
<param name="image" value="r_rocker_tidyverse"/>
<param name="code" value='install.packages("BiocManager"); BiocManager::install("multtest"); print("success")'/>
</test>
<!-- R: read a binary file (here RDS) and write it as data.csv -->
<test>
<param name="interpreter" value="Rscript"/>
<param name="image" value="r_rocker_tidyverse"/>
<repeat name="parameters">
<conditional name="type_cond">
<param name="param" value="test.rds" ftype="rds"/>
</conditional>
</repeat>
<param name="code" value='args = commandArgs(trailingOnly = TRUE); data = readRDS(args[1]); write.csv(data, "data.csv", row.names=FALSE)'/>
<output_collection name="output" type="list" count="1">
<element name="data" ftype="csv">
<assert_contents>
<has_line line="1,2" />
<has_n_lines n="3"/>
<has_n_columns n="2" sep=","/>
</assert_contents>
</element>
</output_collection>
</test>
<!-- R: two text parameters and no dataset input; the first names the output file, the second (containing a space) is echoed to stdout -->
<test>
<param name="interpreter" value="Rscript"/>
<param name="image" value="r_rocker_tidyverse"/>
<repeat name="parameters">
<conditional name="type_cond">
<param name="type_sel" value="text"/>
<param name="param" value="filename.csv"/>
</conditional>
</repeat>
<repeat name="parameters">
<conditional name="type_cond">
<param name="type_sel" value="text"/>
<param name="param" value="some value"/>
</conditional>
</repeat>
<param name="code" value='args = commandArgs(trailingOnly = TRUE); fileConn = file(args[1]); writeLines(c("Hello,world","Bye,world"), fileConn); close(fileConn); print(args[2]);'/>
<output_collection name="output" type="list" count="1">
<element name="filename" ftype="csv">
<assert_contents>
<has_line line="Hello,world"/>
<has_n_lines n="2"/>
<has_n_columns n="2" sep=","/>
</assert_contents>
</element>
</output_collection>
<assert_stdout>
<has_line line='[1] "some value"'/>
</assert_stdout>
</test>
<!-- bash: convert the TSV dataset given as first argument to data.csv with sed -->
<test>
<param name="interpreter" value="bash"/>
<param name="image" value="bash_continuumio_anaconda"/>
<repeat name="parameters">
<conditional name="type_cond">
<param name="param" value="test.tsv" ftype="tabular"/>
</conditional>
</repeat>
<param name="code" value="sed -e 's/\t/,/' $1 > data.csv"/>
<output_collection name="output" type="list" count="1">
<element name="data" ftype="csv">
<assert_contents>
<has_line line="1,2" />
<has_n_lines n="3"/>
<has_n_columns n="2" sep=","/>
</assert_contents>
</element>
</output_collection>
</test>
<!-- bash: check that we can turn off networking, i.e. fetching a web page with curl is expected to fail -->
<test expect_failure="true">
<param name="interpreter" value="bash"/>
<param name="image" value="bash_continuumio_anaconda"/>
<repeat name="parameters">
<conditional name="type_cond">
<param name="param" value="test.tsv" ftype="tabular"/>
</conditional>
</repeat>
<param name="code" value="curl -iL https://www.galaxyproject.org"/>
</test>
</tests>
<help><![CDATA[
**Warning**
.. class:: warningmark
**Make sure that you know what you are doing. When used incorrectly, the tool may
cause loss of data in files that you have write access to.**
.. class:: warningmark
This tool is intended only for single-use, ad-hoc exploratory analysis
of data sets with small scripts. This is because the tool has limited
reusability (in particular on other Galaxy servers).
.. class:: warningmark
If you use this tool repeatedly with the same script and/or have the impression
that other Galaxy users could profit from this script then contact your local
Galaxy administrator or the Galaxy community, e.g. at https://github.com/galaxyproject/tools-iuc/,
and ask if your script can be turned into a proper Galaxy tool.
One of the main advantages of proper Galaxy tools is that they are tested and
maintained. Furthermore, the whole Galaxy community may profit.
**What it does**
Executes an interpreted script (in a container). The available scripting
languages (e.g. python, R, bash, etc) and containers are configured by the
Galaxy administrator.
An arbitrary number of data or text parameters can be given to the script.
By default, data parameters are named after the dataset's name, with the datatype
used as file extension. This can be overridden with the filename parameter
for the corresponding dataset.
**Inputs**
A Python script can access its parameters (data sets and text) via the ``sys.argv`` list
where the i-th parameter corresponds to the i-th list element (counting from 1).
A tab delimited file, for instance, can be read with ``pandas`` as follows:
::
import sys
import pandas as pd
df = pd.read_csv(sys.argv[1], sep="\t")
In an R script the parameters are available in the list obtained by ``args <- commandArgs(trailingOnly = TRUE);``
(again the i-th list element contains the i-th parameter, starting from 1).
Reading a tab separated file in R could be done as follows:
::
args <- commandArgs(trailingOnly = TRUE);
first_arg <- file(args[1])
df <- read.delim(args[1]);
**Outputs**
Output datasets are read from the current working directory and put into a
single collection. The collection elements will be named as the file names
(without the extension). The file extension determines the datatype of the
datasets (or Galaxy will try to autodetect the data type).
]]></help>
</tool>