/
job_conf.xml.sample_advanced
940 lines (873 loc) · 54.8 KB
/
job_conf.xml.sample_advanced
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
<?xml version="1.0"?>
<job_conf>
<plugins workers="4">
<!-- "workers" is the number of threads for the runner's work queue.
The default from <plugins> is used if not defined for a <plugin>.
-->
<plugin id="local" type="runner" load="galaxy.jobs.runners.local:LocalJobRunner"/>
<plugin id="pbs" type="runner" load="galaxy.jobs.runners.pbs:PBSJobRunner" workers="2"/>
<plugin id="drmaa" type="runner" load="galaxy.jobs.runners.drmaa:DRMAAJobRunner">
<!-- Different DRMs handle successfully completed jobs differently,
these options can be changed to handle such differences and
are explained in detail on the Galaxy wiki. Defaults are shown -->
<param id="invalidjobexception_state">ok</param>
<param id="invalidjobexception_retries">0</param>
<param id="internalexception_state">ok</param>
<param id="internalexception_retries">0</param>
</plugin>
<plugin id="sge" type="runner" load="galaxy.jobs.runners.drmaa:DRMAAJobRunner">
<!-- Override the $DRMAA_LIBRARY_PATH environment variable -->
<param id="drmaa_library_path">/sge/lib/libdrmaa.so</param>
</plugin>
<plugin id="cli" type="runner" load="galaxy.jobs.runners.cli:ShellJobRunner" />
<plugin id="condor" type="runner" load="galaxy.jobs.runners.condor:CondorJobRunner" />
<plugin id="slurm" type="runner" load="galaxy.jobs.runners.slurm:SlurmJobRunner" />
<plugin id="dynamic" type="runner">
<!-- The dynamic runner is not a real job running plugin and is
always loaded, so it does not need to be explicitly stated in
<plugins>. However, if you wish to change the base module
containing your dynamic rules, you can do so.
The `load` attribute is not required (and ignored if
included).
-->
<param id="rules_module">galaxy.jobs.rules</param>
</plugin>
<!-- Pulsar runners (see more at https://pulsar.readthedocs.org) -->
<plugin id="pulsar_rest" type="runner" load="galaxy.jobs.runners.pulsar:PulsarRESTJobRunner">
<!-- Allow optimized HTTP calls with libcurl (defaults to urllib) -->
<!-- <param id="transport">curl</param> -->
<!-- *Experimental Caching*: Next parameter enables caching.
Likely will not work with newer features such as MQ support.
If this is enabled be sure to specify a `file_cache_dir` in
the remote Pulsar's servers main configuration file.
-->
<!-- <param id="cache">True</param> -->
</plugin>
<plugin id="pulsar_mq" type="runner" load="galaxy.jobs.runners.pulsar:PulsarMQJobRunner">
<!-- AMQP URL to connect to. -->
<param id="amqp_url">amqp://guest:guest@localhost:5672//</param>
<!-- URL remote Pulsar apps should transfer files to this Galaxy
instance to/from. This can be unspecified/empty if
galaxy_infrastructure_url is set in galaxy.ini.
-->
<param id="galaxy_url">http://localhost:8080</param>
<!-- AMQP does not guarantee that a published message is received by
the AMQP server, so Galaxy/Pulsar can request that the consumer
acknowledge messages and will resend them if acknowledgement is
not received after a configurable timeout. -->
<!-- <param id="amqp_acknowledge">False</param> -->
<!-- Galaxy reuses Pulsar's persistence_directory parameter (via the
Pulsar client lib) to store a record of received
acknowledgements, and to keep track of messages which have not
been acknowledged. -->
<!-- <param id="persistence_directory">/path/to/dir</param> -->
<!-- Number of seconds to wait for an acknowledgement before
republishing a message. -->
<!-- <param id="amqp_republish_time">30</param> -->
<!-- Pulsar job manager to communicate with (see Pulsar
docs for information on job managers). -->
<!-- <param id="manager">_default_</param> -->
<!-- The AMQP client can provide an SSL client certificate (e.g. for
validation), the following options configure that certificate
(see for reference:
http://kombu.readthedocs.org/en/latest/reference/kombu.connection.html
). If you simply want to use SSL but not use/validate a client
cert, just use the ?ssl=1 query on the amqp URL instead. -->
<!-- <param id="amqp_connect_ssl_ca_certs">/path/to/cacert.pem</param> -->
<!-- <param id="amqp_connect_ssl_keyfile">/path/to/key.pem</param> -->
<!-- <param id="amqp_connect_ssl_certfile">/path/to/cert.pem</param> -->
<!-- <param id="amqp_connect_ssl_cert_reqs">cert_required</param> -->
<!-- By default, the AMQP consumer uses a nonblocking connection with
a 0.2 second timeout. In testing, this works fine for
unencrypted AMQP connections, but with SSL it will cause the
client to reconnect to the server after each timeout. Set to a
higher value (in seconds) (or `None` to use blocking connections). -->
<!-- <param id="amqp_consumer_timeout">None</param> -->
</plugin>
<plugin id="pulsar_legacy" type="runner" load="galaxy.jobs.runners.pulsar:PulsarLegacyJobRunner" shell="none">
<!-- Pulsar job runner with default parameters matching those
of old LWR job runner. If your Pulsar server is running on a
Windows machine for instance this runner should still be used.
These destinations still need to target a Pulsar server;
older LWR plugins and destinations that still work in Galaxy can
target LWR servers, but this support should be considered
deprecated and will disappear with a future release of Galaxy.
-->
</plugin>
<plugin id="pulsar_embedded" type="runner" load="galaxy.jobs.runners.pulsar:PulsarEmbeddedJobRunner">
<!-- The embedded Pulsar runner starts a Pulsar app
internal to Galaxy and communicates with it directly.
This may be useful for instance when Pulsar
staging is important but a Pulsar server is
unneeded (for instance if compute servers cannot
mount Galaxy's files but Galaxy can mount a
scratch directory available on compute). -->
<!-- Specify a complete description of the Pulsar app
to create. If this configuration defines more than
one manager - you can specify the manager name
using the "manager" destination parameter. For more
information on configuring a Pulsar app see:
https://github.com/galaxyproject/pulsar/blob/master/app.yml.sample
http://pulsar.readthedocs.org/en/latest/configure.html
-->
<!-- <param id="pulsar_config">path/to/pulsar/app.yml</param> -->
</plugin>
<plugin id="k8s" type="runner" load="galaxy.jobs.runners.kubernetes:KubernetesJobRunner">
<!-- The Kubernetes (k8s) plugin allows to send jobs to a k8s cluster which shares filesystem with Galaxy.
This requires installing pykube. Install pykube by activating Galaxy's virtual
environment and then executing the following pip command:
pip install pykube==0.15.0
The shared file system needs to be exposed to k8s through a Persistent Volume (rw) and a Persistent
Volume Claim. An example of a Persistent Volume could be, in yaml (access modes, reclaim policy and
path are relevant) (persistent_volume.yaml):
kind: PersistentVolume
apiVersion: v1
metadata:
name: pv-galaxy-nfs
labels:
type: nfs
spec:
capacity:
storage: 10Gi
accessModes:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
nfs:
path: /scratch1/galaxy_data
server: 192.168.64.1
The path (nfs:path: in the example) set needs to be a parent directory of the directories used for
the variables "file_path" and "new_file_path" in the galaxy.ini file. Clearly, for this particular example
to work, there needs to be an NFS server serving that directory on that IP. Please make sure that you
use a reasonable storage size for your setup (possibly larger than the 10Gi written).
An example of the volume claim should be (this needs to be followed more closely) (pv_claim.yaml):
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: galaxy-pvc
spec:
accessModes:
- ReadWriteMany
volumeName: pv-galaxy-nfs
resources:
requests:
storage: 2Gi
The volume claim needs to reference the name of the volume in spec:volumeName. The name of the claim
(metadata:name) is referenced in the plugin definition (see below), through param
"k8s_persistent_volume_claim_name". These two k8s objects need to be created before Galaxy can use them:
kubectl create -f <path/to/persistent_volume.yaml>
kubectl create -f <path/to/pv_claim.yaml>
pointing of course to the same Kubernetes cluster that you intend to use.
-->
<param id="k8s_config_path">/path/to/kubeconfig</param>
<!-- This is the path to the kube config file, which is normally on ~/.kube/config, but that will depend on
your installation. This is the file that tells the plugin where the k8s cluster is, access credentials,
etc. This parameter is not necessary and ignored if k8s_use_service_account is set to True -->
<!-- <param id="k8s_pull_policy">Default</param> -->
<!-- Sets the pull policy to be used for all containers invoked for jobs by the Kubernetes cluster. Can take
any value among the possible Kubernetes container image pull policies: Always, IfNotPresent or Never
(respecting capitalization). Any other value, such as "Default", will not change the setting on k8s and
leave for the containers to run with the default pull policy specified at the cluster (this is normally
IfNotPresent). Container images using latest are by default always pulled, so you need to use defined
versions for offline cases to work (and images need to be previously pulled). -->
<!-- <param id="k8s_use_service_account">false</param> -->
<!-- For use when Kubernetes should be accessed from inside a Pod running Galaxy (that is,
galaxy is running inside Kubernetes). If this variable is True, then the previous k8s_config_path is
not required and actually ignored. It is not necessary to set this value if not setting it to true -->
<!-- <param id="k8s_job_api_version">batch/v1</param> -->
<!-- Version of the Kubernetes Job API object to use, the default one batch/v1 should be supported from
Kubernetes 1.2 and on. Changing this to a much newer version in the future might require changes to the
plugin runner code. Value extensions/v1beta1 is also supported for pre 1.2 legacy installations.
-->
<param id="k8s_persistent_volume_claim_name">galaxy-pvc</param>
<!-- The name of the Persistent Volume Claim (PVC) to be used, details above, needs to match the PVC's
metadata:name -->
<param id="k8s_persistent_volume_claim_mount_path">/scratch1/galaxy_data</param>
<!-- The mount path needs to be parent directory of the "file_path" and "new_file_path" paths
set in universe_wsgi.ini (or equivalent general galaxy config file). This is the mount path of the
PVC within the docker container that will be actually running the tool -->
<!-- <param id="k8s_namespace">default</param> -->
<!-- The namespace to be used on the Kubernetes cluster, if different from default, this needs to be set
accordingly in the PV and PVC detailed above -->
<!-- <param id="k8s_pod_retrials">4</param> -->
<!-- Allows pods to retry up to this number of times, before marking the galaxy job failed. k8s is a state
setter essentially, so by default it will try to take a job submitted to successful completion. A job
submits pods, until the number of successes (1 in this use case) is achieved, assuming that whatever is
making the pods fail will be fixed (such as a stale disk or a dead node that it is being restarted).
This option sets a limit of retrials, so that after that number of failed pods, the job is re-scaled to
zero (no execution) and the stderr/stdout of the k8s job is reported in galaxy (and the galaxy job set
to failed) -->
<!-- <param id="k8s_supplemental_group_id">0</param> -->
<!-- <param id="k8s_fs_group_id">0</param> -->
<!-- If mounting an NFS / GlusterFS or other shared file system which is administered to ONLY provide access
to a DEFINED user/group, these variables set the group id that Pods need to use to be able to read and
write from that mount. If left to zero or deleted, these parameters are neglected. Integer values
above zero trigger the addition of a security context on each Pod created to dispatch jobs:
securityContext:
supplementalGroups: [value-set-goes-here]
fsGroup: fs-group-integer-value
inside the Pod spec, applicable for all containers on the pod. Just one of them set to >0 will generate
the security context.
Using this requires that the Kubernetes cluster is not running the admission controller
"SecurityContextDeny". To check this, look at the admission-control= variable setup for the
api-server pod definition (normally in /etc/kubernetes/manifests/kube-apiserver.manifest), it shouldn't
have the mentioned admission controller. Pods with the securityContext set will fail if such admission
controller is present. Removing that admission controller from the manifest should provoke kubelet
to restart the api-server pod running on that machine (although this might vary on your Kubernetes
installation).
For more information see point 21.2.4.1 Group IDs on:
https://access.redhat.com/documentation/en-us/openshift_container_platform/3.4/html/
installation_and_configuration/configuring-persistent-storage
Different storage strategies might or might not require supplemental groups or fs groups, one is not
a requirement of the other.
-->
</plugin>
<plugin id="godocker" type="runner" load="galaxy.jobs.runners.godocker:GodockerJobRunner">
<!-- Go-Docker is a batch computing/cluster management tool using Docker
See https://bitbucket.org/osallou/go-docker for more details. -->
<!-- REST based runner , submits jobs to godocker -->
<param id="godocker_master">GODOCKER_URL</param>
<!-- Specify the instance of GoDocker -->
<param id="user">USERNAME</param>
<!-- GoDocker username -->
<param id="key">APIKEY</param>
<!-- GoDocker API key -->
<param id="godocker_project">galaxy</param>
<!-- Specify the project present in the GoDocker setup -->
</plugin>
<plugin id="chronos" type="runner" load="galaxy.jobs.runners.chronos:ChronosJobRunner">
<!-- Chronos is a framework for the Apache Mesos software; a software which manages
computer clusters. Specifically, Chronos runs of top of Mesos and it's used
for job orchestration.
This runner requires a shared file system where the directories of
`job_working_directory`, `file_path` and `new_file_path` settings defined on
the `galaxy.ini` file are shared amongst the Mesos agents (i.e. nodes which
actually run the jobs).
-->
<param id="chronos">`chronos_host`</param>
<!-- Hostname which runs Chronos instance. -->
<param id="owner">foo@bar.com</param>
<!-- The email address of the person responsible for the job. -->
<param id="username">username</param>
<!-- Username to access Mesos cluster. -->
<param id="password">password</param>
<!-- Password to access Mesos cluster. -->
<param id="insecure">true</param>
<!-- Set to true to allow insecure connections to Chronos; set to false to require secure
     (verified HTTPS) communication. NOTE(review): original comment appeared inverted relative
     to the parameter name - confirm against the Chronos runner's parameter spec. -->
</plugin>
<!-- Additionally any plugin or destination (below) may define an "enabled" parameter that should
evaluate to True or False. When set up using
<param id="enabled" from_environ="<VAR>">True|False</param>
plugins and destinations can be conditionally loaded using environment variables.
Setting the param body above to True or False is required and specifies the default
used by Galaxy if no environment variable of the specified name is found.
-->
</plugins>
<handlers default="handlers">
<!-- Additional job handlers - the id should match the name of a
[server:<id>] in galaxy.ini.
-->
<handler id="handler0" tags="handlers"/>
<handler id="handler1" tags="handlers"/>
<!-- Handlers will load all plugins defined in the <plugins> collection
above by default, but can be limited to a subset using <plugin>
tags. This is useful for heterogeneous environments where the DRMAA
plugin would need to be loaded more than once with different
configs.
-->
<handler id="sge_handler">
<plugin id="sge"/>
</handler>
<handler id="special_handler0" tags="special_handlers"/>
<handler id="special_handler1" tags="special_handlers"/>
<handler id="trackster_handler"/>
</handlers>
<destinations default="local">
<!-- Destinations define details about remote resources and how jobs
should be executed on those remote resources.
-->
<destination id="local" runner="local"/>
<destination id="multicore_local" runner="local">
<param id="local_slots">4</param> <!-- Specify GALAXY_SLOTS for local jobs. -->
<!-- Warning: Local slot count doesn't tie up additional worker threads, to prevent over
allocating machine define a second local runner with different name and fewer workers
to run this destination. -->
<param id="embed_metadata_in_job">True</param>
<!-- Above parameter will be default (with no option to set
to False) in an upcoming release of Galaxy, but you can
try it early - it will slightly speed up local jobs by
embedding metadata calculation in job script itself.
-->
<job_metrics />
<!-- Above element demonstrates embedded job metrics definition - see
job_metrics_conf.xml.sample for full documentation on possible nested
elements. This empty block will simply disable job metrics for the
corresponding destination. -->
</destination>
<destination id="docker_local" runner="local">
<param id="docker_enabled">true</param>
<!-- docker_volumes can be used to configure volumes to expose to docker,
For added isolation append :ro to the path to mount it read only.
Galaxy will attempt to infer a reasonable set of defaults which
volumes should be exposed how based on Galaxy's settings and the
destination - but be sure to add any library paths or data indices
that may be needed read-only.
-->
<!--
<param id="docker_volumes">$defaults,/mnt/galaxyData/libraries:ro,/mnt/galaxyData/indices:ro</param>
-->
<!-- For a stock Galaxy instance and traditional job runner $defaults will
expand out as:
$galaxy_root:ro,$tool_directory:ro,$job_directory:ro,$working_directory:rw,$default_file_path:rw
This assumes most of what is needed is available under Galaxy's root directory,
the tool directory, and the Galaxy's file_path (if using object store creatively
you will definitely need to expand defaults).
This configuration allows any docker instance to write to any Galaxy
file - for greater isolation set outputs_to_working_directory in
galaxy.ini. This will cause $defaults to allow writing to much
less. It will then expand as follows:
$galaxy_root:ro,$tool_directory:ro,$job_directory:ro,$working_directory:rw,$default_file_path:ro
If using the Pulsar, defaults will be even further restricted because the
Pulsar will (by default) stage all needed inputs into the job's job_directory
(so there is no need to allow the docker container to read all the
files - let alone write over them). Defaults in this case becomes:
$job_directory:ro,$tool_directory:ro,$job_directory/outputs:rw,$working_directory:rw
Python string.Template is used to expand volumes and values $defaults,
$galaxy_root, $default_file_path, $tool_directory, $working_directory,
are available to all jobs and $job_directory is also available for
Pulsar jobs.
-->
<!-- One can run docker using volumes-from tag by setting the following
parameter. For more information on volumes-from check out the following
docker tutorial. https://docs.docker.com/userguide/dockervolumes/
-->
<!-- <param id="docker_volumes_from">parent_container_name</param> -->
<!-- Control memory allocatable by docker container with following option:
-->
<!-- <param id="docker_memory">24G</param> -->
<!-- By default Docker will need to be runnable by Galaxy using
password-less sudo - this can be configured by adding the
following line to the sudoers file of all compute nodes
with docker enabled:
galaxy ALL = (root) NOPASSWD: SETENV: /usr/bin/docker
The following option is set to false to disable sudo (docker
must likewise be configured to allow this).
-->
<!-- <param id="docker_sudo">false</param> -->
<!-- Following option can be used to tweak sudo command used by
default. -->
<!-- <param id="docker_sudo_cmd">/usr/bin/sudo -extra_param</param> -->
<!-- By default, the docker container will not have any networking
enabled. Bridge networking can be enabled by uncommenting the next option
http://docs.docker.io/reference/run/#network-settings
-->
<!-- <param id="docker_net">bridge</param> -->
<!-- By default, a container will live on past its run. By
adding the '\-\-rm' flag to the command line, the container
will be removed automatically after the program is complete.
-->
<!-- <param id="docker_auto_rm">true</param> -->
<!-- Override which user to launch Docker container as - defaults to
Galaxy's user id. For remote job execution (e.g. Pulsar) set to
remote job user. Leave empty to not use the -u argument with
Docker. -->
<!-- <param id="docker_set_user">$UID</param> -->
<!-- Pass extra arguments to the docker run command not covered by the
above options. -->
<!-- <param id="docker_run_extra_arguments"></param> -->
<!-- Following command can be used to tweak docker command. -->
<!-- <param id="docker_cmd">/usr/local/custom_docker/docker</param> -->
<!-- Following can be used to connect to the docker server in different
ways (translated as -H argument to docker client). -->
<!-- <param id="docker_host">unix:///var/run/docker.sock</param> -->
<!-- <param id="docker_host">:5555</param> -->
<!-- <param id="docker_host">tcp://127.0.0.1:4243</param> -->
<!-- If deployer wants to use docker for isolation, but does not
trust tool's specified container - a destination wide override
can be set. This will cause all jobs on this destination to use
that docker image. -->
<!-- <param id="docker_container_id_override">busybox:ubuntu-14.04</param> -->
<!-- Likewise, if deployer wants to use docker for isolation and
does trust tool's specified container - but also wants tool's not
configured to run in a container the following option can provide
a fallback. -->
<!-- <param id="docker_default_container_id">busybox:ubuntu-14.04</param> -->
<!-- If the destination should be secured to only allow containerized jobs
the following parameter may be set for the job destination. Not all,
or even most, tools available in Galaxy core or in the Tool Shed
support Docker yet so this option may require a lot of extra work for
the deployer. -->
<!-- <param id="require_container">true</param> -->
</destination>
<destination id="singularity_local" runner="local">
<param id="singularity_enabled">true</param>
<!-- See the above documentation for docker_volumes, singularity_volumes works
almost the same way. The only difference is that $default will expand with
rw directories that in Docker would expand as ro if any of subdirectories are rw.
As an example consider that Docker mounts the parent of the working directory
(this is known as the job directory) as ro and the working directory itself as rw.
This doesn't work in Singularity because if any parent directory is mounted as ro
none of its children will be rw. So the job directory will be mounted rw for
Singularity.
-->
<!--
<param id="singularity_volumes">$defaults,/mnt/galaxyData/libraries:ro,/mnt/galaxyData/indices:ro</param>
-->
<!-- You can configure singularity to run using sudo - this probably should not
be set and may be removed in the future.
-->
<!-- <param id="singularity_sudo">false</param> -->
<!-- Following option can be used to tweak sudo command used by
default. -->
<!-- <param id="singularity_sudo_cmd">/usr/bin/sudo -extra_param</param> -->
<!-- Pass extra arguments to the singularity exec command not covered by the
above options. -->
<!-- <param id="singularity_run_extra_arguments"></param> -->
<!-- Following command can be used to tweak singularity command. -->
<!-- <param id="singularity_cmd">/usr/local/custom_docker/docker</param> -->
<!-- If deployer wants to use singularity for isolation, but does not
trust tool's specified container - a destination wide override
can be set. This will cause all jobs on this destination to use
that singularity image. -->
<!-- <param id="singularity_container_id_override">/path/to/singularity/image</param> -->
<!-- Likewise, if deployer wants to use singularity for isolation and
does trust tool's specified container - but also wants tool's not
configured to run in a container the following option can provide
a fallback. -->
<!-- <param id="singularity_default_container_id">/path/to/singularity/image</param> -->
<!-- If the destination should be secured to only allow containerized jobs
the following parameter may be set for the job destination. Not all,
or even most, tools available in Galaxy core or in the Tool Shed
support Docker yet so this option may require a lot of extra work for
the deployer. -->
<!-- <param id="require_container">true</param> -->
</destination>
<destination id="pbs" runner="pbs" tags="mycluster"/>
<destination id="pbs_longjobs" runner="pbs" tags="mycluster,longjobs">
<!-- Define parameters that are native to the job runner plugin. -->
<param id="Resource_List">walltime=72:00:00</param>
</destination>
<destination id="remote_cluster" runner="drmaa" tags="longjobs"/>
<destination id="java_cluster" runner="drmaa">
<!-- Allow users that are not mapped to any real users to run jobs
as the Galaxy user (fallback). Default is False.
-->
<param id="allow_guests">True</param>
<!-- Set to False if cluster nodes don't share the Galaxy library;
it will perform metadata calculation locally after the job finishes.
-->
<param id="embed_metadata_in_job">True</param>
<!-- set arbitrary environment variables at runtime. General
dependencies for tools should be configured via
tool_dependency_dir and package options and these
options should be reserved for defining cluster
specific options.
-->
<env id="_JAVA_OPTIONS">-Xmx6G</env>
<env id="ANOTHER_OPTION" raw="true">'5'</env> <!-- raw disables auto quoting -->
<env file="/mnt/java_cluster/environment_setup.sh" /> <!-- will be sourced -->
<env exec="module load javastuff/2.10" /> <!-- will be sourced -->
<!-- files to source and exec statements will be handled on remote
clusters. These don't need to be available on the Galaxy server
itself.
-->
</destination>
<destination id="real_user_cluster" runner="drmaa">
<!-- Make sure to setup 3 real user parameters in galaxy.ini. -->
</destination>
<destination id="dynamic" runner="dynamic">
<!-- A destination that represents a method in the dynamic runner.
foo should be a Python function defined in any file in
lib/galaxy/jobs/rules.
-->
<param id="function">foo</param>
</destination>
<destination id="dtd_destination" runner="dynamic">
<!-- DTD is a special dynamic job destination type that builds up
rules given a YAML-based DSL (see config/tool_destinations.yml.sample
for the syntax).
-->
<param id="type">dtd</param>
</destination>
<destination id="load_balance" runner="dynamic">
<param id="type">choose_one</param>
<!-- Randomly assign jobs to various static destination ids -->
<param id="destination_ids">cluster1,cluster2,cluster3</param>
</destination>
<destination id="load_balance_with_data_locality" runner="dynamic">
<!-- Randomly assign jobs to various static destination ids,
but keep jobs in the same workflow invocation together and
for jobs run outside of workflows keep jobs in the same
history together.
-->
<param id="type">choose_one</param>
<param id="destination_ids">cluster1,cluster2,cluster3</param>
<param id="hash_by">workflow_invocation,history</param>
</destination>
<destination id="burst_out" runner="dynamic">
<!-- Burst out from static destination local_cluster_8_core to
static destination shared_cluster_8_core when there are about
50 Galaxy jobs assigned to any of the local_cluster_XXX
destinations (either running or queued). If there are fewer
than 50 jobs, just use local_cluster_8_core destination.
Uncomment job_state parameter to make this bursting happen when
roughly 50 jobs are queued instead.
-->
<param id="type">burst</param>
<param id="from_destination_ids">local_cluster_8_core,local_cluster_1_core,local_cluster_16_core</param>
<param id="to_destination_id">shared_cluster_8_core</param>
<param id="num_jobs">50</param>
<!-- <param id="job_states">queued</param> -->
</destination>
<destination id="docker_dispatch" runner="dynamic">
<!-- The following dynamic destination type will send all tools that
support docker to static destination defined by
docker_destination_id (docker_cluster in this example) and all
other tools to default_destination_id (normal_cluster in this
example).
-->
<param id="type">docker_dispatch</param>
<param id="docker_destination_id">docker_cluster</param>
<param id="default_destination_id">normal_cluster</param>
</destination>
<destination id="secure_pulsar_rest_dest" runner="pulsar_rest">
<param id="url">https://example.com:8913/</param>
<!-- If set, private_token must match token in remote Pulsar's
configuration. -->
<param id="private_token">123456789changeme</param>
<!-- Uncomment the following statement to disable file staging (e.g.
if there is a shared file system between Galaxy and the Pulsar
server). Alternatively action can be set to 'copy' - to replace
http transfers with file system copies, 'remote_transfer' to cause
the Pulsar to initiate HTTP transfers instead of Galaxy, or
'remote_copy' to cause Pulsar to initiate file system copies.
If setting this to 'remote_transfer' be sure to specify a
'galaxy_url' attribute on the runner plugin above. -->
<!-- <param id="default_file_action">none</param> -->
<!-- The above option is just the default, the transfer behavior
none|copy|http can be configured on a per path basis via the
following file. See Pulsar documentation for more details and
examples.
-->
<!-- <param id="file_action_config">file_actions.yaml</param> -->
<!-- The non-legacy Pulsar runners will attempt to resolve Galaxy
dependencies remotely - to enable this set a tool_dependency_dir
in Pulsar's configuration (can work with all the same dependency
resolution mechanisms as Galaxy - Tool Shed installs, Galaxy
packages, etc...). To disable this behavior, set the following parameter
to none. To generate the dependency resolution command locally
set the following parameter to local.
-->
<!-- <param id="dependency_resolution">none</param> -->
<!-- Uncomment following option to enable setting metadata on remote
Pulsar server. The 'use_remote_datatypes' option is available for
determining whether to use remotely configured datatypes or local
ones (both alternatives are a little brittle). -->
<!-- <param id="remote_metadata">true</param> -->
<!-- <param id="use_remote_datatypes">false</param> -->
<!-- <param id="remote_property_galaxy_home">/path/to/remote/galaxy-central</param> -->
<!-- If remote Pulsar server is configured to run jobs as the real user,
uncomment the following line to pass the current Galaxy user
along. -->
<!-- <param id="submit_user">$__user_name__</param> -->
<!-- Various other submission parameters can be passed along to the Pulsar
whose use will depend on the remote Pulsar's configured job manager.
For instance:
-->
<!-- <param id="submit_native_specification">-P bignodes -R y -pe threads 8</param> -->
<!-- Disable parameter rewriting and rewrite generated commands
instead. This may be required if remote host is Windows machine
but probably not otherwise.
-->
<!-- <param id="rewrite_parameters">false</param> -->
</destination>
<destination id="pulsar_mq_dest" runner="pulsar_mq" >
<!-- The RESTful Pulsar client sends a request to Pulsar
to populate various system properties. This
extra step can be disabled and these calculated here
on client by uncommenting jobs_directory and
specifying any additional remote_property_ of
interest; this is not optional when using message
queues.
-->
<param id="jobs_directory">/path/to/remote/pulsar/files/staging/</param>
<!-- Otherwise MQ and Legacy pulsar destinations can be supplied
all the same destination parameters as the RESTful client documented
above (though url and private_token are ignored when using a MQ).
-->
</destination>
<destination id="ssh_torque" runner="cli">
<param id="shell_plugin">SecureShell</param>
<param id="job_plugin">Torque</param>
<param id="shell_username">foo</param>
<param id="shell_hostname">foo.example.org</param>
<param id="job_Resource_List">walltime=24:00:00,ncpus=4</param>
</destination>
<!-- Example CLI Slurm runner. -->
<destination id="ssh_slurm" runner="cli">
<param id="shell_plugin">SecureShell</param>
<param id="job_plugin">Slurm</param>
<param id="shell_username">foo</param>
<param id="shell_hostname">my_host</param>
<param id="job_time">2:00:00</param>
<param id="job_ncpus">4</param>
<param id="job_partition">my_partition</param>
</destination>
<destination id="condor" runner="condor">
<!-- With no params, jobs are submitted to the 'vanilla' universe with:
notification = NEVER
getenv = true
Additional/override query ClassAd params can be specified with
<param> tags.
-->
<param id="request_cpus">8</param>
<!-- Recent version of HTCondor do have a `docker` universe to handle containers.
Activate this feature by explicitly specifying the `docker` universe.
-->
<!-- <param id="universe">docker</param> -->
<!-- If the tool has a container specified this one is used.
<requirements>
<container type="docker">bgruening/galaxy-stable</container>
</requirements>
Unless the job destination specifies an override
with docker_container_id_override. If neither of
these is set a default container can be specified
with docker_default_container_id. The resolved
container ID will be passed along to condor as
the docker_image submission parameter.
-->
<!-- <param id="docker_default_container_id">busybox:ubuntu-14.04</param> -->
</destination>
<!-- Jobs can be re-submitted for various reasons (to the same destination or others,
with or without a short delay). For instance, jobs that hit the walltime on one
destination can be automatically resubmitted to another destination. Re-submission
is defined on a per-destination basis using ``resubmit`` tags. Re-submission only
happens currently in response to problems in the job runner - so for instance if a
job fails to allocate memory but the job runner doesn't detect this and completes
the job normally but the exit code indicates the error - the job failure
re-submission won't run yet (this will be added in the future).
Multiple `resubmit` tags can be defined, the first resubmit condition that is true
(i.e. evaluates to a Python truthy value) will be used for a particular job failure.
The ``condition`` attribute is optional, if not present, the
resubmit destination will be used for all relevant failure types.
Conditions are expressed as Python-like expressions (a fairly safe subset of Python
is available). These expressions include math and logical operators, numbers,
strings, etc.... The following variables are available in these expressions:
- "walltime_reached" (True if and only if the job runner indicates a walltime maximum was reached)
- "memory_limit_reached" (True if and only if the job runner indicates a memory limit was hit)
- "unknown_error" (True for job or job runner problems that aren't otherwise classified)
- "attempt" (the re-submission attempt number this is)
- "seconds_since_queued" (the number of seconds since the last time the job was in a queued state within Galaxy)
- "seconds_running" (the number of seconds the job was in a running state within Galaxy)
The ``handler`` attribute is optional, if not present, the job's original
handler will be reused for the resubmitted job. The ``destination`` attribute
is optional, if not present the job's original destination will be reused for the
re-submission. The ``delay`` attribute is optional, if present it will cause the job to
delay for that number of seconds before being re-submitted.
-->
<destination id="short_fast" runner="slurm">
<param id="nativeSpecification">--time=00:05:00 --nodes=1</param>
<resubmit condition="walltime_reached" destination="long_slow" handler="sge_handler" />
</destination>
<destination id="long_slow" runner="sge">
<!-- The destination that you resubmit jobs to can be any runner type -->
<param id="nativeSpecification">-l h_rt=96:00:00</param>
</destination>
<destination id="smallmem" runner="slurm">
<param id="nativeSpecification">--mem-per-cpu=512</param>
<resubmit condition="memory_limit_reached" destination="bigmem" />
</destination>
<destination id="retry_on_unknown_problems" runner="slurm">
<!-- Just retry the job 5 times if uncategorized errors occur, backing
off by 30 more seconds between attempts. -->
<resubmit condition="unknown_error and attempt <= 5" delay="attempt * 30" />
</destination>
<!-- Any tag param in this file can be set using an environment variable or using
values from galaxy.ini using the from_environ and from_config attributes
respectively. The text of the param will still be used if that environment variable
or config value isn't set.
-->
<destination id="params_from_environment" runner="slurm">
<param id="nativeSpecification" from_environ="NATIVE_SPECIFICATION">--time=00:05:00 --nodes=1</param>
<param id="docker_enabled" from_config="use_docker">false</param>
</destination>
<destination id="my-tool-container" runner="k8s">
<!-- For the kubernetes (k8s) runner, each container is a destination.
Make sure that the container is able to execute the calls that will be passed by the galaxy built
command. Most notably, containers that execute scripts through an interpreter in the form
Rscript my-script.R <arguments>
should have this wrapped as the container set working directory won't be the one actually used by
galaxy (galaxy creates a new working directory and moves to it). Recommendation is hence to wrap this
type of calls on a shell script, and leave that script with execution privileges on the PATH of the
container:
RUN echo '#!/bin/bash' > /usr/local/bin/myScriptExec
RUN echo 'Rscript /path/to/my-script.R "$@"' >> /usr/local/bin/myScriptExec
RUN chmod a+x /usr/local/bin/myScriptExec
-->
<!-- The following four fields assemble the container's full name:
docker pull <repo>/<owner>/<image>:tag
-->
<param id="docker_repo_override">my-docker-registry.org</param>
<param id="docker_owner_override">superbioinfo</param>
<param id="docker_image_override">my-tool</param>
<param id="docker_tag_override">latest</param>
<!-- Alternatively you could specify a different type of container, such as rkt (not tested with Kubernetes)
<param id="rkt_repo_override">my-docker-registry.org</param>
<param id="rkt_owner_override">superbioinfo</param>
<param id="rkt_image_override">my-tool</param>
<param id="rkt_tag_override">latest</param>
-->
<!-- You can also allow the destination to accept the docker container set in the tool, and only fall into
the docker image set by this destination if the tool doesn't set a docker container, by using the
"default" suffix instead of "override".
<param id="docker_repo_default">my-docker-registry.org</param>
<param id="docker_owner_default">superbioinfo</param>
<param id="docker_image_default">my-tool</param>
<param id="docker_tag_default">latest</param>
-->
<param id="max_pod_retrials">3</param>
<!-- Allows pods to retry up to this number of times, before marking the galaxy job failed. k8s is a state
setter essentially, so by default it will try to take a job submitted to successful completion. A job
submits pods, until the number of successes (1 in this use case) is achieved, assuming that whatever is
making the pods fail will be fixed (such as a stale disk or a dead node that it is being restarted).
This option sets a limit of retrials, so that after that number of failed pods, the job is re-scaled to
zero (no execution) and the stderr/stdout of the k8s job is reported in galaxy (and the galaxy job set
to failed).
Overrides the runner config. (Not implemented yet)
-->
<!-- REQUIRED: To play nicely with the existing galaxy setup for containers. This could be set though
internally by the runner. -->
<param id="docker_enabled">true</param>
</destination>
<destination id="god" runner="godocker">
<!-- The following are configurations for the container -->
<param id="docker_enabled">true</param>
<param id="docker_cpu">1</param>
<param id="docker_memory">2</param>
<param id="docker_default_container_id">centos:latest</param>
<!-- Specify the image on which the jobs have to be executed -->
<param id="godocker_volumes"></param>
<!-- Mount the godocker volumes
volumes must be separated by commas.
eg: <param id="godocker_volumes">home,galaxy</param>
-->
<param id="virtualenv">false</param>
<!-- If a tool execution in container requires galaxy virtualenv,
then enable it by setting the value to true.
Disable venv by setting the value to false.
-->
</destination>
<destination id="chronos_dest" runner="chronos">
<param id="docker_enabled">true</param>
<param id="docker_memory">512</param>
<param id="docker_cpu">2</param>
<param id="volumes">/directory/</param>
<!-- Directory which is mounted to the container and is parent of
the `job_working_directory`, `file_path`, `new_file_path`
directories. Directories of the data used by tools are
included as well.-->
<param id="max_retries">2</param>
<!-- Number of retries to attempt if a command returns a non-zero status -->
</destination>
<!-- Templatized destinations - macros can be used to create templated
destinations with reduced XML duplication. Here we are creating 4 destinations in 4 lines instead of 28 using the macros defined below.
-->
<expand macro="foohost_destination" id="foo_small" ncpus="1" walltime="1:00:00" />
<expand macro="foohost_destination" id="foo_medium" ncpus="2" walltime="4:00:00" />
<expand macro="foohost_destination" id="foo_large" ncpus="8" walltime="24:00:00" />
<expand macro="foohost_destination" id="foo_longrunning" ncpus="1" walltime="48:00:00" />
</destinations>
<resources default="default">
<!-- Group different parameters defined in job_resource_params_conf.xml
together and assign these groups ids. Tool section below can map
tools to different groups. This is experimental functionality!
-->
<group id="default"></group>
<group id="memoryonly">memory</group>
<group id="all">processors,memory,time,project</group>
</resources>
<tools>
<!-- Tools can be configured to use specific destinations or handlers,
identified by either the "id" or "tags" attribute. If assigned to
a tag, a handler or destination that matches that tag will be
chosen at random.
-->
<tool id="foo" handler="trackster_handler">
<param id="source">trackster</param>
</tool>
<tool id="bar" destination="dynamic"/>
<!-- Next example defines resource group to insert into tool interface
and pass to dynamic destination (as resource_params argument). -->
<tool id="longbar" destination="dynamic" resources="all" />
<tool id="baz" handler="special_handlers" destination="bigmem"/>
<!-- Finally for Kubernetes runner, the following connects a particular tool to be executed with
the container of choice in Kubernetes.
-->
<tool id="my-tool" destination="my-tool-container"/>
</tools>
<limits>
<!-- Certain limits can be defined. The 'concurrent_jobs' limits all
control the number of jobs that can be "active" at a time, that
is, dispatched to a runner and in the 'queued' or 'running'
states.
A race condition exists that will allow destination_* concurrency
limits to be surpassed when multiple handlers are allowed to
handle jobs for the same destination. To prevent this, assign all
jobs for a specific destination to a single handler.
-->
<!-- registered_user_concurrent_jobs:
Limit on the number of jobs a user with a registered Galaxy
account can have active across all destinations.
-->
<limit type="registered_user_concurrent_jobs">2</limit>
<!-- anonymous_user_concurrent_jobs:
Likewise, but for unregistered/anonymous users.
-->
<limit type="anonymous_user_concurrent_jobs">1</limit>
<!-- destination_user_concurrent_jobs:
The number of jobs a user can have active in the specified
destination, or across all destinations identified by the
specified tag. (formerly: concurrent_jobs)
-->
<limit type="destination_user_concurrent_jobs" id="local">1</limit>
<limit type="destination_user_concurrent_jobs" tag="mycluster">2</limit>
<limit type="destination_user_concurrent_jobs" tag="longjobs">1</limit>
<!-- destination_total_concurrent_jobs:
The number of jobs that can be active in the specified
destination (or across all destinations identified by the
specified tag) by any/all users.
-->
<limit type="destination_total_concurrent_jobs" id="local">16</limit>
<limit type="destination_total_concurrent_jobs" tag="longjobs">100</limit>
<!-- walltime:
Amount of time a job can run (in any destination) before it
will be terminated by Galaxy.
-->
<limit type="walltime">24:00:00</limit>
<!-- total_walltime:
Total walltime that jobs may not exceed during a set period.
If total walltime of finished jobs exceeds this value, any
new jobs are paused. `window` is a number in days,
representing the period.
-->
<limit type="total_walltime" window="30">24:00:00</limit>
<!-- output_size:
Size that any defined tool output can grow to before the job
will be terminated. This does not include temporary files
created by the job. Format is flexible, e.g.:
'10GB' = '10g' = '10240 Mb' = '10737418240'
-->
<limit type="output_size">10GB</limit>
</limits>
<macros>
<xml name="foohost_destination" tokens="id,walltime,ncpus">
<destination id="@ID@" runner="cli">
<param id="shell_plugin">SecureShell</param>
<param id="job_plugin">Torque</param>
<param id="shell_username">galaxy</param>
<param id="shell_hostname">foohost_destination.example.org</param>
<param id="job_Resource_List">walltime=@WALLTIME@,ncpus=@NCPUS@</param>
</destination>
</xml>
</macros>
</job_conf>