-
Notifications
You must be signed in to change notification settings - Fork 967
/
joiner.xml
220 lines (201 loc) · 9.01 KB
/
joiner.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
<tool id="join1" name="Join two Datasets" version="2.0.2">
<description>side by side on a specified field</description>
<!-- Runs join.py on the two inputs. Dataset, output and config-file paths are single-quoted so the shell passes each one as a single argument. $unmatched, $partial and $header are left unquoted on purpose: each expands to either a flag or an empty string, and an empty string must not become an argument. -->
<command interpreter="python">join.py '$input1' '$input2' $field1 $field2 '$out_file1' $unmatched $partial --index_depth=3 --buffer=50000000 --fill_options_file='$fill_options_file' $header</command>
<inputs>
    <!-- The two tabular datasets and, for each, the column used as the join key. -->
    <param format="tabular" name="input1" type="data" label="Join"/>
    <param name="field1" label="using column" type="data_column" data_ref="input1" />
    <param format="tabular" name="input2" type="data" label="with" />
    <param name="field2" label="and column" type="data_column" data_ref="input2" />
    <!-- Each of the following selects expands to a command-line flag, or to an empty string when "No" is chosen. -->
    <param name="unmatched" type="select" label="Keep lines of first input that do not join with second input">
        <option value="-u">Yes</option>
        <option value="" selected="true">No</option>
    </param>
    <param name="partial" type="select" label="Keep lines of first input that are incomplete">
        <option value="-p">Yes</option>
        <option value="" selected="true">No</option>
    </param>
    <!-- Fill settings; these are not passed on the command line directly but are read by the fill_options_file config template. -->
    <conditional name="fill_empty_columns">
        <param name="fill_empty_columns_switch" type="select" label="Fill empty columns">
            <option value="no_fill" selected="True">No</option>
            <option value="fill_empty">Yes</option>
        </param>
        <when value="no_fill">
            <!-- do nothing -->
        </when>
        <when value="fill_empty">
            <param type="select" name="fill_columns_by" label="Only fill unjoined rows">
                <option value="fill_unjoined_only" selected="True">Yes</option>
                <option value="fill_all">No</option>
            </param>
            <conditional name="do_fill_empty_columns">
                <param name="column_fill_type" type="select" label="Fill Columns by">
                    <option value="single_fill_value" selected="True">Single fill value</option>
                    <option value="fill_value_by_column">Values by column</option>
                </param>
                <when value="single_fill_value">
                    <param type="text" name="fill_value" label="Fill value" value="."/>
                </when>
                <when value="fill_value_by_column">
                    <!-- One repeat entry per column that should receive its own fill value. The text params carry the same "Fill value" label as the single-value case. -->
                    <repeat name="column_fill1" title="Fill Column for Input 1">
                        <param name="column_number1" label="Column" type="data_column" data_ref="input1" />
                        <param type="text" name="fill_value1" label="Fill value" value="."/>
                    </repeat>
                    <repeat name="column_fill2" title="Fill Column for Input 2">
                        <param name="column_number2" label="Column" type="data_column" data_ref="input2" />
                        <param type="text" name="fill_value2" label="Fill value" value="."/>
                    </repeat>
                </when>
            </conditional>
        </when>
    </conditional>
    <param name="header" type="select" label="Keep the header lines">
        <option value="-H">Yes</option>
        <option value="" selected="true">No</option>
    </param>
</inputs>
<configfiles>
<!--
fill_options_file: Cheetah template that renders a JSON object; the command
line hands the rendered file to join.py through its fill_options_file option.

When "Fill empty columns" is not set to fill_empty, the object stays empty.
Otherwise it holds:
  fill_unjoined_only: true when fill_columns_by is "fill_unjoined_only".
  file1_columns / file2_columns: one fill string per column of input1 /
      input2 (length taken from each dataset's "columns" metadata). Every
      entry starts as the single fill value, or as "" in by-column mode.
In by-column mode each repeat entry then overwrites the entry for its
column; column numbers are 1-based, hence the "- 1" when indexing.
-->
<configfile name="fill_options_file"><%
import json
%>
#set $__fill_options = {}
#if $fill_empty_columns['fill_empty_columns_switch'] == 'fill_empty':
#set $__fill_options['fill_unjoined_only'] = $fill_empty_columns['fill_columns_by'].value == 'fill_unjoined_only'
#if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'single_fill_value':
#set $__start_fill = $fill_empty_columns['do_fill_empty_columns']['fill_value'].value
#else:
#set $__start_fill = ""
#end if
#set $__fill_options['file1_columns'] = [ __start_fill for i in range( int( $input1.metadata.columns ) ) ]
#set $__fill_options['file2_columns'] = [ __start_fill for i in range( int( $input2.metadata.columns ) ) ]
#if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'fill_value_by_column':
#for column_fill1 in $fill_empty_columns['do_fill_empty_columns']['column_fill1']:
#set $__fill_options['file1_columns'][ int( column_fill1['column_number1'].value ) - 1 ] = column_fill1['fill_value1'].value
#end for
#for column_fill2 in $fill_empty_columns['do_fill_empty_columns']['column_fill2']:
#set $__fill_options['file2_columns'][ int( column_fill2['column_number2'].value ) - 1 ] = column_fill2['fill_value2'].value
#end for
#end if
#end if
${json.dumps( __fill_options )}
</configfile>
</configfiles>
<outputs>
    <!-- The joined dataset takes both its datatype and its metadata from the first input; format_source names that input explicitly instead of relying on the legacy format="input" shorthand. -->
    <data format_source="input1" name="out_file1" metadata_source="input1" />
</outputs>
<tests>
    <!-- Select parameters are set by option value ("-u", "-p", "-H", or empty) rather than by display text, so every test addresses options the same way. -->
    <!-- Plain join, no unmatched/partial lines, no filling. -->
    <test>
        <param name="input1" value="1.bed"/>
        <param name="input2" value="2.bed"/>
        <param name="field1" value="2"/>
        <param name="field2" value="2"/>
        <param name="unmatched" value=""/>
        <param name="partial" value=""/>
        <param name="fill_empty_columns_switch" value="no_fill"/>
        <output name="out_file1" file="joiner_out1.bed"/>
    </test>
    <!-- Keep unmatched and incomplete lines, no filling. -->
    <test>
        <param name="input1" value="1.bed"/>
        <param name="input2" value="2.bed"/>
        <param name="field1" value="2"/>
        <param name="field2" value="2"/>
        <param name="unmatched" value="-u"/>
        <param name="partial" value="-p"/>
        <param name="fill_empty_columns_switch" value="no_fill"/>
        <output name="out_file1" file="joiner_out2.bed"/>
    </test>
    <!-- Fill all rows with a single fill value. -->
    <test>
        <param name="input1" value="1.bed"/>
        <param name="input2" value="2.bed"/>
        <param name="field1" value="2"/>
        <param name="field2" value="2"/>
        <param name="unmatched" value="-u"/>
        <param name="partial" value="-p"/>
        <param name="fill_empty_columns_switch" value="fill_empty"/>
        <param name="fill_columns_by" value="fill_all"/>
        <param name="column_fill_type" value="single_fill_value"/>
        <param name="fill_value" value="~"/>
        <output name="out_file1" file="joiner_out3.bed"/>
    </test>
    <!-- Fill all rows with per-column fill values. -->
    <test>
        <param name="input1" value="1.bed"/>
        <param name="input2" value="2.bed"/>
        <param name="field1" value="2"/>
        <param name="field2" value="2"/>
        <param name="unmatched" value="-u"/>
        <param name="partial" value="-p"/>
        <param name="fill_empty_columns_switch" value="fill_empty"/>
        <param name="fill_columns_by" value="fill_all"/>
        <param name="column_fill_type" value="fill_value_by_column"/>
        <param name="column_number1" value="6"/>
        <param name="fill_value1" value="+"/>
        <param name="column_number2" value="1"/>
        <param name="fill_value2" value="NoChrom"/>
        <output name="out_file1" file="joiner_out4.bed"/>
    </test>
    <!-- Header lines kept, plain join. -->
    <test>
        <param name="input1" value="joiner_header_in1.tab"/>
        <param name="input2" value="joiner_header_in2.tab"/>
        <param name="field1" value="4"/>
        <param name="field2" value="1"/>
        <param name="unmatched" value=""/>
        <param name="partial" value=""/>
        <param name="header" value="-H"/>
        <param name="fill_empty_columns_switch" value="no_fill"/>
        <output name="out_file1" file="joiner_out5.tab"/>
    </test>
    <!-- Header lines kept, unmatched/incomplete lines kept, single fill value. -->
    <test>
        <param name="input1" value="joiner_header_in1.tab"/>
        <param name="input2" value="joiner_header_in2.tab"/>
        <param name="field1" value="4"/>
        <param name="field2" value="1"/>
        <param name="unmatched" value="-u"/>
        <param name="partial" value="-p"/>
        <param name="header" value="-H"/>
        <param name="fill_empty_columns_switch" value="fill_empty"/>
        <param name="fill_columns_by" value="fill_all"/>
        <param name="column_fill_type" value="single_fill_value"/>
        <param name="fill_value" value="."/>
        <output name="out_file1" file="joiner_out6.tab"/>
    </test>
    <!-- Same header inputs with the header option switched off. -->
    <test>
        <param name="input1" value="joiner_header_in1.tab"/>
        <param name="input2" value="joiner_header_in2.tab"/>
        <param name="field1" value="4"/>
        <param name="field2" value="1"/>
        <param name="unmatched" value=""/>
        <param name="partial" value=""/>
        <param name="header" value=""/>
        <param name="fill_empty_columns_switch" value="no_fill"/>
        <output name="out_file1" file="joiner_out7.tab"/>
    </test>
</tests>
<help>
.. class:: warningmark
**This tool will attempt to reuse the metadata from your first input.** To change metadata assignments click on the "edit attributes" link of the history item generated by this tool.
.. class:: infomark
**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert*
-----
**Syntax**
This tool joins lines of two datasets on a common field. An empty string ("") is not a valid identifier.
You may choose to include lines of your first input that do not join with your second input.
- Columns are referenced with a **number**. For example, **3** refers to the 3rd column of a tab-delimited file.
-----
**Example**
Dataset1::
chr1 10 20 geneA
chr1 50 80 geneB
chr5 10 40 geneL
Dataset2::
geneA tumor-suppressor
geneB Foxp2
geneC Gnas1
geneE INK4a
Joining the 4th column of Dataset1 with the 1st column of Dataset2 will yield::
chr1 10 20 geneA geneA tumor-suppressor
chr1 50 80 geneB geneB Foxp2
Joining the 4th column of Dataset1 with the 1st column of Dataset2, while keeping all lines from Dataset1, will yield::
chr1 10 20 geneA geneA tumor-suppressor
chr1 50 80 geneB geneB Foxp2
chr5 10 40 geneL
</help>
</tool>