<!--
Page-scrape residue from the GitHub web UI (not part of the tool definition):
Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
367 lines (358 sloc) 18.9 KB
-->
<tool profile="16.07" id="mothur_cluster_split" name="Cluster.split" version="@WRAPPER_VERSION@.0">
<description>Assign sequences to OTUs and split large matrices</description>
<!-- Macros expanded throughout this wrapper are imported from macros.xml. -->
<macros>
    <import>macros.xml</import>
</macros>
<!-- vsearch is passed into the shared requirements macro as an additional package requirement. -->
<expand macro="requirements">
    <requirement type="package" version="2.6.0">vsearch</requirement>
</expand>
<expand macro="stdio"/>
<expand macro="version_command"/>
<command><![CDATA[
@SHELL_OPTIONS@

## Create symlinks to the input datasets so the cluster.split call below can
## refer to them by fixed, space-free names (all spaces are stripped by sed).
#if $splitby.splitmethod == "distance":
    ln -s '$splitby.matrix.dist' splitby.matrix.dist.dat &&
    ## The name/count dataset is optional for phylip matrices, so only link it when one was supplied.
    #if $splitby.matrix.nameOrCount:
        ln -s '$splitby.matrix.nameOrCount' splitby.matrix.nameOrCount.dat &&
    #end if
#elif $splitby.splitmethod == "classify":
    ln -s '$splitby.dist' splitby.dist.dat &&
    ln -s '$splitby.nameOrCount' splitby.nameOrCount.dat &&
    ln -s '$splitby.taxonomy' splitby.taxonomy.dat &&
#elif $splitby.splitmethod == "fasta":
    ln -s '$splitby.fasta' splitby.fasta.dat &&
    ln -s '$splitby.nameOrCount' splitby.nameOrCount.dat &&
    ln -s '$splitby.taxonomy' splitby.taxonomy.dat &&
#end if

## Build the mothur command and pipe it into mothur on stdin.
echo 'cluster.split(
    splitmethod=$splitby.splitmethod,
    #if $splitby.splitmethod == "distance":
        #if $splitby.matrix.format == "column":
            column=splitby.matrix.dist.dat,
            #if $splitby.matrix.nameOrCount.is_of_type("mothur.names"):
                name=splitby.matrix.nameOrCount.dat,
            #elif $splitby.matrix.nameOrCount.is_of_type("mothur.count_table"):
                count=splitby.matrix.nameOrCount.dat,
            #end if
        #elif $splitby.matrix.format == "phylip":
            phylip=splitby.matrix.dist.dat,
            #if $splitby.matrix.nameOrCount:
                #if $splitby.matrix.nameOrCount.is_of_type("mothur.names"):
                    name=splitby.matrix.nameOrCount.dat,
                #elif $splitby.matrix.nameOrCount.is_of_type("mothur.count_table"):
                    count=splitby.matrix.nameOrCount.dat,
                #end if
            #end if
        #end if
    #elif $splitby.splitmethod == "classify":
        column=splitby.dist.dat,
        taxonomy=splitby.taxonomy.dat,
        #if $splitby.nameOrCount.is_of_type("mothur.names"):
            name=splitby.nameOrCount.dat,
        #elif $splitby.nameOrCount.is_of_type("mothur.count_table"):
            count=splitby.nameOrCount.dat,
        #end if
        #if $splitby.taxlevel:
            taxlevel=$splitby.taxlevel,
        #end if
    #elif $splitby.splitmethod == "fasta":
        fasta=splitby.fasta.dat,
        taxonomy=splitby.taxonomy.dat,
        #if $splitby.nameOrCount.is_of_type("mothur.names"):
            name=splitby.nameOrCount.dat,
        #elif $splitby.nameOrCount.is_of_type("mothur.count_table"):
            count=splitby.nameOrCount.dat,
        #end if
        #if $splitby.taxlevel:
            taxlevel=$splitby.taxlevel,
        #end if
        classic=$splitby.classic,
    #end if
    method=$splitby.condmethod.method,
    #if $splitby.condmethod.method == "opti":
        metric=$splitby.condmethod.metric,
        initialize=$splitby.condmethod.initialize,
        delta=$splitby.condmethod.delta,
        iters=$splitby.condmethod.iters,
    #end if
    #if float($cutoff) > 0.0:
        cutoff=$cutoff,
    #end if
    #if $precision:
        precision=$precision,
    #end if
    large=$large,
    cluster=$cluster,
    runsensspec=$runsensspec,
    processors='\${GALAXY_SLOTS:-8}'
)'
| sed 's/ //g'  ## mothur trips over whitespace
| mothur
| tee mothur.out.log
]]></command>
<inputs>
    <!-- The chosen split method determines which input datasets are requested. -->
    <conditional name="splitby">
        <param name="splitmethod" type="select" label="Split by" help="VSEARCH methods (agc and dgc) require a fasta file">
            <option value="distance">Distance</option>
            <option value="classify">Classification</option>
            <option value="fasta">Classification using fasta</option>
        </param>
        <when value="distance">
            <conditional name="matrix">
                <param name="format" type="select" label="Select a Distance Matrix Format">
                    <option value="column">Pairwise Column Matrix</option>
                    <option value="phylip">Phylip Distance Matrix</option>
                </param>
                <when value="column">
                    <param name="dist" argument="column" type="data" format="mothur.pair.dist" label="column - Distance Matrix"/>
                    <param name="nameOrCount" type="data" format="mothur.names,mothur.count_table" label="name file or count table - Sequences Name reference"/>
                </when>
                <when value="phylip">
                    <param name="dist" argument="phylip" type="data" format="mothur.dist,mothur.lower.dist,mothur.square.dist" label="phylip - Distance Matrix"/>
                    <!-- The name/count dataset is optional only for phylip matrices. -->
                    <param name="nameOrCount" type="data" format="mothur.names,mothur.count_table" optional="true" label="name file or count table - Sequences Name reference"/>
                </when>
            </conditional>
            <conditional name="condmethod">
                <expand macro="param-clustermethods"/>
                <when value="furthest"/>
                <when value="nearest"/>
                <when value="average"/>
                <when value="opti">
                    <expand macro="params-opticlust"/>
                </when>
            </conditional>
        </when>
        <when value="classify">
            <param name="dist" argument="column" type="data" format="mothur.pair.dist" label="column - Distance Matrix"/>
            <param argument="taxonomy" type="data" format="mothur.seq.taxonomy" label="taxonomy - Taxonomy (from Classify.seqs)"/>
            <param name="nameOrCount" type="data" format="mothur.names,mothur.count_table" label="name file or count table - Sequences Name reference"/>
            <param argument="taxlevel" type="integer" value="1" min="1" label="taxlevel - taxonomy level for split (default=1)"
                   help="taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list"/>
            <conditional name="condmethod">
                <expand macro="param-clustermethods"/>
                <when value="furthest"/>
                <when value="nearest"/>
                <when value="average"/>
                <when value="opti">
                    <expand macro="params-opticlust"/>
                </when>
            </conditional>
        </when>
        <when value="fasta">
            <param argument="fasta" type="data" format="mothur.align,fasta" label="Fasta"/>
            <param argument="taxonomy" type="data" format="mothur.seq.taxonomy" label="Taxonomy" help="can be obtained by running classify.seqs"/>
            <param name="nameOrCount" type="data" format="mothur.names,mothur.count_table" label="name file or count table - Sequences Name reference"/>
            <!-- Help text agrees with value="3" and the label for this branch. -->
            <param argument="taxlevel" type="integer" value="3" min="1" label="taxlevel - taxonomy level for split (default=3)"
                   help="taxonomy level you want to use to split the distance file, default=3, meaning use the third taxon in each list"/>
            <param argument="classic" type="boolean" checked="false" truevalue="true" falsevalue="false" label="classic - Use cluster.classic"/>
            <!-- Only the fasta branch adds the vsearch cluster methods (agc, dgc) to the method select. -->
            <conditional name="condmethod">
                <expand macro="param-clustermethods">
                    <expand macro="option-vsearch-clustermethods"/>
                </expand>
                <when value="furthest"/>
                <when value="nearest"/>
                <when value="average"/>
                <when value="agc"/>
                <when value="dgc"/>
                <when value="opti">
                    <expand macro="params-opticlust"/>
                </when>
            </conditional>
        </when>
    </conditional>
    <!-- Options shared by every split method. -->
    <param argument="cutoff" type="float" value="0.0" min="0.0" label="cutoff - Distance Cutoff threshold - ignored if not > 0"
           help="Ignore pairwise distances larger than this, a common value would be 0.25"/>
    <param argument="precision" type="select" optional="true" label="precision - Precision for rounding distance values"
           help="Set higher precision for longer genome scale sequence lengths">
        <option value="10">.1</option>
        <option value="100" selected="true">.01</option>
        <option value="1000">.001</option>
        <option value="10000">.0001</option>
        <option value="100000">.00001</option>
        <option value="1000000">.000001</option>
    </param>
    <param argument="large" type="boolean" checked="false" truevalue="true" falsevalue="false" label="large - distance matrix is too large to fit in RAM"
           help="If your job fails due to not enough memory error, set this to true to rerun"/>
    <param argument="cluster" type="boolean" falsevalue="false" truevalue="true" checked="true" label="perform clustering?"
           help="indicate whether you want to run the clustering or just split the distance matrix"/>
    <param argument="runsensspec" type="boolean" truevalue="true" falsevalue="false" checked="true" label="runsensspec" help="run the sens.spec command on the completed list file"/>
    <expand macro="param-savelog"/>
</inputs>
<outputs>
    <expand macro="logfile-output"/>
    <!--
        rabund and sabund are filtered out when the name/count input is a count table.
        NOTE(review): each split method exposes nameOrCount under only one of the two
        paths tested below, so one filter always references a missing key; presumably
        Galaxy ignores a filter that fails to evaluate. Confirm before changing these.
    -->
    <data name="rabund" format="mothur.rabund" from_work_dir="splitby.*.rabund" label="${tool.name} on ${on_string}: rabund (Rank Abundance)">
        <filter>splitby['nameOrCount'].ext != "mothur.count_table"</filter>
        <filter>splitby['matrix']['nameOrCount'].ext != "mothur.count_table"</filter>
    </data>
    <data name="sabund" format="mothur.sabund" from_work_dir="splitby.*.sabund" label="${tool.name} on ${on_string}: sabund (Species Abundance)">
        <filter>splitby['nameOrCount'].ext != "mothur.count_table"</filter>
        <filter>splitby['matrix']['nameOrCount'].ext != "mothur.count_table"</filter>
    </data>
    <data name="otulist" format="mothur.list" from_work_dir="splitby.*.list" label="${tool.name} on ${on_string}: list (OTU List)"/>
    <!-- The split collections and the split file are only created when clustering is switched off. -->
    <collection name="splitdist" type="list" label="${tool.name} on ${on_string}: split dist">
        <filter>not cluster</filter>
        <discover_datasets pattern=".*?\.column\.dist\.(?P&lt;designation&gt;.*)\.temp" format="mothur.dist"/>
    </collection>
    <collection name="splitnames" type="list" label="${tool.name} on ${on_string}: split names">
        <filter>not cluster</filter>
        <discover_datasets pattern=".*?\.names\.(?P&lt;designation&gt;.*)\.temp" format="mothur.names"/>
    </collection>
    <data name="splitfile" format="txt" from_work_dir="splitby.*.file" label="${tool.name} on ${on_string}: split.file">
        <filter>not cluster</filter>
    </data>
    <!-- sensspec is only kept for the opti method with runsensspec enabled. -->
    <data name="sensspec" format="txt" from_work_dir="splitby.*.sensspec" label="${tool.name} on ${on_string}: sensspec">
        <filter>runsensspec and splitby['condmethod']['method'] == "opti"</filter>
    </data>
</outputs>
<tests>
<test>
    <!-- test with distance method -->
    <param name="splitmethod" value="distance"/>
    <param name="format" value="phylip"/>
    <param name="dist" value="98_sq_phylip_amazon.dist" ftype="mothur.square.dist"/>
    <param name="method" value="average"/>
    <param name="savelog" value="true"/>
    <output name="otulist" ftype="mothur.list">
        <assert_contents>
            <expand macro="test-list-format"/>
            <has_text text="unique"/>
            <has_text text="U68680"/>
        </assert_contents>
    </output>
    <output name="rabund" ftype="mothur.rabund">
        <assert_contents>
            <has_line_matching expression="^unique(\t\d+)+$"/>
            <has_text text="0.03"/>
        </assert_contents>
    </output>
    <output name="sabund" ftype="mothur.sabund">
        <assert_contents>
            <has_line_matching expression="^unique(\t\d+)+$"/>
            <has_text text="0.03"/>
        </assert_contents>
    </output>
    <expand macro="logfile-test"/>
</test>
<test>
    <!-- test with cluster false: only the split file and the split collections are checked -->
    <param name="splitmethod" value="distance"/>
    <param name="format" value="phylip"/>
    <param name="dist" value="98_sq_phylip_amazon.dist" ftype="mothur.square.dist"/>
    <param name="cluster" value="false"/>
    <param name="method" value="average"/>
    <param name="savelog" value="true"/>
    <output name="splitfile" ftype="txt">
        <assert_contents>
            <has_text text="column"/>
            <has_text text="dist"/>
            <has_text text="names"/>
            <has_text text="temp"/>
        </assert_contents>
    </output>
    <output_collection name="splitnames" count="14">
        <element name="0" ftype="mothur.names">
            <assert_contents>
                <has_text text="U68591"/>
                <has_text text="U68600"/>
            </assert_contents>
        </element>
    </output_collection>
    <output_collection name="splitdist" count="13">
        <element name="4" ftype="mothur.dist">
            <assert_contents>
                <has_line_matching expression="^U\d+\tU\d+\t\d+\.\d+$"/>
            </assert_contents>
        </element>
    </output_collection>
    <expand macro="logfile-test"/>
</test>
<test>
    <!-- test with classify method (mothur.names input file) -->
    <!-- No "format" param is set: the classify branch of splitby declares no such input. -->
    <param name="splitmethod" value="classify"/>
    <param name="dist" value="amazon.pair.dist" ftype="mothur.pair.dist"/>
    <param name="nameOrCount" value="amazon.names" ftype="mothur.names"/>
    <param name="taxonomy" value="amazon.wang.wang.taxonomy" ftype="mothur.seq.taxonomy"/>
    <param name="method" value="average"/>
    <param name="savelog" value="true"/>
    <output name="otulist" md5="d6eba624ad79759c530b9bc3285a1361" ftype="mothur.list"/>
    <output name="rabund" md5="2a165e1e40644fccb8cc9f53d8915bc3" ftype="mothur.rabund"/>
    <output name="sabund" md5="7aad8a9ca0eade414d6eba1f8bef960f" ftype="mothur.sabund"/>
    <expand macro="logfile-test"/>
</test>
<test>
    <!-- test with classify method (mothur.count_table input file) -->
    <!-- No "format" param is set: the classify branch of splitby declares no such input. -->
    <param name="splitmethod" value="classify"/>
    <param name="dist" value="amazon.pair.dist" ftype="mothur.pair.dist"/>
    <param name="nameOrCount" value="amazon.count_table" ftype="mothur.count_table"/>
    <param name="taxonomy" value="amazon.wang.wang.taxonomy" ftype="mothur.seq.taxonomy"/>
    <param name="method" value="average"/>
    <param name="savelog" value="true"/>
    <output name="otulist" md5="c5c28330434d3e773221f635d04d6af9" ftype="mothur.list"/>
    <expand macro="logfile-test"/>
</test>
<test>
    <!-- test with fasta -->
    <param name="splitmethod" value="fasta"/>
    <param name="fasta" value="amazon.align_head" ftype="mothur.align"/>
    <param name="nameOrCount" value="amazon.align_head.names" ftype="mothur.names"/>
    <param name="taxonomy" value="amazon.align_head.wang.taxonomy" ftype="mothur.seq.taxonomy"/>
    <param name="cutoff" value="9999"/>
    <param name="method" value="average"/>
    <param name="savelog" value="true"/>
    <output name="otulist" md5="a1279248cf2bc1094e0046b2cff1b785" ftype="mothur.list"/>
    <output name="rabund" md5="65ec9f326cd92fc607679b9902ec8430" ftype="mothur.rabund"/>
    <output name="sabund" md5="854d3acd15f64299c5d9d9e18f2d51b4" ftype="mothur.sabund"/>
    <expand macro="logfile-test"/>
</test>
<test>
    <!-- test with vsearch executable (agc/dgc method) -->
    <param name="splitmethod" value="fasta"/>
    <param name="fasta" value="amazon.align_head" ftype="mothur.align"/>
    <param name="nameOrCount" value="amazon.align_head.names" ftype="mothur.names"/>
    <param name="taxonomy" value="amazon.align_head.wang.taxonomy" ftype="mothur.seq.taxonomy"/>
    <param name="method" value="agc"/>
    <param name="savelog" value="true"/>
    <output name="otulist" md5="0fbc5bf21331538dd50b6586c4005edc" ftype="mothur.list"/>
    <output name="rabund" md5="dcccca11d9fa7a186cd93e9d4592f832" ftype="mothur.rabund"/>
    <output name="sabund" md5="167815924b1b2b4d4e5e7468d41256cb" ftype="mothur.sabund"/>
    <expand macro="logfile-test"/>
</test>
<test>
    <!-- test with opticlust method; this is the only test that checks the sensspec output -->
    <param name="splitmethod" value="distance"/>
    <param name="format" value="phylip"/>
    <param name="dist" value="98_sq_phylip_amazon.dist" ftype="mothur.square.dist"/>
    <param name="method" value="opti"/>
    <param name="savelog" value="true"/>
    <output name="otulist" ftype="mothur.list">
        <assert_contents>
            <expand macro="test-list-format"/>
            <has_text text="0.03"/>
        </assert_contents>
    </output>
    <output name="rabund" ftype="mothur.rabund">
        <assert_contents>
            <expand macro="test-rabund-format"/>
            <has_text text="0.03"/>
        </assert_contents>
    </output>
    <output name="sabund" ftype="mothur.sabund">
        <assert_contents>
            <expand macro="test-sabund-format"/>
            <has_text text="0.03"/>
        </assert_contents>
    </output>
    <output name="sensspec" ftype="txt">
        <assert_contents>
            <expand macro="test-sensspec-format"/>
            <has_text text="0.03"/>
        </assert_contents>
    </output>
    <expand macro="logfile-test"/>
</test>
</tests>
<help><![CDATA[

@MOTHUR_OVERVIEW@

**Command Documentation**

The cluster.split_ command assigns sequences to OTUs (Operational Taxonomic Units).

.. _cluster.split: https://www.mothur.org/wiki/Cluster.split

v1.28.0: Upgraded to Mothur 1.33, introduced cluster boolean.

]]></help>
<!-- Passes one DOI citation into the "citations" macro (contents of that macro are not visible in this file). -->
<expand macro="citations">
<citation type="doi">10.1128/AEM.02810-10</citation>
</expand>
</tool>
<!-- Page-scrape residue from the GitHub web UI (not part of the tool definition): You can’t perform that action at this time. -->