Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Trinity 2.6.6 #1937

Merged
merged 11 commits into from Jun 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
66 changes: 53 additions & 13 deletions tools/trinity/abundance_estimates_to_matrix.xml
@@ -1,12 +1,12 @@
<tool id="trinity_abundance_estimates_to_matrix" name="Build expression matrix" version="@WRAPPER_VERSION@.2">
<tool id="trinity_abundance_estimates_to_matrix" name="Build expression matrix" version="@WRAPPER_VERSION@">
<description>for a de novo assembly of RNA-Seq data by Trinity</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements">
<requirement type="package" version="3.20.1">bioconductor-edger</requirement>
<requirement type="package" version="3.20.7">bioconductor-edger</requirement>
<requirement type="package" version="0.9.1">salmon</requirement>
<requirement type="package" version="0.43.1">kallisto</requirement>
<requirement type="package" version="0.44.0">kallisto</requirement>
</expand>
<command detect_errors="aggressive"><![CDATA[
#import re
Expand All @@ -19,13 +19,38 @@
--est_method ${est_method}
--cross_sample_norm ${additional_params.cross_sample_norm}

#if $gene_trans_map:
--gene_trans_map '$gene_trans_map'
#else:
--gene_trans_map 'none'
#end if

#for $entry in $samples:
'${re.sub('[^\w\-_]', '_', entry.element_identifier)}'
#end for

&& mv *.isoform.counts.matrix '$trans_counts'
&& mv *.isoform.TPM.not_cross_norm '$TPM_no_norm'
#if $gene_trans_map:
&& mv *.gene.counts.matrix '$trans_counts_gene'
&& mv *.gene.TPM.not_cross_norm '$TPM_no_norm_gene'
#end if

#if $additional_params.cross_sample_norm == "TMM":
&& mv *.isoform.TMM.EXPR.matrix '$norm_TMM'
#if $gene_trans_map:
&& mv *.gene.TMM.EXPR.matrix '$norm_TMM_gene'
#end if
#else if $additional_params.cross_sample_norm == "UpperQuartile":
&& mv *.isoform.UpperQuartile.EXPR.matrix '$norm_UQ'
#if $gene_trans_map:
&& mv *.gene.UpperQuartile.EXPR.matrix '$norm_UQ_gene'
#end if
#end if
]]></command>
<inputs>
<param name="samples" label="Abundance estimates" type="data" multiple="true" format="tabular" help="output(s) from 'Align reads and estimate abundance' tool" />

<param format="tabular" name="gene_trans_map" argument="--gene_trans_map" type="data" label="Gene to transcript correspondence ('gene(tab)transcript' lines)" optional="true" help="Only needed for gene level estimates" />
<param type="select" name="est_method" argument="--est_method" label="Abundance estimation method">
<option value="RSEM">RSEM</option>
<option value="eXpress">eXpress</option>
Expand All @@ -42,14 +67,29 @@
</section>
</inputs>
<outputs>
<data format="tabular" name="trans_counts" label="${tool.name} on ${on_string}: estimated RNA-Seq fragment counts (raw counts)" from_work_dir="matrix.counts.matrix"/>
<data format="tabular" name="TPM_no_norm" label="${tool.name} on ${on_string}: matrix of TPM expression values (not cross-sample normalized)" from_work_dir="matrix.TPM.not_cross_norm"/>
<data format="tabular" name="norm" label="${tool.name} on ${on_string}: matrix of TMM-normalized expression values" from_work_dir="matrix.TMM.EXPR.matrix">
<data format="tabular" name="trans_counts" label="${tool.name} on ${on_string}: estimated RNA-Seq fragment isoform counts (raw counts)"/>
<data format="tabular" name="TPM_no_norm" label="${tool.name} on ${on_string}: matrix of isoform TPM expression values (not cross-sample normalized)"/>

<!-- Gene-level raw counts matrix: the command moves *.gene.counts.matrix here, and the filter only creates this output when a gene_trans_map dataset is supplied. -->
<data format="tabular" name="trans_counts_gene" label="${tool.name} on ${on_string}: estimated RNA-Seq fragment gene counts (raw counts)">
<filter>gene_trans_map</filter>
</data>
<!-- Gene-level TPM matrix without cross-sample normalization: the command moves *.gene.TPM.not_cross_norm here, and the filter only creates this output when a gene_trans_map dataset is supplied. -->
<data format="tabular" name="TPM_no_norm_gene" label="${tool.name} on ${on_string}: matrix of gene TPM expression values (not cross-sample normalized)">
<filter>gene_trans_map</filter>
</data>

<!-- Isoform-level TMM-normalized matrix (the command moves *.isoform.TMM.EXPR.matrix here); created only when TMM cross-sample normalization is selected. The label names the isoform level explicitly so it cannot be confused with the gene-level TMM output. -->
<data format="tabular" name="norm_TMM" label="${tool.name} on ${on_string}: matrix of isoform TMM-normalized expression values">
<filter>additional_params['cross_sample_norm'] == "TMM"</filter>
</data>
<data format="tabular" name="norm" label="${tool.name} on ${on_string}: matrix of UpperQuartile-normalized expression values" from_work_dir="matrix.UpperQuartile.EXPR.matrix">
<!-- Isoform-level UpperQuartile-normalized matrix (the command moves *.isoform.UpperQuartile.EXPR.matrix here); created only when UpperQuartile cross-sample normalization is selected. The label names the isoform level explicitly so it cannot be confused with the gene-level UpperQuartile output. -->
<data format="tabular" name="norm_UQ" label="${tool.name} on ${on_string}: matrix of isoform UpperQuartile-normalized expression values">
<filter>additional_params['cross_sample_norm'] == "UpperQuartile"</filter>
</data>

<!-- Gene-level TMM-normalized matrix (the command moves *.gene.TMM.EXPR.matrix here); created only when TMM normalization is selected AND a gene_trans_map dataset is supplied. The label says "gene" because it was previously identical to the isoform-level TMM output's label. -->
<data format="tabular" name="norm_TMM_gene" label="${tool.name} on ${on_string}: matrix of gene TMM-normalized expression values">
<filter>additional_params['cross_sample_norm'] == "TMM" and gene_trans_map</filter>
</data>
<!-- Gene-level UpperQuartile-normalized matrix (the command moves *.gene.UpperQuartile.EXPR.matrix here); created only when UpperQuartile normalization is selected AND a gene_trans_map dataset is supplied. The label says "gene" because it was previously identical to the isoform-level UpperQuartile output's label. -->
<data format="tabular" name="norm_UQ_gene" label="${tool.name} on ${on_string}: matrix of gene UpperQuartile-normalized expression values">
<filter>additional_params['cross_sample_norm'] == "UpperQuartile" and gene_trans_map</filter>
</data>
</outputs>
<tests>
<test>
Expand All @@ -68,7 +108,7 @@
<has_n_columns n="3" />
</assert_contents>
</output>
<output name="norm">
<output name="norm_TMM">
<assert_contents>
<has_line_matching expression="TRINITY_DN3_c0_g1&#009;.*" />
<has_n_columns n="3" />
Expand All @@ -91,7 +131,7 @@
<has_n_columns n="3" />
</assert_contents>
</output>
<output name="norm">
<output name="norm_TMM">
<assert_contents>
<has_line_matching expression="TRINITY_DN3_c0_g1&#009;.*" />
<has_n_columns n="3" />
Expand All @@ -114,7 +154,7 @@
<has_n_columns n="3" />
</assert_contents>
</output>
<output name="norm">
<output name="norm_TMM">
<assert_contents>
<has_line_matching expression="TRINITY_DN3_c0_g1&#009;.*" />
<has_n_columns n="3" />
Expand All @@ -137,7 +177,7 @@
<has_n_columns n="3" />
</assert_contents>
</output>
<output name="norm">
<output name="norm_UQ">
<assert_contents>
<has_line_matching expression="TRINITY_DN3_c0_g1&#009;.*" />
</assert_contents>
Expand Down Expand Up @@ -193,7 +233,7 @@
<has_n_columns n="3" />
</assert_contents>
</output>
<output name="norm">
<output name="norm_TMM">
<assert_contents>
<has_line_matching expression="TRINITY_DN3_c0_g1&#009;.*" />
<has_n_columns n="3" />
Expand Down
19 changes: 13 additions & 6 deletions tools/trinity/align_and_estimate_abundance.xml
@@ -1,4 +1,4 @@
<tool id="trinity_align_and_estimate_abundance" name="Align reads and estimate abundance" version="@WRAPPER_VERSION@.3">
<tool id="trinity_align_and_estimate_abundance" name="Align reads and estimate abundance" version="@WRAPPER_VERSION@">
<description>on a de novo assembly of RNA-Seq data</description>
<macros>
<import>macros.xml</import>
Expand All @@ -12,6 +12,12 @@
<command detect_errors="aggressive"><![CDATA[
ln -f -s '$transcripts' input.fa &&

#if $additional_params.gene_map.has_gene_map == "yes":
get_Trinity_gene_to_trans_map.pl input.fa > gene_to_trans.map &&
#else:
ln -f -s '$additional_params.gene_map.gene_trans_map' gene_to_trans.map &&
#end if

#if $inputs.paired_or_single == "paired":
#if $inputs.left_input.is_of_type('fasta'):
ln -s '$inputs.left_input' paired_left.fa &&
Expand Down Expand Up @@ -68,11 +74,7 @@
#end if

## Additional parameters.
#if $additional_params.gene_map.has_gene_map == "no":
--gene_trans_map $additional_params.gene_map.gene_trans_map
#else
--trinity_mode
#end if
--gene_trans_map gene_to_trans.map

--prep_reference

Expand Down Expand Up @@ -199,6 +201,7 @@
<param name="left_input" value="reads.left.fq"/>
<param name="right_input" value="reads.right.fq"/>
<param name="transcripts" value="raw/Trinity.fasta"/>
<param name="gene_to_trans" value="raw/Trinity.map" />
<param name="library_type" value="RF"/>
<param name="est_method" value="RSEM"/>
<param name="aln_method" value="bowtie"/>
Expand All @@ -221,6 +224,7 @@
<param name="left_input" value="reads.left.fq"/>
<param name="right_input" value="reads.right.fq"/>
<param name="transcripts" value="raw/Trinity.fasta"/>
<param name="gene_to_trans" value="raw/Trinity.map" />
<param name="library_type" value="RF"/>
<param name="est_method" value="RSEM"/>
<param name="aln_method" value="bowtie2"/>
Expand All @@ -243,6 +247,7 @@
<param name="left_input" value="reads.left.fq"/>
<param name="right_input" value="reads.right.fq"/>
<param name="transcripts" value="raw/Trinity.fasta"/>
<param name="gene_to_trans" value="raw/Trinity.map" />
<param name="library_type" value="RF"/>
<param name="est_method" value="eXpress"/>
<param name="aln_method" value="bowtie"/>
Expand All @@ -265,6 +270,7 @@
<param name="left_input" value="reads.left.fq"/>
<param name="right_input" value="reads.right.fq"/>
<param name="transcripts" value="raw/Trinity.fasta"/>
<param name="gene_to_trans" value="raw/Trinity.map" />
<param name="library_type" value="RF"/>
<param name="est_method" value="salmon"/>
<param name="aln_method" value="bowtie"/>
Expand All @@ -287,6 +293,7 @@
<param name="left_input" value="reads.left.fq"/>
<param name="right_input" value="reads.right.fq"/>
<param name="transcripts" value="raw/Trinity.fasta"/>
<param name="gene_to_trans" value="raw/Trinity.map" />
<param name="library_type" value="RF"/>
<param name="est_method" value="kallisto"/>
<param name="has_gene_map" value="yes"/>
Expand Down
27 changes: 12 additions & 15 deletions tools/trinity/analyze_diff_expr.xml
@@ -1,38 +1,39 @@
<tool id="trinity_analyze_diff_expr" name="Extract and cluster differentially expressed transcripts" version="@WRAPPER_VERSION@.2">
<tool id="trinity_analyze_diff_expr" name="Extract and cluster differentially expressed transcripts" version="@WRAPPER_VERSION@">
<description>from a Trinity assembly</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements">
<requirement type="package" version="2.6.0">bioconductor-qvalue</requirement>
<requirement type="package" version="1.26.0">bioconductor-goseq</requirement>
<requirement type="package" version="2.10.0">bioconductor-qvalue</requirement>
<requirement type="package" version="1.30.0">bioconductor-goseq</requirement>
<requirement type="package" version="2.0.6">r-cluster</requirement>
<requirement type="package" version="1.1.24">r-fastcluster</requirement>
</expand>
<command detect_errors="aggressive"><![CDATA[
## DE results input files must be in the working directory and have suffix .DE_results
#import re
#for $input in $DE_results
#if re.search('.DE_results$',input.element_identifier)
## General case, where DE results files have been previously generated by run_de_analysis.pl
ln -s "${input}" "${re.sub('[^\w\-_.]', '_', input.element_identifier)}"
ln -s '${input}' "${re.sub('[^\w\-_.]', '_', input.element_identifier)}"
#else
## Particular case, where DE results files have non-standard names
ln -s "${input}" "${re.sub('[^\w\-_.]', '_', input.element_identifier)}.DE_results"
ln -s '${input}' "${re.sub('[^\w\-_.]', '_', input.element_identifier)}.DE_results"
#end if
&&
#end for
#if str( $additional_params.GO_enrichment.examine_GO_enrichment ) == "yes":
## DE matrix input files must be in the working directory and have the same name as DE results input files, but replacing suffix .DE_results by suffix .count_matrix
#for $DE_matrix in $additional_params.GO_enrichment.DE_matrices
## Handle general case, where DE results files and DE matrix files have been previously generated by run_de_analysis.pl
ln -s "${DE_matrix}" "${re.sub('[^\w\-_.]', '_', DE_matrix.element_identifier)}"
ln -s '${DE_matrix}' "${re.sub('[^\w\-_.]', '_', DE_matrix.element_identifier)}"
&&
#end for
#end if

analyze_diff_expr.pl
--matrix "${matrix}"
--samples "${samples}"
--matrix '${matrix}'
--samples '${samples}'
-P ${p}
-C ${c}

Expand All @@ -48,8 +49,8 @@

#if str( $additional_params.GO_enrichment.examine_GO_enrichment ) == "yes":
--examine_GO_enrichment
--GO_annots "${$additional_params.GO_enrichment.GO_annots}"
--gene_lengths "${$additional_params.GO_enrichment.gene_lengths}"
--GO_annots '${$additional_params.GO_enrichment.GO_annots}'
--gene_lengths '${$additional_params.GO_enrichment.gene_lengths}'
#end if

--output results
Expand Down Expand Up @@ -162,8 +163,6 @@
<param name="samples" value="count/samples.txt"/>
<param name="DE_results">
<collection type="list">
<element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results" value="count/exp_diff/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results" ftype="tabular" />
<element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results" value="count/exp_diff/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results" ftype="tabular" />
<element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results" value="count/exp_diff/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results" ftype="tabular" />
</collection>
</param>
Expand All @@ -172,8 +171,6 @@
<param name="examine_GO_enrichment" value="yes"/>
<param name="DE_matrices">
<collection type="list">
<element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2.count_matrix" value="count/exp_diff/input.matrix.wt_37_vs_wt_GSNO.DESeq2.count_matrix" ftype="tabular" />
<element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.count_matrix" value="count/exp_diff/input.matrix.wt_37_vs_wt_ph8.DESeq2.count_matrix" ftype="tabular" />
<element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.count_matrix" value="count/exp_diff/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.count_matrix" ftype="tabular" />
</collection>
</param>
Expand All @@ -187,7 +184,7 @@
<has_text text="--gene_lengths" />
</assert_command>
<output_collection name="GOseq_enrichment">
<element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_37-UP.subset.GOseq.enriched" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_37-UP.subset.GOseq.enriched"/>
<element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.DE.subset.GOseq.enriched" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.DE.subset.GOseq.enriched"/>
</output_collection>
</test>
</tests>
Expand Down
4 changes: 2 additions & 2 deletions tools/trinity/contig_exn50_statistic.xml
@@ -1,4 +1,4 @@
<tool id="trinity_contig_exn50_statistic" name="Compute contig Ex90N50 statistic and Ex90 transcript count" version="@WRAPPER_VERSION@.0">
<tool id="trinity_contig_exn50_statistic" name="Compute contig Ex90N50 statistic and Ex90 transcript count" version="@WRAPPER_VERSION@">
<description>from a Trinity assembly</description>
<macros>
<import>macros.xml</import>
Expand Down Expand Up @@ -27,7 +27,7 @@
<help>
<![CDATA[
Trinity_ assembles transcript sequences from Illumina RNA-Seq data.
This tool computes the N50 statistic limited to the top most highly expressed transcripts that represent x% of the total normalized expression data. This requires that you have first performed transcript abundance estimation with 'Align reads and estimate abundance for a de novo assembly of RNA-Seq data by Trinity' tool and that you have built the expression matrix with 'Build expression matrix for a de novo assembly of RNA-Seq data by Trinity' tool.
This tool computes the N50 statistic restricted to the most highly expressed transcripts that together account for x% of the total normalized expression data. It requires that you have first estimated transcript abundance with the 'Align reads and estimate abundance on a de novo assembly of RNA-Seq data' tool and then built the expression matrix with the 'Build expression matrix for a de novo assembly of RNA-Seq data by Trinity' tool.

**Inputs**

Expand Down
5 changes: 3 additions & 2 deletions tools/trinity/define_clusters_by_cutting_tree.xml
@@ -1,11 +1,12 @@
<tool id="trinity_define_clusters_by_cutting_tree" name="Partition genes into expression clusters" version="@WRAPPER_VERSION@.0">
<tool id="trinity_define_clusters_by_cutting_tree" name="Partition genes into expression clusters" version="@WRAPPER_VERSION@">
<description>after differential expression analysis using a Trinity assembly</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements">
<requirement type="package" version="2.34.0">bioconductor-biobase</requirement>
<requirement type="package" version="2.38.0">bioconductor-biobase</requirement>
<requirement type="package" version="2.0.6">r-cluster</requirement>
<requirement type="package" version="1.1.24">r-fastcluster</requirement>
</expand>
<command detect_errors="aggressive"><![CDATA[

Expand Down
2 changes: 1 addition & 1 deletion tools/trinity/describe_samples.xml
@@ -1,5 +1,5 @@
<?xml version="1.0"?>
<tool id="describe_samples" name="Describe samples" version="@WRAPPER_VERSION@.0">
<tool id="describe_samples" name="Describe samples" version="@WRAPPER_VERSION@">
<description>and replicates</description>
<macros>
<import>macros.xml</import>
Expand Down
15 changes: 9 additions & 6 deletions tools/trinity/filter_low_expr_transcripts.xml
@@ -1,19 +1,22 @@
<tool id="trinity_filter_low_expr_transcripts" name="Filter low expression transcripts" version="@WRAPPER_VERSION@.0">
<tool id="trinity_filter_low_expr_transcripts" name="Filter low expression transcripts" version="@WRAPPER_VERSION@">
<description>from a Trinity assembly</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements"/>
<command detect_errors="aggressive"><![CDATA[

#if $additional_params.gene_map.has_gene_map == "yes":
get_Trinity_gene_to_trans_map.pl '$assembly' > gene_to_trans.map &&
#else:
ln -f -s '$additional_params.gene_map.gene_trans_map' gene_to_trans.map &&
#end if

filter_low_expr_transcripts.pl
--matrix '$matrix'
--transcripts '$assembly'

#if $additional_params.gene_map.has_gene_map == "no":
--gene_to_trans_map '$additional_params.gene_map.gene_trans_map'
#else
--trinity_mode
#end if
--gene_to_trans_map gene_to_trans.map

#if str($min_expr_any):
--min_expr_any $min_expr_any
Expand Down