update funannotate tuto

galaxyproject · Apr 26, 2023 · ed3325d · ed3325d
1 parent c62f839
commit ed3325d
Show file tree

Hide file tree

Showing 7 changed files with 218 additions and 458 deletions.
diff --git a/topics/genome-annotation/tutorials/funannotate/data-library.yaml b/topics/genome-annotation/tutorials/funannotate/data-library.yaml
@@ -10,30 +10,30 @@ items:
   items:
   - name: Genome annotation with Funannotate
     items:
-    - name: 'DOI: 10.5281/zenodo.5906635'
+    - name: 'DOI: 10.5281/zenodo.7867921'
       description: latest
       items:
-      - url: https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/alternate_annotation.gbk
+      - url: https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/alternate_annotation.gbk
         src: url
         ext: genbank
-        info: https://zenodo.org/record/5906635
-      - url: https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/alternate_annotation.gff3
+        info: https://zenodo.org/record/7867921
+      - url: https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/alternate_annotation.gff3
         src: url
         ext: gff3
-        info: https://zenodo.org/record/5906635
-      - url: https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/genome_masked.fasta
+        info: https://zenodo.org/record/7867921
+      - url: https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/genome_masked.fasta
         src: url
         ext: fasta
-        info: https://zenodo.org/record/5906635
-      - url: https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/rnaseq_R1.fq.gz
+        info: https://zenodo.org/record/7867921
+      - url: https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/rnaseq_R1.fq.gz
         src: url
         ext: fastqsanger.gz
-        info: https://zenodo.org/record/5906635
-      - url: https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/rnaseq_R2.fq.gz
+        info: https://zenodo.org/record/7867921
+      - url: https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/rnaseq_R2.fq.gz
         src: url
         ext: fastqsanger.gz
-        info: https://zenodo.org/record/5906635
-      - url: https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/SwissProt_subset.fasta
+        info: https://zenodo.org/record/7867921
+      - url: https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/SwissProt_subset.fasta
         src: url
         ext: fasta
-        info: https://zenodo.org/record/5906635
+        info: https://zenodo.org/record/7867921
diff --git a/topics/genome-annotation/tutorials/funannotate/tutorial.md b/topics/genome-annotation/tutorials/funannotate/tutorial.md
@@ -2,7 +2,7 @@
 layout: tutorial_hands_on
 
 title: Genome annotation with Funannotate
-zenodo_link: https://zenodo.org/record/5726818
+zenodo_link: https://zenodo.org/record/7867921
 tags:
   - eukaryote
 questions:
@@ -95,53 +95,19 @@ To annotate our genome using Funannotate, we will use the following files:
 >     -> `{{ page.title }}`):
 >
 >    ```
->    https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/genome_masked.fasta
->    https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/rnaseq_R1.fq.gz
->    https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/rnaseq_R2.fq.gz
->    https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/SwissProt_subset.fasta
->    https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/alternate_annotation.gbk
->    https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/alternate_annotation.gff3
+>    https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/genome_masked.fasta
+>    https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/rnaseq_R1.fq.gz
+>    https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/rnaseq_R2.fq.gz
+>    https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/SwissProt_subset.fasta
+>    https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/alternate_annotation.gbk
+>    https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/alternate_annotation.gff3
 >    ```
 >
 >    {% snippet faqs/galaxy/datasets_import_via_link.md %}
 >    {% snippet faqs/galaxy/datasets_import_from_data_library.md %}
 >
 {: .hands_on}
 
-# Preparing the genome sequence
-
-Before annotating the genome, we want to make sure that the fasta file is properly formatted. We do it now to make sure we will not encounter unexpected errors later in the annotation process.
-
-Funannotate provides two little tools to help us. Let's run the two tools, one after the other.
-
-The first one ({% tool [Funannotate assembly clean](toolshed.g2.bx.psu.edu/repos/iuc/funannotate_clean/funannotate_clean/1.8.9+galaxy2) %}) compares all the sequences between them, and removes the shorter ones that are already included in longer ones. This is to reduce unexpected redundancy in the genome. This step is recommended only for haploid genomes (we know our organism is haploid). This first tool also removes any suspicious sequence (like sequences made only of 1 or 2 letters, instead of the 5 expected (ATGCN).
-
-The second tool will ensure that our fasta file is sorted, based on the length of the contigs (the longest ones first). It will also rename contigs to make sure the name are standard (they will all begin with `scaffold_`, then a number).
-
-> <hands-on-title>Polish the assembly</hands-on-title>
->
-> 1. {% tool [Funannotate assembly clean](toolshed.g2.bx.psu.edu/repos/iuc/funannotate_clean/funannotate_clean/1.8.9+galaxy2) %} with the following parameters:
->    - {% icon param-file %} *"Assembly to clean"*: `genome_masked.fasta` (Input dataset)
->
-> 2. {% tool [Sort assembly](toolshed.g2.bx.psu.edu/repos/iuc/funannotate_sort/funannotate_sort/1.8.9+galaxy2) %} with the following parameters:
->    - {% icon param-file %} *"Assembly to sort"*: `output` (output of **Funannotate assembly clean** {% icon tool %})
->
-{: .hands_on}
-
-After this step, the genome is clean, sorted, and ready for the structural annotation.
-
-> <question-title></question-title>
->
-> How many sequences are removed by this cleaning step ?
->
-> > <solution-title></solution-title>
-> >
-> > The repeat masked genome contains 1461 sequences, while the cleand one only contains 1425, so 36 were removed.
-> >
-> {: .solution}
->
-{: .question}
-
 # Preparing RNASeq data
 
 When you sequence a new genome, you usually sequence a few libraries of RNASeq data, from different tissues and in different conditions, because this data will help you in annotating the genome. Here, we are using data from one RNASeq dataset that is available on [Sequence Read Archive (SRA)](https://www.ncbi.nlm.nih.gov/sra): [SRR8534859](https://www.ncbi.nlm.nih.gov/sra/?term=SRR8534859).
@@ -157,7 +123,7 @@ To make use of this RNASeq data, we need to map it on the genome. We will use **
 >        - {% icon param-file %} *"RNA-Seq FASTQ/FASTA file, forward reads"*: `rnaseq_R1.fq.gz` (Input dataset)
 >        - {% icon param-file %} *"RNA-Seq FASTQ/FASTA file, reverse reads"*: `rnaseq_R2.fq.gz` (Input dataset)
 >    - *"Custom or built-in reference genome"*: `Use reference genome from history and create temporary index`
->        - {% icon param-file %} *"Select a reference genome"*: `genome` (output of **Sort assembly** {% icon tool %})
+>        - {% icon param-file %} *"Select a reference genome"*: `genome_masked.fasta` (Input dataset)
 >        - *"Length of the SA pre-indexing string"*: `11`
 >
 {: .hands_on}
@@ -196,8 +162,8 @@ Funannotate is also able to use GeneMark to predict new genes, but to due to lic
 
 > <hands-on-title></hands-on-title>
 >
-> 1. {% tool [Funannotate predict annotation](toolshed.g2.bx.psu.edu/repos/iuc/funannotate_predict/funannotate_predict/1.8.9+galaxy2) %} with the following parameters:
->    - {% icon param-file %} *"Assembly to annotate"*: `genome` (output of **Sort assembly** {% icon tool %})
+> 1. {% tool [Funannotate predict annotation](toolshed.g2.bx.psu.edu/repos/iuc/funannotate_predict/funannotate_predict/1.8.15+galaxy1) %} with the following parameters:
+>    - {% icon param-file %} *"Assembly to annotate"*: `genome_masked.fasta` (Input dataset)
 >    - *"Funannotate database"*: select the latest version available
 >    - In *"Organism"*:
 >        - *"Name of the species to annotate"*: `Mucor mucedo`
@@ -274,7 +240,7 @@ Now we have a structural annotation, and the results of both **EggNOG Mapper** a
 
 > <hands-on-title></hands-on-title>
 >
-> 1. {% tool [Funannotate functional](toolshed.g2.bx.psu.edu/repos/iuc/funannotate_annotate/funannotate_annotate/1.8.9+galaxy2) %} with the following parameters:
+> 1. {% tool [Funannotate functional](toolshed.g2.bx.psu.edu/repos/iuc/funannotate_annotate/funannotate_annotate/1.8.15+galaxy1) %} with the following parameters:
 >    - *"Input format"*: `GenBank (from 'Funannotate predict annotation' tool)`
 >        - {% icon param-file %} *"Genome annotation in genbank format"*: `annotation (genbank)` (output of **Funannotate predict annotation** {% icon tool %})
 >    - *"Funannotate database"*: select the latest version available
@@ -303,7 +269,7 @@ We now have a complete annotation, including functional annotation, but it's tim
 
 > <hands-on-title></hands-on-title>
 >
-> 1. {% tool [Busco](toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.2.2+galaxy2) %} with the following parameters:
+> 1. {% tool [Busco](toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.4.6+galaxy0) %} with the following parameters:
 >    - {% icon param-file %} *"Sequences to analyse"*: `protein sequences` (output of **Funannotate functional** {% icon tool %})
 >    - *"Mode"*: `annotated gene sets (protein)`
 >    - *"Auto-detect or select lineage?"*: `Select lineage`
@@ -336,7 +302,7 @@ With Galaxy, you can visualize the annotation you have generated using JBrowse g
 >
 > 1. {% tool [JBrowse](toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy1) %} with the following parameters:
 >    - *"Reference genome to display"*: `Use a genome from history`
->        - {% icon param-file %} *"Select the reference genome"*: `genome` (output of **Sort assembly** {% icon tool %})
+>        - {% icon param-file %} *"Select the reference genome"*: `genome_masked.fasta` (Input dataset)
 >    - In *"Track Group"*:
 >        - {% icon param-repeat %} *"Insert Track Group"*
 >            - *"Track Category"*: `Annotation`
@@ -416,7 +382,7 @@ The output is a web page where you can see how many loci or genes are identical
 
 > <hands-on-title></hands-on-title>
 >
-> 1. {% tool [Funannotate compare](toolshed.g2.bx.psu.edu/repos/iuc/funannotate_compare/funannotate_compare/1.8.9+galaxy2) %} with the following parameters:
+> 1. {% tool [Funannotate compare](toolshed.g2.bx.psu.edu/repos/iuc/funannotate_compare/funannotate_compare/1.8.15+galaxy1) %} with the following parameters:
 >    - {% icon param-files %} *"Genome annotations in genbank format"*: `alternate_annotation.gbk` (Input dataset) and `gbk` (output of **Funannotate functional** {% icon tool %})
 >    - *"Funannotate database"*: select the latest version available
 >

diff --git a/topics/genome-annotation/tutorials/funannotate/workflows/funannotate-tests.yml b/topics/genome-annotation/tutorials/funannotate/workflows/funannotate-tests.yml
@@ -3,31 +3,31 @@
     Alternate annotation gbk:
       class: File
       filetype: genbank
-      location: https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/alternate_annotation.gbk
+      location: https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/alternate_annotation.gbk
     Alternate annotation gff3:
       class: File
       filetype: gff3
-      location: https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/alternate_annotation.gff3
+      location: https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/alternate_annotation.gff3
     Genome assembly:
       class: File
       filetype: fasta
-      location: https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/genome_masked.fasta
+      location: https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/genome_masked.fasta
     NCBI submission template:
       class: File
       filetype: txt
       location: test-data/NCBI_submission_template.txt
     Protein evidence sequences:
       class: File
       filetype: fasta
-      location: https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/SwissProt_subset.fasta
+      location: https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/SwissProt_subset.fasta
     RNASeq reads forward:
       class: File
       filetype: fastqsanger.gz
-      location: https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/rnaseq_R1.fq.gz
+      location: https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/rnaseq_R1.fq.gz
     RNASeq reads reverse:
       class: File
       filetype: fastqsanger.gz
-      location: https://zenodo.org/api/files/8c2cc766-2b68-45bd-a2d3-391acf9bdb1b/rnaseq_R2.fq.gz
+      location: https://zenodo.org/api/files/47406781-e8af-42e7-855d-d29e4a098f6f/rnaseq_R2.fq.gz
   outputs:
     'Busco summary':
       file: 'test-data/busco_sum.txt'