# sequana_pipelines/rnaseq/multiqc_config.yaml

#######################################
# Example MultiQC Configuration File
#######################################

# This file can be saved either in the MultiQC installation
# directory, or as ~/.multiqc_config.yaml

# Configuration settings are taken from the following locations, in order:
# - Hardcoded in MultiQC (multiqc/utils/config.py)
# - <installation_dir>/multiqc_config.yaml
# - ~/.multiqc_config.yaml
# - Command line options

# Note that all of the values below are set to the MultiQC defaults.
# It's recommended that you delete any that you don't need.


#------------------------------------------------------------------------------
#        TO CHANGE
#-------------------------------------------------------------------------------

# Report heading: main title, subtitle and the introductory paragraph.
title: 'sequana_rnaseq MultiQC summary'
subtitle: 'RNA-seq analysis'
intro_text: >-
  Report summarising cleaning (cutadapt section) and mapping
  (stat/bowtie sections) of your FastQ files, as well as feature counts
  (featureCounts section) and quick contaminant search (fastq screen section)

# Generic information added to the top of the report.
# Each entry is a single "label: value" pair; the values below embed HTML links.
report_header_info:
  - Authors: '<a href="http://github.com/sequana/sequana">Sequana developers</a>'
  - Want to know more?: >-
      See the <a href="http://sequana.readthedocs.io" target="_blank">Sequana</a> and
      <a href="http://github.com/sequana/sequana_rnaseq/" target="_blank"> sequana_rnaseq
      pipeline</a> documentation.
  - Citations: >-
      If you used Sequanix, Sequana, Sequana_coverage tool, or any Sequana pipelines,
      please see <a href="http://sequana.readthedocs.io">How to cite ?</a> section.
      In particular, if you use this report in a publication, please cite Sequana.
  - Contact E-mail: ''
#   - Project Type: 'Differential gene expression'


#-------------------------------------------------------------------------------


# Prepend sample names with their directory. Useful if analysing the
# same samples with different parameters.
prepend_dirs: false

# Default names of the report file and of the parsed-data directory
output_fn_name: 'multiqc_report.html'
data_dir_name: 'multiqc_data'

# Create the parsed data directory in addition to the report
make_data_dir: true

# Sample-name cleaning. Sample names are typically detected from an input
# filename; when one of the strings below is found, it is discarded together
# with any text to its right.
# Example: file1.fq.gz_trimmed.bam_deduplicated_fastqc.zip
# would be cleaned to 'file1'.
# Two options exist: fn_clean_exts replaces the defaults, whereas
# extra_fn_clean_exts (used here) appends to the defaults.
extra_fn_clean_exts:
  - '.gz'
  - '.fastq'
  - '_R1_.cutadapt'
  - '_R2_.cutadapt'
  - '_R2_001'
  - '_R1_001'
  - '.err'
  - type: 'remove'
    pattern: '.sorted'
  - type: 'regex'
    pattern: '_S\d+'


# Files to skip when searching for logs (glob patterns are used below)
fn_ignore_files:
  - '.DS_Store'
  - 'slurm*out'
  - '*.js'

# Directories to skip when searching for logs
fn_ignore_dirs:
  - '.sequana'
  - '.snakemake'
  - 'multiqc'
  - 'logs'

# Skip the results of the 3 strand cases: only the final ones found in
# post_analysis/feature_counts should be used.
fn_ignore_paths:
  - '*/feature_counts/*'

# Ignore files larger than this when searching for logs (bytes)
log_filesize_limit: 5000000

# MultiQC skips a couple of debug messages when searching files, because the
# log can get very verbose otherwise. Set these to true to help debugging.
report_readerrors: false
report_imgskips: false

# Set to true to opt out of the remote check for the latest MultiQC version
no_version_check: false

# How to plot graphs. Different templates can override these settings, but
# the default template can use interactive plots (Javascript using HighCharts)
# or flat plots (images, using MatPlotLib). With interactive plots, the report
# can avoid automatically rendering all graphs when there are lots of samples,
# so that the browser is not locked up when the report opens.

# Try to use only flat image graphs
plots_force_flat: false
# Try to use only interactive javascript graphs
plots_force_interactive: false
# If neither of the above, use flat if > this number of datasets
plots_flat_numseries: 100
# If interactive, don't plot on load if > this number of datasets
num_datasets_plot_limit: 50
# Swap tables for a beeswarm plot above this
max_table_rows: 500

# Module filename search patterns overriding the defaults found in
# multiqc/utils/search_patterns.yaml. A default can be removed by setting
# it to null.
sp:
  star:
    fn: '*Log.final.out'
  cutadapt:
    fn: 'cutadapt.txt'
  fastp:
    fn: '*fastp*json'
  # rna_seqc/metrics:
  #   fn: "*metrics.tsv"
  # rna_seqc/coverage:
  #   fn: "*coverage.tsv"

# Read-count display: empty unit prefix and a multiplier of 1
# (presumably so that read counts are reported as raw numbers rather than
# scaled ones; see the MultiQC documentation on number formatting).
read_count_prefix: ''
read_count_multiplier: 1

# Overwrite the defaults of which table columns are visible by default,
# grouped by module; false hides a column, true shows it.
table_columns_visible:
  FastQC:
    percent_fails: false
    total_sequences: true
    percent_gc: false
  fastp:
    pct_duplication: false
    after_filtering_gc_content: false
  Bowtie 1:
    reads_aligned_percentage: false
    reads_aligned: false
  picard:
    PERCENT_DUPLICATION: false

# Modules to place at the top of the report, in this order
# (see the MultiQC documentation on module order).
top_modules:
  - fastqc
  - fastp
  - bowtie1
  - bowtie2
  - salmon
  - star
  - featureCounts

# Order in which module sections are listed in the report.
# NOTE(review): `markduplicates` may not be a standalone MultiQC module name;
# confirm it is recognised or harmlessly ignored.
module_order:
  - fastqc
  - fastp
  - rseqc
  - markduplicates
  - picard
  - bowtie1
  - bowtie2
  - salmon
  - star
  - featureCounts

# Report sections (identified by their section ID) to leave out of the report
remove_sections:
  - fastqc_status_checks
  - fastqc_per_base_n_content


#fastqc_config:
#fastqc_theoretical_gc: 'mm10_genome'
#