diff --git a/AGENTS.md b/AGENTS.md index 453c8ac72..ebc18f2a0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -352,7 +352,7 @@ withName: 'OPENMS_PERCOLATORADAPTER' { publishDir = [ path: { "${params.outdir}/intermediate_results/fdr_control" }, mode: params.publish_dir_mode, - pattern: '*.idXML' + pattern: '*.idparquet' ] } ``` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 3a52d98a5..40d716fd1 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -75,7 +75,7 @@ sp: fn: "*_ms_info.parquet" num_lines: 0 pmultiqc/idXML: - fn: "*.idXML" + fn: "*.idparquet" num_lines: 0 pmultiqc/msstats: fn: "*msstats_in.csv" diff --git a/conf/modules/id.config b/conf/modules/id.config index 1b2a4ac4a..966700307 100644 --- a/conf/modules/id.config +++ b/conf/modules/id.config @@ -6,11 +6,6 @@ process { - // FDRCONSENSUSID - withName: '.*:FDR_CONSENSUSID' { - ext.args = "-PSM true -protein false" - } - // ID_SCORE_SWITCHER for phospho scoring withName: '.*:ID:PHOSPHO_SCORING:ID_SCORE_SWITCHER' { ext.args = [ @@ -47,13 +42,13 @@ process { // PSM FDR control ID_FILTER withName: '.*:ID:PSM_FDR_CONTROL:ID_FILTER' { ext.args = "-score:psm \"$params.run_fdr_cutoff\"" - ext.suffix = '.idXML' + ext.suffix = '.idparquet' } // DDA_ID PSM FDR control ID_FILTER withName: '.*:DDA_ID:PSM_FDR_CONTROL:ID_FILTER' { ext.args = "-score:psm \"$params.run_fdr_cutoff\"" - ext.suffix = '.idXML' + ext.suffix = '.idparquet' } // MS2RESCORE diff --git a/conf/modules/shared.config b/conf/modules/shared.config index c796635e1..1a9ee7c48 100644 --- a/conf/modules/shared.config +++ b/conf/modules/shared.config @@ -34,7 +34,7 @@ process { } // Result tables from multiple pipelines including LFQ, TMT, DDA - withName: '.*:PROTEOMICSLFQ|PROTEIN_QUANTIFIER|MSSTATS_CONVERTER' { + withName: '.*:PROTEOMICSLFQ|PROTEIN_QUANTIFIER|MSSTATS_CONVERTER|ISOBARIC_WORKFLOW' { publishDir = [ path: { "${params.outdir}/quant_tables" }, mode: 'copy', diff --git a/modules/local/openms/comet/meta.yml 
b/modules/local/openms/comet/meta.yml index 5f255f4f9..a36a020c2 100644 --- a/modules/local/openms/comet/meta.yml +++ b/modules/local/openms/comet/meta.yml @@ -29,7 +29,7 @@ output: - id_files_comet: type: file description: Output file - pattern: "*.idXML" + pattern: "*.idparquet" - log: type: file description: log file diff --git a/modules/local/openms/consensusid/main.nf b/modules/local/openms/consensusid/main.nf deleted file mode 100644 index 76346fb21..000000000 --- a/modules/local/openms/consensusid/main.nf +++ /dev/null @@ -1,40 +0,0 @@ -process CONSENSUSID { - tag "$meta.mzml_id" - label 'process_single' - label 'openms' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:2025.04.14' : - 'ghcr.io/bigbio/openms-tools-thirdparty:2025.04.14' }" - - input: - tuple val(meta), path(id_file) - - output: - tuple val(meta), path("${meta.mzml_id}_consensus.idXML"), emit: consensusids - path "versions.yml", emit: versions - path "*.log", emit: log - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.mzml_id}" - - """ - ConsensusID \\ - -in ${id_file} \\ - -out ${meta.mzml_id}_consensus.idXML \\ - -per_spectrum \\ - -threads $task.cpus \\ - -algorithm $params.consensusid_algorithm \\ - -filter:min_support $params.min_consensus_support \\ - -filter:considered_hits $params.consensusid_considered_top_hits \\ - -debug $params.consensusid_debug \\ - $args \\ - 2>&1 | tee ${meta.mzml_id}_consensusID.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ConsensusID: \$(ConsensusID 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) - END_VERSIONS - """ -} diff --git a/modules/local/openms/consensusid/meta.yml b/modules/local/openms/consensusid/meta.yml deleted file mode 100644 index c8a7df5a0..000000000 --- a/modules/local/openms/consensusid/meta.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: consensusid 
-description: Computes a consensus from results of multiple peptide identification engines. -keywords: - - consensus scoring - - peptide database search - - OpenMS -tools: - - ConsensusID: - description: | - Tool to Computes a consensus from results of multiple peptide identification engines. - homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_ConsensusID.html - documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_ConsensusID.html -input: - - meta: - type: map - description: Groovy Map containing sample information - - id_file: - type: file - description: | - Putative peptide-spectrum matches in idXML format (potentially multiple per spectrum). Score needs to be comparable (e.g. probability) - pattern: "*.idXML" -output: - - meta: - type: map - description: Groovy Map containing sample information - - id_files_idx_ForIDPEP_FDR: - type: file - description: | - Identifications with annotated FDR. 
- pattern: "*.idXML" - - log: - type: file - description: log file - pattern: "*.log" - - version: - type: file - description: File containing software version - pattern: "versions.yml" -authors: - - "@daichengxin" diff --git a/modules/local/openms/extractfeatures/main.nf b/modules/local/openms/extractfeatures/main.nf index 046921f83..16340ee11 100644 --- a/modules/local/openms/extractfeatures/main.nf +++ b/modules/local/openms/extractfeatures/main.nf @@ -36,4 +36,4 @@ process EXTRACTPSMFEATURES { PSMFeatureExtractor: \$(PSMFeatureExtractor 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/local/openms/false_discovery_rate/meta.yml b/modules/local/openms/extractfeatures/meta.yml similarity index 51% rename from modules/local/openms/false_discovery_rate/meta.yml rename to modules/local/openms/extractfeatures/meta.yml index 62fa100fc..eff674afc 100644 --- a/modules/local/openms/false_discovery_rate/meta.yml +++ b/modules/local/openms/extractfeatures/meta.yml @@ -1,33 +1,34 @@ -name: false_discovery_rate -description: Estimates the false discovery rate on peptide and protein level using decoy searches. +name: EXTRACTPSMFEATURES +description: Extracts PSM features from multiple search engines for rescoring. keywords: - - FDR - - decoy + - PSM + - feature extraction + - rescoring - OpenMS tools: - - FalseDiscoveryRate: + - PSMFeatureExtractor: description: | - Tool to estimate the false discovery rate on peptide and protein level. - homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_FalseDiscoveryRate.html - documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_FalseDiscoveryRate.html + Tool to extract PSM features from identification results for downstream rescoring. 
+ homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_PSMFeatureExtractor.html + documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_PSMFeatureExtractor.html input: - meta: type: map description: Groovy Map containing sample information - - id_file: + - id_files: type: file description: | - Identifications from searching a target-decoy database. - pattern: "*.idXML" + Identification files from one or multiple search engines. + pattern: "*.idparquet" output: - meta: type: map description: Groovy Map containing sample information - - id_files_idx_ForIDPEP_FDR: + - id_files_feat: type: file description: | - Identifications with annotated FDR. - pattern: "*.idXML" + PSM features in Parquet format for rescoring. + pattern: "*_feat.idparquet" - log: type: file description: log file diff --git a/modules/local/openms/false_discovery_rate/main.nf b/modules/local/openms/false_discovery_rate/main.nf deleted file mode 100644 index 37a872dde..000000000 --- a/modules/local/openms/false_discovery_rate/main.nf +++ /dev/null @@ -1,40 +0,0 @@ -process FALSE_DISCOVERY_RATE { - tag "$meta.mzml_id" - label 'process_low' - label 'process_single' - label 'openms' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:2025.04.14' : - 'ghcr.io/bigbio/openms-tools-thirdparty:2025.04.14' }" - - input: - tuple val(meta), path(id_file) - - output: - tuple val(meta), path("${id_file.baseName}_fdr.idXML"), emit: id_files_idx_ForIDPEP_FDR - path "versions.yml", emit: versions - path "*.log", emit: log - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.mzml_id}" - - """ - FalseDiscoveryRate \\ - -in ${id_file} \\ - -out ${id_file.baseName}_fdr.idXML \\ - -threads $task.cpus \\ - -FDR:PSM ${params.run_fdr_cutoff} \\ - -algorithm:add_decoy_peptides \\ - -algorithm:add_decoy_proteins \\ - -algorithm:conservative ${params.fdr_conservative} \\ - $args \\ - 2>&1 | tee ${id_file.baseName}_fdr.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - FalseDiscoveryRate: \$(FalseDiscoveryRate 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) - END_VERSIONS - """ -} diff --git a/modules/local/openms/id_conflict_resolver/main.nf b/modules/local/openms/id_conflict_resolver/main.nf deleted file mode 100644 index d9b135afc..000000000 --- a/modules/local/openms/id_conflict_resolver/main.nf +++ /dev/null @@ -1,33 +0,0 @@ -process ID_CONFLICT_RESOLVER { - label 'process_low' - label 'openms' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:2025.04.14' : - 'ghcr.io/bigbio/openms-tools-thirdparty:2025.04.14' }" - - input: - path consus_file - - output: - path "${consus_file.baseName}_resconf.consensusXML", emit: pro_resconf - path "versions.yml", emit: versions - path "*.log", emit: log - - script: - def args = task.ext.args ?: '' - - """ - IDConflictResolver \\ - -in ${consus_file} \\ - -threads $task.cpus \\ - -out ${consus_file.baseName}_resconf.consensusXML \\ - $args \\ - 2>&1 | tee ${consus_file.baseName}_resconf.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - IDConflictResolver: \$(IDConflictResolver 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) - END_VERSIONS - """ -} diff --git a/modules/local/openms/id_conflict_resolver/meta.yml b/modules/local/openms/id_conflict_resolver/meta.yml deleted file mode 100644 index 62db48b4f..000000000 --- a/modules/local/openms/id_conflict_resolver/meta.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: id_conflict_resolver -description: Resolves ambiguous annotations of features with peptide identifications. -keywords: - - ambiguous - - OpenMS -tools: - - IDConflictResolver: - description: | - Resolves ambiguous annotations of features with peptide identifications. 
- homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_IDConflictResolver.html - documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_IDConflictResolver.html -input: - - consus_file: - type: file - description: | - Input file (data annotated with identifications) - pattern: "*.{featureXML,consensusXML}" -output: - - pro_resconf: - type: file - description: Output file (data with one peptide identification per feature) - - log: - type: file - description: log file - pattern: "*.log" - - version: - type: file - description: File containing software version - pattern: "versions.yml" -authors: - - "@daichengxin" diff --git a/modules/local/openms/id_filter/main.nf b/modules/local/openms/id_filter/main.nf index 0f939bf27..2bf6c8cf1 100644 --- a/modules/local/openms/id_filter/main.nf +++ b/modules/local/openms/id_filter/main.nf @@ -1,5 +1,5 @@ process ID_FILTER { - tag {task.ext.suffix == ".idXML" ? "$meta.mzml_id" : "$id_file.baseName"} + tag {task.ext.suffix == ".idparquet" ? "$meta.mzml_id" : "$id_file.baseName"} label 'process_very_low' label 'process_single' label 'openms' diff --git a/modules/local/openms/id_mapper/main.nf b/modules/local/openms/id_mapper/main.nf deleted file mode 100644 index 1c88cede2..000000000 --- a/modules/local/openms/id_mapper/main.nf +++ /dev/null @@ -1,36 +0,0 @@ -process ID_MAPPER { - tag "$meta.mzml_id" - label 'process_medium' - label 'openms' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:2025.04.14' : - 'ghcr.io/bigbio/openms-tools-thirdparty:2025.04.14' }" - - input: - tuple val(meta), path(id_file), path(map_file) - - output: - path "${id_file.baseName}_map.consensusXML", emit: id_map - path "versions.yml", emit: versions - path "*.log", emit: log - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.mzml_id}" - - """ - IDMapper \\ - -id ${id_file} \\ - -in ${map_file} \\ - -threads $task.cpus \\ - -out ${id_file.baseName}_map.consensusXML \\ - $args \\ - 2>&1 | tee ${id_file.baseName}_map.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - IDMapper: \$(IDMapper 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) - END_VERSIONS - """ -} diff --git a/modules/local/openms/id_mapper/meta.yml b/modules/local/openms/id_mapper/meta.yml deleted file mode 100644 index 67e9ed6d3..000000000 --- a/modules/local/openms/id_mapper/meta.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: id_mapper -description: Assigns protein/peptide identifications to features or consensus features. -keywords: - - feature - - identification - - OpenMS -tools: - - IDMapper: - description: | - Assigns protein/peptide identifications to features or consensus features. 
- homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_IDMapper.html - documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_IDMapper.html -input: - - meta: - type: map - description: Groovy Map containing sample information - - id_file: - type: file - description: | - Identifications file - pattern: "*.{idXML, mzid}" - - map_file: - type: file - description: Feature map/consensus map file - pattern: "*.{featureXML, consensusXML, mzq}" -output: - - id_map: - type: file - description: Output file - pattern: "*.{featureXML, consensusXML, mzq}" - - log: - type: file - description: log file - pattern: "*.log" - - version: - type: file - description: File containing software version - pattern: "versions.yml" -authors: - - "@daichengxin" diff --git a/modules/local/openms/id_score_switcher/main.nf b/modules/local/openms/id_score_switcher/main.nf index bedd9fd71..97a9cd768 100644 --- a/modules/local/openms/id_score_switcher/main.nf +++ b/modules/local/openms/id_score_switcher/main.nf @@ -12,7 +12,7 @@ process ID_SCORE_SWITCHER { tuple val(meta), path(id_file), val(new_score) output: - tuple val(meta), path("${id_file.baseName}_pep.idXML"), emit: id_score_switcher + tuple val(meta), path("${id_file.baseName}_pep.idparquet"), emit: id_score_switcher path "versions.yml", emit: versions path "*.log", emit: log @@ -23,7 +23,7 @@ process ID_SCORE_SWITCHER { """ IDScoreSwitcher \\ -in ${id_file} \\ - -out ${id_file.baseName}_pep.idXML \\ + -out ${id_file.baseName}_pep.idparquet \\ -threads $task.cpus \\ -new_score ${new_score} \\ $args \\ diff --git a/modules/local/openms/id_score_switcher/meta.yml b/modules/local/openms/id_score_switcher/meta.yml index 246d603b7..d20a3c524 100644 --- a/modules/local/openms/id_score_switcher/meta.yml +++ b/modules/local/openms/id_score_switcher/meta.yml @@ -18,7 +18,7 @@ input: type: file description: | Identifications from searching a target-decoy database. 
- pattern: "*.idXML" + pattern: "*.idparquet" output: - meta: type: map diff --git a/modules/local/openms/index_peptides/main.nf b/modules/local/openms/index_peptides/main.nf index e0628abb9..6849853e6 100644 --- a/modules/local/openms/index_peptides/main.nf +++ b/modules/local/openms/index_peptides/main.nf @@ -12,7 +12,7 @@ process INDEX_PEPTIDES { output: - tuple val(meta), path("${id_file.baseName}_idx.idXML"), emit: id_files_idx + tuple val(meta), path("${id_file.baseName}_idx.idparquet"), emit: id_files_idx path "versions.yml", emit: versions path "*.log", emit: log @@ -45,7 +45,7 @@ process INDEX_PEPTIDES { """ PeptideIndexer \\ -in ${id_file} \\ - -out ${id_file.baseName}_idx.idXML \\ + -out ${id_file.baseName}_idx.idparquet \\ -threads $task.cpus \\ -fasta ${database} \\ -enzyme:name "${enzyme}" \\ diff --git a/modules/local/openms/index_peptides/meta.yml b/modules/local/openms/index_peptides/meta.yml index eb937b22d..2a784d7ff 100644 --- a/modules/local/openms/index_peptides/meta.yml +++ b/modules/local/openms/index_peptides/meta.yml @@ -18,7 +18,7 @@ input: type: file description: | Input idXML file containing the identifications. - pattern: "*.idXML" + pattern: "*.idparquet" - database: type: file description: | @@ -32,7 +32,7 @@ output: type: file description: | Output idXML file. - pattern: "*.idXML" + pattern: "*.idparquet" - log: type: file description: log file diff --git a/modules/local/openms/isobaric_analyzer/main.nf b/modules/local/openms/isobaric_analyzer/main.nf deleted file mode 100644 index 6db8e587a..000000000 --- a/modules/local/openms/isobaric_analyzer/main.nf +++ /dev/null @@ -1,79 +0,0 @@ -process ISOBARIC_ANALYZER { - tag "$meta.mzml_id" - label 'process_medium' - label 'openms' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:2025.04.14' : - 'ghcr.io/bigbio/openms-tools-thirdparty:2025.04.14' }" - - input: - tuple val(meta), path(mzml_file) - - output: - tuple val(meta), path("${mzml_file.baseName}_iso.consensusXML"), emit: id_files_consensusXML - path "versions.yml", emit: versions - path "*.log", emit: log - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.mzml_id}" - - if (params.quant_activation_method == "HCD" || params.quant_activation_method == "HCID") diss_meth = "auto" - else if (params.quant_activation_method == "CID") diss_meth = "Collision-induced dissociation" - else if (params.quant_activation_method == "ETD") diss_meth = "Electron transfer dissociation" - else if (params.quant_activation_method == "ECD") diss_meth = "Electron capture dissociation" - - def iso_normalization = params.iso_normalization ? "-quantification:normalization" : "" - def isotope_correction = params.isotope_correction ? "-quantification:isotope_correction true" : "-quantification:isotope_correction false" - - // Check for isotope correction and load the correction matrix - if (params.isotope_correction) { - if (params.plex_corr_matrix_file == null) { - error("plex_corr_matrix_file is required when isotope_correction is enabled") - } - - // Read the matrix file and format it into the command-line format - // Read the matrix file, skipping lines that start with '#' and process the matrix - def matrix_lines = new File(params.plex_corr_matrix_file).readLines() - .findAll { line -> !line.startsWith('#') && line.trim() } // Skip lines starting with '#' and empty lines - .drop(1) // Assuming the first non-comment line is a header - .collect { line -> - def values = line.split('/') - // Handle different labelling types - if (meta.labelling_type == 'tmt18plex' || meta.labelling_type == 'tmt16plex') { - return "\"${values[1]}/${values[2]}/${values[3]}/${values[4]}/${values[5]}/${values[6]}/${values[7]}/${values[8]}\"" - } 
else { - return "\"${values[1]}/${values[2]}/${values[3]}/${values[4]}\"" - } - } - - // Join the matrix lines into a format for the C++ tool - def correction_matrix = matrix_lines.join(" ") - - isotope_correction += " -${meta.labelling_type}:correction_matrix ${correction_matrix}" - } - - """ - IsobaricAnalyzer \\ - -type ${meta.labelling_type} \\ - -in ${mzml_file} \\ - -threads ${task.cpus} \\ - -extraction:select_activation "${diss_meth}" \\ - -extraction:reporter_mass_shift ${params.reporter_mass_shift} \\ - -extraction:min_reporter_intensity ${params.min_reporter_intensity} \\ - -extraction:min_precursor_purity ${params.min_precursor_purity} \\ - -extraction:precursor_isotope_deviation ${params.precursor_isotope_deviation} \\ - ${iso_normalization} \\ - -${meta.labelling_type}:reference_channel ${params.reference_channel} \\ - ${isotope_correction} \\ - -out ${mzml_file.baseName}_iso.consensusXML \\ - ${args} \\ - 2>&1 | tee ${mzml_file.baseName}_isob.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - IsobaricAnalyzer: \$(IsobaricAnalyzer --version 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) - END_VERSIONS - """ -} diff --git a/modules/local/openms/isobaric_analyzer/meta.yml b/modules/local/openms/isobaric_analyzer/meta.yml deleted file mode 100644 index faf4c4ae2..000000000 --- a/modules/local/openms/isobaric_analyzer/meta.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: isobaric_analyzer -description: Extracts and normalizes isobaric labeling information from an LC-MS/MS experiment. -keywords: - - peak - - OpenMS -tools: - - IsobaricAnalyzer: - description: | - Extracts and normalizes isobaric labeling information from an LC-MS/MS experiment. 
-      homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_IsobaricAnalyzer.html
-      documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_IsobaricAnalyzer.html
-input:
-  - meta:
-      type: map
-      description: Groovy Map containing sample information
-  - mzml_file:
-      type: file
-      description: Input profile data file.
-      pattern: "*.mzML"
-output:
-  - meta:
-      type: map
-      description: Groovy Map containing sample information
-  - iso_consensusXML:
-      type: file
-      description: Output consensusXML file with quantitative information
-      pattern: "*.consensusXML"
-  - log:
-      type: file
-      description: log file
-      pattern: "*.log"
-  - version:
-      type: file
-      description: File containing software version
-      pattern: "versions.yml"
-authors:
-  - "@daichengxin"
diff --git a/modules/local/openms/isobaric_workflow/main.nf b/modules/local/openms/isobaric_workflow/main.nf
new file mode 100644
index 000000000..8a2e54183
--- /dev/null
+++ b/modules/local/openms/isobaric_workflow/main.nf
@@ -0,0 +1,80 @@
+process ISOBARIC_WORKFLOW {
+    tag "${expdes.baseName}"
+    label 'process_high'
+    label 'openms'
+
+    // NOTE(review): both images use the floating 'latest' tag; pin a dated/released tag
+    // once one that ships IsobaricWorkflow is published, so runs stay reproducible.
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:latest' :
+        'ghcr.io/bigbio/openms-tools-thirdparty:latest' }"
+
+    input:
+    val(labelling_type)
+    path(mzmls)
+    path(id_files)
+    path(expdes)
+
+    output:
+    path "${expdes.baseName}_openms.mzTab", emit: out_mztab
+    path "${expdes.baseName}_openms.consensusXML", emit: out_consensusXML
+    path "${expdes.baseName}_qpx", emit: out_qpx
+    path "*.log", emit: log
+    path "versions.yml", emit: versions
+
+    script:
+    def args = task.ext.args ?: ''
+
+    // Always pass the isotope-correction switch explicitly, so that disabling
+    // params.isotope_correction does not fall back to whatever the tool's own default is.
+    def isotope_correction = "-quantification:isotope_correction false"
+    if (params.isotope_correction) {
+        // Fail fast instead of silently skipping the correction the user asked for.
+        if (params.plex_corr_matrix_file == null) {
+            error("plex_corr_matrix_file is required when isotope_correction is enabled")
+        }
+        // Skip comment/blank lines and the header row; each remaining row is '/'-separated and its first field is dropped.
+        def matrix_lines = new File(params.plex_corr_matrix_file).readLines()
+            .findAll { !it.startsWith('#') && it.trim() }
+            .drop(1)
+            .collect { line ->
+                def values = line.split('/')
+                if (labelling_type == 'tmt18plex' || labelling_type == 'tmt16plex') {
+                    return "\"${values[1]}/${values[2]}/${values[3]}/${values[4]}/${values[5]}/${values[6]}/${values[7]}/${values[8]}\""
+                } else {
+                    return "\"${values[1]}/${values[2]}/${values[3]}/${values[4]}\""
+                }
+            }
+        def correction_matrix = matrix_lines.join(" ")
+        isotope_correction = "-quantification:isotope_correction true -${labelling_type}:correction_matrix ${correction_matrix}"
+    }
+
+    """
+    IsobaricWorkflow \\
+        -threads ${task.cpus} \\
+        -in ${mzmls.join(' ')} \\
+        -in_id ${id_files.join(' ')} \\
+        -exp_design ${expdes} \\
+        -type ${labelling_type} \\
+        -inference_method ${params.protein_inference_method} \\
+        -protein_quantification ${params.protein_quant} \\
+        -psmFDR ${params.psm_level_fdr_cutoff} \\
+        -proteinFDR ${params.protein_level_fdr_cutoff} \\
+        -picked_fdr ${params.picked_fdr} \\
+        -picked_decoy_string ${params.decoy_string} \\
+        -extraction:min_precursor_purity ${params.min_precursor_purity} \\
+        -extraction:precursor_isotope_deviation ${params.precursor_isotope_deviation} \\
+        -extraction:min_reporter_intensity ${params.min_reporter_intensity} \\
+        ${isotope_correction} \\
+        -out ${expdes.baseName}_openms.consensusXML \\
+        -out_mzTab ${expdes.baseName}_openms.mzTab \\
+        -out_qpx ${expdes.baseName}_qpx \\
+        $args \\
+        2>&1 | tee isobaricworkflow.log
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        IsobaricWorkflow: \$(IsobaricWorkflow 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1)
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/openms/isobaric_workflow/meta.yml b/modules/local/openms/isobaric_workflow/meta.yml
new file mode 100644
index 000000000..45971764b
--- /dev/null
+++ b/modules/local/openms/isobaric_workflow/meta.yml
@@ -0,0 +1,46 @@
+name: isobaric_workflow
+description: Extracts and normalizes isobaric labeling information from an LC-MS/MS experiment.
+keywords:
+  - OpenMS
+  - quantification
+tools:
+  - IsobaricWorkflow:
+      description: |
+        Extracts and normalizes isobaric labeling information from an LC-MS/MS experiment.
+      homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/TOPP_IsobaricWorkflow.html
+      documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/TOPP_IsobaricWorkflow.html
+input:
+  - labelling_type:
+      type: string
+      description: Isobaric labelling type passed to the tool's -type option (e.g. tmt16plex, tmt18plex)
+  - mzmls:
+      type: file
+      description: Input Spectra in mzML format
+      pattern: "*.mzML"
+  - id_files:
+      type: file
+      description: Identifications in idparquet or mzIdentML format with posterior error probabilities as score type.
+      pattern: "*.idparquet"
+  - expdes:
+      type: file
+      description: An experimental design file
+      pattern: "*.tsv"
+output:
+  - out_mztab:
+      type: file
+      description: mzTab file with analysis results
+      pattern: "*.mzTab"
+  - out_consensusXML:
+      type: file
+      description: ConsensusXML file for visualization and further processing in OpenMS.
+ pattern: "*.consensusXML" + - log: + type: file + description: log file + pattern: "*.log" + - version: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@MaLLLiYA" diff --git a/modules/local/openms/msgf/meta.yml b/modules/local/openms/msgf/meta.yml index 8c7c8690b..d8fe9bce1 100644 --- a/modules/local/openms/msgf/meta.yml +++ b/modules/local/openms/msgf/meta.yml @@ -29,7 +29,7 @@ output: - id_files_msgf: type: file description: Output file - pattern: "*.idXML" + pattern: "*.idparquet" - log: type: file description: log file diff --git a/modules/local/openms/percolator/meta.yml b/modules/local/openms/percolator/meta.yml index b7be826db..aad911da7 100644 --- a/modules/local/openms/percolator/meta.yml +++ b/modules/local/openms/percolator/meta.yml @@ -17,7 +17,7 @@ input: type: file description: | Input idXML file containing the identifications. - pattern: "*.idXML" + pattern: "*.idparquet" output: - meta: type: map diff --git a/modules/local/openms/protein_inference_epifany/main.nf b/modules/local/openms/protein_inference_epifany/main.nf deleted file mode 100644 index ddafba50b..000000000 --- a/modules/local/openms/protein_inference_epifany/main.nf +++ /dev/null @@ -1,39 +0,0 @@ -process PROTEIN_INFERENCE_EPIFANY { - label 'process_medium' - label 'openms' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:2025.04.14' : - 'ghcr.io/bigbio/openms-tools-thirdparty:2025.04.14' }" - - input: - tuple val(meta), path(consus_file) - - output: - tuple val(meta), path("${consus_file.baseName}_epi.consensusXML"), emit: epi_inference - path "versions.yml", emit: versions - path "*.log", emit: log - - script: - def args = task.ext.args ?: '' - gg = params.protein_quant == 'shared_peptides' ? 
'remove_proteins_wo_evidence' : 'none' - - """ - Epifany \\ - -in ${consus_file} \\ - -protein_fdr true \\ - -threads $task.cpus \\ - -algorithm:keep_best_PSM_only $params.keep_best_PSM_only \\ - -algorithm:update_PSM_probabilities $params.update_PSM_probabilities \\ - -greedy_group_resolution $gg \\ - -algorithm:top_PSMs $params.top_PSMs \\ - -out ${consus_file.baseName}_epi.consensusXML \\ - $args \\ - 2>&1 | tee ${consus_file.baseName}_inference.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - Epifany: \$(Epifany 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) - END_VERSIONS - """ -} diff --git a/modules/local/openms/protein_inference_epifany/meta.yml b/modules/local/openms/protein_inference_epifany/meta.yml deleted file mode 100644 index 23d09813b..000000000 --- a/modules/local/openms/protein_inference_epifany/meta.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: protein_inference_epifany -description: Runs a Bayesian protein inference. -keywords: - - Bayesian - - inference - - OpenMS -tools: - - Epifany: - description: | - It is a protein inference engine based on a Bayesian network. - homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/UTILS_Epifany.html - documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/UTILS_Epifany.html -input: - - consus_file: - type: file - description: | - identification results. - pattern: "*.{idXML,consensusXML}" -output: - - epi_inference: - type: file - description: | - identification results with scored/grouped proteins. 
- pattern: "*.{idXML,consensusXML}" - - log: - type: file - description: log file - pattern: "*.log" - - version: - type: file - description: File containing software version - pattern: "versions.yml" -authors: - - "@daichengxin" diff --git a/modules/local/openms/protein_inference_generic/main.nf b/modules/local/openms/protein_inference_generic/main.nf deleted file mode 100644 index 8222fb632..000000000 --- a/modules/local/openms/protein_inference_generic/main.nf +++ /dev/null @@ -1,44 +0,0 @@ -process PROTEIN_INFERENCE_GENERIC { - label 'process_medium' - label 'openms' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:2025.04.14' : - 'ghcr.io/bigbio/openms-tools-thirdparty:2025.04.14' }" - - input: - tuple val(meta), path(consus_file) - - output: - tuple val(meta), path("${consus_file.baseName}_epi.consensusXML"), emit: protein_inference - path "versions.yml", emit: versions - path "*.log", emit: log - - script: - def args = task.ext.args ?: '' - gg = params.protein_quant == 'shared_peptides' ? '-Algorithm:greedy_group_resolution' : '' - groups = params.protein_quant == 'strictly_unique_peptides' ? 
'false' : 'true' - - """ - ProteinInference \\ - -in ${consus_file} \\ - -threads $task.cpus \\ - -picked_fdr $params.picked_fdr \\ - -picked_decoy_string $params.decoy_string \\ - -protein_fdr true \\ - -Algorithm:use_shared_peptides $params.use_shared_peptides \\ - -Algorithm:annotate_indistinguishable_groups $groups \\ - -Algorithm:score_type "PEP" \\ - $gg \\ - -Algorithm:score_aggregation_method $params.protein_score \\ - -Algorithm:min_peptides_per_protein $params.min_peptides_per_protein \\ - -out ${consus_file.baseName}_epi.consensusXML \\ - $args \\ - 2>&1 | tee ${consus_file.baseName}_inference.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ProteinInference: \$(ProteinInference 2>&1 | grep -E '^Version(.*) ' | sed 's/Version: //g' | cut -d ' ' -f 1) - END_VERSIONS - """ -} diff --git a/modules/local/openms/protein_inference_generic/meta.yml b/modules/local/openms/protein_inference_generic/meta.yml deleted file mode 100644 index 983479342..000000000 --- a/modules/local/openms/protein_inference_generic/meta.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: protein_inference_generic -description: Computes a protein identification score based on an aggregation of scores of identified peptides. -keywords: - - protein - - inference - - OpenMS -tools: - - ProteinInference: - description: | - Computes a protein identification score based on an aggregation of scores of identified peptides. - homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_ProteinInference.html - documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_ProteinInference.html -input: - - consus_file: - type: file - description: | - identification results. - pattern: "*.{idXML,consensusXML}" -output: - - protein_inference: - type: file - description: | - identification results with scored/grouped proteins. 
- pattern: "*.consensusXML" - - log: - type: file - description: log file - pattern: "*.log" - - version: - type: file - description: File containing software version - pattern: "versions.yml" -authors: - - "@daichengxin" diff --git a/modules/local/openms/proteomicslfq/main.nf b/modules/local/openms/proteomicslfq/main.nf index 176b71ae5..bb2216c0b 100644 --- a/modules/local/openms/proteomicslfq/main.nf +++ b/modules/local/openms/proteomicslfq/main.nf @@ -18,12 +18,12 @@ process PROTEOMICSLFQ { path "${expdes.baseName}_qpx", emit: out_qpx path "${expdes.baseName}_openms.consensusXML", emit: out_consensusXML path "*msstats_in.csv", emit: out_msstats, optional: true - path "debug_mergedIDs.idXML", emit: debug_mergedIDs, optional: true - path "debug_mergedIDs_inference.idXML", emit: debug_mergedIDs_inference, optional: true - path "debug_mergedIDsGreedyResolved.idXML", emit: debug_mergedIDsGreedyResolved, optional: true - path "debug_mergedIDsGreedyResolvedFDR.idXML", emit: debug_mergedIDsGreedyResolvedFDR, optional: true - path "debug_mergedIDsGreedyResolvedFDRFiltered.idXML", emit: debug_mergedIDsGreedyResolvedFDRFiltered, optional: true - path "debug_mergedIDsFDRFilteredStrictlyUniqueResolved.idXML", emit: debug_mergedIDsFDRFilteredStrictlyUniqueResolved, optional: true + path "debug_mergedIDs.idparquet", emit: debug_mergedIDs, optional: true + path "debug_mergedIDs_inference.idparquet", emit: debug_mergedIDs_inference, optional: true + path "debug_mergedIDsGreedyResolved.idparquet", emit: debug_mergedIDsGreedyResolved, optional: true + path "debug_mergedIDsGreedyResolvedFDR.idparquet", emit: debug_mergedIDsGreedyResolvedFDR, optional: true + path "debug_mergedIDsGreedyResolvedFDRFiltered.idparquet", emit: debug_mergedIDsGreedyResolvedFDRFiltered, optional: true + path "debug_mergedIDsFDRFilteredStrictlyUniqueResolved.idparquet", emit: debug_mergedIDsFDRFilteredStrictlyUniqueResolved, optional: true path "*.log", emit: log path "versions.yml", emit: versions diff 
--git a/modules/local/openms/proteomicslfq/meta.yml b/modules/local/openms/proteomicslfq/meta.yml index b8b327e16..7ca500076 100644 --- a/modules/local/openms/proteomicslfq/meta.yml +++ b/modules/local/openms/proteomicslfq/meta.yml @@ -18,7 +18,7 @@ input: - id_files: type: file description: Identifications in idXML or mzIdentML format with posterior error probabilities as score type. - pattern: "*.idXML" + pattern: "*.idparquet" - expdes: type: file description: An experimental design file diff --git a/modules/local/openms/sage/meta.yml b/modules/local/openms/sage/meta.yml index 11eed615a..49690e826 100644 --- a/modules/local/openms/sage/meta.yml +++ b/modules/local/openms/sage/meta.yml @@ -29,7 +29,7 @@ output: - id_files_sage: type: file description: Output file - pattern: "*.idXML" + pattern: "*.idparquet" - log: type: file description: log file diff --git a/modules/local/utils/msrescore_features/main.nf b/modules/local/utils/msrescore_features/main.nf index 79b4244a2..449f2fd61 100644 --- a/modules/local/utils/msrescore_features/main.nf +++ b/modules/local/utils/msrescore_features/main.nf @@ -3,8 +3,8 @@ process MSRESCORE_FEATURES { label 'process_medium' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.16' : - 'ghcr.io/bigbio/quantms-rescoring:0.0.17' }" + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.18' : + 'ghcr.io/bigbio/quantms-rescoring:0.0.18' }" input: tuple val(meta), path(id_files), path(mzml), path(model_weight) diff --git a/modules/local/utils/msrescore_features/meta.yml b/modules/local/utils/msrescore_features/meta.yml index 208da0e7f..fe8162c1d 100644 --- a/modules/local/utils/msrescore_features/meta.yml +++ b/modules/local/utils/msrescore_features/meta.yml @@ -13,7 +13,7 @@ input: - idxml_file: type: file description: idXML identification file - pattern: "*.idXML" + pattern: "*.idparquet" - mzml: type: file description: spectrum data file @@ -25,7 +25,7 @@ output: - idxml: type: file description: idXML identification file after MS2 rescoring - pattern: "*.idXML" + pattern: "*.idparquet" - version: type: file description: File containing software version diff --git a/modules/local/utils/msrescore_fine_tuning/main.nf b/modules/local/utils/msrescore_fine_tuning/main.nf index aa7ae59b3..23636619f 100644 --- a/modules/local/utils/msrescore_fine_tuning/main.nf +++ b/modules/local/utils/msrescore_fine_tuning/main.nf @@ -3,23 +3,23 @@ process MSRESCORE_FINE_TUNING { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.16' : - 'ghcr.io/bigbio/quantms-rescoring:0.0.16' }" + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.18' : + 'ghcr.io/bigbio/quantms-rescoring:0.0.18' }" input: - tuple val(meta), path(idxml), path(mzml), val(groupkey), path(ms2_model_dir) + tuple val(meta), path(idparquet), path(mzml), path(ms2_model_dir) output: - tuple val(groupkey), path("retained_ms2.pth") , emit: model_weight - path "versions.yml" , emit: versions - path "*.log" , emit: log + path("retained_ms2.pth") , emit: model_weight + path "versions.yml" , emit: versions + path "*.log" , emit: log when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${groupkey}_fine_tuning" + def prefix = task.ext.prefix ?: "fine_tuning" // Initialize tolerance variables def ms2_tolerance = null @@ -49,8 +49,8 @@ process MSRESCORE_FINE_TUNING { """ rescoring transfer_learning \\ - --idxml ./ \\ - --mzml ./ \\ + --idparquet ${[idparquet].flatten().join(' --idparquet ')} \\ + --mzml ${[mzml].flatten().join(' --mzml ')} \\ --save_model_dir ./ \\ --ms2_tolerance $ms2_tolerance \\ --ms2_tolerance_unit $ms2_tolerance_unit \\ @@ -62,7 +62,7 @@ process MSRESCORE_FINE_TUNING { ${force_transfer_learning} \\ ${consider_modloss} \\ $args \\ - 2>&1 | tee ${groupkey}_fine_tuning.log + 2>&1 | tee fine_tuning.log cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/utils/msrescore_fine_tuning/meta.yml b/modules/local/utils/msrescore_fine_tuning/meta.yml index dd98ac3fa..72fb9a60e 100644 --- a/modules/local/utils/msrescore_fine_tuning/meta.yml +++ b/modules/local/utils/msrescore_fine_tuning/meta.yml @@ -18,7 +18,7 @@ input: - idxml_file: type: file description: idXML identification file - pattern: "*.idXML" + pattern: "*.idparquet" - mzml: type: file description: spectrum data file @@ -30,7 +30,7 @@ output: - idxml: type: file description: idXML identification file after MS2 rescoring - pattern: "*.idXML" + pattern: 
"*.idparquet" - version: type: file description: File containing software version diff --git a/modules/local/utils/psm_clean/main.nf b/modules/local/utils/psm_clean/main.nf index aa8fc3b43..650ffc907 100644 --- a/modules/local/utils/psm_clean/main.nf +++ b/modules/local/utils/psm_clean/main.nf @@ -3,8 +3,8 @@ process PSM_CLEAN { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.16' : - 'ghcr.io/bigbio/quantms-rescoring:0.0.16' }" + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.18' : + 'ghcr.io/bigbio/quantms-rescoring:0.0.18' }" input: tuple val(meta), path(idparquet), path(mzml) @@ -23,11 +23,11 @@ process PSM_CLEAN { """ rescoring psm_feature_clean \\ - --idparquet $idparquet \\ + --idparquet ${[idparquet].flatten().join(' --idparquet ')} \\ --mzml $mzml \\ - --output ${idxml.baseName}_clean.idparquet \\ + --output ${mzml.baseName}_clean.idparquet \\ $args \\ - 2>&1 | tee ${idxml.baseName}_clean.log + 2>&1 | tee ${mzml.baseName}_clean.log cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/utils/psm_clean/meta.yml b/modules/local/utils/psm_clean/meta.yml index 84b1505b0..32fb1c400 100644 --- a/modules/local/utils/psm_clean/meta.yml +++ b/modules/local/utils/psm_clean/meta.yml @@ -13,7 +13,7 @@ input: - idxml_file: type: file description: idXML identification file - pattern: "*.idXML" + pattern: "*.idparquet" - mzml: type: file description: spectrum data file @@ -25,7 +25,7 @@ output: - idxml: type: file description: idXML identification file after postprocessing - pattern: "*.idXML" + pattern: "*.idparquet" - version: type: file description: File containing software version diff --git a/modules/local/utils/psm_conversion/meta.yml b/modules/local/utils/psm_conversion/meta.yml index b3b3317f8..86b6171d9 100644 --- a/modules/local/utils/psm_conversion/meta.yml +++ b/modules/local/utils/psm_conversion/meta.yml @@ -13,7 
+13,7 @@ input: - idxml_file: type: file description: idXML identification file - pattern: "*.idXML" + pattern: "*.idparquet" - spectrum_df: type: file description: spectrum data file diff --git a/modules/local/utils/spectrum_features/main.nf b/modules/local/utils/spectrum_features/main.nf index baaa989b5..101b8f7c2 100644 --- a/modules/local/utils/spectrum_features/main.nf +++ b/modules/local/utils/spectrum_features/main.nf @@ -3,14 +3,14 @@ process SPECTRUM_FEATURES { label 'process_low' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.16' : - 'ghcr.io/bigbio/quantms-rescoring:0.0.16' }" + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.18' : + 'ghcr.io/bigbio/quantms-rescoring:0.0.18' }" input: - tuple val(meta), path(id_file), val(search_engine), path(ms_file) + tuple val(meta), path(id_file), path(ms_file) output: - tuple val(meta), path("${id_file.baseName}_snr.idXML"), val(search_engine), emit: id_files_snr + tuple val(meta), path("${id_file.baseName}_snr.idparquet"), emit: id_files_snr path "versions.yml", emit: versions path "*.log", emit: log @@ -21,8 +21,8 @@ process SPECTRUM_FEATURES { """ rescoring spectrum2feature \\ --mzml "${ms_file}" \\ - --idxml "${id_file}" \\ - --output "${id_file.baseName}_snr.idXML" \\ + --idparquet "${id_file}" \\ + --output "${id_file.baseName}_snr.idparquet" \\ $args \\ 2>&1 | tee "${id_file.baseName}_snr_feature.log" diff --git a/modules/local/utils/spectrum_features/meta.yml b/modules/local/utils/spectrum_features/meta.yml index 1c53ddf9a..4cfb9b9da 100644 --- a/modules/local/utils/spectrum_features/meta.yml +++ b/modules/local/utils/spectrum_features/meta.yml @@ -22,7 +22,7 @@ input: type: file description: | Input idXML file containing the identifications. 
- pattern: "*.idXML" + pattern: "*.idparquet" output: - meta: type: map @@ -31,7 +31,7 @@ output: type: file description: | Output file in idXML format - pattern: "*.idXML" + pattern: "*.idparquet" - log: type: file description: log file diff --git a/subworkflows/local/dda_id/main.nf b/subworkflows/local/dda_id/main.nf index 29daeebab..a145297c4 100644 --- a/subworkflows/local/dda_id/main.nf +++ b/subworkflows/local/dda_id/main.nf @@ -1,9 +1,7 @@ // // MODULE: Local to the pipeline // -include { MSRESCORE_FEATURES } from '../../../modules/local/utils/msrescore_features/main' include { PERCOLATOR } from '../../../modules/local/openms/percolator/main' -include { EXTRACTPSMFEATURES } from '../../../modules/local/openms/extractfeatures/main' include { PSM_CONVERSION } from '../../../modules/local/utils/psm_conversion/main' include { PHOSPHO_SCORING } from '../phospho_scoring/main' @@ -41,62 +39,11 @@ workflow DDA_ID { // // SUBWORKFLOW: Rescoring // - if (params.search_engines.tokenize(",").unique().size() > 1) { - if (params.ms2features_enable == true) { - // Only add ms2_model_dir if it's actually set and not empty - // Handle cases where parameter might be empty string, null, boolean true, or whitespace - // When --ms2features_model_dir is passed with no value, Nextflow may set it to boolean true - if (params.ms2features_model_dir && params.ms2features_model_dir != true) { - ms2_model_dir = channel.from(file(params.ms2features_model_dir, checkIfExists: true)) - } else { - // create a fake channel when don't specify model dir - ms2_model_dir = channel.from(file("pretrained_models")) - } - ch_id_files_feats.groupTuple(size: params.search_engines.tokenize(",").unique().size()) - .combine(ch_mzmls_search, by: 0) - .combine(ms2_model_dir).set{ ch_id_rescoring } + PERCOLATOR(ch_id_files_feats) + ch_rescoring_results = PERCOLATOR.out.id_files_perc + ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.versions) - MSRESCORE_FEATURES(ch_id_rescoring) - 
ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_feats = MSRESCORE_FEATURES.out.idxml - ch_id_files_feats.map { v -> [v[0], v[1]] }.set {ch_perc_input} - - } else{ - EXTRACTPSMFEATURES(ch_id_files_feats.groupTuple(size: params.search_engines.tokenize(",").unique().size())) - ch_consensus_input = EXTRACTPSMFEATURES.out.id_files_feat - ch_perc_input = ch_consensus_input - } - PERCOLATOR(ch_perc_input) - ch_rescoring_results = PERCOLATOR.out.id_files_perc - ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.versions) - } else { - if (params.ms2features_enable == true) { - // Only add ms2_model_dir if it's actually set and not empty - // Handle cases where parameter might be empty string, null, boolean true, or whitespace - // When --ms2features_model_dir is passed with no value, Nextflow may set it to boolean true - if (params.ms2features_model_dir && params.ms2features_model_dir != true) { - ms2_model_dir = channel.from(file(params.ms2features_model_dir, checkIfExists: true)) - } else { - // create a fake channel when don't specify model dir - ms2_model_dir = channel.from(file("pretrained_models")) - } - - ch_id_files_feats.groupTuple(size: params.search_engines.tokenize(",").unique().size()) - .combine(ch_mzmls_search, by: 0) - .combine(ms2_model_dir).set{ ch_id_rescoring } - - MSRESCORE_FEATURES(ch_id_rescoring) - ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_feats = MSRESCORE_FEATURES.out.idxml - ch_id_files_feats.map { v -> [v[0], v[1]] }.set {ch_perc_input} - } else { - ch_perc_input = ch_id_files_feats - } - PERCOLATOR(ch_perc_input) - ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.versions) - ch_rescoring_results = PERCOLATOR.out.id_files_perc - } PSM_FDR_CONTROL(ch_rescoring_results) ch_software_versions = ch_software_versions.mix(PSM_FDR_CONTROL.out.versions) diff --git a/subworkflows/local/feature_mapper/main.nf 
b/subworkflows/local/feature_mapper/main.nf deleted file mode 100644 index 308293da7..000000000 --- a/subworkflows/local/feature_mapper/main.nf +++ /dev/null @@ -1,26 +0,0 @@ -// -// Assigns protein/peptide identifications to features or consensus features. -// - -include { ISOBARIC_ANALYZER } from '../../../modules/local/openms/isobaric_analyzer/main' -include { ID_MAPPER } from '../../../modules/local/openms/id_mapper/main' - -workflow FEATURE_MAPPER { - take: - ch_mzml_files - ch_id_files - - main: - ch_version = channel.empty() - - ISOBARIC_ANALYZER(ch_mzml_files) - ch_version = ch_version.mix(ISOBARIC_ANALYZER.out.versions) - - ID_MAPPER(ch_id_files.combine(ISOBARIC_ANALYZER.out.id_files_consensusXML, by: 0)) - ch_version = ch_version.mix(ID_MAPPER.out.versions) - - emit: - id_map = ID_MAPPER.out.id_map - - versions = ch_version -} diff --git a/subworkflows/local/feature_mapper/meta.yml b/subworkflows/local/feature_mapper/meta.yml deleted file mode 100644 index e2c40d976..000000000 --- a/subworkflows/local/feature_mapper/meta.yml +++ /dev/null @@ -1,29 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: "feature_mapper" -description: Subworkflow for mapping features across different runs and conditions -keywords: - - feature - - mapping - - alignment - - proteomics -components: - - isobaric/analyzer - - id/mapper -input: - - ch_input: - type: file - description: | - Channel containing input files for feature mapping -output: - - ch_mapped_features: - type: file - description: | - Channel containing mapped features - - versions: - type: file - description: | - Software versions used in this subworkflow -authors: - - "@bigbio" -maintainers: - - "@bigbio" diff --git a/subworkflows/local/peptide_database_search/main.nf b/subworkflows/local/peptide_database_search/main.nf index e22ac32dc..96c8aec31 100644 --- a/subworkflows/local/peptide_database_search/main.nf +++ 
b/subworkflows/local/peptide_database_search/main.nf @@ -91,57 +91,40 @@ workflow PEPTIDE_DATABASE_SEARCH { } else { // Preparing train datasets and fine tuning MS2 model - sage_train_datasets = ch_id_sage + // Randomly select one search engine for fine-tuning sampling + engine_opts = [] + if (params.search_engines.contains("sage")) engine_opts.add("sage") + if (params.search_engines.contains("msgf")) engine_opts.add("msgf") + if (params.search_engines.contains("comet")) engine_opts.add("comet") + selected_engine = engine_opts[new Random(2025).nextInt(engine_opts.size())] + + ch_selected_engine = (selected_engine == "sage") ? ch_id_sage : + (selected_engine == "msgf") ? ch_id_msgf : + ch_id_comet + + train_datasets = ch_selected_engine .combine(ch_mzmls_search, by: 0) .toSortedList() .flatMap() .randomSample(params.fine_tuning_sample_run, 2025) - .combine(channel.value("sage")) - .groupTuple(by: 3) + .collect(flat: false).map { rows -> rows.transpose() } - msgf_train_datasets = ch_id_msgf - .combine(ch_mzmls_search, by: 0) - .toSortedList() - .flatMap() - .randomSample(params.fine_tuning_sample_run, 2025) - .combine(channel.value("msgf")) - .groupTuple(by: 3) - - comet_train_datasets = ch_id_comet - .combine(ch_mzmls_search, by: 0) - .toSortedList() - .flatMap() - .randomSample(params.fine_tuning_sample_run, 2025) - .combine(channel.value("comet")) - .groupTuple(by: 3) - - sage_train_datasets.mix(msgf_train_datasets) - .mix(comet_train_datasets) - .combine(ms2_model_dir) - .set { train_datasets } - MSRESCORE_FINE_TUNING(train_datasets) + MSRESCORE_FINE_TUNING(train_datasets.combine(ms2_model_dir)) ch_versions = ch_versions.mix(MSRESCORE_FINE_TUNING.out.versions) - channel.value("msgf").combine(ch_id_msgf.combine(ch_mzmls_search, by: 0)) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - .map { v -> [v[1], v[2], v[3], v[4], v[0] ] } - .set { msgf_features_input } - - channel.value("sage").combine(ch_id_sage.combine(ch_mzmls_search, by: 0)) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - .map { v -> [v[1], 
v[2], v[3], v[4], v[0] ] } - .set { sage_features_input } - - channel.value("comet").combine(ch_id_comet.combine(ch_mzmls_search, by: 0)) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - .map { v -> [v[1], v[2], v[3], v[4], v[0] ] } - .set { comet_features_input } + if (params.search_engines.tokenize(",").unique().size() > 1) { + ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage).groupTuple(size: params.search_engines.tokenize(",").unique().size()) + .combine(ch_mzmls_search, by: 0) + .combine(MSRESCORE_FINE_TUNING.out.model_weight).set{ ch_id_rescoring } + } else { + ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage).combine(ch_mzmls_search, by: 0) + .combine(MSRESCORE_FINE_TUNING.out.model_weight).set{ ch_id_rescoring } + } - MSRESCORE_FEATURES(msgf_features_input.mix(sage_features_input).mix(comet_features_input)) + MSRESCORE_FEATURES(ch_id_rescoring) ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) ch_id_files_feats = MSRESCORE_FEATURES.out.idparquet - } } else{ if (params.search_engines.tokenize(",").unique().size() > 1) { @@ -167,16 +150,16 @@ workflow PEPTIDE_DATABASE_SEARCH { } } else if (params.search_engines.tokenize(",").unique().size() > 1) { - EXTRACTPSMFEATURES(ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage).groupTuple(size: params.search_engines.tokenize(",").unique().size())) - ch_id_files_out = EXTRACTPSMFEATURES.out.id_files_feat + PSM_CLEAN(ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage).groupTuple(size: params.search_engines.tokenize(",").unique().size()).combine(ch_mzmls_search, by: 0)) + ch_id_files_out = PSM_CLEAN.out.idparquet } else { ch_id_files_out = ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage) } - + } else if (params.psm_clean == true) { ch_id_files = ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage) PSM_CLEAN(ch_id_files.combine(ch_mzmls_search, by: 0)) - ch_id_files_out = PSM_CLEAN.out.idxml + ch_id_files_out = PSM_CLEAN.out.idparquet ch_versions = ch_versions.mix(PSM_CLEAN.out.versions) } else { ch_id_files_out = 
ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage) @@ -185,4 +168,4 @@ workflow PEPTIDE_DATABASE_SEARCH { emit: ch_id_files_idx = ch_id_files_out versions = ch_versions -} \ No newline at end of file +} diff --git a/subworkflows/local/phospho_scoring/main.nf b/subworkflows/local/phospho_scoring/main.nf index 13d5a80c3..a233c33c2 100644 --- a/subworkflows/local/phospho_scoring/main.nf +++ b/subworkflows/local/phospho_scoring/main.nf @@ -12,16 +12,9 @@ workflow PHOSPHO_SCORING { main: ch_version = channel.empty() - if (params.search_engines.split(",").size() != 1){ - ID_SCORE_SWITCHER(ch_id_files.combine(channel.value("\"Posterior Error Probability_score\""))) - ch_version = ch_version.mix(ID_SCORE_SWITCHER.out.versions) - ONSITE(ch_mzml_files.join(ID_SCORE_SWITCHER.out.id_score_switcher)) - ch_version = ch_version.mix(ONSITE.out.versions) - } else{ - ONSITE(ch_mzml_files.join(ch_id_files)) - ch_version = ch_version.mix(ONSITE.out.versions) - } + ONSITE(ch_mzml_files.join(ch_id_files)) + ch_version = ch_version.mix(ONSITE.out.versions) emit: diff --git a/subworkflows/local/protein_inference/main.nf b/subworkflows/local/protein_inference/main.nf deleted file mode 100644 index 96381089a..000000000 --- a/subworkflows/local/protein_inference/main.nf +++ /dev/null @@ -1,40 +0,0 @@ -// -// ProteinInference -// - -include { PROTEIN_INFERENCE_EPIFANY } from '../../../modules/local/openms/protein_inference_epifany/main' -include { PROTEIN_INFERENCE_GENERIC } from '../../../modules/local/openms/protein_inference_generic/main' -include { ID_FILTER } from '../../../modules/local/openms/id_filter/main' - -workflow PROTEIN_INFERENCE { - take: - ch_consus_file - - main: - ch_version = channel.empty() - - if (params.protein_inference_method == "bayesian") { - PROTEIN_INFERENCE_EPIFANY(ch_consus_file) - ch_version = ch_version.mix(PROTEIN_INFERENCE_EPIFANY.out.versions) - ch_inference = PROTEIN_INFERENCE_EPIFANY.out.epi_inference - } else { - PROTEIN_INFERENCE_GENERIC(ch_consus_file) - 
ch_version = ch_version.mix(PROTEIN_INFERENCE_GENERIC.out.versions) - ch_inference = PROTEIN_INFERENCE_GENERIC.out.protein_inference - } - - ID_FILTER(ch_inference.combine(channel.value("-score:type_protein q-value"))) - ch_version = ch_version.mix(ID_FILTER.out.versions) - ID_FILTER.out.id_filtered - .multiMap{ it -> - meta: it[0] - results: it[1] - } - .set{ ch_epi_results } - - emit: - epi_idfilter = ch_epi_results.results - - versions = ch_version - -} diff --git a/subworkflows/local/protein_inference/meta.yml b/subworkflows/local/protein_inference/meta.yml deleted file mode 100644 index 7bab69e8d..000000000 --- a/subworkflows/local/protein_inference/meta.yml +++ /dev/null @@ -1,30 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: "protein_inference" -description: Subworkflow for protein inference from peptide identifications -keywords: - - protein - - inference - - proteomics - - peptides -components: - - protein/inference/epifany - - protein/inference/generic - - id/filter -input: - - ch_input: - type: file - description: | - Channel containing input files for protein inference -output: - - ch_protein_results: - type: file - description: | - Channel containing protein inference results - - versions: - type: file - description: | - Software versions used in this subworkflow -authors: - - "@bigbio" -maintainers: - - "@bigbio" diff --git a/subworkflows/local/protein_quant/main.nf b/subworkflows/local/protein_quant/main.nf deleted file mode 100644 index 6c9f295bf..000000000 --- a/subworkflows/local/protein_quant/main.nf +++ /dev/null @@ -1,30 +0,0 @@ -// -// ProteinQuant -// - -include { ID_CONFLICT_RESOLVER as ID_CONFLICT_RESOLVER } from '../../../modules/local/openms/id_conflict_resolver/main' -include { PROTEIN_QUANTIFIER as PROTEIN_QUANTIFIER } from '../../../modules/local/openms/protein_quantifier/main' -include { MSSTATS_CONVERTER as MSSTATS_CONVERTER } from 
'../../../modules/local/openms/msstats_converter/main' - -workflow PROTEIN_QUANT { - take: - ch_conflict_file - ch_expdesign_file - - main: - ch_version = channel.empty() - - ID_CONFLICT_RESOLVER(ch_conflict_file) - ch_version = ch_version.mix(ID_CONFLICT_RESOLVER.out.versions) - - PROTEIN_QUANTIFIER(ID_CONFLICT_RESOLVER.out.pro_resconf, ch_expdesign_file) - ch_version = ch_version.mix(PROTEIN_QUANTIFIER.out.versions) - - MSSTATS_CONVERTER(ID_CONFLICT_RESOLVER.out.pro_resconf, ch_expdesign_file, "ISO") - ch_version = ch_version.mix(MSSTATS_CONVERTER.out.versions) - - emit: - msstats_csv = MSSTATS_CONVERTER.out.out_msstats - out_mztab = PROTEIN_QUANTIFIER.out.out_mztab - versions = ch_version -} diff --git a/subworkflows/local/protein_quant/meta.yml b/subworkflows/local/protein_quant/meta.yml deleted file mode 100644 index 71ae589f2..000000000 --- a/subworkflows/local/protein_quant/meta.yml +++ /dev/null @@ -1,30 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: "protein_quant" -description: Subworkflow for protein quantification from MS data -keywords: - - protein - - quantification - - proteomics - - ms -components: - - id/conflict/resolver - - protein/quantifier - - msstats/converter -input: - - ch_input: - type: file - description: | - Channel containing input files for protein quantification -output: - - ch_quant_results: - type: file - description: | - Channel containing protein quantification results - - versions: - type: file - description: | - Software versions used in this subworkflow -authors: - - "@bigbio" -maintainers: - - "@bigbio" diff --git a/workflows/tmt.nf b/workflows/tmt.nf index 6a882cefb..a8b9f0d66 100644 --- a/workflows/tmt.nf +++ b/workflows/tmt.nf @@ -7,14 +7,12 @@ // // MODULES: Local to the pipeline // -include { FILE_MERGE } from '../modules/local/openms/file_merge/main' +include { ISOBARIC_WORKFLOW } from '../modules/local/openms/isobaric_workflow/main' 
+include { MSSTATS_CONVERTER } from '../modules/local/openms/msstats_converter/main' // // SUBWORKFLOWS: Consisting of a mix of local and nf-core/modules // -include { FEATURE_MAPPER } from '../subworkflows/local/feature_mapper/main' -include { PROTEIN_INFERENCE } from '../subworkflows/local/protein_inference/main' -include { PROTEIN_QUANT } from '../subworkflows/local/protein_quant/main' include { ID } from '../subworkflows/local/id/main' /* @@ -40,28 +38,28 @@ workflow TMT { ch_software_versions = ch_software_versions.mix(ID.out.versions) // - // SUBWORKFLOW: FEATUREMAPPER + // MODULE: ISOBARIC_WORKFLOW // - FEATURE_MAPPER(ch_file_preparation_results, ID.out.id_results) - ch_software_versions = ch_software_versions.mix(FEATURE_MAPPER.out.versions) + // Extract labelling_type from meta (auto-detected from SDRF) + ch_file_preparation_results.join(ID.out.id_results) + .multiMap { it -> + labelling_type: it[0].labelling_type + mzmls: it[1] + ids: it[2] + } + .set{ ch_iso_workflow } + ISOBARIC_WORKFLOW(ch_iso_workflow.labelling_type.first(), + ch_iso_workflow.mzmls.collect(), + ch_iso_workflow.ids.collect(), + ch_expdesign + ) + ch_software_versions = ch_software_versions.mix(ISOBARIC_WORKFLOW.out.versions) // - // MODULE: FILEMERGE + // MODULE: MSSTATS_CONVERTER // - FILE_MERGE(FEATURE_MAPPER.out.id_map.collect()) - ch_software_versions = ch_software_versions.mix(FILE_MERGE.out.versions) - - // - // SUBWORKFLOW: PROTEININFERENCE - // - PROTEIN_INFERENCE(FILE_MERGE.out.id_merge) - ch_software_versions = ch_software_versions.mix(PROTEIN_INFERENCE.out.versions) - - // - // SUBWORKFLOW: PROTEINQUANT - // - PROTEIN_QUANT(PROTEIN_INFERENCE.out.epi_idfilter, ch_expdesign) - ch_software_versions = ch_software_versions.mix(PROTEIN_QUANT.out.versions) + MSSTATS_CONVERTER(ISOBARIC_WORKFLOW.out.out_consensusXML, ch_expdesign, "ISO") + ch_software_versions = ch_software_versions.mix(MSSTATS_CONVERTER.out.versions) ID.out.psmrescoring_results .map { it -> it[1] } @@ 
-74,7 +72,7 @@ workflow TMT { emit: ch_pmultiqc_ids = ch_pmultiqc_ids ch_pmultiqc_consensus = ch_pmultiqc_consensus - final_result = PROTEIN_QUANT.out.out_mztab - msstats_in = PROTEIN_QUANT.out.msstats_csv + final_result = ISOBARIC_WORKFLOW.out.out_mztab + msstats_in = MSSTATS_CONVERTER.out.out_msstats versions = ch_software_versions }