From 0dccd6e4595730cb52bb93c0d2bcd8e132037a2e Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 6 Jun 2026 11:23:41 +0800 Subject: [PATCH 01/12] remove consensusid and move to parquet --- modules/local/utils/psm_clean/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/utils/psm_clean/main.nf b/modules/local/utils/psm_clean/main.nf index aa8fc3b4..201a85b4 100644 --- a/modules/local/utils/psm_clean/main.nf +++ b/modules/local/utils/psm_clean/main.nf @@ -25,9 +25,9 @@ process PSM_CLEAN { rescoring psm_feature_clean \\ --idparquet $idparquet \\ --mzml $mzml \\ - --output ${idxml.baseName}_clean.idparquet \\ + --output ${idparquet.baseName}_clean.idparquet \\ $args \\ - 2>&1 | tee ${idxml.baseName}_clean.log + 2>&1 | tee ${idparquet.baseName}_clean.log cat <<-END_VERSIONS > versions.yml "${task.process}": From 59e94618acb10261deb1cd6fd4213af1ff2bc6e8 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 6 Jun 2026 11:37:42 +0800 Subject: [PATCH 02/12] update --- AGENTS.md | 2 +- assets/multiqc_config.yml | 2 +- conf/modules/id.config | 4 +- modules/local/openms/comet/meta.yml | 2 +- modules/local/openms/consensusid/main.nf | 40 ------------------- modules/local/openms/consensusid/meta.yml | 40 ------------------- modules/local/openms/extractfeatures/main.nf | 1 - .../meta.yml | 29 +++++++------- .../local/openms/false_discovery_rate/main.nf | 40 ------------------- modules/local/openms/id_filter/main.nf | 2 +- .../local/openms/id_score_switcher/main.nf | 4 +- .../local/openms/id_score_switcher/meta.yml | 2 +- modules/local/openms/index_peptides/main.nf | 4 +- modules/local/openms/index_peptides/meta.yml | 4 +- modules/local/openms/msgf/meta.yml | 2 +- modules/local/openms/percolator/meta.yml | 2 +- modules/local/openms/proteomicslfq/main.nf | 12 +++--- modules/local/openms/proteomicslfq/meta.yml | 2 +- modules/local/openms/sage/meta.yml | 2 +- .../local/utils/msrescore_features/meta.yml | 4 +- .../utils/msrescore_fine_tuning/meta.yml | 4 +- modules/local/utils/psm_clean/meta.yml | 4 +- modules/local/utils/psm_conversion/meta.yml | 2 +- modules/local/utils/spectrum_features/main.nf | 6 +-- .../local/utils/spectrum_features/meta.yml | 4 +- .../local/peptide_database_search/main.nf | 4 +- 26 files changed, 52 insertions(+), 172 deletions(-) delete mode 100644 modules/local/openms/consensusid/main.nf delete mode 100644 modules/local/openms/consensusid/meta.yml rename modules/local/openms/{false_discovery_rate => extractfeatures}/meta.yml (51%) delete mode 100644 modules/local/openms/false_discovery_rate/main.nf diff --git a/AGENTS.md b/AGENTS.md index 453c8ac7..ebc18f2a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -352,7 +352,7 @@ withName: 'OPENMS_PERCOLATORADAPTER' { publishDir = [ path: { "${params.outdir}/intermediate_results/fdr_control" }, mode: params.publish_dir_mode, - pattern: '*.idXML' + pattern: '*.idparquet' ] } ``` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 3a52d98a..40d716fd 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -75,7 +75,7 @@ sp: fn: "*_ms_info.parquet" num_lines: 0 pmultiqc/idXML: - fn: "*.idXML" + fn: "*.idparquet" num_lines: 0 pmultiqc/msstats: fn: "*msstats_in.csv" diff --git a/conf/modules/id.config b/conf/modules/id.config index 1b2a4ac4..8e26888c 100644 --- a/conf/modules/id.config +++ b/conf/modules/id.config @@ -47,13 +47,13 @@ process { // PSM FDR control ID_FILTER withName: '.*:ID:PSM_FDR_CONTROL:ID_FILTER' { ext.args = "-score:psm \"$params.run_fdr_cutoff\"" - ext.suffix = '.idXML' + ext.suffix = '.idparquet' } // DDA_ID PSM FDR control ID_FILTER withName: '.*:DDA_ID:PSM_FDR_CONTROL:ID_FILTER' { ext.args = "-score:psm \"$params.run_fdr_cutoff\"" - ext.suffix = '.idXML' + ext.suffix = '.idparquet' } // MS2RESCORE diff --git a/modules/local/openms/comet/meta.yml b/modules/local/openms/comet/meta.yml index 5f255f4f..a36a020c 100644 --- a/modules/local/openms/comet/meta.yml +++ b/modules/local/openms/comet/meta.yml @@ -29,7 +29,7 @@ output: - id_files_comet: type: file description: Output file - pattern: "*.idXML" + pattern: "*.idparquet" - log: type: file description: log file diff --git a/modules/local/openms/consensusid/main.nf b/modules/local/openms/consensusid/main.nf deleted file mode 100644 index 76346fb2..00000000 --- a/modules/local/openms/consensusid/main.nf +++ /dev/null @@ -1,40 +0,0 @@ -process CONSENSUSID { - tag "$meta.mzml_id" - label 'process_single' - label 'openms' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:2025.04.14' : - 'ghcr.io/bigbio/openms-tools-thirdparty:2025.04.14' }" - - input: - tuple val(meta), path(id_file) - - output: - tuple val(meta), path("${meta.mzml_id}_consensus.idXML"), emit: consensusids - path "versions.yml", emit: versions - path "*.log", emit: log - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.mzml_id}" - - """ - ConsensusID \\ - -in ${id_file} \\ - -out ${meta.mzml_id}_consensus.idXML \\ - -per_spectrum \\ - -threads $task.cpus \\ - -algorithm $params.consensusid_algorithm \\ - -filter:min_support $params.min_consensus_support \\ - -filter:considered_hits $params.consensusid_considered_top_hits \\ - -debug $params.consensusid_debug \\ - $args \\ - 2>&1 | tee ${meta.mzml_id}_consensusID.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ConsensusID: \$(ConsensusID 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) - END_VERSIONS - """ -} diff --git a/modules/local/openms/consensusid/meta.yml b/modules/local/openms/consensusid/meta.yml deleted file mode 100644 index c8a7df5a..00000000 --- a/modules/local/openms/consensusid/meta.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: consensusid -description: Computes a consensus from results of multiple peptide identification engines. -keywords: - - consensus scoring - - peptide database search - - OpenMS -tools: - - ConsensusID: - description: | - Tool to Computes a consensus from results of multiple peptide identification engines. - homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_ConsensusID.html - documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_ConsensusID.html -input: - - meta: - type: map - description: Groovy Map containing sample information - - id_file: - type: file - description: | - Putative peptide-spectrum matches in idXML format (potentially multiple per spectrum). Score needs to be comparable (e.g. probability) - pattern: "*.idXML" -output: - - meta: - type: map - description: Groovy Map containing sample information - - id_files_idx_ForIDPEP_FDR: - type: file - description: | - Identifications with annotated FDR. - pattern: "*.idXML" - - log: - type: file - description: log file - pattern: "*.log" - - version: - type: file - description: File containing software version - pattern: "versions.yml" -authors: - - "@daichengxin" diff --git a/modules/local/openms/extractfeatures/main.nf b/modules/local/openms/extractfeatures/main.nf index 046921f8..f5ca9fa1 100644 --- a/modules/local/openms/extractfeatures/main.nf +++ b/modules/local/openms/extractfeatures/main.nf @@ -36,4 +36,3 @@ process EXTRACTPSMFEATURES { PSMFeatureExtractor: \$(PSMFeatureExtractor 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) END_VERSIONS """ -} \ No newline at end of file diff --git a/modules/local/openms/false_discovery_rate/meta.yml b/modules/local/openms/extractfeatures/meta.yml similarity index 51% rename from modules/local/openms/false_discovery_rate/meta.yml rename to modules/local/openms/extractfeatures/meta.yml index 62fa100f..eff674af 100644 --- a/modules/local/openms/false_discovery_rate/meta.yml +++ b/modules/local/openms/extractfeatures/meta.yml @@ -1,33 +1,34 @@ -name: false_discovery_rate -description: Estimates the false discovery rate on peptide and protein level using decoy searches. +name: EXTRACTPSMFEATURES +description: Extracts PSM features from multiple search engines for rescoring. keywords: - - FDR - - decoy + - PSM + - feature extraction + - rescoring - OpenMS tools: - - FalseDiscoveryRate: + - PSMFeatureExtractor: description: | - Tool to estimate the false discovery rate on peptide and protein level. - homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_FalseDiscoveryRate.html - documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_FalseDiscoveryRate.html + Tool to extract PSM features from identification results for downstream rescoring. + homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_PSMFeatureExtractor.html + documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_PSMFeatureExtractor.html input: - meta: type: map description: Groovy Map containing sample information - - id_file: + - id_files: type: file description: | - Identifications from searching a target-decoy database. - pattern: "*.idXML" + Identification files from one or multiple search engines. + pattern: "*.idparquet" output: - meta: type: map description: Groovy Map containing sample information - - id_files_idx_ForIDPEP_FDR: + - id_files_feat: type: file description: | - Identifications with annotated FDR. - pattern: "*.idXML" + PSM features in Parquet format for rescoring. + pattern: "*_feat.idparquet" - log: type: file description: log file diff --git a/modules/local/openms/false_discovery_rate/main.nf b/modules/local/openms/false_discovery_rate/main.nf deleted file mode 100644 index 37a872dd..00000000 --- a/modules/local/openms/false_discovery_rate/main.nf +++ /dev/null @@ -1,40 +0,0 @@ -process FALSE_DISCOVERY_RATE { - tag "$meta.mzml_id" - label 'process_low' - label 'process_single' - label 'openms' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:2025.04.14' : - 'ghcr.io/bigbio/openms-tools-thirdparty:2025.04.14' }" - - input: - tuple val(meta), path(id_file) - - output: - tuple val(meta), path("${id_file.baseName}_fdr.idXML"), emit: id_files_idx_ForIDPEP_FDR - path "versions.yml", emit: versions - path "*.log", emit: log - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.mzml_id}" - - """ - FalseDiscoveryRate \\ - -in ${id_file} \\ - -out ${id_file.baseName}_fdr.idXML \\ - -threads $task.cpus \\ - -FDR:PSM ${params.run_fdr_cutoff} \\ - -algorithm:add_decoy_peptides \\ - -algorithm:add_decoy_proteins \\ - -algorithm:conservative ${params.fdr_conservative} \\ - $args \\ - 2>&1 | tee ${id_file.baseName}_fdr.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - FalseDiscoveryRate: \$(FalseDiscoveryRate 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) - END_VERSIONS - """ -} diff --git a/modules/local/openms/id_filter/main.nf b/modules/local/openms/id_filter/main.nf index 0f939bf2..2bf6c8cf 100644 --- a/modules/local/openms/id_filter/main.nf +++ b/modules/local/openms/id_filter/main.nf @@ -1,5 +1,5 @@ process ID_FILTER { - tag {task.ext.suffix == ".idXML" ? "$meta.mzml_id" : "$id_file.baseName"} + tag {task.ext.suffix == ".idparquet" ? "$meta.mzml_id" : "$id_file.baseName"} label 'process_very_low' label 'process_single' label 'openms' diff --git a/modules/local/openms/id_score_switcher/main.nf b/modules/local/openms/id_score_switcher/main.nf index bedd9fd7..97a9cd76 100644 --- a/modules/local/openms/id_score_switcher/main.nf +++ b/modules/local/openms/id_score_switcher/main.nf @@ -12,7 +12,7 @@ process ID_SCORE_SWITCHER { tuple val(meta), path(id_file), val(new_score) output: - tuple val(meta), path("${id_file.baseName}_pep.idXML"), emit: id_score_switcher + tuple val(meta), path("${id_file.baseName}_pep.idparquet"), emit: id_score_switcher path "versions.yml", emit: versions path "*.log", emit: log @@ -23,7 +23,7 @@ process ID_SCORE_SWITCHER { """ IDScoreSwitcher \\ -in ${id_file} \\ - -out ${id_file.baseName}_pep.idXML \\ + -out ${id_file.baseName}_pep.idparquet \\ -threads $task.cpus \\ -new_score ${new_score} \\ $args \\ diff --git a/modules/local/openms/id_score_switcher/meta.yml b/modules/local/openms/id_score_switcher/meta.yml index 246d603b..d20a3c52 100644 --- a/modules/local/openms/id_score_switcher/meta.yml +++ b/modules/local/openms/id_score_switcher/meta.yml @@ -18,7 +18,7 @@ input: type: file description: | Identifications from searching a target-decoy database. - pattern: "*.idXML" + pattern: "*.idparquet" output: - meta: type: map diff --git a/modules/local/openms/index_peptides/main.nf b/modules/local/openms/index_peptides/main.nf index e0628abb..6849853e 100644 --- a/modules/local/openms/index_peptides/main.nf +++ b/modules/local/openms/index_peptides/main.nf @@ -12,7 +12,7 @@ process INDEX_PEPTIDES { output: - tuple val(meta), path("${id_file.baseName}_idx.idXML"), emit: id_files_idx + tuple val(meta), path("${id_file.baseName}_idx.idparquet"), emit: id_files_idx path "versions.yml", emit: versions path "*.log", emit: log @@ -45,7 +45,7 @@ process INDEX_PEPTIDES { """ PeptideIndexer \\ -in ${id_file} \\ - -out ${id_file.baseName}_idx.idXML \\ + -out ${id_file.baseName}_idx.idparquet \\ -threads $task.cpus \\ -fasta ${database} \\ -enzyme:name "${enzyme}" \\ diff --git a/modules/local/openms/index_peptides/meta.yml b/modules/local/openms/index_peptides/meta.yml index eb937b22..2a784d7f 100644 --- a/modules/local/openms/index_peptides/meta.yml +++ b/modules/local/openms/index_peptides/meta.yml @@ -18,7 +18,7 @@ input: type: file description: | Input idXML file containing the identifications. - pattern: "*.idXML" + pattern: "*.idparquet" - database: type: file description: | @@ -32,7 +32,7 @@ output: type: file description: | Output idXML file. - pattern: "*.idXML" + pattern: "*.idparquet" - log: type: file description: log file diff --git a/modules/local/openms/msgf/meta.yml b/modules/local/openms/msgf/meta.yml index 8c7c8690..d8fe9bce 100644 --- a/modules/local/openms/msgf/meta.yml +++ b/modules/local/openms/msgf/meta.yml @@ -29,7 +29,7 @@ output: - id_files_msgf: type: file description: Output file - pattern: "*.idXML" + pattern: "*.idparquet" - log: type: file description: log file diff --git a/modules/local/openms/percolator/meta.yml b/modules/local/openms/percolator/meta.yml index b7be826d..aad911da 100644 --- a/modules/local/openms/percolator/meta.yml +++ b/modules/local/openms/percolator/meta.yml @@ -17,7 +17,7 @@ input: type: file description: | Input idXML file containing the identifications. - pattern: "*.idXML" + pattern: "*.idparquet" output: - meta: type: map diff --git a/modules/local/openms/proteomicslfq/main.nf b/modules/local/openms/proteomicslfq/main.nf index 176b71ae..bb2216c0 100644 --- a/modules/local/openms/proteomicslfq/main.nf +++ b/modules/local/openms/proteomicslfq/main.nf @@ -18,12 +18,12 @@ process PROTEOMICSLFQ { path "${expdes.baseName}_qpx", emit: out_qpx path "${expdes.baseName}_openms.consensusXML", emit: out_consensusXML path "*msstats_in.csv", emit: out_msstats, optional: true - path "debug_mergedIDs.idXML", emit: debug_mergedIDs, optional: true - path "debug_mergedIDs_inference.idXML", emit: debug_mergedIDs_inference, optional: true - path "debug_mergedIDsGreedyResolved.idXML", emit: debug_mergedIDsGreedyResolved, optional: true - path "debug_mergedIDsGreedyResolvedFDR.idXML", emit: debug_mergedIDsGreedyResolvedFDR, optional: true - path "debug_mergedIDsGreedyResolvedFDRFiltered.idXML", emit: debug_mergedIDsGreedyResolvedFDRFiltered, optional: true - path "debug_mergedIDsFDRFilteredStrictlyUniqueResolved.idXML", emit: debug_mergedIDsFDRFilteredStrictlyUniqueResolved, optional: true + path "debug_mergedIDs.idparquet", emit: debug_mergedIDs, optional: true + path "debug_mergedIDs_inference.idparquet", emit: debug_mergedIDs_inference, optional: true + path "debug_mergedIDsGreedyResolved.idparquet", emit: debug_mergedIDsGreedyResolved, optional: true + path "debug_mergedIDsGreedyResolvedFDR.idparquet", emit: debug_mergedIDsGreedyResolvedFDR, optional: true + path "debug_mergedIDsGreedyResolvedFDRFiltered.idparquet", emit: debug_mergedIDsGreedyResolvedFDRFiltered, optional: true + path "debug_mergedIDsFDRFilteredStrictlyUniqueResolved.idparquet", emit: debug_mergedIDsFDRFilteredStrictlyUniqueResolved, optional: true path "*.log", emit: log path "versions.yml", emit: versions diff --git a/modules/local/openms/proteomicslfq/meta.yml b/modules/local/openms/proteomicslfq/meta.yml index b8b327e1..7ca50007 100644 --- a/modules/local/openms/proteomicslfq/meta.yml +++ b/modules/local/openms/proteomicslfq/meta.yml @@ -18,7 +18,7 @@ input: - id_files: type: file description: Identifications in idXML or mzIdentML format with posterior error probabilities as score type. - pattern: "*.idXML" + pattern: "*.idparquet" - expdes: type: file description: An experimental design file diff --git a/modules/local/openms/sage/meta.yml b/modules/local/openms/sage/meta.yml index 11eed615..49690e82 100644 --- a/modules/local/openms/sage/meta.yml +++ b/modules/local/openms/sage/meta.yml @@ -29,7 +29,7 @@ output: - id_files_sage: type: file description: Output file - pattern: "*.idXML" + pattern: "*.idparquet" - log: type: file description: log file diff --git a/modules/local/utils/msrescore_features/meta.yml b/modules/local/utils/msrescore_features/meta.yml index 208da0e7..fe8162c1 100644 --- a/modules/local/utils/msrescore_features/meta.yml +++ b/modules/local/utils/msrescore_features/meta.yml @@ -13,7 +13,7 @@ input: - idxml_file: type: file description: idXML identification file - pattern: "*.idXML" + pattern: "*.idparquet" - mzml: type: file description: spectrum data file @@ -25,7 +25,7 @@ output: - idxml: type: file description: idXML identification file after MS2 rescoring - pattern: "*.idXML" + pattern: "*.idparquet" - version: type: file description: File containing software version diff --git a/modules/local/utils/msrescore_fine_tuning/meta.yml b/modules/local/utils/msrescore_fine_tuning/meta.yml index dd98ac3f..72fb9a60 100644 --- a/modules/local/utils/msrescore_fine_tuning/meta.yml +++ b/modules/local/utils/msrescore_fine_tuning/meta.yml @@ -18,7 +18,7 @@ input: - idxml_file: type: file description: idXML identification file - pattern: "*.idXML" + pattern: "*.idparquet" - mzml: type: file description: spectrum data file @@ -30,7 +30,7 @@ output: - idxml: type: file description: idXML identification file after MS2 rescoring - pattern: "*.idXML" + pattern: "*.idparquet" - version: type: file description: File containing software version diff --git a/modules/local/utils/psm_clean/meta.yml b/modules/local/utils/psm_clean/meta.yml index 84b1505b..32fb1c40 100644 --- a/modules/local/utils/psm_clean/meta.yml +++ b/modules/local/utils/psm_clean/meta.yml @@ -13,7 +13,7 @@ input: - idxml_file: type: file description: idXML identification file - pattern: "*.idXML" + pattern: "*.idparquet" - mzml: type: file description: spectrum data file @@ -25,7 +25,7 @@ output: - idxml: type: file description: idXML identification file after postprocessing - pattern: "*.idXML" + pattern: "*.idparquet" - version: type: file description: File containing software version diff --git a/modules/local/utils/psm_conversion/meta.yml b/modules/local/utils/psm_conversion/meta.yml index b3b3317f..86b6171d 100644 --- a/modules/local/utils/psm_conversion/meta.yml +++ b/modules/local/utils/psm_conversion/meta.yml @@ -13,7 +13,7 @@ input: - idxml_file: type: file description: idXML identification file - pattern: "*.idXML" + pattern: "*.idparquet" - spectrum_df: type: file description: spectrum data file diff --git a/modules/local/utils/spectrum_features/main.nf b/modules/local/utils/spectrum_features/main.nf index baaa989b..b0ee0cc6 100644 --- a/modules/local/utils/spectrum_features/main.nf +++ b/modules/local/utils/spectrum_features/main.nf @@ -10,7 +10,7 @@ process SPECTRUM_FEATURES { tuple val(meta), path(id_file), val(search_engine), path(ms_file) output: - tuple val(meta), path("${id_file.baseName}_snr.idXML"), val(search_engine), emit: id_files_snr + tuple val(meta), path("${id_file.baseName}_snr.idparquet"), val(search_engine), emit: id_files_snr path "versions.yml", emit: versions path "*.log", emit: log @@ -21,8 +21,8 @@ process SPECTRUM_FEATURES { """ rescoring spectrum2feature \\ --mzml "${ms_file}" \\ - --idxml "${id_file}" \\ - --output "${id_file.baseName}_snr.idXML" \\ + --idparquet "${id_file}" \\ + --output "${id_file.baseName}_snr.idparquet" \\ $args \\ 2>&1 | tee "${id_file.baseName}_snr_feature.log" diff --git a/modules/local/utils/spectrum_features/meta.yml b/modules/local/utils/spectrum_features/meta.yml index 1c53ddf9..4cfb9b9d 100644 --- a/modules/local/utils/spectrum_features/meta.yml +++ b/modules/local/utils/spectrum_features/meta.yml @@ -22,7 +22,7 @@ input: type: file description: | Input idXML file containing the identifications. - pattern: "*.idXML" + pattern: "*.idparquet" output: - meta: type: map @@ -31,7 +31,7 @@ output: type: file description: | Output file in idXML format - pattern: "*.idXML" + pattern: "*.idparquet" - log: type: file description: log file diff --git a/subworkflows/local/peptide_database_search/main.nf b/subworkflows/local/peptide_database_search/main.nf index e22ac32d..8423d00d 100644 --- a/subworkflows/local/peptide_database_search/main.nf +++ b/subworkflows/local/peptide_database_search/main.nf @@ -172,7 +172,7 @@ workflow PEPTIDE_DATABASE_SEARCH { } else { ch_id_files_out = ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage) } - + } else if (params.psm_clean == true) { ch_id_files = ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage) PSM_CLEAN(ch_id_files.combine(ch_mzmls_search, by: 0)) @@ -185,4 +185,4 @@ workflow PEPTIDE_DATABASE_SEARCH { emit: ch_id_files_idx = ch_id_files_out versions = ch_versions -} \ No newline at end of file +} From 3f76281719dac8ea8b77a15afe1965b64534441f Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 6 Jun 2026 11:46:32 +0800 Subject: [PATCH 03/12] Update main.nf --- modules/local/openms/extractfeatures/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/local/openms/extractfeatures/main.nf b/modules/local/openms/extractfeatures/main.nf index f5ca9fa1..16340ee1 100644 --- a/modules/local/openms/extractfeatures/main.nf +++ b/modules/local/openms/extractfeatures/main.nf @@ -36,3 +36,4 @@ process EXTRACTPSMFEATURES { PSMFeatureExtractor: \$(PSMFeatureExtractor 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) END_VERSIONS """ +} From 35668812e268dbe1e1d8be2ffcbb1ba1be6b719d Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 6 Jun 2026 11:53:43 +0800 Subject: [PATCH 04/12] Update main.nf --- subworkflows/local/dda_id/main.nf | 59 ++----------------------------- 1 file changed, 3 insertions(+), 56 deletions(-) diff --git a/subworkflows/local/dda_id/main.nf b/subworkflows/local/dda_id/main.nf index 29daeeba..a145297c 100644 --- a/subworkflows/local/dda_id/main.nf +++ b/subworkflows/local/dda_id/main.nf @@ -1,9 +1,7 @@ // // MODULE: Local to the pipeline // -include { MSRESCORE_FEATURES } from '../../../modules/local/utils/msrescore_features/main' include { PERCOLATOR } from '../../../modules/local/openms/percolator/main' -include { EXTRACTPSMFEATURES } from '../../../modules/local/openms/extractfeatures/main' include { PSM_CONVERSION } from '../../../modules/local/utils/psm_conversion/main' include { PHOSPHO_SCORING } from '../phospho_scoring/main' @@ -41,62 +39,11 @@ workflow DDA_ID { // // SUBWORKFLOW: Rescoring // - if (params.search_engines.tokenize(",").unique().size() > 1) { - if (params.ms2features_enable == true) { - // Only add ms2_model_dir if it's actually set and not empty - // Handle cases where parameter might be empty string, null, boolean true, or whitespace - // When --ms2features_model_dir is passed with no value, Nextflow may set it to boolean true - if (params.ms2features_model_dir && params.ms2features_model_dir != true) { - ms2_model_dir = channel.from(file(params.ms2features_model_dir, checkIfExists: true)) - } else { - // create a fake channel when don't specify model dir - ms2_model_dir = channel.from(file("pretrained_models")) - } - ch_id_files_feats.groupTuple(size: params.search_engines.tokenize(",").unique().size()) - .combine(ch_mzmls_search, by: 0) - .combine(ms2_model_dir).set{ ch_id_rescoring } + PERCOLATOR(ch_id_files_feats) + ch_rescoring_results = PERCOLATOR.out.id_files_perc + ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.versions) - MSRESCORE_FEATURES(ch_id_rescoring) - ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_feats = MSRESCORE_FEATURES.out.idxml - ch_id_files_feats.map { v -> [v[0], v[1]] }.set {ch_perc_input} - - } else{ - EXTRACTPSMFEATURES(ch_id_files_feats.groupTuple(size: params.search_engines.tokenize(",").unique().size())) - ch_consensus_input = EXTRACTPSMFEATURES.out.id_files_feat - ch_perc_input = ch_consensus_input - } - PERCOLATOR(ch_perc_input) - ch_rescoring_results = PERCOLATOR.out.id_files_perc - ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.versions) - } else { - if (params.ms2features_enable == true) { - // Only add ms2_model_dir if it's actually set and not empty - // Handle cases where parameter might be empty string, null, boolean true, or whitespace - // When --ms2features_model_dir is passed with no value, Nextflow may set it to boolean true - if (params.ms2features_model_dir && params.ms2features_model_dir != true) { - ms2_model_dir = channel.from(file(params.ms2features_model_dir, checkIfExists: true)) - } else { - // create a fake channel when don't specify model dir - ms2_model_dir = channel.from(file("pretrained_models")) - } - - ch_id_files_feats.groupTuple(size: params.search_engines.tokenize(",").unique().size()) - .combine(ch_mzmls_search, by: 0) - .combine(ms2_model_dir).set{ ch_id_rescoring } - - MSRESCORE_FEATURES(ch_id_rescoring) - ch_software_versions = ch_software_versions.mix(MSRESCORE_FEATURES.out.versions) - ch_id_files_feats = MSRESCORE_FEATURES.out.idxml - ch_id_files_feats.map { v -> [v[0], v[1]] }.set {ch_perc_input} - } else { - ch_perc_input = ch_id_files_feats - } - PERCOLATOR(ch_perc_input) - ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.versions) - ch_rescoring_results = PERCOLATOR.out.id_files_perc - } PSM_FDR_CONTROL(ch_rescoring_results) ch_software_versions = ch_software_versions.mix(PSM_FDR_CONTROL.out.versions) From acf43d282ec0c1efa45adf2a508afd4244b886f2 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 6 Jun 2026 12:07:47 +0800 Subject: [PATCH 05/12] update --- modules/local/openms/id_mapper/main.nf | 4 ++-- modules/local/utils/msrescore_fine_tuning/main.nf | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/local/openms/id_mapper/main.nf b/modules/local/openms/id_mapper/main.nf index 1c88cede..945a04d2 100644 --- a/modules/local/openms/id_mapper/main.nf +++ b/modules/local/openms/id_mapper/main.nf @@ -4,8 +4,8 @@ process ID_MAPPER { label 'openms' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:2025.04.14' : - 'ghcr.io/bigbio/openms-tools-thirdparty:2025.04.14' }" + 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:latest' : + 'ghcr.io/bigbio/openms-tools-thirdparty:latest' }" input: tuple val(meta), path(id_file), path(map_file) diff --git a/modules/local/utils/msrescore_fine_tuning/main.nf b/modules/local/utils/msrescore_fine_tuning/main.nf index aa7ae59b..f7e4fa7f 100644 --- a/modules/local/utils/msrescore_fine_tuning/main.nf +++ b/modules/local/utils/msrescore_fine_tuning/main.nf @@ -7,7 +7,7 @@ process MSRESCORE_FINE_TUNING { 'ghcr.io/bigbio/quantms-rescoring:0.0.16' }" input: - tuple val(meta), path(idxml), path(mzml), val(groupkey), path(ms2_model_dir) + tuple val(meta), path(idparquet), path(mzml), val(groupkey), path(ms2_model_dir) output: tuple val(groupkey), path("retained_ms2.pth") , emit: model_weight @@ -49,7 +49,7 @@ process MSRESCORE_FINE_TUNING { """ rescoring transfer_learning \\ - --idxml ./ \\ + --idparquet ${idparquet.join(' --idparquet ')} \\ --mzml ./ \\ --save_model_dir ./ \\ --ms2_tolerance $ms2_tolerance \\ From 3d233db72e9850ecefd660f62a77f7599c612f73 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 6 Jun 2026 12:32:18 +0800 Subject: [PATCH 06/12] bump --- modules/local/utils/msrescore_features/main.nf | 2 +- modules/local/utils/msrescore_fine_tuning/main.nf | 4 ++-- modules/local/utils/psm_clean/main.nf | 4 ++-- modules/local/utils/spectrum_features/main.nf | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/local/utils/msrescore_features/main.nf b/modules/local/utils/msrescore_features/main.nf index 79b4244a..2e96d952 100644 --- a/modules/local/utils/msrescore_features/main.nf +++ b/modules/local/utils/msrescore_features/main.nf @@ -3,7 +3,7 @@ process MSRESCORE_FEATURES { label 'process_medium' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.16' : + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.17' : 'ghcr.io/bigbio/quantms-rescoring:0.0.17' }" input: diff --git a/modules/local/utils/msrescore_fine_tuning/main.nf b/modules/local/utils/msrescore_fine_tuning/main.nf index f7e4fa7f..36db5182 100644 --- a/modules/local/utils/msrescore_fine_tuning/main.nf +++ b/modules/local/utils/msrescore_fine_tuning/main.nf @@ -3,8 +3,8 @@ process MSRESCORE_FINE_TUNING { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.16' : - 'ghcr.io/bigbio/quantms-rescoring:0.0.16' }" + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.17' : + 'ghcr.io/bigbio/quantms-rescoring:0.0.17' }" input: tuple val(meta), path(idparquet), path(mzml), val(groupkey), path(ms2_model_dir) diff --git a/modules/local/utils/psm_clean/main.nf b/modules/local/utils/psm_clean/main.nf index 201a85b4..cd7a9892 100644 --- a/modules/local/utils/psm_clean/main.nf +++ b/modules/local/utils/psm_clean/main.nf @@ -3,8 +3,8 @@ process PSM_CLEAN { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.16' : - 'ghcr.io/bigbio/quantms-rescoring:0.0.16' }" + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.17' : + 'ghcr.io/bigbio/quantms-rescoring:0.0.17' }" input: tuple val(meta), path(idparquet), path(mzml) diff --git a/modules/local/utils/spectrum_features/main.nf b/modules/local/utils/spectrum_features/main.nf index b0ee0cc6..dad64be0 100644 --- a/modules/local/utils/spectrum_features/main.nf +++ b/modules/local/utils/spectrum_features/main.nf @@ -3,8 +3,8 @@ process SPECTRUM_FEATURES { label 'process_low' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.16' : - 'ghcr.io/bigbio/quantms-rescoring:0.0.16' }" + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.17' : + 'ghcr.io/bigbio/quantms-rescoring:0.0.17' }" input: tuple val(meta), path(id_file), val(search_engine), path(ms_file) From d451d980b0c3e697b5a05031c6e14bbde275c5cc Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 6 Jun 2026 15:29:34 +0800 Subject: [PATCH 07/12] update --- conf/modules/id.config | 5 ----- modules/local/utils/spectrum_features/main.nf | 4 ++-- subworkflows/local/phospho_scoring/main.nf | 11 ++--------- 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/conf/modules/id.config b/conf/modules/id.config index 8e26888c..96670030 100644 --- a/conf/modules/id.config +++ b/conf/modules/id.config @@ -6,11 +6,6 @@ process { - // FDRCONSENSUSID - withName: '.*:FDR_CONSENSUSID' { - ext.args = "-PSM true -protein false" - } - // ID_SCORE_SWITCHER for phospho scoring withName: '.*:ID:PHOSPHO_SCORING:ID_SCORE_SWITCHER' { ext.args = [ diff --git a/modules/local/utils/spectrum_features/main.nf b/modules/local/utils/spectrum_features/main.nf index dad64be0..101b8f7c 100644 --- a/modules/local/utils/spectrum_features/main.nf +++ b/modules/local/utils/spectrum_features/main.nf @@ -7,10 +7,10 @@ process SPECTRUM_FEATURES { 'ghcr.io/bigbio/quantms-rescoring:0.0.17' }" input: - tuple val(meta), path(id_file), val(search_engine), path(ms_file) + tuple val(meta), path(id_file), path(ms_file) output: - tuple val(meta), path("${id_file.baseName}_snr.idparquet"), val(search_engine), emit: id_files_snr + tuple val(meta), path("${id_file.baseName}_snr.idparquet"), emit: id_files_snr path "versions.yml", emit: versions path "*.log", emit: log diff --git a/subworkflows/local/phospho_scoring/main.nf b/subworkflows/local/phospho_scoring/main.nf index 13d5a80c..a233c33c 100644 --- a/subworkflows/local/phospho_scoring/main.nf +++ b/subworkflows/local/phospho_scoring/main.nf @@ -12,16 +12,9 @@ workflow PHOSPHO_SCORING { main: ch_version = channel.empty() - if (params.search_engines.split(",").size() != 1){ - ID_SCORE_SWITCHER(ch_id_files.combine(channel.value("\"Posterior Error Probability_score\""))) - ch_version = ch_version.mix(ID_SCORE_SWITCHER.out.versions) - ONSITE(ch_mzml_files.join(ID_SCORE_SWITCHER.out.id_score_switcher)) - ch_version = ch_version.mix(ONSITE.out.versions) - } else{ - ONSITE(ch_mzml_files.join(ch_id_files)) - ch_version = ch_version.mix(ONSITE.out.versions) - } + ONSITE(ch_mzml_files.join(ch_id_files)) + ch_version = ch_version.mix(ONSITE.out.versions) emit: From e2839a8fc9303ffab64fb01f11e9c3256d73f5aa Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 6 Jun 2026 16:46:10 +0800 Subject: [PATCH 08/12] update TMT --- conf/modules/shared.config | 2 +- .../local/openms/id_conflict_resolver/main.nf | 33 ------- .../openms/id_conflict_resolver/meta.yml | 31 ------- modules/local/openms/id_mapper/main.nf | 36 -------- modules/local/openms/id_mapper/meta.yml | 40 --------- .../local/openms/isobaric_analyzer/main.nf | 79 ----------------- .../local/openms/isobaric_analyzer/meta.yml | 37 -------- .../local/openms/isobaric_workflow/main.nf | 88 +++++++++++++++++++ .../local/openms/isobaric_workflow/meta.yml | 42 +++++++++ .../openms/protein_inference_epifany/main.nf | 39 -------- .../openms/protein_inference_epifany/meta.yml | 34 ------- .../openms/protein_inference_generic/main.nf | 44 ---------- .../openms/protein_inference_generic/meta.yml | 34 ------- subworkflows/local/feature_mapper/main.nf | 26 ------ subworkflows/local/feature_mapper/meta.yml | 29 ------ subworkflows/local/protein_inference/main.nf | 40 --------- subworkflows/local/protein_inference/meta.yml | 30 ------- subworkflows/local/protein_quant/main.nf | 30 ------- subworkflows/local/protein_quant/meta.yml | 30 ------- workflows/tmt.nf | 46 +++++----- 20 files changed, 153 insertions(+), 617 deletions(-) delete mode 100644 modules/local/openms/id_conflict_resolver/main.nf delete mode 100644 modules/local/openms/id_conflict_resolver/meta.yml delete mode 100644 modules/local/openms/id_mapper/main.nf delete mode 100644 modules/local/openms/id_mapper/meta.yml delete mode 100644 modules/local/openms/isobaric_analyzer/main.nf delete mode 100644 modules/local/openms/isobaric_analyzer/meta.yml create mode 100644 modules/local/openms/isobaric_workflow/main.nf create mode 100644 modules/local/openms/isobaric_workflow/meta.yml delete mode 100644 modules/local/openms/protein_inference_epifany/main.nf delete mode 100644 modules/local/openms/protein_inference_epifany/meta.yml delete mode 100644 modules/local/openms/protein_inference_generic/main.nf delete mode 100644 modules/local/openms/protein_inference_generic/meta.yml delete mode 100644 subworkflows/local/feature_mapper/main.nf delete mode 100644 subworkflows/local/feature_mapper/meta.yml delete mode 100644 subworkflows/local/protein_inference/main.nf delete mode 100644 subworkflows/local/protein_inference/meta.yml delete mode 100644 subworkflows/local/protein_quant/main.nf delete mode 100644 subworkflows/local/protein_quant/meta.yml diff --git a/conf/modules/shared.config b/conf/modules/shared.config index c796635e..1a9ee7c4 100644 --- a/conf/modules/shared.config +++ b/conf/modules/shared.config @@ -34,7 +34,7 @@ process { } // Result tables from multiple pipelines including LFQ, TMT, DDA - withName: '.*:PROTEOMICSLFQ|PROTEIN_QUANTIFIER|MSSTATS_CONVERTER' { + withName: '.*:PROTEOMICSLFQ|PROTEIN_QUANTIFIER|MSSTATS_CONVERTER|ISOBARIC_WORKFLOW' { publishDir = [ path: { "${params.outdir}/quant_tables" }, mode: 'copy', diff --git a/modules/local/openms/id_conflict_resolver/main.nf b/modules/local/openms/id_conflict_resolver/main.nf deleted file mode 100644 index d9b135af..00000000 --- a/modules/local/openms/id_conflict_resolver/main.nf +++ /dev/null @@ -1,33 +0,0 @@ -process ID_CONFLICT_RESOLVER { - label 'process_low' - label 'openms' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:2025.04.14' : - 'ghcr.io/bigbio/openms-tools-thirdparty:2025.04.14' }" - - input: - path consus_file - - output: - path "${consus_file.baseName}_resconf.consensusXML", emit: pro_resconf - path "versions.yml", emit: versions - path "*.log", emit: log - - script: - def args = task.ext.args ?: '' - - """ - IDConflictResolver \\ - -in ${consus_file} \\ - -threads $task.cpus \\ - -out ${consus_file.baseName}_resconf.consensusXML \\ - $args \\ - 2>&1 | tee ${consus_file.baseName}_resconf.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - IDConflictResolver: \$(IDConflictResolver 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) - END_VERSIONS - """ -} diff --git a/modules/local/openms/id_conflict_resolver/meta.yml b/modules/local/openms/id_conflict_resolver/meta.yml deleted file mode 100644 index 62db48b4..00000000 --- a/modules/local/openms/id_conflict_resolver/meta.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: id_conflict_resolver -description: Resolves ambiguous annotations of features with peptide identifications. -keywords: - - ambiguous - - OpenMS -tools: - - IDConflictResolver: - description: | - Resolves ambiguous annotations of features with peptide identifications. - homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_IDConflictResolver.html - documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_IDConflictResolver.html -input: - - consus_file: - type: file - description: | - Input file (data annotated with identifications) - pattern: "*.{featureXML,consensusXML}" -output: - - pro_resconf: - type: file - description: Output file (data with one peptide identification per feature) - - log: - type: file - description: log file - pattern: "*.log" - - version: - type: file - description: File containing software version - pattern: "versions.yml" -authors: - - "@daichengxin" diff --git a/modules/local/openms/id_mapper/main.nf b/modules/local/openms/id_mapper/main.nf deleted file mode 100644 index 945a04d2..00000000 --- a/modules/local/openms/id_mapper/main.nf +++ /dev/null @@ -1,36 +0,0 @@ -process ID_MAPPER { - tag "$meta.mzml_id" - label 'process_medium' - label 'openms' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:latest' : - 'ghcr.io/bigbio/openms-tools-thirdparty:latest' }" - - input: - tuple val(meta), path(id_file), path(map_file) - - output: - path "${id_file.baseName}_map.consensusXML", emit: id_map - path "versions.yml", emit: versions - path "*.log", emit: log - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.mzml_id}" - - """ - IDMapper \\ - -id ${id_file} \\ - -in ${map_file} \\ - -threads $task.cpus \\ - -out ${id_file.baseName}_map.consensusXML \\ - $args \\ - 2>&1 | tee ${id_file.baseName}_map.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - IDMapper: \$(IDMapper 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) - END_VERSIONS - """ -} diff --git a/modules/local/openms/id_mapper/meta.yml b/modules/local/openms/id_mapper/meta.yml deleted file mode 100644 index 67e9ed6d..00000000 --- a/modules/local/openms/id_mapper/meta.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: id_mapper -description: Assigns protein/peptide identifications to features or consensus features. -keywords: - - feature - - identification - - OpenMS -tools: - - IDMapper: - description: | - Assigns protein/peptide identifications to features or consensus features. - homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_IDMapper.html - documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_IDMapper.html -input: - - meta: - type: map - description: Groovy Map containing sample information - - id_file: - type: file - description: | - Identifications file - pattern: "*.{idXML, mzid}" - - map_file: - type: file - description: Feature map/consensus map file - pattern: "*.{featureXML, consensusXML, mzq}" -output: - - id_map: - type: file - description: Output file - pattern: "*.{featureXML, consensusXML, mzq}" - - log: - type: file - description: log file - pattern: "*.log" - - version: - type: file - description: File containing software version - pattern: "versions.yml" -authors: - - "@daichengxin" diff --git a/modules/local/openms/isobaric_analyzer/main.nf b/modules/local/openms/isobaric_analyzer/main.nf deleted file mode 100644 index 6db8e587..00000000 --- a/modules/local/openms/isobaric_analyzer/main.nf +++ /dev/null @@ -1,79 +0,0 @@ -process ISOBARIC_ANALYZER { - tag "$meta.mzml_id" - label 'process_medium' - label 'openms' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:2025.04.14' : - 'ghcr.io/bigbio/openms-tools-thirdparty:2025.04.14' }" - - input: - tuple val(meta), path(mzml_file) - - output: - tuple val(meta), path("${mzml_file.baseName}_iso.consensusXML"), emit: id_files_consensusXML - path "versions.yml", emit: versions - path "*.log", emit: log - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.mzml_id}" - - if (params.quant_activation_method == "HCD" || params.quant_activation_method == "HCID") diss_meth = "auto" - else if (params.quant_activation_method == "CID") diss_meth = "Collision-induced dissociation" - else if (params.quant_activation_method == "ETD") diss_meth = "Electron transfer dissociation" - else if (params.quant_activation_method == "ECD") diss_meth = "Electron capture dissociation" - - def iso_normalization = params.iso_normalization ? "-quantification:normalization" : "" - def isotope_correction = params.isotope_correction ? "-quantification:isotope_correction true" : "-quantification:isotope_correction false" - - // Check for isotope correction and load the correction matrix - if (params.isotope_correction) { - if (params.plex_corr_matrix_file == null) { - error("plex_corr_matrix_file is required when isotope_correction is enabled") - } - - // Read the matrix file and format it into the command-line format - // Read the matrix file, skipping lines that start with '#' and process the matrix - def matrix_lines = new File(params.plex_corr_matrix_file).readLines() - .findAll { line -> !line.startsWith('#') && line.trim() } // Skip lines starting with '#' and empty lines - .drop(1) // Assuming the first non-comment line is a header - .collect { line -> - def values = line.split('/') - // Handle different labelling types - if (meta.labelling_type == 'tmt18plex' || meta.labelling_type == 'tmt16plex') { - return "\"${values[1]}/${values[2]}/${values[3]}/${values[4]}/${values[5]}/${values[6]}/${values[7]}/${values[8]}\"" - } else { - return "\"${values[1]}/${values[2]}/${values[3]}/${values[4]}\"" - } - } - - // Join the matrix lines into a format for the C++ tool - def correction_matrix = matrix_lines.join(" ") - - isotope_correction += " -${meta.labelling_type}:correction_matrix ${correction_matrix}" - } - - """ - IsobaricAnalyzer \\ - -type ${meta.labelling_type} \\ - -in ${mzml_file} \\ - -threads ${task.cpus} \\ - -extraction:select_activation "${diss_meth}" \\ - -extraction:reporter_mass_shift ${params.reporter_mass_shift} \\ - -extraction:min_reporter_intensity ${params.min_reporter_intensity} \\ - -extraction:min_precursor_purity ${params.min_precursor_purity} \\ - -extraction:precursor_isotope_deviation ${params.precursor_isotope_deviation} \\ - ${iso_normalization} \\ - -${meta.labelling_type}:reference_channel ${params.reference_channel} \\ - ${isotope_correction} \\ - -out ${mzml_file.baseName}_iso.consensusXML \\ - ${args} \\ - 2>&1 | tee ${mzml_file.baseName}_isob.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - IsobaricAnalyzer: \$(IsobaricAnalyzer --version 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) - END_VERSIONS - """ -} diff --git a/modules/local/openms/isobaric_analyzer/meta.yml b/modules/local/openms/isobaric_analyzer/meta.yml deleted file mode 100644 index faf4c4ae..00000000 --- a/modules/local/openms/isobaric_analyzer/meta.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: isobaric_analyzer -description: Extracts and normalizes isobaric labeling information from an LC-MS/MS experiment. -keywords: - - peak - - OpenMS -tools: - - IsobaricAnalyzer: - description: | - Extracts and normalizes isobaric labeling information from an LC-MS/MS experiment. - homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_IsobaricAnalyzer.html - documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_IsobaricAnalyzer.html -input: - - meta: - type: map - description: Groovy Map containing sample information - - mzml_file: - type: file - description: Input profile data file. - pattern: "*.mzML" -output: - - meta: - type: map - description: Groovy Map containing sample information - - iso_consensusXML: - type: file - description: Output consensusXML file with quantitative information - pattern: "*.consensusXML" - - log: - type: file - description: log file - pattern: "*.log" - - version: - type: file - description: File containing software version - pattern: "versions.yml" -authors: - - "@daichengxin" diff --git a/modules/local/openms/isobaric_workflow/main.nf b/modules/local/openms/isobaric_workflow/main.nf new file mode 100644 index 00000000..4cf2d1af --- /dev/null +++ b/modules/local/openms/isobaric_workflow/main.nf @@ -0,0 +1,88 @@ +process ISOBARIC_WORKFLOW { + tag "${expdes.baseName}" + label 'process_high' + label 'openms' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:latest' : + 'ghcr.io/bigbio/openms-tools-thirdparty:latest' }" + + input: + val(labelling_type) + path(mzmls) + path(id_files) + path(expdes) + + output: + path "${expdes.baseName}_openms.mzTab", emit: out_mztab + path "${expdes.baseName}_openms.consensusXML", emit: out_consensusXML + path "${expdes.baseName}_qpx", emit: out_qpx + path "*.log", emit: log + path "versions.yml", emit: versions + + script: + def args = task.ext.args ?: '' + def extractBaseName = { filename -> + def name = filename.toString() + name = name.replaceAll(/\.mzML$/, '') + + if (name.endsWith('.idparquet')) { + name = name.replaceAll(/\.idparquet$/, '') + name = name.replaceAll(/_(comet|msgf|sage|consensus)(_perc)?(_filter)?(_fdr)?$/, '') + } + return name + } + + def mzml_sorted = mzmls.collect().sort{ a, b -> + extractBaseName(a.name) <=> extractBaseName(b.name) + } + def id_sorted = id_files.collect().sort{ a, b -> + extractBaseName(a.name) <=> extractBaseName(b.name) + } + + // Build isotope correction matrix argument if enabled + def isotope_correction = "" + if (params.isotope_correction && params.plex_corr_matrix_file != null) { + def matrix_lines = new File(params.plex_corr_matrix_file).readLines() + .findAll { !it.startsWith('#') && it.trim() } + .drop(1) + .collect { line -> + def values = line.split('/') + if (labelling_type == 'tmt18plex' || labelling_type == 'tmt16plex') { + return "\"${values[1]}/${values[2]}/${values[3]}/${values[4]}/${values[5]}/${values[6]}/${values[7]}/${values[8]}\"" + } else { + return "\"${values[1]}/${values[2]}/${values[3]}/${values[4]}\"" + } + } + def correction_matrix = matrix_lines.join(" ") + isotope_correction = "-quantification:isotope_correction true -${labelling_type}:correction_matrix ${correction_matrix}" + } + + """ + IsobaricWorkflow \\ + -threads ${task.cpus} \\ + -in ${mzml_sorted.join(' ')} \\ + -in_id ${id_sorted.join(' ')} \\ + -exp_design ${expdes} \\ + -type ${labelling_type} \\ + -inference_method ${params.protein_inference_method} \\ + -protein_quantification ${params.protein_quant} \\ + -psmFDR ${params.psm_level_fdr_cutoff} \\ + -proteinFDR ${params.protein_level_fdr_cutoff} \\ + -picked_fdr ${params.picked_fdr} \\ + -picked_decoy_string ${params.decoy_string} \\ + -extraction:min_precursor_purity ${params.min_precursor_purity} \\ + -extraction:precursor_isotope_deviation ${params.precursor_isotope_deviation} \\ + -extraction:min_reporter_intensity ${params.min_reporter_intensity} \\ + ${isotope_correction} \\ + -out ${expdes.baseName}_openms.consensusXML \\ + -out_mzTab ${expdes.baseName}_openms.mzTab \\ + -out_qpx ${expdes.baseName}_qpx \\ + $args \\ + 2>&1 | tee isobaricworkflow.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + IsobaricWorkflow: \$(IsobaricWorkflow 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) + END_VERSIONS + """ +} \ No newline at end of file diff --git a/modules/local/openms/isobaric_workflow/meta.yml b/modules/local/openms/isobaric_workflow/meta.yml new file mode 100644 index 00000000..45971764 --- /dev/null +++ b/modules/local/openms/isobaric_workflow/meta.yml @@ -0,0 +1,42 @@ +name: isobaric_workflow +description: Extracts and normalizes isobaric labeling information from an LC-MS/MS experiment. +keywords: + - OpenMS + - quantification +tools: + - IsobaricWorkflow: + description: | + Extracts and normalizes isobaric labeling information from an LC-MS/MS experiment. + homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/TOPP_IsobaricWorkflow.html + documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/release/latest/html/TOPP_IsobaricWorkflow.html + - mzmls: + type: file + description: Input Spectra in mzML format + pattern: "*.mzML" + - id_files: + type: file + description: Identifications in idparquet or mzIdentML format with posterior error probabilities as score type. + pattern: "*.idparquet" + - expdes: + type: file + description: An experimental design file + pattern: "*.tsv" +output: + - out_mztab: + type: file + description: mzTab file with analysis results + pattern: "*.mzTab" + - out_consensusXML: + type: file + description: ConsensusXML file for visualization and further processing in OpenMS. + pattern: "*.consensusXML" + - log: + type: file + description: log file + pattern: "*.log" + - version: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@MaLLLiYA" diff --git a/modules/local/openms/protein_inference_epifany/main.nf b/modules/local/openms/protein_inference_epifany/main.nf deleted file mode 100644 index ddafba50..00000000 --- a/modules/local/openms/protein_inference_epifany/main.nf +++ /dev/null @@ -1,39 +0,0 @@ -process PROTEIN_INFERENCE_EPIFANY { - label 'process_medium' - label 'openms' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:2025.04.14' : - 'ghcr.io/bigbio/openms-tools-thirdparty:2025.04.14' }" - - input: - tuple val(meta), path(consus_file) - - output: - tuple val(meta), path("${consus_file.baseName}_epi.consensusXML"), emit: epi_inference - path "versions.yml", emit: versions - path "*.log", emit: log - - script: - def args = task.ext.args ?: '' - gg = params.protein_quant == 'shared_peptides' ? 'remove_proteins_wo_evidence' : 'none' - - """ - Epifany \\ - -in ${consus_file} \\ - -protein_fdr true \\ - -threads $task.cpus \\ - -algorithm:keep_best_PSM_only $params.keep_best_PSM_only \\ - -algorithm:update_PSM_probabilities $params.update_PSM_probabilities \\ - -greedy_group_resolution $gg \\ - -algorithm:top_PSMs $params.top_PSMs \\ - -out ${consus_file.baseName}_epi.consensusXML \\ - $args \\ - 2>&1 | tee ${consus_file.baseName}_inference.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - Epifany: \$(Epifany 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) - END_VERSIONS - """ -} diff --git a/modules/local/openms/protein_inference_epifany/meta.yml b/modules/local/openms/protein_inference_epifany/meta.yml deleted file mode 100644 index 23d09813..00000000 --- a/modules/local/openms/protein_inference_epifany/meta.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: protein_inference_epifany -description: Runs a Bayesian protein inference. -keywords: - - Bayesian - - inference - - OpenMS -tools: - - Epifany: - description: | - It is a protein inference engine based on a Bayesian network. - homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/UTILS_Epifany.html - documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/UTILS_Epifany.html -input: - - consus_file: - type: file - description: | - identification results. - pattern: "*.{idXML,consensusXML}" -output: - - epi_inference: - type: file - description: | - identification results with scored/grouped proteins. - pattern: "*.{idXML,consensusXML}" - - log: - type: file - description: log file - pattern: "*.log" - - version: - type: file - description: File containing software version - pattern: "versions.yml" -authors: - - "@daichengxin" diff --git a/modules/local/openms/protein_inference_generic/main.nf b/modules/local/openms/protein_inference_generic/main.nf deleted file mode 100644 index 8222fb63..00000000 --- a/modules/local/openms/protein_inference_generic/main.nf +++ /dev/null @@ -1,44 +0,0 @@ -process PROTEIN_INFERENCE_GENERIC { - label 'process_medium' - label 'openms' - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/openms-tools-thirdparty-sif:2025.04.14' : - 'ghcr.io/bigbio/openms-tools-thirdparty:2025.04.14' }" - - input: - tuple val(meta), path(consus_file) - - output: - tuple val(meta), path("${consus_file.baseName}_epi.consensusXML"), emit: protein_inference - path "versions.yml", emit: versions - path "*.log", emit: log - - script: - def args = task.ext.args ?: '' - gg = params.protein_quant == 'shared_peptides' ? '-Algorithm:greedy_group_resolution' : '' - groups = params.protein_quant == 'strictly_unique_peptides' ? 'false' : 'true' - - """ - ProteinInference \\ - -in ${consus_file} \\ - -threads $task.cpus \\ - -picked_fdr $params.picked_fdr \\ - -picked_decoy_string $params.decoy_string \\ - -protein_fdr true \\ - -Algorithm:use_shared_peptides $params.use_shared_peptides \\ - -Algorithm:annotate_indistinguishable_groups $groups \\ - -Algorithm:score_type "PEP" \\ - $gg \\ - -Algorithm:score_aggregation_method $params.protein_score \\ - -Algorithm:min_peptides_per_protein $params.min_peptides_per_protein \\ - -out ${consus_file.baseName}_epi.consensusXML \\ - $args \\ - 2>&1 | tee ${consus_file.baseName}_inference.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ProteinInference: \$(ProteinInference 2>&1 | grep -E '^Version(.*) ' | sed 's/Version: //g' | cut -d ' ' -f 1) - END_VERSIONS - """ -} diff --git a/modules/local/openms/protein_inference_generic/meta.yml b/modules/local/openms/protein_inference_generic/meta.yml deleted file mode 100644 index 98347934..00000000 --- a/modules/local/openms/protein_inference_generic/meta.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: protein_inference_generic -description: Computes a protein identification score based on an aggregation of scores of identified peptides. -keywords: - - protein - - inference - - OpenMS -tools: - - ProteinInference: - description: | - Computes a protein identification score based on an aggregation of scores of identified peptides. - homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_ProteinInference.html - documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_ProteinInference.html -input: - - consus_file: - type: file - description: | - identification results. - pattern: "*.{idXML,consensusXML}" -output: - - protein_inference: - type: file - description: | - identification results with scored/grouped proteins. - pattern: "*.consensusXML" - - log: - type: file - description: log file - pattern: "*.log" - - version: - type: file - description: File containing software version - pattern: "versions.yml" -authors: - - "@daichengxin" diff --git a/subworkflows/local/feature_mapper/main.nf b/subworkflows/local/feature_mapper/main.nf deleted file mode 100644 index 308293da..00000000 --- a/subworkflows/local/feature_mapper/main.nf +++ /dev/null @@ -1,26 +0,0 @@ -// -// Assigns protein/peptide identifications to features or consensus features. -// - -include { ISOBARIC_ANALYZER } from '../../../modules/local/openms/isobaric_analyzer/main' -include { ID_MAPPER } from '../../../modules/local/openms/id_mapper/main' - -workflow FEATURE_MAPPER { - take: - ch_mzml_files - ch_id_files - - main: - ch_version = channel.empty() - - ISOBARIC_ANALYZER(ch_mzml_files) - ch_version = ch_version.mix(ISOBARIC_ANALYZER.out.versions) - - ID_MAPPER(ch_id_files.combine(ISOBARIC_ANALYZER.out.id_files_consensusXML, by: 0)) - ch_version = ch_version.mix(ID_MAPPER.out.versions) - - emit: - id_map = ID_MAPPER.out.id_map - - versions = ch_version -} diff --git a/subworkflows/local/feature_mapper/meta.yml b/subworkflows/local/feature_mapper/meta.yml deleted file mode 100644 index e2c40d97..00000000 --- a/subworkflows/local/feature_mapper/meta.yml +++ /dev/null @@ -1,29 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: "feature_mapper" -description: Subworkflow for mapping features across different runs and conditions -keywords: - - feature - - mapping - - alignment - - proteomics -components: - - isobaric/analyzer - - id/mapper -input: - - ch_input: - type: file - description: | - Channel containing input files for feature mapping -output: - - ch_mapped_features: - type: file - description: | - Channel containing mapped features - - versions: - type: file - description: | - Software versions used in this subworkflow -authors: - - "@bigbio" -maintainers: - - "@bigbio" diff --git a/subworkflows/local/protein_inference/main.nf b/subworkflows/local/protein_inference/main.nf deleted file mode 100644 index 96381089..00000000 --- a/subworkflows/local/protein_inference/main.nf +++ /dev/null @@ -1,40 +0,0 @@ -// -// ProteinInference -// - -include { PROTEIN_INFERENCE_EPIFANY } from '../../../modules/local/openms/protein_inference_epifany/main' -include { PROTEIN_INFERENCE_GENERIC } from '../../../modules/local/openms/protein_inference_generic/main' -include { ID_FILTER } from '../../../modules/local/openms/id_filter/main' - -workflow PROTEIN_INFERENCE { - take: - ch_consus_file - - main: - ch_version = channel.empty() - - if (params.protein_inference_method == "bayesian") { - PROTEIN_INFERENCE_EPIFANY(ch_consus_file) - ch_version = ch_version.mix(PROTEIN_INFERENCE_EPIFANY.out.versions) - ch_inference = PROTEIN_INFERENCE_EPIFANY.out.epi_inference - } else { - PROTEIN_INFERENCE_GENERIC(ch_consus_file) - ch_version = ch_version.mix(PROTEIN_INFERENCE_GENERIC.out.versions) - ch_inference = PROTEIN_INFERENCE_GENERIC.out.protein_inference - } - - ID_FILTER(ch_inference.combine(channel.value("-score:type_protein q-value"))) - ch_version = ch_version.mix(ID_FILTER.out.versions) - ID_FILTER.out.id_filtered - .multiMap{ it -> - meta: it[0] - results: it[1] - } - .set{ ch_epi_results } - - emit: - epi_idfilter = ch_epi_results.results - - versions = ch_version - -} diff --git a/subworkflows/local/protein_inference/meta.yml b/subworkflows/local/protein_inference/meta.yml deleted file mode 100644 index 7bab69e8..00000000 --- a/subworkflows/local/protein_inference/meta.yml +++ /dev/null @@ -1,30 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: "protein_inference" -description: Subworkflow for protein inference from peptide identifications -keywords: - - protein - - inference - - proteomics - - peptides -components: - - protein/inference/epifany - - protein/inference/generic - - id/filter -input: - - ch_input: - type: file - description: | - Channel containing input files for protein inference -output: - - ch_protein_results: - type: file - description: | - Channel containing protein inference results - - versions: - type: file - description: | - Software versions used in this subworkflow -authors: - - "@bigbio" -maintainers: - - "@bigbio" diff --git a/subworkflows/local/protein_quant/main.nf b/subworkflows/local/protein_quant/main.nf deleted file mode 100644 index 6c9f295b..00000000 --- a/subworkflows/local/protein_quant/main.nf +++ /dev/null @@ -1,30 +0,0 @@ -// -// ProteinQuant -// - -include { ID_CONFLICT_RESOLVER as ID_CONFLICT_RESOLVER } from '../../../modules/local/openms/id_conflict_resolver/main' -include { PROTEIN_QUANTIFIER as PROTEIN_QUANTIFIER } from '../../../modules/local/openms/protein_quantifier/main' -include { MSSTATS_CONVERTER as MSSTATS_CONVERTER } from '../../../modules/local/openms/msstats_converter/main' - -workflow PROTEIN_QUANT { - take: - ch_conflict_file - ch_expdesign_file - - main: - ch_version = channel.empty() - - ID_CONFLICT_RESOLVER(ch_conflict_file) - ch_version = ch_version.mix(ID_CONFLICT_RESOLVER.out.versions) - - PROTEIN_QUANTIFIER(ID_CONFLICT_RESOLVER.out.pro_resconf, ch_expdesign_file) - ch_version = ch_version.mix(PROTEIN_QUANTIFIER.out.versions) - - MSSTATS_CONVERTER(ID_CONFLICT_RESOLVER.out.pro_resconf, ch_expdesign_file, "ISO") - ch_version = ch_version.mix(MSSTATS_CONVERTER.out.versions) - - emit: - msstats_csv = MSSTATS_CONVERTER.out.out_msstats - out_mztab = PROTEIN_QUANTIFIER.out.out_mztab - versions = ch_version -} diff --git a/subworkflows/local/protein_quant/meta.yml b/subworkflows/local/protein_quant/meta.yml deleted file mode 100644 index 71ae589f..00000000 --- a/subworkflows/local/protein_quant/meta.yml +++ /dev/null @@ -1,30 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: "protein_quant" -description: Subworkflow for protein quantification from MS data -keywords: - - protein - - quantification - - proteomics - - ms -components: - - id/conflict/resolver - - protein/quantifier - - msstats/converter -input: - - ch_input: - type: file - description: | - Channel containing input files for protein quantification -output: - - ch_quant_results: - type: file - description: | - Channel containing protein quantification results - - versions: - type: file - description: | - Software versions used in this subworkflow -authors: - - "@bigbio" -maintainers: - - "@bigbio" diff --git a/workflows/tmt.nf b/workflows/tmt.nf index 6a882cef..a8b9f0d6 100644 --- a/workflows/tmt.nf +++ b/workflows/tmt.nf @@ -7,14 +7,12 @@ // // MODULES: Local to the pipeline // -include { FILE_MERGE } from '../modules/local/openms/file_merge/main' +include { ISOBARIC_WORKFLOW } from '../modules/local/openms/isobaric_workflow/main' +include { MSSTATS_CONVERTER } from '../modules/local/openms/msstats_converter/main' // // SUBWORKFLOWS: Consisting of a mix of local and nf-core/modules // -include { FEATURE_MAPPER } from '../subworkflows/local/feature_mapper/main' -include { PROTEIN_INFERENCE } from '../subworkflows/local/protein_inference/main' -include { PROTEIN_QUANT } from '../subworkflows/local/protein_quant/main' include { ID } from '../subworkflows/local/id/main' /* @@ -40,28 +38,28 @@ workflow TMT { ch_software_versions = ch_software_versions.mix(ID.out.versions) // - // SUBWORKFLOW: FEATUREMAPPER + // SUBWORKFLOW: ISOBARIC_WORKFLOW // - FEATURE_MAPPER(ch_file_preparation_results, ID.out.id_results) - ch_software_versions = ch_software_versions.mix(FEATURE_MAPPER.out.versions) + // Extract labelling_type from meta (auto-detected from SDRF) + ch_file_preparation_results.join(ID.out.id_results) + .multiMap { it -> + labelling_type: it[0].labelling_type + mzmls: it[1] + ids: it[2] + } + .set{ ch_iso_workflow } + ISOBARIC_WORKFLOW(ch_iso_workflow.labelling_type.first(), + ch_iso_workflow.mzmls.collect(), + ch_iso_workflow.ids.collect(), + ch_expdesign + ) + ch_software_versions = ch_software_versions.mix(ISOBARIC_WORKFLOW.out.versions) // - // MODULE: FILEMERGE + // SUBWORKFLOW: MSSTATS_CONVERTER // - FILE_MERGE(FEATURE_MAPPER.out.id_map.collect()) - ch_software_versions = ch_software_versions.mix(FILE_MERGE.out.versions) - - // - // SUBWORKFLOW: PROTEININFERENCE - // - PROTEIN_INFERENCE(FILE_MERGE.out.id_merge) - ch_software_versions = ch_software_versions.mix(PROTEIN_INFERENCE.out.versions) - - // - // SUBWORKFLOW: PROTEINQUANT - // - PROTEIN_QUANT(PROTEIN_INFERENCE.out.epi_idfilter, ch_expdesign) - ch_software_versions = ch_software_versions.mix(PROTEIN_QUANT.out.versions) + MSSTATS_CONVERTER(ISOBARIC_WORKFLOW.out.out_consensusXML, ch_expdesign, "ISO") + ch_software_versions = ch_software_versions.mix(MSSTATS_CONVERTER.out.versions) ID.out.psmrescoring_results .map { it -> it[1] } @@ -74,7 +72,7 @@ workflow TMT { emit: ch_pmultiqc_ids = ch_pmultiqc_ids ch_pmultiqc_consensus = ch_pmultiqc_consensus - final_result = PROTEIN_QUANT.out.out_mztab - msstats_in = PROTEIN_QUANT.out.msstats_csv + final_result = ISOBARIC_WORKFLOW.out.out_mztab + msstats_in = MSSTATS_CONVERTER.out.out_msstats versions = ch_software_versions } From 2f6e1c5d77571b6f4900ca01c1a8fd12f87790a1 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 6 Jun 2026 16:48:06 +0800 Subject: [PATCH 09/12] Update main.nf --- modules/local/openms/isobaric_workflow/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/openms/isobaric_workflow/main.nf b/modules/local/openms/isobaric_workflow/main.nf index 4cf2d1af..f335c7d0 100644 --- a/modules/local/openms/isobaric_workflow/main.nf +++ b/modules/local/openms/isobaric_workflow/main.nf @@ -85,4 +85,4 @@ process ISOBARIC_WORKFLOW { IsobaricWorkflow: \$(IsobaricWorkflow 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) END_VERSIONS """ -} \ No newline at end of file +} From 1edb72a00c693c31b9d81232b11703dd8cfb3eca Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Sat, 6 Jun 2026 16:55:59 +0800 Subject: [PATCH 10/12] Update main.nf --- .../local/openms/isobaric_workflow/main.nf | 21 ++----------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/modules/local/openms/isobaric_workflow/main.nf b/modules/local/openms/isobaric_workflow/main.nf index f335c7d0..8a2e5418 100644 --- a/modules/local/openms/isobaric_workflow/main.nf +++ b/modules/local/openms/isobaric_workflow/main.nf @@ -22,23 +22,6 @@ process ISOBARIC_WORKFLOW { script: def args = task.ext.args ?: '' - def extractBaseName = { filename -> - def name = filename.toString() - name = name.replaceAll(/\.mzML$/, '') - - if (name.endsWith('.idparquet')) { - name = name.replaceAll(/\.idparquet$/, '') - name = name.replaceAll(/_(comet|msgf|sage|consensus)(_perc)?(_filter)?(_fdr)?$/, '') - } - return name - } - - def mzml_sorted = mzmls.collect().sort{ a, b -> - extractBaseName(a.name) <=> extractBaseName(b.name) - } - def id_sorted = id_files.collect().sort{ a, b -> - extractBaseName(a.name) <=> extractBaseName(b.name) - } // Build isotope correction matrix argument if enabled def isotope_correction = "" @@ -61,8 +44,8 @@ process ISOBARIC_WORKFLOW { """ IsobaricWorkflow \\ -threads ${task.cpus} \\ - -in ${mzml_sorted.join(' ')} \\ - -in_id ${id_sorted.join(' ')} \\ + -in ${mzmls.join(' ')} \\ + -in_id ${id_files.join(' ')} \\ -exp_design ${expdes} \\ -type ${labelling_type} \\ -inference_method ${params.protein_inference_method} \\ From 82d2f5d228af0779ca2d9984d5191140dbe34511 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Tue, 9 Jun 2026 13:13:08 +0800 Subject: [PATCH 11/12] update --- .../local/utils/msrescore_fine_tuning/main.nf | 14 ++--- modules/local/utils/psm_clean/main.nf | 6 +- .../local/peptide_database_search/main.nf | 56 +++++-------------- 3 files changed, 24 insertions(+), 52 deletions(-) diff --git a/modules/local/utils/msrescore_fine_tuning/main.nf b/modules/local/utils/msrescore_fine_tuning/main.nf index 36db5182..75a5b5a3 100644 --- a/modules/local/utils/msrescore_fine_tuning/main.nf +++ b/modules/local/utils/msrescore_fine_tuning/main.nf @@ -7,19 +7,19 @@ process MSRESCORE_FINE_TUNING { 'ghcr.io/bigbio/quantms-rescoring:0.0.17' }" input: - tuple val(meta), path(idparquet), path(mzml), val(groupkey), path(ms2_model_dir) + tuple val(meta), path(idparquet), path(mzml), path(ms2_model_dir) output: - tuple val(groupkey), path("retained_ms2.pth") , emit: model_weight - path "versions.yml" , emit: versions - path "*.log" , emit: log + path("retained_ms2.pth") , emit: model_weight + path "versions.yml" , emit: versions + path "*.log" , emit: log when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${groupkey}_fine_tuning" + def prefix = task.ext.prefix ?: "fine_tuning" // Initialize tolerance variables def ms2_tolerance = null @@ -50,7 +50,7 @@ process MSRESCORE_FINE_TUNING { """ rescoring transfer_learning \\ --idparquet ${idparquet.join(' --idparquet ')} \\ - --mzml ./ \\ + --mzml ${mzml.join(' --mzml ')} \\ --save_model_dir ./ \\ --ms2_tolerance $ms2_tolerance \\ --ms2_tolerance_unit $ms2_tolerance_unit \\ @@ -62,7 +62,7 @@ process MSRESCORE_FINE_TUNING { ${force_transfer_learning} \\ ${consider_modloss} \\ $args \\ - 2>&1 | tee ${groupkey}_fine_tuning.log + 2>&1 | tee fine_tuning.log cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/utils/psm_clean/main.nf b/modules/local/utils/psm_clean/main.nf index cd7a9892..a132e146 100644 --- a/modules/local/utils/psm_clean/main.nf +++ b/modules/local/utils/psm_clean/main.nf @@ -23,11 +23,11 @@ process PSM_CLEAN { """ rescoring psm_feature_clean \\ - --idparquet $idparquet \\ + --idparquet ${idparquet.join(' --idparquet ')} \\ --mzml $mzml \\ - --output ${idparquet.baseName}_clean.idparquet \\ + --output ${mzml.baseName}_clean.idparquet \\ $args \\ - 2>&1 | tee ${idparquet.baseName}_clean.log + 2>&1 | tee ${mzml.baseName}_clean.log cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/peptide_database_search/main.nf b/subworkflows/local/peptide_database_search/main.nf index 8423d00d..fd94db2d 100644 --- a/subworkflows/local/peptide_database_search/main.nf +++ b/subworkflows/local/peptide_database_search/main.nf @@ -91,57 +91,29 @@ workflow PEPTIDE_DATABASE_SEARCH { } else { // Preparing train datasets and fine tuning MS2 model - sage_train_datasets = ch_id_sage + train_datasets = ch_id_sage.mix(ch_id_msgf).mix(ch_id_comet) .combine(ch_mzmls_search, by: 0) .toSortedList() .flatMap() .randomSample(params.fine_tuning_sample_run, 2025) - .combine(channel.value("sage")) .groupTuple(by: 3) - msgf_train_datasets = ch_id_msgf - .combine(ch_mzmls_search, by: 0) - .toSortedList() - .flatMap() - .randomSample(params.fine_tuning_sample_run, 2025) - .combine(channel.value("msgf")) - .groupTuple(by: 3) - - comet_train_datasets = ch_id_comet - .combine(ch_mzmls_search, by: 0) - .toSortedList() - .flatMap() - .randomSample(params.fine_tuning_sample_run, 2025) - .combine(channel.value("comet")) - .groupTuple(by: 3) - - sage_train_datasets.mix(msgf_train_datasets) - .mix(comet_train_datasets) - .combine(ms2_model_dir) - .set { train_datasets } - MSRESCORE_FINE_TUNING(train_datasets) + MSRESCORE_FINE_TUNING(train_datasets.combine(ms2_model_dir)) ch_versions = ch_versions.mix(MSRESCORE_FINE_TUNING.out.versions) - channel.value("msgf").combine(ch_id_msgf.combine(ch_mzmls_search, by: 0)) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - .map { v -> [v[1], v[2], v[3], v[4], v[0] ] } - .set { msgf_features_input } - - channel.value("sage").combine(ch_id_sage.combine(ch_mzmls_search, by: 0)) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - .map { v -> [v[1], v[2], v[3], v[4], v[0] ] } - .set { sage_features_input } - - channel.value("comet").combine(ch_id_comet.combine(ch_mzmls_search, by: 0)) - .combine(MSRESCORE_FINE_TUNING.out.model_weight, by:0) - .map { v -> [v[1], v[2], v[3], v[4], v[0] ] } - .set { comet_features_input } + if (params.search_engines.tokenize(",").unique().size() > 1) { + ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage).groupTuple(size: params.search_engines.tokenize(",").unique().size()) + .combine(ch_mzmls_search, by: 0) + .combine(MSRESCORE_FINE_TUNING.out.model_weight).set{ ch_id_rescoring } + } else { + ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage).combine(ch_mzmls_search, by: 0) + .combine(MSRESCORE_FINE_TUNING.out.model_weight).set{ ch_id_rescoring } + } - MSRESCORE_FEATURES(msgf_features_input.mix(sage_features_input).mix(comet_features_input)) + MSRESCORE_FEATURES(ch_id_rescoring) ch_versions = ch_versions.mix(MSRESCORE_FEATURES.out.versions) ch_id_files_feats = MSRESCORE_FEATURES.out.idparquet - } } else{ if (params.search_engines.tokenize(",").unique().size() > 1) { @@ -167,8 +139,8 @@ workflow PEPTIDE_DATABASE_SEARCH { } } else if (params.search_engines.tokenize(",").unique().size() > 1) { - EXTRACTPSMFEATURES(ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage).groupTuple(size: params.search_engines.tokenize(",").unique().size())) - ch_id_files_out = EXTRACTPSMFEATURES.out.id_files_feat + PSM_CLEAN(ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage).groupTuple(size: params.search_engines.tokenize(",").unique().size()).combine(ch_mzmls_search, by: 0)) + ch_id_files_out = PSM_CLEAN.out.idparquet } else { ch_id_files_out = ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage) } @@ -176,7 +148,7 @@ workflow PEPTIDE_DATABASE_SEARCH { } else if (params.psm_clean == true) { ch_id_files = ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage) PSM_CLEAN(ch_id_files.combine(ch_mzmls_search, by: 0)) - ch_id_files_out = PSM_CLEAN.out.idxml + ch_id_files_out = PSM_CLEAN.out.idparquet ch_versions = ch_versions.mix(PSM_CLEAN.out.versions) } else { ch_id_files_out = ch_id_msgf.mix(ch_id_comet).mix(ch_id_sage) From 5d1dc3d28b820bd34fb00b27dae8714d9f34969a Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Tue, 9 Jun 2026 15:20:37 +0800 Subject: [PATCH 12/12] update --- modules/local/utils/msrescore_features/main.nf | 4 ++-- modules/local/utils/msrescore_fine_tuning/main.nf | 4 ++-- modules/local/utils/psm_clean/main.nf | 4 ++-- subworkflows/local/peptide_database_search/main.nf | 13 ++++++++++++- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/modules/local/utils/msrescore_features/main.nf b/modules/local/utils/msrescore_features/main.nf index 2e96d952..449f2fd6 100644 --- a/modules/local/utils/msrescore_features/main.nf +++ b/modules/local/utils/msrescore_features/main.nf @@ -3,8 +3,8 @@ process MSRESCORE_FEATURES { label 'process_medium' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.17' : - 'ghcr.io/bigbio/quantms-rescoring:0.0.17' }" + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.18' : + 'ghcr.io/bigbio/quantms-rescoring:0.0.18' }" input: tuple val(meta), path(id_files), path(mzml), path(model_weight) diff --git a/modules/local/utils/msrescore_fine_tuning/main.nf b/modules/local/utils/msrescore_fine_tuning/main.nf index 75a5b5a3..23636619 100644 --- a/modules/local/utils/msrescore_fine_tuning/main.nf +++ b/modules/local/utils/msrescore_fine_tuning/main.nf @@ -3,8 +3,8 @@ process MSRESCORE_FINE_TUNING { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.17' : - 'ghcr.io/bigbio/quantms-rescoring:0.0.17' }" + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.18' : + 'ghcr.io/bigbio/quantms-rescoring:0.0.18' }" input: tuple val(meta), path(idparquet), path(mzml), path(ms2_model_dir) diff --git a/modules/local/utils/psm_clean/main.nf b/modules/local/utils/psm_clean/main.nf index a132e146..650ffc90 100644 --- a/modules/local/utils/psm_clean/main.nf +++ b/modules/local/utils/psm_clean/main.nf @@ -3,8 +3,8 @@ process PSM_CLEAN { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.17' : - 'ghcr.io/bigbio/quantms-rescoring:0.0.17' }" + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.18' : + 'ghcr.io/bigbio/quantms-rescoring:0.0.18' }" input: tuple val(meta), path(idparquet), path(mzml) diff --git a/subworkflows/local/peptide_database_search/main.nf b/subworkflows/local/peptide_database_search/main.nf index fd94db2d..96c8aec3 100644 --- a/subworkflows/local/peptide_database_search/main.nf +++ b/subworkflows/local/peptide_database_search/main.nf @@ -91,7 +91,18 @@ workflow PEPTIDE_DATABASE_SEARCH { } else { // Preparing train datasets and fine tuning MS2 model - train_datasets = ch_id_sage.mix(ch_id_msgf).mix(ch_id_comet) + // Randomly select one search engine for fine-tuning sampling + engine_opts = [] + if (params.search_engines.contains("sage")) engine_opts.add("sage") + if (params.search_engines.contains("msgf")) engine_opts.add("msgf") + if (params.search_engines.contains("comet")) engine_opts.add("comet") + selected_engine = engine_opts[new Random(2025).nextInt(engine_opts.size())] + + ch_selected_engine = (selected_engine == "sage") ? ch_id_sage : + (selected_engine == "msgf") ? ch_id_msgf : + ch_id_comet + + train_datasets = ch_selected_engine .combine(ch_mzmls_search, by: 0) .toSortedList() .flatMap()