Skip to content

Commit

Permalink
Resolved bug when prefetching large studies
Browse files Browse the repository at this point in the history
Prefetching large studies resulted in the error described in issue

nf-core#236

Resolved this by passing a text file containing the file paths instead
of a list of file paths. Rewrote sra_merge_samplesheet accordingly.

Tested with a study of ~25k samples.
  • Loading branch information
Blaessle,Dr.,Alexander (GCBDS) BIP-DE-B committed Nov 7, 2023
1 parent a7ab1a2 commit ef9f1e7
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 13 deletions.
24 changes: 13 additions & 11 deletions modules/local/sra_merge_samplesheet/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,28 @@ process SRA_MERGE_SAMPLESHEET {
'nf-core/ubuntu:20.04' }"

input:
path ('samplesheets/*')
path ('mappings/*')

// path ('samplesheets/*')
// path ('mappings/*')
path 'samplesheets.txt'
path 'mappings.txt'

output:
path "samplesheet.csv", emit: samplesheet
path "id_mappings.csv", emit: mappings
path "versions.yml" , emit: versions

script:
"""
head -n 1 `ls ./samplesheets/* | head -n 1` > samplesheet.csv
for fileid in `ls ./samplesheets/*`; do
head -n 1 `head -n 1 samplesheets.txt` > samplesheet.csv
while read fileid; do
awk 'NR>1' \$fileid >> samplesheet.csv
done
head -n 1 `ls ./mappings/* | head -n 1` > id_mappings.csv
for fileid in `ls ./mappings/*`; do
done < samplesheets.txt
head -n 1 `head -n 1 mappings.txt` > id_mappings.csv
while read fileid; do
awk 'NR>1' \$fileid >> id_mappings.csv
done
done < mappings.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//')
Expand Down
14 changes: 12 additions & 2 deletions workflows/sra/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -118,14 +118,24 @@ workflow SRA {
params.nf_core_rnaseq_strandedness ?: 'auto',
params.sample_mapping_fields
)


// Combine all sample sheet/mapping paths into two files
ch_samplesheets=SRA_TO_SAMPLESHEET.out.samplesheet.map{it[1].toString()}.collectFile(name: 'samplesheets.txt', newLine: true)
ch_mappings=SRA_TO_SAMPLESHEET.out.mappings.map{it[1].toString()}.collectFile(name: 'mappings.txt', newLine: true)

//
// MODULE: Create a merged samplesheet across all samples for the pipeline
//
// SRA_MERGE_SAMPLESHEET (
// SRA_TO_SAMPLESHEET.out.samplesheet.collect{it[1]},
// SRA_TO_SAMPLESHEET.out.mappings.collect{it[1]}
// )
SRA_MERGE_SAMPLESHEET (
SRA_TO_SAMPLESHEET.out.samplesheet.collect{it[1]},
SRA_TO_SAMPLESHEET.out.mappings.collect{it[1]}
ch_samplesheets,
ch_mappings
)

ch_versions = ch_versions.mix(SRA_MERGE_SAMPLESHEET.out.versions)

//
Expand Down

0 comments on commit ef9f1e7

Please sign in to comment.