Skip to content

Commit

Permalink
Merge pull request #210 from fls-bioinformatics-core/IlluminaData-fix-fastq-assignment
Browse files (browse the repository at this point in the history)

bcftbx/IlluminaData: fix bug in IlluminaProject assigning undetermined Fastqs
  • Loading branch information
pjbriggs authored Dec 16, 2022
2 parents 6f5a6c4 + f9e3427 commit 3a0df01
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 11 deletions.
16 changes: 9 additions & 7 deletions bcftbx/IlluminaData.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,13 +655,15 @@ def __init__(self,dirn):
leading_dir[0])
else:
# Handle 'undetermined' data
try:
fqs = [f for f in fastqs
if "lane%d" % IlluminaFastq(f).lane_number
== sample_name]
except TypeError:
# No lane, take all fastqs
fqs = [fq for fq in fastqs]
# Try Fastqs with matching lane
fqs = [fq for fq in fastqs
if IlluminaFastq(fq).lane_number is not None and
"lane%d" % IlluminaFastq(fq).lane_number
== sample_name]
if not fqs:
# No lane, take all fastqs without a lane
fqs = [fq for fq in fastqs
if IlluminaFastq(fq).lane_number is None]
self.samples.append(IlluminaSample(sample_dirn,
fastqs=fqs,
name=sample_name,
Expand Down
24 changes: 20 additions & 4 deletions bcftbx/test/test_IlluminaData.py
Original file line number Diff line number Diff line change
Expand Up @@ -928,8 +928,10 @@ def makeIlluminaDataDirectoryWithMixedLaneAndNoLaneSplitting(self):
top_dir=self.top_dir)
mock_illumina_data.add_fastq_batch('AB','AB1','AB1_S1',lanes=(1,2))
mock_illumina_data.add_fastq_batch('AB','AB2','AB2_S2',lanes=(1,2))
mock_illumina_data.add_fastq_batch('CDE','CDE3','CDE3_S3')
mock_illumina_data.add_fastq_batch('CDE','CDE4','CDE4_S4')
mock_illumina_data.add_fastq_batch('CDE','CDE3','CDE3_S3',
lanes=(3,4))
mock_illumina_data.add_fastq_batch('CDE','CDE4','CDE4_S4',
lanes=(3,4))
mock_illumina_data.add_undetermined(lanes=(1,2))
mock_illumina_data.create()
# Create second dir with no lane splitting
Expand All @@ -938,13 +940,20 @@ def makeIlluminaDataDirectoryWithMixedLaneAndNoLaneSplitting(self):
paired_end=True,
no_lane_splitting=True,
top_dir=self.top_dir)
mock_illumina_data2.add_fastq_batch('CDE','CDE3','CDE3_S3')
mock_illumina_data2.add_fastq_batch('CDE','CDE4','CDE4_S4')
mock_illumina_data2.add_fastq_batch('CDE','CDE3','CDE3_S3',
lanes=(3,4))
mock_illumina_data2.add_fastq_batch('CDE','CDE4','CDE4_S4',
lanes=(3,4))
mock_illumina_data2.add_undetermined(lanes=(3,4))
mock_illumina_data2.create()
# Move no lane splitting project into first dir
shutil.rmtree(os.path.join(mock_illumina_data.unaligned_dir,"CDE"))
shutil.move(os.path.join(mock_illumina_data2.unaligned_dir,"CDE"),
mock_illumina_data.unaligned_dir)
for f in ("Undetermined_S0_R1_001.fastq.gz",
"Undetermined_S0_R2_001.fastq.gz"):
shutil.move(os.path.join(mock_illumina_data2.unaligned_dir,f),
mock_illumina_data.unaligned_dir)
# Finish
self.mock_illumina_data = mock_illumina_data

Expand Down Expand Up @@ -1033,6 +1042,13 @@ def test_illumina_data_multiple_projects_paired_end_mixed_lane_splitting(self):
self.assertEqual(s.name,s_name)
self.assertEqual(illumina_data.format,'bcl2fastq2')
self.assertEqual(illumina_data.lanes,[1,2])
# Also check undetermined Fastqs are not double counted
undetermined_fastqs = []
for sample in illumina_data.undetermined.samples:
for fq in sample.fastq:
self.assertFalse(fq in undetermined_fastqs,
"%s: Fastq appears multiple times" % fq)
undetermined_fastqs.append(fq)

class TestCasavaSampleSheet(unittest.TestCase):

Expand Down

0 comments on commit 3a0df01

Please sign in to comment.