Skip to content

Commit

Permalink
Merge pull request #17 from qiyunzhu/bugfix
Browse files Browse the repository at this point in the history
Bug fix for duplicated sample name check
  • Loading branch information
mortonjt committed Aug 7, 2017
2 parents 775e1af + 10cb85a commit 0257fe1
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 19 deletions.
30 changes: 15 additions & 15 deletions plate_mapper/plate_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,43 +164,43 @@ def plate_mapper(input_f, barseq_f, output_f, names_f=None, special_f=None,
output_f.close()
print(' Done.')

# Check for repeated sample names
warning = ''
samples = Counter(samples)
repeated = [name for name, count in samples.items() if count > 1]
if repeated:
warning += (' Repeated samples: %s.\n'
% _print_list(sorted(repeated)))

# Validate sample names
if names_f:
print('Validating sample names...')
samples = Counter(samples)
names = set()
for line in names_f:
l = line.rstrip().split('\t')
if l[0]: # skip empty names
names.add(l[0]) # keep first field as name
names_f.close()
warning = ''
if names:
sample_set = set(samples)
# samples in plate map but not in name list
novel = sample_set - names
# samples in name list but not in plate map
missing = names - sample_set
# samples that occur more than once in plate map
repeated = set()
for name in names:
if name in samples and samples[name] > 1:
repeated.add(name)
if novel:
warning += (' Novel samples: %s.\n'
% _print_list(sorted(novel)))
if missing:
warning += (' Missing samples: %s.\n'
% _print_list(sorted(missing)))
if repeated:
warning += (' Repeated samples: %s.\n'
% _print_list(sorted(repeated)))
print(' Done.')
if warning:
# display warning message
warnings.formatwarning = lambda msg, *a: str(msg)
warnings.warn('Warning:\n%s' % warning)
print('Task completed.')

# Display warning message
if warning:
warnings.formatwarning = lambda msg, *a: str(msg)
warnings.warn('Warning:\n%s' % warning)

print('Task completed.')


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion plate_mapper/tests/data/exp_mapping.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ sp220 TATGCCAGAGAT ATCG 1 C4 QZ 8/15/16
sp012 CCTCGCATGACC ATCG 3 A1 QZ 10/26/16
sp015 GGCGTAACGGCA ATCG 3 A2 QZ 10/26/16
sp018 GCGAGGAAGTCC ATCG 3 A3 QZ 10/26/16
blank4A CAAATTCGGGAT ATCG 3 A4 QZ 10/26/16
sp020 CAAATTCGGGAT ATCG 3 A4 QZ 10/26/16
sp013 CGCGCAAGTATT ATCG 3 B1 QZ 10/26/16
sp016 AATACAGACCTG ATCG 3 B2 QZ 10/26/16
sp019 GGACAAGTGCGA ATCG 3 B3 QZ 10/26/16
Expand Down
2 changes: 1 addition & 1 deletion plate_mapper/tests/data/plate_map.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ B sp002 sp005 sp007 missing4B a sample is missing
C sp003 blank2C blank3C sp220

Plate#2 1 2 3 4 # Who When
A sp012 sp015 sp018 blank4A 3 QZ 10/26/16
A sp012 sp015 sp018 sp020 3 QZ 10/26/16
B sp013 sp016 sp019
C
9 changes: 9 additions & 0 deletions plate_mapper/tests/data/plate_map_w_dup.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Plate#1 1 2 3 4 # Who When
A sp001 sp004 sp006 blank4A 1 QZ 8/15/16
B sp002 sp005 sp007 missing4B a sample is missing
C sp003 blank2C blank3C sp220

Plate#2 1 2 3 4 # Who When
A sp001 sp015 sp018 sp020 3 QZ 10/26/16
B sp013 sp016 sp019
C
1 change: 1 addition & 0 deletions plate_mapper/tests/data/sample_list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ sp016
sp017
sp018
sp019
sp020
blank2C
blank3C
blank4A
15 changes: 13 additions & 2 deletions plate_mapper/tests/test_plate_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,18 @@ def test_plate_mapper(self):
err = 'Error: Code \'+\' has no name.'
self.assertEqual(str(context.exception), err)

# test a successful conversion with repeated sample name warning
input_f = open(join(datadir, 'plate_map_w_dup.txt'), 'r')
barseq_f = open(join(datadir, 'barseq_temp.txt'), 'r')
output_f = open(obs_output_fp, 'w')
# check screen warning message
msg = ('Warning:\n'
' Repeated samples: sp001.\n')
with catch_warnings(record=True) as w:
simplefilter('always')
plate_mapper(input_f, barseq_f, output_f)
assert msg in str(w[-1].message)

# test a successful conversion with sample name validation warnings
input_f = open(join(datadir, 'plate_map.txt'), 'r')
barseq_f = open(join(datadir, 'barseq_temp.txt'), 'r')
Expand All @@ -119,8 +131,7 @@ def test_plate_mapper(self):
# check screen warning message
msg = ('Warning:\n'
' Novel samples: missing4B, sp220.\n'
' Missing samples: sp014, sp017.\n'
' Repeated samples: blank4A.\n')
' Missing samples: sp014, sp017.\n')
with catch_warnings(record=True) as w:
simplefilter('always')
plate_mapper(input_f, barseq_f, output_f, names_f)
Expand Down

0 comments on commit 0257fe1

Please sign in to comment.