This repository has been archived by the owner on Nov 9, 2023. It is now read-only.
/
test_quality_scores_plot.py
executable file
·206 lines (146 loc) · 12.2 KB
/
test_quality_scores_plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#!/usr/bin/env python
__author__ = "William Walters"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["William Walters"]
__license__ = "GPL"
__version__ = "1.9.0"
__maintainer__ = "William Walters"
__email__ = "William.A.Walters@colorado.edu"
from os import close
from os.path import isdir, isfile, exists
from shutil import rmtree
from tempfile import mkdtemp, mkstemp
from unittest import TestCase, main
from skbio.util import remove_files
from skbio.util import create_dir
from qiime.quality_scores_plot import generate_histogram,\
plot_qual_report, get_qual_stats, bin_qual_scores, write_qual_report
class QualityScoresPlotTests(TestCase):
""" Unit tests for the quality_scores_histogram.py module """
def setUp(self):
# create the temporary input files that will be used
self._files_to_remove = []
fd, self.qual_fp = mkstemp(prefix='qual_scores_', suffix='.qual')
close(fd)
seq_file = open(self.qual_fp, 'w')
seq_file.write(qual_scores)
seq_file.close()
self.output_dir = mkdtemp()
self.output_dir += '/'
create_dir(self.output_dir)
self.expected_output_text_file = expected_output_text_file
self._files_to_remove =\
[self.qual_fp]
def tearDown(self):
if self._files_to_remove:
remove_files(self._files_to_remove)
if exists(self.output_dir):
rmtree(self.output_dir)
def test_generate_histogram(self):
""" No errors when calling function, creates output files"""
# Cannot test content of graphics file, only successful execution
output_dir = self.output_dir
# Should not raise an error with good data
generate_histogram(self.qual_fp, output_dir, verbose=False)
expected_outfile = output_dir + 'quality_scores_plot.pdf'
self.assertTrue(isfile(expected_outfile))
# Test text file output for proper data
text_output_fp = output_dir + "quality_bins.txt"
text_output_f = open(text_output_fp, "U")
actual_text_output = "\n".join([line.strip()
for line in text_output_f])
self.assertEqual(actual_text_output, self.expected_output_text_file)
def test_write_qual_report(self):
""" Writes data to output text file properly """
output_dir = self.output_dir
qual_bins = [[1, 2, 6], [1, 2, 3], [2, 4], [4]]
expected_ave_bins = [3.00, 2.00, 3.00, 4.00]
expected_std_dev_bins = [2.16, 0.816, 1.0, 0]
expected_total_bases_bins = [3, 3, 2, 1]
score_min = 25
write_qual_report(expected_ave_bins, expected_std_dev_bins,
expected_total_bases_bins, output_dir, score_min)
# Test text file output for proper data
text_output_fp = output_dir + "quality_bins.txt"
text_output_f = open(text_output_fp, "U")
actual_text_output = [line.strip() for line in text_output_f]
ave_bin_index = 2
std_dev_bin_index = 4
total_bases_index = 6
actual_bins_ave =\
[float(f) for f in actual_text_output[ave_bin_index].split(',')]
actual_bins_std =\
[float(f)
for f in actual_text_output[std_dev_bin_index].split(',')]
actual_bins_total_bases =\
[float(f)
for f in actual_text_output[total_bases_index].split(',')]
self.assertEqual(actual_bins_ave, expected_ave_bins)
self.assertEqual(actual_bins_std, expected_std_dev_bins)
self.assertEqual(actual_bins_total_bases, expected_total_bases_bins)
def test_bin_qual_scores(self):
""" Properly bins qual scores according to nucleotide position """
qual_data = {'seq1': [10, 20, 30, 40], 'seq2': [11, 21, 31],
'seq3': [12, 22]}
expected_bins = [[10, 11, 12], [20, 21, 22], [30, 31], [40]]
actual_bins = bin_qual_scores(qual_data)
# Because of arbritrary dictionary order, need to sort results
for bin in range(len(actual_bins)):
actual_bins[bin].sort()
self.assertEqual(actual_bins, expected_bins)
def test_get_qual_stats(self):
""" Properly generates averages, std dev for bins """
qual_bins = [[1, 2, 6], [1, 2, 3], [2, 4], [4]]
expected_ave_bins = [3, 2, 3, 4]
expected_std_dev_bins = [2.16, 0.816, 1.0, 0]
expected_total_bases_bins = [3, 3, 2, 1]
score_min = 25
actual_ave_bins, actual_std_dev_bins, actual_total_bases_bins,\
suggested_trunc_pos = get_qual_stats(qual_bins, score_min)
# Should give correct suggested truncation position, where the quality
# score average went below 25, in this case, at base 0
expected_trunc_pos = 0
self.assertEqual(suggested_trunc_pos, expected_trunc_pos)
# Round standard deviation calculations
for n in range(len(actual_std_dev_bins)):
actual_std_dev_bins[n] = round(actual_std_dev_bins[n], 3)
self.assertEqual(actual_ave_bins, expected_ave_bins)
self.assertEqual(actual_std_dev_bins, expected_std_dev_bins)
self.assertEqual(actual_total_bases_bins, expected_total_bases_bins)
def test_plot_qual_report(self):
""" Is called without error """
output_dir = self.output_dir
score_min = 20
ave_bins = [3, 2, 3, 4]
std_dev_bins = [2.16, 0.816, 1.0, 0]
total_bases_bins = [3, 3, 2, 1]
plot_qual_report(ave_bins, std_dev_bins, total_bases_bins, score_min,
output_dir)
# Long strings at the end for better readability
qual_scores = """>seq1
36 36 36 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 40 40 40 40 40 40 40 40 40 40 38 38 39 40 40 40 40 40 40 40 40 40 40 40 40 40 40 37 37 37 37 37 33 33 33 36 36 37 37 37 37 37
37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37
37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 33 33 33 33 37 37 37 37 37 37 37 37 37 37 37 37
37 37 36 36 28 28 28 28 33 33 33 36 36 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 36 36 36 36 36 36 31 31 27 27 28 28 28 27 21 31 31 36 36 36 36 36 36
36 36 36 36 36 31 31 31 31 31 31 31
>seq2
35 35 35 35 33 31 31 31 33 35 35 35 35 35 35 35 35 35 35 35 35 35 23 20 20 31 31 33 33 33 35 23 17 17 21 20 20 20 31 31 33 35 35 35 35 35 33 33 33 35 31 31 31 35 35 35 35 35 35 35
31 31 31 33 35 35 35 35 35 35 35 35 35 35 31 31 31 26 26 26 26 35 35 35 35 35 35 35 33 31 31 31 35 35 35 35 35 35 35 35 35 35 35 35 35 35 31 31 31 35 35 35 33 33 33 33 33 35 35 35
35 35 35 35 35 35 35 35 35 35 35 35 35 35 30 26 26 26 30 33 35 35 35 35 35 35 35 35 33 33 33 35 33 27 27 25 25 25 27 14 14 14 14 14 25 25 34 34 35 35 35 32 33 33 32 35 35 32 25 25
15 20 20 20 28 35 33 33 33 33 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 29 24 24 24 29 35 35 35 35 33 33 31 31 34 34 34 34 34 34 31 20 20 20 20 20 31 34 31 31 31 31 32 31 31 33
34 25 25 20 20 18 25 28 28 22 20 22 28 28 28 30 30 29 29 29 30 25 25 25 29 29 26 26 25
>seq3
32 32 32 32 35 35 35 35 35 35 35 35 35 35 35 35 35 35 38 38 39 39 32 32 32 35 35 35 35 35 34 31 21 21 25 35 32 25 25 25 32 35 35 37 39 35 35 35 35 35 35 35 35 35 35 35 35 35 32 32
32 32 32 35 32 32 32 32 35 35 35 35 35 35 35 35 35 35 32 32 32 32 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 34 34 26 26 26 26 32 35 35 35 35 35
35 34 34 34 34 34 34 34 35 35 35 35 35 35 35 35 35 26 26 26 26 35 35 35 35 35 35 35 35 34 34 34 34 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 30 22 24 21 21 21 30 35 35 35 35
35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 29 29 26 34 35 27 27 27 27 35 35 35 35 35 35 35 32 32 32 32 32 35 35 35 35 35 35 35 35
35 35 31 32 32 25 28 25 25 25 25 30 30 30 30 30 30 30 30 28 22 22 22 28 28 30 25 22 22 22 30 30"""
expected_output_text_file = """# Suggested nucleotide truncation position (None if quality score average did not drop below the score minimum threshold): 245
# Average quality score bins
34.333,34.333,34.333,34.667,35.000,34.333,34.333,34.333,35.000,35.667,35.667,35.667,35.667,35.667,35.667,35.667,35.667,35.667,37.667,37.667,38.000,38.000,31.667,30.667,30.667,35.333,35.333,36.000,35.333,35.333,36.000,31.333,26.000,26.000,28.667,31.667,30.667,28.333,32.000,32.000,35.000,36.667,36.667,37.333,38.000,35.667,35.000,35.000,35.000,35.667,33.000,33.000,33.000,35.333,35.333,35.667,35.667,35.667,34.667,34.667,33.333,33.333,33.333,35.000,34.667,34.667,34.667,34.667,35.667,35.667,35.667,35.667,35.667,35.667,34.333,34.333,34.333,32.667,31.667,31.667,31.667,34.667,35.667,35.667,35.667,35.667,35.667,35.667,35.000,34.333,34.333,34.333,35.667,35.667,35.667,35.667,35.667,35.667,35.667,35.667,35.667,35.667,35.667,35.667,35.667,35.667,34.333,34.333,34.000,35.333,32.667,32.667,32.000,32.000,34.000,35.000,35.000,35.667,35.667,35.667,35.667,35.333,35.333,35.333,35.333,35.333,35.333,35.333,35.667,35.667,35.667,35.667,35.667,35.667,34.000,32.667,32.667,29.667,31.000,32.000,32.667,35.667,35.667,35.667,35.667,35.667,35.667,35.667,35.000,34.667,34.667,35.333,34.667,33.000,33.000,32.333,32.333,32.333,33.000,28.667,28.667,28.667,28.667,28.667,31.000,31.000,34.000,34.000,35.667,34.000,31.333,31.000,30.333,30.333,30.000,34.000,35.667,34.667,32.333,32.333,29.000,30.667,30.333,30.333,30.333,32.667,32.000,32.000,33.667,33.667,34.333,35.333,35.333,35.667,35.667,35.667,35.667,35.667,35.667,35.667,35.667,35.667,35.667,35.667,35.667,33.667,32.000,32.000,32.000,33.667,35.667,33.667,33.667,32.667,34.667,35.000,31.667,31.333,32.333,32.333,35.000,35.000,35.000,33.333,32.333,27.333,27.333,26.667,26.667,26.667,30.000,29.000,32.333,32.333,34.000,34.000,34.333,34.000,34.000,34.667,35.000,32.000,30.667,29.333,29.333,24.667,28.000,28.000,28.000,26.000,25.333,27.667,29.000,29.000,29.000,30.000,30.000,29.500,29.500,28.500,26.000,23.500,23.500,26.500,28.500,29.500,25.500,24.000,23.500,22.000,30.000,30.000
# Standard deviation bins
1.700,1.700,1.700,2.055,1.633,2.494,2.494,2.494,1.633,0.943,0.943,0.943,0.943,0.943,0.943,0.943,0.943,0.943,2.055,2.055,2.160,2.160,6.944,8.219,8.219,3.682,3.682,2.944,2.055,2.055,2.160,6.944,10.033,10.033,8.179,8.498,8.219,8.498,6.164,6.164,3.559,2.357,2.357,2.055,2.160,0.943,1.633,1.633,1.633,0.943,1.633,1.633,1.633,0.471,0.471,0.943,0.943,0.943,2.055,2.055,2.625,2.625,2.625,1.633,2.055,2.055,2.055,2.055,0.943,0.943,0.943,0.943,0.943,0.943,2.494,2.494,2.494,4.784,4.497,4.497,4.497,2.055,0.943,0.943,0.943,0.943,0.943,0.943,1.633,2.494,2.494,2.494,0.943,0.943,0.943,0.943,0.943,0.943,0.943,0.943,0.943,0.943,0.943,0.943,0.943,0.943,2.494,2.494,2.449,1.247,4.784,4.784,4.546,4.546,2.160,1.633,1.633,0.943,0.943,0.943,0.943,1.247,1.247,1.247,1.247,1.247,1.247,1.247,0.943,0.943,0.943,0.943,0.943,0.943,2.944,4.784,4.784,5.185,4.546,4.546,4.784,0.943,0.943,0.943,0.943,0.943,0.943,0.943,1.633,1.700,1.700,1.247,1.700,4.320,4.320,5.249,5.249,5.249,4.320,10.403,10.403,10.403,10.403,10.403,4.320,4.320,0.816,0.816,0.943,2.944,6.650,5.354,6.799,6.799,6.683,2.944,0.943,2.055,5.249,5.249,9.933,7.587,7.318,7.318,3.300,3.300,2.944,2.944,0.943,0.943,0.943,0.471,0.471,0.943,0.943,0.943,0.943,0.943,0.943,0.943,0.943,0.943,0.943,0.943,0.943,3.399,5.715,5.715,5.715,3.399,0.943,3.399,3.399,4.784,1.700,1.633,4.110,3.682,3.859,3.859,0.816,0.816,0.816,1.700,1.886,6.128,6.128,4.989,4.989,4.989,2.160,5.715,1.886,1.886,2.160,2.160,1.700,2.160,2.160,1.247,0.816,4.967,4.497,6.799,6.799,5.312,2.449,2.449,2.449,3.742,4.497,4.028,1.000,1.000,1.000,0.000,0.000,0.500,0.500,0.500,4.000,1.500,1.500,1.500,0.500,0.500,0.500,2.000,1.500,0.000,0.000,0.000
# Total bases per nucleotide position bins
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1"""
if __name__ == "__main__":
main()