This repository has been archived by the owner on Nov 9, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 269
/
quality_scores_plot.py
executable file
·96 lines (74 loc) · 3.59 KB
/
quality_scores_plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python
# File created Sept 29, 2010
from __future__ import division
__author__ = "William Walters"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["William Walters", "Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.7.0"
__maintainer__ = "William Walters"
__email__ = "William.A.Walters@colorado.edu"
__status__ = "Release"
from qiime.util import make_option
from qiime.util import parse_command_line_parameters, get_options_lookup
from qiime.quality_scores_plot import generate_histogram
from qiime.util import create_dir
from qiime.parse import (parse_fastq_qual_score,parse_qual_score)
# Shared option lookup from qiime.util; it is not referenced again in the
# visible part of this script.
options_lookup = get_options_lookup()

# Script metadata; main() unpacks this dict as keyword arguments to
# parse_command_line_parameters().
script_info = {}
script_info['brief_description'] = """Generates histograms of sequence quality scores and number of nucleotides recorded at a particular index"""
script_info['script_description'] = """Two plots are generated by this module.
The first shows line plots indicating the average and standard deviations
for the quality scores of the input quality score file,
starting with the first nucleotide and ending with the final
nucleotide of the largest sequence.
A second histogram shows a line plot with the nucleotide count for each
position, so that one may easily visualize how sequence length drops off.
A dotted line shows the cut-off point for a score to be acceptable (default
is 25).
A text file logging the average, standard deviation, and base count
for each base position is also generated. These three sections are comma
separated.
The truncate_fasta_qual_files.py module can be used to create truncated
versions of the input fasta and quality score files. By using this module
to assess the beginning of poor quality base calls, one can determine
the base position to begin truncating sequences at."""

# Each usage entry is a (title, description, command line) tuple.
script_info['script_usage'] = [
    ("""Example:""",
     """Generate plots and output to the quality_histograms folder""",
     """%prog -q seqs.qual -o quality_histograms/"""),
]
script_info['output_description'] = """A .pdf file with the two plots will be created in the output directory"""

script_info['required_options'] = [
    make_option('-q', '--qual_fp', type='existing_filepath',
                help='Quality score file used to generate histogram data.'),
]
script_info['optional_options'] = [
    make_option('-o', '--output_dir', type='new_dirpath',
                help=('Output directory. Will be created if does not exist. '
                      '[default: %default]'),
                default="."),
    make_option('-s', '--score_min', type='int',
                help=('Minimum quality score to be considered acceptable. '
                      'Used to draw dotted line on histogram for easy '
                      'visualization of poor quality scores. '
                      '[default: %default]'),
                default=25),
    # Inverted flag: verbose output is on by default and passing -v turns it
    # OFF (store_false with default=True).
    # NOTE(review): main() passes suppress_verbose=True, presumably so this
    # option replaces a built-in verbose flag -- confirm in qiime.util.
    make_option('-v', '--verbose',
                action='store_false', default=True,
                help=('Turn on this flag to disable verbose output. '
                      ' [default: %default]')),
]
script_info['version'] = __version__
def main():
    """Parse the command line and generate the quality score histograms.

    Reads the options described in script_info, creates the output
    directory, chooses the quality score parser from the input file name
    (the FASTQ parser for names ending in '.fastq' or '.fastq.gz', the
    plain quality score parser otherwise) and hands everything to
    generate_histogram.
    """
    # Only the parsed options are used; the parser object and positional
    # arguments are ignored.
    _parser, opts, _positional = parse_command_line_parameters(
        suppress_verbose=True, **script_info)

    input_path = opts.qual_fp
    target_dir = opts.output_dir
    threshold = int(opts.score_min)
    be_verbose = opts.verbose

    create_dir(target_dir)

    # str.endswith accepts a tuple of suffixes, so one call covers both
    # the plain and the gzipped FASTQ extension.
    looks_like_fastq = input_path.endswith(('.fastq', '.fastq.gz'))
    chosen_parser = (parse_fastq_qual_score if looks_like_fastq
                     else parse_qual_score)

    generate_histogram(input_path,
                       target_dir,
                       threshold,
                       be_verbose,
                       qual_parser=chosen_parser)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()