-
Notifications
You must be signed in to change notification settings - Fork 295
/
fastq-to-fasta.py
executable file
·72 lines (55 loc) · 2.17 KB
/
fastq-to-fasta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#! /usr/bin/env python2
#
# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
# Copyright (C) Michigan State University, 2009-2014. It is licensed under
# the three-clause BSD license; see doc/LICENSE.txt.
# Contact: khmer-project@idyll.org
#
# pylint: disable=invalid-name,missing-docstring
"""
Convert FASTQ files to FASTA format.
% python scripts/fastq-to-fasta.py [ -n ] [ -o <fasta_name> ] <fastq_name>
Use '-h' for parameter help.
"""
import sys
import argparse
import screed
def get_parser():
    """Build the command-line parser for the FASTQ-to-FASTA converter.

    Arguments defined:
      input_sequence  positional; name of the input FASTQ file.
      -o / --output   output FASTA file, opened for writing by argparse;
                      defaults to sys.stdout.
      -n / --n_keep   boolean flag, False unless given.  When set, reads
                      containing 'N' are kept; otherwise the caller drops
                      them.

    Returns the configured argparse.ArgumentParser.
    """
    parser = argparse.ArgumentParser(
        description='Converts FASTQ format (.fq) files to FASTA format (.fa).',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('input_sequence', help='The name of the input'
                        ' FASTQ sequence file.')
    parser.add_argument('-o', '--output', metavar="filename",
                        help='The name of the output'
                        ' FASTA sequence file.',
                        type=argparse.FileType('w'),
                        default=sys.stdout)
    # The help text previously claimed this flag *drops* reads with 'N's,
    # which is the opposite of what the flag does: dropping is the default
    # and passing -n keeps those reads.
    parser.add_argument('-n', '--n_keep', default=False, action='store_true',
                        help='Keep reads containing \'N\'s in the '
                        'input_sequence file; without this option such '
                        'reads are dropped.')
    return parser
def main():
    """Convert the FASTQ file named on the command line to FASTA.

    Parses the command line with get_parser(), iterates over the records
    that screed yields for ``args.input_sequence`` and writes each kept
    record to ``args.output`` as a ``>name`` header line followed by the
    sequence line.  A record whose sequence contains 'N' is skipped and
    counted unless ``--n_keep`` was given.

    Progress and summary messages are written to stderr, so they do not mix
    with FASTA data when the output is stdout.
    """
    args = get_parser().parse_args()

    # sys.stderr.write is used for all status output: it behaves the same
    # under Python 2 and Python 3, unlike the ``print >>`` statement.
    # The message is formatted explicitly; printing a parenthesised pair
    # emitted the repr of a tuple rather than readable text.
    sys.stderr.write('fastq from %s\n' % args.input_sequence)

    n_count = 0  # records skipped because their sequence contains 'N'

    # NOTE(review): parse_description=False presumably keeps the full header
    # text in record['name'] -- confirm against the screed documentation.
    records = screed.open(args.input_sequence, parse_description=False)
    for n, record in enumerate(records):
        if n % 10000 == 0:
            sys.stderr.write('... %d\n' % n)

        sequence = record['sequence']
        name = record['name']

        # Default behaviour is to drop reads containing 'N'; -n keeps them.
        if 'N' in sequence and not args.n_keep:
            n_count += 1
            continue

        args.output.write('>' + name + '\n')
        args.output.write(sequence + '\n')

    sys.stderr.write('\nlines from ' + args.input_sequence + '\n')

    if not args.n_keep:
        sys.stderr.write(str(n_count) + ' lines dropped.\n')
    else:
        sys.stderr.write('No lines dropped from file.\n')

    # Push buffered records out before claiming they were written.
    args.output.flush()

    # Report the output's name instead of the repr of the file object; fall
    # back to the object itself for file-likes that have no ``name``.
    sys.stderr.write('Wrote output to %s\n'
                     % getattr(args.output, 'name', args.output))
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()