/
check_interleaved_fastq
executable file
·76 lines (62 loc) · 2.14 KB
/
check_interleaved_fastq
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python
# encoding: utf-8
import sys
import argparse
def interface(argv=None):
    """Parse the command-line arguments of the script.

    All three positionals are optional and are opened by argparse:

    * ``INPUT``       -- interleaved FASTQ to read (default: standard input)
    * ``GOOD_OUTPUT`` -- destination for properly paired reads
                         (default: standard output)
    * ``BAD_OUTPUT``  -- destination for reads without a mate
                         (default: standard error)

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            the behaviour of the original zero-argument call.

    Returns:
        argparse.Namespace: carries the attributes ``INPUT``,
        ``GOOD_OUTPUT`` and ``BAD_OUTPUT``, each an open text file object.
    """
    parser = argparse.ArgumentParser(
        description='Split an interleaved FASTQ stream into correctly '
                    'paired reads and reads that are missing their mate.')
    parser.add_argument('INPUT',
                        type=argparse.FileType('r'),
                        default=sys.stdin,
                        nargs='?',
                        help='interleaved reads.')
    parser.add_argument('GOOD_OUTPUT',
                        type=argparse.FileType('w'),
                        default=sys.stdout,
                        nargs='?',
                        help='Correctly Interleaved reads')
    parser.add_argument('BAD_OUTPUT',
                        type=argparse.FileType('w'),
                        default=sys.stderr,
                        nargs='?',
                        help='Non Interleaved reads')
    # The defaults are looked up at call time, so any redirection of the
    # standard streams done before this call is honoured.
    return parser.parse_args(argv)
def _split_read_id(idstring):
    """Return ``(name, strand)`` parsed from one FASTQ header line.

    Two header layouts are recognised:

    * ``@name/1`` or ``@name/2`` -- the mate number is the suffix of the
      first space-delimited token;
    * ``@name 1:N:0:INDEX`` -- the mate number is the first ``:``-separated
      field of whatever follows the first space.

    ``strand`` is ``""`` when the header carries no such information.
    """
    header = idstring.rstrip()
    # partition() never raises, whatever the number of spaces in the header.
    token, _, comment = header.partition(" ")
    if token.endswith(("/1", "/2")):
        # Only a trailing "/1" or "/2" counts, so a "/" elsewhere in the
        # read name cannot confuse the parser.
        return token[:-2], token[-1]
    return token, comment.split(":")[0]


def process_reads(args):
    """Separate properly interleaved read pairs from unpaired reads.

    Four-line FASTQ records are read from ``args.INPUT``. A record whose
    strand is ``"1"`` is held back until the next record arrives. If that
    next record has the same read name and is not itself a strand-1 record,
    both are written (forward first) to ``args.GOOD_OUTPUT``. Every record
    that does not end up in such a pair is written to ``args.BAD_OUTPUT``,
    in input order; this includes a mate-less second read and a forward
    read still pending when the input ends.

    Args:
        args: object with the attributes ``INPUT`` (readable text file),
            ``GOOD_OUTPUT`` and ``BAD_OUTPUT`` (writable text files).

    Returns:
        int: always ``0``.

    Side effects:
        All three file objects are closed before returning.
    """
    inp = args.INPUT
    fout = args.GOOD_OUTPUT
    ferr = args.BAD_OUTPUT

    # The forward read waiting for its mate; both empty when none is pending.
    pending_name = ""
    pending_record = ""

    # An explicit readline() loop is kept from the original implementation,
    # which chose it for speed (http://effbot.org/zone/readline-performance.htm).
    while True:
        idstring = inp.readline()
        if not idstring:
            break
        if not idstring.strip():
            # Blank line where a header is expected (e.g. trailing newline
            # at the end of the file): nothing to classify.
            continue

        # A record is the header plus sequence, separator and quality lines.
        record = idstring + inp.readline() + inp.readline() + inp.readline()
        name, strand = _split_read_id(idstring)

        if strand == "1":
            # A new forward read: a previous one still waiting has no mate.
            if pending_record:
                ferr.write(pending_record)
            pending_name, pending_record = name, record
        elif pending_record and name == pending_name:
            fout.write(pending_record)
            fout.write(record)
            pending_name, pending_record = "", ""
        else:
            # The current read has no matching forward read. Report the
            # stale forward read (if any) and the current read itself, so
            # that no input record is silently lost.
            if pending_record:
                ferr.write(pending_record)
            ferr.write(record)
            pending_name, pending_record = "", ""

    # A forward read at the very end of the input never got a mate.
    if pending_record:
        ferr.write(pending_record)

    inp.close()
    fout.close()
    ferr.close()
    return 0
if __name__ == '__main__':
    # Script entry point: parse the command line, then split the reads.
    process_reads(interface())