/
tab5_to_usam.py
50 lines (44 loc) · 1.48 KB
/
tab5_to_usam.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python
from __future__ import print_function
import sys
# SAM header line: format version 1.5, records in no particular order.
SAM_HEADER = "@HD\tVN:1.5\tSO:unsorted"

# flags:
# 1 = paired (we assume that in this script)
# 4 = unmapped
# 8 = mate unmapped
# 64 = first of pair
# 128 = second of pair
FIRST_FLAGS = 64 | 8 | 4 | 1
SECOND_FLAGS = 128 | 8 | 4 | 1

# sam is the following tab-delimited columns:
#
# 1. read name
# 2. flags
# 3. ref (* = unaligned)
# 4. pos (0 = unaligned)
# 5. map qual (0 if unmapped)
# 6. cigar (* = unavailable)
# 7. mate ref (* = unaligned)
# 8. mate pos (0 = unaligned)
# 9. tlen (0 = unknown)
# 10. sequence
# 11. qualities
SAM_RECORD = "%s\t%d\t*\t0\t0\t*\t*\t0\t0\t%s\t%s"


def tab5_to_sam(lines):
    """Convert paired tab5 lines into unaligned SAM text lines.

    Each non-blank input line must hold at least five whitespace-separated
    fields: read name, first sequence, first qualities, second sequence and
    second qualities.  Any further fields are ignored.

    Args:
        lines: iterable of input lines (for example an open file or a list
            of strings); it is consumed lazily, one line at a time.

    Yields:
        The SAM header line, then two records per input line (the name
        suffixed with "/1" and "/2"), all without a trailing newline.

    Raises:
        ValueError: if a non-blank line has fewer than five fields.
    """
    yield SAM_HEADER
    for line_number, line in enumerate(lines, 1):
        fields = line.split()
        if not fields:
            # Blank lines (typically a trailing newline at end of input)
            # carry no read and are skipped instead of crashing.
            continue
        if len(fields) < 5:
            raise ValueError(
                "line %d: expected 5 fields (name, seq1, qual1, seq2, "
                "qual2) but found %d" % (line_number, len(fields)))
        (read_name, first_sequence, first_qualities,
         second_sequence, second_qualities) = fields[:5]
        yield SAM_RECORD % (read_name + "/1",
                            FIRST_FLAGS,
                            first_sequence,
                            first_qualities)
        yield SAM_RECORD % (read_name + "/2",
                            SECOND_FLAGS,
                            second_sequence,
                            second_qualities)


def main():
    """Read tab5 from stdin and write unaligned SAM to stdout."""
    # Iterate stdin directly so large inputs are streamed rather than
    # loaded into memory in one go.
    for record in tab5_to_sam(sys.stdin):
        print(record)


if __name__ == "__main__":
    main()