-
Notifications
You must be signed in to change notification settings - Fork 0
/
simplify.py
55 lines (43 loc) · 1.23 KB
/
simplify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import sys
# Open the input file named by the first command-line argument.
inFile = open(sys.argv[1], 'r')
# Discard the first line of the file before any blocks are read.
# The next() builtin works on Python 2.6+ and Python 3 alike (the file
# method .next() exists only on Python 2), and the default keeps an
# empty input file from raising StopIteration here.
next(inFile, None)
def getAln(stream):
    """Read the next blank-line-delimited block of records from *stream*.

    Every non-blank line is split on whitespace into a list of fields.
    Blank lines that come before the first record of a block are
    skipped, so a run of consecutive blank lines (or a blank line at the
    very start) does not yield a spurious empty block that a caller
    would mistake for end of input.

    Args:
        stream: An iterator of text lines, such as an open file. It is
            advanced past the block and the blank line that ends it.

    Returns:
        A list with one list of whitespace-separated fields per line of
        the block. The list is empty only when the stream holds no more
        non-blank lines.
    """
    block = []
    for line in stream:
        fields = line.split()
        if fields:
            block.append(fields)
        elif block:
            # A blank line after at least one record terminates the block.
            break
        # Otherwise: a blank line before any record; keep scanning.
    return block
def seq_metric(trp, ref):
    """Classify each aligned column of *trp* against *ref* and count them.

    Columns are compared position by position over the length of *trp*.
    Identical characters (including two gaps or two 'n') count as a
    match. For differing characters the first rule that applies wins:
    '-' in *ref* is a ref deletion, '-' in *trp* is a ref insertion,
    'n' in *ref* is an n, and anything else is a mismatch.

    Args:
        trp: Aligned sequence text whose length drives the comparison.
        ref: Aligned sequence text indexed at the same positions.

    Returns:
        A tuple (matches, mismatches, ref_deletion, ref_insertion, ns).

    Raises:
        IndexError: If *ref* is shorter than *trp*.
    """
    n_match = n_mismatch = n_ref_del = n_ref_ins = n_unknown = 0
    for pos, trp_base in enumerate(trp):
        ref_base = ref[pos]
        if trp_base == ref_base:
            n_match += 1
        elif ref_base == '-':
            n_ref_del += 1
        elif trp_base == '-':
            n_ref_ins += 1
        elif ref_base == 'n':
            n_unknown += 1
        else:
            n_mismatch += 1
    return (n_match, n_mismatch, n_ref_del, n_ref_ins, n_unknown)
# Turn every block read from inFile into one tab-separated output row.
while True:
    aln = getAln(inFile)
    if not aln:
        # getAln returned no records: stop processing.
        break
    # Rows 1 and 2 of the block must each hold exactly seven fields;
    # row 0 is not used. Each row carries its own aligned length, so
    # they are kept in separate names: previously the second unpack
    # overwrote the first row's value before ref_coord was computed.
    (_ref_tag, ref_name, ref_start, ref_alnLength,
     ref_strand, ref_size, ref_seq) = aln[1]
    (_trp_tag, trp_name, trp_start, trp_alnLength,
     trp_strand, trp_size, trp_seq) = aln[2]
    # Coordinates are emitted as "start,end,strand" with end = start + length.
    ref_coord = ','.join(
        [ref_start, str(int(ref_start) + int(ref_alnLength)), ref_strand])
    trp_coord = ','.join(
        [trp_start, str(int(trp_start) + int(trp_alnLength)), trp_strand])
    m, mm, r_del, r_ins, ns = seq_metric(trp_seq, ref_seq)
    # The length column keeps the value it always had (row 2's length).
    row = [ref_name, ref_size, ref_coord, trp_name, trp_size, trp_coord,
           trp_alnLength, ref_seq, trp_seq, m, mm, r_del, r_ins, ns]
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('\t'.join(map(str, row)))