-
Notifications
You must be signed in to change notification settings - Fork 0
/
bwt.py
51 lines (40 loc) · 926 Bytes
/
bwt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import sys
from collections import deque
try:
text = sys.argv[1]
except IndexError:
text = 'BANANA'
TAIL_CHR = '^'
text += TAIL_CHR
n = len(text)
#---------------------------------------------
# BWT
rL = list()
dq = deque(text)
for i in range(n):
rL.append(''.join(list(dq)))
dq.rotate(1)
rL.sort()
bwt = [s[-1] for s in rL]
sfx = [(n - e.index(TAIL_CHR) - 1) for e in rL]
for s,e in zip(sfx,rL):
print '%2d' % s,
print e
print
#---------------------------------------------
# reverse
# take the last col and sort to generate col 0
c0 = sorted(bwt)
print '\n'.join(c0) + '\n'
# get pairs as a demo:
c1 = [bwt[i]+c0[i] for i in range(n)]
c1.sort()
print '\n'.join(c1) + '\n'
# iterate starting with bwt again:
L = sorted(bwt)
for j in range(n-2):
L.sort()
L = [bwt[i]+L[i] for i in range(n)]
print '\n'.join(L) + '\n'
L = [e for e in L if not TAIL_CHR in e]
assert ''.join(L[0]) == text[:-1]