In [1]:
from collections import defaultdict

In [2]:
symbol = lambda c, i: f"{c}{i}"

def bwt(text):
    rotations = []
    for i in range(len(text)):
        rotation = text[len(text)-i:] + text[0:len(text)-i]
        rotations.append(rotation)
    rotations = sorted(rotations)
    
    last = [k[-1] for k in rotations]
    
    counts = defaultdict(int)
    new_last = list()
    for s in last:
        counts[s] += 1
        new_last.append(f"{s}{counts[s]}")
    return new_last

In [40]:
last = bwt("PANAMABANANAS$")
print(last)

['S1', 'M1', 'N1', 'P1', 'B1', 'N2', 'N3', 'A1', 'A2', 'A3', 'A4', 'A5', '$1', 'A6']


In [41]:
def get_first_occurence(last):
    first_column = sorted(last)
    first = dict()
    for i, c in enumerate(first_column):
        if c[0] not in first:
            first[c[0]] = i
    return first
            
first_occurence = get_first_occurence(last)
print(first_occurence)

{'$': 0, 'A': 1, 'B': 7, 'M': 8, 'N': 9, 'P': 12, 'S': 13}


In [63]:
def get_partial_suffix_array(last, first_occurence, C=5):
    suffix_array = dict()
    j = 0
    c = last[j]
    for i in range(0, len(last)):
        suffix_array[c] = len(last)-i-2
        j = first_occurence[c[0]] + int(c[1:]) - 1
        #j = last2first[c]
        c = last[j]
    suffix_array[symbol("$",1)] = len(last)-1
    
    suffix_array = sorted(suffix_array.items())
    
    partial = dict()
    for c, i in suffix_array:
        if i % C == 0:
            partial[c] = i
    
    
    return partial

In [54]:
partial_suffix_array = get_partial_suffix_array(last, first_occurence)
print(partial_suffix_array)

{'S1': 12, 'A6': 11, 'N3': 10, 'A5': 9, 'N2': 8, 'A4': 7, 'B1': 6, 'A1': 5, 'M1': 4, 'A2': 3, 'N1': 2, 'A3': 1, 'P1': 0, '$1': 13}
{'A1': 5, 'N3': 10, 'P1': 0}


In [9]:
def get_counts(column, k):
    counts = dict()
    for c in sorted({c[0] for c in column}):
        counts[c] = [0]
        
    for c, l in counts.items():
        for i, s in enumerate(column):
            if s[0] == c:
                l.append(l[i]+1)
            else:
                l.append(l[i])

    selected_indices = [i for i in range(0,len(column)+1) if i%k==0]
    
    for c, l in counts.items():
        counts[c] = [l[i] for i in selected_indices]
            
    return counts

In [55]:
def contains_symbol(last, top, bottom, symbol):
    for s in last[top:bottom+1]:
        if s[0] == symbol:
            return True
    return False

def get_count_symbol(symbol, pos, last, partial_count, C):
    start_pos = int(pos / C)
    symbol_counts = partial_count[symbol]
    count = symbol_counts[start_pos]
    for s in last[pos - (pos % C): pos]:
        if s[0] == symbol:
            count += 1
    return count

def better_bw_matching(first_occurence, last, pattern, partial_count, C):
    top = 0
    bottom = len(last)-1
    while top <= bottom:
        if len(pattern) > 0:
            symbol = pattern[-1]
            pattern = pattern[:-1]
            if contains_symbol(last, top, bottom, symbol):
                top = first_occurence[symbol] + get_count_symbol(symbol, top, last, partial_count, C)
                bottom = first_occurence[symbol] + get_count_symbol(symbol, bottom+1, last, partial_count, C) -1
                #print(f"symbol {symbol}: top {top} - bottom {bottom}")
            else:
                #print(f"no symbol {symbol} between top {top} and bottom {bottom}")
                return 0
        else:
            return bottom - top + 1
        
def bwt_matching_positions(suffix_array, first_occurence, last, pattern, partial_count, C):
    top = 0
    bottom = len(last)-1
    while top <= bottom:
        if len(pattern) > 0:
            symbol = pattern[-1]
            pattern = pattern[:-1]
            if contains_symbol(last, top, bottom, symbol):
                top = first_occurence[symbol] + get_count_symbol(symbol, top, last, partial_count, C)
                bottom = first_occurence[symbol] + get_count_symbol(symbol, bottom+1, last, partial_count, C) -1
                #print(f"symbol {symbol}: top {top} - bottom {bottom}")
            else:
                #print(f"no symbol {symbol} between top {top} and bottom {bottom}")
                return []
        else:
            #print(f"found pattern with last first symbol {symbol} from {top} to {bottom}")
            return lookup_suffixes(symbol, suffix_array, last, first_occurence, top, bottom)    
        
def lookup_suffixes(symbol, suffix_array, last, first_occurence, top, bottom):
    start_positions = []
    for i in range(top, bottom+1):
        symbol_suffix = i-first_occurence[symbol]+1
        symbol_with_suffix = f"{symbol}{symbol_suffix}"
        start_position = lookup_suffix(symbol_with_suffix, suffix_array, last, first_occurence, i)
        start_positions.append(start_position)
    return start_positions
    
def lookup_suffix(symbol, suffix_array, last, first_occurence, i):
    start_position = 0
    offset = 0
    while symbol not in suffix_array:
        symbol = last[i]
        i = first_occurence[symbol[0]] + int(symbol[1:]) - 1
        offset += 1
    start_position = suffix_array[symbol]+offset
    return start_position

In [56]:
C = 5
partial_counts = get_counts(last, C)
print(partial_counts)
better_bw_matching(first_occurence, last, "ana", partial_counts, C)

{'$': [0, 0, 0], 'A': [0, 0, 3], 'B': [0, 1, 1], 'M': [0, 1, 1], 'N': [0, 1, 3], 'P': [0, 1, 1], 'S': [0, 1, 1]}


0

In [57]:
def index_symbols(last):
    symbol_count = defaultdict(int)
    new_last = []
    for s in last:
        symbol_count[s] += 1
        new_last.append(f"{s}{symbol_count[s]}")
    return new_last

In [58]:
last = index_symbols("GGCGCCGC$TAGTCACACACGCCGTA")
C = 1
partial_counts = get_counts(last, C)
print(partial_counts)
first_occurence = get_first_occurence(last)
print(first_occurence)
patterns = ["ACC","CCG","CAG"]
for pattern in patterns:
    c = better_bw_matching(first_occurence, last, pattern, partial_counts, C)
    print(c)

{'$': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'A': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 5], 'C': [0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 10, 10], 'G': [0, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7], 'T': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3]}
{'$': 0, 'A': 1, 'C': 6, 'G': 16, 'T': 23}
1
2
1


In [59]:
with open("../data/dataset_301_7.txt", "r") as fin:
    lines = [line.strip() for line in fin]
    last = index_symbols(lines[0])
    patterns = lines[1].split(" ")
    C = 5
    partial_counts = get_counts(last, C)
    first_occurence = get_first_occurence(last)
    pattern_counts = []
    for pattern in patterns:
        c = better_bw_matching(first_occurence, last, pattern, partial_counts, C)
        pattern_counts.append(str(c))
    print(" ".join(pattern_counts))

1 0 0 1 1 0 0 0 0 1 1 1 0 0 1 0 0 1 1 1 0 0 1 0 0 0 1 0 0 1 1 0 0 1 0 1 1 0 1 0 0 0 1 0 0 0 0 0 0 1 1 0 1 0 1 1 1 1 0 1 0 0 0 0 0 1 1 1 0 1 0 1 0 1 0 1 1 1 1 0 0 1 0 0 1 0 1 1 1 0 0 1 1 0 0 1 0 0 1 0 1 0 1 1 0 1 1 1 0 1 1 0 1 0 1 0 0 0 0 0 0 1 0 1 0 1 0 1 0 1 0 0 0 1 1 1 1 0 0 1 0 1 0 0 0 0 0 1 0 1 0 1 1 1 0 1 0 1 1 0 0 0 1 0 0 0 1 1 0 1 0 1 1 1 1 0 1 0 0 1 0 0 0 1 1 1 0 0 1 1 0 0 1 0 1 1 1 1 1 1 1 0 1 1 0 1 1 0 1 1 0 0 0 1 1 1 1 0 0 0 0 1 1 1 0 1 0 0 1 0 0 1 0 1 0 1 0 1 1 1 1 1 1 0 0 0 0 1 1 0 1 0 1 1 0 0 0 1 1 0 1 1 0 1 1 0 0 0 0 1 0 1 1 1 0 1 0 0 0 1 0 0 1 1 0 1 1 1 0 0 0 1 1 0 1 0 1 0 0 1 1 0 1 1 1 1 1 1 0 1 0 1 1 0 0 1 0 0 1 1 0 1 0 1 1 1 0 1 0 1 0 1 0 0 1 1 1 1 1 0 0 1 0 1 0 1 1 1 0 1 1 0 1 0 1 1 0 0 1 1 0 1 0 1 0 0 0 0 1 0 1 1 0 1 1 1 0 0 0 0 0 0 1 1 0 1 0 0 0 1 0 1 0 1 0 1 1 0 1 1 0 1 0 0 1 0 0 1 1 0 1 0 1 1 0 1 1 1 0 0 1 1 1 0 0 1 0 0 0 1 1 0 0 1 0 1 0 0 0 1 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 1 1 1 1 0 0 1 1 1 0 0 0 0 1 0 1 1 0 1 0 0 1 0 0 0 1 0 1 0 1 1 1 0 1 0 0 

In [64]:
def bwt_matching(text, patterns, C=5):
    text += "$"
    bwt_text = bwt(text)
    partial_counts = get_counts(bwt_text, C)
    first_occurence = get_first_occurence(bwt_text)
    partial_suffix_array = get_partial_suffix_array(bwt_text, first_occurence, C)
    found = defaultdict(list)
    p5 = int(0.05*len(patterns))+1
    for i, pattern in enumerate(patterns):
        c = bwt_matching_positions(partial_suffix_array, first_occurence, bwt_text, pattern, partial_counts, C)
        found[pattern].extend(c)
        if i % p5 == 0:
            print(f"pattern {i} of {len(patterns)} processed")
    return found
        
def print_matches(matches):
    for k, v in matches.items():
        pos = " ".join([str(i) for i in sorted(v)])
        print(f"{k}: {pos}")

In [65]:
found = bwt_matching("AATCGGGTTCAATCGGGGT", ["ATCG","GGGT"])
print_matches(found)

pattern 0 of 2 processed
pattern 1 of 2 processed
ATCG: 1 11
GGGT: 4 15


In [67]:
with open("../data/dataset_303_4-3.txt", "r") as fin:
    lines = [line.strip() for line in fin]
    patterns = lines[1].split(" ")
    found = bwt_matching(lines[0], patterns, C=1)
    print_matches(found)

pattern 0 of 15322 processed
pattern 767 of 15322 processed
pattern 1534 of 15322 processed
pattern 2301 of 15322 processed
pattern 3068 of 15322 processed
pattern 3835 of 15322 processed
pattern 4602 of 15322 processed
pattern 5369 of 15322 processed
pattern 6136 of 15322 processed
pattern 6903 of 15322 processed
pattern 7670 of 15322 processed
pattern 8437 of 15322 processed
pattern 9204 of 15322 processed
pattern 9971 of 15322 processed
pattern 10738 of 15322 processed
pattern 11505 of 15322 processed
pattern 12272 of 15322 processed
pattern 13039 of 15322 processed
pattern 13806 of 15322 processed
pattern 14573 of 15322 processed
AGTGGAAAG: 
CATCGATCA: 
AGTGTTTAG: 
AATGACCAA: 339
GGCCCAAGG: 
CCAGGAGCC: 
AATGCGCAA: 
CCCACCGCC: 
TTCCGTCTT: 
CATACCTCA: 
AATAACGAA: 
ATCGTGAAT: 
ATATTGAAT: 
CTTTGGGCT: 
GTACACCGT: 
CTGGTGACT: 
GTAACGAGT: 
ACACCCGAC: 
AGCTTAGAG: 
GCGATAGGC: 
CCTTTAGCC: 
CTACGACCT: 
ACAAATCAC: 
TTAACGTTT: 
ACTAGTCAC: 
ACCAATAAC: 
CATGACGCA: 
TACATCATA: 
TCACGGTTC: 
ATGATAA

CTGTTGACT: 
AGGTGATAG: 
AAGACGGAA: 
ATACGAAAT: 
CCAAAATCC: 
CGGCGGGCG: 
CATCTAACA: 4635
TCAACAGTC: 
CGGCCGGCG: 
TCCCTAGTC: 
CCACCGACC: 8005
TCGCCCATC: 
TGAGTGTTG: 445 452 1648 1655 5100
GACATGGGA: 
TCCCACTTC: 
GCCTGAAGC: 
GGCTTACGG: 
AGCCTCTAG: 
ATAGTCGAT: 
TCTGACCTC: 
GTCGTACGT: 
CAGCGACCA: 
ATTAGGAAT: 
GAAGACCGA: 
GATAGTCGA: 
GAACAGTGA: 
GTGCCGTGT: 
ACCGTCTAC: 
GTTCGCCGT: 
GATTTCGGA: 
TGCGGGTTG: 
CTCTGGTCT: 
CAATTATCA: 
TTCATTATT: 
CACCACACA: 
CGTTAGGCG: 
ATGCTCTAT: 
CCGTGGTCC: 
ACATAAGAC: 
TATCAAATA: 
TAGACATTA: 
TGAGCGTTG: 
CTTAGCCCT: 
CCGTGTACC: 
AGACTCGAG: 
CATGTTCCA: 
TCGTGCTTC: 
CCAAAGCCC: 
CCCGAGCCC: 
CGGTAAGCG: 827
GCAGCCAGC: 
TCGACCCTC: 
GTCAGCCGT: 
CACATATCA: 
CGCTATGCG: 
AGGGCCCAG: 
GGGTGAGGG: 
TAATCACTA: 
TGTAGGGTG: 
CGGCGCTCG: 6621
TCAATAATC: 
AACTGCTAA: 623 630 3078 3515 3522
GCGTTGCGC: 7877 8689
CCATTAGCC: 
GGGAGTGGG: 
CCTACTGCC: 
GCCTGAGGC: 
CGATGTGCG: 
CTCGCTTCT: 
TCGGTCCTC: 
GTTCATAGT: 2413 2522 3026
CAGTTGGCA: 
GAATGGGGA: 
TTGCGTTTT: 
CTTGCGACT: 
AGGTTATAG: 
TGCTTG

CCAGGGTCC: 
CGAATCCCG: 
AGCATGGAG: 
AAGTTCTAA: 
GGGAGCTGG: 
GTCCGTCGT: 6657 9305 9312
CAAAGCTCA: 
CATGCTACA: 1091 7468
GCCAGGCGC: 
GTGAGACGT: 
CGGAAGTCG: 
CGGTGCCCG: 
CCACTTTCC: 
TGTAAGATG: 
CGTGTGGCG: 
GGGCTCTGG: 
TGTCTATTG: 
CTGGCAGCT: 
CCAACAACC: 
AGTGTCCAG: 
ACGTCGTAC: 
TCATTAATC: 
CTCCCGACT: 
CACGGTTCA: 
GGGAGGAGG: 
CCCAAACCC: 
TCCCGGTTC: 3285
CATCTCCCA: 
ATGCACAAT: 
GTTAATAGT: 
GGCTCCCGG: 
TAGTAACTA: 
TGCTCCATG: 
AGATTACAG: 
GTTTTGGGT: 
ACTCACCAC: 1379
CTCGAACCT: 7686
CTACTACCT: 
GTGCTGCGT: 
ACCGAACAC: 
ACTAACGAC: 3932
TCTAAAATC: 
GTTATAGGT: 
TCCTCCATC: 
AACTGTTAA: 
TTATTCCTT: 
GACAAAGGA: 
TTATTGATT: 
TACTTGATA: 4400 6184 8603
AGATACTAG: 
AATTCTAAA: 1695 1702 4015 7270
GTCACACGT: 5029
TTGCCCATT: 
TAAAGCGTA: 
GTCTAGTGT: 
GCAGGTAGC: 
GATACCTGA: 
GTATGGTGT: 
GATGATTGA: 
CACTATACA: 
AATCGCAAA: 
TGTAGCATG: 
CCCATTTCC: 
TACGATATA: 
AGTTGCAAG: 
CAGGCAACA: 
TCTTTCGTC: 
CTGCGCACT: 
TACGGCATA: 
AACTAGAAA: 
TCATACATC: 
CGCCGCCCG: 
ATCTCGAAT: 
ACACTTGAC: 
ACCATACAC: 
ATTGACCAT: 
CGAGCGTCG: 


GTATAGCGT: 
CGAGGTGCG: 
TGTTCTTTG: 
ATTAATTAT: 2672
GCCTGCAGC: 
TATCGAGTA: 
GCAGTACGC: 
TAGCTCCTA: 
GATTCCGGA: 
GAAATTAGA: 
GCCCTTAGC: 
GTACTCTGT: 
GGTGCTTGG: 
ACTGGTGAC: 
GGCCTCTGG: 
CCAAGCCCC: 
TAGCTTTTA: 
CACTCTACA: 
CAGTTTGCA: 
CCCCTCTCC: 
CACTGGGCA: 
TGCGCGTTG: 3094 7874 8686
TAGTCTATA: 
GTGAGAAGT: 
CAGTTCACA: 
TTAATGTTT: 
CCGCTCACC: 
TTCTATCTT: 
GAAACCGGA: 
TTAGGAATT: 
CCCAATACC: 
GGTGAACGG: 
ATCCACGAT: 
CGTCTCACG: 
AATCAAAAA: 
TTTGAGCTT: 
TTTTATTTT: 
AGGCTAGAG: 
CGTGTTACG: 
TGAGGTCTG: 
TTGGGATTT: 
GAGGGTTGA: 
AGTTGGTAG: 
GTACGAAGT: 
GACCAAGGA: 
TGAACAATG: 
CTATACCCT: 
CCTTCGACC: 
TGGGGTCTG: 
AACCAACAA: 
CGATACACG: 
AGTATTCAG: 
ACCCCAGAC: 
GCATGGGGC: 
TGACTCTTG: 
GCGAGTAGC: 
CTAAGTGCT: 
CTTTTATCT: 
CGTCTAGCG: 6817
TGTCCGATG: 3324
CCAACGCCC: 
ACCAATGAC: 
CCATGGGCC: 2477 5177 8359 8913
ACAGCACAC: 
TGGGGGATG: 673 680 2266 2904 6008
CGTCATTCG: 
TCAACTCTC: 
GTATTTGGT: 7569
AGATGCGAG: 
TCCGCGCTC: 
TCCGAAATC: 
ATCCTGGAT: 
CTCGACCCT: 
ATTCTCGAT: 
TAGATTCTA: 
AATCAAGAA: 
AGATAGGAG: 4219
A

GTACCCTGT: 
TGATTTTTG: 
CGACTGGCG: 
ATCCACTAT: 
TGTTAGGTG: 
ACAACTTAC: 
TCCTCTCTC: 
TGGGGCGTG: 
ATTAACTAT: 
ATAGTTAAT: 
ACGGGCCAC: 
GTTTACTGT: 
CGGTAAACG: 
GTACTGAGT: 8793
TTAATTCTT: 2459
TGCACATTG: 
CTGCCGCCT: 7440
TGTCGTGTG: 
AGATAGTAG: 
GTGCCCTGT: 2942
CCGTCTACC: 
TGAGTTGTG: 
CGTAATCCG: 
TTCGTAGTT: 
CGTAGGCCG: 
ACACCGTAC: 
AATCTCAAA: 
GATCCAAGA: 
TATTTTATA: 
ATTATTCAT: 
GATACTTGA: 4398 6182 6189 8601 8608 9025
CGTACCACG: 
ACGACTTAC: 
ATTATAAAT: 
CGTCACCCG: 
TTGGCGATT: 
AGTTTAGAG: 
AGGGCGAAG: 
CGGAAGCCG: 
ATGGATCAT: 
CGATACGCG: 
GATGGACGA: 
GGGTAGGGG: 
CCCAACCCC: 
TCAGTTGTC: 
TTACTCATT: 
CGTCGACCG: 
CTACCCTCT: 39 4890 7202 7352 7359 8934
ACCGTAAAC: 
CGACGTGCG: 
GGTAAGTGG: 
TACCTTTTA: 
GTTCAGCGT: 
ACGCAATAC: 
TCGAGGGTC: 
AGTCTGTAG: 
ACTATTAAC: 
TTACCTGTT: 
CGCGCTGCG: 
CTCCAAGCT: 
ACTTACGAC: 
TCGACCATC: 
AGGCGCCAG: 
TGTGCGGTG: 
TATTATTTA: 
TGCGCAATG: 
CTTAAATCT: 
ACAAAAGAC: 
GCACGGAGC: 
TTTAGGGTT: 
CATGACACA: 
TCAGTCATC: 
ACTCTCCAC: 
CGGACGCCG: 
GAGTTACGA: 
CGCCAGTCG: 2389 3557 5251 55

GCCGGCCGC: 
TCACACGTC: 5030
CGCATTACG: 3727 7708
TCGCGCATC: 
AGAAACAAG: 
TCCTGGGTC: 
CGATCAGCG: 
CCCTTCCCC: 
TGTATTGTG: 1258 6790 6926
GTTCTTAGT: 
TCGCATATC: 
CGAGGATCG: 
CCGGTCTCC: 
TAAACTTTA: 
TACTGACTA: 
TGGGAGGTG: 
GAGCGGCGA: 
TGTGTCCTG: 
CATCCGGCA: 
ATCGTACAT: 7404 9384
TCAGGATTC: 
GAAGTTGGA: 
TGCATGTTG: 
AAAAAACAA: 
GGAGCAGGG: 
AGAATAAAG: 
TTCTTACTT: 
GCTTATCGC: 
ATTTGCAAT: 
AGTTTTCAG: 
CAGCCCCCA: 
AGCGACAAG: 
CGTGGTGCG: 
AGGGTGAAG: 
GACCGGAGA: 
GAATCTAGA: 
CGAGCAGCG: 
TCTAATATC: 
GGTACTTGG: 
GCTGTGCGC: 
AGCATGTAG: 
ATACACCAT: 
TAAGTGATA: 
ACGGCCGAC: 327
AAAGCCTAA: 
CCCTACTCC: 6083
GGGGCCGGG: 
GCGCGTCGC: 
GCCACATGC: 
GCGGCACGC: 
ACCTGTAAC: 
GGGGCGAGG: 6430
GATTACTGA: 
AGGCGGAAG: 
ATGGGCGAT: 
GTTTATAGT: 
CTTTCTTCT: 
ATATCTAAT: 
GAACTGAGA: 
CTAGTCCCT: 
GTAAAACGT: 
TCTAATTTC: 
TGATCATTG: 
TAACCCATA: 
CGAGGCCCG: 
CGACAGACG: 
GCCCAATGC: 
GTCGAGCGT: 
GCCGGATGC: 
ATCACACAT: 
TAACTGATA: 
TAGATACTA: 
ACCTATGAC: 
GTCCAGTGT: 
TTGGTCATT: 
AAACGCAAA: 
AGAGGCGAG: 6038 6121
CTCGCTACT: 
AATTGCAA

TCCCATGTC: 
AAGAGCTAA: 
GAGTGCAGA: 
AATACCCAA: 
TGTAGCTTG: 
AGAGGGTAG: 
GATTACGGA: 
GTGTGCCGT: 
GGATAGCGG: 
TGAAGACTG: 
TAACCCTTA: 
GTAGCAGGT: 
TCATCGATC: 
TCATGATTC: 
AATGTTTAA: 
TCCGAGCTC: 
GATGGCAGA: 
ATGATCGAT: 603 5699 9457
GGAGCTTGG: 
TACAGGGTA: 
TGGGGATTG: 
CAAGGAGCA: 
GTAGATCGT: 
GTCACCTGT: 
TTATTAATT: 
GCTACGAGC: 
TTGTTCTTT: 
ATTTATGAT: 
ATGTTTGAT: 
GTTCGGGGT: 
TTCCTACTT: 
GGTGACCGG: 
GGTCTTAGG: 
ACGCACAAC: 
TCGTTCGTC: 
GAAACCAGA: 
GTAGGTCGT: 
TGCAACCTG: 
GTACCTGGT: 
TCTCAGATC: 4259
CCCTGCCCC: 
CCACGCTCC: 
GAATATCGA: 
GTGACAGGT: 
GTTACCTGT: 
ATCATGAAT: 
GCTACTGGC: 
GCTGTTTGC: 9505
CATGGCACA: 
GCACTCAGC: 
AAACCGGAA: 
TGTGTAATG: 
ATTATCTAT: 
TGCTCAGTG: 
ACGTCTAAC: 
ATGGCGAAT: 
ATTGCGGAT: 
AGGAAAAAG: 
TGCGACGTG: 
CATCTGTCA: 
TGGGCGATG: 
AAATGCTAA: 
AATTAACAA: 
GTATGTGGT: 
ACCAGACAC: 
CCGGGGACC: 
CAGAGTCCA: 
CAAACATCA: 
GTAGTCAGT: 
ATGATCTAT: 
AGGTAAGAG: 
TAACCAATA: 
GAAGAAGGA: 
TACGTTATA: 
ATTTGCCAT: 
TCACCCCTC: 
CGGTAATCG: 
ATTGAACAT: 
CGTTTGGCG: 
TTACCATTT: 
ACGGTTTAC: 
TTCGAGG

ATTCAAAAT: 
TGGCACGTG: 
ATATCAAAT: 
GGAGACGGG: 6276
TGAGGTTTG: 
TCGTCGCTC: 
CCGTTAACC: 
TGCCCAATG: 
CGGCGTCCG: 
GCGAAAAGC: 
AGATATGAG: 3203
AAACTAGAA: 
CTGTTTGCT: 9506
TGGCAAGTG: 3145 8282 9075
ATTAACAAT: 
GGGTCGAGG: 
CAAGATGCA: 
ATCGGGGAT: 
TTACTCTTT: 
ATGTAAGAT: 
TACGGGCTA: 
AAAGACGAA: 
TGTTATATG: 
TCGTCCCTC: 
AACTCTAAA: 
CCAGCGTCC: 
ATCAGCAAT: 
ACCGACCAC: 6449 8007
AAATTTCAA: 
TCACTCGTC: 
ATACCTTAT: 
ACTGTCCAC: 3497
TCCACGCTC: 
TGTACCGTG: 
AGTCAAGAG: 
CTAGCCCCT: 
TAACAGCTA: 
TACTTCCTA: 
TCATCCATC: 
GCGCCATGC: 
ATTTATTAT: 
GTTGTACGT: 
TAAAGCATA: 
GACGACGGA: 
GACCCCAGA: 
CACCTGCCA: 
GTCGCCTGT: 
AGCCGCCAG: 
CCTTAAACC: 
GCGGGAGGC: 
AATTTGGAA: 
GTTGGTTGT: 
CAAGAGACA: 
GTACGACGT: 1449
CGGGATGCG: 8421
ATAGAGTAT: 
GTGGCGAGT: 
TGACGAGTG: 
GCAAACTGC: 
CCGAGAGCC: 1907
GACAACGGA: 
TCGTCTGTC: 1471 6701 9841
CCTTCTGCC: 
GATCTATGA: 
CTGCACTCT: 6105 6883 8483
TCTGCACTC: 6104 6882
GAGTTTAGA: 
TTTCATTTT: 
CAGATCACA: 
TGGCACATG: 
GGTAGATGG: 6419 8214
AATCATAAA: 
AATTGGTAA: 
TCATACTTC: 
TGATGAATG: 
GGG

GGTGATTGG: 
GGGTACCGG: 1283
GCATAAAGC: 
GTCCACGGT: 
GTTTCTGGT: 
GAAGAATGA: 5852
CATCAACCA: 
TAAGTATTA: 
CCCAAGGCC: 
GCGTGGGGC: 
TGCATGGTG: 9624
TCATCGCTC: 
CGCTACCCG: 
CTTAGTCCT: 
ACGCGGGAC: 
AGCGTCGAG: 
GTTCTTGGT: 
TTGACGTTT: 
GAGCTAAGA: 
TTAATGATT: 
CTTATCCCT: 
AGGCTTCAG: 
GAGTTCCGA: 
ACTGGGCAC: 
CGATAGGCG: 
TCCACGGTC: 
GACAAGCGA: 
GTCAGATGT: 
CGGATCACG: 
AGGGTGTAG: 
GACTGTCGA: 
TCGGCGATC: 
ATGGCGCAT: 
AGTGGGAAG: 
ACGAACTAC: 
ACCCAAGAC: 
GCGGTCCGC: 
GATTGTGGA: 
CAACTGTCA: 
GAGTAGAGA: 
CTAGAAACT: 
GCGCAAGGC: 
CATCATGCA: 
CTTCTCGCT: 
TGCCTGTTG: 
AGGTTTCAG: 
GGGCCCAGG: 9189
GGTGCACGG: 
TTGGCTATT: 
GTACCTTGT: 
GCTCACTGC: 
TGGATGATG: 
AAGGTGAAA: 
CTGGGTACT: 
GAGCAACGA: 
AGTGTGTAG: 
GCAGGAGGC: 
GAATCGAGA: 
TTCAGTGTT: 
ACCTCGAAC: 4350 7684
TTCTCAATT: 
CTATTTCCT: 
GGTTGCAGG: 
GGCACTTGG: 
TCGCCCCTC: 
TCTGGCATC: 
TTATCTCTT: 
GCCCCATGC: 
AAGGTTCAA: 
AATAAATAA: 
GAGCATAGA: 
ACGAATTAC: 
TCATTCTTC: 3914
ACTGTTCAC: 
TGCCGAATG: 
TCTCTAGTC: 
CCATAATCC: 
ACAAAGAAC: 
AGATTCGAG: 
ATAGCTGAT: 
CTCTCGTCT: 