In [157]:
import numpy as np
from numba import jit
#from numba.types import string
#import matplotlib
#import tabnanny as nanners

class EM_algorithm(object):
    def __init__(self,Q,Q_0,w, seq_array):
        
        self.Q = Q
        self.Q_0 = Q_0
        self.w = w
        self.seq_array = seq_array
        self.number_of_sequences = len(self.seq_array[:])
        self.sequence_length = len(self.seq_array[1][:])
        self.dictionary = {'A':np.int(0), 'C':np.int(1), 'G':np.int(2),'T':np.int(3)}
        self.ith_seq = self.seq_array[0][:]
        self.ith_dic = np.array([self.dictionary[self.ith_seq[i]] for i in range(len(self.ith_seq))], dtype=np.int)
        
    def get_Q_row(self, c):
        if c == 'A':
            return 0
        elif c == 'C':
            return 1
        elif c == 'G':
            return 2
        elif c == 'T':
            return 3
        else:
            print("Not valid")
            return
    
    def z_ij(self):
        z = np.array([[0 for x in range(self.sequence_length)] for y in range(self.number_of_sequences)], dtype=np.double)
        
        for i in range(self.number_of_sequences):
            
            ith_sequence = self.seq_array[i][:]
            ith_dictionary = np.array([self.dictionary[ith_sequence[i]] for i in range(len(ith_sequence))], dtype=np.int)
            den = self.denominator(ith_dictionary)
            
            for j in range(self.sequence_length - self.w + 1):
                
                #num = self.numerator(ith_dictionary,j)
                num = jit_numerator(self.sequence_length,j,self.w,self.Q,self.Q_0,ith_dictionary)
                z[i,j] = np.divide(num,den)
        
        self.z = z
                
        return self.z
    
    def numerator(self,ith_dictionary,j):
        scale_factor = np.double(4)
        prod1 = np.double(1)
        prod2 = np.double(1)
        prod3 = np.double(1)
        
        if j>0:
            for row in range(0,j):
                q_row = ith_dictionary[row]
                prod1 *= np.multiply(self.Q_0[q_row,0],scale_factor)
        else:
            q_row = ith_dictionary[0]
            prod1 *= np.multiply(self.Q_0[q_row,0],scale_factor)
            

        i = 1
        for row in range(j,j+w-1):
            q_row = ith_dictionary[row]
            prod2 *= np.multiply(self.Q[q_row,i],scale_factor)
            i += 1
        
        for row in range(j+w,self.sequence_length):
            q_row = ith_dictionary[row]
            prod3 *= np.multiply(self.Q_0[q_row,0],scale_factor)
            
        return np.multiply(np.multiply(prod1,prod2),prod3)
    
    def denominator(self,ith_dictionary):
        accumulator = np.double(0)
        #def jit_numerator(ith_sequence,j,w,Q,Q_0):
        for k in range(self.sequence_length - self.w):
            #accumulator += self.numerator(ith_dictionary,k)
            accumulator += jit_numerator(self.sequence_length,k,self.w,self.Q,self.Q_0,ith_dictionary)
        return accumulator 
                
        return accumulator
    def update_Q_z(self,num_updates):
        self.z = self.z_ij()
        
        for i in range(num_updates):
            self.Q = self.Q_estimate()
            self.z = self.z_ij()     
        return
    def Q_estimate(self):
        q_est = np.array([[np.double(0) for x in range(self.w)] for y in range(np.int(4))], dtype=np.double)
        den = self.q_denominator()
        chars = np.array(['A','C','G','T'])
        
        for k in range(self.w):
            for b in range(4):
                num = self.q_numerator(k,chars[b])
                q_est[b,k] = np.divide(num,den)
                
        return q_est
    
    # Check here for errors.... k and j might be mixed up
    def q_denominator(self):
        accumulator = np.double(0)
        for i in range(self.number_of_sequences):
            for j in range(self.sequence_length - self.w):
                accumulator += self.z[i,j]
                
        
        return accumulator
    
    def q_numerator(self,k,c):
        accumulator = np.double(0)
        for i in range(self.number_of_sequences):
            seq_of_interest = self.seq_array[i][:]
            for j in range(self.sequence_length - self.w):
                if seq_of_interest[j + k - 1] == c:
                    accumulator += self.z[i,j]
                    
        return accumulator
    
    
    def string_EM(self,num_similar):
        index = np.int(0)
        s = []
        for row in range(self.number_of_sequences):
            ith_seq = self.seq_array[row][:]
            a = self.z[row,:]
            b = self.z[row,:]
            a = sorted(a)[::-1]
            for i in range(num_similar):
                ind = b.tolist().index(a[i])
                s.append(self.seq_array[row][ind:(ind+self.w)])
                index += 1
        return s
            
            
@jit(nopython=True)
def jit_numerator(sequence_length,j,w,Q,Q_0,dictionary):
        
        scale_factor = np.double(4)
        prod1 = np.double(1)
        prod2 = np.double(1)
        prod3 = np.double(1)
        
        if j>0:
            for row in range(0,j):
                q_row = dictionary[row]
                prod1 *= np.multiply(Q_0[q_row,np.int(0)],scale_factor)
        else:
            q_row = dictionary[row]
            prod1 *= np.multiply(Q_0[q_row,np.int(0)],scale_factor)
            

        i = np.int(1)
        for row in range(j,j+w-1):
            q_row = dictionary[row]
            prod2 *= np.multiply(Q[q_row,i],scale_factor)
            i += 1
        
        for row in range(j+w,sequence_length):
            q_row = dictionary[row]
            prod3 *= np.multiply(Q_0[q_row,0],scale_factor)
            
        return np.multiply(np.multiply(prod1,prod2),prod3)

    


#    def string_EM(self,num_similar):
#        function [ s ] = string_EM( seq_array, Acon,w, num_similar )
#        index = 1;
#        for row = 1:num_sequences
#        seq_interest = seq_array(row,:);
#        [val, ind] = sort(Acon(row,:), 'descend');
#        for i = 1:num_similar
#        
#       s(index,:) = seq_interest(ind(i):(ind(i) + w -1));
#        index = index + 1;
        


In [158]:
w = np.int(7)
Q = np.array([[np.double(0.25) for x in range(w)] for y in range(4)], dtype=np.double)
Q_0 = np.array([[np.double(0.25) for x in range(1)] for y in range(4)], dtype=np.double)
seq_array1 = np.array(['TTAACCGAAAGTAGAAAATTAACCGCTTGAGCACTTATTTCCTGCTTAACCGGCATGTCATATGTTAATTTTTAACCGTCCTCAACTGCGCTGAATACGTCCTGTCAATTCAAATATATCACGTTGTGAGCAGCCCTGAAGAAGAAAACTTAACCGCTCAACAGCAGTTTAACCGATTACTATTACAATCAAACAACTTTAGTGCCGCGTGATACCGGGGGTTGAAGTGGGTGCATTGAGCCGTATTCTTCTTCCCCGTAAGAAAGTTGTGTATCCTTTTTACTGCGTTGTAATAGCTTCTGAAAACCTAAAAAATGAACGCTATGTAGCTCATATCCGTTTTGCATAAGTAAGAATAACTACTTGTGCAGGGTGCCGAAAGGGATGGAAAACCGCTGCAGCAACCCTTGTTACATACAGTCGGATCCATCTGACTTACTTTCCTTGCGTCTCCCTGCGCGATTTTGTTGGCCATTTTCCAGATCCTCTAGAATTTTTCAAGGGTCGAGCCGTAGGAGGATTCTCTCAGAAGGCAAAAACGCATCGAAAGCGTGCTTTGTAAGAATATTTGGTATGGCTAAAGTAAGCAAAGCCATATCCCGATCCCGATCCCGACTCTTATTCCGATCCCTTCCGCCACATCCTGCATGTTTATTCGAATACCAAATTAGCTCATCTTCGTTATTTCATCATCCCTTTCTGCTATGGCAAGGACAAGTTTTTTTCTAGCATCTCATCGAAAACTTTCCTCTCCCTAATTGGCCAAAGTTTTCATATTCATCATCAGTTAGAAAGTATAATATCAATCCCTTACCTCATTACAAGTTGTATCACACTAAAAAAATCATATATAAGTCTGTGAGAGTCTTCAATTATTTAGCGTAACACCTATTCACTTTCTAATCTTGTTTCTTGTTTTTACATTCTGCAATACAACACAACAACAAATATTAACTCAATTATTATTATTTATAATTACAAAAACAAAACAACAAGTTTGAGACTTTAATATCTTTTGATTACTAAAAACAACAAATTTCAATTAACCG',
              'ACCTGTAACTATGTTGGCACAAACGAAAAAAGTTTTGAGCTTAAGCACGCCTGAAAAGAAAATGTCCAAGAGTGTTAACCGATCCGTTAACCGAATCACGACTCTGTAATTTTTTTGTTAACCGAATGATGAGCCTAAGGTTAACCGTTAACCGCAATTCAGAAAAAGATTAGAAAAGCTTTAACATTAACCGGATTCCATTTCTGATAGGTTCTACTACGACCCTGTAGAAAGACCAGGCGTATCGAATTTAATTAACATTGTCAGTGGCATTCAAAGAAAATCGATTGAAGATGTCGTAGAAGATGTATCTCGTTTCAATAACTATAGGGATTTCAAAGATTATGTTTCAGAAGTGATAATTGAGGAATTGAAAGGCCCAAGAACAGAATTTGAGAAATATATCAACGAACCGACCTATTTGCATAGTGTCGTTGAATCTGGCATGCGCAAAGCGAGAGAAAAAGCAGCAAAAAACCTGGCCGACATTCATAAAATAATGGGCTTCTGACTGTCCCCGGCTTTAGGCTGCTGCAAACGCAATGTAAATAATAATACAGTTTAACTTGTACGTATCTTTGTTATTTCAGAATATGCAAGATTCCTCACTCGGCAGATCTCATCAATAAACGTTAGTATCTTGAAAAATACGCAGATGTTGGTACAGTCTCTTGCAGAGGGCCAGCACCGTTAGAATGTTGTGTTGCCACCGTCTGGCTCGAAAACAGGTCGATCTGTGTAAGTATTGTCCTGGGTGCAGTTATTTGTAGTAGCGATTTTCGCGGGCAGTGGAAAAGAACGGGTGCAGTGCGGCGATGAGTGCAATTAGCTGCCGAAGGGTTTGCACCGGTGATTCCGAGACCTTTTGCACACACTTGTATATATTTAGGGAAGTCACGGTAACTAATGGATTTCTTCTCAAATTTGAGACATATGGATTAGACTTTAATTCTGCCTGGTACTAAGCTACTTTTCAAAGGTATTTGAGATATTTCGTAAATTTTGCGGCCAAGGAGTTGAGGTGTTTAATACTTTTAACCGAAAACGAA',
              'GTGCAGACTAATGAAAAAGACCCCAGACCCATCTCCACCTTTTGCAAGCACGAAGAACGTAGGCATGTCAAACGAAGAACTTAACCGCGGAAAAAATGGTTAACGATCGAATTGTGGTGAAGGCCATCGAGCCAAAAGATGAGGAAGCCTGGAACAAACTGTGGAAGGAGTATCAGGGTTTCCAAAAAACGGTTATGCCTCCGGAAGTAGCCACCACTACCTTCGCAAGGTTTATAGACCCTACGGTCAAACTATGGGGTGCTCTAGCCTTTGACACCGAGACCGGCGATGCAATTGGCTTTGCACACTACCTGAACCATTTGACGTCGTGGCATGTCGAAGAAGTTGTCTACATGAATGACTTATATGTCACTGAACGCGCAAGAGTCAAAGGCGTAGGTAGAAAGCTCATAGAATTTGTATACAGCCGTGCCGATGAGCTCGGTACGCCTGCAGTGTACTGGGTTACAGACCATTACAATCACCGCGCACAGTTGCTGTACACCAAAGTGGCTTACAAGACAGATAAGGTTCTTTACAAGAGAAACGGATACTGAAACCGCTGTGTAATGTAGACTACTGTGTATAGAGATAACTCTAGCTTCTATAGGCTCACTTGCGAAGCTTTTTTCCCACTTTTTCCCAGTGAAATAGTTTCTTATTGGCTTTTCATAGTTTGTTCTCCGATTTAATCGCAAGCGCCGTACGGCGCAGGCACAGAATGACTTGCAGCTTAATCCAGGTGCATTTTAAGAAACTTATCTGCTGGGAAGACCCCGAAGTTCATCGTTTACAATGTGCCGTAAAGAACACATTTGAGGGTGCAATCTGGTTCGTCTTTTGCCATAATTAGTAGTGCTCATCCGTGATCCTTTAATTTACTTTTGTGATGCGATGATTTGCAGATACTATATAAGCAATGGACACCGTCCATCACCGATGAGGCTAGAACATCGTAGTGCTTTTAACCGTTAACCGTTAACCGTTAACCGTGTAAAAGAACTGTTAAAGACTTAACCGCCAGTACGAAAATTTTTTAACCGTCCATA',
              'AAGCCACTCGACAGCGTCTTCCAGTATAACGTTGACGAAGTCGTCGAAACCAACTAACGTGCCCTCGAACTCGCGGTTCGACTGCATTAACCGGCACAATCAACACTTTCTGGTTAATTGTTTTAACCGTTATCTATGACTTCCAAAGGCAAAATCTCCGGTAGACTCATTTTGCGCGCTTTAACCGTCTGTGTATGCTTGTGAGTGCACGTGTATACGCTATGCTTGTTGTGAGACCATCTATCCGCCAAGGAAGCCTTTTTTTAACCGCGCTTTCTCTGTATTTTTTCTACTTTAACCGTCAATAAGAAAGCAAGTATTCTGTTTTGCTACCCGGATGCAAAGACAAAACCCGAGCATCCGGGTAATAAAAGAGCAGGGTCCGGCTGAAGGTGCCAAAAAAGACCAAAAAGGGCTATATTATCTCTCTCTTACGATTGCCTCTGGTTAAAAAATGAAATAGATAATAAAAGGGCAGCAAAGAAAAAAAAGAAGAATACAATATACGTATACGTTCTCAACAATTTTTCGTCCTGTTTTTTTCTTGACCACGGTCACTACTAACTCCTGAGACATCAGACCAACGTGTAGAAAAAAAGAAATCGGGTCCGGCACTGACACACGCTCTGCGCCAATCAACCAGGGCGCATCGAGCATTCTTTCCAGGCCAGGGTGCAGAAATACTATTATTAAGTGCGCGAATACTGCTGGTGCATTAAACGAGCGAGTACAGTGAGCCCGCCGCTGACTGCCGACTCACGCCAGGCCGCTATTTCCTTCCCCCGCCCGCGTAAGCCGTGCCGAGCACCGTCGCAGCAGTAAGAAGGTGCGCATTTCATTCTCTGCACAGCGTAGATGAATGCACCCGATTTCTAAGTTGGTGTTGCAAGGCCCGGTGGAAGCGATGGGCTGTAGTTCGACCTTGAGCTTTTAGTTATATAAAATGCTCAATGTGAGCAAAAAAATACATAACACGACGGATTGTACGTTGTCGAAATCTTCTCTCAGCAGGTCATCACACATATACTTCCCGCCTTAACCGTTAACCG',
              'CCAATTAACCGTTAACCGACCTGAATCCTTAATTTACGGTTAACCGTACAGCGCCCGGTCATGTTCCCCGATTGCCTGTTTCAAAAACGATAGCAGCACGGAAATAACAATCACTGCTTCCAAGCACTCTCTGAACACAACGAAGAAAACGGCCACGTTAAACACTTTGTTAGGCATGGCGGGAAGTATATGTGTGATGACCTGCTGAGAGAAGATTTCGACAATTAACCGCGTACAATCCGTCGTGTTATTTAACCGGTATTTTTTTGCTCACATTGAGCATTTTATTTAACCGTTAACCGATAACTAAAAGCTCAAGGTCGAACTACAGCCCATCGCTTCCACCGGGCCTTGCAACACCAACTTAGAAATCGGGTGCATTCATCTACGCTGTGCAGAGAATGAAATGCGCACCTTCTTACTGCTGCGACGGTGCTCGGCACGGCTTACGCGGGCGGGGGAAGGAAATAGCGGCCTGGCGTGAGTCGGCAGTCAGCGGCGGGCTCACTGTACTCGCTCGTTTAATGCACCAGCAGTATTCGCGCACTTAATAATAGTATTTCTGCACCCTGGCCTGGAAAGAATGCTCGATGCGCCCTGGTTGATTGGCGCAGAGCGTGTGTCAGTGCCGGACCCGATTTCTTTTTTTCTACACGTTGGTCTGATGTCTCAGGAGTTAGTAGTGACCGTGGTCAAGAAAAAAACAGGACGAAAAATTGTTGAGAACGTATACGTATATTGTATTCTTCTTTTTTTTCTTTGCTGCCCTTTTATTATCTATTTCATTTTTTAACCAGAGGCAATCGTAAGAGAGAGATAATATAGCCCTTTTTGGTCTTTTTTGGCACCTTCAGCCGGACCCTGCTCTTTTATTACCCGGATGCTCGGGTTTTGTCTTTGCATCCGGGTAGCAAAACAGAATACTTGCTTTCTTATTGAAGTAGAAAAAATACAGAGAAAGCGAAAAAGGCTTCCTTGGCGGATAGATGGTCTCACAACAAGCATAGCGTATACACGTGCACTCACAAGCATACACAGAAGCGCGCAAA',
              'AAGGACAGAGAAATTCATTTAACCGTTAACCGTTAACCGTTAACCGTTAACCGTTAACCGTTAACCGATTAAGAGAGCTAGAACTCCAGGCCAAATGCAAAGAGGAGGTTTCAGAGGCAGAGGCGGTTTCAGAGGCAGAGGAGGTTTTAGAGGAGGTTTCAGAGGCGGCTACAGAGGAGGTTTCAGAGGCAGAGGGAACTTCAGAGGTAGAGGCGGCGCCAGAGGTGGTTTCAATGGACAAAAAAGGGAAAAGATTCCATTAGACCAAATGGAAAGATCAAAGGATACCTTATATATTAATAACGTCCCATTCAAAGCTACCAAAGAGGAGGTCGCTGAATTTTTCGGTACTGACGCCGACTCCATCTCTTTGCCAATGAGAAAAATGAGAGACCAACACACTGGTAGGATCTTCACATCCGATTCTGCTAATAGAGGTATGGCATTTGTCACTTTCAGTGGTGAAAACGTTGATATTGAAGCTAAAGCTGAAGAATTTAAAGGCAAGGTTTTCGGTGACAGGGAGTTAACTGTAGATGTTGCTGTTATTAGACCAGAAAATGATGAAGAAGAAATTGAGCAAGAAACTGGTTCTGAAGAAAAGCAAGAATAATTACTTCTTACCCACATCCCTATTTCTAACTTGAGTTTTTGCTAGAGTTTTGTATTTTTGTTCACCTTCCCTGCAAAAGAAATATGTGTATTTATATATGCGTGTATACCTATATATGATATGTAAAAATGAGACGCCCCTGTTTTATTTTCAAACACTTCCCCGTATAGTTTTTTGCAATGACACTACTTTAACTTCTTCGACATGATTTGCTTTAGCACTACGAAGGATTGCATCATAACGTTTCGAAAGGGGTGCACTTTTAAAAACCAGTAAGTGAGTGCCTCGTGAACTGCTATTTTCGTATTTTGAAAAAAAAAATAAAAAAAAACTCCCTTATATATATATATAAATATCTATGTACTAAATGTCAAATCGCTAGCTCTCACCTATATCTTATTCATGCTGCAACCTCAATGGTTCACTACCTCGGAGA',
              'ACAGAATTTCAAGAGTAGCTTTTAACCGTGCAGATAGCCATGTTGATGAGCAATTTAACCGACTTTGCGTACAAATACAACCTTTTGTTAACCGCAGATTAACCGCATCGTATATGGCCATGTGCACAGCTAATTTTTCTCACAATTGGTGCCAATTATATGTGGATATGAGATATCTAATAGAACGAGATGAAAAGCTGTATAGAATAAAAGAATTAACAAGGAACCTGCTGGAAACTAAACTGAACATGAAATTTAACCGATCGTATTAACCGTTAACCGTTGTCTGTCAACTAATCAGGCATCAATTGACCGAATTTCGTGAGAACGAGAGAAATCCATCGTGGGACGCCACCATTGAGAAACTGCTACCTTATATCTTGAAGGAAATTGTTCGTCCACTGCAGAAAATAAGAGGTGAAGAAGGTTCCCGTTACTTGTTGTCTTTCTTAAACTTTTTATATAACGATTGCGTGACAAAGGAAATATTAAAATGGCAGATTATCTCTGAGGTCAATTCCGAAAATTTGGGTGAGCTCGTATCTTTATTAGTGAACAACACCGATATACAATTATTAGCGAAGGAACCAAGTTACAAAAAGATGAGAGAAAAATTCGCCACTATGGGCAAGTTTCTACCATTACATTTAAAGGAGATTATGGAGATGTTTTACAATGGGGATTTTTATCTTTTTGCGACAGACGAACTAATCCAATGGATAGAGTTATTGTTTGCCGACACTCCCCTGCGAAGGAATGCCATTGATGATATTTACGAAATTAGAGGCACTGCTCTAGATGATTAAAAAATAATTAATCGTAGGATAGGTTACATGATCATTGCAATAGATGTACATCTTGTATTACGTATGCGTAAGTTGTCTTTTTTTCAGTTTTCTAATAAATTTCACGTGATGCGGTAGTTTTTTTTGGGGGTGCAAAGTTGGCACATCTTGTAGTGCATTTTTGAGCTTCTTAAATGACTTGAATCATCTTAAAACATTGCAGGATGAGATTTCCAACGACACAAGAGAGAACTAGCGCAAAAG',
              'TTCGTTATGCTGGACGCTCTCAAAACCCCCATGGCATTAACCGTTAACCGTTCACAGGTCTGATGGCCATCAGAGCAGGCCTTGCAAATCTCGTTATAACAGGAAGCATCAGGTTAAATATGTGTTATGTGTTGTGTTTTCCTTATTCAATTGTTATATGAAGGGATGTTCTTATGGATTTAAGAATAATCTCTGAACACAAATATATATGCGGAAGAGCGAAGGAAGAGAGCAGAAAAGGCTACAACGAAACCCTTTCATTTACTTATATAAATAGCCTGAGTCATTTTTCCTCTCAATTTTTGCATCATCATGATCATACATGCTTTTTTTTTCCTTCTCCATGGCTAATTGGGGTGCAACGCACCTTATCGCCTTTGACGGCCTTGGCTAAGCGGAGATAAAGCCTCCGAACTTCGGCCCCGAAGGGATTGGCGCGGATATACACGGAAATAGCTTGTCATTTTTCCTTCGGTTGATGTCCGCGGACCGAGAAGTGGTGTTTCCACATTACGTCTCAGTGATGCAATCATCATGGCACGGGCATTAATTAAGGTTAAGGAACCACTTCTCTACGCCATACTCCAAACAATAACAATATTAGACCTAGCCAATTGATACCCATGAGCGTGCAGTCATTGCCATGGATTATTCATCTCTTTTCGCTGTTCTCTTTCTTTTCAAGTTTGTCATCATCATGCCTTCACTTTTGCCTTTCCATCTTTCTTTTTGCTGCAAAATAAAGGGAAGAGGGGTAAAAACGCAAGGAAGAACAAGAAGAAGAGGGTAGTGCAAGAAAAAGAAAAGAAGAAAAAAAAAAAAAAGTAATCTTGATACCGTGAGCAAATAAGCTAACGGAAAGCGTAAGAAAGAAGAGCGTGTTTTGGGAAATAACACCACAGCATAAAGCTAAAATTCAGTTTATATAATCTATAGTAGTCCTATAGAAATTGTTAACCGCGAATAATTAACCGCGGAAACAATTAACCGTAGTCCACCTTAACCGAAAGCAAGCATTAACCGTAGGGAGTGGAGATAGCATCTAGGTT'
              ]);
seq_array2 = np.array([
    'CAGTGCTAATTTAACCGCGGATCGATTTAACCGTTAACCG',
    'GGCTATTCGATTTAACCGTTAACCGGCTATCGGGTTATAT',
    'TTAACCGGCTGATTAACCGCCGGCTTATCGTTAACCGTTA'
]);
em_test = EM_algorithm(Q,Q_0,w,seq_array1)
em_test.z_ij()
%prun em_test.update_Q_z(40)
em_test.z
s = em_test.string_EM(2)
print(s)

 ['AAAAAAT', 'AAAAAAT', 'AAAAAAG', 'AAAAAAG', 'AAAAAAT', 'AAAAAAT', 'AAAAAAG', 'AAAAAAG', 'AAAAAAA', 'AAAAAAA', 'AAAAAAG', 'AAAAAAG', 'AAAAAAT', 'AGAAAAT', 'AAAAAAA', 'AAAAAAA']
