In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline

In [2]:
def _seqs_to_x_lc(seqs, alphabet, 
                  return_features=False,
                  verbose=False, 
                  seq_to_print=0, 
                  features_to_print=20):
    
    # Get N, L, C
    N = len(seqs)
    L = len(seqs[0])
    C = len(alphabet)
    
    # Get vectors of unique lengths and characters
    l_uniq = np.arange(L).astype(int)
    c_uniq = np.array(list(alphabet))
    
    # Get (N,L) matrix of sequence characters
    seq_mat = np.array([list(seq) for seq in seqs])
    
    # Create (L,C) grids of positions and characters
    l_add_grid = np.tile(np.reshape(l_uniq,[L,1]),[1,C])
    c_add_grid = np.tile(np.reshape(c_uniq,[1,C]),[L,1])
    
    # Create (N,L,C) grid of characters in sequences
    seq_add_grid = np.tile(np.reshape(seq_mat,[N,L,1]),[1,1,C])

    # Compute (N,L,C) grid of one-hot encoded values
    x_add_grid = (seq_add_grid == c_add_grid[np.newaxis,:,:])
    
    # Compute number of features K
    K = L*C
    
    # Compute flattened lists positions and characters
    l_add = l_add_grid.reshape(K)
    c_add = c_add_grid.reshape(K)
    
    # Create one-hot encoded matrix to return
    x_add = x_add_grid.reshape(N,K)
    
    # Print features if requested
    if verbose:
        n=seq_to_print
        print(f'x[{n}] = {seqs[n]}')
        ix = x_add[n,:]
        cs = c_add[ix]
        ls = l_add[ix]
        k_max = min(ix.sum(), features_to_print)
        for k in range(k_max):
            name = f"x[{n}]_{ls[k]}:{cs[k]} = True"
            print(name)
    
    # If return features, create list of feature names and return with x_add
    if return_features:
        feature_names = [f'x_{l_add[k]}:{c_add[k]}' for k in range(K)]
        return x_add, feature_names
    
    # Otherwise, just return x_add
    else:
        return x_add

In [3]:
def _seqs_to_x_lclc(seqs, alphabet, 
                    return_features=False,
                    verbose=False, 
                    seq_to_print=0, 
                    features_to_print=20,
                    feature_mask='pairwise'):
    """
    One-hot encode features defined on pairs of positions.

    Each feature is the product of two additive one-hot features
    (l1,c1) and (l2,c2), kept only for the position pairs selected
    by feature_mask.

    parameters
    ----------

    seqs: (sequence of str)
        N sequences; the length L is read from seqs[0].

    alphabet: (iterable of characters)
        The C characters to build features from.

    return_features: (bool)
        If True, also return the list of feature names.

    verbose: (bool)
        If True, print K and the active features of one sequence.

    seq_to_print: (int)
        Index of the sequence to report when verbose is True.

    features_to_print: (int)
        Maximum number of active features to print.

    feature_mask: (str)
        'neighbor' keeps pairs with l2 == l1 + 1, giving
        K = (L-1)*C*C features. 'pairwise' keeps pairs with
        l1 < l2, giving K = (L*(L-1)/2)*C*C features.

    returns
    -------

    x_pair: (2D np.ndarray)
        Boolean array of shape (N, K).

    feature_names: (list of str)
        Names of the form 'x_{l1}:{c1},{l2}:{c2}'; only returned
        when return_features is True.

    raises
    ------

    ValueError
        If feature_mask is neither 'neighbor' nor 'pairwise'.
    """

    # Get N, L, C
    N = len(seqs)
    L = len(seqs[0])
    C = len(alphabet)

    # Get vectors of unique positions and characters
    l_uniq = np.arange(L).astype(int)
    c_uniq = np.array(list(alphabet))

    # Get additive ohe
    x_add = _seqs_to_x_lc(seqs, alphabet)

    # Create (L,C,L,C) grids of positions and characters
    l1_grid = np.tile(np.reshape(l_uniq,[L,1,1,1]),[1,C,L,C])
    c1_grid = np.tile(np.reshape(c_uniq,[1,C,1,1]),[L,1,L,C])
    l2_grid = np.tile(np.reshape(l_uniq,[1,1,L,1]),[L,C,1,C])
    c2_grid = np.tile(np.reshape(c_uniq,[1,1,1,C]),[L,C,L,1])

    # Select which (l1,c1,l2,c2) combinations to keep. K is computed with
    # integer arithmetic, and clamped so sequences too short to contain
    # any pair yield zero features.
    if feature_mask == 'neighbor':
        keep = (l1_grid == l2_grid-1)
        K = (C**2)*max(L-1, 0)
    elif feature_mask == 'pairwise':
        keep = (l1_grid < l2_grid)
        K = (C**2)*(L*(L-1)//2)
    else:
        # Fail here rather than with an unbound K further down
        raise ValueError(f'Invalid feature_mask={feature_mask}')
    assert K == keep.ravel().sum(), f"K={K} doesn't match keep.ravel().sum()={keep.ravel().sum()} "
    if verbose:
        print(f"K = {K} features")

    # Compute ohe for features: outer product over both (L,C) axes,
    # then collapse the four trailing axes with the boolean mask
    x_add1 = x_add.reshape(N,L,C,1,1)
    x_add2 = x_add.reshape(N,1,1,L,C)
    x_pair = (x_add1*x_add2)[:,keep]

    # Positions and characters belonging to each kept feature
    l1_pair = l1_grid[keep]
    l2_pair = l2_grid[keep]
    c1_pair = c1_grid[keep]
    c2_pair = c2_grid[keep]

    # Print features if requested
    if verbose:
        n = seq_to_print
        print(f'x[{n}] = {seqs[n]}')
        ix = x_pair[n,:]
        c1s = c1_pair[ix]
        l1s = l1_pair[ix]
        c2s = c2_pair[ix]
        l2s = l2_pair[ix]
        k_max = min(ix.sum(), features_to_print)
        for k in range(k_max):
            name = f"x[{n}]_{l1s[k]}:{c1s[k]},{l2s[k]}:{c2s[k]} = True"
            print(name)

    # If return_features, create a list of feature names and return with x_pair
    if return_features:
        feature_names = [f'x_{l1}:{c1},{l2}:{c2}'
                         for l1, c1, l2, c2 in zip(l1_pair, c1_pair, l2_pair, c2_pair)]
        return x_pair, feature_names

    # Otherwise, just return x_pair
    return x_pair

In [4]:
def _seqs_to_x_lclclc(seqs, alphabet, 
                      feature_mask="triplet",
                      return_features=False,
                      verbose=False, 
                      seq_to_print=0, 
                      features_to_print=20):
    """
    One-hot encode features defined on triplets of positions.

    Each feature is the product of three additive one-hot features
    (l1,c1), (l2,c2) and (l3,c3), kept only for the position
    triplets selected by feature_mask.

    parameters
    ----------

    seqs: (sequence of str)
        N sequences; the length L is read from seqs[0].

    alphabet: (iterable of characters)
        The C characters to build features from.

    feature_mask: (str)
        'markov3' keeps triplets with l2 == l1 + 1 and l3 == l2 + 1,
        giving K = (L-2)*C*C*C features. 'triplet' keeps triplets
        with l1 < l2 < l3, giving K = (L*(L-1)*(L-2)/6)*C*C*C features.

    return_features: (bool)
        If True, also return the list of feature names.

    verbose: (bool)
        If True, print K and the active features of one sequence.

    seq_to_print: (int)
        Index of the sequence to report when verbose is True.

    features_to_print: (int)
        Maximum number of active features to print.

    returns
    -------

    x_trip: (2D np.ndarray)
        Boolean array of shape (N, K).

    feature_names: (list of str)
        Names of the form 'x_{l1}:{c1},{l2}:{c2},{l3}:{c3}'; only
        returned when return_features is True.

    raises
    ------

    ValueError
        If feature_mask is neither 'markov3' nor 'triplet'.
    """

    # Get N, L, C
    N = len(seqs)
    L = len(seqs[0])
    C = len(alphabet)

    # Get vectors of unique positions and characters
    l_uniq = np.arange(L).astype(int)
    c_uniq = np.array(list(alphabet))

    # Get additive ohe
    x_add = _seqs_to_x_lc(seqs, alphabet)

    # Create (L,C,L,C,L,C) grids of positions and characters
    l1_grid = np.tile(np.reshape(l_uniq,[L,1,1,1,1,1]),[1,C,L,C,L,C])
    c1_grid = np.tile(np.reshape(c_uniq,[1,C,1,1,1,1]),[L,1,L,C,L,C])
    l2_grid = np.tile(np.reshape(l_uniq,[1,1,L,1,1,1]),[L,C,1,C,L,C])
    c2_grid = np.tile(np.reshape(c_uniq,[1,1,1,C,1,1]),[L,C,L,1,L,C])
    l3_grid = np.tile(np.reshape(l_uniq,[1,1,1,1,L,1]),[L,C,L,C,1,C])
    c3_grid = np.tile(np.reshape(c_uniq,[1,1,1,1,1,C]),[L,C,L,C,L,1])

    # Select which (l1,c1,l2,c2,l3,c3) combinations to keep. K is computed
    # with integer arithmetic, and clamped so sequences shorter than three
    # positions yield zero features instead of a negative count.
    if feature_mask == 'markov3':
        keep = (l1_grid == l2_grid-1)*(l2_grid == l3_grid-1)
        K = (C**3)*max(L-2, 0)
    elif feature_mask == 'triplet':
        keep = (l1_grid < l2_grid)*(l2_grid < l3_grid)
        # L*(L-1)*(L-2) is a product of three consecutive integers, so it is divisible by 6
        K = (C**3)*(L*(L-1)*(L-2)//6)
    else:
        # Fail here rather than with an unbound K further down
        raise ValueError(f'Invalid feature_mask={feature_mask}')
    assert K == keep.ravel().sum(), f"K={K} doesn't match keep.ravel().sum()={keep.ravel().sum()} "
    if verbose:
        print(f"K = {K} features")

    # Compute ohe for features: outer product over the three (L,C) axis
    # pairs, then collapse the six trailing axes with the boolean mask
    x_add1 = x_add.reshape(N,L,C,1,1,1,1)
    x_add2 = x_add.reshape(N,1,1,L,C,1,1)
    x_add3 = x_add.reshape(N,1,1,1,1,L,C)
    x_trip = (x_add1*x_add2*x_add3)[:,keep]

    # Positions and characters belonging to each kept feature
    l1_trip = l1_grid[keep]
    l2_trip = l2_grid[keep]
    l3_trip = l3_grid[keep]
    c1_trip = c1_grid[keep]
    c2_trip = c2_grid[keep]
    c3_trip = c3_grid[keep]

    # Print features if requested
    if verbose:
        n = seq_to_print
        print(f'x[{n}] = {seqs[n]}')
        ix = x_trip[n,:]
        c1s = c1_trip[ix]
        l1s = l1_trip[ix]
        c2s = c2_trip[ix]
        l2s = l2_trip[ix]
        c3s = c3_trip[ix]
        l3s = l3_trip[ix]
        k_max = min(ix.sum(), features_to_print)
        for k in range(k_max):
            name = f"x[{n}]_{l1s[k]}:{c1s[k]},{l2s[k]}:{c2s[k]},{l3s[k]}:{c3s[k]} = True"
            print(name)

    # If return_features, create a list of feature names and return with x_trip
    if return_features:
        feature_names = [f'x_{l1}:{c1},{l2}:{c2},{l3}:{c3}'
                         for l1, c1, l2, c2, l3, c3
                         in zip(l1_trip, c1_trip, l2_trip, c2_trip, l3_trip, c3_trip)]
        return x_trip, feature_names

    # Otherwise, just return x_trip
    return x_trip

In [5]:
def _validate_seqs(seqs, alphabet, restrict_seqs_to_alphabet=True):
    """
    Makes sure that seqs is an array of equal-length sequences
    drawn from the set of characters in alphabet. Returns 
    a version of seqs cast as a numpy array of strings. 
    """

    # Cast as np.array
    if isinstance(seqs,str):
        seqs = np.array([seqs])
    elif isinstance(seqs,list):
        seqs = np.array(seqs).astype(str)
    elif isinstance(seqs,pd.Series):
        seqs = seqs.values.astype(str)
    else:
        assert False, f'type(seqs)={type(seqs)} is invalid.'
    
    # Make sure array is 1D
    assert len(seqs.shape)==1, f'seqs should be 1D; seqs.shape={seqs.shape}'
        
    # Get length and make sure its >= 1
    N = len(seqs)
    assert N >= 1, f'N={N} must be >= 1'
        
    # Make sure all seqs are the same length
    lengths = np.unique([len(seq) for seq in seqs])
    assert len(lengths==1), f"Sequences should all be the same length; found multiple lengths={lengths}"
    L = lengths[0]
    
    # Make sure sequences only contain characters in alphabet
    if restrict_seqs_to_alphabet:
        seq_chars = set(''.join(seqs))
        alphabet_chars = set(alphabet)
        assert seq_chars <= alphabet_chars, \
            f"seqs contain the following characters not in alphabet: {seq_chars-alphabet_chars}"

    return seqs

In [6]:
def get_gpmap_features(seqs, alphabet, gpmap_type="additive", restrict_seqs_to_alphabet=True):
    """
    Compute model features from a list of sequences.
    For sequences of length L and an alphabet of length C,
    the number of features K is, for different values of
    gpmap_type,
        "additive": K = 1 + L*C
        "neighbor": K = 1 + L*C + (L-1)*C*C
        "pairwise": K = 1 + L*C + (L*(L-1)/2)*C*C
        "markov3":  K = 1 + L*C + (L-1)*C*C + (L-2)*C*C*C
        "triplet":  K = 1 + L*C + (L*(L-1)/2)*C*C + (L*(L-1)*(L-2)/6)*C*C*C
    
    parameters
    ----------
    
    seqs: (str or array of str)
        Array of N sequences to encode
        
    alphabet: (array of characters)
        Array of C characters from which to build features
    
    gpmap_type: (str)
        The type of G-P map to create features for.
        Must be one of ["additive","neighbor","pairwise",
        "markov3","triplet"].
    
    restrict_seqs_to_alphabet: (bool)
        Whether to throw an error if seqs contains characters
        not in alphabet. If False, characters in seqs
        that are not in alphabet will have feature value 0 for 
        features that reference that character's position. This 
        might cause problems to arise during gauge fixing. 
        
    returns
    -------
    
    x: (2D np.ndarray)
        A numpy array of shape (N,K) holding 0/1 values. The
        first column is the constant feature and is all ones.
        
    features: (list of str)
        A list of K feature names, in column order

    raises
    ------

    AssertionError
        If seqs fails validation or gpmap_type is not recognized.
    """
    # Validate seqs, honoring the caller's restrict_seqs_to_alphabet choice
    seqs = _validate_seqs(seqs, alphabet,
                          restrict_seqs_to_alphabet=restrict_seqs_to_alphabet)

    valid_types = ["additive","neighbor","pairwise","markov3","triplet"]
    assert gpmap_type in valid_types, \
        f"gpmap_type={gpmap_type!r} is invalid; must be one of {valid_types}"

    # Get constant feature
    N = len(seqs)
    x_blocks = [np.ones(N).reshape(N,1)]
    features = ['x_0']

    # Get additive features; every gpmap_type includes them
    x_lc, features_lc = _seqs_to_x_lc(seqs, alphabet, return_features=True)
    x_blocks.append(x_lc)
    features = features + features_lc

    # Get neighbor or pairwise features if appropriate
    if gpmap_type in ["pairwise","triplet"]:
        pair_mask = "pairwise"
    elif gpmap_type in ["neighbor","markov3"]:
        pair_mask = "neighbor"
    else:
        pair_mask = None
    if pair_mask is not None:
        x_lclc, features_lclc = _seqs_to_x_lclc(seqs, alphabet, 
                                                return_features=True, 
                                                feature_mask=pair_mask)
        x_blocks.append(x_lclc)
        features = features + features_lclc

    # Get markov3 or triplet features if appropriate
    if gpmap_type in ["markov3","triplet"]:
        x_lclclc, features_lclclc = _seqs_to_x_lclclc(seqs, alphabet, 
                                                      return_features=True,
                                                      feature_mask=gpmap_type)
        x_blocks.append(x_lclclc)
        features = features + features_lclclc

    # Join all feature blocks column-wise
    x = np.hstack(x_blocks)

    # Return x and feature names
    return x, features

In [7]:
# Test fixtures: a DNA alphabet and two equal-length sequences drawn from it
alphabet = 'ACGT'
seqs = [
    'AGGTAGACATA',
    'TGATCGGCATA',
]

In [8]:
# Check that the additive feature encoding is working.
# return_features=True is needed for the call to return an (x, names) pair;
# without it only x comes back and the unpacking splits x into its two rows.
x, names = _seqs_to_x_lc(seqs, alphabet, 
                         return_features=True,
                         verbose=True, 
                         features_to_print=5,
                         seq_to_print=1) 
assert len(names) == x.shape[1]

x[1] = TGATCGGCATA
x[1]_0:T = True
x[1]_1:G = True
x[1]_2:A = True
x[1]_3:T = True
x[1]_4:C = True


In [9]:
# Check that the neighbor feature encoding is working.
# return_features=True is needed for the call to return an (x, names) pair;
# without it only x comes back and the unpacking splits x into its two rows.
x, names = _seqs_to_x_lclc(seqs, alphabet, 
                           return_features=True,
                           verbose=True, 
                           features_to_print=5,
                           seq_to_print=1,
                           feature_mask='neighbor') 
assert len(names) == x.shape[1]

K = 160 features
x[1] = TGATCGGCATA
x[1]_0:T,1:G = True
x[1]_1:G,2:A = True
x[1]_2:A,3:T = True
x[1]_3:T,4:C = True
x[1]_4:C,5:G = True


In [10]:
# Check that the pairwise feature encoding is working.
# return_features=True is needed for the call to return an (x, names) pair;
# without it only x comes back and the unpacking splits x into its two rows.
x, names = _seqs_to_x_lclc(seqs, alphabet, 
                           return_features=True,
                           verbose=True, 
                           features_to_print=5,
                           seq_to_print=1,
                           feature_mask='pairwise') 
assert len(names) == x.shape[1]

K = 880 features
x[1] = TGATCGGCATA
x[1]_0:T,1:G = True
x[1]_0:T,2:A = True
x[1]_0:T,3:T = True
x[1]_0:T,4:C = True
x[1]_0:T,5:G = True


In [11]:
# Check that the markov3 feature encoding is working.
# return_features=True is needed for the call to return an (x, names) pair;
# without it only x comes back and the unpacking splits x into its two rows.
x, names = _seqs_to_x_lclclc(seqs, alphabet, 
                             return_features=True,
                             verbose=True, 
                             features_to_print=5,
                             seq_to_print=1,
                             feature_mask="markov3") 
assert len(names) == x.shape[1]

K = 576 features
x[1] = TGATCGGCATA
x[1]_0:T,1:G,2:A = True
x[1]_1:G,2:A,3:T = True
x[1]_2:A,3:T,4:C = True
x[1]_3:T,4:C,5:G = True
x[1]_4:C,5:G,6:G = True


In [12]:
# Check that the triplet feature encoding is working.
# return_features=True is needed for the call to return an (x, names) pair;
# without it only x comes back and the unpacking splits x into its two rows.
x, names = _seqs_to_x_lclclc(seqs, alphabet, 
                             return_features=True,
                             verbose=True, 
                             features_to_print=5,
                             seq_to_print=1,
                             feature_mask="triplet") 
assert len(names) == x.shape[1]

K = 10560 features
x[1] = TGATCGGCATA
x[1]_0:T,1:G,2:A = True
x[1]_0:T,1:G,3:T = True
x[1]_0:T,1:G,4:C = True
x[1]_0:T,1:G,5:G = True
x[1]_0:T,1:G,6:G = True


In [13]:
# Test full encoding for an additive model
x, names = get_gpmap_features(seqs, alphabet, gpmap_type="additive")
print(f'x.shape={x.shape}')
print(f'names={names}')

# Expected feature count: K = 1 + L*C
L, C = len(seqs[0]), len(alphabet)
assert x.shape == (len(seqs), 1 + L*C)
assert len(names) == x.shape[1]

x.shape=(2, 45)
names=['x_0', 'x_0:A', 'x_0:C', 'x_0:G', 'x_0:T', 'x_1:A', 'x_1:C', 'x_1:G', 'x_1:T', 'x_2:A', 'x_2:C', 'x_2:G', 'x_2:T', 'x_3:A', 'x_3:C', 'x_3:G', 'x_3:T', 'x_4:A', 'x_4:C', 'x_4:G', 'x_4:T', 'x_5:A', 'x_5:C', 'x_5:G', 'x_5:T', 'x_6:A', 'x_6:C', 'x_6:G', 'x_6:T', 'x_7:A', 'x_7:C', 'x_7:G', 'x_7:T', 'x_8:A', 'x_8:C', 'x_8:G', 'x_8:T', 'x_9:A', 'x_9:C', 'x_9:G', 'x_9:T', 'x_10:A', 'x_10:C', 'x_10:G', 'x_10:T']


In [14]:
# Test full encoding for a neighbor model
x, names = get_gpmap_features(seqs, alphabet, gpmap_type="neighbor")
print(f'x.shape={x.shape}')
print(f'names={names}')

# Expected feature count: K = 1 + L*C + (L-1)*C*C
L, C = len(seqs[0]), len(alphabet)
assert x.shape == (len(seqs), 1 + L*C + (L-1)*C*C)
assert len(names) == x.shape[1]

x.shape=(2, 205)
names=['x_0', 'x_0:A', 'x_0:C', 'x_0:G', 'x_0:T', 'x_1:A', 'x_1:C', 'x_1:G', 'x_1:T', 'x_2:A', 'x_2:C', 'x_2:G', 'x_2:T', 'x_3:A', 'x_3:C', 'x_3:G', 'x_3:T', 'x_4:A', 'x_4:C', 'x_4:G', 'x_4:T', 'x_5:A', 'x_5:C', 'x_5:G', 'x_5:T', 'x_6:A', 'x_6:C', 'x_6:G', 'x_6:T', 'x_7:A', 'x_7:C', 'x_7:G', 'x_7:T', 'x_8:A', 'x_8:C', 'x_8:G', 'x_8:T', 'x_9:A', 'x_9:C', 'x_9:G', 'x_9:T', 'x_10:A', 'x_10:C', 'x_10:G', 'x_10:T', 'x_0:A,1:A', 'x_0:A,1:C', 'x_0:A,1:G', 'x_0:A,1:T', 'x_0:C,1:A', 'x_0:C,1:C', 'x_0:C,1:G', 'x_0:C,1:T', 'x_0:G,1:A', 'x_0:G,1:C', 'x_0:G,1:G', 'x_0:G,1:T', 'x_0:T,1:A', 'x_0:T,1:C', 'x_0:T,1:G', 'x_0:T,1:T', 'x_1:A,2:A', 'x_1:A,2:C', 'x_1:A,2:G', 'x_1:A,2:T', 'x_1:C,2:A', 'x_1:C,2:C', 'x_1:C,2:G', 'x_1:C,2:T', 'x_1:G,2:A', 'x_1:G,2:C', 'x_1:G,2:G', 'x_1:G,2:T', 'x_1:T,2:A', 'x_1:T,2:C', 'x_1:T,2:G', 'x_1:T,2:T', 'x_2:A,3:A', 'x_2:A,3:C', 'x_2:A,3:G', 'x_2:A,3:T', 'x_2:C,3:A', 'x_2:C,3:C', 'x_2:C,3:G', 'x_2:C,3:T', 'x_2:G,3:A', 'x_2:G,3:C', 'x_2:G,3:G', 'x_2:G,3:T

In [15]:
# Test full encoding for a pairwise model
x, names = get_gpmap_features(seqs, alphabet, gpmap_type="pairwise")
print(f'x.shape={x.shape}')
print(f'names={names[-100:]}')

# Expected feature count: K = 1 + L*C + (L*(L-1)/2)*C*C
L, C = len(seqs[0]), len(alphabet)
assert x.shape == (len(seqs), 1 + L*C + (L*(L-1)//2)*C*C)
assert len(names) == x.shape[1]

x.shape=(2, 925)
names=['x_6:T,10:A', 'x_6:T,10:C', 'x_6:T,10:G', 'x_6:T,10:T', 'x_7:A,8:A', 'x_7:A,8:C', 'x_7:A,8:G', 'x_7:A,8:T', 'x_7:A,9:A', 'x_7:A,9:C', 'x_7:A,9:G', 'x_7:A,9:T', 'x_7:A,10:A', 'x_7:A,10:C', 'x_7:A,10:G', 'x_7:A,10:T', 'x_7:C,8:A', 'x_7:C,8:C', 'x_7:C,8:G', 'x_7:C,8:T', 'x_7:C,9:A', 'x_7:C,9:C', 'x_7:C,9:G', 'x_7:C,9:T', 'x_7:C,10:A', 'x_7:C,10:C', 'x_7:C,10:G', 'x_7:C,10:T', 'x_7:G,8:A', 'x_7:G,8:C', 'x_7:G,8:G', 'x_7:G,8:T', 'x_7:G,9:A', 'x_7:G,9:C', 'x_7:G,9:G', 'x_7:G,9:T', 'x_7:G,10:A', 'x_7:G,10:C', 'x_7:G,10:G', 'x_7:G,10:T', 'x_7:T,8:A', 'x_7:T,8:C', 'x_7:T,8:G', 'x_7:T,8:T', 'x_7:T,9:A', 'x_7:T,9:C', 'x_7:T,9:G', 'x_7:T,9:T', 'x_7:T,10:A', 'x_7:T,10:C', 'x_7:T,10:G', 'x_7:T,10:T', 'x_8:A,9:A', 'x_8:A,9:C', 'x_8:A,9:G', 'x_8:A,9:T', 'x_8:A,10:A', 'x_8:A,10:C', 'x_8:A,10:G', 'x_8:A,10:T', 'x_8:C,9:A', 'x_8:C,9:C', 'x_8:C,9:G', 'x_8:C,9:T', 'x_8:C,10:A', 'x_8:C,10:C', 'x_8:C,10:G', 'x_8:C,10:T', 'x_8:G,9:A', 'x_8:G,9:C', 'x_8:G,9:G', 'x_8:G,9:T', 'x_8:G,10:A'

In [16]:
# Test full encoding for a markov3 model
x, names = get_gpmap_features(seqs, alphabet, gpmap_type="markov3")
print(f'x.shape={x.shape}')
print(f'names={names[-100:]}')

# Expected feature count: K = 1 + L*C + (L-1)*C*C + (L-2)*C*C*C
L, C = len(seqs[0]), len(alphabet)
assert x.shape == (len(seqs), 1 + L*C + (L-1)*C*C + (L-2)*C*C*C)
assert len(names) == x.shape[1]

x.shape=(2, 781)
names=['x_7:C,8:T,9:A', 'x_7:C,8:T,9:C', 'x_7:C,8:T,9:G', 'x_7:C,8:T,9:T', 'x_7:G,8:A,9:A', 'x_7:G,8:A,9:C', 'x_7:G,8:A,9:G', 'x_7:G,8:A,9:T', 'x_7:G,8:C,9:A', 'x_7:G,8:C,9:C', 'x_7:G,8:C,9:G', 'x_7:G,8:C,9:T', 'x_7:G,8:G,9:A', 'x_7:G,8:G,9:C', 'x_7:G,8:G,9:G', 'x_7:G,8:G,9:T', 'x_7:G,8:T,9:A', 'x_7:G,8:T,9:C', 'x_7:G,8:T,9:G', 'x_7:G,8:T,9:T', 'x_7:T,8:A,9:A', 'x_7:T,8:A,9:C', 'x_7:T,8:A,9:G', 'x_7:T,8:A,9:T', 'x_7:T,8:C,9:A', 'x_7:T,8:C,9:C', 'x_7:T,8:C,9:G', 'x_7:T,8:C,9:T', 'x_7:T,8:G,9:A', 'x_7:T,8:G,9:C', 'x_7:T,8:G,9:G', 'x_7:T,8:G,9:T', 'x_7:T,8:T,9:A', 'x_7:T,8:T,9:C', 'x_7:T,8:T,9:G', 'x_7:T,8:T,9:T', 'x_8:A,9:A,10:A', 'x_8:A,9:A,10:C', 'x_8:A,9:A,10:G', 'x_8:A,9:A,10:T', 'x_8:A,9:C,10:A', 'x_8:A,9:C,10:C', 'x_8:A,9:C,10:G', 'x_8:A,9:C,10:T', 'x_8:A,9:G,10:A', 'x_8:A,9:G,10:C', 'x_8:A,9:G,10:G', 'x_8:A,9:G,10:T', 'x_8:A,9:T,10:A', 'x_8:A,9:T,10:C', 'x_8:A,9:T,10:G', 'x_8:A,9:T,10:T', 'x_8:C,9:A,10:A', 'x_8:C,9:A,10:C', 'x_8:C,9:A,10:G', 'x_8:C,9:A,10:T', 'x_8

In [17]:
# Test full encoding for a triplet model
x, names = get_gpmap_features(seqs, alphabet, gpmap_type="triplet")
print(f'x.shape={x.shape}')
print(f'names={names[-100:]}')

# Expected feature count:
# K = 1 + L*C + (L*(L-1)/2)*C*C + (L*(L-1)*(L-2)/6)*C*C*C
L, C = len(seqs[0]), len(alphabet)
assert x.shape == (len(seqs),
                   1 + L*C + (L*(L-1)//2)*C*C + (L*(L-1)*(L-2)//6)*C*C*C)
assert len(names) == x.shape[1]

x.shape=(2, 11485)
names=['x_7:T,8:C,10:A', 'x_7:T,8:C,10:C', 'x_7:T,8:C,10:G', 'x_7:T,8:C,10:T', 'x_7:T,8:G,9:A', 'x_7:T,8:G,9:C', 'x_7:T,8:G,9:G', 'x_7:T,8:G,9:T', 'x_7:T,8:G,10:A', 'x_7:T,8:G,10:C', 'x_7:T,8:G,10:G', 'x_7:T,8:G,10:T', 'x_7:T,8:T,9:A', 'x_7:T,8:T,9:C', 'x_7:T,8:T,9:G', 'x_7:T,8:T,9:T', 'x_7:T,8:T,10:A', 'x_7:T,8:T,10:C', 'x_7:T,8:T,10:G', 'x_7:T,8:T,10:T', 'x_7:T,9:A,10:A', 'x_7:T,9:A,10:C', 'x_7:T,9:A,10:G', 'x_7:T,9:A,10:T', 'x_7:T,9:C,10:A', 'x_7:T,9:C,10:C', 'x_7:T,9:C,10:G', 'x_7:T,9:C,10:T', 'x_7:T,9:G,10:A', 'x_7:T,9:G,10:C', 'x_7:T,9:G,10:G', 'x_7:T,9:G,10:T', 'x_7:T,9:T,10:A', 'x_7:T,9:T,10:C', 'x_7:T,9:T,10:G', 'x_7:T,9:T,10:T', 'x_8:A,9:A,10:A', 'x_8:A,9:A,10:C', 'x_8:A,9:A,10:G', 'x_8:A,9:A,10:T', 'x_8:A,9:C,10:A', 'x_8:A,9:C,10:C', 'x_8:A,9:C,10:G', 'x_8:A,9:C,10:T', 'x_8:A,9:G,10:A', 'x_8:A,9:G,10:C', 'x_8:A,9:G,10:G', 'x_8:A,9:G,10:T', 'x_8:A,9:T,10:A', 'x_8:A,9:T,10:C', 'x_8:A,9:T,10:G', 'x_8:A,9:T,10:T', 'x_8:C,9:A,10:A', 'x_8:C,9:A,10:C', 'x_8:C,9:A