## Chapter 2

#### Problem BA2A: Motif Enumeration
http://rosalind.info/problems/ba2a/

In [71]:
# from ba1g.py import HammingDistance
# from ba1j.py import CreateKmers

def HammingDistance(string1, string2):
    """Return the Hamming distance between two equal-length strings.

    The Hamming distance is the number of positions at which the two
    strings hold different characters.

    Args:
        string1: First string.
        string2: Second string; must have the same length as string1.

    Returns:
        The number of mismatching positions, as an int.

    Raises:
        ValueError: If the two strings differ in length.
    """
    # Fail loudly: merely printing a warning let the function go on to either
    # return a misleading partial count or crash with an IndexError.
    if len(string1) != len(string2):
        raise ValueError("Strings must be the same length!")

    return sum(1 for base1, base2 in zip(string1, string2) if base1 != base2)

assert HammingDistance("AACC", "AAAA") == 2

In [80]:
def CreateKmers(kmer, d):
    """Return the d-neighborhood of kmer as a list of strings.

    The neighborhood is built recursively from the neighborhood of the
    suffix kmer[1:]: a suffix neighbor that has used fewer than d mismatches
    may be prefixed with any nucleotide, while one that already differs in
    d positions must keep the original first base.

    Args:
        kmer: The pattern to expand.
        d: Maximum number of mismatches allowed.

    Returns:
        A list of same-length strings (order follows the recursion).
    """
    # No mismatches allowed: the pattern is its own sole neighbor.
    # (Returned as a one-element list so callers can always iterate.)
    if d == 0:
        return [kmer]
    # A single base with at least one mismatch allowed can be any nucleotide.
    if len(kmer) == 1:
        return ['A', 'G', 'C', 'T']

    suffix = kmer[1:]
    neighborhood = []
    for suffix_neighbor in CreateKmers(suffix, d):
        if HammingDistance(suffix, suffix_neighbor) < d:
            # Mismatch budget left over: the first base is free to vary.
            neighborhood.extend(base + suffix_neighbor for base in 'ACGT')
        else:
            # Budget exhausted in the suffix: the first base must match.
            neighborhood.append(kmer[0] + suffix_neighbor)

    return neighborhood

assert CreateKmers('CAT', 1) == ['CAA', 'CAG', 'CAC', 'AAT', 'CAT', 'GAT', 'TAT', 'CCT', 'CGT', 'CTT']

In [83]:
def MotifEnumeration(k, d, Dna):
    """Return the set of (k, d)-motifs shared by every string in Dna.

    Candidates are the d-neighbors (via CreateKmers) of every k-mer of the
    first string; a candidate survives only if each remaining string
    contains some k-mer within Hamming distance d of it.

    Args:
        k: Motif length.
        d: Maximum number of mismatches allowed.
        Dna: Sequence of DNA strings; must contain at least one string.

    Returns:
        A set of k-mer strings.
    """
    seed = Dna[0]

    # Every neighbor of every window of the first string is a candidate.
    candidates = {
        neighbor
        for start in range(len(seed) - k + 1)
        for neighbor in CreateKmers(seed[start : start + k], d)
    }

    # Filter the candidates against each of the remaining strings in turn.
    for strand in Dna[1:]:
        windows = [strand[p : p + k] for p in range(len(strand) - k + 1)]
        candidates = {
            pattern
            for pattern in candidates
            if any(HammingDistance(window, pattern) <= d for window in windows)
        }

    return candidates

assert MotifEnumeration(3, 1, ["ATTTGGC", "TGCCTTA", "CGGTATC", "GAAAATT"]) == {'ATA', 'ATT', 'GTT', 'TTT'}

In [84]:
MotifEnumeration(3, 1, ["ATTTGGC", "TGCCTTA", "CGGTATC", "GAAAATT"])

{'ATA', 'ATT', 'GTT', 'TTT'}

#### Problem BA2B: Find a Median String
http://rosalind.info/problems/ba2b/

In [58]:
from itertools import product
import sys

# from ba1g.py import HammingDistance

def HammingDistance(string1, string2):
    """Return the Hamming distance between two equal-length strings.

    The Hamming distance is the number of positions at which the two
    strings hold different characters.

    Args:
        string1: First string.
        string2: Second string; must have the same length as string1.

    Returns:
        The number of mismatching positions, as an int.

    Raises:
        ValueError: If the two strings differ in length.
    """
    # Fail loudly: merely printing a warning let the function go on to either
    # return a misleading partial count or crash with an IndexError.
    if len(string1) != len(string2):
        raise ValueError("Strings must be the same length!")

    return sum(1 for base1, base2 in zip(string1, string2) if base1 != base2)

assert HammingDistance("AACC", "AAAA") == 2

In [63]:
def Distance(pattern, Dna):
    """Return the summed minimum Hamming distance from pattern to each string.

    For every string in Dna the pattern is compared against each window of
    len(pattern) characters and the smallest distance is kept; the
    per-string minima are then added together.

    Args:
        pattern: The k-mer to score.
        Dna: Iterable of DNA strings.

    Returns:
        The total distance as an int. A string with no window of the
        required length contributes sys.maxsize.
    """
    k = len(pattern)
    total = 0
    for strand in Dna:
        window_distances = (
            HammingDistance(pattern, strand[start : start + k])
            for start in range(len(strand) - k + 1)
        )
        # sys.maxsize stands in when the string has no window to compare.
        total += min(window_distances, default=sys.maxsize)

    return total

assert Distance('AAA', ['AAATTGACGCAT', 'GACGACCACGTT', 'CGTCAGCGCCTG', 'GCTGAGCACCGG', 'AGTACGGGACAG']) == 7

In [66]:
def FindMedianString(k, Dna):
    """Return a k-mer that minimises Distance(pattern, Dna).

    Every k-mer over the alphabet "ACGT" is scored with Distance and the
    first one (in itertools.product order) achieving the smallest total is
    returned.

    Args:
        k: Length of the median string to search for.
        Dna: Collection of DNA strings, each at least k characters long.

    Returns:
        The best-scoring k-mer as a string.

    Raises:
        ValueError: If any string in Dna is shorter than k.
    """
    # A string shorter than k has no k-mer window, so Distance would report
    # sys.maxsize for it; no candidate could then beat the initial sentinel
    # and the result variable was never bound (UnboundLocalError).
    if any(len(strand) < k for strand in Dna):
        raise ValueError("Every string in Dna must be at least k characters long")

    best_distance = sys.maxsize
    median = None
    for letters in product("ACGT", repeat=k):
        # product yields tuples of single characters; join them into a string
        pattern = "".join(letters)
        pattern_distance = Distance(pattern, Dna)
        # strict '<' keeps the earliest pattern when several tie
        if pattern_distance < best_distance:
            best_distance = pattern_distance
            median = pattern

    return median

assert FindMedianString(3, ["AAATTGACGCAT", "GACGACCACGTT", "CGTCAGCGCCTG", "GCTGAGCACCGG", "AGTACGGGACAG"]) == 'ACG'

In [67]:
FindMedianString(3, ["AAATTGACGCAT", "GACGACCACGTT", "CGTCAGCGCCTG", "GCTGAGCACCGG", "AGTACGGGACAG"])

'ACG'

#### Problem BA2C: Find Profile-most Probable kmer in a String
http://rosalind.info/problems/ba2c/

In [54]:
def ProfileMostProbable(Dna, k, Profile):
    """Return the Profile-most-probable k-mer of Dna.

    The probability of a k-mer is the product, over its positions, of
    Profile[base][position]. The first k-mer achieving the highest
    probability is returned.

    Args:
        Dna: The DNA string to scan.
        k: Length of the k-mers to consider.
        Profile: Dict mapping each base to a list of k per-position
            probabilities.

    Returns:
        The most probable k-mer, or "" when Dna is shorter than k.
    """
    max_kmer = ""
    # Start below any achievable probability so the first k-mer is kept even
    # when every k-mer scores 0 (starting at 0 with a strict '>' returned "").
    kmer_probability_max = -1

    for start in range(len(Dna) - k + 1):
        kmer = Dna[start : start + k]
        # start at 1 because the per-position probabilities are multiplied in
        kmer_probability = 1
        for position, basepair in enumerate(kmer):
            kmer_probability *= Profile[basepair][position]

        # strict '>' keeps the earliest k-mer when probabilities tie
        if kmer_probability > kmer_probability_max:
            kmer_probability_max = kmer_probability
            max_kmer = kmer

    return max_kmer

assert ProfileMostProbable("ACCTGTTTATTGCCTAAGTTCCGAACAAACCCAATATAGCCCGAGGGCCT", 5, {'A' : [0.2, 0.2, 0.3, 0.2, 0.3], 'C' : [0.4, 0.3, 0.1, 0.5, 0.1], 'G' : [0.3, 0.3, 0.5, 0.2, 0.4], 'T' : [0.1, 0.2, 0.1, 0.1, 0.2]}) == 'CCGAG'

In [55]:
DNA = "ACCTGTTTATTGCCTAAGTTCCGAACAAACCCAATATAGCCCGAGGGCCT"
k = 5
profile = { 'A' : [0.2, 0.2, 0.3, 0.2, 0.3],
            'C' : [0.4, 0.3, 0.1, 0.5, 0.1],
            'G' : [0.3, 0.3, 0.5, 0.2, 0.4],
            'T' : [0.1, 0.2, 0.1, 0.1, 0.2]
        }

In [56]:
ProfileMostProbable(DNA, k, profile)

'CCGAG'

#### Problem BA2D: Implement GreedyMotifSearch
http://rosalind.info/problems/ba2d/

In [25]:
# from ba1g.py import HammingDistance
# from ba2c.py import ProfileMostProbable ## changed for list Profile instead of dictionary

def HammingDistance(string1, string2):
    """Return the Hamming distance between two equal-length strings.

    The Hamming distance is the number of positions at which the two
    strings hold different characters.

    Args:
        string1: First string.
        string2: Second string; must have the same length as string1.

    Returns:
        The number of mismatching positions, as an int.

    Raises:
        ValueError: If the two strings differ in length.
    """
    # Fail loudly: merely printing a warning let the function go on to either
    # return a misleading partial count or crash with an IndexError.
    if len(string1) != len(string2):
        raise ValueError("Strings must be the same length!")

    return sum(1 for base1, base2 in zip(string1, string2) if base1 != base2)

assert HammingDistance("AACC", "AAAA") == 2

In [34]:
def ProfileMostProbableList(Dna, k, Profile):
    """Return the Profile-most-probable k-mer of Dna for a list-based profile.

    Args:
        Dna: The DNA string to scan.
        k: Length of the k-mers to consider.
        Profile: Four rows of per-position probabilities, indexed in the
            order A, C, G, T.

    Returns:
        The first k-mer with the highest probability; the first k-mer is
        kept even when every probability is 0. Returns "" when Dna has no
        window of length k.
    """
    row_of = {'A' : 0, 'C' : 1, 'G' : 2, 'T' : 3}
    best_kmer = ""
    best_probability = 0

    for start in range(len(Dna) - k + 1):
        window = Dna[start : start + k]
        # multiplicative identity: the per-position probabilities are folded in
        probability = 1
        for column, base in enumerate(window):
            probability *= Profile[row_of[base]][column]

        # Take the window when nothing has been chosen yet, or when it
        # strictly beats the current best (so earlier windows win ties).
        if not best_kmer or probability > best_probability:
            best_kmer = window
            best_probability = probability

    return best_kmer

assert ProfileMostProbableList("AAGAATCAGTCA", 3, [[0.0, 0.0, 0.0], [0.0, 0.0, 1.0], [1.0, 1.0, 0.0], [0.0, 0.0, 0.0]]) == "AAG"

In [46]:
def Profile(passed_motifs, k):
    """Build a 4 x k frequency profile from a list of motifs.

    Args:
        passed_motifs: List of motif strings, each at least k long.
        k: Number of positions (columns) to profile.

    Returns:
        A list of four rows, in the order A, C, G, T, where entry
        [row][i] is the fraction of motifs holding that base at position i.
    """
    number_motifs = len(passed_motifs)
    rows = {"A" : [], "C" : [], "G" : [], "T" : []}

    for position in range(k):
        # the bases that all motifs hold at this position
        column = [motif[position] for motif in passed_motifs]
        for base, row in rows.items():
            row.append(column.count(base) / number_motifs)

    return [rows["A"], rows["C"], rows["G"], rows["T"]]

assert Profile(['GGC', 'AAG'], 3) == [[0.5, 0.5, 0.0], [0.0, 0.0, 0.5], [0.5, 0.5, 0.5], [0.0, 0.0, 0.0]]

In [49]:
def Score(passed_motifs):
    """Return the total Hamming distance from the motifs to their consensus.

    The consensus takes, at each position, the most frequent base among
    the motifs (ties resolved in the order A, C, G, T). The score is the
    sum of HammingDistance(motif, consensus) over all motifs.

    Args:
        passed_motifs: Non-empty list of motif strings.

    Returns:
        The score as an int.
    """
    k = len(passed_motifs[0])

    consensus_bases = []
    for position in range(k):
        column = [motif[position] for motif in passed_motifs]
        # max() returns the first maximal item, giving A, C, G, T tie order
        consensus_bases.append(max("ACGT", key=column.count))
    consensus = "".join(consensus_bases)

    return sum(HammingDistance(motif, consensus) for motif in passed_motifs)

assert Score(['GGC', 'AAG', 'AAG', 'CAC', 'CAA']) == 7

In [52]:
def GreedyMotifSearch(k, t, Dna):
    """Greedily pick one k-mer per string so as to minimise Score.

    Starting from the first k-mer of every string, each k-mer of the first
    string is tried as a seed: strings 1..t-1 are visited in order and the
    k-mer most probable under the Profile of the motifs chosen so far is
    appended. The collection with the lowest Score is kept.

    Args:
        k: Motif length.
        t: Number of strings from Dna to use.
        Dna: List of DNA strings.

    Returns:
        The list of best motifs found.
    """
    # Baseline answer: the leading k-mer of every string.
    best_motifs = [seq[0:k] for seq in Dna]

    first_seq = Dna[0]
    for start in range(len(first_seq) - k + 1):
        # seed the candidate collection with this k-mer of the first string
        candidate = [first_seq[start : start + k]]

        # extend it one string at a time, re-profiling after every addition
        for row in range(1, t):
            matrix = Profile(candidate, k)
            candidate.append(ProfileMostProbableList(Dna[row], k, matrix))

        # strictly better score replaces the current best
        if Score(candidate) < Score(best_motifs):
            best_motifs = candidate

    return best_motifs

assert GreedyMotifSearch(3, 5, ["GGCGTTCAGGCA", "AAGAATCAGTCA", "CAAGGAGTTCGC", "CACGTCAATCAC", "CAATAATATTCG"]) == ['CAG', 'CAG', 'CAA', 'CAA', 'CAA']

In [53]:
GreedyMotifSearch(3, 5, ["GGCGTTCAGGCA", "AAGAATCAGTCA", "CAAGGAGTTCGC", "CACGTCAATCAC", "CAATAATATTCG"])

['CAG', 'CAG', 'CAA', 'CAA', 'CAA']

#### Problem BA2E: Implement GreedyMotifSearch with Pseudocounts
http://rosalind.info/problems/ba2e/

In [104]:
# from ba1g.py import HammingDistance
# from ba2c.py import ProfileMostProbable ## changed for list Profile instead of dictionary

def HammingDistance(string1, string2):
    """Return the Hamming distance between two equal-length strings.

    The Hamming distance is the number of positions at which the two
    strings hold different characters.

    Args:
        string1: First string.
        string2: Second string; must have the same length as string1.

    Returns:
        The number of mismatching positions, as an int.

    Raises:
        ValueError: If the two strings differ in length.
    """
    # Fail loudly: merely printing a warning let the function go on to either
    # return a misleading partial count or crash with an IndexError.
    if len(string1) != len(string2):
        raise ValueError("Strings must be the same length!")

    return sum(1 for base1, base2 in zip(string1, string2) if base1 != base2)

assert HammingDistance("AACC", "AAAA") == 2

In [105]:
def ProfileMostProbableList(Dna, k, Profile):
    """Find the k-mer of Dna that is most probable under a list profile.

    Args:
        Dna: The DNA string to scan.
        k: Length of the k-mers to consider.
        Profile: Four rows of per-position probabilities, indexed in the
            order A, C, G, T.

    Returns:
        The earliest k-mer with the highest probability (the first k-mer
        is always accepted, so an all-zero profile still yields a k-mer),
        or "" when Dna has no window of length k.
    """
    base_row = {'A' : 0, 'C' : 1, 'G' : 2, 'T' : 3}
    winner = ""
    winner_probability = 0

    for offset in range(len(Dna) - k + 1):
        candidate = Dna[offset : offset + k]

        # product of the profile entries selected by the candidate's bases
        candidate_probability = 1
        for j, base in enumerate(candidate):
            candidate_probability *= Profile[base_row[base]][j]

        # Accept unconditionally while nothing is chosen; afterwards only a
        # strictly higher probability displaces the current winner.
        if not winner or candidate_probability > winner_probability:
            winner, winner_probability = candidate, candidate_probability

    return winner

assert ProfileMostProbableList("AAGAATCAGTCA", 3, [[0.0, 0.0, 0.0], [0.0, 0.0, 1.0], [1.0, 1.0, 0.0], [0.0, 0.0, 0.0]]) == "AAG"

In [106]:
def PseudocountsProfile(passed_motifs, k):
    """Build a 4 x k profile from motifs using pseudocounts of 1.

    Every base count at every position starts at 1, so the denominator is
    the number of motifs plus 4.

    Args:
        passed_motifs: List of motif strings, each at least k long.
        k: Number of positions (columns) to profile.

    Returns:
        A list of four rows, in the order A, C, G, T, of per-position
        frequencies.
    """
    # one pseudocount per base is added to the denominator
    total_counts = len(passed_motifs) + 4
    rows = {"A" : [], "C" : [], "G" : [], "T" : []}

    for position in range(k):
        column = [motif[position] for motif in passed_motifs]
        for base, row in rows.items():
            # the +1 is the pseudocount for this base
            row.append((1 + column.count(base)) / total_counts)

    return [rows["A"], rows["C"], rows["G"], rows["T"]]

assert PseudocountsProfile(['TAAC', 'GTCT', 'ACTA', 'AGGT'], 4) == [[0.375, 0.25, 0.25, 0.25], [0.125, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.125], [0.25, 0.25, 0.25, 0.375]]

In [107]:
def Score(passed_motifs):
    """Score motifs by their total Hamming distance to a consensus string.

    At each position the consensus holds the base seen most often among
    the motifs; when counts tie, the earliest of A, C, G, T wins. The score
    adds up HammingDistance(motif, consensus) for every motif.

    Args:
        passed_motifs: Non-empty list of motif strings.

    Returns:
        The score as an int.
    """
    k = len(passed_motifs[0])

    consensus_bases = []
    for position in range(k):
        tally = dict.fromkeys("ACGT", 0)
        for motif in passed_motifs:
            base = motif[position]
            # characters other than A, C, G, T are not counted
            if base in tally:
                tally[base] += 1
        # max() keeps the first key among equals, i.e. A, C, G, T order
        consensus_bases.append(max(tally, key=tally.get))

    consensus = "".join(consensus_bases)
    return sum(HammingDistance(motif, consensus) for motif in passed_motifs)

assert Score(['GGC', 'AAG', 'AAG', 'CAC', 'CAA']) == 7

In [110]:
def GreedyMotifSearchPseudocounts(k, t, Dna):
    """Greedy motif search that profiles with pseudocounts.

    Starting from the first k-mer of every string, each k-mer of the first
    string is tried as a seed: strings 1..t-1 are visited in order and the
    k-mer most probable under the PseudocountsProfile of the motifs chosen
    so far is appended. The collection with the lowest Score is kept.

    Args:
        k: Motif length.
        t: Number of strings from Dna to use.
        Dna: List of DNA strings.

    Returns:
        The list of best motifs found.
    """
    # Baseline answer: the leading k-mer of every string.
    best_motifs = [seq[0:k] for seq in Dna]

    first_seq = Dna[0]
    for start in range(len(first_seq) - k + 1):
        # seed the candidate collection with this k-mer of the first string
        candidate = [first_seq[start : start + k]]

        # grow the collection string by string, re-profiling each time
        for row in range(1, t):
            matrix = PseudocountsProfile(candidate, k)
            candidate.append(ProfileMostProbableList(Dna[row], k, matrix))

        # only a strictly lower score replaces the current best
        if Score(candidate) < Score(best_motifs):
            best_motifs = candidate

    return best_motifs

assert GreedyMotifSearchPseudocounts(3, 5, ["GGCGTTCAGGCA", "AAGAATCAGTCA", "CAAGGAGTTCGC", "CACGTCAATCAC", "CAATAATATTCG"]) == ['TTC', 'ATC', 'TTC', 'ATC', 'TTC']

In [112]:
GreedyMotifSearchPseudocounts(3, 5, ["GGCGTTCAGGCA", "AAGAATCAGTCA", "CAAGGAGTTCGC", "CACGTCAATCAC", "CAATAATATTCG"])

['TTC', 'ATC', 'TTC', 'ATC', 'TTC']

#### Problem BA2F: Implement RandomizedMotifSearch
http://rosalind.info/problems/ba2f/

In [128]:
import random

def GetRandKmer(seq, k):
    """Return a k-mer taken from a uniformly random start position in seq.

    Args:
        seq: The string to sample from.
        k: Length of the k-mer to return.

    Returns:
        A substring of seq of length k.
    """
    # randint includes both endpoints, so the upper bound is the last start
    # index at which a full k-mer still fits.
    last_start = len(seq) - k
    start = random.randint(0, last_start)
    return seq[start : start + k]

kmer = GetRandKmer("CATCATAACT", 3)

assert len(kmer) == 3
assert kmer in "CATCATAACT"

In [133]:
def PseudocountsProfile(passed_motifs, k):
    """Build a 4 x k profile from motifs, adding a pseudocount of 1 per base.

    Each base's count at each position starts at 1, and the denominator is
    therefore the number of motifs plus 4.

    Args:
        passed_motifs: List of motif strings, each at least k long.
        k: Number of positions (columns) to profile.

    Returns:
        A list of four rows, in the order A, C, G, T, of per-position
        frequencies.
    """
    bases = ("A", "C", "G", "T")
    # the four pseudocounts (one per base) enlarge the denominator
    total_counts = len(passed_motifs) + 4

    matrix = [[] for _ in bases]
    for position in range(k):
        column = [motif[position] for motif in passed_motifs]
        for row, base in zip(matrix, bases):
            row.append((1 + column.count(base)) / total_counts)

    return matrix

assert PseudocountsProfile(['TAAC', 'GTCT', 'ACTA', 'AGGT'], 4) == [[0.375, 0.25, 0.25, 0.25], [0.125, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.125], [0.25, 0.25, 0.25, 0.375]]

In [48]:
def ProfileMostProbableList(Dna, k, Profile):
    """Return the k-mer of Dna with the highest probability under Profile.

    Args:
        Dna: The DNA string to scan.
        k: Length of the k-mers to consider.
        Profile: Four rows of per-position probabilities, indexed in the
            order A, C, G, T.

    Returns:
        The earliest k-mer with the highest probability; the very first
        k-mer is accepted regardless of its probability. Returns "" when
        Dna has no window of length k.
    """
    row_index = {'A' : 0, 'C' : 1, 'G' : 2, 'T' : 3}
    chosen = ""
    chosen_probability = 0

    for start in range(len(Dna) - k + 1):
        kmer = Dna[start : start + k]

        # multiply together the profile entry for each base of the k-mer
        probability = 1
        for column, base in enumerate(kmer):
            probability *= Profile[row_index[base]][column]

        # nothing chosen yet, or a strict improvement over the current pick
        if not chosen or probability > chosen_probability:
            chosen = kmer
            chosen_probability = probability

    return chosen

assert ProfileMostProbableList("AAGAATCAGTCA", 3, [[0.0, 0.0, 0.0], [0.0, 0.0, 1.0], [1.0, 1.0, 0.0], [0.0, 0.0, 0.0]]) == "AAG"

In [49]:
def HammingDistance(string1, string2):
    """Return the Hamming distance between two equal-length strings.

    The Hamming distance is the number of positions at which the two
    strings hold different characters.

    Args:
        string1: First string.
        string2: Second string; must have the same length as string1.

    Returns:
        The number of mismatching positions, as an int.

    Raises:
        ValueError: If the two strings differ in length.
    """
    # Fail loudly: merely printing a warning let the function go on to either
    # return a misleading partial count or crash with an IndexError.
    if len(string1) != len(string2):
        raise ValueError("Strings must be the same length!")

    return sum(1 for base1, base2 in zip(string1, string2) if base1 != base2)

assert HammingDistance("AACC", "AAAA") == 2

In [65]:
def Score(passed_motifs):
    """Return how far the motifs are, in total, from their consensus string.

    The consensus is assembled position by position from the most frequent
    base among the motifs (the earliest of A, C, G, T on ties); the score
    is the sum of HammingDistance(motif, consensus) over the motifs.

    Args:
        passed_motifs: Non-empty list of motif strings.

    Returns:
        The score as an int.
    """
    k = len(passed_motifs[0])

    def most_common_base(position):
        # the bases every motif holds at this position
        column = [motif[position] for motif in passed_motifs]
        # max() yields the first maximal item: ties follow A, C, G, T order
        return max("ACGT", key=column.count)

    consensus = "".join(most_common_base(position) for position in range(k))

    score_value = 0
    for motif in passed_motifs:
        score_value += HammingDistance(motif, consensus)
    return score_value

assert Score(['GGC', 'AAG', 'AAG', 'CAC', 'CAA']) == 7

In [140]:
def RandomizedMotifSearchPseudocounts(k, t, Dna):
    """Run one randomized motif search from a random starting point.

    A random k-mer is drawn from every string; then, repeatedly, a
    pseudocount profile is built from the current motifs and each string's
    most probable k-mer under that profile forms the next motif set. The
    loop stops the first time the new set fails to strictly improve Score.

    Args:
        k: Motif length.
        t: Accepted for a signature parallel to the greedy searches; this
            function does not read it.
        Dna: List of DNA strings.

    Returns:
        The best-scoring list of motifs reached in this run.
    """
    # random starting motifs, one per string
    best_motifs = [GetRandKmer(seq, k) for seq in Dna]
    motifs = best_motifs

    while True:
        # re-profile on the current motifs, then re-pick a motif per string
        matrix = PseudocountsProfile(motifs, k)
        motifs = [ProfileMostProbableList(seq, k, matrix) for seq in Dna]

        # no strict improvement: the search has converged
        if not Score(motifs) < Score(best_motifs):
            return best_motifs
        best_motifs = motifs

In [141]:
def ManyRandomizedSearchPseudocounts(k, t, Dna, times = 1000):
    """Repeat the randomized motif search and keep the best-scoring result.

    Args:
        k: Motif length.
        t: Passed through to RandomizedMotifSearchPseudocounts.
        Dna: List of DNA strings.
        times: Total number of searches to run; at least one search is
            always performed.

    Returns:
        The list of motifs with the lowest Score across all runs (the
        earliest such run on ties).
    """
    champion = RandomizedMotifSearchPseudocounts(k, t, Dna)

    # the first run is already done, so times - 1 remain
    for _ in range(times - 1):
        challenger = RandomizedMotifSearchPseudocounts(k, t, Dna)
        # only a strictly better run replaces the current champion
        if Score(challenger) < Score(champion):
            champion = challenger

    return champion

assert Score(ManyRandomizedSearchPseudocounts(8, 5, ["CGCCCCTCTCGGGGGTGTTCAGTAAACGGCCA", "GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG", "TAGTACCGAGACCGAAAGAAGTATACAGGCGT", "TAGATCAAGTTTCAGGTGCACGTCGGTGAACC", "AATCCACCAGCTCCACGTGCAATGTTGGCCTA"])) <= Score(["TCTCGGGG", "CCAAGGTG", "TACAGGCG", "TTCAGGTG", "TCCACGTG"]) + 1

In [142]:
ManyRandomizedSearchPseudocounts(8, 5, ["CGCCCCTCTCGGGGGTGTTCAGTAAACGGCCA", "GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG", "TAGTACCGAGACCGAAAGAAGTATACAGGCGT", "TAGATCAAGTTTCAGGTGCACGTCGGTGAACC", "AATCCACCAGCTCCACGTGCAATGTTGGCCTA"])

['TCTCGGGG', 'CCAAGGTG', 'TACAGGCG', 'TTCAGGTG', 'TCCACGTG']