# In [2]:
import random

def RandomizedMotifSearch(Dna, k, t):
    """Run one pass of randomized motif search and return the best motifs seen.

    A k-mer is drawn at a random position from every string in ``Dna``.
    The motifs are then repeatedly replaced by the profile-most-probable
    k-mers of a pseudocount profile built from the current motifs. The
    pass stops as soon as a new set of motifs fails to score strictly
    lower than the best set so far.

    Args:
        Dna: Sequence of DNA strings to search.
        k: Length of the motifs.
        t: Accepted for interface compatibility; not read by this function.

    Returns:
        A list with one k-mer per string in ``Dna``.
    """
    def random_kmer(sequence):
        # randint is inclusive on both ends, so the last valid start is len - k.
        start = random.randint(0, len(sequence) - k)
        return sequence[start:start + k]

    current = [random_kmer(sequence) for sequence in Dna]
    best = list(current)

    while True:
        # Rebuild the profile from the current motifs, then re-pick motifs.
        current = MotifsFromProfile(
            Dna, k, MakeProfileWithPseudocounts(current)
        )

        # Stop at the first iteration that brings no strict improvement.
        if not Score(current) < Score(best):
            return best
        best = list(current)

def MakeProfileWithPseudocounts(motifs):
    """Build a profile matrix from ``motifs`` using +1 (Laplace) pseudocounts.

    Args:
        motifs: Non-empty list of equal-length strings over ``ACGT``.

    Returns:
        A dict mapping each of ``'A'``, ``'C'``, ``'G'``, ``'T'`` to a list
        of ``k`` floats, where ``k`` is the length of the first motif.
        Entry ``i`` is the pseudocount-adjusted frequency of that nucleotide
        in column ``i``; every column sums to 1.

    Raises:
        IndexError: If ``motifs`` is empty.
        KeyError: If a motif contains a symbol other than A, C, G or T.
    """
    k = len(motifs[0])

    # Start every cell at 1 so no nucleotide ever gets probability zero.
    counts = {nucleotide: [1] * k for nucleotide in 'ACGT'}
    for motif in motifs:
        for index, nucleotide in enumerate(motif):
            counts[nucleotide][index] += 1

    # Each column holds len(motifs) observed symbols plus one pseudocount
    # for each of the 4 nucleotides, so that is the correct normalizer.
    column_total = len(motifs) + 4
    return {
        nucleotide: [count / column_total for count in column]
        for nucleotide, column in counts.items()
    }

def MotifsFromProfile(Dna, k, profile):
    """Pick the profile-most-probable k-mer from every string in ``Dna``.

    Args:
        Dna: Sequence of DNA strings.
        k: Length of the k-mers to select.
        profile: Profile matrix passed through to ``ProfileMostProbable``.

    Returns:
        A list with one k-mer per string, in the same order as ``Dna``.
    """
    return [ProfileMostProbable(sequence, k, profile) for sequence in Dna]

def ProfileMostProbable(text, k, profile):
    """Return the k-mer of ``text`` that is most probable under ``profile``.

    The probability of a k-mer is the product of ``profile[symbol][column]``
    over its positions. When several k-mers tie, the leftmost one wins.
    If ``text`` has no full-length window, ``text[:k]`` is returned.

    Args:
        text: String to scan.
        k: Window length.
        profile: Mapping from symbol to a per-column list of probabilities.

    Returns:
        The selected substring of ``text``.
    """
    best_kmer = text[:k]
    best_likelihood = -1
    last_start = len(text) - k

    start = 0
    while start <= last_start:
        window = text[start:start + k]

        likelihood = 1
        for column, symbol in zip(range(k), window):
            likelihood *= profile[symbol][column]

        # Strict comparison keeps the earliest window on ties.
        if likelihood > best_likelihood:
            best_likelihood, best_kmer = likelihood, window
        start += 1

    return best_kmer

def Score(motifs):
    """Count the symbols in ``motifs`` that differ from the column majority.

    For every column (up to the length of the first motif) the number of
    motifs not carrying that column's most frequent nucleotide is added up.

    Args:
        motifs: Non-empty list of strings over ``ACGT``.

    Returns:
        The total mismatch count as an int; lower means more conserved.
    """
    def column_mismatches(position):
        tally = dict.fromkeys('ACGT', 0)
        for row in motifs:
            tally[row[position]] += 1
        occurrences = tally.values()
        return sum(occurrences) - max(occurrences)

    total = 0
    width = len(motifs[0])
    for position in range(width):
        total += column_mismatches(position)
    return total

def RepeatedRandomizedMotifSearch(Dna, k, t, iterations=1000):
    """Run ``RandomizedMotifSearch`` many times and keep the best result.

    A single randomized pass depends on its random starting motifs, so
    the search is restarted ``iterations`` times and the lowest-scoring
    motif set is returned.

    Args:
        Dna: Sequence of DNA strings to search.
        k: Length of the motifs.
        t: Passed through unchanged to ``RandomizedMotifSearch``.
        iterations: Number of independent restarts. Defaults to 1000.

    Returns:
        The motif list with the lowest ``Score`` across all restarts, or an
        empty list when ``iterations`` is zero or negative.
    """
    best_score = float('inf')
    best_motifs = []
    for _ in range(iterations):
        motifs = RandomizedMotifSearch(Dna, k, t)
        current_score = Score(motifs)
        # Strict comparison keeps the earliest of equally good results.
        if current_score < best_score:
            best_score = current_score
            best_motifs = motifs
    return best_motifs

# Example usage: look for 8-mers shared across five sample sequences.
k = 8
t = 5
Dna = [
    "CGCCCCTCTCGGGGGTGTTCAGTAAACGGCCA",
    "GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG",
    "TAGTACCGAGACCGAAAGAAGTATACAGGCGT",
    "TAGATCAAGTTTCAGGTGCACGTCGGTGAACC",
    "AATCCACCAGCTCCACGTGCAATGTTGGCCTA",
]
print(RepeatedRandomizedMotifSearch(Dna, k, t))


# Example output from one run (the search is randomized, so results can differ between runs):
# ['TCTCGGGG', 'CCAAGGTG', 'TACAGGCG', 'TTCAGGTG', 'TCCACGTG']
