## Problem: BA2F
Implement RandomizedMotifSearch
#### link: https://rosalind.info/problems/ba2f/

In [2]:
import random

In [3]:
# Score calculation function
def score(motif_set):
    """Return the total number of non-consensus letters in a motif set.

    For every position of the first motif, the count of the most frequent
    letter among 'A', 'C', 'G', 'T' in that column is subtracted from the
    number of motifs; the per-column results are summed.

    Args:
        motif_set: non-empty sequence of motifs, each indexable at every
            position of the first motif.

    Returns:
        The summed mismatch count (an int); lower means more conserved.
    """
    motif_length = len(motif_set[0])
    num_motifs = len(motif_set)
    # Lazily walk the columns, left to right.
    columns = ([row[pos] for row in motif_set] for pos in range(motif_length))
    return sum(
        num_motifs - max(column.count(base) for base in "ACGT")
        for column in columns
    )

In [4]:
# Build profile with pseudocounts
def build_profile(motif_set):
    """Build a 4 x k profile matrix with a pseudocount of one per letter.

    Rows are ordered A, C, G, T. Entry [row][pos] is
    (count of that letter in column pos + 1) / (number of motifs + 4).

    Args:
        motif_set: non-empty sequence of motifs, each indexable at every
            position of the first motif.

    Returns:
        A list of four lists of floats, each as long as the first motif.
    """
    width = len(motif_set[0])
    num_motifs = len(motif_set)
    columns = [[row[pos] for row in motif_set] for pos in range(width)]
    return [
        [(column.count(base) + 1) / (num_motifs + 4) for column in columns]
        for base in "ACGT"
    ]

In [5]:
# Probability of a kmer under a profile
def probability(kmer, matrix):
    """Return the product of the profile entries selected by ``kmer``.

    Args:
        kmer: sequence of letters; position i selects column i of ``matrix``.
        matrix: profile with rows ordered A, C, G, T.

    Returns:
        The running product, starting from the integer 1. Letters other than
        'A', 'C', 'G', 'T' contribute no factor.
    """
    row_for_base = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
    product = 1
    for pos, base in enumerate(kmer):
        row = row_for_base.get(base)
        if row is not None:
            product *= matrix[row][pos]
    return product


# Profile most probable kmer
def most_probable_kmer(dna, k, matrix):
    """Return the length-k window of ``dna`` with the highest profile probability.

    Windows are scanned left to right and only a strictly higher probability
    replaces the current best, so the leftmost maximal window wins ties.

    Args:
        dna: the sequence to scan.
        k: window length.
        matrix: profile passed through to ``probability``.

    Returns:
        The best window, or "" when ``dna`` has no window of length k.
    """
    best_kmer = ""
    best_prob = -1e9  # sentinel below any probability the scan compares against
    last_start = len(dna) - k
    for start in range(last_start + 1):
        window = dna[start:start + k]
        window_prob = probability(window, matrix)
        if window_prob > best_prob:
            best_kmer, best_prob = window, window_prob
    return best_kmer

In [6]:
# Main function
def RandomizedMotifSearch(Dna, k, t):
    """Run one randomized motif search from a random starting motif set.

    A random k-mer is drawn from each string, then the motif set is refined
    repeatedly: build a profile from the current motifs, take the
    profile-most-probable k-mer of each string, and keep the new set only if
    its score is strictly lower. The first non-improving round ends the search.

    Args:
        Dna: collection of DNA strings, each at least k long.
        k: motif length.
        t: accepted for interface compatibility; not used by this function.

    Returns:
        The best-scoring list of motifs found, one per string in ``Dna``.
    """
    # One random start index per string, drawn in input order.
    starts = [random.randint(0, len(strand) - k) for strand in Dna]
    motifs = [strand[begin:begin + k] for strand, begin in zip(Dna, starts)]
    lowest_score = score(motifs)

    while True:
        profile = build_profile(motifs)
        candidates = [most_probable_kmer(strand, k, profile) for strand in Dna]
        candidate_score = score(candidates)
        if candidate_score >= lowest_score:
            # No strict improvement: return the best set found so far.
            return motifs
        motifs, lowest_score = candidates, candidate_score



# Repeated Randomized Motif Search
def RepeatedRandomizedMotifSearch(Dna, k, t, N):
    """Run RandomizedMotifSearch repeatedly and keep the lowest-scoring result.

    One search always runs first; ``range(1, N)`` then adds N - 1 further
    searches. A later result replaces the current best only when its score is
    strictly lower.

    Args:
        Dna: collection of DNA strings.
        k: motif length.
        t: forwarded unchanged to RandomizedMotifSearch.
        N: total number of searches for N >= 1.

    Returns:
        The motif list with the lowest score across all searches.
    """
    winner = RandomizedMotifSearch(Dna, k, t)
    winner_score = score(winner)

    for _ in range(1, N):
        challenger = RandomizedMotifSearch(Dna, k, t)
        challenger_score = score(challenger)
        if challenger_score < winner_score:
            winner, winner_score = challenger, challenger_score
    return winner

In [None]:
# Problem parameters: motif length k and number of DNA strings t.
k, t = 15, 20
# Read the whitespace-separated DNA strings from a single prompt.
Dna = input("enter DNA: ").split()
# 1000 independent random restarts; the lowest-scoring motif set is kept.
ans = RepeatedRandomizedMotifSearch(Dna, k, t, 1000)
# Use a real loop name: binding `_` in a notebook shadows IPython's
# last-output variable and stops it from being updated.
for motif in ans:
    print(motif)

ACTATTCGGAGCTCT
ACTCTGCGTGCCCTT
TCTTGTCGTGCCCGA
ACTTGTCGTGCGGAT
ACAGTTCGTGCCCTT
ACTTGTCGTGCCTCG
TTTTGTCGTGCCCTC
ACTTGGATTGCCCTT
ACTTGTCGTGGGTTT
ACTTGGTATGCCCTT
ATCGGTCGTGCCCTT
ACTTGTCGTTGACTT
ACTTGTCGGTTCCTT
ACTTTAGGTGCCCTT
ACTTGTGTGGCCCTT
TGGTGTCGTGCCCTT
ACTACACGTGCCCTT
ACTTTGTGTGCCCTT
ACTTGTCAATCCCTT
ACTTGTTCAGCCCTT
