In [1]:
def Probability(pattern, profile):
    """Return the probability of `pattern` under a position-wise profile.

    Args:
        pattern: Sequence of symbols; each symbol must be a key of `profile`.
        profile: Mapping from symbol to an indexable of per-position values;
            `profile[symbol][i]` is the value used for `symbol` at position i.

    Returns:
        The product of the profile entries selected by each (symbol, position)
        pair in `pattern`. An empty pattern yields 1.
    """
    likelihood = 1
    for position, symbol in enumerate(pattern):
        likelihood *= profile[symbol][position]
    return likelihood


def ProfileMostProbableKmer(text, k, profile):
    """Return the k-mer of `text` with the highest probability under `profile`.

    Every length-k window of `text` is scored with `Probability`; the first
    window achieving the maximum score is returned.

    Args:
        text: String to scan.
        k: Window length.
        profile: Mapping from symbol to per-position values, as consumed by
            `Probability`.

    Returns:
        The best-scoring k-mer. When every window scores 0 the first window
        is returned. An empty string is returned only when `text` has no
        window of length k.
    """
    # Start below any attainable score so that a window is still chosen when
    # all probabilities are 0 (e.g. a profile built without pseudocounts).
    bestScore = -1
    mostProbable = ''
    for start in range(len(text) - k + 1):
        window = text[start:start + k]
        score = Probability(window, profile)
        # Strict comparison keeps the earliest window on ties.
        if score > bestScore:
            bestScore = score
            mostProbable = window
    return mostProbable

In [9]:
def CountPsuedocounts(motifs):
    """Count nucleotides per column, starting every count at 1.

    Args:
        motifs: Non-empty indexable of strings over 'A', 'C', 'G', 'T'. The
            number of columns is taken from the first motif.

    Returns:
        Dict with keys 'A', 'C', 'G', 'T' (in that order); each value is a
        list whose entry i is 1 plus the number of motifs having that
        nucleotide at position i.
    """
    width = len(motifs[0])
    # One independent column list per nucleotide, pre-filled with the pseudocount.
    counts = {base: [1] * width for base in ('A', 'C', 'G', 'T')}
    for motif in motifs:
        for position in range(width):
            counts[motif[position]][position] += 1
    return counts

def ProfilePsuedocounts(motifs):
    """Build a profile from the pseudocounted column counts of `motifs`.

    Args:
        motifs: Collection of motifs accepted by `CountPsuedocounts`.

    Returns:
        Dict keyed by nucleotide whose values are lists of floats: each count
        from `CountPsuedocounts` divided by `len(motifs) + 4`.
    """
    total = len(motifs)
    counts = CountPsuedocounts(motifs)
    # Each column holds len(motifs) observations plus one pseudocount for each
    # of the four nucleotides, hence the "+ 4".
    denominator = float(total + 4)
    return {
        base: [count / denominator for count in column]
        for base, column in sorted(counts.items())
    }

In [3]:
def Consensus(motifs):
    """Return the consensus string of `motifs`.

    For each column (column count taken from the first motif) the nucleotide
    with the largest entry in the pseudocount profile is chosen; ties go to
    the earliest of 'A', 'C', 'G', 'T'.

    Args:
        motifs: Non-empty collection of motifs accepted by
            `ProfilePsuedocounts`.

    Returns:
        A string with one nucleotide per column.
    """
    profile = ProfilePsuedocounts(motifs)
    # Profile entries are strictly positive (pseudocounts), so picking the
    # first maximal nucleotide matches a strict ">" scan starting from 0.
    return "".join(
        max(('A', 'C', 'G', 'T'), key=lambda base: profile[base][column])
        for column in range(len(motifs[0]))
    )
def Score(motifs):
    """Return the number of characters in `motifs` that differ from the consensus.

    Args:
        motifs: Collection of motifs accepted by `Consensus`.

    Returns:
        Integer count of (motif, position) pairs whose character is not the
        consensus character at that position. Lower is better.
    """
    consensus = Consensus(motifs)
    return sum(
        1
        for motif in motifs
        for position, base in enumerate(motif)
        if base != consensus[position]
    )

def Motifs(Dna, k, profile):
    """Pick the profile-most-probable k-mer from every string in `Dna`.

    Args:
        Dna: Iterable of strings.
        k: Length of the k-mers to select.
        profile: Profile passed through to `ProfileMostProbableKmer`.

    Returns:
        List with one k-mer per input string, in input order.
    """
    return [ProfileMostProbableKmer(sequence, k, profile) for sequence in Dna]

In [15]:
import random
def RandomizedMotifSearch(dna, k ,t):
    """Run one randomized motif search from a random starting point.

    A random k-mer is drawn from each string, then the motif set is refined
    repeatedly: build a pseudocount profile from the current motifs, reselect
    the most probable k-mer of every string under that profile, and keep going
    while the score strictly improves.

    Args:
        dna: List of strings, each at least k characters long.
        k: Motif length.
        t: Number of strings in `dna`. Not used by this function; kept so
            existing callers continue to work.

    Returns:
        The best-scoring list of motifs found (one k-mer per string).
    """
    motifs = []
    for string in dna:
        index = random.randint(0, len(string) - k)
        motifs.append(string[index:index + k])

    bestMotifs = motifs
    bestScore = Score(bestMotifs)
    while True:
        profile = ProfilePsuedocounts(motifs)
        # Feed the newly selected motifs back into the next profile; building
        # the profile from the initial random motifs on every pass would make
        # the search stop after a single refinement step.
        motifs = Motifs(dna, k, profile)
        currentScore = Score(motifs)
        if currentScore < bestScore:
            bestMotifs = motifs
            bestScore = currentScore
        else:
            # The score is a non-negative integer that must strictly decrease
            # to continue, so the loop always terminates.
            return bestMotifs

In [16]:
def loop(Dna, k, t, iterations=1000):
    """Repeat `RandomizedMotifSearch` and return the best motif set found.

    Args:
        Dna: List of strings to search.
        k: Motif length.
        t: Number of strings in `Dna`; forwarded to `RandomizedMotifSearch`.
        iterations: Number of independent random restarts. Defaults to 1000,
            the value that was previously hard-coded.

    Returns:
        The motif list with the lowest `Score` across all restarts, or an
        empty list when `iterations` is 0.
    """
    best_score = float('inf')
    best_motifs = []
    for _ in range(iterations):
        motifs = RandomizedMotifSearch(Dna, k, t)
        current_score = Score(motifs)
        if current_score < best_score:
            best_score = current_score
            best_motifs = motifs
    return best_motifs

In [17]:
# Sample input: five sequences searched for a shared motif of length 8.
Dna = [
    "CGCCCCTCTCGGGGGTGTTCAGTAAACGGCCA",
    "GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG",
    "TAGTACCGAGACCGAAAGAAGTATACAGGCGT",
    "TAGATCAAGTTTCAGGTGCACGTCGGTGAACC",
    "AATCCACCAGCTCCACGTGCAATGTTGGCCTA",
]
k, t = 8, 5

# The search is randomized and no seed is set here, so the printed motifs can
# differ from run to run.
print(loop(Dna, k, t))

['TTCAGTAA', 'CCAAGGTG', 'TACAGGCG', 'TTCAGGTG', 'TCCACGTG']
