In [5]:
"""

"""

import random

def n_rand_motif_search(n, dna, k, t):
    best_mtfs = randomized_motif_search(dna, k, t)
    for _ in range(n):
        current_mtfs = randomized_motif_search(dna, k, t)
        if score(current_mtfs) > score(best_mtfs):
            best_mtfs = current_mtfs
    return best_mtfs


def randomized_motif_search(dna, k, t):
    ms = random_motifs(dna, k, t)
    best_motifs = ms
    while True:
        profile = profile_pseudo(ms)
        ms = motifs(profile, dna)
        if score(ms) < score(best_motifs):
            best_motifs = ms
        else:
            return best_motifs 

# Subroutines
        
def random_motifs(dna, k, t):
    t_dna = len(dna)
    mx_k = len(dna[0])
    random_motifs = []
    for i in range(t_dna):
        k_rand_j = random.randint(1, mx_k-k)
        random_motifs.append(dna[i][k_rand_j:k_rand_j+k])
    return random_motifs


def profile_pseudo(motifs):
    t = len(motifs)
    k = len(motifs[0])
    n_b = 4
    profile = count_pseudo(motifs)
    for base, l in profile.items():
        nl = [c / (t+n_b) for c in l]
        profile[base] = nl
    return profile


def motifs(profile, dna):
    n_strings = len(dna)
    k = len(profile["A"])
    motifs = []
    for s in range(n_strings):
        motifs.append(profile_kmer(dna[s], k, profile))        
    return motifs

def profile_kmer(text, k, profile):
    n = len(text)
    mx_p = -1
    result = ""
    for i in range(n-k+1):
        pattern = text[i:i+k]
        if prb(pattern, profile) > mx_p:
            mx_p = prb(pattern, profile)
            result = pattern
    return result

def prb(pattern, profile):
    p = 1
    for i in range(len(pattern)):
        p *= profile[pattern[i]][i]
    return p


def score(motifs):
    count = count_pseudo(motifs)
    base_consensus = consensus(motifs)
    score = 0
    i = 0
    for b in base_consensus:
        for k, l in count.items():
            if b != k:
                score += l[i] 
        i += 1
    return score

def count_pseudo(motifs):
    count = {} 
    t = len(motifs)
    k = len(motifs[0])
    for symbol in "ACGT":
        count[symbol] = [1] * k
    for i in range(t):
        for j in range(k):
            symbol = motifs[i][j]
            count[symbol][j] += 1
    return count

def consensus(motifs):
    k = len(motifs[0])
    cnt = count(motifs)
    result = ""
    for b in range(k):
        mx = 0
        frequent_symbol = ""
        for symbol in "ACGT":
            if cnt[symbol][b] > mx:
                mx = cnt[symbol][b]
                frequent_symbol = symbol
        result += frequent_symbol
    return result

def count(motif):
    count = {} 
    t = len(motif)
    k = len(motif[0])
    for symbol in "ACGT":
        count[symbol] = []
        for j in range(k):
             count[symbol].append(0)
    for i in range(t):
        for j in range(k):
            symbol = motif[i][j]
            count[symbol][j] += 1
    return count

DosR_dna = ["GCGCCCCGCCCGGACAGCCATGCGCTAACCCTGGCTTCGATGGCGCCGGCTCAGTTAGGGCCGGAAGTCCCCAATGTGGCAGACCTTTCGCCCCTGGCGGACGAATGACCCCAGTGGCCGGGACTTCAGGCCCTATCGGAGGGCTCCGGCGCGGTGGTCGGATTTGTCTGTGGAGGTTACACCCCAATCGCAAGGATGCATTATGACCAGCGAGCTGAGCCTGGTCGCCACTGGAAAGGGGAGCAACATC",
            "CCGATCGGCATCACTATCGGTCCTGCGGCCGCCCATAGCGCTATATCCGGCTGGTGAAATCAATTGACAACCTTCGACTTTGAGGTGGCCTACGGCGAGGACAAGCCAGGCAAGCCAGCTGCCTCAACGCGCGCCAGTACGGGTCCATCGACCCGCGGCCCACGGGTCAAACGACCCTAGTGTTCGCTACGACGTGGTCGTACCTTCGGCAGCAGATCAGCAATAGCACCCCGACTCGAGGAGGATCCCG",
            "ACCGTCGATGTGCCCGGTCGCGCCGCGTCCACCTCGGTCATCGACCCCACGATGAGGACGCCATCGGCCGCGACCAAGCCCCGTGAAACTCTGACGGCGTGCTGGCCGGGCTGCGGCACCTGATCACCTTAGGGCACTTGGGCCACCACAACGGGCCGCCGGTCTCGACAGTGGCCACCACCACACAGGTGACTTCCGGCGGGACGTAAGTCCCTAACGCGTCGTTCCGCACGCGGTTAGCTTTGCTGCC",
            "GGGTCAGGTATATTTATCGCACACTTGGGCACATGACACACAAGCGCCAGAATCCCGGACCGAACCGAGCACCGTGGGTGGGCAGCCTCCATACAGCGATGACCTGATCGATCATCGGCCAGGGCGCCGGGCTTCCAACCGTGGCCGTCTCAGTACCCAGCCTCATTGACCCTTCGACGCATCCACTGCGCGTAAGTCGGCTCAACCCTTTCAAACCGCTGGATTACCGACCGCAGAAAGGGGGCAGGAC",
            "GTAGGTCAAACCGGGTGTACATACCCGCTCAATCGCCCAGCACTTCGGGCAGATCACCGGGTTTCCCCGGTATCACCAATACTGCCACCAAACACAGCAGGCGGGAAGGGGCGAAAGTCCCTTATCCGACAATAAAACTTCGCTTGTTCGACGCCCGGTTCACCCGATATGCACGGCGCCCAGCCATTCGTGACCGACGTCCCCAGCCCCAAGGCCGAACGACCCTAGGAGCCACGAGCAATTCACAGCG",
            "CCGCTGGCGACGCTGTTCGCCGGCAGCGTGCGTGACGACTTCGAGCTGCCCGACTACACCTGGTGACCACCGCCGACGGGCACCTCTCCGCCAGGTAGGCACGGTTTGTCGCCGGCAATGTGACCTTTGGGCGCGGTCTTGAGGACCTTCGGCCCCACCCACGAGGCCGCCGCCGGCCGATCGTATGACGTGCAATGTACGCCATAGGGTGCGTGTTACGGCGATTACCTGAAGGCGGCGGTGGTCCGGA",
            "GGCCAACTGCACCGCGCTCTTGATGACATCGGTGGTCACCATGGTGTCCGGCATGATCAACCTCCGCTGTTCGATATCACCCCGATCTTTCTGAACGGCGGTTGGCAGACAACAGGGTCAATGGTCCCCAAGTGGATCACCGACGGGCGCGGACAAATGGCCCGCGCTTCGGGGACTTCTGTCCCTAGCCCTGGCCACGATGGGCTGGTCGGATCAAAGGCATCCGTTTCCATCGATTAGGAGGCATCAA",
            "GTACATGTCCAGAGCGAGCCTCAGCTTCTGCGCAGCGACGGAAACTGCCACACTCAAAGCCTACTGGGCGCACGTGTGGCAACGAGTCGATCCACACGAAATGCCGCCGTTGGGCCGCGGACTAGCCGAATTTTCCGGGTGGTGACACAGCCCACATTTGGCATGGGACTTTCGGCCCTGTCCGCGTCCGTGTCGGCCAGACAAGCTTTGGGCATTGGCCACAATCGGGCCACAATCGAAAGCCGAGCAG",
            "GGCAGCTGTCGGCAACTGTAAGCCATTTCTGGGACTTTGCTGTGAAAAGCTGGGCGATGGTTGTGGACCTGGACGAGCCACCCGTGCGATAGGTGAGATTCATTCTCGCCCTGACGGGTTGCGTCTGTCATCGGTCGATAAGGACTAACGGCCCTCAGGTGGGGACCAACGCCCCTGGGAGATAGCGGTCCCCGCCAGTAACGTACCGCTGAACCGACGGGATGTATCCGCCCCAGCGAAGGAGACGGCG",
            "TCAGCACCATGACCGCCTGGCCACCAATCGCCCGTAACAAGCGGGACGTCCGCGACGACGCGTGCGCTAGCGCCGTGGCGGTGACAACGACCAGATATGGTCCGAGCACGCGGGCGAACCTCGTGTTCTGGCCTCGGCCAGTTGTGTAGAGCTCATCGCTGTCATCGAGCGATATCCGACCACTGATCCAAGTCGGGGGCTCTGGGGACCGAAGTCCCCGGGCTCGGAGCTATCGGACCTCACGATCACC"]
t = len(DosR_dna) 
k = 15
N = 10000

from datetime import datetime
startTime = datetime.now()        

Dos_R_motifs = n_rand_motif_search(N, DosR_dna, k, t)

print(datetime.now() - startTime)

print()
print("Random motif 100,000 iterations for Dos_R: ", Dos_R_motifs)
print()
print("Score: ", score(Dos_R_motifs))




0:03:43.949110

Random motif 100,000 iterations for Dos_R:  ['TTAGGGCCGGAAGTC', 'TTCGGCAGCAGATCA', 'TTAGGGCACTTGGGC', 'CAAGCGCCAGAATCC', 'GCAGGCGGGAAGGGG', 'TCCGCCAGGTAGGCA', 'GGCGCGGACAAATGG', 'GCAGCGACGGAAACT', 'ATCGGTCGATAAGGA', 'TCTGGGGACCGAAGT']

Score:  117


In [6]:
print("Consensus: ", consensus(Dos_R_motifs))

Consensus:  TCAGGGCGCAAAGGA
