In [1]:
import numpy as np
import random

In [2]:
NB_RUNS = 1000
PSEUDO_COUNT = True

BASES = {'A': 0, 'C': 1, 'G': 2, 'T': 3}

In [4]:
def form_profile(motifs):
    if PSEUDO_COUNT:
        counts = np.ones((4, len(motifs[0])))
    else:
        counts = np.zeros((4, len(motifs[0])))
    for line in motifs:
        for j, base in enumerate(line):
            counts[BASES[base]][j] += 1
    freqs = counts / counts.sum(axis=0, keepdims=True)
    return freqs.tolist()

def profile_most_probable_kmer(text, k, profile):
    probabilities = dict()
    for i in range(len(text) - k + 1):
        kmer = text[i:i+k]
        if kmer not in probabilities.keys():
            probability = 1
            for j, base in enumerate(kmer):
                probability *= float(profile[BASES[base]][j])
            probabilities[kmer] = probability
    most_probable, _ = max(probabilities.items(), key=lambda x:x[1])
    return most_probable

def form_motifs(profile, dna, t):
    k = len(profile[0])
    return [profile_most_probable_kmer(chunk, k, profile) for chunk in dna]     

def score_motifs(motifs):
    n = len(motifs[0])
    counts = np.zeros((4, n))
    for line in motifs:
        for j, base in enumerate(line):
            counts[BASES[base]][j] += 1
    score = sum(len(motifs) - counts.max(axis=0))
    return score

def random_motifs(dna, k, seed=-1):
    if seed >= 0:
        random.seed(seed)
    n = len(dna[0])
    motifs = list()
    for chunk in dna:
        i = random.choice(range(n - k + 1))
        motifs.append(chunk[i:i+k])
    return motifs

In [5]:
def single_randomized_motif_search(dna, k, t, seed):
    best_motifs = random_motifs(dna, k, seed)
    best_score = score_motifs(best_motifs)
    
    motifs = best_motifs
    while True:
        profile = form_profile(motifs)
        motifs = form_motifs(profile, dna, t)
        score = score_motifs(motifs)
        if score < best_score:
            best_motifs = motifs
            best_score = score
        else: 
            return best_score, best_motifs
        

def randomized_motif_search(dna, k, t, n=NB_RUNS):
    scores = list()
    motif_sets = list()
    for i in range(n):
        score, motifs = single_randomized_motif_search(dna, k, t, i)
        scores.append(score)
        motif_sets.append(motifs)
    return motif_sets[np.argmin(scores)]

In [6]:
k = 8
t = 5
dna = [
    'CGCCCCTCTCGGGGGTGTTCAGTAAACGGCCA',
    'GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG',
    'TAGTACCGAGACCGAAAGAAGTATACAGGCGT',
    'TAGATCAAGTTTCAGGTGCACGTCGGTGAACC',
    'AATCCACCAGCTCCACGTGCAATGTTGGCCTA',
]
sample_in = (dna, k, t)
sample_out = [
    'TCTCGGGG',
    'CCAAGGTG',
    'TACAGGCG',
    'TTCAGGTG',
    'TCCACGTG',
]

assert randomized_motif_search(*sample_in) == sample_out

In [8]:
k = 15 
t = 20
dna_ = 'TGCCTAAAAGGGTCGCTCTCAGCGTGAGAGGCGGCAGATAGTGTACGCGATAACTGAACGCGCTTGGCAAGGGATGACTTCCTACACTCTAGGTATCAATAGCGAACTATTCGATACGTTAATACAGTATAGCTGAAAGGTCGAAAACAACTCCTGCCTAAAAGGGTCG CTCTCAGCGTGAGAGGCGGCAGATAGTGTACGCGATAACTGAACGCGAAGATAACATTAGATCTTGGCAAGGGATGACTTCCTACACTCTAGGTATCAATAGCGAACTATTCGATACGTTAATACAGTATAGCTGAAAGGTCGAAAACAACTCCTGCCTAAAAGGGTCG GAGCTAGAGACGAGACAACCACCGGGGTCGGAAGCTTTCACTTGGACGCAAGTCTACATGTTATAGTCTACATCTTTACCTCGCGAAGGTCTTTTACAAGGGTACTAATACTTGATCCTCTAGGCTTGGCCGGCGTGGACTCCCCACCGAGCCCACATAATGGGTAAGC TGCGCATAAACATCCCGTGCTTTTGGGGCGGTAGATACCTGTGCCGTAACCAAATAGTACATTAGATTGGTTCCTTAGCAAAGGAACATGGGCGATTCAACCTTGGTCTCATTGCAACTCCCTGATGATGTTTGATAATGCCTCCTGATGCGACCGGGCAGACTTACGG TCGAAGTTGGGAATCTGTCGCATCTCGCTTAGTCAATCTGTTGGGTGGATCTCGTAGCCAAGGGCGTCAATGAGAGGGAGTCACACACCTGACGTCGGATGAAGTCTCACTTAGATTTCCAAGTCACTCCAAGCGTAGTACTGGAGGTTTTTACGCCGGGCACGACTAC TGGTCTTCGAGCTGCCGGTCTACATTAGAGGGTAGAATGGCAACCCCTTCTACGGCACAGATAGGGCCTATTTTAGATACCGGGGGCTTCTGGCCGGTGTAGTATCCGTGTCCGGGCAAATGGCCATCCATGCGTATTATCGCTCATTTGGGCGTTGGTTCATAATTCA ACCCCTCGAGGCTCCCCTGTTCACGAGTCACTTGCGGAAGTGCGCCTGACATCTCCATTTGACGTGCTCGGTGGATAGTAGAGAGACCTACATTAGATAGATCCTTGGGCTGGACTCGTTCTTTCTCCTCGCTAAGAGAGCGGTAGGACGTCGCAAGTAATGCGAGACC GCTTCAAGCCGATTCTCTACATTAGATGGCGAGCAAACTCTCACCTGAGGTTGCGTCGAAAGTCGAAACTGGATCGTGGCTACTTTTGCAGGTATAGTATCAAATGAGATTACGCCCCAAGGATAATGTGGGTAGACGCACACTCCGTGTAGATCTACGATGAAAGGAG CGGCTTCCTGTGGCGTTCAATCCGAAGCGTACGCAATTACTTTCAAACAGCCAAGGGATGCCCGTCCCCGATTTCGGACACGAGTTATACCCGGGGCCCTCTGCAATGAGACTATAAGTCCTAATTAGATGTTTCGATGCATGGGCAATGGCCTGGCTCTATAGCAGAC TAGAGGCTCTTGAGCACTCCTTACTGCGACACGATTAAGTCTACACCTGATATATCCGACGAGATGAAACCATAAATCTGGCGTCGAGTTCGGCCAAAACAGGTGACAGCGGAAGACCCACAACGCTAACCGAGCGTCCCTCAACCTTTACTTCAGTAAATGACTCTCT ATTTCTTCAGTTTAGTCTAAAATAGCAGCCTTTCTCTTCAAACACAAGCTGGGGGTTCACTTAAGCCATCCGCCGTGAGTGTACCGAATAAGTCTACCCAAGATTGTACGTGGGTCCTGTGACTTACCGGCTAGTGAAAACGGTAAAGTGGATGAAACTAGCATTCAGA AGCGGGAAGCACACCAAAGAGAACATTAGATCTGGTTAATTTGTTAACCGCGTATTGGGTTGATATCCTTATTGACAACCCAGCCGGGCAGGAATGCATTCTACCGGCGTGCTGAAAACAAGCCGTCATAAGCCCCGCGATTGAACCCTGTTCGCCAGATGGGACAGCA TAGAGGGATCCAAGAAATAAATCGTGCGCGACTAACAATATCGGGCTTATCGGTTTCGCCGTCGTTGATCCGGGACGTAAGTCTGATTTAGATACTACTACCGTGATCCCAAGTTTCTGTACGGACGAAGAGGGTGGGGAATCCAGACCTGACCGCCGCGCTCCTGATT ACTACCTTCGTTTTGGGCCAAGACAACTTCTAAGCAACAAATTTCAGACTCGTACTCGCATCGTGCTACTCAAAGGATATCGTGTTGCAGAAACAGTTTCATAATACGTCATGTCATACCCTTAAGAATGGGGTAATAAGTCTACATTATTCTACCAAGCAAGAATGGT TACACACCTATGAGGTTTTGAAATCTTGGCGGTTGGCCGGTCGAGCGGAGGACCATTAAAACGGTTCTATCATCAACTTCCAACGAACATACCGGAGTTCGCCACACTCGTGGGTGAATATAATGTCTCAAGTAGCCATTAGATTAGAGCTTTGGTCACAAACCCCGAG TTTGATTCAGAATGAAGGTCACATCTAAATCCGGCTTATCACTGCCTGGGCGTGCGGCAAAATTAACTCGCGGCCTACTCCAAGACGGAATGACCAACTGCCTCGTCCACGCGGGATGCTTGGCGGGATAAGTCTACATTCCGTACTGCCGAAACCGAATTTTTCGCCC TATGCCAAAATTCTGCATAGTATTACTATTAGCGACTCTAATAGATTTGGACTGTTGAGACTAATCTCTAATGGATTCCCCGATTTATAGCCTTTGGCAGAGTTCAAGACTGGTTACCCCACACATCTCTACCACCATCGATCGGAAAGGAAGTTCTCATTAGATATTT TCTCGATGCAAAGTCGCTATTAGATAAAAACGCAAAGCGCGAGGTGCTGACCAAGGCGTTGTCTGTAGGAACCGTAGAATACCAAAGTATTAATTTGTAGATTGCGCGATGTCAGTGACCGGCGCAGTCGGAGGCGTGACGGCGGGGGGATTCGGAAATCCGCGATTCA GCACGGCAGCATTGGCAAGGTGGCGTGCAGATCTGAAGTGTGGAATGTAGCGAATCGCGACTGGAGACATCGCCGTCCCCGAGCGAGCAACCTTTCACCCGTCATAGTGTTTCCAGCAATAGGTAAGTCTAATCTAGATAAGCTGGTCCGGTACGCTCTGGAGACACAC CATGATTATTGGAAGTAAATGTTGGCGTATATGGCGGGCCCGTTCATCGAAGGAACGCTTGCATGTATCGAGGCTCGGTACTGCCCTGCAGTATATCAGTAAACCGACTGGGCTAGACTGAGTAGTCTACATTAGTGTTGGCGAAGTTGCTAGCCTCATTTCTGAAGGA'

dna = [line for line in dna_.split(' ') if line]

print(' '.join(randomized_motif_search(dna, k, t)))

['TAGTGTACGCGATAA', 'AAGATAACATTAGAT', 'AAGTCTACATGTTAT', 'AATAGTACATTAGAT', 'AAGTCTCACTTAGAT', 'CGGTCTACATTAGAG', 'AGACCTACATTAGAT', 'TTCTCTACATTAGAT', 'AAGTCCTAATTAGAT', 'AAGTCTACACCTGAT', 'AAGTCTACCCAAGAT', 'AAGAGAACATTAGAT', 'AAGTCTGATTTAGAT', 'AAGTCTACATTATTC', 'AAGTAGCCATTAGAT', 'AAGTCTACATTCCGT', 'AAGTTCTCATTAGAT', 'AAGTCGCTATTAGAT', 'AAGTCTAATCTAGAT', 'TAGTCTACATTAGTG']


In [13]:
def generate_random_profile_kmer(text, profile, k):
    weights = list()
    n = len(text) - k + 1
    for i in range(n):
        kmer = text[i:i+k]
        weight = np.product([profile[BASES[base]][j] for j, base in enumerate(kmer)])
        weights.append(weight)
    norm = sum(weights)
    probabilities = np.array(weights) / norm # [w/norm for w in weights]
    z = np.random.choice(n, p=probabilities)
    motif = text[z:z+k]
    return motif

def gibbs_sampler(dna, k, t, n):
    best_motifs = randomized_motif_search(dna, k, t, n//5)
    best_score = score_motifs(best_motifs)
    
    motifs = best_motifs
    for _ in range(n):
        i = random.randint(0, t-1)
        old_motif = motifs.pop(i)
        profile = form_profile(motifs)
        motif_i = generate_random_profile_kmer(old_motif, profile, k)
        motifs.insert(i, motif_i)
        score = score_motifs(motifs)
        if score < best_score:
            best_motifs = motifs
            best_score = score
        else:
            # print('Best score: ', best_score)
            return best_motifs

In [14]:
k = 8 
t = 5 
n = 100
dna = [
    'CGCCCCTCTCGGGGGTGTTCAGTAACCGGCCA',
    'GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG',
    'TAGTACCGAGACCGAAAGAAGTATACAGGCGT',
    'TAGATCAAGTTTCAGGTGCACGTCGGTGAACC',
    'AATCCACCAGCTCCACGTGCAATGTTGGCCTA',
]

sample_in = (dna, k, t, n)
sample_out = ['TCTCGGGG', 'CCAAGGTG', 'TACAGGCG', 'TTCAGGTG', 'TCCACGTG']

assert gibbs_sampler(*sample_in) == sample_out

False

In [15]:
k = 15 
t = 20 
n = 2000
dna_ = 'GCGCCAGTGTGCATTGCTCCATACTAGGGATCTCTAAAATTCCGCCCGTCAGACACAAAGCCGCTGGCCGGATAAGAGAGCCCGACAATAGGCAGCATCCTACACTGGGCCCTACTGACACTATGGAAGTTACACTTCTCCGAGCGTGAACATCTACAACTCGTAGGGTTGGGGTGATCACTCAAGTGGCCTCGTGGATTCCGGAAACTTAAGGCAGGACAAAGTTATAGGCATTGACAGAGAGGTCACGGTAGTAGTCACTGTGCCTTCCTATCTAACTTATCCAGCTAGAAGCGGTACGCCATCTCCTCAGCGCCAGTGTGCATT GCTCCATACTAGGGATCTCTAAAATTCCGCCCGTCAGACACAAAGCCGCTGGCCGGATAAGAGAGCCCGACAATAGGCAGCATCCTACACTGGGCCCTACTGACACTATGGAAGTTACACTTCTCCGAGCGTGAACATCTACAACTCGTAGGGTTGGGGTGATCACTCAAGTGGCCTCGTGGATTCCGGAAACTTAAGGCAGGACAAAGTTATAGGCATTGACAGAGAGGTCACGGTAGGTCTTGGTAAAAACATAGTCACTGTGCCTTCCTATCTAACTTATCCAGCTAGAAGCGGTACGCCATCTCCTCAGCGCCAGTGTGCATT GGATCTAACATCAGCTAGAACTGACGGATTTTAACGTATGGAGCCGCCGAGGCTAATCTTTCAGAGATTATCATGACTTACTGGGATTCGGGAAGATTCCCGCGCGCTGGCGAGTTTGAGTCCGCTCTACGAGGGGTAACGAAAGAAGACAGACTCACATGTCTCGTCACAAGGTAAACGGATCTGTTGGAGGGCCGAGCTCTAGAATGAATCCCCGTAAAAAACTGTGGGGGGCAGCGGACGCCGGTCATTGATAGTCAAGGACCATATTGTGCGCCTACCACCAGGTGTCTGCCACGGCTATGGCATTGGCCCTCTAAACCGCAA GCGATTCATCGTCGGAAATACTATACGTTAAGGAAAACAAGTATAGGCCACCTCGCCGAGTCGAAAAAGCGCGGGGTTTTCTCACATGCCCGTGGAGACGAGTCGGAACGGTTCCAGAGCCGTTCTTTCCCTTCTACCGTGTCCGGAAGTGTCCCCGGGTAAACAATATGCCGTCCAGCAAGAATCGTGGGCGACCCAATTGGACCTTCTGACCTTTCACTTAGATATCCAACAATTTAGACTTGTCTAGGGAACGACGGTGTTGATTTCACAATACAGGCCTGAGGGCACTACAGGTCGTGTAAGTGTTATTCTGAGCCGTCACCG AGTGCCTTGACCGTCGGTCCCTTGAATGCGCTTCCACGGAACCGCCTCCTTCCTTGCTCAGCGTGTATGGTTAATCCGTGTCAGAGGCGGCATAAACCTTAATGTCGCGGTCATAATAGATATGGTCTACCTGCCGTTGCCTTCTTGGCTCCAGGACCGGGACCGAATACGTGTGGCGCGCGACCGGTCGTCCCCGTATCGACATGAAAACATTCATTGCGAACCCATCATGTATACCCTCGGCGGCTTCTAACATTTGTCATCTTTCTTTAGCGGTGGAGCGGATGTGAAAAGCAGCCGCAGTACTCACGGTGCCTTTCGCTAGTT GGCGCACCACGCTCCAATGACGAAAGTGTTGTCAGATGGCTGCTGTCCCAAAAAAAACAAACAGCACAGCTGCAGTACCAGGGGAGTTAGGTTACGTGTCGTGCTTTCACAGATGTAATCATCCTAGAAGGACACGTAAAGTGAACGTGCGTACTAGGTATGTTTTGAAGTTCCTTATCAGTCAAAAGATGGTCCAATTCGCCTGCTGGGCGCACGCCAACGGCTCGAAGTTTAAAGTCAAACCTAGAGTACGGCACCTAGACATAAACCCACGGGAGGGTTATCCACGGCTTAAGAAACAGACGTTTCGTAACTTAGACTGAGCCC GCGATACTTCGCTCAATTTGAACTGGACTTTAGCTAGGTACAGCCGCCATCTAGATCACCGTCCCCGTAAAAGATTGGCATAACGCTGCTACCGATAGCACGCTTCCATCGCGCGGTTCCGCGTCCTGACTTATAAGGCGGCCTAACGCAGATTAGGCAGTTATTTCTAGTTCTATGCGAGCGGAACAACATTAAAAACGACCAAACAGTTCCAAGGTATATATGCTTGGCCGTAGTCTAGAAGCATCACTTCCCATTATACTTGTCTACTCATATTAGGTCCTGGACGTACCCTGGAAACCCCGAGCCTTCGCAGCGGCGACCTGG TCTCCTGGAAATGGTAACTTAAAGCAGGGGAGATGTCTCTCGCACGAACTTGGGACCCAGTATCAACGCCTGCTGCAGATCACCCAAGAGAATAAACCTTACCCGTGTCGGAGGATTTGCGCGGCAAATTGGCTGCGTGCGACGTCAGTCACAGGTCCCCGTAAAGGGACCCTTAGGATCGGCTCTCCGTCGAGATTATCCGGTTTCAAAATCTCAGTATGCCGATGTTTGGACTACACACCTGCGTACTGGAACTCAAATCGTTGAGGAGCCCGCATTATCACGTATCCCGTGTATGAAGGTTACCTTTCTCCCTTATGATAAGTG CCAGTCAAACTACTCCAAGCGTCCGGGCTCTTGCACTTTGTTAGGGCCCAGGCAACAGAGCTCTCCCTCCATGTCTAGCGGCGCCATGTATCCGACAGGCTGCACGTCGGTCCAGAGTGACTGCGCCGTCGCGTTCCTGCTCGCGGCGTCCTAGTCCGAAACCTCACCCCACGTACAGTGCTTGTGTCCTGACGGTTGTCCGTAATAGAGGCTCGAATTGCCGGTGGCCGGTTAGTTTCGCAGGAGATGAAAAAGCCGGAGTCTGCGTCCGTAAAAACAAAAGCCAAGGGGAAATCCATACGATAATTTGACAGAGATCCCGACGAG TTGTCTCCTCTGTATAGCGGAACGTCGCACTGCGGATGCAAGATTCCGGATAGCTCACGGATCTGTTGGCTTAGACTTACCGGTTGTCGTAAGCTGCTGTCCTGTTAAAAACACTAGACGCGTGCTCTAGATTGGTATATATTGGGCCTCAACGAACGGCGGTTCTTTTCAGACCCTTCGCCAGCATACTGCTAAGTATGAGCCAGGGTGCACATACCTAGACGTCGTTGTGGGCAGGACACGGCATGTGGTAGCAACCACCAGGTCTGAAGAGATTTGCTCGTCATCTAGAGTCGCAGCGACAACTTGGTCATTAATGGGAGACTC TCTGAAAGTAGTCCCCAGCAAAACACGAGTTGCGATTGCCTGTCCTGTAGGGAGAAATCTGACGGATCTAAGCATCTTGGGCAAGACTATCGAATGGGTGTATATAGACGAGCAATCCGACATACACACACGGCTAGTAGGGATCAGCACCAGTTTGCGCCCCGGTTTCGTTTACGATCACTTTTCTTGACGGTCAAGAGTGAGATGCTCCCGTATGTGAGCTCGACAAGAATAATCATCGTACCGGGGGGCAATGCTGAACGTAGGCTGTCATTATTTTCAGGCCGTGGATATACCAATACACTTAGCTATGAAGGCGTGAGTCTG GCGGGACGGCCGATAGACCCACAAGCATAGCCTGTTGGAAGCTATCCGGCACACTTCACCCGTAAAAACAATCGAGTGAGCAAACGACAGGTCATGTAAAGTCTTTCCCATTACCGCACGGAGGTTTACGGACCCTCCGACTGTGGGCTATCATTCGCGTTACGCACTCCCCAGCAGCGAATAATTATCGTTCGTGCTTGCCGTTTTTCTCTACAGGAAACAAGGTATTCTATCGTAAACCGGCTCTGGTGGGTATACGTTTAATACCTAGCAAAACCTTAATAGTAATGACAGATACTATATACCCTGGATGACCATATATGACAA GACGCCGATATGGCAGCATCGATTGGTGGTGCCGACGCGCAGTAAAGCGTCAATGTAAAAACAGAGCAATAAATGCCTTCGCGAGTAAGATTTAAAGTAGCCCCGAAAAAATATCATTGTCCTTAACAGCTATCACGCTGAGGAGTGTCCAACTCGTTTCTATTTCCAGAAGATGGCTCACGACTATGATAGCAACTCCTCCGACATAGCTGGCTGCCGTCCCATAACCAGCCGAGCGCGTACAGCGTGCGCACGTTGGAGGACAACGCCTTATGGACAACGCCGATTTATTGTTGACACATGCCTGAATGGTAGAGCACATCAACC TACGAGTTAGGCGCGCCCCGTAAAAACGACCTTAAGCTGAGCAAGCTGAGGCTCCTCTTGAAGACCACGTGAACATGCTTATCTCTGATTATTTTTTAATTCTGGGGATGACAATCCAGCGTGACTCTTGGAGTGCTATGCGACAGTTATTCAATAATTAACACAGTTAGTGAGGAATTCAGCATAGCGGGCCGGCCATCCCCCCAAAGGTAGGCACACTTAACTGAAGAAGCCGAGCGCCATTAACTCGTCTCAGTACTTCGTAGTACTTAGTTAGGGACAAACTGCGGAATCCGACGGCAGTGACATGCGACTCCAATCTGCCAA CCCAGCTTAGATTTATCCAACCTAAGTAACAAATGAGTCCCTTCAAAAACACAGGTAGATGTCAAGTCTTTTGCGCACAATTGGATCTTATTGCCCAGCTACCTTGTACGTGCGCGCACAGGTATCCCGTTTCAAAACTGGCACCCAGTGGGGAATGGAGGCTCCCATCGGCGGCCTACGGGACTATGCAGTGGTAAGCACTAGAAGAGACATACTTGACAAAAGAAAGGCGTCTCGAGGAGATTGGAACCGAAGACTGTGTGCAAGCCCTGCGCTTCTGTTGCTGACTCGATGGATAGTTTAGGCGACTATAGCTAGTACGCGTCG TCGGTATTCGGACCTAGGGTCGGTTGCAGCTCTTGTGCACGGTATGGTGATCTGTTAACGTTGAGTTCTATCTGATACGCATTCTATATGCACATACAGCTACATGGTCGTCCCCCATAAAACATCCGCACTCTAGTCGGAGTAAACCTTCGGGCAGACGCGCACCTACTTCGGTGGACGCTTGGATGTGGGGGTCAAGTATCCGAAACCGTGGGCAGATGTAAGACCACTGTGGACGAACAGGCAGAGGCCTACTGACAGTCTATCGATGTTATTAATGATACCAACGCTCGGGCGACGTCAAGTAGTGAATTTCGTGTTGCACGC AGCTCAATTGCTAGGAGATCACTGGATCTCGGCGGTAGGCCCTTTGCTAGAAGGGAAGCTCTGGGGTAGTTAGTTTACGTAAAAACAATTCGTAGGACTTTCAAAGTAATCAACGTAATCGCTTTTGAACCCAAATATTATTTCAGATCCGCCTAGCTTTACTATGTGAGGCATAGCAAACTTGGGACGTCTCTAGGACAATTCTGGTATGTACGCAGACGCTTGACCCGGATTACGCTGTCGAAGGAAGTATAAGGCTCTGTTCGAACCCGTGAGGGAACCGGCGGTTCTCCGTCATGCTCAAGTAATGACTCATCATACCGAATG TTCCAACATCCGGGTGGGTGCATGACGAGTCCCCGTTCCAACAATAGGAAACAGGACACGCCACATGCGACTACTTTCCCTCACAACTTTCGTCCCCTGCCTAGATTGGAGCGTTAGCATAGGCATAGAAGTTGTACTAACTACTAAACTCCGGTGCTGACGCGGCGAAACCACCTTGTGGTCACATGGTCGAGCTTCAATTGGACGACGGAGATCTCACCATGTAAAAGTATTGATTTCCGATAGGCTACCTTCCATGGGCTGCTCCAGACGTCATCAACCCGTCAATGCGGGAGGCTCAAAACATTATTGGCACAGCATGAAGAA TATGCAGATTGCGCCATCCGAATTCAATGCAACGGCCTTAGGGTGAGCTATTCTCCCTGTCCGACTAAAAACAGCGGGTCAGCCCTGCGGATTCCACGGGCGAGGTGCACTACGTTCTAGGCGTGGACAACAGGACATTGAGGCAGTACGGCCGGTCCCACGATCAGTCGCTGTTCATAATTTGAGGTCACGCATCCATCAAGAACATGAAAATCTTCATCACTGCACATAAATGCGAAGAGCACGGGCCTCATCTTCCGAAAAGGACGCATGTTCTTTTGCCGGACACACGCTGAGTCGAAGGGTAAAACAGACTGATACTCCGGT CTTGGCGCTGCATTGGCAGCACCAAGGGCTGACGCGCAGCACCCTATACTCCTATATCAATAGACGGAATTGGGTTCGCCAACCACTTAGTAAACTTCCTGAACCCGTAAGGCGGCATTCGTTGAATTGGGTGCAAACTTTACGCATTTGGGCCGGCAGCTCAATTCTTACGTAACAGAAATCCAGCTTGATGTGTGAGTCCCCGTAACGTCATAGATCTGTCAGCGTCCGCATCTACGGCGTGAAGTACGCAGGGCTTTAGGCTCCGCAGCCTCCCTGATCGCTCTGGGGTCCAATGTCAATTTGGCCCGACAGCGATGCAAATGG'

dna = [line for line in dna_.split(' ') if line]

result = gibbs_sampler(dna, k, t, n)
print(' '.join(result))


CTCCGAGCGTGAACA GTCTTGGTAAAAACA ATCCCCGTAAAAAAC GTCCCCGGGTAAACA GTCCCCGTATCGACA GTCCCAAAAAAAACA GTCCCCGTAAAAGAT GTCCCCGTAAAGGGA GCGTCCGTAAAAACA GTCCTGTTAAAAACA GTCCCCAGCAAAACA TCACCCGTAAAAACA GTCAATGTAAAAACA CGCCCCGTAAAAACG GTCCCTTCAAAAACA GTCCCCCATAAAACA GTTTACGTAAAAACA GTCCCCGTTCCAACA GTCCGACTAAAAACA GTCCCCGTAACGTCA
