### GibbsSamplerWithMultipleStarts(Dna, k, t, N, num_starts=20)

In [None]:
import random

def RandomMotifs(Dna, k, t):
    """Pick one uniformly random k-mer from each of the first t strings of Dna.

    Args:
        Dna: sequence of strings to sample from.
        k: length of each k-mer.
        t: number of strings (taken from the start of Dna) to sample.

    Returns:
        A list of t substrings, the i-th one taken from Dna[i].

    Raises:
        ValueError: if one of the sampled strings is shorter than k.
        IndexError: if t is larger than len(Dna).
    """
    Motifs = []
    for i in range(t):
        text = Dna[i]
        # Bound the start position by THIS string's length; using the length
        # of Dna[0] for every string yields truncated k-mers (or never samples
        # the tail) when the strings differ in length.
        last_start = len(text) - k
        if last_start < 0:
            raise ValueError("Dna[%d] is shorter than k=%d" % (i, k))
        start_index = random.randint(0, last_start)
        Motifs.append(text[start_index:start_index + k])
    return Motifs

def CountWithPseudocounts(Motifs):
    """Tally nucleotides per column, starting every tally at 1 (pseudocount).

    Returns a dict keyed by "A", "C", "G", "T"; each value is a list with one
    entry per column of Motifs[0].
    """
    width = len(Motifs[0])
    count = {}
    for symbol in "ACGT":
        # Every cell starts at 1 so no nucleotide ever has a zero count.
        count[symbol] = [1] * width
    for row in Motifs:
        for column in range(len(row)):
            count[row[column]][column] += 1
    return count

def ProfileWithPseudocounts(Motifs):
    """Build the profile matrix of Motifs from pseudocounted tallies.

    Each tally from CountWithPseudocounts is divided by len(Motifs) + 4: the
    number of motifs plus the one pseudocount added for each of the four
    nucleotides. Returns a dict mapping each nucleotide to its list of
    per-column frequencies.
    """
    denominator = len(Motifs) + 4
    tallies = CountWithPseudocounts(Motifs)
    return {
        nucleotide: [tally / denominator for tally in column_tallies]
        for nucleotide, column_tallies in tallies.items()
    }

def Consensus(Motifs):
    """Return the consensus string of Motifs.

    For each column the nucleotide with the highest pseudocounted tally is
    chosen; ties go to whichever comes first in the order A, C, G, T.
    """
    width = len(Motifs[0])
    tallies = CountWithPseudocounts(Motifs)
    # max() returns the first maximal item, which gives the A < C < G < T
    # tie-breaking order.
    return "".join(
        max("ACGT", key=lambda nucleotide: tallies[nucleotide][column])
        for column in range(width)
    )

def Score(Motifs):
    """Count the positions where the motifs disagree with their consensus.

    Returns the total number of mismatches between every motif and
    Consensus(Motifs); lower means the motifs are more alike.
    """
    reference = Consensus(Motifs)
    return sum(
        1
        for motif in Motifs
        for position, nucleotide in enumerate(motif)
        if nucleotide != reference[position]
    )

def Pr(String, Profile):
    """Return the probability of String under Profile.

    Profile maps each nucleotide to a list of per-column values; the result
    is the product of Profile[nucleotide][column] over the characters of
    String. An empty String yields 1.
    """
    likelihood = 1
    for column, symbol in enumerate(String):
        column_weights = Profile[symbol]
        likelihood = likelihood * column_weights[column]
    return likelihood

def ProfileMostProbableKmer(Text, k, Profile):
    """Return the k-mer of Text with the highest probability under Profile.

    Windows are scanned left to right and only a strictly higher probability
    replaces the current best, so the leftmost window wins ties. If Text has
    no full window of length k, Text[:k] is returned.
    """
    # Start from the prefix with a sentinel below any probability Pr can give.
    best_kmer, best_prob = Text[:k], -1
    for start in range(len(Text) - k + 1):
        candidate = Text[start:start + k]
        candidate_prob = Pr(candidate, Profile)
        if candidate_prob > best_prob:
            best_kmer, best_prob = candidate, candidate_prob
    return best_kmer

def _ProfileRandomKmer(Text, k, Profile):
    """Draw one k-mer of Text at random, weighted by its Pr under Profile."""
    kmers = [Text[start:start + k] for start in range(len(Text) - k + 1)]
    if not kmers:
        # Text is shorter than k: there is nothing to sample, return the prefix.
        return Text[:k]
    weights = [Pr(kmer, Profile) for kmer in kmers]
    if sum(weights) <= 0:
        # random.choices rejects all-zero weights; fall back to a uniform draw.
        return random.choice(kmers)
    return random.choices(kmers, weights=weights)[0]


def GibbsSampler(Dna, k, t, N):
    """Run one Gibbs-sampling motif search from a random start.

    Starting from RandomMotifs(Dna, k, t), each of the N iterations picks one
    motif index at random, builds a pseudocounted profile from the other
    motifs, and replaces the picked motif with a k-mer drawn at random from
    Dna[i] with probability proportional to Pr under that profile.

    Args:
        Dna: sequence of strings to search.
        k: motif length.
        t: number of motifs (one per string, taken from the start of Dna).
        N: number of resampling iterations.

    Returns:
        The lowest-Score list of motifs seen during the run.
    """
    Motifs = RandomMotifs(Dna, k, t)
    BestMotifs = Motifs[:]
    BestScore = Score(BestMotifs)

    for _ in range(N):
        i = random.randint(0, t - 1)
        # The profile must not include the motif that is about to be replaced.
        Profile = ProfileWithPseudocounts(Motifs[:i] + Motifs[i + 1:])
        # Sample instead of taking the most probable k-mer: the random draw is
        # what lets the search leave a local optimum.
        Motifs[i] = _ProfileRandomKmer(Dna[i], k, Profile)

        current_score = Score(Motifs)
        if current_score < BestScore:
            BestMotifs = Motifs[:]
            BestScore = current_score

    return BestMotifs

def GibbsSamplerWithMultipleStarts(Dna, k, t, N, num_starts=20):
    """Run GibbsSampler num_starts times and keep the best-scoring result.

    Every time a run improves on the best score so far, the new score is
    printed. Returns the motifs of the best run, or None when num_starts
    is 0.
    """
    BestMotifs, BestScore = None, float("inf")

    for _ in range(num_starts):
        candidate = GibbsSampler(Dna, k, t, N)
        candidate_score = Score(candidate)
        if candidate_score < BestScore:
            # New overall best: remember it and report the improved score.
            BestMotifs, BestScore = candidate, candidate_score
            print(BestScore)

    return BestMotifs

def aspas_vir_str():
    """Read the DNA strings from the dataset file.

    Opens "dataset_30309_11 (2).txt", takes its second line and returns the
    whitespace-separated words on that line as a list.
    """
    with open("dataset_30309_11 (2).txt", "r") as dataset:
        second_line = list(dataset)[1]
    # split() with no arguments already discards leading/trailing whitespace.
    return second_line.split()

# Driver: load the DNA strings from the dataset file and print the best
# motifs found by the multi-start sampler.
Dna = aspas_vir_str()
# k: motif length, t: number of motifs, N: iterations per sampler run.
k, t, N = 15, 20, 2000

print(GibbsSamplerWithMultipleStarts(Dna=Dna, k=k, t=t, N=N, num_starts=20))