# ba2a Implement MotifEnumeration

In [1]:
with open('rosalind_ba2a.txt') as file:
    k, d = map(lambda x: int(x), file.readline().rstrip().split(' '))
    dna = list(map(lambda x: x.rstrip(), file.readlines()))


def get_hamming_distance(dna1, dna2):
    hamming_distance = 0
    for i in range(len(dna1)):
        if dna1[i] != dna2[i]:
            hamming_distance += 1
    return hamming_distance


def get_neighbor(current_index, current_distance, max_d, current_pattern, neighbors):
    if current_distance >= max_d or current_index >= len(current_pattern):
        return neighbors
    new_pattern = current_pattern.copy()
    for symbol in ['A', 'C', 'G', 'T']:
        if symbol != current_pattern[current_index]:
            new_pattern[current_index] = symbol
            neighbors.append(''.join(new_pattern))
            get_neighbor(current_index + 1, current_distance + 1, max_d, new_pattern, neighbors)
    get_neighbor(current_index + 1, current_distance, max_d, current_pattern, neighbors)
    return neighbors


def get_k_mers_from_dna(dna, k):
    k_mers = set()
    pattern_len = len(dna[0])
    for pattern in dna:
        for i in range(pattern_len - k + 1):
            k_mers.add(pattern[i: i+k])
    return k_mers


def appear_in_dna(dna, k_mer, hamming_distance):
    pattern_len = len(dna[0])
    for pattern in dna:
        found = False
        for i in range(pattern_len - len(k_mer) + 1):
            current_k_mer = pattern[i: i+len(k_mer)]
            if get_hamming_distance(current_k_mer, k_mer) <= hamming_distance:
                found = True
                break
        if not found:
            return False
    return True


motif_list = set()
k_mer_list = get_k_mers_from_dna(dna, k)
for k_mer in k_mer_list:
    neighbors = get_neighbor(0, 0, d, list(k_mer), [])
    neighbors.append(k_mer)
    for neighbor in neighbors:
        if neighbor in motif_list:
            continue
        if appear_in_dna(dna, neighbor, d):
            motif_list.add(neighbor)
output = ' '.join(motif_list)
print(output)
with open('output.txt', 'w') as file:
    file.write(output)



ATACG ACTAG GTAGA TGAAC CAAAA ACATG ACGTC GCATA GTACA AAGCC GATAC ACTCG AGTAG ACACG CTTCG CGTAA GACCA GTACT AACTC ATCTA GCAAG TAGCA CGCCA TAGAT CCAGC CTTAC AACTG AAATT ACGAT GAACC CTAAA TCAGC AATGC GCCAC ATCCG TAAAG TAGTA ATAGG CCTCA GCTAC TAACC ACTTG GATCA AAACG ACGAC GGAGT ATTCG TCTAC ATTGT CCACA TCGTA AAAGA GCACC TTTCG CAAAG CTGCT TGTAG TTGAA CATTC AAATG ATACA AGTCT CGAAA CTAAC CATGC ACCAT ACAGA AGCAC TTATC GGACT TTTAG CATCC ATAGT CCGAC ATAGC TCGAA CAGAG GACTA ACGTA TGCAC CTAGA ATCAC CAAGG GACAA CACCT CACAG TGCAG GCAGA AAACA CTACT AAGAG GCAAC TAGGT CAACC TAACT AGATG ACGTT GCTCA CGACC AACGC TCATT CGGAA AGACC TCCCA CTACC CCGGA AACCT AATTG AGCTA GACAT ACCTT ATGAA GTCAC TGCTA CGTAG ATCCA TACAA ACCGA CACTT CAGTG TCCAA TTAAC GGACC ATACT ACAAC TAGTC TACGA ACTAT CAGAA TACGC TACCA TACTC GCCAA ATAAC ACTTA TCTGT AGAAA CCTAC GCGAC CGTAC CCATT ACAGG CTCGT AAGCT ACTAA TAGCC CAGCA AGAAC CGGTA AACCC CGCAA TAACA CCACC CCAAC AATGA TCACA CTCAC ACCTG TTCCG AAGTT TGCAT AGTTA TAGCT CCAAA CATAC ACTCC CCCA

# ba2b Find a Median String

In [2]:
import itertools


def get_hamming_distance(dna1, dna2):
    dna_len = len(dna1)
    hamming_distance = 0

    for i in range(dna_len):
        if dna1[i] != dna2[i]:
            hamming_distance += 1
    return hamming_distance


def get_text_distance(text, pattern):
    text_len = len(text)
    pattern_len = len(pattern)
    min_distance = pattern_len

    for i in range(text_len-pattern_len+1):
        temp_pattern = text[i:i+pattern_len]
        distance = get_hamming_distance(temp_pattern, pattern)
        if min_distance > get_hamming_distance(temp_pattern, pattern):
            min_distance = distance
    return min_distance


def get_dna_distance(dna, pattern):
    distance = 0
    for text in dna:
        text_distance = get_text_distance(text, pattern)
        distance += text_distance
    return distance


with open('rosalind_ba2b.txt') as file:
    k = int(file.readline().rstrip())
    dna = [pattern.rstrip() for pattern in file.readlines()]

symbols = 'GACT'
all_patterns = list(itertools.product(symbols, repeat=k))
min_distance = k*len(dna)
median = ''
for pattern in all_patterns:
    pattern = ''.join(pattern)
    distance = get_dna_distance(dna, pattern)
    if min_distance > distance:
        min_distance = distance
        median = pattern
output = median
print(output)
with open('output.txt', 'w') as file:
    file.write(output)

TTGTGC


# ba2c Find a Profile-most Probable k-mer in a String

In [3]:
symbol_index = {'A': 0, 'C': 1, 'G': 2, 'T': 3}


def get_probability(profile, pattern):
    pattern_len = len(pattern)
    probability = 1
    for i in range(pattern_len):
        probability *= profile[symbol_index[pattern[i]]][i]
    return probability


def get_most_probable_k_mer(profile, text, k):
    most_probable_k_mer = ''
    max_probability = -1
    text_len = len(text)
    for i in range(text_len - k + 1):
        pattern = text[i:i + k]
        probability = get_probability(profile, pattern)
        if probability > max_probability:
            max_probability = probability
            most_probable_k_mer = pattern
    return most_probable_k_mer


with open('rosalind_ba2c.txt') as file:
    text = file.readline().rstrip()
    k = int(file.readline().rstrip())
    rows = [row.rstrip() for row in file.readlines()]
    profile_matrix = []
    for row in rows:
        profile_matrix.append([float(probability) for probability in row.split(' ')])


output = get_most_probable_k_mer(profile_matrix, text, k)
print(output)
with open('output.txt', 'w') as file:
    file.write(output)


AAAGTAC


# ba2d Implement GreedyMotifSearch

In [4]:
import numpy as np

symbol_index = {'A': 0, 'C': 1, 'G': 2, 'T': 3}


def get_profile_from_motifs(motifs):
    k = len(motifs[0])
    t = len(motifs)
    profile = [[0.0]*k, [0.0]*k, [0.0]*k, [0.0]*k]
    for text in motifs:
        for i in range(k):
            index = symbol_index[text[i]]
            profile[index][i] += 1
    np_profile = np.array(profile)/t
    profile = np_profile.tolist()
    return profile


def get_probability(profile, pattern):
    pattern_len = len(pattern)
    probability = 1
    for i in range(pattern_len):
        probability *= profile[symbol_index[pattern[i]]][i]
    return probability


def get_score_from_motifs(motifs):
    t = len(motifs)
    motif_len = len(motifs[0])
    score = 0
    for i in range(motif_len):
        count = {}
        best_count = 0
        for motif in motifs:
            symbol = motif[i]
            if symbol not in count.keys():
                count[symbol] = 1
            else:
                count[symbol] += 1
            if count[symbol] > best_count:
                best_count = count[symbol]
        score += (t-best_count)
    return score


def get_most_probable_k_mer(profile, text, k):
    most_probable_k_mer = ''
    max_probability = -1
    text_len = len(text)
    for i in range(text_len - k + 1):
        pattern = text[i:i + k]
        probability = get_probability(profile, pattern)
        if probability > max_probability:
            max_probability = probability
            most_probable_k_mer = pattern
    return most_probable_k_mer


def greedy_motif_search(dna, k, t):
    best_motifs = [text[:k] for text in dna]
    best_score = get_score_from_motifs(best_motifs)
    text_len = len(dna[0])
    for i in range(text_len-k+1):
        current_motifs = [dna[0][i:i + k]]
        for j in range(1, t):
            current_profile = get_profile_from_motifs(current_motifs)
            most_probable_pattern = get_most_probable_k_mer(current_profile, dna[j], k)
            current_motifs.append(most_probable_pattern)
        current_score = get_score_from_motifs(current_motifs)
        if current_score < best_score:
            best_motifs = current_motifs
            best_score = current_score
    return best_motifs


with open('rosalind_ba2d.txt') as file:
    k, t = map(lambda x: int(x), file.readline().rstrip().split(' '))
    dna = [pattern.rstrip() for pattern in file.readlines()]


best_motifs = greedy_motif_search(dna, k, t)
output = '\n'.join(best_motifs)
print(output)
with open('output.txt', 'w') as file:
    file.write(output)

GTCCATCCGGAT
TTCATAAAATAT
TGTATGGGCCGC
TGCATTACATAT
ACCAATCCGTTT
ACCAATCCGCTT
TTCCTTGGCCAT
AAGAGTTATAAA
ACTATTACGCTT
ACTATTACGTTT
ACCAATGCGCTT
ACCAATCATTGC
ACGATTACGCTT
ACGAATGCGGTT
ACTAGTGCGGTT
ATTAATTCTCTC
ACCAGGGCCGTT
ACTAAAGGCGAT
ACTAGTTCGATT
ACGAGTTCGGTT
ACCAGTCCGTTT
ACCAGTACGATT
ACCATTCCGCTT
ACGATTTCGCTT
ACTATTGCGTTT


# ba2e Implement GreedyMotifSearch with Pseudocounts

In [5]:
import numpy as np

symbol_index = {'A': 0, 'C': 1, 'G': 2, 'T': 3}


def get_profile_from_motifs(motifs, pseudo_count=True):
    k = len(motifs[0])
    t = len(motifs)
    count = 0.0
    if pseudo_count:
        count = 1.0
    profile = [[count]*k, [count]*k, [count]*k, [count]*k]
    for text in motifs:
        for i in range(k):
            index = symbol_index[text[i]]
            profile[index][i] += 1
    divisor = t
    if pseudo_count:
        divisor += 4
    np_profile = np.array(profile)/divisor
    profile = np_profile.tolist()
    return profile


def get_probability(profile, pattern):
    pattern_len = len(pattern)
    probability = 1
    for i in range(pattern_len):
        probability *= profile[symbol_index[pattern[i]]][i]
    return probability


def get_score_from_motifs(motifs):
    t = len(motifs)
    motif_len = len(motifs[0])
    score = 0
    for i in range(motif_len):
        count = {}
        best_count = 0
        for motif in motifs:
            symbol = motif[i]
            if symbol not in count.keys():
                count[symbol] = 1
            else:
                count[symbol] += 1
            if count[symbol] > best_count:
                best_count = count[symbol]
        score += (t-best_count)
    return score


def get_most_probable_k_mer(profile, text, k):
    most_probable_k_mer = ''
    max_probability = -1
    text_len = len(text)
    for i in range(text_len - k + 1):
        pattern = text[i:i + k]
        probability = get_probability(profile, pattern)
        if probability > max_probability:
            max_probability = probability
            most_probable_k_mer = pattern
    return most_probable_k_mer


def greedy_motif_search(dna, k, t):
    best_motifs = [text[:k] for text in dna]
    best_score = get_score_from_motifs(best_motifs)
    text_len = len(dna[0])
    for i in range(text_len-k+1):
        current_motifs = [dna[0][i:i + k]]
        for j in range(1, t):
            current_profile = get_profile_from_motifs(current_motifs)
            most_probable_pattern = get_most_probable_k_mer(current_profile, dna[j], k)
            current_motifs.append(most_probable_pattern)
        current_score = get_score_from_motifs(current_motifs)
        if current_score < best_score:
            best_motifs = current_motifs
            best_score = current_score
    return best_motifs


with open('rosalind_ba2e.txt') as file:
    k, t = map(lambda x: int(x), file.readline().rstrip().split(' '))
    dna = [pattern.rstrip() for pattern in file.readlines()]


best_motifs = greedy_motif_search(dna, k, t)
output = '\n'.join(best_motifs)
print(output)
with open('output.txt', 'w') as file:
    file.write(output)

AGGTGGGTAGCT
TGTTGGGTCGCT
AGGTAGGTTGCT
TGCTAGGTAGCT
GGCTGGGTTGCT
TGCTGGGTAGCT
CGCTCGGTGGCT
GGGTTGGTGGCT
AGTTGGGTGGCT
GGCTGGGTAGCT
GGGTTGGTTGCT
CGCTCGGTCGCT
AGTTCGGTTGCT
AGCTGGGTTGCT
GGATGGGTTGCT
GGCTAGGTGGCT
CGGTTGGTTGCT
AGTTAGGTGGCT
GGCTTGGTGGCT
CGCTCGGTCGCT
GGTTTGGTTGCT
GGCTAGGTGGCT
AGTTTGGTAGCT
CGGTGGGTGGCT
AGTTAGGTGGCT


# ba2f Implement RandomizedMotifSearch

In [6]:
import random
import numpy as np

symbol_index = {'A': 0, 'C': 1, 'G': 2, 'T': 3}


def get_most_probable_k_mer(profile, text, k):
    most_probable_k_mer = ''
    max_probability = -1
    text_len = len(text)
    for i in range(text_len - k + 1):
        pattern = text[i:i + k]
        probability = get_probability(profile, pattern)
        if probability > max_probability:
            max_probability = probability
            most_probable_k_mer = pattern
    return most_probable_k_mer


def get_most_probable_k_mers(profile, dna, k):
    k_mers = []
    for text in dna:
        k_mers.append(get_most_probable_k_mer(profile, text, k))
    return k_mers


def get_probability(profile, pattern):
    pattern_len = len(pattern)
    probability = 1
    for i in range(pattern_len):
        probability *= profile[symbol_index[pattern[i]]][i]
    return probability


def get_profile_from_motifs(motifs, pseudo_count=False):
    k = len(motifs[0])
    t = len(motifs)
    count = 0.0
    if pseudo_count:
        count = 1.0
    profile = [[count]*k, [count]*k, [count]*k, [count]*k]
    for text in motifs:
        for i in range(k):
            index = symbol_index[text[i]]
            profile[index][i] += 1
    divisor = t
    if pseudo_count:
        divisor += 4
    np_profile = np.array(profile)/divisor
    profile = np_profile.tolist()
    return profile


def get_score_from_motifs(motifs):
    t = len(motifs)
    motif_len = len(motifs[0])
    score = 0
    for i in range(motif_len):
        count = {}
        best_count = 0
        for motif in motifs:
            symbol = motif[i]
            if symbol not in count.keys():
                count[symbol] = 1
            else:
                count[symbol] += 1
            if count[symbol] > best_count:
                best_count = count[symbol]
        score += (t-best_count)
    return score


def get_random_motifs(dna, k):
    text_len = len(dna[0])
    t = len(dna)
    random_motifs = []
    for i in range(t):
        random_index = random.randint(0, text_len-k)
        pattern = dna[i][random_index: random_index+k]
        random_motifs.append(pattern)
    return random_motifs


def randomized_motif_search(dna, k, t):
    best_motifs = get_random_motifs(dna, k)
    best_score = get_score_from_motifs(best_motifs)
    current_motifs = best_motifs.copy()
    while True:
        profile = get_profile_from_motifs(current_motifs, True)
        current_motifs = get_most_probable_k_mers(profile, dna, k)
        current_score = get_score_from_motifs(current_motifs)
        if current_score < best_score:
            best_score = current_score
            best_motifs = current_motifs
        else:
            return best_motifs, best_score


with open('rosalind_ba2f.txt') as file:
    k, t = map(lambda x: int(x), file.readline().rstrip().split(' '))
    dna = [pattern.rstrip() for pattern in file.readlines()]


best_score = k*t
best_motifs = []
for i in range(1000):
    motifs, score = randomized_motif_search(dna, k, t)
    if best_score > score:
        best_motifs = motifs
        best_score = score
    print(f'{i+1}/1000')
output = '\n'.join(best_motifs)
print(output)
with open('output.txt', 'w') as file:
    file.write(output)

1/1000
2/1000
3/1000
4/1000
5/1000
6/1000
7/1000
8/1000
9/1000
10/1000
11/1000
12/1000
13/1000
14/1000
15/1000
16/1000
17/1000
18/1000
19/1000
20/1000
21/1000
22/1000
23/1000
24/1000
25/1000
26/1000
27/1000
28/1000
29/1000
30/1000
31/1000
32/1000
33/1000
34/1000
35/1000
36/1000
37/1000
38/1000
39/1000
40/1000
41/1000
42/1000
43/1000
44/1000
45/1000
46/1000
47/1000
48/1000
49/1000
50/1000
51/1000
52/1000
53/1000
54/1000
55/1000
56/1000
57/1000
58/1000
59/1000
60/1000
61/1000
62/1000
63/1000
64/1000
65/1000
66/1000
67/1000
68/1000
69/1000
70/1000
71/1000
72/1000
73/1000
74/1000
75/1000
76/1000
77/1000
78/1000
79/1000
80/1000
81/1000
82/1000
83/1000
84/1000
85/1000
86/1000
87/1000
88/1000
89/1000
90/1000
91/1000
92/1000
93/1000
94/1000
95/1000
96/1000
97/1000
98/1000
99/1000
100/1000
101/1000
102/1000
103/1000
104/1000
105/1000
106/1000
107/1000
108/1000
109/1000
110/1000
111/1000
112/1000
113/1000
114/1000
115/1000
116/1000
117/1000
118/1000
119/1000
120/1000
121/1000
122/1000
123/1000
1

928/1000
929/1000
930/1000
931/1000
932/1000
933/1000
934/1000
935/1000
936/1000
937/1000
938/1000
939/1000
940/1000
941/1000
942/1000
943/1000
944/1000
945/1000
946/1000
947/1000
948/1000
949/1000
950/1000
951/1000
952/1000
953/1000
954/1000
955/1000
956/1000
957/1000
958/1000
959/1000
960/1000
961/1000
962/1000
963/1000
964/1000
965/1000
966/1000
967/1000
968/1000
969/1000
970/1000
971/1000
972/1000
973/1000
974/1000
975/1000
976/1000
977/1000
978/1000
979/1000
980/1000
981/1000
982/1000
983/1000
984/1000
985/1000
986/1000
987/1000
988/1000
989/1000
990/1000
991/1000
992/1000
993/1000
994/1000
995/1000
996/1000
997/1000
998/1000
999/1000
1000/1000
CAGAGAATGGTAAAC
CAGTCATTGCCCGCC
GCGAGGTTGCCCGCA
CAGAGGTTGCCCAGT
CAGAGGGGTCCCGCC
CAAGTGTTGCCCGCC
GAGAGGTTGCCCGTT
CAGAAACTGCCCGCC
CAGACTCTGCCCGCC
CAGAGGTTGAATGCC
CAGAGGACACCCGCC
CAGAGGTTCGTCGCC
CAGTTTTTGCCCGCC
CAGAGAGGGCCCGCC
CCCCGGTTGCCCGCC
CAGAGGTTGCAAACC
CAGAGGTGAGCCGCC
GTTAGGTTGCCCGCC
CAGAGTGAGCCCGCC
CAGAGGTTGCCTTTC


# ba2g Implement GibbsSampler

In [7]:
import random
import numpy as np

symbol_index = {'A': 0, 'C': 1, 'G': 2, 'T': 3}


def get_random_motifs(dna, k):
    text_len = len(dna[0])
    t = len(dna)
    random_motifs = []
    for i in range(t):
        random_index = random.randint(0, text_len-k)
        pattern = dna[i][random_index: random_index+k]
        random_motifs.append(pattern)
    return random_motifs


def get_probability(profile, pattern):
    pattern_len = len(pattern)
    probability = 1
    for i in range(pattern_len):
        probability *= profile[symbol_index[pattern[i]]][i]
    return probability


def get_most_probable_k_mer(profile, text, k):
    most_probable_k_mer = ''
    max_probability = -1
    text_len = len(text)
    for i in range(text_len - k + 1):
        pattern = text[i:i + k]
        probability = get_probability(profile, pattern)
        if probability > max_probability:
            max_probability = probability
            most_probable_k_mer = pattern
    return most_probable_k_mer


def get_score_from_motifs(motifs):
    t = len(motifs)
    motif_len = len(motifs[0])
    score = 0
    for i in range(motif_len):
        count = {}
        best_count = 0
        for motif in motifs:
            symbol = motif[i]
            if symbol not in count.keys():
                count[symbol] = 1
            else:
                count[symbol] += 1
            if count[symbol] > best_count:
                best_count = count[symbol]
        score += (t-best_count)
    return score


def get_profile_from_motifs(motifs, pseudo_count=False):
    k = len(motifs[0])
    t = len(motifs)
    count = 0.0
    if pseudo_count:
        count = 1.0
    profile = [[count]*k, [count]*k, [count]*k, [count]*k]
    for text in motifs:
        for i in range(k):
            index = symbol_index[text[i]]
            profile[index][i] += 1
    divisor = t
    if pseudo_count:
        divisor += 4
    np_profile = np.array(profile)/divisor
    profile = np_profile.tolist()
    return profile


def get_random_pattern_from_text(profile, text, k):
    text_len = len(text)
    sum_score = 0
    motif_score = []
    for i in range(text_len - k + 1):
        pattern = text[i:i + k]
        score = get_probability(profile, pattern)
        sum_score += score
        motif_score.append((pattern, sum_score))

    rand_num = random.random() * sum_score
    for item in motif_score:
        motif = item[0]
        value = item[1]
        if rand_num <= value:
            return motif


def gibbs_sampler(dna, k, t, n):
    current_motifs = get_random_motifs(dna, k)
    best_motifs = current_motifs.copy()
    best_score = get_score_from_motifs(best_motifs)
    for count in range(n):
        ignore_index = random.randint(0, t - 1)
        current_motifs.remove(current_motifs[ignore_index])
        profile = get_profile_from_motifs(current_motifs, True)
        random_motif = get_random_pattern_from_text(profile, dna[ignore_index], k)
        current_motifs.insert(ignore_index, random_motif)
        current_score = get_score_from_motifs(current_motifs)
        if current_score < best_score:
            best_motifs = current_motifs
            best_score = current_score
    return best_motifs, best_score


with open('rosalind_ba2g.txt') as file:
    k, t, n = map(lambda x: int(x), file.readline().rstrip().split(' '))
    dna = [pattern.rstrip() for pattern in file.readlines()]

best_motifs = []
best_score = 10000000000
for i in range(50):
    motifs, score = gibbs_sampler(dna, k, t, n)
    if score < best_score:
        best_motifs = motifs
        best_score = score
    print(f'{i+1}/50', score)
output = '\n'.join(best_motifs)
print(output)
with open('output.txt', 'w') as file:
    file.write(output)

1/50 72
2/50 101
3/50 108
4/50 63
5/50 81
6/50 91
7/50 114
8/50 72
9/50 82
10/50 72
11/50 82
12/50 82
13/50 72
14/50 90
15/50 72
16/50 82
17/50 80
18/50 63
19/50 63
20/50 82
21/50 63
22/50 92
23/50 72
24/50 97
25/50 82
26/50 63
27/50 73
28/50 82
29/50 99
30/50 82
31/50 116
32/50 81
33/50 63
34/50 105
35/50 90
36/50 107
37/50 72
38/50 89
39/50 72
40/50 72
41/50 72
42/50 106
43/50 83
44/50 72
45/50 91
46/50 99
47/50 82
48/50 101
49/50 63
50/50 104
AATGGCTGCGTTGCA
ACTTATTTGTTTGAG
ACTGGAGGGTTTGAG
ACTGGCTTTGATGAG
ACTGAAATGTTTGAG
TGGGGCTTGTTTGAG
ACTTTTTTGTTTGAG
ACTGTGGTGTTTGAG
ACTGGCTTGTCCAAG
ACTGGCTTGTTGTCG
GCTGGCTTGTTTGGC
ACTGGCTTGTTTCCC
AAGAGCTTGTTTGAG
ACTGGCTTGACAGAG
ACTGGCACTTTTGAG
ACTGGCAGCTTTGAG
ACTGGTGGGTTTGAG
ACCCTCTTGTTTGAG
CATGGCTTGTTTGAC
ACTGGCTGCATTGAG


# ba2h Implement DistanceBetweenPatternAndStrings

In [10]:
with open('rosalind_ba2h.txt') as file:
    pattern = file.readline().rstrip()
    dna = file.readline().rstrip().split(' ')


def get_hamming_distance(dna1, dna2):
    dna_len = len(dna1)
    hamming_distance = 0
    for i in range(dna_len):
        if dna1[i] != dna2[i]:
            hamming_distance += 1
    return hamming_distance


def get_min_hamming_distance(text, pattern):
    min_distance = get_hamming_distance(text[:len(pattern)], pattern)
    for i in range(1, len(text) - len(pattern) + 1):
        current_pattern = text[i: i+len(pattern)]
        min_distance = min(min_distance, get_hamming_distance(current_pattern, pattern))
    return min_distance


distance = 0
for text in dna:
    distance += get_min_hamming_distance(text, pattern)
output = str(distance)
print(output)
with open('output.txt', 'w') as file:
    file.write(output)

48
