In [2]:
def pattern_count(text: str, pattern: str) -> int:
    """Count how many times ``pattern`` occurs in ``text``.

    Every start position is tested independently, so overlapping
    occurrences are each counted.

    Args:
        text: The string to search.
        pattern: The substring to look for.

    Returns:
        The number of start positions at which ``pattern`` matches.
        Zero when ``pattern`` is empty or longer than ``text``.
    """
    width = len(pattern)
    # An empty pattern, or one that cannot fit, is reported as zero hits.
    if width == 0 or width > len(text):
        return 0
    last_start = len(text) - width
    return sum(
        1
        for start in range(last_start + 1)
        if text[start:start + width] == pattern
    )
# Demo: count the occurrences of "CG" in a sample string and print the result.
count = pattern_count("ACGTACGTACGT", "CG")
print(count)


3


In [3]:
def frequent_words(text: str, k: int) -> list[str]:
    """Return the most frequent substrings of length ``k`` in ``text``.

    Args:
        text: The string to scan.
        k: Length of the substrings (k-mers) to count.

    Returns:
        Every k-mer whose occurrence count equals the maximum count, in
        order of first appearance in ``text``. An empty list when ``k`` is
        not positive or exceeds ``len(text)``.
    """
    if not 0 < k <= len(text):
        return []

    # Tally each window of length k; dict insertion order tracks first appearance.
    tally = {}
    for start in range(len(text) - k + 1):
        window = text[start:start + k]
        tally[window] = tally.get(window, 0) + 1

    # The guard above guarantees at least one window, so the tally is non-empty.
    highest = max(tally.values())
    return [word for word, hits in tally.items() if hits == highest]

# Demo: print the most frequent 4-mers of a sample string.
print(frequent_words("CGTTTTGAACATTTTCAACAAGTTTTGCAACATTTT", 4))


['TTTT']


In [5]:
def minimum_skew(genome: str) -> list[int]:
    """Find the positions at which the running G-minus-C skew is lowest.

    The skew starts at 0 before any character is read; each 'G' raises it
    by one, each 'C' lowers it by one, and every other character leaves it
    unchanged.

    Args:
        genome: The sequence to scan.

    Returns:
        All indices ``i`` in ``0..len(genome)`` where the skew after the
        first ``i`` characters equals the minimum skew, in ascending order.
    """
    step = {'C': -1, 'G': 1}

    # prefix[i] is the skew after consuming the first i characters.
    prefix = [0]
    for base in genome:
        prefix.append(prefix[-1] + step.get(base, 0))

    lowest = min(prefix)
    return [pos for pos, value in enumerate(prefix) if value == lowest]

# Demo: print the minimum-skew positions of a sample string.
print(minimum_skew("CCGGCCGG"))

[2, 6]


In [8]:
def frequent_words_with_mismatches(text: str, k: int, d: int) -> list[str]:
    """Return the most frequent k-mers in ``text``, allowing up to ``d`` mismatches.

    Each length-``k`` window of ``text`` contributes one count to every
    string that can be obtained from it by substituting at most ``d``
    positions with a different letter from the alphabet ``A``, ``C``,
    ``G``, ``T``. The k-mers with the highest total are returned; they
    need not occur in ``text`` themselves.

    Args:
        text: The string to scan.
        k: Length of the k-mers.
        d: Maximum number of substituted positions per window.

    Returns:
        The k-mers with the highest count, sorted lexicographically. An
        empty list when ``k`` is not positive, ``k`` exceeds ``len(text)``,
        or ``d`` is negative.
    """
    from collections import Counter
    from itertools import combinations, product

    # Without this guard a zero or negative k makes text[i:i+k] produce empty
    # or arbitrary-length slices and the function would return [''].
    if k <= 0 or k > len(text) or d < 0:
        return []

    alphabet = "ACGT"

    def generate_neighbors(kmer: str, d: int) -> set[str]:
        """Return every string differing from ``kmer`` in at most ``d`` positions."""
        neighbors = {kmer}
        # No more than len(kmer) positions can be substituted.
        for num_changes in range(1, min(d, len(kmer)) + 1):
            for positions in combinations(range(len(kmer)), num_changes):
                # For each chosen position, the letters that differ from the original.
                choices = [
                    [nt for nt in alphabet if nt != kmer[pos]]
                    for pos in positions
                ]
                for subs in product(*choices):
                    mutated = list(kmer)
                    for pos, nt in zip(positions, subs):
                        mutated[pos] = nt
                    neighbors.add(''.join(mutated))
        return neighbors

    # Count distinct windows first so each neighbourhood is built only once,
    # however many times the same k-mer repeats in the text.
    window_counts = Counter(text[i:i + k] for i in range(len(text) - k + 1))

    freq = Counter()
    for kmer, occurrences in window_counts.items():
        for neighbor in generate_neighbors(kmer, d):
            freq[neighbor] += occurrences

    # freq is non-empty: there is at least one window and it is its own neighbor.
    max_freq = max(freq.values())
    return sorted(candidate for candidate, count in freq.items() if count == max_freq)

# Demo: print the most frequent 4-mers of a sample string, allowing 1 mismatch.
print(frequent_words_with_mismatches("ACGTTGCATGTCGCATGATGCATGAGAGCT", 4, 1))

['ATGC', 'ATGT', 'GATG']
