In [29]:
from pathlib import Path



In [58]:
def get_run_starts(word, n=2):
    """
    Return the start indices of all n-runs in the given word.

    Call a maximal sequence of exactly n >= 1 consecutive identical
    characters in a word an *n-run*. A longer stretch of the same
    character (e.g. 'aaa' when n=2) is one longer run and does not
    contain any n-run.

    Parameters
    ----------
    word : str
        The string to scan. May be empty and may contain any characters.
    n : int
        The exact run length to look for.

    Returns
    -------
    list of int
        The starting index of every n-run, in increasing order.
    """
    starts = []  # Start indices of the n-runs found
    run_start = 0  # Index at which the current run begins
    # Let i go one past the last index so the final run is closed by the
    # end of the word itself. Appending a sentinel character instead would
    # silently extend the last run of any word ending in that character.
    for i in range(1, len(word) + 1):
        if i < len(word) and word[i] == word[i - 1]:
            # Run continues
            continue
        # Run ended at index i - 1. Is it of length n?
        if i - run_start == n:
            starts.append(run_start)
        run_start = i

    return starts

def max_consecutive_runs(word, n=2):
    """
    Find the longest chain of back-to-back n-runs in the given word
    and return the number of n-runs in that chain (0 if the word has
    no n-run at all).
    """
    starts = get_run_starts(word, n)
    if not starts:
        return 0

    # Two n-runs sit back to back exactly when their start indices
    # are n apart.
    best = streak = 1
    for prev, cur in zip(starts, starts[1:]):
        streak = streak + 1 if cur - prev == n else 1
        best = max(best, streak)
    return best
    
# Sanity check on a word with three double letters, two of them adjacent
word = 'committee'
print(get_run_starts(word), max_consecutive_runs(word), sep='\n')

[2, 5, 7]
2


In [67]:
# Scan the full word list and report every word that contains
# at least three consecutive double letters
p = Path('../data/homework_01/words.txt')
with p.open() as words_file:
    for raw_line in words_file:
        word = raw_line.strip()
        M = max_consecutive_runs(word)
        if M < 3:
            continue
        print(word, M)


bookkeeper 3
bookkeepers 3
bookkeeping 3
bookkeepings 3
