This problem was asked by Amazon.

Given an integer k and a string s, find the length of the longest substring that contains at most k distinct characters.

For example, given s = "abcba" and k = 2, the longest substring with at most k distinct characters is "bcb", so the answer is 3.

In [64]:
def maxSubstringLength(string, max_distinct_chars):
    """Return the length of the longest substring of ``string`` that
    contains at most ``max_distinct_chars`` distinct characters.

    Brute force: for every start index, grow the substring one character
    at a time until the distinct-character budget is exceeded.
    Time complexity is O(n^2).

    Args:
        string: The text to scan.
        max_distinct_chars: Maximum number of distinct characters a
            qualifying substring may contain.

    Returns:
        The length of the longest qualifying substring, or 0 when there
        is none (empty input or a non-positive budget).
    """
    max_substr_len = 0
    str_len = len(string)

    for i in range(str_len):
        # Substrings starting at i are at most str_len - i long; once that
        # cannot beat the best found so far, no later start can either.
        if str_len - i <= max_substr_len:
            break

        # Fresh set per start index so no state leaks between iterations.
        current_distinct_chars = set()

        # Try substrings of increasing length starting at i, until the
        # allowed number of distinct characters is exceeded.
        for j in range(i, str_len):
            current_distinct_chars.add(string[j])
            if len(current_distinct_chars) > max_distinct_chars:
                break
            max_substr_len = max(max_substr_len, j - i + 1)

    return max_substr_len

def maxSubstringLength2(s, max_distinct_chars):
    """Return the length of the longest substring of ``s`` that
    contains at most ``max_distinct_chars`` distinct characters.

    Sliding window: the window's right edge advances one character per
    iteration and the left edge only ever moves forward, so each index
    enters and leaves the window at most once. Time complexity is O(n).

    Args:
        s: The text to scan.
        max_distinct_chars: Maximum number of distinct characters a
            qualifying substring may contain.

    Returns:
        The length of the longest qualifying substring, or 0 when there
        is none (empty input or a non-positive budget).
    """
    # With a non-positive budget no non-empty substring qualifies. Bail out
    # early: for a negative budget the shrink loop below would never see
    # its condition become false and would run past the window.
    if max_distinct_chars <= 0:
        return 0

    char_count = {}  # character -> occurrences inside the current window
    substr_st = 0
    max_substr_len = 0

    for substr_end, char in enumerate(s):
        # Extend the window by one character on the right.
        char_count[char] = char_count.get(char, 0) + 1

        # Drop characters from the left until the window is valid again.
        while len(char_count) > max_distinct_chars:
            leftmost = s[substr_st]
            char_count[leftmost] -= 1
            if char_count[leftmost] == 0:
                # Remove exhausted characters so len(char_count) keeps
                # reflecting the number of distinct characters.
                del char_count[leftmost]
            substr_st += 1

        # Record the window if it is the longest valid one seen so far.
        max_substr_len = max(max_substr_len, substr_end - substr_st + 1)

    return max_substr_len

In [65]:
# Example from the problem statement: "bcb" is the longest substring of
# "abcba" with at most 2 distinct characters, so the expected result is 3.
s = "abcba"
k = 2
maxSubstringLength(s, k)

3

In [66]:
# Same input (s and k from the cell above) through the sliding-window
# version; the expected result is again 3.
maxSubstringLength2(s, k)

3