This problem was asked by Amazon.

Run-length encoding is a fast and simple method of encoding strings. The basic idea is to represent repeated successive characters as a single count and character. For example, the string "AAAABBBCCDAA" would be encoded as "4A3B2C1D2A".

Implement run-length encoding and decoding. You can assume the string to be encoded has no digits and consists solely of alphabetic characters. You can assume the string to be decoded is valid.

In [82]:
def runlength_encode(string):
    """
    Returns the run-length encoding of @string.

    Every maximal run of one repeated character is written as the
    run's length followed by that character, e.g. "AAB" -> "2A1B".
    An empty input yields an empty string.
    @string must match "[A-Z]*"
    """
    pieces = []
    run_char = None
    run_len = 0  # zero means no run has been started yet
    for ch in string:
        # a different character closes the run collected so far
        if run_len and run_char != ch:
            pieces.append(str(run_len) + run_char)
            run_len = 0
        run_char = ch
        run_len += 1
    # the final run is never followed by a differing character,
    # so it has to be flushed after the loop.
    if run_len:
        pieces.append(str(run_len) + run_char)
    return ''.join(pieces)

def runlength_decode(string):
    """
    Returns run-length decoded string.
    @string must match "([0-9]+[A-Z])*"

    Each character is repeated as many times as the (possibly
    multi-digit) count written directly before it; a count of zero
    contributes nothing.

    Raises ValueError if a character has no count in front of it, or
    if the input ends with a count that has no character to repeat.
    """
    decodings = []
    count_digits = []
    for char in string:
        if char.isdigit():
            # counts may span several digits, so collect them until
            # the character they apply to shows up.
            count_digits.append(char)
            continue
        if not count_digits:
            raise ValueError(
                "missing count before %r in run-length encoded input" % (char,)
            )
        decodings.append(char * int(''.join(count_digits)))
        count_digits = []
    if count_digits:
        # leftover digits mean the input was cut off; dropping them
        # silently would hide the corruption.
        raise ValueError(
            "run-length encoded input ends with count %r but no character"
            % (''.join(count_digits),)
        )
    return ''.join(decodings)

In [32]:
# Empty input produces an empty encoding.
assert runlength_encode("") == ""

In [33]:
# A lone character is still prefixed with its count of 1.
assert runlength_encode("A") == "1A" 

In [34]:
# A single run of two identical characters.
assert runlength_encode("AA") == "2A"

In [35]:
# Two different characters form two runs of length 1.
assert runlength_encode("AB") == "1A1B"

In [36]:
# The run at the very end of the input is emitted too.
assert runlength_encode("ABB") == "1A2B"

In [42]:
# The worked example from the problem statement; "A" occurs in two separate runs.
assert runlength_encode("AAAABBBCCDAA") == "4A3B2C1D2A"

In [83]:
# Empty input decodes to an empty string.
assert runlength_decode("") == ""

In [84]:
# A count of zero repeats its character zero times, so nothing is emitted.
assert runlength_decode("0A0B0C") == ""

In [85]:
# A count of 1 yields the character once.
assert runlength_decode("1A") == "A"

In [86]:
# A count of 2 yields the character twice.
assert runlength_decode("2A") == "AA"

In [87]:
# Consecutive count/character pairs are decoded in order.
assert runlength_decode("1A1B") == "AB"

In [88]:
# The final count/character pair is expanded as well.
assert runlength_decode("1A2B") == "ABB"

In [89]:
# Decoding the problem statement's encoded example restores the original string.
assert runlength_decode("4A3B2C1D2A") == "AAAABBBCCDAA"