In [8]:
# Part 1:
# - Given CSVs of product ID ranges. Must identify invalid product IDs.
# - Invalid IDs are made up of a sequence of digits that are repeated twice
#   in a row (e.g. "123123" is invalid because "123" is repeated).
# - The IDs contain no leading zeros.
# - Return the sum of all invalid product IDs.


def solve(filename):
    """Sum the invalid product IDs found in the ranges listed in ``filename``.

    The file holds comma-separated inclusive ranges written as ``start-end``
    (for example ``11-22,95-115``); newline characters are removed before
    parsing.  An ID is invalid when its decimal digits are one digit sequence
    written exactly twice (``55``, ``123123``).  An ID covered by several
    ranges is only counted once.

    Args:
        filename: Path of the text file containing the ranges.

    Returns:
        The sum of all distinct invalid IDs; ``0`` when the file has no ranges.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank entry is not of the form ``start-end``.
    """
    invalid_ids = set()
    with open(filename, "r") as handle:
        # drop newlines so a wrapped file reads like a single line
        entries = handle.read().replace("\n", "").split(",")
    for entry in entries:
        entry = entry.strip()
        # a trailing comma or an empty file yields blank entries: skip them
        # instead of failing on int("")
        if not entry:
            continue
        start, end = [int(bound) for bound in entry.split("-")]
        for value in range(start, end + 1):
            digits = str(value)
            # an odd number of digits can never split into two equal halves
            if len(digits) % 2 != 0:
                continue
            half = len(digits) // 2
            if digits[:half] == digits[half:]:
                invalid_ids.add(value)
    return sum(invalid_ids)


# Run part 1 against the test file, then against the full data file.
print(f"part1 - test: {solve('./data/day2-test.txt')}")
print(f"part1 - input: {solve('./data/day2-data.txt')}")

part1 - test: 1227775554
part1 - input: 53420042388


In [None]:
# Part 2:
# - Now an ID is invalid if it consists entirely of some sequence of digits
#   repeated at least twice (e.g. "121212" is "12" repeated three times).
def solve2(filename):
    """Sum the IDs that are a digit sequence repeated two or more times.

    The file holds comma-separated inclusive ranges written as ``start-end``;
    newline characters are removed before parsing.  An ID is invalid when its
    decimal digits consist entirely of one block of digits repeated at least
    twice (``11``, ``123123``, ``121212``).  An ID covered by several ranges is
    only counted once.

    This is the brute-force version: every ID of every range is converted to a
    string and tested against each block size that divides its length.

    Args:
        filename: Path of the text file containing the ranges.

    Returns:
        The sum of all distinct invalid IDs; ``0`` when the file has no ranges.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank entry is not of the form ``start-end``.
    """
    invalid_ids = set()
    with open(filename, "r") as handle:
        # drop newlines so a wrapped file reads like a single line
        entries = handle.read().replace("\n", "").split(",")
    for entry in entries:
        entry = entry.strip()
        # a trailing comma or an empty file yields blank entries: skip them
        # instead of failing on int("")
        if not entry:
            continue
        start, end = [int(bound) for bound in entry.split("-")]
        for value in range(start, end + 1):
            digits = str(value)
            length = len(digits)
            # a block can be at most half the ID, and must divide its length
            for size in range(1, length // 2 + 1):
                if length % size != 0:
                    continue
                # the ID is invalid when it equals its first block repeated
                if digits == digits[:size] * (length // size):
                    invalid_ids.add(value)
                    # one matching block size is enough; stop checking others
                    break
    return sum(invalid_ids)


# Run part 2 against the test file, then against the full data file.
print(f"part2 - test: {solve2('./data/day2-test.txt')}")
print(f"part2 - input: {solve2('./data/day2-data.txt')}")
# NOTE: runtime is bad; this needs optimization (a regex check might help).

part2 - test: (4174379265, '11,22,99,111,999,1010,222222,446446,565656,38593859,824824824,1188511885,2121212121')
part2 - input: (69553832684, '11,33,44,55,66,77,88,99,222,333,444,555,666,1111,1212,1313,1414,1515,1616,1717,1818,1919,2020,2121,2222,2323,2424,2525,3737,3838,3939,4040,4141,4242,4343,4444,4545,4646,4747,4848,4949,5050,5151,5252,5353,5454,5555,5656,5757,5858,5959,6060,6161,6262,6363,6464,6565,6666,6767,6868,6969,7070,7171,7272,7373,7474,7575,7676,7777,7878,7979,8080,8181,8282,8383,8484,8585,8686,8787,8888,8989,9090,33333,44444,55555,66666,88888,99999,100100,101010,101101,102102,103103,104104,105105,106106,107107,181181,181818,182182,183183,184184,185185,186186,187187,188188,189189,190190,191191,191919,192192,193193,194194,195195,196196,197197,198198,199199,200200,201201,202020,202202,203203,204204,205205,206206,207207,208208,209209,210210,211211,212121,212212,213213,214214,215215,216216,217217,218218,219219,220220,221221,222222,223223,224224,225225,226226,227227,228228,2292

In [None]:
# Part 2 (optimized version)
def solve2(filename):
    """Sum the IDs that are a digit sequence repeated two or more times.

    The file holds comma-separated inclusive ranges written as ``start-end``;
    newline characters are removed before parsing.  An ID is invalid when its
    decimal digits consist entirely of one block of digits repeated at least
    twice (``11``, ``123123``, ``121212``).  An ID covered by several ranges is
    only counted once.

    Each ID is scanned block by block against its own prefix; a candidate
    block size is abandoned at its first mismatching block and the scan
    restarts with the next size that divides the ID's length.

    Args:
        filename: Path of the text file containing the ranges.

    Returns:
        The sum of all distinct invalid IDs; ``0`` when the file has no ranges.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank entry is not of the form ``start-end``.
    """
    invalid_ids = set()
    with open(filename, "r") as handle:
        # drop newlines so a wrapped file reads like a single line
        entries = handle.read().replace("\n", "").split(",")
    for entry in entries:
        entry = entry.strip()
        # a trailing comma or an empty file yields blank entries: skip them
        # instead of failing on int("")
        if not entry:
            continue
        start, end = [int(bound) for bound in entry.split("-")]
        for value in range(start, end + 1):
            digits = str(value)
            length = len(digits)
            max_size = length // 2
            size = 1
            # the first block is the reference, so comparing starts at block 2
            pos = size
            # Worst case is still one pass over the digits per divisor of
            # `length`; the gain is stopping at the first mismatching block.
            while size <= max_size and pos + size <= length:
                if digits[pos : pos + size] == digits[:size]:
                    pos += size
                else:
                    # move on to the next block size that divides the length
                    size += 1
                    while size <= max_size and length % size != 0:
                        size += 1
                    pos = size
            # the scan only reaches the end with a valid size when every
            # block matched the prefix
            if size <= max_size:
                invalid_ids.add(value)
    return sum(invalid_ids)


# Run the optimized part 2 against the test file, then the full data file.
print(f"part2 - test: {solve2('./data/day2-test.txt')}")
print(f"part2 - input: {solve2('./data/day2-data.txt')}")
# TODO: the "runtime bad, needs optimization (regex maybe?)" note was copied
# from the brute-force cell; re-time this version (e.g. with %%time) and
# update or remove the note.

part2 - test: (4174379265, '11,22,99,111,999,1010,222222,446446,565656,38593859,824824824,1188511885,2121212121')
part2 - input: (69553832684, '11,33,44,55,66,77,88,99,222,333,444,555,666,1111,1212,1313,1414,1515,1616,1717,1818,1919,2020,2121,2222,2323,2424,2525,3737,3838,3939,4040,4141,4242,4343,4444,4545,4646,4747,4848,4949,5050,5151,5252,5353,5454,5555,5656,5757,5858,5959,6060,6161,6262,6363,6464,6565,6666,6767,6868,6969,7070,7171,7272,7373,7474,7575,7676,7777,7878,7979,8080,8181,8282,8383,8484,8585,8686,8787,8888,8989,9090,33333,44444,55555,66666,88888,99999,100100,101010,101101,102102,103103,104104,105105,106106,107107,181181,181818,182182,183183,184184,185185,186186,187187,188188,189189,190190,191191,191919,192192,193193,194194,195195,196196,197197,198198,199199,200200,201201,202020,202202,203203,204204,205205,206206,207207,208208,209209,210210,211211,212121,212212,213213,214214,215215,216216,217217,218218,219219,220220,221221,222222,223223,224224,225225,226226,227227,228228,2292