In [30]:
# Part 1:
# - Given CSVs of product ID ranges. Must identify invalid product IDs.
# - Invalid IDs are made up of a sequence of digits that are repeated twice
#   in a row (e.g. "123123" is invalid because "123" is repeated).
# - The IDs contain no leading zeros.
# - Return the sum of all invalid product IDs.


def solve(filename):
    """Sum the IDs whose digits are one sequence written exactly twice.

    The file holds comma-separated inclusive ranges written as ``first-last``
    (for example ``11-22,95-115``). Newlines are removed before parsing, so
    the list may be wrapped across lines. Blank entries, such as those left
    by an empty file or a trailing comma, are skipped.

    Args:
        filename: Path of the text file containing the ranges.

    Returns:
        The sum of every distinct matching ID. An ID covered by several
        overlapping ranges is counted once.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank entry is not two integers joined by "-".
    """
    with open(filename, "r", encoding="utf-8") as handle:
        tokens = handle.read().replace("\n", "").split(",")

    # A set keeps an ID from being summed twice when ranges overlap.
    invalid_ids = set()
    for token in tokens:
        if not token.strip():
            # Nothing between two separators (or an empty file): no range here.
            continue
        first, last = (int(part) for part in token.split("-"))
        for value in range(first, last + 1):
            digits = str(value)
            half, is_odd = divmod(len(digits), 2)
            # Only an even number of digits can split into two equal halves.
            if not is_odd and digits[:half] == digits[half:]:
                invalid_ids.add(value)
    return sum(invalid_ids)


# Run the part 1 solver on the test file, then on the data file, and print each sum.
print(f"part1 - test: {solve('./data/day2-test.txt')}")
print(f"part1 - input: {solve('./data/day2-data.txt')}")

part1 - test: 1227775554
part1 - input: 53420042388


In [31]:
# Part 2:
# - Now an ID is invalid if some sequence of digits is repeated at least twice
#   (e.g. "123123" and "121212" are both invalid).
def solve2(filename):
    """Sum the IDs whose digits are one sequence repeated two or more times.

    The file holds comma-separated inclusive ranges written as ``first-last``
    (for example ``11-22,95-115``). Newlines are removed before parsing, so
    the list may be wrapped across lines. Blank entries, such as those left
    by an empty file or a trailing comma, are skipped.

    Args:
        filename: Path of the text file containing the ranges.

    Returns:
        The sum of every distinct matching ID. An ID covered by several
        overlapping ranges is counted once.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank entry is not two integers joined by "-".
    """
    with open(filename, "r", encoding="utf-8") as handle:
        tokens = handle.read().replace("\n", "").split(",")

    # A set keeps an ID from being summed twice when ranges overlap.
    invalid_ids = set()
    for token in tokens:
        if not token.strip():
            # Nothing between two separators (or an empty file): no range here.
            continue
        first, last = (int(part) for part in token.split("-"))
        for value in range(first, last + 1):
            digits = str(value)
            length = len(digits)
            # Try every unit length that divides the digit count and leaves
            # room for at least two copies; any() stops at the first hit.
            if any(
                length % unit == 0 and digits[:unit] * (length // unit) == digits
                for unit in range(1, length // 2 + 1)
            ):
                invalid_ids.add(value)
    return sum(invalid_ids)


# Run the part 2 solver on the test file, then on the data file, and print each sum.
print(f"part2 - test: {solve2('./data/day2-test.txt')}")
print(f"part2 - input: {solve2('./data/day2-data.txt')}")
# Runtime is poor: every ID in every range is converted to a string and
# checked individually. Needs optimization (a regex, maybe?).

part2 - test: 4174379265
part2 - input: 69553832684


In [32]:
# Part 2 (optimized version)
def solve2(filename):
    """Sum the IDs whose digits are one sequence repeated two or more times.

    The file holds comma-separated inclusive ranges written as ``first-last``
    (for example ``11-22,95-115``). Newlines are removed before parsing, so
    the list may be wrapped across lines. Blank entries, such as those left
    by an empty file or a trailing comma, are skipped.

    Args:
        filename: Path of the text file containing the ranges.

    Returns:
        The sum of every distinct matching ID. An ID covered by several
        overlapping ranges is counted once.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank entry is not two integers joined by "-".
    """
    with open(filename, "r", encoding="utf-8") as handle:
        tokens = handle.read().replace("\n", "").split(",")

    # A set keeps an ID from being summed twice when ranges overlap.
    invalid_ids = set()
    for token in tokens:
        if not token.strip():
            # Nothing between two separators (or an empty file): no range here.
            continue
        beg, end = (int(part) for part in token.split("-"))
        for value in range(beg, end + 1):
            digits = str(value)
            # A string is a shorter block repeated a whole number of times
            # exactly when it reappears inside its own doubling at an offset
            # other than 0 or len(digits). Trimming one character from each
            # end of the doubling rules out those two trivial offsets, so a
            # single substring search replaces the per-length comparisons.
            # A one-digit ID leaves an empty haystack and never matches.
            if digits in (digits + digits)[1:-1]:
                invalid_ids.add(value)
    return sum(invalid_ids)


# Run the part 2 solver on the test file, then on the data file, and print each sum.
print(f"part2 - test: {solve2('./data/day2-test.txt')}")
print(f"part2 - input: {solve2('./data/day2-data.txt')}")
# Runtime bad, needs optimization (regex maybe?)
# NOTE(review): the remark above is worded the same as the one after the
# earlier part 2 cell; confirm whether it still applies to this version.

part2 - test: 4174379265
part2 - input: 69553832684
