In [15]:
def read_file(fpath):
    """Return the text stored at ``fpath`` with leading/trailing whitespace removed."""
    with open(fpath, "r") as handle:
        content = handle.read()
    return content.strip()


# Load the small sample input and show it as the cell output.
code = read_file("input_small.txt")
code

'2333133121414131402'

In [16]:
def get_long_format(code):
    """Expand a dense disk map into its block-by-block string layout.

    Characters at even positions are file lengths (file ID = position // 2);
    characters at odd positions are free-space lengths. Every file block is
    written as the decimal text of its ID and every free block as ".".
    """
    pieces = []
    for position, digit in enumerate(code):
        is_file = position % 2 == 0
        symbol = str(position // 2) if is_file else "."
        pieces.append(symbol * int(digit))
    return "".join(pieces)


# Each entry is (dense map, expected expanded layout, expected compacted layout).
tests = [
    ("12345", "0..111....22222", "022111222......"),
    (
        "2333133121414131402",
        "00...111...2...333.44.5555.6666.777.888899",
        "0099811188827773336446555566..............",
    ),
]

# Check the expanded layout of every example.
# Note: this loop rebinds the notebook-level name `code`.
for code, expected, _ in tests:
    print(code)
    computed = get_long_format(code)
    assert computed == expected, f"{computed} != {expected}"

# A longer map whose file IDs reach 10 and 11: each such block takes two characters.
get_long_format("23331331214141314020121")

12345
2333133121414131402


'00...111...2...333.44.5555.6666.777.88889910..11'

In [17]:
def iterate_over_spaces(code):
    """Yield one "." for every free block described by the odd-position digits."""
    for gap_length in code[1::2]:
        yield from "." * int(gap_length)


def iterate_over_digits(code, reverse: bool = False):
    """Yield the characters of every file block, skipping free space.

    For each even position the file ID (position // 2) is emitted once per
    block, one character at a time. With ``reverse=True`` the map is walked
    from its end and the characters of each ID are emitted back to front, so
    the stream is the exact reversal of the forward stream.
    """
    if reverse:
        indexed = zip(range(len(code) - 1, -1, -1), reversed(code))
    else:
        indexed = enumerate(code)
    step = -1 if reverse else 1

    for position, digit in indexed:
        if position % 2:
            continue  # odd positions describe free space
        id_text = str(position // 2)[::step]
        for _ in range(int(digit)):
            yield from id_text


def iterate_long_format(code):
    """Lazily yield the expanded layout one character at a time.

    File blocks contribute the characters of their ID (position // 2), free
    blocks contribute ".".
    """
    for position, digit in enumerate(code):
        symbol = "." if position % 2 else str(position // 2)
        for _ in range(int(digit)):
            yield from symbol


def test_iterate_over_digits(code):
    """Check forward and reverse digit iteration against the expanded layout."""
    expected = get_long_format(code).replace(".", "")

    forward = "".join(iterate_over_digits(code))
    assert forward == expected

    backward = "".join(iterate_over_digits(code, reverse=True))
    assert backward == expected[::-1]


def test_iterate_over_spaces(code):
    """Check that as many "." are yielded as the expanded layout contains."""
    expected_count = get_long_format(code).count(".")
    actual_count = sum(1 for _ in iterate_over_spaces(code))

    assert actual_count == expected_count


def test_iterate_long_format(code):
    """Check that the lazy iterator reproduces ``get_long_format`` exactly."""
    rebuilt = "".join(iterate_long_format(code))
    expected = get_long_format(code)
    assert rebuilt == expected


# Run the three iterator checks on the worked examples and on both input files.
# Requires input_small.txt and input.txt to be readable; rebinds the global `code`.
for code in [x[0] for x in tests] + [
    read_file("input_small.txt"),
    read_file("input.txt"),
]:
    test_iterate_over_digits(code)
    test_iterate_over_spaces(code)
    test_iterate_long_format(code)

In [18]:
# Show the three views of the same map: full layout, file characters only, free blocks only.
print(get_long_format("12345"))
print("".join([*iterate_over_digits("12345")]))
print("".join([*iterate_over_spaces("12345")]))

0..111....22222
011122222
......


In [19]:
def build_compressed_code(code):
    """Compact the expanded string layout by filling gaps with the rightmost characters.

    The layout from ``iterate_long_format`` is walked left to right; every "."
    inside the occupied prefix is replaced by the next character taken from the
    end of the file data (``iterate_over_digits(..., reverse=True)``). The
    result is padded with "." back to the original layout length.

    This works per character, so it only gives a meaningful layout while every
    file ID is a single digit; ``build_compressed_code_fix`` is the list-based
    variant for multi-digit IDs.

    Args:
        code: dense disk map (alternating file / free-space lengths).

    Returns:
        The compacted layout as a string of the same length as the expanded one.
    """
    layout = list(iterate_long_format(code))
    n_spaces = layout.count(".")
    # After compaction all file characters occupy exactly this many leading slots.
    n_occupied = len(layout) - n_spaces
    tail_chars = iterate_over_digits(code, reverse=True)

    # Only the occupied prefix is filled. This avoids the `out[:-0]` pitfall
    # (which would erase everything when there is no free space) and never asks
    # the reverse iterator for more characters than exist: the number of "."
    # in the prefix equals the number of file characters behind it.
    compacted = [
        char if char != "." else next(tail_chars) for char in layout[:n_occupied]
    ]
    return "".join(compacted) + "." * n_spaces


# Check the compacted layout (third field) of every example; rebinds the global `code`.
for code, _, expected in tests:
    assert build_compressed_code(code) == expected

In [20]:
def get_checksum(compressed_long_code):
    """Sum ``position * digit`` over every non-"." character of the layout string."""
    return sum(
        position * int(symbol)
        for position, symbol in enumerate(compressed_long_code)
        if symbol != "."
    )


# The second worked example must give a checksum of 1928.
assert get_checksum(build_compressed_code(tests[1][0])) == 1928

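## Not working: it must be a list of ints

The string-based approach above breaks as soon as a file ID has more than one digit (ID >= 10): every character is treated as a separate block, so the positions and the checksum come out wrong.

The problem is actually simpler, since you don't have to reverse the IDs.
It is better to use a list as the data structure:

[id1, id2, id3], where each id can be a multi-digit number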

In [21]:
from typing import Generator


def iterate_long_format_fix(code: str) -> Generator[None | int, None, None]:
    for i, d in enumerate(code):
        if i % 2 == 0:
            id_ = i // 2
            for _ in range(int(d)):
                yield id_
        else:
            for _ in range(int(d)):
                yield None


def iterate_over_digits_fix(
    code: str, reverse: bool = False
) -> Generator[None | int, None, None]:
    if not reverse:
        iterable = enumerate(code)
    else:
        range_ = range(len(code) - 1, -1, -1)
        iterable = zip(range_, reversed(code))

    for i, d in iterable:
        if i % 2 == 0:
            id_ = i // 2
            for _ in range(int(d)):
                yield id_


def build_compressed_code_fix(code):
    """Compact the block list by filling free slots with the rightmost file blocks.

    The layout from ``iterate_long_format_fix`` is walked left to right; every
    ``None`` inside the occupied prefix is replaced by the next block taken
    from the end of the file data (``iterate_over_digits_fix(..., reverse=True)``).
    The result is padded with ``None`` back to the original layout length.

    Args:
        code: dense disk map (alternating file / free-space lengths).

    Returns:
        A list with one entry per block: file IDs first, then ``None`` entries.
    """
    layout = list(iterate_long_format_fix(code))
    n_spaces = layout.count(None)
    # After compaction all file blocks occupy exactly this many leading slots.
    n_occupied = len(layout) - n_spaces
    tail_blocks = iterate_over_digits_fix(code, reverse=True)

    # Only the occupied prefix is filled. This avoids the `out[:-0]` pitfall
    # (which would erase everything when there is no free space) and never asks
    # the reverse iterator for more blocks than exist: the number of free slots
    # in the prefix equals the number of file blocks behind it.
    compacted = [
        block if block is not None else next(tail_blocks)
        for block in layout[:n_occupied]
    ]
    return compacted + [None] * n_spaces


def get_checksum_fix(compressed_long_code):
    """Sum ``position * file_id`` over every entry that is not ``None``."""
    return sum(
        position * file_id
        for position, file_id in enumerate(compressed_long_code)
        if file_id is not None
    )


# The list-based pipeline must also give 1928 on the second worked example.
assert get_checksum_fix(build_compressed_code_fix(tests[1][0])) == 1928

In [22]:
# Part 1 on the full puzzle input; the checksum is shown as the cell output.
code = read_file("input.txt")
get_checksum_fix(build_compressed_code_fix(code))


6370402949053

## Part 2

A leaner implementation would be possible:
* 1 loop for parsing the data
* 1 loop with two pointers to create the movements and the final code directly


instead of what is done below:
* 1 loop for parsing the data
* 1 loop to create the movements
* 1 sort of the movements
* 1 loop to create the final code

In [23]:
def extract_sizes(code):
    """Split the dense map into file lengths and free-space lengths.

    Returns:
        ``(id_sizes, space_sizes)``: the integers found at even positions and
        at odd positions of ``code`` respectively.
    """
    buckets = ([], [])
    for position, digit in enumerate(code):
        buckets[position % 2].append(int(digit))
    id_sizes, space_sizes = buckets
    return id_sizes, space_sizes


def get_index_with_space(space_sizes, size) -> None | int:
    """
    Find the first index in space_sizes that can fit the given size.
    """
    return next((i for i, s in enumerate(space_sizes) if s >= size), None)


def get_movements(code):
    """Plan the whole-file moves, visiting files from the highest ID down.

    For each file the first gap that is large enough is looked up; the file is
    moved only when that gap has a smaller index than the file ID. A move
    shrinks the target gap and adds the file's size to the gap just before
    the file's old position.

    Returns:
        ``(movements, space_sizes, ids_moved, id_sizes)`` where ``movements``
        is a list of ``(file_id, size, gap_index)`` sorted by ``gap_index``,
        ``space_sizes`` holds the gap sizes after all moves, ``ids_moved`` is
        the set of moved file IDs and ``id_sizes`` the original file sizes.
    """
    id_sizes, space_sizes = extract_sizes(code)

    movements = []
    ids_moved = set()
    for file_id in reversed(range(len(id_sizes))):
        file_size = id_sizes[file_id]
        gap_index = get_index_with_space(space_sizes, file_size)
        if gap_index is None or gap_index >= file_id:
            continue  # no gap fits, or the first fitting gap is not to the left

        space_sizes[gap_index] -= file_size
        space_sizes[file_id - 1] += file_size
        movements.append((file_id, file_size, gap_index))
        ids_moved.add(file_id)

    # list.sort is stable, so moves into the same gap keep the order in which they were made
    movements.sort(key=lambda move: move[2])
    return movements, space_sizes, ids_moved, id_sizes


def get_final_code(movements, id_sizes, space_sizes, ids_moved):
    """Rebuild the block list after the planned moves have been applied.

    For every file ID, in order, the output receives: the file's own blocks
    (unless it was moved away), then the files that were moved into the gap
    with the same index (in the order they appear in ``movements``), then the
    gap's remaining free blocks as ``None``.

    Args:
        movements: iterable of ``(file_id, size, gap_index)`` tuples.
        id_sizes: size of every file, indexed by file ID.
        space_sizes: remaining size of every gap; may be one entry shorter
            than ``id_sizes`` when the map ends with a file.
        ids_moved: IDs of the files that were moved away from their slot.

    Returns:
        A list with one entry per block: a file ID or ``None`` for free space.
        The arguments are not modified.
    """
    # Group the moves by landing gap once, instead of rescanning the full
    # movement list for every file.
    moved_into = {}
    for moved_id, moved_size, gap_index in movements:
        moved_into.setdefault(gap_index, []).append((moved_id, moved_size))

    final_code_extended = []
    # Iterate over the files rather than zip(id_sizes, space_sizes): zip stops at
    # the shorter list and would drop a trailing file that has no gap after it.
    for id_, id_size in enumerate(id_sizes):
        # First, the file itself if it stayed in place
        if id_ not in ids_moved:
            final_code_extended.extend([id_] * id_size)

        # Second, the files that landed in the gap right after it
        for moved_id, moved_size in moved_into.get(id_, ()):
            final_code_extended.extend([moved_id] * moved_size)

        # Third, whatever free space is left in that gap
        space_size = space_sizes[id_] if id_ < len(space_sizes) else 0
        if space_size > 0:
            final_code_extended.extend([None] * space_size)

    return final_code_extended

In [24]:
# Part 2 on the full puzzle input: plan the moves, rebuild the layout, pin the known answer.
code = read_file("input.txt")
movements, space_sizes, ids_moved, id_sizes = get_movements(code)
final_code = get_final_code(movements, id_sizes, space_sizes, ids_moved)
assert get_checksum_fix(final_code) == 6398096697992

## Other people

In [35]:
# Each entry of D is (tag, length). Enumeration starts at 1, so odd i are files
# (tag = file ID + 1) and even i are free spans (tag 0); a tag's truthiness
# therefore tells files and free space apart.
# NOTE(review): the file handle opened here is never closed explicitly.
D = [
    (i // 2 + 1 if i % 2 else 0, int(d))
    for i, d in enumerate(open("input.txt").read().strip(), 1)
]

# Walk the entries from the right; for each one scan the entries to its left.
for i in range(len(D))[::-1]:
    for j in range(i):
        i_data, i_size = D[i]
        j_data, j_size = D[j]

        # A file (entry i) that fits into a free span (entry j) further left is moved there.
        if i_data and not j_data and i_size <= j_size:
            D[i] = (0, i_size)  # the file's old slot becomes free space
            D[j] = (0, j_size - i_size)  # shrink the free span by the file size
            D.insert(j, (i_data, i_size))  # put the file just before the leftover span
            # NOTE(review): there is no break and the insert shifts every later
            # index by one, so D[i] is a different entry from here on — confirm intended.


# Flatten one level of nesting: a list of the items of every inner iterable.
flatten = lambda x: [x for x in x for x in x]

# Checksum: block position times real file ID (tag - 1), skipping free blocks (tag 0).
print(sum(i * (c - 1) for i, c in enumerate(flatten([d] * s for d, s in D)) if c))

6398096697992


In [32]:
def aoc09_part1():
    """Print the part-1 checksum for the dense disk map stored in ``input.txt``.

    The map is expanded into one entry per block (file ID, or -1 for free
    space). Free slots inside the occupied prefix are filled with file blocks
    taken from the right end, and ``position * file_id`` is summed over that
    prefix.
    """
    # Context manager so the file handle is closed deterministically.
    with open("input.txt") as handle:
        line = handle.read().strip()

    # Build the flat block list with extend; sum(list_of_lists, []) copies the
    # accumulator on every step and is quadratic.
    disk = []
    for idx, c in enumerate(line):
        disk.extend([-1 if idx % 2 else idx // 2] * int(c))

    # Number of file blocks == length of the compacted prefix.
    used = sum(1 for x in disk if x >= 0)
    # File blocks from right to left, consumed lazily (no O(n) list.pop(0)).
    fill = (x for x in reversed(disk) if x >= 0)

    checksum = 0
    for i, v in enumerate(disk[:used]):
        # Free slots in the prefix are matched one-to-one by file blocks behind
        # it, so next() cannot run dry here.
        checksum += i * (v if v >= 0 else next(fill))
    print(checksum)


def aoc09_part2():
    """Print the part-2 checksum for the dense disk map stored in ``input.txt``.

    Files and free spans are kept as ``(position, length)`` pairs. Files are
    visited from the highest ID down; each is relocated to the first free span
    that lies to its left and is long enough. The checksum is computed from the
    final file positions with a closed-form sum.
    """
    # Context manager so the file handle is closed deterministically.
    with open("input.txt") as handle:
        line = handle.read().strip()

    files = []  # (position, length) per file, indexed by file ID
    spans = []  # (position, length) per free span, in disk order
    pos = 0
    for idx, length in enumerate(map(int, line)):
        (spans if idx % 2 else files).append((pos, length))
        pos += length

    for file_id in range(len(files) - 1, -1, -1):  # look at data starting right
        dpos, dlen = files[file_id]
        for j, (spos, slen) in enumerate(spans):  # look at free space starting left
            if spos >= dpos:
                # Spans stay ordered by position, so none of the remaining ones
                # can be left of the file.
                break
            if slen >= dlen:  # can move data to free space
                files[file_id] = (spos, dlen)
                # may leave a 0-length free span behind, which is harmless
                spans[j] = (spos + dlen, slen - dlen)
                break

    # Sum of v * p for p in [dpos, dpos + dlen) equals v * dlen * (2*dpos + dlen - 1) / 2;
    # every term of the doubled sum is even, so the integer division is exact.
    print(
        sum(v * dlen * (2 * dpos + dlen - 1) for v, (dpos, dlen) in enumerate(files))
        // 2
    )


# Run the reference part-2 solution; it prints the checksum for input.txt.
aoc09_part2()

6398096697992
