In [None]:
import os
import sys

sys.path.insert(0, os.path.abspath("../utils"))
from aoc_utils import load_data, check

In [None]:
from collections import defaultdict, deque
from itertools import accumulate, count, islice
from pathlib import Path

In [None]:
# Load the puzzle input through the aoc_utils helper; the arguments are
# presumably (year, day) -- TODO confirm against aoc_utils.load_data.
data = load_data(2024, 9)

In [None]:
# Each case is a tuple: (puzzle input, expected part 1, expected part 2).
tests = [("2333133121414131402", 1928, 2858)]

# Optional performance inputs provided by Standard_Bar8402
# (https://www.reddit.com/r/adventofcode/comments/1haauty/).
# A case is only registered when its input file is present on disk.
for filename, part1, part2 in (
    ("inputs/test_input_19-09_01.txt", 63614979355824, 97898222299196),
    ("inputs/test_input_19-09_02.txt", 4620970906611856, 5799706413896802),
):
    if not Path(filename).is_file():
        continue
    tests += [(Path(filename).read_text(), part1, part2)]

# Part 1

In [None]:
def move_checksum(data):
    """Return the checksum of the disk after block-wise compaction (part 1).

    ``data`` is the disk map: a string of digits that alternate between the
    length of a file and the length of the free span that follows it.  File
    ids are assigned in order of appearance.  Free spans are filled from left
    to right with blocks taken from the rightmost remaining file, and the
    checksum is the sum of ``position * file id`` over all file blocks.

    Surrounding whitespace (e.g. the trailing newline of a file read with
    ``read_text()``) is ignored, and an empty map has a checksum of 0.
    """
    lengths = [int(c) for c in data.strip()]
    if not lengths:
        return 0
    # represent the remaining files as (id, length)
    # e.g. [(0, 2), (1, 3), ..., (9, 2)]
    # a deque removes from either end in O(1); list.pop(0) is O(n)
    file_sectors = deque(enumerate(lengths[::2]))
    checksum = 0
    pos = 0
    for i, length in enumerate(lengths):
        if i % 2 == 1:
            # free span: fill it with blocks from the rightmost files
            while length and file_sectors:
                idx, file_length = file_sectors.pop()
                if file_length > length:
                    # only part of the file fits; put the remainder back
                    file_sectors.append((idx, file_length - length))
                    file_length = length
                # idx * (pos + (pos + 1) + ... + (pos + file_length - 1))
                checksum += idx * (2 * pos + file_length - 1) * file_length // 2
                length -= file_length
                pos += file_length
        else:
            # file span: the leftmost remaining file stays where it is
            idx, file_length = file_sectors.popleft()
            checksum += idx * (2 * pos + file_length - 1) * file_length // 2
            pos += file_length
        if not file_sectors:
            return checksum
    # every file is consumed before the map runs out
    raise AssertionError

In [None]:
# Run move_checksum against the cases in `tests` through aoc_utils.check
# (presumably compared with each case's part 1 value -- TODO confirm),
# then evaluate it on the puzzle input so the cell displays the result.
check(move_checksum, tests)
move_checksum(data)

# Part 2

In [None]:
def defrag_checksum(data):
    """Return the checksum of the disk after whole-file compaction (part 2).

    ``data`` is the disk map: a string of digits that alternate between the
    length of a file and the length of the free span that follows it.  Free
    spans are visited from left to right, and each one is filled with the
    rightmost not-yet-placed files that fit entirely in the space left in it.
    Files that are never moved keep their original position.

    Surrounding whitespace (e.g. the trailing newline of a file read with
    ``read_text()``) is ignored, and an empty map has a checksum of 0.
    """
    lengths = [int(c) for c in data.strip()]
    if not lengths:
        return 0
    # represent files as (id, length, position)
    # e.g. [(0, 2, 0), (1, 3, 5), ..., (9, 2, 40)]
    files = list(
        zip(
            count(),
            lengths[::2],
            # position (cumulative sum), keeping only the file entries
            islice(accumulate([0, *lengths]), 0, None, 2),
        ),
    )
    checksum = 0
    pos = 0
    # original positions of the files that have been moved into a free span
    done = set()
    # per free length: an index in `files` above which no file fits.
    # Removing entries only shifts the remaining ones towards lower indices,
    # so a stored index stays a valid starting point for the next scan.
    rightmosts = defaultdict(lambda: len(files) - 1)
    for i, length in enumerate(lengths):
        if i % 2 == 1:
            while length:
                # find the rightmost file that fits in the remaining space
                which = min(rightmosts[length], len(files) - 1)
                while which >= 0 and files[which][1] > length:
                    which -= 1
                rightmosts[length] = which
                if which < 0:
                    # nothing fits: skip the rest of this free span
                    pos += length
                    length = 0
                else:
                    idx, sector_length, file_pos = files.pop(which)
                    done.add(file_pos)
                    # idx * (pos + (pos + 1) + ... + (pos + sector_length - 1))
                    checksum += idx * (2 * pos + sector_length - 1) * sector_length // 2
                    length -= sector_length
                    pos += sector_length
        else:
            if pos not in done:
                # the file originally stored here was not moved: it is the
                # leftmost remaining file and is counted in place
                idx, sector_length, _ = files.pop(0)
                checksum += idx * (2 * pos + sector_length - 1) * sector_length // 2
            pos += length
        if not files:
            return checksum
    # every file is accounted for before the map runs out
    raise AssertionError

In [None]:
# Run defrag_checksum against the cases in `tests` through aoc_utils.check;
# the extra argument 2 presumably selects the part 2 expected value -- TODO
# confirm. Then evaluate it on the puzzle input so the cell displays the result.
check(defrag_checksum, tests, 2)
defrag_checksum(data)

# First working solution

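These are simpler solutions that work fine with the original input (about 80 ms for part 1 and 300 ms for part 2), but they do not scale as well as the versions above. Note that they reuse the names `move_checksum` and `defrag_checksum`, so running the cells below replaces the faster implementations.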

In [None]:
def move_checksum(data):
    """Return the part 1 checksum by simulating the disk one block at a time.

    ``data`` is the disk map: a string of digits that alternate between the
    length of a file and the length of the free span that follows it.  Every
    file is expanded into one entry per block; file spans then consume
    entries from the left and free spans consume entries from the right.

    Surrounding whitespace (e.g. the trailing newline of a file read with
    ``read_text()``) is ignored, and a map without any file block has a
    checksum of 0.
    """
    data = data.strip()
    # expand files into sectors
    # e.g. 001112333...99
    file_sectors = [i for i, v in enumerate(data[::2]) for _ in range(int(v))]
    if not file_sectors:
        # nothing on the disk (empty map or only zero-length files)
        return 0
    checksum = 0
    free = False
    pos = 0
    for c in data:
        for _ in range(int(c)):
            if free:
                # rightmost sectors
                checksum += pos * file_sectors.pop()
            else:
                # leftmost sectors
                checksum += pos * file_sectors.pop(0)
            if not file_sectors:
                return checksum
            pos += 1
        free = not free
    # unreachable: the map's file spans consume every block at the latest
    raise AssertionError

In [None]:
# Same check-then-run steps as for the faster part 1 solver, now exercising
# the simpler move_checksum defined in the preceding cell.
check(move_checksum, tests)
move_checksum(data)

In [None]:
import numpy as np

In [None]:
def defrag_checksum(data):
    """Return the part 2 checksum, moving whole files from right to left.

    ``data`` is the disk map: a string of digits that alternate between the
    length of a file and the length of the free span that follows it.  Files
    are visited from the highest id to the lowest; each one is moved to the
    leftmost free span that can hold it entirely, provided that span lies to
    the left of the file, and otherwise stays where it is.

    Surrounding whitespace (e.g. the trailing newline of a file read with
    ``read_text()``) is ignored.  The result is always a plain Python ``int``.
    """
    files = []  # (id, length, position)
    free_lengths = []
    free_positions = []
    pos = 0
    for i, c in enumerate(data.strip()):
        length = int(c)
        if i % 2 == 0:
            files.append((i // 2, length, pos))
        elif length:
            # empty free spans can never receive a file
            free_lengths.append(length)
            free_positions.append(pos)
        pos += length
    # explicit integer dtype: an empty list would otherwise become float64
    free_lengths = np.array(free_lengths, dtype=np.int64)
    free_positions = np.array(free_positions, dtype=np.int64)
    checksum = 0
    for idx, length, pos in reversed(files):
        target = pos  # where the file ends up; defaults to staying in place
        candidates = free_lengths >= length
        if candidates.any():
            # convert to a Python int so that the checksum arithmetic below is
            # arbitrary precision rather than fixed-width NumPy scalar math
            free_pos = int(free_positions[candidates].min())
            if free_pos < pos:
                # move the file and shrink the free span it now occupies
                target = free_pos
                free_idx, *_ = (free_positions == free_pos).nonzero()
                free_lengths[free_idx] -= length
                free_positions[free_idx] += length
                # since we move files right to left, we fortunately do not
                # have to put reclaimed space back into the pool
        # idx * (target + (target + 1) + ... + (target + length - 1))
        checksum += idx * (2 * target + length - 1) * length // 2
    return checksum

In [None]:
# Same check-then-run steps as for the faster part 2 solver, now exercising
# the NumPy-based defrag_checksum defined in the preceding cell.
check(defrag_checksum, tests, 2)
defrag_checksum(data)