
# Advent of Code 2023

> The effort of using machines to mimic the human mind has always struck me as rather silly. I would rather use them to mimic something better.

-- Edsger W. Dijkstra

## Imports and definitions

In [1]:
#type: ignore
from functools import reduce
from itertools import product, accumulate
from operator import or_, matmul
from math import inf, prod, isqrt
from dataclasses import dataclass
from collections import Counter, defaultdict
import re


def inputfunc(day, kind='lines', testing=False):
    """Build a decorator that feeds a puzzle input file to a function.

    The file is ``test.txt`` when ``testing`` is true and
    ``input/{day}.txt`` otherwise. It is read once, when the decorator is
    applied, and the decorated function is replaced by a zero-argument
    callable that invokes it as ``func(f=<parsed input>)``.

    ``kind`` selects how the file content is parsed:

    - ``'lines'``: list of lines, each stripped of surrounding whitespace;
    - ``'chunks'``: list of blank-line separated chunks, each stripped,
      with empty chunks dropped;
    - ``'single'``: the whole content as a single stripped string;
    - ``'commas'``: list of comma separated fields, each stripped;
    - ``'raw'``: the whole content, untouched.

    Raises:
        ValueError: if ``kind`` is not one of the values listed above.
    """
    parsers = {
        'lines': lambda fh: [x.strip() for x in fh],
        'chunks': lambda fh: [
            x.strip()
            for x in fh.read().split('\n\n')
            if x.strip()
        ],
        'single': lambda fh: fh.read().strip(),
        'commas': lambda fh: [x.strip() for x in fh.read().split(',')],
        'raw': lambda fh: fh.read(),
    }

    # Fail early and explicitly instead of leaving the parsed text
    # undefined until the wrapped function is finally called.
    if kind not in parsers:
        raise ValueError(
            f"unknown input kind {kind!r}; expected one of {sorted(parsers)}"
        )

    filename = 'test.txt' if testing else f"input/{day}.txt"

    def gen(func):
        # The context manager guarantees the file handle is closed.
        with open(filename) as fh:
            text = parsers[kind](fh)

        def inner():
            return func(f=text)
        return inner
    return gen

## [Day 1 - Trebuchet?!](https://adventofcode.com/2023/day/1)

In [2]:
@inputfunc(1)
def input_1(f):
    """Return the Day 1 puzzle input exactly as handed over by ``inputfunc``.

    With the default ``kind='lines'`` that is the list of stripped lines
    read from ``input/1.txt``.
    """
    return f


def find_digits(s):
    """Return every digit character of ``s``, in order, converted to ``int``."""
    digits = []
    for ch in s:
        if ch.isdigit():
            digits.append(int(ch))
    return digits


# Part 1: the calibration value of a line is its first digit followed by
# its last digit, read as a two-digit number.
A = sum(
    10 * digits[0] + digits[-1]
    for digits in map(find_digits, input_1())
)
assert A == 55123


# Spelled-out digits mapped to their numeric value.
d = dict(zip(
    ('one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine'),
    range(1, 10),
))

# The alternation sits inside a zero-width lookahead, so overlapping names
# such as "oneight" are both reported by ``findall``.
regexp = re.compile(f'(?=(\\d|{"|".join(d)}))')


def find_digit_names(s):
    """Return the digits of ``s``, written either as characters or as names.

    Matches are reported in order of appearance; a literal digit is
    converted with ``int`` and a spelled-out digit is looked up in ``d``.
    """
    return [
        int(token) if token.isdigit() else d[token]
        for token in regexp.findall(s)
    ]


# Part 2: same calibration value, but spelled-out digits count as well.
A = sum(
    10 * digits[0] + digits[-1]
    for digits in map(find_digit_names, input_1())
)
assert A == 55260

## [Day 2 - Cube Conundrum](https://adventofcode.com/2023/day/2)

In [3]:
@dataclass
class Game:
    """One game: its id and the cube counts of each draw."""
    gameid: int
    # One Counter per draw, mapping a colour name to the number of cubes
    # shown. ``Counter`` is generic in its key type only.
    contents: list[Counter[str]]

        
@inputfunc(2)
def input_2(f):
    """Yield one ``Game`` per input line.

    A line is split at ``':'`` into a header, whose second word is the
    game id, and a ``';'`` separated list of draws; each draw is a ``','``
    separated list of ``<amount> <colour>`` items.
    """
    for line in f:
        header, draws = line.split(':')
        _label, gameid = header.split()

        contents = []
        for draw in draws.split(';'):
            cubes = Counter()
            for item in draw.split(','):
                amount, colour = item.split()
                cubes[colour] = int(amount)
            contents.append(cubes)

        yield Game(int(gameid), contents)


games = list(input_2())


# Part 1: a game is possible when every draw fits inside the bag.
# ``<=`` is the inclusion test between Counters; ``<`` would be the *proper*
# inclusion and would wrongly reject a draw that empties the bag exactly.
A = sum(
    game.gameid
    for game in games
    if all(
        draw <= Counter({'red': 12, 'green': 13, 'blue': 14})
        for draw in game.contents
    )
)
assert A == 2476


# Part 2: ``|`` on Counters keeps the maximum count per colour, so the
# reduction is the smallest bag that makes the game possible; its "power"
# is the product of those counts.
A = sum(
    prod(reduce(or_, game.contents).values())
    for game in games
)
assert A == 54911

## [Day 3 - Gear Ratios](https://adventofcode.com/2023/day/3)

In [4]:
class Grid:
    """Rectangular grid stored as a list of rows, addressed as ``grid[x, y]``.

    ``x`` is the column and ``y`` the row. ``columns`` is taken from the
    first row; an empty grid has zero rows and zero columns.
    """

    # Offsets (dx, dy) of the eight neighbours of a cell, row by row, so
    # that ``adjacent`` returns them in a fixed, predictable order.
    _NEIGHBOURS = (
        (-1, -1), (0, -1), (1, -1),
        (-1, 0), (1, 0),
        (-1, 1), (0, 1), (1, 1),
    )

    def __init__(self, g):
        self._grid = g
        self.rows = len(g)
        # Guard against an empty grid, which has no first row to measure.
        self.columns = len(g[0]) if g else 0

    def __getitem__(self, t):
        """Return the cell at ``t = (x, y)``."""
        x, y = t
        return self._grid[y][x]

    def adjacent(self, x, y):
        """Return the values of the (up to eight) in-bounds neighbours of ``(x, y)``."""
        return [
            self[x + dx, y + dy]
            for dx, dy in self._NEIGHBOURS
            if 0 <= x + dx < self.columns and 0 <= y + dy < self.rows
        ]
    

@dataclass
class PartNumber:
    """A number located on the grid by ``find_part_numbers``."""
    # Numeric value of the digit run.
    value: int
    # First and last column occupied by the number (both inclusive).
    col_start: int
    col_end: int
    # Row on which the number lies.
    row: int


@inputfunc(3)
def input_3(f):
    """Return the Day 3 schematic as a ``Grid`` with one character per cell."""
    rows = list(map(list, f))
    return Grid(rows)
    

def find_part_numbers(grid, is_symbol):
    """Yield a ``PartNumber`` for every number adjacent to a symbol.

    A number is a maximal horizontal run of digits. It is reported when at
    least one of its digits has a neighbour (diagonals included) for which
    ``is_symbol`` returns true.

    Args:
        grid: object exposing ``rows``, ``columns``, ``grid[x, y]`` and
            ``adjacent(x, y)``.
        is_symbol: predicate applied to the neighbouring cell values.

    Yields:
        ``PartNumber(value, col_start, col_end, row)``, row by row and left
        to right, with both columns inclusive.
    """
    for j in range(grid.rows):
        # ``start`` is the column of the first digit of the current run, or
        # None outside a run. Tracking it explicitly (instead of testing
        # ``val`` for truthiness and measuring ``len(str(val))``) keeps
        # leading zeros from hiding a digit or shifting the start column.
        val, start, symbol_adjacent = 0, None, False

        for i in range(grid.columns):
            c = grid[i, j]
            if c.isdigit():
                if start is None:
                    start = i
                val = 10 * val + int(c)
                if not symbol_adjacent:
                    symbol_adjacent = any(
                        is_symbol(s) for s in grid.adjacent(i, j)
                    )
            else:
                if start is not None and symbol_adjacent:
                    yield PartNumber(val, start, i - 1, j)
                val, start, symbol_adjacent = 0, None, False

        # A run touching the right border is closed by the end of the row.
        if start is not None and symbol_adjacent:
            yield PartNumber(val, start, grid.columns - 1, j)
            
            
# Part 1: a symbol is anything that is neither a digit nor a dot.
A = sum(
    part.value
    for part in find_part_numbers(
        input_3(),
        lambda c: c != '.' and not c.isdigit()
    )
)
assert A == 536576


def find_gear_ratios(grid):
    """Yield, for every ``'*'`` touching exactly two numbers, those two numbers.

    The numbers considered are the ones ``find_part_numbers`` reports as
    adjacent to a ``'*'``; they are indexed by row so that each star only
    inspects its own row and the two rows around it. Each yielded item is a
    list of two ``PartNumber`` objects.
    """
    by_row = defaultdict(list)
    for part in find_part_numbers(grid, lambda c: c == '*'):
        by_row[part.row].append(part)

    for col in range(grid.columns):
        for row in range(grid.rows):
            if grid[col, row] == '*':
                # Same row: the number must start right after the star or
                # end right before it.
                beside = [
                    p for p in by_row[row]
                    if p.col_start == col + 1 or p.col_end == col - 1
                ]
                # Rows above and below: the number's column span must
                # overlap the three columns around the star.
                above_below = [
                    p for p in by_row[row - 1] + by_row[row + 1]
                    if p.col_start <= col + 1 and p.col_end >= col - 1
                ]
                neighbours = beside + above_below

                if len(neighbours) == 2:
                    yield neighbours
            
    
# Part 2: the gear ratio is the product of the two numbers around a gear.
A = sum(
    first.value * second.value
    for first, second in find_gear_ratios(input_3())
)
assert A == 75741499

## [Day 4 - Scratchcards](https://adventofcode.com/2023/day/4)

Part 1 is very simple. The only observation is that it's one of the many variants of the count-distinct problem: let the sets of winning numbers and of the player's numbers be $M$ and $N$ respectively, then $|M \cap N| = |M| + |N| - |M \cup N|$. This has a worst-case lower bound of $\Omega(n \log n)$ [(for proof see: Grigoriev 99)](https://www.semanticscholar.org/paper/Complexity-lower-bounds-for-randomized-computation-Grigoriev/7b0c914a951bc59bb0cee33f197e6418a3c2600b), which may be achieved in a variety of ways.

The code implements the expected linear time randomized solution of merging two hash-sets (technically, its worst-case complexity is $\Theta(n^2)$, but that happens with probability 0).

Part 2 is also very simple. Let $n$ be the number of cards, and let $m$ be the maximum number of numbers appearing in any given card.

Winning cards generate more cards with higher numbers, and the hypotheses of the problem ensure that only copies of cards in the initial range are generated. This is sufficient to easily prove termination, and it suggests a naive algorithm: 

- initialize an array $A[1..n]$ with $A[i] = 1$ for all $1 \le i \le n$, where $A[i]$ represents the copies of card $i$ we have, initially one.
- for all $1 \le j \le n$, compute how many winning numbers $w_j$ the $j$-th card has, and for all $j+1 \le k \le j+w_j$, let $A[k] \leftarrow A[k] + A[j]$
- the final answer is $\sum A[i]$.

If we let $f(m)$ be the complexity of the subroutine that computes how many winning numbers a given card has, it's easy to see that the above algorithm has complexity bounded by $\Theta(n(m + f(m)))$. But because $f(m) \in \Omega(m)$, this is already optimal.

Bonus variant. Imagine that instead of having to compute the matching numbers on the cards, we are instead given an oracle for that, in other words, $f(m) \in \Theta(1)$. Can we do better than $\Theta(mn)$? We sure can, thanks to the prefix trick!

Let us initialize an array $A[1..n+1]$ with $A[i] = 0$ for all $2 \le i \le n$, $A[1] = 1$ and $A[n+1] = -1$.

The idea is that the number of copies of card $i$ we have is represented by $\sum_{j=1}^i A[j]$. If we wish to increase by $h$ the amount of copies of cards $r+1, r+2, \dots, r+w$, it is sufficient to let $A[r+1] \leftarrow A[r+1] + h$ and $A[r+w+1] \leftarrow A[r+w+1] - h$.

Therefore, for each $1 \le j \le n$, we let $w_j$ be the number of winning numbers of the $j$-th card, and $c_j$ be the number of copies of that card. We may simply set $A[j+1] \leftarrow A[j+1] + c_j$ and $A[j+w_j+1] \leftarrow A[j+w_j+1] - c_j$.

At each loop iteration, $c_j$ may be computed as $c_{j-1} + A[j]$ by definition.

The final answer will be:

$$
      \sum_{i=1}^{n} \sum_{j=1}^{i} A[j]
    = \sum_{j=1}^{n} \sum_{i=j}^{n} A[j]
    = \sum_{j=1}^{n} (n-j+1) A[j]
    = \sum_{j=1}^{n+1} (n-j+1) A[j]
$$

The initialization and final computation are linear-time operations, and each loop iteration is constant time, which yields an algorithm of complexity $\Theta(nf(m))$.

This could have been such a beautiful problem :-(


In [5]:
@dataclass
class Scratchcard:
    """A scratchcard: its id, the winning numbers and the numbers we hold."""
    cardid: int
    winning: set[int]
    my: set[int]

    def winning_numbers(self):
        """Return how many of our numbers are winning numbers."""
        matches = self.winning & self.my
        return len(matches)

    def score(self):
        """Return 0 without any match, otherwise 1 doubled for each extra match."""
        matches = self.winning_numbers()
        if matches == 0:
            return 0
        return 2 ** (matches - 1)


@inputfunc(4)
def input_4(f):
    """Yield one ``Scratchcard`` per input line.

    A line is split at ``':'`` into a header, whose second word is the card
    id, and two ``'|'`` separated lists of whitespace separated integers:
    the winning numbers first, our own numbers second.
    """
    for line in f:
        header, numbers = line.split(':')
        _label, cardid = header.split()

        winning_part, my_part = numbers.split('|')
        winning = set(map(int, winning_part.split()))
        my = set(map(int, my_part.split()))

        yield Scratchcard(int(cardid), winning, my)
       
        
# Part 1: total score of the original pile of cards.
A = sum(card.score() for card in input_4())
assert A == 23941


def recursive_scratchcards(cards):
    """Return the total number of scratchcards held once all copies are won.

    A card with ``w`` matches wins one copy of each of the next ``w`` cards
    for every copy of itself. The copies are tracked with a difference
    array: the number of copies of card ``n`` is the sum of
    ``diff[0..n]``, so adding ``c`` copies to a whole range of cards takes
    two updates only.

    Args:
        cards: iterable of objects exposing ``winning_numbers()``.

    Returns:
        The total number of cards, originals included; 0 for no cards.
    """
    wins = [card.winning_numbers() for card in cards]
    total = len(wins)
    if not total:
        return 0

    # One original of the first card; the trailing -1 makes the array sum
    # to zero and sits in the slot that carries weight 0 in the final sum.
    diff = [1] + [0] * (total - 1) + [-1]

    copies = 0
    for n, w in enumerate(wins):
        copies += diff[n]
        diff[n + 1] += copies
        # Clamp to the last slot: copies can only be won for cards that
        # exist, and that slot does not contribute to the result.
        diff[min(n + w + 1, total)] -= copies

    # diff[j] is counted once for each card from j to the last one.
    return sum(weight * delta for weight, delta in enumerate(reversed(diff)))
    

# Part 2: total number of cards once every won copy has been processed.
all_cards = input_4()
A = recursive_scratchcards(all_cards)
del all_cards
assert A == 5571760

## [Day 5 - If You Give A Seed A Fertilizer](https://adventofcode.com/2023/day/5)

The mapping represented by the problem is a series of piecewise linear maps $f_1, f_2, \dots f_n$. The crucial observation is this: because a linear map sends compacts into compacts, then a piecewise linear map sends a finite union of compacts into a finite union of compacts.

The composition of two piecewise linear maps may be computed entirely symbolically, as can the image of a finite union of compacts through it. 

It is easy to see that the combination of these two sub-problems solves the original problem: one may compute the union of the starting sets (seeds) $U = U_1 \cup U_2 \dots \cup U_n$, the composition $F = f_n \circ f_{n-1} \circ \dots \circ f_1$ and the final answer $\inf F(U)$. Alternatively, we may directly compute $f_n(f_{n-1}( \dots f_1(x) \dots ))$.

### Lemma: Computing the intersections of two sorted sets of pairwise disjoint intervals

Let $\iota_i = [\alpha_i, \beta_i)$ be $m$ pairwise disjoint intervals and $\kappa_i = [\phi_i, \psi_i)$ be $n$ pairwise disjoint intervals. Further, assume that $\alpha_1 \le \beta_1 \le \alpha_2 \le \beta_2 \le \dots \le \alpha_m \le \beta_m$ and likewise $\phi_1 \le \psi_1 \le \phi_2 \le \psi_2 \le \dots \le \phi_n \le \psi_n$.

Their intersections may be computed in time bounded by $\Theta(n + m)$, and they are at most $n + m$.

This may be done by performing a merge join:

- Let $a \leftarrow 1, b \leftarrow 1$.
- While $a \le m \wedge b \le n$, compute $I = \iota_a \cap \kappa_b$ and report it if $I \neq \emptyset$, then increment $a$ if $\beta_a \le \psi_b$, increment $b$ otherwise.

This works because the while loop preserves the following invariant: all unreported intersections lie to the right of $\min(\alpha_a, \phi_b)$.

To see that there are at most $n + m$ intersections it is sufficient to observe that intersections may only be created at boundary points.

### Step 1: Computing the composition of two piecewise linear maps

Let $f$ and $g$ be two piecewise linear maps, that is, functions in the form:

$$
f(x) = \left\{
    \begin{array}{ll}
        x + c_1 & \text{if } x \lt \alpha_1 \\
        x + c_2 & \text{if } \alpha_1 \le x \lt \alpha_2 \\
        \dots \\
        x + c_{m-1} & \text{if } \alpha_{m-1} \le x \lt \alpha_{m} \\
        x + c_m & \text{if } \alpha_m \le x \\
    \end{array}
\right.
$$

$$
g(x) = \left\{
    \begin{array}{ll}
        x + d_1 & \text{if } x \lt \beta_1 \\
        x + d_2 & \text{if } \beta_1 \le x \lt \beta_2 \\
        \dots \\
        x + d_{n-1} & \text{if } \beta_{n-1} \le x \lt \beta_{n} \\
        x + d_n & \text{if } \beta_n \le x \\
    \end{array}
\right.
$$

for arbitrary constants $\{c_i\}, \{d_i\}$, $m$ pairwise disjoint intervals $A_i = [\alpha_i, \alpha_{i+1})$, and $n$ pairwise disjoint intervals $B_i = [\beta_i, \beta_{i+1})$. For the sake of convenience, without loss of generality, we take all intervals to be closed on the left and open on the right.

We wish to compute $g \circ f$.

First, we sort the $B_i$. This takes time $\Theta(n \log n)$. Then, we compute and sort the images $\Lambda_i = f(A_i)$ of each of the intervals on which $f$ is defined. This takes time $\Theta(m \log m)$. The intersection of these two sorted sets may be computed using the lemma.

But if $\Sigma_{ij} = \Lambda_i \cap B_j \neq \emptyset$ for some $i, j$, that means that on the preimage $f^{-1}(\Sigma_{ij})$ it must hold that $(g \circ f)(x) = x + c_i + d_j$.

Repeated over all intersections, this does in fact determine $g \circ f$ as a piecewise linear function.

### Step 2: Computing the image of a finite union of intervals through a piecewise linear map

We can use the same idea as before. Let the intervals be $U_1, U_2, \dots U_n$ with $U$ being their union, and let the piecewise linear map be $f$ with the same notation as the above paragraph.

We sort the $U_i$ and the $A_i$ and find their intersections.

If $\Sigma_{ij} = A_i \cap U_j \neq \emptyset$ for some $i, j$, then on $\Sigma_{ij}$ it holds $f(x) = x + c_i$.

The union of all $f(\Sigma_{ij})$ is exactly $f(U)$. Those intervals may be overlapping, but it's very easy to find their "clean" union: sort the intervals by their initial point, look at them in order, and replace each pair of overlapping intervals with their union by resizing the endpoints.

### Complexity

Both for the direct computation option (use Step 2 to compute $f_n(f_{n-1}( \dots f_1(x) \dots ))$ directly) and for the composition option (use Step 1 to compute $F = f_n \circ f_{n-1} \circ \dots \circ f_1$, then use Step 2 to compute $F(U)$), the complexity depends on the number of intersections.

If the procedure from the lemma produces $s$ intersections, the resulting output union of intervals (if we are using the first option) or the map composition (if we are using the second option) will have $s$ intervals. Because we know that intersecting sets of intervals of sizes $m$ and $n$ produces at most $n+m$ intersections, we conclude that both variants of the algorithm have complexity $\Theta(h \log h)$ where $h$ is the sum of the number of intervals of all the original piecewise linear maps.

### Bonus
Because all of this only uses properties of linear maps, it can be generalized further to "actual" linear maps, where each of the cases can be in the form $f(x) = ax + b$ with relatively little effort. And, best of all, it works on _real numbers_ with the only additional effort of bookkeeping on the interval boundaries.

In [6]:
@inputfunc(5, 'chunks')
def input_5(f):
    """Parse the Day 5 almanac into ``(seeds, maps)``.

    The first chunk holds the seed numbers after a ``':'``. Every other
    chunk is one map: its first line is skipped and each remaining line
    becomes a tuple of the integers found on it.
    """
    list_seeds = [int(token) for token in f[0].split(':')[1].split()]

    list_maps = []
    for chunk in f[1:]:
        rows = chunk.split('\n')[1:]
        list_maps.append([tuple(map(int, row.split())) for row in rows])

    return list_seeds, list_maps


@dataclass
class IntRange:
    lo: int | float
    hi: int | float

    __bool__ = lambda s: s.lo < s.hi
    __lt__ = lambda s, o: (s.lo, s.hi) < (o.lo, o.hi)
    __and__ = lambda s, o: IntRange(max(s.lo, o.lo), min(s.hi, o.hi))


@dataclass
class LinearMap:
    """Translation ``x -> x + delta`` acting on ranges."""
    delta: int

    def __call__(self, x):
        """Return the image of the range ``x``: both bounds shifted by ``delta``."""
        return IntRange(x.lo + self.delta, x.hi + self.delta)

    def __invert__(self):
        """Return the inverse translation."""
        return LinearMap(-self.delta)

    def __matmul__(self, other):
        """Return the composition of the two translations (their deltas add up)."""
        return LinearMap(self.delta + other.delta)

    # A static method, so that it can be reached through an instance as
    # well as through the class.
    @staticmethod
    def I():
        """Return the identity translation."""
        return LinearMap(0)


class IntRangeUnion:
    """Union of ranges, stored as a sorted list of disjoint ranges.

    ``union`` holds the merged ranges: any two input ranges that overlap
    or touch are fused into one.
    """

    def __init__(self, pieces):
        self.union = []
        for p in sorted(pieces):
            if self.union and p.lo <= self.union[-1].hi:
                last = self.union[-1]
                # ``p`` may lie entirely inside ``last``: never shrink it.
                last.hi = max(last.hi, p.hi)
            else:
                # Store a copy so that later merges never alter the
                # caller's own range objects.
                self.union.append(IntRange(p.lo, p.hi))

    @staticmethod
    def from_list(seq):
        """Build a union from a flat ``[start, length, start, length, ...]`` list."""
        return IntRangeUnion([
            IntRange(seq[l], seq[l] + seq[l+1])
            for l in range(0, len(seq), 2)
        ])

    def inf(self):
        """Return the lowest bound of the union.

        Raises ``IndexError`` when the union holds no range.
        """
        return self.union[0].lo


class PiecewiseLinearMap:
    """Map defined by a translation on each of a set of disjoint ranges.

    ``pieces`` is a list of ``(IntRange, LinearMap)`` pairs sorted by range.
    Points that belong to no piece are outside the domain of the map.
    """

    @staticmethod
    def I():
        """Return the identity map on the whole line."""
        return PiecewiseLinearMap([(IntRange(-inf, +inf), LinearMap.I())])

    def __init__(self, pieces):
        # Sort on the range alone: ``LinearMap`` defines no ordering, so
        # comparing whole tuples would fail on two equal ranges.
        self.pieces = sorted(pieces, key=lambda piece: piece[0])

    @staticmethod
    def from_list(m):
        """Build a map from ``(dest, source, length)`` triples.

        Each triple sends ``[source, source + length)`` onto
        ``[dest, dest + length)``. Everything not covered by a triple (before
        the first range, after the last one and in any gap in between) is
        mapped to itself. An empty list gives the identity map.
        """
        explicit = sorted(
            (
                (IntRange(source, source + length), LinearMap(dest - source))
                for dest, source, length in m
            ),
            key=lambda piece: piece[0],
        )

        pieces, cursor = [], -inf
        for rng, shift in explicit:
            if cursor < rng.lo:
                # Uncovered stretch: identity.
                pieces.append((IntRange(cursor, rng.lo), LinearMap.I()))
            pieces.append((rng, shift))
            cursor = max(cursor, rng.hi)
        pieces.append((IntRange(cursor, inf), LinearMap.I()))

        return PiecewiseLinearMap(pieces)

    def __matmul__(self, other):
        """Return the map "apply ``self`` first, then ``other``".

        The image of every piece of ``self`` is intersected with the pieces
        of ``other``; each non-empty intersection gives one piece of the
        result, defined on its preimage. Images are visited by increasing
        lower bound, so the first candidate piece of ``other`` only ever
        moves forward; the scan restarts from it for each image, which
        keeps the result complete even when two images overlap.
        """
        def merge():
            images = sorted(
                ((f(a), f) for a, f in self.pieces),
                key=lambda pair: pair[0],
            )
            targets = other.pieces
            first = 0
            for image, f in images:
                # Pieces ending at or before this image cannot meet it,
                # nor any later image.
                while first < len(targets) and targets[first][0].hi <= image.lo:
                    first += 1
                k = first
                while k < len(targets) and targets[k][0].lo < image.hi:
                    b, g = targets[k]
                    if s := (image & b):
                        yield (~f)(s), f @ g
                    k += 1

        return PiecewiseLinearMap(merge())

    def __call__(self, x):
        """Return the image of the range union ``x`` as an ``IntRangeUnion``.

        Merge join between the pieces of the map and the ranges of
        ``x.union``, both sorted and made of disjoint ranges.
        """
        def merge():
            pieces, ranges = self.pieces, x.union
            i = j = 0
            # Explicit indices: an empty range is falsy and must not be
            # mistaken for the end of either sequence.
            while i < len(pieces) and j < len(ranges):
                a, f = pieces[i]
                b = ranges[j]
                if s := a & b:
                    yield f(s)
                if a.hi < b.hi:
                    i += 1
                else:
                    j += 1

        return IntRangeUnion(merge())
         
          
seeds, maps = input_5()
parsed_maps = list(map(PiecewiseLinearMap.from_list, maps))

# Option 1: fold all the maps into one piecewise map, applied in one go.
piecewise_map = reduce(matmul, parsed_maps, PiecewiseLinearMap.I())


# Option 2: push the ranges through the maps one after the other.
def map_composition(x):
    for step in parsed_maps:
        x = step(x)
    return x


# Part 1: every seed is a range holding that single number.
part_1_input = IntRangeUnion([IntRange(seed, seed + 1) for seed in seeds])
A = piecewise_map(part_1_input).inf()
B = map_composition(part_1_input).inf()
assert A == B == 510109797


# Part 2: the seed list is read as (start, length) pairs.
part_2_input = IntRangeUnion.from_list(seeds)
A = piecewise_map(part_2_input).inf()
B = map_composition(part_2_input).inf()
assert A == B == 9622622


## [Day 6 - Wait For It](https://adventofcode.com/2023/day/6)

Solvable entirely with pen and paper!

If the available time is $t$, the initial press of the button has length $x$ and the record is $d$, the integer values of $x$ that beat the record are those that satisfy

$$
x(t-x) > d
$$

The quadratic equation has solutions:

$$
x_{1,2} = \frac{t \pm \sqrt{t^2 - 4d}}{2}
$$

Taking $x_1$ to be the larger root and $x_2$ the smaller one, the interval $(x_2, x_1)$ contains exactly $\left \lfloor x_1 \right \rfloor - \left \lfloor x_2 \right \rfloor$ integers... unless $x_1$ and $x_2$ are integers, in which case it contains one less.

In [7]:
@inputfunc(6)
def input_6(f):
    """Return the words of the first two input lines, labels dropped.

    The result is a pair of lists of strings: the first line (times) and
    the second line (distances), each without its leading word.
    """
    times_line, distances_line = f[0], f[1]
    return times_line.split()[1:], distances_line.split()[1:]


def integer_solutions(t, d):
    """Count the integers ``x`` such that ``x * (t - x) > d``.

    The solutions are the integers lying strictly between the two roots
    ``(t ± sqrt(t**2 - 4*d)) / 2``. Only integer arithmetic is used, so
    the count is exact for arbitrarily large inputs.

    Args:
        t: available time.
        d: record distance to beat.

    Returns:
        The number of integer solutions; 0 when the record cannot be beaten.
    """
    disc = t * t - 4 * d
    if disc <= 0:
        # No real root, or a double root: nothing lies strictly in between.
        return 0

    root = isqrt(disc)
    if root * root == disc:
        # Both roots are integers (t and root share the same parity) and
        # are excluded by the strict inequality.
        return root - 1

    # Irrational roots: count the x with  t - root <= 2x <= t + root.
    return (t + root) // 2 - (t - root - 1) // 2


time, distance = input_6()


# Part 1: one race per column; multiply the number of ways to win each.
A = prod(map(integer_solutions, map(int, time), map(int, distance)))
assert A == 2756160


# Part 2: the columns are glued together into one single race.
single_time = int(''.join(time))
single_distance = int(''.join(distance))
A = integer_solutions(single_time, single_distance)
del single_time, single_distance
assert A == 34788142