In [1]:
# Sample terminal transcript used as the default notebook input: lines that
# start with "$ " are commands (`cd`, `ls`); the other lines are `ls` output,
# either "dir <name>" or "<size> <name>".
test_input = """$ cd /
$ ls
dir a
14848514 b.txt
8504156 c.dat
dir d
$ cd a
$ ls
dir e
29116 f
2557 g
62596 h.lst
$ cd e
$ ls
584 i
$ cd ..
$ cd ..
$ cd d
$ ls
4060174 j
8033020 d.log
5626152 d.ext
7214296 k"""

In [2]:
from dataclasses import dataclass
from typing import Optional, Dict, Tuple, List
from __future__ import annotations
import math

In [3]:
# Input selection: uncomment the next line (and drop the assignment below it)
# to read the transcript from a file instead of using the bundled sample.
# input = open("inputs/7").read()
# NOTE(review): this name shadows the builtin `input()` for the rest of the
# notebook; later cells refer to it by this name.
input = test_input

In [18]:
@dataclass
class Node:
    """A directory in the parsed filesystem tree.

    Iterating over a node yields the node itself and then, depth-first, every
    directory below it (children are visited in ``folders`` insertion order).
    """

    label: str  # directory name
    parent: Optional[Node]  # enclosing directory, or None if there is none
    folders: Dict[str, Node]  # sub-directories keyed by their name
    files: List[Tuple[int, str]]  # (size, name) for files directly in this directory
    size: int  # size slot; make_empty initialises it to 0

    @staticmethod
    def make_empty(label: str, parent: Optional[Node] = None) -> Node:
        """Create a directory with no files, no sub-directories and size 0.

        Declared as a static method so it can be called on the class
        (``Node.make_empty("/")``) and on an instance alike; without the
        decorator an instance call would pass the instance as ``label``.

        Args:
            label: name of the new directory.
            parent: directory that contains the new one, if any.

        Returns:
            A fresh ``Node`` with its own empty ``folders`` dict and ``files`` list.
        """
        return Node(label, parent, folders={}, files=[], size=0)

    def __iter__(self):
        """Yield this directory, then every descendant directory, depth-first."""
        yield self

        for child in self.folders.values():
            # Iterating a child re-enters __iter__, which is what makes this recursive.
            yield from child


In [19]:
def _child_folder(node, name):
    """Return sub-directory ``name`` of ``node``, creating an empty one if absent."""
    if name not in node.folders:
        node.folders[name] = Node.make_empty(name, node)
    return node.folders[name]


def construct_data(instr):
    """Build a directory tree from a terminal transcript.

    The transcript is a sequence of commands, each introduced by ``"$ "``.
    ``ls`` is followed by its listing, one entry per line: ``dir <name>`` for
    a sub-directory or ``<size> <name>`` for a file. Every other command is
    treated as ``cd <destination>`` where the destination is ``/`` (root),
    ``..`` (parent) or the name of a sub-directory.

    Args:
        instr: the whole transcript as a single string.

    Returns:
        The root ``Node`` (labelled ``"/"``). Node sizes are left at 0.

    Raises:
        ValueError: if a listing line contains no space, or its first field is
            neither ``dir`` nor an integer.
        IndexError: if a command other than ``ls`` has no argument.
    """
    root = Node.make_empty("/")
    # Start at the root so a transcript that does not open with `cd /` still parses.
    current_node = root

    # Anything before the first prompt is not a command, hence the [1:].
    for instruction_block in instr.split("$ ")[1:]:
        # splitlines() also copes with "\r\n" endings; blank lines carry no data.
        lines = [line for line in instruction_block.splitlines() if line.strip()]
        if not lines:
            continue  # a bare prompt with nothing after it
        instruction, *results = lines

        if instruction == "ls":
            for entry in results:
                # Split once only, so names that contain spaces stay in one piece.
                kind_or_size, name = entry.split(" ", 1)
                if kind_or_size == "dir":
                    # Keep an already-known sub-directory (and its contents) when
                    # the same directory is listed more than once.
                    _child_folder(current_node, name)
                else:
                    file_record = (int(kind_or_size), name)
                    # Listing a directory twice must not count its files twice.
                    if file_record not in current_node.files:
                        current_node.files.append(file_record)
        else:
            destination = instruction.split(" ", 1)[1]

            if destination == "/":
                current_node = root
            elif destination == "..":
                # The root has no parent; stay put instead of falling off the tree.
                if current_node.parent is not None:
                    current_node = current_node.parent
            else:
                # Entering a directory that was never listed creates it on demand.
                current_node = _child_folder(current_node, destination)

    return root

In [20]:
# Parse the selected transcript; `root` is the top-level "/" node of the tree.
root = construct_data(input)

In [23]:
def compute_sizes(node: Node):
    """Store the cumulative size on ``node`` and on every directory below it.

    A directory's cumulative size is the sum of its own files plus the
    cumulative sizes of all of its sub-directories. The tree is updated in
    place through each node's ``size`` attribute.

    Returns:
        The cumulative size of ``node``.
    """
    own_bytes = 0
    for file_size, _file_name in node.files:
        own_bytes += file_size

    # Recursing here also fills in `size` on each sub-directory.
    nested_bytes = sum(compute_sizes(sub) for sub in node.folders.values())

    node.size = own_bytes + nested_bytes
    return node.size

In [24]:
# compute_sizes writes the cumulative size into every node of the tree in
# place and returns the total for the node it was called on (here the root).
root_size = compute_sizes(root)
root_size

48381165

In [31]:
# Add up the cumulative sizes of all directories that are no larger than
# CHECK_SIZE; iterating `root` visits every directory in the tree.
CHECK_SIZE = 100_000
sum(node.size for node in root if node.size <= CHECK_SIZE)

95437

In [33]:
TOTAL_SIZE = 70_000_000  # capacity that root_size is measured against
FREE_SIZE = 30_000_000  # amount of unused space that has to be reached

unused_space = TOTAL_SIZE - root_size
min_to_clear = FREE_SIZE - unused_space

# Smallest directory whose removal frees enough space. The comparison is
# inclusive: a directory of exactly `min_to_clear` already closes the gap.
# `default` keeps the result at math.inf when no directory qualifies.
min((n.size for n in root if n.size >= min_to_clear), default=math.inf)

24933642