Input

In [72]:
from pathlib import Path 
# Read the two puzzle texts from files named 'sample' and 'input'; the paths are
# relative, so they resolve against the kernel's current working directory.
sample = Path('sample').read_text()
# NOTE: this name shadows the builtin input() for the rest of the notebook.
input = Path('input').read_text()

Parsing

In [73]:
import re
# A prompt line such as "$ ls" or "$ cd foo": `cmd` is the command word and
# `arg` is the optional remainder of the line (None when absent).
commandrx = re.compile(r'^\$ (?P<cmd>\w+)( (?P<arg>.*))?$')
# One line of `ls` output: `meta` is a decimal size (file) or the literal
# "dir" (directory); `name` is the entry name.
lsrx = re.compile(r'^(?P<meta>\d+|dir) (?P<name>.*)$')

def parse(input):
  """Turn a terminal transcript into a list of command records.

  Each `$ cd <name>` line becomes `{'type': 'cd', 'name': <name>}`.
  Each `$ ls` line, together with the listing lines that follow it, becomes
  `{'type': 'ls', 'children': [...]}` where every child is either
  `{'type': 'dir', 'name': ...}` or `{'type': 'file', 'name': ..., 'size': int}`.

  Blank lines are ignored.

  Raises:
    ValueError: if a line is neither a command nor part of an `ls` listing,
      or if the command is not `ls` / `cd <name>`.
  """
  result = []
  lines = input.splitlines(False)
  i = 0

  while i < len(lines):
    line = lines[i]
    lineno = i + 1  # 1-based, for error messages
    i += 1

    if not line.strip():
      continue

    match = commandrx.match(line)
    if match is None:
      # E.g. listing output that is not preceded by `$ ls`.
      raise ValueError(f'line {lineno}: expected a command, got {line!r}')
    cmd = match.groupdict()

    if cmd['cmd'] == 'ls':
      children = []
      # Consume listing lines until the next command (or end of input).
      while i < len(lines) and (ls := lsrx.match(lines[i])):
        ls = ls.groupdict()
        if ls['meta'] == 'dir':
          children.append({ 'type': 'dir', 'name': ls['name'] })
        else:
          children.append({ 'type': 'file', 'name': ls['name'], 'size': int(ls['meta']) })
        i += 1
      result.append({ 'type': 'ls', 'children': children })
    elif cmd['cmd'] == 'cd' and cmd['arg'] is not None:
      result.append({ 'type': 'cd', 'name': cmd['arg'] })
    else:
      # Previously any non-`ls` command was silently recorded as a `cd`.
      raise ValueError(f'line {lineno}: unsupported command {line!r}')

  return result

Solution

In [74]:
def build(commands):
  """Replay parsed `cd` / `ls` records and return the resulting tree.

  Directory nodes are `{'children': {name: node}, 'size': None}`; file nodes
  are `{'size': int}`. Directory sizes are left as None.

  Args:
    commands: list of records shaped like the output of `parse`:
      `{'type': 'cd', 'name': ...}` or `{'type': 'ls', 'children': [...]}`.

  Returns:
    The root directory node.

  Raises:
    ValueError: if a `cd` targets a name already recorded as a file.
  """
  root = { 'children': {}, 'size': None }
  stack = [root]  # path from the root to the current directory

  for cmd in commands:
    cwd = stack[-1]
    if cmd['type'] == 'cd':
      name = cmd['name']
      if name == '..':
        # `cd ..` at the root stays at the root instead of emptying the stack.
        if len(stack) > 1:
          stack.pop()
      elif name == '/':
        stack = [root]
      else:
        # Create the directory on demand so a `cd` that precedes the `ls`
        # which would have listed it does not fail.
        target = cwd['children'].setdefault(name, { 'children': {}, 'size': None })
        if 'children' not in target:
          raise ValueError(f'cannot cd into {name!r}: it is a file')
        stack.append(target)
    else:
      for child in cmd['children']:
        if child['type'] == 'file':
          cwd['children'][child['name']] = { 'size': child['size'] }
        else:
          # setdefault keeps an already-explored subdirectory intact when the
          # same directory is listed more than once.
          cwd['children'].setdefault(child['name'], { 'children': {}, 'size': None })
  return root

def update_sizes(node):
  """Fill in missing sizes below `node` and return the size of `node`.

  A node whose 'size' is already set is returned as-is. Otherwise its size
  becomes the sum of its children's sizes (computed recursively) and is
  stored back on the node.
  """
  known = node['size']
  if known is not None:
    return known

  total = 0
  for entry in node['children'].values():
    total += update_sizes(entry)
  node['size'] = total
  return total

def directories(node):
  """Yield `node` and every descendant that has a 'children' mapping.

  Nodes are produced parent-first, following the insertion order of each
  'children' dict. A node without 'children' yields nothing.
  """
  if 'children' not in node:
    return

  yield node
  for entry in node['children'].values():
    yield from directories(entry)

def solve(input, limit=100000):
  """Sum the sizes of all directories whose total size is at most `limit`.

  Args:
    input: the terminal transcript text.
    limit: inclusive size threshold; defaults to 100000.

  Returns:
    The sum of the qualifying directory sizes. Nested directories each count
    separately, so a file may contribute to several of them.
  """
  root = build(parse(input))
  update_sizes(root)
  # "At most" is inclusive: a directory of exactly `limit` must be counted.
  return sum(node['size'] for node in directories(root) if node['size'] <= limit)

# Check the solver against the known answer for the sample text, then evaluate
# it on the full input (the value of the last expression is the cell output).
assert solve(sample) == 95437
solve(input)

1206825

In [75]:
def solve(input, total_space=70000000, required_space=30000000):
  """Return the size of the smallest directory whose removal frees enough space.

  The tree may occupy at most `total_space - required_space` units; with the
  defaults that budget is 40000000. Any directory at least as large as the
  current overshoot is a candidate, and the smallest candidate wins.

  Args:
    input: the terminal transcript text.
    total_space: capacity of the whole device.
    required_space: free space that must be available afterwards.

  Returns:
    The size of the smallest qualifying directory.
  """
  root = build(parse(input))
  update_sizes(root)
  # Amount by which the used space exceeds the allowed budget.
  cap = root['size'] - (total_space - required_space)
  # Only the minimum is needed, so avoid sorting every candidate.
  return min(node['size'] for node in directories(root) if node['size'] >= cap)

# Check the redefined solver against the known answer for the sample text, then
# evaluate it on the full input (shown as the cell output).
assert solve(sample) == 24933642
solve(input)

9608311