**Utilities**

In [1]:
from anytree import Node, Resolver, RenderTree, PreOrderIter
from enum import Enum, auto
from collections import deque

"""
Node's hidden properties:
    parent, children, siblings, ancestors, descendants
    root, leaves, is_root, is_leaf
    height, depth, path, iter_path_reverse (iterate **up** the tree from here)
I'll also give it a kwarg 'size': int.

PostOrderIter lists leaves first. Good for calculating directory size efficiently.
    Use filter_=lambda n: n.is_leaf
"""

def read_line_s(file):
    """
    'Safe' version of TextIO.readline(). This one uses exceptions gracefully and returns the line without EOL characters. For this app, it splits the line into components the app will need.
    """
    line = f.readline()
    if line == '':
        raise EOFError
    return line.rstrip().split(' ')


class Mode(Enum):
    """
    Enumerated state of the state machine implemented in the main app
    """
    CMD = auto()
    RESP = auto()


class FileStructure:
    """
    A stateful app interface to the stateless "anytree" API which implements a tree structure for the "file system".
    """

    _root_name = '__r__'  # anytree seems happier overall when I use a named root node

    def __init__(self):
        self._root = Node(self._root_name, size=0, is_dir=True)
        self._r = Resolver('name')
        self._pwd = self._root

    def _clean(self, name: str) -> str:
        """
        Converts app input special paths to anytree-tolerant paths. Assumes no compound pathing in app input (i.e. "/one/two" or "../three")
        """
        if name == '.':
            return self._pwd.name
        if name == '..':
            _p = self._pwd.parent
            # anytree returns None for "cd .." from root node
            return self._root_name if _p is None else _p.name
        elif name in ['', '/']:
            return self._root_name
        return name

    def __find_dir(self, name: str) -> Node:
        n = self._clean(name)
        # ASSUME No two directories share the same name
        if n == self._root_name:
            return self._root  # Quickly handle the built-in root directory
        # It's generally faster to walk from root to the node than from pwd
        for i in PreOrderIter(self._root, filter_=lambda _: _.is_dir):
            if i.name == n:
                return i

    @staticmethod
    def __incr_dir_sizes(file: Node):
        # I chose to implement this as an incremental, internalized method because the app already defines an incremental interface to the file system. (Its methods are only add() and cd(). The class already holds a lot of state information to provide the incremental interface, so why not have it statefully hold directory sizes and update them with each added file? A stateless interface would be nicer, but it doesn't make sense for this app.
        for a in file.ancestors:
            a.size += file.size
        return

    def render(self, maxlevel: any=None):
        _repr = deque()
        for pre, _, node in RenderTree(self._root, maxlevel=maxlevel):
            _repr.append(f'{pre}{node.name} {node.size}')
        _repr.appendleft(_repr.popleft().replace(self._root_name, '/'))
        return '\n'.join(_repr)

    def add(self, name: str, is_dir: bool, f_size: int=0):
        # Override size for directories so __incr_dir_size() works correctly
        _i = Node(self._clean(name), parent=self._pwd, size=f_size if not is_dir else 0, is_dir=is_dir)
        if not is_dir:
            self.__incr_dir_sizes(_i)

    def cd(self, name: str):
        self._pwd = self.__find_dir(self._clean(name))

    @property
    def pwd(self) -> str:
        return self._pwd.name.replace(self._root_name, '/')

    def list_sub_dirs(self, name: str) -> list[tuple[str, int]]:
        return [(i.name, i.size) for i in PreOrderIter(self.__find_dir(self._clean(name)), filter_=lambda _: _.is_dir)]

# Test the class
test = FileStructure()
test.add('one', is_dir=True)
test.add('one', is_dir=False, f_size = 33)
test.cd('one')
test.add('two', is_dir=True)
test.cd('two')
test.cd('.')
test.add('three', is_dir=False, f_size=21)
test.add('two', is_dir=False, f_size=22)
test.cd('..')
test.cd('/')
test.cd('..')
print(f'pwd: {test.pwd}')
print('Test tree:\n' + test.render())

pwd: /
Test tree:
/ 76
├── one 43
│   └── two 43
│       ├── three 21
│       └── two 22
└── one 33


**Part 1:**

In [4]:
# Build the file tree
file_tree = FileStructure()
mode = Mode.CMD
with open('../inputs/day7-input') as f:
    # State machine (flat design)
    while True:

        try:
            pieces = read_line_s(f)
        except EOFError:
            break

        print(f'{file_tree.pwd} → ' + ' '.join(pieces))  # For debugging

        if mode == Mode.RESP:
            # Response mode
            if pieces[0] == 'dir':
                file_tree.add(pieces[1], is_dir=True)
                continue
            elif pieces[0].isdigit():
                file_tree.add(pieces[1], is_dir=False, f_size=int(pieces[0]))
                continue
            else:
                mode = Mode.CMD

        # Command mode
        if pieces[0] == '$':
            if pieces[1] == 'cd':
                file_tree.cd(pieces[2])
            elif pieces[1] == 'ls':
                mode = Mode.RESP  # Switch to Response mode for next line
            else:
                raise ValueError(f'{pieces[1]} is not a valid input command.')

print('\nDirectory tree:\n' + file_tree.render())  # For debugging

#  Sum directories with size <= 1e6
dirs = file_tree.list_sub_dirs('/')
small_sizes = [s for n, s in dirs if s <= 100000]
print(f'Total size of small directories: {sum(small_sizes)}')

/ → $ cd /
/ → $ ls
/ → dir fwbjchs
/ → dir hmnpr
/ → dir jtrbrcjl
/ → dir lcgv
/ → dir ldqc
/ → dir vrvl
/ → $ cd fwbjchs
fwbjchs → $ ls
fwbjchs → 154619 wqdlv.mdw
fwbjchs → 21648 wvbnz
fwbjchs → $ cd ..
/ → $ cd hmnpr
hmnpr → $ ls
hmnpr → 178623 rftqqsrp.bfm
hmnpr → $ cd ..
/ → $ cd jtrbrcjl
jtrbrcjl → $ ls
jtrbrcjl → dir nmbfwc
jtrbrcjl → dir whqb
jtrbrcjl → $ cd nmbfwc
nmbfwc → $ ls
nmbfwc → 242645 lcgv
nmbfwc → 256365 wdzw.drg
nmbfwc → $ cd ..
jtrbrcjl → $ cd whqb
whqb → $ ls
whqb → 161522 mrqgpv.gsm
whqb → 48062 vpsgcl.gfh
whqb → $ cd ..
jtrbrcjl → $ cd ..
/ → $ cd lcgv
lcgv → $ ls
lcgv → dir cthtlwds
lcgv → dir grldv
lcgv → dir lnztfr
lcgv → dir vwhf
lcgv → dir znmzg
lcgv → $ cd cthtlwds
cthtlwds → $ ls
cthtlwds → dir dghvw
cthtlwds → dir tfwgg
cthtlwds → $ cd dghvw
dghvw → $ ls
dghvw → 107090 jmj.lzh
dghvw → $ cd ..
cthtlwds → $ cd tfwgg
tfwgg → $ ls
tfwgg → dir ddnfmsjc
tfwgg → 252616 fvj
tfwgg → dir gng
tfwgg → dir lcgv
tfwgg → 234528 mpb
tfwgg → 181198 pzqgf.cjd
tfwgg → dir 