**Utilities**

In [5]:
from anytree import Node, Resolver, TreeError, RenderTree, PostOrderIter, PreOrderIter
from enum import Enum, auto
from collections import deque

"""
Node's hidden properties:
    parent, children, siblings, ancestors, descendants
    root, leaves, is_root, is_leaf
    height, depth, path, iter_path_reverse (iterate **up** the tree from here)
I'll also give it a kwarg 'size': int.

PostOrderIter lists leaves first. Good for calculating directory size efficiently.
    Use filter_=lambda n: n.is_leaf
"""

def read_line_s(file):
    """
    'Safe' version of TextIO.readline(). This one uses exceptions gracefully and returns the line without EOL characters. For this app, it splits the line into components the app will need.
    """
    line = f.readline()
    if line == '':
        raise EOFError
    return line.rstrip().split(' ')


class Mode(Enum):
    """
    Enumerated state of the state machine implemented in the main app
    """
    CMD = auto()
    RESP = auto()


class FileStructure:
    """
    Implementation of the entire file structure. 'anytree' left the app responsible for too much.

    Properties: pwd, root
    Methods: add('name', size=1), cd('name')
    """

    def __init__(self, root_name: str=''):
        self._root = Node(root_name, size=0)
        self._r = Resolver('name')
        self._pwd = self._root
        # anytree's Walker works with Node object references, not their string names, so now I have to keep a dict of node objects. Ridiculous.
        self._node_bag = dict()
        self._node_bag[root_name] = self._root

    def _clean(self, name: str) -> str:
        if name in ['.', '..']:
            return self._r.get(self._pwd, name).name
        elif name == '/':
            return ''
        else:
            return name

    def __render(self, maxlevel: int):
        _repr = deque()
        for pre, _, node in RenderTree(self._root, maxlevel=maxlevel):
            _repr.append(f'{pre}{node.name} {node.size}')
        # TODO Replace anytree's '' at root node with char from __init__(root_name)
        return '\n'.join(_repr)

    def __repr__(self):
        return 'First 3 levels:\n' + self.__render(maxlevel=3)

    def render(self, maxlevel: any=None):
        """
        Same as __repr__ but this supports 'maxlevel'.
        """
        return self.__render(maxlevel)

    def add(self, name: str, size: int=0):
        n = self._clean(name)
        try:
            self._node_bag[n] = Node(n, parent=self._pwd, size=size)
        except TreeError:
            raise ValueError(f'{name} already exists in file tree.')

    def cd(self, name: str):
        n = self._clean(name)
        self._pwd = self._node_bag[n]

    def get_descendant_files(self, name:str) -> list[str]:
        n = self._clean(name)
        return [i.name for i in PostOrderIter(self._node_bag[n], filter_=lambda _: _.is_leaf)]

    def get_descendant_dirs(self, name:str, incl_this: bool=True) -> list[str]:
        """
        incl_this means to include the directory given by name
        """
        n = self._clean(name)
        _l = [i.name for i in PreOrderIter(self._node_bag[name], filter_=lambda _: not _.is_leaf)]
        if not incl_this:
            _l.remove((n, self._node_bag[n].size))
        return _l

    def set_item_size(self, name: str, size: int):
        n = self._clean(name)
        __item = self._node_bag[n]
        if __item.size > 0:
            raise UserWarning(f'Node {n} already has nonzero size {__item.size}.')
        __item.size = size
        return

    def get_item_size(self, name: str) -> int:
        return self._node_bag[self._clean(name)].size

    @property
    def pwd(self) -> str:
        return self._pwd.name

    @property
    def root(self) -> str:
        return self._root.name

# Test the class
test = FileStructure()
test.add('dir one')
test.add('dir two')
test.cd('dir one')
test.add('dir three')
test.cd('dir three')
test.add('file one', size=21)
test.add('file two', size=22)
test.cd('')
# TODO get_descendant_dirs() set_dir_size() for all
total_size = sum([test.get_item_size(n) for n in test.get_descendant_files('dir one')])
test.set_item_size('dir one', total_size)
test.set_item_size('dir three', test.get_item_size('dir one'))
print('Test tree:\n' + test.render())

Test tree:
 0
├── dir one 43
│   └── dir three 43
│       ├── file one 21
│       └── file two 22
└── dir two 0


**Part 1:**

In [7]:
# Build the file tree
file_tree = FileStructure()
mode = Mode.CMD
with open('../inputs/day7-input') as f:
    # State machine (flat design)
    while True:

        try:
            pieces = read_line_s(f)
        except EOFError:
            break

        print(f'/{file_tree.pwd} → ' + ' '.join(pieces))  # For debugging

        if mode == Mode.RESP:
            # Response mode
            if pieces[0] == 'dir':
                file_tree.add(pieces[1])
                continue
            elif pieces[0].isdigit():
                file_tree.add(pieces[1], int(pieces[0]))
                continue
            else:
                mode = Mode.CMD

        # Command mode
        if pieces[0] == '$':
            if pieces[1] == 'cd':
                file_tree.cd(pieces[2])
            elif pieces[1] == 'ls':
                mode = Mode.RESP  # Switch to Response mode for next line
            else:
                raise ValueError(f'{pieces[1]} is not a valid input command.')

# Add size to all directories
# TODO OH MY FUCKING GOD. There can be a directory and a file with the same name. My 'file bag' dict is useless now. I have to **walk** the tree with cd.
for d_n in file_tree.get_descendant_dirs(''):
    file_tree.set_item_size(d_n, sum([file_tree.get_item_size(f_n) for f_n in file_tree.get_descendant_files(d_n)]))

print('\nDirectory tree:' + file_tree.render())  # For debugging

# TODO Sum directories under size=1e6

/ → $ cd /
/ → $ ls
/ → dir fwbjchs
/ → dir hmnpr
/ → dir jtrbrcjl
/ → dir lcgv
/ → dir ldqc
/ → dir vrvl
/ → $ cd fwbjchs
/fwbjchs → $ ls
/fwbjchs → 154619 wqdlv.mdw
/fwbjchs → 21648 wvbnz
/fwbjchs → $ cd ..
/ → $ cd hmnpr
/hmnpr → $ ls
/hmnpr → 178623 rftqqsrp.bfm
/hmnpr → $ cd ..
/ → $ cd jtrbrcjl
/jtrbrcjl → $ ls
/jtrbrcjl → dir nmbfwc
/jtrbrcjl → dir whqb
/jtrbrcjl → $ cd nmbfwc
/nmbfwc → $ ls
/nmbfwc → 242645 lcgv
/nmbfwc → 256365 wdzw.drg
/nmbfwc → $ cd ..
/jtrbrcjl → $ cd whqb
/whqb → $ ls
/whqb → 161522 mrqgpv.gsm
/whqb → 48062 vpsgcl.gfh
/whqb → $ cd ..
/jtrbrcjl → $ cd ..
/ → $ cd lcgv
/lcgv → $ ls
/lcgv → dir cthtlwds
/lcgv → dir grldv
/lcgv → dir lnztfr
/lcgv → dir vwhf
/lcgv → dir znmzg
/lcgv → $ cd cthtlwds
/cthtlwds → $ ls
/cthtlwds → dir dghvw
/cthtlwds → dir tfwgg
/cthtlwds → $ cd dghvw
/dghvw → $ ls
/dghvw → 107090 jmj.lzh
/dghvw → $ cd ..
/cthtlwds → $ cd tfwgg
/tfwgg → $ ls
/tfwgg → dir ddnfmsjc
/tfwgg → 252616 fvj
/tfwgg → dir gng
/tfwgg → dir lcgv
/tfwgg → 234528

UserWarning: Node nmbfwc already has nonzero size 53472.