In [1]:
class Directory:
    """A directory node in an in-memory file-system tree.

    Each node records the files stored directly inside it (name -> size),
    its child directories (name -> Directory) and a link to its parent.
    """

    def __init__(self, name: str, parent: "Directory | None" = None) -> None:
        """Create an empty directory.

        Args:
            name: Name of this directory.
            parent: Enclosing directory, or None when this node has no parent.
        """
        self.name = name
        # File name -> file size, for files directly inside this directory.
        self.files: dict[str, int] = {}
        # Directory name -> child node.
        self.subdirectories: dict[str, Directory] = {}
        self.parent = parent

    def add_file(self, file_name: str, file_size: int) -> None:
        """Register a file; a name that is already present keeps its first size."""
        if file_name not in self.files:
            self.files[file_name] = file_size

    def add_dir(self, dir_name: str) -> None:
        """Create an empty child directory unless one with that name exists."""
        if dir_name not in self.subdirectories:
            self.subdirectories[dir_name] = Directory(dir_name, self)

    def get_subdir(self, dir_name: str) -> "Directory":
        """Return the direct child called ``dir_name``.

        Raises:
            KeyError: If no such child has been added.
        """
        return self.subdirectories[dir_name]

    def size(self) -> int:
        """Return the total size of all files in this directory and below."""
        own_files = sum(self.files.values())
        nested = sum(subdir.size() for subdir in self.subdirectories.values())
        return own_files + nested

    def total_filtered(self, filter_size: int = 100000) -> int:
        """Sum the sizes of every directory in this subtree that is small enough.

        A directory contributes its full recursive size when that size is at
        most ``filter_size``; nested directories are counted independently, so
        a file can be counted more than once.

        Args:
            filter_size: Inclusive upper bound on a directory's size.

        Returns:
            The sum of the sizes of all qualifying directories.
        """
        own_size = self.size()
        total_size = own_size if own_size <= filter_size else 0
        # Forward the limit explicitly; otherwise the children would fall back
        # to the default and ignore the caller's value.
        total_size += sum(
            subdir.total_filtered(filter_size)
            for subdir in self.subdirectories.values()
        )
        return total_size

    def min_dir_over_threshold(self, filter_threshhold: int) -> "Directory | None":
        """Find the smallest directory in this subtree of at least a given size.

        Args:
            filter_threshhold: Inclusive lower bound on the directory size.

        Returns:
            The smallest qualifying directory, or None when neither this
            directory nor any directory below it reaches the threshold.
        """
        candidates = []
        for subdir in self.subdirectories.values():
            best_in_subdir = subdir.min_dir_over_threshold(filter_threshhold)
            if best_in_subdir is not None:
                candidates.append(best_in_subdir)

        if candidates:
            # A descendant is never larger than this directory, so once any
            # descendant qualifies this directory cannot be a better answer.
            return min(candidates, key=lambda p: p.size())
        if self.size() >= filter_threshhold:
            return self
        return None

    def str_rep(self, level: int = 0) -> str:
        """Render this subtree as an indented, newline-terminated listing.

        Args:
            level: Nesting depth of this directory; each level indents by two
                spaces.

        Returns:
            One line for this directory, then one line per direct file,
            followed by the listings of the subdirectories.
        """
        indent = " " * level * 2
        child_indent = " " * (level + 1) * 2
        parts = [f"{indent}- {self.name} (dir, dirsize={self.size()})\n"]
        parts.extend(
            # The closing parenthesis was previously missing from file lines.
            f"{child_indent}- {file_name} (file, size={file_size})\n"
            for file_name, file_size in self.files.items()
        )
        parts.extend(
            subdir.str_rep(level + 1) for subdir in self.subdirectories.values()
        )
        return "".join(parts)


In [2]:
def parse_input(filename: str) -> "Directory":
    """Rebuild a directory tree from a recorded terminal session.

    The file is read line by line. Lines starting with ``$`` are commands:
    ``cd /`` jumps to the root, ``cd ..`` moves to the parent and ``cd NAME``
    enters a child directory; every other command (such as ``ls``) is
    ignored. Any other line is a listing entry: ``dir NAME`` declares a
    subdirectory of the current directory, and ``SIZE NAME`` declares a file.

    Args:
        filename: Path of the transcript to read.

    Returns:
        The root directory ("/") of the reconstructed tree.

    Raises:
        KeyError: On ``cd NAME`` when NAME was not declared in the current
            directory beforehand.
        ValueError: If a file entry's size is not an integer.
    """
    root_dir = Directory("/")
    curr_dir = root_dir
    with open(filename) as f:
        for line in f:
            tokens = line.split()
            if not tokens:
                # Blank lines (e.g. a trailing newline) carry no information.
                continue
            if tokens[0] == "$":
                if tokens[1] == "cd":
                    target = tokens[2]
                    if target == "/":
                        curr_dir = root_dir
                    elif target == "..":
                        # The root has no parent; stay there instead of
                        # letting the cursor become None.
                        if curr_dir.parent is not None:
                            curr_dir = curr_dir.parent
                    else:
                        curr_dir = curr_dir.get_subdir(target)
                # "ls" and any other command need no handling of their own:
                # the listing lines that follow are parsed individually.
            elif tokens[0] == "dir":
                curr_dir.add_dir(tokens[1])
            else:
                curr_dir.add_file(tokens[1], int(tokens[0]))
    return root_dir

In [3]:
# Parse test-input.txt, print the resulting tree, then print the sum of the
# sizes of all directories no larger than total_filtered's default limit (100000).
test_input = parse_input("test-input.txt")
print(test_input.str_rep())
print(test_input.total_filtered())

- / (dir, dirsize=48381165)
  - b.txt (file, size=14848514
  - c.dat (file, size=8504156
  - a (dir, dirsize=94853)
    - f (file, size=29116
    - g (file, size=2557
    - h.lst (file, size=62596
    - e (dir, dirsize=584)
      - i (file, size=584
  - d (dir, dirsize=24933642)
    - j (file, size=4060174
    - d.log (file, size=8033020
    - d.ext (file, size=5626152
    - k (file, size=7214296

95437


In [4]:
# Same steps as the previous cell, this time for input.txt.
# NOTE(review): the variable is still called `test_input` although it now holds
# the tree parsed from input.txt; the full tree dump also makes for a very long
# cell output.
test_input = parse_input("input.txt")
print(test_input.str_rep())
print(test_input.total_filtered())

- / (dir, dirsize=42476859)
  - pnm.slh (file, size=307337
  - zcrfndg.cms (file, size=212421
  - bsnqsfm (dir, dirsize=179236)
    - lccnhn (file, size=179236
  - dtqvbspj (dir, dirsize=494450)
    - gdjfp.mfp (file, size=221336
    - jjgpvcqv.jlq (file, size=273114
  - hhhtrws (dir, dirsize=10632043)
    - mgvdbtl.ztt (file, size=1606
    - wprqtd.wph (file, size=27538
    - gcbg (dir, dirsize=3658797)
      - lccnhn (file, size=186683
      - zjfs.mdf (file, size=32944
      - bgcwh (dir, dirsize=211273)
        - nvns (file, size=211273
      - bsnqsfm (dir, dirsize=1069939)
        - dtqvbspj (file, size=210022
        - pnm.slh (file, size=189603
        - rcsffv.gbt (file, size=199755
        - bsnqsfm (dir, dirsize=292856)
          - bsnqsfm.vww (file, size=292856
        - gpcpgfh (dir, dirsize=177703)
          - jrr.jnj (file, size=177703
      - jjgpvcqv (dir, dirsize=2003950)
        - bsnqsfm.fnn (file, size=233963
        - gpcpgfh.gtw (file, size=127603
        - jrs.b

In [5]:
def find_smallest_to_delete(
    root_dir: "Directory",
    total_size: int = 70000000,
    size_needed: int = 30000000,
) -> int:
    """Return the size of the smallest directory whose removal frees enough space.

    Args:
        root_dir: Root of the directory tree occupying the disk.
        total_size: Total capacity of the disk.
        size_needed: Amount of free space that must be available afterwards.

    Returns:
        The size of the smallest directory that is at least as large as the
        missing amount of free space, or 0 when enough space is already free.

    Raises:
        ValueError: If no directory in the tree is large enough.
    """
    free_space = total_size - root_dir.size()
    space_to_clean = size_needed - free_space
    if space_to_clean <= 0:
        # Nothing has to be deleted; without this guard every directory would
        # qualify and an arbitrary smallest one would be reported.
        return 0

    candidate = root_dir.min_dir_over_threshold(space_to_clean)
    if candidate is None:
        raise ValueError(
            f"no directory is large enough to free {space_to_clean} units of space"
        )
    return candidate.size()

In [6]:
# Parse test-input.txt and print the size of the smallest directory whose
# deletion frees enough space (default disk size and required free space).
test_input = parse_input("test-input.txt")
print(find_smallest_to_delete(test_input))

24933642


In [7]:
# Same query as the previous cell, this time for input.txt.
# NOTE(review): `test_input` here holds the tree parsed from input.txt, not the
# test data.
test_input = parse_input("input.txt")
print(find_smallest_to_delete(test_input))

2481982
