In [1]:
import re
from dataclasses import dataclass

@dataclass
class File:
    """A single file entry: its size and its name."""

    size: int
    name: str

    def __str__(self):
        """Return the one-line description used when a tree is listed."""
        return f"{self.name} (file, size={self.size})"
    
class Directory:
    """A directory node holding files and sub-directories.

    Attributes are set in ``__init__``:
      name     -- the directory's own name
      contents -- dict mapping an entry's name to the entry object
      parent   -- the enclosing directory, or None when none was given
    """

    def __init__(self, name, parent=None):
        self.parent = parent
        self.contents = {}
        self.name = name

    def __str__(self):
        """Return the one-line description used when a tree is listed."""
        return f"{self.name} (dir)"

    @property
    def size(self):
        """Sum of the sizes of all entries; nested directories recurse."""
        return sum(entry.size for entry in self.contents.values())

    def ls(self, level=0):
        """Print this directory and everything below it as an indented tree.

        ``level`` is the number of leading spaces for this directory's own
        line; each nesting step adds two more.
        """
        indent = ' ' * level
        print(f"{indent}- {self}")
        child_level = level + 2
        indent = ' ' * child_level
        for thing in self.contents.values():
            if not isinstance(thing, Directory):
                print(f"{indent}- {thing}")
                continue
            # Sub-directories print their own header line and children.
            thing.ls(level=child_level)

    def sub_dirs(self):
        """Return the directories directly inside this one, in insertion order."""
        found = []
        for entry in self.contents.values():
            if isinstance(entry, Directory):
                found.append(entry)
        return found

    def recursive_sub_dirs(self):
        """Return every directory below this one.

        Direct children come first, followed by each child's own
        descendants in turn.
        """
        children = self.sub_dirs()
        collected = list(children)
        for child in children:
            collected += child.recursive_sub_dirs()
        return collected
        


In [3]:
# Read the puzzle input into `data`: one string per line, with surrounding
# whitespace (including the trailing newline) stripped.
filename = "AoC day 7 data.txt"
with open(filename) as f:
    data = list(map(str.strip, f))

In [4]:
def parse_input_data(data):
    """Build a directory tree from the lines of a terminal transcript.

    Recognised lines (whitespace-separated tokens):

    * ``$ cd /``     -- jump to the top-level directory
    * ``$ cd ..``    -- move to the parent; a no-op when already at the top
    * ``$ cd NAME``  -- enter a sub-directory declared by an earlier listing
    * ``$ ls``       -- changes no state; the listing lines carry the data
    * ``dir NAME``   -- register sub-directory NAME in the current directory
    * ``SIZE NAME``  -- register file NAME with integer size SIZE

    Any other line (including blank ones) is ignored.

    Args:
        data: iterable of transcript lines (strings).

    Returns:
        The top-level ``Directory`` (named '/') holding the parsed tree.

    Raises:
        ValueError: if a ``cd`` has no target, or targets a name that is not
            a known sub-directory of the current directory.
    """
    top_level_dir = Directory('/')
    current_dir = top_level_dir
    for line in data:
        # split() without an argument tolerates repeated or leading spaces,
        # which split(' ') would turn into empty tokens.
        tokens = line.split()
        if len(tokens) < 2:
            continue  # too short to be any recognised form
        head, arg = tokens[0], tokens[1]

        if head == '$':
            if arg != 'cd':
                continue  # `ls` (or any other command) moves nothing
            if len(tokens) < 3:
                raise ValueError(f"cd without a target: {line!r}")
            target = tokens[2]
            if target == '/':
                current_dir = top_level_dir
            elif target == '..':
                # The root has no parent; stay put rather than letting
                # current_dir become None and crash on the next line.
                if current_dir.parent is not None:
                    current_dir = current_dir.parent
            else:
                # Explicit check instead of assert: asserts are stripped
                # when Python runs with -O.
                child = current_dir.contents.get(target)
                if not isinstance(child, Directory):
                    raise ValueError(
                        f"cannot cd into {target!r}: not a known "
                        f"sub-directory of {current_dir.name!r}"
                    )
                current_dir = child
        elif head == 'dir':
            # Keep an already-registered directory so that listing the same
            # place twice does not discard what was parsed beneath it.
            if not isinstance(current_dir.contents.get(arg), Directory):
                current_dir.contents[arg] = Directory(name=arg, parent=current_dir)
        elif re.fullmatch(r'\d+', head):
            # fullmatch: the whole token must be digits, so int() cannot fail
            # on something like "12abc" (which re.match would let through).
            current_dir.contents[arg] = File(int(head), arg)
    return top_level_dir

In [5]:
# Parse the transcript lines into a directory tree; tld is the root ('/') node.
tld = parse_input_data(data)

In [6]:
# Print the whole tree, each nesting level indented two more spaces.
tld.ls()

- / (dir)
  - bsncwlw (dir)
    - gzsrhr.snp (file, size=264332)
    - ttctlvjp (file, size=94633)
  - dssnr (dir)
    - vmqbwtd.dqv (file, size=101990)
  - lpj.tpq (file, size=290062)
  - rfhsnp.mfv (file, size=183564)
  - sfb (dir)
    - dth (dir)
      - btvzp (file, size=214526)
    - gmgwq (dir)
      - czj.rsc (file, size=270924)
      - lfnhb (dir)
        - cvffv.wzr (file, size=110202)
        - lfnhb (file, size=45842)
        - mjqww.zhl (file, size=181410)
        - snqqrvwd.tsb (file, size=72379)
        - vdjjgd.lnb (file, size=46957)
        - vrsdff (dir)
          - jrcgrvp.plv (file, size=191477)
          - mjqww (file, size=292376)
          - vdjjgd.lnb (file, size=97997)
          - zplw.cmd (file, size=5027)
      - snqqrvwd.hbq (file, size=97804)
      - tjcpgqnm.dvv (file, size=99847)
    - hmvtj (dir)
      - dqvjbhg (dir)
        - vdjjgd.lnb (file, size=98579)
    - jrcgrvp (dir)
      - dzrcrcz (dir)
        - bppp.qfg (file, size=218954)
        - zwhtbhg.

In [7]:
# Total size of everything under the root (the property recurses into sub-directories).
tld.size

43629016

In [8]:
# part 1
# Sum the total sizes of all directories that are small enough.
# The puzzle statement asks for directories of size *at most* 100000, so the
# limit is inclusive (<=); a directory of exactly 100000 must be counted.
small_dir_size_limit = 100000
summed_total_sizes = 0
for d in tld.recursive_sub_dirs():
    dir_size = d.size  # the property re-walks the subtree on each access; read it once
    if dir_size <= small_dir_size_limit:
        print(d.name, dir_size)
        summed_total_sizes += dir_size
print(summed_total_sizes)

hmvtj 98579
tvmv 80194
dqvjbhg 98579
cvjwqpz 46716
mcfdlh 15958
ttfczgwn 85314
ndbztbq 91717
wtt 32262
snqqrvwd 92446
vbqdpjp 59640
pchgrt 59640
nwmjhbt 81512
fgcmvwd 28530
jrcgrvp 5386
mjqww 40424
mjqww 19934
snqqrvwd 27161
vrsdff 19934
dbfqmmvq 19934
pghml 88913
ppvbjrhr 83453
wgfvcjcf 12672
cfzqvmlc 11321
vcfwwlb 11321
mjqww 82140
bljbhwg 7022
njv 17060
fwt 20557
rgvmcsb 89387
vrsdff 17060
bsgdhgf 32538
hzm 75543
jzdz 29854
lwzcrrn 75543
bmpsvbmv 2660
hwpndl 2660
blrd 86042
dlhdg 66562
gzwlzc 14589
bsqbbfgz 14589
1845346


In [20]:
# part 2
# What is the size we need to free up?
total_disk = 70000000
target_free_space = 30000000
root_size = tld.size  # the property re-walks the whole tree on each access; read it once
current_free_space = total_disk - root_size
print(f"current free space is {current_free_space}")
free_space_needed = target_free_space-current_free_space
print(f"additional free space needed is {free_space_needed}")
# Find the smallest directory whose size is at least free_space_needed.
# Start from the root: it always qualifies while total_disk >= target_free_space,
# so it is the fallback if no sub-directory is large enough.
best_selection = tld
best_size = root_size
for d in tld.recursive_sub_dirs():
    d_size = d.size
    # A chained comparison replaces the bitwise `&` that was standing in for
    # a logical `and`, and avoids recomputing both sizes on every iteration.
    if free_space_needed <= d_size < best_size:
        best_selection = d
        best_size = d_size
print(f"the best directory to delete is {best_selection.name} with size {best_selection.size}")

current free space is 26370984
additional free space needed is 3629016
the best directory to delete is lfnhb with size 3636703
