In [2]:
import numpy as np
import pandas as pd

# --- Day 7: No Space Left On Device ---

In [163]:
class Directory: 
    def __init__(self):
        self.contents = {}
    
    def add_content(self, stuff, name):
        self.contents[name] = stuff
    
    def exists(self, name):
        if name in self.contents.keys(): 
            return True
        return False
    
    def calculate_size(self):
        running_size_total = 0
       
        for k in self.contents.keys():
            if isinstance(self.contents[k], File): 
                running_size_total += self.contents[k].size
            elif isinstance(self.contents[k], Directory):
                running_size_total += self.contents[k].calculate_size()
            else:
                print("you've made a size calculation mistake")
                
        return running_size_total
    
    def __str__(self, prefix=''): 
        out = ''
        for k in self.contents.keys():
            out = out + prefix + k
            c = self.contents[k]
            if isinstance(c, File): 
                out = out + ' size: ' + str(c.size) + '\n'
            elif isinstance(c, Directory):
                out = out + '\n' + c.__str__(prefix + '|---')
            else:
                print("you've added something that's not a File or Directory? Weird. ")
        return out
        

class File: 
    def __init__(self, size):
        self.size = size


In [164]:
f = open('input7', 'r')

elfDirectory = Directory()
current_directory_location = []
line_number = 0
ls_mode = False

while True: 
    line = f.readline()
    if not line:
        break
    if line == '\n': 
        break
    if line == '$ cd /\n':
        continue
    
    # get to correct subdirectory
    current_directory = elfDirectory
    for d in current_directory_location: 
        current_directory = current_directory.contents[d]

    # after an ls command is read, parse ls commands until done
    if ls_mode: 
        if line[:3] == 'dir': 
            lsplit = line[:-1].split(' ')
            dirname = lsplit[1]
            if not current_directory.exists(dirname):
                current_directory.add_content(Directory(), dirname)
        elif line[0] == '$': 
            ls_mode = False
        else: 
            lsplit = line[:-1].split(' ')
            filesize = int(lsplit[0])
            filename = lsplit[1]
            if not current_directory.exists(filename):
                current_directory.add_content(File(filesize), filename)

    # read commands, not ls
    if not ls_mode: 
        if line[:4] == '$ cd': 
            sym, cmd, dest = line[:-1].split(' ')
            if dest == '..': 
                current_directory_location = current_directory_location[:-1]
            elif dest == '/': 
                current_directory_location = []
            else: 
                current_directory_location.append(dest)
        elif line[:4] == '$ ls': 
            ls_mode = True
            
    #print(line)
    
    #line_number += 1
    #if line_number > 25: 
        #break

In [165]:
elfDirectory.calculate_size()

44274331

In [166]:
# print(elfDirectory)

In [167]:
def get_sum(my_dir):
    running_total = 0
    for key in my_dir.contents.keys():
        content = my_dir.contents[key]
        if isinstance(content, Directory):
            dirsize = content.calculate_size()
            if dirsize <= 100000: 
                running_total += dirsize
            running_total += get_sum(content)
    return running_total

In [168]:
get_sum(elfDirectory)

1915606

## --- Part Two ---

In [169]:
70000000 - 44274331

25725669

In [175]:
# currently available space; we want it to be 30,000,000
disk_size = 70000000
sys_update_size = 30000000
available_space = disk_size - elfDirectory.calculate_size()
needs_to_free_space = sys_update_size - available_space
print(needs_to_free_space)

4274331


In [178]:
def get_smallest_gt_thresh(myDir, thresh=needs_to_free_space):
    best_dirsize = disk_size
    best_dirname = ''
    for key in myDir.contents.keys(): 
        content = myDir.contents[key]
        if isinstance(content, Directory):
            dirsize = content.calculate_size()
            if dirsize >= thresh and dirsize < best_dirsize:
                best_dirsize = dirsize
                best_dirname = key
            subdir_best_dirsize, subdir_best_dirname = get_smallest_gt_thresh(content)
            if subdir_best_dirsize >= thresh and subdir_best_dirsize < best_dirsize: 
                best_dirsize = subdir_best_dirsize
                best_dirname = subdir_best_dirname
    return best_dirsize, best_dirname

get_smallest_gt_thresh(elfDirectory)

(5025657, 'rlbhdgm')

# Chat GPT solution

I asked the following: 
```
Can you write me a solution to this problem in python: [Advent of Code Day 7 Part 1 Text]
```

Initially, ChatGPT wasn't able to finish printing the code -- I suspect that it just ran out of space. So after prompting it to continue a couple of times to little success, I asked it to give me the solution with no comments

In [213]:
class Directory:
  def __init__(self, name):
    self.name = name
    self.files = []
    self.directories = []

  def total_size(self):
    size = 0
    for file in self.files:
      size += file[1]
    for directory in self.directories:
      size += directory.total_size()
    return size

  def add_file(self, name, size):
    self.files.append((name, size))

  def add_directory(self, directory):
    self.directories.append(directory)


def parse_input(input_str):
  root = Directory("/")
  current_directory = root

  lines = input_str.strip().split("\n")
  for line in lines:
    if line.startswith("$"):
      parts = line.split()
      if parts[0] == "$ cd":
        if parts[1] == "..":
          current_directory = current_directory.parent
        elif parts[1] == "/":
          current_directory = root
        else:
          for directory in current_directory.directories:
            if directory.name == parts[1]:
              current_directory = directory
              break
    else:
      parts = line.split()
      for i in range(0, len(parts), 2):
        if parts[i] == "dir":
          directory = Directory(parts[i + 1])
          directory.parent = current_directory
          current_directory.add_directory(directory)
        else:
          current_directory.add_file(parts[i + 1], int(parts[i]))

  return root


def solve(root):
  result = 0

  for directory in root.directories:
    if directory.total_size() <= 100000:
      result += directory.total_size()

  return result


input_str = """
$ cd /
$ ls
dir a
14848514 b.txt
8504156 c.dat
dir d
$ cd a
$ ls
dir e
29116 f
2557 g
62596 h.lst
$ cd e
$ ls
584 i
$ cd ..
$ cd ..
$ cd d
$ ls
4060174 j
8033020 d.log
5626152 d.ext
7214296 k
"""

root = parse_input(input_str)
result = solve(root)
print(result)


0


Okay, clearly it's not a correct solution. Let's do a little troubleshooting to figure out what's happening. 

In [214]:
root.name

'/'

In [215]:
root.files

[('b.txt', 14848514),
 ('c.dat', 8504156),
 ('f', 29116),
 ('g', 2557),
 ('h.lst', 62596),
 ('i', 584),
 ('j', 4060174),
 ('d.log', 8033020),
 ('d.ext', 5626152),
 ('k', 7214296)]

In [216]:
[x.name for x in root.directories]

['a', 'd', 'e']

It didn't correctly parse the tree structure of the directory... so I asked it this:
```
Good attempt, but the tree structure is not parsed correctly in the above code. In the given example, directory `e` should be nested within directory `a`. Could you try again?
```

And it responded: 
```
Sure, here is a revised solution that correctly parses the tree structure:
```

In [224]:
class Directory:
  def __init__(self, name):
    self.name = name
    self.files = []
    self.directories = []

  def total_size(self):
    size = 0
    for file in self.files:
      size += file[1]
    for directory in self.directories:
      size += directory.total_size()
    return size

  def add_file(self, name, size):
    self.files.append((name, size))

  def add_directory(self, directory):
    self.directories.append(directory)


def parse_input(input_str):
  root = Directory("/")
  current_directory = root

  lines = input_str.strip().split("\n")
  for line in lines:
    if line.startswith("$"):
      parts = line.split()
      if parts[0] == "$ cd":
        if parts[1] == "..":
          current_directory = current_directory.parent
        elif parts[1] == "/":
          current_directory = root
        else:
          found = False
          for directory in current_directory.directories:
            if directory.name == parts[1]:
              current_directory = directory
              found = True
              break
          if not found:
            new_directory = Directory(parts[1])
            new_directory.parent = current_directory
            current_directory.add_directory(new_directory)
            current_directory = new_directory
    else:
      parts = line.split()
      for i in range(0, len(parts), 2):
        if parts[i] == "dir":
          directory = Directory(parts[i + 1])
          directory.parent = current_directory
          current_directory.add_directory(directory)
        else:
          current_directory.add_file(parts[i + 1], int(parts[i]))

  return root


def solve(root):
  result = 0

  for directory in root.directories:
    if directory.total_size() <= 100000:
      result += directory.total_size()

  return result


input_str = """
$ cd /
$ ls
dir a
14848514 b.txt
8504156 c.dat
dir d
$ cd a
$ ls
dir e
29116 f
2557 g
62596 h.lst
$ cd e
$ ls
584 i
$ cd ..
$ cd ..
$ cd d
$ ls
4060174 j
8033020 d.log
5626152 d.ext
7214296 k
"""

root = parse_input(input_str)
result = solve(root)
print(result)


0


In [225]:
[x.name for x in root.directories]

['a', 'd', 'e']

It's lying to me! This isn't correct. I'll try this again fresh later. 