### Cleaning up Day 7

Mainly cleaning up the input ingestion.

#### Early, Painful Finding:

- There is no guarantee that a directory name is distinct. For example, it is possible that `/test/a` and `/prod/a` both exist, meaning I need to account for `a` in a full path beyond just the name `a`. 
- To handle this I use a `deque`, which lets me keep tabs on the relative path

In [1]:
# samples
from collections import deque

def add_to_directory(i, dir_dict, dir_path, data):
    """
    Record one directory listing into ``dir_dict[dir_path]``.

    Reads ``data`` line by line starting at index ``i`` and stops at the
    first line that begins with ``$`` (the next command). A line beginning
    with ``dir`` is stored as the child's full path (``dir_path`` + name +
    ``/``); any other line is stored as the integer found before its first
    space.

    Indexing past the last element of ``data`` raises ``IndexError``;
    entries appended before that point stay in ``dir_dict``.

    Returns the same ``dir_dict`` object that was passed in.
    """
    cursor = i
    while True:
        line = data[cursor]
        if line.startswith('$'):
            # Next command reached: the listing is complete.
            break
        if line.startswith('dir'):
            child = line.split('dir ')[1]
            # Store the full path so equal names under different parents stay distinct.
            dir_dict[dir_path].append(f'{dir_path}{child}/')
        else:
            dir_dict[dir_path].append(int(line.split(' ')[0]))
        cursor += 1
    return dir_dict

def generate_path(i, path_deque, data):
    """
    Apply the ``$ cd <name>`` command found at ``data[i]`` to ``path_deque``.

    ``path_deque`` holds the components of the current directory, each ending
    in ``/`` (the root is the single component ``'/'``). It is modified in
    place.

    * ``cd /`` prints ``Start`` and resets ``path_deque`` to just the root.
    * ``cd <name>`` appends ``'<name>/'``.

    ``cd ..`` is not handled here; passing it appends ``'../'``.

    Returns a tuple ``(path, path_deque)`` where ``path`` is the components
    joined into one string and ``path_deque`` is the object passed in.
    """
    target = data[i].split('$ cd ')[1]  # text after the command prefix
    if target == '/':
        print("Start")
        # `cd /` means the root wherever we currently are, so drop any
        # components accumulated so far instead of keeping the current path.
        path_deque.clear()
        path_deque.append('/')
    else:
        path_deque.append(f'{target}/')
    path = ''.join(path_deque)

    return path, path_deque
    
def sumDir(d, k):
    """Return the total size of directory ``k``: its own files plus, recursively, all nested directories."""
    total = 0
    for entry in d[k]:
        # An entry that is itself a key of ``d`` is a subdirectory; anything else is a file size.
        total += sumDir(d, entry) if entry in d else entry
    return total

In [2]:
# Part 1 & Part 2 (sample input, checked against the known answers)
dir_dict = {}

# read sample data: one stripped line per list entry
with open('data/day07_sample.txt') as fh:
    data = [line.strip() for line in fh]

# components of the current directory; joined they form its full path
path_deque = deque(['/'])

# Build directory dictionary: full path -> list of file sizes and child directory paths
for i, line in enumerate(data):
    if line == '$ cd /':  # print initial deque
        print(f"Starting at {path_deque}")

    if line == '$ cd ..':  # moving up a level drops the last path component
        path_deque.pop()
    elif line.startswith('$ cd'):  # entering a directory: register it and read its listing
        path, path_deque = generate_path(i, path_deque, data)
        dir_dict[path] = []
        try:
            # The listing is taken to start two lines below the `cd`
            # (i.e. directly after a `$ ls` line).
            dir_dict = add_to_directory(i + 2, dir_dict, path, data)
        except IndexError:
            # add_to_directory indexes past the last line when the listing is
            # the final thing in the input; entries read up to that point are
            # already stored. Any other exception is a real error and is not
            # hidden.
            print("End of file")

print(dir_dict)

# Recursively find sum of vals per directory
dir_size = {k: sumDir(dir_dict, k) for k in dir_dict}

# part 1: sum of folders <= 100K in size
pt1 = sum(size for size in dir_size.values() if size <= 100_000)
assert pt1 == 95437

### Part 2: Thank you for going easier on us!
total = 70_000_000
need = 30_000_000
unused = total - dir_size['/']
remaining_need = need - unused
print(f"We need to delete at least: {remaining_need}")

# smallest single directory whose removal frees enough space
pt2 = min(size for size in dir_size.values() if size >= remaining_need)
assert pt2 == 24933642

Starting at deque(['/'])
Start
End of file
{'/': ['/a/', 14848514, 8504156, '/d/'], '/a/': ['/a/e/', 29116, 2557, 62596], '/a/e/': [584], '/d/': [4060174, 8033020, 5626152, 7214296]}
We need to delete at least: 8381165


In [3]:
# Part 1 & Part 2 (full puzzle input)
dir_dict = {}

# read puzzle data: one stripped line per list entry
with open('data/day07.txt') as fh:
    data = [line.strip() for line in fh]

# components of the current directory; joined they form its full path
path_deque = deque(['/'])

# Build directory dictionary: full path -> list of file sizes and child directory paths
for i, line in enumerate(data):
    if line == '$ cd /':  # print initial deque
        print(f"Starting at {path_deque}")

    if line == '$ cd ..':  # moving up a level drops the last path component
        path_deque.pop()
    elif line.startswith('$ cd'):  # entering a directory: register it and read its listing
        path, path_deque = generate_path(i, path_deque, data)
        dir_dict[path] = []
        try:
            # The listing is taken to start two lines below the `cd`
            # (i.e. directly after a `$ ls` line).
            dir_dict = add_to_directory(i + 2, dir_dict, path, data)
        except IndexError:
            # add_to_directory indexes past the last line when the listing is
            # the final thing in the input; entries read up to that point are
            # already stored. Any other exception is a real error and is not
            # hidden.
            print("End of file")

# Recursively find sum of vals per directory
dir_size = {k: sumDir(dir_dict, k) for k in dir_dict}

# part 1: sum of folders <= 100K in size
pt1 = sum(size for size in dir_size.values() if size <= 100_000)
print(f"Part 1 Result: {pt1}")

### Part 2: Thank you for going easier on us!
total = 70_000_000
need = 30_000_000
unused = total - dir_size['/']
remaining_need = need - unused
print(f"We need to delete at least: {remaining_need}")

# smallest single directory whose removal frees enough space
pt2 = min(size for size in dir_size.values() if size >= remaining_need)
print(f"Part 2 Result: {pt2}")

Starting at deque(['/'])
Start
End of file
Part 1 Result: 1501149
We need to delete at least: 9199225
Part 2 Result: 10096985
