# Day 7

## Part 1

Find all of the directories with a total size of at most 100000. What is the sum of the total sizes of those directories?


In [1]:
# Libraries

import numpy as np
import pandas as pd

# Read input file: one list entry per line, trailing whitespace
# (including the newline) stripped from each
all_lines = []

with open('input.txt') as input_file:
    all_lines = list(map(str.rstrip, input_file))


In [2]:
# Parse input for commands and outputs
#
# Every line starting with '$' is a command; the lines that follow it, up to
# the next command (or the end of the input), are that command's output.
# Lines that appear before the first command are ignored.

# Get all commands (positions of the command lines)
commands_idxs = [idx for idx, line in enumerate(all_lines) if line.startswith('$')]
# Sentinel closing the last command's output. len(all_lines) is already the
# exact exclusive end index for slicing, so no "+ 1" is needed.
commands_idxs.append(len(all_lines))

# Combine commands and outputs: pair each command position with the next one
# and take everything in between as the output.
actions_list = [
    {'command': all_lines[idx_start], 'output': all_lines[idx_start + 1:idx_end]}
    for idx_start, idx_end in zip(commands_idxs[:-1], commands_idxs[1:])
]
    

In [3]:
# Process actions
#
# Replays the recorded commands to build a flat list of files, one record per
# file: {'dir': <absolute dir path, '' for the root>, 'file': <name>, 'size': <int>}.

current_dir = ''    # '' stands for the root; any other dir looks like '/a/b'
filesystem = []
seen_files = set()  # (dir, file) pairs already recorded

for action in actions_list:
    # Tokenise the command instead of substring matching, so that a directory
    # name containing 'cd' or 'ls' cannot be mistaken for the command itself.
    command_parts = action['command'].split(maxsplit=2)
    command_name = command_parts[1] if len(command_parts) > 1 else ''

    # cd command
    if command_name == 'cd' and len(command_parts) == 3:
        next_dir = command_parts[2]
        if next_dir.startswith('/'):
            # Absolute target; 'cd /' collapses to '' (the root)
            current_dir = next_dir.rstrip('/')

        elif next_dir == '..':
            # Drop the last path component; stays '' when already at the root
            current_dir = current_dir.rpartition('/')[0]

        else:
            current_dir = f'{current_dir}/{next_dir}'

    # ls command
    elif command_name == 'ls':
        for item in action['output']:
            # Skip blank lines (e.g. a trailing empty line in the input) and
            # sub-directory entries; only files carry a size.
            if not item or item.startswith('dir '):
                continue

            fields = item.split(' ')
            size = int(fields[0])
            file_name = fields[1]

            # Listing the same directory twice must not count its files twice
            file_key = (current_dir, file_name)
            if file_key in seen_files:
                continue
            seen_files.add(file_key)

            filesystem.append({'dir': current_dir, 'file': file_name, 'size': size})

    else:
        print('Unrecognized action!')


In [4]:
# Analyse filesystem: tabulate the file records and show the root directory
# (recorded as an empty path) as '/'
filesystem_df = pd.DataFrame(filesystem).assign(
    dir=lambda frame: frame['dir'].replace({'': '/'})
)


In [5]:
# Split dir path to levels

def split_dir_to_levels(*, row: pd.Series) -> pd.Series:
    """Expand a file record's directory path into one entry per path level.

    Args:
        row: Record with the keys 'dir', 'file' and 'size'. 'dir' is either
            '/' or a '/'-separated path.

    Returns:
        An object-dtype Series holding 'level_00', 'level_01', ... (one entry
        per component of 'dir' split on '/', in order), followed by the
        original 'dir', 'file' and 'size' values. The root '/' yields a single
        'level_00' entry containing the empty string.
    """
    path = row['dir']
    # Splitting '/' itself would give two empty components; keep just one
    components = [''] if path == '/' else path.split('/')

    # Zero-padded level numbers keep the labels in order when sorted as text
    fields = {
        f'level_{depth:02d}': component
        for depth, component in enumerate(components)
    }
    fields.update(dir=path, file=row['file'], size=row['size'])

    return pd.Series(fields, dtype='object')

# Split dirs: expand every file record into its path levels, then show the
# top level as '/' instead of the empty string the split leaves there
filesystem_splitted_df = filesystem_df.apply(
    lambda record: split_dir_to_levels(row=record), axis=1
).assign(level_00=lambda frame: frame['level_00'].replace('', '/'))
display(filesystem_splitted_df)


Unnamed: 0,dir,file,level_00,level_01,level_02,level_03,level_04,level_05,level_06,level_07,level_08,level_09,level_10,level_11,size
0,/,fmftdzrp.fwt,/,,,,,,,,,,,,126880
1,/,hhfqgzfj.qvt,/,,,,,,,,,,,,173625
2,/,rfgtcj.tdn,/,,,,,,,,,,,,6337
3,/,vmc.cdf,/,,,,,,,,,,,,230140
4,/brdsppd,ndqmcv,/,brdsppd,,,,,,,,,,,218543
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
273,/szfw/lwfgnzz/qgdcjq,prshd.qdj,/,szfw,lwfgnzz,qgdcjq,,,,,,,,,252274
274,/szfw/lwfgnzz/spwcmrl,tchv,/,szfw,lwfgnzz,spwcmrl,,,,,,,,,135076
275,/szfw/lwfgnzz/spwcmrl,vmc.cdf,/,szfw,lwfgnzz,spwcmrl,,,,,,,,,265608
276,/szfw/lwfgnzz/spwcmrl/lslmr,rrvwsbl,/,szfw,lwfgnzz,spwcmrl,lslmr,,,,,,,,172247


In [6]:
# Count dir sizes
#
# For every depth, group the files by the first `depth` level columns and sum
# their sizes. pandas' groupby drops rows whose key columns are missing by
# default, so a grouping at a given depth only sees files located at that
# depth or deeper; each directory total therefore includes its sub-directories.

all_levels = sorted(name for name in filesystem_splitted_df.columns if 'level' in name)

all_dir_sizes = [
    filesystem_splitted_df.groupby(by=all_levels[:depth])['size'].sum()
    for depth in range(1, len(all_levels) + 1)
]

# Create dataframe: stack the per-depth totals into a single 'size' column
all_dir_sizes_df = pd.DataFrame(pd.concat(all_dir_sizes))
display(all_dir_sizes_df)


Unnamed: 0,size
/,41272621
"(/, brdsppd)",5609550
"(/, dnjqmzgg)",811944
"(/, lbbcfjl)",11657747
"(/, mzdqcb)",18489666
...,...
"(/, mzdqcb, lwfgnzz, wtrbfrj, ggrsgzvv, gqtsmnr, jdml, lsprzlbf, lwfgnzz, tzghdrd, tjslbpb)",18188
"(/, mzdqcb, lwfgnzz, wtrbfrj, ggrsgzvv, gqtsmnr, jdml, lsprzlbf, lwfgnzz, tzghdrd, vhv)",395171
"(/, mzdqcb, lwfgnzz, wtrbfrj, ggrsgzvv, gqtsmnr, jdml, lsprzlbf, rllt, szfw, lsprzlbf)",291090
"(/, mzdqcb, lwfgnzz, wtrbfrj, ggrsgzvv, gqtsmnr, jdml, lsprzlbf, lwfgnzz, tzghdrd, tjslbpb, jvcgnbs)",18188


In [7]:
# Print result: total size of all directories of at most 100000
small_dirs_mask = all_dir_sizes_df['size'].le(100000)
all_dir_sizes_df[small_dirs_mask].sum()


size    1644735
dtype: int64

---

## Part 2

The total disk space available to the filesystem is 70000000. To run the update, you need unused space of at least 30000000. You need to find a directory you can delete that will free up enough space to run the update.

Find the smallest directory that, if deleted, would free up enough space on the filesystem to run the update. What is the total size of that directory?


In [8]:
# Disk space: total capacity and the unused space the update needs
total_disk_space, required_space = 70_000_000, 30_000_000


In [9]:
# Currently used space: the total recorded for the root entry '/'
current_size = all_dir_sizes_df['size'].loc['/']
# Space that is free right now, and the shortfall a deletion has to cover
current_space = total_disk_space - current_size
min_needed_to_delete = required_space - current_space


In [10]:
# Find smallest dir: order by size, keep the ones big enough to free the
# missing space, and show the first (smallest) of them
sorted_dir_sizes_df = all_dir_sizes_df.sort_values(by='size')
frees_enough = sorted_dir_sizes_df['size'] >= min_needed_to_delete
sorted_dir_sizes_df[frees_enough].iloc[:1]

Unnamed: 0,size
"(/, lbbcfjl, csnjp, ffgfmcm, bcnvw)",1300850
