In [9]:
import pandas as pd
import numpy as np
import re
from dataclasses import dataclass, field

In [94]:
# Read the input file; each entry of `lines` keeps its trailing newline.
with open('input.txt', 'r') as f:
    lines = f.readlines()

# Newline-free copy used for display. rstrip('\n') is used instead of slicing
# off the last character so that a final line without a trailing newline is
# not truncated.
data = [line.rstrip('\n') for line in lines]
data[:20]

['$ cd /',
 '$ ls',
 'dir gts',
 '68377 jvdqjhr.jvp',
 'dir lwhbw',
 '228884 nqth.gcn',
 'dir pcqjnl',
 '94844 ppwv.zsh',
 '97889 rqpw',
 'dir sqhw',
 'dir vllgn',
 'dir wdtm',
 'dir ztfdwp',
 '$ cd gts',
 '$ ls',
 '846 grwwbrgz.wft',
 '72000 mrnhn.psz',
 '155241 qvnbd.dqs',
 '6655 tndtmwfv',
 '$ cd ..']

In [95]:
@dataclass
class DiskObject:
    """Common base for entries (files and directories) in the tree.

    Attributes:
        name: Name of the entry.
        parent: Directory containing this entry; ``None`` when not given.
        size: Size of the entry; defaults to 0.
    """
    name: str
    # The annotation is quoted because DirObject is defined further down;
    # an unquoted name is evaluated immediately and raises NameError when
    # this cell runs on a fresh kernel.
    parent: 'DirObject | None' = field(default=None, repr=False)
    size: int = 0


@dataclass
class FileObject(DiskObject):
    """A file entry; adds nothing to DiskObject."""


@dataclass
class DirObject(DiskObject):
    """A directory entry that additionally holds its child entries.

    Attributes:
        children: Entries contained in this directory. Every instance gets
            its own fresh list.
    """
    children: list = field(default_factory=list, repr=False)


DirObject('/')

DirObject(name='/', size=0)

In [106]:
# Rebuild the directory tree from the terminal transcript held in `lines`.
# `disk[0]` is the root directory; `current_dir` tracks the directory that
# the transcript is currently in.
disk = [DirObject('/')]

current_dir = disk[0]
ls_active = False

for raw_line in lines:
    line = raw_line.rstrip()
    if not line:
        # Blank lines carry no information and would otherwise break the
        # two-value unpacking of a listing entry below.
        continue

    if line.startswith('$'):
        # Any new command ends the output of a preceding `ls`.
        ls_active = False
        command = line[2:]

        # Change Directory
        if command.startswith('cd'):
            new_dir = command[3:].strip()
            if new_dir == '/':
                # Outer most directory
                current_dir = disk[0]
            elif new_dir == '..':
                # One directory up. The root has no parent, so stay there
                # instead of setting current_dir to None.
                if current_dir.parent is not None:
                    current_dir = current_dir.parent
            else:
                # One directory down: the target must already be known from
                # an earlier listing of the current directory.
                for child in current_dir.children:
                    if isinstance(child, DirObject) and child.name == new_dir:
                        current_dir = child
                        break
                else:
                    # Fail loudly: silently staying put would attach the
                    # following listing to the wrong directory.
                    raise ValueError(
                        f"cd into unknown directory {new_dir!r} "
                        f"(current directory: {current_dir.name!r})"
                    )

        elif command.startswith('ls'):
            ls_active = True

    elif ls_active:
        # Currently listing files/dirs -> create them.
        # Split only on the first space so the rest of the line is the name.
        size_or_type, name = line.split(' ', 1)
        name = name.strip()

        if size_or_type == 'dir':
            # Create Directory
            current_dir.children.append(DirObject(name, current_dir))
        else:
            # Create File
            size = int(size_or_type)
            current_dir.children.append(FileObject(name, current_dir, size))

In [114]:
def calculate_dir_size(d: DirObject, max_size: int = 100000):
    """Compute directory sizes recursively and collect the small directories.

    Walks the tree below ``d``, stores each directory's total size (files
    plus nested directories) in its ``size`` attribute, and gathers every
    directory whose total is greater than 0 and at most ``max_size``.

    Args:
        d: Directory to start from.
        max_size: Largest total a directory may have to be reported.
            Defaults to 100000.

    Returns:
        A list of ``(name, total)`` tuples. Sub-directories appear before
        the directory that contains them.
    """
    filter_dirs = []

    total = 0
    for c in d.children:
        if isinstance(c, FileObject):
            total += c.size
        elif isinstance(c, DirObject):
            # Recurse first: the call fills in c.size, which is read next.
            filter_dirs.extend(calculate_dir_size(c, max_size))
            total += c.size
    d.size = total

    if 0 < total <= max_size:
        filter_dirs.append((d.name, total))

    return filter_dirs

filter_dirs = calculate_dir_size(disk[0])
filter_dirs

[('lwhbw', 99946),
 ('mnd', 28976),
 ('cmf', 73595),
 ('qwtwps', 73595),
 ('vcthd', 15016),
 ('ttmctqlc', 9006),
 ('bzrs', 71351),
 ('lrrl', 99225),
 ('fjt', 57947),
 ('qsvwfb', 23145),
 ('bpnlrhsb', 22875),
 ('jvdh', 95461),
 ('gtd', 50675),
 ('zplwvj', 50675),
 ('wgpqg', 65679),
 ('fhnnc', 84726),
 ('gphqmvpn', 23807),
 ('lrrl', 26548),
 ('djw', 98290),
 ('dsbjlmrf', 92510),
 ('mjfdjrgt', 86846),
 ('vllgn', 58389),
 ('cfhjvmh', 17554),
 ('wdwgp', 11714),
 ('htmwl', 45376),
 ('mrnhn', 2633),
 ('nvgmrpdf', 32919),
 ('mjfdjrgt', 16108),
 ('pbfhn', 16108),
 ('pdtm', 55975),
 ('mjfdjrgt', 26073),
 ('nbccdd', 62162),
 ('vbvtzmsg', 15318),
 ('vnmg', 83938),
 ('wdtm', 83938)]

In [115]:
# Add up the size component of every (name, size) pair collected above.
sum(size for _, size in filter_dirs)

1778099

In [136]:
def calculate_dir_size(d: DirObject):
    """Compute directory sizes recursively and list every non-empty directory.

    Stores each directory's total size (files plus nested directories) in
    its ``size`` attribute and returns ``(name, total)`` tuples for all
    directories whose total is greater than 0, sub-directories first.

    Note: this definition reuses the name of the earlier
    ``calculate_dir_size`` and replaces it from this cell onwards.
    """
    # Sizes of the files that sit directly in this directory.
    running_total = sum(
        entry.size for entry in d.children if type(entry) == FileObject
    )

    collected = []
    for entry in d.children:
        if type(entry) != DirObject:
            continue
        # The recursive call sets entry.size before it is added below.
        collected += calculate_dir_size(entry)
        running_total += entry.size

    d.size = running_total
    if running_total > 0:
        collected.append((d.name, running_total))
    return collected

filter_dirs = calculate_dir_size(disk[0])
filter_dirs

[('gts', 234742),
 ('lwhbw', 99946),
 ('gljcvm', 264381),
 ('gzfgc', 134989),
 ('cfhjvmh', 134989),
 ('jjfwr', 134989),
 ('cbrvhz', 131072),
 ('flcw', 216675),
 ('mnd', 28976),
 ('rfqbmb', 376723),
 ('lqwntmdg', 511712),
 ('fbrwd', 163166),
 ('mrnhn', 163166),
 ('srtqvcv', 163166),
 ('cpmvnf', 163166),
 ('cmf', 73595),
 ('qwtwps', 73595),
 ('vcthd', 15016),
 ('nzpdtfr', 380031),
 ('lrrl', 424584),
 ('mjfdjrgt', 317513),
 ('npqj', 340000),
 ('sntcbctt', 1082097),
 ('vzhvjp', 161341),
 ('jrswcjq', 173122),
 ('smgbdw', 307533),
 ('wdtm', 705220),
 ('dcfmtw', 2360401),
 ('bfjvt', 431526),
 ('ggnwqcj', 1010731),
 ('cfhjvmh', 306620),
 ('cjsrvg', 384199),
 ('ttmctqlc', 9006),
 ('mgnq', 279392),
 ('hghjzpgc', 552544),
 ('mrnhn', 2193696),
 ('bszd', 202536),
 ('dqdrngf', 177307),
 ('bzrs', 71351),
 ('lrrl', 99225),
 ('wtqgd', 212172),
 ('bwmglvmt', 1623571),
 ('wrtp', 267582),
 ('lrrl', 267582),
 ('tzqqmmp', 140115),
 ('wmjp', 459262),
 ('fjt', 57947),
 ('qsvwfb', 23145),
 ('zvlhngjm', 970840)

In [139]:
# Tabulate the (name, size) pairs, largest directory first, with a fresh
# 0..n-1 index.
df = (
    pd.DataFrame(filter_dirs, columns=['name', 'size'])
    .sort_values(by='size', ascending=False)
    .reset_index(drop=True)
)
df

Unnamed: 0,name,size
0,/,41609574
1,wdtm,21756443
2,pcqjnl,13683830
3,vpm,11766511
4,dhmphrn,10125625
...,...,...
161,vbvtzmsg,15318
162,vcthd,15016
163,wdwgp,11714
164,ttmctqlc,9006


In [140]:
# Overall size and the amount that has to be unused.
total_size = 70_000_000
target_size = 30_000_000

# `df` was sorted by size in descending order, so row 0 holds the largest
# entry; its size is taken as the space currently in use.
unused_size = total_size - df.at[0, 'size']

# Shortfall between the required and the currently unused space.
free_up_size = target_size - unused_size
free_up_size

1609574

In [141]:
# Smallest directory size that is still at least `free_up_size`.
# min() states this directly and does not rely on `df` having been sorted
# in descending order by an earlier cell (which taking the last row did).
df.loc[df['size'] >= free_up_size, 'size'].min()

1623571