# Day 7

In [1]:
import pandas as pd

In [2]:
with open('/Users/williamkirkland/Data/KDS/adventofcode/2022/data/day7.txt') as file:
    master_list = [line.rstrip() for line in file]

In [3]:
master_list

['$ cd /',
 '$ ls',
 'dir fpljqj',
 '171526 ghtzhjwf.nls',
 'dir gsdsbld',
 'dir hbmjtb',
 '296801 mjfjqw.ccv',
 'dir nfn',
 'dir qmrsvfvw',
 '102565 qqjnqb.chd',
 'dir svgbqd',
 '$ cd fpljqj',
 '$ ls',
 '153563 ghtzhjwf.nls',
 '243252 gsvjgj.jsm',
 '154134 hghnrbqg.rzb',
 '$ cd ..',
 '$ cd gsdsbld',
 '$ ls',
 'dir npmncvhh',
 'dir qmrsvfvw',
 'dir sqtnlr',
 'dir vzndpc',
 '$ cd npmncvhh',
 '$ ls',
 '81366 dwbgr.ztr',
 '144577 fzjmcq',
 'dir mphhrqf',
 'dir rnmvggfd',
 '276454 zfl.ghv',
 '$ cd mphhrqf',
 '$ ls',
 'dir qlcfs',
 '111207 shmcrf.wlr',
 'dir zwsnwvnv',
 '$ cd qlcfs',
 '$ ls',
 '283904 fpljqj.pdw',
 '83520 hsclcqqt.pff',
 'dir htwl',
 'dir lqjhfdch',
 '5842 mdjzmbc.qtv',
 'dir nqfdhlcg',
 '120167 twgqhvft.cgw',
 '186998 zclhcr',
 'dir zfl',
 'dir zlqgr',
 '$ cd htwl',
 '$ ls',
 '268134 hmwnn.htq',
 '$ cd ..',
 '$ cd lqjhfdch',
 '$ ls',
 '21479 tpdsgf.hgd',
 '$ cd ..',
 '$ cd nqfdhlcg',
 '$ ls',
 'dir dhjfqv',
 '203675 ghtzhjwf.nls',
 '39527 qfwdmzfv.ggd',
 '$ cd dhjfqv',
 '$

In [4]:
class File:
    def __init__(self, name, parent, filepath, size):
        self.name = name
        self.parent = parent
        self.filepath = filepath
        self.size = size

    def get_size(self):
        return self.size

class Directory:
    def __init__(self, name, parent, filepath, children = [], files = []):
        self.name = name
        self.parent = parent
        self.filepath = filepath
        self.children = children
        self.files = files

    def touch_file(self, file_dict):
        file = File(name=file_dict['name'],
                    parent=self,
                    filepath = self.filepath + file_dict['name'],
                    size=file_dict['size'])
        self.files.append(file)
        return

    def mkdir(self, child_name):
        child_dir = Directory(name=child_name, 
                              parent=self,
                              filepath = self.filepath + child_name + '/',
                              children = [], 
                              files = [])
        self.children.append(child_dir)
        return child_dir

    def cd_parent(self):
        return self.parent

    def cd_child(self, child_name):
        child_dir = None
        for dir in self.children:
            if dir.name==child_name:
                child_dir = dir

        if child_dir:
            return child_dir
        else:
            return self.mkdir(child_name)

    def cd_root(self):
        if self.parent == None:
            return self
        else:
            self = self.cd_parent()
            self.cd_root()

    def get_size_of_all_files(self):
        total_size = 0
        if len(self.files) > 0:
            for file in self.files:
                total_size += file.get_size()
        return total_size

    def get_subdir_size(self):
        total_size = 0
        if len(self.children) > 0:
            for child in self.children:
                total_size += child.get_size()
        return total_size

    def get_size(self):
        file_size = self.get_size_of_all_files()
        subdir_size = self.get_subdir_size()
        return file_size + subdir_size

    def get_filepaths(self, cur_list):
        files = self.files
        for file in files:
            file_list = [file.filepath, 'file', file.size]
            cur_list.append(file_list)
            print(file.filepath)
        
        if len(self.children) > 0:
            for child in self.children:
                child_list = [child.filepath, 'dir', child.get_size()]
                cur_list.append(child_list)
                print(child.filepath, child.get_size())

                if len(child.children) == 0:
                    continue
                else:
                    child.get_filepaths(cur_list)

        return cur_list

    def print_filesystem(self):
        root = self.cd_root()
        file_list = []
        output_list = root.get_filepaths(file_list)
        root_list = [root.filepath, 'dir', root.get_size()]
        output_list.append(root_list)
        return pd.DataFrame(output_list, columns=['filepath', 'type', 'size'])

In [5]:
def build_filesystem(lines, root):
    # # Build root directory
    # root = Directory(name='/', parent=None, filepath='/')
    current_dir = root

    for line in lines:
        # Go to root
        if line[0:6]=='$ cd /':
            current_dir = current_dir.cd_root()
            print("GOING TO ROOT: ", current_dir.filepath)
        
        # List files
        elif line=='$ ls':
            print("LS...")
            continue

        # Back up a level
        elif line[0:7]=='$ cd ..':
            current_dir = current_dir.cd_parent()
            print("GOING TO PARENT: ", current_dir.filepath)

        # Cd to nested dir
        elif line[0:4]=='$ cd':
            dir_name = line[5:]
            current_dir = current_dir.cd_child(dir_name)
            print("GOING TO DIR: ", dir_name, current_dir.filepath)

        # If no $ in front of line, add item to dict
        else:
            line_vals = line.split(' ')
            # If directory, store in file_system
            if line_vals[0] == 'dir':
                _ = current_dir.mkdir(child_name=line_vals[1])
                print("CREATING CHILD: ", line_vals[1], [x.filepath for x in current_dir.children])
                continue
            else:
                file_dict = {'name': line_vals[1], 'size': int(line_vals[0])}
                current_dir.touch_file(file_dict=file_dict)
                print("CREATING FILE, ", line_vals[1], [x.filepath for x in current_dir.files])

    root_dir = current_dir.cd_root()

    return root

In [6]:
# Build root directory
root = Directory(name='/', parent=None, filepath='/')

In [7]:
file_system = build_filesystem(master_list, root)

GOING TO ROOT:  /
LS...
CREATING CHILD:  fpljqj ['/fpljqj/']
CREATING FILE,  ghtzhjwf.nls ['/ghtzhjwf.nls']
CREATING CHILD:  gsdsbld ['/fpljqj/', '/gsdsbld/']
CREATING CHILD:  hbmjtb ['/fpljqj/', '/gsdsbld/', '/hbmjtb/']
CREATING FILE,  mjfjqw.ccv ['/ghtzhjwf.nls', '/mjfjqw.ccv']
CREATING CHILD:  nfn ['/fpljqj/', '/gsdsbld/', '/hbmjtb/', '/nfn/']
CREATING CHILD:  qmrsvfvw ['/fpljqj/', '/gsdsbld/', '/hbmjtb/', '/nfn/', '/qmrsvfvw/']
CREATING FILE,  qqjnqb.chd ['/ghtzhjwf.nls', '/mjfjqw.ccv', '/qqjnqb.chd']
CREATING CHILD:  svgbqd ['/fpljqj/', '/gsdsbld/', '/hbmjtb/', '/nfn/', '/qmrsvfvw/', '/svgbqd/']
GOING TO DIR:  fpljqj /fpljqj/
LS...
CREATING FILE,  ghtzhjwf.nls ['/fpljqj/ghtzhjwf.nls']
CREATING FILE,  gsvjgj.jsm ['/fpljqj/ghtzhjwf.nls', '/fpljqj/gsvjgj.jsm']
CREATING FILE,  hghnrbqg.rzb ['/fpljqj/ghtzhjwf.nls', '/fpljqj/gsvjgj.jsm', '/fpljqj/hghnrbqg.rzb']
GOING TO PARENT:  /
GOING TO DIR:  gsdsbld /gsdsbld/
LS...
CREATING CHILD:  npmncvhh ['/gsdsbld/npmncvhh/']
CREATING CHILD:  qm

In [8]:
file_df = file_system.print_filesystem()

/ghtzhjwf.nls
/mjfjqw.ccv
/qqjnqb.chd
/fpljqj/ 550949
/gsdsbld/ 8021608
/gsdsbld/npmncvhh/ 4964676
/gsdsbld/npmncvhh/dwbgr.ztr
/gsdsbld/npmncvhh/fzjmcq
/gsdsbld/npmncvhh/zfl.ghv
/gsdsbld/npmncvhh/mphhrqf/ 3207818
/gsdsbld/npmncvhh/mphhrqf/shmcrf.wlr
/gsdsbld/npmncvhh/mphhrqf/qlcfs/ 2732978
/gsdsbld/npmncvhh/mphhrqf/qlcfs/fpljqj.pdw
/gsdsbld/npmncvhh/mphhrqf/qlcfs/hsclcqqt.pff
/gsdsbld/npmncvhh/mphhrqf/qlcfs/mdjzmbc.qtv
/gsdsbld/npmncvhh/mphhrqf/qlcfs/twgqhvft.cgw
/gsdsbld/npmncvhh/mphhrqf/qlcfs/zclhcr
/gsdsbld/npmncvhh/mphhrqf/qlcfs/htwl/ 268134
/gsdsbld/npmncvhh/mphhrqf/qlcfs/lqjhfdch/ 21479
/gsdsbld/npmncvhh/mphhrqf/qlcfs/nqfdhlcg/ 378276
/gsdsbld/npmncvhh/mphhrqf/qlcfs/nqfdhlcg/ghtzhjwf.nls
/gsdsbld/npmncvhh/mphhrqf/qlcfs/nqfdhlcg/qfwdmzfv.ggd
/gsdsbld/npmncvhh/mphhrqf/qlcfs/nqfdhlcg/dhjfqv/ 135074
/gsdsbld/npmncvhh/mphhrqf/qlcfs/zfl/ 17334
/gsdsbld/npmncvhh/mphhrqf/qlcfs/zlqgr/ 1367324
/gsdsbld/npmncvhh/mphhrqf/qlcfs/zlqgr/crs/ 220281
/gsdsbld/npmncvhh/mphhrqf/qlcfs/zlqgr/whrm/ 105

In [9]:
file_df

Unnamed: 0,filepath,type,size
0,/ghtzhjwf.nls,file,171526
1,/mjfjqw.ccv,file,296801
2,/qqjnqb.chd,file,102565
3,/fpljqj/,dir,550949
4,/gsdsbld/,dir,8021608
...,...,...,...
325,/nfn/zfl/zfl.mvw,file,64500
326,/nfn/zfl/rgvgqqd/,dir,26778
327,/qmrsvfvw/,dir,290013
328,/svgbqd/,dir,69927


In [10]:
dir_df = file_df[file_df['type']=='dir']
dir_df

Unnamed: 0,filepath,type,size
3,/fpljqj/,dir,550949
4,/gsdsbld/,dir,8021608
5,/gsdsbld/npmncvhh/,dir,4964676
9,/gsdsbld/npmncvhh/mphhrqf/,dir,3207818
11,/gsdsbld/npmncvhh/mphhrqf/qlcfs/,dir,2732978
...,...,...,...
322,/nfn/zfl/,dir,368100
326,/nfn/zfl/rgvgqqd/,dir,26778
327,/qmrsvfvw/,dir,290013
328,/svgbqd/,dir,69927


In [11]:
small_dir_df = dir_df[dir_df['size']<=100000]
print(small_dir_df.head())
small_total_size = small_dir_df['size'].sum()
print(small_total_size)

                                             filepath type   size
18          /gsdsbld/npmncvhh/mphhrqf/qlcfs/lqjhfdch/  dir  21479
23               /gsdsbld/npmncvhh/mphhrqf/qlcfs/zfl/  dir  17334
35         /gsdsbld/npmncvhh/mphhrqf/qlcfs/zlqgr/zfl/  dir  90468
36  /gsdsbld/npmncvhh/mphhrqf/qlcfs/zlqgr/zfl/hwlh...  dir  90468
41                /gsdsbld/npmncvhh/rnmvggfd/czzrdvc/  dir   2835
1783610


## Question 2

In [12]:
used_bytes = [x for x in file_df[file_df['filepath']=='/']['size']][0]

total_bytes = 70000000
total_needed = 30000000
max_used = total_bytes - total_needed
need_to_delete = used_bytes - max_used
print(used_bytes, max_used, need_to_delete)

44359867 40000000 4359867


In [13]:
eligible_df = dir_df[dir_df['size']>=need_to_delete]
eligible_df

Unnamed: 0,filepath,type,size
4,/gsdsbld/,dir,8021608
5,/gsdsbld/npmncvhh/,dir,4964676
76,/hbmjtb/,dir,8465165
134,/nfn/,dir,26391313
136,/nfn/mpz/,dir,14470980
138,/nfn/mpz/gntjg/,dir,4825246
185,/nfn/mpz/zfl/,dir,9067124
226,/nfn/mpz/zfl/nzlv/,dir,4419451
255,/nfn/qmrsvfvw/,dir,11212301
256,/nfn/qmrsvfvw/fpljqj/,dir,7330912


In [14]:
min_dir = eligible_df['size'].min()
min_dir

4370655