In [1]:
%matplotlib inline
import itertools

In [2]:
# Small sample disk map used by the part 1 / part 2 assertions further down.
# The parsers treat even-indexed digits as file lengths and odd-indexed
# digits as free-space lengths.
testline = '2333133121414131402'

In [3]:
# Read the whole puzzle input as one string. Surrounding whitespace is
# stripped so a trailing newline is never handed to int() by the parsers,
# which convert every character of the line.
with open('day9input.txt') as fp:
    data = fp.read().strip()

## Part 1 ##

In [4]:
def get_storage(line):
    """Expand a disk map into a flat list with one entry per block.

    Digits at even positions are file lengths and digits at odd positions
    are free-space lengths. File blocks hold the file id (0, 1, 2, ... in
    order of appearance); free blocks hold None.
    """
    blocks = []
    for idx, digit in enumerate(line):
        run = int(digit)
        if idx % 2:
            # odd position: a run of free space
            blocks += [None] * run
        else:
            # even position: the (idx // 2)-th file
            blocks += [idx // 2] * run
    return blocks

In [5]:
def print_storage(storage):
    """Render a block list as text: file ids as decimal strings, free blocks as '.'.

    Despite the name, nothing is printed; the rendered string is returned.
    """
    return ''.join('.' if block is None else str(block) for block in storage)

In [6]:
def defrag(storagein):
    """Compact a block list by moving blocks from the end into the leftmost gaps.

    Args:
        storagein: list with one entry per block, a file id for file blocks
            and None for free blocks. It is not modified.

    Returns:
        A new list containing only file ids: repeatedly, the rightmost file
        block is moved into the leftmost free block until no free block
        remains to the left of a file block. Trailing free space is dropped,
        so an empty or all-free input yields an empty list.
    """
    storage = storagein.copy()
    left, right = 0, len(storage) - 1
    while True:
        # Advance to the leftmost gap and retreat to the rightmost file block.
        # Both scans are bounded by each other, so they can neither cross nor
        # run off either end of the list.
        while left < right and storage[left] is not None:
            left += 1
        while left < right and storage[right] is None:
            right -= 1
        if left >= right:
            break
        storage[left], storage[right] = storage[right], None
    # Everything after the last file block is free space; drop it.
    while storage and storage[-1] is None:
        storage.pop()
    return storage

In [7]:
def part1(line):
    """Return the part 1 checksum for a disk map.

    The map is expanded to blocks, compacted block by block, and every
    remaining block contributes position * file id to the total.
    """
    compacted = defrag(get_storage(line))
    total = 0
    for pos, fileid in enumerate(compacted):
        total += pos * fileid
    return total

In [8]:
# Regression check against the known answer for the sample disk map.
assert part1(testline) == 1928

In [9]:
# Run part 1 on the full puzzle input; the cell displays the returned value.
part1(data)

6283170117911

## Part 2 ##

In [10]:
def get_storage2(line):
    """Parse a disk map into per-file records instead of individual blocks.

    Returns a dict mapping file id -> {'start': first block offset,
    'size': number of blocks}. Free-space digits only advance the running
    offset; they are not stored.
    """
    layout = {}
    offset = 0
    for idx, digit in enumerate(line):
        length = int(digit)
        if idx % 2 == 0:
            # even position: a file, ids assigned in order of appearance
            layout[idx // 2] = {'start': offset, 'size': length}
        # files and free runs both consume `length` blocks
        offset += length
    return layout

In [11]:
def get_free(files):
    """List the free gaps that lie between files, in ascending offset order.

    Args:
        files: dict mapping file id -> {'start': offset, 'size': length}.

    Returns:
        A list of (offset, length) tuples, one per non-empty gap between
        consecutive files. Space before the first file and after the last
        file is not reported. An empty `files` yields an empty list.
    """
    # Sweep the files in offset order while tracking the furthest occupied
    # offset seen so far. Files are NOT keyed by their start offset: two files
    # can share one (a zero-length file sitting where another file begins),
    # and collapsing them would report a gap on top of occupied blocks.
    extents = sorted((record['start'], record['size']) for record in files.values())
    free = []
    cursor = None  # first offset not covered by any file seen so far
    for start, size in extents:
        if cursor is not None and start > cursor:
            free.append((cursor, start - cursor))
        end = start + size
        if cursor is None or end > cursor:
            cursor = end
    return free

In [12]:
def defrag2(files):
    """Compact whole files: move each file once into the leftmost gap that fits.

    Files are visited in decreasing id order (id 0 is never moved). A file is
    relocated to the first gap, scanning left to right, that starts before the
    file's current position and is at least as large as the file; if no such
    gap exists the file stays where it is.

    Args:
        files: dict mapping file id -> {'start': offset, 'size': length}.
            Ids are expected to be the contiguous range produced by
            get_storage2. Neither the dict nor its records are modified.

    Returns:
        A new dict of new records with updated 'start' values. An empty
        input yields an empty dict.
    """
    # dict.copy() would be shallow and share the per-file records with the
    # caller, so assigning record['start'] below would silently rewrite the
    # input. Copy each record as well.
    newfiles = {fileid: dict(record) for fileid, record in files.items()}
    if not newfiles:
        return newfiles
    maxid = max(newfiles)
    for fileid in range(maxid, 0, -1):
        file = newfiles[fileid]
        filepos, filesz = file['start'], file['size']
        # The gap list is rebuilt for every file because earlier moves change it.
        for freepos, freesz in get_free(newfiles):
            if freepos >= filepos:
                # Gaps are in ascending order; the rest lie to the right.
                break
            if freesz >= filesz:
                file['start'] = freepos
                break
    return newfiles

In [13]:
def checksum(files):
    """Sum position * file id over every block occupied by every file.

    `files` maps file id -> {'start': offset, 'size': length}; a file covers
    the offsets start .. start + size - 1.
    """
    total = 0
    for fileid, record in files.items():
        first = record['start']
        # fileid is constant across the file's blocks, so factor it out
        total += fileid * sum(range(first, first + record['size']))
    return total

In [14]:
def part2(line):
    """Return the part 2 checksum: parse to file records, move whole files, then sum."""
    return checksum(defrag2(get_storage2(line)))

In [15]:
# Regression check against the known answer for the sample disk map.
assert part2(testline) == 2858

In [16]:
# Run part 2 on the full puzzle input; the cell displays the returned value.
part2(data)

6307653242596