# Advent of Code 2018
## Day 5
Polymer reactions and performance testing

In [None]:
import numpy as np

### Read in the file

We'll test a number of ways of loading in the string from the text file and converting it to a list (or array) of characters

The first one we came up with during the lunch and learn:

In [None]:
def read_input_original(filename):
    """Read a polymer file into a list of single-character strings.

    This is the explicit nested-loop baseline that the other readers are
    benchmarked against, so the loop-and-append structure is kept on purpose.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one entry per polymer unit, in file order.
    """
    char_list = []
    with open(filename, "r") as file:
        for line in file:
            # Surrounding whitespace (in practice the trailing newline) is not
            # a polymer unit; keeping it would inflate every length by one.
            for char in line.strip():
                char_list.append(char)
    return char_list

In [None]:
%timeit read_input_original("input.txt")

Ok, benchmark performance is set. There's a fair bit of code and it's easy to follow, but it's not very pythonic and I bet I can make it faster

In [None]:
def read_input_comprehension(filename):
    """Read a polymer file into a list of characters with a comprehension.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one single-character string per polymer unit.
    """
    with open(filename, "r") as file:
        # Read as nested loops: for each line, for each character of the line.
        # strip() drops the line terminator so it is not counted as a unit.
        char_list = [char for line in file for char in line.strip()]
    return char_list

In [None]:
%timeit read_input_comprehension("input.txt")

The list comprehension is a bit less readable, since you have to work backwards to read it: we want the character, for each line in the file, for each character in the line.
On the plus side, it's pythonic and it's over twice as fast as the first implementation.

In [None]:
def read_input_final_np(filename):
    """Read a polymer file into a numpy array of single characters.

    Args:
        filename: Path of the text file to read.

    Returns:
        A 1-D numpy string array with one element per polymer unit.
    """
    with open(filename, "r") as file:
        # strip() drops the line terminator so it is not counted as a unit.
        char_list = [char for line in file for char in line.strip()]
    # Pin the dtype: without it an empty file would produce a float64 array
    # instead of an (empty) string array.
    char_arr = np.array(char_list, dtype=str)
    return char_arr

In [None]:
%timeit read_input_final_np("input.txt")

I suspect that doing the actual analysis will end up faster if I can work with numpy arrays rather than python lists.
This works, but I don't like it. It's slow, and the nested function calls are hard to read.

In [None]:
# Bind the list-comprehension reader to a short, generic name. The helper
# functions below look up `read_input` at call time, so rebinding this name
# later changes which reader they use.
read_input = read_input_comprehension

### Character comparison

The next thing we need to be able to do is compare two characters and determine if they react.

Do some setup code

In [None]:
# Benchmark harness shared by all of the character-comparison candidates
def test_comparison(comp_func):
    """Call ``comp_func`` on every adjacent pair of puzzle-input characters.

    The results of ``comp_func`` are discarded; the function exists only so
    the candidates can be timed on identical data. Always returns True.
    """
    chars = read_input("input.txt")
    for left, right in zip(chars, chars[1:]):
        comp_func(left, right)
    return True

Here's the method we defined at lunch

In [None]:
def compare_characters_original(char1, char2):
    """Return True when the two characters are the same letter in opposite case.

    Baseline implementation: both conditions are always evaluated and then
    combined with the non-short-circuiting ``&`` operator.
    """
    same_letter = char1.upper() == char2.upper()
    opposite_case = char1.isupper() != char2.isupper()
    return same_letter & opposite_case

In [None]:
%timeit test_comparison(compare_characters_original)

During lunch we figured there would be more character pairs that differ in case than pairs that are the same letter, so the comparison should be faster with the more restrictive condition first. Let's test that.

In [None]:
def compare_characters_diff_order(char1, char2):
    """Return True when the characters are the same letter in opposite case.

    Variant that checks the case difference before the letter equality.
    The conditions are joined with ``and`` rather than ``&``: ``&`` always
    evaluates both operands, so with it the order of the conditions cannot
    affect the running time. ``and`` skips the second check when the first
    one already fails.
    """
    return (char1.isupper() != char2.isupper()
            and char1.upper() == char2.upper())

In [None]:
%timeit test_comparison(compare_characters_diff_order)

Pretty much the same speed. Two more ideas: a one-line comparison using `swapcase`, and explicitly nesting the checks so we can exit early.

In [None]:
def compare_characters_swapcase(char1, char2):
    """Return True when ``char2`` is ``char1`` with its case flipped.

    The inequality guard matters for characters without case (digits,
    punctuation, newline): ``swapcase`` leaves them unchanged, so without
    the guard two identical caseless characters would be reported as
    reacting.
    """
    return char1 != char2 and char1.swapcase() == char2

In [None]:
%timeit test_comparison(compare_characters_swapcase)

In [None]:
def compare_characters_fast_exit(char1, char2):
    """Return True when the characters are the same letter in opposite case.

    The case check is only performed once the letters are known to match.
    """
    if char1.upper() == char2.upper():
        return char1.isupper() != char2.isupper()
    return False

In [None]:
%timeit test_comparison(compare_characters_fast_exit)

Ok, that's a lot less readable, but it is a little faster. It's generally a bad idea to return early in a function. It doesn't matter too much in a small function like this, but in a larger one it can make the function much harder to interpret. Not a good habit to get into, although taking only 75% of the time is pretty nice. Let's see if we can do better.

In [None]:
def compare_characters(char1, char2):
    """Return True when the two units react.

    Two units react when they are the same letter in opposite case, i.e.
    they differ as characters but are equal once upper-cased.

    The cheap inequality test comes first and the conditions are joined with
    ``and`` (not the bitwise ``&``), so the two ``upper()`` calls are skipped
    whenever the characters are identical.
    """
    return char1 != char2 and char1.upper() == char2.upper()

In [None]:
%timeit test_comparison(compare_characters)

One line, about as fast as anything else, quick and to the point. We'll go with that.

### The full algorithm

This is the part that's actually quite slow. Let's see if we can speed it up

First we make some helper functions

In [None]:
def validate_method(compression_func):
    """Check a compression function against the worked example.

    Reads ``example.txt`` with the current ``read_input`` and verifies that
    ``compression_func`` reduces it to 10 units.

    Args:
        compression_func: Callable taking the sequence of units and returning
            the reduced sequence.

    Returns:
        True when the reduced example has the expected length.

    Raises:
        AssertionError: If the reduced length differs from the expected one.
            Raised explicitly (rather than via ``assert``) so the check still
            runs under ``python -O`` and reports the length actually seen.
    """
    expected_length = 10
    eg_list = read_input("example.txt")
    result = len(compression_func(eg_list))
    if result != expected_length:
        raise AssertionError(
            "expected {} units after compression, got {}".format(
                expected_length, result
            )
        )
    return True

In [None]:
def run_full_method(compression_func):
    """Compress the puzzle input with ``compression_func``; return its length."""
    return len(compression_func(read_input("input.txt")))

Here's the original method:

In [None]:
def compress_polymer_method_1(input_list):
    """Reduce the polymer with repeated left-to-right sweeps.

    Each sweep walks forward through the list, deleting every reacting pair
    it meets and then moving on to the next index. Sweeps repeat until one
    of them removes nothing. ``input_list`` is modified in place and also
    returned.
    """
    removed_any = True
    while removed_any:
        removed_any = False
        pos = 0
        # The list only ever shrinks, so bounding by the current length is
        # enough to stay in range for the whole sweep.
        while pos < len(input_list) - 1:
            if compare_characters(input_list[pos], input_list[pos + 1]):
                del input_list[pos:pos + 2]
                removed_any = True
            pos += 1
    return input_list

In [None]:
validate_method(compress_polymer_method_1)

In [None]:
%timeit run_full_method(compress_polymer_method_1)

Ok, that's a pretty slow speed to beat, let's try and get it faster

In [None]:
def compress_polymer_method_2(input_list):
    """Reduce the polymer in a single pass with one step of backtracking.

    After a reacting pair is deleted, the position moves back by one so the
    newly adjacent pair is examined next. ``input_list`` is modified in
    place and also returned.
    """
    pos = 0
    while pos < len(input_list) - 1:
        if not compare_characters(input_list[pos], input_list[pos + 1]):
            pos += 1
            continue
        del input_list[pos:pos + 2]
        # Step back to re-check the pair formed by the removal, but never
        # before the start of the list.
        pos = max(pos - 1, 0)
    return input_list

In [None]:
validate_method(compress_polymer_method_2)

In [None]:
%timeit run_full_method(compress_polymer_method_2)

Well, that's a huge speedup already. But can we go faster?

In [None]:
# Rebind the shared reader to the numpy version so the helper functions hand
# a numpy array to the compression function.
# NOTE: the list-based methods above call `.pop`, which numpy arrays do not
# have, so re-running them requires pointing `read_input` back at a list reader.
read_input = read_input_final_np

Does doing it with a numpy array rather than a list on its own do anything for us?

In [None]:
def compress_polymer_method_3(input_list):
    """Single-pass reduction with backtracking, operating on a numpy array.

    Same walk as the list-based single-pass method, but each reacting pair
    is removed with ``np.delete``, which builds a new array on every call.
    Returns the reduced array.
    """
    pos = 0
    while pos < len(input_list) - 1:
        if not compare_characters(input_list[pos], input_list[pos + 1]):
            pos += 1
            continue
        input_list = np.delete(input_list, [pos, pos + 1])
        # Step back to re-check the newly adjacent pair, clamped at the start.
        pos = max(pos - 1, 0)
    return input_list

In [None]:
validate_method(compress_polymer_method_3)

In [None]:
%timeit run_full_method(compress_polymer_method_3)

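Ooooh, counterintuitive! Numpy is slower! `np.delete` returns a new array rather than modifying the existing one, so every removal copies the whole array.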

# Final fast version

In [None]:
def compress_polymer(filename):
    """Fully react the polymer stored in ``filename`` and return its length.

    The units are pushed onto a stack one at a time; a unit that reacts with
    the current top of the stack removes that top instead of being pushed.
    A list is used for the stack because slicing and re-concatenating a
    string copies it on every step.

    Note: a later cell in this notebook redefines ``compress_polymer`` to
    take the polymer string itself and return the reduced string.

    Args:
        filename: Path of the text file holding the polymer.

    Returns:
        The number of units left after all reactions.
    """
    with open(filename, "r") as f:
        # Drop surrounding whitespace so a trailing newline is not counted
        # as a unit.
        input_str = f.read().strip()
    stack = []
    for unit in input_str:
        if stack and compare_characters(stack[-1], unit):
            stack.pop()
        else:
            stack.append(unit)
    return len(stack)

In [None]:
assert compress_polymer("example.txt") == 10

In [None]:
%timeit compress_polymer("input.txt")

In [None]:
compress_polymer("input.txt")

# Part 2

In [None]:
# The 26 lower-case ASCII letters, 'a' through 'z', as a list
lower_alphabet = list(map(chr, range(ord("a"), ord("z") + 1)))

In [None]:
def compress_polymer(input_str):
    """Fully react a polymer string and return the reduced string.

    The units are pushed onto a stack one at a time; a unit that reacts with
    the current top of the stack removes that top instead of being pushed.
    The stack is a list joined once at the end, because slicing and
    re-concatenating a string copies it on every step.

    Args:
        input_str: The polymer, one character per unit.

    Returns:
        The polymer after all reactions, as a string.
    """
    stack = []
    for unit in input_str:
        if stack and compare_characters(stack[-1], unit):
            stack.pop()
        else:
            stack.append(unit)
    return "".join(stack)

In [None]:
def remove_char(input_str, char_to_drop):
    """Return ``input_str`` without any occurrence of one letter, either case.

    Args:
        input_str: The polymer string to filter.
        char_to_drop: The letter to remove. Either case is accepted; both
            the lower- and upper-case forms are removed from the input.

    Returns:
        A new string with every matching unit removed.
    """
    # Normalise once so an upper-case argument also matches.
    target = char_to_drop.lower()
    return "".join(char for char in input_str if char.lower() != target)

In [None]:
def solve_part_2(filename):
    """Return the shortest polymer obtainable by removing one letter type.

    The polymer is fully reacted once up front. Then, for each letter of
    ``lower_alphabet``, both cases of that letter are removed from the
    reacted polymer and the remainder is reacted again.

    Args:
        filename: Path of the text file holding the polymer.

    Returns:
        The smallest reduced length over all candidate letters.
    """
    with open(filename, "r") as f:
        # Drop surrounding whitespace so a trailing newline is not counted
        # as a unit in every candidate length.
        polymer = f.read().strip()
    compressed_str = compress_polymer(polymer)
    return min(
        len(compress_polymer(remove_char(compressed_str, char)))
        for char in lower_alphabet
    )




In [None]:
%timeit solve_part_2('input.txt')