In [1]:
import math
import os
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# Folder that holds the puzzle input files. The original hard-coded location
# stays the default; set the AOC_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
BASE_DIR = Path(os.environ.get('AOC_DATA_DIR', 'C:/Users/atw10wp4/Jupyter/AdventOfCode/Data'))

In [3]:
# Name of the puzzle input file and its full location inside BASE_DIR.
fileName = '2021_08_input.txt'
fileNameFullPath = BASE_DIR.joinpath(fileName)

In [4]:
# One input line per row, split on single spaces into unnamed (integer-labelled)
# columns. The driver loop reads columns 0-9 as the signal patterns and columns
# 11-14 as the output digits; column 10 is presumably the '|' delimiter.
df = pd.read_csv(
    fileNameFullPath,
    sep=' ',
    header=None,
)

In [5]:
def regex_from_string(letterstring):
    """Build a regex that matches text containing every character of ``letterstring``.

    Each character becomes its own ``(?=.*x)`` lookahead anchored at the start
    of the string, so the order of the characters in the matched text does not
    matter. Characters are inserted verbatim (no escaping), which is fine for
    the plain letters used in this notebook.

    Returns the pattern as a string; an empty input yields just ``'^'``.
    """
    lookaheads = ['(?=.*{})'.format(letter) for letter in letterstring]
    return r'^{}'.format(''.join(lookaheads))

In [6]:
def generate_signal_array(signal_patterns):
    """Work out which scrambled wire pattern represents each digit 0-9.

    Segment names (a-g) follow the convention described at
    https://en.wikipedia.org/wiki/Seven-segment_display

    Parameters
    ----------
    signal_patterns : pandas.Series of str
        The wire patterns of one display, one string of segment letters per
        entry. The deduction needs exactly one pattern each of length 2, 3, 4
        and 7, and three patterns each of length 5 and 6.

    Returns
    -------
    numpy.ndarray
        Integer array of length 10; element ``d`` is the index label (within
        ``signal_patterns``) of the pattern that displays digit ``d``.

    Raises
    ------
    ValueError
        If any deduction step does not single out exactly one pattern, i.e.
        the input is not a valid set of ten digit patterns.
    """
    # Non-string cells (e.g. NaN from a short input line) are ignored here so
    # that they surface below as a clear "found 0" error instead of a TypeError.
    patterns = {
        label: pattern
        for label, pattern in signal_patterns.items()
        if isinstance(pattern, str)
    }
    segments = {label: frozenset(pattern) for label, pattern in patterns.items()}

    def labels_of_length(size):
        """Index labels of all patterns made of ``size`` letters."""
        return [label for label, pattern in patterns.items() if len(pattern) == size]

    def only(candidates, description):
        """Return the single candidate; reject zero or several matches."""
        if len(candidates) != 1:
            raise ValueError(
                f'expected exactly one pattern for {description}, '
                f'found {len(candidates)}'
            )
        return candidates[0]

    # Digits 1, 4, 7 and 8 are identified by their segment count alone.
    label_of = {
        digit: only(labels_of_length(size), f'digit {digit} ({size} segments)')
        for digit, size in ((1, 2), (4, 4), (7, 3), (8, 7))
    }
    one = segments[label_of[1]]
    seven = segments[label_of[7]]
    eight = segments[label_of[8]]

    five_segment = labels_of_length(5)   # candidates for 2, 3, 5
    six_segment = labels_of_length(6)    # candidates for 0, 6, 9

    # 8 - 7 leaves segments d, e, f, g; 6 is the only six-segment digit that
    # lights all four of them (0 lacks g, 9 lacks e).
    gfed = eight - seven
    label_of[6] = only(
        [label for label in six_segment if gfed <= segments[label]],
        'digit 6',
    )

    # 8 - 6 is the wire driving segment b.
    wire_b = eight - segments[label_of[6]]

    # 5 is the only five-segment digit without segment b.
    label_of[5] = only(
        [label for label in five_segment if not (wire_b & segments[label])],
        'digit 5',
    )

    # 9 is 5 plus segment b.
    nine = segments[label_of[5]] | wire_b
    label_of[9] = only(
        [label for label in six_segment if nine <= segments[label]],
        'digit 9',
    )

    # The remaining six-segment pattern is 0.
    label_of[0] = only(
        [label for label in six_segment if label not in (label_of[6], label_of[9])],
        'digit 0',
    )

    # 1 - b is the wire driving segment c; 2 is the only five-segment digit
    # without it.
    wire_c = one - wire_b
    label_of[2] = only(
        [label for label in five_segment if not (wire_c & segments[label])],
        'digit 2',
    )

    # The remaining five-segment pattern is 3.
    label_of[3] = only(
        [label for label in five_segment if label not in (label_of[5], label_of[2])],
        'digit 3',
    )

    signal_array = np.full(10, -1)
    for digit, label in label_of.items():
        signal_array[digit] = label
    return signal_array

In [7]:
def decode_digits(digits, signal_patterns=None, pattern_labels=None):
    """Translate scrambled output digits into their decimal characters.

    Parameters
    ----------
    digits : iterable of str
        Scrambled segment strings to decode.
    signal_patterns : pandas.Series of str, optional
        The display's signal patterns. Defaults to the notebook-level
        variable ``signals`` assigned by the driver loop.
    pattern_labels : sequence of int, optional
        Position ``d`` holds the index label (in ``signal_patterns``) of the
        pattern for digit ``d``, as returned by ``generate_signal_array``.
        Defaults to the notebook-level variable ``signal_array``.

    Returns
    -------
    list of str
        One decimal character per entry of ``digits``, in order.

    Raises
    ------
    ValueError
        If a digit does not match exactly one signal pattern, or the matched
        pattern's label is not present in ``pattern_labels``.
    """
    # Fall back to the globals so the existing ``decode_digits(digits)`` call
    # keeps working; pass both arguments explicitly to avoid the hidden state.
    if signal_patterns is None:
        signal_patterns = signals
    if pattern_labels is None:
        pattern_labels = signal_array

    labels_in_digit_order = [int(label) for label in pattern_labels]

    values = []
    for digit in digits:
        wanted = set(digit)
        # A pattern matches when it has the same length and contains every
        # letter of the digit, regardless of letter order.
        matches = [
            label
            for label, pattern in signal_patterns.items()
            if isinstance(pattern, str)
            and len(pattern) == len(digit)
            and wanted <= set(pattern)
        ]
        if len(matches) != 1:
            raise ValueError(
                f'digit {digit!r} matched {len(matches)} signal patterns, expected 1'
            )
        # The position of the label within pattern_labels is the digit value.
        values.append(str(labels_in_digit_order.index(int(matches[0]))))
    return values

In [8]:
# Decode every display in the input and add up the resulting numbers.
sum_of_values = 0
for i, line in df.iterrows():
    # Columns 0-9 hold the signal patterns, columns 11-14 the output digits.
    # `signals` and `signal_array` are also read as globals by decode_digits.
    signals = line.iloc[:10]
    digits = line.iloc[11:15].tolist()
    signal_array = generate_signal_array(signals)
    decoded_value = int(''.join(decode_digits(digits)))
    sum_of_values = sum_of_values + decoded_value
sum_of_values

1028926