In [81]:
import pandas as pd
import numpy as np
from pathlib import Path
from functools import reduce, partial

def read_data(path='train_tickets.txt'):
    """Parse a ticket puzzle input file into its three sections.

    The file is expected to hold three blocks separated by a blank line:
    the field rules (``name: a-b or c-d``), a header line followed by my
    ticket, and a header line followed by the other tickets.

    Parameters
    ----------
    path : str or Path, optional
        Location of the input file. Defaults to ``'train_tickets.txt'`` in
        the current working directory.

    Returns
    -------
    ticket_fields : dict
        Maps each field name to a list of inclusive ``(low, high)`` int tuples.
    my_ticket : list of int
        The values on my ticket, in file order.
    other_tickets : list of list of int
        One list of values per other ticket, in file order.
    """
    # Strip first so leading/trailing blank lines cannot shift the section indices.
    sections = Path(path).read_text().strip().split("\n\n")

    ticket_fields = {}
    for line in sections[0].splitlines():
        if not line.strip():
            continue
        # Ranges never contain a colon, so split on the LAST one; this keeps
        # any colon inside a field name intact.
        field, _, spec = line.rpartition(":")
        ranges = []
        for span in spec.split(" or "):
            low, high = span.split("-")
            ranges.append((int(low), int(high)))
        ticket_fields[field] = ranges

    # The first line of sections 1 and 2 is a header and is skipped.
    my_ticket = [int(i) for i in sections[1].splitlines()[1].split(",")]

    other_tickets = [
        [int(i) for i in line.split(",")]
        for line in sections[2].splitlines()[1:]
        if line.strip()
    ]

    return ticket_fields, my_ticket, other_tickets

def flag_invalid(value, ranges):
    """Return 0 when `value` fits any range, otherwise return `value` itself.

    `ranges` is an iterable of inclusive (low, high) pairs. The non-zero
    results can be summed directly to get the problem 1 answer.

    Note that a `value` of 0 yields 0 whether or not it fits a range, so the
    result cannot be used on its own to tell a valid value from an invalid 0.
    """
    fits_some_range = any(bounds[0] <= value <= bounds[1] for bounds in ranges)
    return 0 if fits_some_range else value

In [82]:
# Load the puzzle input and note how many fields there are to place.
ticket_fields, my_ticket, other_tickets = read_data()
num_fields = len(ticket_fields)

# Pool every field's (low, high) pairs: for problem 1 a value only needs to
# fit *some* field's range to count as valid.
all_ranges = [bounds for items in ticket_fields.values() for bounds in items]

ticket_arr = np.array(other_tickets)

# Element-wise check: 0 where the value fits a range, the value itself otherwise.
check_any_range = np.vectorize(partial(flag_invalid, ranges=all_ranges))
invalid_fields = check_any_range(ticket_arr)

# Problem 1 answer: the sum of all values that fit no range at all.
invalid_fields.sum()

26009

# Problem 2

Work out which ticket position corresponds to which field, then
multiply together the values in the `departure` fields of my ticket.

In [83]:
# Build a row mask of the tickets whose every value fits at least one range.
#
# Validity is tested directly instead of checking that a row of
# `invalid_fields` sums to 0: flag_invalid reports a valid value as 0, so an
# out-of-range value that is itself 0 looks valid in that array and its ticket
# would wrongly be kept.
value_in_any_range = np.zeros(ticket_arr.shape, dtype=bool)
for low, high in all_ranges:
    value_in_any_range |= (ticket_arr >= low) & (ticket_arr <= high)

row_mask = value_in_any_range.all(axis=1)
valid_ticket_arr = ticket_arr[row_mask, :]

In [96]:
# For each field, find the ticket positions (columns of valid_ticket_arr) it
# could describe: a position is a candidate only if the value every valid
# ticket has there fits one of that field's own ranges.
#
# The check is a plain boolean test rather than "sum of flag_invalid == 0",
# because a value of 0 that is outside the ranges would contribute 0 to that
# sum and be mistaken for valid. It also leaves the problem 1 array
# `invalid_fields` untouched, so earlier cells can be re-run safely.
field_to_columns = {}
for field, field_ranges in ticket_fields.items():
    fits_field = np.zeros(valid_ticket_arr.shape, dtype=bool)
    for low, high in field_ranges:
        fits_field |= (valid_ticket_arr >= low) & (valid_ticket_arr <= high)
    # True where the whole column is compatible with this field
    field_to_columns[field] = fits_field.all(axis=0)

# Rows are ticket positions, columns are field names.
possible_map_df = pd.DataFrame(field_to_columns)

# Greedy elimination: repeatedly take the field that has exactly one candidate
# position left, record it, then remove that field and position from play.
# `drop` returns new frames, so possible_map_df itself stays intact.
solution = {}
remaining_df = possible_map_df
for step in range(len(field_to_columns)):
    candidate_counts = remaining_df.sum(axis=0)

    # the field with the fewest candidate positions; it must have exactly one
    field = candidate_counts.idxmin()
    if candidate_counts[field] != 1:
        raise ValueError(
            "Cannot place field {!r}: {} candidate positions remain "
            "(expected exactly 1)".format(field, candidate_counts[field])
        )

    # the single True in an otherwise all-False column marks the position
    index = remaining_df[field].idxmax()
    solution[field] = index

    # drop the solved field and position before recomputing the counts
    remaining_df = remaining_df.drop(field, axis='columns').drop(index, axis='index')

In [97]:
# Show the solved mapping of field name -> ticket position (column index).
solution

{'type': 13,
 'seat': 17,
 'row': 15,
 'price': 16,
 'arrival location': 14,
 'arrival platform': 18,
 'arrival track': 2,
 'wagon': 6,
 'arrival station': 8,
 'zone': 1,
 'departure date': 3,
 'departure track': 12,
 'departure platform': 4,
 'departure station': 5,
 'departure location': 10,
 'departure time': 0,
 'train': 19,
 'class': 7,
 'duration': 11,
 'route': 9}

In [98]:
# Ticket positions of every field whose name starts with 'departure'.
indexes = [
    position
    for name, position in solution.items()
    if name.startswith('departure')
]
# The values my own ticket holds at those positions.
values = [my_ticket[position] for position in indexes]
# Problem 2 answer: the product of those values (1 if there are none).
prod = reduce(lambda running, factor: running * factor, values, 1)
prod

589685618167