In [1]:
import numpy as np
import pandas as pd
import re

## READ THE DATA

In [2]:
def SplitFile(fileLines):
    """Lazily split a sequence of lines into blocks separated by empty lines.

    Each yielded block is a list of the consecutive non-empty lines seen since
    the previous separator. A line counts as a separator only when it is exactly
    the empty string. The final block is always yielded, even when it is empty.
    """
    block = []
    for text in fileLines:
        if text != '':
            block.append(text)
            continue
        ## Separator reached: hand over the finished block and start a fresh one
        yield block
        block = []
    yield block

In [3]:
fileName = 'input.txt'

## Read through a context manager so the file handle is closed as soon as the lines are loaded
with open(fileName) as inputFile:
    fileLines = inputFile.read().splitlines()

## The input is expected to hold exactly three blank-line-separated blocks;
## any other number makes this unpacking raise a ValueError
fieldRequirements, myTicket, otherTickets = SplitFile(fileLines)

In [4]:
## Map each field name to its raw requirement string (the text after ': ')
dicFieldRequirements = {
    name: ranges
    for name, ranges in (entry.split(': ') for entry in fieldRequirements)
}

print(dicFieldRequirements)

{'departure location': '40-152 or 161-969', 'departure station': '39-838 or 845-971', 'departure platform': '39-209 or 217-970', 'departure track': '47-76 or 82-955', 'departure date': '41-167 or 178-949', 'departure time': '25-652 or 660-953', 'arrival location': '36-798 or 810-964', 'arrival station': '30-688 or 702-973', 'arrival platform': '44-248 or 268-969', 'arrival track': '45-536 or 552-956', 'class': '29-751 or 760-951', 'duration': '40-912 or 934-971', 'price': '44-896 or 911-965', 'route': '32-582 or 590-953', 'row': '46-269 or 282-971', 'seat': '49-114 or 134-971', 'train': '37-395 or 401-969', 'type': '43-180 or 206-960', 'wagon': '41-462 or 480-953', 'zone': '35-411 or 427-960'}


In [5]:
## Leave my ticket alone (for now): it is only decoded at the very end of Part 2.
## Printing it shows the block's layout: a header line followed by one line of comma-separated values.
print(myTicket)

['your ticket:', '139,109,61,149,101,89,103,53,107,59,73,151,71,67,97,113,83,163,137,167']


In [6]:
## Load the other tickets into a data frame: one row per ticket, one column per position.
## The first entry of the block is skipped (presumably its header line); values are kept as strings.
ticketRows = [ticket.split(',') for ticket in otherTickets[1:]]
otherTicketsData = pd.DataFrame(ticketRows)
otherTicketsData.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,660,948,772,679,610,713,875,887,750,335,895,553,774,802,797,336,823,566,861,599
1,428,760,595,55,490,877,63,570,742,735,830,854,566,989,292,748,445,876,677,329
2,396,92,686,894,353,668,666,794,743,142,604,344,437,440,352,680,439,431,636,385
3,319,322,318,301,736,485,727,437,679,486,129,516,488,643,560,880,384,846,609,644
4,851,865,837,744,150,624,140,569,627,733,607,352,221,630,731,212,331,713,674,635


## PART 1

In [7]:
def ProcessCondition(condition, value):
    """Return True if ``value`` lies inside at least one range of ``condition``.

    Parameters
    ----------
    condition : str
        One or more inclusive ``low-high`` ranges joined by ``' or '``,
        e.g. ``'40-152 or 161-969'``. Any number of ranges is accepted.
    value : int or str
        The number to test; it is converted with ``int()``.

    Returns
    -------
    bool
        True when ``low <= value <= high`` holds for some range, False otherwise.

    Raises
    ------
    ValueError
        If ``value`` or a range bound cannot be converted to an integer.
    IndexError
        If a range does not contain a ``'-'`` separated upper bound.
    """
    value = int(value)

    ## Parse every range up front so a malformed condition fails regardless of the value tested
    bounds = [[int(x) for x in interval.split('-')] for interval in condition.split(' or ')]

    return any(limits[0] <= value <= limits[1] for limits in bounds)

In [8]:
## Go through every value of every other ticket and add up the ones that satisfy no field requirement at all
invalidValues = 0

for rawValue in otherTicketsData.to_numpy().ravel():
    number = int(rawValue)
    fitsSomeField = any(
        ProcessCondition(requirement, number)
        for requirement in dicFieldRequirements.values()
    )
    if not fitsSomeField:
        invalidValues += number

print("The sum of all invalid values in the other tickets is", invalidValues)

The sum of all invalid values in the other tickets is 20058


## PART 2

In [9]:
## Create new data frame with just valid tickets.
## A ticket is valid when every one of its values satisfies at least one field requirement.
## The rows are selected in one step with a boolean mask: DataFrame.append was removed in pandas 2.0,
## and growing a frame row by row copies it on every iteration.
validRowMask = [
    all(
        any(ProcessCondition(requirement, value) for requirement in dicFieldRequirements.values())
        for value in ticket
    )
    for ticket in otherTicketsData.itertuples(index=False, name=None)
]

## Renumber the surviving rows from 0, as the previous append(..., ignore_index=True) did
validTicketsData = otherTicketsData.loc[validRowMask].reset_index(drop=True)

validTicketsData.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,660,948,772,679,610,713,875,887,750,335,895,553,774,802,797,336,823,566,861,599
1,396,92,686,894,353,668,666,794,743,142,604,344,437,440,352,680,439,431,636,385
2,319,322,318,301,736,485,727,437,679,486,129,516,488,643,560,880,384,846,609,644
3,851,865,837,744,150,624,140,569,627,733,607,352,221,630,731,212,331,713,674,635
4,690,681,593,411,865,494,673,814,482,707,626,636,384,661,857,514,896,613,526,941


In [10]:
## By process of elimination, we'll sort out which field matches which index

## For each column, the set of fields whose requirement is satisfied by every value in that column.
## This does not change between passes, so it is computed once up front.
dicCompatibleFields = {
    column: {
        field
        for field, requirement in dicFieldRequirements.items()
        if all(ProcessCondition(requirement, value) for value in validTicketsData[column])
    }
    for column in validTicketsData.columns.values
}

## index -> field dictionary
dicFieldColumns = {}
## Dictionary of missing fields, keeps track of which fields we already found
dicUnknownFields = dicFieldRequirements.copy()

while dicUnknownFields:
    ## More than one iteration may be necessary, but each one should yield at least one recognized field
    print("Iterating while cycle: missing", len(dicUnknownFields), "unknown fields...")
    unknownBeforePass = len(dicUnknownFields)

    for column in validTicketsData.columns.values:
        ## A column that already has its field must not be examined again,
        ## otherwise a leftover compatible field could overwrite the earlier assignment
        if column in dicFieldColumns:
            continue

        ## Fields still unassigned that are compatible with every value in this column
        validFields = [field for field in dicUnknownFields if field in dicCompatibleFields[column]]

        ## A single remaining candidate must be the right field for this column
        if len(validFields) == 1:
            dicFieldColumns[column] = validFields[0]
            del dicUnknownFields[validFields[0]]

    ## A pass that resolves nothing would repeat forever: stop with an explicit error instead
    if len(dicUnknownFields) == unknownBeforePass:
        raise ValueError(
            "Elimination is stuck: no column has exactly one compatible field among "
            + str(sorted(dicUnknownFields))
        )

print('\n')
print(dicFieldColumns)

Iterating while cycle: missing 20 unknown fields...
Iterating while cycle: missing 18 unknown fields...
Iterating while cycle: missing 17 unknown fields...
Iterating while cycle: missing 15 unknown fields...
Iterating while cycle: missing 13 unknown fields...
Iterating while cycle: missing 12 unknown fields...
Iterating while cycle: missing 7 unknown fields...
Iterating while cycle: missing 6 unknown fields...
Iterating while cycle: missing 4 unknown fields...
Iterating while cycle: missing 2 unknown fields...


{13: 'type', 15: 'arrival platform', 14: 'duration', 3: 'route', 9: 'departure platform', 2: 'departure station', 18: 'departure date', 7: 'departure location', 0: 'departure time', 4: 'departure track', 5: 'arrival track', 8: 'price', 12: 'zone', 10: 'row', 6: 'arrival location', 19: 'wagon', 1: 'arrival station', 16: 'class', 11: 'train', 17: 'seat'}


In [11]:
## Put my ticket in a one-row data frame whose columns are the decoded field names,
## then multiply together the values of every field whose name starts with 'departure'
decodedColumns = [dicFieldColumns[index] for index in validTicketsData.columns.values]
myTicketValues = [int(number) for number in myTicket[1].split(',')]
myTicketData = pd.DataFrame([myTicketValues], columns=decodedColumns)

departureData = myTicketData.filter(regex='^departure')
prodValue = departureData.to_numpy().prod()
print("The product of values of fields starting with the word 'departure' on my ticket is", prodValue, ".")

The product of values of fields starting with the word 'departure' on my ticket is 366871907221 .
