In [1]:
from functools import reduce

In [2]:
# Small inline sample of food lines: space-separated ingredients followed by
# a "(contains ...)" allergen list.
testdata = """mxmxvkd kfcds sqjhc nhms (contains dairy, fish)
trh fvjkl sbzzf mxmxvkd (contains dairy)
sqjhc fvjkl (contains soy)
sqjhc mxmxvkd sbzzf (contains fish)""".splitlines()

# Read the file named 'input' from the working directory, one entry per line,
# with surrounding whitespace (including the newline) stripped.
with open('input', 'r') as inp:
    inputdata = [raw.strip() for raw in inp]

In [3]:
def parse(lines):
    """Parse food lines into (ingredients, allergens) pairs.

    Each line is expected to look like ``a b c (contains x, y)``.

    Args:
        lines: iterable of strings, one food per line.

    Returns:
        A list of ``(ingredients, allergens)`` tuples of sets, in input order.
        Blank lines are skipped; a line without a ``(contains ...)`` section
        yields an empty allergen set.
    """
    contents = []
    for line in lines:
        line = line.strip()
        if not line:
            # Tolerate blank lines, e.g. a trailing empty line in the input.
            continue
        ingredients, marker, allergens = line.partition(' (contains ')
        if marker:
            # Drop the closing parenthesis of the allergen list.
            allergen_set = set(allergens.rstrip(')').split(', '))
        else:
            # No allergen list on this line: keep the ingredients intact
            # instead of chopping the last character and failing to unpack.
            allergen_set = set()
        contents.append((set(ingredients.split()), allergen_set))
    return contents

In [4]:
def munge(contents):
    """Group the foods' ingredient collections by the allergens they list.

    Args:
        contents: iterable of ``(ingredients, allergens)`` pairs.

    Returns:
        dict mapping each allergen to a list with the ``ingredients`` object
        of every food that lists it, in input order. The objects are the same
        ones held by ``contents``, not copies.
    """
    by_allergen = {}
    for food_ingredients, food_allergens in contents:
        for name in food_allergens:
            by_allergen.setdefault(name, []).append(food_ingredients)
    return by_allergen

In [5]:
def reduce_candidates(allergen_present_ins):
    """Intersect, per allergen, the ingredient sets of every food listing it.

    Prints one line per allergen showing its candidate ingredients.

    Args:
        allergen_present_ins: dict mapping allergen -> non-empty iterable of
            ingredient sets.

    Returns:
        dict mapping allergen -> a new set holding the ingredients common to
        all of that allergen's sets.

    Raises:
        TypeError: if an allergen's iterable of sets is empty.
    """
    allergen_present_in = dict()
    for allergen, candidatess in allergen_present_ins.items():
        # reduce() over a single-element sequence returns that element itself,
        # so copy the result: callers mutate candidate sets in place and must
        # not alter the ingredient sets passed in.
        allergen_present_in[allergen] = set(reduce(lambda a, b: a & b, candidatess))
        print(f'allergen: {allergen}, candidates: {allergen_present_in[allergen]}')
    return allergen_present_in

In [6]:
def eliminate_candidate(allergen_present_in, eliminated):
    """Strike ``eliminated`` from every allergen's candidate set, in place.

    Args:
        allergen_present_in: dict mapping allergen -> set of candidate
            ingredients; the sets are modified directly.
        eliminated: the ingredient to remove wherever it appears.
    """
    for remaining in allergen_present_in.values():
        # discard() is a no-op when the ingredient is not a candidate.
        remaining.discard(eliminated)

In [7]:
def deduce_candidates(allergen_present_in):
    """Resolve every allergen that currently has exactly one candidate.

    Walks the allergens once. Whenever one is down to a single candidate
    ingredient, that ingredient is recorded for it, removed from all other
    candidate sets, and the allergen is dropped from ``allergen_present_in``.
    Removals take effect immediately, so an allergen visited later in the
    same pass may already have been narrowed down by an earlier one.

    Args:
        allergen_present_in: dict mapping allergen -> set of candidate
            ingredients; mutated in place.

    Returns:
        dict mapping each allergen resolved in this pass to its ingredient;
        empty when nothing could be resolved.
    """
    resolved = {}
    # Snapshot the keys: entries are deleted from the dict while looping.
    for allergen in list(allergen_present_in):
        options = allergen_present_in[allergen]
        if len(options) != 1:
            continue
        ingredient = options.pop()
        resolved[allergen] = ingredient
        eliminate_candidate(allergen_present_in, ingredient)
        del allergen_present_in[allergen]
        print(f'Deduced that {ingredient} must be {allergen}!')
    return resolved

In [8]:
def count_unknown(contents, known):
    """Count ingredient occurrences not matched to any allergen.

    Args:
        contents: iterable of ``(ingredients, allergens)`` pairs, where
            ``ingredients`` is a set.
        known: dict whose values are the ingredients already matched.

    Returns:
        The total, over all foods, of ingredients in the food that are not
        among ``known``'s values.
    """
    matched = set(known.values())
    return sum(len(food_ingredients - matched) for food_ingredients, _ in contents)

In [9]:
# Parse the input and narrow each allergen down to its candidate ingredients.
contents = parse(inputdata)
allergen_present_ins = munge(contents)
allergen_present_in = reduce_candidates(allergen_present_ins)

# Keep running deduction passes until one resolves nothing new;
# `ingredients` accumulates the allergen -> ingredient matches.
ingredients = dict()
finished = False
while not finished:
    eliminated = deduce_candidates(allergen_present_in)
    ingredients.update(eliminated)
    finished = not eliminated

# Occurrences of ingredients that were not matched to any allergen.
unnkown_occurences = count_unknown(contents, ingredients)
print(unnkown_occurences)

# Matched ingredients, ordered alphabetically by allergen, comma-separated.
','.join(ingredients[allergen] for allergen in sorted(ingredients))

allergen: shellfish, candidates: {'xxscc', 'gzxnc', 'gbcjqbm'}
allergen: peanuts, candidates: {'vvqj', 'trnnvn', 'gbcjqbm'}
allergen: wheat, candidates: {'nckqzsg'}
allergen: nuts, candidates: {'xxscc', 'gzxnc', 'nckqzsg'}
allergen: fish, candidates: {'xxscc', 'mjmqst', 'nckqzsg'}
allergen: eggs, candidates: {'xxscc', 'nckqzsg'}
allergen: soy, candidates: {'gzxnc', 'dllbjr', 'nckqzsg'}
allergen: sesame, candidates: {'gzxnc', 'trnnvn', 'nckqzsg', 'gbcjqbm'}
Deduced that nckqzsg must be wheat!
Deduced that xxscc must be eggs!
Deduced that gzxnc must be nuts!
Deduced that mjmqst must be fish!
Deduced that dllbjr must be soy!
Deduced that gbcjqbm must be shellfish!
Deduced that trnnvn must be sesame!
Deduced that vvqj must be peanuts!
1958


'xxscc,mjmqst,gzxnc,vvqj,trnnvn,gbcjqbm,dllbjr,nckqzsg'