In [1]:
import re
import itertools
from collections import defaultdict, deque
import numpy as np

In [2]:
# Small sample food list (one food per line) used to exercise the solver
# before running it on the real input file.
test_input = '\n'.join([
    'mxmxvkd kfcds sqjhc nhms (contains dairy, fish)',
    'trh fvjkl sbzzf mxmxvkd (contains dairy)',
    'sqjhc fvjkl (contains soy)',
    'sqjhc mxmxvkd sbzzf (contains fish)',
])

In [3]:
def parse_input(inp):
    """Parse the food list into per-food and overall ingredient/allergen sets.

    Every non-blank line is expected to look like
    ``ing1 ing2 ... (contains allergen1, allergen2)``.

    Args:
        inp: The whole input text, one food per line. Blank lines (for
            example the one left by a trailing newline) are ignored.

    Returns:
        A tuple ``(food_info, all_ingredients, all_allergens)``:
        ``food_info`` is a list of ``(ingredient_set, allergen_set)`` pairs in
        input order; the other two are the unions over all foods.

    Raises:
        ValueError: If a non-blank line does not match the expected format.
    """
    food_info = []

    for raw_line in inp.splitlines():
        line = raw_line.strip()
        if not line:
            # Text obtained via file.read() normally ends with a newline;
            # skip the resulting empty line instead of crashing on it.
            continue

        # Raw string: "\s" and "\(" are regex escapes, not Python string escapes.
        m = re.match(r"(.*)\s\(contains\s(.*)\)", line)
        if m is None:
            raise ValueError(f"unrecognised food line: {line!r}")

        # split() without arguments tolerates repeated spaces between names.
        ingredients = m.group(1).split()
        allergens = [name.strip() for name in m.group(2).split(',')]

        food_info.append((set(ingredients), set(allergens)))

    all_ingredients = set().union(*(ingredients for ingredients, _ in food_info))
    all_allergens = set().union(*(allergens for _, allergens in food_info))

    return food_info, all_ingredients, all_allergens

In [4]:
# Parse the sample input into the per-food (ingredients, allergens) pairs and
# the overall ingredient / allergen sets returned by parse_input.
food_info, all_ingredients, all_allergens = parse_input(test_input)

In [5]:
# Display the parsed structures so the parser's result can be checked by eye.
food_info, all_ingredients, all_allergens

([({'kfcds', 'mxmxvkd', 'nhms', 'sqjhc'}, {'dairy', 'fish'}),
  ({'fvjkl', 'mxmxvkd', 'sbzzf', 'trh'}, {'dairy'}),
  ({'fvjkl', 'sqjhc'}, {'soy'}),
  ({'mxmxvkd', 'sbzzf', 'sqjhc'}, {'fish'})],
 {'fvjkl', 'kfcds', 'mxmxvkd', 'nhms', 'sbzzf', 'sqjhc', 'trh'},
 {'dairy', 'fish', 'soy'})

In [57]:
def _candidate_ingredients(foods, all_ingredients, all_allergens):
    """Map every allergen to the ingredients common to all foods that list it.

    Prints each allergen with its candidate set as it is computed.
    """
    candidates = {}
    for allergen in all_allergens:
        possible = set(all_ingredients)
        for food_ingredients, food_allergens in foods:
            if allergen in food_allergens:
                possible &= food_ingredients
        print(allergen, possible)
        candidates[allergen] = possible
    return candidates


def _resolve_allergens(candidates):
    """Pair each allergen with one ingredient by repeated elimination.

    At every step only an allergen with exactly one not-yet-taken candidate is
    committed, so the result never depends on arbitrary set ordering.

    Raises:
        ValueError: If no remaining allergen has exactly one free candidate,
            i.e. the candidate sets do not determine a unique pairing.
    """
    # Smallest candidate sets first, so the trace starts with the obvious ones.
    pending = sorted(candidates.items(), key=lambda item: len(item[1]))
    taken = set()
    resolved = {}

    while pending:
        for position, (allergen, ingredients) in enumerate(pending):
            left = ingredients - taken
            if len(left) == 1:
                break
        else:
            stuck = sorted(name for name, _ in pending)
            raise ValueError(f"cannot uniquely resolve allergens: {stuck}")

        print(f"on {allergen}. taken: {taken}. possible ingredients: {ingredients}. left: {left}")
        (ing,) = left
        taken.add(ing)
        resolved[allergen] = ing
        del pending[position]

    return resolved


def solve(inp):
    """Solve both parts of the allergen puzzle and print the results.

    Part one: count the appearances (across all foods) of ingredients that
    cannot contain any allergen. Part two: print the allergen-carrying
    ingredients, comma-separated, ordered alphabetically by allergen name.

    Intermediate state (candidate sets, safe ingredients, the elimination
    trace) is printed along the way. Nothing is returned.

    Args:
        inp: Raw puzzle text in the format accepted by ``parse_input``.

    Raises:
        ValueError: If the allergens cannot be uniquely matched to ingredients.
    """
    foods, all_ingredients, all_allergens = parse_input(inp)

    allergen_to_possible_ingr = _candidate_ingredients(foods, all_ingredients, all_allergens)

    could_have_allergens = set().union(*allergen_to_possible_ingr.values())

    print()
    safe = all_ingredients - could_have_allergens
    print(could_have_allergens)
    print(safe)

    # Part one: every occurrence of a safe ingredient counts, not just distinct names.
    print(sum(len(safe & food_ingredients) for food_ingredients, _ in foods))

    allergen_to_ingredient = _resolve_allergens(allergen_to_possible_ingr)

    # Part two: ingredients listed in alphabetical order of their allergen.
    print(','.join(ingredient for _, ingredient in sorted(allergen_to_ingredient.items())))

In [58]:
# Run the solver on the sample input; solve() prints its intermediate state
# followed by the two answers.
solve(test_input)

fish {'mxmxvkd', 'sqjhc'}
dairy {'mxmxvkd'}
soy {'fvjkl', 'sqjhc'}

{'mxmxvkd', 'fvjkl', 'sqjhc'}
{'nhms', 'trh', 'kfcds', 'sbzzf'}
5
on dairy. taken: set(). possible ingredients: {'mxmxvkd'}. left: {'mxmxvkd'}
on fish. taken: {'mxmxvkd'}. possible ingredients: {'mxmxvkd', 'sqjhc'}. left: {'sqjhc'}
on soy. taken: {'mxmxvkd', 'sqjhc'}. possible ingredients: {'fvjkl', 'sqjhc'}. left: {'fvjkl'}
mxmxvkd,sqjhc,fvjkl


In [59]:
# Run the solver on the real puzzle input. The context manager closes the file
# handle deterministically instead of leaving it to the garbage collector.
with open('./inputs/21') as puzzle_file:
    solve(puzzle_file.read())

wheat {'lsgqf', 'lzvh', 'phc', 'fqqcnm'}
soy {'phc', 'rjc'}
eggs {'zmsdzh', 'spnd', 'phc', 'rjc'}
nuts {'pdt', 'phc'}
dairy {'phc'}
sesame {'phc', 'lsgqf', 'spnd', 'fqqcnm', 'zmsdzh', 'rjc'}
peanuts {'pdt', 'fqqcnm'}
fish {'zmsdzh', 'phc', 'fqqcnm'}

{'pdt', 'phc', 'lzvh', 'lsgqf', 'spnd', 'fqqcnm', 'zmsdzh', 'rjc'}
{'qpbl', 'mxq', 'rntk', 'zgvtn', 'sxsxm', 'gvdstsc', 'cldgd', 'flndv', 'nkzqj', 'nggbtk', 'chgjqc', 'fgcd', 'ksrjn', 'ggrc', 'lkgln', 'mgv', 'djdhrn', 'njdb', 'hbbgk', 'dchvb', 'ssdszsn', 'lcrs', 'kdm', 'strpjp', 'pdhlzg', 'bfnnnrn', 'xbvrx', 'gcpks', 'jbtlfv', 'tcclbr', 'hcjkd', 'hddd', 'mxzb', 'rxd', 'tktj', 'czvphx', 'hjzkg', 'prbk', 'snzxr', 'xvjk', 'hqd', 'rzcps', 'qzjrtl', 'rms', 'jhbnm', 'vhjpjdr', 'jp', 'dkg', 'gbhjv', 'xtgjslz', 'rbjmdn', 'ktnlk', 'jqhn', 'fslqkg', 'czkfv', 'rdkrtr', 'qrpzt', 'fgsr', 'jvhsj', 'bxqpgx', 'bkzbrm', 'hbmvpmt', 'jgbk', 'lpvfv', 'njqrhcc', 'rdjdq', 'pmhhqrk', 'cncpbssj', 'jqzklv', 'fnlk', 'xdcp', 'jfrlp', 'rsr', 'bvlb', 'cqqcd', 'jhv', '