In [55]:
from transformers import pipeline

# Zero-shot classification pipeline pinned to an explicit checkpoint instead of
# the pipeline's default model. `device=0` targets accelerator ordinal 0, so
# this cell presumably needs a GPU to be present - TODO confirm for the target
# environment.
classifier = pipeline(
    "zero-shot-classification",
    model="valhalla/distilbart-mnli-12-3",
    device=0,
    framework="pt",
)

In [56]:
# Label set handed to the zero-shot classifier; every ingredient is scored
# against each of these strings.
candidate_labels = [
    "Base/Flour",
    "Fat/Oil/Butter",
    "Sweetener",
    "Binder/Egg/Fruit/Yoghurt",
    "Leavener/Rising/Soda",
    "Flavorings",
    "Add-ins",
    "Seasoning",
    "Texture Enhancers",
    "Decorations/Toppings",
    "Liquid",
]

In [57]:
# Small hand-written sample of ingredient names.
# NOTE: both `ingredients` and `ingrednames` are reassigned further down the
# notebook when the JSON dataset is loaded, so this sample only matters if the
# classifier is run before that cell.
ingredients = [
    "Flour", "Butter", "Sugar", "Egg",
    "Baking Soda", "Vanilla extract", "Chocolate chips", "Salt",
    "Oats", "Sprinkles", "Milk", "Grandfathrers ashes",
]

# De-duplicated view of the sample names.
ingrednames = {*ingredients}

In [58]:
import json

In [59]:
# Load the ingredient records from disk; each record is indexed by its 'name'
# key below.
with open("data/results/ingredients.json", encoding="utf8") as file:
    ingredients = json.load(file)

# Collect the distinct names so each one is only classified once.
ingrednames = {record['name'] for record in ingredients}
ingrednames


{'black beans and white rice',
 'lasagna',
 'banana pudding',
 'beef jerky',
 'roll with meat and/or shrimp',
 'long rice noodles',
 'sausage english muffin sandwich',
 "cereal or granola bar kellogg's nutri-grain cereal bar",
 'bitter melon',
 'bread stuffing',
 'chicken or turkey tetrazzini',
 'biscuit',
 'spinach quiche',
 'chicken or turkey and corn hominy soup',
 'goose egg',
 'pork and vegetables',
 'blue or roquefort cheese dressing',
 'singapore sling',
 'egg',
 'black beans with meat',
 'turrnip greens',
 'multiple meat sandwich on white',
 'chili hot dog',
 'beans and franks',
 'cucumber salad made with cucumber and vinegar',
 'marie biscuit',
 'wheat germ oil',
 'mock chicken legs',
 'chicken fricassee',
 'quiche with meat',
 'chicken wing',
 'sesame seeds',
 'sugar substitute',
 'meat loaf made with beef and pork',
 'veal with gravy',
 'pork and vegetables excluding carrots',
 'ham and rice with mushroom sauce',
 'rum and cola',
 'cobbler',
 'watercress',
 'baby toddler cer

In [60]:
# Number of distinct ingredient names collected in the set.
len(ingrednames)

1948

In [61]:
# The pipeline is given a list, so materialise the set of names first; the
# results are matched back to names later via each result's "sequence" field.
names_to_classify = list(ingrednames)
classified = classifier(names_to_classify, candidate_labels=candidate_labels)

In [62]:
# Minimum score a label other than the first must exceed to be assigned.
cutoffPoint = 0.125

# Map each classified name to its assigned labels. The first label of every
# result is always kept (presumably the best-scoring one, if the pipeline
# returns labels sorted by score - confirm against the pipeline docs); any
# later label is kept only when its score is above the cutoff.
full_ingredients = {}
for result in classified:
    kept_labels = [
        label
        for position, (label, score) in enumerate(
            zip(result["labels"], result["scores"])
        )
        if position == 0 or score > cutoffPoint
    ]
    full_ingredients[result["sequence"]] = {"cookiecat": kept_labels}

full_ingredients

{'black beans and white rice': {'cookiecat': ['Add-ins',
   'Texture Enhancers',
   'Liquid']},
 'lasagna': {'cookiecat': ['Decorations/Toppings', 'Liquid']},
 'banana pudding': {'cookiecat': ['Sweetener', 'Decorations/Toppings']},
 'beef jerky': {'cookiecat': ['Add-ins', 'Texture Enhancers']},
 'roll with meat and/or shrimp': {'cookiecat': ['Add-ins',
   'Texture Enhancers']},
 'long rice noodles': {'cookiecat': ['Texture Enhancers',
   'Add-ins',
   'Decorations/Toppings']},
 'sausage english muffin sandwich': {'cookiecat': ['Add-ins',
   'Texture Enhancers']},
 "cereal or granola bar kellogg's nutri-grain cereal bar": {'cookiecat': ['Add-ins',
   'Texture Enhancers']},
 'bitter melon': {'cookiecat': ['Flavorings',
   'Sweetener',
   'Decorations/Toppings']},
 'bread stuffing': {'cookiecat': ['Texture Enhancers',
   'Add-ins',
   'Base/Flour']},
 'chicken or turkey tetrazzini': {'cookiecat': ['Add-ins',
   'Flavorings',
   'Decorations/Toppings']},
 'biscuit': {'cookiecat': ['Base/Fl

In [63]:
# Attach the assigned labels to each ingredient record in place, reporting any
# record whose name has no classification result.
for record in ingredients:
    name = record["name"]
    if name not in full_ingredients:
        print(name + " not found?")
        continue
    record["bakingrole"] = full_ingredients[name]['cookiecat']

ingredients

[{'name': 'hummus',
  'tags': ['commercial'],
  'nutrients': [{'name': 'Cryptoxanthin, beta', 'amount': 3, 'unit': 'µg'},
   {'name': 'Tocopherol, delta', 'amount': 1.3, 'unit': 'mg'},
   {'name': 'Thiamin', 'amount': 0.15, 'unit': 'mg'},
   {'name': 'Riboflavin', 'amount': 0.115, 'unit': 'mg'},
   {'name': 'Folate, total', 'amount': 36, 'unit': 'µg'},
   {'name': 'Vitamin K (phylloquinone)', 'amount': 17.2, 'unit': 'µg'},
   {'name': 'Fatty acids, total trans', 'amount': 0.018, 'unit': 'g'},
   {'name': 'Fatty acids, total saturated', 'amount': 2.22, 'unit': 'g'},
   {'name': 'SFA 14:0', 'amount': 0.009, 'unit': 'g'},
   {'name': 'SFA 22:0', 'amount': 0.044, 'unit': 'g'},
   {'name': 'SFA 17:0', 'amount': 0.01, 'unit': 'g'},
   {'name': 'SFA 24:0', 'amount': 0.027, 'unit': 'g'},
   {'name': 'MUFA 24:1 c', 'amount': 0.005, 'unit': 'g'},
   {'name': 'MUFA 18:1 c', 'amount': 6.25, 'unit': 'g'},
   {'name': 'PUFA 18:2 n-6 c,c', 'amount': 6.81, 'unit': 'g'},
   {'name': 'MUFA 22:1 c', 'amo

In [64]:
# Persist the role-annotated ingredient records as JSON.
# json.dump() returns None, so its result must not be assigned back to
# `ingredients`: doing that replaced the in-memory list with None, and
# re-running this cell then overwrote the output file with `null`.
with open("data/results/ingredients_roles3.json", "w", encoding="utf8") as file:
    json.dump(ingredients, file)