In [1]:
from transformers import pipeline

# Build the zero-shot classification pipeline used by the later cells.
classifier = pipeline(
    "zero-shot-classification",
    model="valhalla/distilbart-mnli-12-3",
    device=0,  # NOTE(review): presumably GPU index 0 - confirm a GPU is available
    framework="pt",
)

In [36]:
# Category names offered to the zero-shot classifier as candidate labels.
candidate_labels = [
    "Base", "Fat", "Sweetener",
    "Binding Agent", "Leavener/Rising agent",
    "Flavorings", "Add-ins", "Seasoning",
    "Texture Enhancers", "Decorations/Toppings",
    "Liquid",
]

In [45]:
# Small hand-written list of ingredient names, plus the set of its distinct
# entries.
ingredients = [
    "Flour", "Milk",
    "Baking powder", "Baking Soda", "baking ammonia",
    "pearl ash", "potash", "yeast",
    "Yoghurt", "Lemon juice",
    "Grandfathrers ashes",
]
ingrednames = set(ingredients)

In [50]:
import json

In [51]:
# Read the ingredient records from the JSON results file.
with open("data/results/ingredients.json", encoding="utf8") as source:
    ingredients = json.load(source)

# Collect the distinct "name" value of every record.
ingrednames = {record["name"] for record in ingredients}
ingrednames


{'candy',
 'egg',
 'chicken and vegetable entree with noodles',
 'peppers',
 'vegetable mixture',
 'ranch dip',
 'beef and rice',
 'fruit juice blend',
 'brioche',
 'chicken fillet wrap sandwich',
 'ham sandwich or sub',
 'chicken or turkey chow mein or chop suey with noodles',
 'rob roy',
 'vegan mayonnaise',
 'veal scallopini',
 'duck sauce',
 'pig in a blanket',
 'aloe vera juice drink',
 'lime souffle',
 'stuffed jalapeno pepper',
 'apricot nectar',
 'ham sandwich on white',
 'yokan',
 'meat and corn hominy soup',
 'beans with meat',
 'cheese flavored corn snacks cheetos',
 'vienna sausages stewed with potatoes',
 'salt',
 'pineapple juice',
 'arepa dominicana',
 'barbecue sauce',
 'snack bar',
 'chilaquiles',
 'dark green vegetables as ingredient in omelet',
 'seaweed soup',
 'seaweed',
 'soy chips',
 'shrimp',
 'taco or tostada salad',
 'cereal',
 'rum and diet cola',
 'ham stroganoff',
 'chicken or turkey and vegetables excluding carrots',
 'spaghetti sauce with meat',
 'butter 

In [52]:
# Number of distinct ingredient names held in the set.
len(ingrednames)

1948

In [53]:
# Score every distinct ingredient name against the candidate labels.
names_to_classify = list(ingrednames)  # the set is passed to the classifier as a list
classified = classifier(names_to_classify, candidate_labels=candidate_labels)



In [54]:
# Score a label must exceed to be kept when it is not in first position.
cutoffPoint = 0.125

# Map each classified sequence to {"cookiecat": [labels...]}.
# The label in first position is always kept; every other label is kept only
# when its score is strictly above cutoffPoint.
# NOTE(review): this presumes the first label is the best-scoring one, i.e. the
# classifier returns labels ordered by score - confirm.
full_ingredients = {}
for result in classified:
    kept_labels = [
        label
        for position, (label, score) in enumerate(
            zip(result["labels"], result["scores"])
        )
        if position == 0 or score > cutoffPoint
    ]
    full_ingredients[result["sequence"]] = {"cookiecat": kept_labels}

full_ingredients

{'candy': {'cookiecat': ['Sweetener', 'Decorations/Toppings']},
 'egg': {'cookiecat': ['Add-ins',
   'Leavener/Rising agent',
   'Decorations/Toppings']},
 'chicken and vegetable entree with noodles': {'cookiecat': ['Add-ins',
   'Texture Enhancers']},
 'peppers': {'cookiecat': ['Flavorings', 'Decorations/Toppings']},
 'vegetable mixture': {'cookiecat': ['Add-ins', 'Decorations/Toppings']},
 'ranch dip': {'cookiecat': ['Decorations/Toppings']},
 'beef and rice': {'cookiecat': ['Add-ins', 'Texture Enhancers', 'Fat']},
 'fruit juice blend': {'cookiecat': ['Liquid', 'Add-ins']},
 'brioche': {'cookiecat': ['Base', 'Decorations/Toppings']},
 'chicken fillet wrap sandwich': {'cookiecat': ['Add-ins',
   'Texture Enhancers']},
 'ham sandwich or sub': {'cookiecat': ['Fat', 'Add-ins', 'Texture Enhancers']},
 'chicken or turkey chow mein or chop suey with noodles': {'cookiecat': ['Add-ins']},
 'rob roy': {'cookiecat': ['Decorations/Toppings', 'Add-ins']},
 'vegan mayonnaise': {'cookiecat': ['Add-

In [55]:
# Store each record's assigned categories under "bakingrole" (the records are
# modified in place); report any record whose name has no classification entry.
for record in ingredients:
    name = record["name"]
    if name not in full_ingredients:
        print(name + " not found?")
        continue
    record["bakingrole"] = full_ingredients[name]["cookiecat"]

ingredients

[{'name': 'hummus',
  'tags': ['commercial'],
  'nutrients': [{'name': 'Cryptoxanthin, beta', 'amount': 3, 'unit': 'µg'},
   {'name': 'Tocopherol, delta', 'amount': 1.3, 'unit': 'mg'},
   {'name': 'Thiamin', 'amount': 0.15, 'unit': 'mg'},
   {'name': 'Riboflavin', 'amount': 0.115, 'unit': 'mg'},
   {'name': 'Folate, total', 'amount': 36, 'unit': 'µg'},
   {'name': 'Vitamin K (phylloquinone)', 'amount': 17.2, 'unit': 'µg'},
   {'name': 'Fatty acids, total trans', 'amount': 0.018, 'unit': 'g'},
   {'name': 'Fatty acids, total saturated', 'amount': 2.22, 'unit': 'g'},
   {'name': 'SFA 14:0', 'amount': 0.009, 'unit': 'g'},
   {'name': 'SFA 22:0', 'amount': 0.044, 'unit': 'g'},
   {'name': 'SFA 17:0', 'amount': 0.01, 'unit': 'g'},
   {'name': 'SFA 24:0', 'amount': 0.027, 'unit': 'g'},
   {'name': 'MUFA 24:1 c', 'amount': 0.005, 'unit': 'g'},
   {'name': 'MUFA 18:1 c', 'amount': 6.25, 'unit': 'g'},
   {'name': 'PUFA 18:2 n-6 c,c', 'amount': 6.81, 'unit': 'g'},
   {'name': 'MUFA 22:1 c', 'amo

In [56]:
# Write the ingredient records to a new JSON file.
# json.dump() writes to the file and returns None, so its result must not be
# assigned back to `ingredients`: that would discard the in-memory list, and
# re-running this cell would then overwrite the file with `null`.
with open("data/results/ingredients_roles2.json", "w", encoding="utf8") as file:
    json.dump(ingredients, file)