# Automated Metrics

In this notebook we compute the following metrics on our dataset of generated recipes.

- Percentage of ingredients correctly used
- Number of extraneous ingredients introduced
- BLEU-4



In [None]:
# Colab only: mount Google Drive at /content/drive so the cached files under ./drive can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import pathlib
import pandas as pd
import numpy as np
import spacy
import re

from collections import namedtuple
from tqdm import tqdm

tqdm.pandas()

  from pandas import Panel


In [None]:
# Directory with cached datasets and generated recipes. The path is relative,
# so it resolves against the kernel's current working directory.
CACHE_DIR = "./drive/Shared drives/Capstone/tmp"
# Creates only the leaf directory; this raises if the parent path does not exist.
pathlib.Path(CACHE_DIR).mkdir(exist_ok=True)
# Pickle file holding the ground-truth recipe texts.
dataset_path = os.path.join(CACHE_DIR, 'emoji_text_recipes.pkl')

In [None]:
# Load the preprocessed recipes; stop with a hint when the pickle has not been generated yet.
if os.path.exists(dataset_path):
    recipes = pd.read_pickle(dataset_path)
else:
    raise SystemExit("Run preprocess_rnn_word.ipynb to generate data file before continuing")

In [None]:
# Preview the loaded recipes (pandas abbreviates the display).
recipes

0         🍴 Slow Cooker Chicken and Dumplings\n\n🥑\n• 4 ...
1         🍴 Awesome Slow Cooker Pot Roast\n\n🥑\n• 2 (10....
2         🍴 Brown Sugar Meatloaf\n\n🥑\n• 1/2 cup packed ...
3         🍴 Best Chocolate Chip Cookies\n\n🥑\n• 1 cup bu...
4         🍴 Homemade Mac and Cheese Casserole\n\n🥑\n• 8 ...
                                ...                        
125158    🍴 Cream Horns\n\n🥑\n• 1 sheet frozen puff past...
125159    🍴 Summer Corn Salad\n\n🥑\n• 4 ears fresh corn\...
125160    🍴 Zucchini Stuffed Tomatoes\n\n🥑\n• 4 large pl...
125162    🍴 Chocolate Cake with Armagnac Ice Cream\n\n🥑\...
125163    🍴 Crabby Bisque\n\n🥑\n• 3 (10.5-ounce) cans re...
Length: 105789, dtype: object

## The recipe file paths.

In [None]:
# For each model: the directory with its generated recipes and the file-name prefix.
# Files are read later as "<prefix><index>.txt".

# GPT-2
GPT2_recipe_dir = os.path.join(CACHE_DIR, 'gpt2_105791_title_prompt_output_recipes')
GPT2_recipe_path_str = 'gpt2_recipe_'

# Character-level GRU
GRU_char_dir = os.path.join(CACHE_DIR, 'rnn_char_title_prompt_output_recipes')
GRU_char_path_str = 'rnn_char_recipe_'

# Word-level GRU
GRU_word_dir = os.path.join(CACHE_DIR, 'rnn_word_punct_emoji_online_title_prompt_output_recipes')
GRU_word_path_str = 'rnn_word_recipe_'

# Checklist model
checklist_dir = os.path.join(CACHE_DIR, 'checklist_53_rnn_prompt_recipes')
checklist_path_str = 'checklist_rnn_prompt_recipe_'

## Load the IngrProc ingredient extraction system



In [None]:
class IngredientProcessor(object):
  '''
    Reduces free-text ingredient lines (such as "1 medium onion, chopped")
    to short canonical ingredient names, using word filters followed by
    spaCy noun-chunk extraction.

    Results are memoised per cleaned ingredient string when use_cache is True.
  '''

  # Units, containers and size words that are dropped from ingredient lines.
  MEASURES = set(["tbsp", "tablespoon", "tablespoons",
            "tsp", "teaspoon", "teaspoons",
            "fl", "oz", "ounce", "ounces",
            "lb", "pound", "pounds",
            "cm", "centimeter", "centimeters", "centimetre", "centimetres",
            "inch", "inches",
            "can", "cans",
            "cup", "cups",
            "pint", "pints",
            "quart", "quarts",
            "gallon", "gallons",
            "ml", "milliliter", "millilitre", "milliliters", "millilitres",
            "l", "liter", "litre", "liters", "litres",
            "pkg", "package", "packages",
            "piece", "pieces",
            "slice", "slices",
            "small", "medium", "large",
            # "sliced", "diced", "minced", "chopped", "deveined",
            "extra"])

  # Head nouns that are ambiguous alone ("sauce", "oil", ...): when a phrase
  # ends in one of these, the preceding word is kept as well.
  # Note the comma after "peppers": without it Python concatenates the
  # adjacent literals into the single entry "pepperscasing".
  CONTEXT_NEEDERS = set(["extract", "root", "sauce", "cream",
            "broth", "soup", "soda", "oil", "puree", "powder",
            "mix", "roast", "paste",
            "chip", "chips",
            "bean", "beans",
            "pepper", "peppers",
            "casing", "casings"])

  # Preparation, state, size and colour words that are dropped from ingredient lines.
  ADJECTIVES = set([
      'canned', 'softened', 'diced', 'chopped', 'semisweet', 'thawed', 'frozen', 'minced', 'peeled', 'seeded', 'prepared',
      'melted', 'pitted', 'uncooked', 'cooked', 'squeezed', 'lean', 'boneless', 'ground', 'divided', 'refrigerated',
      'skinless', 'crushed', 'grated', 'trimmed', 'sifted', 'all-purpose', 'allpurpose', 'drained', 'mashed',
      'rinsed', 'shredded', 'hulled', 'dry', 'dried', 'deveined', 'packed', 'fresh', 'freshly', 'sliced', 'halved',
      'washed', 'sweetened', 'unsweetened', 'extract', 'vegetable', 'large', 'small', 'tiny', 'fine', 'finely', 'beaten',
      'bite-sized', 'bitesized', 'skinned', 'optional', 'toasted', 'lightly', 'loosely', 'juiced', 'cut', 'necessary', 'desired',
      'quartered', 'slightly', 'coarsely', 'thin', 'chilled', 'scrambled', 'soft', 'hard', 'short', 'medium', 'long',
      'white', 'red', 'orange', 'yellow', 'green', 'blue', 'brown', 'black'])

  # Remaining filler words that are dropped from ingredient lines.
  OTHER_PROBLEM_WORDS = set(["room", "temperature", "deep", "frying", 'if', 'to', 'into', 'taste', 'more', 'each', 'about', 'removed'])

  # Every word removed before noun-chunk extraction.
  ALL_PROBLEM_WORDS = MEASURES.union(ADJECTIVES, OTHER_PROBLEM_WORDS)

  def __init__(self, use_cache=True):
    '''
      use_cache: bool, memoise cleanup_ingredient results when True

      Loads the spaCy "en_core_web_sm" pipeline with the listed components disabled.
    '''
    self.nlp = spacy.load("en_core_web_sm", disable=["textcat", "ner", "entity_ruler", "sentencizer", "merge_noun_chunks", "merge_entities", "merge_subtokens"])
    self.use_cache = use_cache
    # Maps a cleaned ingredient string to its list of canonical ingredients.
    self.cache = {}

  def deduce_core_ingredient(self, ingr_phrase):
    '''
      ingr_phrase: str
      returns: str

      Attempts to deduce a "canonical form" of the ingredient
      contained in ingr_phrase: the last word of the phrase, or the last
      two words when the last word is one of CONTEXT_NEEDERS.
    '''
    words_in_phrase = ingr_phrase.split(" ")
    if len(words_in_phrase) >= 2 and words_in_phrase[-1].lower() in self.CONTEXT_NEEDERS:
      return " ".join(words_in_phrase[-2:])
    return words_in_phrase[-1]

  def cleanup_ingredient(self, ingredient):
    '''
      ingredient: str
      returns: List[str]

      Cleans up ingredient string and returns a list of ingredients
      in canonical form. The list is empty when nothing is left after
      cleaning.
    '''
    # First clean up by removing unnecessary information

    # Uses regex to remove parenthesised portions and numbers,
    # https://www.kite.com/python/answers/how-to-use-regular-expressions-to-remove-text-within-parentheses-in-python
    ingredient = re.sub(r"\([^()]*\)|[0-9]|/|\.|,|'|\"", "", ingredient).lower()

    # Filter out measure words, adjectives, filler words, and other problem words
    ingredient = " ".join(word for word in ingredient.split() if word not in self.ALL_PROBLEM_WORDS)

    # Remember the lookup key now: the fallback branch below shortens the
    # string, and the result must be stored under the key used for lookup.
    cache_key = ingredient

    if self.use_cache and cache_key in self.cache:
      return self.cache[cache_key]

    # Apply spacy to get the most important noun phrase (hopefully)
    ingr_doc = self.nlp(ingredient)
    noun_phrases = [chunk.text for chunk in ingr_doc.noun_chunks]

    # Try to deduce the ingredient
    if noun_phrases:
      ingr_list = [self.deduce_core_ingredient(phrase) for phrase in noun_phrases]
    else:
      # No noun chunk found: keep only the text before the first
      # ' - ', ' and ' or ' or '.
      fallback = ingredient.split(' - ')[0].split(' and ')[0].split(' or ')[0]
      ingr_list = [fallback] if fallback else [] # don't add empty-string ingredients

    if self.use_cache:
      self.cache[cache_key] = ingr_list

    return ingr_list

In [None]:
def flatten(lists):
    """Concatenate the items of every iterable in `lists` into one flat list."""
    flat = []
    for sublist in lists:
        flat.extend(sublist)
    return flat

# Record for a parsed recipe: title string, cleaned ingredient names, instruction steps.
ProcessedRecipe = namedtuple('ProcessedRecipe', ['title', 'ingredients', 'instructions'])

# Single shared ingredient processor used by the helper functions in this notebook.
IngrProc = IngredientProcessor()

def process_recipe(recipe):
    """Parse an emoji-delimited recipe string into a ProcessedRecipe.

    The text is split on the 🍴 / 🥑 / 🥣 markers into title, ingredients and
    instructions. Ingredients (bulleted with "•") are reduced to canonical
    names via IngrProc; instructions (bulleted with "‣") are returned as a
    list of stripped, non-empty steps.

    Raises ValueError when the text does not split into exactly four parts.
    """
    _, title, ingredients, instructions = re.split("🍴|🥑|🥣", recipe)

    # Process title
    title = title.strip()

    # Process ingredients
    ingredients = ingredients.replace('\n', '').split("•")
    ingredients = [ingredient.strip() for ingredient in ingredients if len(ingredient) > 0]
    cleaned_up_ingrs = flatten([IngrProc.cleanup_ingredient(ingredient) for ingredient in ingredients])

    # Process instructions: strip first, then drop empties, so whitespace-only
    # segments (e.g. the text before the first "‣") do not become '' steps.
    stripped_steps = (step.strip() for step in instructions.replace('\n', '').split("‣"))
    cleaned_up_instrs = [step for step in stripped_steps if step]

    return ProcessedRecipe(title, cleaned_up_ingrs, cleaned_up_instrs)

def extract_title(recipe):
    """Return the stripped title section of an emoji-delimited recipe string.

    Raises ValueError when the text does not split into exactly four parts.
    """
    sections = re.split("🍴|🥑|🥣", recipe)
    _, raw_title, _, _ = sections
    return raw_title.strip()

def extract_cleaned_ingredients(recipe):
    """Return the canonical ingredient names from a recipe's ingredient section.

    Each "•" bullet is stripped and passed through IngrProc.cleanup_ingredient;
    the per-bullet results are concatenated in order.
    Raises ValueError when the text does not split into exactly four parts.
    """
    _, _, ingr_section, _ = re.split("🍴|🥑|🥣", recipe)
    bullet_items = ingr_section.replace('\n', '').split("•")
    per_item_results = []
    for item in bullet_items:
        if len(item) > 0:
            per_item_results.append(IngrProc.cleanup_ingredient(item.strip()))
    return flatten(per_item_results)

def extract_instructions(recipe):
    """Return the instruction section of a recipe with all newlines removed.

    Raises ValueError when the text does not split into exactly four parts.
    """
    sections = re.split("🍴|🥑|🥣", recipe)
    _, _, _, instr_section = sections
    return ''.join(instr_section.split('\n'))

def extract_cleaned_instructions(recipe):
    """Return the recipe's instruction steps as a list of stripped strings.

    Steps are the "‣"-separated pieces of the instruction section (newlines
    removed). Pieces are stripped before the emptiness check so that
    whitespace-only pieces do not show up as '' steps.
    Raises ValueError when the text does not split into exactly four parts.
    """
    _, _, _, instructions = re.split("🍴|🥑|🥣", recipe)
    stripped_steps = (step.strip() for step in instructions.replace('\n', '').split("‣"))
    return [step for step in stripped_steps if step]

In [None]:
# Not referenced anywhere else in this notebook; presumably mirrors the
# preprocessing notebook's exclusion list — TODO confirm.
to_exclude = '#$&*/<=>@[\\]^_`{|}~\t'
# Punctuation characters that become stand-alone tokens.
to_tokenize = '.,:;!?"-+()%\'\n'

# The characters are escaped before being placed in the character class.
# Unescaped, the '"-+' part is read as a range from '"' to '+', which
# matches # $ & * as well and never matches the hyphen itself.
_TOKENIZE_PATTERN = re.compile('([' + re.escape(to_tokenize) + '])')

def separate_punct(text):
    """Surround every character of `to_tokenize` found in `text` with spaces."""
    return _TOKENIZE_PATTERN.sub(r' \1 ', text)

In [None]:
# Helper functions for evaluation
def has_numbers(inputString):
    """Return True when at least one character of the string is a digit."""
    for char in inputString:
        if char.isdigit():
            return True
    return False

def filter_words(f, ing):
    """Keep the whitespace-separated words of `ing` accepted by `f`, joined by single spaces."""
    words = ing.split()
    kept = filter(f, words)
    return ' '.join(kept)

def remove_symbols(ing):
    """Replace each punctuation character in the fixed symbol list with a space."""
    symbols = ':;.(),-+\'\"'
    table = str.maketrans(symbols, ' ' * len(symbols))
    return ing.translate(table)

## Calculate percentage of ingredients in ingredient list used in instructions

In [None]:
def avg_pct_of_ingrs_used(recipe):
    """Return the fraction of a recipe's cleaned ingredients mentioned in its instructions.

    An ingredient counts as used when at least one of its space-separated
    words appears among the lower-cased, symbol-stripped instruction words.

    Raises ZeroDivisionError when no ingredient is extracted, and ValueError
    when the recipe text does not split into its four sections.
    """
    ingredient_names = extract_cleaned_ingredients(recipe)
    instruction_text = remove_symbols(extract_instructions(recipe).lower())
    instruction_vocab = set(instruction_text.split(' '))

    used_count = sum(
        1
        for name in ingredient_names
        if not instruction_vocab.isdisjoint(name.split(' '))
    )

    return used_count / len(ingredient_names)


avg_pct_of_ingrs_used(recipes[4])

0.8571428571428571

In [None]:
# Show the raw text of the recipe scored in the previous cell.
recipes[4]

'🍴 Homemade Mac and Cheese Casserole\n\n🥑\n• 8 ounces whole wheat rotini pasta\n• 3 cups fresh broccoli florets\n• 1 medium onion, chopped\n• 3 cloves garlic, minced\n• 4 tablespoons butter, divided\n• 2 tablespoons all-purpose flour\n• 1/4 teaspoon salt\n• 1/8 teaspoon ground black pepper\n• 2 1/2 cups milk\n• 8 ounces Cheddar cheese, shredded\n• 4 ounces reduced-fat cream cheese, cubed and softened\n• 1/2 cup fine dry Italian-seasoned bread crumbs\n• Reynolds Wrap® Non Stick Aluminum Foil\n\n🥣\n‣ Preheat oven to 350 degrees F. Line a 2-quart casserole dish with Reynolds Wrap(R) Pan Lining Paper, parchment side up. No need to grease dish.\n‣ Cook the pasta in a large saucepan according to the package directions, adding the broccoli for the last 3 minutes of cooking. Drain. Return to the saucepan and set aside.\n‣ Cook the onion and garlic in 2 tablespoons hot butter in a large skillet 5 to 7 minutes or until tender. Stir in flour, salt, and black pepper. Add the milk all at once. Cook

In [None]:
# Mean ingredient-usage score over the first 1000 ground-truth recipes.
np.mean([avg_pct_of_ingrs_used(recipe) for recipe in tqdm(recipes[:1000])])

# average percentage of ingredients used for first 1000 ground truth recipes: 0.9190585939020428


  0%|          | 0/1000 [00:00<?, ?it/s][A
  6%|▌         | 57/1000 [00:00<00:02, 434.09it/s][A
  8%|▊         | 81/1000 [00:00<00:02, 318.52it/s][A
 13%|█▎        | 129/1000 [00:00<00:02, 296.56it/s][A
 15%|█▍        | 148/1000 [00:02<00:31, 27.28it/s] [A
 16%|█▌        | 161/1000 [00:03<00:40, 20.90it/s][A
 17%|█▋        | 171/1000 [00:04<00:43, 18.97it/s][A
 18%|█▊        | 179/1000 [00:04<00:45, 18.04it/s][A
 18%|█▊        | 185/1000 [00:05<00:52, 15.42it/s][A
 19%|█▉        | 190/1000 [00:05<01:04, 12.65it/s][A
 19%|█▉        | 194/1000 [00:06<01:08, 11.82it/s][A
 20%|█▉        | 197/1000 [00:06<01:14, 10.76it/s][A
 20%|██        | 200/1000 [00:06<01:01, 13.01it/s][A
 20%|██        | 203/1000 [00:06<00:57, 13.88it/s][A
 21%|██        | 206/1000 [00:06<00:53, 14.80it/s][A
 21%|██        | 209/1000 [00:07<00:58, 13.48it/s][A
 21%|██        | 211/1000 [00:07<01:09, 11.42it/s][A
 22%|██▏       | 216/1000 [00:07<00:55, 14.09it/s][A
 22%|██▏       | 218/1000 [00:07<00

0.9190585939020428

### Percentage of ingredients GPT-2 uses correctly

In [None]:
# Ingredient-usage metric over the first N GPT-2 samples.
N = 500
sum_pcts = 0
num_well_formed = 0
for i in tqdm(range(N)):
    input_path = os.path.join(GPT2_recipe_dir, f"{GPT2_recipe_path_str}{i}.txt")
    with open(input_path, 'r') as f:
        contents = f.read()
    # Map GPT-2's <ING>/<INS> tags onto the emoji section markers and add the title marker.
    contents = ('🍴 ' + contents).replace('<ING>', ' 🥑 ').replace('<INS>', ' 🥣 ')
    # Re-insert a missing section marker before the first bullet of that section;
    # samples that lack both the marker and its bullet are skipped.
    if '🥑' not in contents:
        pos_to_insert = contents.find('•')
        if pos_to_insert == -1:
            continue
        contents = contents[:pos_to_insert] + '🥑' + contents[pos_to_insert:]
    if '🥣' not in contents:
        pos_to_insert = contents.find('‣')
        if pos_to_insert == -1:
            continue
        contents = contents[:pos_to_insert] + '🥣' + contents[pos_to_insert:]
    try:
        pct = avg_pct_of_ingrs_used(contents)
    except Exception:
        # Best-effort: a sample that cannot be scored (wrong number of
        # sections, no ingredients, ...) is treated as malformed and skipped.
        continue
    sum_pcts += pct
    num_well_formed += 1

print(f"\nNumber of well-formed recipes: {num_well_formed}")
# Guard the average: with no well-formed sample the division would raise.
if num_well_formed:
    print(f"\nPercentage of ingredients GPT-2 uses correctly: {sum_pcts / num_well_formed}")
else:
    print("\nNo well-formed GPT-2 recipes; percentage is undefined.")


  0%|          | 0/500 [00:00<?, ?it/s][A
 16%|█▌        | 78/500 [00:00<00:00, 778.42it/s][A
 20%|██        | 102/500 [00:00<00:01, 326.47it/s][A
 35%|███▍      | 173/500 [00:00<00:00, 389.40it/s][A
 42%|████▏     | 209/500 [00:00<00:00, 363.74it/s][A
 53%|█████▎    | 265/500 [00:00<00:00, 405.46it/s][A
 67%|██████▋   | 333/500 [00:00<00:00, 458.40it/s][A
 80%|███████▉  | 398/500 [00:00<00:00, 502.15it/s][A
100%|██████████| 500/500 [00:01<00:00, 487.96it/s]


Number of well-formed recipes: 478

Percentage of ingredients GPT-2 uses correctly: 0.8922614703678113





### Percentage of ingredients GRU-Char uses correctly



In [None]:
# Ingredient-usage metric over the first N character-level GRU samples.
N = 500
sum_pcts = 0
num_well_formed = 0
for sample_idx in tqdm(range(N)):
    sample_path = os.path.join(GRU_char_dir, f"{GRU_char_path_str}{sample_idx}.txt")
    with open(sample_path, 'r') as sample_file:
        contents = sample_file.read()
    contents = ('🍴 ' + contents)

    # Re-insert a missing section marker before the first bullet of that section;
    # samples that lack both the marker and its bullet are skipped.
    repairable = True
    for marker, bullet in (('🥑', '•'), ('🥣', '‣')):
        if marker in contents:
            continue
        bullet_pos = contents.find(bullet)
        if bullet_pos == -1:
            repairable = False
            break
        contents = contents[:bullet_pos] + marker + contents[bullet_pos:]
    if not repairable:
        continue

    # Samples that cannot be scored are silently skipped.
    try:
        pct = avg_pct_of_ingrs_used(contents)
    except Exception:
        continue
    sum_pcts += pct
    num_well_formed += 1

print(f"\nNumber of well-formed recipes: {num_well_formed}")
# Nothing is printed for the average when no sample was well-formed.
if num_well_formed != 0:
    print(f"\nPercentage of ingredients GRU-Char uses correctly: {sum_pcts / num_well_formed}")


  0%|          | 0/500 [00:00<?, ?it/s][A
  3%|▎         | 14/500 [00:00<00:09, 49.03it/s][A
  3%|▎         | 16/500 [00:00<00:56,  8.59it/s][A
  4%|▎         | 18/500 [00:01<01:37,  4.94it/s][A
  4%|▍         | 19/500 [00:02<02:06,  3.80it/s][A
  4%|▍         | 20/500 [00:02<02:14,  3.56it/s][A
  4%|▍         | 21/500 [00:02<02:31,  3.16it/s][A
  4%|▍         | 22/500 [00:03<02:53,  2.75it/s][A
  5%|▍         | 23/500 [00:03<03:23,  2.35it/s][A
  5%|▍         | 24/500 [00:04<03:11,  2.49it/s][A
  5%|▌         | 25/500 [00:04<03:05,  2.56it/s][A
  5%|▌         | 26/500 [00:04<02:53,  2.73it/s][A
  5%|▌         | 27/500 [00:05<02:38,  2.98it/s][A
  6%|▌         | 28/500 [00:05<02:57,  2.67it/s][A
  6%|▌         | 29/500 [00:05<02:39,  2.96it/s][A
  6%|▌         | 30/500 [00:06<02:34,  3.05it/s][A
  6%|▌         | 31/500 [00:06<02:22,  3.30it/s][A
  6%|▋         | 32/500 [00:06<02:21,  3.30it/s][A
  7%|▋         | 33/500 [00:07<02:14,  3.47it/s][A
  7%|▋         | 34/


Number of well-formed recipes: 306

Percentage of ingredients GRU-Char uses correctly: 0.3595520198441286





In [None]:
# Inspect the recipe text left in `contents` by the most recently run loop.
contents

'Johnnycakes Italian Sausage Shortcake Balls according to drippings use as needed\n• 1 tablespoon vegetable oil for frying\n\n🥣\n‣ Melt butter in a pan over medium heat. Brown chops on all sides. Pour in egg mixture until the mixture resembles coarse crumbs. Remove from heat, and serve hot.'

### Percentage of ingredients GRU-Word uses correctly

In [None]:
# Ingredient-usage metric over the first N word-level GRU samples.
# The text is used as read; no title marker is prepended.
N = 500
sum_pcts = 0
num_well_formed = 0
for sample_idx in tqdm(range(N)):
    sample_path = os.path.join(GRU_word_dir, f"{GRU_word_path_str}{sample_idx}.txt")
    with open(sample_path, 'r') as sample_file:
        contents = sample_file.read()

    # Re-insert a missing section marker before the first bullet of that section;
    # samples that lack both the marker and its bullet are skipped.
    repairable = True
    for marker, bullet in (('🥑', '•'), ('🥣', '‣')):
        if marker in contents:
            continue
        bullet_pos = contents.find(bullet)
        if bullet_pos == -1:
            repairable = False
            break
        contents = contents[:bullet_pos] + marker + contents[bullet_pos:]
    if not repairable:
        continue

    # Samples that cannot be scored are silently skipped.
    try:
        pct = avg_pct_of_ingrs_used(contents)
    except Exception:
        continue
    sum_pcts += pct
    num_well_formed += 1

print(f"\nNumber of well-formed recipes: {num_well_formed}")
# Nothing is printed for the average when no sample was well-formed.
if num_well_formed != 0:
    print(f"\nPercentage of ingredients GRU-Word uses correctly: {sum_pcts / num_well_formed}")


  0%|          | 0/500 [00:00<?, ?it/s][A
  0%|          | 2/500 [00:00<01:37,  5.09it/s][A
  1%|          | 3/500 [00:00<01:46,  4.67it/s][A
  1%|          | 4/500 [00:00<01:54,  4.33it/s][A
  1%|          | 5/500 [00:01<02:20,  3.51it/s][A
  1%|          | 6/500 [00:01<02:37,  3.13it/s][A
  1%|▏         | 7/500 [00:02<02:32,  3.23it/s][A
  2%|▏         | 8/500 [00:02<02:32,  3.23it/s][A
  2%|▏         | 9/500 [00:02<03:06,  2.63it/s][A
  2%|▏         | 10/500 [00:03<02:57,  2.76it/s][A
  2%|▏         | 11/500 [00:03<02:37,  3.10it/s][A
  2%|▏         | 12/500 [00:03<02:33,  3.18it/s][A
  3%|▎         | 13/500 [00:03<02:24,  3.36it/s][A
  3%|▎         | 14/500 [00:04<02:54,  2.78it/s][A
  3%|▎         | 15/500 [00:04<02:49,  2.86it/s][A
  3%|▎         | 16/500 [00:05<02:58,  2.71it/s][A
  3%|▎         | 17/500 [00:05<02:39,  3.04it/s][A
  4%|▎         | 18/500 [00:05<02:35,  3.10it/s][A
  4%|▍         | 19/500 [00:06<02:28,  3.25it/s][A
  4%|▍         | 20/500 [00:


Number of well-formed recipes: 377

Percentage of ingredients GRU-Word uses correctly: 0.44971789708878174





In [None]:
# Inspect the recipe text left in `contents` by the most recently run loop.
contents

'🍴 Spicy Lamb Patties with Cauliflower-Almond Rice\n\n 🥑 \n • 1 4 cup dry bread crumbs ( such as hidden % m ranch® ) \n • 1 tablespoon brown sugar \n • 1 tablespoon garlic powder \n \n 🥣 \n ‣ preheat oven to 350 degrees f ( 175 degrees c ) . grease a 9x13-inch baking dish . \n ‣ mix together the ground beef , onion powder , and jalapeno pepper in a mixing bowl . add the garlic ; mix to coat . \n ‣ in a separate bowl , mix together the ground beef , onion , green bell pepper , mushrooms , tomatoes , chipotle peppers , italian seasoning , red pepper flakes , 1 4 cup cheddar cheese and 1 4 cup of cheddar cheese . in a large bowl , mix the pineapple , red pepper , onion powder , and ground beef mixture ; add the turkey broth and remaining broth . gently stir the sauce mixture with the onion mixture and mixture into the juices . \n ‣ meanwhile , in a large saucepan over medium heat , cook the beef in the skillet until golden brown , 5 to 5 minutes per side . stir in the cooked turkey broth 

### Percentage of ingredients Checklist uses correctly

In [None]:
# Ingredient-usage metric over the first N Checklist samples.
# The text is used as read; no title marker is prepended.
N = 50
sum_pcts = 0
num_well_formed = 0
for sample_idx in tqdm(range(N)):
    sample_path = os.path.join(checklist_dir, f"{checklist_path_str}{sample_idx}.txt")
    with open(sample_path, 'r') as sample_file:
        contents = sample_file.read()

    # Re-insert a missing section marker before the first bullet of that section;
    # samples that lack both the marker and its bullet are skipped.
    repairable = True
    for marker, bullet in (('🥑', '•'), ('🥣', '‣')):
        if marker in contents:
            continue
        bullet_pos = contents.find(bullet)
        if bullet_pos == -1:
            repairable = False
            break
        contents = contents[:bullet_pos] + marker + contents[bullet_pos:]
    if not repairable:
        continue

    # Samples that cannot be scored are silently skipped.
    try:
        pct = avg_pct_of_ingrs_used(contents)
    except Exception:
        continue
    sum_pcts += pct
    num_well_formed += 1

print(f"\nNumber of well-formed recipes: {num_well_formed}")
# Nothing is printed for the average when no sample was well-formed.
if num_well_formed != 0:
    print(f"\nPercentage of ingredients Checklist uses correctly: {sum_pcts / num_well_formed}")

100%|██████████| 50/50 [00:00<00:00, 643.16it/s]


Number of well-formed recipes: 47

Percentage of ingredients Checklist uses correctly: 0.31621125290980057





## Calculate average number of ingredients extraneously introduced

In [None]:
# All canonical ingredient strings currently stored in IngrProc's cache.
# NOTE(review): the cache is filled as recipes are processed, so this set depends
# on which cells ran before this one; re-running cells in a different order can
# change it. Entries may be two-word phrases (see deduce_core_ingredient).
ingr_word_set = set(value for values in IngrProc.cache.values() for value in values)

In [None]:
# Function words removed from ingredient names and noun phrases before comparison.
STOP_WORDS = {'and', 'or', 'the', 'a', 'an'}
# Words (utensils, generic food terms, sizes, verbs, ...) that prevent a noun phrase
# from being counted as an extraneous ingredient when they occur in it.
PROBLEM_WORDS = {'loaf', 'loaves', 'pan', 'pans', 'pot', 'pots', 'wire', 'wires', 'rack', 'racks', 'cutter', 'cutters',
                 'mixture', 'mixtures', 'paper', 'papers', 'sheet', 'sheets', 'foil', 'foils', 'skillet', 'skillets',
                 'container', 'containers', 'roll', 'rolls', 'strip', 'strips', 'saucepan', 'saucepans',
                 'marinade', 'marinades', 'wedge', 'wedges', 'wrap', 'wraps', 'bag', 'bags', 'skewer', 'skewers',
                 'cap', 'caps', 'glass', 'glasses', 'cup', 'cups', 'surface', 'surfaces', 'topping', 'toppings',
                 'shell', 'shells', 'masher', 'mashers', 'liquid', 'liquids', 'machine', 'machines', 'blender', 'blenders',
                 'vegetable', 'vegetables', 'meat', 'meats', 'ingredient', 'ingredients', 'fruit', 'fruits',
                 'crust', 'crusts', 'water', 'blend', 'blends', 'filling', 'fillings',
                 'half', 'halves', 'quarter', 'quarters', 'third', 'thirds', 'double', 'triple',
                 'small', 'medium', 'large', 'size', 'sizes', 
                 'put', 'rub', 'cook', 'cooking', 'rinse', 'meal', 'meals', 'towel', 'towels'}

def num_extra_ingrs(recipe):
    """Count instruction noun phrases that look like ingredients absent from the ingredient list.

    A noun phrase of the instructions is a candidate when any of its words is
    in the global `ingr_word_set`. After removing STOP_WORDS, a candidate is
    counted as extraneous when none of its words is a word of the recipe's
    own cleaned ingredients or a PROBLEM_WORDS entry. Identical phrases are
    counted once.

    Raises ValueError when the recipe text does not split into its four sections.
    """
    listed_ingrs = extract_cleaned_ingredients(recipe)
    ingr_words = set()
    for ingr in listed_ingrs:
        for word in ingr.split(' '):
            if word not in STOP_WORDS:
                ingr_words.add(word)

    instr_text = remove_symbols(extract_instructions(recipe).lower())
    instr_doc = IngrProc.nlp(instr_text)

    # Keep only phrases mentioning a known ingredient word, with stop words dropped.
    candidate_phrases = set()
    for chunk in instr_doc.noun_chunks:
        chunk_words = chunk.text.split(' ')
        if any(word in ingr_word_set for word in chunk_words):
            candidate_phrases.add(' '.join(word for word in chunk_words if word not in STOP_WORDS))

    allowed_words = ingr_words.union(PROBLEM_WORDS)
    return sum(
        1
        for phrase in candidate_phrases
        if all(word not in allowed_words for word in phrase.split(' '))
    )

num_extra_ingrs(recipes[2])

0

In [None]:
# Mean number of extraneous ingredients over the first 1000 ground-truth recipes.
np.mean([num_extra_ingrs(recipe) for recipe in recipes[:1000]])

1.085

### Extraneous ingredients GPT-2 introduces

In [None]:
# Extraneous-ingredient metric over the first N GPT-2 samples.
# `sum_pcts` holds a running total of counts here, not of percentages.
N = 500
sum_pcts = 0
num_well_formed = 0
for i in tqdm(range(N)):
    input_path = os.path.join(GPT2_recipe_dir, f"{GPT2_recipe_path_str}{i}.txt")
    with open(input_path, 'r') as f:
        contents = f.read()
    # Map GPT-2's <ING>/<INS> tags onto the emoji section markers and add the title marker.
    contents = ('🍴 ' + contents).replace('<ING>', ' 🥑 ').replace('<INS>', ' 🥣 ')
    # Re-insert a missing section marker before the first bullet of that section;
    # samples that lack both the marker and its bullet are skipped.
    if '🥑' not in contents:
        pos_to_insert = contents.find('•')
        if pos_to_insert == -1:
            continue
        contents = contents[:pos_to_insert] + '🥑' + contents[pos_to_insert:]
    if '🥣' not in contents:
        pos_to_insert = contents.find('‣')
        if pos_to_insert == -1:
            continue
        contents = contents[:pos_to_insert] + '🥣' + contents[pos_to_insert:]
    try:
        pct = num_extra_ingrs(contents)
    except Exception:
        # Best-effort: a sample that cannot be scored is treated as malformed and skipped.
        continue
    sum_pcts += pct
    num_well_formed += 1

print(f"\nNumber of well-formed recipes: {num_well_formed}")
# Guard the average: with no well-formed sample the division would raise.
if num_well_formed:
    print(f"\nExtraIngrs: {sum_pcts / num_well_formed}")
else:
    print("\nNo well-formed GPT-2 recipes; ExtraIngrs is undefined.")


  0%|          | 0/500 [00:00<?, ?it/s][A
  1%|          | 3/500 [00:00<00:21, 22.88it/s][A
  1%|          | 6/500 [00:00<00:21, 22.61it/s][A
  2%|▏         | 8/500 [00:00<00:22, 21.45it/s][A
  2%|▏         | 10/500 [00:00<00:23, 20.63it/s][A
  3%|▎         | 13/500 [00:00<00:23, 21.16it/s][A
  3%|▎         | 15/500 [00:00<00:24, 20.03it/s][A
  3%|▎         | 17/500 [00:00<00:25, 19.18it/s][A
  4%|▍         | 19/500 [00:00<00:26, 17.97it/s][A
  4%|▍         | 21/500 [00:01<00:26, 18.26it/s][A
  5%|▍         | 23/500 [00:01<00:27, 17.41it/s][A
  5%|▌         | 25/500 [00:01<00:27, 17.34it/s][A
  5%|▌         | 27/500 [00:01<00:27, 17.49it/s][A
  6%|▌         | 30/500 [00:01<00:25, 18.47it/s][A
  6%|▋         | 32/500 [00:01<00:24, 18.90it/s][A
  7%|▋         | 36/500 [00:01<00:22, 20.91it/s][A
  8%|▊         | 39/500 [00:01<00:20, 22.25it/s][A
  8%|▊         | 42/500 [00:02<00:19, 23.43it/s][A
  9%|▉         | 45/500 [00:02<00:20, 21.71it/s][A
 10%|▉         | 48/500


Number of well-formed recipes: 482

ExtraIngrs: 5.213692946058091





### Extraneous ingredients GRU-Char introduces

In [None]:
# Extraneous-ingredient metric over the first N character-level GRU samples.
# `sum_pcts` holds a running total of counts here, not of percentages.
N = 500
sum_pcts = 0
num_well_formed = 0
for sample_idx in tqdm(range(N)):
    sample_path = os.path.join(GRU_char_dir, f"{GRU_char_path_str}{sample_idx}.txt")
    with open(sample_path, 'r') as sample_file:
        contents = sample_file.read()
    contents = ('🍴 ' + contents)

    # Re-insert a missing section marker before the first bullet of that section;
    # samples that lack both the marker and its bullet are skipped.
    repairable = True
    for marker, bullet in (('🥑', '•'), ('🥣', '‣')):
        if marker in contents:
            continue
        bullet_pos = contents.find(bullet)
        if bullet_pos == -1:
            repairable = False
            break
        contents = contents[:bullet_pos] + marker + contents[bullet_pos:]
    if not repairable:
        continue

    # Samples that cannot be scored are silently skipped.
    try:
        extra_count = num_extra_ingrs(contents)
    except Exception:
        continue
    sum_pcts += extra_count
    num_well_formed += 1

print(f"\nNumber of well-formed recipes: {num_well_formed}")
# Nothing is printed for the average when no sample was well-formed.
if num_well_formed != 0:
    print(f"\nExtraIngrs: {sum_pcts / num_well_formed}")


  0%|          | 0/500 [00:00<?, ?it/s][A
  0%|          | 2/500 [00:00<00:27, 18.03it/s][A
  1%|          | 6/500 [00:00<00:23, 21.12it/s][A
  2%|▏         | 8/500 [00:00<00:26, 18.87it/s][A
  2%|▏         | 11/500 [00:00<00:23, 20.51it/s][A
  3%|▎         | 14/500 [00:00<00:21, 22.45it/s][A
  3%|▎         | 17/500 [00:00<00:22, 21.18it/s][A
  4%|▍         | 20/500 [00:00<00:22, 21.28it/s][A
  5%|▍         | 23/500 [00:01<00:23, 20.60it/s][A
  6%|▌         | 28/500 [00:01<00:19, 24.50it/s][A
  6%|▌         | 31/500 [00:01<00:18, 25.17it/s][A
  7%|▋         | 34/500 [00:01<00:20, 22.58it/s][A
  7%|▋         | 37/500 [00:01<00:19, 23.18it/s][A
  8%|▊         | 40/500 [00:01<00:18, 24.25it/s][A
  9%|▉         | 45/500 [00:01<00:16, 28.31it/s][A
 10%|▉         | 49/500 [00:01<00:15, 29.38it/s][A
 11%|█         | 53/500 [00:01<00:14, 31.34it/s][A
 12%|█▏        | 58/500 [00:02<00:12, 35.25it/s][A
 12%|█▏        | 62/500 [00:02<00:13, 31.58it/s][A
 13%|█▎        | 66/500


Number of well-formed recipes: 306

ExtraIngrs: 11.549019607843137





### Extraneous ingredients GRU-Word introduces

In [None]:
# Extraneous-ingredient metric over the first N word-level GRU samples.
# `sum_pcts` holds a running total of counts here, not of percentages.
N = 500
sum_pcts = 0
num_well_formed = 0
for sample_idx in tqdm(range(N)):
    sample_path = os.path.join(GRU_word_dir, f"{GRU_word_path_str}{sample_idx}.txt")
    with open(sample_path, 'r') as sample_file:
        contents = sample_file.read()

    # Re-insert a missing section marker before the first bullet of that section;
    # samples that lack both the marker and its bullet are skipped.
    repairable = True
    for marker, bullet in (('🥑', '•'), ('🥣', '‣')):
        if marker in contents:
            continue
        bullet_pos = contents.find(bullet)
        if bullet_pos == -1:
            repairable = False
            break
        contents = contents[:bullet_pos] + marker + contents[bullet_pos:]
    if not repairable:
        continue

    # Samples that cannot be scored are silently skipped.
    try:
        extra_count = num_extra_ingrs(contents)
    except Exception:
        continue
    sum_pcts += extra_count
    num_well_formed += 1

print(f"\nNumber of well-formed recipes: {num_well_formed}")
# Nothing is printed for the average when no sample was well-formed.
if num_well_formed != 0:
    print(f"\nExtraIngrs: {sum_pcts / num_well_formed}")


  0%|          | 0/500 [00:00<?, ?it/s][A
  1%|          | 5/500 [00:00<00:14, 34.88it/s][A
  1%|▏         | 7/500 [00:00<00:18, 26.78it/s][A
  2%|▏         | 9/500 [00:00<00:20, 23.84it/s][A
  2%|▏         | 11/500 [00:00<00:25, 19.44it/s][A
  3%|▎         | 13/500 [00:00<00:24, 19.55it/s][A
  3%|▎         | 15/500 [00:00<00:24, 19.67it/s][A
  4%|▎         | 18/500 [00:00<00:23, 20.86it/s][A
  4%|▍         | 21/500 [00:00<00:23, 20.63it/s][A
  5%|▍         | 23/500 [00:01<00:23, 20.10it/s][A
  5%|▌         | 25/500 [00:01<00:24, 19.50it/s][A
  5%|▌         | 27/500 [00:01<00:25, 18.55it/s][A
  6%|▌         | 29/500 [00:01<00:27, 17.30it/s][A
  6%|▋         | 32/500 [00:01<00:24, 19.35it/s][A
  7%|▋         | 35/500 [00:01<00:24, 19.00it/s][A
  7%|▋         | 37/500 [00:01<00:24, 19.00it/s][A
  8%|▊         | 40/500 [00:01<00:23, 19.62it/s][A
  9%|▊         | 43/500 [00:02<00:21, 21.19it/s][A
  9%|▉         | 46/500 [00:02<00:20, 21.63it/s][A
 10%|▉         | 49/500


Number of well-formed recipes: 468

ExtraIngrs: 16.337606837606838





### Extraneous ingredients Checklist introduces

In [None]:
# Extraneous-ingredient metric over the first N Checklist samples.
# `sum_pcts` holds a running total of counts here, not of percentages.
N = 50
sum_pcts = 0
num_well_formed = 0
for sample_idx in tqdm(range(N)):
    sample_path = os.path.join(checklist_dir, f"{checklist_path_str}{sample_idx}.txt")
    with open(sample_path, 'r') as sample_file:
        contents = sample_file.read()

    # Re-insert a missing section marker before the first bullet of that section;
    # samples that lack both the marker and its bullet are skipped.
    repairable = True
    for marker, bullet in (('🥑', '•'), ('🥣', '‣')):
        if marker in contents:
            continue
        bullet_pos = contents.find(bullet)
        if bullet_pos == -1:
            repairable = False
            break
        contents = contents[:bullet_pos] + marker + contents[bullet_pos:]
    if not repairable:
        continue

    # Samples that cannot be scored are silently skipped.
    try:
        extra_count = num_extra_ingrs(contents)
    except Exception:
        continue
    sum_pcts += extra_count
    num_well_formed += 1

print(f"\nNumber of well-formed recipes: {num_well_formed}")
# Nothing is printed for the average when no sample was well-formed.
if num_well_formed != 0:
    print(f"\nExtraIngrs: {sum_pcts / num_well_formed}")

100%|██████████| 50/50 [00:02<00:00, 22.02it/s]


Number of well-formed recipes: 47

ExtraIngrs: 8.595744680851064





## Compute BLEU-4 scores for each dataset's recipes.



In [None]:
def recipe_to_ref_list(recipe):
    """Turn a reference recipe record into a list of lower-cased tokens.

    `recipe` is indexed by 'title', 'ingredients' and 'instructions'; the
    three fields are joined, punctuation is separated with separate_punct,
    and the text is split on whitespace.
    """
    joined = "{}\n\n{}\n{}".format(recipe['title'], recipe['ingredients'], recipe['instructions'])
    return separate_punct(joined).lower().split()

In [None]:
def extract_title_general(recipe_str):
    """Return the text before the first '🥑', '<ING>' or '•', without '🍴' and surrounding whitespace.

    When none of the three markers occurs, the result is the empty string.
    """
    marker_positions = [recipe_str.find(marker) for marker in ('🥑', '<ING>', '•')]
    found_positions = [pos for pos in marker_positions if pos != -1]
    title_end = min(found_positions) if found_positions else 0
    return recipe_str[:title_end].strip('🍴').strip()

In [None]:
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu
from nltk.translate.bleu_score import SmoothingFunction

def new_calculate_bleu(candidate_dataset_path):
    """Average BLEU-4 of the generated recipes in a directory against same-titled references.

    candidate_dataset_path: directory with the generated recipes and a
        "titles.txt" file (one title per line, same order as the recipes).
        When that file contains no titles, they are derived from the
        candidates with extract_title_general.

    Each candidate is scored against every reference recipe in
    CACHE_DIR/recipes.pkl whose 'title' equals the candidate's title;
    candidates without a matching reference are skipped. The number of scored
    candidates is printed.

    Returns the mean score, or -1 when "titles.txt" is missing or no
    candidate could be scored.

    NOTE(review): `load_file_recipes` is not defined in the visible part of
    this notebook; it must be defined before this function is called.
    """
    titles_file = os.path.join(candidate_dataset_path, "titles.txt")

    # Check the cheap precondition first so nothing is loaded when the
    # result is going to be -1 anyway.
    if not os.path.exists(titles_file):
        return -1

    reference_recipes = pd.read_pickle(os.path.join(CACHE_DIR, 'recipes.pkl'))
    candidates = load_file_recipes(candidate_dataset_path)

    with open(titles_file) as f:
        titles = f.read().split('\n')

    if not any(titles):
        titles = [extract_title_general(recipe) for recipe in candidates]

    titles = [title.strip() for title in titles]

    smoothing_function = SmoothingFunction().method4

    cumulative_score = 0
    count = 0

    # titles.txt may hold more lines than there are candidates (e.g. a
    # trailing newline), so never index past the end of `candidates`.
    for i in range(min(len(titles), len(candidates))):
        references = reference_recipes.loc[reference_recipes['title'] == titles[i]]
        if len(references) == 0:
            continue

        references = references.apply(recipe_to_ref_list, axis=1)
        hypothesis = separate_punct(candidates[i]).lower().split()
        cumulative_score += corpus_bleu([references], [hypothesis], smoothing_function=smoothing_function) # BLEU-4
        count += 1

    print(count)
    if count == 0:
        # Nothing was scored; reuse the "cannot compute" sentinel instead of dividing by zero.
        return -1
    return cumulative_score / count

In [None]:
# BLEU-4 for the GPT-2 recipes.
new_calculate_bleu(GPT2_recipe_dir)

Reading recipe files
397


0.07984287807378489

In [None]:
# BLEU-4 for the character-level GRU recipes.
new_calculate_bleu(GRU_char_dir)

Reading recipe files
500


0.06678002489497366

In [None]:
# BLEU-4 for the word-level GRU recipes.
new_calculate_bleu(GRU_word_dir)

Reading recipe files
500


0.06665734213295724

In [None]:
# BLEU-4 for the Checklist recipes.
new_calculate_bleu(checklist_dir)

Reading recipe files
45


0.06275659447820815