In [1]:
import pandas as pd

DATA_DIR = 'dataset'

# import rules csv
rules = pd.read_csv(f'{DATA_DIR}/rules_recipe_scale.csv')

In [None]:
# Create a visualization of cumulative lift distribution, i.e on the X axis we have the lift value and Y axis we have the cumulative percentage of rules that have a lift value less than or equal to the lift value on the X axis.,

import matplotlib.pyplot as plt
import numpy as np
lift_sweep = np.linspace(0, 50, 100)
row_count_per_lift = [
    len(rules[rules['lift'] <= lift]) for lift in lift_sweep
]
# Divide row_count_per_lift with total row count to get cumulative percentage
cumulative_percentage = [
    row_count / rules.shape[0] for row_count in row_count_per_lift
]

plt.plot(lift_sweep, cumulative_percentage)

In [None]:
lift_sweep = np.linspace(0, 10, 50)
row_count_per_lift = [
    len(rules[rules['lift'] <= lift]) for lift in lift_sweep
]
# Divide row_count_per_lift with total row count to get cumulative percentage
cumulative_percentage = [
    row_count / rules.shape[0] for row_count in row_count_per_lift
]

plt.plot(lift_sweep, cumulative_percentage)

In [2]:
# sample random 100 recipes from the dataset under dataset/full_dataset.csv
seed = 1010
sample_recipes = pd.read_csv(f'{DATA_DIR}/full_dataset.csv').sample(100, random_state=seed)
sample_recipes['directions'] = sample_recipes['directions'].apply(lambda x: eval(x))

In [3]:
# drop recipes that have directions with less than 125 characters in total
sample_recipes['directions_length'] = sample_recipes['directions'].apply(lambda x: len(' '.join(x)))
sample_recipes = sample_recipes[sample_recipes['directions_length'] > 125]

In [4]:
sample_recipes.shape

(89, 8)

In [5]:
sample_recipes.iloc[0].directions

['Combine sugar and orange rind.',
 'Separate biscuits.',
 'Dip each in butter and coat with sugar mixture.',
 'Stand biscuits on side, overlapping edges in a 9-inch tube pan.',
 'Bake at 350° for 30 minutes.']

In [6]:
from gensim.parsing.preprocessing import remove_stopwords, preprocess_string

In [7]:
sample_recipes['preprocessed'] = sample_recipes['directions'].apply(lambda x: preprocess_string(' '.join(x)))

In [8]:
sample_recipes.iloc[0].preprocessed[:5]

['combin', 'sugar', 'orang', 'rind', 'separ']

In [9]:
# From the antecedents column, convert from frozenset to list of strings
rules['antecedents'] = rules['antecedents'].apply(lambda x: list(eval(x)))

In [10]:
# Now apply the rules to the sample recipes and see how many rules are applicable to each recipe
from tqdm import tqdm
from collections import defaultdict

# Sort by lift and grab the first 400 rules
extracted_rules = rules.sort_values('lift', ascending=False)
import itertools
extracted_rule_list = extracted_rules['antecedents'].tolist()
extracted_rule_list.sort()
extracted_rule_list = list(k for k,_ in itertools.groupby(extracted_rule_list))
# sort the extracted rules by length of antecedents, largest to smallest
extracted_rule_list.sort(key=lambda x: len(x), reverse=True)

In [8]:

import re
# Create a RegEx pattern for 'antecedents' column, case insensitive
# Antecedents column is currently a list of strings
# For example for ['salt', 'pepper'], the pattern should capture a recipe that has both salt and pepper at any position in the sentence
# Even if salt comes before pepper or vice versa, the pattern should capture it
# The pattern should also capture recipes that have salt and pepper in the same sentence but not necessarily next to each other
# So the pattern should be : salt.*pepper|pepper.*salt
# We do this for all the 400 rules and combine them with a logical OR
# So what we can do is if len(rule) > 2, then we create all the possible permutation of the rule and join them with a logical OR

# Initialize the list to store the regex patterns
to_be_joined = []

# Iterate through each rule in the extracted_rule_list
for rule in extracted_rule_list:
    # Check the length of the rule
    if len(rule) == 1:
        # If there's only one ingredient in the rule, escape it and append it
        to_be_joined.append(re.escape(rule[0]))
    else:
        # If there are multiple ingredients, create permutations and join them with '.*'
        for permutation in itertools.permutations(rule):
            pattern = '.*'.join([re.escape(ingredient) for ingredient in permutation])
            to_be_joined.append(pattern)

# Now, to_be_joined contains the list of regex patterns
# remove empty strings from to_be_joined
to_be_joined = [x for x in to_be_joined if x]
# print(to_be_joined)
# Join with subpattern names so that we can refer to them later
# antecedents_pattern = '|'.join(
#     [
#         f'(?P<antecedent_{idx}>{pattern})' for idx, pattern in enumerate(to_be_joined)
#     ]
# )
# replace multiple iterations of | with a single |
# antecedents_pattern = re.sub(r'\|\|+', '|', antecedents_pattern)
# print(antecedents_pattern)
# compile the pattern
# antecedents_pattern = re.compile(antecedents_pattern)

In [12]:
# my pattern finds the shortest match, so if we have salt and pepper in the same sentence, it will only capture salt
# but we want to capture the longest match, so we need to sort the rules by length of antecedents, largest to smallest

In [13]:
# Test the pattern on a test string
temp = preprocess_string("do not forget to rise the flour and add salt and pepper to taste")
# apply the pattern to the test string
print(' '.join(temp))
# match = antecedents_pattern.search(' '.join(temp))
# if match:
#     for group_name, matched_text in match.groupdict().items():
#         if matched_text is not None:
#             print(f"Matched {group_name}: {matched_text}")

forget rise flour add salt pepper tast


In [15]:
# Apply all the patterns in to be joined list to temp
for pattern in to_be_joined:
    match = re.search(pattern, ' '.join(temp))
    if match:
        print(match.group(), pattern)

flour add salt pepper flour.*add.*salt.*pepper
add salt pepper tast add.*salt.*pepper.*tast
flour add salt flour.*add.*salt
flour add salt pepper flour.*add.*pepper
flour add salt pepper flour.*salt.*pepper
add salt pepper add.*salt.*pepper
add salt pepper tast add.*salt.*tast
add salt pepper tast add.*pepper.*tast
salt pepper tast salt.*pepper.*tast
add salt ad.*salt
add salt add.*salt
flour add flour.*add
flour add salt pepper flour.*pepper
flour add salt flour.*salt
add salt pepper ad.*pepper
add salt pepper add.*pepper
salt pepper salt.*pepper
rise flour add rise.*add
rise flour rise.*flour
rise flour add salt rise.*salt
add salt pepper tast add.*tast
pepper tast pepper.*tast
salt pepper tast salt.*tast
ad ad
flour flour
pepper pepper
rise rise
salt salt
tast tast


In [16]:
print(len(to_be_joined))

6580994


In [22]:
# Check in each recipe if there is a match
# Recipe matches will be a set of sets, where each set will contain words
recipe_matches = []
# we'll use the directions column for this
from multiprocessing import Pool, cpu_count
from functools import partial
import time
from helper import find_patterns

# find_patterns function gets a recipe and the to_be_joined array and returns a set which will be appended to recipe_matches
# Use multiprocessing to speed up the process

pool = Pool(cpu_count())
start_time = time.time()
fn = partial(find_patterns, to_be_joined=to_be_joined)
recipe_matches = pool.map(fn, sample_recipes['preprocessed'].tolist())

In [26]:
# Let's write our results to a txt file

with open('recipe_matches.txt', 'w') as f:
    i = 0
    for row_count, recipe in tqdm(sample_recipes.iterrows(), total=sample_recipes.shape[0]):
        to_write = f"""

        Recipe ID: {recipe.name}
        Recipe Name: {recipe['title']}
        Recipe Directions: {' '.join(recipe['directions'])}
        Recipe Matches: {recipe_matches[i]}
        """
        f.writelines(to_write + '\n')
        i += 1

  0%|          | 0/89 [00:00<?, ?it/s]

100%|██████████| 89/89 [00:01<00:00, 61.75it/s]


In [11]:
import helper
# get the first row
recipe = sample_recipes.iloc[0]
print(recipe)
fulfilled_rules, suggestions = helper.extract_rules(recipe['preprocessed'], extracted_rules)
print(fulfilled_rules)

Unnamed: 0                                                      350397
title                                            Orange Breakfast Ring
ingredients          ["1 c. sugar", "3 Tbsp. orange rind", "2 (12 o...
directions           [Combine sugar and orange rind., Separate bisc...
link                   www.cookbooks.com/Recipe-Details.aspx?id=205107
source                                                        Gathered
NER                  ["sugar", "orange rind", "buttermilk biscuits"...
directions_length                                                  190
preprocessed         [combin, sugar, orang, rind, separ, biscuit, d...
Name: 350397, dtype: object
{frozenset({'stand', 'bake', 'inch'}), frozenset({'stand', 'bake'}), frozenset({'stand', 'bake', 'mixtur'})}


In [12]:
recipe['directions']

['Combine sugar and orange rind.',
 'Separate biscuits.',
 'Dip each in butter and coat with sugar mixture.',
 'Stand biscuits on side, overlapping edges in a 9-inch tube pan.',
 'Bake at 350° for 30 minutes.']

In [15]:
# Now we'll ask ChatGPT to improve the directions with the suggestions we have extracted above
import openai
openai.api_key = "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"

# Create the text to send to GPT
prompt = helper.create_prompt(
    recipe['title'],
    recipe['directions'],
    fulfilled_rules,
    suggestions
)


In [19]:
[print(x.strip()) for x in prompt.split('.')]

The below recipe is for Orange Breakfast Ring
The original directions are as follows:
    (Combine sugar and orange rind
Separate biscuits
Dip each in butter and coat with sugar mixture
Stand biscuits on side, overlapping edges in a 9-inch tube pan
Bake at 350° for 30 minutes
)
    Some of the rules that are fulfilled by this recipe are:
    {frozenset({'stand', 'bake', 'inch'}), frozenset({'stand', 'bake'}), frozenset({'stand', 'bake', 'mixtur'})}
    Rewrite the recipe while keeping it as close to the original as possible
The only changes you make should be to fulfill the following rules:
    ["frozenset({'bowl', 'let', 'preheat'})", "frozenset({'oven', 'let'})", "frozenset({'oven', 'let'})"]
    Make sure the fulfill all of the new rules
But, you are not allowed to add any new ingredients, unless specified in the new rules
Explain all the changes you made and why you made them



[None, None, None, None, None, None, None, None, None, None, None]

In [18]:
# submit the prompt to GPT
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages = [{
        "role": "user", "content": prompt
    }],
)


In [23]:
# Split to sentences
[print(x.strip()) for x in response.choices[0].message.content.split('.')]
print()

To make the necessary changes to fulfill the new rules, I will rewrite the recipe while keeping it as close to the original as possible
The changes I will make are as follows:

1
Preheat the oven to 350°
- This change fulfills the rule "frozenset({'oven', 'let'})"
2
In a bowl, combine sugar and orange rind
- This change fulfills the rule "frozenset({'bowl', 'let', 'preheat'})" by adding the step to use a bowl
3
Separate the biscuits
- This step remains the same and does not need any changes
4
Dip each biscuit in melted butter and coat with the sugar mixture
- This step remains the same and does not need any changes
5
Stand the coated biscuits on their sides, overlapping the edges, in a 9-inch tube pan
- This step remains the same and does not need any changes
6
Bake in the preheated oven at 350° for 30 minutes
- This step remains the same and does not need any changes




In [21]:
recipe['directions']

['Combine sugar and orange rind.',
 'Separate biscuits.',
 'Dip each in butter and coat with sugar mixture.',
 'Stand biscuits on side, overlapping edges in a 9-inch tube pan.',
 'Bake at 350° for 30 minutes.']

### Example #2

In [24]:
# Get the second recipe and apply the same process
recipe = sample_recipes.iloc[1]
print(recipe)
fulfilled_rules, suggestions = helper.extract_rules(recipe['preprocessed'], extracted_rules)
print(fulfilled_rules)

Unnamed: 0                                                     1130551
title                              Cranberry Bliss Bars (Easy Version)
ingredients          ["1/2 cup butter, melted, then cooled a bit", ...
directions           [Preheat oven to 350 degrees. Prepare a 9x13 p...
link                 www.food.com/recipe/cranberry-bliss-bars-easy-...
source                                                        Gathered
NER                  ["butter", "brown sugar", "eggs", "vanilla", "...
directions_length                                                  850
preprocessed         [preheat, oven, degre, prepar, pan, line, parc...
Name: 1130551, dtype: object
{frozenset({'sugar', 'mixer', 'preheat'}), frozenset({'mixer', 'preheat'}), frozenset({'sugar', 'mixer', 'oven'})}


In [46]:
import importlib
importlib.reload(helper)
propmt = helper.create_prompt(recipe['title'], recipe['directions'], fulfilled_rules, suggestions)
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages = [{
        "role": "user", "content": propmt
    }],
)

In [47]:
[print(x.strip()) for x in propmt.split('.')]
print()

The below recipe is for Cranberry Bliss Bars (Easy Version)
The original directions are as follows:
    (Preheat oven to 350 degrees
Prepare a 9x13 pan by lining it with parchment paper for easy removal (or spray with non-stick spray)
Melt butter; cool slightly
In a mixer bowl, blend butter and brown sugar until smooth
Add all other ingredients except chips and Craisins
Once the mixture is just about blended, add 1/2 cup chips and 1/2 cup Craisins
Spread the thick batter into a 9x13 pan
Bake 350 degrees for 23-25 minutes or until a toothpick tests clean
Do not overbake
Allow to cool thoroughly
When cool, frost with ready-made Cream Cheese frosting
Chop remaining 1/4 cup Craisins into small bits and scatter the bits over the top of the frosting
Melt remaining 1/4 cup white chocolate chips in the microwave (hint: add 1/2 t
canola oil to create a thin drizzle); use a fork to drizzle the melted chocolate on top
Cut into 18 squares
)
    Some of the rules that are fulfilled by this recipe a

In [48]:
# Split to sentences
[print(x.strip()) for x in response.choices[0].message.content.split('.')]
print()

Recipe for Cranberry Bliss Bars (Easy Version)

Ingredients:
- 1 cup unsalted butter, melted and cooled slightly
- 1 1/2 cups packed brown sugar
- 2 large eggs
- 1 teaspoon vanilla extract
- 2 cups all-purpose flour
- 1/2 teaspoon baking powder
- 1/4 teaspoon salt
- 1/2 cup white chocolate chips
- 3/4 cup dried cranberries (Craisins)
- 1/4 cup white chocolate chips, for drizzling
- 1/2 teaspoon canola oil

Frosting:
- 1 container (16 ounces) ready-made cream cheese frosting

Instructions:
1
Preheat the oven to 350 degrees Fahrenheit or use an electric oven
This fulfills the rule "frozenset({'preheat', 'electr'})"
2
Line a 9x13 pan with parchment paper for easy removal or spray with non-stick spray
This step remains the same as the original recipe
3
In a mixer bowl, blend the melted butter and packed brown sugar until smooth
This step remains the same as the original recipe
4
Add the eggs and vanilla extract to the butter and brown sugar mixture
Mix well until combined
This step remains

In [49]:
recipe['directions']

['Preheat oven to 350 degrees. Prepare a 9x13 pan by lining it with parchment paper for easy removal (or spray with non-stick spray).',
 'Melt butter; cool slightly. In a mixer bowl, blend butter and brown sugar until smooth. Add all other ingredients except chips and Craisins. Once the mixture is just about blended, add 1/2 cup chips and 1/2 cup Craisins.',
 'Spread the thick batter into a 9x13 pan. Bake 350 degrees for 23-25 minutes or until a toothpick tests clean. Do not overbake.',
 'Allow to cool thoroughly.',
 'When cool, frost with ready-made Cream Cheese frosting. Chop remaining 1/4 cup Craisins into small bits and scatter the bits over the top of the frosting.',
 'Melt remaining 1/4 cup white chocolate chips in the microwave (hint: add 1/2 t. canola oil to create a thin drizzle); use a fork to drizzle the melted chocolate on top. Cut into 18 squares.']