In [1]:
import stanza as st
import numpy as np
import re as re
from typing import Optional
import transformers as tra

st.download('en')
depgram = st.Pipeline('en')#, processors='tokenize,mwt,pos,lemma,depparse,ner')
pipe2 = tra.pipeline(model="facebook/bart-large-mnli")

  from .autonotebook import tqdm as notebook_tqdm
Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json: 193kB [00:00, 32.2MB/s]                    
2023-03-14 10:48:57 INFO: Downloading default packages for language: en (English) ...
2023-03-14 10:48:59 INFO: File exists: C:\Users\Typic\stanza_resources\en\default.zip
2023-03-14 10:49:03 INFO: Finished downloading models and saved to C:\Users\Typic\stanza_resources.
2023-03-14 10:49:03 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES
Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json: 193kB [00:00, 38.6MB/s]                    
2023-03-14 10:49:05 INFO: Loading these models for language: en (English):
| Processor    | Package   |
----------------------------
| tokenize     | combined  |
| pos     

In [2]:
# if head_id is -1 or id is 0, this is the root node
class depNode():
    def __init__(self, id:int, head_id:int, text:str, typ: str, deps:dict):
        self.id = id
        self.head_id = head_id
        self.text = text
        self.typ = typ
        self.deps = deps
        if deps == None:
            self.deps = {}

    def addDependent(self, dep_id:int, dep_text:str, rel_type:str):
        if dep_id in self.deps.keys():
            return
        self.deps[dep_id] = (dep_id, dep_text, rel_type)

    def __str__(self):
        outputStr = ""
        outputStr += "ID: " + str(self.id)
        outputStr += "\n\tHead ID: " + str(self.head_id)
        outputStr += "\n\tText: " + self.text
        outputStr += "\n\tType: " + self.typ
        outputStr += "\n\tDependent Words: "
        for x in self.deps.keys():
            outputStr += "\n\t\tDep. ID: " + str(self.deps[x][0])
            outputStr += "\n\t\t\tDep. Text: " + self.deps[x][1]
            outputStr += "\n\t\t\tRelation Type: " + self.deps[x][2]
        return outputStr
            

# returns a easier to traverse dependency tree with word id in the sentence as the key, as well as a dictionary of a word to a list of ids of instances of it
def getDependency(input_dep:list):
    text_to_ids = {} # ex: For the sentence, "cakes are cakes": text_to_ids["cakes"] == [1,3] 
    dependency_dict = {}

    # add a special node for ROOT
    dependency_dict[0] = depNode(0, -1, "ROOT", "N/A", {})

    for entry in input_dep:

        id1 = entry[0].id
        txt1 = entry[0].text.lower()
        id2 = entry[2].id
        txt2 = entry[2].text.lower()
        rel_type = entry[1]

        # if either word id isn't in the dependency dictionary, add it
        if not id1 in dependency_dict.keys():
            dependency_dict[id1] = depNode(id1, entry[0].head, txt1, entry[0].xpos, {})
            if not txt1 in text_to_ids.keys():
                text_to_ids[txt1] = [id1]
            elif txt1 in text_to_ids.keys() and not id1 in text_to_ids[txt1]:
                text_to_ids[txt1] = text_to_ids[txt1] + [id1]

        if not id2 in dependency_dict.keys():
            dependency_dict[id2] = depNode(id2, entry[2].head, txt2, entry[2].xpos, {})
            if not txt2 in text_to_ids.keys():
                text_to_ids[txt2] = [id2]
            elif txt2 in text_to_ids.keys() and not id2 in text_to_ids[txt2]:
                text_to_ids[txt2] = text_to_ids[txt2] + [id2]

        # add a dependency into the head word
        dependency_dict[id1].addDependent(id2, txt2, rel_type)

    return (dependency_dict, text_to_ids)

# returns a list with each entry corresponding to a dependent word on the head word provided
# (word text, relation to head, word type, id in dependency dict)
# ex: ("it", "obj", "PRP", 5)
def getDepInfo(input_deps:dict, head:depNode):
    res = []
    for dd in head.deps:
        text = head.deps[dd][1]
        rel_type = head.deps[dd][2]
        word_type = input_deps[head.deps[dd][0]].typ
        res.append((text, rel_type, word_type, head.deps[dd][0]))
    return res

# returns the id, text, and type of the head word of the given dependency dict (in depNode form)
def getHeadWord(input_deps:dict):
    return input_deps[list(input_deps[0].deps.keys())[0]]

In [3]:
class Ingredient:
    def __init__(   self, 
                    og_text:str, # the original string
                    main_comp:str, # the main part of the ingredient component, IE chicken
                    quantity:str, # a number. if a non-numerical amount, this should be None (ex: some raisins). "a" -> 1. is a string rather than a float bc fractions are more readable for recipes
                    measurement:str, # the measurement the quantity is referring to (like a cup). if no measurement (like "2 apples"), is None. if vague, non-committal amount (ex: "some"), this does here.
                    sub_quantity:str, 
                    sub_measurement: str,
                    descriptors:list # other details of the ingredient listing, IE dependent nouns, adjectives, preparation verb parts (ex: "finely chopped")
                    ):
        self.og_text = og_text
        self.main_comp = main_comp
        self.quantity = quantity
        self.measurement = measurement
        self.sub_quantity = sub_quantity
        self.sub_measurement = sub_measurement
        self.descriptors = descriptors

    def __str__(self):
        outputStr = ""
        outputStr += "Ingredient: " + self.main_comp
        outputStr += "\n\tQuantity: " 
        if self.quantity == None:
            outputStr += "N/A"
        else:
            outputStr += self.quantity
        outputStr += "\n\tMeasurement: " 
        if self.measurement == None:
            if self.sub_quantity == None and self.sub_measurement == None:
                outputStr += "N/A"
            else:
                outputStr += self.sub_quantity + " " + self.sub_measurement + " "
        else:

            if self.sub_quantity != None:
                outputStr += self.sub_quantity + " " 
            if self.sub_measurement != None:
                outputStr += self.sub_measurement + " "

            outputStr += self.measurement
        
        outputStr += "\n\tDescriptors: "
        if len(self.descriptors) < 1:
            outputStr += "\n\t\tN/A"
        else:
            for dt in self.descriptors:
                outputStr += "\n\t\t" + dt
        outputStr += "\n\tOriginal text: " + self.og_text
        return outputStr

def combineItemsIntoPhrase(its:list):
    res = ""
    for x in its:
        if x in [".", ",", "'", ";", ":", "-", "/"] or (len(x) > 1 and x[0] in [".", ",", "'", ";", ":", "-", "/"]):
            res = res.rstrip()
            
        res += x
        res += " "  
    res = res.replace("&#39;", "'")
    return res.rstrip()

def floatFromFractionString(frac: str):
    try:
        return float(frac)
    except:
        numerator = frac[:frac.index("/")]
        denominator = frac[frac.index("/")+1:]

        return float(numerator) / float(denominator)

# tries to find a quantity and measurement if none were found by directly analyzing the head word
def tryFindQuantity(input_deps:dict, head:depNode, head_rel_type:str, meas: str):
    quantity = None
    measurement = None

    # if measurement, then set measurement
    if head_rel_type == "nmod:npmod":
        measurement = head.text

    if head_rel_type in ["nummod", 'det']:
        if head_rel_type == 'det' and head.text.lower() in ["a", "an"]:
            # print("???")
            return (str(1), measurement)
        elif head_rel_type == 'det' and meas != None:
            # print("???")
            return (head.text, measurement)
        elif head_rel_type == "nummod":
            # print("???")
            return (head.text, measurement)

    for dd in head.deps:
        rel_type = head.deps[dd][2]
        # print(head.deps[dd])
        (temp_quantity, temp_measurement) = tryFindQuantity(input_deps, input_deps[head.deps[dd][0]], rel_type, measurement)
        if temp_measurement != None:
            measurement = temp_measurement
        if temp_quantity != None:
            # print(temp_quantity)
            if quantity == None:
                quantity = temp_quantity
            else:
                quantity == str(floatFromFractionString(quantity) * floatFromFractionString(temp_quantity))
            if measurement == None:
                measurement = head.text


    return (quantity, measurement)

def getIngredientParameters(depgram, ingred:str):
    og_ingred = ingred
    sub_phrase = None
    sub_quantity = None
    sub_measurement = None
    after_comma = None
    if "(" in ingred and ")" in ingred:
        sub_measurement_result = re.search("\((.+)\)", ingred)
        if sub_measurement_result != None:
            sub_phrase = sub_measurement_result.group(1)
            # print(sub_phrase)
            ingred = ingred.replace(sub_phrase, "")
            nums = re.search("\s*([\d/\.]+)\s*", sub_phrase)
            if nums != None:
                sub_quantity = nums.group(1)
                sub_measurement = sub_phrase[nums.span()[1]:]

        # # ingred = ingred.replace("packages", "")
        # # ingred = ingred.replace("package", "")
        ingred = ingred.replace("(", "")
        ingred = ingred.replace(")", "")

    if "," in ingred:
        comma_result = re.search("\,(.+)", ingred)
        if comma_result != None:
            after_comma = comma_result.group(1)
            ingred = ingred.replace(after_comma, "")
            ingred = ingred.replace(",", "")

        # remove_list = ["package", "can"]
        
        # for rr in remove_list:
        #     temp_result = re.search("\s(" + rr + "s*)\s", ingred)
        #     if temp_result != None:
        #         print(temp_result.span())
        #         ingred = ingred[:temp_result.span()[0]] + " " + ingred[temp_result.span()[1]:]
        #         # print("CANNN")
        
        # nums = re.findall("\s*([\d/\.]+)\s*", ingred)
        # if nums != None:
        #     print(nums)
        #     nums.sort(key=lambda x: len(x), reverse=True)
        #     product = 1
        #     for nn in nums:
        #         ingred = ingred.replace(nn, "")
        #         product *= floatFromFractionString(nn)

        #     ingred = str(product) + ingred
        # print(ingred)

    doc = depgram(ingred)

    # consti = doc.sentences[0].constituency
    depend = getDependency(doc.sentences[0].dependencies)[0]
    head_dep = depend[list(depend[0].deps.keys())[0]]
    # print(head_dep)
    # print(consti)

    # for dd in depend:
    #     print(depend[dd])

    # from what I can tell, the head noun will reference 4 common types of dependent relations:
    #   1: "amod" | "parataxis": should go into the descriptors list [note that the parataxis is something weird where if there is a comma or something]
    #   2: "compound": should be part of the main_comp (make sure to figure out how to enjoin with the head word)
    #   3: "nmod:npmod": should be the measurement field. if it exists, we need to go there and get the nummod or a de ("a", "some", etc.)
    #   4: "nummod": this should be the quantity field. if this is dependent on the root noun, then there is no measurement word, so set it to None
    
    descriptors = []
    compounds = []
    measurement = None
    quantity = None
    main_comp = head_dep.text
    
    deps = getDepInfo(depend, head_dep)
    # print(deps)
    
    for dd in deps:
        if dd[1] == 'nmod:npmod':
            measurement = dd[0]
            temp_quant = getDepInfo(depend, depend[dd[3]])
            if temp_quant == None:
                continue
            for tq in temp_quant:
                if tq[0] == 'no':
                    continue
                if tq[1] == 'nummod':
                    quantity = tq[0]
                elif tq[1] == 'det' and (tq[0].lower() == 'a' or tq[0].lower() == 'an'):
                    quantity = str(1)
                elif quantity == None:
                    quantity = tq[0]
                    # note no break here bc this is a failsafe in case there is no appropriate quantity descriptor
        elif dd[1] == 'amod' or dd[1] == 'parataxis' or dd[1] == 'conj' or dd[1] == 'acl':
            # also add certain dependents on the amod if they exist (ex: "all-purpose flour" has "all" as dependent on "purpose")
            temp_text = dd[0]
            temp_desc = getDepInfo(depend, depend[dd[3]])
            temp_adds = []
            if temp_desc != None:
                for td in temp_desc:
                    if td[1] in ['det', 'amod', 'obl']:
                        temp_adds.append(td[0])
                    elif td[1] in ['punct']:
                        continue
                    else:
                        # print("What is this?? " + str(td))
                        continue
            if len(temp_adds) > 0:
                temp_adds = [temp_text] + temp_adds
                temp_text = combineItemsIntoPhrase(temp_adds)
            descriptors.append(temp_text)
        elif dd[1] == 'compound' or dd[1] == 'appos' or dd[1] == 'aux':
            remove_list = ["package", "packages", "can", "cans", "jar", "jars"]
            if dd[0] in remove_list:
                measurement = dd[0]
            else:
                # print(">>>" + str(depend[dd[3]]))
                for abc in list(depend[dd[3]].deps.keys()):
                    # print(depend[dd[3]].deps[abc])
                    # print(depend[abc])
                    if depend[dd[3]].deps[abc][2] == 'amod':
                        compounds.append(depend[abc].text)
                # print(list(depend[dd[3]].deps.keys()))
                compounds.append(dd[0])
        elif dd[1] == 'nummod':
            quantity = dd[0]
        elif dd[1] == 'det':
            if dd[0].lower() == 'a' or dd[0].lower() == 'an':
                quantity = str(1)
            else:
                if dd[0].lower() == 'no':
                    compounds.append(dd[0])
                    continue
                quantity = dd[0]
        elif dd[1] in ['aux', 'punct']:
            continue
        else:
            print("??????")
            # descriptors.append(dd[0])
            print(dd)

    if quantity == None and measurement == None:
        # do something to try and find something because we should have at least 1

        # first, let's try going through each of the compounds recursively with depth-first
        # temp_quant = None
        # temp_meas = None
        for cc in deps:
            (temp_quant, temp_meas) = tryFindQuantity(depend, depend[cc[3]], None, None)
            if quantity == None and temp_quant != None:
                quantity = temp_quant
                
            if measurement == None and temp_meas != None:
                measurement = temp_meas
                
            if quantity != None and measurement != None:
                break


    if len(compounds) > 0:
        compounds.append(main_comp)
        main_comp = combineItemsIntoPhrase(compounds)

    if after_comma != None:
        descriptors.append(after_comma.lstrip().rstrip())

    return Ingredient(og_ingred, main_comp, quantity, measurement, sub_quantity, sub_measurement, descriptors)
    

In [4]:
tools = ['stove', 'fan', 'microwave', 'oven', 'scale', 'grinder', 'mixer', 'blender', 'fryer', 'toaster', 'sandwich press', 'panini press', 'sandwich presses', 'panini presses', 'cooktop', 'juicer', 'machine', 'maker', 'grinder', 'shaker', 'saucepan', 'pan', 'kadai',  'cooker', 'pot', 'lid', 'food processor', 'processor', 'skillet', 'foil', 'sheet', 'steamer', 'purifier', 'colinder', 'kettle', 'bowl', 'plate', 'board', 'tray', 'trey', 'rack', 'knife', 'knives', 'spoon', 'fork', 'tongs', 'cup', 'turner', 'spatula', 'peeler', 'skinner', 'whisk', 'colander', 'opener', 'dish', 'masher', 'spinner', 'grater', 'shears', 'scissors', 'garlic press', 'grill', 'press', 'stockpot', 'ladle', 'mitt', 'guard', 'cover', 'trivet', 'timer', 'phone', 'container', 'fridge', 'refridgerator', 'freezer', 'tin', 'paper', 'towel', 'sponge', 'bag', 'bin', 'mill', 'mortar', 'pestle', 'dispenser', 'chopper', 'slicer', 'mandoline', 'jar', 'tool', 'utensil', 'appliance']

class Step:
    def __init__(   self, 
                    id: int = 0, # the number of the step
                    root_action: str = "", # the root verb of the step
                    actions: dict = None, 
                    ingredients: dict = None, # the ingredients required as keys (num/amount [count/mass] and list of qualifiers [adjs] as value)
                        # could also use some ingredient ID instead of using a list of qualifiers on the ingredient [the class referred to by the ID]
                    tools: dict = None, # the tools and/or appliances and/or pots/pans/bowls required
                    details: dict = None, # adverbs and descriptors. key is a numerical ID. value is DetailType enum and substring
                    original_text: str = ""): 
        self.root_action = root_action
        self.actions = {}
        if actions != None:
            self.actions = actions
        self.ingredients = {}
        if ingredients != None:
            self.ingredients = ingredients
        self.tools = {}
        if tools != None:
            self.tools = tools
        self.details = {}
        if details != None:
            self.details = details
        self.id = id 
        self.original_text = original_text



    def __str__(self):
        outputStr = ""
        outputStr = outputStr +  "id: " + str(self.id)
        outputStr = outputStr + "\nOriginal text: " + str(self.original_text)
        outputStr = outputStr + "\n\tRoot action: " + str(self.root_action)
        outputStr = outputStr + "\n\tAll actions: " + str(self.actions)
        outputStr = outputStr + "\n\tIngredients: " + str(self.ingredients)
        outputStr = outputStr + "\n\tTools: " + str(self.tools)
        outputStr = outputStr + "\n\tDetails: " + str(self.details)
        return outputStr
    
def stepsListClean(depgram, steps:list):
    cleaned_steps = []
    for step in steps:
        step = step.replace(";", ".")
        semicolon_result = re.search("\:(.+)",step)
        if semicolon_result != None:
            step = semicolon_result.group(1)

        cleaned_steps.append(step.lstrip().rstrip())
    comb = combineItemsIntoPhrase(cleaned_steps)
    test_d = depgram(comb)
    new_steps = []
    for ss in test_d.sentences:
        new_steps.append(ss.text)
    return new_steps

        

In [5]:
all_steps1 = stepsListClean (    depgram,
                                ['Preheat the oven to 350 degrees F (175 degrees C). Place a pan of water on the lowest oven rack.', 'Make the crust: Crush ladyfingers into fine crumbs. Place crumbs into a bowl with melted butter and coffee-flavored liqueur; stir until evenly combined. Press into the bottom of a 9-inch springform pan.', 'Make the filling: Beat cream cheese, mascarpone cheese, and sugar in a large bowl with an electric mixer until very smooth, 2 to 3 minutes. Scrape down the sides of the bowl and mix in coffee-flavored liqueur. Add flour and eggs; mix on low speed just until smooth. If batter seems too thick, mix in heavy cream. Pour batter over the crust.', 'Bake on the center rack of the preheated oven until just set, 40 to 45 minutes.', 'Open the oven door, turn off the heat, and leave cheesecake to cool on the center rack for 20 minutes.', 'Remove from the oven, transfer to a wire rack to cool completely, about 30 more minutes.', 'Refrigerate for at least 3 hours, or overnight.', 'Just before serving, grate semisweet chocolate over the top.']
                            )
print(all_steps1)
steps_para1 = combineItemsIntoPhrase(all_steps1)
      

['Preheat the oven to 350 degrees F (175 degrees C).', 'Place a pan of water on the lowest oven rack.', 'Crush ladyfingers into fine crumbs.', 'Place crumbs into a bowl with melted butter and coffee-flavored liqueur.', 'stir until evenly combined.', 'Press into the bottom of a 9-inch springform pan.', 'Beat cream cheese, mascarpone cheese, and sugar in a large bowl with an electric mixer until very smooth, 2 to 3 minutes.', 'Scrape down the sides of the bowl and mix in coffee-flavored liqueur.', 'Add flour and eggs.', 'mix on low speed just until smooth.', 'If batter seems too thick, mix in heavy cream.', 'Pour batter over the crust.', 'Bake on the center rack of the preheated oven until just set, 40 to 45 minutes.', 'Open the oven door, turn off the heat, and leave cheesecake to cool on the center rack for 20 minutes.', 'Remove from the oven, transfer to a wire rack to cool completely, about 30 more minutes.', 'Refrigerate for at least 3 hours, or overnight.', 'Just before serving, gr

In [7]:
def removeXWords(depgram, text:str):

    final = text

    the_result = re.search("(^|\s)the\W(s\W)*",text.lower())
    if the_result != None:
        final = (text[:the_result.span()[0]] + " " +  text[the_result.span()[1]:]).lstrip().rstrip()

    a_result = re.search("(^|\s)an*\W",final.lower())
    if a_result != None:
        final = (final[:a_result.span()[0]] + " " +  final[a_result.span()[1]:]).lstrip().rstrip()

    of_result = re.search("(^|\s)of\W",final.lower())
    if of_result != None:
        final = (final[:of_result.span()[0]] + " " +  final[of_result.span()[1]:]).lstrip().rstrip()

    edge_result = re.search("(^|\s)edges*\W",final.lower())
    if edge_result != None:
        final = (final[:edge_result.span()[0]] + " " +  final[edge_result.span()[1]:]).lstrip().rstrip()

    top_result = re.search("(^|\s)tops*\W",final.lower())
    if top_result != None:
        final = (final[:top_result.span()[0]] + " " +  final[top_result.span()[1]:]).lstrip().rstrip()

    side_result = re.search("(^|\s)sides*\W",final.lower())
    if side_result != None:
        final = (final[:side_result.span()[0]] + " " +  final[side_result.span()[1]:]).lstrip().rstrip()
    
    bottom_result = re.search("(^|\s)bottoms*\W",final.lower())
    if bottom_result != None:
        final = (final[:bottom_result.span()[0]] + " " +  final[bottom_result.span()[1]:]).lstrip().rstrip()
    
    outer_result = re.search("(^|\s)outer\W",final.lower())
    if outer_result != None:
        final = (final[:outer_result.span()[0]] + " " +  final[outer_result.span()[1]:]).lstrip().rstrip()

    inner_result = re.search("(^|\s)inner\W",final.lower())
    if inner_result != None:
        final = (final[:inner_result.span()[0]] + " " +  final[inner_result.span()[1]:]).lstrip().rstrip()

    inside_result = re.search("(^|\s)in(?:side)*\W",final.lower())
    if inside_result != None:
        final = (final[:inside_result.span()[0]] + " " +  final[inside_result.span()[1]:]).lstrip().rstrip()

    outside_result = re.search("(^|\s)out(?:side)*\W",final.lower())
    if outside_result != None:
        final = (final[:outside_result.span()[0]] + " " +  final[outside_result.span()[1]:]).lstrip().rstrip()

    around_result = re.search("(^|\s)a*round\W",final.lower())
    if around_result != None:
        final = (final[:around_result.span()[0]] + " " +  final[around_result.span()[1]:]).lstrip().rstrip()

    with_result = re.search("(^|\s)with(?:in)*\W",final.lower())
    if with_result != None:
        final = (final[:with_result.span()[0]] + " " +  final[with_result.span()[1]:]).lstrip().rstrip()
    
    on_result = re.search("(^|\s)on(?:to)*\W",final.lower())
    if on_result != None:
        final = (final[:on_result.span()[0]] + " " +  final[on_result.span()[1]:]).lstrip().rstrip()

    to_result = re.search("(^|\s)to(wards*)*\W",final.lower())
    if to_result != None:
        final = (final[:to_result.span()[0]] + " " +  final[to_result.span()[1]:]).lstrip().rstrip()

    return final

# takes a "test" string to compare to a "standard" string, using multiple heuristics
def isDerivativeOfSecond(depgram, test:str, standard:str):
    test_words = re.findall("[\d\w\-\/\']+", test)
    standard_words = re.findall("[\d\w\-\/\']+", standard)
    # standard_words_plural = list(sw + "s" for sw in standard_words)
    # print(test_words)
    # first score: overall score of how many words from the test string are in the standard string
    matches_score = 0
    for tt in test_words:
        # if tt in standard_words:
        #     num_matches += 1
        # for sw in standard_words:
        search_result = re.search("(^|\W)" + tt + "(\W|$)", standard)
        if search_result != None:
            matches_score += 1 / len(standard_words)

    general_prop = matches_score / len(test_words)

    # add other scores if necessary
    
    return (standard, general_prop, test)
    
test_phrase = 'onto a cutting board'
print(removeXWords(depgram, test_phrase))
test_doc = depgram(test_phrase)
# consti = test_doc.sentences[0].constituency
# deps = getDependency(test_doc.sentences[0].dependencies)[0]
# for dd in deps.keys():
#     print(deps[dd])
isDerivativeOfSecond(depgram, test_phrase, "skillet")
tool_rankings = []
for wrd in tools:
    tool_rankings.append(isDerivativeOfSecond(depgram, removeXWords(depgram, test_phrase), wrd))

tool_rankings.sort(key=lambda x: x[1], reverse=True)

if len(tool_rankings) > 0:
    print(tool_rankings[0])

ingredient_rankings = []
for ingredient in ingredients:
    ingredient = ingredient.main_comp
    ingredient_rankings.append(isDerivativeOfSecond(depgram, removeXWords(depgram, 'Beat cream cheese, mascarpone cheese, and sugar in a large bowl with an electric mixer until very smooth, 2 to 3 minutes.'), ingredient))

ingredient_rankings.sort(key=lambda x: x[1], reverse=True)

if len(ingredient_rankings) > 0:
    print(ingredient_rankings[0])

cutting board
('board', 0.5, 'cutting board')
('cream cheese', 0.08333333333333333, 'Beat cream cheese, mascarpone cheese, and sugar large bowl an electric mixer until very smooth, 2 3 minutes.')


In [6]:
test_phrases = ['1 cup butter, softened', '1 cup white sugar', '1 cup packed brown sugar', '2 eggs', '2 teaspoons vanilla extract', '1 teaspoon baking soda', '2 teaspoons hot water', '0.5 teaspoon salt', '3 cups all-purpose flour', '2 cups semisweet chocolate chips', 'a cup chopped walnuts', 'an orange']
test_phrases = ['1 tablespoon olive oil', '1 small onion, diced', '4 cloves garlic, minced', '1.5 pounds ground beef', '1 teaspoon garlic powder', '1 (28 ounce) jar sausage flavored spaghetti sauce', '1 (8 ounce) can tomato sauce', '1 teaspoon dried oregano', '1 (8 ounce) package shredded mozzarella cheese', '1 (8 ounce) package shredded provolone cheese', '1 (15 ounce) container ricotta cheese', '0.25 cup milk', '2 large eggs', '0.5 teaspoon dried oregano', '9 no-boil lasagna noodles', '0.25 cup grated Parmesan cheese']
test_phrases = ['2 (7 ounce) packages shirataki noodles, drained', '1 tablespoon vegetable oil', '1 (12 ounce) package tofu, cut into chunks', '0.25 cup reduced-sodium soy sauce', '0.5 cup lemon juice', '0.25 cup white sugar', '2 tablespoons peanut butter', '1 tablespoon sriracha hot sauce', '2 eggs', '1 (4.5 ounce) can mushrooms', '0.5 cup chopped cashews, divided', '1 cup bean sprouts', '1 lime, cut into wedges']
test_phrases = ['3 cups Burgundy wine', '2 onions, thinly sliced', '2 carrots, chopped', '2 tablespoons brandy', '1 clove garlic, crushed', '10 whole black peppercorns', '1 teaspoon salt', '1 sprig fresh parsley', '1 bay leaf', '2 pounds cubed beef chuck roast', '4 tablespoons olive oil, divided', '0.25 pound bacon, cubed', '2 onions, chopped', '3 tablespoons all-purpose flour', '2 cloves garlic, crushed', '1 tablespoon tomato paste', '1 (10.5 ounce) can beef broth', 'salt and pepper to taste', '4 tablespoons butter', '1 pound fresh mushrooms, sliced']
test_phrases = ['0.25 cup sesame oil', '0.25 cup lemon juice', '0.25 cup soy sauce', '2 tablespoons brown sugar, or more to taste', '1 tablespoon sesame seeds', '1 teaspoon ground mustard', '1 teaspoon ground ginger', '0.25 teaspoon garlic powder', '4 (6 ounce) salmon steaks']
test_phrases = ['2 tablespoons olive oil', '1 carrot, diced', '0.5 green bell pepper, diced', '2 cups shrimp, peeled and deveined', '0.5 onion, diced', '0.5 (15.25 ounce) can whole kernel corn, drained', '2 cloves garlic, thinly sliced', '1 tablespoon olive oil', '2 eggs, beaten', '4 cups cooked rice, cooled - or more to taste', '2 tablespoons oyster sauce, or more to taste', '2 tablespoons soy sauce', '1 tablespoon butter', '0.5 teaspoon salt', '1 teaspoon butter, or as needed', '4 eggs, divided']
test_phrases = ['1 (12 ounce) package ladyfingers', '0.25 cup unsalted butter, melted', '2 tablespoons coffee-flavored liqueur', '3 (8 ounce) packages cream cheese, softened', '1 (8 ounce) container mascarpone cheese, softened', '1 cup white sugar', '2 tablespoons coffee-flavored liqueur', '0.25 cup all-purpose flour', '2 large eggs', '1 teaspoon heavy cream, or more as needed', '0.25 ounce semisweet chocolate'] 
# test_phrases = ['1 tablespoon vegetable oil', '1 cup long grain white rice', '1.5 cups chicken broth', '1 tomato, seeded and chopped', '0.5 onion, finely chopped', '0.5 green bell pepper, finely chopped', '1 fresh jalapeno pepper, chopped', '0.5 cup chopped fresh cilantro', '1 cube chicken bouillon', '1 clove garlic, halved', '0.5 teaspoon ground cumin', 'salt and pepper to taste']







# test_phrases = ['1 (28 ounce) jar sausage flavored spaghetti sauce', '1 (8 ounce) can tomato sauce', '1 (8 ounce) package shredded mozzarella cheese', '1 (8 ounce) package shredded provolone cheese', '1 (15 ounce) container ricotta cheese']
# test_phrases = ['1 (12 ounce) package tofu, cut into chunks']
# test_phrases = ['0.25 cup reduced-sodium soy sauce']
# test_doc = depgram(removeProblemWords(test_phrase))
# test_phrases = ['1 (8 ounce) container mascarpone cheese, softened']

# print(test_doc.sentences[0].constituency, 0)
# print(test_doc.sentences[0].constituency, 0)
# depe = getDependency(test_doc.sentences[0].dependencies)
# for dd in depe[0]:
#     print(depe[0][dd])

# # removeProblemWords(test_phrase)
# getHeadWord(depe[0])
# determineVaguenessFromDep(depe[0])

# print(type(["hiya"]))
ingredients = []

for tp in test_phrases:
    and_result = re.search("(.+)\sand\s(.+)", tp)
    if and_result != None:
        ingredients.append(getIngredientParameters(depgram, and_result.group(1).lstrip().rstrip()))
        tp = and_result.group(2).lstrip().rstrip()
    ingredients.append(getIngredientParameters(depgram, tp))

for ing in ingredients:
    print(ing)

Ingredient: ladyfingers
	Quantity: N/A
	Measurement: 12 ounce package
	Descriptors: 
		N/A
	Original text: 1 (12 ounce) package ladyfingers
Ingredient: butter
	Quantity: 0.25
	Measurement: cup
	Descriptors: 
		unsalted
		melted
	Original text: 0.25 cup unsalted butter, melted
Ingredient: liqueur
	Quantity: 2
	Measurement: tablespoons
	Descriptors: 
		flavored
	Original text: 2 tablespoons coffee-flavored liqueur
Ingredient: cream cheese
	Quantity: 3
	Measurement: 8 ounce packages
	Descriptors: 
		softened
	Original text: 3 (8 ounce) packages cream cheese, softened
Ingredient: mascarpone cheese
	Quantity: 1
	Measurement: 8 ounce container
	Descriptors: 
		softened
	Original text: 1 (8 ounce) container mascarpone cheese, softened
Ingredient: sugar
	Quantity: 1
	Measurement: cup
	Descriptors: 
		white
	Original text: 1 cup white sugar
Ingredient: liqueur
	Quantity: 2
	Measurement: tablespoons
	Descriptors: 
		flavored
	Original text: 2 tablespoons coffee-flavored liqueur
Ingredient: purpo

In [8]:
import steps_parser_ver2 as spv2

print(ingredients)
jjj = spv2.doParsing(pipe2, depgram, steps_para1, ingredients)

[<__main__.Ingredient object at 0x000001D925A43400>, <__main__.Ingredient object at 0x000001D925928A60>, <__main__.Ingredient object at 0x000001D925A428F0>, <__main__.Ingredient object at 0x000001D925A43280>, <__main__.Ingredient object at 0x000001D925A40D00>, <__main__.Ingredient object at 0x000001D925A40E50>, <__main__.Ingredient object at 0x000001D925939CC0>, <__main__.Ingredient object at 0x000001D9258E1810>, <__main__.Ingredient object at 0x000001D9258E17B0>, <__main__.Ingredient object at 0x000001D925A0B880>, <__main__.Ingredient object at 0x000001D925A0B430>]
1
[({
  "id": 0,
  "text": "ROOT"
}, 'root', {
  "id": 1,
  "text": "oven",
  "lemma": "oven",
  "upos": "NOUN",
  "xpos": "NN",
  "feats": "Number=Sing",
  "head": 0,
  "deprel": "root",
  "start_char": 0,
  "end_char": 4
})]
[({
  "id": 2,
  "text": "degrees",
  "lemma": "degree",
  "upos": "NOUN",
  "xpos": "NNS",
  "feats": "Number=Plur",
  "head": 0,
  "deprel": "root",
  "start_char": 4,
  "end_char": 11
}, 'nummod', 

In [9]:
action_priority_list = ['bake', 'boil', 'fry', 'cook', 'flambe', 'microwave', 'steam', 'broil', 'grill', 'roast', 'sear', 'saute', 'sauté','poach', 'simmer', 'braise', 'barbeque', 'barbecue',# cooking methods have priority
                        'freeze','cool','refrigerate' # then freezing/cooling methods
                        ]
all_tools = []
all_actions = []
for j in jjj:
    print(jjj[j])
    for aa in jjj[j].actions:
        all_actions.append(jjj[j].actions[aa][1].lower())
    for tt in jjj[j].tools:
        curr_tool = jjj[j].tools[tt][2]
        # if len(all_tools) > 0 and curr_tool in all_tools:#any(spv2.isDerivativeOfSecond(curr_tool, xx)[1] > 0 for xx in all_tools):
        #     continue
        if curr_tool != '':
            all_tools.append(curr_tool)
            # print(all_tools)

all_tools.sort(key=lambda x: len(x), reverse=True)

# print(all_tools)
selected_tools = []
for tt in all_tools:
    t_consti = depgram(tt).sentences[0].constituency
    # print(str(t_consti))
    if "(VP" in str(t_consti) or "(RB" in str(t_consti):
        continue
    elif (any(spv2.isDerivativeOfSecond(tt, xx)[1] > 0 for xx in selected_tools) and len(re.findall(tt, str(all_tools))) < 4) or tt in selected_tools:
        # print(re.findall(tt, str(all_tools)))
        continue
    else:
        selected_tools.append(tt)

# TOOL LIST
print(selected_tools)


main_action = ""
for ap in action_priority_list:
    if any(spv2.isDerivativeOfSecond(ap,aa)[1] > 0 for aa in all_actions):
        main_action = ap
        break

if main_action == "":
    main_action = all_actions[0]

# PRIMARY COOKING METHOD
print(main_action)

id: 1
Original text: Preheat the oven to 350 degrees F (175 degrees C).
	Root action: Preheat
	All actions: {0: ('Preheat', 'Preheat the oven to 350 degrees F 175 degrees C')}
	Ingredients: {}
	Tools: {'the oven': ('the oven', 'preheat', 'oven')}
	Details: {0: ('to', 'to 350 degrees F 175 degrees C', 'preheat')}
id: 2
Original text: Place a pan of water on the lowest oven rack.
	Root action: Place
	All actions: {0: ('Place', 'Place a pan of water on the lowest oven rack')}
	Ingredients: {}
	Tools: {'a pan of water': ('a pan of water', 'place', 'pan water'), 'the lowest oven rack': ('the lowest oven rack', 'place', 'oven rack')}
	Details: {0: ('of', 'of water', 'place'), 1: ('on', 'on the lowest oven rack', 'place')}
id: 3
Original text: Crush ladyfingers into fine crumbs.
	Root action: Crush
	All actions: {0: ('Crush', 'Crush ladyfingers into fine crumbs')}
	Ingredients: {'ladyfingers': ('ladyfingers', 'crush', 'ladyfingers')}
	Tools: {}
	Details: {0: ('into', 'into fine crumbs', 'crus

In [17]:
### MAKE HEALTHY
def makeHealthy(ingredients: list, main_action: str, action_priority_list: list):
    new_instructions = []

    min_transforms = 10

    # ingredient word as key to an array of options. this array is made of tuples for suggestions. the 1st (not 0th) element specifies whether it is a quantity mod ('multi'), a substitution ('sub'), or a modded ver ('mod')
    healthy_ingr_subs = {'chocolate':[('dark', 'mod'), ('hazelnut', 'sub')], 'sugar':[('coconut sugar', 'sub'), (0.75, 'multi')], 'salt':[(0.25, 'multi')], 'cheese':[('reduced-fat', 'mod')], 'milk':[('skim', 'mod'), ('almond milk', 'sub')]}

    # basically, just analyze the steps, ingredients, and main cooking action to see what can be made healthier through IF statements, and maybe webscraped substitutions

    # first, if the cooking method is unhealthy, change it
    if main_action == "fry":
        new_instructions.append("Instead of frying, try sauteing instead. This means to not use very much cooking oil in the pan, though a little bit to keep it from sticking is recommended. It should take around the same amount of time as frying.")

    # now, go through the ingredients
    num_subs = 0 # counter for the number of substitutions made. if not at least 3 substitions are made, do some later

    ingr_seen = []

    for ingr in ingredients:

        # replace meats with healthier meats
        isMeat = checkMembership(ingr.main_comp, 'meat')
        isFish = checkMembership(ingr.main_comp, 'fish')
        # 'turkey' in ingr.main_comp
        # 'chicken' in ingr.main_comp
        # print(ingr.main_comp + ": " + str(isMeat))
        # print(ingr.main_comp + ": " + str(isFish))
        if isMeat > 0.8 and isFish < 0.2 and not 'turkey' in ingr.main_comp.lower() and not 'chicken' in ingr.main_comp.lower():
            new_instructions.append("Instead of " + ingr.main_comp + ", it would be healthier to use chicken or turkey (we will default to chicken). You can use the same amount (" + getQuantityString(ingr, 1) + "), though it may take longer to cook than red meats since you don't want it \"rare.\"")
            num_subs += 1
            ingr.main_comp = "chicken"
            continue
        # print(getQuantityString(ingr, 0.5))
        
        # go through healthy ingr subs and do those substitutions
        for hk in healthy_ingr_subs.keys():
            if spv2.isDerivativeOfSecond(hk, ingr.main_comp)[1] > 0:
                # different things to say based on if there are mod, sub, or multi tagged suggestions
                # first, sort the diff tag suggestions into different lists
                ingr_seen.append(ingr.main_comp)
                tags = healthy_ingr_subs[hk]
                modss = []
                subss = []
                multis = []
                for tt in tags:
                    if tt[1] == "mod":
                        modss.append(tt[0])
                    elif tt[1] == "sub":
                        subss.append(tt[0])
                    elif tt[1] == "multi":
                        multis.append(tt[0])

                ingr_res = ""
                for mm in modss:
                    ingr_res += "You can use a " + mm + " version of " + ingr.main_comp + " instead to make this healthier. "
                for su in subss:
                    ingr_res += "You could substitute " + ingr.main_comp + " with " + su + ". "
                for mu in multis:
                    ingr_res += "If you don't want to replace " + ingr.main_comp + ", you could use " + str(mu) + " of the original amount: " + getQuantityString(ingr,1) + "(new amount: " + getQuantityString(ingr, mu) + "). "

                new_instructions.append(ingr_res)
                num_subs += 1
                break

    if num_subs < min_transforms:
        for ingr in ingredients:
            # skip past ingredients we already have a transformation for
            if ingr.main_comp in ingr_seen:
                continue
            # find a substitution for an untransformed ingredient
            new_sub = getSubList(ingr.main_comp, ["healthy"])[0]
            new_instructions.append("You could substitute " + ingr.main_comp + " with " + new_sub.lower() + ".")
            num_subs += 1
            if num_subs >= min_transforms:
                break 

    for abc in new_instructions:
        print(abc)
    return new_instructions

makeHealthy(ingredients, main_action, action_priority_list)
    


You can use a reduced-fat version of cream cheese instead to make this healthier. 
You can use a reduced-fat version of mascarpone cheese instead to make this healthier. 
You could substitute sugar with coconut sugar. If you don't want to replace sugar, you could use 0.75 of the original amount: 1 cup(new amount: 0.75 cup). 
You can use a dark version of semisweet chocolate instead to make this healthier. You could substitute semisweet chocolate with hazelnut. 
You could substitute ladyfingers with 1 – biscotti.
You could substitute butter with for 1 cup of butter, substitute ½ cup applesauce and a ½ cup oil, she recommends. (one cup of butter has 1,630 calories and 184 grams of fat. a ....
You could substitute liqueur with exercise.
You could substitute liqueur with exercise.
You could substitute purpose flour with coconut flour.
You could substitute eggs with applesauce.


['You can use a reduced-fat version of cream cheese instead to make this healthier. ',
 'You can use a reduced-fat version of mascarpone cheese instead to make this healthier. ',
 "You could substitute sugar with coconut sugar. If you don't want to replace sugar, you could use 0.75 of the original amount: 1 cup(new amount: 0.75 cup). ",
 'You can use a dark version of semisweet chocolate instead to make this healthier. You could substitute semisweet chocolate with hazelnut. ',
 'You could substitute ladyfingers with 1 – biscotti.',
 'You could substitute butter with for 1 cup of butter, substitute ½ cup applesauce and a ½ cup oil, she recommends. (one cup of butter has 1,630 calories and 184 grams of fat. a\xa0....',
 'You could substitute liqueur with exercise.',
 'You could substitute liqueur with exercise.',
 'You could substitute purpose flour with coconut flour.',
 'You could substitute eggs with applesauce.']

In [119]:
### MAKE UNHEALTHY

# replace oils with melted butter
# add more salt or just some salt in general lol
# extra sugar if there is sugar
# add whipped cream or heavy cream if sugar
# pan cooking methods -> fry
# white meat/fish -> red meat

def makeUnhealthy(ingredients: list, main_action: str, action_priority_list: list):
    new_instructions = []

    min_transforms = 4

    # ingredient word as key to an array of options. this array is made of tuples for suggestions. the 1st (not 0th) element specifies whether it is a quantity mod ('multi'), a substitution ('sub'), or a modded ver ('mod')
    unhealthy_ingr_subs = {'oil':[('corn', 'mod'), ('melted butter', 'sub')], 'sugar':[(1.25, 'multi'), ('saccharin', 'sub')], 'salt':[(1.25, 'multi')], 'egg':[('1.5', 'multi')], 'milk':[('heavy cream', 'sub')]}

    # basically, just analyze the steps, ingredients, and main cooking action to see what can be made healthier through IF statements, and maybe webscraped substitutions

    # first, if the cooking method is healthy and not baking, change it to frying
    if any(spv2.isDerivativeOfSecond(apl, main_action)[1] > 0 for apl in ['flambe', 'steam', 'broil', 'grill', 'roast', 'sear', 'saute', 'sauté','poach', 'simmer', 'braise', 'barbeque', 'barbecue']):
        new_instructions.append("Instead of trying to " + main_action +  ", try frying instead. This means a pan, a stove, and a lot of oil are needed, though butter could also be used.")

    # now, go through the ingredients
    num_subs = 0 # counter for the number of substitutions made. if not at least 3 substitions are made, do some later

    ingr_seen = []

    for ingr in ingredients:

        # replace meats with healthier meats
        isMeat = checkMembership(ingr.main_comp, 'meat')
        isFish = checkMembership(ingr.main_comp, 'fish')
        isWhiteMeat = checkMembership(ingr.main_comp, 'white meat')
        isRedMeat = checkMembership(ingr.main_comp, 'red meat')
        # 'turkey' in ingr.main_comp
        # 'chicken' in ingr.main_comp
        # print(ingr.main_comp + ": " + str(isMeat))
        # print(ingr.main_comp + ": " + str(isFish))
        if isFish > 0.8 or (isMeat > 0.8 and (isWhiteMeat > isRedMeat or "chicken" in ingr.main_comp.lower())):
            new_instructions.append("Instead of " + ingr.main_comp + ", it would be less healthy to use a red meat like beef, pork, lamb, etc. We'll default to beef. You can use the same amount (" + getQuantityString(ingr, 1) + "). Cooking time is up to you depending on how you want your meat done (well-done, medium, rare, etc.).")
            num_subs += 1
            ingr.main_comp = "beef"
            continue
        # print(getQuantityString(ingr, 0.5))
        
        # go through unhealthy ingr subs and do those substitutions
        for hk in unhealthy_ingr_subs.keys():
            if spv2.isDerivativeOfSecond(hk, ingr.main_comp)[1] > 0:
                # different things to say based on if there are mod, sub, or multi tagged suggestions
                # first, sort the diff tag suggestions into different lists
                ingr_seen.append(ingr.main_comp)
                tags = unhealthy_ingr_subs[hk]
                modss = []
                subss = []
                multis = []
                for tt in tags:
                    if tt[1] == "mod":
                        modss.append(tt[0])
                    elif tt[1] == "sub":
                        subss.append(tt[0])
                    elif tt[1] == "multi":
                        multis.append(tt[0])

                ingr_res = ""
                for mm in modss:
                    ingr_res += "You can use a " + mm + " version of " + ingr.main_comp + " instead to make this unhealthier. "
                for su in subss:
                    ingr_res += "You could substitute " + ingr.main_comp + " with " + su + ". "
                for mu in multis:
                    ingr_res += "Use more " + ingr.main_comp + ". You could use " + str(mu) + " of the original amount: " + getQuantityString(ingr, mu) + ". "

                new_instructions.append(ingr_res)
                num_subs += 1
                break

    if num_subs < min_transforms:
        if not 'salt' in ingr_seen:
            new_instructions.append("You could add 1/2 teaspoon of salt.")
            num_subs += 1
        for ingr in ingredients:
            # skip past ingredients we already have a transformation for
            if ingr.main_comp in ingr_seen:
                continue
            # find a substitution for an untransformed ingredient
            new_sub = getSubList(ingr.main_comp, ["unhealthy"])[0]
            new_instructions.append("You could substitute " + ingr.main_comp + " with " + new_sub.lower() + ".")
            num_subs += 1
            if num_subs >= min_transforms:
                break 

    for abc in new_instructions:
        print(abc)
    return new_instructions


# makeUnhealthy(ingredients, main_action, action_priority_list)

You could substitute sugar with saccharin. Use more sugar. You could use 1.25 of the original amount: 1.25 cup. 
You could add 1/2 teaspoon of salt.
You could substitute ladyfingers with unhealthy.
You could substitute butter with olive oil.


['You could substitute sugar with saccharin. Use more sugar. You could use 1.25 of the original amount: 1.25 cup. ',
 'You could add 1/2 teaspoon of salt.',
 'You could substitute ladyfingers with unhealthy.',
 'You could substitute butter with olive oil.']

In [123]:
### MAKE VEGETARIAN
# also try and do a bit of vegan-leaning stuff just to have more stuff changed

# replace meat/fish with tofu or eggplant (default to tofu)
# maybe have a message if the recipe doesn't seem to have meat in it
# replace eggs with some substitute
# replace milk with plant based milk
def makeVeg(ingredients: list, main_action: str, action_priority_list: list):
    new_instructions = []

    min_transforms = 3

    # ingredient word as key to an array of options. this array is made of tuples for suggestions. the 1st (not 0th) element specifies whether it is a quantity mod ('multi'), a substitution ('sub'), or a modded ver ('mod')
    veg_ingr_subs = {'egg':[('aquafaba', 'sub')], 'cheese':[('cashew cheese', 'sub')], 'milk':[('almond milk', 'sub')], 'honey':[('maple syrup', 'sub')]}

    # basically, just analyze the steps, ingredients, and main cooking action to see what can be made healthier through IF statements, and maybe webscraped substitutions


    # now, go through the ingredients
    num_subs = 0 # counter for the number of substitutions made. if not at least 3 substitions are made, do some later

    ingr_seen = []

    for ingr in ingredients:

        # replace meats with healthier meats
        isMeat = checkMembership(ingr.main_comp, 'meat')
        isFish = checkMembership(ingr.main_comp, 'fish')
        # 'turkey' in ingr.main_comp
        # 'chicken' in ingr.main_comp
        # print(ingr.main_comp + ": " + str(isMeat))
        # print(ingr.main_comp + ": " + str(isFish))
        if isMeat > 0.8 or isFish > 0.8:
            new_instructions.append("Instead of " + ingr.main_comp + ", use either tofu or eggplant. We will default to tofu for fish and eggplant for other meats, but either could be used. You can use the same amount (" + getQuantityString(ingr, 1) + "). These usually take around 20-35 minutes to cook depending on the method, but are likely fine with the original time specified by the recipe.")
            num_subs += 1
            if isMeat > isFish:
                ingr.main_comp = "eggplant"
            else:
                ingr.main_comp = "tofu"
            continue
        # print(getQuantityString(ingr, 0.5))
        
        # go through healthy ingr subs and do those substitutions
        for hk in veg_ingr_subs.keys():
            if spv2.isDerivativeOfSecond(hk, ingr.main_comp)[1] > 0:
                # different things to say based on if there are mod, sub, or multi tagged suggestions
                # first, sort the diff tag suggestions into different lists
                ingr_seen.append(ingr.main_comp)
                tags = veg_ingr_subs[hk]
                modss = []
                subss = []
                multis = []
                for tt in tags:
                    if tt[1] == "mod":
                        modss.append(tt[0])
                    elif tt[1] == "sub":
                        subss.append(tt[0])
                    elif tt[1] == "multi":
                        multis.append(tt[0])

                ingr_res = ""
                for mm in modss:
                    ingr_res += "You can use a " + mm + " version of " + ingr.main_comp + " instead to make this more vegetarian-friendly. "
                for su in subss:
                    ingr_res += "You could substitute " + ingr.main_comp + " with " + su + ". "
                for mu in multis:
                    ingr_res += "If you don't want to replace " + ingr.main_comp + ", you could use " + str(mu) + " of the original amount: " + getQuantityString(ingr, mu) + ". "

                new_instructions.append(ingr_res)
                num_subs += 1
                break

    if num_subs < min_transforms:
        for ingr in ingredients:
            # skip past ingredients we already have a transformation for
            if ingr.main_comp in ingr_seen:
                continue
            # find a substitution for an untransformed ingredient
            new_sub = getSubList(ingr.main_comp, ["vegetarian"])[0]
            new_instructions.append("You could substitute " + ingr.main_comp + " with " + new_sub.lower() + ".")
            num_subs += 1
            if num_subs >= min_transforms:
                break 

    for abc in new_instructions:
        print(abc)
    return new_instructions

makeVeg(ingredients, main_action, action_priority_list)

You could substitute cream cheese with cashew cheese. 
You could substitute mascarpone cheese with cashew cheese. 
You could substitute ladyfingers with 1 – biscotti.


['You could substitute cream cheese with cashew cheese. ',
 'You could substitute mascarpone cheese with cashew cheese. ',
 'You could substitute ladyfingers with 1 – biscotti.']

In [20]:
# MAKE NON-VEG

# try to determine main ingredient through matching ingredients to the recipe name. look for common meat substitutes like tofu, eggplant, mushroom, etc. before trying that, as this is more likely to be a good transformation
# once the main ingredient is identified, figure out what meat it should be substituted for. probably default to chicken
# make misc other changes like adding citrus or changing any substitutes to the original thing they are replacing
def makeNonVeg(ingredients: list, main_action: str, action_priority_list: list, recipe_name: str):
    new_instructions = []

    min_transforms = 2

    # ingredient word as key to an array of options. this array is made of tuples for suggestions. the 1st (not 0th) element specifies whether it is a quantity mod ('multi'), a substitution ('sub'), or a modded ver ('mod')
    veg_ingr_subs = {'eggplant':[('beef', 'sub')], 'tofu':[('chicken', 'sub')], 'mushrooms':[('pork', 'sub')]}

    # basically, just analyze the steps, ingredients, and main cooking action to see what can be made healthier through IF statements, and maybe webscraped substitutions
    

    # now, go through the ingredients
    num_subs = 0 # counter for the number of substitutions made. if not at least 3 substitions are made, do some later

    ingr_seen = []

    for ingr in ingredients: 
        # go through healthy ingr subs and do those substitutions
        for hk in veg_ingr_subs.keys():
            if spv2.isDerivativeOfSecond(hk, ingr.main_comp)[1] > 0:
                # different things to say based on if there are mod, sub, or multi tagged suggestions
                # first, sort the diff tag suggestions into different lists
                ingr_seen.append(ingr.main_comp)
                tags = veg_ingr_subs[hk]
                modss = []
                subss = []
                multis = []
                for tt in tags:
                    if tt[1] == "mod":
                        modss.append(tt[0])
                    elif tt[1] == "sub":
                        subss.append(tt[0])
                    elif tt[1] == "multi":
                        multis.append(tt[0])

                ingr_res = ""
                for mm in modss:
                    ingr_res += "You can use a " + mm + " version of " + ingr.main_comp + " instead to make this more non-veg. "
                for su in subss:
                    ingr_res += "You could substitute " + ingr.main_comp + " with " + su + ". "
                for mu in multis:
                    ingr_res += "If you don't want to replace " + ingr.main_comp + ", you could use " + str(mu) + " of the original amount: " + getQuantityString(ingr, mu) + ". "

                new_instructions.append(ingr_res)
                num_subs += 1
                break

    if num_subs < min_transforms:
        new_instructions.append("Sprinkle bacon bits on top of the finished product.")
        num_subs += 1
        for ingr in ingredients:
            
            if num_subs >= min_transforms:
                break 
            # skip past ingredients we already have a transformation for
            if ingr.main_comp in ingr_seen:
                continue
            # find a substitution for an untransformed ingredient
            new_sub = getSubList(ingr.main_comp, ["non-veg"])[0]
            new_instructions.append("You could substitute " + ingr.main_comp + " with " + new_sub.lower() + ".")
            num_subs += 1
            if num_subs >= min_transforms:
                break 

    for abc in new_instructions:
        print(abc)
    return new_instructions

makeNonVeg(ingredients, main_action, action_priority_list, "tiramisu cheesecake")

Sprinkle bacon bits on top of the finished product.
You could substitute ladyfingers with .


['Sprinkle bacon bits on top of the finished product.',
 'You could substitute ladyfingers with .']

In [21]:
### MAKE INDIAN

# butter -> ghee
# cinnamon, ginger -> cardamom
# citrus -> coriander
# pepper -> tumeric
# add cumin (1/2 tsp)
# sugar -> jaggery powder
# rice -> basmati
# jalapeno -> chilli
# flour -> rice flour
# milk -> coconut milk
# fruit -> tamarind
# beans -> chickpeas
# peas -> chickpeas
def makeInd(ingredients: list, main_action: str, action_priority_list: list):
    new_instructions = []

    min_transforms = 3

    # ingredient word as key to an array of options. this array is made of tuples for suggestions. the 1st (not 0th) element specifies whether it is a quantity mod ('multi'), a substitution ('sub'), or a modded ver ('mod')
    ind_ingr_subs = {'butter':[('ghee', 'sub')], 'cinnamon':[('cardamom', 'sub')], 'milk':[('coconut milk', 'sub')], 'ginger':[('cardamom', 'sub')], 'citrus':[('coriander', 'sub')], 'pepper':[('tumeric', 'sub')], 'sugar':[('jaggery powder', 'sub')], 'rice':[('basmati', 'mod')], 'jalapeno':[('chilli pepper', 'sub')],'chilli':[(1.25, 'multi')], 'flour':[('rice', 'mod')], 'beans':[('chickpeas', 'sub')], 'peas':[('chickpeas', 'sub')]}

    # basically, just analyze the steps, ingredients, and main cooking action to see what can be made healthier through IF statements, and maybe webscraped substitutions


    # now, go through the ingredients
    num_subs = 0 # counter for the number of substitutions made. if not at least 3 substitions are made, do some later

    ingr_seen = []

    for ingr in ingredients:

        # replace meats with healthier meats
        isFruit = checkMembership(ingr.main_comp, 'fruit')
        # 'turkey' in ingr.main_comp
        # 'chicken' in ingr.main_comp
        # print(ingr.main_comp + ": " + str(isMeat))
        # print(ingr.main_comp + ": " + str(isFish))
        if isFruit > 0.8:
            new_instructions.append("Instead of " + ingr.main_comp + ", use tamarind. You can use the same amount (" + getQuantityString(ingr, 1) + ").")
            num_subs += 1
            ingr.main_comp = "tamarind"
            
            continue
        # print(getQuantityString(ingr, 0.5))
        
        # go through healthy ingr subs and do those substitutions
        for hk in ind_ingr_subs.keys():
            if spv2.isDerivativeOfSecond(hk, ingr.main_comp)[1] > 0:
                # different things to say based on if there are mod, sub, or multi tagged suggestions
                # first, sort the diff tag suggestions into different lists
                ingr_seen.append(ingr.main_comp)
                tags = ind_ingr_subs[hk]
                modss = []
                subss = []
                multis = []
                for tt in tags:
                    if tt[1] == "mod":
                        modss.append(tt[0])
                    elif tt[1] == "sub":
                        subss.append(tt[0])
                    elif tt[1] == "multi":
                        multis.append(tt[0])

                ingr_res = ""
                for mm in modss:
                    ingr_res += "You can use a " + mm + " version of " + ingr.main_comp + " instead to make this more Indian. "
                for su in subss:
                    ingr_res += "You could substitute " + ingr.main_comp + " with " + su + ". "
                for mu in multis:
                    ingr_res += "If you don't want to replace " + ingr.main_comp + ", you could use " + str(mu) + " of the original amount: " + getQuantityString(ingr, mu) + ". "

                new_instructions.append(ingr_res)
                num_subs += 1
                break

    if num_subs < min_transforms:
        for ingr in ingredients:
            # skip past ingredients we already have a transformation for
            if ingr.main_comp in ingr_seen:
                continue
            # find a substitution for an untransformed ingredient
            new_sub = getSubList(ingr.main_comp, ["indian"])[0]
            new_instructions.append("You could substitute " + ingr.main_comp + " with " + new_sub.lower() + ".")
            num_subs += 1
            if num_subs >= min_transforms:
                break 

    for abc in new_instructions:
        print(abc)
    return new_instructions

makeInd(ingredients, main_action, action_priority_list)

You could substitute butter with ghee. 
You could substitute sugar with jaggery powder. 
You can use a rice version of purpose flour instead to make this more Indian. 


['You could substitute butter with ghee. ',
 'You could substitute sugar with jaggery powder. ',
 'You can use a rice version of purpose flour instead to make this more Indian. ']

In [22]:
### CHANGE PORTIONS
def getNewPortions(ingredients:list, multiplier: float):
    new_ingr = []
    for ingr in ingredients:
        newQ = None
        newSQ = None
        if ingr.quantity != None:
            newQ = str(floatFromFractionString(ingr.quantity) * multiplier)
        if ingr.quantity == None and ingr.sub_quantity != None:
            newSQ = str(floatFromFractionString(ingr.sub_quantity) * multiplier)
        elif ingr.sub_quantity != None:
            newSQ = ingr.sub_quantity
        new_ingr.append(Ingredient(ingr.og_text, ingr.main_comp, newQ, ingr.measurement, newSQ, ingr.sub_measurement, ingr.descriptors))

    for ni in new_ingr:
        print(ni)

    return new_ingr

getNewPortions(ingredients, 2)

Ingredient: ladyfingers
	Quantity: N/A
	Measurement: 24.0 ounce package
	Descriptors: 
		N/A
	Original text: 1 (12 ounce) package ladyfingers
Ingredient: butter
	Quantity: 0.5
	Measurement: cup
	Descriptors: 
		unsalted
		melted
	Original text: 0.25 cup unsalted butter, melted
Ingredient: liqueur
	Quantity: 4.0
	Measurement: tablespoons
	Descriptors: 
		flavored
	Original text: 2 tablespoons coffee-flavored liqueur
Ingredient: cream cheese
	Quantity: 6.0
	Measurement: 8 ounce packages
	Descriptors: 
		softened
	Original text: 3 (8 ounce) packages cream cheese, softened
Ingredient: mascarpone cheese
	Quantity: 2.0
	Measurement: 8 ounce container
	Descriptors: 
		softened
	Original text: 1 (8 ounce) container mascarpone cheese, softened
Ingredient: sugar
	Quantity: 2.0
	Measurement: cup
	Descriptors: 
		white
	Original text: 1 cup white sugar
Ingredient: liqueur
	Quantity: 4.0
	Measurement: tablespoons
	Descriptors: 
		flavored
	Original text: 2 tablespoons coffee-flavored liqueur
Ingred

[<__main__.Ingredient at 0x1d949ddf070>,
 <__main__.Ingredient at 0x1d949ddea40>,
 <__main__.Ingredient at 0x1d949ddead0>,
 <__main__.Ingredient at 0x1d949ddea10>,
 <__main__.Ingredient at 0x1d949ddeb90>,
 <__main__.Ingredient at 0x1d949ddeb60>,
 <__main__.Ingredient at 0x1d949ddebf0>,
 <__main__.Ingredient at 0x1d949ddd180>,
 <__main__.Ingredient at 0x1d949ddd1b0>,
 <__main__.Ingredient at 0x1d949ddd1e0>,
 <__main__.Ingredient at 0x1d949ddd210>]

In [None]:
### LACTOSE FREE


In [12]:
# SUBSTITUTION WEBSCRAPER
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import requests

def get_soup(url:str):
    response = requests.get(url)
    return BeautifulSoup(response.text, "html.parser")

chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

def get_substitute(query:str):
    query = re.sub("[.,;!]", "", query)
    # chrome_options = Options()
    # chrome_options.add_argument("--headless")
    # driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)
    url = "http://www.google.com/search?q=" + query + "&start=" + str((0))
    driver.get(url)
    query_html = BeautifulSoup(driver.page_source, 'html.parser')
    results = query_html.find('div', id="search")
    results = results.find('div', class_="v7W49e")
    results = results.find('div', class_="MjjYud")
    first_attempt = results.find_all('li', class_="TrT0Xe")
    substitutes = []
    if not first_attempt:
        second_attempt = results.find('span', class_="ILfuVd")
        if not second_attempt:
            third_attempt = results.find_all('span', {"class": None, "id": None, "data-ved": None})
            substitutes.append(third_attempt[-1].text)
        else:
            substitutes.append(second_attempt.text)
    else:
        for result in first_attempt:
            result = result.text.split('.')[0]
            substitutes.append(result)
    
    if substitutes:
        # print("No worries. Here are the substitutes. \n")
        # counter = 1
        # for s in substitutes:
            # print(f"{counter}. {s}")
            # counter += 1
            # print()
        return substitutes
    else:
        print('error??')

[WDM] - Downloading: 100%|██████████| 6.78M/6.78M [00:01<00:00, 5.24MB/s]


In [13]:
# checks the zero-shot rating between a potential member and a categorical grouping (ex: "grape", "fruit" should yield a high value)
def checkMembership(test:str, category:str):
    return pipe2(test, category)['scores'][0]

# returns a list of webscraped substitutions for input ingredient, with the parameter keywords added
def getSubList(target:str, parameters:list):
    res_string = "what can I substitute for " + target
    for pp in parameters:
        res_string += pp + " "
    return get_substitute(res_string)

# returns a logical string for the quantity of the ingredient one needs
def getQuantityString(ingr:Ingredient, multiplier: float):
    res = ""
    if ingr.quantity != None:
        if multiplier != 1:
            res += str(floatFromFractionString(ingr.quantity) * multiplier)
        else:
            res += ingr.quantity
    if (ingr.sub_quantity != None or ingr.sub_measurement != None) and ingr.quantity != None:
        res += " ("
    if ingr.sub_quantity != None:
        if ingr.quantity == None and multiplier != 1:
            res += str(floatFromFractionString(ingr.sub_quantity) * multiplier) + " "
        else:
            res += ingr.sub_quantity + " "
    if ingr.sub_measurement != None:
        res += ingr.sub_measurement
    if (ingr.sub_quantity != None or ingr.sub_measurement != None) and ingr.quantity != None:
        res = res.rstrip()
        res += ")"
    if ingr.measurement != None:
        res += " " + ingr.measurement

    return res.lstrip().rstrip()

subs = getSubList("grapes", ["healthy"])
print(subs)
# print(depgram(subs[0]).sentences[0].sentiment)
print(checkMembership("beef", "meat"))



# for each ingredient in a recipe, tag whether they are healthy or veg
ingredient_tags = []
for ingr in ingredients:
    print(ingr.main_comp)
    ingredient_tags.append((ingr.main_comp, checkMembership(ingr.main_comp, 'healthy'), checkMembership(ingr.main_comp, 'unhealthy')))

print(ingredient_tags)


['Cantaloupe', 'Raspberries', 'Strawberries', 'Lemons', 'Watermelons', 'Avocado']
0.9996551871299744
ladyfingers
butter
liqueur
cream cheese
mascarpone cheese
sugar
liqueur
purpose flour
eggs
cream
semisweet chocolate
[('ladyfingers', 0.28030824661254883, 0.14260831475257874), ('butter', 0.20062124729156494, 0.01401258260011673), ('liqueur', 0.21149224042892456, 0.1882508397102356), ('cream cheese', 0.42560112476348877, 0.07041860371828079), ('mascarpone cheese', 0.1270536631345749, 0.014007995836436749), ('sugar', 0.0005132924998179078, 0.9010599255561829), ('liqueur', 0.21149224042892456, 0.1882508397102356), ('purpose flour', 0.614686906337738, 0.01286742277443409), ('eggs', 0.3757863938808441, 0.08893810212612152), ('cream', 0.4646458327770233, 0.13785110414028168), ('semisweet chocolate', 0.16506345570087433, 0.1975330114364624)]


In [126]:
abc_txt = 'grate semisweet chocolate over the top'
abc_doc = depgram(abc_txt)
consti = abc_doc.sentences[0].constituency
deps = getDependency(abc_doc.sentences[0].dependencies)[0]
print(consti)
for dd in deps:
    print(deps[dd])

root_word = deps[list(deps[0].deps.keys())[0]]
if "VB" in root_word.typ:
    text = abc_txt.replace(root_word.text, "")
    print(text)

print(pipe2("turkey", "meat substitute"))
print(pipe2("eggplant", "meat substitute"))

(ROOT (NP (NP (NML (VB grate) (NN semisweet)) (NN chocolate)) (PP (IN over) (NP (DT the) (NN top)))))
ID: 0
	Head ID: -1
	Text: ROOT
	Type: N/A
	Dependent Words: 
		Dep. ID: 1
			Dep. Text: grate
			Relation Type: root
ID: 1
	Head ID: 0
	Text: grate
	Type: VB
	Dependent Words: 
		Dep. ID: 3
			Dep. Text: chocolate
			Relation Type: obj
		Dep. ID: 6
			Dep. Text: top
			Relation Type: obl
ID: 3
	Head ID: 1
	Text: chocolate
	Type: NN
	Dependent Words: 
		Dep. ID: 2
			Dep. Text: semisweet
			Relation Type: compound
ID: 2
	Head ID: 3
	Text: semisweet
	Type: NN
	Dependent Words: 
ID: 6
	Head ID: 1
	Text: top
	Type: NN
	Dependent Words: 
		Dep. ID: 4
			Dep. Text: over
			Relation Type: case
		Dep. ID: 5
			Dep. Text: the
			Relation Type: det
ID: 4
	Head ID: 6
	Text: over
	Type: IN
	Dependent Words: 
ID: 5
	Head ID: 6
	Text: the
	Type: DT
	Dependent Words: 
 semisweet chocolate over the top
{'sequence': 'turkey', 'labels': ['meat substitute'], 'scores': [0.5821810960769653]}
{'sequence': '