In [43]:
import stanza as st
import numpy as np
import re as re
from typing import Optional
import transformers as tra

# Fetch the English stanza models (the log below shows an existing archive being found on disk).
st.download('en')
# Build the English stanza pipeline with its default processors; the explicit
# processor list is left commented out.
# NOTE(review): later cells read `sentence.constituency`, and the commented-out list
# names no constituency processor — presumably it would need one added before being enabled; confirm.
depgram = st.Pipeline('en')#, processors='tokenize,mwt,pos,lemma,depparse,ner')
# Hugging Face pipeline built from the facebook/bart-large-mnli checkpoint.
# NOTE(review): `pipe` is not referenced in the cells visible here — confirm it is used elsewhere.
pipe = tra.pipeline(model="facebook/bart-large-mnli")

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json: 193kB [00:00, 64.2MB/s]                    
2023-03-06 12:09:14 INFO: Downloading default packages for language: en (English) ...
2023-03-06 12:09:15 INFO: File exists: C:\Users\Typic\stanza_resources\en\default.zip
2023-03-06 12:09:19 INFO: Finished downloading models and saved to C:\Users\Typic\stanza_resources.
2023-03-06 12:09:19 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES
Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json: 193kB [00:00, 32.2MB/s]                    
2023-03-06 12:09:21 INFO: Loading these models for language: en (English):
| Processor    | Package   |
----------------------------
| tokenize     | combined  |
| pos          | combined  |
| lemma        | combined  |
| 

In [36]:
class depNode():
    """A single word of a dependency parse together with the words that depend on it.

    Attributes:
        id: identifier of this word.
        head_id: identifier of the word this one depends on.
        text: the word's text.
        typ: the word's type tag (a string such as "VB" or "NN").
        deps: maps a dependent's id to a ``(dep_id, dep_text, rel_type)`` tuple.
    """

    def __init__(self, id:int, head_id:int, text:str, typ: str, deps:dict):
        """Store the word's fields; ``deps=None`` is replaced by a fresh empty dict."""
        self.id = id
        self.head_id = head_id
        self.text = text
        self.typ = typ
        # identity test, not ==, so an argument with a custom __eq__ can never be mistaken for None
        self.deps = {} if deps is None else deps

    def addDependent(self, dep_id:int, dep_text:str, rel_type:str):
        """Record a dependent word; a dep_id that is already recorded is left untouched."""
        if dep_id not in self.deps:
            self.deps[dep_id] = (dep_id, dep_text, rel_type)

    def __str__(self):
        """Return a tab-indented, multi-line description of the node and its dependents."""
        pieces = [
            "ID: " + str(self.id),
            "\n\tHead ID: " + str(self.head_id),
            "\n\tText: " + self.text,
            "\n\tType: " + self.typ,
            "\n\tDependent Words: ",
        ]
        for entry in self.deps.values():
            pieces.append("\n\t\tDep. ID: " + str(entry[0]))
            pieces.append("\n\t\t\tDep. Text: " + entry[1])
            pieces.append("\n\t\t\tRelation Type: " + entry[2])
        return "".join(pieces)

def formatConstituency(tree, num_tabs):
    """Render a constituency tree as bracketed text, one node per line.

    tree: object exposing ``label``, ``children`` and a string form.
    num_tabs: number of tabs used to indent this node; children get one more.

    A node whose string form equals its label is treated as a leaf and is
    rendered inline as " <label>". Every other node opens on its own indented
    line and closes with ")" on a line at the same indentation.
    """
    node_label, branches = tree.label, tree.children

    # leaf: nothing to nest, so it stays on its parent's line
    if node_label == str(tree):
        return " " + node_label

    indent = "\n" + "\t" * num_tabs
    pieces = [indent, "(", node_label]
    for child in branches:
        pieces.append(formatConstituency(child, num_tabs + 1))
    pieces.extend((indent, ")"))
    return "".join(pieces)

def getDependency(input_dep:list):
    """Turn a list of (head word, relation, dependent word) entries into lookup tables.

    Each entry is indexed as entry[0] (head word), entry[1] (relation name) and
    entry[2] (dependent word); the word objects are read through their
    ``id``, ``text``, ``head`` and ``xpos`` attributes.

    Returns a tuple ``(nodes_by_id, ids_by_text)``:
        nodes_by_id: word id -> depNode, including a special ROOT node at id 0.
        ids_by_text: lower-cased word text -> list of ids carrying that text,
            ex: for the sentence "cakes are cakes", ids_by_text["cakes"] == [1, 3].
    """
    ids_by_text = {}
    # the special ROOT node is created up front, so a word with id 0 is never indexed by text
    nodes_by_id = {0: depNode(0, -1, "ROOT", "N/A", {})}

    def _register(word, word_id, lowered):
        # create the node (and index its text) only the first time an id shows up
        if word_id in nodes_by_id:
            return
        nodes_by_id[word_id] = depNode(word_id, word.head, lowered, word.xpos, {})
        known_ids = ids_by_text.setdefault(lowered, [])
        if word_id not in known_ids:
            known_ids.append(word_id)

    for entry in input_dep:
        head_word, rel_type, dep_word = entry[0], entry[1], entry[2]
        head_id, head_text = head_word.id, head_word.text.lower()
        dep_id, dep_text = dep_word.id, dep_word.text.lower()

        _register(head_word, head_id, head_text)
        _register(dep_word, dep_id, dep_text)

        # hang the dependent off its head word
        nodes_by_id[head_id].addDependent(dep_id, dep_text, rel_type)

    return (nodes_by_id, ids_by_text)
    
# Pretty-print the constituency parse of the first sentence, then every dependency node.
# NOTE(review): `test_doc` is not assigned in this cell or in any cell above it; it is only
# assigned in later cells, so this cell relies on state left in the kernel by an earlier run.
print(formatConstituency(test_doc.sentences[0].constituency, 0))
# getDependency returns a tuple; index 0 is the dict of nodes keyed by word id
depe = getDependency(test_doc.sentences[0].dependencies)
for dd in depe[0]:
    print(depe[0][dd])



(ROOT
	(S
		(VP
			(VB Let
			)
			(S
				(NP
					(PRP me
					)
				)
				(VP
					(VB know
					)
					(NP
						(NP
							(DT the
							)
							(NN temperature
							)
						)
						(SBAR
							(S
								(NP
									(PRP I
									)
								)
								(VP
									(VBP need
									)
								)
							)
						)
					)
				)
			)
		)
	)
)
ID: 0
	Head ID: -1
	Text: ROOT
	Type: N/A
	Dependent Words: 
		Dep. ID: 1
			Dep. Text: let
			Relation Type: root
ID: 1
	Head ID: 0
	Text: let
	Type: VB
	Dependent Words: 
		Dep. ID: 2
			Dep. Text: me
			Relation Type: obj
		Dep. ID: 3
			Dep. Text: know
			Relation Type: xcomp
ID: 2
	Head ID: 1
	Text: me
	Type: PRP
	Dependent Words: 
ID: 3
	Head ID: 1
	Text: know
	Type: VB
	Dependent Words: 
		Dep. ID: 5
			Dep. Text: temperature
			Relation Type: obj
ID: 5
	Head ID: 3
	Text: temperature
	Type: NN
	Dependent Words: 
		Dep. ID: 4
			Dep. Text: the
			Relation Type: det
		Dep. ID: 7
			Dep. Text: need
			Relation Type: acl:relcl
ID: 4
	Hea

In [101]:
# makes sentence parsing better through removing problematic words that make sentences more complex.
# this should only be done if we know it isn't about the recipe or next/previous/current step!!!!
# it removes some stuff important to those
#
# Patterns are applied in this order. Each one needs whitespace (or the start of the text)
# before the word and one non-word character after it, so a word that is the very last
# thing in the text is left alone.
_PROBLEM_WORD_PATTERNS = tuple(
    # raw strings keep \s and \W from being read as (invalid) string escapes;
    # IGNORECASE lets us match on the original text instead of a lower-cased copy
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"(^|\s)let\W(s\W)*",
        r"(^|\s)can\W",
        r"(^|\s)know\W",
        r"(^|\s)me\W",
        r"(^|\s)you\W",
        r"(^|\s)we\W",
        r"(^|\s)love(s|d)*\W",  # evil programmer removes love
        r"(^|\s)like(s|d)*\W",
        r"(^|\s)steps*\W",
    )
)

def removeProblemWords(text:str):
    """Strip the first occurrence of each problem word from ``text``.

    For every pattern, in order, the first match (including the whitespace
    before the word and the single non-word character after it) is replaced by
    one space, and the result is stripped of leading/trailing whitespace before
    the next pattern is tried. Later occurrences of the same word are kept.

    Matching is case-insensitive and is done on the text itself, so the match
    positions always line up with the string being sliced (lower-casing a copy
    can change its length for some characters, e.g. "İ").

    text: the sentence to clean up.
    Returns the cleaned-up sentence; ``text`` unchanged if nothing matched.
    """
    final = text
    for pattern in _PROBLEM_WORD_PATTERNS:
        found = pattern.search(final)
        if found is None:
            continue
        start, end = found.span()
        final = (final[:start] + " " + final[end:]).strip()
    return final

# builds one tuple per word that depends on the given head word:
# (word text, relation to head, word type)
# ex: ("it", "obj", "PRP")
def getDepInfo(input_deps:dict, head:depNode):
    """Describe every dependent recorded on ``head``.

    input_deps: word id -> node; used to look up each dependent's ``typ``.
    head: node whose ``deps`` entries are (dep id, dep text, relation) tuples.

    Returns a list of (text, relation, type) tuples in the order the
    dependents are stored on ``head``.
    """
    return [
        (entry[1], entry[2], input_deps[entry[0]].typ)
        for entry in head.deps.values()
    ]

def isVague(text:str, typ:str):
    """Decide whether a single word counts as vague.

    text: the word itself (compared case-insensitively).
    typ: the word's type tag; tags are matched as substrings.

    Returns True when the tag contains any of PRP, DT, WP, WRB, IN or JJ,
    when the word contains "thing", or when the word is exactly "stuff";
    False otherwise.
    """
    lowered = text.lower()
    vague_tags = ("PRP", "DT", "WP", "WRB", "IN", "JJ")
    if any(tag in typ for tag in vague_tags):
        return True
    return "thing" in lowered or lowered == "stuff"

def determineVaguenessFromDep(input_deps:dict):
    """Inspect the head word's dependents and report on vagueness.

    input_deps: word id -> node, with the special ROOT node at id 0.

    Prints the head word as a side effect.

    Returns False when the head is a verb ("VB" in its type) and the first
    dependent found for "obj" (checked first) or for "nsubj" (checked second)
    is not vague. In every other case the head word's type string is returned;
    heads that are not verbs get no special handling.
    """
    head = getHeadWord(input_deps)
    head_type = head.typ

    print(head)
    dep_list = getDepInfo(input_deps, head)

    if "VB" in head_type:
        # the object is examined before the subject; relation names are matched as substrings
        for relation in ("obj", "nsubj"):
            for dep_text, dep_relation, dep_type in dep_list:
                if relation not in dep_relation:
                    continue
                # a specific (non-vague) word settles it: the input is being specific.
                # The "these" case is handled outside of this function
                if not isVague(dep_text, dep_type):
                    return False
                # a vague word only ends the scan for this relation
                break

    return head_type

# wrapper for determineVaguenessFromDep, with a few easy exit cases
# NOTE that vagueness doesn't really check how vague something is, but is more of a distinguisher between situations where we would
# call a google search and situations where we would check recipe steps.
# so for example, "What is it?" would be an actually vague question that this says is vague.
# However, "How do I cut these strawberries?" isn't actually vague by definition since it is being very specific, but it would fall under the same
# type of procedure we would use for a vague step, where we check the instructions for info.
# Something like "How do I cut a strawberry?" is kind of middle-of-the-road vagueness compared to the previous examples, but we would say it is being
# specific since they aren't explicitly referring to the instructions (there may be some implicit intention there but ambiguity is hard).
def determineVagueness(text:str):
    """Classify ``text`` as vague (recipe-related) or not.

    text: the raw user sentence.

    Returns True immediately when the text contains "these"; otherwise the
    text is cleaned with removeProblemWords, parsed with ``depgram``, and the
    result of determineVaguenessFromDep on the first sentence is returned.
    """
    # if "these" is in the text, then it must be referring to something specifically in relation to the recipe
    # this is also an important exception to the other logic, bc "strawberries" is Q4 BUT "these strawberries" is Q3
    if "these" in text.lower():
        return True

    parsed = depgram(removeProblemWords(text))
    # getDependency returns a (nodes by id, ids by text) tuple; determineVaguenessFromDep
    # needs only the node dict, so unpack it rather than passing the whole tuple along
    dependency_dict, _ = getDependency(parsed.sentences[0].dependencies)
    return determineVaguenessFromDep(dependency_dict)

# returns the node of the head word of the given dependency dict (in depNode form),
# i.e. the first dependent recorded on the special node stored at key 0
def getHeadWord(input_deps:dict):
    """Look up the node for the first dependent of ``input_deps[0]``."""
    root_node = input_deps[0]
    dependent_ids = list(root_node.deps.keys())
    first_id = dependent_ids[0]
    return input_deps[first_id]

In [79]:
# Parse one sample question after stripping problem words; `test_doc` and `depe`
# stay in the kernel namespace afterwards.
test_phrase = "How hot should it be?"
test_doc = depgram(removeProblemWords(test_phrase))

# NOTE(review): the extra positional argument makes print emit a trailing " 0" (visible in the
# output below); it looks like a leftover from the formatConstituency call on the next line — confirm.
print(test_doc.sentences[0].constituency, 0)
print(formatConstituency(test_doc.sentences[0].constituency, 0))
# index 0 of getDependency's result is the dict of nodes keyed by word id
depe = getDependency(test_doc.sentences[0].dependencies)
for dd in depe[0]:
    print(depe[0][dd])

# removeProblemWords(test_phrase)
# this return value is discarded: only the last expression of a cell is displayed
getHeadWord(depe[0])
determineVaguenessFromDep(depe[0])

(ROOT (SBARQ (WHADVP (WRB How) (JJ hot)) (SQ (MD should) (NP (PRP it)) (VP (VB be))) (. ?))) 0

(ROOT
	(SBARQ
		(WHADVP
			(WRB How
			)
			(JJ hot
			)
		)
		(SQ
			(MD should
			)
			(NP
				(PRP it
				)
			)
			(VP
				(VB be
				)
			)
		)
		(. ?
		)
	)
)
ID: 0
	Head ID: -1
	Text: ROOT
	Type: N/A
	Dependent Words: 
		Dep. ID: 5
			Dep. Text: be
			Relation Type: root
ID: 2
	Head ID: 5
	Text: hot
	Type: JJ
	Dependent Words: 
		Dep. ID: 1
			Dep. Text: how
			Relation Type: advmod
ID: 1
	Head ID: 2
	Text: how
	Type: WRB
	Dependent Words: 
ID: 5
	Head ID: 0
	Text: be
	Type: VB
	Dependent Words: 
		Dep. ID: 2
			Dep. Text: hot
			Relation Type: xcomp
		Dep. ID: 3
			Dep. Text: should
			Relation Type: aux
		Dep. ID: 4
			Dep. Text: it
			Relation Type: nsubj
		Dep. ID: 6
			Dep. Text: ?
			Relation Type: punct
ID: 3
	Head ID: 5
	Text: should
	Type: MD
	Dependent Words: 
ID: 4
	Head ID: 5
	Text: it
	Type: PRP
	Dependent Words: 
ID: 6
	Head ID: 5
	Text: ?
	Type: .
	Dependent Words: 
VB


In [102]:
# Sample user prompts, grouped by what they ask for; the group comments below are
# descriptive only and do not affect the list.
master_prompt_list = [    
    # --- asking for the ingredient list ---
    "Can you share the list of ingredients?",    
    "What are the ingredients used?",    
    "Could you please provide the ingredients?",    
    "May I know what the ingredients are?",    
    "What's in this dish?",    
    "Ingredients, please?",    
    "What goes into making this?",   
    "Can you show me the ingredient list?",    
    "Let me see the list of ingredients.",    
    "Can we go to the ingredient list?",
    "I'd love to know what ingredients are in this recipe.",
    "Can you tell me the components of this dish?",
    "What are the constituent parts of this meal?",
    "Please share the list of components for this recipe.",
    "Could you provide me with a list of what's in this?",
    "May I have a rundown of the ingredients?",
    "What are the contents of this dish?",
    "I'm curious about what goes into making this, could you tell me?",
    "Do you have a list of what's in this recipe?",
    "Can you let me know what ingredients are used in this dish?",
    # --- moving to the next step ---
    "Proceed to the following step.",
    "Let's move on to the next step.",    
    "Next step Please",
    "Can we move forward with the next step?",    
    "Advance to the next step.",    
    "Let's continue with the next step.",    
    "Can we continue reading the recipe steps?",    
    "Move on to the next step.",    
    "Tell me the following step.",    
    "Continue reading the recipe steps.",
    "Next step.",    
    "Proceed to the subsequent step.",    
    "Let's advance to the following step.",    
    "What's the subsequent step in the recipe?",   
    "Can you guide me to the next step?",    
    "What comes after this step?",    
    "Let's move forward with the following step.",    
    "What's the next instruction in the recipe?",    
    "What's the next step on our recipe?",    
    "Can we move on to the next recipe part?",
    # --- going back to the previous step ---
    "Return to the previous step",    
    "Step back to the last instruction",    
    "Go back one step",    
    "What was the last step again?",    
    "Revisit the previous step",
    "Repeat the last step",    
    "Move back to the last step",    
    "Can we go back one step?",    
    "Let's backtrack to the previous step",    
    "Reverse to the previous step",    
    "Retreat to the last instruction",
    "Let's step back to the previous instruction",    
    "Can we move back to the last step?",    
    "Let's go back one step",    
    "Let's revisit the previous instruction",    
    "Take a step back to the last step",    
    "Can we backtrack to the previous instruction?",    
    "Return to the preceding step",    
    "Let's reverse to the last instruction",       
    "Can we step back one instruction?",
    "Take me to the last step",
    # --- repeating a step ---
    "Repeat the current step please.",
    "Repeat the recipe step",
    "Could you say the current step again?",  
    "Please repeat the current instruction.",    
    "Can you say that step again?",    
    "Repeat the current recipe step.",    
    "Say the current step once more please.",    
    "Could you repeat the current instruction?",       
    "Let's repeat the current step.",    
    "Can you restate the current step?",    
    "Could you go over the current step again?",    
    "Repeat the current step one more time.",          
    "Say the current step one more time, please.",          
    "Could you go over the current step once more?",    
    "Can you restate the current instruction?",    
    "Please repeat the current step.",    
    "Can you repeat the current step, please?",    
    "Say the current instruction again, please.",
    "Repeat the step.",
    "Say the last recipe step again.",
    # --- quantities ---
    "How many do I need?",
    "How much do I need?",
    "What amount of this do I need?",
    "Around how much of this ingredient do I need for this step?",
    "Do I use a lot of this ingredient?",
    "Do I use a little of this ingredient?",
    "How many cups do I need?",
    "What should I fill my measuring cup up to?",
    "How much should I use?",
    # --- temperature ---
    "How hot should it be?",
    "How many degrees should it be set to?",
    "How high do I set the oven?",
    "What temperature should it be at?",
    "What is the best temperature for this step?",
    "How cold should it be?",
    "How many degrees should I set it to?",
    "How warm should I make it?",
    "Should it be cool?",
    # --- timing and doneness ---
    "How long should I do this?",
    "How much time should this take?",
    "How long do I wait?",
    "How long will this take?",
    "How many minutes do I do this?",
    "How much time until I do this?",
    "For how long?",
    "What should I set my timer to?",
    "When will this be done?",
    "What should I check to see if it is done?",
    "What should it look like when I'm done?",
    "When is it done?",
    "When should I stop?",
    "How long should I microwave it for?",
    # --- technique questions, with and without "these"/"them" ---
    "How should I cut strawberries?",
    "How should I cut them?",
    "How should I cut these?",
    "How should I cut these strawberries?"]

# Tally what determineVaguenessFromDep returns for every prompt in master_prompt_list.
# That function returns either False or the head word's type string, so the keys of
# vote_counter are a mix of those.
vote_counter = {}
for prompt in master_prompt_list:
    mod_prompt = removeProblemWords(prompt)
    test_doc = depgram(mod_prompt)
    # index 0 of getDependency's result is the dict of nodes keyed by word id
    depe = getDependency(test_doc.sentences[0].dependencies)
    # determineVaguenessFromDep also prints the head word, so that output precedes each prompt
    v = determineVaguenessFromDep(depe[0])
    print(mod_prompt)
    if not v in vote_counter.keys():
        vote_counter[v] = 1
    else:
        vote_counter[v] += 1

# now convert to list
votes_list = []
for k in vote_counter.keys():
    votes_list.append((k, vote_counter[k]))

# most frequent result first
votes_list.sort(key=lambda x: x[1], reverse=True)
print(votes_list)

ID: 1
	Head ID: 0
	Text: share
	Type: VB
	Dependent Words: 
		Dep. ID: 3
			Dep. Text: list
			Relation Type: obj
		Dep. ID: 6
			Dep. Text: ?
			Relation Type: punct
[('list', 'obj', 'NN'), ('?', 'punct', '.')]
share the list of ingredients?
ID: 1
	Head ID: 0
	Text: what
	Type: WP
	Dependent Words: 
		Dep. ID: 2
			Dep. Text: are
			Relation Type: cop
		Dep. ID: 4
			Dep. Text: ingredients
			Relation Type: nsubj
		Dep. ID: 6
			Dep. Text: ?
			Relation Type: punct
[('are', 'cop', 'VBP'), ('ingredients', 'nsubj', 'NNS'), ('?', 'punct', '.')]
What are the ingredients used?
ID: 3
	Head ID: 0
	Text: provide
	Type: VB
	Dependent Words: 
		Dep. ID: 1
			Dep. Text: could
			Relation Type: aux
		Dep. ID: 2
			Dep. Text: please
			Relation Type: discourse
		Dep. ID: 5
			Dep. Text: ingredients
			Relation Type: obj
		Dep. ID: 6
			Dep. Text: ?
			Relation Type: punct
[('could', 'aux', 'MD'), ('please', 'discourse', 'UH'), ('ingredients', 'obj', 'NNS'), ('?', 'punct', '.')]
Could please provid