# CFG as Rule-Based method

In [None]:
from nltk import CFG
import pandas as pd

# Define a CFG for the code syntax
code_grammar = CFG.fromstring("""
  S -> "robot" "." Action "(" Arguments ")"
  Action -> "pour" | "move" | "pick_up" | "open" | "close" | "place" | "turn" | "push" | "adjust" | "fill" | "take_out" | "stack" | "give" | "replace"
  Arguments -> Arg | Arg "," Arg | Arg "," NamedArg
  Arg -> Object | Location
  NamedArg -> Name "=" Value
  Object -> "coffee" | "cube" | "pen" | "door" | "window" | "box" | "tray" | "book" | "light" | "tv" | "ball" | "apple" | "knob" | "floor" | "trash" | "cup" | "towel" | "page" | "table" | "lid" | "shoes" | "phone" | "remote" | "chair" | "temperature" | "paper" | "plant" | "keys" | "cat" | "lamp" | "whiteboard" | "cart" | "chairs" | "car" | "mirror" | "bathtub" | "lightbulb" | "pillows" | "laundry"
  Location -> "cup" | "desk" | "refrigerator" | "garden" | "shelf" | "windowsill" | "closet" | "sink" | "corner" | "sofa"
  Name -> "direction" | "location" | "temperature" | "shape"
  Value -> "'left'" | "'right'" | "'forward'" | "'backward'" | "'clockwise'" | "'circle'" | "22" | "100"
""")


In [None]:
from nltk import ChartParser
# Define a CFG for instructions
instruction_grammar = CFG.fromstring("""
  S -> VP
  VP -> VB NP PP | VB NP | NP VB ADJP
  NP -> DT NN | NN
  PP -> IN NP
  VB -> "Pour" | "Move" | "Pick" | "Open" | "Close" | "Push" | "Fill" | "Place" | "Turn" | "is"
  ADJP -> JJ
  DT -> "the" | "a"
  NN -> "coffee" | "cup" | "cube" | "pen" | "door" | "window" | "box"
  JJ -> "full"
  IN -> "until" | "to" | "with"
""")


# Parse an instruction

instruction_parser = ChartParser(instruction_grammar)
instruction = ["Pour", "the", "coffee", "until", "the", "cup", "is", "full"]

for tree in instruction_parser.parse(instruction):
    tree.pretty_print()



In [None]:
instruction_to_code = {
    ("Pour", "the", "coffee", "until", "the", "cup", "is", "full"): "robot.pour(coffee, cup, fill_until_full=True)",
    ("Move", "the", "cube", "to", "the", "left"): "robot.move(cube, direction='left')",
    # Add other mappings here
}


**TODO**: Work on the mapping function to be able to map from the instructions CRF to the code CRF

In [None]:
def map_instruction_to_code(instruction_tokens, mapping_rules):
    # Here, `mapping_rules` defines how instruction parts map to code arguments.
    action = instruction_tokens[0].lower()  # Assume first token is the action verb
    args = instruction_tokens[1:]          # Remaining tokens are arguments
    # Dynamically create code from action and arguments
    return f"robot.{action}({', '.join(args)})"

instruction_tokens = ["Move", "cube", "direction='left'"]
mapping_rules = {}  # Define more complex rules if needed
code = map_instruction_to_code(instruction_tokens, mapping_rules)
print(code)


robot.move(cube, direction='left')


In [None]:
from nltk import ChartParser

instruction_parser = ChartParser(instruction_grammar)
instruction = ["Pour", "the", "coffee", "until", "the", "cup", "is", "full"]

for tree in instruction_parser.parse(instruction):
    tree.pretty_print()



now from a promt to what we need

In [None]:
import nltk
from nltk.tokenize import word_tokenize
nltk.download('punkt_tab')

prompt = "Move the cube to the left"
tokens = word_tokenize(prompt)
print(tokens)


['Move', 'the', 'cube', 'to', 'the', 'left']


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [None]:
from nltk import CFG, ChartParser

grammar = CFG.fromstring("""
  S -> VB NP PP
  VB -> "Move" | "Pour" | "Pick" | "Place" | "Turn"
  NP -> DT NN
  PP -> IN DT NN | IN JJ
  DT -> "the" | "a"
  NN -> "cube" | "coffee" | "door" | "window" | "button" | "tray" | "light"
  JJ -> "left" | "right"
  IN -> "to" | "on" | "with"
""")

parser = ChartParser(grammar)


In [None]:
for tree in parser.parse(tokens):
    tree.pretty_print()

## Generalization for All Instructions
Expand your grammar rules and extraction logic to handle the variety of instructions in your dataset. For example:

* Include additional prepositional phrases (e.g., "from", "away from").
* Handle different argument structures (e.g., "Fill the cup with water").

Notes on possible improvements:
- Automatically recognize where the verb is in the sentence?

-> assumes that there is only one verb per sentence


--> maybe we can test how many verbs are given in the input before parsing ?

--> HumanEval might be too complex of a dataset for our rule

# Research Iterations

## **Attempt 1 : Going straight from Natural language to code only using 1 CFG tree**

In [None]:
'''
Main idea of the code :

Go from natural language input such as : "Move the cube to the left"

End end up with an output in the form of : robot.move(cube, direction='left')

'''

import nltk
from nltk import CFG
from nltk.parse.generate import generate

# Define the CFG grammar
grammar = CFG.fromstring("""
S -> ACTION OBJECT PREP DIRECTION
ACTION -> 'Move'|'Transfer'|'Pour'|'Pick'
OBJECT -> 'the' ITEM
ITEM -> 'cube' | 'sphere' | 'box' | 'orange'
PREP -> 'to'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down'

""")


# Function to transform parsed input into robot commands
def parse_to_command(parsed_tree):
    action = parsed_tree[0][0]  # e.g., "Move"
    obj = parsed_tree[1][1]    # e.g., "cube"
    direction = parsed_tree[3][1]  # e.g., "left"
    return f"robot.move({obj}, direction='{direction}')"

# Initialize parser
parser = nltk.ChartParser(grammar)

# Input command
command = "Move the cube to the left"
tokens = command.split()

# Parse the command and convert to robot code
try:
    for tree in parser.parse(tokens):
        print("Parsed Tree:")
        print(tree)
        robot_command = parse_to_command(tree)
        print("\nGenerated Robot Code:")
        print(robot_command)
except ValueError:
    print("Could not parse command.")


Parsed Tree:
(S
  (ACTION Move)
  (OBJECT the (ITEM cube))
  (PREP to)
  (DIRECTION the (SIDE left)))

Generated Robot Code:
robot.move((ITEM cube), direction='(SIDE left)')


In [None]:
# Initialize parser
parser = nltk.ChartParser(grammar)

# Input command
command = "Move the orange to the right"
tokens = command.split()

# Parse the command and convert to robot code
try:
    for tree in parser.parse(tokens):
        print("Parsed Tree:")
        print(tree)
        robot_command = parse_to_command(tree)
        print("\nGenerated Robot Code:")
        print(robot_command)
except ValueError:
    print("Could not parse command.")

Parsed Tree:
(S
  (ACTION Move)
  (OBJECT the (ITEM orange))
  (PREP to)
  (DIRECTION the (SIDE right)))

Generated Robot Code:
robot.move((ITEM orange), direction='(SIDE right)')


## **Attempt 2 : Make attempt 1 compatible with more complex syntactic structures via POS tagging**

In [None]:
import spacy
from nltk import CFG
from nltk.parse import ChartParser

In [None]:
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


True

In [None]:


# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Define a base CFG grammar
base_grammar = """
S -> ACTION OBJECT PREP DIRECTION
ACTION -> 'Move'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down'
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple'
"""

# Helper function to dynamically extend CFG grammar
def extend_grammar_with_verbs(grammar, verb):
    """
    Adds a new verb to the ACTION rule of the grammar.
    Ensures the verb is appended to the ACTION rule without overwriting.
    """
    grammar_lines = grammar.strip().split("\n")
    for i, line in enumerate(grammar_lines):
        if line.startswith("ACTION ->"):
            # Add the new verb if it doesn't exist
            if f"'{verb}'" not in line:
                grammar_lines[i] = f"{line} | '{verb}'"
            break
    return "\n".join(grammar_lines)


# Function to extract the verb using spaCy POS tagging and extend grammar
def resolve_action_and_extend_grammar(command, current_grammar):
    doc = nlp(command)
    for token in doc:
        if token.pos_ == "VERB":  # spaCy POS tagging for verbs
            # Extend the grammar with the new verb
            updated_grammar = extend_grammar_with_verbs(current_grammar, token.text)
            print(f"Extended Grammar with Verb: {token.text}")
            print(f'updated_grammar{updated_grammar}')
            return token.text, updated_grammar
    return None, current_grammar

# Function to parse the command and generate robot code
def parse_to_command_dynamic_grammar(command, grammar_text):
    # Resolve the action and update grammar
    action, updated_grammar_text = resolve_action_and_extend_grammar(command, grammar_text)

    # Load the updated grammar
    updated_grammar = CFG.fromstring(updated_grammar_text)
    parser = ChartParser(updated_grammar)

    tokens = command.split()  # Simple tokenization
    if not action:
        return "No action (verb) found in the command."

    # Parse with the updated grammar
    try:
        for tree in parser.parse(tokens):
            print("Parsed Tree:")
            print(tree)

            # Extract object and direction
            obj = None
            direction = None
            for subtree in tree.subtrees():
                if subtree.label() == 'ITEM':
                    obj = " ".join(subtree.leaves())
                elif subtree.label() == 'SIDE':
                    direction = " ".join(subtree.leaves())

            if obj and direction:
                return f"robot.{action.lower()}({obj}, direction='{direction}')", updated_grammar_text
            else:
                return "Could not parse object or direction."
    except ValueError:
        return "Could not parse the command with the grammar."




In [None]:
# Initialize base grammar
grammar_text = base_grammar
print(f"Initial Grammar:\n{grammar_text}")

Initial Grammar:

S -> ACTION OBJECT PREP DIRECTION
ACTION -> 'Move'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down'
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple'



In [None]:
# Example command
command = "Place the box to the up"  # "Lift" is not in the initial grammar
robot_command, new_grammar = parse_to_command_dynamic_grammar(command, grammar_text)

print("\nGenerated Robot Code:")
print(robot_command)

Extended Grammar with Verb: Place
updated_grammarS -> ACTION OBJECT PREP DIRECTION
ACTION -> 'Move' | 'Place'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down'
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple'
Parsed Tree:
(S
  (ACTION Place)
  (OBJECT the (ITEM (WORD box)))
  (PREP to)
  (DIRECTION the (SIDE up)))

Generated Robot Code:
robot.place(box, direction='up')


In [None]:
# Example command
command = "Lift the box to the up"  # "Lift" is not in the initial grammar
robot_command, new_grammar_2 = parse_to_command_dynamic_grammar(command, new_grammar)

print("\nGenerated Robot Code:")
print(robot_command)

Extended Grammar with Verb: Lift
updated_grammarS -> ACTION OBJECT PREP DIRECTION
ACTION -> 'Move' | 'Place' | 'Lift'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down'
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple'
Parsed Tree:
(S
  (ACTION Lift)
  (OBJECT the (ITEM (WORD box)))
  (PREP to)
  (DIRECTION the (SIDE up)))

Generated Robot Code:
robot.lift(box, direction='up')


In [None]:
# Example command
command = "Drop the box to the left"  # "Lift" is not in the initial grammar
robot_command, new_grammar_3 = parse_to_command_dynamic_grammar(command, new_grammar_2)

print("\nGenerated Robot Code:")
print(robot_command)

Extended Grammar with Verb: Drop
updated_grammarS -> ACTION OBJECT PREP DIRECTION
ACTION -> 'Move' | 'Place' | 'Lift' | 'Drop'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down'
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple'
Parsed Tree:
(S
  (ACTION Drop)
  (OBJECT the (ITEM (WORD box)))
  (PREP to)
  (DIRECTION the (SIDE left)))

Generated Robot Code:
robot.drop(box, direction='left')


## **Attempt 3 : Extend the POS Tagging updates from verbs to nouns and other syntactic structures**

Nika notted that at some point we could simplify our model by resolving not only if words are present already in our CFG, but by also resolving if some synonyms (or at least words very close in meaning) could be used if they are already present in our CFG.


In [None]:
import spacy
from nltk import CFG
from nltk.parse import ChartParser

In [None]:
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


True

In [None]:
import spacy
from nltk import CFG
from nltk.parse import ChartParser

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Define the base CFG grammar
base_grammar = """
S -> ACTION OBJECT PREP DIRECTION
ACTION -> 'Move'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down'
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple'
"""

# Helper function to dynamically extend CFG grammar
def extend_grammar_with_verbs(grammar, verb):
    """Adds a new verb to the ACTION rule of the grammar."""
    grammar_lines = grammar.strip().split("\n")
    for i, line in enumerate(grammar_lines):
        if line.startswith("ACTION ->"):
            if f"'{verb}'" not in line:
                grammar_lines[i] = f"{line} | '{verb}'"
            break
    return "\n".join(grammar_lines)

def extend_grammar_with_items(grammar, item):
    """Adds a new item to the ITEM rule of the grammar."""
    grammar_lines = grammar.strip().split("\n")
    for i, line in enumerate(grammar_lines):
        if line.startswith("WORD ->"):
            if f"'{item}'" not in line:
                grammar_lines[i] = f"{line} | '{item}'"
            break
    return "\n".join(grammar_lines)

# Function to extract actions and items using spaCy POS tagging
def resolve_and_extend_grammar(command, current_grammar):
    doc = nlp(command)
    updated_grammar = current_grammar

    # Identify verbs and items
    for token in doc:
        if token.pos_ == "VERB":
            updated_grammar = extend_grammar_with_verbs(updated_grammar, token.text)
        elif token.pos_ in {"NOUN", "PROPN"}:  # Items are typically nouns or proper nouns
            updated_grammar = extend_grammar_with_items(updated_grammar, token.text)

    return updated_grammar

# Function to parse the command and generate robot code
def parse_to_command_dynamic_grammar(command, grammar_text):
    # Resolve actions and items, and update the grammar
    updated_grammar_text = resolve_and_extend_grammar(command, grammar_text)

    # Load the updated grammar
    updated_grammar = CFG.fromstring(updated_grammar_text)
    parser = ChartParser(updated_grammar)

    tokens = command.split()  # Simple tokenization
    # Parse with the updated grammar
    try:
        for tree in parser.parse(tokens):
            print("Parsed Tree:")
            print(tree)

            # Extract action, object, and direction
            action = None
            obj = None
            direction = None
            for subtree in tree.subtrees():
                if subtree.label() == 'ACTION':
                    action = " ".join(subtree.leaves())
                elif subtree.label() == 'ITEM':
                    obj = " ".join(subtree.leaves())
                elif subtree.label() == 'SIDE':
                    direction = " ".join(subtree.leaves())

            if action and obj and direction:
                return f"robot.{action.lower()}({obj}, direction='{direction}')", updated_grammar_text
            else:
                return "Could not parse action, object, or direction."
    except ValueError:
        return "Could not parse the command with the grammar."

# Initialize base grammar
grammar_text = base_grammar
print(f"Initial Grammar:\n{grammar_text}")

# Example command
command = "Place the lamp to the right"  # "Place" and "lamp" are not in the initial grammar
robot_command, new_grammar = parse_to_command_dynamic_grammar(command, grammar_text)

print("\nGenerated Robot Code:")
print(robot_command)


Initial Grammar:

S -> ACTION OBJECT PREP DIRECTION
ACTION -> 'Move'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down'
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple'

Parsed Tree:
(S
  (ACTION Place)
  (OBJECT the (ITEM (WORD lamp)))
  (PREP to)
  (DIRECTION the (SIDE right)))

Generated Robot Code:
robot.place(lamp, direction='right')


In [None]:
print(new_grammar)

S -> ACTION OBJECT PREP DIRECTION
ACTION -> 'Move' | 'Place'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down'
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple' | 'lamp' | 'right'


In [None]:
# Example command
command = "Place the tree to the up"  # "Lift" is not in the initial grammar
robot_command, new_grammar = parse_to_command_dynamic_grammar(command, new_grammar)
print(f'new_grammar : {new_grammar}')

print("\nGenerated Robot Code:")
print(robot_command)

Parsed Tree:
(S
  (ACTION Place)
  (OBJECT the (ITEM (WORD tree)))
  (PREP to)
  (DIRECTION the (SIDE up)))
new_grammar : S -> ACTION OBJECT PREP DIRECTION
ACTION -> 'Move' | 'Place'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down'
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple' | 'lamp' | 'right' | 'tree' | 'up'

Generated Robot Code:
robot.place(tree, direction='up')


In [None]:
# Example command
command = "Lift the box to the up"  # "Lift" is not in the initial grammar
robot_command, new_grammar_2 = parse_to_command_dynamic_grammar(command, new_grammar)

print("\nGenerated Robot Code:")
print(robot_command)

Parsed Tree:
(S
  (ACTION Lift)
  (OBJECT the (ITEM (WORD box)))
  (PREP to)
  (DIRECTION the (SIDE up)))

Generated Robot Code:
robot.lift(box, direction='up')


In [None]:
print(f'new grammar 2 {new_grammar_2}')

new grammar 2 S -> ACTION OBJECT PREP DIRECTION
ACTION -> 'Move' | 'Place' | 'Lift'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down'
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple' | 'lamp' | 'right' | 'tree' | 'up'


In [None]:
# Example command
command = "Drop the box to the left"  # "Lift" is not in the initial grammar
robot_command, new_grammar_3 = parse_to_command_dynamic_grammar(command, new_grammar_2)

print("\nGenerated Robot Code:")
print(robot_command)

Parsed Tree:
(S
  (ACTION Drop)
  (OBJECT the (ITEM (WORD box)))
  (PREP to)
  (DIRECTION the (SIDE left)))

Generated Robot Code:
robot.drop(box, direction='left')


## **Attempt 4 : Now that we know that our code can "learn" new nouns and verbs syntactically and add them correctly to the CFG tree, we will attempt to see if our model can also learn new syntactic structures altogether**

Now that we know that our code can "learn" new nouns and verbs syntactically and add them correctly to the CFG tree, we will attempt to see if our model can also learn new syntactic structures altogether

In [None]:
import spacy
from nltk import CFG
from nltk.parse import ChartParser

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Define the base CFG grammar
base_grammar = """
S -> ACTION OBJECT PREP DIRECTION ADVERB
S -> ACTION ADVERB OBJECT PREP DIRECTION
ACTION -> 'Move' | 'Drop'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down'
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple'
ADVERB -> 'slowly' | 'quickly'
"""

# Helper function to dynamically extend CFG grammar
def extend_grammar_with_verbs(grammar, verb):
    """Adds a new verb to the ACTION rule of the grammar."""
    grammar_lines = grammar.strip().split("\n")
    for i, line in enumerate(grammar_lines):
        if line.startswith("ACTION ->"):
            if f"'{verb}'" not in line:
                grammar_lines[i] = f"{line} | '{verb}'"
            break
    return "\n".join(grammar_lines)

def extend_grammar_with_items(grammar, item):
    """Adds a new item to the ITEM rule of the grammar."""
    grammar_lines = grammar.strip().split("\n")
    for i, line in enumerate(grammar_lines):
        if line.startswith("WORD ->"):
            if f"'{item}'" not in line:
                grammar_lines[i] = f"{line} | '{item}'"
            break
    return "\n".join(grammar_lines)

def extend_grammar_with_adverbs(grammar, adverb):
    """Adds a new adverb to the ADVERB rule of the grammar."""
    grammar_lines = grammar.strip().split("\n")
    for i, line in enumerate(grammar_lines):
        if line.startswith("ADVERB ->"):
            if f"'{adverb}'" not in line:
                grammar_lines[i] = f"{line} | '{adverb}'"
            break
    return "\n".join(grammar_lines)

# Function to extract actions, items, and adverbs using spaCy POS tagging
def resolve_and_extend_grammar(command, current_grammar):
    doc = nlp(command)
    updated_grammar = current_grammar

    # Identify verbs, items, and adverbs
    for token in doc:
        if token.pos_ == "VERB":
            updated_grammar = extend_grammar_with_verbs(updated_grammar, token.text)
        elif token.pos_ in {"NOUN", "PROPN"}:  # Items are typically nouns or proper nouns
            updated_grammar = extend_grammar_with_items(updated_grammar, token.text)
        elif token.pos_ == "ADV":  # Adverbs
            updated_grammar = extend_grammar_with_adverbs(updated_grammar, token.text)

    return updated_grammar

# Function to parse the command and generate robot code
def parse_to_command_dynamic_grammar(command, grammar_text):
    # Resolve actions, items, and adverbs, and update the grammar
    updated_grammar_text = resolve_and_extend_grammar(command, grammar_text)

    # Load the updated grammar
    updated_grammar = CFG.fromstring(updated_grammar_text)
    parser = ChartParser(updated_grammar)

    tokens = command.split()  # Simple tokenization
    # Parse with the updated grammar
    try:
        for tree in parser.parse(tokens):
            print("Parsed Tree:")
            print(tree)

            # Extract action, object, direction, and adverb
            action = None
            obj = None
            direction = None
            adverb = None
            for subtree in tree.subtrees():
                if subtree.label() == 'ACTION':
                    action = " ".join(subtree.leaves())
                elif subtree.label() == 'ITEM':
                    obj = " ".join(subtree.leaves())
                elif subtree.label() == 'SIDE':
                    direction = " ".join(subtree.leaves())
                elif subtree.label() == 'ADVERB':
                    adverb = " ".join(subtree.leaves())

            # Construct robot command
            adverb_param = f", {adverb.lower()}=True" if adverb else ""
            if action and obj and direction:
                return f"robot.{action.lower()}({obj}, direction='{direction}'{adverb_param})", updated_grammar_text
            else:
                return "Could not parse action, object, direction, or adverb.", updated_grammar_text
    except ValueError:
        return "Could not parse the command with the grammar.", grammar_text




In [None]:
# Initialize base grammar
grammar_text = base_grammar

# Example command
command = "Drop the box to the left slowly"  # "Drop" and "slowly" should be dynamically handled
robot_command, gram = parse_to_command_dynamic_grammar(command, grammar_text)

print("\nGenerated Robot Code:")
print(robot_command)

print(f'grammar =\n {gram}')

Parsed Tree:
(S
  (ACTION Drop)
  (OBJECT the (ITEM (WORD box)))
  (PREP to)
  (DIRECTION the (SIDE left))
  (ADVERB slowly))

Generated Robot Code:
robot.drop(box, direction='left', slowly=True)
grammar =
 S -> ACTION OBJECT PREP DIRECTION ADVERB
S -> ACTION ADVERB OBJECT PREP DIRECTION
ACTION -> 'Move' | 'Drop'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down'
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple'
ADVERB -> 'slowly' | 'quickly'


In [None]:
# Example command
command = "Drop the box to the left gently"  # "Drop" and "slowly" should be dynamically handled
robot_command, gram = parse_to_command_dynamic_grammar(command, gram)

print("\nGenerated Robot Code:")
print(robot_command)

print(f'grammar =\n {gram}')

Parsed Tree:
(S
  (ACTION Drop)
  (OBJECT the (ITEM (WORD box)))
  (PREP to)
  (DIRECTION the (SIDE left))
  (ADVERB gently))

Generated Robot Code:
robot.drop(box, direction='left', gently=True)
grammar =
 S -> ACTION OBJECT PREP DIRECTION ADVERB
S -> ACTION ADVERB OBJECT PREP DIRECTION
ACTION -> 'Move' | 'Drop'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down'
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple' | 'left'
ADVERB -> 'slowly' | 'quickly' | 'gently'


In [None]:
command = "Move the cuve to the left gently"  # "Drop" and "slowly" should be dynamically handled
robot_command, gram = parse_to_command_dynamic_grammar(command, gram)

print("\nGenerated Robot Code:")
print(robot_command)

print(f'grammar =\n {gram}')

Parsed Tree:
(S
  (ACTION Move)
  (OBJECT the (ITEM (WORD cuve)))
  (PREP to)
  (DIRECTION the (SIDE left))
  (ADVERB gently))

Generated Robot Code:
robot.move(cuve, direction='left', gently=True)
grammar =
 S -> ACTION OBJECT PREP DIRECTION ADVERB
S -> ACTION ADVERB OBJECT PREP DIRECTION
ACTION -> 'Move' | 'Drop'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down'
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple' | 'left' | 'cuve'
ADVERB -> 'slowly' | 'quickly' | 'gently'


## Attempt 5 : Use the previous developed syntax and test it on Mariana's CHATGPT syntetic dataset


In [None]:
df = pd.read_csv("instructions_and_code.csv")
df

Unnamed: 0,Instruction,Code
0,Pour the coffee until the cup is full,"robot.pour(coffee, cup, fill_until_full=True)"
1,Move the cube to the left,"robot.move(cube, direction='left')"
2,Pick up the pen from the desk,"robot.pick_up(pen, desk)"
3,Open the door,robot.open(door)
4,Close the window,robot.close(window)
5,Push the box forward,"robot.push(box, direction='forward')"
6,Lift the tray off the table,"robot.lift(tray, table)"
7,Place the book on the shelf,"robot.place(book, shelf)"
8,Turn on the light,robot.turn_on(light)
9,Turn off the TV,robot.turn_off(tv)


### Attempt 1 (non dynamic structure)

In [None]:
df['Instruction'].unique()

array(['Pour the coffee until the cup is full'], dtype=object)

In [None]:
df = pd.read_csv("instructions_and_code.csv")
df

Unnamed: 0,Instruction,Code
0,Pour the coffee until the cup is full,"robot.pour(coffee, cup, fill_until_full=True)"
1,Move the cube to the left,"robot.move(cube, direction='left')"
2,Pick up the pen from the desk,"robot.pick_up(pen, desk)"
3,Open the door,robot.open(door)
4,Close the window,robot.close(window)
5,Push the box forward,"robot.push(box, direction='forward')"
6,Lift the tray off the table,"robot.lift(tray, table)"
7,Place the book on the shelf,"robot.place(book, shelf)"
8,Turn on the light,robot.turn_on(light)
9,Turn off the TV,robot.turn_off(tv)


In [None]:
df["Generated Code"] = ""
latent_gram = gram

for index, row in df.iterrows():
    command = row['Instruction']
    if parse_to_command_dynamic_grammar(command, latent_gram) is None :
      df["Generated Code"][index] = "Could not parse this grammar"
    else:
      new_code, new_grammar = parse_to_command_dynamic_grammar(command, latent_gram)
      if new_code is None :
        df["Generated Code"][index] = "Could not parse this grammar"
      else:
        df["Generated Code"][index] = new_code
        latent_gram = new_grammar

df

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df["Generated Code"][index] = new_code
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or 

Unnamed: 0,Instruction,Code,Generated Code
0,Pour the coffee until the cup is full,"robot.pour(coffee, cup, fill_until_full=True)",Could not parse the command with the grammar.
1,Move the cube to the left,"robot.move(cube, direction='left')",Could not parse this grammar
2,Pick up the pen from the desk,"robot.pick_up(pen, desk)",Could not parse the command with the grammar.
3,Open the door,robot.open(door),Could not parse this grammar
4,Close the window,robot.close(window),Could not parse this grammar
5,Push the box forward,"robot.push(box, direction='forward')",Could not parse this grammar
6,Lift the tray off the table,"robot.lift(tray, table)",Could not parse the command with the grammar.
7,Place the book on the shelf,"robot.place(book, shelf)",Could not parse the command with the grammar.
8,Turn on the light,robot.turn_on(light),Could not parse the command with the grammar.
9,Turn off the TV,robot.turn_off(tv),Could not parse the command with the grammar.


### Attempt 2: Dynamic Structure

In [None]:
import spacy
from nltk import CFG
from nltk.parse import ChartParser

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Define the base CFG grammar
base_grammar = """
S -> ACTION OBJECT PREP DIRECTION ADVERB
S -> ACTION ADVERB OBJECT PREP DIRECTION
S -> ACTION OBJECT CONJ ACTION OBJECT
S -> ACTION OBJECT PREP LOCATION
S -> ACTION OBJECT
S -> ACTION OBJECT PREP DIRECTION
S -> ACTION OBJECT PREP TOOL
S -> ACTION OBJECT PREP OBJECT
ACTION -> 'Move' | 'Drop' | 'Pick' | 'Pour' | 'Open' | 'Close' | 'Push' | 'Lift' | 'Place' | 'Turn' | 'Give' | 'Sweep' | 'Take' | 'Fill' | 'Fold' | 'Set' | 'Cut'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to' | 'from' | 'on' | 'with' | 'in' | 'off' | 'out' | 'into'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down' | 'forward' | 'backward' | 'clockwise'
LOCATION -> 'the' PLACE
PLACE -> WORD
TOOL -> 'the' TOOL_ITEM
TOOL_ITEM -> WORD
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple' | 'pen' | 'door' | 'window' | 'light' | 'TV' | 'table' | 'knob' | 'floor' | 'trash' | 'cup' | 'water' | 'towel' | 'shoes' | 'closet' | 'dock' | 'button' | 'remote' | 'thermostat' | 'scissors' | 'refrigerator'
ADVERB -> 'slowly' | 'quickly'
CONJ -> 'and'
"""

# Function to dynamically extend CFG grammar
def extend_grammar(rule, value):
    """Adds a new value to the specified rule of the grammar."""
    if f"'{value}'" not in rule:
        return f"{rule} | '{value}'"
    return rule

def resolve_and_extend_grammar(command, current_grammar):
    doc = nlp(command)
    grammar_lines = current_grammar.strip().split("\n")
    updated_grammar = grammar_lines[:]
    rules = {line.split("->")[0].strip(): line for line in grammar_lines}

    for token in doc:
        if token.pos_ == "VERB":
            rules["ACTION"] = extend_grammar(rules["ACTION"], token.text)
        elif token.pos_ in {"NOUN", "PROPN"}:
            rules["WORD"] = extend_grammar(rules["WORD"], token.text)
        elif token.pos_ == "ADV":
            rules["ADVERB"] = extend_grammar(rules["ADVERB"], token.text)
        elif token.text.lower() in {"to", "from", "on", "with", "in", "off", "out", "into"}:
            rules["PREP"] = extend_grammar(rules["PREP"], token.text)

    updated_grammar = "\n".join(rules.values())
    return updated_grammar

def parse_to_command_dynamic_grammar(command, grammar_text):
    updated_grammar_text = resolve_and_extend_grammar(command, grammar_text)
    updated_grammar = CFG.fromstring(updated_grammar_text)
    parser = ChartParser(updated_grammar)

    tokens = command.split()
    try:
        for tree in parser.parse(tokens):
            print("Parsed Tree:")
            print(tree)

            action = None
            obj = None
            prep = None
            direction = None
            adverb = None
            tool = None

            for subtree in tree.subtrees():
                label = subtree.label()
                if label == 'ACTION':
                    action = " ".join(subtree.leaves())
                elif label == 'ITEM':
                    obj = " ".join(subtree.leaves())
                elif label == 'SIDE':
                    direction = " ".join(subtree.leaves())
                elif label == 'ADVERB':
                    adverb = " ".join(subtree.leaves())
                elif label == 'TOOL_ITEM':
                    tool = " ".join(subtree.leaves())

            adverb_param = f", {adverb.lower()}=True" if adverb else ""
            if action and obj:
                return f"robot.{action.lower()}({obj}, direction='{direction}'{adverb_param})", updated_grammar_text
            else:
                return "Could not parse action or object.", updated_grammar_text
    except ValueError:
        return "Could not parse the command with the grammar.", grammar_text


In [None]:
#Create a new entry in the df dataframe called df["Generated Code"]

df["Generated Code"] = ""
df

Unnamed: 0,Instruction,Code,Generated Code
0,Pour the coffee until the cup is full,"robot.pour(coffee, cup, fill_until_full=True)",
1,Move the cube to the left,"robot.move(cube, direction='left')",
2,Pick up the pen from the desk,"robot.pick_up(pen, desk)",
3,Open the door,robot.open(door),
4,Close the window,robot.close(window),
5,Push the box forward,"robot.push(box, direction='forward')",
6,Lift the tray off the table,"robot.lift(tray, table)",
7,Place the book on the shelf,"robot.place(book, shelf)",
8,Turn on the light,robot.turn_on(light),
9,Turn off the TV,robot.turn_off(tv),


In [None]:

df["Generated Code"] = ""
latent_gram = base_grammar

for index, row in df.iterrows():
    command = row['Instruction']
    if parse_to_command_dynamic_grammar(command, latent_gram) is None :
      df["Generated Code"][index] = "Could not parse this grammar"
    else:
      new_code, new_grammar = parse_to_command_dynamic_grammar(command, latent_gram)
      df["Generated Code"][index] = new_code
      latent_gram = new_grammar

df

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df["Generated Code"][index] = new_code
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or 

Parsed Tree:
(S
  (ACTION Lift)
  (OBJECT the (ITEM (WORD tray)))
  (PREP off)
  (OBJECT the (ITEM (WORD table))))
Parsed Tree:
(S
  (ACTION Lift)
  (OBJECT the (ITEM (WORD tray)))
  (PREP off)
  (OBJECT the (ITEM (WORD table))))
Parsed Tree:
(S
  (ACTION Place)
  (OBJECT the (ITEM (WORD book)))
  (PREP on)
  (OBJECT the (ITEM (WORD shelf))))
Parsed Tree:
(S
  (ACTION Place)
  (OBJECT the (ITEM (WORD book)))
  (PREP on)
  (OBJECT the (ITEM (WORD shelf))))
Parsed Tree:
(S
  (ACTION Move)
  (OBJECT the (ITEM (WORD ball)))
  (PREP to)
  (OBJECT the (ITEM (WORD right))))
Parsed Tree:
(S
  (ACTION Move)
  (OBJECT the (ITEM (WORD ball)))
  (PREP to)
  (OBJECT the (ITEM (WORD right))))
Parsed Tree:
(S
  (ACTION Give)
  (OBJECT the (ITEM (WORD apple)))
  (PREP to)
  (OBJECT the (ITEM (WORD person))))
Parsed Tree:
(S
  (ACTION Give)
  (OBJECT the (ITEM (WORD apple)))
  (PREP to)
  (OBJECT the (ITEM (WORD person))))
Parsed Tree:
(S
  (ACTION Sweep)
  (OBJECT the (ITEM (WORD floor)))
  (PREP with

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df["Generated Code"][index] = new_code
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or 

Parsed Tree:
(S
  (ACTION Close)
  (OBJECT the (ITEM (WORD lid)))
  (PREP on)
  (OBJECT the (ITEM (WORD jar))))
Parsed Tree:
(S
  (ACTION Close)
  (OBJECT the (ITEM (WORD lid)))
  (PREP on)
  (OBJECT the (ITEM (WORD jar))))
Parsed Tree:
(S
  (ACTION Put)
  (OBJECT the (ITEM (WORD shoes)))
  (PREP in)
  (OBJECT the (ITEM (WORD closet))))
Parsed Tree:
(S
  (ACTION Put)
  (OBJECT the (ITEM (WORD shoes)))
  (PREP in)
  (OBJECT the (ITEM (WORD closet))))
Parsed Tree:
(S
  (ACTION Push)
  (OBJECT the (ITEM (WORD button)))
  (PREP on)
  (OBJECT the (ITEM (WORD remote))))
Parsed Tree:
(S
  (ACTION Push)
  (OBJECT the (ITEM (WORD button)))
  (PREP on)
  (OBJECT the (ITEM (WORD remote))))
Parsed Tree:
(S
  (ACTION Water)
  (OBJECT the (ITEM (WORD plants)))
  (PREP in)
  (OBJECT the (ITEM (WORD garden))))


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df["Generated Code"][index] = new_code
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or 

Parsed Tree:
(S
  (ACTION Water)
  (OBJECT the (ITEM (WORD plants)))
  (PREP in)
  (OBJECT the (ITEM (WORD garden))))
Parsed Tree:
(S
  (ACTION Plug)
  (OBJECT the (ITEM (WORD charger)))
  (PREP into)
  (OBJECT the (ITEM (WORD laptop))))
Parsed Tree:
(S
  (ACTION Plug)
  (OBJECT the (ITEM (WORD charger)))
  (PREP into)
  (OBJECT the (ITEM (WORD laptop))))
Parsed Tree:
(S
  (ACTION Put)
  (OBJECT the (ITEM (WORD cat)))
  (PREP on)
  (OBJECT the (ITEM (WORD couch))))
Parsed Tree:
(S
  (ACTION Put)
  (OBJECT the (ITEM (WORD cat)))
  (PREP on)
  (OBJECT the (ITEM (WORD couch))))


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df["Generated Code"][index] = new_code
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or 

Parsed Tree:
(S
  (ACTION Move)
  (OBJECT the (ITEM (WORD plant)))
  (PREP to)
  (OBJECT the (ITEM (WORD windowsill))))
Parsed Tree:
(S
  (ACTION Move)
  (OBJECT the (ITEM (WORD plant)))
  (PREP to)
  (OBJECT the (ITEM (WORD windowsill))))
Parsed Tree:
(S
  (ACTION Replace)
  (OBJECT the (ITEM (WORD lightbulb)))
  (PREP in)
  (OBJECT the (ITEM (WORD lamp))))
Parsed Tree:
(S
  (ACTION Replace)
  (OBJECT the (ITEM (WORD lightbulb)))
  (PREP in)
  (OBJECT the (ITEM (WORD lamp))))
Parsed Tree:
(S
  (ACTION Stack)
  (OBJECT the (ITEM (WORD boxes)))
  (PREP in)
  (OBJECT the (ITEM (WORD corner))))
Parsed Tree:
(S
  (ACTION Stack)
  (OBJECT the (ITEM (WORD boxes)))
  (PREP in)
  (OBJECT the (ITEM (WORD corner))))
Parsed Tree:
(S
  (ACTION Stack)
  (OBJECT the (ITEM (WORD books)))
  (PREP on)
  (OBJECT the (ITEM (WORD shelf))))
Parsed Tree:
(S
  (ACTION Stack)
  (OBJECT the (ITEM (WORD books)))
  (PREP on)
  (OBJECT the (ITEM (WORD shelf))))
Parsed Tree:
(S
  (ACTION Arrange)
  (OBJECT the (IT

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df["Generated Code"][index] = "Could not parse this grammar"
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the 

Unnamed: 0,Instruction,Code,Generated Code
0,Pour the coffee until the cup is full,"robot.pour(coffee, cup, fill_until_full=True)",Could not parse the command with the grammar.
1,Move the cube to the left,"robot.move(cube, direction='left')",Could not parse this grammar
2,Pick up the pen from the desk,"robot.pick_up(pen, desk)",Could not parse this grammar
3,Open the door,robot.open(door),Could not parse this grammar
4,Close the window,robot.close(window),Could not parse this grammar
5,Push the box forward,"robot.push(box, direction='forward')",Could not parse this grammar
6,Lift the tray off the table,"robot.lift(tray, table)","robot.lift(table, direction='None')"
7,Place the book on the shelf,"robot.place(book, shelf)","robot.place(shelf, direction='None')"
8,Turn on the light,robot.turn_on(light),Could not parse this grammar
9,Turn off the TV,robot.turn_off(tv),Could not parse this grammar


In [None]:
error_df = df[df['Generated Code'] == 'Could not parse this grammar']
error_df

Unnamed: 0,Instruction,Code,Generated Code
1,Move the cube to the left,"robot.move(cube, direction='left')",Could not parse this grammar
2,Pick up the pen from the desk,"robot.pick_up(pen, desk)",Could not parse this grammar
3,Open the door,robot.open(door),Could not parse this grammar
4,Close the window,robot.close(window),Could not parse this grammar
5,Push the box forward,"robot.push(box, direction='forward')",Could not parse this grammar
8,Turn on the light,robot.turn_on(light),Could not parse this grammar
9,Turn off the TV,robot.turn_off(tv),Could not parse this grammar
12,Turn the knob clockwise,"robot.turn(knob, direction='clockwise')",Could not parse this grammar
14,Take the trash out,robot.take_out(trash),Could not parse this grammar
15,Fill the cup with water,"robot.fill(cup, water)",Could not parse this grammar


### Attempt 3 : Trying to fix case by case:

In [None]:
import spacy
from nltk import CFG
from nltk.parse import ChartParser

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Define the base CFG grammar
base_grammar = """
S -> ACTION OBJECT PREP DIRECTION ADVERB
S -> ACTION OBJECT PREP LOCATION
S -> ACTION ADVERB OBJECT PREP DIRECTION
S -> ACTION OBJECT CONJ ACTION OBJECT
S -> ACTION OBJECT
S -> ACTION OBJECT PREP DIRECTION
S -> ACTION OBJECT PREP TOOL
S -> ACTION OBJECT PREP OBJECT
S -> ACTION OBJECT ORIENTATION
S -> ACTION OBJECT PREP OBJECT PREP LOCATION
S -> ACTION OBJECT CONJ ACTION OBJECT PREP LOCATION
S -> ACTION OBJECT PREP WORD
ACTION -> 'Move' | 'Drop' | 'Pick' | 'Pick up' | 'Pour' | 'Open' | 'Close' | 'Push' | 'Lift' | 'Place' | 'Turn' | 'Give' | 'Sweep' | 'Take' | 'Fill' | 'Fold' | 'Set' | 'Cut' | 'Put' | 'Remove' | 'Drive'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to' | 'from' | 'on' | 'with' | 'in' | 'off' | 'out' | 'into' | 'onto'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down' | 'forward' | 'backward' | 'clockwise' | 'counterclockwise'
LOCATION -> 'the' PLACE
PLACE -> WORD
TOOL -> 'the' TOOL_ITEM
TOOL_ITEM -> WORD
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple' | 'pen' | 'door' | 'window' | 'light' | 'TV' | 'table' | 'knob' | 'floor' | 'trash' | 'cup' | 'water' | 'towel' | 'shoes' | 'closet' | 'dock' | 'button' | 'remote' | 'thermostat' | 'scissors' | 'refrigerator' | 'desk' | 'phone' | 'cart' | 'car' | 'bathtub' | 'hammer' | 'nail' | 'wall' | 'laundry' | 'washing machine' | 'charging dock' | 'coffee table' | 'parking spot' | 'paper'
ADVERB -> 'slowly' | 'quickly'
CONJ -> 'and'
ORIENTATION -> 'towards' | 'away from' | 'clockwise' | 'counterclockwise' | 'forward' | 'backward'
"""


# Function to dynamically extend CFG grammar
def extend_grammar_with_verbs(grammar, verb):
    """Adds a new verb to the ACTION rule of the grammar."""
    grammar_lines = grammar.strip().split("\n")
    for i, line in enumerate(grammar_lines):
        if line.startswith("ACTION ->"):
            if f"'{verb}'" not in line:
                grammar_lines[i] = f"{line} | '{verb}'"
            break
    return "\n".join(grammar_lines)

def extend_grammar_with_items(grammar, item):
    """Adds a new item to the ITEM rule of the grammar."""
    grammar_lines = grammar.strip().split("\n")
    for i, line in enumerate(grammar_lines):
        if line.startswith("WORD ->"):
            if f"'{item}'" not in line:
                grammar_lines[i] = f"{line} | '{item}'"
            break
    return "\n".join(grammar_lines)

def extend_grammar_with_adverbs(grammar, adverb):
    """Adds a new adverb to the ADVERB rule of the grammar."""
    grammar_lines = grammar.strip().split("\n")
    for i, line in enumerate(grammar_lines):
        if line.startswith("ADVERB ->"):
            if f"'{adverb}'" not in line:
                grammar_lines[i] = f"{line} | '{adverb}'"
            break
    return "\n".join(grammar_lines)


    # Function to extract actions, items, and adverbs using spaCy POS tagging
def resolve_and_extend_grammar(command, current_grammar):
    doc = nlp(command)
    updated_grammar = current_grammar

    # Identify verbs, items, and adverbs
    for token in doc:
        if token.pos_ == "VERB":
            updated_grammar = extend_grammar_with_verbs(updated_grammar, token.text)
        elif token.pos_ in {"NOUN", "PROPN"}:  # Items are typically nouns or proper nouns
            updated_grammar = extend_grammar_with_items(updated_grammar, token.text)
        elif token.pos_ == "ADV":  # Adverbs
            updated_grammar = extend_grammar_with_adverbs(updated_grammar, token.text)

    return updated_grammar

def parse_to_command_dynamic_grammar(command, grammar_text):
    updated_grammar_text = resolve_and_extend_grammar(command, grammar_text)
    updated_grammar = CFG.fromstring(updated_grammar_text)
    parser = ChartParser(updated_grammar)

    tokens = command.split()
    try:
        for tree in parser.parse(tokens):

            action = None
            obj = None
            prep = None
            direction = None
            location = None
            adverb = None
            tool = None
            orientation = None

            # Parse through subtrees
            for subtree in tree.subtrees():
                label = subtree.label()
                if label == 'ACTION':
                    action = " ".join(subtree.leaves())
                elif label == 'ITEM':
                    obj = " ".join(subtree.leaves())
                elif label == 'SIDE':
                    direction = " ".join(subtree.leaves())
                elif label == 'PLACE':
                    location = " ".join(subtree.leaves())
                elif label == 'ADVERB':
                    adverb = " ".join(subtree.leaves())
                elif label == 'TOOL_ITEM':
                    tool = " ".join(subtree.leaves())
                elif label == 'ORIENTATION':
                    orientation = " ".join(subtree.leaves())

            # Handle ORIENTATION similarly to DIRECTION
            if orientation:
                direction = orientation  # Treat orientation the same as direction

            adverb_param = f", {adverb.lower()}=True" if adverb else ""
            if action and obj and direction:
                return f"robot.{action.lower()}({obj}, direction='{direction}'{adverb_param})", updated_grammar_text
            elif action and obj and location:
                return f"robot.{action.lower()}({obj}, '{location}'{adverb_param})", updated_grammar_text
            elif action and obj:
                return f"robot.{action.lower()}({obj}{adverb_param})", updated_grammar_text
            else:
                return "Could not parse action or object.", updated_grammar_text
    except ValueError:
        return "Could not parse the command with the grammar.", grammar_text


In [None]:
import spacy
from nltk import CFG
from nltk.parse import ChartParser

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Define the base CFG grammar
base_grammar = """
S -> ACTION OBJECT PREP DIRECTION ADVERB
S -> ACTION OBJECT PREP LOCATION
S -> ACTION ADVERB OBJECT PREP DIRECTION
S -> ACTION OBJECT CONJ ACTION OBJECT
S -> ACTION OBJECT
S -> ACTION OBJECT PREP DIRECTION
S -> ACTION OBJECT PREP TOOL
S -> ACTION OBJECT PREP OBJECT
S -> ACTION OBJECT ORIENTATION
S -> ACTION OBJECT PREP OBJECT PREP LOCATION
S -> ACTION OBJECT CONJ ACTION OBJECT PREP LOCATION
S -> ACTION OBJECT PREP WORD
ACTION -> 'rove' | 'rotate' | 'draw' | 'change_color' | 'pen_down'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to' | 'from' | 'on' | 'with' | 'in' | 'off' | 'out' | 'into' | 'onto'
DIRECTION -> 'the' SIDE
SIDE -> 'left' | 'right' | 'up' | 'down' | 'forward' | 'backward' | 'clockwise' | 'counterclockwise'
LOCATION -> 'the' PLACE
PLACE -> WORD
TOOL -> 'the' TOOL_ITEM
TOOL_ITEM -> WORD
WORD -> 'cube' | 'sphere' | 'box' | 'book' | 'chair' | 'robot' | 'apple' | 'pen' | 'door' | 'window' | 'light' | 'TV' | 'table' | 'knob' | 'floor' | 'trash' | 'cup' | 'water' | 'towel' | 'shoes' | 'closet' | 'dock' | 'button' | 'remote' | 'thermostat' | 'scissors' | 'refrigerator' | 'desk' | 'phone' | 'cart' | 'car' | 'bathtub' | 'hammer' | 'nail' | 'wall' | 'laundry' | 'washing machine' | 'charging dock' | 'coffee table' | 'parking spot' | 'paper'
ADVERB -> 'slowly' | 'quickly'
CONJ -> 'and'
ORIENTATION -> 'towards' | 'away from' | 'clockwise' | 'counterclockwise' | 'forward' | 'backward'
"""


# Function to dynamically extend CFG grammar
def extend_grammar_with_verbs(grammar, verb):
    """Adds a new verb to the ACTION rule of the grammar."""
    grammar_lines = grammar.strip().split("\n")
    for i, line in enumerate(grammar_lines):
        if line.startswith("ACTION ->"):
            if f"'{verb}'" not in line:
                grammar_lines[i] = f"{line} | '{verb}'"
            break
    return "\n".join(grammar_lines)

def extend_grammar_with_items(grammar, item):
    """Adds a new item to the ITEM rule of the grammar."""
    grammar_lines = grammar.strip().split("\n")
    for i, line in enumerate(grammar_lines):
        if line.startswith("WORD ->"):
            if f"'{item}'" not in line:
                grammar_lines[i] = f"{line} | '{item}'"
            break
    return "\n".join(grammar_lines)

def extend_grammar_with_adverbs(grammar, adverb):
    """Adds a new adverb to the ADVERB rule of the grammar."""
    grammar_lines = grammar.strip().split("\n")
    for i, line in enumerate(grammar_lines):
        if line.startswith("ADVERB ->"):
            if f"'{adverb}'" not in line:
                grammar_lines[i] = f"{line} | '{adverb}'"
            break
    return "\n".join(grammar_lines)


    # Function to extract actions, items, and adverbs using spaCy POS tagging
def resolve_and_extend_grammar(command, current_grammar):
    doc = nlp(command)
    updated_grammar = current_grammar

    # Identify verbs, items, and adverbs
    for token in doc:
        if token.pos_ == "VERB":
            updated_grammar = extend_grammar_with_verbs(updated_grammar, token.text)
        elif token.pos_ in {"NOUN", "PROPN"}:  # Items are typically nouns or proper nouns
            updated_grammar = extend_grammar_with_items(updated_grammar, token.text)
        elif token.pos_ == "ADV":  # Adverbs
            updated_grammar = extend_grammar_with_adverbs(updated_grammar, token.text)

    return updated_grammar

def parse_to_command_dynamic_grammar(command, grammar_text):
    updated_grammar_text = resolve_and_extend_grammar(command, grammar_text)
    updated_grammar = CFG.fromstring(updated_grammar_text)
    parser = ChartParser(updated_grammar)

    tokens = command.split()
    actions = []  # Collect actions for the output format
    try:
        for tree in parser.parse(tokens):
            action_data = {}

            # Parse through subtrees
            for subtree in tree.subtrees():
                label = subtree.label()
                if label == 'ACTION':
                    action_data['action_type'] = " ".join(subtree.leaves())
                elif label == 'ITEM':
                    action_data['object'] = " ".join(subtree.leaves())
                elif label == 'SIDE':
                    action_data['direction'] = " ".join(subtree.leaves())
                elif label == 'PLACE':
                    action_data['location'] = " ".join(subtree.leaves())
                elif label == 'ADVERB':
                    action_data['adverb'] = " ".join(subtree.leaves()).lower()
                elif label == 'TOOL_ITEM':
                    action_data['tool'] = " ".join(subtree.leaves())
                elif label == 'ORIENTATION':
                    action_data['orientation'] = " ".join(subtree.leaves())

            # Consolidate direction and orientation
            if 'orientation' in action_data:
                action_data['direction'] = action_data.pop('orientation')

            # Append the action to the actions list
            actions.append(action_data)

        # Format the actions in the expected YAML-like format
        output = "actions:\n"
        output += f"  number_of_actions: {len(actions)}\n\n"
        for i, action in enumerate(actions, 1):
            output += f"  action{i}:\n"
            for key, value in action.items():
                output += f"    {key}: {value}\n"
            output += "\n"

        return output.strip(), updated_grammar_text
    except ValueError:
        return "Could not parse the command with the grammar.", grammar_text



In [None]:
command = "Move the cup to the left slowly"
new_code, new_grammar = parse_to_command_dynamic_grammar(command, base_grammar)
new_code

'actions:\n  number_of_actions: 1\n\n  action1:\n    action_type: Move\n    object: cup\n    direction: left\n    adverb: slowly'

In [None]:

##################testing loop
latent_gram = base_grammar

for index, row in error_df.iterrows():
    command = row['Instruction']
    if parse_to_command_dynamic_grammar(command, latent_gram) is None :
      error_df["Generated Code"][index] = "Could not parse this grammar"
    else:
      new_code, new_grammar = parse_to_command_dynamic_grammar(command, latent_gram)
      error_df["Generated Code"][index] = new_code
      latent_gram = new_grammar

error_df


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  error_df["Generated Code"][index] = new_code
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  error_df["Generate

Unnamed: 0,Instruction,Code,Generated Code
1,Move the cube to the left,"robot.move(cube, direction='left')","robot.move(cube, direction='left')"
2,Pick up the pen from the desk,"robot.pick_up(pen, desk)",Could not parse this grammar
3,Open the door,robot.open(door),robot.open(door)
4,Close the window,robot.close(window),robot.close(window)
5,Push the box forward,"robot.push(box, direction='forward')","robot.push(box, direction='forward')"
8,Turn on the light,robot.turn_on(light),Could not parse this grammar
9,Turn off the TV,robot.turn_off(tv),Could not parse this grammar
12,Turn the knob clockwise,"robot.turn(knob, direction='clockwise')","robot.turn(knob, direction='clockwise')"
14,Take the trash out,robot.take_out(trash),Could not parse this grammar
15,Fill the cup with water,"robot.fill(cup, water)",robot.fill(cup)


In [None]:
print(f'the number of elements in error_df[Generated Code] that are not equal to "Could not parse this grammar" is {len(error_df[error_df["Generated Code"] != "Could not parse this grammar"])}')

the number of elements in error_df[Generated Code] that are not equal to "Could not parse this grammar" is 12


In [None]:
list_unsolved_commands = error_df[error_df["Generated Code"] == "Could not parse this grammar"]["Instruction"].tolist()
len(list_unsolved_commands)

10

In [None]:
command = "Move the cup to the left slowly"
new_code, new_grammar = parse_to_command_dynamic_grammar(command, latent_gram)
new_code

"robot.move(cup, direction='left', slowly=True)"

### Attempt 6 :  Using a LSTM instead of a Rule based method:

In [None]:
import pandas as pd
import numpy as np

df = pd.read_csv("instructions_and_code.csv")


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer

# Define the model
model = Sequential()
vocab_size = 100
max_sequence_length = 100

# Embedding layer to convert input words to word embeddings
model.add(Embedding(input_dim=vocab_size, output_dim=100, input_length=max_sequence_length))

# LSTM layer for learning the sequence patterns
model.add(LSTM(units=128, return_sequences=True))
model.add(Dropout(0.2))

# Another LSTM layer to capture more complex patterns
model.add(LSTM(units=128))

# Dense layer for generating outputs
model.add(Dense(64, activation='relu'))
model.add(Dense(vocab_size, activation='softmax'))

model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [None]:
# 1. Tokenize the input and output sequences
tokenizer_input = Tokenizer(num_words=vocab_size, oov_token='<OOV>') # Create a tokenizer for input
tokenizer_input.fit_on_texts(df['Instruction'])  # Fit the tokenizer on your instruction data
input_sequences = tokenizer_input.texts_to_sequences(df['Instruction']) # Convert instructions to sequences

tokenizer_output = Tokenizer(num_words=vocab_size, oov_token='<OOV>')  # Create a tokenizer for output
tokenizer_output.fit_on_texts(df['Code'])  # Fit the tokenizer on your code data
output_sequences = tokenizer_output.texts_to_sequences(df['Code']) # Convert code to sequences

# 2. Pad the sequences to ensure uniform length
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='post', truncating='post')
output_sequences = pad_sequences(output_sequences, maxlen=max_sequence_length, padding='post', truncating='post')

# 3. Convert the output sequences to NumPy arrays and flatten to 1D
output_sequences = output_sequences.reshape(output_sequences.shape[0], -1) # Reshape to a 1D array for sparse_categorical_crossentropy

# Create a mask to ignore padded values in the loss calculation
mask = output_sequences != 0  # Assume 0 is the padding value


# 4. Fit the model
model.fit(input_sequences, output_sequences, epochs=10, batch_size=32, sample_weight=mask)




Epoch 1/10


AttributeError: 'NoneType' object has no attribute 'items'

### Attempt 7 : The .yaml interpretation of the code

In [None]:
!pip install transformers torch
from transformers import BertModel, BertTokenizer
import torch
import pandas as pd



In [None]:
def get_bert_embedding(word):
    # Load pre-trained BERT tokenizer and model
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertModel.from_pretrained('bert-base-uncased')

    # Tokenize the input word and convert to tensor
    inputs = tokenizer(word, return_tensors="pt")

    # Extract input IDs and attention mask
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']

    # Pass data through BERT model
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)

    # Extract the last hidden states (embeddings)
    last_hidden_states = outputs.last_hidden_state

    # The output has dimensions [batch_size, sequence_length, hidden_size]
    # For a single word, take the first element of the batch and the second token (first is [CLS])
    embedding = last_hidden_states[0, 1]

    # Convert PyTorch tensor to NumPy array
    return embedding.numpy()

In [None]:
import pandas as pd
from sentence_transformers import SentenceTransformer

def get_sbert_embedding(word):
    model = SentenceTransformer('all-MiniLM-L6-v2')
    embedding = model.encode(word)
    return embedding

In [None]:
#Generating a dataset of embeddings from a list of words

list_of_words = ["move", "rotate", "draw", "change_colors", "pen_down"]
df_sbert = pd.DataFrame(columns = ["verb", "embedding"])
for word in list_of_words:
  df_sbert = pd.concat([df_sbert, pd.DataFrame([{"verb": word, "embedding": get_sbert_embedding(word)}])], ignore_index=True)
df_sbert

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Unnamed: 0,verb,embedding
0,move,"[0.024703356, -0.03555954, 0.026363244, -0.028..."
1,rotate,"[-0.0047539584, 0.054445975, -0.058048636, -0...."
2,draw,"[0.02108034, 0.02142658, -0.027628534, -0.0167..."
3,change_colors,"[0.0036541086, 0.03610057, 0.018009296, -0.015..."
4,pen_down,"[0.00167139, 0.03985135, 0.011079031, 0.013109..."


In [None]:
import pandas as pd
import numpy as np
df = pd.read_csv("/content/drive/My Drive/COMP 550 Final Project/action_data.csv") # Use astype to change data types, and object for numpy arrays
df = df.T
df = df.reset_index()
df.columns = ['verb', 'embedding']
df = df.astype({"verb": str, "embedding": object})
#Changing the type of the embeddings as floats
df['embedding'] = df['embedding'].apply(lambda x: np.fromstring(x[1:-1], sep=' '))

print(f'type of the verb entries = {type(df["verb"][0])}')
print(f'type of the embedding entries = {type(df["embedding"][0])}')
df

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/My Drive/COMP 550 Final Project/action_data.csv'

In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity


def compare_verb_to_df (verb, df = df_sbert):
  #We start off by generating an embedding using Bert for our verb :
  embedding_verb = get_sbert_embedding(verb)

  # Reshape embedding_verb to be a 2D array
  embedding_verb = embedding_verb.reshape(1, -1)  # Reshape to (1, 768)

  similarity = 0
  final_verb = ""
  for iter,row in df.iterrows():
    new_verb, new_embe = row['verb'], row['embedding']

    # Reshape new_embe to be a 2D array
    new_embe = new_embe.reshape(1, -1)  # Reshape to (1, 768) if new_embe is (768,)

    # Calculate cosine similarity correctly
    new_similarity = cosine_similarity(embedding_verb, new_embe)
    if new_similarity > similarity:
      similarity = new_similarity
      final_verb = new_verb
  return final_verb

result = compare_verb_to_df("travel")
result

'move'

In [None]:
import spacy
from nltk import CFG
from nltk.parse import ChartParser

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Define the updated CFG grammar
base_grammar = """
S -> ACTION OBJECT PREP DIRECTION ADVERB
S -> ACTION OBJECT PREP LOCATION
S -> ACTION ADVERB OBJECT PREP DIRECTION
S -> ACTION OBJECT CONJ ACTION OBJECT
S -> ACTION OBJECT
S -> ACTION OBJECT PREP DIRECTION
S -> ACTION OBJECT PREP TOOL
S -> ACTION OBJECT PREP OBJECT
S -> ACTION OBJECT ORIENTATION
S -> ACTION OBJECT PREP OBJECT PREP LOCATION
S -> ACTION OBJECT CONJ ACTION OBJECT PREP LOCATION
S -> ACTION OBJECT PREP WORD
ACTION -> 'move' | 'rotate' | 'draw' | 'change_color' | 'pen_down'
OBJECT -> 'the' ITEM
ITEM -> WORD
PREP -> 'to' | 'from' | 'on' | 'with' | 'in' | 'off' | 'out' | 'into' | 'onto'
DIRECTION -> 'left' | 'right' | 'forward' | 'backward'
ORIENTATION ->  'clockwise' | 'counterclockwise'
VELOCITY -> 'slow' | 'fast' |'very fast'
SPEED ->'1'|'10'|'20' ##########Or define it here as a range ??
ADVERB -> 'slowly' | 'quickly'
CONJ -> 'and'
FIGURE -> 'triangle' | 'square'
COLOR -> 'red' | 'blue' | 'green' | 'yellow' | 'black' | 'white'
ANGLE -> '90' | '180' | '270' | '30' ##########Or define it here as a range ??
LOCATION -> 'the' PLACE
PLACE -> WORD
TOOL -> 'the' TOOL_ITEM
TOOL_ITEM -> WORD
PEN_DOWN -> 'True' | 'False'
LOCATION -> 'the' PLACE
PLACE -> WORD
TOOL -> 'the' TOOL_ITEM
TOOL_ITEM -> WORD
"""

def parse_to_command_dynamic_grammar(command, grammar_text, df = df_sbert):
    # Parse natural language command into tokens
    doc = nlp(command)
    verbs = [token.text for token in doc if token.pos_ == "VERB"]
    #Parsing the rest of the sentence using the grammar
    #cfg_grammar = CFG.fromstring(grammar_text)
    #parser = ChartParser(cfg_grammar)
    #print(f'this is the cfg_grammar{cfg_grammar}')
    #tokens = command.split()



    actions = []  # Collect actions for the output format

    try:
        # Loop through detected verbs
        for verb in verbs:
            action_data = {}
            final_verb = compare_verb_to_df(verb, df)  # Map the verb to a specific action

            if final_verb == 'move':
                action_data['action_type'] = 'move'
                action_data['direction'] = 'left'  # Default or dynamically assigned
                action_data['distance'] = 1  # Example value
                action_data['lin_velocity'] = 10

            elif final_verb == 'rotate':
                action_data['action_type'] = 'rotate'
                action_data['direction'] = 'clockwise'  # Example value
                action_data['angle'] = 30  # Example value

            elif final_verb == 'draw':
                action_data['action_type'] = 'draw'
                action_data['figure'] = 'triangle'  # Example value

            elif final_verb == 'change_colors':
                action_data['action_type'] = 'change_colors'
                action_data['color'] = 'red'  # Example value

            elif final_verb == 'pen_down':
                action_data['action_type'] = 'pen_down'
                action_data['pen_down'] = True  # Example value

            actions.append(action_data)

        # Format the actions in the expected YAML-like format
        output = "actions:\n"
        output += f"  number_of_actions: {len(actions)}\n\n"
        for i, action in enumerate(actions, 1):
            output += f"  action{i}:\n"
            for key, value in action.items():
                output += f"    {key}: {value}\n"
            output += "\n"

        return output.strip()

    except Exception as e:
        return f"Error parsing command: {str(e)}"

# Example usage
# df should be a DataFrame with verbs mapped to specific actions
command = "Travel to the left, then travel to the right, then rotate counterclockwise"
print(parse_to_command_dynamic_grammar(command, base_grammar, df_sbert))






actions:
  number_of_actions: 3

  action1:
    action_type: move
    direction: left
    distance: 1
    lin_velocity: 10

  action2:
    action_type: move
    direction: left
    distance: 1
    lin_velocity: 10

  action3:
    action_type: rotate
    direction: clockwise
    angle: 30


### Attempt 8 : The .yaml interpretation of the code (This time, use CFG to populate the list of parameters (not only populating verbs))

In [None]:
!pip install transformers torch
from transformers import BertModel, BertTokenizer
import torch
import pandas as pd



In [None]:
def get_bert_embedding(word):
    # Load pre-trained BERT tokenizer and model
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertModel.from_pretrained('bert-base-uncased')

    # Tokenize the input word and convert to tensor
    inputs = tokenizer(word, return_tensors="pt")

    # Extract input IDs and attention mask
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']

    # Pass data through BERT model
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)

    # Extract the last hidden states (embeddings)
    last_hidden_states = outputs.last_hidden_state

    # The output has dimensions [batch_size, sequence_length, hidden_size]
    # For a single word, take the first element of the batch and the second token (first is [CLS])
    embedding = last_hidden_states[0, 1]

    # Convert PyTorch tensor to NumPy array
    return embedding.numpy()

In [None]:
import pandas as pd
from sentence_transformers import SentenceTransformer

def get_sbert_embedding(word):
    model = SentenceTransformer('all-MiniLM-L6-v2')
    embedding = model.encode(word)
    return embedding

In [None]:
#Generating a dataset of embeddings from a list of words

list_of_words = ["move", "rotate", "draw", "change_colors", "pen_down"]
df_sbert = pd.DataFrame(columns = ["verb", "embedding"])
for word in list_of_words:
  df_sbert = pd.concat([df_sbert, pd.DataFrame([{"verb": word, "embedding": get_sbert_embedding(word)}])], ignore_index=True)
df_sbert

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Unnamed: 0,verb,embedding
0,move,"[0.024703356, -0.03555954, 0.026363244, -0.028..."
1,rotate,"[-0.0047539584, 0.054445975, -0.058048636, -0...."
2,draw,"[0.02108034, 0.02142658, -0.027628534, -0.0167..."
3,change_colors,"[0.0036541086, 0.03610057, 0.018009296, -0.015..."
4,pen_down,"[0.00167139, 0.03985135, 0.011079031, 0.013109..."


In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity


def compare_verb_to_df (verb, df = df_sbert):
  #We start off by generating an embedding using Bert for our verb :
  embedding_verb = get_sbert_embedding(verb)

  # Reshape embedding_verb to be a 2D array
  embedding_verb = embedding_verb.reshape(1, -1)  # Reshape to (1, 768)

  similarity = 0
  final_verb = ""
  for iter,row in df.iterrows():
    new_verb, new_embe = row['verb'], row['embedding']

    # Reshape new_embe to be a 2D array
    new_embe = new_embe.reshape(1, -1)  # Reshape to (1, 768) if new_embe is (768,)

    # Calculate cosine similarity correctly
    new_similarity = cosine_similarity(embedding_verb, new_embe)
    if new_similarity > similarity:
      similarity = new_similarity
      final_verb = new_verb
  return final_verb

result = compare_verb_to_df("travel")
result

'move'

In [None]:
import string

def extract_actions(tree, df=df_sbert):
    actions = []
    direction_counter = {}  # To track the count of DIRECTION for each action type (e.g., move)
    distance_counter = {}  # To track the count of DISTANCE for each action type (e.g., move)

    for subtree in tree.subtrees():
        if subtree.label() == 'ACTION':
            action_data = {}
            verb = subtree.leaves()[0]
            final_verb = compare_verb_to_df(verb, df)  # Check verb against the DataFrame
            action_data["action_type"] = final_verb

            # Track the occurrence of each action type (e.g., move)
            if final_verb == 'move':
                # Count the number of directions and distances associated with 'move'
                if final_verb not in direction_counter:
                    direction_counter[final_verb] = 0
                direction_counter[final_verb] += 1

                if final_verb not in distance_counter:
                    distance_counter[final_verb] = 0
                distance_counter[final_verb] += 1

                # Extract direction and distance based on their counts for each 'move'
                directions = [leaf for subtree in tree.subtrees() if subtree.label() == 'DIRECTION' for leaf in subtree.leaves()]
                direction = directions[direction_counter[final_verb] - 1] if direction_counter[final_verb] <= len(directions) else 'left'

                distances = [leaf for subtree in tree.subtrees() if subtree.label() == 'DISTANCE' for leaf in subtree.leaves()]
                distance = distances[distance_counter[final_verb] - 1] if distance_counter[final_verb] <= len(distances) else '1'

                # Extract other properties for 'move'
                lin_velocity = next(
                    (leaf for subtree in tree.subtrees() if subtree.label() == 'VELOCITY' for leaf in subtree.leaves()), '10'
                )

                action_data['direction'] = direction
                action_data['distance'] = distance
                action_data['lin_velocity'] = lin_velocity

            elif final_verb == 'rotate':
                orientation = next(
                    (leaf for subtree in tree.subtrees() if subtree.label() == 'ORIENTATION' for leaf in subtree.leaves()), 'clockwise'
                )
                angle = next(
                    (leaf for subtree in tree.subtrees() if subtree.label() == 'ANGLE' for leaf in subtree.leaves()), '30'
                )
                action_data['orientation'] = orientation
                action_data['angle'] = angle

            elif final_verb == 'draw':
                figure = next(
                    (leaf for subtree in tree.subtrees() if subtree.label() == 'FIGURE' for leaf in subtree.leaves()), 'triangle'
                )
                action_data['figure'] = figure

            elif final_verb == 'change_colors':
                color = next(
                    (leaf for subtree in tree.subtrees() if subtree.label() == 'COLOR' for leaf in subtree.leaves()), 'red'
                )
                action_data['color'] = color

            elif final_verb == 'pen_down':
                action_data['pen_down'] = True
            else:
                action_data['pen_down'] = False  # For cases where no action type is matched

            actions.append(action_data)

    return actions




def preprocess_command(command, df):
    """
    Preprocesses the command by:
    1. Converting to lowercase.
    2. Removing punctuation except when between two potential digits.
    3. Resolving each verb to its final_verb using the given dataframe.
    """
    import re

    # Convert to lowercase
    command = command.lower()

    # Use regex to remove punctuation except when between digits
    # Pattern explanation:
    # (?<!\d) ensures the character before the punctuation is NOT a digit
    # (?!\d) ensures the character after the punctuation is NOT a digit
    command = re.sub(r"(?<!\d)[.,!?;:](?!\d)", "", command)

    # Process the entire command with spaCy
    doc = nlp(command)

    # Iterate through tokens to resolve verbs
    resolved_tokens = []
    for token in doc:
        # print(f"Token: {token}, POS: {token.pos_}")  # Debugging output
        if token.pos_ == "VERB":
            try:
                # Resolve verb to final_verb
                final_verb = compare_verb_to_df(token.text, df)
                resolved_tokens.append(final_verb)
            except Exception:
                # If no resolution is found, keep the original token
                resolved_tokens.append(token.text)
        else:
            # Include relevant non-verb tokens
            resolved_tokens.append(token.text)

    # Reconstruct the resolved command
    resolved_command = " ".join(resolved_tokens)
    #print(f'this is the resolved command: <{resolved_command}>')
    return resolved_command




In [None]:
angles = " | ".join(f"'{i}'" for i in range(1, 361))
print(angles)

'1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | '10' | '11' | '12' | '13' | '14' | '15' | '16' | '17' | '18' | '19' | '20' | '21' | '22' | '23' | '24' | '25' | '26' | '27' | '28' | '29' | '30' | '31' | '32' | '33' | '34' | '35' | '36' | '37' | '38' | '39' | '40' | '41' | '42' | '43' | '44' | '45' | '46' | '47' | '48' | '49' | '50' | '51' | '52' | '53' | '54' | '55' | '56' | '57' | '58' | '59' | '60' | '61' | '62' | '63' | '64' | '65' | '66' | '67' | '68' | '69' | '70' | '71' | '72' | '73' | '74' | '75' | '76' | '77' | '78' | '79' | '80' | '81' | '82' | '83' | '84' | '85' | '86' | '87' | '88' | '89' | '90' | '91' | '92' | '93' | '94' | '95' | '96' | '97' | '98' | '99' | '100' | '101' | '102' | '103' | '104' | '105' | '106' | '107' | '108' | '109' | '110' | '111' | '112' | '113' | '114' | '115' | '116' | '117' | '118' | '119' | '120' | '121' | '122' | '123' | '124' | '125' | '126' | '127' | '128' | '129' | '130' | '131' | '132' | '133' | '134' | '135' | '136' | '137' | '138' | '139

In [None]:
# Generate numbers with 0.1 increments
decimal_digits = [f"'{i / 10:.1f}'" for i in range(1, 1000)]  # '0.1' to '99.9'
decimal_digits.append("'100'")  # Add '100' for the last value

# Generate whole numbers
whole_numbers = [f"'{i}'" for i in range(1, 101)]  # '1' to '100'

# Combine both lists and join with " | "
all_digits = " | ".join(decimal_digits + whole_numbers)
print(all_digits)

'0.1' | '0.2' | '0.3' | '0.4' | '0.5' | '0.6' | '0.7' | '0.8' | '0.9' | '1.0' | '1.1' | '1.2' | '1.3' | '1.4' | '1.5' | '1.6' | '1.7' | '1.8' | '1.9' | '2.0' | '2.1' | '2.2' | '2.3' | '2.4' | '2.5' | '2.6' | '2.7' | '2.8' | '2.9' | '3.0' | '3.1' | '3.2' | '3.3' | '3.4' | '3.5' | '3.6' | '3.7' | '3.8' | '3.9' | '4.0' | '4.1' | '4.2' | '4.3' | '4.4' | '4.5' | '4.6' | '4.7' | '4.8' | '4.9' | '5.0' | '5.1' | '5.2' | '5.3' | '5.4' | '5.5' | '5.6' | '5.7' | '5.8' | '5.9' | '6.0' | '6.1' | '6.2' | '6.3' | '6.4' | '6.5' | '6.6' | '6.7' | '6.8' | '6.9' | '7.0' | '7.1' | '7.2' | '7.3' | '7.4' | '7.5' | '7.6' | '7.7' | '7.8' | '7.9' | '8.0' | '8.1' | '8.2' | '8.3' | '8.4' | '8.5' | '8.6' | '8.7' | '8.8' | '8.9' | '9.0' | '9.1' | '9.2' | '9.3' | '9.4' | '9.5' | '9.6' | '9.7' | '9.8' | '9.9' | '10.0' | '10.1' | '10.2' | '10.3' | '10.4' | '10.5' | '10.6' | '10.7' | '10.8' | '10.9' | '11.0' | '11.1' | '11.2' | '11.3' | '11.4' | '11.5' | '11.6' | '11.7' | '11.8' | '11.9' | '12.0' | '12.1' | '12.2' | '

In [None]:
import spacy
from nltk import CFG
from nltk.parse import ChartParser

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Define the updated CFG grammar
base_grammar = f"""
S -> ACTION OBJECT DIRECTION ADVERB DIRECTION ADVERB ACTION ORIENTATION
S -> ACTION OBJECT DIRECTION ADVERB DIRECTION ADVERB ACTION ORIENTATION CONJ S
S -> ACTION OBJECT DIRECTION ADVERB ACTION ORIENTATION
S -> ACTION OBJECT DIRECTION ADVERB ACTION ORIENTATION CONJ S
S -> ACTION OBJECT PREP DIRECTION ADVERB
S -> ACTION OBJECT PREP LOCATION
S -> ACTION ADVERB OBJECT PREP DIRECTION
S -> ACTION OBJECT CONJ ACTION OBJECT
S -> ACTION OBJECT
S -> ACTION OBJECT PREP PREP DIRECTION
S -> ACTION OBJECT PREP PREP DIRECTION DISTANCE
S -> ACTION OBJECT PREP DIRECTION
S -> ACTION OBJECT PREP DIRECTION VELOCITY
S -> ACTION OBJECT PREP TOOL
S -> ACTION OBJECT PREP OBJECT
S -> ACTION OBJECT ORIENTATION
S -> ACTION OBJECT PREP OBJECT PREP LOCATION
S -> ACTION OBJECT CONJ ACTION OBJECT PREP LOCATION
S -> ACTION OBJECT PREP WORD
S -> ACTION OBJECT CONJ ACTION OBJECT PREP LOCATION
S -> ACTION OBJECT DIRECTION ADVERB ACTION DIRECTION ADVERB ACTION ORIENTATION
S -> ACTION PREP COLOR
S -> ACTION OBJECT PREP COLOR
S -> ACTION CONJ FIGURE
S -> ACTION | ACTION S
S -> ACTION OBJECT ITEM DIRECTION NUMBER UNIT
S -> ACTION PREP PREP DIRECTION DISTANCE CONJ DISTANCE PREP ANGLE UNITS ORIENTATION
S -> ACTION PREP PREP DIRECTION DISTANCE ADVERB ACTION DISTANCE COUNTERCLOCKWISE
S -> ACTION OBJECT ORIENTATION ADVERB ACTION DIRECTION
S -> ACTION ITEM PREP COLOR
S -> ACTION PREP DIRECTION ADVERB ACTION PREP DIRECTION ADVERB ACTION ORIENTATION
S -> ACTION DRAWING ADVERB ACTION DRAWING
S -> ACTION DRAWING ADVERB ACTION DIRECTION
S -> ACTION OBJECT ITEM PREP COLOR CONJ ACTION DRAWING
S -> ACTION OBJECT CONJ ACTION DIRECTION
S -> PEN_DOWN OBJECT CONJ ACTION DIRECTION NUMBER CONJ NUMBER
S -> ACTION DISTANCE DIRECTION
S -> ACTION ANGLE ORIENTATION
S -> ACTION ANGLE ORIENTATION ADVERB ACTION
S -> ACTION OBJECT CONJ ACTION DIRECTION DISTANCE
S -> ACTION OBJECT DISTANCE CONJ ACTION DIRECTION DISTANCE
S -> ACTION OBJECT DISTANCE PREP VELOCITY
S -> ACTION OBJECT PREP COLOR CONJ ACTION DRAWING
S -> ACTION PREP PREP DIRECTION DISTANCE PREP VELOCITY ADVERB ACTION ANGLE UNIT ORIENTATION
S -> ACTION DIRECTION DISTANCE CONJ VELOCITY ADVERB ACTION ANGLE UNIT ORIENTATION
S -> ACTION CONJ ACTION DIRECTION PREP VELOCITY
S -> ACTION ANGLE UNIT ORIENTATION ADVERB ACTION DISTANCE PREP VELOCITY
S -> ACTION DIRECTION DISTANCE ADVERB ACTION DIRECTION DISTANCE
S -> ACTION DIRECTION DISTANCE UNIT PREP CONJ ITEM PREP VELOCITY UNIT ADVERB ACTION ANGLE UNIT ORIENTATION
S -> ACTION DIRECTION DISTANCE UNIT PREP VELOCITY UNIT CONJ ADVERB ACTION ANGLE UNIT ORIENTATION
S -> ACTION PREP OBJEC ITEM PREP COLOR CONJ ACTION DRAWING
S -> ACTION PREP COLOR CONJ ACTION DRAWING
S -> ACTION PREP CONJ DIRECTION DISTANCE UNIT PREP VELOCITY UNIT ADVERB ACTION PREP CONJ DISTANCE UNIT PREP VELOCITY UNIT
S -> ACTION ANGLE UNIT ORIENTATION
S -> ACTION OBJECT CONJ ACTION DISTANCE UNIT PREP VELOCITY UNIT
S -> ACTION DIRECTION DISTANCE UNIT PREP VELOCITY UNIT
S -> ACTION ANGLE UNIT ORIENTATION
S -> ACTION DIRECTION DISTANCE UNIT PREP VELOCITY UNIT ADVERB ACTION ANGLE UNIT ORIENTATION
S -> ACTION ANGLE UNIT ORIENTATION ADVERB ACTION DISTANCE UNIT PREP VELOCITY UNIT
S -> ACTION DRAWING CONJ ACTION PREP COLOR
S -> ACTION DIRECTION DISTANCE UNIT PREP CONJ ITEM PREP VELOCITY UNIT ADVERB ACTION ANGLE UNIT

ACTION -> 'move' | 'rotate' | 'draw' | 'changecolor' | 'pendown'
OBJECT -> 'the' ITEM
ITEM -> 'robot' | 'kinova_robot' | 'pen' | 'turtle' | 'color' |'pen' |'velocity'
PREP -> 'to' | 'from' | 'on' | 'with' | 'in' | 'off' | 'out' | 'into' | 'onto' | 'the' |'at' |'of'
DIRECTIO -> the DIRECTION
DIRECTION -> 'left' | 'right' | 'forward' | 'backward' | 'forwards' | 'backwards'
ORIENTATION -> 'clockwise' | 'counterclockwise'
VELOCITY -> DIGIT
SPEED -> '1' | '10' | '20'
ADVERB -> 'slowly' | 'quickly' | 'then'
CONJ -> 'and'|'a' |'at'|'the'
DRAWING -> 'a' FIGURE
FIGURE -> 'triangle' | 'square'
COLOR -> 'red' | 'blue' | 'green' | 'black' | 'white'
ANGLE -> {angles}
LOCATION -> 'the' PLACE
PLACE -> WORD
TOOL -> 'the' TOOL_ITEM
TOOL_ITEM -> WORD
PEN_DOWN -> 'pen_down'
NUMBER -> DIGIT | DIGIT NUMBER | NUMBER NUMBER
DIGIT -> {all_digits}
UNIT -> 'meters' | 'centimeters' | 'inches' | 'feet' | 'cm' | 'mm' | 'ft' | 'in' | 'm' | 'degrees' | 'ms' |'m/s'
DISTANCE -> DIGIT
"""


def parse_to_command_static_grammar(command, grammar_text, df = df_sbert):
    """
    Parses the preprocessed command into actions using a static CFG grammar.
    """
    # Preprocess the command to resolve verbs and clean the text
    resolved_command = preprocess_command(command, df_sbert)


    # Parse the resolved command into tokens
    tokens = resolved_command.split()
    #print(f'this is the tokens: <{tokens}>')

    # Load the grammar and initialize the parser
    cfg_grammar = CFG.fromstring(grammar_text)
    parser = ChartParser(cfg_grammar)

    actions = []  # Collect actions for the output format

    try:
        parse_trees = list(parser.parse(tokens))
        #print(f'this is the parse_trees {parse_trees}')
        if not parse_trees:
            return "No valid parse tree found."
        for tree in parse_trees:
            actions.extend(extract_actions(tree))

        # Format the actions in the expected YAML-like format
        output = "actions:\n"
        output += f"  number_of_actions: {len(actions)}\n\n"
        for i, action in enumerate(actions, 1):
            output += f"  action{i}:\n"
            for key, value in action.items():
                output += f"    {key}: {value}\n"
            output += "\n"

        return output.strip()

    except Exception as e:
        return f"Error parsing command: {str(e)}"






In [None]:
# Example usage
# df should be a DataFrame with verbs mapped to specific actions
command = "Pendown and move backward at 22"
print(parse_to_command_static_grammar(command, base_grammar, df_sbert))

actions:
  number_of_actions: 2

  action1:
    action_type: pen_down
    pen_down: True

  action2:
    action_type: move
    direction: backward
    distance: 1
    lin_velocity: 22


In [None]:



# Example commands
commands = [
    "Displace forward 2m at a velocity of 1.5m/s, then turn 45 degrees counterclockwise.",
    "Move backward 3m at 2m/s and then rotate 90 degrees counterclockwise",
"Sketch a square.",
"Changecolor to red and draw a triangle.",
"Turn 30 degrees counterclockwise.",
"Move backward 2m at 0.5m/s.",
"Spin 180 degrees counterclockwise.",
"Move right 0.5m at 0.8m/s, then rotate 60 degrees counterclockwise",
"Changecolor the pen to green and paint a triangle",
"Advance forward 4m at 1.2m/s, then turn 90 degrees counterclockwise",
"Rotate 45 degrees counterclockwise, then advance 2m at 0.9m/s.",
"Draw a square and changecolor to red",
"Shift forward 3m at a velocity of 2m/s then spin 90 degrees.",
"Pendown and move backward at 22",
"Move to the left 5 at 8 then rotate 62 degrees counterclockwise"
]

# Process each command
for command in commands:
    print(f"Command: {command}\n")
    print(parse_to_command_static_grammar(command, base_grammar, df_sbert))
    print("___________________________________________________________________________")

Command: Displace forward 2m at a velocity of 1.5m/s, then turn 45 degrees counterclockwise.

actions:
  number_of_actions: 2

  action1:
    action_type: move
    direction: forward
    distance: 2
    lin_velocity: 1.5

  action2:
    action_type: rotate
    orientation: counterclockwise
    angle: 45
___________________________________________________________________________
Command: Move backward 3m at 2m/s and then rotate 90 degrees counterclockwise

actions:
  number_of_actions: 2

  action1:
    action_type: move
    direction: backward
    distance: 3
    lin_velocity: 2

  action2:
    action_type: rotate
    orientation: counterclockwise
    angle: 90
___________________________________________________________________________
Command: Sketch a square.

actions:
  number_of_actions: 1

  action1:
    action_type: draw
    figure: square
___________________________________________________________________________
Command: Changecolor to red and draw a triangle.

actions:
  numbe

**List of 10 sentences our code actually can handle :**

1) Rotate the pen counterclockwise

2) Spin the pen counterclockwise then go forward

3) Change_color to red

4) Move to right then move to right then rotate counterclockwise

5) Move to right then move to right then spin counterclockwise

6) Draw a square then go forward

7) Spin the robot counterclockwise

8) Sketch a square then go backward

9) Pendown

10) change_color to blue

11) Rotate 62 clockwise

12) Move 2 backward

13) Change_color to green

14) Change_color to blue

15) Change_color to red




From the previous sentences :
- Change_color the pen color to green and paint a triangle.
or Change_color the pen to green and paint a triangle.

- Move forward 5 at 8, then rotate 62 degrees counterclockwise

- Move to the left 5 at 8, then rotate 62 degrees counterclockwise

- Advance forward 4 at 12, then turn 91 degrees counterclockwise

- Pendown and move backward at 22




In [None]:
'''
Move the robot forward slowly then backward quickly rotate clockwise.
Rotate the pen to the left slowly and move the turtle forward quickly.
Draw the kinova_robot to the left on the table.
Move the pen to the robot. [GOOD BUT THAT's NOT GOOD NEWS]
Rotate the turtle clockwise and draw the pen onto the floor.
Draw the kinova_robot to the right with the pen to 5 meters.
Move the robot forward slowly then quickly rotate counterclockwise and draw clockwise.
Pendown to blue.
Changecolor the robot to red.
Rotate the pen with the tool.



'''

"\nMove the robot forward slowly then backward quickly rotate clockwise.\nRotate the pen to the left slowly and move the turtle forward quickly.\nDraw the kinova_robot to the left on the table.\nMove the pen to the robot. [GOOD BUT THAT's NOT GOOD NEWS]\nRotate the turtle clockwise and draw the pen onto the floor.\nDraw the kinova_robot to the right with the pen to 5 meters.\nMove the robot forward slowly then quickly rotate counterclockwise and draw clockwise.\nPendown to blue.\nChangecolor the robot to red.\nRotate the pen with the tool.\n\n\n\n"

### Attempt 9: using ROSGPT-like trained LLM



In [None]:
!pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/76.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.54.5
    Uninstalling openai-1.54.5:
      Successfully uninstalled openai-1.54.5
Successfully installed openai-0.28.0


In [None]:
import openai

# Replace this with your actual OpenAI API key

def generate_actions(command):
    """
    Sends the input command to GPT-3.5 and generates the actions in the desired format. If some values are not specified, just assume a baseline value of 1 for any parameter for which there is missing information.
    For every verb that is inputed, note that you can only resolve that verb to one of 5 options : rotate, move, change_color, draw, or pen_down.
    Any other verb would have to be first swtiched to the one it corresponds the most to in this list, then parsed in yaml.
    """
    prompt = f"""
    Command: "{command}"

    Parse the command into the following format:
    actions:
      number_of_actions: <number>

      action1:
        action_type: <action type>
        direction: <direction> = 'forward' (if not specified)
        distance: <distance> = 1 (if not specified)
        lin_velocity: <velocity> = 1 (if not specified)

      action2:
        action_type: <action type>
        direction: <direction>
        distance: <distance>
        lin_velocity: <velocity>

      # Extend as needed based on the number of actions in the command
    """

    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that formats commands into structured action plans."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=500,
            temperature=0.7,
        )

        # Extract the content of the response
        return response['choices'][0]['message']['content']

    except openai.error.OpenAIError as e:
        return f"An error occurred: {str(e)}"

# Example commands
commands = [
    "Displace forward 2m at a velocity of 1.5m/s, then turn 45 degrees counterclockwise.",
    "Move backward 3m at 2m/s and then rotate 90 degrees counterclockwise",
"Sketch a square.",
"Changecolor to red and draw a triangle.",
"Turn 30 degrees counterclockwise.",
"Move backward 2m at 0.5m/s.",
"Spin 180 degrees counterclockwise.",
"Move right 0.5m at 0.8m/s, then rotate 60 degrees counterclockwise",
"Changecolor the pen to green and paint a triangle",
"Advance forward 4m at 1.2m/s, then turn 90 degrees counterclockwise",
"Rotate 45 degrees counterclockwise, then advance 2m at 0.9m/s.",
"Draw a square and changecolor to red",
"Shift forward 3m at a velocity of 2m/s, then spin 90 degrees (assumed clockwise).",
"Pendown and move backward at 22",
"Move to the left 5 at 8, then rotate 62 degrees counterclockwise"
]

# Process each command
for command in commands:
    print(f"Command: {command}\n")
    actions = generate_actions(command)
    print(actions)
    print("\n" + "-"*50 + "\n")


Command: Displace forward 2m at a velocity of 1.5m/s, then turn 45 degrees counterclockwise.


actions:
  number_of_actions: 2

  action1:
    action_type: Displace
    direction: forward
    distance: 2
    lin_velocity: 1.5

  action2:
    action_type: Turn
    direction: counterclockwise
    angle: 45

--------------------------------------------------

Command: Move backward 3m at 2m/s and then rotate 90 degrees counterclockwise


```yaml
actions:
  number_of_actions: 2

  action1:
    action_type: move
    direction: backward
    distance: 3
    lin_velocity: 2

  action2:
    action_type: rotate
    direction: counterclockwise
    angle: 90
```

--------------------------------------------------

Command: Sketch a square.


```yaml
actions:
  number_of_actions: 4

  action1:
    action_type: move
    direction: forward
    distance: 1
    lin_velocity: 1

  action2:
    action_type: rotate
    direction: right
    angle: 90

  action3:
    action_type: move
    direction: forward

## **Additional tests w CFG**

In [None]:
import spacy
from nltk import CFG
from nltk.parse import ChartParser

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Define the updated CFG grammar
base_grammar = """
  S -> SimpleCommand | ComplexCommand | CommandSequence
  SimpleCommand -> ACTION OBJECT PrepPhrase
  SimpleCommand -> ACTION OBJECT DIRECTION
  SimpleCommand -> ACTION OBJECT

  ComplexCommand -> ACTION OBJECT DIRECTION ADVERB
  ComplexCommand -> ACTION OBJECT DIRECTION ADVERB ACTION ORIENTATION
  ComplexCommand -> ACTION OBJECT DIRECTION ACTION ORIENTATION
  ComplexCommand -> ACTION OBJECT PREP PREP DIRECTION
  ComplexCommand -> ACTION OBJECT PREP PREP DIRECTION DISTANCE

  CommandSequence -> SimpleCommand CONJ S

  PrepPhrase -> PREP DIRECTION

  ACTION -> 'move' | 'rotate' | 'draw' | 'changecolor' | 'pendown'
  OBJECT -> 'the' ITEM
  ITEM -> 'robot' | 'kinova_robot' | 'pen' | 'turtle'
  PREP -> 'to' | 'from' | 'on' | 'with' | 'in' | 'off' | 'out' | 'into' | 'onto' | 'at' | 'the'
  DIRECTION -> 'left' | 'right' | 'forward' | 'backward'
  ORIENTATION -> 'clockwise' | 'counterclockwise'
  ADVERB -> 'slowly' | 'quickly' | 'then'
  CONJ -> 'and' | 'then'
  COLOR -> 'red' | 'blue' | 'green' | 'yellow' | 'black' | 'white'
  LOCATION -> 'the' PLACE
  PLACE -> WORD
  TOOL -> 'the' TOOL_ITEM
  TOOL_ITEM -> WORD
  NUMBER -> DIGIT | DIGIT NUMBER | NUMBER NUMBER
  DIGIT -> '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
  UNIT -> 'meters' | 'centimeters' | 'inches' | 'feet' | 'cm' | 'mm' | 'ft' | 'in'
  DISTANCE -> NUMBER UNIT
"""


def parse_to_command_static_grammar(command, grammar_text, df = df_sbert):
    """
    Parses the preprocessed command into actions using a static CFG grammar.
    """
    # Preprocess the command to resolve verbs and clean the text
    resolved_command = preprocess_command(command, df_sbert)


    # Parse the resolved command into tokens
    tokens = resolved_command.split()
    print(f'this is the tokens: <{tokens}>')

    # Load the grammar and initialize the parser
    cfg_grammar = CFG.fromstring(grammar_text)
    parser = ChartParser(cfg_grammar)

    actions = []  # Collect actions for the output format

    try:
        parse_trees = list(parser.parse(tokens))
        #print(f'this is the parse_trees {parse_trees}')
        if not parse_trees:
            return "No valid parse tree found."
        for tree in parse_trees:
            actions.extend(extract_actions(tree))

        # Format the actions in the expected YAML-like format
        output = "actions:\n"
        output += f"  number_of_actions: {len(actions)}\n\n"
        for i, action in enumerate(actions, 1):
            output += f"  action{i}:\n"
            for key, value in action.items():
                output += f"    {key}: {value}\n"
            output += "\n"

        return output.strip()

    except Exception as e:
        return f"Error parsing command: {str(e)}"







In [None]:
# Example usage
# df should be a DataFrame with verbs mapped to specific actions
command = "move the robot to the left 1 cm"
print(parse_to_command_static_grammar(command, base_grammar, df_sbert))

this is the resolved command: <move the robot to the left 1 cm>
this is the tokens: <['move', 'the', 'robot', 'to', 'the', 'left', '1', 'cm']>
actions:
  number_of_actions: 1

  action1:
    action_type: move
    direction: left
    distance: 1
    lin_velocity: 10


In [None]:
import nltk
from nltk import CFG
from sentence_transformers import SentenceTransformer
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Define the CFG for robot tasks
grammar = CFG.fromstring("""
  S -> Move | Rotate | Draw | Change_Color | Pen_Down
  Move -> 'move' Direction | 'move' To_Location
  Rotate -> 'rotate' Angle | 'rotate' Direction
  Draw -> 'draw' Shape | 'draw' 'freeform'
  Change_Color -> 'change_color' Color
  Pen_Down -> 'pen_down'

  Direction -> 'forward' | 'backward' | 'left' | 'right'
  To_Location -> 'to' Location
  Location -> 'location1' | 'location2' | 'location3'
  Angle -> '90' 'degrees' | '180' 'degrees' | '360' 'degrees'
  Shape -> 'circle' | 'square' | 'triangle'
  Color -> 'red' | 'blue' | 'green'
""")

# Define a function to compute Sentence-BERT embeddings
def get_sbert_embedding(word):
    model = SentenceTransformer('all-MiniLM-L6-v2')
    embedding = model.encode(word, convert_to_tensor=True)
    return embedding

# Pre-compute embeddings for the recognized verbs
list_of_words = ["move", "rotate", "draw", "change_color", "pen_down"]
df_sbert = pd.DataFrame(columns=["verb", "embedding"])
for word in list_of_words:
    df_sbert = pd.concat([df_sbert, pd.DataFrame([{"verb": word, "embedding": get_sbert_embedding(word)}])], ignore_index=True)

# Function to resolve verbs based on cosine similarity
def resolve_verb(input_verb):
    input_embedding = get_sbert_embedding(input_verb)
    df_sbert['similarity'] = df_sbert['embedding'].apply(lambda x: cosine_similarity(x.unsqueeze(0), input_embedding.unsqueeze(0)).flatten()[0])
    closest_verb = df_sbert.loc[df_sbert['similarity'].idxmax(), 'verb']
    return closest_verb

# Function to parse a command using nltk and the defined grammar
def parse_command(command):
    words = command.split()
    verb = words[0]
    resolved_verb = resolve_verb(verb)
    command = command.replace(verb, resolved_verb, 1)
    parser = nltk.ChartParser(grammar)
    try:
        for tree in parser.parse(command.split()):
            return tree
    except ValueError:
        return "Command not understood"

# Example usage
#command = "advance forward"
#parsed_command = parse_command(command)
#print(parsed_command)


In [None]:
# Example usage
# df should be a DataFrame with verbs mapped to specific actions
command = "move"
print(parse_command(command))

None
