## Run All
Enter the required credentials (OPENAI_API_KEY, OPENAI_ORGANIZATION_ID) in the Startup cell, then press Ctrl+F9 (Windows) or choose Runtime > Run all from the toolbar.

In [None]:
#@title Startup
import os
%pip install Flask Flask_socketio Openai

os.environ["OPENAI_API_KEY"] = "Enter OPENAI API KEY"
os.environ["OPENAI_ORGANIZATION_ID"] = "Enter OPENAI ORG ID"

In [None]:
#@title Verifier

import xml.etree.ElementTree as ET

# Attributes every step of a given type must carry. The keys double as the
# list of step tags the verifier recognises; anything else is reported as an
# unknown action. Grouped by the categories used in the XDL description.
mandatory_properties = {
    # Liquid handling
    "Add": ["vessel", "reagent"],
    "Separate": [
        "purpose",
        "product_phase",
        "from_vessel",
        "separation_vessel",
        "to_vessel",
    ],
    "Transfer": ["from_vessel", "to_vessel"],
    # Stirring
    "StartStir": ["vessel"],
    "Stir": ["vessel", "time"],
    "StopStir": ["vessel"],
    # Temperature control
    "HeatChill": ["vessel", "temp", "time"],
    "HeatChillToTemp": ["vessel", "temp"],
    "StartHeatChill": ["vessel", "temp"],
    "StopHeatChill": ["vessel"],
    # Inert gas
    "EvacuateAndRefill": ["vessel"],
    "Purge": ["vessel"],
    "StartPurge": ["vessel"],
    "StopPurge": ["vessel"],
    # Filtration
    "Filter": ["vessel"],
    "FilterThrough": ["from_vessel", "to_vessel", "through"],
    "WashSolid": ["vessel", "solvent", "volume"],
    # Special
    "Wait": ["time"],
    "Repeat": ["repeats"],
    # Other
    "CleanVessel": ["vessel"],
    "Crystallize": ["vessel"],
    "Dissolve": ["vessel", "solvent"],
    "Dry": ["vessel"],
    "Evaporate": ["vessel"],
    "Irradiate": ["vessel", "time"],
    "Precipitate": ["vessel"],
    "ResetHandling": [],
    "RunColumn": ["from_vessel", "to_vessel"],
}

# Full attribute whitelist per step type. Despite the name, each list also
# repeats the step's mandatory attributes: verify_procedure rejects any
# attribute of a step that is not listed here for its tag.
optional_properties = {
    'Add': ['vessel', 'reagent', 'volume', 'mass', 'amount', 'dropwise', 'time', 'stir', 'stir_speed', 'viscous', 'purpose'],
    'Separate': ['purpose', 'product_phase', 'from_vessel', 'separation_vessel', 'to_vessel', 'waste_phase_to_vessel', 'solvent', 'solvent_volume', 'through', 'repeats', 'stir_time', 'stir_speed', 'settling_time'],
    'Transfer': ['from_vessel', 'to_vessel', 'volume', 'amount', 'time', 'viscous', 'rinsing_solvent', 'rinsing_volume', 'rinsing_repeats', 'solid'],
    'StartStir': ['vessel', 'stir_speed', 'purpose'],
    'Stir': ['vessel', 'time', 'stir_speed', 'continue_stirring', 'purpose'],
    'StopStir': ['vessel'],
    'HeatChill': ['vessel', 'temp', 'time', 'stir', 'stir_speed', 'purpose'],
    'HeatChillToTemp': ['vessel', 'temp', 'active', 'continue_heatchill', 'stir', 'stir_speed', 'purpose'],
    'StartHeatChill': ['vessel', 'temp', 'purpose'],
    'StopHeatChill': ['vessel'],
    'EvacuateAndRefill': ['vessel', 'gas', 'repeats'],
    'Purge': ['vessel', 'gas', 'time', 'pressure', 'flow_rate'],
    'StartPurge': ['vessel', 'gas', 'pressure', 'flow_rate'],
    'StopPurge': ['vessel'],
    'Filter': ['vessel', 'filtrate_vessel', 'stir', 'stir_speed', 'temp', 'continue_heatchill', 'volume'],
    'FilterThrough': ['from_vessel', 'to_vessel', 'through', 'eluting_solvent', 'eluting_volume', 'eluting_repeats', 'residence_time'],
    'WashSolid': ['vessel', 'solvent', 'volume', 'filtrate_vessel', 'temp', 'stir', 'stir_speed', 'time', 'repeats'],
    'Wait': ['time'],
    'Repeat': ['repeats', 'children', 'loop_variables', 'iterative'],
    'CleanVessel': ['vessel', 'solvent', 'volume', 'temp', 'repeats'],
    'Crystallize': ['vessel', 'ramp_time', 'ramp_temp'],
    'Dissolve': ['vessel', 'solvent', 'volume', 'amount', 'temp', 'time', 'stir_speed'],
    'Dry': ['vessel', 'time', 'pressure', 'temp', 'continue_heatchill'],
    'Evaporate': ['vessel', 'time', 'pressure', 'temp', 'stir_speed'],
    # 'wavelength' was previously misspelled 'wavelegth', which made the
    # verifier reject the correctly spelled attribute.
    'Irradiate': ['vessel', 'time', 'wavelength', 'color', 'temp', 'stir', 'stir_speed', 'cooling_power'],
    'Precipitate': ['vessel', 'time', 'temp', 'stir_speed', 'reagent', 'volume', 'amount', 'add_time'],
    'ResetHandling': ['solvent', 'volume', 'repeats'],
    'RunColumn': ['from_vessel', 'to_vessel', 'column'],
}

# Attributes a <Reagent> element may carry; parse_reagents reports any other
# attribute as not allowed.
reagent_properties = [
    "name",
    "inchi",
    "cas",
    "role",
    "preserve",
    "use_for_cleaning",
    "clean_with",
    "stir",
    "temp",
    "atmosphere",
    "purity",
]


def parse_hardware(root, error_list, available_hardware):
    """Collect the component ids declared in every ``Hardware`` section.

    Args:
        root: Parsed XDL element tree root.
        error_list: List of ``{"step": ..., "errors": [...]}`` dicts. Problems
            found on individual components are appended to it in place.
        available_hardware: Optional collection of permitted component ids.
            When it is empty or ``None`` no membership check is performed.

    Returns:
        A tuple ``(hardware_list, error_list, (error, strs))``:
        ``hardware_list`` holds the ``id`` of every ``Component`` that has
        one, ``error_list`` is the same list object that was passed in,
        ``error`` is a message (``""`` if none) about tags other than
        ``Component`` inside ``Hardware``, and ``strs`` contains each
        ``Hardware`` element serialized back to XML text.
    """
    hardware_list = []
    strs = []
    error = ""
    for hardware in root.iter('Hardware'):
        strs.append(
            ET.tostring(hardware, encoding='unicode', method='xml').strip())
        # iter() yields the Hardware element itself plus all its descendants.
        if any(elem.tag not in ("Hardware", "Component")
               for elem in hardware.iter()):
            error = "The Hardware section should only contain Component tags"

        for component in hardware.iter('Component'):
            if 'id' not in component.attrib:
                error_str = "One or more Component tags do not have the 'id' attribute."
                step_str = ET.tostring(
                    component, encoding='unicode', method='xml').strip()
                error_list.append(
                    {"step": step_str, "errors": [error_str]})
                continue

            component_id = component.attrib['id']
            if available_hardware and component_id not in available_hardware:
                # str.join leaves no trailing separator, so the joined list is
                # used whole (slicing it used to cut off the last id).
                error_str = f"{component_id} is not defined in the given Hardware list. The available Hardware is: {', '.join(available_hardware)}."
                error_list.append(
                    {"step": "Hardware definition", "errors": [error_str]})
            # The id is recorded even when it is not in available_hardware, so
            # later vessel checks do not report the same component again.
            hardware_list.append(component_id)
    return hardware_list, error_list, (error, strs)


def parse_reagents(root, error_list, available_reagents):
    """Collect the reagent names declared in every ``Reagents`` section.

    Args:
        root: Parsed XDL element tree root.
        error_list: List of ``{"step": ..., "errors": [...]}`` dicts; every
            problem found here is appended to it in place.
        available_reagents: Optional collection of permitted reagent names.
            When it is empty or ``None`` no membership check is performed.

    Returns:
        The ``name`` of every ``Reagent`` element that has one, in document
        order.
    """
    reagent_list = []
    for reagents in root.iter('Reagents'):
        for reagent in reagents.iter('Reagent'):
            # A missing name is reported below as a normal error; reading
            # attrib['name'] unconditionally used to raise KeyError here.
            name = reagent.attrib.get('name')
            if available_reagents and name is not None and name not in available_reagents:
                # str.join leaves no trailing separator, so nothing is trimmed.
                error_str = f"{name} is not defined in the given Reagents list. The available reagents are: {', '.join(available_reagents)}."
                error_list.append(
                    {"step": "Reagents definition", "errors": [error_str]})

            errors = []
            if name is None:
                errors.append("You must have 'name' property in Reagent")
            else:
                reagent_list.append(name)
            for attr in reagent.attrib:
                if attr not in reagent_properties:
                    errors.append(
                        f"The {attr} property in Reagent is not allowed")
            if errors:
                step_str = ET.tostring(
                    reagent, encoding='unicode', method='xml').strip()
                error_list.append({"step": step_str, "errors": errors})
    return reagent_list


def verify_procedure(root, hardware, reagents, error_list):
    """Validate each direct child of every ``Procedure`` element.

    For each step this checks that the tag is a known action, that all
    mandatory attributes are present, that no attribute outside the action's
    whitelist is used, that referenced vessels and reagents were declared,
    and that no nested element carries text content. Only direct children of
    ``Procedure`` are treated as steps; elements nested inside a step are
    only inspected for text content.

    Args:
        root: Parsed XDL element tree root.
        hardware: Component ids declared in the Hardware section.
        reagents: Reagent names declared in the Reagents section.
        error_list: List of ``{"step": ..., "errors": [...]}`` dicts; one
            entry is appended in place for every step with problems.

    Returns:
        The same ``error_list`` object that was passed in.
    """
    for procedure in root.iter('Procedure'):
        for step in procedure:
            action = step.tag
            errors = []
            if action not in mandatory_properties:
                errors.append(f"There is no {action} action in XDL")
            else:
                required = mandatory_properties[action]
                permitted = optional_properties[action]

                for prop in required:
                    if prop not in step.attrib:
                        errors.append(
                            f"You must have '{prop}' property when doing '{action}'")

                # Sorted so that the same invalid step always produces the
                # same message text (plain set order varies between runs).
                allowed = ', '.join(sorted(set(permitted) | set(required)))
                for attr in step.attrib:
                    if attr not in permitted:
                        errors.append(
                            f"The {attr} property in the {action} procedure is not allowed. The allowed properties are: {allowed}.")

                # Check vessels are defined in Hardware.
                # NOTE(review): the needle "Hardware" is capitalised while the
                # haystack is lower-cased, so this guard is always true and the
                # vessel check always runs. Kept as-is to preserve behaviour;
                # confirm whether a hardware error was meant to suppress it.
                if len(error_list) == 0 or "Hardware" not in error_list[0]["step"].lower():
                    for attr in ['vessel', 'from_vessel', 'to_vessel']:
                        if attr in step.attrib and step.attrib[attr] not in hardware:
                            errors.append(
                                f"{step.attrib[attr]} is not defined in Hardware")

                # Check reagents are defined in Reagents.
                if 'reagent' in step.attrib and step.attrib['reagent'] not in reagents:
                    reagent_name = step.attrib["reagent"]
                    errors.append(f"{reagent_name} is not defined in Reagents")

                # Text inside nested elements is flagged once per offending
                # element; the step element itself is skipped here.
                for elem in step.iter():
                    if elem is not step and elem.text and elem.text.strip():
                        errors.append(
                            "There should be no text content between tags.")

            if errors:
                step_str = ET.tostring(
                    step, encoding='unicode', method='xml').strip()
                # Collapse all whitespace runs so the step prints on one line.
                step_str = ' '.join(step_str.split())
                error_list.append({"step": step_str, "errors": errors})
    return error_list


def verify_synthesis(root, available_hardware, available_reagents):
    """Run all structural checks on a parsed XDL tree and gather the errors.

    The checks run in a fixed order: text content on any element, the
    Hardware section, the Reagents section, then the Procedure steps. All of
    them append to one shared list.

    Args:
        root: Parsed XDL element tree root.
        available_hardware: Optional collection of permitted component ids.
        available_reagents: Optional collection of permitted reagent names.

    Returns:
        A list of ``{"step": ..., "errors": [...]}`` dicts, empty when no
        problem was found.
    """
    # Any element whose text is more than whitespace is an error.
    error_list = [
        {
            "step": ET.tostring(node, encoding='unicode', method='xml').strip(),
            "errors": [f"Tags should not have text content: '{node.text.strip()}'"],
        }
        for node in root.iter()
        if node.text and node.text.strip()
    ]

    # parse_hardware appends per-component errors to error_list itself; only
    # the section-level message comes back separately.
    hardware, _, (section_error, _) = parse_hardware(
        root, error_list, available_hardware)
    if section_error != "":
        error_list.append(
            {"step": "Hardware definition", "errors": [section_error]})

    reagents = parse_reagents(root, error_list, available_reagents)
    return verify_procedure(root, hardware, reagents, error_list)


def verify_xdl(xdl, available_hardware=None, available_reagents=None):
    """
    Verify XDL and return errors
    :param xdl: The text to verify. Anything before the first <XDL> and after
                the first </XDL> that follows it is ignored.
    :param available_hardware: Optional collection of permitted component ids.
    :param available_reagents: Optional collection of permitted reagent names.
    :return: Returns an empty list if the input is valid.
             Otherwise returns a list of dictionaries:
               "errors": The error messages for that entry (always present).
               "step": The string of the line which contains the error. It is
                       absent when the <XDL> block is missing or cannot be
                       parsed as XML.
    """
    # Both tags must be present, with the closing tag after the opening one.
    # (The previous test only looked for "</XDL>", so a missing "<XDL>" raised
    # ValueError from str.index instead of producing an error entry.)
    start = xdl.find("<XDL>")
    end = xdl.find("</XDL>", start) if start != -1 else -1
    if start == -1 or end == -1:
        error_message = f"\n{xdl}\nThis XDL was not correct. XDL should start with <XDL> and end with </XDL>. Please fix the errors."
        return [{"errors": [error_message]}]
    xdl = xdl[start:end + len("</XDL>")]

    try:
        root = ET.fromstring(xdl)
    except Exception as e:
        # Deliberately broad: the input is unvetted model output and any parse
        # failure should come back as feedback rather than crash the caller.
        # Only the part of the message before the first ':' is kept.
        return [{"errors": ["Input XDL cannot be parsed as XML, there is {} error".format(str(e).split(":")[0])]}]

    return verify_synthesis(root, available_hardware, available_reagents)


In [None]:
#@title XDL Description
# Language description handed to the model by translate()/prompt(). This is
# runtime data: every character is sent to the API, so edit it with care.
XDL_description = """XDL files will follow XML syntax and consist of three mandatory sections: Hardware, where virtual vessels that the reaction mixture can reside in are declared. Reagents, where all reagents that are used in the procedure are declared, and Procedure, where the synthetic actions involved in the procedure are linearly declared.

XDL File Stub:
<XDL>
  <Synthesis>
      <Hardware>
          <!-- ... -->
      </Hardware>

      <Reagents>
          <!-- ... -->
      </Reagents>

      <Procedure>
          <!-- ... -->
      </Procedure>
  </Synthesis>
</XDL>

Hardware:
Each individual reagent, unless otherwise stated should be contained within their own component.

(format is(Property, Type, Description))

id, str, Name of hardware

Reagents:
The Reagents section contains Reagent elements with the props below.
Any reagents which were combined before the experiment should be combined as one reagent before the procedure. (i.e. 'lime juice mixed with sugar' = <Reagent name='lime juice mixed with sugar' />)

Reagent:
Reagent used by procedure.

(format is(Property, Type, Description))

name, str, Name of reagent

Procedure:
All steps included in the Full Steps Specification may be given within the Procedure block of a XDL file. Additionally, the Procedure block may be, but does not have to be, divided up into Prep, Reaction, Workup and Purification blocks, each of which can contain any of the steps in the specification.


Here is a list of tags that can be used in this language:
Liquid Handling: Add, Separate, Transfer,
Stirring: StartStir, Stir, StopStir,
Temperature Control: HeatChill, HeatChillToTemp, StartHeatChill, StopHeatChill
Inert Gas: EvacuateAndRefill, Purge, StartPurge, StopPurge
Filtration: Filter, FilterThrough, WashSolid
Special: Wait, Repeat,
Other: CleanVessel, Crystallize, Dissolve, Dry, Evaporate, Irradiate, Precipitate, ResetHandling, RunColumn

Steps:
Liquid Handling:
Add:
Add liquid or solid reagent. Reagent identity (ie liquid or solid) is determined by the solid property of a reagent in the Reagent section.

The quantity of the reagent can be specified using either volume (liquid units) or amount (all accepted units e.g. ‘g’, ‘mL’, ‘eq’, ‘mmol’).

format(Property	Type	Description)
vessel	vessel	Vessel to add reagent to.
reagent	reagent	Reagent to add.

Separate:
Perform separation.
format(Property Type Description)
Property	Type	Description
purpose	str	'wash' or 'extract'. 'wash' means that product phase will not be the added solvent phase, 'extract' means product phase will be the added solvent phase. If no solvent is added just use 'extract'.
product_phase	str	'top' or 'bottom'. Phase that product will be in.
from_vessel	vessel	Contents of from_vessel are transferred to separation_vessel and separation is performed.
separation_vessel	vessel	Vessel in which separation of phases will be carried out.
to_vessel	vessel	Vessel to send product phase to.


Transfer:
Transfer liquid from one vessel to another.

The quantity to transfer can be specified using either volume (liquid units) or amount (all accepted units e.g. ‘g’, ‘mL’, ‘eq’, ‘mmol’).

format(Property	Type	Description)
from_vessel	vessel	Vessel to transfer liquid from.
to_vessel	vessel	Vessel to transfer liquid to.


Stirring:
StartStir:
Start stirring vessel.
format(Property	Type	Description)
vessel	vessel	Vessel to start stirring.


Stir:
Stir vessel for given time.
format(Property	Type	Description)
vessel	vessel	Vessel to stir.
time	float	Time to stir vessel for.


StopStir:
Stop stirring given vessel.
format(Property	Type	Description)
vessel	vessel	Vessel to stop stirring.

Temperature Control:
HeatChill:
Heat or chill vessel to given temp for given time.
format(Property	Type	Description)
vessel	vessel	Vessel to heat or chill.
temp	float	Temperature to heat or chill vessel to.
time	float	Time to heat or chill vessel for."""

In [None]:
#@title XDL Generation
import os
import openai

def prompt(instructions, description, max_tokens, model="text-davinci-003"):
    """Function that calls the OpenAI API

    Args:
        instructions: Text to convert (or previously generated XDL plus the
            errors to correct).
        description: Description of the XDL language given to the model.
        max_tokens: Upper bound on the number of generated tokens.
        model: One of "text-davinci-003", "gpt-3.5-turbo" or "gpt-4".

    Returns:
        The text generated by the model.

    Raises:
        Exception: If OPENAI_API_KEY (or, for the chat models,
            OPENAI_ORGANIZATION_ID) is unset or still holds its "Enter ..."
            placeholder.
        ValueError: If ``model`` is not one of the supported names.
    """
    # An unset variable is treated like the untouched placeholder, so both
    # cases give the same readable error instead of a bare KeyError.
    if "Enter" in os.environ.get("OPENAI_API_KEY", "Enter"):
        raise Exception("Missing API_Key or ORG_ID")

    if model == "text-davinci-003":
        response = openai.Completion.create(
            model="text-davinci-003",
            prompt=description + "\nConvert to XDL:\n" + instructions,
            temperature=0,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        return response["choices"][0]["text"]

    if model in ("gpt-3.5-turbo", "gpt-4"):
        if "Enter" in os.environ.get("OPENAI_ORGANIZATION_ID", "Enter"):
            raise Exception("Missing API_Key or ORG_ID")
        response = openai.ChatCompletion.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a natural language to XDL translator, you must also do your best to correct any incorrect XDL, only use items contained in the description, here is a description of XDL:\n" + description},
                {"role": "user", "content": f"\nConvert/Correct the following to proper XDL:\n{instructions}"},
            ],
            temperature=0,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        return response["choices"][0]["message"]["content"]

    # Falling through used to return None, which only failed later inside the
    # verifier with an unrelated TypeError.
    raise ValueError(
        f"Unsupported model: {model!r}. Choose text-davinci-003, gpt-3.5-turbo, or gpt-4.")


def translate(input_xdl, model):
    """Function that translates the input XDL

    Asks the model for XDL, runs the result through ``verify_xdl`` and, while
    the verifier reports errors, re-prompts with the original input followed
    by the rejected output and its errors. Progress is printed as it goes.

    Args:
        input_xdl: Natural-language procedure (or XDL) to convert.
        model: Model name forwarded to ``prompt``.

    Returns:
        The first generated XDL that passes verification, or the string
        "The correct XDL could not be generated." if no attempt passed or the
        API call failed.
    """
    openai.api_key = os.environ["OPENAI_API_KEY"]
    openai.organization = os.environ["OPENAI_ORGANIZATION_ID"]

    max_attempts = 10  # cap on retries to limit token usage
    original_input = input_xdl
    xdl = "The correct XDL could not be generated."
    correct_syntax = False

    for _ in range(max_attempts):
        try:
            gpt3_output = prompt(input_xdl, XDL_description, 1000, model)
        except Exception as e:
            print(f"\u001b[31m{e}.\033[0m Too many tokens required or invalid API key.")
            break

        compile_correct = verify_xdl(gpt3_output)
        if not compile_correct:
            xdl = gpt3_output
            correct_syntax = True
            break

        # De-duplicate while keeping the order the verifier reported, so the
        # follow-up prompt is identical from run to run (joining a set is not).
        unique_errors = list(dict.fromkeys(
            error for item in compile_correct for error in item["errors"]))
        error_message = f"\n{gpt3_output}\nThis XDL was not correct. These were the errors\n{os.linesep.join(unique_errors)}\nPlease fix the errors."
        # Each retry starts again from the original input plus only the most
        # recent failed attempt; earlier attempts are not accumulated.
        input_xdl = f"{original_input} {error_message}"

        print("gpt3_output:::")
        print(error_message, "\n")

    print(f"XDL: {xdl}")
    print(f"Final syntax valid: {correct_syntax}")
    return xdl

def main():
    """Interactive entry point: read a procedure and a model name, print the XDL."""
    print("Welcome to the XDL Translator CLI!")

    source_text = input("Enter the text to be converted to XDL: ")
    chosen_model = input("Choose the GPT model version (text-davinci-003, gpt-3.5-turbo, or gpt-4): ")

    # translate() reports its own progress; the final result is echoed again.
    print(translate(source_text, chosen_model))


if __name__ == "__main__":
    main()
