In [1]:
import os
# Only fall back to the placeholder when no key is already exported in the
# environment, so a real credential set outside the notebook is never
# overwritten by this cell (and never needs to be pasted into the notebook).
os.environ.setdefault('OPENAI_API_KEY', "<YOUR_API_KEY_HERE>")

In [2]:
from ASPIRE_LINQX.ai.chains.microservice import module_to_microservice, object_to_microservice
from ASPIRE_LINQX.ai.memory.internal_logging import CustomActionLogSummaryMemory, FSAMemory, ConversationSummaryMemory

from ASPIRE_LINQX.testing.models.drivers import MicrowaveSynthesizer as MSModule
from ASPIRE_LINQX.testing.models.drivers.MicrowaveSynthesizerObject import MicrowaveSynthesizer as MSObject

from langchain_openai import ChatOpenAI
from ASPIRE_LINQX.utils.benchmarking.output import AgentOutputBenchmarker, AgentRegexOutputBenchmarker
from ASPIRE_LINQX.utils.benchmarking.path import AgentPathBenchmarker
from ASPIRE_LINQX.utils.benchmarking.state import AgentStateBenchmarker

from ASPIRE_LINQX.ai.agents.core import create_linqx_chat_agent

In [3]:
# Convert the module-level MicrowaveSynthesizer driver into a command microservice.
# NOTE(review): the 'query'/'text' records printed by this cell look like one LLM
# call per driver function — confirm; if so, a valid API key is needed to re-run it.
module_command_microservice = module_to_microservice(MSModule)

{'query': '\n    Create a command from the below information. \n    Do not include any parameters that are not in the function signature.\n    If the function signature denotes that it returns something, check to docstring to find the return signature.\n\n    Function name: allocate_session\n    Microservice: MicrowaveSynthesizer\n    UUID: e9a87625-b075-49c3-b60e-40a37cd2ec89\n    Function Signature: () -> dict\n    Docstring: Allocates a session on the microwave synthesizer.\nMust be called prior to any other action.\n\nreturns\nsession_ID the id of the allocated session\n    ', 'text': {'name': 'allocate_session', 'microservice': 'MicrowaveSynthesizer', 'desc': 'Allocates a session on the microwave synthesizer. Must be called prior to any other action.', 'uuid': 'e9a87625-b075-49c3-b60e-40a37cd2ec89', 'parameters': {}, 'has_return': True, 'return_signature': {'session_ID': 'the id of the allocated session'}}}
{'query': '\n    Create a command from the below information. \n    Do not

In [4]:
# Instantiate the object-based driver and convert that instance into a command
# microservice. `ms_object` is kept because later cells pass `ms_object._reset`
# to the benchmarker as its `reset_system` callback.
ms_object = MSObject()
object_command_microservice = object_to_microservice(ms_object)

{'query': '\n    Create a command from the below information. \n    Do not include any parameters that are not in the function signature.\n    If the function signature denotes that it returns something, check to docstring to find the return signature.\n\n    Function name: allocate_session\n    Microservice: MicrowaveSynthesizer\n    UUID: 63b45e82-e4b0-4d19-953e-02dd03be06af\n    Function Signature: () -> dict\n    Docstring: Allocates a session on the microwave synthesizer.\nMust be called prior to any other action.\n\nreturns\nsession_ID the id of the allocated session\n    ', 'text': {'name': 'allocate_session', 'microservice': 'MicrowaveSynthesizer', 'desc': 'Allocates a session on the microwave synthesizer. Must be called prior to any other action.', 'uuid': '63b45e82-e4b0-4d19-953e-02dd03be06af', 'parameters': {}, 'has_return': True, 'return_signature': {'session_ID': 'the id of the allocated session'}}}
{'query': '\n    Create a command from the below information. \n    Do not

## Regex Matching PubChem

## Path Benchmarking MS

### No Initial State

In [7]:
from pydantic import BaseModel, Field
from typing import Literal
from ASPIRE_LINQX.utils.benchmarking.path import AgentPathBenchmarker
from langchain_openai import ChatOpenAI

# Argument schema paired with the 'load_vial' step in the desired paths below.
class LoadVialSchema(BaseModel):
    # Only the literal value 3 validates (the prompt asks for vial 3).
    vial_num: Literal[3]
    # Any string is accepted; the printed runs show a different ID per iteration.
    session_ID: str

# Argument schema paired with the 'update_heating_parameters' step in the
# desired paths below; values mirror the prompt (100 degrees, 50 mins, 3 atm).
class HeatingParameterSchema(BaseModel):
    duration: Literal[50]
    temperature: Literal[100]
    # ge == le pins the value to exactly 3.0 while keeping the field a float.
    # NOTE(review): presumably chosen over Literal so both 3 and 3.0 validate — confirm.
    pressure: float = Field(ge=3.0, le=3.0)
    # Any string is accepted; the printed runs show a different ID per iteration.
    session_ID: str

# Two accepted action sequences. They differ only in whether the lid is closed
# before or after the heating parameters are updated. Tuple entries pair an
# action name with the schema its arguments are checked against.
path = [
    ['allocate_session', 'open_lid', ('load_vial', LoadVialSchema), 'close_lid',
     ('update_heating_parameters', HeatingParameterSchema), 'heat_vial'],
    ['allocate_session', 'open_lid', ('load_vial', LoadVialSchema),
     ('update_heating_parameters', HeatingParameterSchema), 'close_lid', 'heat_vial'],
]

# Keyword arguments forwarded to `create_linqx_chat_agent`; no memory options
# are set for this run.
executor_kwargs = dict(
    microservice=object_command_microservice,
    use_pubchem=False,
    llm=ChatOpenAI(model='gpt-4'),
    human_interaction=False,
    agent_as_a_tool=None,
    use_linqx_tools=True,
    return_intermediate_steps=True,
)

# Benchmarker for the no-initial-state case; `ms_object._reset` is supplied as
# the `reset_system` callback.
ms_path_benchmark_no_init = AgentPathBenchmarker(
    executor_fn=create_linqx_chat_agent,
    executor_kwargs=executor_kwargs,
    initial_input='Heat vial 3 to 100 degrees, for 50 mins, at 3 atm',
    desired_output=path,
    reset_system=ms_object._reset,
    verbose=True,
    notebook=True,
)

In [8]:
# Run 20 benchmark iterations; each iteration prints the agent's action path
# and the running success/fail tally.
ms_path_benchmark_no_init.benchmark(20)

  0%|          | 0/20 [00:00<?, ?it/s]

- Fail on output key: intermediate_steps, did not match any of the desired outputs
- Agent action path:
allocate_session: {}
load_vial: {'vial_num': 3, 'session_ID': 'f7361cd5-f28f-4822-9c49-bd2c3ae3a3cd'}
open_lid: {'session_ID': 'f7361cd5-f28f-4822-9c49-bd2c3ae3a3cd'}
load_vial: {'vial_num': 3, 'session_ID': 'f7361cd5-f28f-4822-9c49-bd2c3ae3a3cd'}
close_lid: {'session_ID': 'f7361cd5-f28f-4822-9c49-bd2c3ae3a3cd'}
update_heating_parameters: {'duration': 50, 'temperature': 100, 'pressure': 3, 'session_ID': 'f7361cd5-f28f-4822-9c49-bd2c3ae3a3cd'}
heat_vial: {'session_ID': 'f7361cd5-f28f-4822-9c49-bd2c3ae3a3cd'}

- Iteration 1: Fail
- Success: 0, Fail: 1, Total: 1
- Benchmarking Score: 0.0
--------------------
- Success on output key: intermediate_steps, matched a desired output
- Agent action path:
allocate_session: {}
open_lid: {'session_ID': '125d9305-15fe-4292-ba8c-e62062f8b89e'}
load_vial: {'vial_num': 3, 'session_ID': '125d9305-15fe-4292-ba8c-e62062f8b89e'}
close_lid: {'session_ID':

### Initial State Provided

In [11]:
# Starting device state given to the agent as its memory buffer: no session,
# lid closed, no vial loaded, not heating, no heating parameters set.
initial_state_buffer = """{
  "sessionID": null,
  "lid_status": "closed",
  "vial_status": "unloaded",
  "vial_number": null,
  "heating_status": "not_heating",
  "temp": null,
  "duration": null,
  "pressure": null
}"""

# Same agent settings as the no-initial-state run, plus 'action' memory seeded
# with the state buffer above.
executor_kwargs = dict(
    microservice=object_command_microservice,
    use_pubchem=False,
    llm=ChatOpenAI(model='gpt-4'),
    human_interaction=False,
    agent_as_a_tool=None,
    use_linqx_tools=True,
    return_intermediate_steps=True,
    use_memory='action',
    intermediate_memory_buffer=initial_state_buffer,
)

# NOTE: `path` is not defined in this cell; it must still be the two-sequence
# list from the "No Initial State" cell. Later cells rebind `path` to a flat
# list, so re-run that earlier cell first if executing out of order.
ms_path_benchmark_init = AgentPathBenchmarker(
    executor_fn=create_linqx_chat_agent,
    executor_kwargs=executor_kwargs,
    initial_input='Heat vial 3 to 100 degrees, for 50 mins, at 3 atm',
    desired_output=path,
    reset_system=ms_object._reset,
    verbose=True,
    notebook=True,
)

In [12]:
# Run 20 benchmark iterations with the initial state supplied as memory.
ms_path_benchmark_init.benchmark(20)

  0%|          | 0/20 [00:00<?, ?it/s]

- Success on output key: intermediate_steps, matched a desired output
- Agent action path:
allocate_session: {}
open_lid: {'session_ID': '2bb1e078-7f71-4b3f-a18c-3da7c3843b31'}
load_vial: {'vial_num': 3, 'session_ID': '2bb1e078-7f71-4b3f-a18c-3da7c3843b31'}
close_lid: {'session_ID': '2bb1e078-7f71-4b3f-a18c-3da7c3843b31'}
update_heating_parameters: {'duration': 50, 'temperature': 100, 'pressure': 3, 'session_ID': '2bb1e078-7f71-4b3f-a18c-3da7c3843b31'}
heat_vial: {'session_ID': '2bb1e078-7f71-4b3f-a18c-3da7c3843b31'}

- Iteration 1: Success
- Success: 1, Fail: 0, Total: 1
- Benchmarking Score: 1.0
--------------------
- Success on output key: intermediate_steps, matched a desired output
- Agent action path:
allocate_session: {}
open_lid: {'session_ID': 'ea28a561-009b-44ce-bc3c-15b2806e6c34'}
load_vial: {'vial_num': 3, 'session_ID': 'ea28a561-009b-44ce-bc3c-15b2806e6c34'}
close_lid: {'session_ID': 'ea28a561-009b-44ce-bc3c-15b2806e6c34'}
update_heating_parameters: {'duration': 50, 'tempe

## Memory Comparison Testing - Close Lid

In [13]:
# Narrative memory buffer used by the "Action Summary Memory" benchmark: it
# describes a session that has been allocated, with the lid opened, vial 3
# loaded and heating parameters set — the lid has NOT been closed yet.
# NOTE(review): the trailing "secret phrase" sentence is unrelated to the
# heating task; presumably a deliberate distractor — confirm.
summary_memory_buffer = "I have allocated a session with the session ID '45cc282f-6d3a-477f-9e41-03e780ef3753'. Then, I used this session ID to open the lid of the microwave synthesizer. The status is now 'lid_open'. After that, I loaded vial 3 using the same session ID. The status is now 'vial 3 loaded'. I have set the heating parameters to 100 degrees C, for 50 min at 1 atm. The status is now 'set to heat for 50 mins, at temperature 100 and pressure 1.0'. Finally, I retrieved the secret phrase, which is 'Chopra lab'."

# JSON-formatted memory buffer used by the "Pseudo-FSA Memory" benchmark. Its
# keys match the fields of the MicrowaveSynthesizerFSA model defined in that
# section. It encodes the same situation (lid open, vial 3 loaded, parameters
# set) but with a different session ID than the narrative buffer above.
fsa_memory_buffer = """{
  "sessionID": "a959c190-d6d7-4a92-a12a-8bdfd4ab66bf",
  "lid_status": "open",
  "vial_status": "loaded",
  "vial": "3",
  "heating_status": "not_heating",
  "temp": 100,
  "duration": 50,
  "pressure": 1
}
"""

### Action Summary Memory

In [14]:
# Single accepted sequence: the lid must be closed before heating.
# NOTE: this rebinds `path` to a flat list, whereas the path-benchmark cells
# above define it as a list of alternative sequences.
path = ['close_lid', 'heat_vial']

# Agent settings for the action-summary run: module-based microservice with
# 'action' memory seeded from the narrative summary buffer.
executor_kwargs = dict(
    microservice=module_command_microservice,
    use_pubchem=False,
    llm=ChatOpenAI(model='gpt-4'),
    human_interaction=False,
    agent_as_a_tool=None,
    use_linqx_tools=True,
    use_memory='action',
    intermediate_memory_buffer=summary_memory_buffer,
)

# No `reset_system` callback is passed for this benchmark.
summary_benchmark = AgentPathBenchmarker(
    executor_fn=create_linqx_chat_agent,
    executor_kwargs=executor_kwargs,
    initial_input='Heat the vial',
    desired_output=path,
    verbose=True,
    notebook=True,
)

In [15]:
# Run 20 benchmark iterations using the narrative summary memory.
summary_benchmark.benchmark(20)

  0%|          | 0/20 [00:00<?, ?it/s]

- Success on output key: intermediate_steps, matched a desired output
- Agent action path:
close_lid: {'session_ID': '45cc282f-6d3a-477f-9e41-03e780ef3753'}
heat_vial: {'session_ID': '45cc282f-6d3a-477f-9e41-03e780ef3753'}

- Iteration 1: Success
- Success: 1, Fail: 0, Total: 1
- Benchmarking Score: 1.0
--------------------
- Fail on output key: intermediate_steps, did not match any of the desired outputs
- Agent action path:
heat_vial: {'session_ID': '45cc282f-6d3a-477f-9e41-03e780ef3753'}

- Iteration 2: Fail
- Success: 1, Fail: 1, Total: 2
- Benchmarking Score: 0.5
--------------------
- Success on output key: intermediate_steps, matched a desired output
- Agent action path:
close_lid: {'session_ID': '45cc282f-6d3a-477f-9e41-03e780ef3753'}
heat_vial: {'session_ID': '45cc282f-6d3a-477f-9e41-03e780ef3753'}

- Iteration 3: Success
- Success: 2, Fail: 1, Total: 3
- Benchmarking Score: 0.6666666666666666
--------------------
- Fail on output key: intermediate_steps, did not match any of 

### Pseudo-FSA Memory

In [16]:
from langchain.pydantic_v1 import BaseModel, Field
from typing import Literal

# State model for the microwave synthesizer, passed to the agent as `fsa_object`.
# Each field's description states which action is expected to change it.
# A class docstring is intentionally not used: pydantic would add it to the
# model's schema description.
class MicrowaveSynthesizerFSA(BaseModel):
    # Fixed typo: `defaullt=None` -> `default=None`. pydantic v1's Field accepts
    # arbitrary extra keyword arguments, so the misspelling was silently stored
    # as extra field metadata instead of setting the default.
    sessionID: str | None = Field(default=None, description='ID of the session allocation or None if no session allocated. Changed by allocating the session')
    lid_status: Literal['open', 'closed'] = Field(default='closed', description='status of the lid. Changed by opening or closing the lid')
    vial_status: Literal['loaded', 'unloaded'] = Field(default='unloaded', description='status of the vial. Changed by loading/unloading the vial')
    vial: str | None = Field(default=None, description='Identifier of the vial loaded, None if no vial is loaded. Changed by loading/unloading the vial')
    heating_status: Literal['not_heating', 'heating'] = Field(default='not_heating', description='Status of heating. Only changed by heating the vial, not by updating the parameters')
    temp: int | None = Field(default=None, description='set temperature to heat at, None if not currently set. Changed by updating the heating parameters')
    duration: int | None = Field(default=None, description='set duration to heat for, None if not currently set. Changed by updating the heating parameters')
    pressure: float | None = Field(default=None, description='set pressure to heat at, None if not currently set. Changed by updating the heating parameters')

# Single accepted sequence: the lid must be closed before heating.
# NOTE: this rebinds `path` to a flat list, whereas the path-benchmark cells
# above define it as a list of alternative sequences.
path = ['close_lid', 'heat_vial']

# Agent settings for the pseudo-FSA run: same as the action-summary run, but
# the memory buffer is the JSON state and the agent is given the FSA model.
executor_kwargs = dict(
    microservice=module_command_microservice,
    use_pubchem=False,
    llm=ChatOpenAI(model='gpt-4'),
    human_interaction=False,
    agent_as_a_tool=None,
    use_linqx_tools=True,
    use_memory='action',
    intermediate_memory_buffer=fsa_memory_buffer,
    agent_as_a_fsa=True,
    fsa_object=MicrowaveSynthesizerFSA,
)

# No `reset_system` callback is passed for this benchmark.
fsa_benchmark = AgentPathBenchmarker(
    executor_fn=create_linqx_chat_agent,
    executor_kwargs=executor_kwargs,
    initial_input='Heat the vial',
    desired_output=path,
    verbose=True,
    notebook=True,
)

In [17]:
# Run 20 benchmark iterations using the pseudo-FSA (JSON state) memory.
fsa_benchmark.benchmark(20)

  0%|          | 0/20 [00:00<?, ?it/s]

- Success on output key: intermediate_steps, matched a desired output
- Agent action path:
close_lid: {'session_ID': 'a959c190-d6d7-4a92-a12a-8bdfd4ab66bf'}
heat_vial: {'session_ID': 'a959c190-d6d7-4a92-a12a-8bdfd4ab66bf'}

- Iteration 1: Success
- Success: 1, Fail: 0, Total: 1
- Benchmarking Score: 1.0
--------------------
- Success on output key: intermediate_steps, matched a desired output
- Agent action path:
close_lid: {'session_ID': 'a959c190-d6d7-4a92-a12a-8bdfd4ab66bf'}
heat_vial: {'session_ID': 'a959c190-d6d7-4a92-a12a-8bdfd4ab66bf'}

- Iteration 2: Success
- Success: 2, Fail: 0, Total: 2
- Benchmarking Score: 1.0
--------------------
- Success on output key: intermediate_steps, matched a desired output
- Agent action path:
close_lid: {'session_ID': 'a959c190-d6d7-4a92-a12a-8bdfd4ab66bf'}
heat_vial: {'session_ID': 'a959c190-d6d7-4a92-a12a-8bdfd4ab66bf'}

- Iteration 3: Success
- Success: 3, Fail: 0, Total: 3
- Benchmarking Score: 1.0
--------------------
- Success on output ke