In [1]:
import os
from dotenv import load_dotenv
from pathlib import Path

def find_project_root(start: Path) -> Path:
    """Locate the sciborg project root by walking upward from ``start``.

    A directory qualifies as the root when it contains a ``.env`` file or is
    itself named ``sciborg``; the nearest qualifying directory wins.

    When no directory qualifies, fall back to the parent of the nearest path
    component named exactly ``notebooks`` (this covers the usual
    ``<root>/notebooks/testing`` layout at any nesting depth). If there is no
    such component either, ``start`` itself is returned.

    Args:
        start: Directory to begin the search from (normally ``Path.cwd()``).

    Returns:
        The best guess for the project root directory.
    """
    lineage = (start, *start.parents)

    for candidate in lineage:
        if (candidate / '.env').exists() or candidate.name == 'sciborg':
            return candidate

    # Compare whole path components instead of searching the path string, so
    # look-alike names such as "my_notebooks_backup" do not trigger the
    # fallback, and the result does not depend on how deep below "notebooks"
    # the notebook happens to live.
    for candidate in lineage:
        if candidate.name == 'notebooks':
            return candidate.parent

    return start


# Get the project root directory (sciborg root)
current_dir = Path.cwd()
project_root = find_project_root(current_dir)

# Prefer the .env that sits in the project root; when it is missing, let
# load_dotenv() run without an explicit path and report that we did so.
env_path = project_root / '.env'
if not env_path.exists():
    load_dotenv()
    print("Warning: .env not found in project root, trying current directory")
else:
    load_dotenv(dotenv_path=str(env_path))
    print(f"Loaded .env from: {env_path}")

# Get API keys from environment.
# os.getenv reads os.environ directly, so a key that is present is already
# visible to every library in this process; writing it back would be a no-op.
openai_key = os.getenv("OPENAI_API_KEY")
if not openai_key:
    print("Warning: OPENAI_API_KEY not found in environment variables")



In [2]:
import os
import sys

# Put project_root first on sys.path ahead of the imports below. Re-running
# this cell must not stack duplicate entries, so only insert when the root is
# not already the first entry (import priority is the same either way).
project_root_entry = str(project_root)
if sys.path[:1] != [project_root_entry]:
    sys.path.insert(0, project_root_entry)

from langchain_openai import ChatOpenAI
from sciborg.ai.agents.core3 import SciborgAgent
from sciborg.ai.chains.microservice import module_to_microservice
from sciborg.ai.chains.workflow import create_workflow_planner_chain, create_workflow_constructor_chain

from sciborg.utils.drivers import MicrowaveSynthesizer, MicrowaveSynthesizerObject, PubChemCaller, ragCaller
from sciborg.core.library.base import BaseDriverMicroservice


In [3]:
# Build the microservice description for PubChemCaller and save it as JSON
# under the project root.
file_path = str(project_root / 'ai/agents/driver_pubchem.json')

pubchem_command_microservice = module_to_microservice(PubChemCaller)
print(pubchem_command_microservice)

# Pin the encoding: without it open() uses the platform default, which is not
# UTF-8 everywhere, and the dumped descriptions may contain non-ASCII text.
with open(file_path, 'w', encoding='utf-8') as outfile:
    outfile.write(pubchem_command_microservice.model_dump_json(indent=2))


Processing functions:  12%|█▎        | 1/8 [00:02<00:14,  2.07s/function]

{'name': 'get_assay_description_from_AID', 'microservice': 'PubChemCaller', 'desc': 'Function purpose Get high level Assay description, protocol and comment on the scores for a given assay id.', 'uuid': '3b0b4b02-e546-4601-ba7b-99e1471ea47f', 'parameters': {'aid': {'name': 'aid', 'data_type': 'str', 'is_optional': False, 'is_list': False, 'desc': 'string representation of a list of assay IDs'}}, 'has_return': True, 'return_signature': {'dict': 'a dictionary of description, protocol and comment for the given assay ID'}}
<class 'dict'>


Processing functions:  25%|██▌       | 2/8 [00:03<00:10,  1.81s/function]

{'name': 'get_assay_name_from_aid', 'microservice': 'PubChemCaller', 'desc': 'Function purpose', 'uuid': '3b0b4b02-e546-4601-ba7b-99e1471ea47f', 'parameters': {'aid': {'name': 'aid', 'data_type': 'str', 'desc': 'string representation of a list of assay IDs', 'is_optional': False, 'is_list': False, 'from_var': False}}, 'has_return': True, 'return_signature': {'str': 'a dictionary of names for each assay ID'}}
<class 'dict'>


Processing functions:  38%|███▊      | 3/8 [00:06<00:12,  2.49s/function]

{'name': 'get_assay_results_for_compund', 'microservice': 'PubChemCaller', 'desc': 'Function purpose\nGets all the assay results for a provided compund with an optional filter of assay activity type.', 'uuid': '3b0b4b02-e546-4601-ba7b-99e1471ea47f', 'parameters': {'cid': {'name': 'cid', 'data_type': 'str', 'desc': 'a single cid representing a compound', 'is_optional': False}, 'activity_name': {'name': 'activity_name', 'data_type': 'str', 'desc': 'the specific activity type to filter on for example Ki, Kd, IC50, etc', 'is_optional': True}}, 'has_return': True, 'return_signature': {'dict': 'a disctionary representation of the assay result table'}}
<class 'dict'>


Processing functions:  50%|█████     | 4/8 [00:08<00:08,  2.17s/function]

{'name': 'get_cid_from_name', 'microservice': 'PubChemCaller', 'desc': 'Function purpose Get the cid from chemical name', 'uuid': '3b0b4b02-e546-4601-ba7b-99e1471ea47f', 'parameters': {'inp': {'name': 'inp', 'data_type': 'str', 'desc': 'string representation of a chemical name', 'is_optional': False, 'is_list': False, 'from_var': False}}, 'has_return': True, 'return_signature': {'dict': 'a dictionary of cids for the given chemical name'}}
<class 'dict'>


Processing functions:  62%|██████▎   | 5/8 [00:11<00:06,  2.24s/function]

{'name': 'get_compound_property_table', 'microservice': 'PubChemCaller', 'desc': 'A short description of the command', 'uuid': '3b0b4b02-e546-4601-ba7b-99e1471ea47f', 'parameters': {'name': {'name': 'name', 'data_type': 'str', 'is_optional': False, 'is_list': False}, 'inp': {'name': 'inp', 'data_type': 'str', 'is_optional': False, 'is_list': False}, 'inp_format': {'name': 'inp_format', 'data_type': 'str', 'is_optional': False, 'is_list': False}, 'inp_type': {'name': 'inp_type', 'data_type': 'str', 'is_optional': False, 'is_list': False}, 'property_list': {'name': 'property_list', 'data_type': 'str', 'is_optional': False, 'is_list': False}}, 'has_return': True, 'return_signature': {'str': 'string', 'int': 'integer'}}
<class 'dict'>


Processing functions:  75%|███████▌  | 6/8 [00:15<00:05,  2.88s/function]

{'name': 'get_description', 'microservice': 'PubChemCaller', 'desc': "Function purpose\nGet description of a substance or a compound, for assay description, use get_assay_description() instead\n\nInputs\ninp: string representation of an single chemical name only\ninp_format: string of either of name, sid, cid, smiles\ninp_type: 'compound' if inp_format is cid, name or smiles of compound or 'substance' if inp_format is sid, name, smiles of substance\n\nreturns\ndict: a dictionary of descriptions for each identifier", 'uuid': '3b0b4b02-e546-4601-ba7b-99e1471ea47f', 'parameters': {'inp': {'name': 'inp', 'data_type': 'str', 'desc': 'string representation of an single chemical name only', 'is_optional': False, 'is_list': False, 'from_var': False}, 'inp_format': {'name': 'inp_format', 'data_type': 'str', 'desc': 'string of either of name, sid, cid, smiles', 'is_optional': False, 'is_list': False, 'from_var': False}, 'inp_type': {'name': 'inp_type', 'data_type': 'str', 'desc': "'compound' if 

Processing functions:  88%|████████▊ | 7/8 [00:16<00:02,  2.45s/function]

{'name': 'get_smiles_from_name', 'microservice': 'PubChemCaller', 'desc': 'Function purpose Get SMILES for a given chemical name.', 'uuid': '3b0b4b02-e546-4601-ba7b-99e1471ea47f', 'parameters': {'inp': {'name': 'inp', 'data_type': 'str', 'is_optional': False, 'is_list': False, 'desc': 'string representation of a chemical name'}}, 'has_return': True, 'return_signature': {'str': 'a text representing the SMILES for the given chemical name'}}
<class 'dict'>


Processing functions: 100%|██████████| 8/8 [00:21<00:00,  2.68s/function]

{'name': 'get_synonym', 'microservice': 'PubChemCaller', 'desc': 'Function purpose Get Synonym of a substance or compound.', 'uuid': '3b0b4b02-e546-4601-ba7b-99e1471ea47f', 'parameters': {'inp': {'name': 'inp', 'data_type': 'str', 'desc': 'string representation of a list of identifiers', 'is_optional': False, 'is_list': False, 'from_var': False}, 'inp_format': {'name': 'inp_format', 'data_type': 'str', 'desc': 'string which can be either of name, sid, cid, smiles', 'is_optional': False, 'is_list': False, 'from_var': False}, 'inp_type': {'name': 'inp_type', 'data_type': 'str', 'desc': "'compound' if inp_format is cid, name or smiles of compound or 'substance' if inp_format is sid, name, smiles of substance", 'is_optional': False, 'is_list': False, 'from_var': False}}, 'has_return': True, 'return_signature': {'dict': 'a dictionary of synonyms for each identifier'}}
<class 'dict'>
name='PubChemCaller' uuid=UUID('3b0b4b02-e546-4601-ba7b-99e1471ea47f') desc='This is the Module for Pubchem A




In [4]:
# Build the microservice description for MicrowaveSynthesizer and save it as
# JSON under the project root.
file_path = str(project_root / 'ai/agents/driver_MicroSynth.json')

driver_command_microservice = module_to_microservice(MicrowaveSynthesizer)
print(type(driver_command_microservice))

# Pin the encoding: without it open() uses the platform default, which is not
# UTF-8 everywhere, and the dumped descriptions may contain non-ASCII text.
with open(file_path, 'w', encoding='utf-8') as outfile:
    outfile.write(driver_command_microservice.model_dump_json(indent=2))


Processing functions:  12%|█▎        | 1/8 [00:01<00:08,  1.18s/function]

{'name': 'allocate_session', 'microservice': 'MicrowaveSynthesizer', 'desc': 'Allocates a session on the microwave synthesizer. Must be called prior to any other action.', 'uuid': '2ecbd2f2-7615-43f3-8a40-a596d9437a87', 'parameters': {}, 'has_return': True, 'return_signature': {'session_ID': 'the id of the allocated session'}}
<class 'dict'>


Processing functions:  25%|██▌       | 2/8 [00:02<00:08,  1.43s/function]

{'name': 'close_lid', 'microservice': 'MicrowaveSynthesizer', 'desc': 'Closes the lid on the microwave synthesizer. Must be run prior to running heating.', 'uuid': '2ecbd2f2-7615-43f3-8a40-a596d9437a87', 'parameters': {'session_ID': {'name': 'session_ID', 'data_type': 'str', 'desc': 'the id of the current session', 'is_optional': False, 'is_list': False, 'from_var': False}}, 'has_return': True, 'return_signature': {'status': 'is a status string that provides the result of the operation'}}
<class 'dict'>


Processing functions:  38%|███▊      | 3/8 [00:04<00:07,  1.56s/function]

{'name': 'get_precent_conversion', 'microservice': 'MicrowaveSynthesizer', 'desc': 'Gets the precent conversion of synthesis after running the experiment. Can only be called after heating', 'uuid': '2ecbd2f2-7615-43f3-8a40-a596d9437a87', 'parameters': {'session_ID': {'name': 'session_ID', 'data_type': 'str', 'desc': 'the id of the current session', 'is_optional': False, 'is_list': False, 'from_var': False}}, 'has_return': True, 'return_signature': {'precent_conversion': 'denots the precent conversion of the sysntesis reaciton'}}
<class 'dict'>


Processing functions:  50%|█████     | 4/8 [00:06<00:06,  1.55s/function]

{'name': 'heat_vial', 'microservice': 'MicrowaveSynthesizer', 'desc': 'Heats the loaded vial to the set heating parameters. Must be run after loading vial, closing lid, and updating heating_parameters', 'uuid': '2ecbd2f2-7615-43f3-8a40-a596d9437a87', 'parameters': {'session_ID': {'name': 'session_ID', 'data_type': 'str', 'is_optional': False, 'is_list': False, 'desc': 'the id of the current session'}}, 'has_return': True, 'return_signature': {'status': 'is a status string that provides the result of the operation'}}
<class 'dict'>


Processing functions:  62%|██████▎   | 5/8 [00:08<00:06,  2.02s/function]

{'name': 'load_vial', 'microservice': 'MicrowaveSynthesizer', 'desc': 'Loads a vial into the microwave synthesizer. Must be run prior to heating.', 'uuid': '2ecbd2f2-7615-43f3-8a40-a596d9437a87', 'parameters': {'vial_num': {'name': 'vial_num', 'data_type': 'int', 'precision': -1, 'upper_limit': 10, 'lower_limit': 1, 'is_optional': False, 'is_list': False, 'default': None, 'from_var': False, 'desc': 'The vial number, must be between 1 and 10'}, 'session_ID': {'name': 'session_ID', 'data_type': 'str', 'precision': -1, 'upper_limit': None, 'lower_limit': None, 'is_optional': False, 'is_list': False, 'default': None, 'from_var': False, 'desc': 'The id of the current session'}}, 'has_return': True, 'return_signature': {'status': 'status is a status string that provides the result of the operation'}}
<class 'dict'>


Processing functions:  75%|███████▌  | 6/8 [00:10<00:03,  1.91s/function]

{'name': 'open_lid', 'microservice': 'MicrowaveSynthesizer', 'desc': 'Opens the lid on the microwave synthesizer. Must be run prior to loading a vial.', 'uuid': '2ecbd2f2-7615-43f3-8a40-a596d9437a87', 'parameters': {'session_ID': {'name': 'session_ID', 'data_type': 'str', 'is_optional': False, 'is_list': False, 'desc': 'the id of the current session'}}, 'has_return': True, 'return_signature': {'status': 'is a status string that provides the result of the operation'}}
<class 'dict'>


Processing functions:  88%|████████▊ | 7/8 [00:12<00:01,  1.80s/function]

{'name': 'unload_vial', 'microservice': 'MicrowaveSynthesizer', 'desc': 'Unloads a vial from the microwave synthesizer. Must be run after heating.', 'uuid': '2ecbd2f2-7615-43f3-8a40-a596d9437a87', 'parameters': {'session_ID': {'name': 'session_ID', 'data_type': 'str', 'is_optional': False, 'is_list': False, 'desc': 'the id of the current session'}}, 'has_return': True, 'return_signature': {'status': 'is a status string that provides the result of the operation'}}
<class 'dict'>


Processing functions: 100%|██████████| 8/8 [00:16<00:00,  2.01s/function]

{'name': 'update_heating_parameters', 'microservice': 'MicrowaveSynthesizer', 'desc': 'Sets the heating parameters of the microwave synthesizer. Must be run prior to heating.', 'uuid': '2ecbd2f2-7615-43f3-8a40-a596d9437a87', 'parameters': {'duration': {'name': 'duration', 'data_type': 'int', 'precision': -1, 'upper_limit': 60, 'lower_limit': 1, 'is_optional': False, 'is_list': False, 'default': None, 'from_var': False, 'desc': 'A integer between 1 and 60 minutes'}, 'temperature': {'name': 'temperature', 'data_type': 'int', 'precision': -1, 'upper_limit': 250, 'lower_limit': 25, 'is_optional': False, 'is_list': False, 'default': None, 'from_var': False, 'desc': 'An integer between 25 and 250 celsius'}, 'pressure': {'name': 'pressure', 'data_type': 'float', 'precision': -1, 'upper_limit': 10, 'lower_limit': 1, 'is_optional': False, 'is_list': False, 'default': None, 'from_var': False, 'desc': 'A float between 1 and 10 atm'}, 'session_ID': {'name': 'session_ID', 'data_type': 'str', 'preci




In [3]:
# Build the microservice description for ragCaller and save it as JSON under
# the project root.
file_path = str(project_root / 'ai/agents/driver_rag.json')

rag_microservice = module_to_microservice(ragCaller)
print(rag_microservice)

# Pin the encoding: without it open() uses the platform default, which is not
# UTF-8 everywhere, and the dumped descriptions may contain non-ASCII text.
with open(file_path, 'w', encoding='utf-8') as outfile:
    outfile.write(rag_microservice.model_dump_json(indent=2))


Processing functions: 100%|██████████| 1/1 [00:03<00:00,  3.81s/function]

{'name': 'external_information_retrieval', 'microservice': 'ragCaller', 'desc': 'Retrieve external information and generate an answer using a RAG approach.', 'uuid': '6ed1b866-a35a-46c2-9cb7-951ef821d019', 'parameters': {'question': {'name': 'question', 'data_type': 'str', 'is_optional': False, 'desc': 'The question to be answered.'}, 'vectorstore_path': {'name': 'vectorstore_path', 'data_type': 'str', 'is_optional': False, 'desc': 'Path to load the vector store from. Usually provided by the user.'}, 'embeddings': {'name': 'embeddings', 'data_type': 'Any', 'is_optional': False, 'desc': "Embedding model or the string 'openai' to use for vector store retrieval."}, 'llm': {'name': 'llm', 'data_type': 'langchain_core.language_models.base.BaseLanguageModel', 'is_optional': True, 'default': {'client': '<openai.resources.chat.completions.Completions object at 0x14af969c6550>', 'async_client': '<openai.resources.chat.completions.AsyncCompletions object at 0x14af96983c10>', 'root_client': '<ope




In [4]:
# Name the agent's inputs first, then wire them together in one call.
pubchem_agent_llm = ChatOpenAI(model='gpt-4')
nih_embeddings_path = str(project_root / 'embeddings/NIH_docs_embeddings')
agent_memory_kinds = ['chat', 'action', 'embedding']

# agent_2 is driven by the PubChem microservice built earlier in the notebook.
agent_2 = SciborgAgent(
    microservice=pubchem_command_microservice,
    llm=pubchem_agent_llm,
    human_interaction=False,
    memory_file='core_memory.json',
    rag_vectordb_path=nih_embeddings_path,
    use_memory=agent_memory_kinds,
    return_intermediate_steps=True,
    verbose=True,
)


  memories.append(ConversationBufferWindowMemory(


In [5]:
# Show the tools attached to agent_2 (rendered as this cell's output).
agent_2.tools

[LinqxTool(name='get_assay_description_from_AID', description='Function Signature:\n(aid: str) -> dict\nFunction Docstring:\nFunction purpose\nGet high level Assay description, protocol and comment on the scores for a given assay id.\n\nInputs\naid: string representation of a list of assay IDs\n\nreturns\ndict: a dictionary of description, protocol and comment for the given assay ID', args_schema=<class 'langchain_core.utils.pydantic.get_assay_description_from_AID_'>, linqx_command=BaseDriverCommand(name='get_assay_description_from_AID', microservice='PubChemCaller', uuid=UUID('3b0b4b02-e546-4601-ba7b-99e1471ea47f'), desc='Function purpose Get high level Assay description, protocol and comment on the scores for a given assay id.', parameters={'aid': ParameterModel(name='aid', data_type='str', precision=-1, upper_limit=None, lower_limit=None, allowed_values=[], is_optional=False, is_list=False, default='None', from_var=False, var_name='', desc='string representation of a list of assay I

In [7]:
# Ask agent_2 a single question; the returned value is shown as cell output.
# NOTE(review): the saved output for this cell traces a different question
# (about fatty acids) and reports 'input': '' -- confirm that "question" is
# the key SciborgAgent.invoke expects and re-run to refresh the output.
microwave_question = "How does microwave irradiation influence reaction mechanisms differently compared to conventional heating methods?"
agent_2.invoke({"question": microwave_question})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mQuestion: How does the structure of fatty acids affect their properties?
Thought: The question asks about the impact of fatty acid structure on their properties. This is related to chemistry, specifically the structure-property relationship in organic compounds. I can use the PubChem API to get information about fatty acids, their structure, and properties. 

Action:
```
{
  "action": "get_description",
  "action_input": {
    "inp": "fatty acids",
    "inp_format": "name",
    "inp_type": "compound"
  }
}
```[0m[38;5;200m[1;3m{'InformationList': {'Information': [{'CID': 264, 'Title': 'Butyric Acid'}, {'CID': 264, 'Description': 'Butyric acid is a straight-chain saturated fatty acid that is butane in which one of the terminal methyl groups has been oxidised to a carboxy group. It has a role as a Mycoplasma genitalium metabolite and a human urinary metabolite. It is a straight-chain saturated fatty acid and a fatty acid 4:0

  chain = LLMChain(llm=self.llm, prompt=self.prompt) #this will be deprecated soon



[1m> Finished chain.[0m


{'input': '',
 'chat_history': 'Human: How do you manual tune a SCIEX 4000 QTRAP?\nAI: The manual tuning procedure for a SCIEX 4000 QTRAP mass spectrometer involves the following steps:\n\n1. Mass range of the 4000 Qtrap is 3-2800 Da.\n2. Enable the MS hardware profile with ESI source.\n3. Connect a standalone syringe pump running at 10 uL/min to the source using peek tubing.\n4. Open a prebuilt method in Analyst labeled Q1.\n5. Under continuous infusion, scan the entire mass range and collect 10 scans on average.\n6. Right-click in TIC to open the mass window.\n7. Click on "replace" or "update" icon to adjust the tune parameters.\n\n(Source: papers/SOP/2024_4000 User Guide.pdf, page 1)\nHuman: I was optimizing a method for fatty acids on the SCIEX 4000 QTRAP and i found the best parameters were voltage = 25 eV, inlet temperature = 200 celsius, inlet gas = 15, and nanomate tip voltage = 1600 eV\nAI: Thank you for sharing your optimized parameters for fatty acids on the SCIEX 4000 QTRAP