In [1]:
# Make the repository root importable from this example directory.
# The root is taken to be two levels above the current working directory,
# so the notebook must be started from its own folder for this to resolve.
import os
import sys

cwd = os.getcwd()
print("Current working directory:", cwd)
home_dir = os.path.dirname(os.path.dirname(cwd))
print("Home directory:", home_dir)
# Only register the path once: re-running this cell must not keep stacking
# duplicate entries onto sys.path.
if home_dir not in sys.path:
    sys.path.append(home_dir)

Current working directory: /home/hoon/dd-agent/llm_dd/examples/BCL-2
Home directory: /home/hoon/dd-agent/llm_dd


In [2]:
import json
import warnings
# Blanket filter: silences every Python warning for the rest of the kernel
# session. It is installed before the project imports below, so warnings
# raised while importing them are hidden as well.
warnings.filterwarnings('ignore')
# Project-local modules; presumably importable because an earlier cell added
# the repository root to sys.path -- verify when running from another folder.
from agentD.agents import agentD
from agentD.analysis_utils import get_tool_decorated_functions, custom_serializer
from agentD.prompts import FORMAT_INSTRUCTIONS

In [3]:
def _load_tools(relative_path):
    """Run get_tool_decorated_functions on one module located under home_dir.

    Judging by the helper's name it returns the tool-decorated callables found
    in that file; the exact return type is defined in agentD.analysis_utils.
    """
    module_path = os.path.join(home_dir, relative_path)
    return get_tool_decorated_functions(module_path)


# One collection per tool family, loaded in a fixed order.
ret_tools = _load_tools("agentD/tools/retrieval.py")
comm_tools = _load_tools("agentD/tools/common.py")
lit_tools = _load_tools("agentD/tools/lit_analysis.py")
pred_tools = _load_tools("agentD/tools/prediction.py")
gen_tools = _load_tools("agentD/tools/generation.py")

# Tool set used by this notebook: generation, retrieval and prediction.
# comm_tools and lit_tools are loaded above but not part of this set.
tools = gen_tools + ret_tools + pred_tools

# Parallel lists of the name / description attribute of every active tool.
tool_names = [active.name for active in tools]
tool_desc = [active.description for active in tools]

# Initial Pool

In [4]:
# Opening part of the agent prompt; passed to agentD as `prefix=` further down.
# It sets the persona (AgentD, an AI chemist) and the goal of producing a pool
# of drug-like molecules as SMILES.
# NOTE(review): the text says an "existing drug name" is provided, while the
# human prompt built in a later cell embeds a SMILES string -- confirm whether
# the wording should be aligned.
PREFIX = """You are AgentD, a highly knowledgeable and methodical AI chemist.
Your objective is to generate a pool of drug-like molecules in SMILES format using structured, reproducible methods.
You are provided with an existing drug name.
Always utilize the designated tools, and if uncertain, refrain from generating speculative information.
"""

# Closing part of the agent prompt; passed to agentD as `suffix=` further down.
# It spells out a four-step protocol (dissimilar retrieval, similar retrieval,
# two REINVENT configs, REINVENT execution) and ends with the `{input}` and
# `{agent_scratchpad}` placeholders -- presumably filled in by the agent's
# prompt template at run time; confirm in agentD.
# The trailing spaces inside the string are part of the prompt text and are
# kept as-is.
SUFFIX = """You MUST adhere strictly to the following protocol to complete the task:

1. Retrieve structurally **distinct molecules** in SMILES format from ChEMBL based on the given drug.  
2. Retrieve structurally **similar molecules** in SMILES format from ChEMBL based on the given drug.  
3. Prepare two REINVENT configuration files as follows:  
   - One for the "Mol2Mol" model type. (save smi file) 
   - One for the "Reinvent" model type.  
4. Execute REINVENT using both configuration files. 
   
---
### Notes:
- Begin by briefly stating the problem and how you will approach it.
- Don't skip any steps.

Now begin your task.

Question: {input}
Thought: {agent_scratchpad}
"""

In [5]:
# Read the seed molecule from extraction.json (looked up relative to the
# working directory). The encoding is given explicitly so the read does not
# depend on the platform's locale default.
with open("extraction.json", "r", encoding="utf-8") as f:
    data = json.load(f)

initial_smiles = data.get("SMILES", "")
# Fail fast: without a seed SMILES the prompt below would reference an empty
# molecule and the agent run would start from meaningless input.
if not initial_smiles:
    raise ValueError(
        'extraction.json does not contain a non-empty "SMILES" entry; '
        "cannot build the pooling prompt."
    )
print("Initial SMILES:", initial_smiles)
Human_prompt = f"Create a pool of drug-like molecules based on the existing drug molecule, {initial_smiles}."

# Payload handed to agent.invoke(): the human request plus the tool objects
# and their names / descriptions collected earlier in the notebook.
input_data = {
    "input": Human_prompt,
    "tools": tools,
    "tool_names": tool_names,
    "tool_desc": tool_desc,
}

Initial SMILES: CC1(C)CCC(CN2CCN(c3ccc(C(=O)NS(=O)(=O)c4ccc(NCC5CCOCC5)c([N+](=O)[O-])c4)c(Oc4cnc5[nH]ccc5c4)c3)CC2)=C(c2ccc(Cl)cc2)C1


In [6]:
# Keyword configuration for the agentD wrapper: the chat model to use and the
# three prompt fragments (prefix, suffix, format instructions).
agent_settings = dict(
    model="gpt-4o",
    prefix=PREFIX,
    suffix=SUFFIX,
    format_instructions=FORMAT_INSTRUCTIONS,
)

# Build the wrapper around the active tool set and keep its `.agent`
# attribute, which is the object invoked in the next cell.
agent = agentD(tools, **agent_settings).agent

`embedding_function` is expected to be an Embeddings object, support for passing in a function will soon be removed.


In [7]:
# Create the output folder *before* starting the agent run: if it were missing,
# the open() below would raise only after the run had finished, and the
# response would be lost.
os.makedirs("response", exist_ok=True)

# Run the agent on the prepared payload and persist the full response as JSON.
# custom_serializer is used as the fallback for values json cannot encode
# natively.
response = agent.invoke(input_data)
with open("response/pooling.json", "w", encoding="utf-8") as f:
    json.dump(response, f, indent=2, default=custom_serializer)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI will generate a pool of drug-like molecules based on the given SMILES string by following these steps:

1. Retrieve structurally distinct molecules in SMILES format from ChEMBL based on the given drug.
2. Retrieve structurally similar molecules in SMILES format from ChEMBL based on the given drug.
3. Prepare two REINVENT configuration files:
   - One for the "Mol2Mol" model type.
   - One for the "Reinvent" model type.
4. Execute REINVENT using both configuration files.

Let's start by retrieving structurally distinct molecules.

Action: get_dissimilar_molecules
Action Input: CC1(C)CCC(CN2CCN(c3ccc(C(=O)NS(=O)(=O)c4ccc(NCC5CCOCC5)c([N+](=O)[O-])c4)c(Oc4cnc5[nH]ccc5c4)c3)CC2)=C(c2ccc(Cl)cc2)C1[0mDissimilar molecules saved to /home/hoon/dd-agent/llm_dd/examples/BCL-2/pool/dissimilar_molecules.csv

Observation: [36;1m[1;3m['CC1(C)CCC(c2ccc(Cl)cc2)=C(CN2CCN(c3ccc4c(NS(=O)(=O)c5ccc(N[C@H](CCN6CCOCC6)CSc6ccccc6)c([N+](=O)[O-])