In [1]:
import os
import sys

# Take the directory two levels above the notebook's working directory as the
# project root and expose it on sys.path for the imports in the cells below.
cwd = os.getcwd()
print("Current working directory:", cwd)
home_dir = os.path.dirname(os.path.dirname(cwd))
print("Home directory:", home_dir)

# Guarded so that re-running this cell does not keep appending the same entry.
if home_dir not in sys.path:
    sys.path.append(home_dir)

Current working directory: /home/hoon/dd-agent/llm_dd/example/test_case
Home directory: /home/hoon/dd-agent/llm_dd


# Preliminary Candidate Set-up (example filtering)

In [2]:
import pandas as pd
from agentD.analysis.drug_likeness_analyzer import DrugLikenessAnalyzer
import json

## Filtering

In [3]:
# Load the three sampling result tables (base file plus the two "_update" files).
original_data = pd.read_csv("analysis_result/combined_sampling.csv")
first_updated_data = pd.read_csv("analysis_result/combined_sampling_update.csv")
second_updated_data = pd.read_csv("analysis_result/combined_sampling_update_update.csv")

# Stack the tables in that order; for a repeated SMILES string only the last
# occurrence in stacking order is kept, then the index is renumbered.
combined_data = (
    pd.concat(
        [original_data, first_updated_data, second_updated_data],
        ignore_index=True,
    )
    .drop_duplicates(subset=['SMILES'], keep='last')
    .reset_index(drop=True)
)

# Selection cut-offs. Every comparison below is strict: a row is kept only if
# its value is greater than the corresponding threshold.
rule_threshold = 2
qed_threshold = 0.5
affinity_threshold = 6

passes_rules = combined_data['num_passed_rules'].gt(rule_threshold)
passes_qed = combined_data['QED'].gt(qed_threshold)
passes_affinity = combined_data['Affinity'].gt(affinity_threshold)

filtered_data = combined_data.loc[
    passes_rules & passes_qed & passes_affinity
].reset_index(drop=True)

# Persist the selection; the thresholds are encoded in the file name.
filtered_csv_path = f"analysis_result/filtered_r{rule_threshold}_q{qed_threshold}_a{affinity_threshold}.csv"
filtered_data.to_csv(filtered_csv_path, index=False)
filtered_data

Unnamed: 0,Method,SMILES,lipinski_rule_of_5,veber_rule,ghose_filter,rule_of_3,oprea_lead_like,fda_criteria,fda_approval_category,num_passed_rules,QED,Affinity,smiles,summary
0,Mol2Mol_sampling,O=C(NS(=O)(=O)c1ccc(N2CCN(C3CCCCC3)CC2=O)cc1)c...,True,True,False,False,True,False,Low (Requires Optimization),3.0,0.514987,6.048075,,
1,Mol2Mol_sampling,O=C(NS(=O)(=O)c1ccc(N2CCNCC2)c(O)c1)c1ccc(N2CC...,True,True,False,False,True,False,Low (Requires Optimization),3.0,0.588783,6.194175,,


## Visualization with predicted properties

In [6]:
# Property tables: one per sampling result file, plus the table for the
# existing drug used in the comparison section further down.
property_csv_paths = (
    "property/combined_sampling.csv",
    "property/combined_sampling_update.csv",
    "property/combined_sampling_update_update.csv",
    "property/venetoclax.csv",
)
original_prop, first_updated_prop, second_updated_prop, drug_prop = map(
    pd.read_csv, property_csv_paths
)

In [8]:
# Pick the candidate to visualise by its row position in the filtered table.
entry_no = 0
row = filtered_data.iloc[entry_no]
smiles = row['SMILES']
method = row['Method']

# The number of '_update' occurrences in Method selects the property table:
# none -> original, one -> first update, anything else -> second update.
update_count = method.count('_update')
prop_tables = {0: original_prop, 1: first_updated_prop}
prop_source = prop_tables.get(update_count, second_updated_prop)

# Retrieve the property dictionary from the first row whose SMILES matches.
matching_rows = prop_source[prop_source['SMILES'] == smiles]
if matching_rows.empty:
    # Same exception type that .iloc[0] raises on an empty selection, but with
    # a message that names the molecule that could not be found.
    raise IndexError(
        f"No property row found for SMILES {smiles!r} "
        f"(Method={method!r}, update_count={update_count})"
    )
prop_dict = matching_rows.iloc[0].to_dict()

# Run analysis; the radar plots are written under the given save directory.
analyzer = DrugLikenessAnalyzer(data=prop_dict)
analyzer.generate_all_radar_plots(save_directory=f"visuals/new_mol_batch2_{entry_no}")

 GENERATING ALL RADAR PLOTS

Attempting Lipinski Rule of 5...
 Saved: visuals/new_mol_batch2_0/lipinski_radar_plot.png
 Lipinski Rule of 5: SUCCESS

Attempting Veber Rule...
 Saved: visuals/new_mol_batch2_0/veber_radar_plot.png
 Veber Rule: SUCCESS

Attempting Ghose Filter...
 Saved: visuals/new_mol_batch2_0/ghose_radar_plot.png
 Ghose Filter: SUCCESS

Attempting Rule of 3...
 Saved: visuals/new_mol_batch2_0/rule_of_3_radar_plot.png
 Rule of 3: SUCCESS

Attempting Oprea Lead-like...
 Saved: visuals/new_mol_batch2_0/oprea_radar_plot.png
 Oprea Lead-like: SUCCESS

 RADAR PLOT SUMMARY:
Successful plots: 5
Failed/skipped: 0

 Successfully generated:
  - Lipinski Rule of 5
  - Veber Rule
  - Ghose Filter
  - Rule of 3
  - Oprea Lead-like


{'successful': [('lipinski', 'Lipinski Rule of 5'),
  ('veber', 'Veber Rule'),
  ('ghose', 'Ghose Filter'),
  ('rule_of_3', 'Rule of 3'),
  ('oprea', 'Oprea Lead-like')],
 'failed': [],
 'total_attempted': 5}

## Existing drug case

In [21]:
# Read the SMILES string of the existing drug from the extraction file.
# `json` is imported in the notebook's import cell. The encoding is given
# explicitly so the read does not depend on the platform's locale default.
with open("extraction.json", "r", encoding="utf-8") as f:
    extraction = json.load(f)
existing_drug = extraction['SMILES']

# Look up the drug's property row; fail with a descriptive message instead of
# the generic out-of-bounds error that .iloc[0] gives on an empty selection.
drug_rows = drug_prop[drug_prop['SMILES'] == existing_drug]
if drug_rows.empty:
    raise IndexError(
        f"No property row found for existing drug SMILES {existing_drug!r}"
    )
prop_dict = drug_rows.iloc[0].to_dict()

# Generate the same set of radar plots as for the candidates, in a separate directory.
analyzer = DrugLikenessAnalyzer(data=prop_dict)
analyzer.generate_all_radar_plots(save_directory="visuals/new_venetoclax")

 GENERATING ALL RADAR PLOTS

Attempting Lipinski Rule of 5...
 Saved: visuals/new_venetoclax/lipinski_radar_plot.png
 Lipinski Rule of 5: SUCCESS

Attempting Veber Rule...
 Saved: visuals/new_venetoclax/veber_radar_plot.png
 Veber Rule: SUCCESS

Attempting Ghose Filter...
 Saved: visuals/new_venetoclax/ghose_radar_plot.png
 Ghose Filter: SUCCESS

Attempting Rule of 3...
 Saved: visuals/new_venetoclax/rule_of_3_radar_plot.png
 Rule of 3: SUCCESS

Attempting Oprea Lead-like...
 Saved: visuals/new_venetoclax/oprea_radar_plot.png
 Oprea Lead-like: SUCCESS

 RADAR PLOT SUMMARY:
Successful plots: 5
Failed/skipped: 0

 Successfully generated:
  - Lipinski Rule of 5
  - Veber Rule
  - Ghose Filter
  - Rule of 3
  - Oprea Lead-like


{'successful': [('lipinski', 'Lipinski Rule of 5'),
  ('veber', 'Veber Rule'),
  ('ghose', 'Ghose Filter'),
  ('rule_of_3', 'Rule of 3'),
  ('oprea', 'Oprea Lead-like')],
 'failed': [],
 'total_attempted': 5}