In [1]:
from rdkit import Chem
from rdkit.Chem import RDConfig
from rdkit.Chem.QED import qed
import os
from tqdm import tqdm
import seaborn as sns
import numpy as np

In [2]:
import sys
sys.path.append('../evaluation/')
sys.path.append(os.path.join(RDConfig.RDContribDir, "SA_Score"))
import sascorer

In [3]:
from utils import build_pdb_dict

In [65]:
bridge_type = 'vp'

# egnn
# fixed point init
# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-05-31_14_11_45.077216'
# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-05-30_23_27_53.688104'
# Gaussian noise init
# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-06-17_23_04_23.779433'
# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-06-17_23_06_28.209248'

# transformer
# root_path = '../lightning_logs/vp_bridge_CombinedSparseGraphDataset_2024-06-01_21_36_34.208973'
# root_path = '../lightning_logs/vp_bridge_CombinedSparseGraphDataset_2024-05-31_23_42_37.443630'

# only basic

# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-07-19_14_29_28.164795'
# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-07-19_14_29_44.297462'
# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-07-30_02_43_56.802640'

# basic + aromatic

# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-07-20_02_24_30.913781'
# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-07-20_13_18_12.746286'

# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-07-21_23_58_36.562980'
# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-07-30_02_43_19.450576'

# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-07-31_22_40_53.679692'

# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-07-31_22_40_53.679692'    # 0.97 0.32
# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-08-01_11_35_53.861210'    # 0.88 0.29

root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-07-31_22_41_26.883181'

aromatic = False
optimization = True

gen_path = os.path.join(root_path, 'reconstructed_mols')
gen_path = gen_path + '_aromatic_mode' if aromatic else gen_path
gen_path = gen_path + '_optimized' if optimization else gen_path

In [66]:
raw_data_path = '../../data/cleaned_crossdocked_data/raw'
pdb_dict, pdb_rev_dict = build_pdb_dict(raw_data_path)

In [67]:
def get_mols(gen_path, raw_data_path=raw_data_path):
    gen_mols, ref_mols = {}, {}
    for file in tqdm(os.listdir(gen_path)):
        ligand = file.split('.')[0]
        ref_folder = pdb_rev_dict[file]

        gen_m = Chem.MolFromMolFile(os.path.join(gen_path, file))
        ref_m = Chem.MolFromMolFile(os.path.join(raw_data_path, ref_folder, file))
        if gen_m == None or ref_m == None:
            continue
            
        gen_mols[ligand] = gen_m
        ref_mols[ligand] = ref_m

    return gen_mols, ref_mols

In [68]:
gen_mols, ref_mols = get_mols(gen_path)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6888/6888 [00:30<00:00, 225.82it/s]


In [69]:
len(gen_mols), len(ref_mols)

(6888, 6888)

In [70]:
def compute_sa_score(mols, threshold = 5.5):
    
    sa_scores = [sascorer.calculateScore(mol) if mol!=None else 10 for mol in tqdm(mols)]
    pct_easily_synthesized = len([score for score in sa_scores if score <= threshold])/len(sa_scores)
    return sa_scores, pct_easily_synthesized

In [71]:
sa_scores, pct_easily_synthesized = compute_sa_score(gen_mols.values())
pct_easily_synthesized

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6888/6888 [00:01<00:00, 5796.69it/s]


0.6451800232288037

In [72]:
ref_sa_scores, ref_pct_easily_synthesized = compute_sa_score(ref_mols.values())

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6888/6888 [00:00<00:00, 7796.28it/s]


In [73]:
ref_pct_easily_synthesized

0.9907084785133565

In [74]:
np.mean(sa_scores), np.mean(ref_sa_scores)

(5.146101916816015, 2.862210129716047)

In [75]:
qed_scores = [qed(mol) for mol in tqdm(list(gen_mols.values()))]
ref_qed_scores = [qed(mol) for mol in tqdm(ref_mols.values())]
qed_scores, ref_qed_scores

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6888/6888 [00:09<00:00, 731.76it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6888/6888 [00:06<00:00, 989.54it/s]


([0.5047319539402334,
  0.36920407564700897,
  0.4858330900513291,
  0.6195248190862033,
  0.485647085023808,
  0.3659735580833296,
  0.5334721466759034,
  0.2529712952492788,
  0.6697333879948393,
  0.21879856107634005,
  0.2779632527120406,
  0.4601030369172989,
  0.0634508778781253,
  0.5251602047670143,
  0.24972243763241278,
  0.4113347511254448,
  0.20471751905936977,
  0.4911740613206,
  0.3962526427027204,
  0.2793686365536752,
  0.3370259194327873,
  0.5975869558973325,
  0.10024571765812366,
  0.5728006774866798,
  0.6980848639279481,
  0.6175126930576891,
  0.2938475496392687,
  0.3992094799877991,
  0.5582405858298835,
  0.577829751756549,
  0.4498862522575735,
  0.6276185378005529,
  0.3397601479280762,
  0.2689635798961259,
  0.2859795792563848,
  0.27934478822248887,
  0.37218548894387254,
  0.3300683321195934,
  0.5522136127291223,
  0.3260404057605059,
  0.5213828800895949,
  0.07516855514662547,
  0.1559077820876417,
  0.48320131899900803,
  0.3988409141738499,
  0.41

In [76]:
np.mean(qed_scores), np.mean(ref_qed_scores)

(0.4364975223470737, 0.5513536946692938)