In [9]:
import pandas as pd 
import numpy as np
import os
from math import ceil
from tqdm import tqdm

In [2]:
import sys
sys.path.append('../evaluation/')

In [3]:
from utils import build_pdb_dict

In [12]:
# Bridge variant label; in this cell it is only referenced by the commented-out
# log_path alternative below.
bridge_type = 'vp'

# log_path = '../../docking_res/logs/' + bridge_type
# Directory whose entries are listed into log_files.
log_path = '../lightning_logs/vp_bridge_2024-05-12_14_04_39.541860/logs'
# Root directory passed to build_pdb_dict; the scoring loop also joins it with a
# folder name and a ligand '.sdf' filename to locate reference files.
raw_data_path = '../../data/cleaned_crossdocked_data/raw'

# Names of every entry in log_path. os.listdir returns them in arbitrary order.
log_files = os.listdir(log_path)
# build_pdb_dict comes from the project-local utils module. Its second return
# value is indexed by ligand '.sdf' filename elsewhere in this notebook and
# yields a folder name under raw_data_path.
pdb_dict, pdb_rev_dict = build_pdb_dict(raw_data_path)

In [13]:
# Number of entries found in log_path.
len(log_files)

6902

In [14]:
def get_minimized_affinity(filename, mode='ref'):
    """Read a docking score from ``filename``.

    Parameters
    ----------
    filename : str
        Path of the file to read.
    mode : str, default 'ref'
        ``'ref'`` scans the file for the first line containing
        ``<minimizedAffinity>`` and parses the line that follows it.
        Any other value treats the file as a fixed-layout log: characters
        11..16 of lines 20..28 (0-based indices 19..27) are parsed and the
        smallest value is returned.
        NOTE(review): the line/column window presumably matches the result
        table of the docking tool that wrote the logs - confirm against a
        real log file.

    Returns
    -------
    float or None
        The parsed score (a NumPy float64 in log mode). ``None`` when the
        tag is missing or is the last line of the file (``'ref'`` mode), or
        when the log has no lines inside the window (log mode).

    Raises
    ------
    ValueError
        If a value that is present cannot be parsed as a float.
    """
    if mode == 'ref':
        with open(filename, 'r') as file:
            for line in file:
                if '<minimizedAffinity>' in line:
                    # The value sits on the line after the tag. A default is
                    # passed to next() so a tag on the very last line yields
                    # None instead of leaking StopIteration to the caller.
                    value_line = next(file, None)
                    if value_line is None:
                        return None
                    return float(value_line)
        return None

    with open(filename, 'r') as f:
        table_lines = f.readlines()[19:28]

    # A log too short to reach the window has no score; report that explicitly
    # rather than relying on np.min raising on an empty array.
    if not table_lines:
        return None

    scores = np.array([float(line[11:17]) for line in table_lines], dtype=float)
    return np.min(scores)

In [15]:
# Build {ligand name: [score parsed from its log, score parsed from its
# reference .sdf]} for every entry in log_files.
score_dict = {}
for log_file in tqdm(log_files):
    # Ligand name = log filename up to its first '.'.
    ligand_name = log_file.split('.')[0]
    ligand_file = ligand_name + '.sdf'
    # pdb_rev_dict maps the ligand's .sdf filename to its folder under
    # raw_data_path; a ligand missing from the mapping raises KeyError here.
    pdb_folder = pdb_rev_dict[ligand_file]
    ref_file = os.path.join(raw_data_path, pdb_folder, ligand_file)
    ref_score = get_minimized_affinity(ref_file, 'ref')
    # Any mode other than 'ref' makes get_minimized_affinity parse the file as a log.
    gen_score = get_minimized_affinity(os.path.join(log_path, log_file), 'gen')
    score_dict[ligand_name] = [gen_score, ref_score]

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6902/6902 [01:10<00:00, 98.05it/s]


In [16]:
# One row per ligand name; the two-element lists stored in score_dict become
# the 'Binding affinity' (from the log) and 'Reference' (from the .sdf) columns.
df = pd.DataFrame.from_dict(score_dict, orient='index', columns=['Binding affinity', 'Reference'])
df

Unnamed: 0,Binding affinity,Reference
3iph_A_rec_3lhj_lhj_lig_tt_min_0,-6.67,-11.55377
3p1e_B_rec_4ts8_xz8_lig_tt_min_0,-3.68,-6.94268
3g0w_A_rec_5t8j_77t_lig_tt_docked_1,-3.96,-8.17374
1anc_A_rec_1j14_ben_lig_tt_min_0,-4.05,-6.60193
5j0d_A_rec_4nr4_2lk_lig_tt_docked_0,-3.78,-7.99735
...,...,...
3v4a_A_rec_2pip_dht_lig_tt_min_0,-5.66,-7.81757
4m5a_A_rec_5y48_ura_lig_tt_min_0,-1.74,-4.83650
4ls1_A_rec_5h73_7l7_lig_tt_min_0,-3.87,-6.20760
2pvj_A_rec_2pvk_p45_lig_tt_docked_14,-7.30,-8.33211


In [17]:
# Fraction of rows whose 'Binding affinity' value is strictly lower than the
# 'Reference' value.
sum(df['Binding affinity'] < df['Reference']) / len(df)

0.043175891046073604

In [29]:
# Spot check: the folder name pdb_rev_dict holds for one ligand file.
pdb_rev_dict['4ls1_A_rec_5h73_7l7_lig_tt_min_0.sdf']

'PYRD_HUMAN_65_395_0'

In [None]:
def evaluate_pdb(model, meta_data):
    """Collect the best (minimum) score of every log file, grouped by target.

    For each value in ``meta_data['pdb']`` the directory
    ``logs/<model>/<pdb>`` (relative to the current working directory) is
    listed. Each file in it is read, its first 16 lines are dropped, and the
    remainder is treated as consecutive 14-line blocks whose 4th line carries
    a score in characters 11..16. The minimum score of each file is recorded.
    NOTE(review): the 16/14/3 offsets presumably mirror the layout written by
    the docking tool - confirm against a real log file.

    Parameters
    ----------
    model : str
        Name appended to ``'logs/'`` to form the per-model log directory.
    meta_data : pandas.DataFrame
        Must provide a ``'pdb'`` column; each value names a sub-directory.

    Returns
    -------
    dict
        Maps each pdb id to a NumPy array holding one minimum score per log
        file, in sorted filename order. Files without any score line are
        skipped, so an array may be shorter than the directory listing.

    Raises
    ------
    FileNotFoundError
        If a per-target log directory does not exist.
    ValueError
        If a score field cannot be parsed as a float.
    """
    score_dict = {}
    for pdb_id in meta_data['pdb']:
        best_scores = []
        log_path = os.path.join('logs/' + model, pdb_id)
        # os.listdir order is arbitrary; sorting makes the result reproducible.
        for filename in sorted(os.listdir(log_path)):
            with open(os.path.join(log_path, filename), 'r') as f:
                lines = f.readlines()[16:]
            # Every 14th line starting at offset 3 holds a score. Slicing
            # (instead of indexing ceil(len/14) blocks) tolerates a truncated
            # trailing block that ends before its score line.
            block_scores = [float(line[11:17]) for line in lines[3::14]]
            if not block_scores:
                # Nothing to minimise in this file; np.min would raise on it.
                continue
            best_scores.append(np.min(block_scores))
        score_dict[pdb_id] = np.array(best_scores)
    return score_dict