In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import pickle
import pandas as pd
import numpy as np
from rdkit import Chem
from openbabel import pybel

from tqdm import tqdm
import os
from collections import defaultdict

In [4]:
import sys
sys.path.append('../')
sys.path.append('../data_processing/')
sys.path.append('../evaluation/')

In [5]:
from pharmacophore_matching import center2zero
from script_utils import load_qm9_data, load_dataset
from data_processing.ligand import Ligand
from utils_eval import extract_pp, extract_all_pp, pp_match

In [6]:
# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-08-19_21_05_04.140916'
root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-08-19_21_05_04.140916/ligand_based'
# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-08-19_13_30_50.133260'
# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-08-19_13_30_50.133260/ligand_based_old'

# root_path = '../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-10-14_13_02_07.121254/ligand_based'

In [7]:
aromatic = True

# we don't use optimized mols for pp matching eval
optimization = False

fn = 'reconstructed_mols' + '_aromatic_mode' if aromatic else 'reconstructed_mols'
match_fn = fn + '_matches.pkl'
score_fn = fn + '_scores.csv'
match_file = os.path.join(root_path, match_fn)
score_file = os.path.join(root_path, score_fn)
match_file

'../lightning_logs/vp_bridge_egnn_CombinedSparseGraphDataset_2024-08-19_21_05_04.140916/ligand_based/reconstructed_mols_aromatic_mode_matches.pkl'

In [8]:
gen_path = os.path.join(root_path, 'reconstructed_mols')
gen_path = gen_path + '_aromatic_mode' if aromatic else gen_path
gen_path = gen_path + '_optimized' if optimization else gen_path

In [9]:
with open(match_file, 'rb') as f:
    match_dict = pickle.load(f)

In [10]:
if 'ligand_based' in root_path:
    pp_info_file = '../../data/cleaned_crossdocked_data/metadata_HDBSCAN_non_filtered/test_pp_info.pkl'
else:
    pp_info_file = '../../data/cleaned_crossdocked_data/metadata/test_pp_info.pkl'
# pp_info_file = '../../data/cleaned_crossdocked_data/metadata/test_pp_info.pkl'
with open(pp_info_file, 'rb') as f:
    pp_info = pickle.load(f)

# pp_info

In [11]:
pp_info['3ny8_A_rec_4ldo_ale_lig_tt_docked_4']

{'pp_atom_indices': [[[0, 1, 2, 3, 4, 5], [6], [7], [8], [12]]],
 'pp_positions': [tensor([[ 2.0536,  4.5065, 50.1497],
          [ 0.7475,  2.8215, 51.8759],
          [ 0.6321,  2.3142, 49.3126],
          [ 3.6812,  7.6494, 50.0184],
          [ 3.1239,  8.7804, 52.3812]])],
 'pp_types': [tensor([[0., 0., 1., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 1., 0., 0.],
          [0., 0., 0., 0., 0., 1., 0., 0.],
          [0., 0., 0., 0., 0., 1., 0., 0.],
          [0., 0., 0., 0., 0., 1., 0., 0.]])],
 'pp_index': [tensor([0, 1, 2, 3, 4])],
 'node_pp_index': [tensor([ 0,  0,  0,  0,  0,  0,  1,  2,  3, -1, -1, -1,  4])]}

In [12]:
test_dataset = load_dataset('CombinedSparseGraphDataset', '../../data/cleaned_crossdocked_data', split='test', aromatic=aromatic)
pp_info = center2zero(pp_info, test_dataset)

use processed_HDBSCAN_non_filtered
use processed_HDBSCAN_non_filtered
use processed_HDBSCAN_non_filtered
use processed_HDBSCAN_non_filtered
use processed_HDBSCAN_non_filtered


100%|██████████████████████████████████████████████████████████████████████| 15235/15235 [00:02<00:00, 6769.94it/s]


In [13]:
pp_info = dict(pp_info)

In [14]:
len(test_dataset)

15235

In [15]:
lig_list = [file.split('.')[0] for file in os.listdir(gen_path)]
len(lig_list)

14835

In [16]:
all_aro_lig = []
for lig in tqdm(lig_list):
    pps = pp_info[lig]['pp_types']
    for pps_ in pps:
        pp_label = np.argmax(pps_.numpy(), axis=-1)
        if 2 in pp_label:
            # print(lig)
            # print(pp_label)
            all_aro_lig.append(lig)

100%|████████████████████████████████████████████████████████████████████| 14835/14835 [00:00<00:00, 153407.40it/s]


In [17]:
# all_aro_lig

In [18]:
len(all_aro_lig)

12760

In [19]:
def check_match(pp_types, pp_positions, pp_atom_indices, ref_pp_info, lig, typ=2, threshold=1.5):
    pps = pp_types
    ref_pps = np.argmax(ref_pp_info['pp_types'].numpy(), axis=-1)
    ref_pp_positions = ref_pp_info['pp_positions'].numpy()

    match = np.zeros_like(ref_pps)
    aro_match = False
    num_atom = 0
    for i, ref_pp in enumerate(ref_pps):
        ref_pos = ref_pp_positions[i]
        for j, pp in enumerate(pps):
            if type(pp) == list:
                for pp_ in pp:
                    if pp_ == ref_pp:
                        pos = pp_positions[j]
                        dist = np.sqrt(np.sum((pos - ref_pos) ** 2))
                        if dist < threshold:
                            match[i] = 1
                            if pp_ == typ:
                                # print(lig, num_atom)
                                aro_match = True
                                num_atom = len(pp_atom_indices[j])
                            continue
                            # match[i] += 1
            else:
                if pp == ref_pp:
                    pos = pp_positions[j]
                    dist = np.sqrt(np.sum((pos - ref_pos) ** 2))
                    if dist < threshold:
                        match[i] = 1
                        if pp == typ:
                            # print(lig, num_atom)
                            aro_match = True
                            num_atom = len(pp_atom_indices[j])
                        continue
                        # match[i] += 1
    return match, aro_match, num_atom

In [20]:
def compute_matching_scores(generated_path, pp_info, typ=2, threshold=1.5):
    score_dict = {}
    match_dict = {}
    aro_match_l = []
    for file in tqdm(os.listdir(generated_path)):
        lig = file.split('.')[0]
        mol_path = os.path.join(generated_path, lig+'.sdf')
        rdmol = Chem.MolFromMolFile(mol_path, sanitize=True)
        pbmol = next(pybel.readfile("sdf", mol_path))
        try:
            rdmol = Chem.AddHs(rdmol)
            ligand = Ligand(pbmol, rdmol, atom_positions=None, conformer_axis=None, filtering=False, preprocess=False)
        except:
            print('ligand init failed')
            continue
        pp_atom_indices, pp_positions, pp_types, pp_index = extract_all_pp(ligand)
        # print(pp_atom_indices, pp_types)
        
        ref_pp_info = pp_info[lig]
        if not all(k in list(ref_pp_info.keys()) for k in ['pp_types', 'pp_positions']):
            print(ref_pp_info)
            continue
        if isinstance(ref_pp_info['pp_types'], list):
            # print(ref_pp_info)
            ref_pp_info = {k:v[-1] for k, v in ref_pp_info.items()}     # To address the iterated list issue, for now we use the last element (which is appended to the list at last). TODO: fix this in the data processing script
        
        match, aro_match, num_atom = check_match(pp_types, pp_positions, pp_atom_indices, ref_pp_info, lig, typ=typ, threshold=threshold)
        match_dict[lig] = match
        score = np.mean(match)
        score_dict[lig] = score
        if aro_match:
            aro_match_l.append((lig, num_atom))
            print(lig, num_atom)

    return match_dict, score_dict, aro_match_l

In [21]:
match_dict, score_dict, aro_match_l = compute_matching_scores(gen_path, pp_info, typ=2)

  1%|▋                                                                        | 138/14835 [00:00<01:05, 223.52it/s]

4yur_A_rec_3cok_anp_lig_tt_min_0 5


  4%|██▋                                                                      | 556/14835 [00:02<01:02, 228.18it/s]

3nbq_B_rec_3nbq_urf_lig_tt_docked_2 6


  4%|███                                                                      | 632/14835 [00:02<00:58, 244.26it/s]

4gi4_A_rec_1s38_maq_lig_tt_min_0 4


  5%|███▊                                                                     | 779/14835 [00:03<00:59, 237.36it/s]

3p1c_B_rec_3p1f_3pf_lig_tt_min_0 6


  6%|████                                                                     | 826/14835 [00:03<01:01, 227.55it/s][21:40:46] Explicit valence for atom # 3 N, 4, is greater than permitted
  6%|████▏                                                                    | 851/14835 [00:03<01:00, 231.66it/s]

ligand init failed


  7%|████▉                                                                   | 1026/14835 [00:05<01:11, 193.55it/s]

1pw7_A_rec_1oty_6mp_lig_tt_min_0 7


  8%|█████▌                                                                  | 1147/14835 [00:05<01:01, 223.13it/s]

5swg_A_rec_5swt_71a_lig_tt_min_0 6
4np3_A_rec_4np3_2l2_lig_tt_min_0 4


  8%|██████                                                                  | 1238/14835 [00:06<01:06, 204.35it/s][21:40:48] atom 12 has specified valence (1) smaller than the drawn valence 2.
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.

 10%|██████▊                                                                 | 1416/14835 [00:06<01:01, 217.55it/s][21:40:49] Explicit valence for atom # 3 N, 4, is greater than permitted
 10%|███████                                                                 | 1464/14835 [00:07<01:00, 222.83it/s]

ligand init failed
4q4s_A_rec_1p0b_pq0_lig_tt_min_0 7


 10%|███████▌                                                                | 1557/14835 [00:07<01:02, 212.67it/s]

4np3_A_rec_4np2_2l1_lig_tt_min_0 4


 11%|███████▉                                                                | 1630/14835 [00:07<00:57, 230.77it/s][21:40:50] Explicit valence for atom # 12 N, 4, is greater than permitted
 11%|████████▏                                                               | 1681/14835 [00:08<00:55, 237.56it/s]

ligand init failed


 12%|████████▊                                                               | 1819/14835 [00:09<01:20, 161.12it/s][21:40:51] Explicit valence for atom # 3 N, 4, is greater than permitted
 12%|████████▉                                                               | 1839/14835 [00:09<01:18, 165.95it/s][21:40:51] atom 9 has specified valence (1) smaller than the drawn valence 2.
[21:40:51] atom 10 has specified valence (4) smaller than the drawn valence 5.
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.

The valence field specifies a valence 1 that is
less than the observed explicit valence 2.
The valence field specifies a valence 4 that is
less than the observed explicit valence 5.

 13%|█████████                                                               | 1879/14835 [00:09<01:15, 171.07it/s]

ligand init failed
5eyc_A_rec_3i5n_b2d_lig_tt_docked_0 5
5bzf_A_rec_4np2_2l1_lig_tt_min_0 4


 13%|█████████▋                                                              | 1988/14835 [00:10<01:04, 198.93it/s][21:40:52] Explicit valence for atom # 20 N, 4, is greater than permitted
 14%|█████████▊                                                              | 2010/14835 [00:10<01:04, 198.13it/s]

4gg9_A_rec_1s38_maq_lig_tt_min_0 4
ligand init failed


 14%|██████████▏                                                             | 2098/14835 [00:10<01:01, 208.05it/s]

5lpq_A_rec_1s38_maq_lig_tt_min_0 4
3euf_A_rec_3nbq_urf_lig_tt_min_0 6


 16%|███████████▌                                                            | 2391/14835 [00:11<00:57, 214.78it/s]

2vce_A_rec_2vce_tc7_lig_tt_docked_0 6


 18%|████████████▋                                                           | 2605/14835 [00:12<00:54, 225.08it/s]

4xud_A_rec_5lsa_dnc_lig_tt_docked_2 6


 18%|█████████████                                                           | 2681/14835 [00:13<00:51, 234.18it/s]

3nvw_C_rec_3nvw_gun_lig_tt_min_0 7


 18%|█████████████▏                                                          | 2705/14835 [00:13<01:54, 105.90it/s][21:40:56] Explicit valence for atom # 7 N, 4, is greater than permitted
 19%|█████████████▎                                                          | 2752/14835 [00:13<01:24, 143.07it/s]

ligand init failed


 19%|█████████████▋                                                          | 2815/14835 [00:14<01:08, 174.52it/s]

1eoc_A_rec_2buz_4nc_lig_tt_docked_4 6


 20%|██████████████▋                                                         | 3031/14835 [00:15<00:55, 212.85it/s]

4deh_A_rec_3i5n_b2d_lig_tt_docked_0 4
1y5x_A_rec_4puk_2wu_lig_tt_min_0 6


 21%|███████████████▍                                                        | 3188/14835 [00:16<00:55, 211.43it/s][21:40:58] Explicit valence for atom # 17 N, 4, is greater than permitted
 22%|███████████████▋                                                        | 3234/14835 [00:16<00:53, 216.37it/s]

ligand init failed
5egr_A_rec_1f3e_dpz_lig_tt_min_0 8


 22%|███████████████▊                                                        | 3256/14835 [00:16<00:55, 208.17it/s][21:40:58] Explicit valence for atom # 3 N, 4, is greater than permitted
 22%|████████████████▏                                                       | 3325/14835 [00:16<00:52, 218.63it/s]

ligand init failed


 23%|████████████████▎                                                       | 3351/14835 [00:16<00:49, 230.39it/s][21:40:59] Explicit valence for atom # 0 N, 4, is greater than permitted
 23%|████████████████▍                                                       | 3375/14835 [00:16<00:50, 227.80it/s]

5bzq_A_rec_5bzi_4wu_lig_tt_min_0 7
ligand init failed


 23%|████████████████▋                                                       | 3446/14835 [00:17<00:50, 227.69it/s]

2bur_A_rec_1eoc_4nc_lig_tt_min_0 6
1k4h_A_rec_1p0b_pq0_lig_tt_docked_0 6


 24%|█████████████████▏                                                      | 3545/14835 [00:18<01:26, 129.89it/s]

5egr_A_rec_1s38_maq_lig_tt_docked_0 4
4nyw_A_rec_4yk0_98_lig_tt_min_0 4


 24%|█████████████████▌                                                      | 3614/14835 [00:18<01:04, 174.19it/s]

3c2y_A_rec_1s38_maq_lig_tt_min_0 4


 25%|█████████████████▉                                                      | 3706/14835 [00:18<00:53, 208.00it/s][21:41:01] Explicit valence for atom # 33 N, 4, is greater than permitted
 25%|██████████████████▏                                                     | 3754/14835 [00:19<00:50, 219.26it/s]

ligand init failed


 26%|███████████████████                                                     | 3925/14835 [00:19<00:49, 221.24it/s]

1eob_A_rec_2buz_4nc_lig_tt_min_0 6


 27%|███████████████████▌                                                    | 4043/14835 [00:20<00:51, 210.85it/s][21:41:02] Explicit valence for atom # 12 N, 4, is greater than permitted
 28%|███████████████████▊                                                    | 4090/14835 [00:20<00:50, 214.49it/s]

ligand init failed


 28%|███████████████████▉                                                    | 4112/14835 [00:20<00:49, 215.63it/s][21:41:02] Explicit valence for atom # 7 N, 4, is greater than permitted
 28%|████████████████████▏                                                   | 4163/14835 [00:20<00:46, 228.50it/s]

ligand init failed
4mrg_A_rec_5bzf_4x3_lig_tt_docked_2 6


 28%|████████████████████▍                                                   | 4209/14835 [00:21<00:48, 220.09it/s][21:41:03] Explicit valence for atom # 7 N, 4, is greater than permitted
 29%|████████████████████▌                                                   | 4233/14835 [00:21<00:47, 225.37it/s][21:41:03] atom 22 has specified valence (2) smaller than the drawn valence 3.
The valence field specifies a valence 2 that is
less than the observed explicit valence 3.

 29%|████████████████████▊                                                   | 4284/14835 [00:21<00:44, 235.28it/s]

ligand init failed


 29%|█████████████████████                                                   | 4335/14835 [00:21<00:47, 223.00it/s]

4xuc_A_rec_5lsa_dnc_lig_tt_min_0 6


 30%|█████████████████████▌                                                  | 4430/14835 [00:22<00:45, 227.96it/s][21:41:04] atom 23 has specified valence (4) smaller than the drawn valence 5.
[21:41:04] atom 24 has specified valence (1) smaller than the drawn valence 2.
The valence field specifies a valence 4 that is
less than the observed explicit valence 5.

The valence field specifies a valence 4 that is
less than the observed explicit valence 5.
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.

 30%|█████████████████████▌                                                  | 4454/14835 [00:22<00:45, 229.63it/s]

1kwc_B_rec_1eir_bpy_lig_tt_min_0 6


 30%|█████████████████████▉                                                  | 4521/14835 [00:22<01:18, 131.41it/s]

2vce_A_rec_2vce_tc7_lig_tt_min_0 6


 31%|██████████████████████                                                  | 4542/14835 [00:23<01:10, 146.48it/s][21:41:05] Explicit valence for atom # 2 N, 4, is greater than permitted
 31%|██████████████████████▎                                                 | 4588/14835 [00:23<00:57, 177.65it/s]

ligand init failed


 31%|██████████████████████▎                                                 | 4609/14835 [00:23<00:58, 174.24it/s][21:41:05] atom 0 has specified valence (1) smaller than the drawn valence 2.
[21:41:05] atom 3 has specified valence (4) smaller than the drawn valence 5.
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.

The valence field specifies a valence 1 that is
less than the observed explicit valence 2.
The valence field specifies a valence 4 that is
less than the observed explicit valence 5.

 31%|██████████████████████▋                                                 | 4672/14835 [00:23<00:52, 193.08it/s]

5mme_A_rec_4nr4_2lk_lig_tt_min_0 5


 32%|██████████████████████▉                                                 | 4736/14835 [00:23<00:50, 200.41it/s][21:41:06] Explicit valence for atom # 11 N, 4, is greater than permitted
 32%|███████████████████████▏                                                | 4781/14835 [00:24<00:47, 209.69it/s]

5eob_A_rec_4ap7_f47_lig_tt_docked_0 4
1y5v_A_rec_1f3e_dpz_lig_tt_min_0 8
ligand init failed


 33%|███████████████████████▋                                                | 4877/14835 [00:24<00:43, 227.00it/s]

5bzc_A_rec_4mre_2c9_lig_tt_min_0 7
4q4o_A_rec_1enu_apz_lig_tt_min_0 7


 34%|████████████████████████▏                                               | 4973/14835 [00:25<00:42, 230.87it/s]

1eoc_A_rec_1eoc_4nc_lig_tt_min_0 6


[21:41:07] atom 23 has specified valence (4) smaller than the drawn valence 5.
[21:41:07] atom 24 has specified valence (1) smaller than the drawn valence 2.
The valence field specifies a valence 4 that is
less than the observed explicit valence 5.

The valence field specifies a valence 4 that is
less than the observed explicit valence 5.
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.

 34%|████████████████████████▍                                               | 5048/14835 [00:25<00:40, 240.51it/s]

5bzj_A_rec_4np2_2l1_lig_tt_docked_10 4


 34%|████████████████████████▊                                               | 5100/14835 [00:25<00:42, 230.78it/s][21:41:07] atom 24 has specified valence (1) smaller than the drawn valence 2.
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.

 36%|█████████████████████████▌                                              | 5278/14835 [00:26<00:41, 232.96it/s][21:41:08] Explicit valence for atom # 8 N, 4, is greater than permitted
 36%|█████████████████████████▊                                              | 5325/14835 [00:26<00:42, 222.44it/s]

ligand init failed


 36%|██████████████████████████                                              | 5372/14835 [00:27<01:31, 103.47it/s][21:41:09] atom 23 has specified valence (4) smaller than the drawn valence 5.
[21:41:09] atom 24 has specified valence (1) smaller than the drawn valence 2.
[21:41:09] Explicit valence for atom # 22 N, 4, is greater than permitted
The valence field specifies a valence 4 that is
less than the observed explicit valence 5.

The valence field specifies a valence 4 that is
less than the observed explicit valence 5.
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.

 37%|██████████████████████████▎                                             | 5419/14835 [00:27<01:05, 143.42it/s]

ligand init failed
5mpz_A_rec_5mqg_f31_lig_tt_docked_3 6


 38%|███████████████████████████▋                                            | 5697/14835 [00:28<00:40, 225.60it/s]

4q4s_A_rec_4gi4_0ex_lig_tt_min_0 4
4k8a_B_rec_4kab_4ka_lig_tt_min_0 7
5bze_A_rec_5bzg_4x6_lig_tt_min_0 7


 39%|████████████████████████████▏                                           | 5819/14835 [00:29<00:40, 222.05it/s]

3zxz_A_rec_3cd8_l5g_lig_tt_docked_0 4


 42%|██████████████████████████████▏                                         | 6215/14835 [00:30<00:38, 224.87it/s][21:41:13] Explicit valence for atom # 12 N, 4, is greater than permitted
 42%|██████████████████████████████▍                                         | 6262/14835 [00:31<00:39, 217.76it/s]

ligand init failed


 43%|███████████████████████████████▏                                        | 6437/14835 [00:32<00:45, 183.98it/s]

4q4o_A_rec_1f3e_dpz_lig_tt_min_0 8


[21:41:14] atom 0 has specified valence (4) smaller than the drawn valence 5.
[21:41:14] atom 2 has specified valence (1) smaller than the drawn valence 2.
The valence field specifies a valence 4 that is
less than the observed explicit valence 5.

The valence field specifies a valence 4 that is
less than the observed explicit valence 5.
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.

 44%|███████████████████████████████▋                                        | 6531/14835 [00:32<00:40, 203.43it/s]

4q8w_A_rec_1f3e_dpz_lig_tt_min_0 8
5jsw_A_rec_1enu_apz_lig_tt_min_0 7


 46%|█████████████████████████████████▎                                      | 6867/14835 [00:34<00:37, 214.36it/s][21:41:16] Explicit valence for atom # 2 N, 4, is greater than permitted


3ns1_L_rec_3ns1_pm6_lig_tt_min_0 7
ligand init failed


 47%|█████████████████████████████████▊                                      | 6960/14835 [00:34<00:36, 216.45it/s]

4mrg_A_rec_5bzg_4x6_lig_tt_min_0 7


 48%|██████████████████████████████████▎                                     | 7075/14835 [00:35<00:34, 224.59it/s]

5i86_B_rec_4yk0_98_lig_tt_min_0 4
4gh1_A_rec_1f3e_dpz_lig_tt_min_0 8


 48%|██████████████████████████████████▍                                     | 7098/14835 [00:35<00:34, 225.29it/s][21:41:17] Explicit valence for atom # 2 N, 4, is greater than permitted
 48%|██████████████████████████████████▊                                     | 7171/14835 [00:35<00:33, 229.04it/s]

ligand init failed


 49%|███████████████████████████████████▏                                    | 7242/14835 [00:36<00:33, 228.74it/s][21:41:18] Explicit valence for atom # 13 N, 4, is greater than permitted
 49%|███████████████████████████████████▎                                    | 7265/14835 [00:36<00:33, 225.76it/s]

3sr6_C_rec_3ns1_pm6_lig_tt_min_0 7
ligand init failed


 51%|████████████████████████████████████▍                                   | 7510/14835 [00:37<00:34, 211.93it/s]

4q8u_A_rec_1f3e_dpz_lig_tt_min_0 8


 52%|█████████████████████████████████████▌                                  | 7727/14835 [00:38<00:29, 237.35it/s][21:41:20] Explicit valence for atom # 4 N, 4, is greater than permitted
 52%|█████████████████████████████████████▋                                  | 7775/14835 [00:38<00:30, 230.50it/s]

ligand init failed


 53%|██████████████████████████████████████▎                                 | 7888/14835 [00:39<00:34, 202.78it/s]

2qzr_A_rec_1s38_maq_lig_tt_min_0 4


 53%|██████████████████████████████████████▌                                 | 7934/14835 [00:39<00:31, 215.84it/s][21:41:21] Explicit valence for atom # 16 N, 4, is greater than permitted
 54%|██████████████████████████████████████▋                                 | 7984/14835 [00:39<00:30, 227.07it/s]

ligand init failed


 55%|███████████████████████████████████████▊                                | 8212/14835 [00:40<00:31, 208.78it/s]

4q4p_A_rec_1f3e_dpz_lig_tt_min_0 8


 56%|████████████████████████████████████████▏                               | 8283/14835 [00:41<00:29, 225.09it/s]

4gi4_A_rec_1f3e_dpz_lig_tt_min_0 8
4rl2_B_rec_4eyf_pnk_lig_tt_docked_0 5


 56%|████████████████████████████████████████▋                               | 8375/14835 [00:42<00:41, 155.76it/s]

5eob_A_rec_3i5n_b2d_lig_tt_docked_0 4


 57%|████████████████████████████████████████▉                               | 8445/14835 [00:42<00:32, 193.86it/s]

2buq_A_rec_2buz_4nc_lig_tt_min_0 6
4ts8_A_rec_4yk0_98_lig_tt_min_0 4


 58%|█████████████████████████████████████████▉                              | 8633/14835 [00:43<00:28, 217.53it/s][21:41:25] atom 0 has specified valence (1) smaller than the drawn valence 2.
[21:41:25] atom 4 has specified valence (4) smaller than the drawn valence 5.
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.

The valence field specifies a valence 1 that is
less than the observed explicit valence 2.
The valence field specifies a valence 4 that is
less than the observed explicit valence 5.

 58%|██████████████████████████████████████████                              | 8657/14835 [00:43<00:27, 222.22it/s]

1ozm_A_rec_1f3e_dpz_lig_tt_docked_1 8


 59%|██████████████████████████████████████████▍                             | 8747/14835 [00:43<00:28, 210.41it/s]

2wgj_A_rec_3ccn_lkg_lig_tt_docked_0 4
4mrf_A_rec_4mre_2c9_lig_tt_min_0 6


 59%|██████████████████████████████████████████▋                             | 8795/14835 [00:43<00:27, 220.34it/s][21:41:26] Explicit valence for atom # 12 N, 4, is greater than permitted
 60%|██████████████████████████████████████████▉                             | 8841/14835 [00:44<00:27, 219.11it/s]

ligand init failed


 61%|███████████████████████████████████████████▉                            | 9054/14835 [00:45<00:26, 216.34it/s]

1q2r_A_rec_1enu_apz_lig_tt_min_0 7


 63%|█████████████████████████████████████████████▎                          | 9339/14835 [00:46<00:28, 193.40it/s]

5lyw_A_rec_5lyw_7bh_lig_tt_min_0 4
1s38_A_rec_1q2r_9dg_lig_tt_min_0 8


 64%|█████████████████████████████████████████████▊                          | 9440/14835 [00:47<00:23, 227.94it/s]

1eoc_A_rec_2buz_4nc_lig_tt_min_0 6


 65%|██████████████████████████████████████████████▌                         | 9583/14835 [00:47<00:23, 219.60it/s][21:41:30] Explicit valence for atom # 6 N, 4, is greater than permitted
 65%|██████████████████████████████████████████████▌                         | 9606/14835 [00:48<00:23, 220.01it/s]

2bur_A_rec_2buz_4nc_lig_tt_min_0 6
ligand init failed
ligand init failed


[21:41:30] Explicit valence for atom # 0 N, 4, is greater than permitted
 66%|███████████████████████████████████████████████▎                        | 9759/14835 [00:48<00:21, 237.50it/s]

3eue_A_rec_3nbq_urf_lig_tt_min_0 6
5bzf_A_rec_5bzg_4x6_lig_tt_min_0 7
5bzg_A_rec_5bzg_4x6_lig_tt_min_0 7


 66%|███████████████████████████████████████████████▋                        | 9831/14835 [00:49<00:22, 225.62it/s]

5i02_A_rec_1f3e_dpz_lig_tt_min_0 8


 67%|████████████████████████████████████████████████▎                       | 9953/14835 [00:49<00:21, 221.98it/s]

4gg9_A_rec_1q2r_9dg_lig_tt_min_0 8
4q8t_A_rec_1s39_aqo_lig_tt_min_0 8


[21:41:31] Explicit valence for atom # 0 N, 4, is greater than permitted
 67%|███████████████████████████████████████████████▊                       | 10002/14835 [00:49<00:20, 231.27it/s]

ligand init failed


 68%|████████████████████████████████████████████████▏                      | 10079/14835 [00:50<00:19, 240.82it/s]

3b9j_C_rec_3ns1_pm6_lig_tt_min_0 7


 69%|████████████████████████████████████████████████▋                      | 10178/14835 [00:50<00:19, 234.07it/s]

4q8v_A_rec_1q2r_9dg_lig_tt_min_0 8
5bzs_A_rec_4mre_2c9_lig_tt_min_0 7


 69%|█████████████████████████████████████████████████                      | 10255/14835 [00:50<00:19, 238.41it/s]

5ep7_A_rec_3p1f_3pf_lig_tt_min_0 6
3nbq_B_rec_3nbq_urf_lig_tt_min_0 6
4eyf_A_rec_4eyf_pnk_lig_tt_min_0 4


 71%|██████████████████████████████████████████████████▎                    | 10504/14835 [00:52<00:20, 209.52it/s]

1q2r_A_rec_1f3e_dpz_lig_tt_min_0 8


 72%|██████████████████████████████████████████████████▊                    | 10627/14835 [00:52<00:18, 223.57it/s]

4xue_B_rec_5lsa_dnc_lig_tt_docked_10 6
1oty_A_rec_1oty_6mp_lig_tt_min_0 7
1efz_A_rec_1f3e_dpz_lig_tt_min_0 8
5bzc_A_rec_5bzg_4x6_lig_tt_min_0 7


 72%|███████████████████████████████████████████████████▏                   | 10699/14835 [00:53<00:17, 231.87it/s][21:41:35] atom 0 has specified valence (1) smaller than the drawn valence 2.
[21:41:35] atom 7 has specified valence (3) smaller than the drawn valence 4.
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.

The valence field specifies a valence 1 that is
less than the observed explicit valence 2.
The valence field specifies a valence 3 that is
less than the observed explicit valence 4.

 72%|███████████████████████████████████████████████████▍                   | 10746/14835 [00:53<00:18, 221.20it/s][21:41:35] Explicit valence for atom # 8 N, 4, is greater than permitted
 73%|███████████████████████████████████████████████████▋                   | 10797/14835 [00:53<00:17, 236.83it/s]

1enu_A_rec_1f3e_dpz_lig_tt_min_0 8
ligand init failed


[21:41:35] Explicit valence for atom # 31 N, 4, is greater than permitted
 73%|███████████████████████████████████████████████████▉                   | 10849/14835 [00:53<00:17, 230.36it/s]

ligand init failed


 74%|████████████████████████████████████████████████████▍                  | 10946/14835 [00:54<00:17, 228.42it/s]

4ouf_B_rec_5ktu_6xb_lig_tt_docked_12 4
4ffs_A_rec_4ojt_ade_lig_tt_min_0 7


 74%|████████████████████████████████████████████████████▊                  | 11045/14835 [00:54<00:16, 229.21it/s]

3ny8_A_rec_4ldo_ale_lig_tt_docked_4 7
4knb_A_rec_3ccn_lkg_lig_tt_docked_0 4
4mrd_A_rec_5bzg_4x6_lig_tt_min_0 7


 75%|█████████████████████████████████████████████████████                  | 11094/14835 [00:54<00:16, 232.09it/s][21:41:37] Explicit valence for atom # 16 N, 4, is greater than permitted
 75%|█████████████████████████████████████████████████████▏                 | 11118/14835 [00:55<00:16, 226.01it/s]

1k9s_D_rec_1a69_fmb_lig_tt_min_0 6
ligand init failed


 76%|█████████████████████████████████████████████████████▉                 | 11280/14835 [00:56<00:29, 120.89it/s]

3ki1_A_rec_3ki3_g9h_lig_tt_min_0 4


 76%|██████████████████████████████████████████████████████                 | 11305/14835 [00:56<00:24, 144.22it/s][21:41:38] Explicit valence for atom # 7 N, 4, is greater than permitted
 76%|██████████████████████████████████████████████████████▏                | 11326/14835 [00:56<00:22, 154.04it/s]

ligand init failed


 77%|██████████████████████████████████████████████████████▋                | 11419/14835 [00:56<00:16, 206.11it/s]

5i00_A_rec_1enu_apz_lig_tt_min_0 6


 78%|███████████████████████████████████████████████████████▍               | 11587/14835 [00:57<00:13, 236.13it/s]

5bzt_A_rec_4mre_2c9_lig_tt_min_0 7
5bzk_A_rec_5bzg_4x6_lig_tt_min_0 7


 80%|████████████████████████████████████████████████████████▉              | 11895/14835 [00:58<00:13, 212.52it/s]

5jxq_A_rec_1p0b_pq0_lig_tt_min_0 6
5bzr_A_rec_4mre_2c9_lig_tt_min_0 7
4dei_A_rec_3cd8_l5g_lig_tt_docked_0 5


 81%|█████████████████████████████████████████████████████████▎             | 11988/14835 [00:59<00:12, 222.16it/s]

4fr6_A_rec_1p0b_pq0_lig_tt_min_0 7
5bzl_A_rec_4mre_2c9_lig_tt_min_0 7


 81%|█████████████████████████████████████████████████████████▊             | 12081/14835 [00:59<00:12, 215.72it/s]

5bzp_A_rec_4np2_2l1_lig_tt_docked_12 4


 82%|██████████████████████████████████████████████████████████▉             | 12148/14835 [01:00<00:26, 99.95it/s][21:41:42] atom 26 has specified valence (1) smaller than the drawn valence 2.
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.

 84%|███████████████████████████████████████████████████████████▌           | 12444/14835 [01:02<00:11, 207.92it/s][21:41:44] Explicit valence for atom # 8 N, 4, is greater than permitted
 84%|███████████████████████████████████████████████████████████▊           | 12491/14835 [01:02<00:10, 217.43it/s]

ligand init failed


 85%|████████████████████████████████████████████████████████████▏          | 12585/14835 [01:02<00:09, 226.88it/s]

5bzj_A_rec_4mre_2c9_lig_tt_min_0 7


 86%|█████████████████████████████████████████████████████████████▎         | 12820/14835 [01:03<00:09, 219.59it/s]

4puk_A_rec_1enu_apz_lig_tt_min_0 7
4yk0_D_rec_4yk0_98_lig_tt_docked_1 4


 88%|██████████████████████████████████████████████████████████████▏        | 12981/14835 [01:04<00:15, 116.53it/s][21:41:47] atom 0 has specified valence (1) smaller than the drawn valence 2.
[21:41:47] atom 3 has specified valence (4) smaller than the drawn valence 5.
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.

The valence field specifies a valence 1 that is
less than the observed explicit valence 2.
The valence field specifies a valence 4 that is
less than the observed explicit valence 5.

 88%|██████████████████████████████████████████████████████████████▋        | 13098/14835 [01:05<00:08, 196.22it/s]

2ei0_A_rec_2ei1_d1n_lig_tt_docked_2 7


 89%|███████████████████████████████████████████████████████████████        | 13167/14835 [01:05<00:07, 212.48it/s][21:41:47] Explicit valence for atom # 8 N, 4, is greater than permitted
 89%|███████████████████████████████████████████████████████████████▏       | 13190/14835 [01:05<00:07, 214.08it/s]

1eob_A_rec_2buu_4nc_lig_tt_min_0 6
ligand init failed
1oum_A_rec_1oty_6mp_lig_tt_docked_8 7


 90%|███████████████████████████████████████████████████████████████▊       | 13321/14835 [01:06<00:07, 209.97it/s]

5iu6_A_rec_1oty_6mp_lig_tt_docked_8 7


 90%|███████████████████████████████████████████████████████████████▉       | 13367/14835 [01:06<00:06, 219.29it/s]

3p1f_A_rec_5mqg_f31_lig_tt_docked_4 7
1yw7_A_rec_5ji6_6kn_lig_tt_docked_1 4


 92%|█████████████████████████████████████████████████████████████████▏     | 13617/14835 [01:07<00:05, 239.64it/s]

5uad_A_rec_3zxz_krw_lig_tt_min_0 4


 92%|█████████████████████████████████████████████████████████████████▌     | 13698/14835 [01:07<00:04, 251.86it/s][21:41:50] atom 25 has specified valence (1) smaller than the drawn valence 2.
[21:41:50] atom 29 has specified valence (4) smaller than the drawn valence 5.
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.

The valence field specifies a valence 1 that is
less than the observed explicit valence 2.
The valence field specifies a valence 4 that is
less than the observed explicit valence 5.

 93%|██████████████████████████████████████████████████████████████████▏    | 13828/14835 [01:08<00:04, 240.48it/s][21:41:50] Explicit valence for atom # 3 N, 4, is greater than permitted
 94%|██████████████████████████████████████████████████████████████████▍    | 13884/14835 [01:08<00:03, 256.70it/s]

4lbu_A_rec_1f3e_dpz_lig_tt_min_0 8
ligand init failed
4q4p_A_rec_1s39_aqo_lig_tt_min_0 8


 94%|██████████████████████████████████████████████████████████████████▊    | 13960/14835 [01:09<00:03, 244.00it/s]

3rr4_A_rec_1p0b_pq0_lig_tt_min_0 6
1vdv_A_rec_3nvw_gun_lig_tt_min_0 7


 96%|███████████████████████████████████████████████████████████████████▊   | 14181/14835 [01:10<00:03, 208.21it/s][21:41:52] atom 15 has specified valence (1) smaller than the drawn valence 2.
[21:41:52] atom 19 has specified valence (4) smaller than the drawn valence 5.
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.

The valence field specifies a valence 1 that is
less than the observed explicit valence 2.
The valence field specifies a valence 4 that is
less than the observed explicit valence 5.

 96%|████████████████████████████████████████████████████████████████████   | 14231/14835 [01:10<00:02, 225.48it/s]

2vch_A_rec_2vce_tc7_lig_tt_min_0 6


 97%|████████████████████████████████████████████████████████████████████▋  | 14355/14835 [01:11<00:02, 231.22it/s]

3ki3_A_rec_3ess_18n_lig_tt_docked_3 8


 97%|████████████████████████████████████████████████████████████████████▊  | 14379/14835 [01:11<00:02, 223.71it/s][21:41:53] Explicit valence for atom # 14 N, 4, is greater than permitted
 97%|█████████████████████████████████████████████████████████████████████  | 14425/14835 [01:11<00:01, 219.02it/s]

ligand init failed


 98%|█████████████████████████████████████████████████████████████████████▎ | 14495/14835 [01:11<00:01, 217.58it/s][21:41:54] Explicit valence for atom # 4 N, 4, is greater than permitted
 98%|█████████████████████████████████████████████████████████████████████▌ | 14539/14835 [01:12<00:01, 212.96it/s]

ligand init failed


 98%|█████████████████████████████████████████████████████████████████████▊ | 14583/14835 [01:12<00:01, 212.41it/s][21:41:54] atom 0 has specified valence (4) smaller than the drawn valence 5.
[21:41:54] atom 1 has specified valence (1) smaller than the drawn valence 2.
The valence field specifies a valence 4 that is
less than the observed explicit valence 5.

The valence field specifies a valence 4 that is
less than the observed explicit valence 5.
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.

 99%|██████████████████████████████████████████████████████████████████████ | 14631/14835 [01:12<00:00, 207.61it/s]

2buu_A_rec_1eoc_4nc_lig_tt_min_0 6


 99%|██████████████████████████████████████████████████████████████████████ | 14652/14835 [01:12<00:00, 204.74it/s][21:41:54] atom 23 has specified valence (4) smaller than the drawn valence 5.
[21:41:54] atom 24 has specified valence (1) smaller than the drawn valence 2.
The valence field specifies a valence 4 that is
less than the observed explicit valence 5.

The valence field specifies a valence 4 that is
less than the observed explicit valence 5.
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.

 99%|██████████████████████████████████████████████████████████████████████▎| 14696/14835 [01:12<00:00, 208.82it/s][21:41:55] Explicit valence for atom # 8 N, 4, is greater than permitted
 99%|██████████████████████████████████████████████████████████████████████▌| 14745/14835 [01:13<00:00, 221.20it/s]

ligand init failed


100%|███████████████████████████████████████████████████████████████████████| 14835/14835 [01:13<00:00, 201.96it/s]

3nvv_C_rec_3nvw_gun_lig_tt_min_0 7





In [44]:
aro_match_l

[('4yur_A_rec_3cok_anp_lig_tt_min_0', 5),
 ('3nbq_B_rec_3nbq_urf_lig_tt_docked_2', 6),
 ('4gi4_A_rec_1s38_maq_lig_tt_min_0', 4),
 ('3p1c_B_rec_3p1f_3pf_lig_tt_min_0', 6),
 ('1pw7_A_rec_1oty_6mp_lig_tt_min_0', 7),
 ('5swg_A_rec_5swt_71a_lig_tt_min_0', 6),
 ('4np3_A_rec_4np3_2l2_lig_tt_min_0', 4),
 ('4q4s_A_rec_1p0b_pq0_lig_tt_min_0', 7),
 ('4np3_A_rec_4np2_2l1_lig_tt_min_0', 4),
 ('5eyc_A_rec_3i5n_b2d_lig_tt_docked_0', 5),
 ('5bzf_A_rec_4np2_2l1_lig_tt_min_0', 4),
 ('4gg9_A_rec_1s38_maq_lig_tt_min_0', 4),
 ('5lpq_A_rec_1s38_maq_lig_tt_min_0', 4),
 ('3euf_A_rec_3nbq_urf_lig_tt_min_0', 6),
 ('2vce_A_rec_2vce_tc7_lig_tt_docked_0', 6),
 ('4xud_A_rec_5lsa_dnc_lig_tt_docked_2', 6),
 ('3nvw_C_rec_3nvw_gun_lig_tt_min_0', 7),
 ('1eoc_A_rec_2buz_4nc_lig_tt_docked_4', 6),
 ('4deh_A_rec_3i5n_b2d_lig_tt_docked_0', 4),
 ('1y5x_A_rec_4puk_2wu_lig_tt_min_0', 6),
 ('5egr_A_rec_1f3e_dpz_lig_tt_min_0', 8),
 ('5bzq_A_rec_5bzi_4wu_lig_tt_min_0', 7),
 ('2bur_A_rec_1eoc_4nc_lig_tt_min_0', 6),
 ('1k4h_A_rec_1p

In [45]:
len(aro_match_l)

120

In [46]:
avg_score_lig_df = pd.read_csv(os.path.join(root_path, fn+'_avg_score_by_lig.csv'), header=0, index_col=0)
avg_score_rec_df = pd.read_csv(os.path.join(root_path, fn+'_avg_score_by_rec.csv'), header=0, index_col=0)
# score_by_lig

In [47]:
filtered_lig = avg_score_lig_df[(avg_score_lig_df['score'] >= 0.5) &
                                (avg_score_lig_df['num sample'] >= 10)]
filtered_lig

Unnamed: 0,score,num sample
mb3,1.000000,60
fa1,0.961039,11
ae3,0.953125,32
pge,0.906250,24
6te,0.900000,11
...,...,...
2ck,0.505376,31
dqu,0.502778,90
5wv,0.500000,18
30x,0.500000,11


In [48]:
filtered_rec = avg_score_rec_df[(avg_score_rec_df['score'] >= 0.5) &
                                (avg_score_rec_df['num sample'] >= 10)]
filtered_rec

Unnamed: 0,score,num sample
1e3k_A,0.775073,13
4apu_B,0.740767,10
5l0k_B,0.728556,13
5hx8_A,0.714368,18
5khx_A,0.714178,20
...,...,...
1vdv_A,0.504219,38
1mdl_A,0.504167,10
4q4m_A,0.500463,12
5ep7_A,0.500397,13


In [51]:
aro_match_lig = [lig[0][lig[0].rfind('rec')+9:lig[0].rfind('rec')+12] for lig in aro_match_l]
aro_match_lig = set(aro_match_lig)
aro_match_lig

{'0ex',
 '18n',
 '2c9',
 '2l1',
 '2l2',
 '2lk',
 '2wu',
 '3pf',
 '4ka',
 '4nc',
 '4wu',
 '4x3',
 '4x6',
 '6kn',
 '6mp',
 '6xb',
 '71a',
 '7bh',
 '98_',
 '9dg',
 'ade',
 'ale',
 'anp',
 'apz',
 'aqo',
 'b2d',
 'bpy',
 'd1n',
 'dnc',
 'dpz',
 'f31',
 'f47',
 'fmb',
 'g9h',
 'gun',
 'krw',
 'l5g',
 'lkg',
 'maq',
 'pm6',
 'pnk',
 'pq0',
 'tc7',
 'urf'}

In [52]:
aro_match_rec = set([lig[0][:6] for lig in aro_match_l])
aro_match_rec

{'1efz_A',
 '1enu_A',
 '1eob_A',
 '1eoc_A',
 '1k4h_A',
 '1k9s_D',
 '1kwc_B',
 '1oty_A',
 '1oum_A',
 '1ozm_A',
 '1pw7_A',
 '1q2r_A',
 '1s38_A',
 '1vdv_A',
 '1y5v_A',
 '1y5x_A',
 '1yw7_A',
 '2buq_A',
 '2bur_A',
 '2buu_A',
 '2ei0_A',
 '2qzr_A',
 '2vce_A',
 '2vch_A',
 '2wgj_A',
 '3b9j_C',
 '3c2y_A',
 '3eue_A',
 '3euf_A',
 '3ki1_A',
 '3ki3_A',
 '3nbq_B',
 '3ns1_L',
 '3nvv_C',
 '3nvw_C',
 '3ny8_A',
 '3p1c_B',
 '3p1f_A',
 '3rr4_A',
 '3sr6_C',
 '3zxz_A',
 '4deh_A',
 '4dei_A',
 '4eyf_A',
 '4ffs_A',
 '4fr6_A',
 '4gg9_A',
 '4gh1_A',
 '4gi4_A',
 '4k8a_B',
 '4knb_A',
 '4lbu_A',
 '4mrd_A',
 '4mrf_A',
 '4mrg_A',
 '4np3_A',
 '4nyw_A',
 '4ouf_B',
 '4puk_A',
 '4q4o_A',
 '4q4p_A',
 '4q4s_A',
 '4q8t_A',
 '4q8u_A',
 '4q8v_A',
 '4q8w_A',
 '4rl2_B',
 '4ts8_A',
 '4xuc_A',
 '4xud_A',
 '4xue_B',
 '4yk0_D',
 '4yur_A',
 '5bzc_A',
 '5bze_A',
 '5bzf_A',
 '5bzg_A',
 '5bzj_A',
 '5bzk_A',
 '5bzl_A',
 '5bzp_A',
 '5bzq_A',
 '5bzr_A',
 '5bzs_A',
 '5bzt_A',
 '5egr_A',
 '5eob_A',
 '5ep7_A',
 '5eyc_A',
 '5i00_A',
 '5i02_A',

In [53]:
[lig for lig in filtered_lig.index.tolist() if lig in aro_match_lig]

['4nc',
 '18n',
 'pm6',
 'pq0',
 '6kn',
 '2c9',
 'dpz',
 'apz',
 '4wu',
 'f31',
 'ale',
 '6mp',
 '98_',
 '9dg',
 '6xb']

In [54]:
[rec for rec in filtered_rec.index.tolist() if rec in aro_match_rec]

['3ny8_A',
 '5bzf_A',
 '5bzg_A',
 '2buq_A',
 '5bze_A',
 '4mrg_A',
 '5bzr_A',
 '4mrf_A',
 '3p1f_A',
 '2buu_A',
 '1oty_A',
 '5bzq_A',
 '5bzc_A',
 '5mpz_A',
 '3p1c_B',
 '4np3_A',
 '4ts8_A',
 '1q2r_A',
 '5i86_B',
 '4ouf_B',
 '5bzk_A',
 '4ffs_A',
 '1vdv_A',
 '5ep7_A']

In [22]:
for lig in aro_match_l:
    if lig[1] == 6:
        print(lig)
    # if lig.startswith('1k4h_A'):
    #     print(lig)

    # if 'ale' in lig:
    #     print(lig)

('3nbq_B_rec_3nbq_urf_lig_tt_docked_2', 6)
('3p1c_B_rec_3p1f_3pf_lig_tt_min_0', 6)
('5swg_A_rec_5swt_71a_lig_tt_min_0', 6)
('3euf_A_rec_3nbq_urf_lig_tt_min_0', 6)
('2vce_A_rec_2vce_tc7_lig_tt_docked_0', 6)
('4xud_A_rec_5lsa_dnc_lig_tt_docked_2', 6)
('1eoc_A_rec_2buz_4nc_lig_tt_docked_4', 6)
('1y5x_A_rec_4puk_2wu_lig_tt_min_0', 6)
('2bur_A_rec_1eoc_4nc_lig_tt_min_0', 6)
('1k4h_A_rec_1p0b_pq0_lig_tt_docked_0', 6)
('1eob_A_rec_2buz_4nc_lig_tt_min_0', 6)
('4mrg_A_rec_5bzf_4x3_lig_tt_docked_2', 6)
('4xuc_A_rec_5lsa_dnc_lig_tt_min_0', 6)
('1kwc_B_rec_1eir_bpy_lig_tt_min_0', 6)
('2vce_A_rec_2vce_tc7_lig_tt_min_0', 6)
('1eoc_A_rec_1eoc_4nc_lig_tt_min_0', 6)
('5mpz_A_rec_5mqg_f31_lig_tt_docked_3', 6)
('2buq_A_rec_2buz_4nc_lig_tt_min_0', 6)
('4mrf_A_rec_4mre_2c9_lig_tt_min_0', 6)
('1eoc_A_rec_2buz_4nc_lig_tt_min_0', 6)
('2bur_A_rec_2buz_4nc_lig_tt_min_0', 6)
('3eue_A_rec_3nbq_urf_lig_tt_min_0', 6)
('5ep7_A_rec_3p1f_3pf_lig_tt_min_0', 6)
('3nbq_B_rec_3nbq_urf_lig_tt_min_0', 6)
('4xue_B_rec_5lsa_d