In [10]:
import os
from rdkit import Chem
from ccdc import io
from tqdm import tqdm
from ccdc import conformer

In [31]:
import pandas as pd

# Location of the PDBbind data set, relative to this notebook.
pdbbind_dir = '../v2019-other-PL/'

# Character width of each fixed-width field of the index file, in column order.
widths = [6, 6, 7, 6, 17, 9, 200]

# Column labels, one per fixed-width field.
cols = [
    'PDB code',
    'resolution',
    'release year',
    '-logKd/Ki',
    'Kd/Ki',
    'reference',
    'ligand name',
]

# The first 6 lines of the file are skipped (presumably its comment header --
# TODO confirm) and no row is used as a header, so labels are attached afterwards.
pl_data = pd.read_fwf(
    f'{pdbbind_dir}index/INDEX_general_PL_data.2019',
    widths=widths,
    skiprows=6,
    header=None,
)
pl_data.columns = cols

In [35]:
# Show the rows whose ligand name is exactly '(9-mer)'.
pl_data.loc[pl_data['ligand name'].eq('(9-mer)')]

Unnamed: 0,PDB code,resolution,release year,-logKd/Ki,Kd/Ki,reference,ligand name
508,6evm,2.00,2018,2.82,Kd>1500uM //,6evm.pdf,(9-mer)
1210,5eok,2.80,2016,3.49,Kd=320uM //,5eod.pdf,(9-mer)
1246,3tf7,2.75,2011,3.52,Kd~300uM //,3tf7.pdf,(9-mer)
1255,4y5i,1.40,2016,3.52,IC50=301.3uM //,4y5i.pdf,(9-mer)
1276,3obq,1.40,2010,3.54,Kd=290uM //,3obq.pdf,(9-mer)
...,...,...,...,...,...,...,...
15360,1fzk,1.70,2001,8.40,Kd=4nM //,1fzk.pdf,(9-mer)
15770,3rwg,2.10,2012,8.55,IC50=2.8nM //,3rwc.pdf,(9-mer)
16793,1jp5,2.70,2001,9.22,Ki=0.6nM //,1jp5.pdf,(9-mer)
17201,5u98,2.00,2017,9.70,Kd=0.2nM //,5u98.pdf,(9-mer)


In [33]:
# Number of index rows per distinct ligand name, most frequent first.
pl_data.loc[:, 'ligand name'].value_counts()

(11-mer)    212
(9-mer)     196
(12-mer)    184
(10-mer)    181
(5-mer)     174
           ... 
(9E1)         1
(5RA)         1
(3A9)         1
(CWX)         1
(7FA)         1
Name: ligand name, Length: 11766, dtype: int64

In [2]:
# Root directory of the PDBbind set. It can be overridden through the
# PDBBIND_DIR environment variable so the notebook is not tied to one user's
# home directory; when the variable is unset or empty the original location
# is used. os.path.join(..., '') guarantees the trailing separator that the
# f'{pdbbind_dir}{pdb_id}/...' paths built in later cells rely on.
pdbbind_dir = os.path.join(
    os.environ.get('PDBBIND_DIR') or '/home/baillifb/v2019-other-PL/',
    '',
)

In [3]:
# Sorted names of the entries in the data directory that are exactly four
# characters long (the length used here to recognise a PDB code).
pdb_ids = sorted(
    entry
    for entry in os.listdir(pdbbind_dir)
    if len(entry) == 4
)

In [13]:
# Create the CCDC conformer generator used by the conformer-generation loop.
conformer_generator = conformer.ConformerGenerator()
# Cap the number of conformers requested from the generator at 50.
conformer_generator.settings.max_conformers = 50

INFO: /home/baillifb/CCDC/CSD_2021/bin/mogul


In [17]:
# Generate conformers for the first molecule of every ligand file and report
# how many conformers were produced for each.
# NOTE(review): `mols` is initialised here but never filled by this loop.
mols = []
for pdb_id in tqdm(pdb_ids):
    filepath = f'{pdbbind_dir}{pdb_id}/{pdb_id}_ligand.mol2'
    # The context manager closes the reader after each ligand instead of
    # leaving one open reader per iteration.
    with io.MoleculeReader(filepath) as mol_reader:
        mol = mol_reader[0]
        conformers = conformer_generator.generate(mol)
    # tqdm.write emits the count without breaking up the progress bar, which
    # a plain print() does.
    tqdm.write(str(len(conformers)))

  0%|                                      | 2/17652 [00:06<16:12:04,  3.30s/it]

50
50


  0%|                                      | 3/17652 [00:07<11:13:31,  2.29s/it]

50


  0%|                                       | 4/17652 [00:08<9:07:56,  1.86s/it]

50


  0%|                                       | 5/17652 [00:08<6:18:32,  1.29s/it]

8
1


  0%|                                       | 9/17652 [00:09<2:21:50,  2.07it/s]

29
1
1


  0%|                                      | 10/17652 [00:13<5:43:14,  1.17s/it]

50


  0%|                                     | 11/17652 [00:18<11:03:33,  2.26s/it]

50


  0%|                                     | 12/17652 [00:23<14:08:12,  2.89s/it]

50


  0%|                                     | 13/17652 [00:26<13:57:30,  2.85s/it]

50


  0%|                                     | 14/17652 [00:26<10:32:17,  2.15s/it]

50


  0%|                                     | 15/17652 [00:29<11:59:33,  2.45s/it]

50


  0%|                                     | 16/17652 [00:32<13:18:57,  2.72s/it]

50


  0%|                                      | 18/17652 [00:35<9:11:31,  1.88s/it]

50
3


  0%|                                     | 19/17652 [00:38<11:16:00,  2.30s/it]

50


  0%|                                     | 19/17652 [00:40<10:26:53,  2.13s/it]


KeyboardInterrupt: 

In [None]:
# Read every ligand mol2 file with RDKit (hydrogens kept) and collect the
# molecules that also survive a SMILES round-trip. Each kept molecule has the
# PDB code stored as the 'PDB_ID' property of its conformer.
mols = []
for pdb_id in pdb_ids:
    mol = None
    try:
        mol = Chem.rdmolfiles.MolFromMol2File(
            f'{pdbbind_dir}{pdb_id}/{pdb_id}_ligand.mol2', removeHs=False)
        if mol is None:
            # The parser returned None instead of raising: report it rather
            # than dropping the ligand silently.
            print('Impossible to read mol2 file for ' + pdb_id)
            continue
        # Sanity check: the molecule must be re-parsable from its own SMILES.
        rdmol = Chem.MolFromSmiles(Chem.MolToSmiles(mol))
        if rdmol is None:
            print('SMILES round-trip failed for ' + pdb_id)
            continue
        # The property is attached to the conformer, not to the molecule.
        mol.GetConformer().SetProp('PDB_ID', pdb_id)
        mols.append(mol)
    except Exception:
        # Only ordinary errors are caught; a bare `except:` would also swallow
        # KeyboardInterrupt and make this long loop impossible to interrupt.
        print('Impossible to read mol2 file for ' + pdb_id)