In [41]:
import os
import pickle
import copy
import logging

from tqdm import tqdm
from rdkit.Geometry.rdGeometry import Point3D
from rdkit import Chem
from ccdc.conformer import ConformerGenerator, MoleculeMinimiser
from ccdc.molecule import Molecule

In [2]:
# Root directory for data files; the cells below read from and write to its 'raw' subdirectory.
data_dir_path = 'data/'

In [3]:
# Load the pickled conformation-ensemble library (the file name suggests the PDBbind "general" set).
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load trusted files.
with open(os.path.join(data_dir_path, 'raw', 'pdbbind_general_conf_ensemble_library.p'), 'rb') as f :
    general_CEL = pickle.load(f)

In [4]:
# Load the pickled conformation-ensemble library (the file name suggests the PDBbind "refined" set).
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load trusted files.
with open(os.path.join(data_dir_path, 'raw', 'pdbbind_refined_conf_ensemble_library.p'), 'rb') as f :
    refined_CEL = pickle.load(f)

In [5]:
# Build the combined library on a deep copy so that `general_CEL` stays untouched.
# A plain `all_CEL = general_CEL` only creates an alias: `merge` would then mutate
# `general_CEL` itself, and re-running this cell would merge `refined_CEL` into the
# already-merged library again.
all_CEL = copy.deepcopy(general_CEL)
all_CEL.merge(refined_CEL)

In [24]:
# Shared CCDC conformer generator used by later cells to generate conformer ensembles.
# The thread count (12) and the conformer cap (50) are hard-coded here.
# NOTE(review): presumably nthreads should match the cores available on the host — confirm.
ccdc_conformer_generator = ConformerGenerator(nthreads=12)
ccdc_conformer_generator.settings.max_conformers = 50

INFO: /home/benoit/CCDC/CSD_2021/bin/mogul
INFO: /home/benoit/CCDC/CSD_2021/bin/mogul
INFO: /home/benoit/CCDC/CSD_2021/bin/mogul
INFO: /home/benoit/CCDC/CSD_2021/bin/mogul
INFO: /home/benoit/CCDC/CSD_2021/bin/mogul
INFO: /home/benoit/CCDC/CSD_2021/bin/mogul
INFO: /home/benoit/CCDC/CSD_2021/bin/mogul
INFO: /home/benoit/CCDC/CSD_2021/bin/mogul
INFO: /home/benoit/CCDC/CSD_2021/bin/mogul
INFO: /home/benoit/CCDC/CSD_2021/bin/mogul
INFO: /home/benoit/CCDC/CSD_2021/bin/mogul
INFO: /home/benoit/CCDC/CSD_2021/bin/mogul


In [25]:
def ccdc_conformers_to_rdkit_mol(ccdc_conformers, rdkit_mol) :
    """Append CCDC-generated conformers to an RDKit molecule.

    For each generated conformer, the default conformer of ``rdkit_mol`` is
    copied, every atom position of the copy is overwritten with the
    coordinates of the corresponding CCDC atom, and the copy is added to
    ``rdkit_mol`` under a newly assigned conformer id. ``rdkit_mol`` is
    modified in place.

    Atoms are paired purely by index.
    NOTE(review): this assumes the CCDC molecule and ``rdkit_mol`` list their
    atoms in the same order — confirm for the way both molecules are built.

    Parameters
    ----------
    ccdc_conformers : iterable
        Generated conformers; each item must expose ``.molecule.atoms``, whose
        elements expose ``.coordinates`` (x, y, z).
    rdkit_mol : rdkit.Chem.Mol
        Molecule that already holds at least one conformer (used as template)
        and receives the new conformers.

    Returns
    -------
    list of int
        Conformer ids assigned by RDKit to the added conformers, in input order.

    Raises
    ------
    ValueError
        If a generated conformer does not have the same number of atoms as
        ``rdkit_mol``; without this check, missing atoms would silently keep
        the template coordinates.
    """
    generated_conf_ids = []
    expected_num_atoms = rdkit_mol.GetNumAtoms()

    for conformer in ccdc_conformers :
        ccdc_atoms = conformer.molecule.atoms
        if len(ccdc_atoms) != expected_num_atoms :
            raise ValueError(
                'Atom count mismatch: CCDC conformer has %d atoms, RDKit molecule has %d'
                % (len(ccdc_atoms), expected_num_atoms)
            )

        # Copy only the template conformer. Deep-copying the whole molecule for
        # every conformer also duplicates all conformers added so far, making
        # the loop quadratic in the number of conformers.
        new_rdkit_conf = Chem.Conformer(rdkit_mol.GetConformer())
        for i, atom in enumerate(ccdc_atoms) :
            new_rdkit_conf.SetAtomPosition(i, Point3D(*atom.coordinates))

        # assignId=True lets RDKit pick a fresh, unique conformer id.
        conf_id = rdkit_mol.AddConformer(new_rdkit_conf, assignId=True)
        generated_conf_ids.append(conf_id)

    return generated_conf_ids

In [49]:
# Generate one conformer per unique molecule (fewer than 50 heavy atoms) and
# collect the resulting CCDC molecules in `initial_ccdc_mols`.
initial_ccdc_mols = []
conformer_generator = ConformerGenerator()
conformer_generator.settings.max_conformers = 1

for smiles, conf_ensemble in tqdm(all_CEL.get_unique_molecules()) :
    try :
        parsed_mol = Chem.MolFromSmiles(smiles)
        if parsed_mol is None :
            # MolFromSmiles signals a parse failure by returning None; report it
            # explicitly instead of letting MolToSmiles fail on None.
            print('Could not parse SMILES: ' + smiles)
            continue
        smiles = Chem.MolToSmiles(parsed_mol)
        if conf_ensemble.mol.GetNumHeavyAtoms() < 50 :
            ccdc_mol = Molecule.from_string(smiles, format='smiles')
            conformers = conformer_generator.generate(ccdc_mol)
            # Store the molecules themselves rather than the hit list returned
            # by generate(), so the list really contains CCDC molecules that
            # can be handed to a conformer generator again.
            initial_ccdc_mols.extend(hit.molecule for hit in conformers)
    except Exception as e :
        # Best effort: report the failure and continue with the next molecule.
        print(e)

  0%|                                                 | 0/12348 [00:00<?, ?it/s]

INFO: /home/benoit/CCDC/CSD_2021/bin/mogul


  0%|                                      | 17/12348 [00:11<2:53:34,  1.18it/s]

INFO: /home/benoit/CCDC/CSD_2021/bin/mogul


  0%|                                      | 23/12348 [00:18<2:47:43,  1.22it/s]


KeyboardInterrupt: 

In [35]:
# Run the shared generator on the first five entries of `initial_ccdc_mols`.
# NOTE(review): presumably generate() expects CCDC molecules — confirm the entries have that type.
# NOTE(review): `ccdc_conformers` is also assigned inside the generation loop further down.
ccdc_conformers = ccdc_conformer_generator.generate(initial_ccdc_mols[:5])

In [46]:
# NOTE(review): `mol` is not assigned in any cell visible in this notebook; on a fresh
# kernel this raises NameError unless it is defined elsewhere — confirm or remove.
mol

In [None]:
# Minimise a molecule with the CCDC minimiser.
# MoleculeMinimiser is imported directly (`from ccdc.conformer import ... MoleculeMinimiser`);
# no module-level name `conformer` is bound in the visible cells, so
# `conformer.MoleculeMinimiser()` fails with NameError.
# NOTE(review): `mol` is not defined in any visible cell — assign it before running this cell.
molecule_minimiser = MoleculeMinimiser()
minimised_mol = molecule_minimiser.minimise(mol)

In [25]:
# Generate CCDC conformers for every unique molecule with fewer than 50 heavy
# atoms whose atom count agrees between the RDKit molecule and the CCDC
# molecule built from SMILES. Each result is a copy of the library molecule
# with the generated conformers appended; progress is logged to example.log.
logging.basicConfig(filename='example.log', level=logging.DEBUG)
generated_mols = []
for smiles, conf_ensemble in tqdm(all_CEL.get_unique_molecules()) :
    logging.info('Processing ' + smiles)
    try :
        parsed_mol = Chem.MolFromSmiles(smiles)
        if parsed_mol is None :
            # MolFromSmiles returns None on a parse failure; skip explicitly
            # instead of letting MolToSmiles fail on None.
            logging.warning('Could not parse SMILES: ' + smiles)
            continue
        smiles = Chem.MolToSmiles(parsed_mol)
        if conf_ensemble.mol.GetNumHeavyAtoms() < 50 :
            ccdc_mol = Molecule.from_string(smiles, format='smiles')
            if conf_ensemble.mol.GetNumAtoms() == len(ccdc_mol.atoms) :
                ccdc_conformers = ccdc_conformer_generator.generate(ccdc_mol)
                logging.info('Conformations generated')
                # Work on a copy: adding conformers directly to
                # conf_ensemble.mol mutates all_CEL, so an interrupted or
                # repeated run would append the generated conformers to the
                # same molecule more than once.
                mol_with_generated = copy.deepcopy(conf_ensemble.mol)
                generated_conf_ids = ccdc_conformers_to_rdkit_mol(ccdc_conformers, mol_with_generated)
                logging.info('Added %d generated conformers', len(generated_conf_ids))
                generated_mols.append(mol_with_generated)

    except Exception as e :
        # Best effort: report the failure and continue with the next molecule.
        # exc_info=True stores the traceback in the log file for later diagnosis.
        print(e)
        logging.warning('Exception occured: ' + str(e), exc_info=True)

  0%|                                      | 16/12348 [00:10<1:13:56,  2.78it/s]

INFO: /home/benoit/CCDC/CSD_2021/bin/mogul


  0%|                                      | 26/12348 [00:20<2:37:14,  1.31it/s]

INFO: /home/benoit/CCDC/CSD_2021/bin/mogul


  1%|▏                                     | 66/12348 [01:06<3:12:31,  1.06it/s]

INFO: /home/benoit/CCDC/CSD_2021/bin/mogul
INFO: /home/benoit/CCDC/CSD_2021/bin/mogul


  1%|▌                                    | 182/12348 [03:00<3:21:18,  1.01it/s]


KeyboardInterrupt: 

In [None]:
# Wrap the molecules collected in `generated_mols` into a new ensemble library.
# NOTE(review): ConformationEnsembleLibrary is not imported in the visible import cell;
# add the import from the project module that defines it — module path not visible here.
generated_CEL = ConformationEnsembleLibrary(generated_mols)

In [None]:
# Persist the generated library as a pickle in the 'raw' data subdirectory.
with open(os.path.join(data_dir_path, 'raw', 'pdbbind_conf_ensemble_library_ccdc_generated.p'), 'wb') as f :
    pickle.dump(generated_CEL, f)
    
# Note: the generated library only contains molecules for which RDKit (parsed from mol2) and the CSD toolkit (parsed from SMILES) yield the same number of atoms.