In [8]:
#import nglview as nv
import glob
import os
import re
import warnings
from pathlib import Path

import tqdm
from openbabel import pybel
from opencadd.io.dataframe import DataFrame
from opencadd.structure.core import Structure


In [None]:

# Ignore all Python warnings and set Open Babel's global error log to
# output level 0 so that the per-molecule processing in the following cells
# does not flood the notebook output.
warnings.filterwarnings("ignore")
# NOTE(review): this handler instance is not used by any visible cell; it is
# kept only in case a cell outside this view references `ob_log_handler`.
ob_log_handler = pybel.ob.OBMessageHandler()
pybel.ob.obErrorLog.SetOutputLevel(0)

def split_sdf_file(sdf_path, folder_name):
    """
    Split a multi-molecule SDF file into one SDF file per molecule.

    Each molecule is written to
    ``<folder_name>/<sdf stem>_<molecule title>_<pose rank>.sdf``. The pose
    rank starts at 1 and is incremented for every consecutive molecule that
    carries the same title as the molecule before it; it restarts at 1
    whenever the title changes. Existing files with the same name are
    overwritten.

    Parameters
    ----------
    sdf_path: str or pathlib.Path
        Path to SDF file that should be split.
    folder_name: str or pathlib.Path
        Folder that receives the per-molecule files. It is created
        (including missing parent folders) if it does not exist yet.

    Returns
    -------
    None
    """
    sdf_path = Path(sdf_path)
    stem = sdf_path.stem
    molecules = pybel.readfile("sdf", str(sdf_path))

    # makedirs (unlike mkdir) also copes with nested output folders and with
    # a folder that already exists.
    os.makedirs(folder_name, exist_ok=True)

    # None can never equal a molecule title, so the very first molecule
    # always gets rank 1 -- even when its title is the empty string.
    previous_title = None
    pose_rank = 0
    for molecule in molecules:
        title = molecule.title
        pose_rank = pose_rank + 1 if title == previous_title else 1
        previous_title = title

        target = os.path.join(folder_name, f"{stem}_{title}_{pose_rank}.sdf")
        molecule.write("sdf", target, overwrite=True)

# Collect the multi-molecule SDF files stored in the ../data folder
# (sorted so that repeated runs process them in the same order).
sdf_files = sorted(glob.glob('../data/*.sdf'))

# Split every SDF file into one file per molecule.
for sdf_file in sdf_files:
    # Name of the SDF file without directory and extension.
    file_name = os.path.splitext(os.path.basename(sdf_file))[0]
    # Write the per-molecule files into a folder next to the source SDF file
    # (../data/<file_name>/) instead of the working directory: the
    # complex-building cell looks the split files up via
    # '../data/frame*/*.sdf'.
    output_folder = os.path.join(os.path.dirname(sdf_file), file_name)
    split_sdf_file(sdf_file, output_folder)

In [9]:
# Collect every SDF file inside the ../data/frame* folders. Sorting keeps
# files that share a receptor frame next to each other, so each receptor
# only has to be parsed once (see the reuse logic in the loop).
sdf_files = sorted(glob.glob('../data/frame*/*.sdf'))

# Receptor PDBQT files located in the current working directory.
receptor_files = glob.glob('*.pdbqt')

# The most recently parsed receptor is kept as an unmodified template;
# re-reading the PDBQT file for every single ligand is loop-invariant work.
loaded_receptor_file = None
receptor_template = None

for sdf_file in tqdm.tqdm(sdf_files):
    sdf_path = Path(sdf_file)

    # Only the first molecule of the SDF file is used.
    mol = next(pybel.readfile("sdf", sdf_file))

    # The SDF file name is expected to look like frame_XXX_YYY.sdf, where
    # XXX (the second '_'-separated field) is the receptor frame number.
    receptor_frame = sdf_path.stem.split('_')[1]

    # Match the frame number as a whole number. A plain substring test would
    # let frame "1" also pick up frame_10.pdbqt, frame_21.pdbqt, ...
    frame_pattern = re.compile(rf"(?<!\d){re.escape(receptor_frame)}(?!\d)")
    matching_receptors = [
        file for file in receptor_files
        if frame_pattern.search(os.path.basename(file))
    ]
    if not matching_receptors:
        raise FileNotFoundError(
            f"No receptor .pdbqt file found for frame '{receptor_frame}' "
            f"(needed for {sdf_file})"
        )
    receptor_file = matching_receptors[0]

    # Parse the receptor only when it differs from the one already loaded.
    if receptor_file != loaded_receptor_file:
        receptor_template = next(pybel.readfile("pdbqt", receptor_file))
        loaded_receptor_file = receptor_file

    # Work on a copy: `+=` modifies the molecule in place, and the template
    # must stay ligand-free for the next pose.
    receptor = pybel.Molecule(pybel.ob.OBMol(receptor_template.OBMol))

    # Combine the receptor and ligand into a single structure.
    receptor.OBMol += mol.OBMol

    # Write the complex as a PDB file next to the SDF file, using the same
    # base name (frame_XXX_YYY.sdf -> frame_XXX_YYY.pdb).
    receptor.write("pdb", str(sdf_path.with_suffix(".pdb")), overwrite=True)


100%|██████████| 98671/98671 [20:49:24<00:00,  1.32it/s]   
