In [None]:
from meeko import MoleculePreparation
from vina import Vina
from rdkit import Chem
from meeko import PDBQTMolecule
from meeko import PDBQTWriterLegacy

In [None]:
# Create a Vina instance with the 'vina' scoring function (cpu=8).
v = Vina(sf_name='vina', cpu=8)
# Load the receptor and one ligand, both read from PDBQT files.
v.set_receptor('Data/9f6a.pdbqt')
v.set_ligand_from_file('Data/lig-1.pdbqt')

# Search box: centre coordinates and edge lengths (x, y, z).
# Units are presumably Angstrom -- TODO confirm.
docking_box = {"center": [136.733, 172.819, 99.189], "box_size": [11.69, 7.09, 7.60]}
# Maps are computed for this box before the docking call below.
v.compute_vina_maps(**docking_box)

# Run the docking search, requesting 5 poses.
v.dock(exhaustiveness=8, n_poses=5)

In [4]:
# Meeko preparator built with default arguments; used by the preparation cell below.
preparator = MoleculePreparation()

In [None]:
# Take the first record of the SDF library as the test ligand.
test_lig = Chem.SDMolSupplier('Data/ligands.sdf')[0]
# Bare expression: show the molecule as the cell output.
test_lig

In [85]:
# Add explicit hydrogens. This rebinds test_lig, so re-running the cell
# applies AddHs to the already-hydrogenated molecule.
test_lig = Chem.AddHs(test_lig)

In [None]:
# Embed the ligand in place (presumably generates 3D coordinates).
# NOTE(review): the return value is only displayed, never checked -- confirm
# embedding succeeded before preparing the ligand.
Chem.rdDistGeom.EmbedMolecule(test_lig)

In [None]:
# Display the ligand again after hydrogen addition and embedding.
test_lig

In [88]:
# Run the Meeko preparator on the test ligand; the result is iterated as a
# collection of setups in the next cell.
mol_setups = preparator.prepare(test_lig)

In [None]:
# Serialise every prepared setup to PDBQT text and print it. Failed exports
# are reported with the writer's error message instead of being dropped.
for setup in mol_setups:
    pdbqt_string, is_ok, error_msg = PDBQTWriterLegacy.write_string(setup)
    if is_ok:
        print(pdbqt_string, end = "")
    else:
        print(f"PDBQT export failed: {error_msg}")

In [91]:
# Supplier over the full SDF ligand library; iterated and indexed by the cells below.
ligands = Chem.SDMolSupplier('Data/ligands.sdf')

In [None]:
# Number of records in the ligand library (shown as the cell output).
len(ligands)

In [None]:
# Print the names of the parsable molecules among the first 100 SDF records.
# The limit is tested first so the loop always stops at record 100, and the
# loop variable is used directly instead of looking the record up a second
# time through ligands[i].
for i, mol in enumerate(ligands):
    if i >= 100:
        break
    if mol is None:  # record could not be parsed
        continue
    print(mol.GetProp('_Name'))

In [None]:
from rdkit.Chem import SDMolSupplier
 
def process_ligands_in_batches(input_sdf_file, batch_size):
    """Stream an SDF file and hand its molecules to process_batch in groups.

    Records the supplier yields as None are skipped. Batches are numbered
    consecutively from 0 in the order they are emitted, independent of how
    many records were skipped, and each batch is passed as its own list so a
    consumer may keep a reference to it.

    Args:
        input_sdf_file: Path passed to SDMolSupplier.
        batch_size: Number of molecules per batch; must be at least 1.

    Raises:
        ValueError: If batch_size is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    suppl = SDMolSupplier(input_sdf_file)

    batch = []
    batch_idx = 0
    for mol in suppl:
        if mol is None:
            continue
        batch.append(mol)

        # Process and flush the batch when the batch size is reached
        if len(batch) == batch_size:
            process_batch(batch, batch_idx)
            batch_idx += 1
            batch = []  # Start a fresh list; the processed one is left untouched

    # Process any remaining ligands in the final batch
    if batch:
        process_batch(batch, batch_idx)
 
def process_batch(batch, batch_idx):
    """Placeholder batch handler: report the 1-based batch number and size.

    Args:
        batch: Sized collection of ligands belonging to this batch.
        batch_idx: Zero-based batch index; printed as batch_idx + 1.
    """
    # The real conversion / docking step is meant to replace this report.
    message = f"Processing batch {batch_idx + 1} with {len(batch)} ligands."
    print(message)
 
# Example usage
# NOTE(review): other cells read the library from 'Data/ligands.sdf'; this
# bare filename may not resolve from the same working directory -- confirm.
input_sdf_file = 'ligands.sdf'
batch_size = 100  # Adjust batch size for performance and memory
 
# Runs the batching function defined above on the whole file.
process_ligands_in_batches(input_sdf_file, batch_size)

In [11]:
import os

In [12]:
# Create the output directory if it is missing. exist_ok=True makes the cell
# safe to re-run and avoids the check-then-create race of a separate
# os.path.exists test.
os.makedirs('Output', exist_ok=True)

In [8]:
from openbabel import pybel
from openbabel import openbabel

In [None]:
# Open the 10-ligand SDF file with pybel; the result is iterated in a later cell.
mol_iter = pybel.readfile(filename = 'Data/ligands_10.sdf', format = 'sdf')

In [None]:
# Inspect what kind of object pybel.readfile returned.
type(mol_iter)

In [None]:
# Print the molecular weight of each molecule read from the SDF file.
# NOTE(review): mol_iter is presumably single-use, so re-running only this
# cell would print nothing -- confirm.
for mol in mol_iter:
    print(mol.molwt)

In [17]:
from openbabel import openbabel
from openbabel import pybel

# Input and output file paths
input_smi = "Data/ligands_10_2.smi"
# NOTE: despite its name this variable holds the PDBQT output path; the name
# is kept so that any cell referring to it keeps working.
output_sdf = "output_molecules4.pdbqt"

# Conversion object configured for the formats actually used in this cell
# (SMILES in, PDBQT out). The loop below converts through pybel and does not
# use it.
converter = openbabel.OBConversion()
converter.SetInAndOutFormats("smi", "pdbqt")

# Desired pH
pH = 7.0

# Open the output file; every molecule is appended to the same PDBQT file.
with open(output_sdf, "w") as outfile:
    # Read molecules from the SMILES file
    for mol in pybel.readfile("smi", input_smi):
        # Work on the underlying OBMol for direct manipulation
        obmol = mol.OBMol

        # Add all hydrogens (polaronly=False), correcting for the given pH
        obmol.AddHydrogens(False, True, pH)

        # Wrap the adjusted OBMol in a pybel molecule again
        adjusted_mol = pybel.Molecule(obmol)

        # Generate 3D coordinates before writing
        adjusted_mol.make3D()

        # Write output
        outfile.write(adjusted_mol.write("pdbqt"))


In [None]:
import ringtail as rtc
from joblib import Parallel, delayed
from rdkit import Chem
from meeko import MoleculePreparation, PDBQTWriterLegacy
from vina import Vina
from openbabel import openbabel, pybel
from re import split, sub, MULTILINE

In [4]:
# Ligand library file in .sdf format.
sdf_file = "Data/ligands_10.sdf"

# Ligand library files in .smi format: one read with RDKit's SmilesMolSupplier
# and one "no header" variant read with pybel.
smi_file = "Data/ligands_10.smi"
smi_file_no_header = "Data/ligands_10_2.smi"

suppl = Chem.SDMolSupplier(sdf_file) # RDKit supplier over the .sdf file. There's also an experimental 'MultithreadedSDMolSupplier' that may be faster.
suppl2 = Chem.SmilesMolSupplier(smi_file, delimiter="\t") # RDKit supplier over the tab-delimited .smi ligand file.
# NOTE: Suspicion is that the pybel.readfile() iterator causes the pickling issue with joblib.
suppl4 = pybel.readfile("smi", smi_file_no_header) # pybel reader over the header-less .smi file, used to test ligand batching.

In [None]:
# Read each molecule's name from the RDKit mol object and prepend it to the
# generated PDBQT text as a REMARK line.

suppl2 = Chem.SmilesMolSupplier(smi_file, delimiter="\t") # Iterator for the .smi ligand file.
preparator = MoleculePreparation()

for mol in suppl2:

    # Records that could not be parsed come back as None; skip them instead
    # of failing on the attribute access below.
    if mol is None:
        print("Skipping record: SMILES could not be parsed")
        continue

    mol_name = mol.GetProp('_Name')

    mol = Chem.AddHs(mol)
    # EmbedMolecule reports failure with a negative conformer id; preparing a
    # molecule without coordinates would fail later with a less clear error.
    if Chem.rdDistGeom.EmbedMolecule(mol) < 0:
        print(f"Skipping {mol_name.strip()}: 3D embedding failed")
        continue

    mol_setups = preparator.prepare(mol)
    for setup in mol_setups:
        pdbqt_string, is_ok, error_msg = PDBQTWriterLegacy.write_string(setup)
        if is_ok:
            modified_pdbqt = f"REMARK Name = {mol_name.strip()}\n{pdbqt_string}"
            print(modified_pdbqt)
        else:
            print(f"PDBQT export failed for {mol_name.strip()}: {error_msg}")

In [None]:
# List the (SMILES, name) pairs found in the header-less, tab-separated .smi file.
with open(smi_file_no_header, "r") as f:
    for line in f:
        parts = line.strip().split("\t")
        if len(parts) >= 2:  # Ensure the line has both SMILES and name.
            # The SMILES is the first column, not the whole raw line
            # (which still carries the name and the trailing newline).
            smiles, mol_name = parts[0], parts[1]
            print((smiles, mol_name))  # Show the tuple of SMILES and name.

In [None]:
# OK
# RDKit + Meeko route: add hydrogens, embed, prepare, and print PDBQT text
# for every molecule in suppl2.
preparator = MoleculePreparation()

for mol in suppl2:

    # Records that could not be parsed come back as None; skip them.
    if mol is None:
        print("Skipping record: SMILES could not be parsed")
        continue

    mol = Chem.AddHs(mol)
    # EmbedMolecule reports failure with a negative conformer id.
    if Chem.rdDistGeom.EmbedMolecule(mol) < 0:
        print("Skipping molecule: 3D embedding failed")
        continue

    mol_setups = preparator.prepare(mol)
    for setup in mol_setups:
        pdbqt_string, is_ok, error_msg = PDBQTWriterLegacy.write_string(setup)
        if is_ok:
            print(pdbqt_string)
        else:
            print(f"PDBQT export failed: {error_msg}")

In [None]:
# OK
# NOTE(review): this cell runs the same RDKit + Meeko preparation loop as the
# preceding cell; consider keeping only one of them.
preparator = MoleculePreparation()

for mol in suppl2:

    # Records that could not be parsed come back as None; skip them.
    if mol is None:
        print("Skipping record: SMILES could not be parsed")
        continue

    mol = Chem.AddHs(mol)
    # EmbedMolecule reports failure with a negative conformer id.
    if Chem.rdDistGeom.EmbedMolecule(mol) < 0:
        print("Skipping molecule: 3D embedding failed")
        continue

    mol_setups = preparator.prepare(mol)
    for setup in mol_setups:
        pdbqt_string, is_ok, error_msg = PDBQTWriterLegacy.write_string(setup)
        if is_ok:
            print(pdbqt_string)
        else:
            print(f"PDBQT export failed: {error_msg}")

In [None]:
# Open Babel route: read SMILES with pybel, add hydrogens, build 3D
# coordinates and print the PDBQT text. The preparator created here is not
# used inside this cell's loop.
preparator = MoleculePreparation()

for mol in pybel.readfile("smi", smi_file_no_header):

    obmol = mol.OBMol

    # Add hydrogens.
    # AddHydrogen() params: (bool polaronly=false, bool correctForPH=false, double pH=7.4)
    # NOTE(review): correctForPH is passed as False here, so pH-dependent
    # protonation is presumably NOT applied despite the 7.4 argument -- confirm intent.
    obmol.AddHydrogens(False, False, 7.4)

    # Convert the Open Babel molecule back to pybel molecule.
    mol = pybel.Molecule(obmol)

    mol.make3D()
    #mol.localopt() # Coordinate improvement.

    # Serialise to PDBQT text.
    pybel_string = mol.write(format = "pdbqt")

    print(pybel_string)

In [None]:
from re import split, sub, MULTILINE

suppl4 = pybel.readfile("smi", smi_file_no_header)

for mol in suppl4:
    # Serialise to SMILES once and split it into the SMILES string and the
    # molecule title (tab-separated); guard against a missing title column.
    smi_fields = split("\t", mol.write(format = "smi"))
    smiles_string = smi_fields[0]
    mol_name = smi_fields[1] if len(smi_fields) > 1 else ""
    obmol = mol.OBMol

    # Add hydrogens.
    # NOTE(review): correctForPH is False, so the pH value is presumably not
    # used for protonation here -- confirm intent.
    obmol.AddHydrogens(False, False, 7.4)

    # Convert the Open Babel molecule back to pybel molecule
    mol = pybel.Molecule(obmol)
    mol.make3D()

    # Write PDBQT and insert custom REMARK fields
    pybel_string = mol.write(format = "pdbqt")
    # modified_pdbqt = f"REMARK  Name = {mol_name.strip()}\nREMARK  SMILES = {smiles_string.strip()}\n{pybel_string}"
    modified_pdbqt = f"REMARK  SMILES = {smiles_string.strip()}\n{pybel_string}"
    # Replace the Name remark with the molecule name. A callable replacement
    # is used so that backslashes or group references inside the name are
    # inserted literally instead of being interpreted by re.sub.
    name_remark = f'REMARK Name = {mol_name.strip()}'
    modified_pdbqt = sub(r'^REMARK\s+Name\s*=.*$', lambda _match: name_remark, modified_pdbqt, flags=MULTILINE)
    print(modified_pdbqt)

In [127]:
def molecule_prep2(idx, mol):
    """Convert one SMILES string to PDBQT text via Open Babel.

    Args:
        idx: Caller-supplied index; returned unchanged so results can be
            matched to their inputs.
        mol: SMILES string (despite the parameter name), passed to
            smiles_to_obmol.

    Returns:
        (idx, pdbqt_text) on success, or (idx, None, error_message) when any
        step raises. The two shapes differ in length; callers must check
        before unpacking.
    """
    try:
        # Parse the SMILES string into an Open Babel molecule.
        obmol = smiles_to_obmol(mol)

        # An unparsable SMILES presumably leaves the OBMol without atoms;
        # report it as an error instead of emitting an empty PDBQT record.
        if obmol.NumAtoms() == 0:
            raise ValueError(f"Could not parse SMILES: {mol!r}")

        # Add all hydrogens (polaronly=False) with pH correction at 7.4.
        obmol.AddHydrogens(False, True, 7.4)

        # Wrap the Open Babel molecule in a pybel molecule.
        pybel_mol = pybel.Molecule(obmol)

        pybel_mol.make3D()

        return idx, pybel_mol.write(format = "pdbqt")
    except Exception as e:
        # Best-effort: failures are returned to the caller, not raised, so a
        # parallel batch is not aborted by a single bad molecule.
        return idx, None, str(e)

In [124]:
from openbabel import pybel
from openbabel import openbabel

def smiles_to_obmol(smiles_string):
    """Parse a SMILES string into a new openbabel.OBMol.

    Args:
        smiles_string: SMILES text to parse.

    Returns:
        The populated OBMol.

    Raises:
        ValueError: If the conversion formats cannot be set or Open Babel
            reports that the string could not be read.
    """
    # Create an OpenBabel conversion object
    obConversion = openbabel.OBConversion()
    if not obConversion.SetInAndOutFormats("smi", "mol"):
        raise ValueError("Open Babel could not set the 'smi'/'mol' formats")

    # Create an OBMol object
    obmol = openbabel.OBMol()

    # Convert SMILES to OBMol; a false result means the read failed, so
    # surface it instead of returning a molecule that was never filled in.
    if not obConversion.ReadString(obmol, smiles_string):
        raise ValueError(f"Could not parse SMILES string: {smiles_string!r}")

    return obmol

In [None]:
# Convert the ligand file to PDBQT in batches of 10, running molecule_prep2
# in two worker processes. Only SMILES strings are sent to the workers.
batch = []            # (file-wide index, SMILES text) pairs awaiting conversion
converted_batch = []  # results of every processed batch, in input order
suppl4 = pybel.readfile("smi", smi_file_no_header)
for idx, mol in enumerate(suppl4):
    if mol is not None:
        batch.append((idx, mol.write(format = "smi")))

    if len(batch) == 10:
        # Delayed calls to molecule_prep2(), executed once the generator is consumed.
        converted_batch.extend(
            Parallel(n_jobs=2)(
                delayed(molecule_prep2)(mol_idx, smiles_text)
                for mol_idx, smiles_text in batch
            )
        )
        # Start a new batch; without this the size test never matches again
        # and every later molecule would be skipped.
        batch = []

# Convert whatever is left over when the file length is not a multiple of 10.
if batch:
    converted_batch.extend(
        Parallel(n_jobs=2)(
            delayed(molecule_prep2)(mol_idx, smiles_text)
            for mol_idx, smiles_text in batch
        )
    )
    batch = []

In [None]:
# Extracting the SMILES string from the rdkit mol object
# NOTE(review): `mol` is whatever an earlier cell last bound. The closest
# preceding loop binds it to a pybel molecule, not an RDKit one -- confirm the
# intended input before running the notebook top to bottom.
smiles_string = Chem.MolToSmiles(mol)

In [None]:
# Display pybel.informats (presumably the input formats pybel can read).
pybel.informats

In [3]:
from scrubber import Scrub
from rdkit import Chem

In [None]:
# Scrub instance configured with a pH window of 2 to 11.
scrub = Scrub(ph_low = 2, ph_high = 11)

# SMILES strings to run through scrub below.
smiles_list = [
    "CCCCCSCC[N+](C)(C)C.[Br-]",
]

# mol = Chem.MolFromSmiles("Clc1c(OCCC3)c3ccc1C(=O)Nc2nc[nH]c2")

# NOTE(review): this stores scrub's return value as the single element of a
# list (it is not expanded) and is not used later in this cell -- confirm intent.
mol_states = [scrub(Chem.MolFromSmiles("COC(CN)CN1CCCC1"))]

# For every state scrub yields, print the input SMILES, the state's SMILES
# and its conformer count.
for smiles in smiles_list:
    for mol_state in scrub(Chem.MolFromSmiles(smiles)):
        print("SMILES: ", smiles, Chem.MolToSmiles(mol_state), "Conformers: %d" % mol_state.GetNumConformers())

In [56]:
# The SMILES is bound in this cell so the error message below reports the
# string that was actually parsed rather than a value left over from another cell.
smiles = "C=CCN1CC23CCC2(COC3)C1"
mol = Chem.MolFromSmiles(smiles)
mol_name = "mol1"
if mol is None:
    raise ValueError(f"Invalid SMILES string: {smiles}")

# Assign the molecule name to the RDKit object.
mol.SetProp('_Name', mol_name)

# Prepare the ligand with Meeko.
preparator = MoleculePreparation()

# (variant name, PDBQT text) pairs, one per successfully exported setup.
variants = []

# Scrubber handles protonation states, 3D coordinates, and tautomers.
# NOTE(review): the recorded run of this cell ended in a RuntimeError for
# this molecule -- confirm whether it should be caught and reported.
for mol_index, mol_state in enumerate(scrub(mol)):
    variant_mol_name = f"{mol_name}-{mol_index}"

    mol_setups = preparator.prepare(mol_state)

    for setup in mol_setups:
        pdbqt_string, is_ok, error_msg = PDBQTWriterLegacy.write_string(setup)
        if is_ok:
            modified_pdbqt = f"REMARK Name = {variant_mol_name.strip()}\n{pdbqt_string}"
            variants.append((variant_mol_name, modified_pdbqt))
        else:
            # Report export failures instead of dropping the variant silently.
            print(f"PDBQT export failed for {variant_mol_name}: {error_msg}")
print(variants)

RuntimeError: {'INITIAL_COORDS': 42, 'FIRST_MINIMIZATION': 2, 'CHECK_TETRAHEDRAL_CENTERS': 256}

In [55]:
from rdkit.Chem import AllChem as Chem


# Same SMILES as in the preceding preparation cell.
smiles = "C=CCN1CC23CCC2(COC3)C1"

mol = Chem.MolFromSmiles(smiles)

# Split the molecule into its fragments, each returned as a separate molecule.
fragments = Chem.GetMolFrags(mol, asMols=True)

# Number of fragments found.
print(len(fragments))

# Disabled experiment: pick the fragment with the most atoms.
# print(Chem.MolToSmiles(fragments[0]))
# main_fragment = max(fragments, key=lambda m: m.GetNumAtoms())
# print(Chem.MolToSmiles(main_fragment))

1
