In [13]:
import glob
import pandas as pd
import rdkit
from rdkit import Chem
from rdkit.Chem.rdchem import BondType as BT
from rdkit.Chem import AllChem, GetPeriodicTable, RemoveHs
from rdkit.Chem import Descriptors
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import scipy
from scipy import stats

In [14]:
#Copied from https://github.com/gcorso/DiffDock/blob/main/datasets/process_mols.py
def read_molecule(molecule_file, sanitize=False, calc_charges=False, remove_hs=False):
    if molecule_file.endswith('.mol2'):
        mol = Chem.MolFromMol2File(molecule_file, sanitize=False, removeHs=False)
    elif molecule_file.endswith('.sdf'):
        supplier = Chem.SDMolSupplier(molecule_file, sanitize=False, removeHs=False)
        mol = supplier[0]
    elif molecule_file.endswith('.pdbqt'):
        with open(molecule_file) as file:
            pdbqt_data = file.readlines()
        pdb_block = ''
        for line in pdbqt_data:
            pdb_block += '{}\n'.format(line[:66])
        mol = Chem.MolFromPDBBlock(pdb_block, sanitize=False, removeHs=False)
    elif molecule_file.endswith('.pdb'):
        mol = Chem.MolFromPDBFile(molecule_file, sanitize=False, removeHs=False)
    else:
        raise ValueError('Expect the format of the molecule_file to be '
                         'one of .mol2, .sdf, .pdbqt and .pdb, got {}'.format(molecule_file))

    try:
        if sanitize or calc_charges:
            Chem.SanitizeMol(mol)

        if calc_charges:
            # Compute Gasteiger charges on the molecule.
            try:
                AllChem.ComputeGasteigerCharges(mol)
            except:
                warnings.warn('Unable to compute charges for the molecule.')

        if remove_hs:
            mol = Chem.RemoveHs(mol, sanitize=sanitize)
    except Exception as e:
        print(e)
        print("RDKit was unable to read the molecule.")
        return None

    return mol

In [22]:
dd_out_dir = "/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset"
mol2_write_dir = "/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/mol_files_231209"

#Obtain output poses as mol2s
for dd_out in glob.glob(f"{dd_out_dir}/index*"):
    
    #Find sdf
    print(dd_out)
    sdf_highest_conf = f"{dd_out}/rank1.sdf"
    pdb_name = dd_out.split("/")[-1].split("_")[2][-4:]
    print(pdb_name)
    
    #Load sdf
    sdf_load = read_molecule(sdf_highest_conf, remove_hs=False, sanitize=True)
    
    #mol2 save
    #ref https://www.rdkit.org/docs/GettingStartedInPython.html
    #ref https://www.rdkit.org/docs/cppapi/classRDKit_1_1PDBWriter.html
    #no mol2 option ref https://github.com/rdkit/rdkit/discussions/3647
    #https://www.rdkit.org/docs/source/rdkit.Chem.rdmolfiles.html
    #print(Chem.MolToMolBlock(sdf_load)) 
    #print(Chem.MolToMolBlock(sdf_load),file=open(f'{mol2_write_dir}/{pdb_name}.mol2','w+'))
    write_pdb = rdkit.Chem.rdmolfiles.PDBWriter(f'{mol2_write_dir}/{pdb_name}_ligand_dd.pdb')
    write_pdb.write(sdf_load)
    write_pdb.close()

/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index312_data-PDBBind_processed-6mji-6mji_protein_processed.pdb____data-PDBBind_processed-6mji-6mji_ligand.sdf
6mji
/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index15_data-PDBBind_processed-6g24-6g24_protein_processed.pdb____data-PDBBind_processed-6g24-6g24_ligand.sdf
6g24
/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index149_data-PDBBind_processed-6nri-6nri_protein_processed.pdb____data-PDBBind_processed-6nri-6nri_ligand.sdf
6nri
/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index262_data-PDBBind_processed-6hzc-6hzc_protein_processed.pdb____data-PDBBind_processed-6hzc-6hzc_ligand.sdf
6hzc
/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index336_data-PDBBind_processed-6i67-6i67_protein_processed.pdb____data-PDBBind_processed-6i67-6i67_ligand.

/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index104_data-PDBBind_processed-6cyh-6cyh_protein_processed.pdb____data-PDBBind_processed-6cyh-6cyh_ligand.sdf
6cyh
/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index315_data-PDBBind_processed-6op9-6op9_protein_processed.pdb____data-PDBBind_processed-6op9-6op9_ligand.sdf
6op9
/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index274_data-PDBBind_processed-6j9y-6j9y_protein_processed.pdb____data-PDBBind_processed-6j9y-6j9y_ligand.sdf
6j9y
/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index82_data-PDBBind_processed-6i41-6i41_protein_processed.pdb____data-PDBBind_processed-6i41-6i41_ligand.sdf
6i41
/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index46_data-PDBBind_processed-6n9l-6n9l_protein_processed.pdb____data-PDBBind_processed-6n9l-6n9l_ligand.s

/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index94_data-PDBBind_processed-6rnu-6rnu_protein_processed.pdb____data-PDBBind_processed-6rnu-6rnu_ligand.sdf
6rnu
/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index148_data-PDBBind_processed-6pnn-6pnn_protein_processed.pdb____data-PDBBind_processed-6pnn-6pnn_ligand.sdf
6pnn
/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index195_data-PDBBind_processed-6rpg-6rpg_protein_processed.pdb____data-PDBBind_processed-6rpg-6rpg_ligand.sdf
6rpg
/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index121_data-PDBBind_processed-6i68-6i68_protein_processed.pdb____data-PDBBind_processed-6i68-6i68_ligand.sdf
6i68
/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index113_data-PDBBind_processed-6s07-6s07_protein_processed.pdb____data-PDBBind_processed-6s07-6s07_ligand.

/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index96_data-PDBBind_processed-6izq-6izq_protein_processed.pdb____data-PDBBind_processed-6izq-6izq_ligand.sdf
6izq
/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index77_data-PDBBind_processed-6kqi-6kqi_protein_processed.pdb____data-PDBBind_processed-6kqi-6kqi_ligand.sdf
6kqi
/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index243_data-PDBBind_processed-6i8m-6i8m_protein_processed.pdb____data-PDBBind_processed-6i8m-6i8m_ligand.sdf
6i8m
/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index258_data-PDBBind_processed-6e3o-6e3o_protein_processed.pdb____data-PDBBind_processed-6e3o-6e3o_ligand.sdf
6e3o
/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index128_data-PDBBind_processed-6gbw-6gbw_protein_processed.pdb____data-PDBBind_processed-6gbw-6gbw_ligand.s

5