In [14]:
# Read each molecule with sanitize=True and sanitize=False: does it change the radius of gyration (rg)?
import warnings

import rdkit
from rdkit import Chem
from rdkit.Chem.rdchem import BondType as BT
from rdkit.Chem import AllChem, GetPeriodicTable, RemoveHs
from rdkit.Chem import Descriptors
from rdkit.Chem import Descriptors3D

In [15]:
#Copied from https://github.com/gcorso/DiffDock/blob/main/datasets/process_mols.py
def read_molecule(molecule_file, sanitize=False, calc_charges=False, remove_hs=False):
    """Load a single molecule from a .mol2, .sdf, .pdbqt or .pdb file with RDKit.

    The file is always parsed with ``sanitize=False`` and ``removeHs=False``;
    sanitization, charge calculation and hydrogen removal are applied
    afterwards, only when requested through the flags below.

    Args:
        molecule_file: Path to the molecule file (``str`` or path-like). The
            format is chosen from the file extension (case-sensitive).
        sanitize: If true, run ``Chem.SanitizeMol`` on the parsed molecule.
            Also forwarded to ``Chem.RemoveHs`` when ``remove_hs`` is set.
        calc_charges: If true, sanitize the molecule (regardless of
            ``sanitize``) and try to compute Gasteiger charges. A failure in
            the charge calculation only emits a warning.
        remove_hs: If true, return the molecule with hydrogens removed.

    Returns:
        The RDKit molecule, or ``None`` if RDKit could not parse the file or
        a requested post-processing step raised.

    Raises:
        ValueError: If the file extension is not one of the supported formats.
    """
    # Accept pathlib.Path and other path-like objects as well as plain strings.
    molecule_file = str(molecule_file)

    if molecule_file.endswith('.mol2'):
        mol = Chem.MolFromMol2File(molecule_file, sanitize=False, removeHs=False)
    elif molecule_file.endswith('.sdf'):
        supplier = Chem.SDMolSupplier(molecule_file, sanitize=False, removeHs=False)
        # Only the first record of the SDF is used.
        mol = supplier[0]
    elif molecule_file.endswith('.pdbqt'):
        with open(molecule_file) as file:
            pdbqt_data = file.readlines()
        # Keep only the first 66 columns of every line so that the remaining
        # text can be handed to the PDB parser.
        pdb_block = ''.join('{}\n'.format(line[:66]) for line in pdbqt_data)
        mol = Chem.MolFromPDBBlock(pdb_block, sanitize=False, removeHs=False)
    elif molecule_file.endswith('.pdb'):
        mol = Chem.MolFromPDBFile(molecule_file, sanitize=False, removeHs=False)
    else:
        raise ValueError('Expect the format of the molecule_file to be '
                         'one of .mol2, .sdf, .pdbqt and .pdb, got {}'.format(molecule_file))

    # The RDKit readers signal a parse failure by returning None; report it
    # here instead of letting it pass silently or fail later with an opaque
    # error from the post-processing calls.
    if mol is None:
        print("RDKit was unable to read the molecule.")
        return None

    try:
        if sanitize or calc_charges:
            Chem.SanitizeMol(mol)

        if calc_charges:
            # Compute Gasteiger charges on the molecule (best effort).
            try:
                AllChem.ComputeGasteigerCharges(mol)
            except Exception:
                warnings.warn('Unable to compute charges for the molecule.')

        if remove_hs:
            mol = Chem.RemoveHs(mol, sanitize=sanitize)
    except Exception as e:
        print(e)
        print("RDKit was unable to read the molecule.")
        return None

    return mol

In [17]:
# For each test molecule, read it once with sanitization and once without,
# then compare 3D shape descriptors and the aliphatic ring count.
for mtry in ["/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index261_data-PDBBind_processed-6qi7-6qi7_protein_processed.pdb____data-PDBBind_processed-6qi7-6qi7_ligand.sdf/rank1.sdf",
             "/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index249_data-PDBBind_processed-6a73-6a73_protein_processed.pdb____data-PDBBind_processed-6a73-6a73_ligand.sdf/rank1.sdf",
             "/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index241_data-PDBBind_processed-6n4b-6n4b_protein_processed.pdb____data-PDBBind_processed-6n4b-6n4b_ligand.sdf/rank1.sdf",
             "/Users/dsharon/Documents/MIT/6.8701/Project/Data/From_Hannes/user_predictions_testset/index232_data-PDBBind_processed-6oxr-6oxr_protein_processed.pdb____data-PDBBind_processed-6oxr-6oxr_ligand.sdf/rank1.sdf",
             "/Users/dsharon/Documents/MIT/6.8701/Project/Code/HarmonicFlow/FlowSite/data/PDBBind_processed/6kzc/6kzc_ligand.mol2",
             "/Users/dsharon/Documents/MIT/6.8701/Project/Code/HarmonicFlow/FlowSite/data/PDBBind_processed/6kzc/6kzc_ligand.sdf",
             "/Users/dsharon/Documents/MIT/6.8701/Project/Code/HarmonicFlow/FlowSite/data/PDBBind_processed/5ayt/5ayt_ligand.mol2",
             "/Users/dsharon/Documents/MIT/6.8701/Project/Code/HarmonicFlow/FlowSite/data/PDBBind_processed/5ayt/5ayt_ligand.sdf"]:

    # Read the same file with and without sanitization.
    s_true = read_molecule(mtry, remove_hs=False, sanitize=True)
    s_false = read_molecule(mtry, remove_hs=False, sanitize=False)

    # read_molecule returns None when RDKit cannot load or sanitize the file;
    # skip such entries instead of crashing inside the descriptor calls.
    if s_true is None or s_false is None:
        print(f"skipping {mtry}: molecule could not be read")
        continue

    # Compute the same descriptors for both versions of the molecule.
    #Ref https://www.rdkit.org/docs/GettingStartedInPython.html#list-of-available-descriptors
    #Ref https://www.rdkit.org/docs/source/rdkit.Chem.Descriptors.html#module-rdkit.Chem.Descriptors
    #Ref https://www.rdkit.org/docs/GettingStartedInPython.html
    #Ref https://www.rdkit.org/docs/source/rdkit.Chem.rdMolDescriptors.html
    #Ref https://www.rdkit.org/docs/source/rdkit.Chem.Descriptors3D.html
    rg_s_true = rdkit.Chem.Descriptors3D.RadiusOfGyration(s_true)
    rg_s_false = rdkit.Chem.Descriptors3D.RadiusOfGyration(s_false)
    as_s_true = rdkit.Chem.Descriptors3D.Asphericity(s_true)
    as_s_false = rdkit.Chem.Descriptors3D.Asphericity(s_false)
    # NOTE(review): the ring count presumably depends on ring/aromaticity
    # perception done during sanitization, which would explain why it can
    # differ between the two reads while the 3D descriptors do not - confirm.
    r_s_true = Descriptors.NumAliphaticRings(s_true)
    r_s_false = Descriptors.NumAliphaticRings(s_false)

    print(f"rg true {rg_s_true} false {rg_s_false} diff {rg_s_true - rg_s_false}")
    # This line previously reused the "rg" label although it reports asphericity.
    print(f"asphericity true {as_s_true} false {as_s_false} diff {as_s_true - as_s_false}")
    print(f"rings true {r_s_true} false {r_s_false} diff {r_s_true - r_s_false}")

rg true 6.133548920773422 false 6.133548920773422 diff 0.0
rg true 0.924842342900612 false 0.924842342900612 diff 0.0
rings true 0 false 0 diff 0
rg true 3.7566257772545804 false 3.7566257772545804 diff 0.0
rg true 0.033251438443999484 false 0.033251438443999484 diff 0.0
rings true 1 false 0 diff 1
rg true 4.3735794370618155 false 4.3735794370618155 diff 0.0
rg true 0.33783610281277276 false 0.33783610281277276 diff 0.0
rings true 0 false 0 diff 0
rg true 5.332831986390477 false 5.332831986390477 diff 0.0
rg true 0.6130870205969303 false 0.6130870205969303 diff 0.0
rings true 2 false 0 diff 2
rg true 6.170484502853873 false 6.170484502853873 diff 0.0
rg true 0.6311586573389485 false 0.6311586573389485 diff 0.0
rings true 1 false 3 diff -2
rg true 6.1702327689411955 false 6.1702327689411955 diff 0.0
rg true 0.6312639502597474 false 0.6312639502597474 diff 0.0
rings true 1 false 0 diff 1
rg true 3.7194849265621874 false 3.7194849265621874 diff 0.0
rg true 0.5188469944990629 false 0.51884