In [1]:
import pandas
from openeye import oechem, oespruce

# Loading protein, splitting, and aligning to reference

In [2]:
pdb_fn = "input_pdbs/5WKK.pdb"
## Load molecule to extract
ifs = oechem.oemolistream()
ifs.SetFlavor(
    oechem.OEFormat_PDB,
    oechem.OEIFlavor_PDB_Default
    | oechem.OEIFlavor_PDB_DATA
    | oechem.OEIFlavor_PDB_ALTLOC,
)
ifs.open(pdb_fn)
complex_mol = oechem.OEGraphMol()
oechem.OEReadMolecule(ifs, complex_mol)
ifs.close()

## Keep only highest occupancy alt location
alf = oechem.OEAltLocationFactory(complex_mol)
if alf.GetGroupCount() != 0:
    print(complex_mol.NumAtoms())
    alf.MakePrimaryAltMol(complex_mol)
    print(complex_mol.NumAtoms())

2634
2603


In [3]:
## Test splitting
lig_mol = oechem.OEGraphMol()
prot_mol = oechem.OEGraphMol()
water_mol = oechem.OEGraphMol()
oth_mol = oechem.OEGraphMol()

## Make splitting split out covalent ligands
## TODO: look into different covalent-related options here
opts = oechem.OESplitMolComplexOptions()
opts.SetSplitCovalent(True)
opts.SetSplitCovalentCofactors(True)
print(
    oechem.OESplitMolComplex(lig_mol, prot_mol, water_mol, oth_mol, complex_mol)
)

print(
    complex_mol.NumAtoms(),
    lig_mol.NumAtoms(),
    prot_mol.NumAtoms(),
    water_mol.NumAtoms(),
    oth_mol.NumAtoms(),
)

True
2603 62 2256 24 261


In [4]:
## Load ref PDB molecule
ref_fn = "input_pdbs/reference.pdb"
ref_complex_mol = oechem.OEGraphMol()
ifs.open(ref_fn)
oechem.OEReadMolecule(ifs, ref_complex_mol)
ifs.close()
print(ref_complex_mol.NumAtoms())

## Perform splitting for ref
ref_lig_mol = oechem.OEGraphMol()
ref_prot_mol = oechem.OEGraphMol()
ref_water_mol = oechem.OEGraphMol()
ref_oth_mol = oechem.OEGraphMol()
print(
    oechem.OESplitMolComplex(
        ref_lig_mol, ref_prot_mol, ref_water_mol, ref_oth_mol, ref_complex_mol
    )
)
print(
    ref_complex_mol.NumAtoms(),
    ref_lig_mol.NumAtoms(),
    ref_prot_mol.NumAtoms(),
    ref_water_mol.NumAtoms(),
    ref_oth_mol.NumAtoms(),
)

2832
True
2832 34 2316 18 464


In [5]:
## Align new protein to ref
aln_res = oespruce.OESuperposeResults()
superpos = oespruce.OESuperpose()
superpos.SetupRef(ref_prot_mol)
superpos.Superpose(aln_res, prot_mol)
print(f"RMSD: {aln_res.GetRMSD()}")

prot_mol_aligned = prot_mol.CreateCopy()
print(aln_res.Transform(prot_mol_aligned))

## Save new aligned molecule for inspection
out_fn = "test/5WKK_split_aligned.pdb"
ofs = oechem.oemolostream()
ofs.SetFlavor(oechem.OEFormat_PDB, oechem.OEOFlavor_PDB_Default)
ofs.open(out_fn)
oechem.OEWriteMolecule(ofs, prot_mol_aligned)
ofs.close()

RMSD: 1.8288194400350355
True


# Load in new ligand and create DesignUnit

In [6]:
## Load ligand molecule
sdf_fn = f"fragalysis/aligned/Mpro-P2007_0A/Mpro-P2007_0A.sdf"
ifs = oechem.oemolistream()
ifs.SetFlavor(
    oechem.OEFormat_SDF,
    oechem.OEIFlavor_SDF_Default,
)
ifs.open(sdf_fn)
coords_mol = oechem.OEGraphMol()
oechem.OEReadMolecule(ifs, coords_mol)
ifs.close()
print(coords_mol.NumAtoms())

## Load CDD compound tracker
compound_df = pandas.read_csv(
    "fragalysis/extra_files/Mpro_compound_tracker_csv.csv"
)

## Get SMILES from compound tracker
idx = compound_df["Dataset"] == "Mpro-P2007"
smiles = compound_df.loc[idx, "SMILES"].values[0]
new_lig_mol = oechem.OEGraphMol()
oechem.OESmilesToMol(new_lig_mol, smiles)
print(new_lig_mol.NumAtoms())

20
20


In [7]:
## Make copy of new_lig_mol so we don't mess up the atom ordering in the
##  actual molecule
temp_mol = new_lig_mol.CreateCopy()

## Reorder the atoms and bonds so the iterate together
oechem.OECanonicalOrderAtoms(coords_mol)
oechem.OECanonicalOrderBonds(coords_mol)
oechem.OECanonicalOrderAtoms(temp_mol)
oechem.OECanonicalOrderBonds(temp_mol)

## Copy coordinates from the SDF-loaded mol into the SMILES-generated one
## Build new coordinate dict with the proper numbering for the
##  smiles-based molecule
new_lig_mol_coords = {}
coords_mol_coords = coords_mol.GetCoords()
for a1, a2 in zip(temp_mol.GetAtoms(), coords_mol.GetAtoms()):
    print(a1, a2)
    idx1 = a1.GetIdx()
    idx2 = a2.GetIdx()
    new_lig_mol_coords[idx1] = coords_mol_coords[idx2]
new_lig_mol.SetCoords(new_lig_mol_coords)

15 C 18 C
16 C 17 C
 3 C  5 C
14 C 19 C
 4 C  4 C
 2 C  6 C
17 C 16 C
19 C  7 C
11 C 15 C
 6 C  2 C
 5 C  3 C
 1 C  8 C
13 C 13 C
10 C 11 C
 7 C  1 C
12 N 14 N
 9 N 10 N
18 N 12 N
 8 O  0 O
 0Cl  9Cl


True

# Brief aside to see if transferring coordinates works
(run to here)

In [8]:
from openeye import oedepict

oedepict.OEPrepareDepiction(new_lig_mol_test)
oedepict.OEPrepareDepiction(smiles_mol_test)
width, height = 600, 600

opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale)
opts.SetAtomPropertyFunctor(oedepict.OEDisplayAtomIdx())
opts.SetAtomPropLabelFont(oedepict.OEFont(oechem.OEDarkGreen))

disp = oedepict.OE2DMolDisplay(new_lig_mol_test, opts)
oedepict.OERenderMolecule("test1.png", disp)

disp = oedepict.OE2DMolDisplay(smiles_mol_test, opts)
oedepict.OERenderMolecule("test2.png", disp)

NameError: name 'new_lig_mol_test' is not defined

In [None]:
## Build new coordinate dict with the proper numbering for the
##  smiles-based molecule
smiles_mol_coords = {}
new_lig_mol_coords = new_lig_mol_test.GetCoords()
for a1, a2 in zip(new_lig_mol_test.GetAtoms(), smiles_mol_test.GetAtoms()):
    idx1 = a1.GetIdx()
    idx2 = a2.GetIdx()
    smiles_mol_coords[idx2] = new_lig_mol_coords[idx1]
smiles_mol_test.SetCoords(smiles_mol_coords)

## Add Hs to both to be able to see double bonds in PDB
print(oechem.OEAddExplicitHydrogens(new_lig_mol_test))
print(oechem.OEAddExplicitHydrogens(smiles_mol_test))
## Update H coordinates to be 3D (not sure why this is necessary,
##  I think only for smiles_mol_test)
print(oechem.OESet3DHydrogenGeom(new_lig_mol_test))
print(oechem.OESet3DHydrogenGeom(smiles_mol_test))

## Save both as PDB to load and see if it worked
ofs = oechem.oemolostream()
ofs.SetFlavor(oechem.OEFormat_PDB, oechem.OEOFlavor_PDB_Default)

ofs.open("test/original_ligand.pdb")
oechem.OEWriteMolecule(ofs, new_lig_mol_test)
ofs.close()

ofs.open("test/coord_copied_ligand.pdb")
oechem.OEWriteMolecule(ofs, smiles_mol_test)
ofs.close()
## It works!

# End aside
(run from here)

In [8]:
## Add explicit Hs to protein and new ligand to prep
prot_mol_prep = prot_mol_aligned.CreateCopy()
new_lig_prep = new_lig_mol.CreateCopy()
print(oechem.OEAddExplicitHydrogens(prot_mol_prep))
print(oechem.OEAddExplicitHydrogens(new_lig_prep))
## Update H coordinates to be 3D (not sure why this is necessary)
print(oechem.OESet3DHydrogenGeom(new_lig_prep))

## Re-reorder the atoms and bonds
# oechem.OECanonicalOrderAtoms(new_lig_prep)
# oechem.OECanonicalOrderBonds(new_lig_prep)

True
True
True


In [9]:
from openeye import oedepict


def PrepareDepiction(mol, clearcoords=False, suppressH=True):
    oechem.OESetDimensionFromCoords(mol)
    oechem.OEPerceiveChiral(mol)

    if mol.GetDimension() != 2 or clearcoords:
        if mol.GetDimension() == 3:
            oechem.OE3DToBondStereo(mol)
            oechem.OE3DToAtomStereo(mol)
        if suppressH:
            oechem.OESuppressHydrogens(mol)
        # oedepict.OEAddDepictionHydrogens(mol)

        oedepict.OEDepictCoordinates(mol)
        oechem.OEMDLPerceiveBondStereo(mol)

    mol.SetDimension(2)
    return True


depict_mol = new_lig_prep.CreateCopy()
PrepareDepiction(depict_mol, False, False)
width, height = 600, 600

opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale)
opts.SetAtomPropertyFunctor(oedepict.OEDisplayAtomIdx())
opts.SetAtomPropLabelFont(oedepict.OEFont(oechem.OEDarkGreen))

disp = oedepict.OE2DMolDisplay(depict_mol, opts)
oedepict.OERenderMolecule("new_lig.png", disp)

for a in new_lig_prep.GetAtoms():
    print(a)

 0Cl
 1 C
 2 C
 3 C
 4 C
 5 C
 6 C
 7 C
 8 O
 9 N
10 C
11 C
12 N
13 C
14 C
15 C
16 C
17 C
18 N
19 C
20 H
21 H
22 H
23 H
24 H
25 H
26 H
27 H
28 H
29 H
30 H
31 H


In [10]:
## Make the design unit and save
du = oechem.OEDesignUnit()
print(oespruce.OEMakeDesignUnit(du, prot_mol_prep, new_lig_prep))
print(du.HasProtein(), du.HasLigand())
du_out_fn = "test/5WKK_new_lig.oedu"
if du.HasProtein() and du.HasLigand():
    oechem.OEWriteDesignUnit(du_out_fn, du)

True
True True




In [11]:
## Save protein + new ligand as PDB
new_complex_mol = oechem.OEGraphMol()
print(du.GetProtein(new_complex_mol))
print(new_complex_mol.NumAtoms())
print(du.GetLigand(new_complex_mol))
print(new_complex_mol.NumAtoms())
print(
    du.GetComponents(
        new_complex_mol,
        oechem.OEDesignUnitComponents_Protein
        | oechem.OEDesignUnitComponents_Ligand,
    )
)
print(new_complex_mol.NumAtoms())
ofs.open("test/5WKK_new_lig.pdb")
oechem.OEWriteMolecule(ofs, new_complex_mol)
ofs.close()

## Also save just lig to check CONECT records
ofs.open("test/5WKK_new_lig_only.pdb")
oechem.OEWriteMolecule(ofs, new_lig_prep)
ofs.close()

True
4583
True
32
True
4615


# Re-docking of new ligand

In [12]:
from kinoml.docking.OEDocking import pose_molecules

# score_pose = True
docked_molecules = pose_molecules(du, [new_lig_prep.CreateCopy()])
print(docked_molecules)

  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)


None


In [13]:
mol = new_lig_prep.CreateCopy()
# print(oechem.OEClearChiralPerception(mol))
# print(oechem.OEPerceiveChiral(mol, True))
print(oechem.OE3DToAtomStereo(mol))

False


In [14]:
print(mol.HasPerceived(oechem.OEPerceived_Chiral))
print(oechem.OEClearChiralPerception(mol))
print(mol.HasPerceived(oechem.OEPerceived_Chiral))
print(oechem.OEPerceiveChiral(mol))
print(mol.HasPerceived(oechem.OEPerceived_Chiral))
for a in mol.GetAtoms():
    if a.IsChiral():
        print(a)

False
None
False
True
True


In [16]:
print(new_lig_mol_coords)
for a in new_lig_prep.GetAtoms():
    #     print(a, [str(nbr) for nbr in a.GetAtoms()])
    print(a, a.GetIdx(), new_lig_prep.GetCoords()[a.GetIdx()])

print("-----")

for a in coords_mol.GetAtoms():
    print(a, a.GetIdx(), coords_mol.GetCoords()[a.GetIdx()])

{15: (3.7990000247955322, 2.1019999980926514, 19.53499984741211), 16: (4.125, 1.7589999437332153, 20.87299919128418), 3: (12.58899974822998, 0.9039999842643738, 24.055999755859375), 14: (4.525000095367432, 1.6080000400543213, 18.489999771118164), 4: (11.255999565124512, 0.5249999761581421, 24.040000915527344), 2: (13.498000144958496, 0.29899999499320984, 23.201000213623047), 17: (5.171999931335449, 0.9449999928474426, 21.1299991607666), 19: (11.72700023651123, -1.100000023841858, 22.344999313354492), 11: (7.394000053405762, -0.4729999899864197, 18.738000869750977), 6: (9.326000213623047, -0.7480000257492065, 23.024999618530273), 5: (10.805000305175781, -0.45899999141693115, 23.16900062561035), 1: (13.053000450134277, -0.7110000252723694, 22.3700008392334), 13: (5.631999969482422, 0.7609999775886536, 18.75), 10: (7.065999984741211, -0.328000009059906, 20.047000885009766), 7: (8.734000205993652, -0.08100000023841858, 21.79800033569336), 12: (6.514999866485596, 0.19699999690055847, 17.923