# Calculate RMS - Glide (XP, SP, HTVS)

In [None]:
# import libraries
import pandas as pd
from IPython.display import display
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import AllChem
from spyrmsd import rmsd
import spyrmsd.molecule
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import spyrmsd.rmsd

In [None]:
# Map each PDB complex code to the three-letter code of its bound ligand.
complex_ligand = {
    "1S1C": "GNP",
    "2ESM": "M77",
    "2ETK": "HFS",
    "2ETR": "Y27",
    "2V55": "ANP",
    "3D9V": "H52",
    "3NCZ": "3NC",
    "3TV7": "07Q",
    "3TWJ": "07R",
    "3V8S": "0HD",
    "4W7P": "3J7",
    "4YVC": "4KH",
    "4YVE": "4KK",
    "5BML": "4TW",
    "5HVU": "65R",
    "5KKS": "6U1",
    "5KKT": "6U2",
    "5UZJ": "8UV",
    "5WNE": "B4J",
    "5WNF": "B4V",
    "5WNG": "B4Y",
    "5WNH": "B5G",
    "6E9W": "J0P",
    "7JOU": "VFS",
    "7S25": "86G",
    "7S26": "86K",
}

# Re-key the mapping in ascending ligand-code order; the cells below resolve
# ligands by their position in this ordering.
complex_ligand = dict(sorted(complex_ligand.items(), key=lambda item: item[1]))

# Ligand codes in that same order. They are taken straight from the dict
# values so that no loop variable named `complex` is left behind shadowing
# the builtin of the same name.
molecules_order = list(complex_ligand.values())

print(molecules_order)

In [None]:
# Load the reference (aligned) poses and key them by ligand code.
aligned_path = "../materials/aligned_molecules.sdf"
with Chem.SDMolSupplier(aligned_path) as w:
    al_mols = [entry for entry in w]

# Each pose is paired with the code found at the same position in
# molecules_order; the GNP entry is left out of the result.
# NOTE(review): this relies on the SDF records being stored in the same
# order as molecules_order - confirm against the file.
aligned = {}
for position, pose in enumerate(al_mols):
    code = molecules_order[position]
    if code == 'GNP':
        continue
    aligned[code] = pose

print(aligned)

## MOE (for comparison)

In [None]:
# Read the MOE docked poses and group them by ligand code (GNP is skipped).
docked_path = "../materials/docking/moe/moe_docked.sdf"
with Chem.SDMolSupplier(docked_path, removeHs=True) as w:
    mols_temp = [mol for mol in w]

# The "mseq" property is resolved against the sorted ligand list *including*
# GNP. That list is rebuilt locally so the lookup stays correct even when this
# cell is re-run after GNP has already been dropped from molecules_order.
source_order = sorted(set(molecules_order) | {"GNP"})

# One (initially empty) pose list per ligand code.
mols = {code: [] for code in source_order}

for docked in mols_temp:
    # "mseq" is used as a 1-based position into the ligand ordering.
    code = source_order[int(docked.GetProp("mseq")) - 1]
    if code != "GNP":
        mols[code].append(docked)

# The following cells expect molecules_order without GNP; dropping it only
# when present keeps this cell safe to run more than once.
if "GNP" in molecules_order:
    molecules_order.remove("GNP")

print(mols)
mols_moe = mols

In [None]:
# Compute the RMSD (symmetry=True, strip=True) between each reference pose and
# the first ten docked poses of the same ligand.

rms_res = []
for code, al_mol in aligned.items():
    # The reference is identical for every pose of this ligand: convert it once.
    spy_al = spyrmsd.molecule.Molecule.from_rdkit(al_mol)
    # Only the first ten poses are reported, so only those are evaluated.
    for conf_count, doc_mol in enumerate(mols[code][:10]):
        spy_doc = spyrmsd.molecule.Molecule.from_rdkit(doc_mol)
        rms = spyrmsd.rmsd.rmsdwrapper(spy_al, spy_doc, symmetry=True, strip=True)
        # A single pose is passed in, so the value of interest is item 0.
        rms_res.append({"code": code, "confId": conf_count, "rms": rms[0]})

# One row per (ligand, pose): columns "code", "confId", "rms".
df = pd.DataFrame.from_records(rms_res)
display(df.head())

In [None]:
# For every ligand keep only the pose with the smallest RMSD.
min_rms_indeces = df.groupby("code")["rms"].idxmin()
best_pose_columns = ["code", "confId", "rms"]
min_df_moe = df.loc[min_rms_indeces, best_pose_columns].reset_index(drop=True)

## Glide XP

In [None]:
# Read the Glide XP docked poses and keep the first ten per ligand
# (GNP is skipped).
docked_path = "../materials/docking/glide/glide_xp_docked.sdf"
with Chem.SDMolSupplier(docked_path, removeHs=True) as w:
    mols_temp = [mol for mol in w]

# "i_m_source_file_index" is resolved against the sorted ligand list
# *including* GNP. The list is built locally instead of appending to and
# removing from molecules_order, so the lookup is correct whether or not GNP
# is currently in molecules_order; molecules_order itself is left untouched.
source_order = sorted(set(molecules_order) | {"GNP"})

# One (initially empty) pose list per ligand code, GNP excluded.
mols = {code: [] for code in source_order if code != "GNP"}

for docked in mols_temp:
    # The property is used as a 1-based position into the ligand ordering.
    code = source_order[int(docked.GetProp("i_m_source_file_index")) - 1]
    if code != "GNP":
        mols[code].append(docked)

# Keep at most ten poses per ligand, in the order they appear in the file.
# NOTE(review): presumably the file lists poses best-first - confirm.
mols = {code: poses[:10] for code, poses in mols.items()}

print(mols)
mols_xp = mols

In [None]:
# Compute the RMSD (symmetry=True, strip=True) between each reference pose and
# the first ten docked poses of the same ligand.

rms_res = []
for code, al_mol in aligned.items():
    # The reference is identical for every pose of this ligand: convert it once.
    spy_al = spyrmsd.molecule.Molecule.from_rdkit(al_mol)
    # Only the first ten poses are reported, so only those are evaluated.
    for conf_count, doc_mol in enumerate(mols[code][:10]):
        spy_doc = spyrmsd.molecule.Molecule.from_rdkit(doc_mol)
        rms = spyrmsd.rmsd.rmsdwrapper(spy_al, spy_doc, symmetry=True, strip=True)
        # A single pose is passed in, so the value of interest is item 0.
        rms_res.append({"code": code, "confId": conf_count, "rms": rms[0]})

# One row per (ligand, pose): columns "code", "confId", "rms".
df = pd.DataFrame.from_records(rms_res)
display(df.head())

In [None]:
# For every ligand keep only the pose with the smallest RMSD.
min_rms_indeces = df.groupby("code")["rms"].idxmin()
best_pose_columns = ["code", "confId", "rms"]
min_df_xp = df.loc[min_rms_indeces, best_pose_columns].reset_index(drop=True)
display(min_df_xp)

# Rank every pose within its ligand by RMSD (1 = smallest, equal values share
# a rank) and write the complete table to disk.
rms_by_code = df.groupby("code")["rms"]
df["rank_rms"] = rms_by_code.rank(method="dense", ascending=True).astype(int)
out_path = "../materials/graphs/glide_xp.csv"
df.to_csv(out_path, index=False)

In [None]:
# Put the best Glide XP pose and the best MOE pose of each ligand side by
# side; shared column names get the "_xp" / "_moe" suffixes.
df_merged_xp = pd.merge(
    min_df_xp,
    min_df_moe,
    on="code",
    suffixes=("_xp", "_moe"),
)
display(df_merged_xp.head())

In [None]:
# Scatter the best Glide XP RMSD (x) against the best MOE RMSD (y), one point
# per ligand code.
fig = px.scatter(
    df_merged_xp,
    x="rms_xp",
    y="rms_moe",
    color="code",
    labels={
        "code": "PDB Code",
        "rms_xp": "RMS Glide XP",
        "rms_moe": "RMS MOE",
    },
    title="Glide XP x MOE",
)
fig.update_traces(textposition="bottom right")

# Reference diagonal y = x: points below it have the lower RMSD with MOE,
# points above it with Glide XP. It covers at least 0-5 and is stretched when
# any RMSD is larger, so it always reaches the outermost point.
largest_rms = df_merged_xp[["rms_xp", "rms_moe"]].max().max()
# 5.0 is listed first so that a NaN maximum (empty table) falls back to 5.0.
diagonal_end = max(5.0, float(largest_rms))
diagonal = np.linspace(0, diagonal_end, 100)

fig.add_trace(go.Scatter(
    x=diagonal,
    y=diagonal,
    mode="lines",
    name="y = x",
    line=dict(color="red", width=1.5),
))

fig.show()

## Glide SP

In [None]:
# Read the Glide SP docked poses and keep the first ten per ligand
# (GNP is skipped).
docked_path = "../materials/docking/glide/glide_sp_docked.sdf"
with Chem.SDMolSupplier(docked_path, removeHs=True) as w:
    mols_temp = [mol for mol in w]

# "i_m_source_file_index" is resolved against the sorted ligand list
# *including* GNP. The list is built locally instead of appending to and
# removing from molecules_order, so the lookup is correct whether or not GNP
# is currently in molecules_order; molecules_order itself is left untouched.
source_order = sorted(set(molecules_order) | {"GNP"})

# One (initially empty) pose list per ligand code, GNP excluded.
mols = {code: [] for code in source_order if code != "GNP"}

for docked in mols_temp:
    # The property is used as a 1-based position into the ligand ordering.
    code = source_order[int(docked.GetProp("i_m_source_file_index")) - 1]
    if code != "GNP":
        mols[code].append(docked)

# Keep at most ten poses per ligand, in the order they appear in the file.
# NOTE(review): presumably the file lists poses best-first - confirm.
mols = {code: poses[:10] for code, poses in mols.items()}

print(mols)
mols_sp = mols

In [None]:
# Compute the RMSD (symmetry=True, strip=True) between each reference pose and
# the first ten docked poses of the same ligand.

rms_res = []
for code, al_mol in aligned.items():
    # The reference is identical for every pose of this ligand: convert it once.
    spy_al = spyrmsd.molecule.Molecule.from_rdkit(al_mol)
    # Only the first ten poses are reported, so only those are evaluated.
    for conf_count, doc_mol in enumerate(mols[code][:10]):
        spy_doc = spyrmsd.molecule.Molecule.from_rdkit(doc_mol)
        rms = spyrmsd.rmsd.rmsdwrapper(spy_al, spy_doc, symmetry=True, strip=True)
        # A single pose is passed in, so the value of interest is item 0.
        rms_res.append({"code": code, "confId": conf_count, "rms": rms[0]})

# One row per (ligand, pose): columns "code", "confId", "rms".
df = pd.DataFrame.from_records(rms_res)
display(df.head())

In [None]:
# For every ligand keep only the pose with the smallest RMSD.
min_rms_indeces = df.groupby("code")["rms"].idxmin()
best_pose_columns = ["code", "confId", "rms"]
min_df_sp = df.loc[min_rms_indeces, best_pose_columns].reset_index(drop=True)
display(min_df_sp)

# Rank every pose within its ligand by RMSD (1 = smallest, equal values share
# a rank) and write the complete table to disk.
rms_by_code = df.groupby("code")["rms"]
df["rank_rms"] = rms_by_code.rank(method="dense", ascending=True).astype(int)
out_path = "../materials/graphs/glide_sp.csv"
df.to_csv(out_path, index=False)

In [None]:
# Put the best Glide SP pose and the best MOE pose of each ligand side by
# side; shared column names get the "_sp" / "_moe" suffixes.
df_merged_sp = pd.merge(
    min_df_sp,
    min_df_moe,
    on="code",
    suffixes=("_sp", "_moe"),
)
display(df_merged_sp.head())

In [None]:
# Scatter the best Glide SP RMSD (x) against the best MOE RMSD (y), one point
# per ligand code.
fig = px.scatter(
    df_merged_sp,
    x="rms_sp",
    y="rms_moe",
    color="code",
    labels={
        "code": "PDB Code",
        "rms_sp": "RMS Glide SP",
        "rms_moe": "RMS MOE",
    },
    title="Glide SP x MOE",
)
fig.update_traces(textposition="bottom right")

# Reference diagonal y = x: points below it have the lower RMSD with MOE,
# points above it with Glide SP. It covers at least 0-5 and is stretched when
# any RMSD is larger, so it always reaches the outermost point.
largest_rms = df_merged_sp[["rms_sp", "rms_moe"]].max().max()
# 5.0 is listed first so that a NaN maximum (empty table) falls back to 5.0.
diagonal_end = max(5.0, float(largest_rms))
diagonal = np.linspace(0, diagonal_end, 100)

fig.add_trace(go.Scatter(
    x=diagonal,
    y=diagonal,
    mode="lines",
    name="y = x",
    line=dict(color="red", width=1.5),
))

fig.show()

## Glide HTVS

In [None]:
# Read the Glide HTVS docked poses and keep the first ten per ligand
# (GNP is skipped).
docked_path = "../materials/docking/glide/glide_htvs_docked.sdf"
with Chem.SDMolSupplier(docked_path, removeHs=True) as w:
    mols_temp = [mol for mol in w]

# "i_m_source_file_index" is resolved against the sorted ligand list
# *including* GNP. The list is built locally instead of appending to and
# removing from molecules_order, so the lookup is correct whether or not GNP
# is currently in molecules_order; molecules_order itself is left untouched.
source_order = sorted(set(molecules_order) | {"GNP"})

# One (initially empty) pose list per ligand code, GNP excluded.
mols = {code: [] for code in source_order if code != "GNP"}

for docked in mols_temp:
    # The property is used as a 1-based position into the ligand ordering.
    code = source_order[int(docked.GetProp("i_m_source_file_index")) - 1]
    if code != "GNP":
        mols[code].append(docked)

# Keep at most ten poses per ligand, in the order they appear in the file.
# NOTE(review): presumably the file lists poses best-first - confirm.
mols = {code: poses[:10] for code, poses in mols.items()}

print(mols)
mols_htvs = mols

In [None]:

# Compute the RMSD (symmetry=True, strip=True) between each reference pose and
# the first ten docked poses of the same ligand.

rms_res = []
for code, al_mol in aligned.items():
    # The reference is identical for every pose of this ligand: convert it once.
    spy_al = spyrmsd.molecule.Molecule.from_rdkit(al_mol)
    # Only the first ten poses are reported, so only those are evaluated.
    for conf_count, doc_mol in enumerate(mols[code][:10]):
        spy_doc = spyrmsd.molecule.Molecule.from_rdkit(doc_mol)
        rms = spyrmsd.rmsd.rmsdwrapper(spy_al, spy_doc, symmetry=True, strip=True)
        # A single pose is passed in, so the value of interest is item 0.
        rms_res.append({"code": code, "confId": conf_count, "rms": rms[0]})

# One row per (ligand, pose): columns "code", "confId", "rms".
df = pd.DataFrame.from_records(rms_res)
display(df.head())

In [None]:
# For every ligand keep only the pose with the smallest RMSD.
min_rms_indeces = df.groupby("code")["rms"].idxmin()
best_pose_columns = ["code", "confId", "rms"]
min_df_htvs = df.loc[min_rms_indeces, best_pose_columns].reset_index(drop=True)
display(min_df_htvs)

# Rank every pose within its ligand by RMSD (1 = smallest, equal values share
# a rank) and write the complete table to disk.
rms_by_code = df.groupby("code")["rms"]
df["rank_rms"] = rms_by_code.rank(method="dense", ascending=True).astype(int)
out_path = "../materials/graphs/glide_htvs.csv"
df.to_csv(out_path, index=False)

In [None]:
# Put the best Glide HTVS pose and the best MOE pose of each ligand side by
# side; shared column names get the "_htvs" / "_moe" suffixes.
df_merged_htvs = pd.merge(
    min_df_htvs,
    min_df_moe,
    on="code",
    suffixes=("_htvs", "_moe"),
)
display(df_merged_htvs.head())

In [None]:
# Scatter the best Glide HTVS RMSD (x) against the best MOE RMSD (y), one
# point per ligand code.
fig = px.scatter(
    df_merged_htvs,
    x="rms_htvs",
    y="rms_moe",
    color="code",
    labels={
        "code": "PDB Code",
        "rms_htvs": "RMS Glide HTVS",
        "rms_moe": "RMS MOE",
    },
    title="Glide HTVS x MOE",
)
fig.update_traces(textposition="bottom right")

# Reference diagonal y = x: points below it have the lower RMSD with MOE,
# points above it with Glide HTVS. It covers at least 0-5 and is stretched
# when any RMSD is larger, so it always reaches the outermost point.
largest_rms = df_merged_htvs[["rms_htvs", "rms_moe"]].max().max()
# 5.0 is listed first so that a NaN maximum (empty table) falls back to 5.0.
diagonal_end = max(5.0, float(largest_rms))
diagonal = np.linspace(0, diagonal_end, 100)

fig.add_trace(go.Scatter(
    x=diagonal,
    y=diagonal,
    mode="lines",
    name="y = x",
    line=dict(color="red", width=1.5),
))

fig.show()