# Calculate RMS - MOE

In [None]:
# import libraries
import pandas as pd
from IPython.display import display
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import AllChem
from spyrmsd import rmsd
import spyrmsd.molecule
import spyrmsd.rmsd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import spyrmsd.rmsd

In [None]:
# Map each complex code to the three-letter code of its ligand.
complex_ligand = {
    '1S1C': 'GNP',
    '2ESM': 'M77',
    '2ETK': 'HFS',
    '2ETR': 'Y27',
    '2V55': 'ANP',
    '3D9V': 'H52',
    '3NCZ': '3NC',
    '3TV7': '07Q',
    '3TWJ': '07R',
    '3V8S': '0HD',
    '4W7P': '3J7',
    '4YVC': '4KH',
    '4YVE': '4KK',
    '5BML': '4TW',
    '5HVU': '65R',
    '5KKS': '6U1',
    '5KKT': '6U2',
    '5UZJ': '8UV',
    '5WNE': 'B4J',
    '5WNF': 'B4V',
    '5WNG': 'B4Y',
    '5WNH': 'B5G',
    '6E9W': 'J0P',
    '7JOU': 'VFS',
    '7S25': '86G',
    '7S26': '86K',
}

# Re-order the mapping by ligand code (the dict values). Dicts keep insertion
# order, so iterating complex_ligand now follows the sorted ligand codes.
complex_ligand = dict(sorted(complex_ligand.items(), key=lambda item: item[1]))

# Ligand codes in that sorted order. Taking the values directly avoids a loop
# variable named `complex`, which would shadow the builtin from here on.
molecules_order = list(complex_ligand.values())

print(molecules_order)

In [None]:
# read aligned poses
# Each record of the SDF is paired, by position, with a ligand code taken in
# the sorted order of complex_ligand; the GNP entry is left out of the result.
aligned_path = "../materials/aligned_molecules.sdf"
with Chem.SDMolSupplier(aligned_path) as w:
    al_mols = [mol for mol in w]

# Take the order from complex_ligand rather than molecules_order:
# molecules_order loses its "GNP" entry in a later cell, so re-running this
# cell afterwards would shift every code that sorts after GNP.
ligand_order = list(complex_ligand.values())

# zip() would silently ignore surplus molecules; keep that case an error.
if len(al_mols) > len(ligand_order):
    raise IndexError(
        f"{aligned_path} holds {len(al_mols)} molecules but only "
        f"{len(ligand_order)} ligand codes are defined"
    )

aligned = {
    code: m
    for code, m in zip(ligand_order, al_mols)
    if code != 'GNP'
}

print(aligned)

In [None]:
# read docked poses
# + remove GNP

docked_path = "../materials/docking/moe/moe_docked.sdf"
with Chem.SDMolSupplier(docked_path, removeHs=True) as w:
    mols_temp = [mol for mol in w]

# "mseq" is used as a 1-based index into the full sorted ligand list (GNP
# included). The list is rebuilt from complex_ligand so this cell can be
# re-run: indexing an already-trimmed molecules_order would shift the codes,
# and a second in-place removal of "GNP" would raise ValueError.
full_order = list(complex_ligand.values())

# One (initially empty) list of poses per ligand code, GNP included.
mols = {code: [] for code in full_order}

for docked in mols_temp:
    code = full_order[int(docked.GetProp("mseq")) - 1]
    if code != "GNP":
        mols[code].append(docked)

# From here on molecules_order no longer contains GNP.
molecules_order = [code for code in full_order if code != "GNP"]

print(mols)

In [None]:
# calculate rms for the first ten poses of each ligand
# (presumably the ten best-scored ones; the file order is not checked here)

rms_res = []
for code, al_mol in aligned.items():
    # The reference is the same for every pose of this ligand: convert it once.
    spy_al = spyrmsd.molecule.Molecule.from_rdkit(al_mol)
    # Slice before scoring so no RMSD is computed for poses that are not kept.
    for conf_count, doc_mol in enumerate(mols[code][:10]):
        spy_doc = spyrmsd.molecule.Molecule.from_rdkit(doc_mol)
        # The wrapper's result is indexed: element 0 is the value for this pose.
        rms = spyrmsd.rmsd.rmsdwrapper(spy_al, spy_doc, symmetry=True, strip=True)
        rms_res.append({"code": code, "confId": conf_count, "rms": rms[0]})

# One row per (ligand code, pose index) with its RMS to the aligned reference.
df = pd.DataFrame.from_records(rms_res)
display(df.head())


out_path = "../materials/graphs/moe.csv"
df.to_csv(out_path, index=False)

In [None]:
# For every ligand code, keep only the row of the pose with the smallest RMS.
min_rms_indeces = df.groupby('code')['rms'].idxmin()
best_rows = df.loc[min_rms_indeces, ['code', 'confId', 'rms']]
min_rms_df = best_rows.reset_index(drop=True)
display(min_rms_df)

# Vina 1.2.7

In [None]:
# read docked poses from vina, grouped per ligand code
docked_path_vina = "../materials/docking/vina/vina127_docked.sdf"
with Chem.SDMolSupplier(docked_path_vina, removeHs=True) as w:
    mols_temp_vina = [mol for mol in w]

# Ligand codes the Vina poses are matched against, in order. GNP is filtered
# here as well, so the mapping is the same whether or not an earlier cell has
# already dropped it from molecules_order.
vina_order = [ligand for ligand in molecules_order if ligand != "GNP"]

mols_vina = {}
mol_count = 0
code = None  # no ligand group started yet

for docked in mols_temp_vina:
    # A record with _ConfID 0 starts the pose group of the next ligand.
    if int(docked.GetProp("_ConfID")) == 0:
        code = vina_order[mol_count]
        mols_vina[code] = []
        mol_count += 1
    if code is None:
        # Without this guard a stale `code` left by an earlier cell could be used.
        raise ValueError(
            f"{docked_path_vina}: the first record does not have _ConfID 0"
        )
    mols_vina[code].append(docked)

print(mols_vina)

In [None]:
# recompute RMS between the Vina docked poses and the aligned references
# (first ten poses of each ligand only)

rms_res_vina = []
for code, al_mol in aligned.items():
    # The reference is the same for every pose of this ligand: convert it once.
    spy_al = spyrmsd.molecule.Molecule.from_rdkit(al_mol)
    # Slice before scoring so no RMSD is computed for poses that are not kept.
    for conf_count, doc_mol in enumerate(mols_vina[code][:10]):
        spy_doc = spyrmsd.molecule.Molecule.from_rdkit(doc_mol)
        # The wrapper's result is indexed: element 0 is the value for this pose.
        rms = spyrmsd.rmsd.rmsdwrapper(spy_al, spy_doc, symmetry=True, strip=True)
        rms_res_vina.append({"code": code, "confId": conf_count, "rms": rms[0]})

# One row per (ligand code, pose index) with its RMS to the aligned reference.
df_vina = pd.DataFrame.from_records(rms_res_vina)
display(df_vina.head())


out_path = "./rms_df_vina.csv"
df_vina.to_csv(out_path, index=False)

In [None]:
# For every ligand code, keep only the row of the Vina pose with the smallest RMS.
min_rms_indeces_vina = df_vina.groupby('code')['rms'].idxmin()
best_rows_vina = df_vina.loc[min_rms_indeces_vina, ['code', 'confId', 'rms']]
min_rms_df_vina = best_rows_vina.reset_index(drop=True)
display(min_rms_df_vina)

## Graph (Vina x MOE)

In [None]:
# Join the lowest-RMS MOE and Vina rows on the ligand code; the columns the
# two frames share besides "code" receive the _moe / _vina suffixes.
df_merged = pd.merge(
    min_rms_df,
    min_rms_df_vina,
    on="code",
    suffixes=("_moe", "_vina"),
)
display(df_merged)

In [None]:
# show graph: lowest MOE RMS (x) against lowest Vina RMS (y), one point per code
fig = px.scatter(
    df_merged,
    x="rms_moe",
    y="rms_vina",
    color="code",
    labels={
        "code": "PDB Code",
        "rms_moe": "RMS MOE",
        "rms_vina": "RMS Vina",
    },
    title="MOE x Vina 1.2.7",
)
fig.update_traces(textposition="bottom right")

# Diagonal y = x reference line. It spans 0-5 as before and is extended when
# any plotted RMS is larger, so the line is not cut short inside the plot.
largest_rms = df_merged[["rms_moe", "rms_vina"]].max().max()
# max() keeps 5.0 when largest_rms is NaN (e.g. an empty frame).
diagonal_end = max(5.0, largest_rms)
diagonal = np.linspace(0, diagonal_end, 100)

fig.add_trace(go.Scatter(
    x=diagonal,
    y=diagonal,
    mode="lines",
    name="y = x",
    line=dict(color="red", width=1.5),
))


fig.show()