# Rescore poses using interaction fingerprints (IFs)
* Try to find optimal per-interaction weights for the rescoring function

## Import, define, read

In [None]:
# import libraries
import pandas as pd
#pd.set_option('future.no_silent_downcasting', True)
from rdkit import Chem
from rdkit import DataStructs
from rdkit.Chem import AllChem
from spyrmsd import rmsd
import spyrmsd.molecule
import numpy as np
import prolif as plf
from IPython.display import display
import plotly.express as px
import plotly.graph_objects as go
from collections import defaultdict
from scipy.optimize import minimize

In [None]:
# Build the interaction -> weight table used for rescoring.
# Every column of the aligned-pose fingerprint table whose "Percentage" row is
# above the threshold is kept; its weight is that percentage divided by 100.
percent_threshold = 0
df_ifs_aligned = pd.read_csv("../materials/rescore/ifs_aligned_prolif.csv").set_index("code")

percentages = {
    name: df_ifs_aligned[name].loc["Percentage"] for name in df_ifs_aligned.columns
}
bonds = {
    name: pct / 100 for name, pct in percentages.items() if pct > percent_threshold
}

print(f"Number of bonds to look for (align): {len(bonds)}")
print(f"Bonds to look for (align): {bonds}")


In [None]:
# Read the protein structure (hydrogens kept) that all interaction
# fingerprints are calculated against and wrap it for ProLIF.
protein_file = "../materials/2etr.pdb"
rdkit_prot = Chem.MolFromPDBFile(protein_file, removeHs=False)
if rdkit_prot is None:
    # RDKit reports an unparsable structure by returning None; fail here with
    # a clear message instead of deep inside ProLIF.
    raise ValueError(f"Could not parse protein structure from {protein_file}")
protein = plf.Molecule(rdkit_prot)

In [None]:
# Map each PDB complex code to the code of its ligand.
complex_ligand = {
    '1S1C': 'GNP',
    '2ESM': 'M77',
    '2ETK': 'HFS',
    '2ETR': 'Y27',
    '2V55': 'ANP',
    '3D9V': 'H52',
    '3NCZ': '3NC',
    '3TV7': '07Q',
    '3TWJ': '07R',
    '3V8S': '0HD',
    '4W7P': '3J7',
    '4YVC': '4KH',
    '4YVE': '4KK',
    '5BML': '4TW',
    '5HVU': '65R',
    '5KKS': '6U1',
    '5KKT': '6U2',
    '5UZJ': '8UV',
    '5WNE': 'B4J',
    '5WNF': 'B4V',
    '5WNG': 'B4Y',
    '5WNH': 'B5G',
    '6E9W': 'J0P',
    '7JOU': 'VFS',
    '7S25': '86G',
    '7S26': '86K',
}

# Re-order the mapping by ligand code; the cells that read the SDF files index
# into this order.
complex_ligand = dict(sorted(complex_ligand.items(), key=lambda item: item[1]))

# Ligand codes in sorted order. Taken directly from the dict values so that no
# loop variable named `complex` is left behind shadowing the builtin.
molecules_order = list(complex_ligand.values())

print(molecules_order)

In [None]:
# Read the aligned (reference) poses. The n-th molecule in the SDF is assigned
# the n-th code of molecules_order; GNP is left out.
aligned_path = "../materials/aligned_molecules.sdf"
with Chem.SDMolSupplier(aligned_path) as supplier:
    al_mols = [entry for entry in supplier]

aligned = {}
for position, pose in enumerate(al_mols):
    code = molecules_order[position]
    if code == 'GNP':
        continue
    aligned[code] = pose

print(aligned)

In [None]:
# Read the MOE docking output and keep at most the first ten poses per ligand
# (GNP is dropped).
docked_path = "../materials/docking/moe/moe_docked.sdf"
with Chem.SDMolSupplier(docked_path, removeHs=True) as supplier:
    mols_temp_moe = [entry for entry in supplier]

# one (initially empty) pose list per ligand except GNP
mols_moe = {name: [] for name in molecules_order if name != "GNP"}

# the "mseq" property minus one is used as position in molecules_order
for docked in mols_temp_moe:
    code = molecules_order[int(docked.GetProp("mseq")) - 1]
    if code != "GNP":
        mols_moe[code].append(docked)

# from here on molecules_order no longer contains GNP
molecules_order.remove("GNP")

# truncate every pose list to its first ten entries
for code in mols_moe:
    mols_moe[code] = mols_moe[code][:10]

print(mols_moe)


In [None]:
# Read the Vina docking output. A pose whose "_ConfID" property is 0 starts the
# pose list of the next ligand in molecules_order; every pose is appended to
# the list of the ligand most recently started.
docked_path_vina = "../materials/docking/vina/vina127_docked.sdf"
with Chem.SDMolSupplier(docked_path_vina, removeHs=True) as supplier:
    mols_temp_vina = [entry for entry in supplier]

mols_vina = {}
mol_count = 0

for docked in mols_temp_vina:
    if int(docked.GetProp("_ConfID")) == 0:
        code = molecules_order[mol_count]
        mols_vina[code] = []
        mol_count += 1
    mols_vina[code].append(docked)

print(mols_vina)

In [None]:
# Read the Glide XP docking output and keep at most the first ten poses per
# ligand (GNP is dropped).
docked_path_glide_xp = "../materials/docking/glide/glide_xp_docked.sdf"
with Chem.SDMolSupplier(docked_path_glide_xp, removeHs=True) as supplier:
    mols_temp_glide_xp = [entry for entry in supplier]

# GNP is put back (and the list re-sorted) only while the source-file index is
# translated into a ligand code; it is removed again below.
molecules_order.append("GNP")
molecules_order.sort()

mols_glide_xp = {name: [] for name in molecules_order if name != "GNP"}

# "i_m_source_file_index" minus one is used as position in molecules_order
for docked in mols_temp_glide_xp:
    code = molecules_order[int(docked.GetProp("i_m_source_file_index")) - 1]
    if code != "GNP":
        mols_glide_xp[code].append(docked)

molecules_order.remove("GNP")

# truncate every pose list to its first ten entries
for code in mols_glide_xp:
    mols_glide_xp[code] = mols_glide_xp[code][:10]

print(mols_glide_xp)

In [None]:
# Read the Glide HTVS docking output and keep at most the first ten poses per
# ligand (GNP is dropped).
docked_path_glide_htvs = "../materials/docking/glide/glide_htvs_docked.sdf"
with Chem.SDMolSupplier(docked_path_glide_htvs, removeHs=True) as supplier:
    mols_temp_glide_htvs = [entry for entry in supplier]

# GNP is put back (and the list re-sorted) only while the source-file index is
# translated into a ligand code; it is removed again below.
molecules_order.append("GNP")
molecules_order.sort()

mols_glide_htvs = {name: [] for name in molecules_order if name != "GNP"}

# "i_m_source_file_index" minus one is used as position in molecules_order
for docked in mols_temp_glide_htvs:
    code = molecules_order[int(docked.GetProp("i_m_source_file_index")) - 1]
    if code != "GNP":
        mols_glide_htvs[code].append(docked)

molecules_order.remove("GNP")

# truncate every pose list to its first ten entries
for code in mols_glide_htvs:
    mols_glide_htvs[code] = mols_glide_htvs[code][:10]

print(mols_glide_htvs)

In [None]:
# Read the Glide SP docking output and keep at most the first ten poses per
# ligand (GNP is dropped).
docked_path_glide_sp = "../materials/docking/glide/glide_sp_docked.sdf"
with Chem.SDMolSupplier(docked_path_glide_sp, removeHs=True) as supplier:
    mols_temp_glide_sp = [entry for entry in supplier]

# GNP is put back (and the list re-sorted) only while the source-file index is
# translated into a ligand code; it is removed again below.
molecules_order.append("GNP")
molecules_order.sort()

mols_glide_sp = {name: [] for name in molecules_order if name != "GNP"}

# "i_m_source_file_index" minus one is used as position in molecules_order
for docked in mols_temp_glide_sp:
    code = molecules_order[int(docked.GetProp("i_m_source_file_index")) - 1]
    if code != "GNP":
        mols_glide_sp[code].append(docked)

molecules_order.remove("GNP")

# truncate every pose list to its first ten entries
for code in mols_glide_sp:
    mols_glide_sp[code] = mols_glide_sp[code][:10]

print(mols_glide_sp)

## Define functions

In [None]:
""" calculate RMSD between aligned and docked dataset
params: aligned (dict), docked (dict)
return: pd.df, columns = [code, confID, rms, rank] """

def get_rms(aligned, docked):
    """Calculate the RMSD between each aligned pose and its docked poses.

    Parameters
    ----------
    aligned : dict
        Ligand code -> aligned (reference) RDKit molecule.
    docked : dict
        Ligand code -> list of docked RDKit molecules. The position of a pose
        in its list is reported as ``confID``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``code`` with columns ``confID``, ``rms`` and ``rank``
        (dense rank of ``rms`` within one code, 1 = lowest RMSD). Empty, with
        the same columns, when no pose could be compared.

    Raises
    ------
    KeyError
        If a code of ``aligned`` is missing from ``docked``.
    """
    rms_records = []

    for code, al_mol in aligned.items():
        # confID is the position in the pose list, so a skipped pose does not
        # shift the IDs of the poses after it (get_ifs_df numbers poses the
        # same way and the two tables are merged on code + confID).
        for conf_id, doc_mol in enumerate(docked[code]):
            if al_mol.GetNumAtoms() != doc_mol.GetNumAtoms():
                print(f"Skipping {code}: Atom count mismatch")
                print(f"aligned: {Chem.MolToSmiles(al_mol)}")
                print(f"docked: {Chem.MolToSmiles(doc_mol)}")
                continue

            spy_doc = spyrmsd.molecule.Molecule.from_rdkit(doc_mol)
            spy_al = spyrmsd.molecule.Molecule.from_rdkit(al_mol)
            rms_values = spyrmsd.rmsd.rmsdwrapper(spy_al, spy_doc, symmetry=True, strip=True)
            rms_records.append({"code": code, "confID": conf_id, "rms": rms_values[0]})

    if not rms_records:
        # nothing comparable: return the documented layout instead of failing
        # on the missing "code" column below
        return pd.DataFrame(
            columns=["confID", "rms", "rank"], index=pd.Index([], name="code")
        )

    df_res = pd.DataFrame.from_records(rms_records)
    df_res["rank"] = df_res.groupby("code")["rms"].rank(method="dense", ascending=True).astype(int)
    df_res = df_res.set_index('code')

    return df_res


In [None]:
""" function to calculate fingerprints using ProLIF
params: mol (rdkit.Mol), protein (plf.Molecule)
returns: df of IFs """

def get_fingerprints(mol_temp, protein):
    """Compute the ProLIF interaction fingerprint of one pose.

    Parameters
    ----------
    mol_temp : rdkit.Chem.Mol
        Ligand pose.
    protein : plf.Molecule
        Protein the interactions are detected against.

    Returns
    -------
    pandas.DataFrame
        One row (index "temp_name") whose columns are flat strings built from
        the second and third level of ProLIF's column tuples, joined by "_"
        (named Residue and Interaction in the code).
    """
    ligand = plf.Molecule.from_rdkit(mol_temp)
    fingerprint = plf.Fingerprint()
    fingerprint.run_from_iterable([ligand], protein, progress=False)

    raw = fingerprint.to_dataframe(index_col="Pose")
    raw = raw.rename(index={0: -1})
    raw = raw.rename(columns={str(ligand[0].resid): "temp_name"})

    flat = raw.copy()
    flat.index = ["temp_name"]

    # keep only (residue, interaction) from every column tuple ...
    residue_interaction = pd.MultiIndex.from_tuples(
        [(column[1], column[2]) for column in flat.columns],
        names=['Residue', 'Interaction'],
    )
    # ... and flatten the pair into a single "<residue>_<interaction>" label
    flat.columns = [f"{res}_{interaction}" for res, interaction in residue_interaction]

    return flat


In [None]:
""" 
get score from IFs
params: 
returns: score calculated from bonds and bonds weights
"""
def get_score(row, protein, bonds, extra_penalty=1.5):
    """Score one pose from its interaction fingerprint.

    Parameters
    ----------
    row : pandas.Series
        One pose; interaction columns hold booleans. Non-boolean entries
        (code, confID, rms, ...) are ignored.
    protein : object
        Unused; kept so existing calls stay valid.
    bonds : dict
        Interaction name -> weight.
    extra_penalty : float, optional
        Amount subtracted for every interaction found in the pose that is not
        a key of ``bonds``.

    Returns
    -------
    float
        (sum of weights of the expected interactions that were found
        - extra_penalty * number of unexpected interactions) / sum of all
        weights, or 0.0 when the weights sum to zero.
    """
    # Only genuine booleans count as "interaction found"; a plain `== True`
    # would also match numeric 1 in columns such as confID or rank.
    bonds_found = {
        name
        for name, value in row.items()
        if isinstance(value, (bool, np.bool_)) and value
    }
    # interactions present in the pose but not expected from the aligned poses
    extra_bonds = bonds_found.difference(bonds)

    total_sum = 0
    common_sum = 0
    for bond, num in bonds.items():
        total_sum = total_sum + num
        if bond in bonds_found:
            common_sum = common_sum + num

    # The penalty is applied outside the loop: extra bonds are by definition
    # not keys of `bonds`, so they can never be met while iterating over it.
    common_sum = common_sum - extra_penalty * len(extra_bonds)

    if total_sum == 0:
        return 0.0
    return common_sum / total_sum


In [None]:
""" rescore docked poses based on IFs
params: docked (dict)
returns: pd.df, columns = [code, confID, score, rank] """

def get_ifs_df(docked):
    """Collect the interaction fingerprints of all docked poses in one table.

    Uses the module-level ``protein`` for the fingerprint calculation.

    Parameters
    ----------
    docked : dict
        Ligand code -> list of docked RDKit molecules.

    Returns
    -------
    pandas.DataFrame
        One row per pose with the columns ``code`` and ``confID`` (position of
        the pose in its list) first, followed by one boolean column per
        interaction seen in any pose. Only ``code`` and ``confID`` (no rows)
        when there are no poses.
    """
    meta_cols = ["code", "confID"]

    frames = []
    for code, molecules in docked.items():
        for conf_id, mol in enumerate(molecules):
            ifs = get_fingerprints(mol, protein)
            ifs["code"] = code
            ifs["confID"] = conf_id
            frames.append(ifs)

    if not frames:
        return pd.DataFrame(columns=meta_cols)

    # a single concat instead of growing the frame pose by pose
    docked_ifs = pd.concat(frames, ignore_index=True)

    if_cols = [col for col in docked_ifs.columns if col not in meta_cols]
    if if_cols:
        # An interaction missing from a pose shows up as NaN after the concat;
        # comparing with True turns those gaps into real boolean False.
        docked_ifs[if_cols] = docked_ifs[if_cols].eq(True)

    return docked_ifs[meta_cols + if_cols]


In [None]:
""" 
function to evaluate weights
params: df (code, confID, rms, rank_rms), 
    docked (dict - keys = codes of mols, items are lists of confIDs),
    bonds - dict of bond:value    
returns: ratio correctly predicted / total
"""

def evaluate_weights(df, bonds):
    """Measure how often the best-scored pose is also the lowest-RMSD pose.

    Uses the module-level ``protein`` when calling ``get_score``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per pose with at least ``code`` and ``rms`` plus the
        interaction columns read by ``get_score``.
    bonds : dict
        Interaction name -> weight.

    Returns
    -------
    float
        Number of codes whose top-scored pose is the lowest-RMSD pose, divided
        by the number of codes; 0.0 when ``df`` has no rows.

    Notes
    -----
    The scores are written into ``df["score"]`` in place. This side effect is
    kept deliberately because other code in this notebook may read that column
    afterwards.
    """
    df["score"] = [get_score(row, protein, bonds) for _, row in df.iterrows()]

    # Tied scores are resolved by confID (when available) so that the choice
    # matches the rank order used by rescore().
    if "confID" in df.columns:
        sort_cols, sort_dirs = ["score", "confID"], [False, True]
    else:
        sort_cols, sort_dirs = ["score"], [False]

    correct = 0
    total = 0

    for _, group in df.groupby("code"):
        best_true_index = group.sort_values("rms").index[0]

        # Sort the group in its own order, not the RMSD-sorted one: otherwise
        # tied scores could be resolved in ground-truth RMSD order and count
        # as correct predictions.
        ranked = group.sort_values(sort_cols, ascending=sort_dirs, kind="stable")
        best_pred_index = ranked.index[0]

        if best_true_index == best_pred_index:
            correct = correct + 1

        total = total + 1

    if total == 0:
        return 0.0
    return correct / total


In [None]:
""" 
loss_fn ... helper function

function to get best weights
params: df of rms, bonds (dictionary bonds:weights to optimize), 
    docked (dict - keys = codes of mols, items are lists of confIDs),
    bonds_dict (dict: bonds-weights, save only bond names, reinitialize all to "one")
returns: optimized weights
"""

def optimize_weights(df, bonds_dict):
    """Search for interaction weights that maximise ``evaluate_weights``.

    Parameters
    ----------
    df : pandas.DataFrame
        Pose table passed unchanged to ``evaluate_weights``.
    bonds_dict : dict
        Interaction name -> initial weight; the values are the starting point
        of the search.

    Returns
    -------
    dict
        Interaction name -> weight found by the optimiser.
    """
    names = list(bonds_dict)
    start = np.array([bonds_dict[name] for name in names])

    def loss_fn(weight_array):
        # minimize() minimises, so the accuracy is negated
        candidate = dict(zip(names, weight_array))
        return -evaluate_weights(df, candidate)

    result = minimize(loss_fn, start, method="Powell", bounds=None)

    return dict(zip(names, result.x))

In [None]:
def rescore(df, bonds):
    """Score every pose with ``get_score`` and rank the poses of each code.

    Uses the module-level ``protein`` when calling ``get_score``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per pose with the columns ``code`` and ``confID`` plus the
        interaction columns read by ``get_score``. Not modified.
    bonds : dict
        Interaction name -> weight.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` indexed by ``code`` with the added columns ``score``
        and ``rank`` (1 = highest score within a code; ties are broken by the
        lower ``confID``).
    """
    scores = [get_score(row, protein, bonds) for _, row in df.iterrows()]

    df_res = df.copy()
    # write to the copy that is ranked and returned, not to the input frame
    df_res["score"] = scores
    df_res["rank"] = (
        df_res.sort_values(["code", "score", "confID"], ascending=[True, False, True])
        .groupby("code")
        .cumcount() + 1
    )
    df_res = df_res.set_index('code')

    return df_res

## Analyse

### MOE

In [None]:
# RMSD of every MOE pose against its aligned pose
df_rms_moe = get_rms(aligned, mols_moe)
display(df_rms_moe.head())

# interaction fingerprints of the same poses
ifs_moe = get_ifs_df(mols_moe)
display(ifs_moe.head())

# one table holding RMSD and fingerprints, matched on code and confID
df_moe = df_rms_moe.merge(ifs_moe, how='outer', on=['code', 'confID'])

In [None]:
# Optimise the interaction weights on the MOE poses, starting from `bonds`.
best_bonds = optimize_weights(df_moe, bonds)

# Share of ligands whose best-scored pose is also the lowest-RMSD pose.
# Side effect: evaluate_weights also stores the per-pose scores in
# df_moe["score"], so the order of these statements matters.
accuracy = evaluate_weights(df_moe, best_bonds)

print(f"Accuracy: {accuracy}")
print(f"Best weights: {best_bonds}")

In [None]:
# score and rank the MOE poses with the optimised weights; keep only the
# columns needed for the comparison with the RMSD ranking
df_score_moe = rescore(df_moe, best_bonds)[["confID", "score", "rank"]]

display(df_score_moe)

In [None]:
# Combine the RMSD table and the score table; both carry a "rank" column,
# which the suffixes turn into rank_rms and rank_score.
df_res_moe = df_rms_moe.merge(df_score_moe, on=['code','confID'], suffixes=("_rms", "_score"))
# True where a pose has the same rank by RMSD and by score
df_res_moe["is_equal"] = df_res_moe["rank_rms"].eq(df_res_moe["rank_score"])
display(df_res_moe)


### Vina 1.2.7

In [None]:
# RMSD of every Vina pose against its aligned pose
df_rms_vina = get_rms(aligned, mols_vina)
display(df_rms_vina.head())

# interaction fingerprints of the same poses
ifs_vina = get_ifs_df(mols_vina)
display(ifs_vina.head())

# one table holding RMSD and fingerprints, matched on code and confID
df_vina = df_rms_vina.merge(ifs_vina, how='outer', on=['code', 'confID'])

In [None]:
# Optimise the interaction weights on the Vina poses, starting from `bonds`.
best_bonds = optimize_weights(df_vina, bonds)

# Share of ligands whose best-scored pose is also the lowest-RMSD pose.
# Side effect: evaluate_weights also stores the per-pose scores in
# df_vina["score"], so the order of these statements matters.
accuracy = evaluate_weights(df_vina, best_bonds)

print(f"Accuracy: {accuracy}")
print(f"Best weights: {best_bonds}")

In [None]:
# score and rank the Vina poses with the optimised weights; keep only the
# columns needed for the comparison with the RMSD ranking
df_score_vina = rescore(df_vina, best_bonds)[["confID", "score", "rank"]]

display(df_score_vina)

In [None]:
# Combine the RMSD table and the score table; both carry a "rank" column,
# which the suffixes turn into rank_rms and rank_score.
df_res_vina = df_rms_vina.merge(df_score_vina, on=['code','confID'], suffixes=("_rms", "_score"))
# True where a pose has the same rank by RMSD and by score
df_res_vina["is_equal"] = df_res_vina["rank_rms"].eq(df_res_vina["rank_score"])
display(df_res_vina)


### Glide XP

In [None]:
# RMSD of every Glide XP pose against its aligned pose
df_rms_glide_xp = get_rms(aligned, mols_glide_xp)
display(df_rms_glide_xp.head())

# interaction fingerprints of the same poses
ifs_glide_xp = get_ifs_df(mols_glide_xp)
display(ifs_glide_xp.head())

# one table holding RMSD and fingerprints, matched on code and confID
df_glide_xp = df_rms_glide_xp.merge(ifs_glide_xp, how='outer', on=['code', 'confID'])

In [None]:
# Optimise the interaction weights on the Glide XP poses, starting from `bonds`.
best_bonds = optimize_weights(df_glide_xp, bonds)

# Share of ligands whose best-scored pose is also the lowest-RMSD pose.
# Side effect: evaluate_weights also stores the per-pose scores in
# df_glide_xp["score"], so the order of these statements matters.
accuracy = evaluate_weights(df_glide_xp, best_bonds)

print(f"Accuracy: {accuracy}")
print(f"Best weights: {best_bonds}")

In [None]:
# score and rank the Glide XP poses with the optimised weights; keep only the
# columns needed for the comparison with the RMSD ranking
df_score_glide_xp = rescore(df_glide_xp, best_bonds)[["confID", "score", "rank"]]

display(df_score_glide_xp)

In [None]:
# Combine the RMSD table and the score table; both carry a "rank" column,
# which the suffixes turn into rank_rms and rank_score.
df_res_glide_xp = df_rms_glide_xp.merge(df_score_glide_xp, on=['code','confID'], suffixes=("_rms", "_score"))
# True where a pose has the same rank by RMSD and by score
df_res_glide_xp["is_equal"] = df_res_glide_xp["rank_rms"].eq(df_res_glide_xp["rank_score"])
display(df_res_glide_xp)


### Glide HTVS

In [None]:
# RMSD of every Glide HTVS pose against its aligned pose
df_rms_glide_htvs = get_rms(aligned, mols_glide_htvs)
display(df_rms_glide_htvs.head())

# interaction fingerprints of the same poses
ifs_glide_htvs = get_ifs_df(mols_glide_htvs)
display(ifs_glide_htvs.head())

# one table holding RMSD and fingerprints, matched on code and confID
df_glide_htvs = df_rms_glide_htvs.merge(ifs_glide_htvs, how='outer', on=['code', 'confID'])

In [None]:
# Optimise the interaction weights on the Glide HTVS poses, starting from `bonds`.
best_bonds = optimize_weights(df_glide_htvs, bonds)

# Share of ligands whose best-scored pose is also the lowest-RMSD pose.
# Side effect: evaluate_weights also stores the per-pose scores in
# df_glide_htvs["score"], so the order of these statements matters.
accuracy = evaluate_weights(df_glide_htvs, best_bonds)

print(f"Accuracy: {accuracy}")
print(f"Best weights: {best_bonds}")

In [None]:
# score and rank the Glide HTVS poses with the optimised weights; keep only
# the columns needed for the comparison with the RMSD ranking
df_score_glide_htvs = rescore(df_glide_htvs, best_bonds)[["confID", "score", "rank"]]

display(df_score_glide_htvs)

In [None]:
# Combine the RMSD table and the score table; both carry a "rank" column,
# which the suffixes turn into rank_rms and rank_score.
df_res_glide_htvs = df_rms_glide_htvs.merge(df_score_glide_htvs, on=['code','confID'], suffixes=("_rms", "_score"))
# True where a pose has the same rank by RMSD and by score
df_res_glide_htvs["is_equal"] = df_res_glide_htvs["rank_rms"].eq(df_res_glide_htvs["rank_score"])
display(df_res_glide_htvs)


### Glide SP

In [None]:
# RMSD of every Glide SP pose against its aligned pose
df_rms_glide_sp = get_rms(aligned, mols_glide_sp)
display(df_rms_glide_sp.head())

# interaction fingerprints of the same poses
ifs_glide_sp = get_ifs_df(mols_glide_sp)
display(ifs_glide_sp.head())

# one table holding RMSD and fingerprints, matched on code and confID
df_glide_sp = df_rms_glide_sp.merge(ifs_glide_sp, how='outer', on=['code', 'confID'])

In [None]:
# Disabled experiment (kept for reference): give every interaction column seen
# in the Glide SP poses a starting weight of 0.0, overlay the weights from
# `bonds`, and sort by weight in descending order.
# temp = [x for x in ifs_glide_sp.columns if x not in ["code", "confID"]]
# bonds_temp = {temp[i]: 0.0 for i in range(len(temp))}
# bonds_new = {**bonds_temp, **bonds}
# bonds_new = dict(sorted(bonds_new.items(), key=lambda item: item[1], reverse = True))

# Optimise the interaction weights on the Glide SP poses, starting from `bonds`.
best_bonds = optimize_weights(df_glide_sp, bonds)

# Share of ligands whose best-scored pose is also the lowest-RMSD pose.
# Side effect: evaluate_weights also stores the per-pose scores in
# df_glide_sp["score"], so the order of these statements matters.
accuracy = evaluate_weights(df_glide_sp, best_bonds)

print(f"Accuracy: {accuracy}")
print(f"Best weights: {best_bonds}")

In [None]:
# score and rank the Glide SP poses with the optimised weights; keep only the
# columns needed for the comparison with the RMSD ranking
df_score_glide_sp = rescore(df_glide_sp, best_bonds)[["confID", "score", "rank"]]

display(df_score_glide_sp)

In [None]:
# Combine the RMSD table and the score table; both carry a "rank" column,
# which the suffixes turn into rank_rms and rank_score.
df_res_glide_sp = df_rms_glide_sp.merge(df_score_glide_sp, on=['code','confID'], suffixes=("_rms", "_score"))
# True where a pose has the same rank by RMSD and by score
df_res_glide_sp["is_equal"] = df_res_glide_sp["rank_rms"].eq(df_res_glide_sp["rank_score"])
display(df_res_glide_sp)


## Count

In [None]:
# For every docking program: count the poses whose RMSD rank equals their
# score rank, count how many of those are rank 1, and express the latter as a
# percentage. NOTE: the denominator 25 is hard-coded (26 complexes in
# complex_ligand minus GNP).

# MOE
correctly_ranked_moe = df_res_moe.loc[df_res_moe["is_equal"]]
correctly_first_moe = correctly_ranked_moe.loc[correctly_ranked_moe["rank_score"].eq(1)]
percent_first_moe = len(correctly_first_moe) / 25 * 100

print("MOE")
print(f"Number of correctly ranked: {len(correctly_ranked_moe)}")
print(f"Number of correctly ranked firsts: {len(correctly_first_moe)}")

print(f"Percent of correctly ranked lowest RMS: {percent_first_moe}%")
print("--------------------")

# Vina
correctly_ranked_vina = df_res_vina.loc[df_res_vina["is_equal"]]
correctly_first_vina = correctly_ranked_vina.loc[correctly_ranked_vina["rank_score"].eq(1)]
percent_first_vina = len(correctly_first_vina) / 25 * 100

print("Vina")
print(f"Number of correctly ranked: {len(correctly_ranked_vina)}")
print(f"Number of correctly ranked firsts: {len(correctly_first_vina)}")

print(f"Percent of correctly ranked lowest RMS: {percent_first_vina}%")
print("--------------------")

# Glide XP
correctly_ranked_glide_xp = df_res_glide_xp.loc[df_res_glide_xp["is_equal"]]
correctly_first_glide_xp = correctly_ranked_glide_xp.loc[correctly_ranked_glide_xp["rank_score"].eq(1)]
percent_first_glide_xp = len(correctly_first_glide_xp) / 25 * 100

print("Glide XP")
print(f"Number of correctly ranked: {len(correctly_ranked_glide_xp)}")
print(f"Number of correctly ranked firsts: {len(correctly_first_glide_xp)}")

print(f"Percent of correctly ranked lowest RMS: {percent_first_glide_xp}%")
print("--------------------")

# Glide HTVS
correctly_ranked_glide_htvs = df_res_glide_htvs.loc[df_res_glide_htvs["is_equal"]]
correctly_first_glide_htvs = correctly_ranked_glide_htvs.loc[correctly_ranked_glide_htvs["rank_score"].eq(1)]
percent_first_glide_htvs = len(correctly_first_glide_htvs) / 25 * 100

print("Glide HTVS")
print(f"Number of correctly ranked: {len(correctly_ranked_glide_htvs)}")
print(f"Number of correctly ranked firsts: {len(correctly_first_glide_htvs)}")

print(f"Percent of correctly ranked lowest RMS: {percent_first_glide_htvs}%")
print("--------------------")

# Glide SP
correctly_ranked_glide_sp = df_res_glide_sp.loc[df_res_glide_sp["is_equal"]]
correctly_first_glide_sp = correctly_ranked_glide_sp.loc[correctly_ranked_glide_sp["rank_score"].eq(1)]
percent_first_glide_sp = len(correctly_first_glide_sp) / 25 * 100

print("Glide SP")
print(f"Number of correctly ranked: {len(correctly_ranked_glide_sp)}")
print(f"Number of correctly ranked firsts: {len(correctly_first_glide_sp)}")

print(f"Percent of correctly ranked lowest RMS: {percent_first_glide_sp}%")
print("--------------------")

In [None]:
# Top-k success rates when the poses are ranked by the new IF-based score:
# for each docking program, the percentage of ligands that have a pose with
# rms < 2 among their k best-scored poses.
print("Ranked by new score\n")


def _pct(count, total):
    """Return count / total as a percentage rounded to one decimal place."""
    return round(count / total * 100, 1)


def _n_ligands_hit(df_top):
    """Return the number of distinct codes in df_top having a pose with rms < 2."""
    return df_top[df_top["rms"] < 2]["code"].nunique()


# Turn the ligand code from the index into a regular "code" column. The guard
# keeps the cell re-runnable: once "code" is a column, resetting the index
# again would fail because the column already exists.
if "code" not in df_res_moe.columns:
    df_res_moe = df_res_moe.reset_index(names="code")
if "code" not in df_res_vina.columns:
    df_res_vina = df_res_vina.reset_index(names="code")
if "code" not in df_res_glide_xp.columns:
    df_res_glide_xp = df_res_glide_xp.reset_index(names="code")
if "code" not in df_res_glide_htvs.columns:
    df_res_glide_htvs = df_res_glide_htvs.reset_index(names="code")
if "code" not in df_res_glide_sp.columns:
    df_res_glide_sp = df_res_glide_sp.reset_index(names="code")

# Denominator for all programs: number of rank-1 poses in the MOE results.
divide = len(df_res_moe[df_res_moe["rank_score"] == 1])
print(divide)

# top1 accuracy (counts rows; the top-k variants below count distinct codes)
df_top_moe = df_res_moe[df_res_moe["rank_score"] == 1]
top_one_moe = _pct(len(df_top_moe[df_top_moe["rms"] < 2]), divide)
df_top_vina = df_res_vina[df_res_vina["rank_score"] == 1]
top_one_vina = _pct(len(df_top_vina[df_top_vina["rms"] < 2]), divide)
df_top_glide_xp = df_res_glide_xp[df_res_glide_xp["rank_score"] == 1]
top_one_glide_xp = _pct(len(df_top_glide_xp[df_top_glide_xp["rms"] < 2]), divide)
df_top_glide_htvs = df_res_glide_htvs[df_res_glide_htvs["rank_score"] == 1]
top_one_glide_htvs = _pct(len(df_top_glide_htvs[df_top_glide_htvs["rms"] < 2]), divide)
df_top_glide_sp = df_res_glide_sp[df_res_glide_sp["rank_score"] == 1]
top_one_glide_sp = _pct(len(df_top_glide_sp[df_top_glide_sp["rms"] < 2]), divide)

print("top1 accuracy:")
print(f"Vina: {top_one_vina}%")
print(f"Glide XP: {top_one_glide_xp}%")
print(f"Glide SP: {top_one_glide_sp}%")
print(f"Glide  HTVS: {top_one_glide_htvs}%")
print(f"MOE: {top_one_moe}%")
print("--------------------")

# top3 accuracy
df_top_moe = df_res_moe[df_res_moe["rank_score"] <= 3]
top_three_moe = _pct(_n_ligands_hit(df_top_moe), divide)
df_top_vina = df_res_vina[df_res_vina["rank_score"] <= 3]
top_three_vina = _pct(_n_ligands_hit(df_top_vina), divide)
df_top_glide_xp = df_res_glide_xp[df_res_glide_xp["rank_score"] <= 3]
top_three_glide_xp = _pct(_n_ligands_hit(df_top_glide_xp), divide)
df_top_glide_htvs = df_res_glide_htvs[df_res_glide_htvs["rank_score"] <= 3]
top_three_glide_htvs = _pct(_n_ligands_hit(df_top_glide_htvs), divide)
df_top_glide_sp = df_res_glide_sp[df_res_glide_sp["rank_score"] <= 3]
top_three_glide_sp = _pct(_n_ligands_hit(df_top_glide_sp), divide)

print("top3 accuracy:")
print(f"Vina: {top_three_vina}%")
print(f"Glide XP: {top_three_glide_xp}%")
print(f"Glide SP: {top_three_glide_sp}%")
print(f"Glide HTVS: {top_three_glide_htvs}%")
print(f"MOE: {top_three_moe}%")
print("--------------------")

# top5 accuracy
df_top_moe = df_res_moe[df_res_moe["rank_score"] <= 5]
top_five_moe = _pct(_n_ligands_hit(df_top_moe), divide)
df_top_vina = df_res_vina[df_res_vina["rank_score"] <= 5]
top_five_vina = _pct(_n_ligands_hit(df_top_vina), divide)
df_top_glide_xp = df_res_glide_xp[df_res_glide_xp["rank_score"] <= 5]
top_five_glide_xp = _pct(_n_ligands_hit(df_top_glide_xp), divide)
df_top_glide_htvs = df_res_glide_htvs[df_res_glide_htvs["rank_score"] <= 5]
top_five_glide_htvs = _pct(_n_ligands_hit(df_top_glide_htvs), divide)
df_top_glide_sp = df_res_glide_sp[df_res_glide_sp["rank_score"] <= 5]
top_five_glide_sp = _pct(_n_ligands_hit(df_top_glide_sp), divide)

print("top5 accuracy:")
print(f"Vina: {top_five_vina}%")
print(f"Glide XP: {top_five_glide_xp}%")
print(f"Glide SP: {top_five_glide_sp}%")
print(f"Glide HTVS: {top_five_glide_htvs}%")
print(f"MOE: {top_five_moe}%")
print("--------------------")

# all accuracy (top ten; there are no more docked poses than that)
df_top_moe = df_res_moe[df_res_moe["rank_score"] <= 10]
top_all_moe = _pct(_n_ligands_hit(df_top_moe), divide)
df_top_vina = df_res_vina[df_res_vina["rank_score"] <= 10]
top_all_vina = _pct(_n_ligands_hit(df_top_vina), divide)
df_top_glide_xp = df_res_glide_xp[df_res_glide_xp["rank_score"] <= 10]
top_all_glide_xp = _pct(_n_ligands_hit(df_top_glide_xp), divide)
df_top_glide_htvs = df_res_glide_htvs[df_res_glide_htvs["rank_score"] <= 10]
top_all_glide_htvs = _pct(_n_ligands_hit(df_top_glide_htvs), divide)
df_top_glide_sp = df_res_glide_sp[df_res_glide_sp["rank_score"] <= 10]
top_all_glide_sp = _pct(_n_ligands_hit(df_top_glide_sp), divide)

print("all accuracy:")
print(f"Vina: {top_all_vina}%")
print(f"Glide XP: {top_all_glide_xp}%")
print(f"Glide SP: {top_all_glide_sp}%")
print(f"Glide HTVS: {top_all_glide_htvs}%")
print(f"MOE: {top_all_moe}%")
print("--------------------")


In [None]:
# Top-k success rates when the poses keep the order of the docking program
# (confID, zero-based): for each program, the percentage of ligands that have
# a pose with rms < 2 among their first k poses.
# Expects the df_res_* frames to already carry "code" as a regular column.
print("Ranked by confID\n")


def _pct(count, total):
    """Return count / total as a percentage rounded to one decimal place."""
    return round(count / total * 100, 1)


def _n_ligands_hit(df_top):
    """Return the number of distinct codes in df_top having a pose with rms < 2."""
    return df_top[df_top["rms"] < 2]["code"].nunique()


# Denominator for all programs: number of ligands in the MOE results. Counting
# distinct codes (instead of rows with confID == 1) also includes ligands that
# have only a single pose.
divide = df_res_moe["code"].nunique()

# top1 accuracy (counts rows; the top-k variants below count distinct codes)
df_top_moe = df_res_moe[df_res_moe["confID"] < 1]
top_one_moe = _pct(len(df_top_moe[df_top_moe["rms"] < 2]), divide)
df_top_vina = df_res_vina[df_res_vina["confID"] < 1]
top_one_vina = _pct(len(df_top_vina[df_top_vina["rms"] < 2]), divide)
df_top_glide_xp = df_res_glide_xp[df_res_glide_xp["confID"] < 1]
top_one_glide_xp = _pct(len(df_top_glide_xp[df_top_glide_xp["rms"] < 2]), divide)
df_top_glide_htvs = df_res_glide_htvs[df_res_glide_htvs["confID"] < 1]
top_one_glide_htvs = _pct(len(df_top_glide_htvs[df_top_glide_htvs["rms"] < 2]), divide)
df_top_glide_sp = df_res_glide_sp[df_res_glide_sp["confID"] < 1]
top_one_glide_sp = _pct(len(df_top_glide_sp[df_top_glide_sp["rms"] < 2]), divide)

print("top1 accuracy:")
print(f"Vina: {top_one_vina}%")
print(f"Glide XP: {top_one_glide_xp}%")
print(f"Glide SP: {top_one_glide_sp}%")
print(f"Glide  HTVS: {top_one_glide_htvs}%")
print(f"MOE: {top_one_moe}%")
print("--------------------")

# top3 accuracy
df_top_moe = df_res_moe[df_res_moe["confID"] < 3]
top_three_moe = _pct(_n_ligands_hit(df_top_moe), divide)
df_top_vina = df_res_vina[df_res_vina["confID"] < 3]
top_three_vina = _pct(_n_ligands_hit(df_top_vina), divide)
df_top_glide_xp = df_res_glide_xp[df_res_glide_xp["confID"] < 3]
top_three_glide_xp = _pct(_n_ligands_hit(df_top_glide_xp), divide)
df_top_glide_htvs = df_res_glide_htvs[df_res_glide_htvs["confID"] < 3]
top_three_glide_htvs = _pct(_n_ligands_hit(df_top_glide_htvs), divide)
df_top_glide_sp = df_res_glide_sp[df_res_glide_sp["confID"] < 3]
top_three_glide_sp = _pct(_n_ligands_hit(df_top_glide_sp), divide)

print("top3 accuracy:")
print(f"Vina: {top_three_vina}%")
print(f"Glide XP: {top_three_glide_xp}%")
print(f"Glide SP: {top_three_glide_sp}%")
print(f"Glide HTVS: {top_three_glide_htvs}%")
print(f"MOE: {top_three_moe}%")
print("--------------------")

# top5 accuracy
df_top_moe = df_res_moe[df_res_moe["confID"] < 5]
top_five_moe = _pct(_n_ligands_hit(df_top_moe), divide)
df_top_vina = df_res_vina[df_res_vina["confID"] < 5]
top_five_vina = _pct(_n_ligands_hit(df_top_vina), divide)
df_top_glide_xp = df_res_glide_xp[df_res_glide_xp["confID"] < 5]
top_five_glide_xp = _pct(_n_ligands_hit(df_top_glide_xp), divide)
df_top_glide_htvs = df_res_glide_htvs[df_res_glide_htvs["confID"] < 5]
top_five_glide_htvs = _pct(_n_ligands_hit(df_top_glide_htvs), divide)
df_top_glide_sp = df_res_glide_sp[df_res_glide_sp["confID"] < 5]
top_five_glide_sp = _pct(_n_ligands_hit(df_top_glide_sp), divide)

print("top5 accuracy:")
print(f"Vina: {top_five_vina}%")
print(f"Glide XP: {top_five_glide_xp}%")
print(f"Glide SP: {top_five_glide_sp}%")
print(f"Glide HTVS: {top_five_glide_htvs}%")
print(f"MOE: {top_five_moe}%")
print("--------------------")

# all accuracy (top ten; there are no more docked poses than that)
df_top_moe = df_res_moe[df_res_moe["confID"] < 10]
top_all_moe = _pct(_n_ligands_hit(df_top_moe), divide)
df_top_vina = df_res_vina[df_res_vina["confID"] < 10]
top_all_vina = _pct(_n_ligands_hit(df_top_vina), divide)
df_top_glide_xp = df_res_glide_xp[df_res_glide_xp["confID"] < 10]
top_all_glide_xp = _pct(_n_ligands_hit(df_top_glide_xp), divide)
df_top_glide_htvs = df_res_glide_htvs[df_res_glide_htvs["confID"] < 10]
top_all_glide_htvs = _pct(_n_ligands_hit(df_top_glide_htvs), divide)
df_top_glide_sp = df_res_glide_sp[df_res_glide_sp["confID"] < 10]
top_all_glide_sp = _pct(_n_ligands_hit(df_top_glide_sp), divide)

print("all accuracy:")
print(f"Vina: {top_all_vina}%")
print(f"Glide XP: {top_all_glide_xp}%")
print(f"Glide SP: {top_all_glide_sp}%")
print(f"Glide HTVS: {top_all_glide_htvs}%")
print(f"MOE: {top_all_moe}%")
print("--------------------")
