# Generate graphs
1. Vina and Glide SP
    * docking of crystallized molecules
    * for each molecule, a line showing the difference between the best pose by RMS and the best pose by docking score
1. Accuracies
1. Comparisons
    * comparison of different algorithms
1. IFs

In [None]:
# imports
import pandas as pd
from IPython.display import display
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

## 1. Vina and Glide SP

In [None]:
from collections import Counter


def _load_best_poses(csv_name, label):
    """Load one docking result table and keep only the poses used for plotting.

    Reads ``../materials/graphs/<csv_name>.csv``, tags every row with
    ``label`` in a new ``algorithm`` column and keeps the rows where
    ``confID == 0`` or ``rank_rms == 1``.

    The result is indexed by ``"<code>_<confID>"``. A code that has exactly
    two kept rows contributes each row once; a code with any other number of
    kept rows has each of its rows repeated twice.
    NOTE(review): presumably this guarantees two points per code for the box
    plot even when the same pose satisfies both filters - confirm.

    Args:
        csv_name: File stem of the CSV inside ``../materials/graphs``.
        label: Value written to the ``algorithm`` column.

    Returns:
        The filtered (and possibly row-duplicated) DataFrame, indexed by the
        ``"<code>_<confID>"`` labels (index name ``index``).
    """
    df = pd.read_csv(f"../materials/graphs/{csv_name}.csv")
    df["algorithm"] = label
    # Explicit copy: a column is added below, and assigning onto a filtered
    # slice of the full table can trigger pandas' SettingWithCopyWarning.
    df = df.loc[(df["confID"] == 0) | (df["rank_rms"] == 1)].copy()

    codes = list(df["code"])
    conf_ids = list(df["confID"])
    row_labels = [f"{code}_{conf_id}" for code, conf_id in zip(codes, conf_ids)]

    # Count every code once up front instead of calling list.count() per row.
    rows_per_code = Counter(codes)
    plot_labels = []
    for code, row_label in zip(codes, row_labels):
        repeats = 1 if rows_per_code[code] == 2 else 2
        plot_labels.extend([row_label] * repeats)

    df["index"] = row_labels
    # reindex() with a repeated label duplicates the corresponding row.
    return df.set_index("index").reindex(index=plot_labels)


# vina
df_vina = _load_best_poses("vina", "Vina")

# glide sp
df_sp = _load_best_poses("glide_sp", "Glide SP")

# Stack the Vina and Glide SP tables so both algorithms share one figure.
df_merged = pd.concat([df_vina, df_sp])
display(df_merged)

# One box per PDB code and algorithm, built from the RMS values of its rows.
fig = px.box(
    df_merged,
    x="code",
    y="rms",
    color="algorithm",
    width=1000,
    labels={"code": "PDB ID", "rms": "RMS", "algorithm": "Algorithm"},
    color_discrete_map={"Vina": "#0072b2", "Glide SP": "#d55e00"},
)
fig.update_traces(quartilemethod="exclusive")

# Dotted horizontal reference line at RMS = 2, spanning the full plot width
# (x is given in paper coordinates, y in data coordinates).
fig.add_shape(
    type="line",
    xref="paper", x0=0, x1=1,
    yref="y", y0=2, y1=2,
    line_color="salmon", line_width=2, line_dash="dot",
    opacity=1,
)
fig.show()

## 2. Accuracies

In [None]:
# Maps the first four characters of a group header line to the label shown
# in the "top" column (note the trailing space in the "all " key).
top_dict = {"top1": "1", "top3": "3", "top5": "5", "all ": "All"}


def get_accuracies_df(name, label):
    """Parse ``accuracy_<name>.txt`` into a long-format accuracy table.

    The file ``../materials/graphs/accuracy_<name>.txt`` is consumed in
    consecutive groups of seven lines. The first line of a group is its
    header; its first four characters are looked up in ``top_dict``. The
    remaining lines of the group, except the last one, are parsed as
    ``<algorithm>:<number>%``.
    NOTE(review): the last line of every group is dropped unconditionally -
    presumably it is a blank separator; confirm against the file format.

    Args:
        name: Suffix of the accuracy file to read.
        label: Value stored in the ``score_type`` column of every row.

    Returns:
        DataFrame with the columns ``algorithm``, ``top``, ``acc`` (float)
        and ``score_type``; one row per parsed line.
    """
    lines_per_group = 7
    with open(f"../materials/graphs/accuracy_{name}.txt", "r") as handle:
        raw_lines = handle.readlines()

    # header line -> the lines that follow it inside the same group
    groups = {}
    for start in range(0, len(raw_lines), lines_per_group):
        groups[raw_lines[start]] = raw_lines[start + 1:start + lines_per_group]

    records = []
    for header, entries in groups.items():
        cutoff = header[0:4]
        for entry in entries[:-1]:
            fields = entry.split(':')
            algorithm = fields[0]
            # Keep only the number in front of the percent sign.
            accuracy = float(fields[1].split("%")[0])
            records.append({
                "algorithm": algorithm,
                "top": top_dict[cutoff],
                "acc": accuracy,
                "score_type": label,
            })

    return pd.DataFrame.from_records(records)


# One accuracy table per scoring scheme, stacked into a single long table.
df_orig = get_accuracies_df("original", "Original")
df_plip_score = get_accuracies_df("plip", "PLIP")
df_prolif_score = get_accuracies_df("prolif", "ProLIF")
df_concat = pd.concat([df_orig, df_plip_score, df_prolif_score])

# Grouped bars: one facet per "top" value, one bar colour per scoring scheme.
fig = px.bar(
    df_concat,
    x="algorithm",
    y="acc",
    color="score_type",
    barmode="group",
    facet_col="top",
    title="Accuracies",
    width=1100,
    color_discrete_sequence=["#d55e00", "#000000", "#0072b2"],
    labels={
        "score_type": "Scoring type",
        "acc": "Accuracy (%)",
        "algorithm": "",
        "top": "Top",
    },
)
fig.show()

## 3. Comparisons

In [None]:
def read_df(name, label):
    """Load ``../materials/graphs/<name>.csv`` and keep its ``rank_rms == 1`` rows.

    Args:
        name: File stem of the CSV inside ``../materials/graphs``.
        label: Value written to the added ``algorithm`` column.

    Returns:
        DataFrame holding only the rows with ``rank_rms == 1``, without the
        ``confID`` and ``rank_rms`` columns and with an extra ``algorithm``
        column set to ``label``.
    """
    table = pd.read_csv(f"../materials/graphs/{name}.csv")
    top_ranked = table.loc[table["rank_rms"] == 1]
    trimmed = top_ranked.drop(columns=["confID", "rank_rms"])
    return trimmed.assign(algorithm=label)

# CSV file stem -> algorithm name shown in the figure.
names = {
    "vina": "Vina",
    "moe": "MOE",
    "glide_xp": "Glide XP",
    "glide_sp": "Glide SP",
    "glide_htvs": "Glide HTVS",
}
dfs_list = [read_df(file_stem, shown_name) for file_stem, shown_name in names.items()]
dfs = pd.concat(dfs_list)

# One box per algorithm, with every individual RMS value drawn next to it.
fig = px.box(
    dfs,
    x="algorithm",
    y="rms",
    color="algorithm",
    points="all",
    width=800,
    labels={"algorithm": "Algorithm", "rms": "RMS"},
)
fig.show()

## 4. IFs

In [None]:
# Lookup tables used to shorten the interaction-fingerprint column names.

# Three-letter residue name -> one-letter code.
aa_dict = {
    "ALA": "A",
    "ARG": "R",
    "ASN": "N",
    "ASP": "D",
    "CYS": "C",
    "GLU": "E",
    "GLN": "Q",
    "GLY": "G",
    "HIS": "H",
    "ILE": "I",
    "LEU": "L",
    "LYS": "K",
    "MET": "M",
    "PHE": "F",
    "PRO": "P",
    "SER": "S",
    "THR": "T",
    "TRP": "W",
    "TYR": "Y",
    "VAL": "V",
}

# Interaction name in the PLIP table -> abbreviation.
bond_dict_plip = {
    "hbonda": "HBA",       # hydrogen bond acceptors
    "pication": "PiC",     # pi-cation
    "pistack": "PP",       # pi stacking (pi-pi interactions)
    "halogenbond": "HaB",  # halogen bond
    "hydroph": "HI",       # hydrophobic interaction
    "saltbridge": "SB",    # salt bridge
    "hbondd": "HBD",       # hydrogen bond donors
}

# Interaction name in the ProLIF table -> abbreviation.
bond_dict_prolif = {
    "PiCation": "PiC",
    "VdWContact": "vdW",   # van der Waals interactions
    "Hydrophobic": "HI",
    "PiStacking": "PP",
}

#### PLIP

In [None]:
# Load the PLIP fingerprint table, indexed by its "code" column.
df_plip = pd.read_csv("../materials/graphs/ifs_aligned_plip.csv", index_col="code")
display(len(df_plip.columns))

# Each column name is split on "_" into interaction, residue name and number.
# The first two parts are abbreviated via the lookup tables; the number is
# kept unchanged.
new_names_plip = {}
for original in df_plip.columns:
    parts = original.split("_")
    interaction, residue, number = parts[0], parts[1], parts[2]
    new_names_plip[original] = "_".join(
        (bond_dict_plip[interaction], aa_dict[residue], number)
    )

df_plip = df_plip.rename(columns=new_names_plip)
display(df_plip.head())

In [None]:
plt.figure(figsize=(15, 4))

# Leave the row labelled "Percentage" out of the heatmap.
df_plip_edited = df_plip.drop("Percentage")
plip_cmap = sns.cubehelix_palette(
    start=2, rot=0, dark=0.1, light=.85, reverse=False, as_cmap=True
)
sns.heatmap(
    df_plip_edited,
    cmap=plip_cmap,
    annot=False,
    cbar=False,
    linewidths=0.1,
    xticklabels=True,  # label every column
    yticklabels=True,  # label every row
)
plt.xticks(rotation=40)
plt.title("PLIP")
plt.show()

#### ProLIF

In [None]:
# Load the ProLIF fingerprint table, indexed by its "code" column.
df_prolif = pd.read_csv("../materials/graphs/ifs_aligned_prolif.csv", index_col="code")
display(len(df_prolif.columns))

# Each column name is split on ".": the part before the dot is a three-letter
# residue name followed by its number, and the interaction name is the second
# "_"-separated piece of the part after the dot.
new_names_prolif = {}
for original in df_prolif.columns:
    parts = original.split(".")
    interaction = parts[1].split("_")[1]
    residue = parts[0][:3]
    number = parts[0][3:]
    new_names_prolif[original] = "_".join(
        (bond_dict_prolif[interaction], aa_dict[residue], number)
    )

df_prolif = df_prolif.rename(columns=new_names_prolif)
display(df_prolif.head())

In [None]:
plt.figure(figsize=(15, 4))

# Leave the row labelled "Percentage" out of the heatmap.
df_prolif_edited = df_prolif.drop("Percentage")
prolif_cmap = sns.cubehelix_palette(
    start=2, rot=0, dark=0.15, light=.85, reverse=False, as_cmap=True
)
sns.heatmap(
    df_prolif_edited,
    cmap=prolif_cmap,
    annot=False,
    cbar=False,
    linewidths=0.1,
    xticklabels=True,  # label every column
    yticklabels=True,  # label every row
)
plt.xticks(rotation=40)
plt.title("ProLIF")
plt.show()