# Imports

In [None]:
import pandas as pd, numpy as np
import plotly.express as px
from plotly.graph_objs import Figure
from pathlib import Path
from importlib import reload
import software.analysis as a
import software.plotting as pl
# Re-import software.analysis (alias `a`) so edits to that module are picked up
# without restarting the kernel.
# NOTE(review): software.plotting (alias `pl`) is not reloaded here — confirm that is intentional.
reload(a)

# Load Paths

In [None]:
# Put the directory three levels above the working directory on sys.path,
# then import `paths` from the project's software.paths module.
# NOTE(review): the imports cell above already imports software.analysis and
# software.plotting before this path is added — confirm the cell order works
# on a fresh kernel.
import sys

project_root = Path("../../../").resolve()
sys.path.append(str(project_root))
from software.paths import paths

In [None]:
# Directory holding the analysis CSVs read below.
# NOTE(review): this is an absolute, machine-specific path.
local_analysis = (
    Path("/Users/alexpayne/Scientific_Projects")
    / "mers-drug-discovery"
    / "sars2-retrospective-analysis"
)

In [None]:
# Read 20231016_combined.csv; its first column is used as the DataFrame index.
combined_csv = local_analysis / "20231016_combined.csv"
df = pd.read_csv(combined_csv, index_col=0)

In [None]:
# Number of distinct values in every column, per POSIT_Method (displayed as cell output).
rows_by_method = df.groupby("POSIT_Method")
rows_by_method.nunique()

In [None]:
# RMSD distribution split by POSIT_Method, drawn with the project's plot_kde
# helper and written to a PNG next to the notebook.
rmsd_kde_png = "20231017_rmsd_kde_self_docking.png"
rmsd_kde_title = "RMSD Distribution for Self-Docking Results"

fig = pl.plot_kde(df, "RMSD", "POSIT_Method")
fig.update_layout(title=rmsd_kde_title)
fig.write_image(rmsd_kde_png)

In [None]:
# Build the inputs for the distribution plot below: one Series of POSIT
# values per distinct POSIT_Method, in order of first appearance in df.
import plotly.figure_factory as ff

group_column = "POSIT_Method"
value_column = "POSIT"
groups = df[group_column].unique()
arrays = [df.loc[df[group_column] == label, value_column] for label in groups]

In [None]:
# Distribution plot of POSIT scores, one group per POSIT_Method.
# Histogram bars are normalised to probability and the rug plot is hidden.
fig = ff.create_distplot(
    arrays,
    group_labels=groups,
    bin_size=0.1,
    histnorm="probability",
    show_rug=False,
)
fig.update_layout(
    title="POSIT Score Distribution for Self-Docking",
    width=600,
    height=400,
)
# Both axes are clamped to [0, 1].
fig.update_yaxes(title="Frequency", range=[0, 1])
fig.update_xaxes(title="POSIT", range=[0, 1])
fig.show()
# Date prefix corrected from "202301017" to "20231017" so the file name matches
# the other figures written by this notebook.
fig.write_image("20231017_posit_kde_self_docking.png")

# In what percentage of cases is HYBRID outperforming SHAPEFIT?

In [None]:
# For every Compound_ID, keep the row whose RMSD is the minimum of that group.
lowest_rmsd_labels = df.groupby("Compound_ID")["RMSD"].idxmin()
best_poses = df.loc[lowest_rmsd_labels]

In [None]:
# Distinct-value counts per POSIT_Method among the lowest-RMSD rows (cell output).
best_by_method = best_poses.groupby("POSIT_Method")
best_by_method.nunique()

In [None]:
# Fraction of compounds whose lowest-RMSD row belongs to each POSIT_Method:
# per-method row count divided by the number of distinct Compound_IDs.
lowest_rmsd_rows = df.loc[df.groupby("Compound_ID")["RMSD"].idxmin()]
n_compounds = df["Compound_ID"].nunique()
lowest_rmsd_rows.groupby("POSIT_Method")[["Compound_ID"]].count() / n_compounds

In [None]:
# Same ratio as the RMSD version, but picking each compound's row by maximum
# POSIT instead of minimum RMSD.
highest_posit_rows = df.loc[df.groupby("Compound_ID")["POSIT"].idxmax()]
n_compounds = df["Compound_ID"].nunique()
highest_posit_rows.groupby("POSIT_Method")[["Compound_ID"]].count() / n_compounds

# If we select the pose with the best POSIT score for each complex, do we do a better job?

In [None]:
# Shuffle the rows before selecting, so that when several rows of a Complex_ID
# share the maximum POSIT the one kept is not determined by file order
# (idxmax returns the first occurrence).
# A fixed seed makes the shuffle, and therefore the selected rows and every
# downstream figure, reproducible between runs.
SHUFFLE_SEED = 0
df = df.sample(frac=1, random_state=SHUFFLE_SEED)

# For every Complex_ID, keep the row with the highest POSIT score.
combined = df.loc[df.groupby("Complex_ID")["POSIT"].idxmax()]

In [None]:
# Distinct-value counts per original POSIT_Method among the selected rows (cell output).
combined_by_method = combined.groupby("POSIT_Method")
combined_by_method.nunique()

In [None]:
# Overwrite the method label on every selected row so these rows form their
# own "COMBINED" group when plotted alongside the original methods.
combined["POSIT_Method"] = "COMBINED"

In [None]:
# Stack the per-method rows and the relabelled COMBINED rows into one frame.
frames_to_compare = [df, combined]
full_comparison = pd.concat(frames_to_compare)

In [None]:
# RMSD distribution per POSIT_Method, now including the COMBINED group;
# shown inline and written to a PNG.
combined_kde_png = "20231017_rmsd_kde_self_docking_with_combined.png"
combined_kde_title = "RMSD Distribution for Self-Docking Results"

fig = pl.plot_kde(full_comparison, "RMSD", "POSIT_Method")
fig.update_layout(title=combined_kde_title)
fig.show()
fig.write_image(combined_kde_png)

In [None]:
# Violin plot of POSIT score for each POSIT_Method (displayed as cell output, not saved).
px.violin(
    data_frame=full_comparison,
    x="POSIT_Method",
    y="POSIT",
    width=800,
    height=400,
)

In [None]:
# Violin plot of RMSD for each POSIT_Method, shown inline and saved as PNG.
rmsd_violin_png = "20231017_rmsd_violin_plot.png"

fig = px.violin(
    data_frame=full_comparison,
    x="POSIT_Method",
    y="RMSD",
    width=800,
    height=400,
    template="simple_white",
)
fig.show()
fig.write_image(rmsd_violin_png)

## How well correlated are POSIT and RMSD for the combined case?

In [None]:
# 2-D density contour of POSIT against RMSD for the COMBINED rows, with a
# histogram on each margin. The update_layout call is the last expression, so
# the figure it returns is what the cell displays.
marginal_kind = "histogram"
fig = px.density_contour(
    data_frame=combined,
    x="POSIT",
    y="RMSD",
    marginal_x=marginal_kind,
    marginal_y=marginal_kind,
)
fig.update_layout(title="POSIT vs RMSD", width=600, height=400)

## For each protocol, which has the highest frequency of structures with RMSD < 2 Å?

In [None]:
# Run the project's calc_perc_good once per RMSD cutoff, split by POSIT_Method,
# and tag each result with the cutoff that produced it.
# NOTE(review): total_mol=1 presumably leaves the result as a raw count rather
# than a fraction — confirm against calc_perc_good.
rmsd_cutoffs = [0.5, 1, 1.5, 2, 3, 5, 8]
rmsd_results = []
for cutoff in rmsd_cutoffs:
    below_cutoff = a.calc_perc_good(
        full_comparison,
        score_column="RMSD",
        good_score=cutoff,
        total_mol=1,
        split_cols=["POSIT_Method"],
    )
    tagged = pd.DataFrame(below_cutoff).assign(**{"RMSD Cutoff (Å)": cutoff})
    rmsd_results.append(tagged)

In [None]:
# Stack the per-cutoff result tables row-wise into a single long table.
rmsd_analysis = pd.concat(rmsd_results, axis=0)

In [None]:
# Scale the RMSD column by a fixed denominator.
# NOTE(review): 218 is presumably the total number of self-docked structures —
# confirm, and consider deriving it from the data instead of hard-coding it.
# The result is a fraction in [0, 1] if so, not a value out of 100.
n_total_structures = 218
rmsd_analysis["Percentage < Cutoff"] = rmsd_analysis["RMSD"] / n_total_structures

In [None]:
# Line plot of the RMSD column of rmsd_analysis against the cutoff, one line
# per index value of rmsd_analysis.
fig = px.line(
    rmsd_analysis,
    x="RMSD Cutoff (Å)",
    y="RMSD",
    color=rmsd_analysis.index,
)
fig.update_layout(
    title="Number Self-Docked Structures Below an RMSD Cutoff",
    width=600,
    height=400,
)
fig.update_yaxes(title="# of Structures")
fig.show()
# Date prefix corrected from "2023107" to "20231017" so the file name matches
# the other figures written by this notebook.
fig.write_image("20231017_self_docked_below_rmsd_cutoff.png")

In [None]:
# Line plot of the "Percentage < Cutoff" column against the cutoff, one line
# per index value of rmsd_analysis.
fig = px.line(
    rmsd_analysis,
    x="RMSD Cutoff (Å)",
    y="Percentage < Cutoff",
    color=rmsd_analysis.index,
)
fig.update_layout(
    title="Percentage of Self-Docked Structures Below an RMSD Cutoff",
    width=600,
    height=400,
)
fig.show()
# Date prefix corrected from "2023107" to "20231017" so the file name matches
# the other figures written by this notebook.
fig.write_image("20231017_self_docked_below_rmsd_cutoff_perc.png")