In [None]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load Data

In [None]:
# Location of the combined results CSV. The default is a machine-specific
# absolute path; set the MULTI_POSE_RESULTS_CSV environment variable to point
# the notebook at a different copy without editing this cell.
results_csv = Path(
    os.environ.get(
        "MULTI_POSE_RESULTS_CSV",
        "/Users/alexpayne/Scientific_Projects/mers-drug-discovery/sars2-retrospective-analysis/20240424_multi_pose_docking_cross_docking/results_csvs/20240503_combined_results_with_data.csv",
    )
)

In [None]:
# Analysis CSVs are read from the "analyzed_data" directory that sits beside
# the directory containing results_csv.
data_path = results_csv.parent.parent / "analyzed_data"

# Output directory for rendered figures, relative to the working directory.
# Create it up front: writing an image into a missing directory fails.
figure_path = Path("figures")
figure_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Path.glob returns a one-shot generator; materialise it so the loading cell
# can be re-run on its own, and sort so the file order is deterministic.
df_paths = sorted(data_path.glob("*/*.csv"))

In [None]:
# Read every discovered CSV into its own DataFrame.
dfs = list(map(pd.read_csv, df_paths))

In [None]:
# Stack all per-file frames, force N_Per_Split to int, and order the rows by
# the grouping columns followed by N_Per_Split.
sort_columns = [
    "Split",
    "Score",
    "PoseSelection",
    "StructureChoice",
    "StructureChoice_Choose_N",
    "N_Per_Split",
]
ogdf = (
    pd.concat(dfs)
    .astype({"N_Per_Split": int})
    .sort_values(by=sort_columns)
)

In [None]:
# Distances from Fraction down to CI_Lower and up to CI_Upper, stored as
# separate columns so they can be used as asymmetric error bars.
ogdf = ogdf.assign(
    Error_Lower=ogdf["Fraction"] - ogdf["CI_Lower"],
    Error_Upper=ogdf["CI_Upper"] - ogdf["Fraction"],
)

In [None]:
# Working frame: only the rows whose PoseSelection is "Default".
df = ogdf.loc[ogdf["PoseSelection"] == "Default"]

# Plotting Variables

In [None]:
# Font sizes shared by the figures: large for axis/legend titles, small for
# everything else.
large_font = 24
small_font = 18

# Human-readable axis titles, keyed by the DataFrame column they replace.
labels = dict(
    Fraction="<b> Fraction of Poses Docked < 2Å from Reference </b>",
    N_Per_Split="<b> Total Number of Reference Structures Available to Use </b>",
)

# Axis styling passed to fig.update_layout: black axes with large titles, and
# the y axis pinned to the [0, 1] range.
update_layout_dict = {
    "xaxis": {
        "title_font": {"size": large_font},
        "color": "black",
    },
    "yaxis": {
        "range": (0, 1),
        "title_font": {"size": large_font},
        "color": "black",
    },
}

In [None]:
def update_traces(fig):
    """Rewrite the name of every trace in ``fig`` for display and return ``fig``.

    The substitutions are applied in order to each trace name:
    underscores become spaces, the word "Split" is removed, ", " becomes
    " - ", and "RMSD" is expanded to "RMSD (Positive Control)".

    The traces are modified in place; the same figure object is returned.
    """
    substitutions = (
        ("_", " "),
        ("Split", ""),
        (", ", " - "),
        ("RMSD", "RMSD (Positive Control)"),
    )
    for trace in fig.data:
        cleaned = trace.name
        for old, new in substitutions:
            cleaned = cleaned.replace(old, new)
        trace.name = cleaned
    return fig

In [None]:
def hex_to_rgb(hex_color: str) -> tuple:
    """Convert a hex colour string into an ``(r, g, b)`` tuple of ints.

    Args:
        hex_color: Colour such as ``"#1b9e77"`` or the 3-digit shorthand
            ``"#1b9"``. Leading ``#`` characters are optional.

    Returns:
        Three integers, one per channel, each parsed from two hex digits.

    Raises:
        ValueError: If a channel cannot be parsed as hexadecimal.
    """
    digits = hex_color.lstrip("#")
    if len(digits) == 3:
        # Shorthand doubles each digit ("abc" -> "aabbcc"); repeating the
        # whole string ("abcabc") would produce the wrong channels.
        digits = "".join(ch * 2 for ch in digits)
    return int(digits[0:2], 16), int(digits[2:4], 16), int(digits[4:6], 16)

In [None]:
def rgb_to_rgba(rgb_str, alpha):
    """Turn an ``"rgb(r, g, b)"`` string into ``"rgba(r, g, b, alpha)"``.

    The leading/trailing characters ``r``, ``g``, ``b``, ``(`` and ``)`` are
    stripped from ``rgb_str``, the remainder is split on commas, and each
    piece is converted to ``int``. ``alpha`` is interpolated as given.
    """
    channels = [int(part) for part in rgb_str.strip('rgb()').split(',')]
    red, green, blue = channels
    return f"rgba({red}, {green}, {blue}, {alpha})"

# Figure 1: Datesplit vs Random for POSIT_Probability vs RMSD

## Dataset Mangling

In [None]:
# Rows for Figure 1: StructureChoice is "Dock_to_All" and
# PoseSelection_Choose_N equals 1.
dataset_split_df = df.loc[
    (df["StructureChoice"] == "Dock_to_All")
    & (df["PoseSelection_Choose_N"] == 1)
]

In [None]:
# Line plot of Fraction against N_Per_Split (log-scaled x axis): one colour
# per Score and one dash pattern per Split. The Error_Upper / Error_Lower
# columns are not passed to px.line, so no error bars are drawn.
fig = px.line(
    dataset_split_df,
    x="N_Per_Split",
    y="Fraction",
    color="Score",
    line_dash="Split",
    labels=labels,
    template="simple_white",
    color_discrete_sequence=px.colors.qualitative.Dark2,
    log_x=True,
    width=800,
    height=600,
)

# Global font, legend title/position, then the shared axis styling.
fig.update_layout(
    font={"size": small_font, "family": "Arial"},
    legend={
        "title": "<b> Score Function, Dataset Split </b>",
        "x": 0.4,
        "y": 0.1,
        "title_font_size": large_font,
        "font_color": "black",
    },
    **update_layout_dict,
)

# Y ticks every 0.1 from 0 to 1.
fig.update_yaxes(tickvals=np.arange(0, 1.1, 0.1))

# Tidy the legend entries, then display the figure and save it as a PNG.
fig = update_traces(fig)
fig.show()
fig.write_image(figure_path / "20240801_dataset_split_comparison_v3.png")