In [None]:
%cd ~/SSMuLA

In [None]:
%load_ext blackcellmagic
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [None]:
import pandas as pd

In [None]:

import holoviews as hv
from holoviews import dim


hv.extension("bokeh")

from SSMuLA.landscape_global import LIB_NAMES, TrpB_names
from SSMuLA.vis import (
    save_bokeh_hv,
    JSON_THEME,
    LIB_COLORS,
    one_decimal_x,
    one_decimal_y,
    fixmargins,
)
from SSMuLA.util import get_file_name, checkNgen_folder

hv.renderer("bokeh").theme = JSON_THEME


In [None]:
# Load the pairwise-epistasis table; later cells filter it by `summary_type`
# and plot `value` per (`lib`, `epistasis_type`).
df = pd.read_csv("results/pairwise_epistasis_vis/none/scale2max.csv")
df

In [None]:
# Show only the rows whose summary type is "fraction".
df.loc[df["summary_type"] == "fraction"]

In [None]:
from SSMuLA.pairwise_epistasis import EPISTASIS_TYPE

In [None]:
def hook(plot, element):
    """Fix the categorical x-axis order to (library, epistasis type) pairs.

    Assigns the x-range factors of the rendered plot so that libraries follow
    ``LIB_NAMES`` and, within each library, epistasis types follow
    ``EPISTASIS_TYPE``. ``element`` is part of the hook signature but unused.
    """
    ordered_factors = []
    for lib in LIB_NAMES:
        for epistasis in EPISTASIS_TYPE:
            ordered_factors.append((lib, epistasis))
    plot.handles["plot"].x_range.factors = ordered_factors


# Grouped bars of the "fraction" rows: one bar per (lib, epistasis_type).
bars = hv.Bars(
    df.loc[df["summary_type"] == "fraction"],
    kdims=["lib", "epistasis_type"],
    vdims="value",
).opts(
    title="Fraction of pairwise epistasis types",
    xlabel="Library",
    ylabel="Fraction",
    width=1200,
    height=400,
    multi_level=False,
    show_legend=True,
    legend_position="top",
    legend_offset=(0, 5),
    hooks=[fixmargins, one_decimal_y, hook],
)
bars

In [None]:
import os
from glob import glob

In [None]:
# Source table: results/pairwise_epistasis_vis/none/scale2max.csv
# Build the bar plot from it and save the figure into the same directory.
def hook(plot, element):
    """Fix the categorical x-axis order to (library, epistasis type) pairs.

    Libraries follow ``LIB_NAMES``; within each library the epistasis types
    follow ``EPISTASIS_TYPE``. ``element`` is unused.
    """
    ordered_factors = []
    for lib in LIB_NAMES:
        for epistasis in EPISTASIS_TYPE:
            ordered_factors.append((lib, epistasis))
    plot.handles["plot"].x_range.factors = ordered_factors


# Render the "fraction" rows as grouped bars and hand them to save_bokeh_hv.
save_bokeh_hv(
    hv.Bars(
        df.loc[df["summary_type"] == "fraction"],
        kdims=["lib", "epistasis_type"],
        vdims="value",
    ).opts(
        title="Fraction of pairwise epistasis types",
        xlabel="Library",
        ylabel="Fraction",
        width=1200,
        height=400,
        multi_level=False,
        show_legend=True,
        legend_position="top",
        legend_offset=(0, 5),
        hooks=[fixmargins, one_decimal_y, hook],
    ),
    plot_name="scale2max",
    plot_path=os.path.join("results/pairwise_epistasis_vis", "none"),
)


In [None]:
import ast

In [None]:
# Load the zero-shot (ZS) statistics table.
zs_sum_df = pd.read_csv("results/zs_sum/none/zs_stat_scale2max.csv")

# Long format: one row per (lib, n_mut, ZS score column); the cell content of
# each score column lands in "corr".
zs_sum_df_melt = pd.melt(
    zs_sum_df,
    id_vars=["lib", "n_mut"],
    value_vars=["Triad_score", "ev_score", "esm_score"],
    var_name="zs_type",
    value_name="corr",
)

# Each "corr" cell is parsed with ast.literal_eval and spread into its own
# columns, which are then placed next to the identifier columns.
df_expanded = pd.concat(
    [
        zs_sum_df_melt.drop(columns="corr"),
        zs_sum_df_melt["corr"].apply(ast.literal_eval).apply(pd.Series),
    ],
    axis="columns",
)
df_expanded

In [None]:
# Stack the three metric columns into (metric, value) pairs per
# (lib, n_mut, zs_type) row.
df_score = pd.melt(
    df_expanded,
    id_vars=["lib", "n_mut", "zs_type"],
    value_vars=["rho", "ndcg", "rocauc"],
    var_name="metric",
    value_name="value",
)
df_score

In [None]:
import holoviews as hv
hv.extension('bokeh')

In [None]:
from SSMuLA.zs_analysis import ZS_OPTS

In [None]:
def hook(plot, element):
    """Fix the categorical x-axis order to (library, ZS option) pairs.

    Libraries follow ``LIB_NAMES``; within each library the entries follow
    ``ZS_OPTS``. ``element`` is part of the hook signature but unused.
    """
    ordered_factors = []
    for lib in LIB_NAMES:
        for zs in ZS_OPTS:
            ordered_factors.append((lib, zs))
    plot.handles["plot"].x_range.factors = ordered_factors

In [None]:
def zs_hook(plot, element):
    """Fix the categorical x-axis order to (library, ZS option) pairs.

    Defined in this cell on purpose: the generic name ``hook`` is bound to
    several different functions elsewhere in this notebook, so relying on it
    here would make the axis order depend on which cell was executed last.
    ``element`` is part of the hook signature but unused.
    """
    plot.handles["plot"].x_range.factors = [
        (lib, zs) for lib in LIB_NAMES for zs in ZS_OPTS
    ]


# One saved bar plot per metric, named zs_stat_scale2max-<metric>.
for metric in ["rho", "ndcg", "rocauc"]:
    save_bokeh_hv(
        hv.Bars(
            df_score[df_score["metric"] == metric],
            kdims=["lib", "zs_type"],
            vdims="value",
        ).opts(
            width=1200,
            height=400,
            show_legend=True,
            legend_position="top",
            legend_offset=(0, 5),
            ylabel=f"{metric} correlation",
            multi_level=False,
            title=f"ZS fitness {metric} correlation",
            xlabel="Library",
            hooks=[fixmargins, one_decimal_y, zs_hook],
        ),
        plot_name=f"zs_stat_scale2max-{metric}",
        plot_path=os.path.join("results/zs_sum", "none"),
    )


In [None]:
def noesmhook(plot, element):
    """Fix the x-axis order to (library, ZS score) pairs, ESM score left out.

    Libraries follow ``LIB_NAMES``; within each library only "Triad_score"
    and "ev_score" are listed. ``element`` is unused.
    """
    ordered_factors = []
    for lib in LIB_NAMES:
        for zs in ["Triad_score", "ev_score"]:
            ordered_factors.append((lib, zs))
    plot.handles["plot"].x_range.factors = ordered_factors


# Same per-metric bar plots as the full ZS set, minus the "esm_score" rows;
# files get a "-noesm" suffix.
for metric in ["rho", "ndcg", "rocauc"]:
    save_bokeh_hv(
        hv.Bars(
            df_score.loc[
                (df_score["metric"] == metric) & (df_score["zs_type"] != "esm_score")
            ],
            kdims=["lib", "zs_type"],
            vdims="value",
        ).opts(
            title=f"ZS fitness {metric} correlation",
            xlabel="Library",
            ylabel=f"{metric} correlation",
            width=1200,
            height=400,
            multi_level=False,
            show_legend=True,
            legend_position="top",
            legend_offset=(0, 5),
            hooks=[fixmargins, one_decimal_y, noesmhook],
        ),
        plot_name=f"zs_stat_scale2max-{metric}-noesm",
        plot_path=os.path.join("results/zs_sum", "none"),
    )


In [None]:
# try de sim sum
# Load the DE simulation summary for all landscapes (scale2max).
# NOTE(review): this rebinds `df`, which earlier held the pairwise-epistasis
# table; any earlier cell that reads `df` will see this table instead if it is
# re-run after this one.
df = pd.read_csv("results/simulations/DE-active/scale2max/all_landscape_de_summary.csv")
df

In [None]:
# Means: missing top-96 values fall back to the all-simulation mean, then
# missing top-384 values fall back to the (already filled) top-96 mean.
df["mean_top96"] = df["mean_top96"].fillna(value=df["mean_all"])
df["mean_top384"] = df["mean_top384"].fillna(value=df["mean_top96"])

# Medians: same fallback chain (all -> top96 -> top384).
df["median_top96"] = df["median_top96"].fillna(value=df["median_all"])
df["median_top384"] = df["median_top384"].fillna(value=df["median_top96"])

df

In [None]:
# Sanity check over the TrpB3* scale2max files (paths containing "codon" are
# skipped): print the top-fitness row whenever its fitness is not exactly 1.
for lib in sorted(glob("data/TrpB/scale2max/TrpB3*.csv")):
    if "codon" in lib:
        continue
    trpb_df = pd.read_csv(lib)
    top_row = trpb_df.loc[trpb_df["fitness"].idxmax()]
    if top_row["fitness"] != 1:
        print(top_row)

In [None]:
# NOTE(review): 'C' looks like a placeholder column name left over from a
# generic snippet; no other visible cell references a column 'C' on `df`.
# If `df` has no such column this cell raises KeyError — confirm the intended
# column or remove the cell.
# Find the index of the row with the max value in column 'C'
max_index = df['C'].idxmax()

# Retrieve the row with the max value in column 'C'
max_row = df.loc[max_index]

In [None]:
# Summary column name -> human-readable description used in plot titles and
# axis labels. Keys are ordered all -> top96 -> top384, mean before median,
# with "fraction_max" last.
de_metric_map = {
    f"{stat}_{scope}": f"{scope_label} simulations fitness {stat}"
    for scope, scope_label in (
        ("all", "all"),
        ("top96", "top 96"),
        ("top384", "top 384"),
    )
    for stat in ("mean", "median")
}
de_metric_map["fraction_max"] = "fraction reached max fitness"

In [None]:
def de_hook(plot, element):
    """Fix the categorical x-axis order to (library, DE type) pairs.

    Libraries follow ``LIB_NAMES``; within each library the DE types are
    listed as single_step_DE, recomb_SSM, top96_SSM. ``element`` is unused.
    """
    ordered_factors = []
    for lib in LIB_NAMES:
        for de in ["single_step_DE", "recomb_SSM", "top96_SSM"]:
            ordered_factors.append((lib, de))
    plot.handles["plot"].x_range.factors = ordered_factors


# One saved bar plot per summary column; the title doubles as the file name.
for metric, metric_dets in de_metric_map.items():

    title = f"DE from active variant {metric_dets}"

    save_bokeh_hv(
        hv.Bars(df, kdims=["lib", "de_type"], vdims=metric).opts(
            title=title,
            xlabel="Library",
            ylabel=metric_dets.capitalize(),
            width=1200,
            height=400,
            multi_level=False,
            show_legend=True,
            legend_position="top",
            legend_offset=(0, 5),
            hooks=[fixmargins, one_decimal_y, de_hook],
        ),
        plot_name=title,
        plot_path=os.path.join("results/simulations/DE-active", "scale2max", "summary"),
    )

In [None]:
# Load the combined MLDE results table; later cells filter it on `zs`,
# `n_top`, `n_mut_cutoff`, `encoding`, `lib`, `model` and `rep`.
mlde_df = pd.read_csv("results/mlde/vis/all_df.csv")
mlde_df

In [None]:
# Load the table with the same file name from the `mlde_old` results folder.
# NOTE(review): `mlde_df_2` is not referenced again in the visible cells —
# confirm whether this comparison load is still needed.
mlde_df_2 = pd.read_csv("results/mlde_old/vis/all_df.csv")
mlde_df_2

In [None]:
# Lift the row-display limit so the whole grouped table is shown below.
pd.set_option("display.max_rows", None)

# Rows with no ZS, n_top == 96 and n_mut_cutoff == "all", averaged per
# (lib, encoding, model) over the listed metric columns.
sliced_df_no_zs_top96_all = (
    mlde_df.loc[
        (mlde_df["zs"] == "none")
        & (mlde_df["n_top"] == 96)
        & (mlde_df["n_mut_cutoff"] == "all"),
        [
            "lib",
            "encoding",
            "model",
            "maxes_all",
            "means_all",
            "maxes",
            "means",
            "ndcgs",
            "rhos",
            "if_truemaxs",
            "truemax_inds",
        ],
    ]
    .groupby(["lib", "encoding", "model"])
    .mean()
)

sliced_df_no_zs_top96_all

In [None]:
# Restore pandas' default row-display limit.
pd.reset_option("display.max_rows")


In [None]:

import bokeh
from bokeh.io import show, export_svg, export_png
from bokeh.plotting import show
from bokeh.themes.theme import Theme

bokeh.io.output_notebook()

import holoviews as hv
from holoviews import opts

hv.extension("bokeh", "matplotlib")


from bokeh.plotting import figure, show
from bokeh.io import output_notebook

output_notebook()

In [None]:
# Show the grouped means with lib / encoding / model as ordinary columns.
sliced_df_no_zs_top96_all.reset_index(drop=False)

In [None]:
# NOTE(review): exact repeat of the previous cell's expression; displays the
# grouped table with the group keys moved back into columns.
sliced_df_no_zs_top96_all.reset_index()

In [None]:
# Per-replicate max fitness for the no-ZS, top-96, all-mutation runs,
# restricted to the ESM2 flatten-site and one-hot encodings.
#
# Rows and columns are selected in a single .loc call and the encoding label
# is shortened via .assign, so no column is ever assigned on a frame produced
# by chained indexing (which triggers SettingWithCopyWarning). regex=False
# makes the replacement an explicit literal match.
slice_mlde_df = (
    mlde_df.loc[
        (mlde_df["zs"] == "none")
        & (mlde_df["n_top"] == 96)
        & (mlde_df["n_mut_cutoff"] == "all")
        & (mlde_df["encoding"].isin(["esm2_t33_650M_UR50D-flatten_site", "one-hot"])),
        ["lib", "encoding", "model", "maxes", "rep"],
    ]
    .assign(
        encoding=lambda frame: frame["encoding"].str.replace(
            "esm2_t33_650M_UR50D-flatten_site", "esm", regex=False
        )
    )
    # Same order as before: renumber rows first, then sort (the sorted frame
    # therefore keeps the pre-sort row numbers as its index).
    .reset_index(drop=True)
    .sort_values(by=["lib", "encoding", "model"])
)
slice_mlde_df.head()

In [None]:
# Inspect the TrpB3F / esm / ridge rows.
slice_mlde_df.loc[
    (slice_mlde_df["lib"] == "TrpB3F")
    & (slice_mlde_df["encoding"] == "esm")
    & (slice_mlde_df["model"] == "ridge")
]

In [None]:
# Inspect the TrpB3F / esm / ridge rows for replicate 0 only.
slice_mlde_df.loc[
    (slice_mlde_df["lib"] == "TrpB3F")
    & (slice_mlde_df["encoding"] == "esm")
    & (slice_mlde_df["model"] == "ridge")
    & (slice_mlde_df["rep"] == 0)
]

In [None]:
# Overwrite `maxes` for the TrpB3F / esm / ridge / rep 0 rows.
# The row selector passed to .loc must be the boolean mask itself; the earlier
# form passed the already-filtered DataFrame, which is not a valid row indexer.
# NOTE(review): 0.999999 presumably nudges the value off exactly 1 so the
# group is not constant in the violin plots below — confirm the intent.
slice_mlde_df.loc[
    (slice_mlde_df["lib"] == "TrpB3F")
    & (slice_mlde_df["encoding"] == "esm")
    & (slice_mlde_df["model"] == "ridge")
    & (slice_mlde_df["rep"] == 0),
    "maxes",
] = 0.999999

In [None]:
def hook(plot, element):
    """Fix the x-axis order to (library, encoding, model) triples.

    Libraries follow ``LIB_NAMES``; within each library the encodings are
    esm then one-hot, and within each encoding the models are boosting then
    ridge. ``element`` is part of the hook signature but unused.
    """
    ordered_factors = []
    for lib in LIB_NAMES:
        for encoding in ["esm", "one-hot"]:
            for model in ["boosting", "ridge"]:
                ordered_factors.append((lib, encoding, model))
    plot.handles["plot"].x_range.factors = ordered_factors


# Violin of `maxes` per (lib, encoding, model): fill colour keyed on the
# encoding, outline colour keyed on the model.
# Options tried and left disabled: inner="box", split='model', multi_level=True.
hv.Violin(
    slice_mlde_df.copy(),
    kdims=["lib", "encoding", "model"],
    vdims=["maxes"],
).opts(
    title="Max fitness distribution",
    xlabel="Library",
    ylabel="Max fitness",
    ylim=(0, 1),
    width=1200,
    height=400,
    violin_width=0.9,
    violin_fill_color=hv.dim("encoding").str(),
    violin_line_color=hv.dim("model").str(),
    show_legend=True,
    legend_position="top",
    legend_offset=(0, 5),
    hooks=[fixmargins, one_decimal_y, hook],
)

In [None]:
%matplotlib inline

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Seaborn violin plot of `maxes` per library, one hue per encoding.
sns.violinplot(x="lib", y="maxes", hue="encoding", data=slice_mlde_df)
plt.show()


In [None]:
# Faceted violins: one panel per model, `maxes` per library, hue = encoding.
g = sns.catplot(
    data=slice_mlde_df,
    kind="violin",
    x="lib",
    y="maxes",
    hue="encoding",
    col="model",
)
# Panel titles show just the model name.
g.set_titles("{col_name}")
plt.show()


In [None]:


# results/pairwise_epistasis_vis/none/scale2max.csv
# make bar plots base on that and save to the same directory
# def hook(plot, element):
#     plot.handles["plot"].x_range.factors = [
#         (lib, epistasis) for lib in LIB_NAMES for epistasis in EPISTASIS_TYPE
#     ]

# Create the Holoviews Bars element
# save_bokeh_hv(
#     hv.Bars(
#         df[df["summary_type"] == "fraction"],
#         kdims=["lib", "epistasis_type"],
#         vdims="value",
#     ).opts(
#         width=1200,
#         height=400,
#         show_legend=True,
#         legend_position="top",
#         legend_offset=(0, 5),
#         ylabel="Fraction",
#         multi_level=False,
#         title="Fraction of pairwise epistasis types",
#         xlabel="Library",
#         hooks=[fixmargins, one_decimal_y, hook],
#     ),
#     plot_name="scale2max",
#     plot_path=os.path.join("results/pairwise_epistasis_vis", "none"),
# )


In [None]:
# Reload the combined MLDE results table from disk (same path as the earlier
# load of `mlde_df`), replacing the in-memory copy.
mlde_df = pd.read_csv("results/mlde/vis/all_df.csv")
mlde_df

In [None]:
# NOTE(review): exact repeat of the previous cell's reload of `mlde_df`;
# one of the two cells is likely redundant.
mlde_df = pd.read_csv("results/mlde/vis/all_df.csv")
mlde_df

In [None]:
# Number of rows that use the one-hot encoding.
int((mlde_df["encoding"] == "one-hot").sum())

In [None]:
# Inspect the GB1 / one-hot / boosting rows for replicate 0.
mlde_df.loc[
    (mlde_df["encoding"] == "one-hot")
    & (mlde_df["lib"] == "GB1")
    & (mlde_df["model"] == "boosting")
    & (mlde_df["rep"] == 0)
]

In [None]:
# Number of GB1 / one-hot / boosting rows for replicate 0.
int(
    (
        (mlde_df["encoding"] == "one-hot")
        & (mlde_df["lib"] == "GB1")
        & (mlde_df["model"] == "boosting")
        & (mlde_df["rep"] == 0)
    ).sum()
)

In [None]:
# Distinct values present in the `truemax_inds` column.
mlde_df["truemax_inds"].unique()