In [2]:
# Switch the working directory to ~/SSMuLA; the relative "results/..." paths
# read by the cells below are resolved from here.
%cd ~/SSMuLA

/disk2/fli/SSMuLA


In [3]:
# Enable autoreload in mode 2 so imported modules are reloaded before each cell runs.
%load_ext autoreload
%autoreload 2
# Load the blackcellmagic extension
# NOTE(review): presumably used for black-formatting cells — confirm it is still needed.
%load_ext blackcellmagic

In [42]:
"""A script for getting all results into a summary"""

from glob import glob
from tqdm import tqdm

import pandas as pd
import numpy as np

from ast import literal_eval

from scipy.stats import spearmanr

import bokeh
from bokeh.io import show, export_svg, export_png, output_notebook
from bokeh.plotting import show, figure
from bokeh.themes.theme import Theme


import holoviews as hv
from holoviews import opts, dim

from SSMuLA.landscape_global import get_file_name
from SSMuLA.zs_analysis import ZS_OPTS
from SSMuLA.vis import LIB_COLORS


# Send Bokeh output inline to the notebook. `output_notebook` imported from
# bokeh.io and `bokeh.io.output_notebook` are the same function, so a single
# call is enough.
output_notebook()

# Load the HoloViews plotting backends used in this notebook.
hv.extension("bokeh", "matplotlib")


########## lib stat ##########
lib_stat = pd.read_csv(
    "results/fitness_distribution/max/all_lib_stats.csv", index_col=0
)
# keep only the rows whose n_mut_cuttoff is 0
# NOTE(review): presumably 0 means "no mutation-number cutoff" — confirm
lib_stat_all = lib_stat[lib_stat["n_mut_cuttoff"] == 0]

# these columns hold string-encoded Python literals; parse each one and
# expand it into one column per entry
literal_cols = ["lib_basic_dict", "fit_basic_dict", "cauchy", "kde"]
expanded_parts = [
    lib_stat_all[col].apply(literal_eval).apply(pd.Series) for col in literal_cols
]
lib_df = pd.concat([lib_stat_all["lib"], *expanded_parts], axis=1)

# rank of the parent divided by the number of measured variants
lib_df["parent_rank_percent"] = lib_df["parent_rank"] / lib_df["numb_measured"]

# split the quartile sequence into three named columns and count the entries
# of peak_kde for each library
df_expanded = lib_df["quartiles"].apply(pd.Series)
df_expanded.columns = ["Q1", "Q2", "Q3"]
df_expanded["numb_kde_peak"] = lib_df["peak_kde"].apply(len)

lib_df = pd.concat([lib_df, df_expanded], axis=1)


########## loc opt ##########

# Sort the paths: glob() returns them in arbitrary, filesystem-dependent order,
# and sorting keeps the row order of loc_opt_df stable between runs.
loc_opt_list = sorted(glob("results/local_optima/scale2max/*.csv"))

# Build all records first and create the frame once, instead of growing a
# DataFrame row by row through the private `DataFrame._append`.
# Each record holds the library name (file name with the "_loc_opt_escape"
# suffix removed) and the number of rows in that file.
loc_opt_records = [
    {
        "lib": get_file_name(lo).replace("_loc_opt_escape", ""),
        "n_locopt": len(pd.read_csv(lo)),
    }
    for lo in loc_opt_list
]

# Explicit columns keep the "lib" merge key present even when no file matched.
loc_opt_df = pd.DataFrame(loc_opt_records, columns=["lib", "n_locopt"])

########## de stat ##########

de_df = pd.read_csv("results/de/DE-active/scale2max/all_landscape_de_summary.csv")

# Fill missing top-N statistics from the next broader statistic.
# The order matters: top96 is filled from *_all first, then top384 is filled
# from the (already filled) top96 columns.
for target_col, fallback_col in [
    ("mean_top96", "mean_all"),
    ("median_top96", "median_all"),
    ("mean_top384", "mean_top96"),
    ("median_top384", "median_top96"),
]:
    de_df[target_col] = de_df[target_col].fillna(de_df[fallback_col])

de_types = de_df["de_type"].unique()

de_df_cols = list(de_df.columns)

# start from one row per library and attach one set of columns per DE type
de_df_concat = pd.DataFrame({"lib": list(de_df["lib"].unique())})

for de_type in de_types:

    # prefix every statistic column with the DE type; the keys stay untouched
    prefixed_names = {
        col: f"{de_type}_{col}" for col in de_df_cols if col not in ["lib", "de_type"]
    }
    sliced_de_df = de_df[de_df["de_type"] == de_type].rename(columns=prefixed_names)

    de_df_concat = pd.merge(
        de_df_concat, sliced_de_df.drop(["de_type"], axis=1), on="lib"
    )


########## mlde stat ##########

mlde_df = pd.read_csv("results/mlde/vis_3/all_df.csv")


def _mlde_lib_mean(df, zs_name):
    """Average the MLDE metrics per library for one `zs` setting.

    Keeps the rows of `df` with the given `zs` value, one-hot encoding, the
    boosting model, n_mut_cutoff "all", n_sample 96 and n_top 96, then returns
    the per-"lib" mean of the metric columns (indexed by "lib").
    """
    keep = (
        (df["zs"] == zs_name)
        & (df["encoding"] == "one-hot")
        & (df["model"] == "boosting")
        & (df["n_mut_cutoff"] == "all")
        & (df["n_sample"] == 96)
        & (df["n_top"] == 96)
    )
    metrics = df[keep][["lib", "top_maxes", "top_means", "ndcgs", "rhos", "if_truemaxs"]]
    return metrics.groupby("lib").mean()


# baseline: rows with zs == "none"
mlde_avg = _mlde_lib_mean(mlde_df, "none")

# short predictor names: every zs value containing "score", cut at "_score"
# (the variable name keeps its original spelling because later code uses it)
zs_simpele = [zs.split("_score")[0] for zs in mlde_df["zs"].unique() if "score" in zs]

for zs in zs_simpele:

    # suffix the metric columns with the predictor name so they can sit next
    # to the baseline columns after the merge
    rename_cols = {
        "top_maxes": f"maxes_{zs}",
        "top_means": f"means_{zs}",
        "ndcgs": f"ndcgs_{zs}",
        "rhos": f"rhos_{zs}",
        "if_truemaxs": f"if_truemaxs_{zs}",
    }

    zs_avg = _mlde_lib_mean(mlde_df, f"{zs}_score").rename(columns=rename_cols)
    mlde_avg = pd.merge(mlde_avg, zs_avg, on="lib")

# join the per-library tables on "lib" (default inner joins)
merged_active_de = pd.merge(lib_df, de_df_concat, on="lib")
simple_merge = merged_active_de.merge(mlde_avg, on="lib").merge(loc_opt_df, on="lib")

##### calc delta #####
# (prefix of the delta column, column it is computed from): the baseline MLDE
# column first, then one entry per predictor in zs_simpele
delta_sources = [("mlde", "top_maxes")] + [
    (zs_name, f"maxes_{zs_name}") for zs_name in zs_simpele
]

for prefix, max_col in delta_sources:
    for de in de_types:
        # difference between the MLDE column and the DE "mean_all" column
        simple_merge[f"{prefix}_{de}_delta"] = (
            simple_merge[max_col] - simple_merge[f"{de}_mean_all"]
        )


####### epistasis #######

df_pw = pd.read_csv("results/pairwise_epistasis_vis/none/scale2max.csv")

# rows with n_mut "all" and summary_type "fraction"; shared by both selections
all_fraction_rows = (df_pw["n_mut"] == "all") & (df_pw["summary_type"] == "fraction")

# per-library sum of the fractions of every epistasis type except "magnitude"
non_magnitude_rows = all_fraction_rows & (df_pw["epistasis_type"] != "magnitude")
df_pw_s_rs = (
    df_pw[non_magnitude_rows][["lib", "value"]]
    .groupby("lib")
    .sum()
    .rename(columns={"value": "fraction_non-magnitude"})
)

# fraction of the "reciprocal sign" type alone
reciprocal_sign_rows = all_fraction_rows & (df_pw["epistasis_type"] == "reciprocal sign")
df_pw_rs = df_pw[reciprocal_sign_rows][["lib", "value"]].rename(
    columns={"value": "fraction_reciprocal-sign"}
)

df_pw = pd.merge(df_pw_s_rs, df_pw_rs, on="lib")

simple_merge_pw = pd.merge(simple_merge, df_pw, on="lib")

# fractions multiplied by percent_active
simple_merge_pw["norm_non-magnitude"] = (
    simple_merge_pw["fraction_non-magnitude"] * simple_merge_pw["percent_active"]
)
simple_merge_pw["norm_reciprocal-sign"] = (
    simple_merge_pw["fraction_reciprocal-sign"] * simple_merge_pw["percent_active"]
)

# element-wise maximum over the four predictor "maxes_*" columns, computed once
ft_max_cols = ["maxes_Triad", "maxes_ev", "maxes_esm", "maxes_esmif"]
best_ft_max = np.maximum.reduce([simple_merge_pw[col] for col in ft_max_cols])

simple_merge_pw["delta_ft_mlde"] = best_ft_max - simple_merge_pw["top_maxes"]
simple_merge_pw["delta_ft_de"] = best_ft_max - simple_merge_pw["recomb_SSM_mean_all"]

In [43]:
# List the column names of the merged summary table.
simple_merge_pw.columns

Index(['lib', 'n_site', 'numb_measured', 'percent_measured', 'numb_active',
       'percent_active', 'active_fit_min', 'parent_fit', 'parent_rank', 'mean',
       ...
       'esmif_top96_SSM_delta', 'ev_single_step_DE_delta',
       'ev_recomb_SSM_delta', 'ev_top96_SSM_delta', 'fraction_non-magnitude',
       'fraction_reciprocal-sign', 'norm_non-magnitude',
       'norm_reciprocal-sign', 'delta_ft_mlde', 'delta_ft_de'],
      dtype='object', length=103)

In [49]:
# rows with encoding "one-hot", n_sample 96 and n_mut_cutoff "all"
one_hot_96_all = (
    (mlde_df["encoding"] == "one-hot")
    & (mlde_df["n_sample"] == 96)
    & (mlde_df["n_mut_cutoff"] == "all")
)
mlde_df[one_hot_96_all]

Unnamed: 0,encoding,model,n_sample,ft_lib,rep,all_maxes,all_means,top_maxes,top_means,ndcgs,rhos,if_truemaxs,truemax_inds,n_mut_cutoff,lib,zs,n_top,scale_fit
13200,one-hot,boosting,96,4000,0,0.786359,0.158761,1.000000,0.409427,0.889779,0.467899,1.0,99.0,all,DHFR,Triad_score,384,max
13201,one-hot,boosting,96,4000,1,0.543335,0.155376,0.969296,0.458683,0.888022,0.449639,0.0,573.0,all,DHFR,Triad_score,384,max
13202,one-hot,boosting,96,4000,2,0.684235,0.173581,1.000000,0.385126,0.883050,0.439068,1.0,166.0,all,DHFR,Triad_score,384,max
13203,one-hot,boosting,96,4000,3,0.709322,0.178183,0.865538,0.450205,0.897087,0.445949,0.0,457.0,all,DHFR,Triad_score,384,max
13204,one-hot,boosting,96,4000,4,0.716937,0.154571,0.960769,0.455208,0.882411,0.452564,0.0,548.0,all,DHFR,Triad_score,384,max
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1228995,one-hot,ridge,96,159129,45,0.112181,0.011345,0.564187,0.134434,0.971137,0.156234,0.0,35372.0,all,TrpB4,none,96,max
1228996,one-hot,ridge,96,159129,46,0.255176,0.018187,0.555277,0.240154,0.977335,0.164372,0.0,32859.0,all,TrpB4,none,96,max
1228997,one-hot,ridge,96,159129,47,0.195014,0.020900,0.733450,0.132379,0.969924,0.079782,0.0,88349.0,all,TrpB4,none,96,max
1228998,one-hot,ridge,96,159129,48,0.328032,0.030263,0.816907,0.223496,0.976154,0.149337,0.0,407.0,all,TrpB4,none,96,max


In [60]:
# Display the per-library statistics table.
lib_df

Unnamed: 0,lib,n_site,numb_measured,percent_measured,numb_active,percent_active,active_fit_min,parent_fit,parent_rank,mean,...,scale,peaks,peak_kde,percentiles,pdf_values,parent_rank_percent,Q1,Q2,Q3,numb_kde_peak
0,DHFR,3.0,8000.0,100.0,854.0,10.675,0.158614,0.231325,602.0,0.157945,...,0.001737,"[0.1275662158236592, 0.5084256342965795, 0.606...","[18.95715826406059, 0.17704519880582167, 0.098...","[0.1151661417338432, 0.1212571134074551, 0.122...","[15.199742695395578, 15.667091596432273, 16.11...",0.07525,0.125979,0.127567,0.129802,7
3,ParD2,3.0,7882.0,98.525,6533.0,82.885055,8.7e-05,0.973896,12.0,0.228092,...,0.109906,"[0.03337870027347259, 0.6106664209434821, 0.85...","[2.870133679609529, 0.40012326949250876, 0.421...","[-0.2192609576095971, -0.11180451121954202, -0...","[0.026974121384613872, 0.028121215058096317, 0...",0.001522,0.023743,0.112118,0.374642,3
6,ParD3,3.0,7882.0,98.525,7248.0,91.956356,0.000485,0.982799,7.0,0.321921,...,0.156732,"[0.12255350949782506, 0.2818697911295817, 0.89...","[1.5849621685610575, 1.5168268815479702, 0.556...","[-0.2057345859857947, -0.0879267691331092, -0....","[0.01869666257662741, 0.019490036460858295, 0....",0.000888,0.112217,0.276349,0.470212,3
9,GB1,4.0,149361.0,93.350625,34545.0,23.128528,0.001141,0.11413,3644.0,0.009189,...,0.000279,"[0.001001001001001001, 0.15015015015015015, 0....","[84.94812751924496, 0.1471129752911041, 0.0918...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[84.45520645025827, 84.94812751924496, 80.8935...",0.024397,0.000156,0.000386,0.001026,33
12,TrpB3A,3.0,7971.0,99.6375,59.0,0.740183,0.097444,1.0,1.0,-0.039231,...,0.024425,"[-0.3944951616078841, -0.3170232081852239, -0....","[0.005954476272084835, 0.020369915569709796, 0...","[-0.5794792136579096, -0.159409524561979, -0.1...","[0.0059580689404221615, 0.005853464928384204, ...",0.000125,-0.066102,-0.039751,-0.013193,9
15,TrpB3B,3.0,7996.0,99.95,18.0,0.225113,0.086085,1.0,1.0,-0.055223,...,0.017935,"[-0.3315353103440577, -0.25764490240877025, -0...","[0.00787295674657887, 0.017643469342674266, 0....","[-0.5064595413745341, -0.14947773198508532, -0...","[0.00789062638682396, 0.007669391706426372, 0....",0.000125,-0.074532,-0.055559,-0.036157,8
18,TrpB3C,3.0,7994.0,99.925,35.0,0.437828,0.207105,1.0,1.0,-0.073327,...,0.0435,"[-0.7129431024521451, -0.676881142400521, -0.4...","[0.0036129353359320897, 0.003617641013231226, ...","[-0.8012949045786242, -0.3009295839772114, -0....","[0.0035276345242041393, 0.003499097774177849, ...",0.000125,-0.120727,-0.073272,-0.026945,8
21,TrpB3D,3.0,7763.0,97.0375,719.0,9.261883,0.078985,0.820029,10.0,0.018834,...,0.020788,"[-0.4327437550162602, -0.3044598574343214, 0.0...","[0.007988036362784934, 0.024487960087067218, 1...","[-0.4399507155545714, -0.11762175754299561, -0...","[0.007441392159327368, 0.007659122367135711, 0...",0.001288,-0.014157,0.005591,0.030403,13
24,TrpB3E,3.0,7964.0,99.55,161.0,2.021597,0.04191,0.610622,5.0,0.002082,...,0.010094,"[-0.16948753940433153, -0.12729633166933874, 0...","[0.009396174163469292, 0.018638792520651776, 2...","[-0.3171567664768063, -0.05056246639934719, -0...","[0.009422634305217661, 0.009137261673449245, 0...",0.000628,-0.009732,0.00092,0.011806,15
27,TrpB3F,3.0,7737.0,96.7125,82.0,1.059842,0.092374,1.0,1.0,-0.02314,...,0.020248,"[-0.36511112988020694, -0.33868962414059, -0.2...","[0.034437567424634365, 0.011899409215382208, 0...","[-0.4663935685487384, -0.16674853131382197, -0...","[0.006074089727591127, 0.005983960305116239, 0...",0.000129,-0.044622,-0.023111,-0.001569,15


In [59]:
# rows for zs "ev_score" with one-hot encoding, the boosting model,
# n_mut_cutoff "all", n_sample 96 and n_top 96
ev_rows = mlde_df[
    (mlde_df["zs"] == "ev_score")
    & (mlde_df["encoding"] == "one-hot")
    & (mlde_df["model"] == "boosting")
    & (mlde_df["n_mut_cutoff"] == "all")
    & (mlde_df["n_sample"] == 96)
    & (mlde_df["n_top"] == 96)
]
# per-library mean of the metric columns
ev_rows[["lib", "top_maxes", "top_means", "ndcgs", "rhos", "if_truemaxs"]].groupby(
    "lib"
).mean()

Unnamed: 0_level_0,top_maxes,top_means,ndcgs,rhos,if_truemaxs
lib,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
DHFR,0.967801,0.584346,0.923395,0.354666,0.486667
GB1,0.603706,0.108976,0.711909,0.256579,0.033333
ParD2,0.998108,0.882099,0.985698,0.723024,0.833333
ParD3,0.993312,0.921569,0.988419,0.715301,0.493333
TrpB3A,0.460531,-0.008478,0.985587,0.253697,0.26
TrpB3B,0.201805,-0.043848,0.985403,0.116366,0.093333
TrpB3C,0.37172,-0.034676,0.983026,0.169853,0.106667
TrpB3D,0.876876,0.156393,0.975104,0.256152,0.306667
TrpB3E,0.574776,0.033998,0.982386,0.056994,0.38
TrpB3F,0.86676,0.051963,0.983977,0.111943,0.68


In [36]:
# Columns of simple_merge_pw used as descriptors (one output row each).
descriptor_cols = [
    "percent_active",
    "parent_rank_percent",
    # "parent_rank",
    "mean",
    "std",
    "range",
    "iqr",
    "std_dev",
    "variance",
    "skewness",
    "kurt",
    "loc",
    "scale",
    "numb_kde_peak",
    "Q1",
    "Q2",
    "Q3",
    "n_locopt",
    "fraction_non-magnitude",
    "fraction_reciprocal-sign",
    "norm_non-magnitude",
    "norm_reciprocal-sign",
]

# Columns of simple_merge_pw each descriptor is correlated against
# (one output column each).
outcome_cols = [
    "single_step_DE_mean_all",
    "single_step_DE_median_all",
    "single_step_DE_mean_top96",
    "single_step_DE_median_top96",
    "single_step_DE_mean_top384",
    "single_step_DE_median_top384",
    "single_step_DE_fraction_max",
    "recomb_SSM_mean_all",
    "recomb_SSM_median_all",
    "recomb_SSM_mean_top96",
    "recomb_SSM_median_top96",
    "recomb_SSM_mean_top384",
    "recomb_SSM_median_top384",
    "recomb_SSM_fraction_max",
    "top96_SSM_mean_all",
    "top96_SSM_median_all",
    "top96_SSM_mean_top96",
    "top96_SSM_median_top96",
    "top96_SSM_mean_top384",
    "top96_SSM_median_top384",
    "top96_SSM_fraction_max",
    "top_maxes",
    "top_means",
    "ndcgs",
    "rhos",
    "if_truemaxs",
    "maxes_Triad",
    "means_Triad",
    "ndcgs_Triad",
    "rhos_Triad",
    "if_truemaxs_Triad",
    "maxes_ev",
    "means_ev",
    "ndcgs_ev",
    "rhos_ev",
    "if_truemaxs_ev",
    "maxes_esm",
    "means_esm",
    "ndcgs_esm",
    "rhos_esm",
    "if_truemaxs_esm",
    "maxes_esmif",
    "means_esmif",
    "ndcgs_esmif",
    "rhos_esmif",
    "if_truemaxs_esmif",
    "n_locopt",
    "mlde_single_step_DE_delta",
    "mlde_recomb_SSM_delta",
    "mlde_top96_SSM_delta",
    "Triad_single_step_DE_delta",
    "Triad_recomb_SSM_delta",
    "Triad_top96_SSM_delta",
    "ev_single_step_DE_delta",
    "ev_recomb_SSM_delta",
    "ev_top96_SSM_delta",
    "esm_single_step_DE_delta",
    "esm_recomb_SSM_delta",
    "esm_top96_SSM_delta",
    "esmif_single_step_DE_delta",
    "esmif_recomb_SSM_delta",
    "esmif_top96_SSM_delta",
    "delta_ft_mlde",
    "delta_ft_de",
]

# Collect one record per descriptor and build the frame once at the end,
# instead of growing it row by row through the private `DataFrame._append`.
# NOTE(review): Spearman's rho can only be +1, -1 or NaN when simple_merge_pw
# has just two rows — check how many libraries survive the inner merges
# before interpreting this table.
simple_corr_records = []

for des in descriptor_cols:

    simple_corr_row = {"descriptor": des}

    for val in outcome_cols:
        # [0] is the correlation coefficient; the p-value is not kept
        simple_corr_row[val] = spearmanr(simple_merge_pw[des], simple_merge_pw[val])[0]

    simple_corr_records.append(simple_corr_row)

simple_corr_df = pd.DataFrame(simple_corr_records)

display(simple_corr_df.round(3).style.background_gradient(cmap="YlGnBu"))

  simple_corr_row[val] = spearmanr(simple_merge_pw[des], simple_merge_pw[val])[0]
  simple_corr_row[val] = spearmanr(simple_merge_pw[des], simple_merge_pw[val])[0]
  simple_corr_row[val] = spearmanr(simple_merge_pw[des], simple_merge_pw[val])[0]
  simple_corr_row[val] = spearmanr(simple_merge_pw[des], simple_merge_pw[val])[0]
  simple_corr_row[val] = spearmanr(simple_merge_pw[des], simple_merge_pw[val])[0]
  simple_corr_row[val] = spearmanr(simple_merge_pw[des], simple_merge_pw[val])[0]
  simple_corr_row[val] = spearmanr(simple_merge_pw[des], simple_merge_pw[val])[0]
  simple_corr_row[val] = spearmanr(simple_merge_pw[des], simple_merge_pw[val])[0]
  simple_corr_row[val] = spearmanr(simple_merge_pw[des], simple_merge_pw[val])[0]
  simple_corr_row[val] = spearmanr(simple_merge_pw[des], simple_merge_pw[val])[0]
  simple_corr_row[val] = spearmanr(simple_merge_pw[des], simple_merge_pw[val])[0]
  simple_corr_row[val] = spearmanr(simple_merge_pw[des], simple_merge_pw[val])[0]
  simple_corr_ro

Unnamed: 0,descriptor,single_step_DE_mean_all,single_step_DE_median_all,single_step_DE_mean_top96,single_step_DE_median_top96,single_step_DE_mean_top384,single_step_DE_median_top384,single_step_DE_fraction_max,recomb_SSM_mean_all,recomb_SSM_median_all,recomb_SSM_mean_top96,recomb_SSM_median_top96,recomb_SSM_mean_top384,recomb_SSM_median_top384,recomb_SSM_fraction_max,top96_SSM_mean_all,top96_SSM_median_all,top96_SSM_mean_top96,top96_SSM_median_top96,top96_SSM_mean_top384,top96_SSM_median_top384,top96_SSM_fraction_max,top_maxes,top_means,ndcgs,rhos,if_truemaxs,maxes_Triad,means_Triad,ndcgs_Triad,rhos_Triad,if_truemaxs_Triad,maxes_ev,means_ev,ndcgs_ev,rhos_ev,if_truemaxs_ev,maxes_esm,means_esm,ndcgs_esm,rhos_esm,if_truemaxs_esm,maxes_esmif,means_esmif,ndcgs_esmif,rhos_esmif,if_truemaxs_esmif,n_locopt,mlde_single_step_DE_delta,mlde_recomb_SSM_delta,mlde_top96_SSM_delta,Triad_single_step_DE_delta,Triad_recomb_SSM_delta,Triad_top96_SSM_delta,ev_single_step_DE_delta,ev_recomb_SSM_delta,ev_top96_SSM_delta,esm_single_step_DE_delta,esm_recomb_SSM_delta,esm_top96_SSM_delta,esmif_single_step_DE_delta,esmif_recomb_SSM_delta,esmif_top96_SSM_delta,delta_ft_mlde,delta_ft_de
0,percent_active,1.0,-1.0,,,,,-1.0,1.0,1.0,-1.0,,-1.0,-1.0,-1.0,-1.0,-1.0,,,,,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,-1.0
1,parent_rank_percent,-1.0,1.0,,,,,1.0,-1.0,-1.0,1.0,,1.0,1.0,1.0,1.0,1.0,,,,,1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,1.0
2,mean,1.0,-1.0,,,,,-1.0,1.0,1.0,-1.0,,-1.0,-1.0,-1.0,-1.0,-1.0,,,,,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,-1.0
3,std,1.0,-1.0,,,,,-1.0,1.0,1.0,-1.0,,-1.0,-1.0,-1.0,-1.0,-1.0,,,,,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,-1.0
4,range,-1.0,1.0,,,,,1.0,-1.0,-1.0,1.0,,1.0,1.0,1.0,1.0,1.0,,,,,1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,1.0
5,iqr,1.0,-1.0,,,,,-1.0,1.0,1.0,-1.0,,-1.0,-1.0,-1.0,-1.0,-1.0,,,,,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,-1.0
6,std_dev,-1.0,1.0,,,,,1.0,-1.0,-1.0,1.0,,1.0,1.0,1.0,1.0,1.0,,,,,1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,1.0
7,variance,-1.0,1.0,,,,,1.0,-1.0,-1.0,1.0,,1.0,1.0,1.0,1.0,1.0,,,,,1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,1.0
8,skewness,-1.0,1.0,,,,,1.0,-1.0,-1.0,1.0,,1.0,1.0,1.0,1.0,1.0,,,,,1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,1.0
9,kurt,-1.0,1.0,,,,,1.0,-1.0,-1.0,1.0,,1.0,1.0,1.0,1.0,1.0,,,,,1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,1.0


In [37]:
# descriptor correlations with "mlde_recomb_SSM_delta" only, colour-graded
display(
    simple_corr_df[["descriptor", "mlde_recomb_SSM_delta"]]
    .round(3)
    .style.background_gradient(cmap="YlGnBu")
)

Unnamed: 0,descriptor,mlde_recomb_SSM_delta
0,percent_active,-1.0
1,parent_rank_percent,1.0
2,mean,-1.0
3,std,-1.0
4,range,1.0
5,iqr,-1.0
6,std_dev,1.0
7,variance,1.0
8,skewness,1.0
9,kurt,1.0


In [38]:
# descriptor correlations with "delta_ft_mlde" only, colour-graded
display(
    simple_corr_df[["descriptor", "delta_ft_mlde"]]
    .round(3)
    .style.background_gradient(cmap="YlGnBu")
)

Unnamed: 0,descriptor,delta_ft_mlde
0,percent_active,1.0
1,parent_rank_percent,-1.0
2,mean,1.0
3,std,1.0
4,range,-1.0
5,iqr,1.0
6,std_dev,-1.0
7,variance,-1.0
8,skewness,-1.0
9,kurt,-1.0


In [39]:
# descriptor correlations with "delta_ft_de" only, colour-graded
display(
    simple_corr_df[["descriptor", "delta_ft_de"]]
    .round(3)
    .style.background_gradient(cmap="YlGnBu")
)

Unnamed: 0,descriptor,delta_ft_de
0,percent_active,-1.0
1,parent_rank_percent,1.0
2,mean,-1.0
3,std,-1.0
4,range,1.0
5,iqr,-1.0
6,std_dev,1.0
7,variance,1.0
8,skewness,1.0
9,kurt,1.0


In [15]:

import bokeh
from bokeh.io import show, export_svg, export_png
from bokeh.plotting import show
from bokeh.themes.theme import Theme

import holoviews as hv
from holoviews import opts, dim

from SSMuLA.vis import LIB_COLORS, save_bokeh_hv


# Load the HoloViews "bokeh" and "matplotlib" backends, then have Bokeh
# render its output inline in the notebook.
hv.extension("bokeh", "matplotlib")
bokeh.io.output_notebook()


In [13]:
# Quick look: percent_active (log x-axis) against delta_ft_mlde, one colour
# per library.
lib_color = dim("lib").categorize(LIB_COLORS)

test_scatter = hv.Scatter(
    simple_merge_pw, "percent_active", ["delta_ft_mlde", "lib"]
).opts(marker="o", size=10, color=lib_color)

# split by library and overlay so every library gets its own legend entry
test_scatter.groupby("lib").overlay().opts(
    hv.opts.Scatter(
        height=400,
        width=500,
        size=10,
        tools=["hover"],
        show_legend=True,
        color=lib_color,
        legend_position="right",
        legend_offset=(5, 0),
        title="test",
        logx=True,
    )
)

In [40]:
# columns of simple_merge_pw plotted on the x-axis
rule_factors = [
    "percent_active",
    "skewness",
    "kurt",
    "numb_kde_peak",
    "n_locopt",
    "fraction_non-magnitude",
    "fraction_reciprocal-sign",
    "norm_non-magnitude",
    "norm_reciprocal-sign",
]

for fac in tqdm(rule_factors):

    # (y-axis column, output sub-directory, plot title)
    delta_plots = [
        (
            "mlde_recomb_SSM_delta",
            "rules",
            f"{fac} vs delta MLDE and DE max fitness achieved",
        ),
        (
            "delta_ft_mlde",
            "rules_ft",
            f"{fac} vs delta ftMLDE and MLDE max fitness achieved",
        ),
    ]

    for delta_type, subdir, title in delta_plots:

        # every plot is saved twice: log-scaled x-axis first, then linear
        for logx in [True, False]:

            plot_path = f"results/{subdir}/logx/96" if logx else f"results/{subdir}/96"

            # build a fresh plot for each save; one colour per library
            lib_scatter = hv.Scatter(simple_merge_pw, fac, [delta_type, "lib"]).opts(
                marker="o", size=10, color=dim("lib").categorize(LIB_COLORS)
            )
            lib_overlay = (
                lib_scatter.groupby("lib")
                .overlay()
                .opts(
                    hv.opts.Scatter(
                        height=400,
                        width=500,
                        marker="o",
                        size=10,
                        tools=["hover"],
                        show_legend=True,
                        color=dim("lib").categorize(LIB_COLORS),
                        legend_position="right",
                        legend_offset=(5, -20),
                        title=title,
                        logx=logx,
                    )
                )
            )

            save_bokeh_hv(
                plot_obj=lib_overlay,
                plot_name=title,
                plot_path=plot_path,
                bokehorhv="hv",
                dpi=300,
                scale=2,
            )

  0%|          | 0/9 [00:00<?, ?it/s]

Making results/rules/logx/96 ...
Making results/rules/96 ...
Making results/rules_ft/logx/96 ...
Making results/rules_ft/96 ...


100%|██████████| 9/9 [00:33<00:00,  3.73s/it]
