In [1]:
%cd ~/SSMuLA

/disk2/fli/SSMuLA


In [2]:
%load_ext autoreload
%autoreload 2
%load_ext blackcellmagic

In [3]:
# Data manipulation
import pandas as pd
import numpy as np

# Basic plotting
import holoviews as hv
from bokeh.io import export_svg
from bokeh.models import NumeralTickFormatter

# Imports for AUC/ROC
from sklearn.metrics import roc_curve, auc

from bokeh.themes.theme import Theme

import panel as pn
pn.config.comms = "vscode"


# Correlation

# Large data plotting
from holoviews.operation.datashader import rasterize

hv.extension('bokeh')

from SSMuLA.vis import JSON_THEME, one_decimal_x, one_decimal_y, fixmargins

hv.renderer('bokeh').theme = JSON_THEME


In [4]:
# combine zs scores and fitness
triad = pd.read_csv("triad/DHFR/DHFR.csv")
triad.columns

Index(['AAs', 'Triad_score', 'Triad_rank'], dtype='object')

In [5]:
ev_esm = pd.read_csv("ev_esm/DHFR/DHFR.csv")
ev_esm.columns

Index(['muts', 'fit', 'split', 'seq', 'combo', 'pos', 'esm_score', 'n_mut',
       'ev_score'],
      dtype='object')

In [6]:
triad

Unnamed: 0,AAs,Triad_score,Triad_rank
0,HIF,-646.11885,1
1,HVF,-646.04873,2
2,QVF,-645.85691,3
3,TIF,-645.82673,4
4,FIF,-645.77301,5
...,...,...,...
7995,CPP,-598.55896,7996
7996,VPP,-598.08691,7997
7997,PRP,-597.97744,7998
7998,IPP,-596.24364,7999


In [7]:
ev_esm

Unnamed: 0,muts,fit,split,seq,combo,pos,esm_score,n_mut,ev_score
0,D27A:L28A,0.128071,double,MISLIAALAVDRVIGMENAMPWNLPAAAAWFKRNTLNKPVIMGRHT...,"['A', 'A']","[27, 28]",5.825695,2,-15.584210
1,D27A:L28C,0.126441,double,MISLIAALAVDRVIGMENAMPWNLPAACAWFKRNTLNKPVIMGRHT...,"['A', 'C']","[27, 28]",4.896708,2,-19.656645
2,D27A:L28D,0.126975,double,MISLIAALAVDRVIGMENAMPWNLPAADAWFKRNTLNKPVIMGRHT...,"['A', 'D']","[27, 28]",14.738744,2,-19.750032
3,D27A:L28E,0.125828,double,MISLIAALAVDRVIGMENAMPWNLPAAEAWFKRNTLNKPVIMGRHT...,"['A', 'E']","[27, 28]",11.558820,2,-17.422083
4,D27A:L28F,0.125645,double,MISLIAALAVDRVIGMENAMPWNLPAAFAWFKRNTLNKPVIMGRHT...,"['A', 'F']","[27, 28]",3.085036,2,-13.377442
...,...,...,...,...,...,...,...,...,...
7995,A26Y:D27Y:L28S,0.128348,multi,MISLIAALAVDRVIGMENAMPWNLPYYSAWFKRNTLNKPVIMGRHT...,"['Y', 'Y', 'S']","[26, 27, 28]",-26.788731,3,-21.890776
7996,A26Y:D27Y:L28T,0.126103,multi,MISLIAALAVDRVIGMENAMPWNLPYYTAWFKRNTLNKPVIMGRHT...,"['Y', 'Y', 'T']","[26, 27, 28]",-28.418034,3,-21.997396
7997,A26Y:D27Y:L28V,0.127120,multi,MISLIAALAVDRVIGMENAMPWNLPYYVAWFKRNTLNKPVIMGRHT...,"['Y', 'Y', 'V']","[26, 27, 28]",-26.637584,3,-22.079033
7998,A26Y:D27Y:L28W,0.122598,multi,MISLIAALAVDRVIGMENAMPWNLPYYWAWFKRNTLNKPVIMGRHT...,"['Y', 'Y', 'W']","[26, 27, 28]",-29.665802,3,-22.602000


In [8]:
ev_esm[ev_esm["muts"]=="WT"]

Unnamed: 0,muts,fit,split,seq,combo,pos,esm_score,n_mut,ev_score
49,WT,0.231325,single,MISLIAALAVDRVIGMENAMPWNLPADLAWFKRNTLNKPVIMGRHT...,['NA'],,,1,


In [9]:
fit_df = pd.read_csv("data/DHFR/scale2max/DHFR.csv")
fit_df.columns

Index(['AAs', 'AA1', 'AA2', 'AA3', 'fitness', 'active', 'muts'], dtype='object')

In [10]:
fit_df

Unnamed: 0,AAs,AA1,AA2,AA3,fitness,active,muts
0,***,*,*,*,0.126525,False,A26*:D27*:L28*
1,**A,*,*,A,0.123933,False,A26*:D27*:L28A
2,**C,*,*,C,0.123492,False,A26*:D27*:L28C
3,**D,*,*,D,0.125148,False,A26*:D27*:L28D
4,**E,*,*,E,0.124846,False,A26*:D27*:L28E
...,...,...,...,...,...,...,...
9256,YYS,Y,Y,S,0.128348,False,A26Y:D27Y:L28S
9257,YYT,Y,Y,T,0.126103,False,A26Y:D27Y:L28T
9258,YYV,Y,Y,V,0.127120,False,A26Y:D27Y:L28V
9259,YYW,Y,Y,W,0.122598,False,A26Y:D27Y:L28W


In [11]:
# merge the dataframes
df_no_stop = fit_df[~fit_df["AAs"].str.contains("\*")]
zs_df = pd.merge(pd.merge(df_no_stop, ev_esm.drop(columns=["fit", "seq", "combo"]), on="muts"), triad, on="AAs")
zs_df

Unnamed: 0,AAs,AA1,AA2,AA3,fitness,active,muts,split,pos,esm_score,n_mut,ev_score,Triad_score,Triad_rank
0,AAA,A,A,A,0.128071,False,D27A:L28A,double,"[27, 28]",5.825695,2,-15.584210,-635.53820,3285
1,AAC,A,A,C,0.126441,False,D27A:L28C,double,"[27, 28]",4.896708,2,-19.656645,-635.81210,3131
2,AAD,A,A,D,0.126975,False,D27A:L28D,double,"[27, 28]",14.738744,2,-19.750032,-634.26480,4108
3,AAE,A,A,E,0.125828,False,D27A:L28E,double,"[27, 28]",11.558820,2,-17.422083,-635.58947,3256
4,AAF,A,A,F,0.125645,False,D27A:L28F,double,"[27, 28]",3.085036,2,-13.377442,-642.37668,388
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7995,YYS,Y,Y,S,0.128348,False,A26Y:D27Y:L28S,multi,"[26, 27, 28]",-26.788731,3,-21.890776,-633.87757,4368
7996,YYT,Y,Y,T,0.126103,False,A26Y:D27Y:L28T,multi,"[26, 27, 28]",-28.418034,3,-21.997396,-634.12676,4204
7997,YYV,Y,Y,V,0.127120,False,A26Y:D27Y:L28V,multi,"[26, 27, 28]",-26.637584,3,-22.079033,-634.44773,3988
7998,YYW,Y,Y,W,0.122598,False,A26Y:D27Y:L28W,multi,"[26, 27, 28]",-29.665802,3,-22.602000,-637.60729,2075


In [12]:
#  add rank column for each score
for zs in ["esm", "ev"]:
    zs_df[f"{zs}_rank"] = zs_df[f"{zs}_score"].rank(ascending=False)

In [13]:
zs_df

Unnamed: 0,AAs,AA1,AA2,AA3,fitness,active,muts,split,pos,esm_score,n_mut,ev_score,Triad_score,Triad_rank,esm_rank,ev_rank
0,AAA,A,A,A,0.128071,False,D27A:L28A,double,"[27, 28]",5.825695,2,-15.584210,-635.53820,3285,613.0,813.0
1,AAC,A,A,C,0.126441,False,D27A:L28C,double,"[27, 28]",4.896708,2,-19.656645,-635.81210,3131,694.0,2309.0
2,AAD,A,A,D,0.126975,False,D27A:L28D,double,"[27, 28]",14.738744,2,-19.750032,-634.26480,4108,165.0,2379.5
3,AAE,A,A,E,0.125828,False,D27A:L28E,double,"[27, 28]",11.558820,2,-17.422083,-635.58947,3256,274.0,1336.0
4,AAF,A,A,F,0.125645,False,D27A:L28F,double,"[27, 28]",3.085036,2,-13.377442,-642.37668,388,887.0,362.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7995,YYS,Y,Y,S,0.128348,False,A26Y:D27Y:L28S,multi,"[26, 27, 28]",-26.788731,3,-21.890776,-633.87757,4368,7768.0,4252.0
7996,YYT,Y,Y,T,0.126103,False,A26Y:D27Y:L28T,multi,"[26, 27, 28]",-28.418034,3,-21.997396,-634.12676,4204,7867.0,4376.0
7997,YYV,Y,Y,V,0.127120,False,A26Y:D27Y:L28V,multi,"[26, 27, 28]",-26.637584,3,-22.079033,-634.44773,3988,7761.0,4544.0
7998,YYW,Y,Y,W,0.122598,False,A26Y:D27Y:L28W,multi,"[26, 27, 28]",-29.665802,3,-22.602000,-637.60729,2075,7913.0,5007.0


In [14]:
from SSMuLA.fitness_process_vis import LibData

In [37]:
class ZS_df(LibData):
    """
    A class to process and visualize the ZS data
    """

    def __init__(
        self,
        input_csv: str,
        scale_fit: str,
        ev_esm_folder: str = "ev_esm",
        triad_folder: str = "triad",
    ) -> None:

        """
        Args:
        - input_csv, str: path to the input csv file
        - scale_fit, str: ways to scale the fitness
            'parent' means the parent fitness = 1
            'max' means max fitness = 1
        """

        super().__init__(input_csv, scale_fit)

        self._scale_fit = scale_fit
        self._ev_esm_folder = ev_esm_folder
        self._triad_folder = triad_folder

    @property
    def df_no_stop(self) -> pd.DataFrame:
        """
        Returns the dataframe without stop codons
        """
        df = self.input_df[~self.input_df["AAs"].str.contains("\*")].copy()
        # Add rank column for fitness
        df["fitness_rank"] = df["fitness"].rank(ascending=False)
        return df

    @property
    def ev_esm_df(self) -> pd.DataFrame:
        """
        Returns the dataframe with the ESM and EV scores
        """

        df = pd.read_csv(f"{self._ev_esm_folder}/{self.lib_name}/{self.lib_name}.csv")

        # find parent row
        parent_row = df[df["muts"] == "WT"]

        for zs in ["esm", "ev"]:

            parent_zs = df.loc[parent_row.index, f"{zs}_score"].values[0]

            if np.isnan(parent_zs):
                df.loc[parent_row.index, f"{zs}_score"] = 0
            
            # Add rank column for each score
            df[f"{zs}_rank"] = df[f"{zs}_score"].rank(ascending=False)

        # prevent duplicates
        if "active" in df.columns:
            df = df.drop(columns=["active"])

        return df.drop(columns=["fit", "seq", "combo"]).copy()

    @property
    def triad_df(self) -> pd.DataFrame:

        """
        Returns the dataframe with the triad scores
        """

        df = pd.read_csv(f"{self._triad_folder}/{self.lib_name}/{self.lib_name}.csv")
        # flip triad score
        df["Triad_score"] = df["Triad_score"] * -1

        return df.copy()

    @property
    def zs_df(self) -> pd.DataFrame:
        """
        Returns the dataframe with the ZS scores
        """
        return pd.merge(
            pd.merge(self.df_no_stop, self.ev_esm_df, on="muts"),
            self.triad_df,
            on="AAs",
        ).copy()

In [38]:
trpb = ZS_df(input_csv="data/TrpB/scale2max/TrpB4.csv", scale_fit="max").zs_df
trpb

Unnamed: 0,AAs,AA1,AA2,AA3,AA4,# Stop,fitness,active,muts,fitness_rank,split,pos,esm_score,n_mut,ev_score,esm_rank,ev_rank,Triad_score,Triad_rank
0,TAAA,T,A,A,A,0.0,0.033674,False,V183T:F184A:V227A:S228A,23152.0,multi,"[183, 184, 227, 228]",6.204131,4,-15.195247,36484.0,33142.0,1427.63220,1640
1,TAAC,T,A,A,C,0.0,0.014304,False,V183T:F184A:V227A:S228C,71435.0,multi,"[183, 184, 227, 228]",5.983361,4,-17.400536,37170.0,53186.5,1423.36305,3835
2,TAAD,T,A,A,D,0.0,-0.003646,False,V183T:F184A:V227A:S228D,131753.0,multi,"[183, 184, 227, 228]",1.475934,4,-16.008246,52717.0,40133.0,1424.91372,2945
3,TAAE,T,A,A,E,0.0,0.030855,False,V183T:F184A:V227A:S228E,26741.0,multi,"[183, 184, 227, 228]",4.285971,4,-15.893011,42625.0,39049.0,1421.96361,4678
4,TAAF,T,A,A,F,0.0,0.004310,False,V183T:F184A:V227A:S228F,111471.0,multi,"[183, 184, 227, 228]",3.990672,4,-17.400536,43628.0,53186.5,1428.05818,1490
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15930,WYYS,W,Y,Y,S,0.0,0.001865,False,V183W:F184Y:V227Y,118906.0,multi,"[183, 184, 227]",-5.799557,3,-23.469699,83057.0,124423.5,1427.36091,1750
15931,WYYT,W,Y,Y,T,0.0,0.012376,False,V183W:F184Y:V227Y:S228T,79773.0,multi,"[183, 184, 227, 228]",-8.237480,4,-26.340889,93727.0,151748.5,1424.72629,3037
15932,WYYV,W,Y,Y,V,0.0,0.003855,False,V183W:F184Y:V227Y:S228V,112953.0,multi,"[183, 184, 227, 228]",-2.150585,4,-26.980332,67317.0,154845.5,1422.87240,4149
15933,WYYW,W,Y,Y,W,0.0,0.015704,False,V183W:F184Y:V227Y:S228W,65641.0,multi,"[183, 184, 227, 228]",-15.023504,4,-26.470016,121159.0,152425.5,1424.94976,2922


In [48]:
gb1[gb1["fitness"]<0]

Unnamed: 0,AAs,fitness,active,muts,fitness_rank,split,pos,esm_score,n_mut,ev_score,esm_rank,ev_rank,Triad_score,Triad_rank


In [39]:
gb1 = ZS_df(input_csv="data/GB1/scale2max/GB1.csv", scale_fit="max").zs_df
gb1

Unnamed: 0,AAs,fitness,active,muts,fitness_rank,split,pos,esm_score,n_mut,ev_score,esm_rank,ev_rank,Triad_score,Triad_rank
0,VDGV,0.114130,True,WT,3644.0,single,,0.000000,1,0.000000,73797.0,1.0,190.86905,17
1,ADGV,0.007066,True,V39A,13482.0,single,[39],0.882028,1,-6.846682,63685.0,78.5,185.23132,280
2,CDGV,0.027646,True,V39C,8295.0,single,[39],-1.579453,1,-6.846682,90987.0,78.5,186.18468,149
3,DDGV,0.000739,False,V39D,47275.0,single,[39],1.118805,1,-6.846682,61072.0,78.5,183.11447,624
4,EDGV,0.003734,True,V39E,17053.0,single,[39],0.630723,1,-6.846682,66452.0,78.5,183.74956,510
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149356,YYYR,0.000154,False,V39Y:D40Y:G41Y:V54R,112286.0,multi,"[39, 40, 41, 54]",1.428794,4,-28.470377,57922.0,111920.0,142.21230,128384
149357,YYYS,0.000505,False,V39Y:D40Y:G41Y:V54S,61661.5,multi,"[39, 40, 41, 54]",3.801755,4,-28.470377,34334.0,111920.0,159.58563,79654
149358,YYYT,0.002420,True,V39Y:D40Y:G41Y:V54T,21029.5,multi,"[39, 40, 41, 54]",5.224649,4,-28.470377,23403.0,111920.0,160.73711,71573
149359,YYYW,0.001043,False,V39Y:D40Y:G41Y:V54W,36917.0,multi,"[39, 40, 41, 54]",-0.977727,4,-28.470377,84420.0,111920.0,157.12225,94039


In [36]:
from scipy.stats import spearmanr

In [42]:
gb1_rank_df = gb1[["fitness_rank", "esm_rank", "ev_rank", "Triad_rank"]]
gb1_rank_df

Unnamed: 0,fitness_rank,esm_rank,ev_rank,Triad_rank
0,3644.0,73797.0,1.0,17
1,13482.0,63685.0,78.5,280
2,8295.0,90987.0,78.5,149
3,47275.0,61072.0,78.5,624
4,17053.0,66452.0,78.5,510
...,...,...,...,...
149356,112286.0,57922.0,111920.0,128384
149357,61661.5,34334.0,111920.0,79654
149358,21029.5,23403.0,111920.0,71573
149359,36917.0,84420.0,111920.0,94039


In [46]:

rasterize(
            hv.Scatter(gb1_rank_df, kdims=["fitness_rank"], vdims=["ev_rank"])
        ).opts(
            cmap='magma', 
            hooks=[fixmargins,one_decimal_y], 
            ylabel='fitness',
            height=400, width=400
        )

BokehModel(combine_events=True, render_bundle={'docs_json': {'0ff4e7f8-c44a-45d3-8480-331d9e747031': {'version…

In [47]:

rasterize(
            hv.Scatter(gb1_rank_df, kdims=["fitness_rank"], vdims=["Triad_rank"])
        ).opts(
            cmap='magma', 
            hooks=[fixmargins,one_decimal_y], 
            ylabel='fitness',
            height=400, width=400
        )

BokehModel(combine_events=True, render_bundle={'docs_json': {'62964411-d0c7-4cc6-8a93-45dd84a8c8f4': {'version…

In [40]:
melt_df = gb1[["fitness_rank", "esm_rank", "ev_rank", "Triad_rank"]].melt()
melt_df

Unnamed: 0,variable,value
0,fitness_rank,3644.0
1,fitness_rank,13482.0
2,fitness_rank,8295.0
3,fitness_rank,47275.0
4,fitness_rank,17053.0
...,...,...
597439,Triad_rank,128384.0
597440,Triad_rank,79654.0
597441,Triad_rank,71573.0
597442,Triad_rank,94039.0


In [35]:
hv.Violin(melt_df, kdims="variable", vdims="value").opts(
    width=600, height=400, box_fill_color="black", # violin_fill_color="blue"
)

### Compute AUC/ROC for each predictor

In [18]:
hv.extension('bokeh')

In [50]:
from sklearn.metrics import ndcg_score
from scipy.stats import spearmanr

In [51]:
def ndcg_scale(true: np.ndarray, pred: np.ndarray):
    """Calculate the ndcg_score with neg correction"""
    if min(true) < 0:
        true = true - min(true)
    return ndcg_score(true[None, :], pred[None, :])

In [53]:
y_true

array([ True,  True,  True, ...,  True, False, False])

In [54]:
y_score

array([190.86905, 185.23132, 186.18468, ..., 160.73711, 157.12225,
       156.3036 ])

In [55]:
spearmanr(y_true, y_score)

SignificanceResult(statistic=0.3149629884874878, pvalue=0.0)

In [56]:
ndcg_scale(y_true, y_score)

0.9320680831322815

In [71]:
import os
from SSMuLA.vis import save_bokeh_hv

In [73]:
protein = "GB1"
lib = "GB1"
df = gb1
zs_opts = ["Triad_score", "ev_score", "esm_score"]
zs_opt_legend = {"Triad_score": "Triad", "ev_score": "EVMutation", "esm_score": "ESM"}

zs_coord_dict = {zs: {} for zs in zs_opts}

roc_plots = []

for zs in zs_opts:

    print(f"number of nan in {protein} {zs}: {np.sum(np.isnan(y_score))}")

    df = df.dropna(subset=[zs])
    y_true = df["active"].values
    y_score = df[zs].values

    pos_label = True

    # calc rho and ndcg
    zs_coord_dict[zs]["rho"] = spearmanr(y_true, y_score)[0]
    zs_coord_dict[zs]["ndcg"] = ndcg_scale(y_true, y_score)

    # roc curves
    # roc name
    roc_name = f"{lib} fitness vs zero-shot predictor ROC curves"

    fpr, tpr, thresholds = roc_curve(y_true, y_score, pos_label=pos_label)
    temp = pd.DataFrame({"False Positive Rate": fpr, "True Positive Rate": tpr})

    roc_plots.append(
        hv.Curve(
            temp,
            kdims=["False Positive Rate"],
            vdims=["True Positive Rate"],
            label=zs_opt_legend[zs],
        ).opts(
            frame_height=350,
            frame_width=350,
            xlim=(0, 1),
            ylim=(0, 1),
            hooks=[one_decimal_x, one_decimal_y, fixmargins],
            color=hv.Cycle("Category10"),
        )
    )

    roc_auc = auc(fpr, tpr)
    zs_coord_dict[zs]["rocauc"] = roc_auc

roc_plots.append(
    hv.Curve(((0, 1), (0, 1)), "False Positive Rate", "True Positive Rate",).opts(
        color="grey",
        line_dash="dashed",
    )
)

roc = hv.Overlay(roc_plots).opts(
    height=350,
    width=350,
    legend_position="bottom_right",
    xlim=(0, 1),
    ylim=(0, 1),
    hooks=[one_decimal_x, one_decimal_y, fixmargins],
    title=roc_name,
)

save_bokeh_hv(roc,
    plot_name=roc_name,
    plot_path=os.path.join("results/zs_vis", "roc"))

for k, v in zs_coord_dict.items():
    print(f"{k}: {v}")

roc

number of nan in GB1 Triad_score: 0
number of nan in GB1 ev_score: 0
number of nan in GB1 esm_score: 0




Triad_score: {'rho': 0.3149629884874878, 'ndcg': 0.9320680831322815, 'rocauc': 0.7156315512889493}
ev_score: {'rho': 0.18761262540875015, 'ndcg': 0.9014297766587999, 'rocauc': 0.6276901116005121}
esm_score: {'rho': -0.2154606717571286, 'ndcg': 0.8287194346740119, 'rocauc': 0.35249022713434386}


In [20]:
def determine_frac_active(df: pd.DataFrame, zs, score):
    passes = df[df[zs]>score]
    try:
        frac_active = sum(passes['active']) / len(passes)
    except ZeroDivisionError:
        frac_active = 0
        
    frac_library = len(passes) / len(df)
    mean_fitness = passes['fitness'].mean()
    max_fitness = passes['fitness'].max()

    return frac_active, frac_library, mean_fitness, max_fitness

(determine_frac_active(trpb, 'ev_score', -20),
determine_frac_active(gb1, 'ev_score', -15))

((0.08670267114289884,
  0.4299341073109507,
  0.027875091127936073,
  0.7561222228029201),
 (0.5298324478953821,
  0.032766250895481415,
  0.03674039772452413,
  0.7077333663997645))

In [129]:
from SSMuLA.vis import PRESENTATION_PALETTE_SATURATE

In [140]:
def plot_samples(df, protein, zs):
    
    xs = np.linspace(min(df[zs]), max(df[zs]), 100)
    
    samples = np.array([
        determine_frac_active(df, zs, thresh)
        for thresh in xs
    ]).T
    
    p = rasterize(
        hv.Scatter(
            df, 
            kdims=zs, 
            vdims='fitness'
        )
    ).opts(
        # cmap=["black", PRESENTATION_PALETTE_SATURATE["yellow"]], 
        cmap=["black", "#F1D384"],
        # cmap = "Viridis",
        hooks=[fixmargins,one_decimal_y], 
        ylabel='fitness',
    )
    
    p = p * hv.Curve(
        (xs, samples[0]),
        label='fraction active'
    ).opts(color=hv.Cycle('Category10'))*hv.Curve(
        (xs, samples[2]),
        label='mean fitness'
    ).opts(line_dash='dashed', color=hv.Cycle('Category10'))
    
    p = p.opts(
        frame_height=200,
        frame_width=300,
        show_legend=True,
        legend_position='right',
        legend_offset=(5, 0),
        ylabel='fraction or fitness',
        xlabel=f'{zs}',
        title=protein,
    )
    
    return p

In [141]:
TrpB_EVmutation = plot_samples(trpb, 'TrpB', 'ev_score')
TrpB_esm = plot_samples(trpb, 'TrpB', 'esm_score')
TrpB_Triad = plot_samples(trpb, 'TrpB', 'Triad_score')

In [108]:

GB1_EVmutation = plot_samples(gb1, 'GB1', 'ev_score')
GB1_esm = plot_samples(gb1, 'GB1', 'esm_score')
GB1_Triad = plot_samples(gb1, 'GB1', 'Triad_score')

In [142]:
TrpB_EVmutation

BokehModel(combine_events=True, render_bundle={'docs_json': {'3648e4b3-1bf7-4e08-a913-a33fdf1a51d8': {'version…

In [27]:
TrpB_esm

BokehModel(combine_events=True, render_bundle={'docs_json': {'30dba48b-a241-44fa-8d1f-ac06276756d1': {'version…

In [97]:
TrpB_Triad

BokehModel(combine_events=True, render_bundle={'docs_json': {'cfe6f477-1dc4-4b0a-8a5a-ba5ba6489b6a': {'version…

In [96]:
(
    GB1_Triad.opts(show_legend=False, title="GB1")
    + GB1_esm.opts(show_legend=False)
    + GB1_EVmutation
).opts(shared_axes=False)

BokehModel(combine_events=True, render_bundle={'docs_json': {'a06c4fd9-0c08-4c8e-bf46-c74813edf90a': {'version…

In [15]:
folder = '../../../data/output_figures/'

# ROC plot
plot = hv.render(ROC, backend='bokeh')
plot.output_backend = "svg"

filename=f'{folder}figure4a_ROC.svg'
export_svg(plot, filename=filename)

# Zero-shot predictor plots
plot=hv.render(TrpB_EVmutation, backend='bokeh')
plot.output_backend = "svg"

filename=f'{folder}figure4b_TrpB_EVmutation.svg'
export_svg(plot, filename=filename)

plot=hv.render(TrpB_Triad, backend='bokeh')
plot.output_backend = "svg"

filename=f'{folder}figure4b_TrpB_Triad.svg'
export_svg(plot, filename=filename)

plot=hv.render(GB1_EVmutation, backend='bokeh')
plot.output_backend = "svg"

filename=f'{folder}figure4c_GB1_EVmutation.svg'
export_svg(plot, filename=filename)

plot=hv.render(GB1_Triad, backend='bokeh')
plot.output_backend = "svg"

filename=f'{folder}figure4c_GB1_Triad.svg'
export_svg(plot, filename=filename)

['../../../data/output_figures/figure4c_GB1_Triad.svg']

In [16]:
import os
os.system('jupyter nbconvert --to html predictor_analysis.ipynb')

[NbConvertApp] Converting notebook predictor_analysis.ipynb to html
  {%- elif type == 'text/vnd.mermaid' -%}
[NbConvertApp] Writing 2664211 bytes to predictor_analysis.html


0