In [1]:
%cd ~/SSMuLA

/disk2/fli/SSMuLA


In [2]:
%load_ext autoreload
%autoreload 2
%load_ext blackcellmagic

In [3]:
import os
import numpy as np
import pandas as pd
import json
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.colors import LogNorm
import pickle


font = {'family' : 'arial',
        'size'   : 16}
mpl.rc('font', **font)
mpl.rc('lines', linewidth=2)
mpl.rcParams['axes.linewidth'] = 2
mpl.rcParams['xtick.major.width'] = 2
mpl.rcParams['ytick.major.width'] = 2

# General imports
import glob
import os
import re
import pickle
import datetime

# Data manipulation
#import growth_analysis as ga
import pandas as pd
#from multiprocesspandas import applyparallel
import numpy as np
from sklearn.impute import KNNImputer

# Basic plotting
import holoviews as hv
import bokeh
from bokeh.io import export_svg
from bokeh.models import NumeralTickFormatter

from bokeh.themes.theme import Theme

import panel as pn
pn.config.comms = "vscode"

# Making graphs
import matplotlib.pyplot as plt
import itertools
import tqdm
from multiprocessing import Pool
from operator import itemgetter

hv.extension('bokeh')

In [20]:
"""A function for parsing the mlde results"""

from __future__ import annotations

import os
import re
import itertools
from glob import glob
from tqdm import tqdm
from copy import deepcopy

import numpy as np
import pandas as pd
import json
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.colors import LogNorm
import pickle

# Basic plotting
import holoviews as hv
import bokeh
from bokeh.io import export_svg
from bokeh.models import NumeralTickFormatter


import panel as pn

pn.config.comms = "vscode"

# Making graphs
import matplotlib.pyplot as plt

hv.extension("bokeh")


from SSMuLA.landscape_global import n_mut_cutoff_dict
from SSMuLA.vis import save_bokeh_hv
from SSMuLA.util import checkNgen_folder, get_file_name


default_metrics = ["maxes", "means", "ndcgs", "rhos"]


class MLDEParser:
    """A class for parsing EACH mlde result npy file"""

    def __init__(
        self, 
        mlde_npy_path: str, 
        mlde_results_dir: str = "results/mlde/saved"
    ):

        """
        Args:
        - mlde_npy: str, the path to the mlde npy file
            ie. 'results/mlde/saved/none/none-double/scale2max/GB1/one-hot_boosting|ridge_sample384_top96.npy'
        - mlde_results_dir: str, the directory where the mlde results are saved

        Note:
        
        {'data_config': {'input_csv': 'results/zs_comb/none/scale2max/GB1.csv',
            'zs_predictor': 'none',
            'encoding': ['one-hot'],
            'ft_libs': [149361],
            'scale_fit': 'max',
            'filter_min_by': 'none',
            'n_mut_cutoff': 2},
            'model_config': {'model_classes': ['boosting', 'ridge']},
            'train_config': {'n_sample': [384],
            'n_splits': 5,
            'n_replicate': 100,
            'n_worker': 1,
            'global_seed': 42,
            'verbose': False,
            'save_model': False},
            'eval_config': {'n_top': 96}}

        """

        self._mlde_npy_path = mlde_npy_path
        self._mlde_results_dir = mlde_results_dir

        # get all npy keys as properties
        # should be ['config', 'top_seqs', 'maxes', 'means', 'ndcgs', 'rhos', 'unique', 'labelled', 'y_preds']
        for attr, val in self.npy_item.items():
            setattr(self, attr, val)
        
        if not hasattr(self, 'config'):
            print(f"no config found for {self._mlde_npy_path}")
            pass

        # set all config_dict keys as properties
        # should be ['data_config', 'model_config', 'train_config', 'eval_config']
        for attr, val in self.config.items():
            setattr(self, attr, val)
            for k, v in val.items():
                setattr(self, k, v)
                if isinstance(v, list):
                    setattr(self, f"{k}_len", len(v))

        self._metric_df = self._get_metric_df()    
            
    def _get_metric_df(self) -> pd.DataFrame:
        """Return the metric df"""

        # set up df for all metrics
        metric_df = pd.DataFrame(
                    {
                        "encoding": self.encoding_index,
                        "models": self.models_index,
                        "n_sample": self.n_sample_index,
                        "ft_lib": self.lib_index,
                        "repeats": self.repeats_index,
                    }
                )
        
        metric_df['encoding'] = metric_df['encoding'].map({i: v for i, v in enumerate(self.encoding)})
        metric_df['models'] = metric_df['models'].map({i: v for i, v in enumerate(self.model_classes)})
        metric_df['n_sample'] = metric_df['n_sample'].map({i: v for i, v in enumerate(self.n_sample)})
        metric_df['ft_lib'] = metric_df['ft_lib'].map({i: v for i, v in enumerate(self.ft_libs)})
        metric_df["n_mut_cutoff"] = n_mut_cutoff_dict[self.n_mut_cutoff]
        metric_df["lib"] = get_file_name(self.input_csv)
        metric_df["zs"] = self.zs_predictor
        metric_df["n_top"] = self.n_top
        
        # get all metrics as properties
        for m in default_metrics:
            m_array = getattr(self, m)
            # get rid of nan col
            try:
                metric_df[m] = m_array[:, ~np.isnan(m_array).any(axis=0)].flatten()
            except:
                print(self._mlde_npy_path, m_array.shape, m_array[:, ~np.isnan(m_array).any(axis=0)].shape)

        return metric_df

    @property
    def npy_item(self) -> dict:
        """Return the npy item"""
        return np.load(self._mlde_npy_path, allow_pickle=True).item()

    @property
    def npy_item_keys(self) -> list[str]:
        """Return the keys of the npy item"""
        return deepcopy(list(self.npy_item.keys()))

    @property
    def output_shape(self) -> tuple:

        """
        Return the shape of the output for maxes, means, ndcgs, rhos, unique, and labelled

            len(encodings),
            len(model_classes),
            len(n_samples),
            len(ft_libs),
            n_replicate,
        """

        return (
            self.encoding_len,
            self.model_classes_len,
            self.n_sample_len,
            self.ft_libs_len,
            self.n_replicate,
        )

    @property
    def top_seq_output_shape(self) -> tuple:

        """
        Return the shape of the output for top_seqs

            len(encodings),
            len(model_classes),
            len(n_samples),
            len(ft_libs),
            n_replicate,
            n_top,
        """

        return (
            self.encoding_len,
            self.model_classes_len,
            self.n_sample_len,
            self.ft_libs_len,
            self.n_replicate,
            self.n_top,
        )

    @property
    def encoding_index(self) -> np.ndarray:
        """Return the encoding index"""
        return np.array(
            [
                [i]
                * self.output_shape[1]
                * self.output_shape[2]
                * self.output_shape[3]
                * self.output_shape[4]
                for i in range(self.output_shape[0])
            ]
        ).flatten()

    @property
    def models_index(self) -> np.ndarray:
        """Return the models index"""
        return np.array(
            [
                [i] * self.output_shape[2] * self.output_shape[3] * self.output_shape[4]
                for i in range(self.output_shape[1])
                for _ in range(self.output_shape[0])
            ]
        ).flatten()

    @property
    def n_sample_index(self) -> np.ndarray:
        """Return the n_sample index"""
        return np.array(
            [
                [i] * self.output_shape[3] * self.output_shape[4]
                for i in range(self.output_shape[2])
                for _ in range(self.output_shape[1])
                for _ in range(self.output_shape[0])
            ]
        ).flatten()

    @property
    def lib_index(self) -> np.ndarray:
        """Return the lib index"""
        return np.array(
            [
                [i] * self.output_shape[4]
                for i in range(self.output_shape[3])
                for _ in range(self.output_shape[2])
                for _ in range(self.output_shape[1])
                for _ in range(self.output_shape[0])
            ]
        ).flatten()

    @property
    def repeats_index(self) -> np.ndarray:
        """Return the repeats index"""
        return np.array(
            [
                i
                for i in range(self.output_shape[4])
                for _ in range(self.output_shape[3])
                for _ in range(self.output_shape[2])
                for _ in range(self.output_shape[1])
                for _ in range(self.output_shape[0])
            ]
        )

    @property
    def metric_df(self) -> pd.DataFrame:
        """Return the metric df"""
        return self._metric_df

In [5]:
gb1_d_parser = MLDEParser(
        mlde_npy_path="results/mlde/saved/none/none-double/scale2max/GB1/one-hot_boosting|ridge_sample384_top96.npy", 
        # mlde_results_dir: str = "results/mlde/saved"
    )

In [6]:
gb1_d_parser.metric_df

Unnamed: 0,encoding,models,n_sample,ft_lib,repeats,n_mut_cutoff,lib,zs,n_top,maxes,means,ndcgs,rhos
0,one-hot,boosting,384,2168,0,double,GB1,none,96,0.614051,0.297802,0.801888,0.321052
1,one-hot,boosting,384,2168,0,double,GB1,none,96,0.658760,0.260686,0.789838,0.289313
2,one-hot,boosting,384,2168,1,double,GB1,none,96,0.658760,0.180876,0.755609,0.245132
3,one-hot,boosting,384,2168,1,double,GB1,none,96,0.598350,0.288863,0.787997,0.304373
4,one-hot,boosting,384,2168,2,double,GB1,none,96,0.741846,0.300828,0.791546,0.303807
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,one-hot,ridge,384,2168,47,double,GB1,none,96,0.565201,0.187707,0.771074,0.253966
96,one-hot,ridge,384,2168,48,double,GB1,none,96,0.825004,0.262832,0.769807,0.246895
97,one-hot,ridge,384,2168,48,double,GB1,none,96,0.598350,0.104960,0.749747,0.214962
98,one-hot,ridge,384,2168,49,double,GB1,none,96,0.615242,0.167068,0.750999,0.232590


In [7]:
def get_all_metric_df(mlde_results_dir: str = "results/mlde/saved") -> pd.DataFrame:
    """Return the metric df for all mlde results"""
    mlde_npy_paths = sorted(glob(f"{mlde_results_dir}/**/*.npy", recursive=True))
    # one-hot needs redo
    mlde_parsers = [MLDEParser(mlde_npy_path) for mlde_npy_path in tqdm(mlde_npy_paths) if "save_old_all" not in mlde_npy_path]
    return pd.concat([mlde_parser.metric_df for mlde_parser in mlde_parsers])

In [8]:
MLDEParser("results/mlde/saved/Triad_score/none-double/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy")

<__main__.MLDEParser at 0x7fba6c6d82e0>

In [9]:
dhfr_oh_s = MLDEParser("results/mlde_test/saved/none/none-single/scale2max/DHFR/one-hot_boosting|ridge_sample384_top96.npy")

In [10]:
dhfr_oh_s.config

{'data_config': {'input_csv': 'results/zs_comb/none/scale2max/DHFR.csv',
  'zs_predictor': 'none',
  'encoding': ['one-hot'],
  'ft_libs': [58],
  'scale_fit': 'max',
  'filter_min_by': 'none',
  'n_mut_cutoff': 1},
 'model_config': {'model_classes': ['boosting', 'ridge']},
 'train_config': {'n_sample': [384],
  'n_splits': 5,
  'n_replicate': 6,
  'boosting_n_worker': 1,
  'global_seed': 42,
  'verbose': False,
  'save_model': False},
 'eval_config': {'n_top': 96}}

In [11]:
dhfr_oh_s.rhos

array([[[[[0.60320719, 0.67373085, 0.67928259, 0.65834313, 0.67825187,
           0.68106933]]],


        [[[0.44764637, 0.54637353, 0.54318   , 0.480978  , 0.54897341,
           0.53554981]]]]])

In [12]:
dhfr_oh_s.metric_df

Unnamed: 0,encoding,models,n_sample,ft_lib,repeats,n_mut_cutoff,lib,zs,n_top,maxes,means,ndcgs,rhos
0,one-hot,boosting,384,58,0,single,DHFR,none,96,0.865538,0.634493,0.921594,0.603207
1,one-hot,boosting,384,58,0,single,DHFR,none,96,1.0,0.491384,0.89305,0.673731
2,one-hot,boosting,384,58,1,single,DHFR,none,96,1.0,0.459916,0.882704,0.679283
3,one-hot,boosting,384,58,1,single,DHFR,none,96,0.865538,0.608262,0.931114,0.658343
4,one-hot,boosting,384,58,2,single,DHFR,none,96,1.0,0.476355,0.883015,0.678252
5,one-hot,boosting,384,58,2,single,DHFR,none,96,1.0,0.490893,0.889487,0.681069
6,one-hot,ridge,384,58,3,single,DHFR,none,96,0.996537,0.563208,0.888266,0.447646
7,one-hot,ridge,384,58,3,single,DHFR,none,96,1.0,0.518441,0.897178,0.546374
8,one-hot,ridge,384,58,4,single,DHFR,none,96,1.0,0.507379,0.900229,0.54318
9,one-hot,ridge,384,58,4,single,DHFR,none,96,0.996537,0.543148,0.892386,0.480978


In [13]:
dhfr_s = np.load("results/mlde/saved/Triad_score/none-single/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy", allow_pickle=True).item()

In [14]:
dhfr_s["config"]

{'data_config': {'input_csv': 'results/zs_comb/none/scale2max/DHFR.csv',
  'zs_predictor': 'Triad_score',
  'encoding': ['esm2_t33_650M_UR50D-flatten_site',
   'esm2_t33_650M_UR50D-mean_all',
   'esm2_t33_650M_UR50D-mean_site'],
  'ft_libs': [58],
  'scale_fit': 'max',
  'filter_min_by': 'none',
  'n_mut_cutoff': 1},
 'model_config': {'model_classes': ['boosting', 'ridge']},
 'train_config': {'n_sample': [384],
  'n_splits': 5,
  'n_replicate': 50,
  'boosting_n_worker': 1,
  'global_seed': 42,
  'verbose': False,
  'save_model': False},
 'eval_config': {'n_top': 384}}

In [15]:
dhfr_s["ndcgs"][0][0][0][0]

array([0.91038108, 0.91038108, 0.91038108, 0.91038108, 0.91038108,
       0.91038108, 0.91038108, 0.91038108, 0.91038108, 0.91038108,
       0.91038108, 0.91038108, 0.91038108, 0.91038108, 0.91038108,
       0.91038108, 0.91038108, 0.91038108, 0.91038108, 0.91038108,
       0.91038108, 0.91038108, 0.91038108, 0.91038108, 0.91038108,
       0.91038108, 0.91038108, 0.91038108, 0.91038108, 0.91038108,
       0.91038108, 0.91038108, 0.91038108, 0.91038108, 0.91038108,
       0.91038108, 0.91038108, 0.91038108, 0.91038108, 0.91038108,
       0.91038108, 0.91038108, 0.91038108, 0.91038108, 0.91038108,
       0.91038108, 0.91038108, 0.91038108, 0.91038108, 0.91038108])

In [16]:
all_df = get_all_metric_df()
all_df

  0%|          | 0/672 [00:00<?, ?it/s]

 15%|█▍        | 100/672 [00:09<01:10,  8.16it/s]

results/mlde/saved/Triad_score/none-single/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_bo

 16%|█▌        | 109/672 [00:11<01:19,  7.09it/s]

results/mlde/saved/Triad_score/none-single/scale2max/TrpB3A/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3A/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3A/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3A/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3A/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-me

 17%|█▋        | 117/672 [00:11<00:44, 12.57it/s]

results/mlde/saved/Triad_score/none-single/scale2max/TrpB3C/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3C/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3C/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3C/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3C/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-me

 19%|█▉        | 128/672 [00:12<00:28, 19.36it/s]

results/mlde/saved/Triad_score/none-single/scale2max/TrpB3E/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3E/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3E/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3E/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3E/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-me

 20%|██        | 135/672 [00:12<00:22, 23.83it/s]

results/mlde/saved/Triad_score/none-single/scale2max/TrpB3G/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3G/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3G/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3G/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3G/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-me

 21%|██        | 139/672 [00:12<00:20, 26.48it/s]

results/mlde/saved/Triad_score/none-single/scale2max/TrpB3I/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3I/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3I/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3I/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/Triad_score/none-single/scale2max/TrpB3I/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-me

 36%|███▌      | 242/672 [00:23<01:01,  6.94it/s]

results/mlde/saved/esm_score/none-single/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|rid

 36%|███▋      | 245/672 [00:24<01:07,  6.28it/s]

results/mlde/saved/esm_score/none-single/scale2max/GB1/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/GB1/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/GB1/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/GB1/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/GB1/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sa

 38%|███▊      | 253/672 [00:25<00:56,  7.39it/s]

results/mlde/saved/esm_score/none-single/scale2max/TrpB3A/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3A/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3A/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3A/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3A/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_bo

 39%|███▉      | 261/672 [00:25<00:30, 13.33it/s]

results/mlde/saved/esm_score/none-single/scale2max/TrpB3C/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3C/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3C/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3C/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3C/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_bo

 40%|████      | 269/672 [00:25<00:19, 20.23it/s]

results/mlde/saved/esm_score/none-single/scale2max/TrpB3E/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3E/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3E/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3E/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3E/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_bo

 41%|████      | 277/672 [00:26<00:15, 25.50it/s]

results/mlde/saved/esm_score/none-single/scale2max/TrpB3G/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3G/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3G/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3G/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3G/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_bo

 42%|████▏     | 281/672 [00:26<00:14, 27.64it/s]

results/mlde/saved/esm_score/none-single/scale2max/TrpB3I/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3I/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3I/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3I/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB3I/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_bo

 42%|████▏     | 285/672 [00:26<00:28, 13.55it/s]

results/mlde/saved/esm_score/none-single/scale2max/TrpB4/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB4/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB4/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB4/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/esm_score/none-single/scale2max/TrpB4/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boostin

 57%|█████▋    | 385/672 [00:36<00:39,  7.26it/s]

results/mlde/saved/ev_score/none-single/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/DHFR/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sa

 58%|█████▊    | 389/672 [00:37<00:40,  7.00it/s]

results/mlde/saved/ev_score/none-single/scale2max/GB1/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/GB1/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/GB1/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/GB1/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/GB1/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample3

 59%|█████▉    | 397/672 [00:38<00:36,  7.60it/s]

results/mlde/saved/ev_score/none-single/scale2max/TrpB3A/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3A/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3A/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3A/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3A/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boostin

 60%|██████    | 405/672 [00:39<00:20, 13.18it/s]

results/mlde/saved/ev_score/none-single/scale2max/TrpB3C/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3C/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3C/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3C/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3C/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boostin

 61%|██████▏   | 413/672 [00:39<00:13, 19.51it/s]

results/mlde/saved/ev_score/none-single/scale2max/TrpB3E/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3E/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3E/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3E/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3E/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boostin

 63%|██████▎   | 421/672 [00:39<00:09, 25.93it/s]

results/mlde/saved/ev_score/none-single/scale2max/TrpB3G/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3G/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3G/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3G/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3G/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boostin

 63%|██████▎   | 425/672 [00:39<00:08, 28.16it/s]

results/mlde/saved/ev_score/none-single/scale2max/TrpB3I/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3I/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3I/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3I/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB3I/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boostin

 64%|██████▍   | 429/672 [00:40<00:17, 13.72it/s]

results/mlde/saved/ev_score/none-single/scale2max/TrpB4/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB4/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB4/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB4/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|ridge_sample384_top384.npy (3, 2, 1, 3, 50) (3, 300)
results/mlde/saved/ev_score/none-single/scale2max/TrpB4/esm2_t33_650M_UR50D-flatten_site|esm2_t33_650M_UR50D-mean_all|esm2_t33_650M_UR50D-mean_site_boosting|rid

100%|██████████| 672/672 [00:46<00:00, 14.61it/s]


Unnamed: 0,encoding,models,n_sample,ft_lib,repeats,n_mut_cutoff,lib,zs,n_top,maxes,means,ndcgs,rhos
0,esm2_t33_650M_UR50D-flatten_site,boosting,384,4000,0,all,DHFR,Triad_score,384,1.000000,0.502199,0.921660,0.792539
1,esm2_t33_650M_UR50D-flatten_site,boosting,384,4000,0,all,DHFR,Triad_score,384,1.000000,0.468277,0.918007,0.737994
2,esm2_t33_650M_UR50D-flatten_site,boosting,384,4000,0,all,DHFR,Triad_score,384,0.996537,0.486061,0.915259,0.801123
3,esm2_t33_650M_UR50D-flatten_site,boosting,384,4000,0,all,DHFR,Triad_score,384,1.000000,0.449828,0.912758,0.655603
4,esm2_t33_650M_UR50D-flatten_site,boosting,384,4000,0,all,DHFR,Triad_score,384,1.000000,0.480492,0.928671,0.781353
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,one-hot,ridge,384,77,47,single,TrpB4,none,96,0.739434,0.287961,0.978963,0.322322
96,one-hot,ridge,384,77,48,single,TrpB4,none,96,0.749107,0.300604,0.979479,0.325815
97,one-hot,ridge,384,77,48,single,TrpB4,none,96,0.749107,0.333802,0.980334,0.329226
98,one-hot,ridge,384,77,49,single,TrpB4,none,96,0.749107,0.343322,0.980304,0.325455


In [17]:
all_df[
        (all_df["zs"] == "none")
        & (all_df["encoding"] != "one-hot")
        & (all_df["models"] == "boosting")
        & (all_df["n_top"] == 96)
    ].drop_duplicates()

Unnamed: 0,encoding,models,n_sample,ft_lib,repeats,n_mut_cutoff,lib,zs,n_top,maxes,means,ndcgs,rhos
0,esm2_t33_650M_UR50D-flatten_site,boosting,384,8000,0,all,DHFR,none,96,1.000000,0.600809,0.938639,0.806137
1,esm2_t33_650M_UR50D-flatten_site,boosting,384,8000,0,all,DHFR,none,96,1.000000,0.625357,0.939710,0.827160
2,esm2_t33_650M_UR50D-flatten_site,boosting,384,8000,0,all,DHFR,none,96,1.000000,0.621703,0.944101,0.847372
3,esm2_t33_650M_UR50D-flatten_site,boosting,384,8000,0,all,DHFR,none,96,1.000000,0.655619,0.955815,0.861880
4,esm2_t33_650M_UR50D-flatten_site,boosting,384,8000,0,all,DHFR,none,96,1.000000,0.608112,0.946087,0.833112
...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,esm2_t33_650M_UR50D-mean_all,boosting,384,77,24,single,TrpB4,none,96,0.688532,0.231604,0.978231,0.307402
146,esm2_t33_650M_UR50D-mean_all,boosting,384,77,24,single,TrpB4,none,96,0.705307,0.260310,0.977805,0.301282
147,esm2_t33_650M_UR50D-mean_all,boosting,384,77,24,single,TrpB4,none,96,0.806523,0.265796,0.978030,0.303354
148,esm2_t33_650M_UR50D-mean_all,boosting,384,77,24,single,TrpB4,none,96,0.794516,0.336492,0.977319,0.296668


In [18]:
all_df[
        (all_df["zs"] == "none")
        & (all_df["encoding"] == "one-hot")
        & (all_df["models"] == "boosting")
        & (all_df["n_top"] == 96)
    ]

Unnamed: 0,encoding,models,n_sample,ft_lib,repeats,n_mut_cutoff,lib,zs,n_top,maxes,means,ndcgs,rhos
0,one-hot,boosting,384,8000,0,all,DHFR,none,96,1.000000,0.551468,0.931935,0.816554
1,one-hot,boosting,384,8000,0,all,DHFR,none,96,1.000000,0.626697,0.945184,0.866116
2,one-hot,boosting,384,8000,1,all,DHFR,none,96,1.000000,0.647137,0.956728,0.850640
3,one-hot,boosting,384,8000,1,all,DHFR,none,96,1.000000,0.672220,0.964112,0.878829
4,one-hot,boosting,384,8000,2,all,DHFR,none,96,1.000000,0.679334,0.960879,0.841705
...,...,...,...,...,...,...,...,...,...,...,...,...,...
45,one-hot,boosting,384,77,22,single,TrpB4,none,96,0.658552,0.238493,0.976693,0.301108
46,one-hot,boosting,384,77,23,single,TrpB4,none,96,0.739434,0.381419,0.980915,0.341004
47,one-hot,boosting,384,77,23,single,TrpB4,none,96,0.752895,0.461929,0.982687,0.349185
48,one-hot,boosting,384,77,24,single,TrpB4,none,96,0.749107,0.442314,0.981853,0.347037


In [34]:
from SSMuLA.landscape_global import n_mut_cutoff_dict, LIB_NAMES

In [38]:
def lib_ncut_hook(plot, element):
   
    plot.handles['plot'].x_range.factors = [(lib, n_mut) for lib in LIB_NAMES for n_mut in ["single", "double", "all"]]
    plot.handles['xaxis'].major_label_text_font_size = '0pt'
    # plot.handles['xaxis'].group_text_font_size = '0pt'
    # plot.handles['yaxis'].axis_label_text_font_size = '10pt'
    # plot.handles['yaxis'].axis_label_text_font_style = 'normal'
    # plot.handles['xaxis'].axis_label_text_font_style = 'normal'

plot_name = "No ZS, One-hot, Boosting, 96, rhos"
save_bokeh_hv(
    hv.Violin(
        all_df[
            (all_df["zs"] == "none")
            & (all_df["encoding"] == "one-hot")
            & (all_df["models"] == "boosting")
            & (all_df["n_top"] == 96)
        ].sort_values(["lib", "n_mut_cutoff"], ascending =[True, False]).copy(),
        kdims=["lib", "n_mut_cutoff"],
        vdims=["rhos"],
    ).opts(
        width=1200,
        height=400,
        violin_color="n_mut_cutoff",
        show_legend=True,
        legend_position="top",
        legend_offset=(0, 5),
        title=plot_name,
        ylim=(0,1),
        # violin_color=hv.dim("n_mut_cuttoff").str(),
        hooks=[one_decimal_x, one_decimal_y, fixmargins, lib_ncut_hook],
    ),
    plot_name=plot_name,
    plot_path="results/mlde/vis-test",
)

### Let's do just onehot s2m and d2m no zs

In [4]:
from glob import glob

In [11]:
mlde_d_onehot_96 = sorted(glob("results/mlde/saved/none/none-double/scale2max/*/one-hot*96.npy"))
mlde_d_onehot_96

['results/mlde/saved/none/none-double/scale2max/DHFR/one-hot_boosting|ridge_sample384_top96.npy',
 'results/mlde/saved/none/none-double/scale2max/GB1/one-hot_boosting|ridge_sample384_top96.npy',
 'results/mlde/saved/none/none-double/scale2max/TrpB3A/one-hot_boosting|ridge_sample384_top96.npy',
 'results/mlde/saved/none/none-double/scale2max/TrpB3B/one-hot_boosting|ridge_sample384_top96.npy',
 'results/mlde/saved/none/none-double/scale2max/TrpB3C/one-hot_boosting|ridge_sample384_top96.npy',
 'results/mlde/saved/none/none-double/scale2max/TrpB3D/one-hot_boosting|ridge_sample384_top96.npy',
 'results/mlde/saved/none/none-double/scale2max/TrpB3E/one-hot_boosting|ridge_sample384_top96.npy',
 'results/mlde/saved/none/none-double/scale2max/TrpB3F/one-hot_boosting|ridge_sample384_top96.npy',
 'results/mlde/saved/none/none-double/scale2max/TrpB3G/one-hot_boosting|ridge_sample384_top96.npy',
 'results/mlde/saved/none/none-double/scale2max/TrpB3H/one-hot_boosting|ridge_sample384_top96.npy',
 'res

In [32]:
lib_dets = "double"

In [13]:
gb1_d_onehot_96 = np.load("results/mlde/saved/none/none-double/scale2max/GB1/one-hot_boosting|ridge_sample384_top96.npy", allow_pickle=True)
gb1_d_onehot_96

array({'config': {'data_config': {'input_csv': 'results/zs_comb/none/scale2max/GB1.csv', 'zs_predictor': 'none', 'encoding': ['one-hot'], 'ft_libs': [149361], 'scale_fit': 'max', 'filter_min_by': 'none', 'n_mut_cutoff': 2}, 'model_config': {'model_classes': ['boosting', 'ridge']}, 'train_config': {'n_sample': [384], 'n_splits': 5, 'n_replicate': 100, 'n_worker': 1, 'global_seed': 42, 'verbose': False, 'save_model': False}, 'eval_config': {'n_top': 96}}, 'top_seqs': array([[[[[['FFGM', 'IFGM', 'TFGM', ..., 'NYGM', 'TWGL', 'TWGV'],
           ['LWGF', 'FFGM', 'MFGM', ..., 'FFGW', 'IYGI', 'FYGQ'],
           ['LWGF', 'VWGF', 'TWGF', ..., 'WWFH', 'LWSF', 'LWRF'],
           ...,
           ['LYGF', 'LYGM', 'LYGA', ..., 'TYFG', 'TYGM', 'TYMG'],
           ['CYGL', 'IYGL', 'CYGC', ..., 'CHGV', 'CWAG', 'LYAG'],
           ['TWCA', 'LHCA', 'VYGC', ..., 'LPCA', 'PDCA', 'TRCA']]]],



        [[[['LWGC', 'LWGM', 'LFGC', ..., 'LHAC', 'RWGM', 'LHAM'],
           ['LFGC', 'LFGF', 'LYGC', ..., 'CYGC

In [14]:
gb1_d_onehot_96.item().keys()

dict_keys(['config', 'top_seqs', 'maxes', 'means', 'ndcgs', 'rhos', 'unique', 'labelled', 'y_preds'])

In [47]:
gb1_d_onehot_96.item()["unique"]

array([[[[[384., 384., 384., 384., 384., 384., 384., 384., 384., 384.,
           384., 384., 384., 384., 384., 384., 384., 384., 384., 384.,
           384., 384., 384., 384., 384., 384., 384., 384., 384., 384.,
           384., 384., 384., 384., 384., 384., 384., 384., 384., 384.,
           384., 384., 384., 384., 384., 384., 384., 384., 384., 384.,
           384., 384., 384., 384., 384., 384., 384., 384., 384., 384.,
           384., 384., 384., 384., 384., 384., 384., 384., 384., 384.,
           384., 384., 384., 384., 384., 384., 384., 384., 384., 384.,
           384., 384., 384., 384., 384., 384., 384., 384., 384., 384.,
           384., 384., 384., 384., 384., 384., 384., 384., 384., 384.]]],


        [[[384., 384., 384., 384., 384., 384., 384., 384., 384., 384.,
           384., 384., 384., 384., 384., 384., 384., 384., 384., 384.,
           384., 384., 384., 384., 384., 384., 384., 384., 384., 384.,
           384., 384., 384., 384., 384., 384., 384., 384., 384., 384.,
 

In [19]:
gb1_d_onehot_96.item()["config"]

{'data_config': {'input_csv': 'results/zs_comb/none/scale2max/GB1.csv',
  'zs_predictor': 'none',
  'encoding': ['one-hot'],
  'ft_libs': [149361],
  'scale_fit': 'max',
  'filter_min_by': 'none',
  'n_mut_cutoff': 2},
 'model_config': {'model_classes': ['boosting', 'ridge']},
 'train_config': {'n_sample': [384],
  'n_splits': 5,
  'n_replicate': 100,
  'n_worker': 1,
  'global_seed': 42,
  'verbose': False,
  'save_model': False},
 'eval_config': {'n_top': 96}}

In [15]:
gb1_d_onehot_96.item()["maxes"].shape, gb1_d_onehot_96.item()["means"].shape, gb1_d_onehot_96.item()["top_seqs"].shape, gb1_d_onehot_96.item()["y_preds"].shape

((1, 2, 1, 1, 100),
 (1, 2, 1, 1, 100),
 (1, 2, 1, 1, 100, 96),
 (1, 2, 1, 1, 100, 149361))

In [49]:
gb1_d_onehot_96.item().keys()

dict_keys(['config', 'top_seqs', 'maxes', 'means', 'ndcgs', 'rhos', 'unique', 'labelled', 'y_preds'])

In [50]:
gb1_d_onehot_96.item()["unique"].shape

(1, 2, 1, 1, 100)

In [51]:
gb1_d_onehot_96.item()["labelled"].shape

(1, 2, 1, 1, 100)

In [17]:
gb1_d_onehot_96.item()["maxes"][0][0].flatten().shape

(100,)

In [16]:
gb1_d_onehot_96.item()["maxes"][0][1]

array([[[0.68791511, 0.65875997, 0.65875997, 0.62093539, 0.59681201,
         0.68958016, 1.        , 0.68958016, 0.64126117, 0.91819031,
         0.65875997, 0.64660666, 0.51195701, 0.69890765, 0.91819031,
         0.65875997, 0.62093539, 0.91819031, 0.79071951, 0.82500438,
         0.91819031, 0.64126117, 1.        , 0.62093539, 0.65875997,
         0.61524237, 0.59835001, 0.82500438, 0.65875997, 0.91819031,
         0.62093539, 0.82500438, 0.68791511, 0.62093539, 0.82500438,
         0.59835001, 0.82500438, 0.62712818, 0.62712818, 1.        ,
         0.91819031, 0.91819031, 0.62093539, 0.82500438, 0.59053442,
         0.59681201, 0.82500438, 0.59835001, 0.61524237, 0.62093539,
         0.65875997, 0.69890765, 0.69890765, 0.65875997, 0.59681201,
         1.        , 0.69890765, 0.62093539, 0.65489649, 0.91819031,
         0.59835001, 0.62712818, 0.862211  , 0.51195701, 0.91819031,
         0.65875997, 0.64126117, 0.62712818, 0.79071951, 0.65875997,
         0.62093539, 0.61524237, 0

In [17]:
len(gb1_d_onehot_96.item()["top_seqs"][0][0][0][0][0])

96

In [18]:
gb1_d_onehot_96.item()["top_seqs"][0][0][0][0][0]

array(['FFGM', 'IFGM', 'TFGM', 'MFGM', 'LFGM', 'FYGC', 'LYGC', 'CFGM',
       'FYGM', 'MYGC', 'RFGM', 'YFGM', 'EFGM', 'KFGM', 'TYGC', 'QFGM',
       'VFGM', 'LWGF', 'IYGC', 'HFGM', 'WFGM', 'VYGC', 'LWGA', 'LYGM',
       'SFGM', 'AFGM', 'MYGM', 'NFGM', 'TWGA', 'CYGC', 'LWGM', 'TWGF',
       'FWGM', 'RYGC', 'YYGC', 'EYGC', 'GFGM', 'TWGM', 'KYGC', 'QYGC',
       'WYGC', 'LWGN', 'WFFM', 'DFGM', 'LWGS', 'TYGM', 'LWGI', 'LWGC',
       'TWGC', 'IFGC', 'AYGC', 'SYGC', 'IFGA', 'LWGL', 'LWGT', 'FWGF',
       'NYGC', 'VYGM', 'IFGF', 'CYGM', 'LWGV', 'LWGG', 'LWGW', 'FWGA',
       'IYGM', 'RYGM', 'YYGM', 'EYGM', 'KYGM', 'PYGC', 'LWGD', 'QYGM',
       'LWGY', 'HYGC', 'DYGC', 'WYGM', 'HYGM', 'FWGC', 'GYGC', 'IFGI',
       'TWGS', 'TFGC', 'TFGA', 'MHGA', 'LWGH', 'AYGM', 'SYGM', 'TWGI',
       'LWGQ', 'TFGF', 'FWGS', 'WYFM', 'MHGF', 'NYGM', 'TWGL', 'TWGV'],
      dtype='<U4')

In [48]:
arr.shape

(1, 2, 1, 1, 100)

In [36]:
# get 'maxes', 'means', 'ndcgs', 'rhos' to df

metrics = ["maxes", "means", "ndcgs", "rhos"]

arr = gb1_d_onehot_96.item()["maxes"].copy()
# Flatten the array
flattened_arr = arr.flatten()

# Create index arrays for each dimension
encoding_index = np.array([[i] * arr.shape[1] * arr.shape[2] * arr.shape[3] * arr.shape[4] for i in range(arr.shape[0])]).flatten()
models_index = np.array([[i] * arr.shape[2] * arr.shape[3] * arr.shape[4] for i in range(arr.shape[1]) for _ in range(arr.shape[0])]).flatten()
n_sample_index = np.array([[i] * arr.shape[3] * arr.shape[4] for i in range(arr.shape[2]) for _ in range(arr.shape[1]) for _ in range(arr.shape[0])]).flatten()
lib_index = np.array([[i] * arr.shape[4] for i in range(arr.shape[3]) for _ in range(arr.shape[2]) for _ in range(arr.shape[1]) for _ in range(arr.shape[0])]).flatten()
repeats_index = np.array([i for i in range(arr.shape[4]) for _ in range(arr.shape[3]) for _ in range(arr.shape[2]) for _ in range(arr.shape[1]) for _ in range(arr.shape[0])])

# Create DataFrame
df = pd.DataFrame({
    'Encoding': encoding_index,
    'Models': models_index,
    'N_Sample': n_sample_index,
    'Lib': lib_index,
    'Repeats': repeats_index,
    'maxes': flattened_arr
})

for m in metrics[1:]:
    arr = gb1_d_onehot_96.item()[m].copy()
    # Flatten the array
    flattened_arr = arr.flatten()

    # Create DataFrame
    df[m] = flattened_arr

In [37]:
df

Unnamed: 0,Encoding,Models,N_Sample,Lib,Repeats,maxes,means,ndcgs,rhos
0,0,0,0,0,0,0.614051,0.297161,0.797368,0.324864
1,0,0,0,0,0,0.658760,0.314371,0.772456,0.262550
2,0,0,0,0,1,0.723981,0.195821,0.756840,0.242435
3,0,0,0,0,1,0.598350,0.297064,0.772549,0.267467
4,0,0,0,0,2,0.741846,0.279394,0.790558,0.298536
...,...,...,...,...,...,...,...,...,...
195,0,1,0,0,97,0.918190,0.203111,0.774212,0.260462
196,0,1,0,0,98,0.658760,0.340319,0.785797,0.255417
197,0,1,0,0,98,0.825004,0.253331,0.793878,0.290284
198,0,1,0,0,99,0.918190,0.259793,0.781041,0.264912


In [38]:
gb1_d_onehot_96_config = gb1_d_onehot_96.item()["config"]
gb1_d_onehot_96_config

{'data_config': {'input_csv': 'results/zs_comb/none/scale2max/GB1.csv',
  'zs_predictor': 'none',
  'encoding': ['one-hot'],
  'ft_libs': [149361],
  'scale_fit': 'max',
  'filter_min_by': 'none',
  'n_mut_cutoff': 2},
 'model_config': {'model_classes': ['boosting', 'ridge']},
 'train_config': {'n_sample': [384],
  'n_splits': 5,
  'n_replicate': 100,
  'n_worker': 1,
  'global_seed': 42,
  'verbose': False,
  'save_model': False},
 'eval_config': {'n_top': 96}}

In [39]:
# Map integer indices to labels

df['Encoding'] = df['Encoding'].map({i: v for i, v in enumerate(gb1_d_onehot_96_config["data_config"]["encoding"])})
df['Models'] = df['Models'].map({i: v for i, v in enumerate(gb1_d_onehot_96_config["model_config"]["model_classes"])})
df['N_Sample'] = df['N_Sample'].map({i: v for i, v in enumerate(gb1_d_onehot_96_config["train_config"]["n_sample"])})
# df['Lib'] = df['Lib'].map({i: v for i, v in enumerate(gb1_d_onehot_96_config["data_config"]["ft_libs"])})
df["Libs"] = lib_dets
df


Unnamed: 0,Encoding,Models,N_Sample,Lib,Repeats,maxes,means,ndcgs,rhos
0,one-hot,boosting,384,double,0,0.614051,0.297161,0.797368,0.324864
1,one-hot,boosting,384,double,0,0.658760,0.314371,0.772456,0.262550
2,one-hot,boosting,384,double,1,0.723981,0.195821,0.756840,0.242435
3,one-hot,boosting,384,double,1,0.598350,0.297064,0.772549,0.267467
4,one-hot,boosting,384,double,2,0.741846,0.279394,0.790558,0.298536
...,...,...,...,...,...,...,...,...,...
195,one-hot,ridge,384,double,97,0.918190,0.203111,0.774212,0.260462
196,one-hot,ridge,384,double,98,0.658760,0.340319,0.785797,0.255417
197,one-hot,ridge,384,double,98,0.825004,0.253331,0.793878,0.290284
198,one-hot,ridge,384,double,99,0.918190,0.259793,0.781041,0.264912


In [None]:
df

In [28]:
#MLDE violin plots
# Plot Hooks
def one_decimal_x(plot,element):
    plot.handles['plot'].xaxis[0].formatter = NumeralTickFormatter(format="0.0")

def one_decimal_y(plot,element):
    plot.handles['plot'].yaxis[0].formatter = NumeralTickFormatter(format="0.0")

def fixmargins(plot,element):
    plot.handles['plot'].min_border_right=30
    plot.handles['plot'].min_border_left=65
    plot.handles['plot'].min_border_top=20
    plot.handles['plot'].min_border_bottom=65
    plot.handles['plot'].outline_line_color='black'
    plot.handles['plot'].outline_line_alpha=1
    plot.handles['plot'].outline_line_width=1
    plot.handles['plot'].toolbar.autohide = True
    

In [45]:
hv.Violin(df, kdims=["Models", "Lib"], vdims=["rhos"])# .opts(hooks=[one_decimal_x, one_decimal_y, fixmargins])

In [46]:
hv.Violin(df, kdims=["Models", "Lib"], vdims=["ndcgs"])# .opts(hooks=[one_decimal_x, one_decimal_y, fixmargins])

In [None]:
# plot all doubles and singles with boht models and all libraries

In [22]:
GB1_tri.item()["maxes"][0][0].shape

(1, 3, 100)

In [23]:
GB1_tri.item()["maxes"][0][0][0][0]

array([0.99653688, 0.99653688, 0.86553772, 1.        , 0.99653688,
       1.        , 1.        , 1.        , 1.        , 0.99653688,
       1.        , 0.86553772, 0.87359575, 0.99653688, 0.99653688,
       1.        , 1.        , 0.86553772, 0.99653688, 1.        ,
       1.        , 1.        , 1.        , 1.        , 0.95900307,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 0.85784747, 1.        , 0.99653688, 1.        ,
       0.87359575, 1.        , 0.99653688, 1.        , 0.99653688,
       1.        , 0.98253018, 1.        , 0.85784747, 1.        ,
       1.        , 0.99653688, 1.        , 0.86553772, 0.86553772,
       1.        , 0.99653688, 0.99653688, 0.86553772, 1.        ,
       1.        , 0.99653688, 0.99653688, 1.        , 0.98555511,
       0.95900307, 1.        , 1.        , 1.        , 0.87359575,
       1.        , 1.        , 1.        , 0.98253018, 1.        ,
       1.        , 0.86553772, 1.        , 1.        , 1.     

In [24]:
GB1_tri.item()["maxes"][0][0][0][1]

array([0.99653688, 0.99653688, 0.86553772, 1.        , 0.99653688,
       1.        , 1.        , 1.        , 1.        , 0.99653688,
       1.        , 0.86553772, 0.87359575, 0.99653688, 0.99653688,
       1.        , 1.        , 0.86553772, 0.99653688, 1.        ,
       1.        , 1.        , 1.        , 1.        , 0.95900307,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 0.85784747, 1.        , 0.99653688, 1.        ,
       0.87359575, 1.        , 0.99653688, 1.        , 0.99653688,
       1.        , 0.98253018, 1.        , 0.85784747, 1.        ,
       1.        , 0.99653688, 1.        , 0.86553772, 0.86553772,
       1.        , 0.99653688, 0.99653688, 0.86553772, 1.        ,
       1.        , 0.99653688, 0.99653688, 1.        , 0.98555511,
       0.95900307, 1.        , 1.        , 1.        , 0.87359575,
       1.        , 1.        , 1.        , 0.98253018, 1.        ,
       1.        , 0.86553772, 1.        , 1.        , 1.     

In [25]:
GB1_tri.item()["maxes"][0][0][0][2]

array([0.99653688, 0.99653688, 0.86553772, 1.        , 0.99653688,
       1.        , 1.        , 1.        , 1.        , 0.99653688,
       1.        , 0.86553772, 0.87359575, 0.99653688, 0.99653688,
       1.        , 1.        , 0.86553772, 0.99653688, 1.        ,
       1.        , 1.        , 1.        , 1.        , 0.95900307,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 0.85784747, 1.        , 0.99653688, 1.        ,
       0.87359575, 1.        , 0.99653688, 1.        , 0.99653688,
       1.        , 0.98253018, 1.        , 0.85784747, 1.        ,
       1.        , 0.99653688, 1.        , 0.86553772, 0.86553772,
       1.        , 0.99653688, 0.99653688, 0.86553772, 1.        ,
       1.        , 0.99653688, 0.99653688, 1.        , 0.98555511,
       0.95900307, 1.        , 1.        , 1.        , 0.87359575,
       1.        , 1.        , 1.        , 0.98253018, 1.        ,
       1.        , 0.86553772, 1.        , 1.        , 1.     

In [8]:
gb1_d2m = np.load("results/mlde/saved/esm_score/none-single/scale2max/DHFR/one-hot_boosting|ridge_sample384_top96.npy", allow_pickle=True)
gb1_d2m.item().keys()

dict_keys(['top_seqs', 'maxes', 'means', 'ndcgs', 'unique', 'labelled', 'y_preds'])

In [10]:
gb1_d2m.item()["maxes"].shape, gb1_d2m.item()["top_seqs"].shape, gb1_d2m.item()["y_preds"].shape

((1, 2, 1, 3, 100), (1, 2, 1, 3, 100, 96), (1, 2, 1, 3, 100, 8000))

In [None]:
# model

In [43]:

data = pd.DataFrame.from_dict(maxes2)
data = data.melt(var_name='Landscape', value_name='max_fitness')

MLDE_fig = hv.Violin(
    data.sort_index(ascending=False), ['Landscape'], 'max_fitness'
).opts(
    # split='protein',
    frame_height=300,
    frame_width=300,
    violin_width=0.8,
    fontscale=1.3,
    hooks=[fixmargins],
    # show_legend=True,
    # inner=None,
    cut = 0,
    # legend_position='top',
    ylabel='Max Fitness Achieved',
    violin_color=hv.dim('Landscape').str(),
    cmap = 'Category10',
    xlabel = 'Protein',
    ylim = (0,1)
)

MLDE_fig

In [44]:
data

Unnamed: 0,Landscape,max_fitness
0,GB1,0.796777
1,GB1,0.614051
2,GB1,0.526876
3,GB1,0.741846
4,GB1,0.475515
...,...,...
345,GB1,0.918190
346,GB1,1.000000
347,GB1,1.000000
348,GB1,0.862211
