In [66]:
import os
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
from torch.utils.data import DataLoader, Dataset

import math
import random
import pandas as pd
import numpy as np
import warnings
from tqdm import tqdm, trange
from copy import deepcopy
import scipy.stats as stats
from scipy.stats import spearmanr, pearsonr
from sklearn import preprocessing
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import KFold
# Fix one seed and apply it to every random number generator used here
# (Python's `random`, NumPy, and PyTorch on CPU and CUDA).
seed = 1337
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
# NOTE(review): this silences *every* warning for the rest of the session,
# including deprecation warnings from pandas/scipy.
warnings.filterwarnings("ignore")

def r2(x,y):
    """Return the squared correlation coefficient of a linear fit of y on x.

    The value is the square of the `rvalue` reported by
    `scipy.stats.linregress(x, y)`.
    """
    fit = stats.linregress(x, y)
    return fit.rvalue ** 2

def performances(label, pred):
    """Print and return R-squared, Pearson r and Spearman correlation.

    Parameters
    ----------
    label, pred :
        Observed and predicted values. They are passed unchanged to `r2`,
        `scipy.stats.pearsonr` and `scipy.stats.spearmanr`.

    Returns
    -------
    list
        `[r_squared, pearson_r, spearman_r]`. If either correlation call
        raises, its entry is the placeholder `1e-9` instead.
    """
    r = r2(label, pred)

    # Best-effort: keep the placeholder on failure, but catch `Exception`
    # rather than using a bare `except`, so KeyboardInterrupt / SystemExit
    # are no longer swallowed.
    try:
        pearson_r = pearsonr(label, pred)[0]
    except Exception:
        pearson_r = 1e-9
    try:
        sp_cor = spearmanr(label, pred)[0]
    except Exception:
        sp_cor = 1e-9

    print(f'r-squared = {r:.4f} | pearson r = {pearson_r:.4f} | spearman R = {sp_cor:.4f}')

    return [r, pearson_r, sp_cor]


In [67]:
# Length-bin edges handed to `pd.cut`, which turns them into the intervals
# (24, 44], (44, 64], (64, 84] and (84, 104].
# (The former `global bins, labels` statement was removed: at module / cell
# level `global` has no effect, the names are already global.)
bins = np.arange(24, 105, 20)
# Names for the four bins. NOTE(review): not referenced by the visible cells,
# and the last name says 85-100 while the last bin runs to 104 — confirm.
labels = [ '25-44' , '45-64', '65-84', '85-100']

def results_labeled(e_test, length_bins=None):
    """Split a result table into one sub-table per sequence-length bin.

    Parameters
    ----------
    e_test : pandas.DataFrame
        Needs a `len` column, or a `utr` column from which `len` is computed.
        The frame is modified in place: `len` is added when missing, and
        `rng` is (re)written with the string form of each row's interval.
    length_bins : sequence of numbers, optional
        Bin edges passed to `pd.cut`. Defaults to the module-level `bins`.

    Returns
    -------
    tuple of pandas.DataFrame
        One frame per interval, in increasing order (four frames with the
        default edges). Rows whose length falls outside every interval are
        in none of them.
    """
    edges = bins if length_bins is None else length_bins

    if 'len' not in e_test.columns:
        e_test['len'] = [len(s) for s in e_test.utr]

    intervals = pd.cut(e_test['len'], bins=edges)
    e_test['rng'] = intervals.astype('str')

    # Select by categorical code rather than by matching the printed interval
    # text: the text depends on the edge dtype ('(24, 44]' vs '(24.0, 44.0]'),
    # the codes do not. Rows outside every interval have code -1.
    codes = intervals.cat.codes
    return tuple(e_test[codes == k] for k in range(len(intervals.cat.categories)))

def flatten_df(df):
    """Collapse a table into a single row with '<row label>_<column label>' columns.

    The frame is stacked into a (row label, column label) series, turned into
    a one-row frame, and every column pair is then joined into one name.
    """
    single_row = df.stack().to_frame().transpose()
    single_row.columns = [f'{key[0]}_{key[1]}' for key in single_row.columns]
    return single_row

def results_combine_random_human(random_res, human_res, label_col = 'rl'):
    """Score the random and human result tables overall and per length bin.

    Parameters
    ----------
    random_res, human_res : pandas.DataFrame
        Result tables holding a `y_pred` column, the `label_col` column, and
        whatever `results_labeled` needs to bin the rows by length.
    label_col : str, default 'rl'
        Column with the observed values that `y_pred` is compared against.

    Returns
    -------
    pandas.DataFrame
        Columns `R2`, `Pearson R`, `Spearman R`; rows `random`,
        `random_24_44` ... `random_84_104`, `human`, `human_24_44` ...
        `human_84_104`.

    `performances` prints one line per call. The call order is unchanged:
    the four random bins, the four human bins, then random overall and human
    overall.
    """
    length_groups = ('24_44', '44_64', '64_84', '84_104')
    datasets = (('random', random_res), ('human', human_res))

    def _score(frame):
        # One [R2, Pearson R, Spearman R] row for the given table.
        return performances(frame.loc[:, label_col], frame.loc[:, 'y_pred'])

    # Per-bin scores first, overall scores second, to keep the printed order.
    # (The unused random+human concatenations `sub1`..`sub4` were dropped.)
    per_bin = {
        name: [_score(subset) for subset in results_labeled(frame)]
        for name, frame in datasets
    }
    overall = {name: _score(frame) for name, frame in datasets}

    rows, row_names = [], []
    for name, _ in datasets:
        rows.append(overall[name])
        row_names.append(name)
        for group, scores in zip(length_groups, per_bin[name]):
            rows.append(scores)
            row_names.append(f'{name}_{group}')

    return pd.DataFrame(rows,
                        columns = ['R2', 'Pearson R', 'Spearman R'],
                        index = row_names)

# Kipoi
http://localhost:8890/notebooks/5UTR/Modelling/Model_Training-Random-10foldCV.ipynb
http://localhost:8890/notebooks/5UTR/Modelling/Model_Training-Human-10foldCV.ipynb

In [78]:
# Kipoi: load the saved result tables for the random and human sets and score
# them overall and per length bin (default label column 'rl').
kipoi_random = pd.read_csv(
    '/home/ubuntu/5UTR/Modelling/random_10foldcv_results.csv',
    index_col=0,
)
kipoi_human = pd.read_csv(
    '/home/ubuntu/5UTR/Modelling/human_10foldcv_results.csv',
    index_col=0,
)
kipoi_metrics = results_combine_random_human(random_res=kipoi_random, human_res=kipoi_human)

# Replace the two overall rows with the 'mean' row of the saved metrics files
# (presumably the mean over CV folds — confirm); the per-bin rows stay as
# computed above.
kipoi_metrics.loc['random'] = pd.read_csv(
    '/home/ubuntu/5UTR/Modelling/random_10foldcv_metrics.csv',
    index_col=0,
).loc['mean']
kipoi_metrics.loc['human'] = pd.read_csv(
    '/home/ubuntu/5UTR/Modelling/human_10foldcv_metrics.csv',
    index_col=0,
).loc['mean']

flatten_df(kipoi_metrics)

r-squared = 0.8033 | pearson r = 0.8963 | spearman R = 0.8251
r-squared = 0.7721 | pearson r = 0.8787 | spearman R = 0.8403
r-squared = 0.7203 | pearson r = 0.8487 | spearman R = 0.8356
r-squared = 0.6375 | pearson r = 0.7985 | spearman R = 0.7981
r-squared = 0.4637 | pearson r = 0.6809 | spearman R = 0.6575
r-squared = 0.3566 | pearson r = 0.5972 | spearman R = 0.6139
r-squared = 0.2055 | pearson r = 0.4533 | spearman R = 0.4549
r-squared = 0.1704 | pearson r = 0.4128 | spearman R = 0.4020
r-squared = 0.7229 | pearson r = 0.8502 | spearman R = 0.8319
r-squared = 0.2481 | pearson r = 0.4981 | spearman R = 0.5162


Unnamed: 0,random_R2,random_Pearson R,random_Spearman R,random_24_44_R2,random_24_44_Pearson R,random_24_44_Spearman R,random_44_64_R2,random_44_64_Pearson R,random_44_64_Spearman R,random_64_84_R2,...,human_24_44_Spearman R,human_44_64_R2,human_44_64_Pearson R,human_44_64_Spearman R,human_64_84_R2,human_64_84_Pearson R,human_64_84_Spearman R,human_84_104_R2,human_84_104_Pearson R,human_84_104_Spearman R
0,0.75202,0.86578,0.83694,0.803294,0.896267,0.825137,0.772131,0.87871,0.840275,0.720307,...,0.657506,0.356613,0.597171,0.613899,0.205478,0.453296,0.454905,0.170372,0.412761,0.402048


In [79]:
# Show the Kipoi metrics table in long form (one row per dataset / length bin).
kipoi_metrics

Unnamed: 0,R2,Pearson R,Spearman R
random,0.75202,0.86578,0.83694
random_24_44,0.803294,0.896267,0.825137
random_44_64,0.772131,0.87871,0.840275
random_64_84,0.720307,0.848709,0.835616
random_84_104,0.637534,0.798457,0.798077
human,0.33485,0.54025,0.56049
human_24_44,0.463651,0.680919,0.657506
human_44_64,0.356613,0.597171,0.613899
human_64_84,0.205478,0.453296,0.454905
human_84_104,0.170372,0.412761,0.402048


# Optimus
http://localhost:8890/notebooks/CNN/benchmark%20keras%20model-Independent-Unmod.ipynb

In [87]:
# Optimus: load the saved result tables for the random and human sets and
# score them overall and per length bin (default label column 'rl').
optimus_random = pd.read_csv(
    '/home/ubuntu/CNN/Sample_Keras_RandomAll_10foldcv.csv'
)
optimus_human = pd.read_csv(
    '/home/ubuntu/CNN/Sample_Keras_HumanAll_10foldcv.csv'
)
optimus_metrics = results_combine_random_human(random_res=optimus_random, human_res=optimus_human)

# Replace the two overall rows with the 'mean' row of the saved metrics files
# (presumably the mean over CV folds — confirm).
optimus_metrics.loc['random'] = pd.read_csv(
    '/home/ubuntu/CNN/Sample_Keras_RandomAll_10foldcv_metrics.csv',
    index_col=0,
).loc['mean']
optimus_metrics.loc['human'] = pd.read_csv(
    '/home/ubuntu/CNN/Sample_Keras_HumanAll_10foldcv_metrics.csv',
    index_col=0,
).loc['mean']

flatten_df(optimus_metrics)

r-squared = 0.8461 | pearson r = 0.9198 | spearman R = 0.8635
r-squared = 0.8109 | pearson r = 0.9005 | spearman R = 0.8739
r-squared = 0.7326 | pearson r = 0.8559 | spearman R = 0.8463
r-squared = 0.6424 | pearson r = 0.8015 | spearman R = 0.8046
r-squared = 0.6952 | pearson r = 0.8338 | spearman R = 0.7695
r-squared = 0.6393 | pearson r = 0.7995 | spearman R = 0.7622
r-squared = 0.6052 | pearson r = 0.7779 | spearman R = 0.7413
r-squared = 0.5905 | pearson r = 0.7684 | spearman R = 0.7370
r-squared = 0.7469 | pearson r = 0.8642 | spearman R = 0.8505
r-squared = 0.6182 | pearson r = 0.7862 | spearman R = 0.7553


Unnamed: 0,random_R2,random_Pearson R,random_Spearman R,random_24_44_R2,random_24_44_Pearson R,random_24_44_Spearman R,random_44_64_R2,random_44_64_Pearson R,random_44_64_Spearman R,random_64_84_R2,...,human_24_44_Spearman R,human_44_64_R2,human_44_64_Pearson R,human_44_64_Spearman R,human_64_84_R2,human_64_84_Pearson R,human_64_84_Spearman R,human_84_104_R2,human_84_104_Pearson R,human_84_104_Spearman R
0,0.782477,0.883108,0.862865,0.846098,0.919836,0.863525,0.81087,0.900483,0.873877,0.732567,...,0.769516,0.639255,0.799534,0.762224,0.605203,0.777948,0.741331,0.590469,0.76842,0.73701


# ESM_MLP

In [101]:
# ESM_MLP: predictions are stored one CSV per validation fold (folds 0-9).
# Read them all and concatenate once. `DataFrame.append` was removed in
# pandas 2.0 and copied the growing frame on every iteration; `pd.concat`
# keeps each file's own row index, as `append` did.
esm_mlp_random = pd.concat(
    [pd.read_csv(f'/home/ubuntu/esm2/Sample/y_pred/ESM2SISS_FS4.1.VaryLenRandomAll50.ep300.1e-2.dr5_sequence_num83919_utr_50_10folds_rl_LabelScalerFalse_LabelLog2False_AvgEmbFalse_BosEmbTrue_CNNlayer0_epoch300_nodes40_dropout30.5_finetuneTrue_huberlossTrue_magicFalse_lr0.01_val_fold{i}.csv')
     for i in range(10)]
)
esm_mlp_human = pd.concat(
    [pd.read_csv(f'/home/ubuntu/esm2/Sample/y_pred/ESM2SISS_FS4.1.VaryLenHumanAll50.ep500.1e-2.dr5_sequence_num15555_utr_50_10folds_rl_LabelScalerFalse_LabelLog2False_AvgEmbFalse_BosEmbTrue_CNNlayer0_epoch500_nodes40_dropout30.5_finetuneTrue_huberlossTrue_magicFalse_lr0.01_val_fold{i}.csv')
     for i in range(10)]
)
esm_mlp_metrics = results_combine_random_human(esm_mlp_random, esm_mlp_human)

# Replace the two overall rows with the validation columns of the 'mean' row
# of the saved metrics files. `.values` assigns by position, so the column
# order here must match R2 / Pearson R / Spearman R.
esm_mlp_metrics.loc['random'] = pd.read_csv('/home/ubuntu/esm2/Sample/results/ESM2SISS_FS4.1.VaryLenRandomAll50.ep300.1e-2.dr5_sequence_num83919_utr_50_10folds_rl_LabelScalerFalse_LabelLog2False_AvgEmbFalse_BosEmbTrue_CNNlayer0_epoch300_nodes40_dropout30.5_finetuneTrue_huberlossTrue_magicFalse_lr0.01_metrics.csv', index_col = 0).loc['mean', :][['val_R2', 'val_PearsonR', 'val_SpearmanR']].values
esm_mlp_metrics.loc['human'] = pd.read_csv('/home/ubuntu/esm2/Sample/results/ESM2SISS_FS4.1.VaryLenHumanAll50.ep500.1e-2.dr5_sequence_num15555_utr_50_10folds_rl_LabelScalerFalse_LabelLog2False_AvgEmbFalse_BosEmbTrue_CNNlayer0_epoch500_nodes40_dropout30.5_finetuneTrue_huberlossTrue_magicFalse_lr0.01_metrics.csv', index_col = 0).loc['mean', :][['val_R2', 'val_PearsonR', 'val_SpearmanR']].values

flatten_df(esm_mlp_metrics)

r-squared = 0.8196 | pearson r = 0.9053 | spearman R = 0.8417
r-squared = 0.8028 | pearson r = 0.8960 | spearman R = 0.8649
r-squared = 0.7454 | pearson r = 0.8634 | spearman R = 0.8491
r-squared = 0.6701 | pearson r = 0.8186 | spearman R = 0.8148
r-squared = 0.5888 | pearson r = 0.7673 | spearman R = 0.7288
r-squared = 0.6012 | pearson r = 0.7754 | spearman R = 0.7504
r-squared = 0.5783 | pearson r = 0.7605 | spearman R = 0.7361
r-squared = 0.5325 | pearson r = 0.7297 | spearman R = 0.7042
r-squared = 0.7527 | pearson r = 0.8676 | spearman R = 0.8485
r-squared = 0.5729 | pearson r = 0.7569 | spearman R = 0.7335


Unnamed: 0,random_R2,random_Pearson R,random_Spearman R,random_24_44_R2,random_24_44_Pearson R,random_24_44_Spearman R,random_44_64_R2,random_44_64_Pearson R,random_44_64_Spearman R,random_64_84_R2,...,human_24_44_Spearman R,human_44_64_R2,human_44_64_Pearson R,human_44_64_Spearman R,human_64_84_R2,human_64_84_Pearson R,human_64_84_Spearman R,human_84_104_R2,human_84_104_Pearson R,human_84_104_Spearman R
0,0.757265,0.870007,0.852474,0.819585,0.90531,0.841668,0.802807,0.895995,0.864913,0.745448,...,0.728772,0.601203,0.775373,0.750402,0.578304,0.760463,0.736117,0.532496,0.729723,0.704153


# ESM_ResNet
http://localhost:8890/notebooks/RNA-FM/tutorials/utr-function-prediction/ESM-UTR-Function-Prediction-10foldcvBenchmarkSplit-nofrozon.ipynb (note: that notebook does not contain the code below)

In [104]:
# ESM_ResNet: load the saved result tables for the random and human sets and
# score them overall and per length bin; observed values are in 'y_true'.
esm_resnet_random = pd.read_csv(
    '/home/ubuntu/RNA-FM/tutorials/utr_results/ESM2_ResNet_Random_Len100_nofrozon_reloadCVmodel.csv'
)
esm_resnet_human = pd.read_csv(
    '/home/ubuntu/RNA-FM/tutorials/utr_results/ESM2_ResNet_Human_Len100_nofrozon_reloadCVmodel.csv'
)
esm_resnet_metrics = results_combine_random_human(
    random_res=esm_resnet_random,
    human_res=esm_resnet_human,
    label_col='y_true',
)

# Replace the two overall rows with the 'mean' row of the saved metrics files
# (presumably the mean over CV folds — confirm).
esm_resnet_metrics.loc['random'] = pd.read_csv(
    '/home/ubuntu/RNA-FM/tutorials/utr_metrics/ESM2_ResNet_Random_Len100_nofrozon_reloadCVmodel.csv',
    index_col=0,
).loc['mean']
esm_resnet_metrics.loc['human'] = pd.read_csv(
    '/home/ubuntu/RNA-FM/tutorials/utr_metrics/ESM2_ResNet_Human_Len100_nofrozon_reloadCVmodel.csv',
    index_col=0,
).loc['mean']

flatten_df(esm_resnet_metrics)

r-squared = 0.8625 | pearson r = 0.9287 | spearman R = 0.8608
r-squared = 0.8429 | pearson r = 0.9181 | spearman R = 0.8735
r-squared = 0.7817 | pearson r = 0.8841 | spearman R = 0.8666
r-squared = 0.6812 | pearson r = 0.8253 | spearman R = 0.8246
r-squared = 0.6431 | pearson r = 0.8019 | spearman R = 0.7434
r-squared = 0.6321 | pearson r = 0.7950 | spearman R = 0.7648
r-squared = 0.6199 | pearson r = 0.7873 | spearman R = 0.7579
r-squared = 0.5949 | pearson r = 0.7713 | spearman R = 0.7392
r-squared = 0.7834 | pearson r = 0.8851 | spearman R = 0.8634
r-squared = 0.6188 | pearson r = 0.7866 | spearman R = 0.7559


Unnamed: 0,random_R2,random_Pearson R,random_Spearman R,random_24_44_R2,random_24_44_Pearson R,random_24_44_Spearman R,random_44_64_R2,random_44_64_Pearson R,random_44_64_Spearman R,random_64_84_R2,...,human_24_44_Spearman R,human_44_64_R2,human_44_64_Pearson R,human_44_64_Spearman R,human_64_84_R2,human_64_84_Pearson R,human_64_84_Spearman R,human_84_104_R2,human_84_104_Pearson R,human_84_104_Spearman R
0,0.81016,0.898676,0.869466,0.862537,0.928729,0.860809,0.842931,0.918113,0.873509,0.781709,...,0.743435,0.632053,0.795017,0.764786,0.619885,0.787328,0.757854,0.594873,0.77128,0.739226


# RNAFM_MLP
http://localhost:8890/notebooks/RNA-FM/tutorials/utr-function-prediction/UTR-MLP-Function-Prediction-BenchmarkSplit-10foldcv.ipynb

In [106]:
# RNAFM_MLP: load the saved result tables for the random and human sets and
# score them overall and per length bin; observed values are in 'y_true'.
rnafm_mlp_random = pd.read_csv(
    '/home/ubuntu/RNA-FM/tutorials/utr_results/RNAFM_MLP_Random_Len100_reloadCVmodel.csv'
)
rnafm_mlp_human = pd.read_csv(
    '/home/ubuntu/RNA-FM/tutorials/utr_results/RNAFM_MLP_human_reloadCVmodel.csv'
)
rnafm_mlp_metrics = results_combine_random_human(
    random_res=rnafm_mlp_random,
    human_res=rnafm_mlp_human,
    label_col='y_true',
)

# Replace the two overall rows with the 'mean' row of the saved metrics files
# (presumably the mean over CV folds — confirm).
rnafm_mlp_metrics.loc['random'] = pd.read_csv(
    '/home/ubuntu/RNA-FM/tutorials/utr_metrics/RNAFM_MLP_Random_Len100_reloadCVmodel.csv',
    index_col=0,
).loc['mean']
rnafm_mlp_metrics.loc['human'] = pd.read_csv(
    '/home/ubuntu/RNA-FM/tutorials/utr_metrics/RNAFM_MLP_human_reloadCVmodel.csv',
    index_col=0,
).loc['mean']

flatten_df(rnafm_mlp_metrics)

r-squared = 0.1907 | pearson r = 0.4367 | spearman R = 0.4261
r-squared = 0.1864 | pearson r = 0.4318 | spearman R = 0.4232
r-squared = 0.1658 | pearson r = 0.4071 | spearman R = 0.4087
r-squared = 0.1360 | pearson r = 0.3687 | spearman R = 0.3745
r-squared = 0.1962 | pearson r = 0.4430 | spearman R = 0.4858
r-squared = 0.1811 | pearson r = 0.4256 | spearman R = 0.4836
r-squared = 0.1534 | pearson r = 0.3916 | spearman R = 0.4178
r-squared = 0.1131 | pearson r = 0.3364 | spearman R = 0.3314
r-squared = 0.1728 | pearson r = 0.4157 | spearman R = 0.4091
r-squared = 0.1544 | pearson r = 0.3929 | spearman R = 0.4275


Unnamed: 0,random_R2,random_Pearson R,random_Spearman R,random_24_44_R2,random_24_44_Pearson R,random_24_44_Spearman R,random_44_64_R2,random_44_64_Pearson R,random_44_64_Spearman R,random_64_84_R2,...,human_24_44_Spearman R,human_44_64_R2,human_44_64_Pearson R,human_44_64_Spearman R,human_64_84_R2,human_64_84_Pearson R,human_64_84_Spearman R,human_84_104_R2,human_84_104_Pearson R,human_84_104_Spearman R
0,0.174394,0.416147,0.414232,0.190718,0.436713,0.426053,0.186429,0.431774,0.42323,0.165764,...,0.485789,0.181142,0.425608,0.483644,0.153382,0.39164,0.417752,0.113143,0.336368,0.331376


# RNAFM_ResNet
http://localhost:8890/notebooks/RNA-FM/tutorials/utr-function-prediction/UTR-Function-Prediction-HumanIndependent-reloadCVmodel.ipynb
http://localhost:8890/notebooks/RNA-FM/tutorials/utr-function-prediction/UTR-Function-Prediction-RandomIndependent-reloadCVmodel.ipynb

In [108]:
# RNAFM_ResNet: load the saved result tables for the random and human sets
# and score them overall and per length bin; observed values are in 'y_true'.
rnafm_resnet_random = pd.read_csv(
    '/home/ubuntu/RNA-FM/tutorials/utr_results/RNAFM_ResNet_Random_Len100_reloadCVmodel.csv'
)
rnafm_resnet_human = pd.read_csv(
    '/home/ubuntu/RNA-FM/tutorials/utr_results/RNAFM_ResNet_Human_Len100_reloadCVmodel.csv'
)
rnafm_resnet_metrics = results_combine_random_human(
    random_res=rnafm_resnet_random,
    human_res=rnafm_resnet_human,
    label_col='y_true',
)

# Replace the two overall rows with the 'mean' row of the saved metrics files
# (presumably the mean over CV folds — confirm).
rnafm_resnet_metrics.loc['random'] = pd.read_csv(
    '/home/ubuntu/RNA-FM/tutorials/utr_metrics/RNAFM_ResNet_Random_Len100_reloadCVmodel.csv',
    index_col=0,
).loc['mean']
rnafm_resnet_metrics.loc['human'] = pd.read_csv(
    '/home/ubuntu/RNA-FM/tutorials/utr_metrics/RNAFM_ResNet_Human_Len100_reloadCVmodel.csv',
    index_col=0,
).loc['mean']

flatten_df(rnafm_resnet_metrics)

r-squared = 0.8497 | pearson r = 0.9218 | spearman R = 0.8546
r-squared = 0.8288 | pearson r = 0.9104 | spearman R = 0.8751
r-squared = 0.7749 | pearson r = 0.8803 | spearman R = 0.8630
r-squared = 0.6659 | pearson r = 0.8161 | spearman R = 0.8160
r-squared = 0.6187 | pearson r = 0.7866 | spearman R = 0.7247
r-squared = 0.5751 | pearson r = 0.7583 | spearman R = 0.7157
r-squared = 0.5694 | pearson r = 0.7546 | spearman R = 0.7158
r-squared = 0.5425 | pearson r = 0.7366 | spearman R = 0.6928
r-squared = 0.7718 | pearson r = 0.8785 | spearman R = 0.8589
r-squared = 0.5691 | pearson r = 0.7544 | spearman R = 0.7138


Unnamed: 0,random_R2,random_Pearson R,random_Spearman R,random_24_44_R2,random_24_44_Pearson R,random_24_44_Spearman R,random_44_64_R2,random_44_64_Pearson R,random_44_64_Spearman R,random_64_84_R2,...,human_24_44_Spearman R,human_44_64_R2,human_44_64_Pearson R,human_44_64_Spearman R,human_64_84_R2,human_64_84_Pearson R,human_64_84_Spearman R,human_84_104_R2,human_84_104_Pearson R,human_84_104_Spearman R
0,0.800273,0.893296,0.864236,0.849723,0.921804,0.854617,0.828792,0.91038,0.875059,0.774893,...,0.724697,0.575056,0.758325,0.715716,0.569414,0.754595,0.715762,0.542535,0.73657,0.692817


# Combined results

In [110]:
# One row per method: flatten each method's metrics table to a single row,
# stack the rows, and label them with the method name. The order of the name
# list must match the order of the frames passed to `pd.concat`.
metrics_df = (
    pd.concat(
        [
            flatten_df(esm_resnet_metrics),
            flatten_df(esm_mlp_metrics),
            flatten_df(optimus_metrics),
            flatten_df(rnafm_resnet_metrics),
            flatten_df(kipoi_metrics),
            flatten_df(rnafm_mlp_metrics),
        ],
        axis=0,
    )
    .assign(methods=['ESM_ResNet', 'ESM_MLP', 'Optimus', 'RNAFM_ResNet', 'Kipoi', 'RNAFM_MLP'])
    .set_index('methods')
)
metrics_df

Unnamed: 0_level_0,random_R2,random_Pearson R,random_Spearman R,random_24_44_R2,random_24_44_Pearson R,random_24_44_Spearman R,random_44_64_R2,random_44_64_Pearson R,random_44_64_Spearman R,random_64_84_R2,...,human_24_44_Spearman R,human_44_64_R2,human_44_64_Pearson R,human_44_64_Spearman R,human_64_84_R2,human_64_84_Pearson R,human_64_84_Spearman R,human_84_104_R2,human_84_104_Pearson R,human_84_104_Spearman R
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ESM_ResNet,0.81016,0.898676,0.869466,0.862537,0.928729,0.860809,0.842931,0.918113,0.873509,0.781709,...,0.743435,0.632053,0.795017,0.764786,0.619885,0.787328,0.757854,0.594873,0.77128,0.739226
ESM_MLP,0.757265,0.870007,0.852474,0.819585,0.90531,0.841668,0.802807,0.895995,0.864913,0.745448,...,0.728772,0.601203,0.775373,0.750402,0.578304,0.760463,0.736117,0.532496,0.729723,0.704153
Optimus,0.782477,0.883108,0.862865,0.846098,0.919836,0.863525,0.81087,0.900483,0.873877,0.732567,...,0.769516,0.639255,0.799534,0.762224,0.605203,0.777948,0.741331,0.590469,0.76842,0.73701
RNAFM_ResNet,0.800273,0.893296,0.864236,0.849723,0.921804,0.854617,0.828792,0.91038,0.875059,0.774893,...,0.724697,0.575056,0.758325,0.715716,0.569414,0.754595,0.715762,0.542535,0.73657,0.692817
Kipoi,0.75202,0.86578,0.83694,0.803294,0.896267,0.825137,0.772131,0.87871,0.840275,0.720307,...,0.657506,0.356613,0.597171,0.613899,0.205478,0.453296,0.454905,0.170372,0.412761,0.402048
RNAFM_MLP,0.174394,0.416147,0.414232,0.190718,0.436713,0.426053,0.186429,0.431774,0.42323,0.165764,...,0.485789,0.181142,0.425608,0.483644,0.153382,0.39164,0.417752,0.113143,0.336368,0.331376


In [111]:
# Spearman columns for the random set (overall and per length bin), all methods.
metrics_df.loc[:, [name for name in metrics_df.columns if 'random' in name and 'Spearman' in name]]

Unnamed: 0_level_0,random_Spearman R,random_24_44_Spearman R,random_44_64_Spearman R,random_64_84_Spearman R,random_84_104_Spearman R
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ESM_ResNet,0.869466,0.860809,0.873509,0.866612,0.824605
ESM_MLP,0.852474,0.841668,0.864913,0.849126,0.814752
Optimus,0.862865,0.863525,0.873877,0.846275,0.804603
RNAFM_ResNet,0.864236,0.854617,0.875059,0.863038,0.815993
Kipoi,0.83694,0.825137,0.840275,0.835616,0.798077
RNAFM_MLP,0.414232,0.426053,0.42323,0.408661,0.374491


In [112]:
# Spearman columns for the human set (overall and per length bin), all methods.
metrics_df.loc[:, [name for name in metrics_df.columns if 'human' in name and 'Spearman' in name]]

Unnamed: 0_level_0,human_Spearman R,human_24_44_Spearman R,human_44_64_Spearman R,human_64_84_Spearman R,human_84_104_Spearman R
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ESM_ResNet,0.773886,0.743435,0.764786,0.757854,0.739226
ESM_MLP,0.740041,0.728772,0.750402,0.736117,0.704153
Optimus,0.785551,0.769516,0.762224,0.741331,0.73701
RNAFM_ResNet,0.733541,0.724697,0.715716,0.715762,0.692817
Kipoi,0.56049,0.657506,0.613899,0.454905,0.402048
RNAFM_MLP,0.436578,0.485789,0.483644,0.417752,0.331376
