In [1]:
import os
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
from torch.utils.data import DataLoader, Dataset

import math
import random
import pandas as pd
import numpy as np
import warnings
from tqdm import tqdm, trange
from copy import deepcopy
import scipy.stats as stats
from scipy.stats import spearmanr, pearsonr
from sklearn import preprocessing
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import KFold
# Seed every random number generator this notebook touches: Python's
# `random`, NumPy, and PyTorch (CPU plus the CUDA generators).
seed = 1337
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
# Suppress all warnings for the remainder of the session.
warnings.filterwarnings("ignore")

def r2(x,y):
    """Return the squared correlation coefficient of a linear fit of y on x.

    The value is the square of the `rvalue` reported by
    `scipy.stats.linregress(x, y)`.
    """
    fit = stats.linregress(x, y)
    return fit.rvalue ** 2

def performances(label, pred):
    """Compute, print and return three agreement metrics between label and pred.

    Parameters
    ----------
    label, pred : array-like
        Paired sequences of observed and predicted values, passed unchanged
        to `r2`, `scipy.stats.pearsonr` and `scipy.stats.spearmanr`.

    Returns
    -------
    list
        `[r_squared, pearson_r, spearman_r]`, in that order.

    Notes
    -----
    If `pearsonr` or `spearmanr` raises, the corresponding metric falls back
    to the placeholder value 1e-9 instead of propagating the error. Only
    `Exception` subclasses are caught, so Ctrl-C (`KeyboardInterrupt`) and
    `SystemExit` are no longer swallowed. The `r2` call is not guarded.
    """
    r = r2(label, pred)

    try:
        pearson_r = pearsonr(label, pred)[0]
    except Exception:
        # Best-effort: keep the summary table usable when the metric fails.
        pearson_r = 1e-9

    try:
        sp_cor = spearmanr(label, pred)[0]
    except Exception:
        # Same placeholder as above for a failed Spearman computation.
        sp_cor = 1e-9

    print(f'r-squared = {r:.4f} | pearson r = {pearson_r:.4f} | spearman R = {sp_cor:.4f}')

    return [r, pearson_r, sp_cor]


In [13]:
# NOTE: a `global` statement at module (cell) level has no effect; the names
# below are module-level either way. results_labeled reads `bins` from here.
global bins, labels
# Length bin edges 24, 44, 64, 84, 104; with pd.cut these give the intervals
# (24, 44], (44, 64], (64, 84], (84, 104] that results_labeled matches on.
bins = np.arange(24, 105, 20)
# Human-readable range names; not referenced by any code visible in this notebook.
# NOTE(review): the last label says 85-100 while the last bin edge is 104 — confirm.
labels = [ '25-44' , '45-64', '65-84', '85-100']

def results_labeled(e_test):
    """Split a results frame into four sub-frames by sequence-length bin.

    Side effects on `e_test` (it is modified in place):
      * a 'len' column is added, when missing, holding the length of each
        entry of `e_test.utr`;
      * an 'rng' column is (re)written with the string form of the
        `pd.cut` interval that 'len' falls into, using the module-level `bins`.

    Returns the rows whose 'rng' equals '(24, 44]', '(44, 64]', '(64, 84]'
    and '(84, 104]', as a 4-tuple in that order.
    """
    if 'len' not in e_test.columns:
        e_test['len'] = list(map(len, e_test.utr))

    # Store the interval as text so the bins can be selected by literal string.
    e_test['rng'] = pd.cut(e_test['len'], bins=bins).astype('str')

    interval_names = ('(24, 44]', '(44, 64]', '(64, 84]', '(84, 104]')
    return tuple(e_test[e_test['rng'] == name] for name in interval_names)

def flatten_df(df):
    """Flatten a 2-D frame into a single-row frame.

    The frame is stacked into a Series keyed by (row label, column label),
    turned into a one-column frame and transposed, so the result has one row.
    Each output column is named '<row label>_<column label>'.
    """
    one_row = df.stack().to_frame().T

    # Replace the (row, column) tuple keys with flat 'row_column' names.
    one_row.columns = [f'{key[0]}_{key[1]}' for key in one_row.columns]
    return one_row

def results_combine_random_human(random_res, human_res, label_col = 'rl'):
    """Score the random and human result frames overall and per length bin.

    Parameters
    ----------
    random_res, human_res : pandas.DataFrame
        Result frames; each must provide `label_col`, a 'y_pred' column and
        whatever `results_labeled` needs to bin rows by length. Both frames
        are passed to `results_labeled`, which adds columns to them in place.
    label_col : str, default 'rl'
        Name of the column holding the observed values.

    Returns
    -------
    pandas.DataFrame
        Ten rows ('random', 'random_24_44', ..., 'human', 'human_24_44', ...)
        by three columns ('R2', 'Pearson R', 'Spearman R').

    Notes
    -----
    Prints the row count of each of the eight binned subsets, and
    `performances` prints one metrics line per scored subset. The scoring
    order is: the four random bins, the four human bins, all random rows,
    all human rows.
    """
    bin_tags = ('24_44', '44_64', '64_84', '84_104')

    random_bins = results_labeled(random_res)
    human_bins = results_labeled(human_res)

    # Report subset sizes: random bins first, then human bins.
    for subset in (*random_bins, *human_bins):
        print(len(subset))

    def _score(frame):
        # Compare the observed column against the model predictions.
        return performances(frame.loc[:, label_col], frame.loc[:, 'y_pred'])

    random_bin_metrics = [_score(subset) for subset in random_bins]
    human_bin_metrics = [_score(subset) for subset in human_bins]
    metrics_random = _score(random_res)
    metrics_human = _score(human_res)

    rows = [metrics_random, *random_bin_metrics, metrics_human, *human_bin_metrics]
    row_names = (['random'] + [f'random_{tag}' for tag in bin_tags]
                 + ['human'] + [f'human_{tag}' for tag in bin_tags])

    metrics_df = pd.DataFrame(rows,
                              columns = ['R2', 'Pearson R', 'Spearman R'],
                              index = row_names)
    return metrics_df

# Kipoi
http://localhost:8890/notebooks/5UTR/Modelling/Model_Training-Independent-BenchmarkSplit-no10fold.ipynb

In [14]:
# Load the saved prediction tables for the random and human test sets
# (first CSV column becomes the index). Presumably these were written by the
# Kipoi notebook linked above — confirm.
kipoi_random = pd.read_csv('/home/ubuntu/5UTR/Modelling/random_results_benchmarksplit.csv', index_col = 0)
kipoi_human = pd.read_csv('/home/ubuntu/5UTR/Modelling/human_results_benchmarksplit.csv', index_col = 0)
# Score overall and per length bin, using the default label column 'rl'.
kipoi_metrics = results_combine_random_human(kipoi_random, kipoi_human)
# Show the metrics as a single wide row.
flatten_df(kipoi_metrics)

2000
2000
2000
1600
2000
2000
2000
1600
r-squared = 0.8432 | pearson r = 0.9183 | spearman R = 0.8568
r-squared = 0.8533 | pearson r = 0.9238 | spearman R = 0.8846
r-squared = 0.7898 | pearson r = 0.8887 | spearman R = 0.8847
r-squared = 0.7392 | pearson r = 0.8598 | spearman R = 0.8581
r-squared = 0.7898 | pearson r = 0.8887 | spearman R = 0.7881
r-squared = 0.7996 | pearson r = 0.8942 | spearman R = 0.8283
r-squared = 0.7610 | pearson r = 0.8723 | spearman R = 0.8222
r-squared = 0.7130 | pearson r = 0.8444 | spearman R = 0.8100
r-squared = 0.8002 | pearson r = 0.8945 | spearman R = 0.8830
r-squared = 0.7561 | pearson r = 0.8695 | spearman R = 0.8211


Unnamed: 0,random_R2,random_Pearson R,random_Spearman R,random_24_44_R2,random_24_44_Pearson R,random_24_44_Spearman R,random_44_64_R2,random_44_64_Pearson R,random_44_64_Spearman R,random_64_84_R2,...,human_24_44_Spearman R,human_44_64_R2,human_44_64_Pearson R,human_44_64_Spearman R,human_64_84_R2,human_64_84_Pearson R,human_64_84_Spearman R,human_84_104_R2,human_84_104_Pearson R,human_84_104_Spearman R
0,0.80016,0.894517,0.882962,0.843216,0.918268,0.856819,0.853315,0.92375,0.884578,0.789794,...,0.788118,0.799625,0.894217,0.828308,0.76098,0.872342,0.822238,0.713043,0.844419,0.810013


# Optimus

In [4]:
# Load the saved prediction tables for the random and human test sets.
# NOTE: the next cell rebinds optimus_random / optimus_human / optimus_metrics,
# so these values are overwritten when the notebook is run top to bottom.
optimus_random = pd.read_csv('/home/ubuntu/CNN/Sample_Keras_Random7600_BenchmarkSplit.csv')
optimus_human = pd.read_csv('/home/ubuntu/CNN/Sample_Keras_Human7600_BenchmarkSplit.csv')
# Score overall and per length bin, using the default label column 'rl'.
optimus_metrics = results_combine_random_human(optimus_random, optimus_human)
# Show the metrics as a single wide row.
flatten_df(optimus_metrics)

r-squared = 0.8854 | pearson r = 0.9410 | spearman R = 0.8893
r-squared = 0.8769 | pearson r = 0.9364 | spearman R = 0.9017
r-squared = 0.8308 | pearson r = 0.9115 | spearman R = 0.9060
r-squared = 0.7771 | pearson r = 0.8815 | spearman R = 0.8847
r-squared = 0.8430 | pearson r = 0.9181 | spearman R = 0.8239
r-squared = 0.8086 | pearson r = 0.8992 | spearman R = 0.8445
r-squared = 0.7809 | pearson r = 0.8837 | spearman R = 0.8355
r-squared = 0.7290 | pearson r = 0.8538 | spearman R = 0.8207
r-squared = 0.8377 | pearson r = 0.9152 | spearman R = 0.9092
r-squared = 0.7778 | pearson r = 0.8819 | spearman R = 0.8394


Unnamed: 0,random_R2,random_Pearson R,random_Spearman R,random_24_44_R2,random_24_44_Pearson R,random_24_44_Spearman R,random_44_64_R2,random_44_64_Pearson R,random_44_64_Spearman R,random_64_84_R2,...,human_24_44_Spearman R,human_44_64_R2,human_44_64_Pearson R,human_44_64_Spearman R,human_64_84_R2,human_64_84_Pearson R,human_64_84_Spearman R,human_84_104_R2,human_84_104_Pearson R,human_84_104_Spearman R
0,0.837656,0.915236,0.9092,0.885389,0.940951,0.889312,0.876931,0.936446,0.90166,0.830771,...,0.823899,0.8086,0.899222,0.844478,0.780942,0.883709,0.835465,0.728957,0.85379,0.820719


In [5]:
# Load the "10foldcvTrain" variant of the prediction tables.
# NOTE: this rebinds the optimus_* names from the previous cell; the combined
# table further down uses whichever `optimus_metrics` was assigned last.
optimus_random = pd.read_csv('/home/ubuntu/CNN/Sample_Keras_Random7600_BenchmarkSplit_10foldcvTrain.csv')
optimus_human = pd.read_csv('/home/ubuntu/CNN/Sample_Keras_Human7600_BenchmarkSplit_10foldcvTrain.csv')
# Score overall and per length bin, using the default label column 'rl'.
optimus_metrics = results_combine_random_human(optimus_random, optimus_human)
# Show the metrics as a single wide row.
flatten_df(optimus_metrics)

r-squared = 0.8680 | pearson r = 0.9317 | spearman R = 0.8744
r-squared = 0.8673 | pearson r = 0.9313 | spearman R = 0.8955
r-squared = 0.8235 | pearson r = 0.9075 | spearman R = 0.9041
r-squared = 0.7753 | pearson r = 0.8805 | spearman R = 0.8851
r-squared = 0.8132 | pearson r = 0.9017 | spearman R = 0.7988
r-squared = 0.8018 | pearson r = 0.8954 | spearman R = 0.8344
r-squared = 0.7893 | pearson r = 0.8884 | spearman R = 0.8405
r-squared = 0.7335 | pearson r = 0.8564 | spearman R = 0.8207
r-squared = 0.8294 | pearson r = 0.9107 | spearman R = 0.9015
r-squared = 0.7743 | pearson r = 0.8799 | spearman R = 0.8312


Unnamed: 0,random_R2,random_Pearson R,random_Spearman R,random_24_44_R2,random_24_44_Pearson R,random_24_44_Spearman R,random_44_64_R2,random_44_64_Pearson R,random_44_64_Spearman R,random_64_84_R2,...,human_24_44_Spearman R,human_44_64_R2,human_44_64_Pearson R,human_44_64_Spearman R,human_64_84_R2,human_64_84_Pearson R,human_64_84_Spearman R,human_84_104_R2,human_84_104_Pearson R,human_84_104_Spearman R
0,0.829421,0.910725,0.901452,0.868011,0.931671,0.874385,0.867312,0.931296,0.895506,0.823475,...,0.798849,0.801793,0.895429,0.83439,0.789343,0.88845,0.840477,0.733478,0.856433,0.820659


# ESM_MLP

In [6]:
# Load the saved prediction tables for the random and human test sets
# (the two file names differ only in "testrandom" vs "testhuman").
esm_mlp_random = pd.read_csv('/home/ubuntu/esm2/Sample/y_pred/ESM2SISS_FS4.1.VaryLen50.ep500.1e-2.dr5_VaryLength_utr_50_10folds_rl_LabelScalerFalse_LabelLog2False_AvgEmbFalse_BosEmbTrue_CNNlayer0_epoch500_nodes40_dropout30.5_finetuneTrue_huberlossTrue_magicFalse_lr0.01_testrandom_fold0.csv')
esm_mlp_human = pd.read_csv('/home/ubuntu/esm2/Sample/y_pred/ESM2SISS_FS4.1.VaryLen50.ep500.1e-2.dr5_VaryLength_utr_50_10folds_rl_LabelScalerFalse_LabelLog2False_AvgEmbFalse_BosEmbTrue_CNNlayer0_epoch500_nodes40_dropout30.5_finetuneTrue_huberlossTrue_magicFalse_lr0.01_testhuman_fold0.csv')
# Score overall and per length bin, using the default label column 'rl'.
esm_mlp_metrics = results_combine_random_human(esm_mlp_random, esm_mlp_human)
# Show the metrics as a single wide row.
flatten_df(esm_mlp_metrics)

r-squared = 0.8607 | pearson r = 0.9277 | spearman R = 0.8659
r-squared = 0.8736 | pearson r = 0.9347 | spearman R = 0.8889
r-squared = 0.7994 | pearson r = 0.8941 | spearman R = 0.8837
r-squared = 0.7057 | pearson r = 0.8400 | spearman R = 0.8390
r-squared = 0.8153 | pearson r = 0.9029 | spearman R = 0.7946
r-squared = 0.8006 | pearson r = 0.8947 | spearman R = 0.8247
r-squared = 0.7585 | pearson r = 0.8709 | spearman R = 0.8133
r-squared = 0.6876 | pearson r = 0.8292 | spearman R = 0.7860
r-squared = 0.8020 | pearson r = 0.8955 | spearman R = 0.8839
r-squared = 0.7515 | pearson r = 0.8669 | spearman R = 0.8132


Unnamed: 0,random_R2,random_Pearson R,random_Spearman R,random_24_44_R2,random_24_44_Pearson R,random_24_44_Spearman R,random_44_64_R2,random_44_64_Pearson R,random_44_64_Spearman R,random_64_84_R2,...,human_24_44_Spearman R,human_44_64_R2,human_44_64_Pearson R,human_44_64_Spearman R,human_64_84_R2,human_64_84_Pearson R,human_64_84_Spearman R,human_84_104_R2,human_84_104_Pearson R,human_84_104_Spearman R
0,0.80195,0.895517,0.883911,0.860683,0.92773,0.865874,0.873578,0.934654,0.888918,0.799383,...,0.794593,0.80055,0.894735,0.824672,0.758453,0.870892,0.81334,0.687624,0.829231,0.786027


# ESM_ResNet
http://localhost:8890/notebooks/RNA-FM/tutorials/utr-function-prediction/ESM-UTR-Function-Prediction-10foldcvBenchmarkSplit-nofrozon.ipynb#没有下面的代码

In [7]:
# Load the saved prediction tables for the random and human test sets.
esm_resnet_random = pd.read_csv('/home/ubuntu/RNA-FM/tutorials/utr_results/ESM2_ResNet_random_10foldcvBenchmarkSplit_nofrozon.csv')
esm_resnet_human = pd.read_csv('/home/ubuntu/RNA-FM/tutorials/utr_results/ESM2_ResNet_human_10foldcvBenchmarkSplit_nofrozon.csv')
# These tables are scored against the 'y_true' column instead of the default 'rl'.
esm_resnet_metrics = results_combine_random_human(esm_resnet_random, esm_resnet_human, 'y_true')
# Show the metrics as a single wide row.
flatten_df(esm_resnet_metrics)

r-squared = 0.8871 | pearson r = 0.9418 | spearman R = 0.8824
r-squared = 0.8973 | pearson r = 0.9472 | spearman R = 0.9039
r-squared = 0.8692 | pearson r = 0.9323 | spearman R = 0.9164
r-squared = 0.7915 | pearson r = 0.8897 | spearman R = 0.8864
r-squared = 0.8331 | pearson r = 0.9127 | spearman R = 0.8229
r-squared = 0.8321 | pearson r = 0.9122 | spearman R = 0.8512
r-squared = 0.8232 | pearson r = 0.9073 | spearman R = 0.8476
r-squared = 0.7610 | pearson r = 0.8723 | spearman R = 0.8293
r-squared = 0.8590 | pearson r = 0.9268 | spearman R = 0.9114
r-squared = 0.8062 | pearson r = 0.8979 | spearman R = 0.8461


Unnamed: 0,random_R2,random_Pearson R,random_Spearman R,random_24_44_R2,random_24_44_Pearson R,random_24_44_Spearman R,random_44_64_R2,random_44_64_Pearson R,random_44_64_Spearman R,random_64_84_R2,...,human_24_44_Spearman R,human_44_64_R2,human_44_64_Pearson R,human_44_64_Spearman R,human_64_84_R2,human_64_84_Pearson R,human_64_84_Spearman R,human_84_104_R2,human_84_104_Pearson R,human_84_104_Spearman R
0,0.858952,0.926796,0.911434,0.887065,0.941841,0.882382,0.897276,0.947247,0.903918,0.869227,...,0.822932,0.83211,0.912201,0.851176,0.823205,0.907306,0.847583,0.760987,0.872346,0.829278


# RNAFM_MLP
http://localhost:8890/notebooks/RNA-FM/tutorials/utr-function-prediction/UTR-MLP-Function-Prediction-BenchmarkSplit-10foldcv.ipynb

In [8]:
# Load the saved prediction tables for the random and human test sets.
rnafm_mlp_random = pd.read_csv('/home/ubuntu/RNA-FM/tutorials/utr_results/RNAFM_MLP_Random_Len100_BenchmarkSplit_10foldCVTrain.csv')
rnafm_mlp_human = pd.read_csv('/home/ubuntu/RNA-FM/tutorials/utr_results/RNAFM_MLP_Human_Len100_BenchmarkSplit_10foldCVTrain.csv')
# These tables are scored against the 'y_true' column instead of the default 'rl'.
rnafm_mlp_metrics = results_combine_random_human(rnafm_mlp_random, rnafm_mlp_human, 'y_true')
# Show the metrics as a single wide row.
flatten_df(rnafm_mlp_metrics)

r-squared = 0.2139 | pearson r = 0.4625 | spearman R = 0.4737
r-squared = 0.2089 | pearson r = 0.4570 | spearman R = 0.4482
r-squared = 0.1594 | pearson r = 0.3992 | spearman R = 0.4030
r-squared = 0.1374 | pearson r = 0.3707 | spearman R = 0.3690
r-squared = 0.1749 | pearson r = 0.4182 | spearman R = 0.3543
r-squared = 0.1869 | pearson r = 0.4324 | spearman R = 0.3811
r-squared = 0.1980 | pearson r = 0.4449 | spearman R = 0.4029
r-squared = 0.1556 | pearson r = 0.3944 | spearman R = 0.3588
r-squared = 0.1885 | pearson r = 0.4341 | spearman R = 0.4299
r-squared = 0.1712 | pearson r = 0.4138 | spearman R = 0.3395


Unnamed: 0,random_R2,random_Pearson R,random_Spearman R,random_24_44_R2,random_24_44_Pearson R,random_24_44_Spearman R,random_44_64_R2,random_44_64_Pearson R,random_44_64_Spearman R,random_64_84_R2,...,human_24_44_Spearman R,human_44_64_R2,human_44_64_Pearson R,human_44_64_Spearman R,human_64_84_R2,human_64_84_Pearson R,human_64_84_Spearman R,human_84_104_R2,human_84_104_Pearson R,human_84_104_Spearman R
0,0.188466,0.434127,0.429931,0.213908,0.462501,0.47368,0.208855,0.457006,0.448223,0.159369,...,0.354283,0.186928,0.432352,0.38112,0.19798,0.444949,0.402933,0.155558,0.394408,0.358794


# RNAFM_ResNet
http://localhost:8890/notebooks/RNA-FM/tutorials/utr-function-prediction/UTR-Function-Prediction-BenchmarkSplit-10foldcv.ipynb

In [9]:
# Load the saved prediction tables for the random and human test sets.
# NOTE(review): the human file is read from 'utr_metrics/' while the random
# file (and the other RNA-FM cells) use 'utr_results/' — confirm this is intended.
rnafm_resnet_random = pd.read_csv('/home/ubuntu/RNA-FM/tutorials/utr_results/RNAFM_ResNet_Random_Len100_BenchmarkSplit_10foldCVTrain.csv')
rnafm_resnet_human = pd.read_csv('/home/ubuntu/RNA-FM/tutorials/utr_metrics/RNAFM_ResNet_Human_Len100_BenchmarkSplit_10foldCVTrain.csv')
# These tables are scored against the 'y_true' column instead of the default 'rl'.
rnafm_resnet_metrics = results_combine_random_human(rnafm_resnet_random, rnafm_resnet_human, 'y_true')
# Show the metrics as a single wide row.
flatten_df(rnafm_resnet_metrics)

r-squared = 0.8506 | pearson r = 0.9223 | spearman R = 0.8652
r-squared = 0.8583 | pearson r = 0.9264 | spearman R = 0.8840
r-squared = 0.8119 | pearson r = 0.9011 | spearman R = 0.8885
r-squared = 0.7544 | pearson r = 0.8686 | spearman R = 0.8677
r-squared = 0.8128 | pearson r = 0.9016 | spearman R = 0.7975
r-squared = 0.8205 | pearson r = 0.9058 | spearman R = 0.8322
r-squared = 0.8086 | pearson r = 0.8992 | spearman R = 0.8385
r-squared = 0.7324 | pearson r = 0.8558 | spearman R = 0.8125
r-squared = 0.8158 | pearson r = 0.9032 | spearman R = 0.8901
r-squared = 0.7866 | pearson r = 0.8869 | spearman R = 0.8303


Unnamed: 0,random_R2,random_Pearson R,random_Spearman R,random_24_44_R2,random_24_44_Pearson R,random_24_44_Spearman R,random_44_64_R2,random_44_64_Pearson R,random_44_64_Spearman R,random_64_84_R2,...,human_24_44_Spearman R,human_44_64_R2,human_44_64_Pearson R,human_44_64_Spearman R,human_64_84_R2,human_64_84_Pearson R,human_64_84_Spearman R,human_84_104_R2,human_84_104_Pearson R,human_84_104_Spearman R
0,0.815805,0.903219,0.890111,0.850609,0.922284,0.865186,0.858266,0.926426,0.884046,0.811928,...,0.79752,0.820486,0.905807,0.832168,0.808586,0.899214,0.83849,0.732446,0.855831,0.812547


# Combined results

In [10]:
# Stack the one-row summaries of every method into a single table and label
# each row with its method name (same order as the frames are concatenated).
metrics_df = (
    pd.concat(
        [flatten_df(per_method) for per_method in (esm_resnet_metrics,
                                                   esm_mlp_metrics,
                                                   optimus_metrics,
                                                   rnafm_resnet_metrics,
                                                   kipoi_metrics,
                                                   rnafm_mlp_metrics)],
        axis = 0,
    )
    .assign(methods = ['ESM_ResNet', 'ESM_MLP', 'Optimus', 'RNAFM_ResNet', 'Kipoi', 'RNAFM_MLP'])
    .set_index('methods')
)
metrics_df

Unnamed: 0_level_0,random_R2,random_Pearson R,random_Spearman R,random_24_44_R2,random_24_44_Pearson R,random_24_44_Spearman R,random_44_64_R2,random_44_64_Pearson R,random_44_64_Spearman R,random_64_84_R2,...,human_24_44_Spearman R,human_44_64_R2,human_44_64_Pearson R,human_44_64_Spearman R,human_64_84_R2,human_64_84_Pearson R,human_64_84_Spearman R,human_84_104_R2,human_84_104_Pearson R,human_84_104_Spearman R
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ESM_ResNet,0.858952,0.926796,0.911434,0.887065,0.941841,0.882382,0.897276,0.947247,0.903918,0.869227,...,0.822932,0.83211,0.912201,0.851176,0.823205,0.907306,0.847583,0.760987,0.872346,0.829278
ESM_MLP,0.80195,0.895517,0.883911,0.860683,0.92773,0.865874,0.873578,0.934654,0.888918,0.799383,...,0.794593,0.80055,0.894735,0.824672,0.758453,0.870892,0.81334,0.687624,0.829231,0.786027
Optimus,0.829421,0.910725,0.901452,0.868011,0.931671,0.874385,0.867312,0.931296,0.895506,0.823475,...,0.798849,0.801793,0.895429,0.83439,0.789343,0.88845,0.840477,0.733478,0.856433,0.820659
RNAFM_ResNet,0.815805,0.903219,0.890111,0.850609,0.922284,0.865186,0.858266,0.926426,0.884046,0.811928,...,0.79752,0.820486,0.905807,0.832168,0.808586,0.899214,0.83849,0.732446,0.855831,0.812547
Kipoi,0.80016,0.894517,0.882962,0.843216,0.918268,0.856819,0.853315,0.92375,0.884578,0.789794,...,0.788118,0.799625,0.894217,0.828308,0.76098,0.872342,0.822238,0.713043,0.844419,0.810013
RNAFM_MLP,0.188466,0.434127,0.429931,0.213908,0.462501,0.47368,0.208855,0.457006,0.448223,0.159369,...,0.354283,0.186928,0.432352,0.38112,0.19798,0.444949,0.402933,0.155558,0.394408,0.358794


In [11]:
# Spearman columns for the random test set (overall and per length bin).
metrics_df.loc[:, [name for name in metrics_df.columns if 'Spearman' in name and 'random' in name]]

Unnamed: 0_level_0,random_Spearman R,random_24_44_Spearman R,random_44_64_Spearman R,random_64_84_Spearman R,random_84_104_Spearman R
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ESM_ResNet,0.911434,0.882382,0.903918,0.916392,0.886393
ESM_MLP,0.883911,0.865874,0.888918,0.88366,0.839016
Optimus,0.901452,0.874385,0.895506,0.904123,0.885118
RNAFM_ResNet,0.890111,0.865186,0.884046,0.888459,0.867702
Kipoi,0.882962,0.856819,0.884578,0.884723,0.858112
RNAFM_MLP,0.429931,0.47368,0.448223,0.402979,0.369013


In [12]:
# Spearman columns for the human test set (overall and per length bin).
metrics_df.loc[:, [name for name in metrics_df.columns if 'Spearman' in name and 'human' in name]]

Unnamed: 0_level_0,human_Spearman R,human_24_44_Spearman R,human_44_64_Spearman R,human_64_84_Spearman R,human_84_104_Spearman R
methods,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ESM_ResNet,0.846065,0.822932,0.851176,0.847583,0.829278
ESM_MLP,0.813245,0.794593,0.824672,0.81334,0.786027
Optimus,0.831189,0.798849,0.83439,0.840477,0.820659
RNAFM_ResNet,0.830283,0.79752,0.832168,0.83849,0.812547
Kipoi,0.821094,0.788118,0.828308,0.822238,0.810013
RNAFM_MLP,0.339463,0.354283,0.38112,0.402933,0.358794
