In [1]:
import sys
import os
import pandas as pd
from sklearn import preprocessing
from tqdm import tqdm
import numpy as np
import random
import warnings
# One seed value shared by every random source seeded in this cell.
seed = 1337  # 2021 (second value left by the author; presumably an alternative seed)
# Seed NumPy's legacy global RNG and the stdlib `random` module.
np.random.seed(seed)
random.seed(seed)
# NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it
# here presumably only affects child processes, not this kernel — confirm.
os.environ['PYTHONHASHSEED'] = f'{seed}'
from copy import deepcopy
import re

In [2]:
import scipy.stats as stats
from scipy.stats import spearmanr, pearsonr

def r2(x,y):
    """Return the squared correlation coefficient of a linear fit of y on x.

    The value is the square of the `rvalue` reported by
    `scipy.stats.linregress(x, y)`, i.e. the squared Pearson correlation
    between the two inputs.
    """
    fit = stats.linregress(x, y)
    return fit.rvalue ** 2

def performances(label, pred, print_ = True):
    """Score predictions against labels with three correlation metrics.

    Parameters
    ----------
    label, pred
        Sequences of values, passed unchanged to `r2`, `pearsonr` and
        `spearmanr`.
    print_ : bool, default True
        When true, print a one-line summary of the three metrics.

    Returns
    -------
    list
        `[r, pearson_r, sp_cor]`: the value returned by `r2`, the Pearson
        correlation and the Spearman correlation. If `pearsonr` or
        `spearmanr` raises, that metric falls back to 1e-9 and a warning
        is emitted; errors raised by `r2` are not caught.
    """
    r = r2(label, pred)

    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit still propagate, and report why the
    # fallback value was used instead of hiding the failure.
    try:
        pearson_r = pearsonr(label, pred)[0]
    except Exception as err:
        warnings.warn(f'pearsonr failed ({err!r}); using fallback value 1e-9')
        pearson_r = 1e-9
    try:
        sp_cor = spearmanr(label, pred)[0]
    except Exception as err:
        warnings.warn(f'spearmanr failed ({err!r}); using fallback value 1e-9')
        sp_cor = 1e-9

    if print_:
        print(f'r-squared = {r:.4f} | pearson r = {pearson_r:.4f} | spearman R = {sp_cor:.4f}')

    return [r, pearson_r, sp_cor]



In [15]:
# Load the Optimus metrics, tag every metric column with the model name, and
# derive the join key 'data' from the first "<digits>.<digits>" token in each
# index label. The key stays a string, so '4.10' and '4.1' remain distinct.
# NOTE(review): in the output shown for this cell, rows 4.7, 4.8 and 4.9 carry
# identical metric values — verify the upstream CSV.
metrics_optimus = (
    pd.read_csv('/home/ubuntu/CNN/Sample_Keras_24Data_metrics.csv', index_col=0)
    .add_suffix('_optimus')
    .assign(data=lambda frame: [re.search(r'\d+\.\d+', name).group() for name in frame.index])
)
metrics_optimus

Unnamed: 0,R2_optimus,PearsonR_optimus,SpearmanR_optimus,data
4.1_test_data_GSM3130435_egfp_unmod_1.csv,0.93385,0.966359,0.950687,4.1
4.2_test_data_GSM3130435_egfp_unmod_1.csv,0.902936,0.95023,0.92048,4.2
4.3_test_data_GSM3130435_egfp_unmod_1.csv,0.749034,0.865467,0.873845,4.3
4.4_test_data_GSM3130436_egfp_unmod_2.csv,0.866009,0.930596,0.89976,4.4
4.5_test_data_GSM3130436_egfp_unmod_2.csv,0.799938,0.894393,0.860536,4.5
4.6_test_data_GSM3130436_egfp_unmod_2.csv,0.376796,0.613837,0.706209,4.6
4.7_test_data_GSM3130437_egfp_pseudo_1.csv,0.716971,0.846742,0.843646,4.7
4.8_test_data_GSM3130437_egfp_pseudo_1.csv,0.716971,0.846742,0.843646,4.8
4.9_test_data_GSM3130437_egfp_pseudo_1.csv,0.716971,0.846742,0.843646,4.9
4.10_test_data_GSM3130438_egfp_pseudo_2.csv,0.741303,0.86099,0.816649,4.1


In [16]:
# Load the Kipoi metrics, tag every metric column with the model name, and
# derive the join key 'data' from the first "<digits>.<digits>" token in each
# index label (kept as a string).
# NOTE(review): the index labels shown for this table contain "train_data",
# whereas the Optimus table's contain "test_data" — confirm both tables were
# scored on comparable splits.
metrics_kipoi = (
    pd.read_csv('/home/ubuntu/5UTR/Modelling/metrics_sample.csv', index_col=0)
    .add_suffix('_kipoi')
    .assign(data=lambda frame: [re.search(r'\d+\.\d+', name).group() for name in frame.index])
)
metrics_kipoi

Unnamed: 0,R2_kipoi,PearsonR_kipoi,SpearmanR_kipoi,data
4.1_train_data_GSM3130435_egfp_unmod_1.csv,0.918511,0.95839,0.937896,4.1
4.2_train_data_GSM3130435_egfp_unmod_1.csv,0.866882,0.931065,0.895938,4.2
4.3_train_data_GSM3130435_egfp_unmod_1.csv,0.710499,0.842911,0.845669,4.3
4.4_train_data_GSM3130436_egfp_unmod_2.csv,0.870844,0.93319,0.897843,4.4
4.5_train_data_GSM3130436_egfp_unmod_2.csv,0.787631,0.887486,0.852143,4.5
4.6_train_data_GSM3130436_egfp_unmod_2.csv,0.359374,0.599478,0.685695,4.6
4.7_train_data_GSM3130437_egfp_pseudo_1.csv,0.751696,0.867004,0.878584,4.7
4.8_train_data_GSM3130437_egfp_pseudo_1.csv,0.755878,0.869413,0.87874,4.8
4.9_train_data_GSM3130437_egfp_pseudo_1.csv,0.736781,0.858359,0.864501,4.9
4.10_train_data_GSM3130438_egfp_pseudo_2.csv,0.806224,0.8979,0.878938,4.1


In [17]:
# Load the RNA-FM + MLP metrics, tag every metric column with the model name,
# and derive the join key 'data' from the first "<digits>.<digits>" token in
# each index label (kept as a string).
metrics_RNAFM_MLP = (
    pd.read_csv('/home/ubuntu/RNA-FM/tutorials/utr_results/RNAFM_MLP_Sample_24Data_metrics.csv', index_col=0)
    .add_suffix('_RNAFM_MLP')
    .assign(data=lambda frame: [re.search(r'\d+\.\d+', name).group() for name in frame.index])
)
metrics_RNAFM_MLP

Unnamed: 0_level_0,R2_RNAFM_MLP,Pearson R_RNAFM_MLP,Spearman R_RNAFM_MLP,data
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RNAFM_MLP_4.1_unmod_1_metrics.csv,0.274104,0.523549,0.519321,4.1
RNAFM_MLP_4.2_unmod_1_metrics.csv,0.235198,0.484973,0.488048,4.2
RNAFM_MLP_4.3_unmod_1_metrics.csv,0.176507,0.420128,0.450537,4.3
RNAFM_MLP_4.4_unmod_2_metrics.csv,0.24389,0.493852,0.502937,4.4
RNAFM_MLP_4.5_unmod_2_metrics.csv,0.201965,0.449405,0.455252,4.5
RNAFM_MLP_4.6_unmod_2_metrics.csv,0.095116,0.308408,0.377834,4.6
RNAFM_MLP_4.7_pseudo_1_metrics.csv,0.218942,0.467912,0.471177,4.7
RNAFM_MLP_4.8_pseudo_1_metrics.csv,0.22658,0.476004,0.480908,4.8
RNAFM_MLP_4.9_pseudo_1_metrics.csv,0.211675,0.460082,0.464544,4.9
RNAFM_MLP_4.10_pseudo_2_metrics.csv,0.246071,0.496056,0.507519,4.1


In [18]:
# Load the RNA-FM + ResNet metrics, tag every metric column with the model
# name, and derive the join key 'data' from the first "<digits>.<digits>"
# token in each index label (kept as a string).
metrics_RNAFM_ResNet = (
    pd.read_csv('/home/ubuntu/RNA-FM/tutorials/utr_results/RNAFM_ResNet_Sample_24Data_metrics.csv', index_col=0)
    .add_suffix('_RNAFM_ResNet')
    .assign(data=lambda frame: [re.search(r'\d+\.\d+', name).group() for name in frame.index])
)
metrics_RNAFM_ResNet

Unnamed: 0_level_0,R2_RNAFM_ResNet,Pearson R_RNAFM_ResNet,Spearman R_RNAFM_ResNet,data
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RNAFM_ResNet_4.1_unmod_1_metrics.csv,0.942682,0.970918,0.95622,4.1
RNAFM_ResNet_4.2_unmod_1_metrics.csv,0.903896,0.950735,0.921557,4.2
RNAFM_ResNet_4.3_unmod_1_metrics.csv,0.735725,0.857744,0.860356,4.3
RNAFM_ResNet_4.4_unmod_2_metrics.csv,0.896136,0.946644,0.916279,4.4
RNAFM_ResNet_4.5_unmod_2_metrics.csv,0.829579,0.910812,0.875214,4.5
RNAFM_ResNet_4.6_unmod_2_metrics.csv,0.368252,0.606838,0.691298,4.6
RNAFM_ResNet_4.7_pseudo_1_metrics.csv,0.796785,0.892628,0.895446,4.7
RNAFM_ResNet_4.8_pseudo_1_metrics.csv,0.796928,0.892708,0.897497,4.8
RNAFM_ResNet_4.9_pseudo_1_metrics.csv,0.747835,0.864774,0.869626,4.9
RNAFM_ResNet_4.10_pseudo_2_metrics.csv,0.84524,0.919369,0.900052,4.1


In [20]:
# Combine the per-model metric tables into one wide frame, joined on the
# shared 'data' key. The join is pandas' default inner join, so a key missing
# from any table drops that row from the result.
# validate='one_to_one' makes pandas raise MergeError if a key is repeated on
# either side, instead of silently multiplying rows for that key.
metrics_df = (
    metrics_optimus
    .merge(metrics_kipoi, on=['data'], validate='one_to_one')
    .merge(metrics_RNAFM_MLP, on=['data'], validate='one_to_one')
    .merge(metrics_RNAFM_ResNet, on=['data'], validate='one_to_one')
)
metrics_df

Unnamed: 0,R2_optimus,PearsonR_optimus,SpearmanR_optimus,data,R2_kipoi,PearsonR_kipoi,SpearmanR_kipoi,R2_RNAFM_MLP,Pearson R_RNAFM_MLP,Spearman R_RNAFM_MLP,R2_RNAFM_ResNet,Pearson R_RNAFM_ResNet,Spearman R_RNAFM_ResNet
0,0.93385,0.966359,0.950687,4.1,0.918511,0.95839,0.937896,0.274104,0.523549,0.519321,0.942682,0.970918,0.95622
1,0.902936,0.95023,0.92048,4.2,0.866882,0.931065,0.895938,0.235198,0.484973,0.488048,0.903896,0.950735,0.921557
2,0.749034,0.865467,0.873845,4.3,0.710499,0.842911,0.845669,0.176507,0.420128,0.450537,0.735725,0.857744,0.860356
3,0.866009,0.930596,0.89976,4.4,0.870844,0.93319,0.897843,0.24389,0.493852,0.502937,0.896136,0.946644,0.916279
4,0.799938,0.894393,0.860536,4.5,0.787631,0.887486,0.852143,0.201965,0.449405,0.455252,0.829579,0.910812,0.875214
5,0.376796,0.613837,0.706209,4.6,0.359374,0.599478,0.685695,0.095116,0.308408,0.377834,0.368252,0.606838,0.691298
6,0.716971,0.846742,0.843646,4.7,0.751696,0.867004,0.878584,0.218942,0.467912,0.471177,0.796785,0.892628,0.895446
7,0.716971,0.846742,0.843646,4.8,0.755878,0.869413,0.87874,0.22658,0.476004,0.480908,0.796928,0.892708,0.897497
8,0.716971,0.846742,0.843646,4.9,0.736781,0.858359,0.864501,0.211675,0.460082,0.464544,0.747835,0.864774,0.869626
9,0.741303,0.86099,0.816649,4.1,0.806224,0.8979,0.878938,0.246071,0.496056,0.507519,0.84524,0.919369,0.900052
