In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import seaborn as sns
from sklearn import preprocessing
import random

import tensorflow as tf
import keras
np.random.seed(1337)

from keras.preprocessing import sequence
from tensorflow.keras.optimizers import Adam
from keras.models import Sequential
from keras.layers.core import Dense
from keras.layers.core import Dropout
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.convolutional import Conv1D

from scipy.stats import spearmanr, pearsonr
%matplotlib inline

### Parameters for plotting model results ###
pd.set_option("display.max_colwidth",100)
sns.set(style="ticks", color_codes=True)
plt.rcParams['font.weight'] = 'normal'
plt.rcParams['axes.labelweight'] = 'normal'
plt.rcParams['axes.labelpad'] = 5
plt.rcParams['axes.linewidth']= 2
plt.rcParams['xtick.labelsize']= 14
plt.rcParams['ytick.labelsize']= 14
plt.rcParams['xtick.major.size'] = 6
plt.rcParams['ytick.major.size'] = 6
plt.rcParams['xtick.minor.size'] = 3
plt.rcParams['ytick.minor.size'] = 3
plt.rcParams['xtick.minor.width'] = 1
plt.rcParams['ytick.minor.width'] = 1
plt.rcParams['xtick.major.width'] = 2
plt.rcParams['ytick.major.width'] = 2
plt.rcParams['xtick.color'] = 'black'
plt.rcParams['ytick.color'] = 'black'
plt.rcParams['axes.labelcolor'] = 'black'
plt.rcParams['axes.edgecolor'] = 'black'


def train_model(x, y, border_mode='same', inp_len=50, nodes=40, layers=3, filter_len=8, nbr_filters=120,
                dropout1=0, dropout2=0, dropout3=0, nb_epoch=3):
    ''' Build model archicture and fit.'''
    model = Sequential()
    if layers >= 1:
        model.add(Conv1D(activation="relu", input_shape=(inp_len, 4), padding=border_mode, filters=nbr_filters, kernel_size=filter_len))
    if layers >= 2:
        model.add(Conv1D(activation="relu", input_shape=(inp_len, 1), padding=border_mode, filters=nbr_filters, kernel_size=filter_len))
        model.add(Dropout(dropout1))
    if layers >= 3:
        model.add(Conv1D(activation="relu", input_shape=(inp_len, 1), padding=border_mode, filters=nbr_filters, kernel_size=filter_len))
        model.add(Dropout(dropout2))
    model.add(Flatten())

    model.add(Dense(nodes))
    model.add(Activation('relu'))
    model.add(Dropout(dropout3))
    
    model.add(Dense(1))
    model.add(Activation('linear'))

    #compile the model
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(loss='mean_squared_error', optimizer=adam)

    model.fit(x, y, batch_size=128, epochs=nb_epoch, verbose=1)
    return model


def test_data(df, model, test_seq, obs_col, output_col='y_pred'):
    '''Predict mean ribosome load using model and test set UTRs'''
    
    # Scale the test set mean ribosome load
    scaler = preprocessing.StandardScaler()
    scaler.fit(np.array(df[obs_col]).reshape(-1,1))
    
    # Make predictions
    predictions = model.predict(test_seq).reshape(-1,1)
    
    # Inverse scaled predicted mean ribosome load and return in a column labeled 'pred'
    df.loc[:,output_col] = scaler.inverse_transform(predictions)
    return df


def one_hot_encode(df, col='utr', seq_len=50):
    # Dictionary returning one-hot encoding of nucleotides. 
    nuc_d = {'a':[1,0,0,0],'c':[0,1,0,0],'g':[0,0,1,0],'t':[0,0,0,1], 'n':[0,0,0,0]}
    
    # Creat empty matrix.
    vectors=np.empty([len(df),seq_len,4])
    
    # Iterate through UTRs and one-hot encode
    for i,seq in enumerate(df[col]): 
        seq = seq.replace('<pad>', 'n').lower()[:seq_len]
        if len(seq) != seq_len: print(seq)
        a = np.array([nuc_d[x] for x in seq])
        vectors[i] = a
    return vectors


def r2(x,y):
    slope, intercept, r_value, p_value, std_err = stats.linregress(x,y)
    return r_value**2

In [2]:
def performances(label, pred):
    
    r = r2(label, pred)
    pearson_r = pearsonr(label, pred)[0]
    sp_cor = spearmanr(label, pred)[0]
    
    print(f'r-squared = {r:.4f} | pearson r = {pearson_r:.4f} | spearman R = {sp_cor:.4f}')
        
    return [r, pearson_r, sp_cor]


In [3]:
def analysis_data(data):
#     data = data[['utr', 'utr_originial_varylength', 'label', 'y_pred', 'y_true']]
    data['y_true'] = data['label']
    data['y_pred'] = data['y_pred']
    data['diff'] = abs(data.y_true - data.y_pred)
    data.sort_values('diff', ascending = True, inplace = True)
    data['diff_rank'] = range(1, data.shape[0]+1)
    data.sort_values('y_pred', ascending = False, inplace = True)
    data['pred_rank'] = range(1, data.shape[0]+1)
    data.sort_values('y_true', ascending = False, inplace = True)
    data['true_rank'] = range(1, data.shape[0]+1)
    print(f'data.shape = {data.shape}')
    return data

def analysis_rank(data, cutoff = 10):   
    pred_n = sum(data.iloc[:cutoff].pred_rank <= cutoff)
    diff_n = sum(data.iloc[:cutoff].diff_rank <= cutoff)
    pred_r = pred_n/cutoff
    diff_r = diff_n/cutoff
#     print(f'From samples with Cut-Off={cutoff} Top Labels (y_true): \n(1) {pred_n} samples are in top {cutoff} Ranked y_pred \n(2) {diff_n} samples are in top {cutoff} Ranked diff=|y_pred-label|')
    print(f'From {len(data)} samples with CutOff={cutoff} Top Labels: The ratios (1) Diff_rank={diff_n}/{cutoff}={diff_n/cutoff:.2f} (2) Pred_rank={pred_n}/{cutoff}={pred_n/cutoff:.2f} ')
    return pred_n, diff_n, pred_r, diff_r

def multi_cutoffs_analysis(data, cutoffs):
    top_pred_n, top_diff_n, top_pred_r, top_diff_r = [], [], [], []
    for cutoff in cutoffs:
        np, nd, rp, rd = analysis_rank(data, cutoff)
        top_pred_n.append(np)
        top_diff_n.append(nd)
        top_pred_r.append(rp)
        top_diff_r.append(rd)
    data_analysis = pd.DataFrame([cutoffs, top_pred_n, top_pred_r, top_diff_n, top_diff_r], 
                    index = ['Cut-Off', '# Top Pred in Cut-Off', 'Ratio: Top Pred in Cut-Off', 
                             '# Top Diff in Cut-Off', 'Ratio: Top Diff in Cut-Off']).T
    data_analysis['# Samples'] = len(data)
    data_analysis[['# Samples', 'Cut-Off', '# Top Pred in Cut-Off', '# Top Diff in Cut-Off']] = data_analysis[['# Samples', 'Cut-Off', '# Top Pred in Cut-Off', '# Top Diff in Cut-Off']].astype('int')
    data_analysis[['Ratio: Top Pred in Cut-Off', 'Ratio: Top Diff in Cut-Off']] = data_analysis[['Ratio: Top Pred in Cut-Off', 'Ratio: Top Diff in Cut-Off']].round(2)
    return data_analysis

In [4]:
data = pd.read_csv('/home/ubuntu/Experimental_Data/Experimental_data_revised_label.csv', index_col = 0)
df = data.drop_duplicates(['utr_50', 'label']).groupby(['rvac_ID', 'utr_50' ]).agg(np.median).reset_index()
df['utr_50'] = [u.replace('<pad>', 'n') for u in df.utr_50]
df

Unnamed: 0,rvac_ID,utr_50,label,Length
0,RV-UML-m001,AGGGCGAAGAAACGTTAACGAGTATTTCAATTATTAGAGAGTTCATTGCT,1.03,50
1,RV-UML-m002,AGGTCCGTTATATTATTTATCTTGCAGATCAAACTTCAGAGAGGAGGGCC,1.33,50
2,RV-UML-m003,AGTTTCGTTCACTGTCCTTGACTAGAGAATATAATAAAAAGATTGTTGCT,0.52,50
3,RV-UML-m004,AGGGACCAAGAGTTCGATACCTCATCGAACTGCGAGTCATAAGCAGGGCC,1.20,50
4,RV-UML-m005,AGTGGACTTGTTATCTCGCATTTGCGCAATCCACTATACTGCGTTGGGCC,1.12,50
...,...,...,...,...
206,RV-UML-m371,AGGAAATCCTACCGCAACGCAGAGCGTAGATAACTAGTCTCTTCGAAGTC,1.13,50
207,RV-UML-m372,AGATAATCCACACCTCGAGAGACGTTTGCGAGATTACTAACCAAGGGGCC,1.09,50
208,RV-UML-m373,AGTTAGAACGTTATCTGGTTCGAAGAGTTGATAGACGCAGAATCAGGGCC,1.11,50
209,RV-UML-m374,AGATTCGGAGGAAACGAGAATTCCAAAGCATCCTTACCTCTCGTAGGGCT,1.13,50


In [5]:
max_len = 50
# One-hot encode both training and test UTRs
from sklearn.model_selection import KFold
num_folds = 10
e_df = pd.DataFrame()
metrics_df = []
# Initialize the KFold object
kf = KFold(n_splits=num_folds, shuffle=True)
for train_index, test_index in kf.split(df):
    e_test = df.iloc[test_index]
    e_train = df.iloc[train_index]
    print(e_train.shape, e_test.shape)

    # One-hot encode both training and test UTRs
    seq_e_train = one_hot_encode(e_train,'utr_50',seq_len=50)
    seq_e_test = one_hot_encode(e_test, 'utr_50',seq_len=50)
    # Scale the training mean ribosome load values
    e_train.loc[:,'scaled_rl'] = preprocessing.StandardScaler().fit_transform(np.array(e_train.loc[:,'label']).reshape(-1,1))


    with tf.device('/gpu:1'):
        model = train_model(seq_e_train, e_train['scaled_rl'], nb_epoch=3,border_mode='same',
                       inp_len=50, nodes=40, layers=3, nbr_filters=120, filter_len=8, dropout1=0,
                       dropout2=0,dropout3=0.2)

        test_df = test_data(df=e_test, model=model, obs_col='label',test_seq=seq_e_test)
        e_df = pd.concat([e_df, test_df], axis = 0)
        metrics = performances(test_df.label, test_df['y_pred'])
        metrics_df.append(metrics)
metrics_df = pd.DataFrame(metrics_df, columns = ['Test_R', 'Test_PearsonR', 'Test_SpearmanR'])
metrics_df.loc['mean'] = metrics_df.mean(axis = 0)
metrics_df.loc['std'] = metrics_df.std(axis = 0)
metrics_df.to_csv('/home/ubuntu/Experimental_Data/revised_results/Experimental_data_Retrained_Optimus_MRL_metrics.csv')
e_df.to_csv('/home/ubuntu/Experimental_Data/revised_results/Experimental_data_Retrained_Optimus_MRL_results.csv', index = False)
e_df

(189, 4) (22, 4)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/3
Epoch 2/3
Epoch 3/3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] 

r-squared = 0.0189 | pearson r = -0.1377 | spearman R = -0.2194
(190, 4) (21, 4)
Epoch 1/3
Epoch 2/3
Epoch 3/3
r-squared = 0.0030 | pearson r = 0.0551 | spearman R = 0.0845
(190, 4) (21, 4)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] 

Epoch 1/3
Epoch 2/3
Epoch 3/3
r-squared = 0.0638 | pearson r = -0.2526 | spearman R = -0.2141
(190, 4) (21, 4)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] 

Epoch 1/3
Epoch 2/3
Epoch 3/3
r-squared = 0.0105 | pearson r = 0.1025 | spearman R = 0.0933
(190, 4) (21, 4)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] 

Epoch 1/3
Epoch 2/3
Epoch 3/3
r-squared = 0.0003 | pearson r = 0.0164 | spearman R = 0.2404
(190, 4) (21, 4)
Epoch 1/3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] 

Epoch 2/3
Epoch 3/3
r-squared = 0.0027 | pearson r = -0.0517 | spearman R = -0.2877
(190, 4) (21, 4)
Epoch 1/3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] 

Epoch 2/3
Epoch 3/3
r-squared = 0.0313 | pearson r = -0.1769 | spearman R = -0.2790
(190, 4) (21, 4)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] 

Epoch 1/3
Epoch 2/3
Epoch 3/3
r-squared = 0.0381 | pearson r = -0.1952 | spearman R = -0.1588
(190, 4) (21, 4)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] 

Epoch 1/3
Epoch 2/3
Epoch 3/3
r-squared = 0.0039 | pearson r = -0.0624 | spearman R = -0.1088
(190, 4) (21, 4)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] 

Epoch 1/3
Epoch 2/3
Epoch 3/3
r-squared = 0.0148 | pearson r = -0.1215 | spearman R = -0.0540


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Unnamed: 0,rvac_ID,utr_50,label,Length,y_pred
25,RV-UML-m026,AGTTGATCTTCGTCCGTGCATCACACCGGTCTTGTCCCCTGAACAGGGTA,1.16,50,0.952185
28,RV-UML-m029,AGCCGCAGATCGCTGGATCCTGTCACCGAGCCCTTCGGTGCTACAGGTTA,0.60,50,0.954489
32,RV-UML-m033,AGGAGTCATTTCGTTCGACACCTTTTACGCCTCCACACCAGTAGTGGATC,1.12,50,0.951673
35,RV-UML-m036,AGATTCAATAGGGCTTTTGGATTCTTGGATTGGTGCTACAACATCACGGT,0.95,50,0.953462
39,RV-UML-m040,AGTCAGACGCCCCATACGCATTAGGCTGGTCTATCATCTACTACCTTCGC,1.02,50,0.950143
...,...,...,...,...,...
167,RV-UML-m331,AGTTGTTCGAAGAAAACAGAGTCGTATACGGTACGAACCCCTTCCACTCG,0.96,50,1.021737
182,RV-UML-m347,AGAAACCGTCCTTTCGAATTACTCGGCTTTAGAGCCGGGGAACACATAAA,0.99,50,1.015569
189,RV-UML-m354,AGTGAGTCCCCGTCAACCGTATCGGGAGGGCGATCGACAATTCTCTACCA,1.17,50,1.024257
205,RV-UML-m370,AGACGATTTGGTCTGTACAAACGCGTACCGATATACACGAGGTAGGGGCC,1.09,50,1.058393


In [6]:
metrics_df

Unnamed: 0,Test_R,Test_PearsonR,Test_SpearmanR
0,0.01895,-0.137658,-0.219395
1,0.003035,0.055093,0.084498
2,0.063803,-0.252593,-0.214054
3,0.010502,0.102479,0.093282
4,0.000268,0.016382,0.24041
5,0.002674,-0.05171,-0.287668
6,0.031294,-0.1769,-0.279025
7,0.0381,-0.195193,-0.158803
8,0.003899,-0.06244,-0.10876
9,0.014763,-0.121501,-0.054001


In [8]:
e_df = analysis_data(e_df)
cutoffs = list(range(1, len(e_df) + 1, 1))
df_results = multi_cutoffs_analysis(e_df, cutoffs)
df_results.to_csv('/home/ubuntu/Experimental_Data/revised_results/Experimental_data_Retrained_Optimus_MRL_ranking_results.csv', index = False)

data.shape = (211, 10)
From 211 samples with CutOff=1 Top Labels: The ratios (1) Diff_rank=0/1=0.00 (2) Pred_rank=0/1=0.00 
From 211 samples with CutOff=2 Top Labels: The ratios (1) Diff_rank=0/2=0.00 (2) Pred_rank=0/2=0.00 
From 211 samples with CutOff=3 Top Labels: The ratios (1) Diff_rank=0/3=0.00 (2) Pred_rank=0/3=0.00 
From 211 samples with CutOff=4 Top Labels: The ratios (1) Diff_rank=0/4=0.00 (2) Pred_rank=0/4=0.00 
From 211 samples with CutOff=5 Top Labels: The ratios (1) Diff_rank=0/5=0.00 (2) Pred_rank=0/5=0.00 
From 211 samples with CutOff=6 Top Labels: The ratios (1) Diff_rank=0/6=0.00 (2) Pred_rank=0/6=0.00 
From 211 samples with CutOff=7 Top Labels: The ratios (1) Diff_rank=0/7=0.00 (2) Pred_rank=0/7=0.00 
From 211 samples with CutOff=8 Top Labels: The ratios (1) Diff_rank=0/8=0.00 (2) Pred_rank=0/8=0.00 
From 211 samples with CutOff=9 Top Labels: The ratios (1) Diff_rank=0/9=0.00 (2) Pred_rank=0/9=0.00 
From 211 samples with CutOff=10 Top Labels: The ratios (1) Diff_rank