In [43]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, sem
from scipy import stats
from itertools import chain
%matplotlib inline  

In [3]:
def get_element(ele_index, file):
    """Return column ``ele_index`` of a table of rows as a Python list.

    Column 0 is returned as-is (no conversion); every other column is
    converted element-wise with ``float``.

    Parameters
    ----------
    ele_index : int
        Index of the column to extract from every row.
    file : sequence of rows or numpy.ndarray
        The table to read. Each row must be indexable by ``ele_index``.
        A non-empty 0-D/1-D numpy array is treated as a single row, because
        ``np.loadtxt`` collapses a file with only one data row to 1-D
        (unless ``ndmin=2`` is passed).

    Returns
    -------
    list
        One entry per row: the raw value for column 0, a ``float`` otherwise.

    Raises
    ------
    IndexError
        If a row has no column ``ele_index``.
    ValueError
        If ``ele_index != 0`` and a value cannot be parsed as a float.
    """
    # Without this promotion, indexing a 1-D string array row-by-row would
    # pick single characters out of each field instead of whole columns.
    # Empty arrays are left alone so they still yield an empty list.
    if isinstance(file, np.ndarray) and file.ndim < 2 and file.size > 0:
        file = np.atleast_2d(file)

    if ele_index == 0:
        return [row[ele_index] for row in file]
    return [float(row[ele_index]) for row in file]

In [52]:
# --------------------- TRUE CV: PCC calculations ---------------------#

# Read the four prediction files as tables of strings.
syn1, syn2, syn3, syn4 = (
    np.loadtxt("./../results/A2403.true/syn1.pred", dtype=str),
    np.loadtxt("./../results/A2403.true/syn2.pred", dtype=str),
    np.loadtxt("./../results/A2403.true/syn3.pred", dtype=str),
    np.loadtxt("./../results/A2403.true/syn4.pred", dtype=str),
)

# Column 0 is kept as text; columns 1 and 2 are parsed as floats.
# Column 0 and column 1 are read from the first file only.
# NOTE(review): this assumes all four files list the same rows in the same
# order -- confirm, since nothing below checks it.
peptides, measure = get_element(0, syn1), get_element(1, syn1)
pred1, pred2, pred3, pred4 = (
    get_element(2, table) for table in (syn1, syn2, syn3, syn4)
)

# Row-wise average of the four prediction columns.
pred = list(zip(pred1, pred2, pred3, pred4))
mean_pred = [np.mean(row) for row in pred]


# ---- Performance evaluation ----

# Pearson correlation between the column-1 values and each file's predictions.
model_perf = [
    pearsonr(np.asarray(measure), np.asarray(single_pred))[0]
    for single_pred in (pred1, pred2, pred3, pred4)
]
print("PCC of each model:", np.round(model_perf, 3))
print("Mean:", np.round(np.mean(model_perf), 3))
print("SEM:", np.round(sem(model_perf), 3))

# Pearson correlation for the averaged predictions.
evel_perf = pearsonr(np.asarray(measure), np.asarray(mean_pred))[0]
print("PCC of concatinated model:", np.round(evel_perf, 3))

PCC of each model: [0.682 0.784 0.721 0.77 ]
Mean: 0.739
SEM: 0.023
PCC of concatinated model: 0.774


In [53]:
# --------------------- FAKE CV: PCC calculations ---------------------#

# Read the five prediction files as tables of strings.
syn1, syn2, syn3, syn4, syn5 = (
    np.loadtxt("./../results/A2403.fake/c000.pred", dtype=str),
    np.loadtxt("./../results/A2403.fake/c001.pred", dtype=str),
    np.loadtxt("./../results/A2403.fake/c002.pred", dtype=str),
    np.loadtxt("./../results/A2403.fake/c003.pred", dtype=str),
    np.loadtxt("./../results/A2403.fake/c004.pred", dtype=str),
)
fold_tables = (syn1, syn2, syn3, syn4, syn5)

# One list per file: column 0 as text, columns 1 and 2 parsed as floats.
# Unlike a shared evaluation set, every file contributes its own column-1
# values here.
peptides = [get_element(0, table) for table in fold_tables]
measures = [get_element(1, table) for table in fold_tables]
preds = [get_element(2, table) for table in fold_tables]


# ---- Performance evaluation ----

# Pearson correlation computed separately within each file.
model_perf = [
    pearsonr(np.asarray(fold_measure), np.asarray(fold_pred))[0]
    for fold_measure, fold_pred in zip(measures, preds)
]
print("PCC of each model:", np.round(model_perf, 3))
print("Mean:", np.round(np.mean(model_perf), 3))
print("SEM:", np.round(sem(model_perf), 3))

# Pearson correlation over all files' rows joined end to end.
all_measures = np.asarray(list(chain.from_iterable(measures)))
all_preds = np.asarray(list(chain.from_iterable(preds)))
evel_perf = pearsonr(all_measures, all_preds)[0]
print("PCC of concatinated model:", np.round(evel_perf, 3))

PCC of each model: [0.824 0.896 0.704 0.366 0.816]
Mean: 0.721
SEM: 0.094
PCC of concatinated model: 0.726
