In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, sem
from scipy import stats
from itertools import chain
%matplotlib inline  

In [2]:
def get_element(ele_index, file):
    """Collect one column of a row-indexable table into a list.

    Parameters
    ----------
    ele_index : int
        Position of the wanted element inside each row.
    file : sequence of rows
        Anything that can be iterated row by row, where each row supports
        ``row[ele_index]``.

    Returns
    -------
    list
        For ``ele_index == 0`` the raw elements are returned unchanged;
        for any other index every element is converted with ``float``.
    """
    # Lazy generator: each element is fetched (and converted) row by row.
    column = (row[ele_index] for row in file)
    if ele_index == 0:
        return list(column)
    return [float(value) for value in column]

In [None]:
# --------------------- For TRUE CV PCC calculations for ANN ---------------------#

# Load the prediction files as tables of strings
syn1, syn2, syn3, syn4 = [
    np.loadtxt(pred_path, dtype=str)
    for pred_path in (
        "./../results/ANN/A1101.true/syn1.pred",
        "./../results/ANN/A1101.true/syn2.pred",
        "./../results/ANN/A1101.true/syn3.pred",
        "./../results/ANN/A1101.true/syn4.pred",
    )
]

# Extract the elements:
# columns 0 and 1 are read from syn1 only, column 2 is read from every file.
# NOTE(review): this assumes all four files list the same rows in the same order — confirm.
peptides = get_element(0, syn1)
measure = get_element(1, syn1)
pred1, pred2, pred3, pred4 = [
    get_element(2, table) for table in (syn1, syn2, syn3, syn4)
]

# Row-wise average of the four prediction columns
pred = list(zip(pred1, pred2, pred3, pred4))
mean_pred = [np.mean(row) for row in pred]


# Performance evaluation

# PCC of each syn's predictions against `measure`
syn_perf = [
    pearsonr(np.asarray(measure), np.asarray(single_pred))[0]
    for single_pred in (pred1, pred2, pred3, pred4)
]
print("PCC of each syn:", np.round(syn_perf, 3))
print("Mean:", np.round(np.mean(syn_perf), 3))
print("SEM:", np.round(sem(syn_perf), 3))

# PCC of the averaged prediction (mean over the four syns) against `measure`
evel_perf = pearsonr(np.asarray(measure), np.asarray(mean_pred))[0]

print("PCC of concatinated syn:", np.round(evel_perf, 3))

In [None]:
# --------------------- For FAKE CV PCC calculations for ANN ---------------------#

# Load the prediction files as tables of strings
syn1, syn2, syn3, syn4, syn5 = [
    np.loadtxt(pred_path, dtype=str)
    for pred_path in (
        "./../results/ANN/A1101.fake/c000.pred",
        "./../results/ANN/A1101.fake/c001.pred",
        "./../results/ANN/A1101.fake/c002.pred",
        "./../results/ANN/A1101.fake/c003.pred",
        "./../results/ANN/A1101.fake/c004.pred",
    )
]

# Extract the elements: one list per file for each of columns 0, 1 and 2
loaded_tables = (syn1, syn2, syn3, syn4, syn5)
peptides = [get_element(0, table) for table in loaded_tables]
measures = [get_element(1, table) for table in loaded_tables]
preds = [get_element(2, table) for table in loaded_tables]


# Performance evaluation

# PCC computed separately inside each file (column 1 vs. column 2)
syn_perf = [
    pearsonr(np.asarray(file_measure), np.asarray(file_pred))[0]
    for file_measure, file_pred in zip(measures, preds)
]
print("PCC of each syn:", np.round(syn_perf, 3))
print("Mean:", np.round(np.mean(syn_perf), 3))
print("SEM:", np.round(sem(syn_perf), 3))

# PCC over all files joined end to end
all_measures = np.asarray(list(chain.from_iterable(measures)))
all_preds = np.asarray(list(chain.from_iterable(preds)))
evel_perf = pearsonr(all_measures, all_preds)[0]
print("PCC of concatinated syn:", np.round(evel_perf, 3))

In [9]:
# --------------------- For TRUE CV PCC calculations for SMM ---------------------#

# Load the prediction files as tables of strings
model1, model2, model3, model4 = [
    np.loadtxt(pred_path, dtype=str)
    for pred_path in (
        "./../results/SMM/A2403.true/model1.pred",
        "./../results/SMM/A2403.true/model2.pred",
        "./../results/SMM/A2403.true/model3.pred",
        "./../results/SMM/A2403.true/model4.pred",
    )
]

# Extract the elements:
# columns 0 and 2 are read from model1 only, column 1 is read from every file.
# NOTE(review): here the prediction is column 1 and the measurement column 2,
# the reverse of the other cells — confirm this matches the SMM file layout.
# NOTE(review): this also assumes all four files list the same rows in the same order.
peptides = get_element(0, model1)
measure = get_element(2, model1)
pred1, pred2, pred3, pred4 = [
    get_element(1, table) for table in (model1, model2, model3, model4)
]

# Row-wise average of the four prediction columns
pred = list(zip(pred1, pred2, pred3, pred4))
mean_pred = [np.mean(row) for row in pred]


# Performance evaluation

# PCC of each model's predictions against `measure`
model_perf = [
    pearsonr(np.asarray(measure), np.asarray(single_pred))[0]
    for single_pred in (pred1, pred2, pred3, pred4)
]
print("PCC of each model:", np.round(model_perf, 3))
print("Mean:", np.round(np.mean(model_perf), 3))
print("SEM:", np.round(sem(model_perf), 3))

# PCC of the averaged prediction (mean over the four models) against `measure`
evel_perf = pearsonr(np.asarray(measure), np.asarray(mean_pred))[0]

print("PCC of concatinated model:", np.round(evel_perf, 3))

PCC of each model: [0.32  0.474 0.448 0.491]
Mean: 0.433
SEM: 0.039
PCC of concatinated model: 0.526


In [10]:
# --------------------- For FAKE CV PCC calculations for SMM ---------------------#

# Load the prediction files as tables of strings
model1, model2, model3, model4, model5 = [
    np.loadtxt(pred_path, dtype=str)
    for pred_path in (
        "./../results/SMM/A2403.fake/c000.pred",
        "./../results/SMM/A2403.fake/c001.pred",
        "./../results/SMM/A2403.fake/c002.pred",
        "./../results/SMM/A2403.fake/c003.pred",
        "./../results/SMM/A2403.fake/c004.pred",
    )
]

# Extract the elements: one list per file for each of columns 0, 1 and 2
loaded_tables = (model1, model2, model3, model4, model5)
peptides = [get_element(0, table) for table in loaded_tables]
measures = [get_element(1, table) for table in loaded_tables]
preds = [get_element(2, table) for table in loaded_tables]


# Performance evaluation

# PCC computed separately inside each file (column 1 vs. column 2)
model_perf = [
    pearsonr(np.asarray(file_measure), np.asarray(file_pred))[0]
    for file_measure, file_pred in zip(measures, preds)
]
print("PCC of each model:", np.round(model_perf, 3))
print("Mean:", np.round(np.mean(model_perf), 3))
print("SEM:", np.round(sem(model_perf), 3))

# PCC over all files joined end to end
all_measures = np.asarray(list(chain.from_iterable(measures)))
all_preds = np.asarray(list(chain.from_iterable(preds)))
evel_perf = pearsonr(all_measures, all_preds)[0]
print("PCC of concatinated model:", np.round(evel_perf, 3))

PCC of each model: [0.442 0.304 0.203 0.453 0.5  ]
Mean: 0.38
SEM: 0.055
PCC of concatinated model: 0.371
