In [1]:
import os
import pickle
import datetime
import numpy as np
import pandas as pd
from tqdm import tqdm
import tensorflow.keras.backend as K
from sklearn.metrics import brier_score_loss
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

2024-05-06 10:22:40.251113: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
def evals(y, y_pred, posthr, negthr, rm):
    """
    function:
        evaluate binary predictions: threshold the predicted probabilities
        into labels, then derive confusion-matrix metrics and the Brier score

    inputs:
        y - Input labels (expected to be 0/1); list or array, flattened internally
        y_pred - Predicted probabilities, one per label; list or array, flattened internally
        posthr - threshold for converting probability into 1: higher than posthr will be 1
        negthr - threshold for converting probability into 0: lower than negthr will be 0
        rm - values for handling probability between posthr and negthr:
             0, remove these cases; 1, convert to 1; 2, convert to 0.
             Any other value leaves these cases unlabelled, so they are not
             counted in any confusion-matrix cell.
    returns:
        tuple of 7 values:
        Accuracy, sensitivity, specificity, F-measure, Positive predictive value,
        Negative predictive value, Brier score.
        The Brier score is computed on all samples, before any removal.
    """

    # Flatten once so lists, 1-D arrays and single-row/column 2-D arrays are
    # all treated alike and labels stay index-aligned with predictions.
    y = np.asarray(y).reshape(-1)
    probs = np.asarray(y_pred).reshape(-1)

    brierScore = brier_score_loss(y, probs)

    # Three-way labelling: 1 = positive, 0 = negative, 2 = undecided.
    # The posthr test is evaluated first, so it wins if the regions overlap.
    undecided_label = 2
    labels = np.where(probs > posthr, 1, np.where(probs < negthr, 0, undecided_label))
    undecided = labels == undecided_label

    if rm == 0:
        # Drop undecided samples from both labels and predictions.
        keep = ~undecided
        y = y[keep]
        labels = labels[keep]
    elif rm == 1:
        # Relabel only the undecided samples; confident predictions are kept.
        labels[undecided] = 1
    elif rm == 2:
        labels[undecided] = 0

    neg_y = 1 - y
    neg_labels = 1 - labels
    tp = ((y * labels) == 1).sum()
    fp = ((neg_y * labels) == 1).sum()
    fn = ((neg_labels * y) == 1).sum()
    tn = ((neg_y * neg_labels) == 1).sum()

    # Small constant keeps every ratio finite when a denominator would be zero.
    eps = K.epsilon()
    acc = (tp + tn) / (tp + tn + fp + fn + eps)
    sensitivity = tp / (tp + fn + eps)
    specificity = tn / (tn + fp + eps)
    Fmeasure = 2*tp / (2*tp + fp + fn + eps)
    PPV = tp / (tp + fp + eps)
    NPV = tn / (tn + fn + eps)

    return acc, sensitivity, specificity, Fmeasure, PPV, NPV, brierScore

In [34]:
# Evaluation configuration passed to every evals() call in this cell.
posthr = 0.5
negthr = 0.5
rm = 0

# NOTE(review): absolute, machine-specific path; consider making it configurable.
exp_Path = '/mnt/TUNG_10T/Projects_10T/Projects_2024/Amanda_Prof_Proj/miRNA_v2'

# SECURITY: pickle.load can execute arbitrary code while unpickling.
# Only load these files if they come from a trusted source.
res_dict = os.path.join(exp_Path, 'res_full_0415.pickle')
with open(res_dict, 'rb') as handle:
    res = pickle.load(handle)

data_dict = os.path.join(exp_Path, 'data_full_0411.pickle')
with open(data_dict, 'rb') as handle:
    data = pickle.load(handle)

# Layout as used below (schema itself is not visible here; TODO confirm
# against the script that wrote the pickles):
#   res[model][dataset][fold]['val' | 'ind_1' | 'ind_2'] -> indexable sequence
#       of prediction arrays (presumably one per saved checkpoint)
#   data[dataset][k][1] -> labels; k = fold key for validation,
#       k = 4 and k = 5 for the two held-out sets scored as ind-1 / ind-2
for dt in data:
    for model in res:
        assert dt in res[model], f'no results for dataset {dt!r} under model {model!r}'
        list_max_pval = []
        list_p_ind_1 = []
        list_p_ind_2 = []
        gt_ind_1 = data[dt][4][1]
        gt_ind_2 = data[dt][5][1]

        for fol in res[model][dt]:
            print(dt, model, fol)
            fold_res = res[model][dt][fol]
            p_val = fold_res['val']
            p_ind_1 = fold_res['ind_1']
            p_ind_2 = fold_res['ind_2']

            gt_val = data[dt][fol][1]

            # Validation accuracy of every candidate prediction set; stored
            # back into `res` so it stays available after this cell.
            fold_res['val_acc'] = [evals(gt_val, p, posthr, negthr, rm)[0] for p in p_val]

            # Select the candidate with the best validation accuracy
            # (np.argmax returns the first index on ties) and score it on
            # both held-out sets.
            val_idx = np.argmax(fold_res['val_acc'])
            r = evals(gt_ind_1, p_ind_1[val_idx], posthr, negthr, rm)
            print('ind-1', r)
            r = evals(gt_ind_2, p_ind_2[val_idx], posthr, negthr, rm)
            print('ind-2', r)

            list_max_pval.append(val_idx)
            list_p_ind_1.append(p_ind_1[val_idx])
            list_p_ind_2.append(p_ind_2[val_idx])

        # Ensemble: average the selected predictions across all folds.
        # NOTE(review): the label below is hard-coded as '4-fold' (and
        # misspelled) while the loop averages however many folds exist; it is
        # kept byte-for-byte so existing logs stay comparable.
        print(dt, model, '4-fold essemble')
        avg_ind_1 = np.average(list_p_ind_1, axis=0)
        avg_ind_2 = np.average(list_p_ind_2, axis=0)
        r_ind_1 = evals(gt_ind_1, avg_ind_1, posthr, negthr, rm)
        r_ind_2 = evals(gt_ind_2, avg_ind_2, posthr, negthr, rm)
        print(r_ind_1)
        print(r_ind_2)

        if dt == 'DeepMirTar':
            # Count of ensemble predictions called positive on the second set.
            # NOTE(review): uses a literal 0.5 with >=, whereas evals() uses
            # `> posthr`; confirm whether this should follow posthr.
            p = np.int64(avg_ind_2 >= 0.5)
            print(p.shape, np.sum(p))
        
#list_p_ind_2

DeepMirTar miraw_org 0
ind-1 (0.9746917585350622, 0.9662775614829732, 0.9831168829892056, 0.97449313270278, 0.9828496040919724, 0.966794380464011, 0.022615990109870945)
ind-2 (0.8958333314670138, 0.8958333314670138, 0.0, 0.9450549440164232, 0.9999999976744186, 0.0, 0.08020209371072934)
DeepMirTar miraw_org 1
ind-1 (0.9824789097350759, 0.970168612066126, 0.9948051946759994, 0.9822718318462067, 0.9946808509315584, 0.9708491760493221, 0.015205573287357404)
ind-2 (0.7291666651475694, 0.7291666651475694, 0.0, 0.843373492959791, 0.9999999971428571, 0.0, 0.25813358453960883)
DeepMirTar miraw_org 2
ind-1 (0.9779364048684013, 0.9623865108998202, 0.9935064933774668, 0.9776021079724899, 0.9933065594386471, 0.9634760704076227, 0.021168057893261837)
ind-2 (0.9166666647569445, 0.9166666647569445, 0.0, 0.9565217380907373, 0.9999999977272727, 0.0, 0.09202708492231958)
DeepMirTar miraw_org 3
ind-1 (0.9779364048684013, 0.970168612066126, 0.9857142855862709, 0.9777777777138706, 0.9855072462469688, 0.9705

array([1.0000000e+00, 2.4841005e-01, 1.0000000e+00, ..., 2.6769901e-04,
       1.9712446e-05, 4.7918186e-02], dtype=float32)