In [1]:
# Current directory
import os
os.chdir('F:/Work/Experiment/pLM4ACE/model')

### Fusion models

In [2]:
import numpy as np
import pandas as pd


features = pd.read_csv("fusion_features/Data/single/ESM.csv", index_col=0, header=None)
labels = pd.read_csv("fusion_features/Data/label.csv", index_col=False, header=None)

print(features.shape)
print(labels.shape)
print(np.count_nonzero(labels==0))
print(np.count_nonzero(labels==1))

feature = np.array(features)
label = np.array(labels)

(1020, 320)
(1020, 1)
394
626


In [3]:
import math
import joblib
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, roc_curve, auc


# Create a directory to save the model
model_GRU_dir = 'save_models/GRU/Independence'
model_CNN_dir = 'save_models/CNN/Independence'
model_CapsuleGAN_dir = 'save_models/CapsuleGAN/Independence'
model_LR_dir = 'save_models/LR/Independence'

y = label
out_dim=2

BACC_collecton = []
Sn_collecton = []
Sp_collecton = []
MCC_collecton = []
AUC_collecton = []
AP=[]

all_predictions = []
for i in range(10):
    # Loading model
    if i in (0, 1, 2, 3, 4, 5):
        [sample_num, input_dim] = np.shape(feature)
        X = np.reshape(feature, (-1,1,input_dim))
        _, X_ind_test, _, y_ind_test = train_test_split(X, y, test_size=0.2, random_state=1111)

        model_path = os.path.join(model_GRU_dir, f'ESM_{i}.h5')
        clf = load_model(model_path)
        y_score = clf.predict(X_ind_test)
        print(f'Model for fold {i} loaded from {model_path}')
    elif i in (6, 7, 8):
        [sample_num, input_dim] = np.shape(feature)
        X = np.reshape(feature, (-1,1,input_dim))
        _, X_ind_test, _, y_ind_test = train_test_split(X, y, test_size=0.2, random_state=1111)

        model_path = os.path.join(model_CNN_dir, f'ESM_{i}.h5')
        clf = load_model(model_path)
        y_score = clf.predict(X_ind_test)
        print(f'Model for fold {i} loaded from {model_path}')
    # elif i == 9:
    #     _, X_ind_test, _, y_ind_test = train_test_split(feature, y, test_size=0.2, random_state=1111)
    #     model_path = os.path.join(model_LR_dir, f'ESM.joblib')
    #     clf = joblib.load(model_path)
    #     y_score = clf.predict_proba(X_ind_test)
    #     print(f'Model for fold {i} loaded from {model_path}')
    else:
        X = feature
        _, X_ind_test, _, y_ind_test = train_test_split(X, y, test_size=0.2, random_state=1111)

        model_path = os.path.join(model_CapsuleGAN_dir, f'ESM_{i}.h5')
        clf = load_model(model_path)
        y_score = clf.predict(X_ind_test)
        print(f'Model for fold {i} loaded from {model_path}')

    # y_score = clf.predict(X_ind_test)
    all_predictions.append(y_score)


# 转换为 numpy 数组
all_predictions = np.array(all_predictions)

# 平均投票
average_predictions = np.mean(all_predictions, axis=0)
final_predictions = np.argmax(average_predictions, axis=1)

TP, FP, FN, TN = confusion_matrix(y_ind_test, final_predictions).ravel() # shape [ [True-Positive, False-positive], [False-negative, True-negative] ]
Sn_collecton = TP/(TP+FN)
Sp_collecton = TN/(TN+FP)
MCC = (TP*TN-FP*FN)/math.pow(((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)),0.5)
MCC_collecton = MCC
BACC_collecton = 0.5*TP/(TP+FN)+0.5*TN/(TN+FP)
# ROC curve
fpr, tpr, _ = roc_curve(y_ind_test, average_predictions[:, 1])
auc_roc = auc(fpr, tpr)
AUC_collecton = auc_roc
# PR curve
precision, recall, _ = precision_recall_curve(y_ind_test, average_predictions[:, 1])
average_precision = average_precision_score(y_ind_test, average_predictions[:, 1])
AP = average_precision

# Output
results = [
    f"BACC: {round(BACC_collecton, 3)}",
    f"Sn: {round(Sn_collecton, 3)}",
    f"Sp: {round(Sp_collecton, 3)}",
    f"MCC: {round(MCC_collecton, 3)}",
    f"AUC: {round(AUC_collecton, 3)}",
    f"AP: {round(AP, 3)}"
]

for result in results:
    print(result)

# Append the results to the file
with open('result/results_Vote.txt', 'a') as file:
    file.write("----------------------------------------\n")
    for result in results:
        file.write(result + '\n')


Model for fold 0 loaded from save_models/GRU/Independence\ESM_0.h5
Model for fold 1 loaded from save_models/GRU/Independence\ESM_1.h5
Model for fold 2 loaded from save_models/GRU/Independence\ESM_2.h5
Model for fold 3 loaded from save_models/GRU/Independence\ESM_3.h5
Model for fold 4 loaded from save_models/GRU/Independence\ESM_4.h5
Model for fold 5 loaded from save_models/GRU/Independence\ESM_5.h5
Model for fold 6 loaded from save_models/CNN/Independence\ESM_6.h5
Model for fold 7 loaded from save_models/CNN/Independence\ESM_7.h5
Model for fold 8 loaded from save_models/CNN/Independence\ESM_8.h5
Model for fold 9 loaded from save_models/CapsuleGAN/Independence\ESM_9.h5
BACC: 0.926
Sn: 0.96
Sp: 0.891
MCC: 0.831
AUC: 0.966
AP: 0.975


In [6]:
# 保存ROC曲线相关参数
np.savez('graph/ACE/ROC/Vote.npz', fpr=fpr, tpr=tpr, roc_auc=AUC_collecton)

# 保存PR曲线相关参数
np.savez('graph/ACE/PR/Vote.npz', recall=recall, precision=precision, average_precision=AP)

### end