In [None]:
import tensorflow as tf
import os


import sys
import keras


from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Dropout, Embedding, LSTM, Bidirectional,Multiply 
from keras.layers import BatchNormalization, merge, add
from keras.layers.core import Flatten, Reshape
from keras.layers.merge import Concatenate, concatenate, subtract, multiply
from keras.layers.convolutional import Conv1D
from keras.layers.pooling import MaxPooling1D, AveragePooling1D, GlobalAveragePooling1D, GlobalMaxPooling1D

from keras.optimizers import Adam,  RMSprop

import keras.backend.tensorflow_backend as KTF

import numpy as np
from tqdm import tqdm
from keras.layers import Input, CuDNNGRU, GRU
from numpy import linalg as LA
import scipy
from keras import backend as K
import re
from sklearn.model_selection import train_test_split
# Restrict this process to the GPU with index 2.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

# Create a TF1-style session and register it as the session Keras uses.
config = tf.ConfigProto()
config.gpu_options.allow_growth=True   # do not claim all GPU memory up front; allocate on demand
sess = tf.Session(config=config)

KTF.set_session(sess)

In [None]:
# Load the pre-split HIV dataset (train / valid / test features and labels)
# from .npy files.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects; only use it on files from a trusted source.
train_x = np.load("../../dataset_new/HIV_train_x_full.npy",allow_pickle=True)
train_y = np.load("../../dataset_new/HIV_train_y_full.npy",allow_pickle=True)
valid_x = np.load("../../dataset_new/HIV_valid_x_full.npy",allow_pickle=True)
valid_y = np.load("../../dataset_new/HIV_valid_y_full.npy",allow_pickle=True)
test_x = np.load("../../dataset_new/HIV_test_x_full.npy",allow_pickle=True)
test_y = np.load("../../dataset_new/HIV_test_y_full.npy",allow_pickle=True)
# Sanity check: array shapes of the training split and the size of each split.
print(train_x.shape)
print(train_y.shape)
print(len(train_y))
print(len(valid_y))
print(len(test_y))

In [None]:
from sklearn.metrics import mean_squared_error, roc_auc_score
def auroc(y_true, y_pred):
    """Keras-compatible metric that reports AUC via ``tf.metrics.auc``.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted scores.

    Returns:
        The second element of the pair returned by ``tf.metrics.auc``
        (the update op in the TF1 metrics API).
    """
    _, auc_update = tf.metrics.auc(y_true, y_pred)
    # Initialise the local variables in the Keras session right away so the
    # tensor returned below can be evaluated.
    K.get_session().run(tf.local_variables_initializer())
    return auc_update

In [None]:
from keras.layers import Lambda,Add, CuDNNGRU,TimeDistributed, Bidirectional,Softmax
from keras import regularizers
from keras.regularizers import l2
import tensorflow as tf
from keras import regularizers

# Dimensions of the model input: build_model() declares its input as
# Input(shape=(smilen, hidden_dim)).
smilen = 256
hidden_dim = 256


def se_block(input, channels, r=8):
    """Re-weight the channels of ``input`` with a learned sigmoid gate.

    The input is globally average-pooled, passed through a bottleneck Dense
    layer of ``channels // r`` ReLU units, expanded back to ``channels``
    sigmoid units, and the resulting gate is multiplied with the input.

    Args:
        input: Tensor to gate (presumably (batch, steps, channels) -- confirm
            against the caller).
        channels: Width of the gate; expected to match the channel dimension
            of ``input``.
        r: Reduction ratio for the bottleneck layer.

    Returns:
        The output tensor of ``Multiply()([input, gate])``.
    """
    squeezed = GlobalAveragePooling1D()(input)
    bottleneck = Dense(channels // r, activation="relu")(squeezed)
    gate = Dense(channels, activation="sigmoid")(bottleneck)
    return Multiply()([input, gate])


def conv_block(inputs, seblock, NUM_FILTERS,FILTER_LENGTH1):
    """Apply one ReLU Conv1D layer, optionally followed by ``se_block``.

    Args:
        inputs: Tensor fed to the convolution.
        seblock: When truthy, the convolution output is passed through
            ``se_block`` with ``NUM_FILTERS`` channels.
        NUM_FILTERS: Number of convolution filters.
        FILTER_LENGTH1: Convolution kernel size.

    Returns:
        The convolution output, gated by ``se_block`` if requested.
    """
    conv_layer = Conv1D(
        filters=NUM_FILTERS,
        kernel_size=FILTER_LENGTH1,
        strides=1,
        padding='valid',
        activation='relu',
    )
    features = conv_layer(inputs)

    if not seblock:
        return features
    return se_block(features, NUM_FILTERS)

    
def build_model():
    """Build and compile the two-class classifier.

    Architecture: Input(smilen, hidden_dim) -> conv_block (Conv1D, with
    se_block gating) -> GlobalMaxPooling1D -> Dense(256, relu, L2 0.01)
    -> Dense(2, softmax).

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer,
        categorical cross-entropy loss and the ``auroc`` metric.
    """
    # Encoder hyper-parameters.
    seblock = True
    NUM_FILTERS = 512
    FILTER_LENGTH1 = 5
    n_layers = 4  # not referenced below; a single conv block is applied

    drugInput = Input(shape=(smilen, hidden_dim))

    # Convolutional encoder followed by max-pooling over the sequence axis.
    conv_features = conv_block(drugInput, seblock, NUM_FILTERS, FILTER_LENGTH1)
    pooled = GlobalMaxPooling1D()(conv_features)

    # Classification head.
    hidden = Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01))(pooled)
    class_probs = Dense(2, kernel_initializer='normal', activation='softmax')(hidden)

    net = Model(inputs=[drugInput], outputs=[class_probs])
    net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=[auroc])
    return net

# Build one model up front to inspect the architecture.
model = build_model()
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
model.summary()

In [None]:
# Run training and evaluation five times
from keras.callbacks import ModelCheckpoint, EarlyStopping,ReduceLROnPlateau
from sklearn.metrics import mean_squared_error, roc_auc_score
from keras.callbacks import ReduceLROnPlateau
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve

def plot_roc_curve(fpr, tpr, label=None):
    """Draw a ROC curve on the current matplotlib figure.

    Args:
        fpr: False-positive rates (x values).
        tpr: True-positive rates (y values).
        label: Optional label attached to the curve; no legend is drawn here.
    """
    # The curve itself, then the dashed black diagonal for reference.
    plt.plot(fpr, tpr, label=label, linewidth=2)
    plt.plot([0, 1], [0, 1], 'k--')
    # Clamp both axes to the unit square.
    plt.axis([0, 1, 0, 1])
    plt.title('ROC curve')
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')

# Test-set AUROC collected from each independent run.
roc_aucs = []
for i in range(5):
    save_model_name = f'models_FP-BERT_HIV_TT64_run{i+1}_25patience_full'

    # Start every run from a freshly initialised model.
    model = build_model()
    # Keep only the weights of the epoch with the lowest validation loss.
    save_checkpoint = ModelCheckpoint(save_model_name, verbose=1,
                                      save_best_only=True,
                                      monitor='val_loss',
                                      save_weights_only=True, mode='min')
    earlyStopping = EarlyStopping(monitor='val_loss', patience=25, verbose=1,mode='min')
    # NOTE(review): this patience equals the early-stopping patience, so the
    # learning-rate reduction appears to fire only around the epoch at which
    # training stops -- confirm this is intended.
    lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=25, mode='min', verbose=1)
    model.fit(x=train_x, y=train_y, batch_size=256, epochs=500, verbose=1, validation_data=(valid_x,valid_y), callbacks=[earlyStopping, save_checkpoint, lr_reduce])

    # Evaluate the best checkpoint rather than the weights of the last epoch.
    model.load_weights(save_model_name)
    y_pred = model.predict([test_x])
    roc_auc = roc_auc_score(test_y, y_pred, average='weighted')
    print(roc_auc)
    roc_aucs.append(roc_auc)

    # Take the first column of the predictions and of the labels.
    Y_pred_0 = [y[0] for y in y_pred]
    Y_0 = [y[0] for y in test_y]

    # plt and roc_curve are imported at the top of this cell; they are not
    # re-imported on every iteration.
    fpr, tpr, thresholds_keras = roc_curve(Y_0, Y_pred_0, drop_intermediate=False)
    plt.figure()
    plot_roc_curve(fpr, tpr)
    plt.savefig(f"ROC-Curve_HIV_FP-BERT_run{i+1}_full.png")
    plt.show()
    # Close the figure so open figures do not accumulate across runs
    # (a no-op if the backend already closed it in show()).
    plt.close()
    np.save(f'HIV_FP-BERT_FPR_run{i+1}_full',fpr)
    np.save(f'HIV_FP-BERT_TPR_run{i+1}_full',tpr)

In [None]:
# Display the per-run test AUROCs together with their mean and standard deviation.
roc_aucs,np.mean(roc_aucs),np.std(roc_aucs)